feat: support alerts based on bucket diff state (#119)
* feat: support alerts based on bucket diff status * fix: correct document count calculation * fix: empty param `event_id` * fix: removing first and last time bucket * fix: removing first and last time bucket * chore: update bucket diff algorithm for improved accuracy * refactor: optimize bucket diff algorithm * feat: trigger bucket diff content change alert using expression * feat: include bucket diff type in alert message API response * chore: update release notes * feat: add alert rule template to detect cluster metrics collection anomaly * chore: update release notes
This commit is contained in:
parent
80e2a4356a
commit
f910203599
|
@ -1761,3 +1761,157 @@ POST $[[SETUP_INDEX_PREFIX]]alert-rule/$[[SETUP_DOC_TYPE]]/builtin-cal8n7p7h710d
|
||||||
"id": "$[[SETUP_USER_ID]]"
|
"id": "$[[SETUP_USER_ID]]"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
POST $[[SETUP_INDEX_PREFIX]]alert-rule/$[[SETUP_DOC_TYPE]]/builtin-cujivv5ath26drn6bcl0
|
||||||
|
{
|
||||||
|
"id": "builtin-cujivv5ath26drn6bcl0",
|
||||||
|
"created": "2025-02-08T18:20:44.273334+08:00",
|
||||||
|
"updated": "2025-02-12T16:31:05.672771+08:00",
|
||||||
|
"name": "Cluster Metrics Collection Anomaly",
|
||||||
|
"enabled": true,
|
||||||
|
"resource": {
|
||||||
|
"resource_id": "$[[SETUP_RESOURCE_ID]]",
|
||||||
|
"resource_name": "$[[SETUP_RESOURCE_NAME]]",
|
||||||
|
"type": "elasticsearch",
|
||||||
|
"objects": [
|
||||||
|
".infini_metrics*"
|
||||||
|
],
|
||||||
|
"filter": {},
|
||||||
|
"raw_filter": {
|
||||||
|
"bool": {
|
||||||
|
"must": [
|
||||||
|
{
|
||||||
|
"terms": {
|
||||||
|
"metadata.name": [
|
||||||
|
"cluster_health",
|
||||||
|
"cluster_stats",
|
||||||
|
"index_stats",
|
||||||
|
"node_stats",
|
||||||
|
"shard_stats"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"time_field": "timestamp",
|
||||||
|
"context": {
|
||||||
|
"fields": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"metrics": {
|
||||||
|
"bucket_size": "1m",
|
||||||
|
"groups": [
|
||||||
|
{
|
||||||
|
"field": "metadata.labels.cluster_id",
|
||||||
|
"limit": 5
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"field": "metadata.name",
|
||||||
|
"limit": 5
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"formula": "a",
|
||||||
|
"items": [
|
||||||
|
{
|
||||||
|
"name": "a",
|
||||||
|
"field": "agent.id",
|
||||||
|
"statistic": "count"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"bucket_label": {
|
||||||
|
"enabled": false
|
||||||
|
},
|
||||||
|
"expression": "count(agent.id)"
|
||||||
|
},
|
||||||
|
"bucket_conditions": {
|
||||||
|
"operator": "any",
|
||||||
|
"items": [
|
||||||
|
{
|
||||||
|
"minimum_period_match": 1,
|
||||||
|
"operator": "lt",
|
||||||
|
"values": [
|
||||||
|
"0"
|
||||||
|
],
|
||||||
|
"priority": "critical",
|
||||||
|
"type": "content",
|
||||||
|
"bucket_count": 10
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"notification_config": {
|
||||||
|
"enabled": true,
|
||||||
|
"title": "🔥 [{{.rule_name}}] Alerting",
|
||||||
|
"message": "{{range .results}}\n{{$cn := lookup \"category=metadata, object=cluster, property=name, default=N/A\" (index .group_values 0) }}\n{{$cu := printf \"%s/#/cluster/monitor/elasticsearch/%s\" $.env.INFINI_CONSOLE_ENDPOINT (index .group_values 0)}}\nCluster [[{{$cn}}]({{$cu}}?_g=%7B%22timeRange%22:%7B%22min%22:%22{{$.min}}%22%2C%22max%22:%22{{$.max}}%22%7D%7D)] ({{index .group_values 1}}) metrics has dropped at {{.issue_timestamp | datetime}};\n{{end}}",
|
||||||
|
"normal": [
|
||||||
|
{
|
||||||
|
"id": "cgnb2nt3q95nmusjl65g",
|
||||||
|
"enabled": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "cgiospt3q95q49k3u00g",
|
||||||
|
"enabled": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "cj865st3q95rega919ig",
|
||||||
|
"enabled": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "cgnb2r53q95nmusjl6vg",
|
||||||
|
"enabled": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "ch1os6t3q95lk6lepkq0",
|
||||||
|
"enabled": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "cgnb2kt3q95nmusjl64g",
|
||||||
|
"enabled": true
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"throttle_period": "6h",
|
||||||
|
"accept_time_range": {
|
||||||
|
"start": "00:00",
|
||||||
|
"end": "23:59"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"category": "Platform",
|
||||||
|
"recovery_notification_config": {
|
||||||
|
"enabled": true,
|
||||||
|
"title": "🌈 [{{.rule_name}}] Resolved",
|
||||||
|
"message": "EventID: {{.event_id}} \nTarget: {{.resource_name}}-{{.objects}} \nTriggerAt: {{.trigger_at | datetime}} \nResolveAt: {{.timestamp | datetime}} \nDuration: {{.duration}} ",
|
||||||
|
"normal": [
|
||||||
|
{
|
||||||
|
"id": "cj8bq8d3q95ogankugqg",
|
||||||
|
"enabled": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "cj8ctat3q95l9ebbntlg",
|
||||||
|
"enabled": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "cj8atf53q95lhahebg8g",
|
||||||
|
"enabled": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "cj8e9s53q95gsdbb054g",
|
||||||
|
"enabled": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "cj8e9gt3q95gsdbb0170",
|
||||||
|
"enabled": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "cj86l0l3q95rrpfea6ug",
|
||||||
|
"enabled": true
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"event_enabled": true
|
||||||
|
},
|
||||||
|
"schedule": {
|
||||||
|
"interval": "1m"
|
||||||
|
},
|
||||||
|
"creator": {
|
||||||
|
"name": "$[[SETUP_USERNAME]]",
|
||||||
|
"id": "$[[SETUP_USER_ID]]"
|
||||||
|
}
|
||||||
|
}
|
|
@ -12,6 +12,7 @@ Information about release notes of INFINI Console is provided here.
|
||||||
### Breaking changes
|
### Breaking changes
|
||||||
|
|
||||||
### Features
|
### Features
|
||||||
|
- Support alerts based on bucket diff state (#119)
|
||||||
|
|
||||||
### Bug fix
|
### Bug fix
|
||||||
|
|
||||||
|
|
|
@ -12,7 +12,7 @@ title: "版本历史"
|
||||||
### Breaking changes
|
### Breaking changes
|
||||||
|
|
||||||
### Features
|
### Features
|
||||||
|
- 告警功能支持根据桶之间文档数差值和内容差异告警 (#119)
|
||||||
### Bug fix
|
### Bug fix
|
||||||
|
|
||||||
### Improvements
|
### Improvements
|
||||||
|
|
|
@ -44,6 +44,28 @@ func (cond *Condition) GetMinimumPeriodMatch() int {
|
||||||
return minPeriodMatch
|
return minPeriodMatch
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (cond *Condition) GetMaxBucketCount() int {
|
||||||
|
var bucketCount = 0
|
||||||
|
for _, citem := range cond.Items {
|
||||||
|
if citem.BucketCount > bucketCount {
|
||||||
|
bucketCount = citem.BucketCount
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return bucketCount
|
||||||
|
}
|
||||||
|
|
||||||
|
// BucketDiffType names which aspect of adjacent time buckets a bucket
// condition is evaluated against.
type BucketDiffType string

// Supported bucket difference types.
const (
	// BucketDiffTypeSize evaluates the condition against the change in
	// document count between a bucket and the one before it.
	BucketDiffTypeSize = BucketDiffType("size")

	// BucketDiffTypeContent evaluates the condition against whether a group
	// appeared in or disappeared from a bucket compared with the one before it.
	BucketDiffTypeContent = BucketDiffType("content")
)
|
||||||
|
|
||||||
type ConditionItem struct {
|
type ConditionItem struct {
|
||||||
//MetricName string `json:"metric"`
|
//MetricName string `json:"metric"`
|
||||||
MinimumPeriodMatch int `json:"minimum_period_match"`
|
MinimumPeriodMatch int `json:"minimum_period_match"`
|
||||||
|
@ -51,6 +73,10 @@ type ConditionItem struct {
|
||||||
Values []string `json:"values"`
|
Values []string `json:"values"`
|
||||||
Priority string `json:"priority"`
|
Priority string `json:"priority"`
|
||||||
Expression string `json:"expression,omitempty"`
|
Expression string `json:"expression,omitempty"`
|
||||||
|
//bucket condition type, e.g: size, content
|
||||||
|
Type BucketDiffType `json:"type,omitempty"`
|
||||||
|
// Represents the number of buckets in the bucket condition type.
|
||||||
|
BucketCount int `json:"bucket_count,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (cond *ConditionItem) GenerateConditionExpression() (conditionExpression string, err error) {
|
func (cond *ConditionItem) GenerateConditionExpression() (conditionExpression string, err error) {
|
||||||
|
|
|
@ -81,10 +81,15 @@ type QueryResult struct {
|
||||||
|
|
||||||
type MetricData struct {
|
type MetricData struct {
|
||||||
GroupValues []string `json:"group_values"`
|
GroupValues []string `json:"group_values"`
|
||||||
Data map[string][]TimeMetricData `json:"data"`
|
Data map[string][]MetricDataItem `json:"data"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type TimeMetricData []interface{}
|
// MetricDataItem is a single data point of a metric series.
type MetricDataItem struct {
	// Timestamp identifies the time bucket the point belongs to; it is left
	// out of the JSON output when unset.
	Timestamp interface{} `json:"timestamp,omitempty"`
	// Value is the metric value of the point and is always serialized.
	Value interface{} `json:"value"`
	// Groups holds group values attached to the point, if any.
	// NOTE(review): no writer of this field is visible here — confirm usage.
	Groups []string `json:"groups,omitempty"`
	// DocCount is the number of documents in the bucket; a zero count is left
	// out of the JSON output.
	DocCount int `json:"doc_count,omitempty"`
}
|
||||||
|
|
||||||
type AlertMetricItem struct {
|
type AlertMetricItem struct {
|
||||||
common.MetricItem
|
common.MetricItem
|
||||||
|
|
|
@ -56,6 +56,7 @@ type Rule struct {
|
||||||
} `json:"creator" elastic_mapping:"creator:{type:object}"`
|
} `json:"creator" elastic_mapping:"creator:{type:object}"`
|
||||||
Category string `json:"category,omitempty" elastic_mapping:"category: { type: keyword,copy_to:search_text }"`
|
Category string `json:"category,omitempty" elastic_mapping:"category: { type: keyword,copy_to:search_text }"`
|
||||||
Tags []string `json:"tags,omitempty" elastic_mapping:"tags: { type: keyword,copy_to:search_text }"`
|
Tags []string `json:"tags,omitempty" elastic_mapping:"tags: { type: keyword,copy_to:search_text }"`
|
||||||
|
BucketConditions *Condition `json:"bucket_conditions" elastic_mapping:"bucket_conditions:{type:object}"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (rule *Rule) GetOrInitExpression() (string, error) {
|
func (rule *Rule) GetOrInitExpression() (string, error) {
|
||||||
|
|
|
@ -404,13 +404,23 @@ func (h *AlertAPI) getAlertMessage(w http.ResponseWriter, req *http.Request, ps
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
metricExpression, _ := rule.Metrics.GenerateExpression()
|
metricExpression, _ := rule.Metrics.GenerateExpression()
|
||||||
var hitCondition string
|
var (
|
||||||
for i, cond := range rule.Conditions.Items {
|
hitCondition string
|
||||||
|
bucketDiffType string
|
||||||
|
)
|
||||||
|
conditions := rule.Conditions
|
||||||
|
if rule.BucketConditions != nil {
|
||||||
|
conditions = *rule.BucketConditions
|
||||||
|
}
|
||||||
|
for i, cond := range conditions.Items {
|
||||||
expression, _ := cond.GenerateConditionExpression()
|
expression, _ := cond.GenerateConditionExpression()
|
||||||
if cond.Priority == message.Priority {
|
if cond.Priority == message.Priority {
|
||||||
hitCondition = strings.ReplaceAll(expression, "result", "")
|
hitCondition = strings.ReplaceAll(expression, "result", "")
|
||||||
|
if rule.BucketConditions != nil {
|
||||||
|
bucketDiffType = string(cond.Type)
|
||||||
}
|
}
|
||||||
rule.Conditions.Items[i].Expression = strings.ReplaceAll(expression, "result", metricExpression)
|
}
|
||||||
|
conditions.Items[i].Expression = strings.ReplaceAll(expression, "result", metricExpression)
|
||||||
}
|
}
|
||||||
var duration time.Duration
|
var duration time.Duration
|
||||||
if message.Status == alerting.MessageStateRecovered {
|
if message.Status == alerting.MessageStateRecovered {
|
||||||
|
@ -432,6 +442,8 @@ func (h *AlertAPI) getAlertMessage(w http.ResponseWriter, req *http.Request, ps
|
||||||
"resource_id": rule.Resource.ID,
|
"resource_id": rule.Resource.ID,
|
||||||
"resource_objects": rule.Resource.Objects,
|
"resource_objects": rule.Resource.Objects,
|
||||||
"conditions": rule.Conditions,
|
"conditions": rule.Conditions,
|
||||||
|
"bucket_conditions": rule.BucketConditions,
|
||||||
|
"bucket_diff_type": bucketDiffType,
|
||||||
"duration": duration.Milliseconds(),
|
"duration": duration.Milliseconds(),
|
||||||
"ignored_time": message.IgnoredTime,
|
"ignored_time": message.IgnoredTime,
|
||||||
"ignored_reason": message.IgnoredReason,
|
"ignored_reason": message.IgnoredReason,
|
||||||
|
|
|
@ -329,9 +329,16 @@ func getQueryTimeRange(rule *alerting.Rule, filterParam *alerting.FilterParam) (
|
||||||
} else {
|
} else {
|
||||||
return nil, fmt.Errorf("period interval: %s is too small", rule.Metrics.BucketSize)
|
return nil, fmt.Errorf("period interval: %s is too small", rule.Metrics.BucketSize)
|
||||||
}
|
}
|
||||||
bucketCount := rule.Conditions.GetMinimumPeriodMatch() + 1
|
var bucketCount int
|
||||||
|
if rule.BucketConditions != nil {
|
||||||
|
bucketCount = rule.BucketConditions.GetMaxBucketCount()
|
||||||
|
//for removing first and last time bucket
|
||||||
|
bucketCount += 2
|
||||||
|
} else {
|
||||||
|
bucketCount = rule.Conditions.GetMinimumPeriodMatch() + 1
|
||||||
|
}
|
||||||
if bucketCount <= 0 {
|
if bucketCount <= 0 {
|
||||||
bucketCount = 1
|
bucketCount = 2
|
||||||
}
|
}
|
||||||
duration, err := time.ParseDuration(fmt.Sprintf("%d%s", value*bucketCount, units))
|
duration, err := time.ParseDuration(fmt.Sprintf("%d%s", value*bucketCount, units))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -484,7 +491,7 @@ func (engine *Engine) GetTargetMetricData(rule *alerting.Rule, isFilterNaN bool,
|
||||||
} else {
|
} else {
|
||||||
targetData = alerting.MetricData{
|
targetData = alerting.MetricData{
|
||||||
GroupValues: md.GroupValues,
|
GroupValues: md.GroupValues,
|
||||||
Data: map[string][]alerting.TimeMetricData{},
|
Data: map[string][]alerting.MetricDataItem{},
|
||||||
}
|
}
|
||||||
expression, err := govaluate.NewEvaluableExpression(rule.Metrics.Formula)
|
expression, err := govaluate.NewEvaluableExpression(rule.Metrics.Formula)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -508,14 +515,14 @@ func (engine *Engine) GetTargetMetricData(rule *alerting.Rule, isFilterNaN bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
//drop nil value bucket
|
//drop nil value bucket
|
||||||
if v == nil || len(v[i]) < 2 {
|
if v == nil {
|
||||||
continue DataLoop
|
continue DataLoop
|
||||||
}
|
}
|
||||||
if _, ok := v[i][1].(float64); !ok {
|
if _, ok := v[i].Value.(float64); !ok {
|
||||||
continue DataLoop
|
continue DataLoop
|
||||||
}
|
}
|
||||||
parameters[k] = v[i][1]
|
parameters[k] = v[i].Value
|
||||||
timestamp = v[i][0]
|
timestamp = v[i].Timestamp
|
||||||
}
|
}
|
||||||
if len(parameters) == 0 {
|
if len(parameters) == 0 {
|
||||||
continue
|
continue
|
||||||
|
@ -528,13 +535,13 @@ func (engine *Engine) GetTargetMetricData(rule *alerting.Rule, isFilterNaN bool,
|
||||||
if r, ok := result.(float64); ok {
|
if r, ok := result.(float64); ok {
|
||||||
if math.IsNaN(r) || math.IsInf(r, 0) {
|
if math.IsNaN(r) || math.IsInf(r, 0) {
|
||||||
if !isFilterNaN {
|
if !isFilterNaN {
|
||||||
targetData.Data["result"] = append(targetData.Data["result"], []interface{}{timestamp, math.NaN()})
|
targetData.Data["result"] = append(targetData.Data["result"], alerting.MetricDataItem{Timestamp: timestamp, Value: math.NaN()})
|
||||||
}
|
}
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
targetData.Data["result"] = append(targetData.Data["result"], []interface{}{timestamp, result})
|
targetData.Data["result"] = append(targetData.Data["result"], alerting.MetricDataItem{Timestamp: timestamp, Value: result})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
targetMetricData = append(targetMetricData, targetData)
|
targetMetricData = append(targetMetricData, targetData)
|
||||||
|
@ -554,6 +561,9 @@ func (engine *Engine) CheckCondition(rule *alerting.Rule) (*alerting.ConditionRe
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return conditionResult, err
|
return conditionResult, err
|
||||||
}
|
}
|
||||||
|
if rule.BucketConditions != nil {
|
||||||
|
return engine.CheckBucketCondition(rule, targetMetricData, queryResult)
|
||||||
|
}
|
||||||
for idx, targetData := range targetMetricData {
|
for idx, targetData := range targetMetricData {
|
||||||
if idx == 0 {
|
if idx == 0 {
|
||||||
sort.Slice(rule.Conditions.Items, func(i, j int) bool {
|
sort.Slice(rule.Conditions.Items, func(i, j int) bool {
|
||||||
|
@ -579,16 +589,16 @@ func (engine *Engine) CheckCondition(rule *alerting.Rule) (*alerting.ConditionRe
|
||||||
triggerCount := 0
|
triggerCount := 0
|
||||||
for i := 0; i < dataLength; i++ {
|
for i := 0; i < dataLength; i++ {
|
||||||
//clear nil value
|
//clear nil value
|
||||||
if targetData.Data[dataKey][i][1] == nil {
|
if targetData.Data[dataKey][i].Value == nil {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if r, ok := targetData.Data[dataKey][i][1].(float64); ok {
|
if r, ok := targetData.Data[dataKey][i].Value.(float64); ok {
|
||||||
if math.IsNaN(r) {
|
if math.IsNaN(r) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
evaluateResult, err := expression.Evaluate(map[string]interface{}{
|
evaluateResult, err := expression.Evaluate(map[string]interface{}{
|
||||||
"result": targetData.Data[dataKey][i][1],
|
"result": targetData.Data[dataKey][i].Value,
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return conditionResult, fmt.Errorf("evaluate rule [%s] error: %w", rule.ID, err)
|
return conditionResult, fmt.Errorf("evaluate rule [%s] error: %w", rule.ID, err)
|
||||||
|
@ -603,12 +613,12 @@ func (engine *Engine) CheckCondition(rule *alerting.Rule) (*alerting.ConditionRe
|
||||||
resultItem := alerting.ConditionResultItem{
|
resultItem := alerting.ConditionResultItem{
|
||||||
GroupValues: targetData.GroupValues,
|
GroupValues: targetData.GroupValues,
|
||||||
ConditionItem: &cond,
|
ConditionItem: &cond,
|
||||||
ResultValue: targetData.Data[dataKey][i][1],
|
ResultValue: targetData.Data[dataKey][i].Value,
|
||||||
IssueTimestamp: targetData.Data[dataKey][i][0],
|
IssueTimestamp: targetData.Data[dataKey][i].Timestamp,
|
||||||
RelationValues: map[string]interface{}{},
|
RelationValues: map[string]interface{}{},
|
||||||
}
|
}
|
||||||
for _, metric := range rule.Metrics.Items {
|
for _, metric := range rule.Metrics.Items {
|
||||||
resultItem.RelationValues[metric.Name] = queryResult.MetricData[idx].Data[metric.Name][i][1]
|
resultItem.RelationValues[metric.Name] = queryResult.MetricData[idx].Data[metric.Name][i].Value
|
||||||
}
|
}
|
||||||
resultItems = append(resultItems, resultItem)
|
resultItems = append(resultItems, resultItem)
|
||||||
break LoopCondition
|
break LoopCondition
|
||||||
|
@ -621,6 +631,155 @@ func (engine *Engine) CheckCondition(rule *alerting.Rule) (*alerting.ConditionRe
|
||||||
conditionResult.ResultItems = resultItems
|
conditionResult.ResultItems = resultItems
|
||||||
return conditionResult, nil
|
return conditionResult, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type BucketDiffState struct {
|
||||||
|
ContentChangeState int
|
||||||
|
DocCount int
|
||||||
|
}
|
||||||
|
|
||||||
|
func (engine *Engine) CheckBucketCondition(rule *alerting.Rule, targetMetricData []alerting.MetricData, queryResult *alerting.QueryResult) (*alerting.ConditionResult, error) {
|
||||||
|
var resultItems []alerting.ConditionResultItem
|
||||||
|
conditionResult := &alerting.ConditionResult{
|
||||||
|
QueryResult: queryResult,
|
||||||
|
}
|
||||||
|
//transform targetMetricData
|
||||||
|
var (
|
||||||
|
times = map[int64]struct{}{}
|
||||||
|
buckets = map[string]map[int64]int{}
|
||||||
|
maxTime int64
|
||||||
|
minTime = time.Now().UnixMilli()
|
||||||
|
)
|
||||||
|
for _, targetData := range targetMetricData {
|
||||||
|
for _, v := range targetData.Data {
|
||||||
|
for _, item := range v {
|
||||||
|
if tv, ok := item.Timestamp.(float64); ok {
|
||||||
|
timestamp := int64(tv)
|
||||||
|
if timestamp < minTime {
|
||||||
|
minTime = timestamp
|
||||||
|
}
|
||||||
|
if timestamp > maxTime {
|
||||||
|
maxTime = timestamp
|
||||||
|
}
|
||||||
|
if _, ok = times[timestamp]; !ok {
|
||||||
|
times[timestamp] = struct{}{}
|
||||||
|
}
|
||||||
|
bucketKey := strings.Join(targetData.GroupValues, "*")
|
||||||
|
if _, ok = buckets[bucketKey]; !ok {
|
||||||
|
buckets[bucketKey] = map[int64]int{}
|
||||||
|
}
|
||||||
|
buckets[bucketKey][timestamp] = item.DocCount
|
||||||
|
} else {
|
||||||
|
log.Warnf("invalid timestamp type: %T", item.Timestamp)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
var timesArr []int64
|
||||||
|
for t := range times {
|
||||||
|
timesArr = append(timesArr, t)
|
||||||
|
}
|
||||||
|
sort.Slice(timesArr, func(i, j int) bool {
|
||||||
|
return timesArr[i] < timesArr[j] // Ascending order
|
||||||
|
})
|
||||||
|
|
||||||
|
// Remove the first bucket if its timestamp equals minTime, and
|
||||||
|
// the last bucket if its timestamp equals maxTime
|
||||||
|
if len(timesArr) > 0 && timesArr[0] == minTime {
|
||||||
|
// Remove first bucket if timestamp matches minTime
|
||||||
|
timesArr = timesArr[1:]
|
||||||
|
}
|
||||||
|
if len(timesArr) > 0 && timesArr[len(timesArr)-1] == maxTime {
|
||||||
|
// Remove last bucket if timestamp matches maxTime
|
||||||
|
timesArr = timesArr[:len(timesArr)-1]
|
||||||
|
}
|
||||||
|
|
||||||
|
//check bucket diff
|
||||||
|
diffResult := map[string]map[int64]BucketDiffState{}
|
||||||
|
for grps, bk := range buckets {
|
||||||
|
hasPre := false
|
||||||
|
if _, ok := diffResult[grps]; !ok {
|
||||||
|
diffResult[grps] = map[int64]BucketDiffState{}
|
||||||
|
}
|
||||||
|
for i, t := range timesArr {
|
||||||
|
if v, ok := bk[t]; !ok {
|
||||||
|
if hasPre {
|
||||||
|
diffResult[grps][t] = BucketDiffState{
|
||||||
|
ContentChangeState: -1,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// reset hasPre to false
|
||||||
|
hasPre = false
|
||||||
|
} else {
|
||||||
|
if !hasPre {
|
||||||
|
if i > 0 {
|
||||||
|
diffResult[grps][t] = BucketDiffState{
|
||||||
|
ContentChangeState: 1,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
diffResult[grps][t] = BucketDiffState{
|
||||||
|
ContentChangeState: 0,
|
||||||
|
DocCount: v - bk[timesArr[i-1]],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
hasPre = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sort.Slice(rule.BucketConditions.Items, func(i, j int) bool {
|
||||||
|
return alerting.PriorityWeights[rule.BucketConditions.Items[i].Priority] > alerting.PriorityWeights[rule.BucketConditions.Items[j].Priority]
|
||||||
|
})
|
||||||
|
|
||||||
|
for grps, states := range diffResult {
|
||||||
|
LoopCondition:
|
||||||
|
for _, cond := range rule.BucketConditions.Items {
|
||||||
|
conditionExpression, err := cond.GenerateConditionExpression()
|
||||||
|
if err != nil {
|
||||||
|
return conditionResult, err
|
||||||
|
}
|
||||||
|
expression, err := govaluate.NewEvaluableExpression(conditionExpression)
|
||||||
|
if err != nil {
|
||||||
|
return conditionResult, err
|
||||||
|
}
|
||||||
|
triggerCount := 0
|
||||||
|
for t, state := range states {
|
||||||
|
resultValue := state.DocCount
|
||||||
|
if cond.Type == alerting.BucketDiffTypeContent {
|
||||||
|
resultValue = state.ContentChangeState
|
||||||
|
}
|
||||||
|
evaluateResult, err := expression.Evaluate(map[string]interface{}{
|
||||||
|
"result": resultValue,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return conditionResult, fmt.Errorf("evaluate rule [%s] error: %w", rule.ID, err)
|
||||||
|
}
|
||||||
|
if evaluateResult == true {
|
||||||
|
triggerCount += 1
|
||||||
|
} else {
|
||||||
|
triggerCount = 0
|
||||||
|
}
|
||||||
|
if triggerCount >= cond.MinimumPeriodMatch {
|
||||||
|
groupValues := strings.Split(grps, "*")
|
||||||
|
log.Debugf("triggered condition %v, groups: %v\n", cond, groupValues)
|
||||||
|
resultItem := alerting.ConditionResultItem{
|
||||||
|
GroupValues: groupValues,
|
||||||
|
ConditionItem: &cond,
|
||||||
|
ResultValue: resultValue,
|
||||||
|
IssueTimestamp: t,
|
||||||
|
RelationValues: map[string]interface{}{},
|
||||||
|
}
|
||||||
|
resultItems = append(resultItems, resultItem)
|
||||||
|
break LoopCondition
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
conditionResult.QueryResult.MetricData = targetMetricData
|
||||||
|
conditionResult.ResultItems = resultItems
|
||||||
|
return conditionResult, nil
|
||||||
|
}
|
||||||
func (engine *Engine) Do(rule *alerting.Rule) error {
|
func (engine *Engine) Do(rule *alerting.Rule) error {
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
@ -755,15 +914,9 @@ func (engine *Engine) Do(rule *alerting.Rule) error {
|
||||||
})
|
})
|
||||||
|
|
||||||
alertItem.Priority = priority
|
alertItem.Priority = priority
|
||||||
title, message := rule.GetNotificationTitleAndMessage()
|
var newAlertMessage *alerting.AlertMessage
|
||||||
err = attachTitleMessageToCtx(title, message, paramsCtx)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
alertItem.Message = paramsCtx[alerting2.ParamMessage].(string)
|
|
||||||
alertItem.Title = paramsCtx[alerting2.ParamTitle].(string)
|
|
||||||
if alertMessage == nil || alertMessage.Status == alerting.MessageStateRecovered {
|
if alertMessage == nil || alertMessage.Status == alerting.MessageStateRecovered {
|
||||||
msg := &alerting.AlertMessage{
|
newAlertMessage = &alerting.AlertMessage{
|
||||||
RuleID: rule.ID,
|
RuleID: rule.ID,
|
||||||
Created: alertItem.Created,
|
Created: alertItem.Created,
|
||||||
Updated: time.Now(),
|
Updated: time.Now(),
|
||||||
|
@ -772,13 +925,25 @@ func (engine *Engine) Do(rule *alerting.Rule) error {
|
||||||
ResourceName: rule.Resource.Name,
|
ResourceName: rule.Resource.Name,
|
||||||
Status: alerting.MessageStateAlerting,
|
Status: alerting.MessageStateAlerting,
|
||||||
Priority: priority,
|
Priority: priority,
|
||||||
Title: alertItem.Title,
|
|
||||||
Message: alertItem.Message,
|
|
||||||
Tags: rule.Tags,
|
Tags: rule.Tags,
|
||||||
Category: rule.Category,
|
Category: rule.Category,
|
||||||
}
|
}
|
||||||
alertMessage = msg
|
paramsCtx[alerting2.ParamEventID] = newAlertMessage.ID
|
||||||
err = saveAlertMessage(msg)
|
} else {
|
||||||
|
paramsCtx[alerting2.ParamEventID] = alertMessage.ID
|
||||||
|
}
|
||||||
|
title, message := rule.GetNotificationTitleAndMessage()
|
||||||
|
err = attachTitleMessageToCtx(title, message, paramsCtx)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
alertItem.Message = paramsCtx[alerting2.ParamMessage].(string)
|
||||||
|
alertItem.Title = paramsCtx[alerting2.ParamTitle].(string)
|
||||||
|
if newAlertMessage != nil {
|
||||||
|
alertMessage = newAlertMessage
|
||||||
|
alertMessage.Title = alertItem.Title
|
||||||
|
alertMessage.Message = alertItem.Message
|
||||||
|
err = saveAlertMessage(newAlertMessage)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("save alert message error: %w", err)
|
return fmt.Errorf("save alert message error: %w", err)
|
||||||
}
|
}
|
||||||
|
@ -813,10 +978,10 @@ func (engine *Engine) Do(rule *alerting.Rule) error {
|
||||||
log.Debugf("check condition result of rule %s is %v", conditionResults, rule.ID)
|
log.Debugf("check condition result of rule %s is %v", conditionResults, rule.ID)
|
||||||
|
|
||||||
// if alert message status equals ignored , then skip sending message to channel
|
// if alert message status equals ignored , then skip sending message to channel
|
||||||
if alertMessage != nil && alertMessage.Status == alerting.MessageStateIgnored {
|
if alertMessage.Status == alerting.MessageStateIgnored {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
if alertMessage != nil && paramsCtx != nil {
|
if paramsCtx != nil {
|
||||||
paramsCtx[alerting2.ParamEventID] = alertMessage.ID
|
paramsCtx[alerting2.ParamEventID] = alertMessage.ID
|
||||||
}
|
}
|
||||||
// if channel is not enabled return
|
// if channel is not enabled return
|
||||||
|
@ -1135,12 +1300,16 @@ func collectMetricData(agg interface{}, groupValues string, metricData *[]alerti
|
||||||
if timeBks, ok := aggM["time_buckets"].(map[string]interface{}); ok {
|
if timeBks, ok := aggM["time_buckets"].(map[string]interface{}); ok {
|
||||||
if bks, ok := timeBks["buckets"].([]interface{}); ok {
|
if bks, ok := timeBks["buckets"].([]interface{}); ok {
|
||||||
md := alerting.MetricData{
|
md := alerting.MetricData{
|
||||||
Data: map[string][]alerting.TimeMetricData{},
|
Data: map[string][]alerting.MetricDataItem{},
|
||||||
GroupValues: strings.Split(groupValues, "*"),
|
GroupValues: strings.Split(groupValues, "*"),
|
||||||
}
|
}
|
||||||
for _, bk := range bks {
|
for _, bk := range bks {
|
||||||
if bkM, ok := bk.(map[string]interface{}); ok {
|
if bkM, ok := bk.(map[string]interface{}); ok {
|
||||||
|
|
||||||
|
var docCount int
|
||||||
|
if v, ok := bkM["doc_count"]; ok {
|
||||||
|
docCount = int(v.(float64))
|
||||||
|
}
|
||||||
for k, v := range bkM {
|
for k, v := range bkM {
|
||||||
if k == "key" || k == "key_as_string" || k == "doc_count" {
|
if k == "key" || k == "key_as_string" || k == "doc_count" {
|
||||||
continue
|
continue
|
||||||
|
@ -1150,20 +1319,20 @@ func collectMetricData(agg interface{}, groupValues string, metricData *[]alerti
|
||||||
}
|
}
|
||||||
if vm, ok := v.(map[string]interface{}); ok {
|
if vm, ok := v.(map[string]interface{}); ok {
|
||||||
if metricVal, ok := vm["value"]; ok {
|
if metricVal, ok := vm["value"]; ok {
|
||||||
md.Data[k] = append(md.Data[k], alerting.TimeMetricData{bkM["key"], metricVal})
|
md.Data[k] = append(md.Data[k], alerting.MetricDataItem{Timestamp: bkM["key"], Value: metricVal, DocCount: docCount})
|
||||||
} else {
|
} else {
|
||||||
//percentiles agg type
|
//percentiles agg type
|
||||||
switch vm["values"].(type) {
|
switch vm["values"].(type) {
|
||||||
case []interface{}:
|
case []interface{}:
|
||||||
for _, val := range vm["values"].([]interface{}) {
|
for _, val := range vm["values"].([]interface{}) {
|
||||||
if valM, ok := val.(map[string]interface{}); ok {
|
if valM, ok := val.(map[string]interface{}); ok {
|
||||||
md.Data[k] = append(md.Data[k], alerting.TimeMetricData{bkM["key"], valM["value"]})
|
md.Data[k] = append(md.Data[k], alerting.MetricDataItem{Timestamp: bkM["key"], Value: valM["value"], DocCount: docCount})
|
||||||
}
|
}
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
case map[string]interface{}:
|
case map[string]interface{}:
|
||||||
for _, val := range vm["values"].(map[string]interface{}) {
|
for _, val := range vm["values"].(map[string]interface{}) {
|
||||||
md.Data[k] = append(md.Data[k], alerting.TimeMetricData{bkM["key"], val})
|
md.Data[k] = append(md.Data[k], alerting.MetricDataItem{Timestamp: bkM["key"], Value: val, DocCount: docCount})
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue