From b9de7b67a4373879705fc5960cbe73f27afb7350 Mon Sep 17 00:00:00 2001 From: liugq Date: Wed, 18 May 2022 15:05:07 +0800 Subject: [PATCH 01/36] update alert api --- main.go | 1 + model/alerting/alert.go | 32 ++- model/alerting/condition.go | 1 - model/alerting/metric.go | 2 + model/alerting/rule.go | 12 +- model/alerting/rule_test.go | 6 +- plugin/api/alerting/alert.go | 20 +- plugin/api/alerting/api.go | 5 +- plugin/api/alerting/message.go | 222 ++++++++++++++++++ plugin/api/alerting/rule.go | 87 +++++-- service/alerting/constants.go | 4 +- service/alerting/elasticsearch/engine.go | 159 +++++++++---- service/alerting/elasticsearch/engine_test.go | 10 +- service/alerting/funcs/date.go | 18 +- service/alerting/funcs/function.go | 12 +- service/alerting/parameter.go | 3 +- 16 files changed, 488 insertions(+), 106 deletions(-) create mode 100644 plugin/api/alerting/message.go diff --git a/main.go b/main.go index cb158c3d..51d3dd55 100644 --- a/main.go +++ b/main.go @@ -112,6 +112,7 @@ func main() { orm.RegisterSchemaWithIndexName(gateway.Instance{}, "gateway-instance") orm.RegisterSchemaWithIndexName(alerting.Rule{}, "alert-rule") orm.RegisterSchemaWithIndexName(alerting.Alert{}, "alert-history") + orm.RegisterSchemaWithIndexName(alerting.AlertMessage{}, "alert-message") api.RegisterSchema() go func() { diff --git a/model/alerting/alert.go b/model/alerting/alert.go index 6d80afb8..293ffcd1 100644 --- a/model/alerting/alert.go +++ b/model/alerting/alert.go @@ -18,8 +18,9 @@ type Alert struct { Expression string `json:"expression" elastic_mapping:"expression: { type: keyword, copy_to:search_text }"` Objects []string `json:"objects" elastic_mapping:"objects: { type:keyword,copy_to:search_text }"` Severity string `json:"severity" elastic_mapping:"severity: { type: keyword }"` - Content string `json:"content" elastic_mapping:"context: { type: keyword, copy_to:search_text }"` - AcknowledgedTime interface{} `json:"acknowledged_time,omitempty"` + Title string `json:"title" 
elastic_mapping:"title: { type: keyword }"` + Message string `json:"message" elastic_mapping:"context: { type: keyword, copy_to:search_text }"` + AcknowledgedTime interface{} `json:"acknowledged_time,omitempty"` ActionExecutionResults []ActionExecutionResult `json:"action_execution_results"` Users []string `json:"users,omitempty"` State string `json:"state"` @@ -43,10 +44,28 @@ type ActionExecutionResult struct { const ( AlertStateActive string = "active" AlertStateAcknowledge = "acknowledged" - AlertStateNormal = "normal" - AlertStateError = "error" + AlertStateOK = "normal" + AlertStateError = "error" ) +const ( + MessageStateActive = "active" + MessageStateIgnored = "ignored" + MessageStateRecovered = "recovered" +) + +type AlertMessage struct { + ID string `json:"id,omitempty" elastic_meta:"_id" elastic_mapping:"id: { type: keyword }"` + Created time.Time `json:"created,omitempty" elastic_mapping:"created: { type: date }"` + Updated time.Time `json:"updated,omitempty" elastic_mapping:"updated: { type: date }"` + RuleID string `json:"rule_id" elastic_mapping:"rule_id: { type: keyword,copy_to:search_text }"` + Title string `json:"title" elastic_mapping:"title: { type: keyword,copy_to:search_text }"` + Message string `json:"message" elastic_mapping:"content: { type: keyword,copy_to:search_text }"` + Status string `json:"status" elastic_mapping:"status: { type: keyword,copy_to:search_text }"` + IgnoredTime time.Time `json:"ignored_time,omitempty" elastic_mapping:"ignored_time: { type: date }"` + Severity string `json:"severity" elastic_mapping:"severity: { type: keyword }"` + SearchText string `json:"-" elastic_mapping:"search_text:{type:text,index_prefixes:{},index_phrases:true, analyzer:suggest_text_search }"` +} /* { @@ -54,4 +73,7 @@ const ( ResourceID ResourceName } -*/ \ No newline at end of file +*/ + +//message status (Active, Ignore, Recover) +//rule status (Active, Error, OK) \ No newline at end of file diff --git a/model/alerting/condition.go 
b/model/alerting/condition.go index 494f790a..097f3c6f 100644 --- a/model/alerting/condition.go +++ b/model/alerting/condition.go @@ -17,7 +17,6 @@ type ConditionItem struct { Operator string `json:"operator"` Values []string `json:"values"` Severity string `json:"severity"` - Message string `json:"message"` } func (cond *ConditionItem) GenerateConditionExpression()(conditionExpression string, err error){ valueLength := len(cond.Values) diff --git a/model/alerting/metric.go b/model/alerting/metric.go index ca40e945..5f8fd0cb 100644 --- a/model/alerting/metric.go +++ b/model/alerting/metric.go @@ -15,6 +15,8 @@ type Metric struct { Items []MetricItem `json:"items"` Formula string `json:"formula,omitempty"` Expression string `json:"expression" elastic_mapping:"expression:{type:keyword,copy_to:search_text}"` //告警表达式,自动生成 eg: avg(cpu) > 80 + Title string `json:"title"` //text template + Message string `json:"message"` // text template } func (m *Metric) GenerateExpression() (string, error){ if len(m.Items) == 1 { diff --git a/model/alerting/rule.go b/model/alerting/rule.go index dafeb501..c6e4c223 100644 --- a/model/alerting/rule.go +++ b/model/alerting/rule.go @@ -52,12 +52,13 @@ func (rule *Rule) GetOrInitExpression() (string, error){ } type RuleChannel struct { - Normal []Channel `json:"normal"` + Enabled bool `json:"enabled"` + Normal []Channel `json:"normal,omitempty"` Escalation []Channel `json:"escalation,omitempty"` - ThrottlePeriod string `json:"throttle_period"` //沉默周期 - AcceptTimeRange TimeRange `json:"accept_time_range"` + ThrottlePeriod string `json:"throttle_period,omitempty"` //沉默周期 + AcceptTimeRange TimeRange `json:"accept_time_range,omitempty"` EscalationThrottlePeriod string `json:"escalation_throttle_period,omitempty"` - EscalationEnabled bool `json:"escalation_enabled"` + EscalationEnabled bool `json:"escalation_enabled,omitempty"` } type MessageTemplate struct{ @@ -71,6 +72,9 @@ type TimeRange struct { } func (tr *TimeRange) Include( t time.Time) 
bool { + if tr.Start == "" || tr.End == "" { + return true + } currentTimeStr := t.Format("15:04") return tr.Start <= currentTimeStr && currentTimeStr <= tr.End } diff --git a/model/alerting/rule_test.go b/model/alerting/rule_test.go index 21583231..93e1e257 100644 --- a/model/alerting/rule_test.go +++ b/model/alerting/rule_test.go @@ -55,7 +55,7 @@ func TestCreateRule( t *testing.T) { //Conditions: Condition{ // Operator: "any", // Items: []ConditionItem{ - // { MinimumPeriodMatch: 1, Operator: "gte", Values: []string{"1"}, Severity: "error", Message: "集群健康状态为 Red"}, + // { MinimumPeriodMatch: 1, Operator: "gte", Values: []string{"1"}, Severity: "error", AlertMessage: "集群健康状态为 Red"}, // }, //}, @@ -80,7 +80,7 @@ func TestCreateRule( t *testing.T) { Normal: []Channel{ {Name: "钉钉", Type: ChannelWebhook, Webhook: &CustomWebhook{ HeaderParams: map[string]string{ - "Content-Type": "application/json", + "Message-Type": "application/json", }, Body: `{"msgtype": "text","text": {"content":"告警通知: {{ctx.message}}"}}`, Method: http.MethodPost, @@ -90,7 +90,7 @@ func TestCreateRule( t *testing.T) { Escalation: []Channel{ {Type: ChannelWebhook, Name: "微信", Webhook: &CustomWebhook{ HeaderParams: map[string]string{ - "Content-Type": "application/json", + "Message-Type": "application/json", }, Body: `{"msgtype": "text","text": {"content":"告警通知: {{ctx.message}}"}}`, Method: http.MethodPost, diff --git a/plugin/api/alerting/alert.go b/plugin/api/alerting/alert.go index 0fdf70c7..a3138561 100644 --- a/plugin/api/alerting/alert.go +++ b/plugin/api/alerting/alert.go @@ -92,9 +92,16 @@ func (h *AlertAPI) searchAlert(w http.ResponseWriter, req *http.Request, ps http state = h.GetParameterOrDefault(req, "state", "") severity = h.GetParameterOrDefault(req, "severity", "") sort = h.GetParameterOrDefault(req, "sort", "") + ruleID = h.GetParameterOrDefault(req, "rule_id", "") + min = h.GetParameterOrDefault(req, "min", "") + max = h.GetParameterOrDefault(req, "max", "") mustBuilder = 
&strings.Builder{} sortBuilder = strings.Builder{} ) + mustBuilder.WriteString(fmt.Sprintf(`{"range":{"created":{"gte":"%s", "lte": "%s"}}}`, min, max)) + if ruleID != "" { + mustBuilder.WriteString(fmt.Sprintf(`,{"term":{"rule_id":{"value":"%s"}}}`, ruleID)) + } if sort != "" { sortParts := strings.Split(sort, ",") @@ -103,24 +110,17 @@ func (h *AlertAPI) searchAlert(w http.ResponseWriter, req *http.Request, ps http } } sortBuilder.WriteString(`{"created":{ "order": "desc"}}`) - hasFilter := false + if keyword != "" { mustBuilder.WriteString(fmt.Sprintf(`{"query_string":{"default_field":"*","query": "%s"}}`, keyword)) - hasFilter = true } if state != "" { - if hasFilter { - mustBuilder.WriteString(",") - } + mustBuilder.WriteString(",") mustBuilder.WriteString(fmt.Sprintf(`{"term":{"state":{"value":"%s"}}}`, state)) - hasFilter = true } if severity != "" { - if hasFilter { - mustBuilder.WriteString(",") - } + mustBuilder.WriteString(",") mustBuilder.WriteString(fmt.Sprintf(`{"term":{"severity":{"value":"%s"}}}`, severity)) - hasFilter = true } size, _ := strconv.Atoi(strSize) if size <= 0 { diff --git a/plugin/api/alerting/api.go b/plugin/api/alerting/api.go index 0a29d4b1..c54a0a8a 100644 --- a/plugin/api/alerting/api.go +++ b/plugin/api/alerting/api.go @@ -35,9 +35,12 @@ func (alert *AlertAPI) Init() { api.HandleAPIMethod(api.GET, "/alerting/alert/_search", alert.searchAlert) api.HandleAPIMethod(api.GET, "/alerting/alert/:alert_id", alert.getAlert) - api.HandleAPIMethod(api.POST, "/alerting/alert/_acknowledge", alert.acknowledgeAlert) api.HandleAPIMethod(api.GET, "/alerting/template/parameters", alert.getTemplateParams) + api.HandleAPIMethod(api.POST, "/alerting/message/_search", alert.searchAlertMessage) + api.HandleAPIMethod(api.POST, "/alerting/message/_ignore", alert.ignoreAlertMessage) + api.HandleAPIMethod(api.GET, "/alerting/message/_stats", alert.getAlertMessageStats) + //just for test //api.HandleAPIMethod(api.GET, "/alerting/rule/test", alert.testRule) 
diff --git a/plugin/api/alerting/message.go b/plugin/api/alerting/message.go new file mode 100644 index 00000000..5644f816 --- /dev/null +++ b/plugin/api/alerting/message.go @@ -0,0 +1,222 @@ +/* Copyright © INFINI Ltd. All rights reserved. + * web: https://infinilabs.com + * mail: hello#infini.ltd */ + +package alerting + +import ( + "fmt" + log "github.com/cihub/seelog" + "infini.sh/console/model/alerting" + httprouter "infini.sh/framework/core/api/router" + "infini.sh/framework/core/elastic" + "infini.sh/framework/core/orm" + "infini.sh/framework/core/util" + "net/http" + "time" +) + +func (h *AlertAPI) ignoreAlertMessage(w http.ResponseWriter, req *http.Request, ps httprouter.Params) { + body := struct { + MessageIDs []string `json:"ids"` + }{} + err := h.DecodeJSON(req, &body) + if err != nil { + h.WriteError(w, err.Error(), http.StatusInternalServerError) + return + } + + if len(body.MessageIDs) == 0 { + h.WriteError(w, "alert ids should not be empty", http.StatusInternalServerError) + return + } + queryDsl := util.MapStr{ + "query": util.MapStr{ + "terms": util.MapStr{ + "_id": body.MessageIDs, + }, + }, + "script": util.MapStr{ + "source": fmt.Sprintf("ctx._source['status'] = '%s';ctx._source['ignored_time']='%s'", alerting.MessageStateIgnored, time.Now().Format(time.RFC3339Nano)), + }, + } + err = orm.UpdateBy(alerting.AlertMessage{}, util.MustToJSONBytes(queryDsl)) + if err != nil { + h.WriteError(w, err.Error(), http.StatusInternalServerError) + log.Error(err) + return + } + + h.WriteJSON(w, util.MapStr{ + "ids": body.MessageIDs, + "result": "updated", + }, 200) +} + +func (h *AlertAPI) getAlertMessageStats(w http.ResponseWriter, req *http.Request, ps httprouter.Params) { + esClient := elastic.GetClient(h.Config.Elasticsearch) + queryDsl := util.MapStr{ + "size": 0, + "query": util.MapStr{ + "bool": util.MapStr{ + "must_not": []util.MapStr{ + { + "terms": util.MapStr{ + "status": []string{ + alerting.MessageStateRecovered, + }, + }, + }, + }, + }, + }, + 
"aggs": util.MapStr{ + "terms_by_severity": util.MapStr{ + "terms": util.MapStr{ + "field": "severity", + "size": 5, + }, + }, + }, + } + + searchRes, err := esClient.SearchWithRawQueryDSL(orm.GetWildcardIndexName(alerting.AlertMessage{}), util.MustToJSONBytes(queryDsl) ) + if err != nil { + h.WriteJSON(w, util.MapStr{ + "error": err.Error(), + }, http.StatusInternalServerError) + return + } + statusCounts := map[string]interface{}{} + if termsAgg, ok := searchRes.Aggregations["terms_by_severity"]; ok { + for _, bk := range termsAgg.Buckets { + if status, ok := bk["key"].(string); ok { + statusCounts[status] = bk["doc_count"] + } + } + } + for _, status := range []string{"warning", "error", "critical"} { + if _, ok := statusCounts[status]; !ok { + statusCounts[status] = 0 + } + } + h.WriteJSON(w, util.MapStr{ + "alert": util.MapStr{ + "current": statusCounts, + }, + }, http.StatusOK) +} + + +func (h *AlertAPI) searchAlertMessage(w http.ResponseWriter, req *http.Request, ps httprouter.Params) { + resBody:=util.MapStr{} + reqBody := struct{ + Keyword string `json:"keyword"` + Size int `json:"size"` + From int `json:"from"` + Aggregations []elastic.SearchAggParam `json:"aggs"` + Highlight elastic.SearchHighlightParam `json:"highlight"` + Filter elastic.SearchFilterParam `json:"filter"` + Sort []string `json:"sort"` + SearchField string `json:"search_field"` + }{} + err := h.DecodeJSON(req, &reqBody) + if err != nil { + resBody["error"] = err.Error() + h.WriteJSON(w,resBody, http.StatusInternalServerError ) + return + } + if reqBody.Size <= 0 { + reqBody.Size = 20 + } + aggs := elastic.BuildSearchTermAggregations(reqBody.Aggregations) + filter := elastic.BuildSearchTermFilter(reqBody.Filter) + var should []util.MapStr + if reqBody.SearchField != ""{ + should = []util.MapStr{ + { + "prefix": util.MapStr{ + reqBody.SearchField: util.MapStr{ + "value": reqBody.Keyword, + "boost": 20, + }, + }, + }, + { + "match": util.MapStr{ + reqBody.SearchField: util.MapStr{ + "query": 
reqBody.Keyword, + "fuzziness": "AUTO", + "max_expansions": 10, + "prefix_length": 2, + "fuzzy_transpositions": true, + "boost": 2, + }, + }, + }, + } + }else{ + if reqBody.Keyword != ""{ + should = []util.MapStr{ + { + "match": util.MapStr{ + "search_text": util.MapStr{ + "query": reqBody.Keyword, + "fuzziness": "AUTO", + "max_expansions": 10, + "prefix_length": 2, + "fuzzy_transpositions": true, + "boost": 2, + }, + }, + }, + { + "query_string": util.MapStr{ + "fields": []string{"*"}, + "query": reqBody.Keyword, + "fuzziness": "AUTO", + "fuzzy_prefix_length": 2, + "fuzzy_max_expansions": 10, + "fuzzy_transpositions": true, + "allow_leading_wildcard": false, + }, + }, + } + } + } + boolQuery := util.MapStr{ + "filter": filter, + } + if len(should) > 0 { + boolQuery["should"] = should + boolQuery["minimum_should_match"] = 1 + } + query := util.MapStr{ + "aggs": aggs, + "size": reqBody.Size, + "from": reqBody.From, + "highlight": elastic.BuildSearchHighlight(&reqBody.Highlight), + "query": util.MapStr{ + "bool": boolQuery, + }, + } + if len(reqBody.Sort) > 1 { + query["sort"] = []util.MapStr{ + { + reqBody.Sort[0]: util.MapStr{ + "order": reqBody.Sort[1], + }, + }, + } + } + dsl := util.MustToJSONBytes(query) + response, err := elastic.GetClient(h.Config.Elasticsearch).SearchWithRawQueryDSL(orm.GetIndexName(alerting.AlertMessage{}), dsl) + if err != nil { + resBody["error"] = err.Error() + h.WriteJSON(w,resBody, http.StatusInternalServerError ) + return + } + h.WriteJSONHeader(w) + w.Write(util.MustToJSONBytes(response)) + +} \ No newline at end of file diff --git a/plugin/api/alerting/rule.go b/plugin/api/alerting/rule.go index 1abd055d..0d422f35 100644 --- a/plugin/api/alerting/rule.go +++ b/plugin/api/alerting/rule.go @@ -479,6 +479,39 @@ func (alertAPI *AlertAPI) getTemplateParams(w http.ResponseWriter, req *http.Req }, http.StatusOK) } +func (alertAPI *AlertAPI) getPreviewMetricData(w http.ResponseWriter, req *http.Request, ps httprouter.Params) { + rule := 
&alerting.Rule{} + err := alertAPI.DecodeJSON(req, rule) + if err != nil { + log.Error(err) + alertAPI.WriteJSON(w, util.MapStr{ + "error": err.Error(), + }, http.StatusInternalServerError) + return + } + var ( + minStr = alertAPI.Get(req, "min", "") + maxStr = alertAPI.Get(req, "max", "") + ) + bucketSize, min, max, err := api.GetMetricRangeAndBucketSize(minStr, maxStr, 60, 15) + filterParam := &alerting.FilterParam{ + Start: min, + End: max, + BucketSize: fmt.Sprintf("%ds", bucketSize), + } + metricItem, err := getRuleMetricData(rule, filterParam) + if err != nil { + log.Error(err) + alertAPI.WriteJSON(w, util.MapStr{ + "error": err.Error(), + }, http.StatusInternalServerError) + return + } + alertAPI.WriteJSON(w, util.MapStr{ + "metric": metricItem, + }, http.StatusOK) +} + func (alertAPI *AlertAPI) getMetricData(w http.ResponseWriter, req *http.Request, ps httprouter.Params) { rule := &alerting.Rule{ ID: ps.ByName("rule_id"), @@ -501,8 +534,7 @@ func (alertAPI *AlertAPI) getMetricData(w http.ResponseWriter, req *http.Request End: max, BucketSize: fmt.Sprintf("%ds", bucketSize), } - eng := alerting2.GetEngine(rule.Resource.Type) - metricData, err := eng.GetTargetMetricData(rule, true, filterParam) + metricItem, err := getRuleMetricData(rule, filterParam) if err != nil { log.Error(err) alertAPI.WriteJSON(w, util.MapStr{ @@ -510,18 +542,29 @@ func (alertAPI *AlertAPI) getMetricData(w http.ResponseWriter, req *http.Request }, http.StatusInternalServerError) return } - //var filteredMetricData []alerting.MetricData - title := rule.Metrics.Formula - if title == "" && len( rule.Conditions.Items) > 0{ - title,_ = rule.Conditions.Items[0].GenerateConditionExpression() + alertAPI.WriteJSON(w, util.MapStr{ + "metric": metricItem, + }, http.StatusOK) +} + +func getRuleMetricData( rule *alerting.Rule, filterParam *alerting.FilterParam) (*common.MetricItem, error) { + eng := alerting2.GetEngine(rule.Resource.Type) + metricData, err := eng.GetTargetMetricData(rule, true, 
filterParam) + if err != nil { + return nil, err } + //var filteredMetricData []alerting.MetricData + //title := rule.Metrics.Formula + //if title == "" && len( rule.Conditions.Items) > 0{ + // title,_ = rule.Conditions.Items[0].GenerateConditionExpression() + //} var metricItem = common.MetricItem{ Group: rule.ID, - Key: rule.ID, + Key: rule.ID, Axis: []*common.MetricAxis{ - {ID: util.GetUUID(), Group: rule.ID, Title: title, FormatType: "num", Position: "left",ShowGridLines: true, + {ID: util.GetUUID(), Group: rule.ID, Title: "", FormatType: "num", Position: "left", ShowGridLines: true, TickFormat: "0,0.[00]", - Ticks: 5}, + Ticks: 5}, }, } var sampleData []alerting.TimeMetricData @@ -534,18 +577,18 @@ func (alertAPI *AlertAPI) getMetricData(w http.ResponseWriter, req *http.Request sampleData = md.Data["result"] } metricItem.Lines = append(metricItem.Lines, &common.MetricLine{ - Data: md.Data["result"], + Data: md.Data["result"], BucketSize: filterParam.BucketSize, Metric: common.MetricSummary{ - Label: strings.Join(md.GroupValues, "-"), - Group: rule.ID, + Label: strings.Join(md.GroupValues, "-"), + Group: rule.ID, TickFormat: "0,0.[00]", FormatType: "num", }, }) } //add guidelines - for _, cond := range rule.Conditions.Items{ + for _, cond := range rule.Conditions.Items { if len(cond.Values) > 0 { val, err := strconv.ParseFloat(cond.Values[0], 64) if err != nil { @@ -553,9 +596,9 @@ func (alertAPI *AlertAPI) getMetricData(w http.ResponseWriter, req *http.Request continue } if sampleData != nil { - newData := make([]alerting.TimeMetricData,0, len(sampleData)) + newData := make([]alerting.TimeMetricData, 0, len(sampleData)) for _, td := range sampleData { - if len(td) < 2{ + if len(td) < 2 { continue } newData = append(newData, alerting.TimeMetricData{ @@ -563,11 +606,11 @@ func (alertAPI *AlertAPI) getMetricData(w http.ResponseWriter, req *http.Request }) } metricItem.Lines = append(metricItem.Lines, &common.MetricLine{ - Data: newData, + Data: newData, 
BucketSize: filterParam.BucketSize, Metric: common.MetricSummary{ - Label: "", - Group: rule.ID, + Label: "", + Group: rule.ID, TickFormat: "0,0.[00]", FormatType: "num", }, @@ -575,9 +618,7 @@ func (alertAPI *AlertAPI) getMetricData(w http.ResponseWriter, req *http.Request } } } - alertAPI.WriteJSON(w, util.MapStr{ - "metric": metricItem, - }, http.StatusOK) + return &metricItem, nil } @@ -609,7 +650,7 @@ func (alertAPI *AlertAPI) getMetricData(w http.ResponseWriter, req *http.Request // Conditions: alerting.Condition{ // Operator: "any", // Items: []alerting.ConditionItem{ -// {MinimumPeriodMatch: 5, Operator: "gte", Values: []string{"90"}, Severity: "error", Message: "cpu使用率大于90%"}, +// {MinimumPeriodMatch: 5, Operator: "gte", Values: []string{"90"}, Severity: "error", AlertMessage: "cpu使用率大于90%"}, // }, // }, // @@ -617,7 +658,7 @@ func (alertAPI *AlertAPI) getMetricData(w http.ResponseWriter, req *http.Request // Normal: []alerting.Channel{ // {Name: "钉钉", Type: alerting.ChannelWebhook, Webhook: &alerting.CustomWebhook{ // HeaderParams: map[string]string{ -// "Content-Type": "application/json", +// "Message-Type": "application/json", // }, // Body: `{"msgtype": "text","text": {"content":"告警通知: {{ctx.message}}"}}`, // Method: http.MethodPost, diff --git a/service/alerting/constants.go b/service/alerting/constants.go index dba24266..80860944 100644 --- a/service/alerting/constants.go +++ b/service/alerting/constants.go @@ -8,6 +8,7 @@ const ( KVLastNotificationTime = "alert_last_notification_time" KVLastTermStartTime = "alert_last_term_start_time" KVLastEscalationTime = "alert_last_escalation_time" + KVLastMessageState = "alert_last_message_state" ) @@ -17,7 +18,8 @@ const ( ParamResourceName = "resource_name" // 资源名称 如集群名称 es-v714 ParamEventID = "event_id" // 检查事件 ID ParamResults = "results" // - ParamMessage = "message" //检查消息 自定义 + ParamMessage = "message" //检查消息 自定义(模版渲染) + ParamTitle = "title" ParamPresetValue = "preset_value" //检查预设值 float64 
ParamResultValue = "result_value" //检查结果 {group_tags:["cluster-xxx", "node-xxx"], check_values:[]} Severity = "severity" //告警等级 diff --git a/service/alerting/elasticsearch/engine.go b/service/alerting/elasticsearch/engine.go index e514ab4f..ae78db78 100644 --- a/service/alerting/elasticsearch/engine.go +++ b/service/alerting/elasticsearch/engine.go @@ -575,44 +575,93 @@ func (engine *Engine) Do(rule *alerting.Rule) error { Expression: rule.Metrics.Expression, Objects: rule.Resource.Objects, Conditions: rule.Conditions, - State: alerting.AlertStateNormal, + State: alerting.AlertStateOK, } checkResults, err := engine.CheckCondition(rule) alertItem.ConditionResult = checkResults if err != nil { return err } - lastAlertItem := alerting.Alert{} - err = getLastAlert(rule.ID, &lastAlertItem) + alertMessage, err := getLastAlertMessage(rule.ID, 2 * time.Minute) if err != nil { - return err + return fmt.Errorf("get alert message error: %w", err) } conditionResults := checkResults.ResultItems + var paramsCtx map[string]interface{} if len(conditionResults) == 0 { alertItem.Severity = "info" - alertItem.Content = "" - alertItem.State = alerting.AlertStateNormal + alertItem.State = alerting.AlertStateOK + if alertMessage != nil && alertMessage.Status != alerting.MessageStateRecovered { + alertMessage.Status = alerting.MessageStateRecovered + alertMessage.Updated = time.Now() + err = saveAlertMessage(alertMessage) + if err != nil { + return fmt.Errorf("save alert message error: %w", err) + } + } return nil }else{ - if lastAlertItem.State == "" || lastAlertItem.State == alerting.AlertStateNormal { - rule.LastTermStartTime = time.Now() - strTime := rule.LastTermStartTime.UTC().Format(time.RFC3339) - kv.AddValue(alerting2.KVLastTermStartTime, []byte(rule.ID), []byte(strTime)) - } - log.Debugf("check condition result of rule %s is %v", conditionResults, rule.ID ) + paramsCtx = newParameterCtx(rule, checkResults,alertItem.ID, alertItem.Created.Unix()) var ( severity = 
conditionResults[0].ConditionItem.Severity - content string + tplBytes []byte + message string + title string ) + tplBytes, err = resolveMessage(rule.Metrics.Message, paramsCtx) + if err != nil { + return fmt.Errorf("resolve content template error: %w", err) + } + message = string(tplBytes) + paramsCtx[alerting2.ParamMessage] = message + tplBytes, err = resolveMessage(rule.Metrics.Title, paramsCtx) + if err != nil { + return fmt.Errorf("resolve title template error: %w", err) + } + title = string(tplBytes) + paramsCtx[alerting2.ParamTitle] = title for _, conditionResult := range conditionResults { if alerting.SeverityWeights[severity] < alerting.SeverityWeights[conditionResult.ConditionItem.Severity] { severity = conditionResult.ConditionItem.Severity - content = conditionResult.ConditionItem.Message } } + alertItem.Severity = severity - alertItem.Content = content + alertItem.Message = message + alertItem.Title = title alertItem.State = alerting.AlertStateActive + if alertMessage == nil || alertMessage.Status == alerting.MessageStateRecovered { + msg := &alerting.AlertMessage{ + RuleID: rule.ID, + Created: time.Now(), + Updated: time.Now(), + ID: util.GetUUID(), + Status: alerting.MessageStateActive, + Severity: severity, + Title: title, + Message: message, + } + err = saveAlertMessage(msg) + if err != nil { + return fmt.Errorf("save alert message error: %w", err) + } + }else{ + alertMessage.Title = title + alertMessage.Message = message + err = saveAlertMessage(alertMessage) + if err != nil { + return fmt.Errorf("save alert message error: %w", err) + } + } + log.Debugf("check condition result of rule %s is %v", conditionResults, rule.ID ) + } + // if alert message status equals ignored , then skip sending message to channel + if alertMessage != nil && alertMessage.Status == alerting.MessageStateIgnored { + return nil + } + // if channel is not enabled return + if !rule.Channels.Enabled { + return nil } if rule.Channels.AcceptTimeRange.Include(time.Now()) { @@ 
-633,9 +682,11 @@ func (engine *Engine) Do(rule *alerting.Rule) error { period := time.Now().Sub(rule.LastNotificationTime.Local()) //log.Error(lastAlertItem.ID, period, periodDuration) - paramsCtx := newParameterCtx(rule, checkResults,alertItem.ID, alertItem.Created.UnixNano()/1e6) + if paramsCtx == nil { + paramsCtx = newParameterCtx(rule, checkResults,alertItem.ID, alertItem.Created.Unix()) + } - if lastAlertItem.ID == "" || period > periodDuration { + if alertMessage == nil || period > periodDuration { actionResults, errCount := performChannels(rule.Channels.Normal, paramsCtx) alertItem.ActionExecutionResults = actionResults //change and save last notification time in local kv store when action error count equals zero @@ -646,25 +697,16 @@ func (engine *Engine) Do(rule *alerting.Rule) error { alertItem.IsNotified = true } } - isAck, err := hasAcknowledgedRule(rule.ID, rule.LastTermStartTime) - if err != nil { - alertItem.Error = err.Error() - return err - } - if rule.Channels.EscalationEnabled && lastAlertItem.ID !="" && !isAck { + + if rule.Channels.EscalationEnabled { throttlePeriod, err := time.ParseDuration(rule.Channels.EscalationThrottlePeriod) if err != nil { return err } - //change and save last term start time in local kv store when action error count equals zero - if rule.LastTermStartTime.IsZero(){ - tm, err := readTimeFromKV(alerting2.KVLastTermStartTime, []byte(rule.ID)) - if err != nil { - return fmt.Errorf("get last term start time from kv error: %w", err) - } - if !tm.IsZero(){ - rule.LastTermStartTime = tm - } + + rule.LastTermStartTime = time.Now() + if alertMessage != nil { + rule.LastTermStartTime = alertMessage.Created } if time.Now().Sub(rule.LastTermStartTime.Local()) > throttlePeriod { if rule.LastEscalationTime.IsZero(){ @@ -698,7 +740,6 @@ func newParameterCtx(rule *alerting.Rule, checkResults *alerting.ConditionResult var conditionParams []util.MapStr for _, resultItem := range checkResults.ResultItems { conditionParams = 
append(conditionParams, util.MapStr{ - alerting2.ParamMessage: resultItem.ConditionItem.Message, alerting2.ParamPresetValue: resultItem.ConditionItem.Values, alerting2.Severity: resultItem.ConditionItem.Severity, alerting2.ParamGroupValues: resultItem.GroupValues, @@ -724,7 +765,7 @@ func (engine *Engine) Test(rule *alerting.Rule) ([]alerting.ActionExecutionResul return nil, fmt.Errorf("check condition error:%w", err) } var actionResults []alerting.ActionExecutionResult - paramsCtx := newParameterCtx(rule, checkResults, util.GetUUID(), time.Now().UnixNano()/1e6) + paramsCtx := newParameterCtx(rule, checkResults, util.GetUUID(), time.Now().Unix()) if len(rule.Channels.Normal) > 0 { actionResults, _ = performChannels(rule.Channels.Normal, paramsCtx) }else if len(rule.Channels.Escalation) > 0{ @@ -879,7 +920,7 @@ func collectMetricData(agg interface{}, groupValues string, metricData *[]alerti } } -func getLastAlert(ruleID string, alertItem *alerting.Alert) error { +func getLastAlertMessageFromES(ruleID string, message *alerting.AlertMessage) error { queryDsl := util.MapStr{ "size": 1, "sort": []util.MapStr{ @@ -900,15 +941,53 @@ func getLastAlert(ruleID string, alertItem *alerting.Alert) error { q := orm.Query{ RawQuery: util.MustToJSONBytes(queryDsl), } - err, searchResult := orm.Search(alertItem, &q ) + err, searchResult := orm.Search(alerting.AlertMessage{}, &q ) + if err != nil { + return err + } + if len(searchResult.Result) == 0 { + return nil + } + messageBytes := util.MustToJSONBytes(searchResult.Result[0]) + return util.FromJSONBytes(messageBytes, message) +} + +func getLastAlertMessage(ruleID string, duration time.Duration) (*alerting.AlertMessage, error ){ + messageBytes, err := kv.GetValue(alerting2.KVLastMessageState, []byte(ruleID)) + if err != nil { + return nil, err + } + if messageBytes == nil { + return nil, nil + } + message := &alerting.AlertMessage{} + err = util.FromJSONBytes(messageBytes, message) + if err != nil { + return nil, err + } + if 
time.Now().Sub(message.Updated) > duration { + err = getLastAlertMessageFromES(ruleID, message) + return message, err + } + return message, nil +} + +func saveAlertMessageToES(message *alerting.AlertMessage) error { + return orm.Save(message) +} + +func saveAlertMessage(message *alerting.AlertMessage) error { + err := saveAlertMessageToES(message) if err != nil { return err } - if len(searchResult.Result) == 0 { - return nil + + messageBytes, err := util.ToJSONBytes(message) + if err != nil { + return err } - alertBytes := util.MustToJSONBytes(searchResult.Result[0]) - return util.FromJSONBytes(alertBytes, alertItem) + err = kv.AddValue(alerting2.KVLastMessageState, []byte(message.RuleID), messageBytes) + return err } func hasAcknowledgedRule(ruleID string, startTime time.Time) (bool, error){ diff --git a/service/alerting/elasticsearch/engine_test.go b/service/alerting/elasticsearch/engine_test.go index 492a4324..82d68215 100644 --- a/service/alerting/elasticsearch/engine_test.go +++ b/service/alerting/elasticsearch/engine_test.go @@ -75,7 +75,7 @@ func TestEngine( t *testing.T) { Normal: []alerting.Channel{ {Name: "钉钉", Type: alerting.ChannelWebhook, Webhook: &alerting.CustomWebhook{ HeaderParams: map[string]string{ - "Content-Type": "application/json", + "Message-Type": "application/json", }, Body: `{"msgtype": "text","text": {"content":"告警通知: {{ctx.message}}"}}`, Method: http.MethodPost, @@ -85,7 +85,7 @@ func TestEngine( t *testing.T) { Escalation: []alerting.Channel{ {Type: alerting.ChannelWebhook, Name: "微信", Webhook: &alerting.CustomWebhook{ HeaderParams: map[string]string{ - "Content-Type": "application/json", + "Message-Type": "application/json", }, Body: `{"msgtype": "text","text": {"content":"告警通知: {{ctx.message}}"}}`, Method: http.MethodPost, @@ -153,7 +153,7 @@ func TestGeneratePercentilesAggQuery(t *testing.T) { // Conditions: alerting.Condition{ // Operator: "any", // Items: []alerting.ConditionItem{ - // {MinimumPeriodMatch: 5, Operator: "gte", 
Values: []string{"90"}, Severity: "error", Message: "cpu使用率大于90%"}, + // {MinimumPeriodMatch: 5, Operator: "gte", Values: []string{"90"}, Severity: "error", AlertMessage: "cpu使用率大于90%"}, // }, // }, // @@ -161,7 +161,7 @@ func TestGeneratePercentilesAggQuery(t *testing.T) { // Normal: []alerting.Channel{ // {Name: "钉钉", Type: alerting.ChannelWebhook, Webhook: &alerting.CustomWebhook{ // HeaderParams: map[string]string{ - // "Content-Type": "application/json", + // "Message-Type": "application/json", // }, // Body: `{"msgtype": "text","text": {"content":"告警通知: {{ctx.message}}"}}`, // Method: http.MethodPost, @@ -222,7 +222,7 @@ func TestGeneratePercentilesAggQuery(t *testing.T) { Normal: []alerting.Channel{ {Name: "钉钉", Type: alerting.ChannelWebhook, Webhook: &alerting.CustomWebhook{ HeaderParams: map[string]string{ - "Content-Type": "application/json", + "Message-Type": "application/json", }, Body: `{"msgtype": "text","text": {"content":"告警通知: {{ctx.message}}"}}`, Method: http.MethodPost, diff --git a/service/alerting/funcs/date.go b/service/alerting/funcs/date.go index 468931ba..8ace3f05 100644 --- a/service/alerting/funcs/date.go +++ b/service/alerting/funcs/date.go @@ -9,19 +9,22 @@ import ( "time" ) -func date(fmt string, date interface{}) string { - return dateInZone(fmt, date, "Local") +func datetimeInZone(zone string, date interface{}) string{ + return _dateInZone("2006-01-02 15:04:05", date, zone) +} +func datetime(date interface{}) string{ + return _dateInZone("2006-01-02 15:04:05", date, "Local") } -func htmlDate(date interface{}) string { - return dateInZone("2006-01-02", date, "Local") +func date(date interface{}) string { + return _dateInZone("2006-01-02", date, "Local") } -func htmlDateInZone(date interface{}, zone string) string { - return dateInZone("2006-01-02", date, zone) +func dateInZone(zone string, date interface{}) string { + return _dateInZone("2006-01-02", date, zone) } -func dateInZone(fmt string, date interface{}, zone string) string { 
+func _dateInZone(fmt string, date interface{}, zone string) string { var t time.Time switch date := date.(type) { default: @@ -34,6 +37,7 @@ func dateInZone(fmt string, date interface{}, zone string) string { t = time.Unix(date, 0) case int: t = time.Unix(int64(date), 0) + case int32: t = time.Unix(int64(date), 0) case string: diff --git a/service/alerting/funcs/function.go b/service/alerting/funcs/function.go index 7c0a15ad..f6fcaba6 100644 --- a/service/alerting/funcs/function.go +++ b/service/alerting/funcs/function.go @@ -18,11 +18,13 @@ func GenericFuncMap() template.FuncMap { } var genericMap = map[string]interface{}{ - "hello": func() string { return "Hello!" }, + "hello": func() string { return "Hello!" }, "format_bytes": formatBytes, - "to_fixed": toFixed, - "date": date, + "to_fixed": toFixed, + "date": date, "date_in_zone": dateInZone, - "to_upper": strings.ToUpper, - "to_lower": strings.ToLower, + "datetime": datetime, + "datetime_in_zone": datetimeInZone, + "to_upper": strings.ToUpper, + "to_lower": strings.ToLower, } diff --git a/service/alerting/parameter.go b/service/alerting/parameter.go index 2fe8128b..48c5d226 100644 --- a/service/alerting/parameter.go +++ b/service/alerting/parameter.go @@ -18,8 +18,9 @@ func GetTemplateParameters() []ParameterMeta { {ParamResourceID, "string", "resource uuid", "c9f663tath2e5a0vksjg", nil}, {ParamResourceName, "string", "resource name", "es-v716", nil}, {ParamEventID, "string", "identifier for check details", "c9f663tath2e5a0vksjx", nil}, + {ParamTitle, "string", "", "xxx cpu used 95%", nil}, + {ParamMessage, "string", "", "disk used 90%", nil}, {ParamResults, "array", "", "", []ParameterMeta{ - {ParamMessage, "string", "", "disk used 90%", nil}, {ParamPresetValue, "array", "", "[\"90\"]", nil}, {Severity, "string", "", "error", nil}, {ParamGroupValues, "array", "", "[\"cluster-xxx\", \"node-xxx\"]", nil}, From 7fa5489a7fd3711d08834f64931256166a292a00 Mon Sep 17 00:00:00 2001 From: liugq Date: Wed, 18 May 2022 
15:38:24 +0800 Subject: [PATCH 02/36] update alert message search api --- plugin/api/alerting/api.go | 2 +- plugin/api/alerting/message.go | 165 +++++++++++---------------------- 2 files changed, 57 insertions(+), 110 deletions(-) diff --git a/plugin/api/alerting/api.go b/plugin/api/alerting/api.go index c54a0a8a..41bdb9f5 100644 --- a/plugin/api/alerting/api.go +++ b/plugin/api/alerting/api.go @@ -37,7 +37,7 @@ func (alert *AlertAPI) Init() { api.HandleAPIMethod(api.GET, "/alerting/alert/:alert_id", alert.getAlert) api.HandleAPIMethod(api.GET, "/alerting/template/parameters", alert.getTemplateParams) - api.HandleAPIMethod(api.POST, "/alerting/message/_search", alert.searchAlertMessage) + api.HandleAPIMethod(api.GET, "/alerting/message/_search", alert.searchAlertMessage) api.HandleAPIMethod(api.POST, "/alerting/message/_ignore", alert.ignoreAlertMessage) api.HandleAPIMethod(api.GET, "/alerting/message/_stats", alert.getAlertMessageStats) diff --git a/plugin/api/alerting/message.go b/plugin/api/alerting/message.go index 5644f816..50537e55 100644 --- a/plugin/api/alerting/message.go +++ b/plugin/api/alerting/message.go @@ -13,6 +13,8 @@ import ( "infini.sh/framework/core/orm" "infini.sh/framework/core/util" "net/http" + "strconv" + "strings" "time" ) @@ -109,114 +111,59 @@ func (h *AlertAPI) getAlertMessageStats(w http.ResponseWriter, req *http.Request func (h *AlertAPI) searchAlertMessage(w http.ResponseWriter, req *http.Request, ps httprouter.Params) { - resBody:=util.MapStr{} - reqBody := struct{ - Keyword string `json:"keyword"` - Size int `json:"size"` - From int `json:"from"` - Aggregations []elastic.SearchAggParam `json:"aggs"` - Highlight elastic.SearchHighlightParam `json:"highlight"` - Filter elastic.SearchFilterParam `json:"filter"` - Sort []string `json:"sort"` - SearchField string `json:"search_field"` - }{} - err := h.DecodeJSON(req, &reqBody) - if err != nil { - resBody["error"] = err.Error() - h.WriteJSON(w,resBody, http.StatusInternalServerError ) - 
return - } - if reqBody.Size <= 0 { - reqBody.Size = 20 - } - aggs := elastic.BuildSearchTermAggregations(reqBody.Aggregations) - filter := elastic.BuildSearchTermFilter(reqBody.Filter) - var should []util.MapStr - if reqBody.SearchField != ""{ - should = []util.MapStr{ - { - "prefix": util.MapStr{ - reqBody.SearchField: util.MapStr{ - "value": reqBody.Keyword, - "boost": 20, - }, - }, - }, - { - "match": util.MapStr{ - reqBody.SearchField: util.MapStr{ - "query": reqBody.Keyword, - "fuzziness": "AUTO", - "max_expansions": 10, - "prefix_length": 2, - "fuzzy_transpositions": true, - "boost": 2, - }, - }, - }, - } - }else{ - if reqBody.Keyword != ""{ - should = []util.MapStr{ - { - "match": util.MapStr{ - "search_text": util.MapStr{ - "query": reqBody.Keyword, - "fuzziness": "AUTO", - "max_expansions": 10, - "prefix_length": 2, - "fuzzy_transpositions": true, - "boost": 2, - }, - }, - }, - { - "query_string": util.MapStr{ - "fields": []string{"*"}, - "query": reqBody.Keyword, - "fuzziness": "AUTO", - "fuzzy_prefix_length": 2, - "fuzzy_max_expansions": 10, - "fuzzy_transpositions": true, - "allow_leading_wildcard": false, - }, - }, - } - } - } - boolQuery := util.MapStr{ - "filter": filter, - } - if len(should) > 0 { - boolQuery["should"] = should - boolQuery["minimum_should_match"] = 1 - } - query := util.MapStr{ - "aggs": aggs, - "size": reqBody.Size, - "from": reqBody.From, - "highlight": elastic.BuildSearchHighlight(&reqBody.Highlight), - "query": util.MapStr{ - "bool": boolQuery, - }, - } - if len(reqBody.Sort) > 1 { - query["sort"] = []util.MapStr{ - { - reqBody.Sort[0]: util.MapStr{ - "order": reqBody.Sort[1], - }, - }, - } - } - dsl := util.MustToJSONBytes(query) - response, err := elastic.GetClient(h.Config.Elasticsearch).SearchWithRawQueryDSL(orm.GetIndexName(alerting.AlertMessage{}), dsl) - if err != nil { - resBody["error"] = err.Error() - h.WriteJSON(w,resBody, http.StatusInternalServerError ) - return - } - h.WriteJSONHeader(w) - 
w.Write(util.MustToJSONBytes(response)) + var ( + queryDSL = `{"sort":[%s],"query":{"bool":{"must":[%s]}}, "size": %d, "from": %d}` + strSize = h.GetParameterOrDefault(req, "size", "20") + strFrom = h.GetParameterOrDefault(req, "from", "0") + status = h.GetParameterOrDefault(req, "status", "") + severity = h.GetParameterOrDefault(req, "severity", "") + sort = h.GetParameterOrDefault(req, "sort", "") + ruleID = h.GetParameterOrDefault(req, "rule_id", "") + min = h.GetParameterOrDefault(req, "min", "") + max = h.GetParameterOrDefault(req, "max", "") + mustBuilder = &strings.Builder{} + sortBuilder = strings.Builder{} + ) + mustBuilder.WriteString(fmt.Sprintf(`{"range":{"created":{"gte":"%s", "lte": "%s"}}}`, min, max)) + if ruleID != "" { + mustBuilder.WriteString(fmt.Sprintf(`,{"term":{"rule_id":{"value":"%s"}}}`, ruleID)) + } + + if sort != "" { + sortParts := strings.Split(sort, ",") + if len(sortParts) == 2 && sortParts[1] != "created" { + sortBuilder.WriteString(fmt.Sprintf(`{"%s":{ "order": "%s"}},`, sortParts[0], sortParts[1])) + } + } + sortBuilder.WriteString(`{"created":{ "order": "desc"}}`) + + if status != "" { + mustBuilder.WriteString(",") + mustBuilder.WriteString(fmt.Sprintf(`{"term":{"status":{"value":"%s"}}}`, status)) + } + if severity != "" { + mustBuilder.WriteString(",") + mustBuilder.WriteString(fmt.Sprintf(`{"term":{"severity":{"value":"%s"}}}`, severity)) + } + size, _ := strconv.Atoi(strSize) + if size <= 0 { + size = 20 + } + from, _ := strconv.Atoi(strFrom) + if from < 0 { + from = 0 + } + + q := orm.Query{} + queryDSL = fmt.Sprintf(queryDSL, sortBuilder.String(), mustBuilder.String(), size, from) + q.RawQuery = []byte(queryDSL) + + err, res := orm.Search(&alerting.AlertMessage{}, &q) + if err != nil { + log.Error(err) + h.WriteError(w, err.Error(), http.StatusInternalServerError) + return + } + h.Write(w, res.Raw) } \ No newline at end of file From a7d694cd1290a9f512e8785797fa070b4b17dea9 Mon Sep 17 00:00:00 2001 From: liugq Date: Wed, 18 
May 2022 18:28:06 +0800 Subject: [PATCH 03/36] update alert api --- model/alerting/condition.go | 1 + plugin/api/alerting/api.go | 1 + plugin/api/alerting/message.go | 48 ++++++++++++++++++++++++ plugin/api/alerting/rule.go | 16 ++++++++ service/alerting/elasticsearch/engine.go | 45 +--------------------- 5 files changed, 68 insertions(+), 43 deletions(-) diff --git a/model/alerting/condition.go b/model/alerting/condition.go index 097f3c6f..f242ac15 100644 --- a/model/alerting/condition.go +++ b/model/alerting/condition.go @@ -18,6 +18,7 @@ type ConditionItem struct { Values []string `json:"values"` Severity string `json:"severity"` } + func (cond *ConditionItem) GenerateConditionExpression()(conditionExpression string, err error){ valueLength := len(cond.Values) if valueLength == 0 { diff --git a/plugin/api/alerting/api.go b/plugin/api/alerting/api.go index 41bdb9f5..d2f2a0df 100644 --- a/plugin/api/alerting/api.go +++ b/plugin/api/alerting/api.go @@ -40,6 +40,7 @@ func (alert *AlertAPI) Init() { api.HandleAPIMethod(api.GET, "/alerting/message/_search", alert.searchAlertMessage) api.HandleAPIMethod(api.POST, "/alerting/message/_ignore", alert.ignoreAlertMessage) api.HandleAPIMethod(api.GET, "/alerting/message/_stats", alert.getAlertMessageStats) + api.HandleAPIMethod(api.GET, "/alerting/message/:message_id", alert.getAlertMessage) //just for test diff --git a/plugin/api/alerting/message.go b/plugin/api/alerting/message.go index 50537e55..a09a3c78 100644 --- a/plugin/api/alerting/message.go +++ b/plugin/api/alerting/message.go @@ -166,4 +166,52 @@ func (h *AlertAPI) searchAlertMessage(w http.ResponseWriter, req *http.Request, return } h.Write(w, res.Raw) +} + +func (h *AlertAPI) getAlertMessage(w http.ResponseWriter, req *http.Request, ps httprouter.Params) { + message := &alerting.AlertMessage{ + ID: ps.ByName("message_id"), + } + exists, err := orm.Get(message) + if !exists || err != nil { + log.Error(err) + h.WriteJSON(w, util.MapStr{ + "_id": message.ID, + 
"found": false, + }, http.StatusNotFound) + return + } + rule := &alerting.Rule{ + ID: message.RuleID, + } + exists, err = orm.Get(rule) + if !exists || err != nil { + log.Error(err) + h.WriteError(w, fmt.Sprintf("rule[%s] not found", rule.ID), http.StatusInternalServerError) + return + } + conditionExpressions := make([]string, 0, len(rule.Conditions.Items)) + metricExpression, _ := rule.Metrics.GenerateExpression() + for _, cond := range rule.Conditions.Items { + expression, _ := cond.GenerateConditionExpression() + conditionExpressions = append(conditionExpressions, strings.ReplaceAll(expression, "result", metricExpression)) + } + var duration time.Duration + if message.Status == alerting.MessageStateRecovered { + duration = message.Updated.Sub(message.Created) + }else{ + duration = time.Now().Sub(message.Created) + } + detailObj := util.MapStr{ + "title": message.Title, + "message": message.Message, + "severity": message.Severity, + "created": message.Created, + "updated": message.Updated, + "resource_name": rule.Resource.Name, + "resource_object": rule.Resource.Objects, + "condition_expressions": conditionExpressions, + "duration": duration.Milliseconds(), + } + h.WriteJSON(w, detailObj, http.StatusOK) } \ No newline at end of file diff --git a/plugin/api/alerting/rule.go b/plugin/api/alerting/rule.go index 0d422f35..98da1348 100644 --- a/plugin/api/alerting/rule.go +++ b/plugin/api/alerting/rule.go @@ -221,6 +221,22 @@ func (alertAPI *AlertAPI) deleteRule(w http.ResponseWriter, req *http.Request, p task.DeleteTask(obj.ID) clearKV(obj.ID) + delDsl := util.MapStr{ + "query": util.MapStr{ + "term": util.MapStr{ + "rule_id": id, + }, + }, + } + err = orm.DeleteBy(alerting.AlertMessage{}, util.MustToJSONBytes(delDsl)) + if err != nil { + log.Error(err) + } + err = orm.DeleteBy(alerting.Alert{}, util.MustToJSONBytes(delDsl)) + if err != nil { + log.Error(err) + } + alertAPI.WriteJSON(w, util.MapStr{ "_id": obj.ID, "result": "deleted", diff --git 
a/service/alerting/elasticsearch/engine.go b/service/alerting/elasticsearch/engine.go index ae78db78..c39e8d4b 100644 --- a/service/alerting/elasticsearch/engine.go +++ b/service/alerting/elasticsearch/engine.go @@ -973,10 +973,12 @@ func getLastAlertMessage(ruleID string, duration time.Duration) (*alerting.Alert } func saveAlertMessageToES(message *alerting.AlertMessage) error { + message.Updated = time.Now() return orm.Save(message) } func saveAlertMessage(message *alerting.AlertMessage) error { + //todo diff message if not change , then skip save to es ? err := saveAlertMessageToES(message) if err != nil { return err @@ -990,49 +992,6 @@ func saveAlertMessage(message *alerting.AlertMessage) error { return err } -func hasAcknowledgedRule(ruleID string, startTime time.Time) (bool, error){ - queryDsl := util.MapStr{ - "size": 1, - "query": util.MapStr{ - "bool": util.MapStr{ - "must":[]util.MapStr{ - { - "term": util.MapStr{ - "rule_id": util.MapStr{ - "value": ruleID, - }, - }, - }, - { - "term": util.MapStr{ - "state": alerting.AlertStateAcknowledge, - }, - }, - { - "range": util.MapStr{ - "created": util.MapStr{ - "gte": startTime, - }, - }, - }, - }, - - }, - }, - } - q := orm.Query{ - WildcardIndex: true, - RawQuery: util.MustToJSONBytes(queryDsl), - } - err, searchResult := orm.Search(alerting.Alert{}, &q ) - if err != nil { - return false, err - } - if len(searchResult.Result) == 0 { - return false, nil - } - return true, nil -} func readTimeFromKV(bucketKey string, key []byte)(time.Time, error){ timeBytes, err := kv.GetValue(bucketKey, key) From b2735be2e3e372a0d005a2fbafd5bbb85572aaf7 Mon Sep 17 00:00:00 2001 From: liugq Date: Wed, 18 May 2022 19:17:04 +0800 Subject: [PATCH 04/36] attach duration to alert message in search api --- plugin/api/alerting/message.go | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/plugin/api/alerting/message.go b/plugin/api/alerting/message.go index a09a3c78..a06feb74 100644 
--- a/plugin/api/alerting/message.go +++ b/plugin/api/alerting/message.go @@ -120,8 +120,8 @@ func (h *AlertAPI) searchAlertMessage(w http.ResponseWriter, req *http.Request, severity = h.GetParameterOrDefault(req, "severity", "") sort = h.GetParameterOrDefault(req, "sort", "") ruleID = h.GetParameterOrDefault(req, "rule_id", "") - min = h.GetParameterOrDefault(req, "min", "") - max = h.GetParameterOrDefault(req, "max", "") + min = h.GetParameterOrDefault(req, "min", "now-1d") + max = h.GetParameterOrDefault(req, "max", "now") mustBuilder = &strings.Builder{} sortBuilder = strings.Builder{} ) @@ -165,7 +165,35 @@ func (h *AlertAPI) searchAlertMessage(w http.ResponseWriter, req *http.Request, h.WriteError(w, err.Error(), http.StatusInternalServerError) return } - h.Write(w, res.Raw) + esRes := elastic.SearchResponse{} + err = util.FromJSONBytes(res.Raw, &esRes) + if err != nil { + log.Error(err) + h.WriteError(w, err.Error(), http.StatusInternalServerError) + return + } + for _, hit := range esRes.Hits.Hits { + created, _ := parseTime(hit.Source["created"], time.RFC3339) + updated, _ := parseTime(hit.Source["updated"], time.RFC3339) + if !created.IsZero() && !updated.IsZero() { + endTime := time.Now() + if hit.Source["status"] == alerting.MessageStateRecovered { + endTime = updated + } + hit.Source["duration"] = endTime.Sub(created).Milliseconds() + } + + } + h.WriteJSON(w, esRes, http.StatusOK) +} + +func parseTime( t interface{}, layout string) (time.Time, error){ + switch t.(type) { + case string: + return time.Parse(layout, t.(string)) + default: + return time.Time{}, fmt.Errorf("unsupport time type") + } } func (h *AlertAPI) getAlertMessage(w http.ResponseWriter, req *http.Request, ps httprouter.Params) { From 3aaedf5743775330531ba5f0a7327fe26a66566a Mon Sep 17 00:00:00 2001 From: liugq Date: Wed, 18 May 2022 21:08:53 +0800 Subject: [PATCH 05/36] update alert api --- model/alerting/alert.go | 9 +-- plugin/api/alerting/alert.go | 38 ---------- 
plugin/api/alerting/api.go | 1 + plugin/api/alerting/message.go | 1 + plugin/api/alerting/rule.go | 97 ++++++++++++++++++++---- service/alerting/elasticsearch/engine.go | 4 +- 6 files changed, 90 insertions(+), 60 deletions(-) diff --git a/model/alerting/alert.go b/model/alerting/alert.go index 293ffcd1..8ff4b6d7 100644 --- a/model/alerting/alert.go +++ b/model/alerting/alert.go @@ -42,15 +42,14 @@ type ActionExecutionResult struct { } const ( - AlertStateActive string = "active" - AlertStateAcknowledge = "acknowledged" - AlertStateOK = "normal" + AlertStateAlerting string = "alerting" + AlertStateOK = "ok" AlertStateError = "error" ) const ( - MessageStateActive = "active" - MessageStateIgnored = "ignored" + MessageStateAlerting = "alerting" + MessageStateIgnored = "ignored" MessageStateRecovered = "recovered" ) diff --git a/plugin/api/alerting/alert.go b/plugin/api/alerting/alert.go index a3138561..6e3117fc 100644 --- a/plugin/api/alerting/alert.go +++ b/plugin/api/alerting/alert.go @@ -44,44 +44,6 @@ func (h *AlertAPI) getAlert(w http.ResponseWriter, req *http.Request, ps httprou }, 200) } -func (h *AlertAPI) acknowledgeAlert(w http.ResponseWriter, req *http.Request, ps httprouter.Params) { - body := struct { - AlertIDs []string `json:"ids"` - }{} - err := h.DecodeJSON(req, &body) - if err != nil { - h.WriteError(w, err.Error(), http.StatusInternalServerError) - return - } - - if len(body.AlertIDs) == 0 { - h.WriteError(w, "alert ids should not be empty", http.StatusInternalServerError) - return - } - queryDsl := util.MapStr{ - "query": util.MapStr{ - "terms": util.MapStr{ - "_id": body.AlertIDs, - }, - }, - "script": util.MapStr{ - "source": fmt.Sprintf("ctx._source['state'] = '%s'", alerting.AlertStateAcknowledge), - }, - } - err = orm.UpdateBy(alerting.Alert{}, util.MustToJSONBytes(queryDsl)) - if err != nil { - h.WriteError(w, err.Error(), http.StatusInternalServerError) - log.Error(err) - return - } - - h.WriteJSON(w, util.MapStr{ - "ids": body.AlertIDs, - 
"result": "updated", - }, 200) -} - - func (h *AlertAPI) searchAlert(w http.ResponseWriter, req *http.Request, ps httprouter.Params) { var ( diff --git a/plugin/api/alerting/api.go b/plugin/api/alerting/api.go index d2f2a0df..005d13fb 100644 --- a/plugin/api/alerting/api.go +++ b/plugin/api/alerting/api.go @@ -26,6 +26,7 @@ func (alert *AlertAPI) Init() { api.HandleAPIMethod(api.POST, "/alerting/rule/info", alert.fetchAlertInfos) api.HandleAPIMethod(api.POST, "/alerting/rule/:rule_id/_enable", alert.enableRule) api.HandleAPIMethod(api.GET, "/alerting/rule/:rule_id/metric", alert.getMetricData) + api.HandleAPIMethod(api.GET, "/alerting/rule/:rule_id/info", alert.getRuleDetail) api.HandleAPIMethod(api.GET, "/alerting/channel/:channel_id", alert.getChannel) api.HandleAPIMethod(api.POST, "/alerting/channel", alert.createChannel) diff --git a/plugin/api/alerting/message.go b/plugin/api/alerting/message.go index a06feb74..da592566 100644 --- a/plugin/api/alerting/message.go +++ b/plugin/api/alerting/message.go @@ -240,6 +240,7 @@ func (h *AlertAPI) getAlertMessage(w http.ResponseWriter, req *http.Request, ps "resource_object": rule.Resource.Objects, "condition_expressions": conditionExpressions, "duration": duration.Milliseconds(), + "status": message.Status, } h.WriteJSON(w, detailObj, http.StatusOK) } \ No newline at end of file diff --git a/plugin/api/alerting/rule.go b/plugin/api/alerting/rule.go index 98da1348..18498c9b 100644 --- a/plugin/api/alerting/rule.go +++ b/plugin/api/alerting/rule.go @@ -90,7 +90,6 @@ func (alertAPI *AlertAPI) createRule(w http.ResponseWriter, req *http.Request, p } func (alertAPI *AlertAPI) getRule(w http.ResponseWriter, req *http.Request, ps httprouter.Params) { id := ps.MustGetParameter("rule_id") - obj := alerting.Rule{} obj.ID = id @@ -103,12 +102,6 @@ func (alertAPI *AlertAPI) getRule(w http.ResponseWriter, req *http.Request, ps h }, http.StatusNotFound) return } - if err != nil { - log.Error(err) - alertAPI.WriteError(w, 
err.Error(), http.StatusInternalServerError) - log.Error(err) - return - } alertAPI.WriteJSON(w, util.MapStr{ "found": true, @@ -118,6 +111,85 @@ func (alertAPI *AlertAPI) getRule(w http.ResponseWriter, req *http.Request, ps h } +func (alertAPI *AlertAPI) getRuleDetail(w http.ResponseWriter, req *http.Request, ps httprouter.Params) { + id := ps.MustGetParameter("rule_id") + obj := alerting.Rule{} + obj.ID = id + + exists, err := orm.Get(&obj) + if !exists || err != nil { + log.Error(err) + alertAPI.WriteJSON(w, util.MapStr{ + "_id": id, + "found": false, + }, http.StatusNotFound) + return + } + conditionExpressions := make([]string, 0, len(obj.Conditions.Items)) + metricExpression, _ := obj.Metrics.GenerateExpression() + for _, cond := range obj.Conditions.Items { + expression, _ := cond.GenerateConditionExpression() + conditionExpressions = append(conditionExpressions, strings.ReplaceAll(expression, "result", metricExpression)) + } + alertNumbers, err := alertAPI.getRuleAlertMessageNumbers([]string{obj.ID}) + if err != nil { + log.Error(err) + alertAPI.WriteJSON(w, util.MapStr{ + "error": err.Error(), + }, http.StatusInternalServerError) + return + } + queryDSL := util.MapStr{ + "_source": "state", + "size": 1, + "sort": []util.MapStr{ + { + "created": util.MapStr{ + "order": "desc", + }, + }, + }, + "query": util.MapStr{ + "term": util.MapStr{ + "rule_id": util.MapStr{ + "value": obj.ID, + }, + }, + }, + } + q := &orm.Query{ + WildcardIndex: true, + RawQuery: util.MustToJSONBytes(queryDSL), + } + err, result := orm.Search(alerting.Alert{}, q) + if err != nil { + log.Error(err) + alertAPI.WriteJSON(w, util.MapStr{ + "error": err.Error(), + }, http.StatusInternalServerError) + return + } + var state interface{} = "N/A" + if len(result.Result) > 0 { + if resultM, ok := result.Result[0].(map[string]interface{}); ok { + state = resultM["state"] + } + } + + detailObj := util.MapStr{ + "resource_name": obj.Resource.Name, + "resource_objects": obj.Resource.Objects, + 
"period_interval": obj.Metrics.PeriodInterval, + "updated": obj.Updated, + "condition_expressions": conditionExpressions, + "message_count": alertNumbers[obj.ID], + "state": state, + } + + alertAPI.WriteJSON(w, detailObj, 200) + +} + func (alertAPI *AlertAPI) updateRule(w http.ResponseWriter, req *http.Request, ps httprouter.Params) { id := ps.MustGetParameter("rule_id") obj := &alerting.Rule{} @@ -288,7 +360,7 @@ func (alertAPI *AlertAPI) searchRule(w http.ResponseWriter, req *http.Request, p w.Write(searchResult.Raw) } -func (alertAPI *AlertAPI) getRuleAlertNumbers(ruleIDs []string) ( map[string]interface{},error) { +func (alertAPI *AlertAPI) getRuleAlertMessageNumbers(ruleIDs []string) ( map[string]interface{},error) { esClient := elastic.GetClient(alertAPI.Config.Elasticsearch) queryDsl := util.MapStr{ "size": 0, @@ -300,11 +372,6 @@ func (alertAPI *AlertAPI) getRuleAlertNumbers(ruleIDs []string) ( map[string]int "rule_id": ruleIDs, }, }, - { - "terms": util.MapStr{ - "state": []string{alerting.AlertStateError, alerting.AlertStateActive}, - }, - }, }, }, }, @@ -317,7 +384,7 @@ func (alertAPI *AlertAPI) getRuleAlertNumbers(ruleIDs []string) ( map[string]int }, } - searchRes, err := esClient.SearchWithRawQueryDSL(orm.GetWildcardIndexName(alerting.Alert{}), util.MustToJSONBytes(queryDsl) ) + searchRes, err := esClient.SearchWithRawQueryDSL(orm.GetWildcardIndexName(alerting.AlertMessage{}), util.MustToJSONBytes(queryDsl) ) if err != nil { return nil, err } @@ -372,7 +439,7 @@ func (alertAPI *AlertAPI) fetchAlertInfos(w http.ResponseWriter, req *http.Reque alertAPI.WriteJSON(w, util.MapStr{}, http.StatusOK) return } - alertNumbers, err := alertAPI.getRuleAlertNumbers(ruleIDs) + alertNumbers, err := alertAPI.getRuleAlertMessageNumbers(ruleIDs) if err != nil { log.Error(err) alertAPI.WriteJSON(w, util.MapStr{ diff --git a/service/alerting/elasticsearch/engine.go b/service/alerting/elasticsearch/engine.go index c39e8d4b..4fac2a4d 100644 --- 
a/service/alerting/elasticsearch/engine.go +++ b/service/alerting/elasticsearch/engine.go @@ -629,14 +629,14 @@ func (engine *Engine) Do(rule *alerting.Rule) error { alertItem.Severity = severity alertItem.Message = message alertItem.Title = title - alertItem.State = alerting.AlertStateActive + alertItem.State = alerting.AlertStateAlerting if alertMessage == nil || alertMessage.Status == alerting.MessageStateRecovered { msg := &alerting.AlertMessage{ RuleID: rule.ID, Created: time.Now(), Updated: time.Now(), ID: util.GetUUID(), - Status: alerting.MessageStateActive, + Status: alerting.MessageStateAlerting, Severity: severity, Title: title, Message: message, From 78ad60478c1bab155a5c1f2f4faae53ce84ed27b Mon Sep 17 00:00:00 2001 From: liugq Date: Wed, 18 May 2022 21:17:21 +0800 Subject: [PATCH 06/36] update alert api --- plugin/api/alerting/rule.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/plugin/api/alerting/rule.go b/plugin/api/alerting/rule.go index 18498c9b..1866c884 100644 --- a/plugin/api/alerting/rule.go +++ b/plugin/api/alerting/rule.go @@ -372,6 +372,11 @@ func (alertAPI *AlertAPI) getRuleAlertMessageNumbers(ruleIDs []string) ( map[str "rule_id": ruleIDs, }, }, + { + "terms": util.MapStr{ + "status": []string{alerting.MessageStateAlerting, alerting.MessageStateIgnored}, + }, + }, }, }, }, From 01de0fc42c8911fc24fa40a7177ae9c82e3d1d68 Mon Sep 17 00:00:00 2001 From: liugq Date: Thu, 19 May 2022 11:17:07 +0800 Subject: [PATCH 07/36] update alerting api --- model/alerting/condition.go | 1 + plugin/api/alerting/message.go | 8 +-- plugin/api/alerting/rule.go | 92 +++++++++++++++------------------- 3 files changed, 46 insertions(+), 55 deletions(-) diff --git a/model/alerting/condition.go b/model/alerting/condition.go index f242ac15..6c2784c4 100644 --- a/model/alerting/condition.go +++ b/model/alerting/condition.go @@ -17,6 +17,7 @@ type ConditionItem struct { Operator string `json:"operator"` Values []string `json:"values"` Severity string 
`json:"severity"` + Expression string `json:"expression,omitempty"` } func (cond *ConditionItem) GenerateConditionExpression()(conditionExpression string, err error){ diff --git a/plugin/api/alerting/message.go b/plugin/api/alerting/message.go index da592566..d8a2d67e 100644 --- a/plugin/api/alerting/message.go +++ b/plugin/api/alerting/message.go @@ -218,11 +218,10 @@ func (h *AlertAPI) getAlertMessage(w http.ResponseWriter, req *http.Request, ps h.WriteError(w, fmt.Sprintf("rule[%s] not found", rule.ID), http.StatusInternalServerError) return } - conditionExpressions := make([]string, 0, len(rule.Conditions.Items)) metricExpression, _ := rule.Metrics.GenerateExpression() - for _, cond := range rule.Conditions.Items { + for i, cond := range rule.Conditions.Items { expression, _ := cond.GenerateConditionExpression() - conditionExpressions = append(conditionExpressions, strings.ReplaceAll(expression, "result", metricExpression)) + rule.Conditions.Items[i].Expression = strings.ReplaceAll(expression, "result", metricExpression) } var duration time.Duration if message.Status == alerting.MessageStateRecovered { @@ -238,8 +237,9 @@ func (h *AlertAPI) getAlertMessage(w http.ResponseWriter, req *http.Request, ps "updated": message.Updated, "resource_name": rule.Resource.Name, "resource_object": rule.Resource.Objects, - "condition_expressions": conditionExpressions, + "conditions": rule.Conditions, "duration": duration.Milliseconds(), + "ignored_time": message.IgnoredTime, "status": message.Status, } h.WriteJSON(w, detailObj, http.StatusOK) diff --git a/plugin/api/alerting/rule.go b/plugin/api/alerting/rule.go index 1866c884..ee9cc4e0 100644 --- a/plugin/api/alerting/rule.go +++ b/plugin/api/alerting/rule.go @@ -19,7 +19,6 @@ import ( "infini.sh/framework/modules/elastic/api" "infini.sh/framework/modules/elastic/common" "net/http" - "strconv" "strings" "time" ) @@ -125,11 +124,10 @@ func (alertAPI *AlertAPI) getRuleDetail(w http.ResponseWriter, req *http.Request }, 
http.StatusNotFound) return } - conditionExpressions := make([]string, 0, len(obj.Conditions.Items)) metricExpression, _ := obj.Metrics.GenerateExpression() - for _, cond := range obj.Conditions.Items { + for i, cond := range obj.Conditions.Items { expression, _ := cond.GenerateConditionExpression() - conditionExpressions = append(conditionExpressions, strings.ReplaceAll(expression, "result", metricExpression)) + obj.Conditions.Items[i].Expression = strings.ReplaceAll(expression, "result", metricExpression) } alertNumbers, err := alertAPI.getRuleAlertMessageNumbers([]string{obj.ID}) if err != nil { @@ -179,11 +177,12 @@ func (alertAPI *AlertAPI) getRuleDetail(w http.ResponseWriter, req *http.Request detailObj := util.MapStr{ "resource_name": obj.Resource.Name, "resource_objects": obj.Resource.Objects, - "period_interval": obj.Metrics.PeriodInterval, + "period_interval": obj.Metrics.PeriodInterval, //统计周期 "updated": obj.Updated, - "condition_expressions": conditionExpressions, - "message_count": alertNumbers[obj.ID], + "conditions": obj.Conditions, + "message_count": alertNumbers[obj.ID], //所有关联告警消息数(包括已恢复的) "state": state, + "enabled": obj.Enabled, } alertAPI.WriteJSON(w, detailObj, 200) @@ -372,11 +371,11 @@ func (alertAPI *AlertAPI) getRuleAlertMessageNumbers(ruleIDs []string) ( map[str "rule_id": ruleIDs, }, }, - { - "terms": util.MapStr{ - "status": []string{alerting.MessageStateAlerting, alerting.MessageStateIgnored}, - }, - }, + //{ + // "terms": util.MapStr{ + // "status": []string{alerting.MessageStateAlerting, alerting.MessageStateIgnored}, + // }, + //}, }, }, }, @@ -444,21 +443,12 @@ func (alertAPI *AlertAPI) fetchAlertInfos(w http.ResponseWriter, req *http.Reque alertAPI.WriteJSON(w, util.MapStr{}, http.StatusOK) return } - alertNumbers, err := alertAPI.getRuleAlertMessageNumbers(ruleIDs) - if err != nil { - log.Error(err) - alertAPI.WriteJSON(w, util.MapStr{ - "error": err.Error(), - }, http.StatusInternalServerError) - return - } latestAlertInfos := 
map[string]util.MapStr{} for _, hit := range searchRes.Hits.Hits { if ruleID, ok := hit.Source["rule_id"].(string); ok { latestAlertInfos[ruleID] = util.MapStr{ "status": hit.Source["state"], - "alert_count": alertNumbers[ruleID], } } @@ -676,36 +666,36 @@ func getRuleMetricData( rule *alerting.Rule, filterParam *alerting.FilterParam) }) } //add guidelines - for _, cond := range rule.Conditions.Items { - if len(cond.Values) > 0 { - val, err := strconv.ParseFloat(cond.Values[0], 64) - if err != nil { - log.Errorf("parse condition value error: %v", err) - continue - } - if sampleData != nil { - newData := make([]alerting.TimeMetricData, 0, len(sampleData)) - for _, td := range sampleData { - if len(td) < 2 { - continue - } - newData = append(newData, alerting.TimeMetricData{ - td[0], val, - }) - } - metricItem.Lines = append(metricItem.Lines, &common.MetricLine{ - Data: newData, - BucketSize: filterParam.BucketSize, - Metric: common.MetricSummary{ - Label: "", - Group: rule.ID, - TickFormat: "0,0.[00]", - FormatType: "num", - }, - }) - } - } - } + //for _, cond := range rule.Conditions.Items { + // if len(cond.Values) > 0 { + // val, err := strconv.ParseFloat(cond.Values[0], 64) + // if err != nil { + // log.Errorf("parse condition value error: %v", err) + // continue + // } + // if sampleData != nil { + // newData := make([]alerting.TimeMetricData, 0, len(sampleData)) + // for _, td := range sampleData { + // if len(td) < 2 { + // continue + // } + // newData = append(newData, alerting.TimeMetricData{ + // td[0], val, + // }) + // } + // metricItem.Lines = append(metricItem.Lines, &common.MetricLine{ + // Data: newData, + // BucketSize: filterParam.BucketSize, + // Metric: common.MetricSummary{ + // Label: "", + // Group: rule.ID, + // TickFormat: "0,0.[00]", + // FormatType: "num", + // }, + // }) + // } + // } + //} return &metricItem, nil } From f4b87b0122f76782dd7a4064f93d6c3daa510935 Mon Sep 17 00:00:00 2001 From: liugq Date: Thu, 19 May 2022 12:05:14 +0800 
Subject: [PATCH 08/36] update alerting api --- plugin/api/alerting/message.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/plugin/api/alerting/message.go b/plugin/api/alerting/message.go index d8a2d67e..e5bbe3b3 100644 --- a/plugin/api/alerting/message.go +++ b/plugin/api/alerting/message.go @@ -230,6 +230,8 @@ func (h *AlertAPI) getAlertMessage(w http.ResponseWriter, req *http.Request, ps duration = time.Now().Sub(message.Created) } detailObj := util.MapStr{ + "message_id": message.ID, + "rule_id": message.RuleID, "title": message.Title, "message": message.Message, "severity": message.Severity, From 6571d75a21446e7355a732a616b8736ec69be0ec Mon Sep 17 00:00:00 2001 From: liugq Date: Fri, 20 May 2022 16:33:35 +0800 Subject: [PATCH 09/36] add auth to gateway instance api --- plugin/api/gateway/api.go | 15 ++++++++------- plugin/api/init.go | 11 ++++++----- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/plugin/api/gateway/api.go b/plugin/api/gateway/api.go index 7624624b..ab1ac7f4 100644 --- a/plugin/api/gateway/api.go +++ b/plugin/api/gateway/api.go @@ -6,6 +6,7 @@ package gateway import ( "infini.sh/framework/core/api" + "infini.sh/framework/core/api/rbac/enum" ) type GatewayAPI struct { @@ -15,12 +16,12 @@ type GatewayAPI struct { func init() { gateway:=GatewayAPI{} api.HandleAPIMethod(api.POST, "/gateway/instance/try_connect", gateway.tryConnect) - api.HandleAPIMethod(api.GET, "/gateway/instance/:instance_id", gateway.getInstance) - api.HandleAPIMethod(api.POST, "/gateway/instance", gateway.createInstance) - api.HandleAPIMethod(api.PUT, "/gateway/instance/:instance_id", gateway.updateInstance) - api.HandleAPIMethod(api.DELETE, "/gateway/instance/:instance_id", gateway.deleteInstance) - api.HandleAPIMethod(api.GET, "/gateway/instance/_search", gateway.searchInstance) - api.HandleAPIMethod(api.POST, "/gateway/instance/status", gateway.getInstanceStatus) + api.HandleAPIMethod(api.GET, "/gateway/instance/:instance_id", 
gateway.RequirePermission(gateway.getInstance, enum.PermissionGatewayInstanceRead)) + api.HandleAPIMethod(api.POST, "/gateway/instance", gateway.RequirePermission(gateway.createInstance, enum.PermissionGatewayInstanceWrite)) + api.HandleAPIMethod(api.PUT, "/gateway/instance/:instance_id", gateway.RequirePermission(gateway.updateInstance, enum.PermissionGatewayInstanceWrite)) + api.HandleAPIMethod(api.DELETE, "/gateway/instance/:instance_id", gateway.RequirePermission(gateway.deleteInstance, enum.PermissionGatewayInstanceWrite)) + api.HandleAPIMethod(api.GET, "/gateway/instance/_search", gateway.RequirePermission(gateway.searchInstance, enum.PermissionGatewayInstanceRead)) + api.HandleAPIMethod(api.POST, "/gateway/instance/status", gateway.RequirePermission(gateway.getInstanceStatus, enum.PermissionGatewayInstanceRead)) - api.HandleAPIMethod(api.POST, "/gateway/instance/:instance_id/_proxy", gateway.proxy) + api.HandleAPIMethod(api.POST, "/gateway/instance/:instance_id/_proxy", gateway.RequirePermission(gateway.proxy, enum.PermissionGatewayInstanceRead)) } diff --git a/plugin/api/init.go b/plugin/api/init.go index 178e64ab..c69abd01 100644 --- a/plugin/api/init.go +++ b/plugin/api/init.go @@ -5,6 +5,7 @@ import ( "infini.sh/console/plugin/api/alerting" "infini.sh/console/plugin/api/index_management" "infini.sh/framework/core/api" + "infini.sh/framework/core/api/rbac/enum" "path" ) @@ -15,7 +16,7 @@ func Init(cfg *config.AppConfig) { } var pathPrefix = "/_search-center/" var esPrefix = "/elasticsearch/:id/" - api.HandleAPIMethod(api.GET, path.Join(pathPrefix, "elasticsearch/overview"), handler.ElasticsearchOverviewAction) + api.HandleAPIMethod(api.GET, path.Join(pathPrefix, "elasticsearch/overview"), handler.RequirePermission(handler.ElasticsearchOverviewAction, enum.PermissionElasticsearchMetricRead)) //api.HandleAPIMethod(api.POST, "/api/get_indices",index_management.API1) api.HandleAPIMethod(api.GET, path.Join(pathPrefix, "dict/_search"), 
handler.GetDictListAction) @@ -41,10 +42,10 @@ func Init(cfg *config.AppConfig) { api.HandleAPIMethod(api.DELETE, path.Join(esPrefix, "index/:index"), handler.HandleDeleteIndexAction) api.HandleAPIMethod(api.POST, path.Join(esPrefix, "index/:index"), handler.HandleCreateIndexAction) - api.HandleAPIMethod(api.POST, path.Join(pathPrefix, "elasticsearch/command"), handler.HandleAddCommonCommandAction) - api.HandleAPIMethod(api.PUT, path.Join(pathPrefix, "elasticsearch/command/:cid"), handler.HandleSaveCommonCommandAction) - api.HandleAPIMethod(api.GET, path.Join(pathPrefix, "elasticsearch/command"), handler.HandleQueryCommonCommandAction) - api.HandleAPIMethod(api.DELETE, path.Join(pathPrefix, "elasticsearch/command/:cid"), handler.HandleDeleteCommonCommandAction) + api.HandleAPIMethod(api.POST, path.Join(pathPrefix, "elasticsearch/command"), handler.RequirePermission(handler.HandleAddCommonCommandAction, enum.PermissionCommandWrite)) + api.HandleAPIMethod(api.PUT, path.Join(pathPrefix, "elasticsearch/command/:cid"), handler.RequirePermission(handler.HandleSaveCommonCommandAction, enum.PermissionCommandWrite)) + api.HandleAPIMethod(api.GET, path.Join(pathPrefix, "elasticsearch/command"), handler.RequirePermission(handler.HandleQueryCommonCommandAction, enum.PermissionCommandRead)) + api.HandleAPIMethod(api.DELETE, path.Join(pathPrefix, "elasticsearch/command/:cid"), handler.RequirePermission(handler.HandleDeleteCommonCommandAction,enum.PermissionCommandWrite)) //task.RegisterScheduleTask(task.ScheduleTask{ // Description: "sync reindex task result", From 0342193fa2d69af3b4ba5ae315e2df7e80040a01 Mon Sep 17 00:00:00 2001 From: liugq Date: Sun, 22 May 2022 10:53:23 +0800 Subject: [PATCH 10/36] attach auth to alert api --- plugin/api/alerting/api.go | 39 +++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/plugin/api/alerting/api.go b/plugin/api/alerting/api.go index 005d13fb..5ab8f2d5 100644 --- 
a/plugin/api/alerting/api.go +++ b/plugin/api/alerting/api.go @@ -7,6 +7,7 @@ package alerting import ( "infini.sh/console/config" "infini.sh/framework/core/api" + "infini.sh/framework/core/api/rbac/enum" ) @@ -16,32 +17,32 @@ type AlertAPI struct { } func (alert *AlertAPI) Init() { - api.HandleAPIMethod(api.GET, "/alerting/rule/:rule_id", alert.getRule) - api.HandleAPIMethod(api.POST, "/alerting/rule", alert.createRule) + api.HandleAPIMethod(api.GET, "/alerting/rule/:rule_id", alert.RequirePermission(alert.getRule,enum.PermissionAlertRuleRead)) + api.HandleAPIMethod(api.POST, "/alerting/rule", alert.RequirePermission(alert.createRule, enum.PermissionAlertRuleWrite)) api.HandleAPIMethod(api.POST, "/alerting/rule/test", alert.sendTestMessage) - api.HandleAPIMethod(api.DELETE, "/alerting/rule/:rule_id", alert.deleteRule) - api.HandleAPIMethod(api.PUT, "/alerting/rule/:rule_id", alert.updateRule) - api.HandleAPIMethod(api.GET, "/alerting/rule/_search", alert.searchRule) + api.HandleAPIMethod(api.DELETE, "/alerting/rule/:rule_id", alert.RequirePermission(alert.deleteRule, enum.PermissionAlertRuleWrite)) + api.HandleAPIMethod(api.PUT, "/alerting/rule/:rule_id", alert.RequirePermission(alert.updateRule, enum.PermissionAlertRuleWrite)) + api.HandleAPIMethod(api.GET, "/alerting/rule/_search", alert.RequirePermission(alert.searchRule, enum.PermissionAlertRuleRead)) api.HandleAPIMethod(api.GET, "/alerting/stats", alert.getAlertStats) api.HandleAPIMethod(api.POST, "/alerting/rule/info", alert.fetchAlertInfos) - api.HandleAPIMethod(api.POST, "/alerting/rule/:rule_id/_enable", alert.enableRule) - api.HandleAPIMethod(api.GET, "/alerting/rule/:rule_id/metric", alert.getMetricData) - api.HandleAPIMethod(api.GET, "/alerting/rule/:rule_id/info", alert.getRuleDetail) + api.HandleAPIMethod(api.POST, "/alerting/rule/:rule_id/_enable", alert.RequirePermission(alert.enableRule, enum.PermissionAlertRuleWrite)) + api.HandleAPIMethod(api.GET, "/alerting/rule/:rule_id/metric", 
alert.RequirePermission(alert.getMetricData, enum.PermissionAlertRuleRead)) + api.HandleAPIMethod(api.GET, "/alerting/rule/:rule_id/info", alert.RequirePermission(alert.getRuleDetail, enum.PermissionAlertRuleRead, enum.PermissionAlertMessageRead)) - api.HandleAPIMethod(api.GET, "/alerting/channel/:channel_id", alert.getChannel) - api.HandleAPIMethod(api.POST, "/alerting/channel", alert.createChannel) - api.HandleAPIMethod(api.DELETE, "/alerting/channel/:channel_id", alert.deleteChannel) - api.HandleAPIMethod(api.PUT, "/alerting/channel/:channel_id", alert.updateChannel) - api.HandleAPIMethod(api.GET, "/alerting/channel/_search", alert.searchChannel) + api.HandleAPIMethod(api.GET, "/alerting/channel/:channel_id", alert.RequirePermission(alert.getChannel, enum.PermissionAlertChannelRead)) + api.HandleAPIMethod(api.POST, "/alerting/channel", alert.RequirePermission(alert.createChannel, enum.PermissionAlertChannelWrite)) + api.HandleAPIMethod(api.DELETE, "/alerting/channel/:channel_id", alert.RequirePermission(alert.deleteChannel, enum.PermissionAlertChannelWrite)) + api.HandleAPIMethod(api.PUT, "/alerting/channel/:channel_id", alert.RequirePermission(alert.updateChannel, enum.PermissionAlertChannelWrite)) + api.HandleAPIMethod(api.GET, "/alerting/channel/_search", alert.RequirePermission(alert.searchChannel, enum.PermissionAlertChannelRead)) - api.HandleAPIMethod(api.GET, "/alerting/alert/_search", alert.searchAlert) - api.HandleAPIMethod(api.GET, "/alerting/alert/:alert_id", alert.getAlert) + api.HandleAPIMethod(api.GET, "/alerting/alert/_search", alert.RequirePermission(alert.searchAlert, enum.PermissionAlertHistoryRead)) + api.HandleAPIMethod(api.GET, "/alerting/alert/:alert_id", alert.RequirePermission(alert.getAlert, enum.PermissionAlertHistoryRead)) api.HandleAPIMethod(api.GET, "/alerting/template/parameters", alert.getTemplateParams) - api.HandleAPIMethod(api.GET, "/alerting/message/_search", alert.searchAlertMessage) - api.HandleAPIMethod(api.POST, 
"/alerting/message/_ignore", alert.ignoreAlertMessage) - api.HandleAPIMethod(api.GET, "/alerting/message/_stats", alert.getAlertMessageStats) - api.HandleAPIMethod(api.GET, "/alerting/message/:message_id", alert.getAlertMessage) + api.HandleAPIMethod(api.GET, "/alerting/message/_search", alert.RequirePermission(alert.searchAlertMessage, enum.PermissionElasticsearchMetricRead)) + api.HandleAPIMethod(api.POST, "/alerting/message/_ignore", alert.RequirePermission(alert.ignoreAlertMessage, enum.PermissionAlertMessageWrite)) + api.HandleAPIMethod(api.GET, "/alerting/message/_stats", alert.RequirePermission(alert.getAlertMessageStats, enum.PermissionAlertMessageRead)) + api.HandleAPIMethod(api.GET, "/alerting/message/:message_id", alert.RequirePermission(alert.getAlertMessage, enum.PermissionAlertMessageRead)) //just for test From 5ef6bc19659bb0ef445d7197fe902a07d9962185 Mon Sep 17 00:00:00 2001 From: liugq Date: Sun, 22 May 2022 14:50:30 +0800 Subject: [PATCH 11/36] update alert api --- model/alerting/condition.go | 9 +++ plugin/api/alerting/rule.go | 20 +++--- service/alerting/elasticsearch/engine.go | 79 +++++++++++++----------- service/alerting/engine.go | 2 +- 4 files changed, 66 insertions(+), 44 deletions(-) diff --git a/model/alerting/condition.go b/model/alerting/condition.go index 6c2784c4..cec3d075 100644 --- a/model/alerting/condition.go +++ b/model/alerting/condition.go @@ -10,6 +10,15 @@ type Condition struct { Operator string `json:"operator"` Items []ConditionItem `json:"items"` } +func (cond *Condition) GetMinimumPeriodMatch() int{ + var minPeriodMatch = 0 + for _, citem := range cond.Items { + if citem.MinimumPeriodMatch > minPeriodMatch { + minPeriodMatch = citem.MinimumPeriodMatch + } + } + return minPeriodMatch +} type ConditionItem struct { //MetricName string `json:"metric"` diff --git a/plugin/api/alerting/rule.go b/plugin/api/alerting/rule.go index ee9cc4e0..479639a9 100644 --- a/plugin/api/alerting/rule.go +++ b/plugin/api/alerting/rule.go @@ 
-577,7 +577,7 @@ func (alertAPI *AlertAPI) getPreviewMetricData(w http.ResponseWriter, req *http. End: max, BucketSize: fmt.Sprintf("%ds", bucketSize), } - metricItem, err := getRuleMetricData(rule, filterParam) + metricItem, _, err := getRuleMetricData(rule, filterParam) if err != nil { log.Error(err) alertAPI.WriteJSON(w, util.MapStr{ @@ -612,7 +612,7 @@ func (alertAPI *AlertAPI) getMetricData(w http.ResponseWriter, req *http.Request End: max, BucketSize: fmt.Sprintf("%ds", bucketSize), } - metricItem, err := getRuleMetricData(rule, filterParam) + metricItem, queryResult, err := getRuleMetricData(rule, filterParam) if err != nil { log.Error(err) alertAPI.WriteJSON(w, util.MapStr{ @@ -620,16 +620,20 @@ func (alertAPI *AlertAPI) getMetricData(w http.ResponseWriter, req *http.Request }, http.StatusInternalServerError) return } - alertAPI.WriteJSON(w, util.MapStr{ + resBody := util.MapStr{ "metric": metricItem, - }, http.StatusOK) + } + if alertAPI.GetParameter(req, "debug") == "1" { + resBody["query"] = queryResult.Query + } + alertAPI.WriteJSON(w,resBody, http.StatusOK) } -func getRuleMetricData( rule *alerting.Rule, filterParam *alerting.FilterParam) (*common.MetricItem, error) { +func getRuleMetricData( rule *alerting.Rule, filterParam *alerting.FilterParam) (*common.MetricItem, *alerting.QueryResult, error) { eng := alerting2.GetEngine(rule.Resource.Type) - metricData, err := eng.GetTargetMetricData(rule, true, filterParam) + metricData, queryResult, err := eng.GetTargetMetricData(rule, true, filterParam) if err != nil { - return nil, err + return nil,queryResult, err } //var filteredMetricData []alerting.MetricData //title := rule.Metrics.Formula @@ -696,7 +700,7 @@ func getRuleMetricData( rule *alerting.Rule, filterParam *alerting.FilterParam) // } // } //} - return &metricItem, nil + return &metricItem,queryResult, nil } diff --git a/service/alerting/elasticsearch/engine.go b/service/alerting/elasticsearch/engine.go index 4fac2a4d..6351c9a3 100644 --- 
a/service/alerting/elasticsearch/engine.go +++ b/service/alerting/elasticsearch/engine.go @@ -41,7 +41,6 @@ func (engine *Engine) GenerateQuery(rule *alerting.Rule, filterParam *alerting.F if err != nil { return nil, err } - //todo generate agg if len(rule.Metrics.Items) == 0 { return nil, fmt.Errorf("metric items should not be empty") } @@ -133,7 +132,7 @@ func (engine *Engine) generateAgg(metricItem *alerting.MetricItem) map[string]in case "rate": aggType = "max" isPipeline = true - case "medium": + case "medium": // from es version 6.6 aggType = "median_absolute_deviation" case "p99", "p95","p90","p80","p50": aggType = "percentiles" @@ -304,7 +303,11 @@ func (engine *Engine) GenerateRawFilter(rule *alerting.Rule, filterParam *alerti }else{ return nil, fmt.Errorf("period interval: %s is too small", rule.Metrics.PeriodInterval) } - duration, err := time.ParseDuration(fmt.Sprintf("%d%s", value * 15, units)) + bucketCount := rule.Conditions.GetMinimumPeriodMatch() + 1 + if bucketCount <= 0 { + bucketCount = 1 + } + duration, err := time.ParseDuration(fmt.Sprintf("%d%s", value * bucketCount, units)) if err != nil { return nil, err } @@ -385,10 +388,10 @@ func (engine *Engine) ExecuteQuery(rule *alerting.Rule, filterParam *alerting.Fi queryResult.MetricData = metricData return queryResult, nil } -func (engine *Engine) GetTargetMetricData(rule *alerting.Rule, isFilterNaN bool, filterParam *alerting.FilterParam)([]alerting.MetricData, error){ +func (engine *Engine) GetTargetMetricData(rule *alerting.Rule, isFilterNaN bool, filterParam *alerting.FilterParam)([]alerting.MetricData, *alerting.QueryResult, error){ queryResult, err := engine.ExecuteQuery(rule, filterParam) if err != nil { - return nil, err + return nil, queryResult, err } var targetMetricData []alerting.MetricData for _, md := range queryResult.MetricData { @@ -402,7 +405,7 @@ func (engine *Engine) GetTargetMetricData(rule *alerting.Rule, isFilterNaN bool, } expression, err := 
govaluate.NewEvaluableExpression(rule.Metrics.Formula) if err != nil { - return nil, err + return nil, queryResult, err } dataLength := 0 for _, v := range md.Data { @@ -429,7 +432,7 @@ func (engine *Engine) GetTargetMetricData(rule *alerting.Rule, isFilterNaN bool, } result, err := expression.Evaluate(parameters) if err != nil { - return nil, err + return nil, queryResult, err } if r, ok := result.(float64); ok { if math.IsNaN(r) || math.IsInf(r, 0 ){ @@ -445,25 +448,20 @@ func (engine *Engine) GetTargetMetricData(rule *alerting.Rule, isFilterNaN bool, } targetMetricData = append(targetMetricData, targetData) } - return targetMetricData, nil + return targetMetricData, queryResult, nil } //CheckCondition check whether rule conditions triggered or not -//if triggered returns an array of ConditionResult +//if triggered returns an ConditionResult //sort conditions by severity desc before check , and then if condition is true, then continue check another group func (engine *Engine) CheckCondition(rule *alerting.Rule)(*alerting.ConditionResult, error){ - queryResult, err := engine.ExecuteQuery(rule, nil) + var resultItems []alerting.ConditionResultItem + targetMetricData, queryResult, err := engine.GetTargetMetricData(rule, false, nil) conditionResult := &alerting.ConditionResult{ QueryResult: queryResult, } if err != nil { return conditionResult, err } - - var resultItems []alerting.ConditionResultItem - targetMetricData, err := engine.GetTargetMetricData(rule, false, nil) - if err != nil { - return nil, err - } for idx, targetData := range targetMetricData { if idx == 0 { sort.Slice(rule.Conditions.Items, func(i, j int) bool { @@ -604,22 +602,11 @@ func (engine *Engine) Do(rule *alerting.Rule) error { paramsCtx = newParameterCtx(rule, checkResults,alertItem.ID, alertItem.Created.Unix()) var ( severity = conditionResults[0].ConditionItem.Severity - tplBytes []byte - message string - title string ) - tplBytes, err = resolveMessage(rule.Metrics.Message, paramsCtx) + err 
= attachTitleMessageToCtx(rule, paramsCtx) if err != nil { - return fmt.Errorf("resolve content template error: %w", err) + return err } - message = string(tplBytes) - paramsCtx[alerting2.ParamMessage] = message - tplBytes, err = resolveMessage(rule.Metrics.Title, paramsCtx) - if err != nil { - return fmt.Errorf("resolve title template error: %w", err) - } - title = string(tplBytes) - paramsCtx[alerting2.ParamTitle] = title for _, conditionResult := range conditionResults { if alerting.SeverityWeights[severity] < alerting.SeverityWeights[conditionResult.ConditionItem.Severity] { severity = conditionResult.ConditionItem.Severity @@ -627,8 +614,8 @@ func (engine *Engine) Do(rule *alerting.Rule) error { } alertItem.Severity = severity - alertItem.Message = message - alertItem.Title = title + alertItem.Message = paramsCtx[alerting2.ParamMessage].(string) + alertItem.Title = paramsCtx[alerting2.ParamTitle].(string) alertItem.State = alerting.AlertStateAlerting if alertMessage == nil || alertMessage.Status == alerting.MessageStateRecovered { msg := &alerting.AlertMessage{ @@ -638,16 +625,16 @@ func (engine *Engine) Do(rule *alerting.Rule) error { ID: util.GetUUID(), Status: alerting.MessageStateAlerting, Severity: severity, - Title: title, - Message: message, + Title: alertItem.Title, + Message: alertItem.Message, } err = saveAlertMessage(msg) if err != nil { return fmt.Errorf("save alert message error: %w", err) } }else{ - alertMessage.Title = title - alertMessage.Message = message + alertMessage.Title = alertItem.Title + alertMessage.Message = alertItem.Message err = saveAlertMessage(alertMessage) if err != nil { return fmt.Errorf("save alert message error: %w", err) @@ -736,6 +723,24 @@ func (engine *Engine) Do(rule *alerting.Rule) error { return nil } +func attachTitleMessageToCtx(rule *alerting.Rule, paramsCtx map[string]interface{}) error{ + var ( + tplBytes []byte + err error + ) + tplBytes, err = resolveMessage(rule.Metrics.Message, paramsCtx) + if err != nil { + 
return fmt.Errorf("resolve message template error: %w", err) + } + paramsCtx[alerting2.ParamMessage] = string(tplBytes) + tplBytes, err = resolveMessage(rule.Metrics.Title, paramsCtx) + if err != nil { + return fmt.Errorf("resolve title template error: %w", err) + } + paramsCtx[alerting2.ParamTitle] = string(tplBytes) + return nil +} + func newParameterCtx(rule *alerting.Rule, checkResults *alerting.ConditionResult, eventID string, eventTimestamp interface{} ) map[string]interface{}{ var conditionParams []util.MapStr for _, resultItem := range checkResults.ResultItems { @@ -766,6 +771,10 @@ func (engine *Engine) Test(rule *alerting.Rule) ([]alerting.ActionExecutionResul } var actionResults []alerting.ActionExecutionResult paramsCtx := newParameterCtx(rule, checkResults, util.GetUUID(), time.Now().Unix()) + err = attachTitleMessageToCtx(rule, paramsCtx) + if err != nil { + return nil, err + } if len(rule.Channels.Normal) > 0 { actionResults, _ = performChannels(rule.Channels.Normal, paramsCtx) }else if len(rule.Channels.Escalation) > 0{ diff --git a/service/alerting/engine.go b/service/alerting/engine.go index 5f3c548b..a6a4debd 100644 --- a/service/alerting/engine.go +++ b/service/alerting/engine.go @@ -17,7 +17,7 @@ type Engine interface { CheckCondition(rule *alerting.Rule)(*alerting.ConditionResult, error) GenerateTask(rule *alerting.Rule) func(ctx context.Context) Test(rule *alerting.Rule) ([]alerting.ActionExecutionResult, error) - GetTargetMetricData(rule *alerting.Rule, isFilterNaN bool, filterParam *alerting.FilterParam)([]alerting.MetricData, error) + GetTargetMetricData(rule *alerting.Rule, isFilterNaN bool, filterParam *alerting.FilterParam)([]alerting.MetricData, *alerting.QueryResult, error) } var ( From bfd79d751b51d5355c28bca10832423f576f350a Mon Sep 17 00:00:00 2001 From: liugq Date: Sun, 22 May 2022 15:28:26 +0800 Subject: [PATCH 12/36] fixed no metric data when length of metric item equals 1 --- plugin/api/alerting/rule.go | 12 ++++++++++-- 1 file 
changed, 10 insertions(+), 2 deletions(-) diff --git a/plugin/api/alerting/rule.go b/plugin/api/alerting/rule.go index 479639a9..13dd2fa8 100644 --- a/plugin/api/alerting/rule.go +++ b/plugin/api/alerting/rule.go @@ -655,11 +655,19 @@ func getRuleMetricData( rule *alerting.Rule, filterParam *alerting.FilterParam) continue } //filteredMetricData = append(filteredMetricData, md) + + targetData := md.Data["result"] + if len(rule.Metrics.Items) == 1 { + for k, _ := range md.Data { + targetData = md.Data[k] + break + } + } if sampleData == nil { - sampleData = md.Data["result"] + sampleData = targetData } metricItem.Lines = append(metricItem.Lines, &common.MetricLine{ - Data: md.Data["result"], + Data: targetData, BucketSize: filterParam.BucketSize, Metric: common.MetricSummary{ Label: strings.Join(md.GroupValues, "-"), From 9f9ed6894f627701603ba82dd5f3b6b26bfd3acf Mon Sep 17 00:00:00 2001 From: liugq Date: Mon, 23 May 2022 11:43:50 +0800 Subject: [PATCH 13/36] update alerting api --- model/alerting/metric.go | 1 - model/alerting/rule_test.go | 1 - plugin/api/alerting/message.go | 2 +- plugin/api/alerting/rule.go | 132 +++++++++++++----- service/alerting/elasticsearch/engine_test.go | 2 - 5 files changed, 99 insertions(+), 39 deletions(-) diff --git a/model/alerting/metric.go b/model/alerting/metric.go index 5f8fd0cb..86f7979f 100644 --- a/model/alerting/metric.go +++ b/model/alerting/metric.go @@ -11,7 +11,6 @@ import ( type Metric struct { PeriodInterval string `json:"period_interval"` - MaxPeriods int `json:"max_periods"` Items []MetricItem `json:"items"` Formula string `json:"formula,omitempty"` Expression string `json:"expression" elastic_mapping:"expression:{type:keyword,copy_to:search_text}"` //告警表达式,自动生成 eg: avg(cpu) > 80 diff --git a/model/alerting/rule_test.go b/model/alerting/rule_test.go index 93e1e257..1f1b4e43 100644 --- a/model/alerting/rule_test.go +++ b/model/alerting/rule_test.go @@ -61,7 +61,6 @@ func TestCreateRule( t *testing.T) { Metrics: Metric{ 
PeriodInterval: "1m", - MaxPeriods: 15, Items: []MetricItem{ {Name: "a", Field: "payload.elasticsearch.node_stats.fs.total.free_in_bytes", Statistic: "min", Group: []string{"metadata.labels.cluster_id", "metadata.labels.node_id"}}, {Name: "b", Field: "payload.elasticsearch.node_stats.fs.total.total_in_bytes", Statistic: "max", Group: []string{"metadata.labels.cluster_id", "metadata.labels.node_id"}}, diff --git a/plugin/api/alerting/message.go b/plugin/api/alerting/message.go index e5bbe3b3..5ec3a730 100644 --- a/plugin/api/alerting/message.go +++ b/plugin/api/alerting/message.go @@ -215,7 +215,7 @@ func (h *AlertAPI) getAlertMessage(w http.ResponseWriter, req *http.Request, ps exists, err = orm.Get(rule) if !exists || err != nil { log.Error(err) - h.WriteError(w, fmt.Sprintf("rule[%s] not found", rule.ID), http.StatusInternalServerError) + h.WriteError(w, fmt.Sprintf("rule [%s] not found", rule.ID), http.StatusInternalServerError) return } metricExpression, _ := rule.Metrics.GenerateExpression() diff --git a/plugin/api/alerting/rule.go b/plugin/api/alerting/rule.go index 13dd2fa8..f0ecb1b8 100644 --- a/plugin/api/alerting/rule.go +++ b/plugin/api/alerting/rule.go @@ -7,13 +7,16 @@ package alerting import ( "fmt" log "github.com/cihub/seelog" + "github.com/r3labs/diff/v2" "infini.sh/console/model/alerting" alerting2 "infini.sh/console/service/alerting" _ "infini.sh/console/service/alerting/elasticsearch" httprouter "infini.sh/framework/core/api/router" "infini.sh/framework/core/elastic" + "infini.sh/framework/core/event" "infini.sh/framework/core/kv" "infini.sh/framework/core/orm" + "infini.sh/framework/core/queue" "infini.sh/framework/core/task" "infini.sh/framework/core/util" "infini.sh/framework/modules/elastic/api" @@ -55,7 +58,6 @@ func (alertAPI *AlertAPI) createRule(w http.ResponseWriter, req *http.Request, p ids = append(ids, rule.ID) rule.Created = time.Now() rule.Updated = time.Now() - rule.Metrics.MaxPeriods = 15 if rule.Schedule.Interval == ""{ 
rule.Schedule.Interval = "1m" } @@ -68,6 +70,11 @@ func (alertAPI *AlertAPI) createRule(w http.ResponseWriter, req *http.Request, p }, http.StatusInternalServerError) return } + saveAlertActivity("alerting_rule_change", "create", util.MapStr{ + "cluster_id": rule.Resource.ID, + "rule_id": rule.ID, + "cluster_name": rule.Resource.Name, + },nil, &rule) eng := alerting2.GetEngine(rule.Resource.Type) if rule.Enabled { ruleTask := task.ScheduleTask{ @@ -189,12 +196,55 @@ func (alertAPI *AlertAPI) getRuleDetail(w http.ResponseWriter, req *http.Request } +func saveActivity(activityInfo *event.Activity){ + queueConfig := queue.GetOrInitConfig("platform##activities") + if queueConfig.Labels == nil { + queueConfig.Labels = map[string]interface{}{ + "type": "platform", + "name": "activity", + "category": "elasticsearch", + "activity": true, + } + } + err := queue.Push(queueConfig, util.MustToJSONBytes(event.Event{ + Timestamp: time.Now(), + Metadata: event.EventMetadata{ + Category: "elasticsearch", + Name: "activity", + }, + Fields: util.MapStr{ + "activity": activityInfo, + }})) + if err != nil { + log.Error(err) + } +} + +func saveAlertActivity(name, typ string, labels map[string]interface{}, changelog diff.Changelog, oldState interface{}){ + activityInfo := &event.Activity{ + ID: util.GetUUID(), + Timestamp: time.Now(), + Metadata: event.ActivityMetadata{ + Category: "elasticsearch", + Group: "platform", + Name: name, + Type: typ, + Labels: labels, + }, + Changelog: changelog, + Fields: util.MapStr{ + "rule": oldState, + }, + } + saveActivity(activityInfo) +} + func (alertAPI *AlertAPI) updateRule(w http.ResponseWriter, req *http.Request, ps httprouter.Params) { id := ps.MustGetParameter("rule_id") - obj := &alerting.Rule{} + oldRule := &alerting.Rule{} - obj.ID = id - exists, err := orm.Get(obj) + oldRule.ID = id + exists, err := orm.Get(oldRule) if !exists || err != nil { log.Error(err) alertAPI.WriteJSON(w, util.MapStr{ @@ -204,35 +254,46 @@ func (alertAPI *AlertAPI) 
updateRule(w http.ResponseWriter, req *http.Request, p return } - id = obj.ID - create := obj.Created - obj = &alerting.Rule{} - err = alertAPI.DecodeJSON(req, obj) + id = oldRule.ID + create := oldRule.Created + rule := &alerting.Rule{ + } + err = alertAPI.DecodeJSON(req, rule) if err != nil { alertAPI.WriteError(w, err.Error(), http.StatusInternalServerError) log.Error(err) return } + rule.Metrics.Expression, err = rule.Metrics.GenerateExpression() + if err != nil { + alertAPI.WriteError(w, err.Error(), http.StatusInternalServerError) + log.Error(err) + return + } + changeLog, err := util.DiffTwoObject(oldRule, rule) + if err != nil { + log.Error(err) + } //protect - obj.ID = id - obj.Created = create - obj.Updated = time.Now() - obj.Metrics.Expression, err = obj.Metrics.GenerateExpression() - if err != nil { - alertAPI.WriteError(w, err.Error(), http.StatusInternalServerError) - log.Error(err) - return - } - err = orm.Update(obj) - if err != nil { - alertAPI.WriteError(w, err.Error(), http.StatusInternalServerError) - log.Error(err) - return - } + rule.ID = id + rule.Created = create + rule.Updated = time.Now() - if obj.Enabled { - exists, err = checkResourceExists(obj) + err = orm.Update(rule) + if err != nil { + alertAPI.WriteError(w, err.Error(), http.StatusInternalServerError) + log.Error(err) + return + } + saveAlertActivity("alerting_rule_change", "update", util.MapStr{ + "cluster_id": rule.Resource.ID, + "rule_id": rule.ID, + "cluster_name": rule.Resource.Name, + },changeLog, oldRule) + + if rule.Enabled { + exists, err = checkResourceExists(rule) if err != nil || !exists { log.Error(err) alertAPI.WriteJSON(w, util.MapStr{ @@ -242,22 +303,22 @@ func (alertAPI *AlertAPI) updateRule(w http.ResponseWriter, req *http.Request, p } //update task task.StopTask(id) - eng := alerting2.GetEngine(obj.Resource.Type) + eng := alerting2.GetEngine(rule.Resource.Type) ruleTask := task.ScheduleTask{ - ID: obj.ID, - Interval: obj.Schedule.Interval, - Description: 
obj.Metrics.Expression, - Task: eng.GenerateTask(obj), + ID: rule.ID, + Interval: rule.Schedule.Interval, + Description: rule.Metrics.Expression, + Task: eng.GenerateTask(rule), } task.RegisterScheduleTask(ruleTask) task.StartTask(ruleTask.ID) }else{ task.DeleteTask(id) } - clearKV(obj.ID) + clearKV(rule.ID) alertAPI.WriteJSON(w, util.MapStr{ - "_id": obj.ID, + "_id": rule.ID, "result": "updated", }, 200) } @@ -289,6 +350,11 @@ func (alertAPI *AlertAPI) deleteRule(w http.ResponseWriter, req *http.Request, p log.Error(err) return } + saveAlertActivity("alerting_rule_change", "delete", util.MapStr{ + "cluster_id": obj.Resource.ID, + "rule_id": obj.ID, + "cluster_name": obj.Resource.Name, + },nil, &obj) task.DeleteTask(obj.ID) clearKV(obj.ID) @@ -654,8 +720,6 @@ func getRuleMetricData( rule *alerting.Rule, filterParam *alerting.FilterParam) if len(md.Data) == 0 { continue } - //filteredMetricData = append(filteredMetricData, md) - targetData := md.Data["result"] if len(rule.Metrics.Items) == 1 { for k, _ := range md.Data { diff --git a/service/alerting/elasticsearch/engine_test.go b/service/alerting/elasticsearch/engine_test.go index 82d68215..6a0eccb2 100644 --- a/service/alerting/elasticsearch/engine_test.go +++ b/service/alerting/elasticsearch/engine_test.go @@ -55,7 +55,6 @@ func TestEngine( t *testing.T) { Metrics: alerting.Metric{ PeriodInterval: "1m", - MaxPeriods: 15, Items: []alerting.MetricItem{ {Name: "a", Field: "payload.elasticsearch.node_stats.fs.total.free_in_bytes", Statistic: "min", Group: []string{"metadata.labels.cluster_id", "metadata.labels.node_id"}}, {Name: "b", Field: "payload.elasticsearch.node_stats.fs.total.total_in_bytes", Statistic: "max", Group: []string{"metadata.labels.cluster_id", "metadata.labels.node_id"}}, @@ -204,7 +203,6 @@ func TestGeneratePercentilesAggQuery(t *testing.T) { Metrics: alerting.Metric{ PeriodInterval: "1m", - MaxPeriods: 15, Items: []alerting.MetricItem{ {Name: "a", Field: 
"payload.elasticsearch.index_stats.total.search.query_total", Statistic: "rate", Group: []string{"metadata.labels.cluster_id"}}, {Name: "b", Field: "payload.elasticsearch.index_stats.total.search.query_time_in_millis", Statistic: "rate", Group: []string{"metadata.labels.cluster_id"}}, From 8d614422d9343e4cf743dfde11df41c60d58b058 Mon Sep 17 00:00:00 2001 From: liugq Date: Mon, 23 May 2022 16:11:24 +0800 Subject: [PATCH 14/36] update alert message api --- plugin/api/alerting/message.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugin/api/alerting/message.go b/plugin/api/alerting/message.go index 5ec3a730..1360f260 100644 --- a/plugin/api/alerting/message.go +++ b/plugin/api/alerting/message.go @@ -238,7 +238,7 @@ func (h *AlertAPI) getAlertMessage(w http.ResponseWriter, req *http.Request, ps "created": message.Created, "updated": message.Updated, "resource_name": rule.Resource.Name, - "resource_object": rule.Resource.Objects, + "resource_objects": rule.Resource.Objects, "conditions": rule.Conditions, "duration": duration.Milliseconds(), "ignored_time": message.IgnoredTime, From faf3d1be25ced80e5476416c9ba9b20ff86453e2 Mon Sep 17 00:00:00 2001 From: liugq Date: Tue, 24 May 2022 17:00:46 +0800 Subject: [PATCH 15/36] add first_group_value --- service/alerting/elasticsearch/engine.go | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/service/alerting/elasticsearch/engine.go b/service/alerting/elasticsearch/engine.go index 6351c9a3..543cde8c 100644 --- a/service/alerting/elasticsearch/engine.go +++ b/service/alerting/elasticsearch/engine.go @@ -742,8 +742,16 @@ func attachTitleMessageToCtx(rule *alerting.Rule, paramsCtx map[string]interface } func newParameterCtx(rule *alerting.Rule, checkResults *alerting.ConditionResult, eventID string, eventTimestamp interface{} ) map[string]interface{}{ - var conditionParams []util.MapStr - for _, resultItem := range checkResults.ResultItems { + var ( + conditionParams 
[]util.MapStr + firstGroupValue string + firstPresetValue string + ) + for i, resultItem := range checkResults.ResultItems { + if i == 0 { + firstGroupValue = strings.Join(resultItem.GroupValues, ",") + firstPresetValue = strings.Join(resultItem.ConditionItem.Values, ",") + } conditionParams = append(conditionParams, util.MapStr{ alerting2.ParamPresetValue: resultItem.ConditionItem.Values, alerting2.Severity: resultItem.ConditionItem.Severity, @@ -760,6 +768,8 @@ func newParameterCtx(rule *alerting.Rule, checkResults *alerting.ConditionResult alerting2.ParamEventID: eventID, alerting2.ParamTimestamp: eventTimestamp, alerting2.ParamResults: conditionParams, + "first_group_value": firstGroupValue, + "first_preset_value": firstPresetValue, } return paramsCtx } From fecb25c71ecacfd0ffd6543151bc45d83c225962 Mon Sep 17 00:00:00 2001 From: liugq Date: Tue, 24 May 2022 17:15:52 +0800 Subject: [PATCH 16/36] add first_group_value --- service/alerting/elasticsearch/engine.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/service/alerting/elasticsearch/engine.go b/service/alerting/elasticsearch/engine.go index 543cde8c..ac1c3b9f 100644 --- a/service/alerting/elasticsearch/engine.go +++ b/service/alerting/elasticsearch/engine.go @@ -599,6 +599,7 @@ func (engine *Engine) Do(rule *alerting.Rule) error { } return nil }else{ + alertItem.State = alerting.AlertStateAlerting paramsCtx = newParameterCtx(rule, checkResults,alertItem.ID, alertItem.Created.Unix()) var ( severity = conditionResults[0].ConditionItem.Severity @@ -616,7 +617,6 @@ func (engine *Engine) Do(rule *alerting.Rule) error { alertItem.Severity = severity alertItem.Message = paramsCtx[alerting2.ParamMessage].(string) alertItem.Title = paramsCtx[alerting2.ParamTitle].(string) - alertItem.State = alerting.AlertStateAlerting if alertMessage == nil || alertMessage.Status == alerting.MessageStateRecovered { msg := &alerting.AlertMessage{ RuleID: rule.ID, From 3ce0621ef720f05a4300a2b6161c8cb11e29cacb Mon 
Sep 17 00:00:00 2001 From: liugq Date: Tue, 24 May 2022 20:28:48 +0800 Subject: [PATCH 17/36] rename preset_value to threshold --- service/alerting/constants.go | 10 +++++----- service/alerting/elasticsearch/engine.go | 10 +++++----- service/alerting/parameter.go | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/service/alerting/constants.go b/service/alerting/constants.go index 80860944..c546ca8c 100644 --- a/service/alerting/constants.go +++ b/service/alerting/constants.go @@ -19,11 +19,11 @@ const ( ParamEventID = "event_id" // 检查事件 ID ParamResults = "results" // ParamMessage = "message" //检查消息 自定义(模版渲染) - ParamTitle = "title" - ParamPresetValue = "preset_value" //检查预设值 float64 - ParamResultValue = "result_value" //检查结果 {group_tags:["cluster-xxx", "node-xxx"], check_values:[]} - Severity = "severity" //告警等级 - ParamTimestamp = "timestamp" //事件产生时间戳 + ParamTitle = "title" + ParamThreshold = "threshold" //检查预设值 []string + ParamResultValue = "result_value" //检查结果 {group_tags:["cluster-xxx", "node-xxx"], check_values:[]} + Severity = "severity" //告警等级 + ParamTimestamp = "timestamp" //事件产生时间戳 ParamGroupValues = "group_values" ParamIssueTimestamp = "issue_timestamp" ParamRelationValues = "relation_values" diff --git a/service/alerting/elasticsearch/engine.go b/service/alerting/elasticsearch/engine.go index ac1c3b9f..6f73628e 100644 --- a/service/alerting/elasticsearch/engine.go +++ b/service/alerting/elasticsearch/engine.go @@ -745,15 +745,15 @@ func newParameterCtx(rule *alerting.Rule, checkResults *alerting.ConditionResult var ( conditionParams []util.MapStr firstGroupValue string - firstPresetValue string + firstThreshold string ) for i, resultItem := range checkResults.ResultItems { if i == 0 { firstGroupValue = strings.Join(resultItem.GroupValues, ",") - firstPresetValue = strings.Join(resultItem.ConditionItem.Values, ",") + firstThreshold = strings.Join(resultItem.ConditionItem.Values, ",") } conditionParams = append(conditionParams, 
util.MapStr{ - alerting2.ParamPresetValue: resultItem.ConditionItem.Values, + alerting2.ParamThreshold: resultItem.ConditionItem.Values, alerting2.Severity: resultItem.ConditionItem.Severity, alerting2.ParamGroupValues: resultItem.GroupValues, alerting2.ParamIssueTimestamp: resultItem.IssueTimestamp, @@ -768,8 +768,8 @@ func newParameterCtx(rule *alerting.Rule, checkResults *alerting.ConditionResult alerting2.ParamEventID: eventID, alerting2.ParamTimestamp: eventTimestamp, alerting2.ParamResults: conditionParams, - "first_group_value": firstGroupValue, - "first_preset_value": firstPresetValue, + "first_group_value": firstGroupValue, + "first_threshold": firstThreshold, } return paramsCtx } diff --git a/service/alerting/parameter.go b/service/alerting/parameter.go index 48c5d226..25ba5fcc 100644 --- a/service/alerting/parameter.go +++ b/service/alerting/parameter.go @@ -21,7 +21,7 @@ func GetTemplateParameters() []ParameterMeta { {ParamTitle, "string", "", "xxx cpu used 95%", nil}, {ParamMessage, "string", "", "disk used 90%", nil}, {ParamResults, "array", "", "", []ParameterMeta{ - {ParamPresetValue, "array", "", "[\"90\"]", nil}, + {ParamThreshold, "array", "", "[\"90\"]", nil}, {Severity, "string", "", "error", nil}, {ParamGroupValues, "array", "", "[\"cluster-xxx\", \"node-xxx\"]", nil}, {ParamIssueTimestamp, "date", "", "2022-05-11T11:50:55+08:00", nil}, From 61f025be9f1febbfc0442744614b037a948b6d8b Mon Sep 17 00:00:00 2001 From: liugq Date: Wed, 25 May 2022 11:19:56 +0800 Subject: [PATCH 18/36] update alerting ignore api --- plugin/api/alerting/message.go | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/plugin/api/alerting/message.go b/plugin/api/alerting/message.go index 1360f260..69b0f0d4 100644 --- a/plugin/api/alerting/message.go +++ b/plugin/api/alerting/message.go @@ -8,8 +8,10 @@ import ( "fmt" log "github.com/cihub/seelog" "infini.sh/console/model/alerting" + alerting2 
"infini.sh/console/service/alerting" httprouter "infini.sh/framework/core/api/router" "infini.sh/framework/core/elastic" + "infini.sh/framework/core/kv" "infini.sh/framework/core/orm" "infini.sh/framework/core/util" "net/http" @@ -20,7 +22,7 @@ import ( func (h *AlertAPI) ignoreAlertMessage(w http.ResponseWriter, req *http.Request, ps httprouter.Params) { body := struct { - MessageIDs []string `json:"ids"` + Messages []alerting.AlertMessage `json:"messages"` }{} err := h.DecodeJSON(req, &body) if err != nil { @@ -28,14 +30,31 @@ func (h *AlertAPI) ignoreAlertMessage(w http.ResponseWriter, req *http.Request, return } - if len(body.MessageIDs) == 0 { + if len(body.Messages) == 0 { h.WriteError(w, "alert ids should not be empty", http.StatusInternalServerError) return } + messageIDs := make([]string, 0, len(body.Messages)) + for _, msg := range body.Messages { + messageIDs = append(messageIDs, msg.ID) + } queryDsl := util.MapStr{ "query": util.MapStr{ - "terms": util.MapStr{ - "_id": body.MessageIDs, + "bool": util.MapStr{ + "must": []util.MapStr{ + { + "terms": util.MapStr{ + "_id": messageIDs, + }, + }, + { + "term": util.MapStr{ + "status": util.MapStr{ + "value": alerting.MessageStateAlerting, + }, + }, + }, + }, }, }, "script": util.MapStr{ @@ -48,9 +67,14 @@ func (h *AlertAPI) ignoreAlertMessage(w http.ResponseWriter, req *http.Request, log.Error(err) return } + //delete kv cache + for _, msg := range body.Messages { + _ = kv.DeleteKey(alerting2.KVLastMessageState, []byte(msg.RuleID)) + } + h.WriteJSON(w, util.MapStr{ - "ids": body.MessageIDs, + "ids": messageIDs, "result": "updated", }, 200) } From 0438651474d5d4a7d1876c9da28afd56f488bc7a Mon Sep 17 00:00:00 2001 From: liugq Date: Wed, 25 May 2022 12:58:04 +0800 Subject: [PATCH 19/36] update alerting ignore api --- plugin/api/alerting/message.go | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/plugin/api/alerting/message.go b/plugin/api/alerting/message.go index 69b0f0d4..fb57fe0b 
100644 --- a/plugin/api/alerting/message.go +++ b/plugin/api/alerting/message.go @@ -67,9 +67,17 @@ func (h *AlertAPI) ignoreAlertMessage(w http.ResponseWriter, req *http.Request, log.Error(err) return } - //delete kv cache + //update kv cache for _, msg := range body.Messages { - _ = kv.DeleteKey(alerting2.KVLastMessageState, []byte(msg.RuleID)) + stateBytes, err := kv.GetValue(alerting2.KVLastMessageState, []byte(msg.RuleID)) + if err != nil && stateBytes != nil { + message := &alerting.AlertMessage{} + util.MustFromJSONBytes(stateBytes, message) + if message.Status == alerting.MessageStateAlerting { + message.Status = alerting.MessageStateIgnored + } + kv.AddValue(alerting2.KVLastMessageState, []byte(msg.RuleID), util.MustToJSONBytes(message)) + } } From 2c07a3f535f3587783377cd0fe67b02223950bc3 Mon Sep 17 00:00:00 2001 From: liugq Date: Wed, 25 May 2022 13:21:56 +0800 Subject: [PATCH 20/36] update alerting ignore api --- plugin/api/alerting/message.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugin/api/alerting/message.go b/plugin/api/alerting/message.go index fb57fe0b..00d13d7e 100644 --- a/plugin/api/alerting/message.go +++ b/plugin/api/alerting/message.go @@ -75,8 +75,8 @@ func (h *AlertAPI) ignoreAlertMessage(w http.ResponseWriter, req *http.Request, util.MustFromJSONBytes(stateBytes, message) if message.Status == alerting.MessageStateAlerting { message.Status = alerting.MessageStateIgnored + _ = kv.AddValue(alerting2.KVLastMessageState, []byte(msg.RuleID), util.MustToJSONBytes(message)) } - kv.AddValue(alerting2.KVLastMessageState, []byte(msg.RuleID), util.MustToJSONBytes(message)) } } From 8cfea1fc36d565d28858715ee2a1d12b7686bcde Mon Sep 17 00:00:00 2001 From: liugq Date: Wed, 25 May 2022 15:42:33 +0800 Subject: [PATCH 21/36] fixed alerting bug --- plugin/api/alerting/message.go | 14 ++------- service/alerting/elasticsearch/engine.go | 38 +++++++++++++----------- 2 files changed, 24 insertions(+), 28 deletions(-) diff --git 
a/plugin/api/alerting/message.go b/plugin/api/alerting/message.go index 00d13d7e..f9296ed1 100644 --- a/plugin/api/alerting/message.go +++ b/plugin/api/alerting/message.go @@ -31,7 +31,7 @@ func (h *AlertAPI) ignoreAlertMessage(w http.ResponseWriter, req *http.Request, } if len(body.Messages) == 0 { - h.WriteError(w, "alert ids should not be empty", http.StatusInternalServerError) + h.WriteError(w, "messages should not be empty", http.StatusInternalServerError) return } messageIDs := make([]string, 0, len(body.Messages)) @@ -67,17 +67,9 @@ func (h *AlertAPI) ignoreAlertMessage(w http.ResponseWriter, req *http.Request, log.Error(err) return } - //update kv cache + //delete kv cache for _, msg := range body.Messages { - stateBytes, err := kv.GetValue(alerting2.KVLastMessageState, []byte(msg.RuleID)) - if err != nil && stateBytes != nil { - message := &alerting.AlertMessage{} - util.MustFromJSONBytes(stateBytes, message) - if message.Status == alerting.MessageStateAlerting { - message.Status = alerting.MessageStateIgnored - _ = kv.AddValue(alerting2.KVLastMessageState, []byte(msg.RuleID), util.MustToJSONBytes(message)) - } - } + _ = kv.DeleteKey(alerting2.KVLastMessageState, []byte(msg.RuleID)) } diff --git a/service/alerting/elasticsearch/engine.go b/service/alerting/elasticsearch/engine.go index 6f73628e..b2f84df7 100644 --- a/service/alerting/elasticsearch/engine.go +++ b/service/alerting/elasticsearch/engine.go @@ -547,14 +547,18 @@ func (engine *Engine) Do(rule *alerting.Rule) error { } } if alertItem != nil { - for _, actionResult := range alertItem.ActionExecutionResults { - if actionResult.Error != "" { - alertItem.Error = actionResult.Error + if err != nil{ + alertItem.State = alerting.AlertStateError + alertItem.Error = err.Error() + }else { + for _, actionResult := range alertItem.ActionExecutionResults { + if actionResult.Error != "" { + alertItem.Error = actionResult.Error + alertItem.State = alerting.AlertStateError + } } } - if alertItem.Error != ""{ - 
alertItem.State = alerting.AlertStateError - } + err = orm.Save(alertItem) if err != nil { log.Error(err) @@ -976,19 +980,19 @@ func getLastAlertMessage(ruleID string, duration time.Duration) (*alerting.Alert if err != nil { return nil, err } - if messageBytes == nil { - return nil, nil - } message := &alerting.AlertMessage{} - err = util.FromJSONBytes(messageBytes, message) - if err != nil { - return nil, err + if messageBytes != nil { + + err = util.FromJSONBytes(messageBytes, message) + if err != nil { + return nil, err + } + if time.Now().Sub(message.Updated) <= duration { + return message, nil + } } - if time.Now().Sub(message.Updated) > duration { - err = getLastAlertMessageFromES(ruleID, message) - return message, err - } - return message, nil + err = getLastAlertMessageFromES(ruleID, message) + return message, err } func saveAlertMessageToES(message *alerting.AlertMessage) error { From e1471378ce44a869d3adb56cbdcb49ce3cb72830 Mon Sep 17 00:00:00 2001 From: liugq Date: Wed, 25 May 2022 15:48:03 +0800 Subject: [PATCH 22/36] fixed alerting bug --- service/alerting/elasticsearch/engine.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/service/alerting/elasticsearch/engine.go b/service/alerting/elasticsearch/engine.go index b2f84df7..df6efca0 100644 --- a/service/alerting/elasticsearch/engine.go +++ b/service/alerting/elasticsearch/engine.go @@ -495,7 +495,7 @@ func (engine *Engine) CheckCondition(rule *alerting.Rule)(*alerting.ConditionRes "result": targetData.Data[dataKey][i][1], }) if err != nil { - return nil, err + return nil, fmt.Errorf("evaluate rule [%s] error: %w", rule.ID, err) } if evaluateResult == true { triggerCount += 1 From e72f0e940a7bf4817f93212fc4433ca10dfbc544 Mon Sep 17 00:00:00 2001 From: liugq Date: Wed, 25 May 2022 16:40:07 +0800 Subject: [PATCH 23/36] fixed alerting bug --- service/alerting/elasticsearch/engine.go | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git 
a/service/alerting/elasticsearch/engine.go b/service/alerting/elasticsearch/engine.go index df6efca0..62d1c708 100644 --- a/service/alerting/elasticsearch/engine.go +++ b/service/alerting/elasticsearch/engine.go @@ -943,7 +943,7 @@ func collectMetricData(agg interface{}, groupValues string, metricData *[]alerti } } -func getLastAlertMessageFromES(ruleID string, message *alerting.AlertMessage) error { +func getLastAlertMessageFromES(ruleID string) (*alerting.AlertMessage, error) { queryDsl := util.MapStr{ "size": 1, "sort": []util.MapStr{ @@ -966,13 +966,15 @@ func getLastAlertMessageFromES(ruleID string, message *alerting.AlertMessage) e } err, searchResult := orm.Search(alerting.AlertMessage{}, &q ) if err != nil { - return err + return nil, err } if len(searchResult.Result) == 0 { - return nil + return nil, nil } messageBytes := util.MustToJSONBytes(searchResult.Result[0]) - return util.FromJSONBytes(messageBytes, message) + message := &alerting.AlertMessage{} + err = util.FromJSONBytes(messageBytes, message) + return message, err } func getLastAlertMessage(ruleID string, duration time.Duration) (*alerting.AlertMessage, error ){ @@ -991,7 +993,7 @@ func getLastAlertMessage(ruleID string, duration time.Duration) (*alerting.Alert return message, nil } } - err = getLastAlertMessageFromES(ruleID, message) + message, err = getLastAlertMessageFromES(ruleID) return message, err } From 9bb29fba12d5fbfd931d011ebf6c98dde6497d7f Mon Sep 17 00:00:00 2001 From: liugq Date: Wed, 25 May 2022 16:48:30 +0800 Subject: [PATCH 24/36] fixed alerting bug --- service/alerting/elasticsearch/engine.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/service/alerting/elasticsearch/engine.go b/service/alerting/elasticsearch/engine.go index 62d1c708..722cd1ee 100644 --- a/service/alerting/elasticsearch/engine.go +++ b/service/alerting/elasticsearch/engine.go @@ -495,7 +495,7 @@ func (engine *Engine) CheckCondition(rule *alerting.Rule)(*alerting.ConditionRes "result": 
targetData.Data[dataKey][i][1], }) if err != nil { - return nil, fmt.Errorf("evaluate rule [%s] error: %w", rule.ID, err) + return conditionResult, fmt.Errorf("evaluate rule [%s] error: %w", rule.ID, err) } if evaluateResult == true { triggerCount += 1 From f39ddacb0f7de417de33ceccfcf2b2f2aa8d4e35 Mon Sep 17 00:00:00 2001 From: liugq Date: Wed, 25 May 2022 17:39:35 +0800 Subject: [PATCH 25/36] add rule name --- model/alerting/rule.go | 2 +- service/alerting/elasticsearch/engine.go | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/model/alerting/rule.go b/model/alerting/rule.go index c6e4c223..e676c61d 100644 --- a/model/alerting/rule.go +++ b/model/alerting/rule.go @@ -13,7 +13,7 @@ type Rule struct { ID string `json:"id,omitempty" elastic_meta:"_id" elastic_mapping:"id: { type: keyword }"` Created time.Time `json:"created,omitempty" elastic_mapping:"created: { type: date }"` Updated time.Time `json:"updated,omitempty" elastic_mapping:"updated: { type: date }"` - //Name string `json:"name" elastic_mapping:"name:{type:keyword,copy_to:search_text}"` + Name string `json:"name" elastic_mapping:"name:{type:keyword,copy_to:search_text}"` Enabled bool `json:"enabled" elastic_mapping:"enabled:{type:keyword}"` Resource Resource `json:"resource" elastic_mapping:"resource:{type:object}"` Metrics Metric `json:"metrics" elastic_mapping:"metrics:{type:object}"` diff --git a/service/alerting/elasticsearch/engine.go b/service/alerting/elasticsearch/engine.go index 722cd1ee..b19b4cfb 100644 --- a/service/alerting/elasticsearch/engine.go +++ b/service/alerting/elasticsearch/engine.go @@ -486,6 +486,10 @@ func (engine *Engine) CheckCondition(rule *alerting.Rule)(*alerting.ConditionRes } triggerCount := 0 for i := 0; i < dataLength; i++ { + //clear nil value + if targetData.Data[dataKey][i][1] == nil { + continue + } if r, ok := targetData.Data[dataKey][i][1].(float64); ok { if math.IsNaN(r){ continue From e352cb7fa1a54172e40c5f43eeaa8e48015ae363 Mon Sep 17 
00:00:00 2001 From: liugq Date: Wed, 25 May 2022 19:03:48 +0800 Subject: [PATCH 26/36] add rule name --- plugin/api/alerting/message.go | 1 + plugin/api/alerting/rule.go | 1 + 2 files changed, 2 insertions(+) diff --git a/plugin/api/alerting/message.go b/plugin/api/alerting/message.go index f9296ed1..25ea5c5d 100644 --- a/plugin/api/alerting/message.go +++ b/plugin/api/alerting/message.go @@ -256,6 +256,7 @@ func (h *AlertAPI) getAlertMessage(w http.ResponseWriter, req *http.Request, ps detailObj := util.MapStr{ "message_id": message.ID, "rule_id": message.RuleID, + "rule_name": rule.Name, "title": message.Title, "message": message.Message, "severity": message.Severity, diff --git a/plugin/api/alerting/rule.go b/plugin/api/alerting/rule.go index f0ecb1b8..6cc054be 100644 --- a/plugin/api/alerting/rule.go +++ b/plugin/api/alerting/rule.go @@ -182,6 +182,7 @@ func (alertAPI *AlertAPI) getRuleDetail(w http.ResponseWriter, req *http.Request } detailObj := util.MapStr{ + "rule_name": obj.Name, "resource_name": obj.Resource.Name, "resource_objects": obj.Resource.Objects, "period_interval": obj.Metrics.PeriodInterval, //统计周期 From 8d7ecd1fc3c6364fb8d04f8127ab8c2496642947 Mon Sep 17 00:00:00 2001 From: liugq Date: Wed, 25 May 2022 20:35:20 +0800 Subject: [PATCH 27/36] copy rule name to alert record --- model/alerting/alert.go | 1 + service/alerting/elasticsearch/engine.go | 2 ++ 2 files changed, 3 insertions(+) diff --git a/model/alerting/alert.go b/model/alerting/alert.go index 8ff4b6d7..10b3421c 100644 --- a/model/alerting/alert.go +++ b/model/alerting/alert.go @@ -13,6 +13,7 @@ type Alert struct { Created time.Time `json:"created,omitempty" elastic_mapping:"created: { type: date }"` Updated time.Time `json:"updated,omitempty" elastic_mapping:"updated: { type: date }"` RuleID string `json:"rule_id" elastic_mapping:"rule_id: { type: keyword }"` + RuleName string `json:"rule_name" elastic_mapping:"rule_name: { type: keyword }"` ResourceID string `json:"resource_id" 
elastic_mapping:"resource_id: { type: keyword }"` ResourceName string `json:"resource_name" elastic_mapping:"resource_name: { type: keyword }"` Expression string `json:"expression" elastic_mapping:"expression: { type: keyword, copy_to:search_text }"` diff --git a/service/alerting/elasticsearch/engine.go b/service/alerting/elasticsearch/engine.go index b19b4cfb..b30b2dcc 100644 --- a/service/alerting/elasticsearch/engine.go +++ b/service/alerting/elasticsearch/engine.go @@ -542,6 +542,7 @@ func (engine *Engine) Do(rule *alerting.Rule) error { Created: time.Now(), Updated: time.Now(), RuleID: rule.ID, + RuleName: rule.Name, ResourceID: rule.Resource.ID, ResourceName: rule.Resource.Name, Expression: rule.Metrics.Expression, @@ -576,6 +577,7 @@ func (engine *Engine) Do(rule *alerting.Rule) error { Created: time.Now(), Updated: time.Now(), RuleID: rule.ID, + RuleName: rule.Name, ResourceID: rule.Resource.ID, ResourceName: rule.Resource.Name, Expression: rule.Metrics.Expression, From c773e7b9e8686f803a00b2d5bdbf4c82e79d3935 Mon Sep 17 00:00:00 2001 From: liugq Date: Wed, 25 May 2022 20:54:13 +0800 Subject: [PATCH 28/36] add template param rule_name --- service/alerting/elasticsearch/engine.go | 1 + 1 file changed, 1 insertion(+) diff --git a/service/alerting/elasticsearch/engine.go b/service/alerting/elasticsearch/engine.go index b30b2dcc..af2807ad 100644 --- a/service/alerting/elasticsearch/engine.go +++ b/service/alerting/elasticsearch/engine.go @@ -780,6 +780,7 @@ func newParameterCtx(rule *alerting.Rule, checkResults *alerting.ConditionResult alerting2.ParamResults: conditionParams, "first_group_value": firstGroupValue, "first_threshold": firstThreshold, + "rule_name": rule.Name, } return paramsCtx } From 310497242b52f3e20843349c20e6e354ce88e0a4 Mon Sep 17 00:00:00 2001 From: liugq Date: Thu, 26 May 2022 12:11:20 +0800 Subject: [PATCH 29/36] auth api --- plugin/api/alerting/api.go | 2 +- plugin/api/alerting/channel.go | 37 +++++++++++++++--------- 
plugin/api/index_management/indices.go | 15 ++++++++-- plugin/api/init.go | 2 +- service/alerting/elasticsearch/engine.go | 1 + 5 files changed, 40 insertions(+), 17 deletions(-) diff --git a/plugin/api/alerting/api.go b/plugin/api/alerting/api.go index 5ab8f2d5..e9fc7ba3 100644 --- a/plugin/api/alerting/api.go +++ b/plugin/api/alerting/api.go @@ -31,7 +31,7 @@ func (alert *AlertAPI) Init() { api.HandleAPIMethod(api.GET, "/alerting/channel/:channel_id", alert.RequirePermission(alert.getChannel, enum.PermissionAlertChannelRead)) api.HandleAPIMethod(api.POST, "/alerting/channel", alert.RequirePermission(alert.createChannel, enum.PermissionAlertChannelWrite)) - api.HandleAPIMethod(api.DELETE, "/alerting/channel/:channel_id", alert.RequirePermission(alert.deleteChannel, enum.PermissionAlertChannelWrite)) + api.HandleAPIMethod(api.DELETE, "/alerting/channel", alert.RequirePermission(alert.deleteChannel, enum.PermissionAlertChannelWrite)) api.HandleAPIMethod(api.PUT, "/alerting/channel/:channel_id", alert.RequirePermission(alert.updateChannel, enum.PermissionAlertChannelWrite)) api.HandleAPIMethod(api.GET, "/alerting/channel/_search", alert.RequirePermission(alert.searchChannel, enum.PermissionAlertChannelRead)) diff --git a/plugin/api/alerting/channel.go b/plugin/api/alerting/channel.go index c372954d..f38a5fa0 100644 --- a/plugin/api/alerting/channel.go +++ b/plugin/api/alerting/channel.go @@ -107,21 +107,32 @@ func (h *AlertAPI) updateChannel(w http.ResponseWriter, req *http.Request, ps ht } func (h *AlertAPI) deleteChannel(w http.ResponseWriter, req *http.Request, ps httprouter.Params) { - id := ps.MustGetParameter("channel_id") - - obj := alerting.Channel{} - obj.ID = id - - exists, err := orm.Get(&obj) - if !exists || err != nil { - h.WriteJSON(w, util.MapStr{ - "_id": id, - "result": "not_found", - }, http.StatusNotFound) + reqBody := struct { + ChannelIDs []string `json:"ids"` + }{} + err := h.DecodeJSON(req, &reqBody) + if err != nil { + h.WriteError(w, 
err.Error(), http.StatusInternalServerError) + log.Error(err) return } + if len(reqBody.ChannelIDs) == 0 { + if err != nil { + h.WriteError(w, "channel ids required", http.StatusInternalServerError) + log.Error(err) + return + } + } - err = orm.Delete(&obj) + queryDsl := util.MapStr{ + "query": util.MapStr{ + "terms": util.MapStr{ + "id": reqBody.ChannelIDs, + }, + }, + } + + err = orm.DeleteBy(alerting.Channel{}, util.MustToJSONBytes(queryDsl)) if err != nil { h.WriteError(w, err.Error(), http.StatusInternalServerError) log.Error(err) @@ -129,7 +140,7 @@ func (h *AlertAPI) deleteChannel(w http.ResponseWriter, req *http.Request, ps ht } h.WriteJSON(w, util.MapStr{ - "_id": obj.ID, + "ids": reqBody.ChannelIDs , "result": "deleted", }, 200) } diff --git a/plugin/api/index_management/indices.go b/plugin/api/index_management/indices.go index 6ae775d6..227668e3 100644 --- a/plugin/api/index_management/indices.go +++ b/plugin/api/index_management/indices.go @@ -1,11 +1,12 @@ package index_management import ( + log "github.com/cihub/seelog" httprouter "infini.sh/framework/core/api/router" "infini.sh/framework/core/elastic" "infini.sh/framework/core/util" "net/http" - log "github.com/cihub/seelog" + "strings" ) func (handler APIHandler) HandleGetMappingsAction(w http.ResponseWriter, req *http.Request, ps httprouter.Params) { @@ -39,7 +40,17 @@ func (handler APIHandler) HandleGetMappingsAction(w http.ResponseWriter, req *ht func (handler APIHandler) HandleGetIndicesAction(w http.ResponseWriter, req *http.Request, ps httprouter.Params) { targetClusterID := ps.ByName("id") client := elastic.GetClient(targetClusterID) - catIndices, err := client.GetIndices("") + //filter indices + allowedIndices, hasAllPrivilege := handler.GetAllowedIndices(req, targetClusterID) + if !hasAllPrivilege && len(allowedIndices) == 0 { + handler.WriteJSON(w, []interface{}{} , http.StatusOK) + return + } + strIndices := "" + if !hasAllPrivilege { + strIndices = strings.Join(allowedIndices, ",") + } + 
catIndices, err := client.GetIndices(strIndices) resBody := util.MapStr{} if err != nil { log.Error(err) diff --git a/plugin/api/init.go b/plugin/api/init.go index c69abd01..5670da83 100644 --- a/plugin/api/init.go +++ b/plugin/api/init.go @@ -35,7 +35,7 @@ func Init(cfg *config.AppConfig) { api.HandleAPIMethod(api.GET, path.Join(pathPrefix, "rebuild/_search"), handler.HandleGetRebuildListAction) api.HandleAPIMethod(api.DELETE, path.Join(pathPrefix, "rebuild/:id"), handler.HandleDeleteRebuildAction) - api.HandleAPIMethod(api.GET, path.Join(esPrefix, "_cat/indices"), handler.HandleGetIndicesAction) + api.HandleAPIMethod(api.GET, path.Join(esPrefix, "_cat/indices"), handler.RequireLogin(handler.HandleGetIndicesAction)) api.HandleAPIMethod(api.GET, path.Join(esPrefix, "index/:index/_mappings"), handler.HandleGetMappingsAction) api.HandleAPIMethod(api.GET, path.Join(esPrefix, "index/:index/_settings"), handler.HandleGetSettingsAction) api.HandleAPIMethod(api.PUT, path.Join(esPrefix, "index/:index/_settings"),handler.HandleUpdateSettingsAction) diff --git a/service/alerting/elasticsearch/engine.go b/service/alerting/elasticsearch/engine.go index af2807ad..d1d22250 100644 --- a/service/alerting/elasticsearch/engine.go +++ b/service/alerting/elasticsearch/engine.go @@ -45,6 +45,7 @@ func (engine *Engine) GenerateQuery(rule *alerting.Rule, filterParam *alerting.F return nil, fmt.Errorf("metric items should not be empty") } basicAggs := util.MapStr{} + //todo bucket sort (es 6.1) bucket script (es 2.0) for _, metricItem := range rule.Metrics.Items { metricAggs := engine.generateAgg(&metricItem) if err = util.MergeFields(basicAggs, metricAggs, true); err != nil { From 30ec0b6bfbbe97d0cdac3d192f77c36ba959e896 Mon Sep 17 00:00:00 2001 From: liugq Date: Thu, 26 May 2022 16:41:34 +0800 Subject: [PATCH 30/36] filter data --- plugin/api/index_management/elasticsearch.go | 52 ++++++++++++++------ 1 file changed, 37 insertions(+), 15 deletions(-) diff --git 
a/plugin/api/index_management/elasticsearch.go b/plugin/api/index_management/elasticsearch.go index 4356c13d..345898f1 100644 --- a/plugin/api/index_management/elasticsearch.go +++ b/plugin/api/index_management/elasticsearch.go @@ -25,8 +25,24 @@ func (handler APIHandler) ElasticsearchOverviewAction(w http.ResponseWriter, req // return true //}) esClient := elastic.GetClient(handler.Config.Elasticsearch) + queryDsl := util.MapStr{ + "size": 100, + } + clusterFilter, hasAllPrivilege := handler.GetClusterFilter(req, "_id") + if !hasAllPrivilege && clusterFilter == nil{ + handler.WriteJSON(w, util.MapStr{ + "nodes_count": 0, + "cluster_count":0, + "total_used_store_in_bytes": 0, + "hosts_count": 0, + }, http.StatusOK) + return + } + if !hasAllPrivilege { + queryDsl["query"] = clusterFilter + } - searchRes, err := esClient.SearchWithRawQueryDSL(orm.GetIndexName(elastic.ElasticsearchConfig{}), nil) + searchRes, err := esClient.SearchWithRawQueryDSL(orm.GetIndexName(elastic.ElasticsearchConfig{}), util.MustToJSONBytes(queryDsl)) if err != nil { log.Error(err) handler.WriteJSON(w, util.MapStr{ @@ -64,11 +80,11 @@ func (handler APIHandler) ElasticsearchOverviewAction(w http.ResponseWriter, req } } - hostCount, err := handler.getMetricCount(orm.GetIndexName(elastic.NodeConfig{}), "metadata.host") + hostCount, err := handler.getMetricCount(orm.GetIndexName(elastic.NodeConfig{}), "metadata.host", clusterIDs) if err != nil{ log.Error(err) } - nodeCount, err := handler.getMetricCount(orm.GetIndexName(elastic.NodeConfig{}), "id") + nodeCount, err := handler.getMetricCount(orm.GetIndexName(elastic.NodeConfig{}), "id", clusterIDs) if err != nil{ log.Error(err) } @@ -130,20 +146,26 @@ func (handler APIHandler) getLatestClusterMonitorData(clusterIDs []interface{}) } -func (handler APIHandler) getMetricCount(indexName, field string) (interface{}, error){ +func (handler APIHandler) getMetricCount(indexName, field string, clusterIDs []interface{}) (interface{}, error){ client := 
elastic.GetClient(handler.Config.Elasticsearch) - queryDSL := `{ + queryDSL := util.MapStr{ "size": 0, - "aggs": { - "field_count": { - "cardinality": { - "field": "%s" - } - } - } -}` - queryDSL = fmt.Sprintf(queryDSL, field) - searchRes, err := client.SearchWithRawQueryDSL(indexName, []byte(queryDSL)) + "aggs": util.MapStr{ + "field_count": util.MapStr{ + "cardinality": util.MapStr{ + "field": field, + }, + }, + }, +} + if len(clusterIDs) > 0 { + queryDSL["query"] = util.MapStr{ + "terms": util.MapStr{ + "metadata.cluster_id": clusterIDs, + }, + } + } + searchRes, err := client.SearchWithRawQueryDSL(indexName, util.MustToJSONBytes(queryDSL)) if err != nil { log.Error(err) return 0, err From 5069e4cf3978206f99b0e473859dc2e365458707 Mon Sep 17 00:00:00 2001 From: liugq Date: Thu, 26 May 2022 17:26:43 +0800 Subject: [PATCH 31/36] add template param severity --- service/alerting/elasticsearch/engine.go | 122 ++++++++++++++--------- 1 file changed, 74 insertions(+), 48 deletions(-) diff --git a/service/alerting/elasticsearch/engine.go b/service/alerting/elasticsearch/engine.go index d1d22250..338936dc 100644 --- a/service/alerting/elasticsearch/engine.go +++ b/service/alerting/elasticsearch/engine.go @@ -609,50 +609,55 @@ func (engine *Engine) Do(rule *alerting.Rule) error { } } return nil - }else{ - alertItem.State = alerting.AlertStateAlerting - paramsCtx = newParameterCtx(rule, checkResults,alertItem.ID, alertItem.Created.Unix()) - var ( - severity = conditionResults[0].ConditionItem.Severity - ) - err = attachTitleMessageToCtx(rule, paramsCtx) - if err != nil { - return err - } - for _, conditionResult := range conditionResults { - if alerting.SeverityWeights[severity] < alerting.SeverityWeights[conditionResult.ConditionItem.Severity] { - severity = conditionResult.ConditionItem.Severity - } - } - - alertItem.Severity = severity - alertItem.Message = paramsCtx[alerting2.ParamMessage].(string) - alertItem.Title = paramsCtx[alerting2.ParamTitle].(string) - if 
alertMessage == nil || alertMessage.Status == alerting.MessageStateRecovered { - msg := &alerting.AlertMessage{ - RuleID: rule.ID, - Created: time.Now(), - Updated: time.Now(), - ID: util.GetUUID(), - Status: alerting.MessageStateAlerting, - Severity: severity, - Title: alertItem.Title, - Message: alertItem.Message, - } - err = saveAlertMessage(msg) - if err != nil { - return fmt.Errorf("save alert message error: %w", err) - } - }else{ - alertMessage.Title = alertItem.Title - alertMessage.Message = alertItem.Message - err = saveAlertMessage(alertMessage) - if err != nil { - return fmt.Errorf("save alert message error: %w", err) - } - } - log.Debugf("check condition result of rule %s is %v", conditionResults, rule.ID ) } + alertItem.State = alerting.AlertStateAlerting + + var ( + severity = conditionResults[0].ConditionItem.Severity + ) + for _, conditionResult := range conditionResults { + if alerting.SeverityWeights[severity] < alerting.SeverityWeights[conditionResult.ConditionItem.Severity] { + severity = conditionResult.ConditionItem.Severity + } + } + paramsCtx = newParameterCtx(rule, checkResults, util.MapStr{ + alerting2.ParamEventID: alertItem.ID, + alerting2.ParamTimestamp: alertItem.Created.Unix(), + "severity": severity, + }) + + alertItem.Severity = severity + err = attachTitleMessageToCtx(rule, paramsCtx) + if err != nil { + return err + } + alertItem.Message = paramsCtx[alerting2.ParamMessage].(string) + alertItem.Title = paramsCtx[alerting2.ParamTitle].(string) + if alertMessage == nil || alertMessage.Status == alerting.MessageStateRecovered { + msg := &alerting.AlertMessage{ + RuleID: rule.ID, + Created: time.Now(), + Updated: time.Now(), + ID: util.GetUUID(), + Status: alerting.MessageStateAlerting, + Severity: severity, + Title: alertItem.Title, + Message: alertItem.Message, + } + err = saveAlertMessage(msg) + if err != nil { + return fmt.Errorf("save alert message error: %w", err) + } + }else{ + alertMessage.Title = alertItem.Title + 
alertMessage.Message = alertItem.Message + err = saveAlertMessage(alertMessage) + if err != nil { + return fmt.Errorf("save alert message error: %w", err) + } + } + log.Debugf("check condition result of rule %s is %v", conditionResults, rule.ID ) + // if alert message status equals ignored , then skip sending message to channel if alertMessage != nil && alertMessage.Status == alerting.MessageStateIgnored { return nil @@ -681,7 +686,11 @@ func (engine *Engine) Do(rule *alerting.Rule) error { //log.Error(lastAlertItem.ID, period, periodDuration) if paramsCtx == nil { - paramsCtx = newParameterCtx(rule, checkResults,alertItem.ID, alertItem.Created.Unix()) + paramsCtx = newParameterCtx(rule, checkResults, util.MapStr{ + alerting2.ParamEventID: alertItem.ID, + alerting2.ParamTimestamp: alertItem.Created.Unix(), + "severity": severity, + }) } if alertMessage == nil || period > periodDuration { @@ -752,7 +761,7 @@ func attachTitleMessageToCtx(rule *alerting.Rule, paramsCtx map[string]interface return nil } -func newParameterCtx(rule *alerting.Rule, checkResults *alerting.ConditionResult, eventID string, eventTimestamp interface{} ) map[string]interface{}{ +func newParameterCtx(rule *alerting.Rule, checkResults *alerting.ConditionResult, extraParams map[string]interface{} ) map[string]interface{}{ var ( conditionParams []util.MapStr firstGroupValue string @@ -776,13 +785,15 @@ func newParameterCtx(rule *alerting.Rule, checkResults *alerting.ConditionResult alerting2.ParamRuleID: rule.ID, alerting2.ParamResourceID: rule.Resource.ID, alerting2.ParamResourceName: rule.Resource.Name, - alerting2.ParamEventID: eventID, - alerting2.ParamTimestamp: eventTimestamp, alerting2.ParamResults: conditionParams, "first_group_value": firstGroupValue, "first_threshold": firstThreshold, "rule_name": rule.Name, } + err := util.MergeFields(paramsCtx, extraParams, true) + if err != nil { + log.Errorf("merge template params error: %v", err) + } return paramsCtx } @@ -792,7 +803,22 @@ func 
(engine *Engine) Test(rule *alerting.Rule) ([]alerting.ActionExecutionResul return nil, fmt.Errorf("check condition error:%w", err) } var actionResults []alerting.ActionExecutionResult - paramsCtx := newParameterCtx(rule, checkResults, util.GetUUID(), time.Now().Unix()) + var ( + severity = "warning" + ) + if len(checkResults.ResultItems) > 0 { + for _, conditionResult := range checkResults.ResultItems { + if alerting.SeverityWeights[severity] < alerting.SeverityWeights[conditionResult.ConditionItem.Severity] { + severity = conditionResult.ConditionItem.Severity + } + } + } + + paramsCtx := newParameterCtx(rule, checkResults,util.MapStr{ + alerting2.ParamEventID: util.GetUUID(), + alerting2.ParamTimestamp: time.Now().Unix(), + "severity": severity, + } ) err = attachTitleMessageToCtx(rule, paramsCtx) if err != nil { return nil, err From 1d8cd167742a5f759ff1ce77229a3cdb09fc4101 Mon Sep 17 00:00:00 2001 From: liugq Date: Thu, 26 May 2022 18:25:36 +0800 Subject: [PATCH 32/36] sort condition results by severity, result_value --- service/alerting/elasticsearch/engine.go | 33 +++++++++++++++--------- 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/service/alerting/elasticsearch/engine.go b/service/alerting/elasticsearch/engine.go index 338936dc..be461b88 100644 --- a/service/alerting/elasticsearch/engine.go +++ b/service/alerting/elasticsearch/engine.go @@ -623,7 +623,6 @@ func (engine *Engine) Do(rule *alerting.Rule) error { paramsCtx = newParameterCtx(rule, checkResults, util.MapStr{ alerting2.ParamEventID: alertItem.ID, alerting2.ParamTimestamp: alertItem.Created.Unix(), - "severity": severity, }) alertItem.Severity = severity @@ -766,8 +765,28 @@ func newParameterCtx(rule *alerting.Rule, checkResults *alerting.ConditionResult conditionParams []util.MapStr firstGroupValue string firstThreshold string + severity string ) + if len(checkResults.ResultItems) > 0 { + severity = checkResults.ResultItems[0].ConditionItem.Severity + 
sort.Slice(checkResults.ResultItems, func(i, j int) bool { + if alerting.SeverityWeights[checkResults.ResultItems[i].ConditionItem.Severity] > alerting.SeverityWeights[checkResults.ResultItems[j].ConditionItem.Severity] { + return true + } + return false + }) + sort.Slice(checkResults.ResultItems, func(i, j int) bool { + if vi, ok := checkResults.ResultItems[i].ResultValue.(float64); ok { + if vj, ok := checkResults.ResultItems[j].ResultValue.(float64); ok { + return vi > vj + } + } + return false + }) + } + for i, resultItem := range checkResults.ResultItems { + if i == 0 { firstGroupValue = strings.Join(resultItem.GroupValues, ",") firstThreshold = strings.Join(resultItem.ConditionItem.Values, ",") @@ -789,6 +808,7 @@ func newParameterCtx(rule *alerting.Rule, checkResults *alerting.ConditionResult "first_group_value": firstGroupValue, "first_threshold": firstThreshold, "rule_name": rule.Name, + "severity": severity, } err := util.MergeFields(paramsCtx, extraParams, true) if err != nil { @@ -803,21 +823,10 @@ func (engine *Engine) Test(rule *alerting.Rule) ([]alerting.ActionExecutionResul return nil, fmt.Errorf("check condition error:%w", err) } var actionResults []alerting.ActionExecutionResult - var ( - severity = "warning" - ) - if len(checkResults.ResultItems) > 0 { - for _, conditionResult := range checkResults.ResultItems { - if alerting.SeverityWeights[severity] < alerting.SeverityWeights[conditionResult.ConditionItem.Severity] { - severity = conditionResult.ConditionItem.Severity - } - } - } paramsCtx := newParameterCtx(rule, checkResults,util.MapStr{ alerting2.ParamEventID: util.GetUUID(), alerting2.ParamTimestamp: time.Now().Unix(), - "severity": severity, } ) err = attachTitleMessageToCtx(rule, paramsCtx) if err != nil { From e9fdb7ab2251294f4398052a7cbce6f7fe551745 Mon Sep 17 00:00:00 2001 From: liugq Date: Fri, 27 May 2022 18:35:57 +0800 Subject: [PATCH 33/36] add alert nodata status --- model/alerting/alert.go | 1 + model/alerting/metric.go | 1 + 
service/alerting/elasticsearch/engine.go | 146 ++++++++++++++++------- 3 files changed, 104 insertions(+), 44 deletions(-) diff --git a/model/alerting/alert.go b/model/alerting/alert.go index 10b3421c..2307b888 100644 --- a/model/alerting/alert.go +++ b/model/alerting/alert.go @@ -46,6 +46,7 @@ const ( AlertStateAlerting string = "alerting" AlertStateOK = "ok" AlertStateError = "error" + AlertStateNodata = "nodata" ) const ( diff --git a/model/alerting/metric.go b/model/alerting/metric.go index 86f7979f..5336d628 100644 --- a/model/alerting/metric.go +++ b/model/alerting/metric.go @@ -52,6 +52,7 @@ type QueryResult struct { Query string `json:"query"` Raw string `json:"raw"` MetricData []MetricData `json:"metric_data"` + Nodata bool `json:"nodata"` } type MetricData struct { diff --git a/service/alerting/elasticsearch/engine.go b/service/alerting/elasticsearch/engine.go index be461b88..7e5eeb2b 100644 --- a/service/alerting/elasticsearch/engine.go +++ b/service/alerting/elasticsearch/engine.go @@ -41,6 +41,10 @@ func (engine *Engine) GenerateQuery(rule *alerting.Rule, filterParam *alerting.F if err != nil { return nil, err } + timeFilter, err := engine.generateTimeFilter(rule, filterParam) + if err != nil { + return nil, err + } if len(rule.Metrics.Items) == 0 { return nil, fmt.Errorf("metric items should not be empty") } @@ -52,6 +56,14 @@ func (engine *Engine) GenerateQuery(rule *alerting.Rule, filterParam *alerting.F return nil, err } } + if len(filter) > 0 { + basicAggs = util.MapStr{ + "filter_agg": util.MapStr{ + "filter": filter, + "aggs": basicAggs, + }, + } + } targetESVersion := elastic.GetMetadata(rule.Resource.ID).Config.Version intervalField, err := elastic.GetDateHistogramIntervalField(targetESVersion) if err != nil { @@ -95,9 +107,7 @@ func (engine *Engine) GenerateQuery(rule *alerting.Rule, filterParam *alerting.F "time_buckets": timeAggs, } } - lastGroupAgg = groupAgg - } rootAggs = util.MapStr{ util.GetUUID(): lastGroupAgg, @@ -110,7 +120,7 @@ 
func (engine *Engine) GenerateQuery(rule *alerting.Rule, filterParam *alerting.F return util.MapStr{ "size": 0, - "query": filter, + "query": timeFilter, "aggs": rootAggs, }, nil } @@ -263,19 +273,7 @@ func (engine *Engine) ConvertFilterQueryToDsl(fq *alerting.FilterQuery) (map[str return resultQuery, nil } -func (engine *Engine) GenerateRawFilter(rule *alerting.Rule, filterParam *alerting.FilterParam) (map[string]interface{}, error) { - query := map[string]interface{}{} - var err error - if rule.Resource.RawFilter != nil { - query = util.DeepCopy(rule.Resource.RawFilter).(map[string]interface{}) - }else{ - if !rule.Resource.Filter.IsEmpty(){ - query, err = engine.ConvertFilterQueryToDsl(&rule.Resource.Filter) - if err != nil { - return nil, err - } - } - } +func (engine *Engine) generateTimeFilter(rule *alerting.Rule, filterParam *alerting.FilterParam) (map[string]interface{}, error){ var ( timeStart interface{} timeEnd interface{} @@ -324,36 +322,56 @@ func (engine *Engine) GenerateRawFilter(rule *alerting.Rule, filterParam *alerti }, }, } + return timeQuery, nil +} - if boolQ, ok := query["bool"].(map[string]interface{}); ok { - if mustQ, ok := boolQ["must"]; ok { - - if mustArr, ok := mustQ.([]interface{}); ok { - boolQ["must"] = append(mustArr, timeQuery) - - }else{ - return nil, fmt.Errorf("must query: %v is not valid in filter", mustQ) - } - }else{ - boolQ["must"] = []interface{}{ - timeQuery, - } - } +func (engine *Engine) GenerateRawFilter(rule *alerting.Rule, filterParam *alerting.FilterParam) (map[string]interface{}, error) { + query := map[string]interface{}{} + var err error + if rule.Resource.RawFilter != nil { + query = util.DeepCopy(rule.Resource.RawFilter).(map[string]interface{}) }else{ - must := []interface{}{ - timeQuery, - } - if len(query) > 0 { - if _, ok = query["match_all"]; !ok { - must = append(must, query) + if !rule.Resource.Filter.IsEmpty(){ + query, err = engine.ConvertFilterQueryToDsl(&rule.Resource.Filter) + if err != nil { + return 
nil, err } } - query = util.MapStr{ - "bool": util.MapStr{ - "must": must, - }, - } } + //timeQuery, err := engine.generateTimeFilter(rule, filterParam) + //if err != nil { + // return nil, err + //} + // + //if boolQ, ok := query["bool"].(map[string]interface{}); ok { + // if mustQ, ok := boolQ["must"]; ok { + // + // if mustArr, ok := mustQ.([]interface{}); ok { + // boolQ["must"] = append(mustArr, timeQuery) + // + // }else{ + // return nil, fmt.Errorf("must query: %v is not valid in filter", mustQ) + // } + // }else{ + // boolQ["must"] = []interface{}{ + // timeQuery, + // } + // } + //}else{ + // must := []interface{}{ + // timeQuery, + // } + // if len(query) > 0 { + // if _, ok = query["match_all"]; !ok { + // must = append(must, query) + // } + // } + // query = util.MapStr{ + // "bool": util.MapStr{ + // "must": must, + // }, + // } + //} return query, nil } @@ -373,7 +391,10 @@ func (engine *Engine) ExecuteQuery(rule *alerting.Rule, filterParam *alerting.Fi queryResult.Query = string(queryDslBytes) searchRes, err := esClient.SearchWithRawQueryDSL(indexName, queryDslBytes) if err != nil { - return nil, err + return queryResult, err + } + if searchRes.GetTotal() == 0 { + queryResult.Nodata = true } if searchRes.StatusCode != 200 { return nil, fmt.Errorf("search error: %s", string(searchRes.RawResult.Body)) @@ -599,8 +620,11 @@ func (engine *Engine) Do(rule *alerting.Rule) error { var paramsCtx map[string]interface{} if len(conditionResults) == 0 { alertItem.Severity = "info" - alertItem.State = alerting.AlertStateOK - if alertMessage != nil && alertMessage.Status != alerting.MessageStateRecovered { + if checkResults.QueryResult.Nodata { + alertItem.State = alerting.AlertStateNodata + } + + if alertMessage != nil && alertMessage.Status != alerting.MessageStateRecovered && !checkResults.QueryResult.Nodata { alertMessage.Status = alerting.MessageStateRecovered alertMessage.Updated = time.Now() err = saveAlertMessage(alertMessage) @@ -929,6 +953,40 @@ func 
collectMetricData(agg interface{}, groupValues string, metricData *[]alerti if k == "key" || k == "key_as_string" || k== "doc_count"{ continue } + //has filter + if k == "filter_agg" { + if filterM, ok := v.(map[string]interface{}); ok { + for fk, fv := range filterM { + if fk == "doc_count" { + continue + } + if vm, ok := fv.(map[string]interface{}); ok { + if metricVal, ok := vm["value"]; ok { + md.Data[fk] = append(md.Data[fk], alerting.TimeMetricData{bkM["key"], metricVal}) + }else{ + //percentiles agg type + switch vm["values"].(type) { + case []interface{}: + for _, val := range vm["values"].([]interface{}) { + if valM, ok := val.(map[string]interface{}); ok { + md.Data[fk] = append(md.Data[fk], alerting.TimeMetricData{bkM["key"], valM["value"]}) + } + break + } + case map[string]interface{}: + for _, val := range vm["values"].(map[string]interface{}) { + md.Data[fk] = append(md.Data[fk], alerting.TimeMetricData{bkM["key"], val}) + break + } + } + + } + + } + } + } + continue + } if vm, ok := v.(map[string]interface{}); ok { if metricVal, ok := vm["value"]; ok { md.Data[k] = append(md.Data[k], alerting.TimeMetricData{bkM["key"], metricVal}) From 3f54d52588a2ff3d027355ce966ee9d9ed3e54e6 Mon Sep 17 00:00:00 2001 From: liugq Date: Mon, 30 May 2022 12:47:31 +0800 Subject: [PATCH 34/36] add condition expression in rule search api --- plugin/api/alerting/rule.go | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/plugin/api/alerting/rule.go b/plugin/api/alerting/rule.go index 6cc054be..4b5adad8 100644 --- a/plugin/api/alerting/rule.go +++ b/plugin/api/alerting/rule.go @@ -422,8 +422,26 @@ func (alertAPI *AlertAPI) searchRule(w http.ResponseWriter, req *http.Request, p log.Error(err) return } + searchRes := elastic.SearchResponse{} + err = util.FromJSONBytes(searchResult.Raw, &searchRes) + if err != nil { + alertAPI.WriteError(w, err.Error(), http.StatusInternalServerError) + log.Error(err) + return + } + for _, hit := range
searchRes.Hits.Hits { + hitRule := alerting.Rule{} + hitBytes, _ := util.ToJSONBytes(hit.Source) + util.FromJSONBytes(hitBytes, &hitRule) + metricExpression, _ := hitRule.Metrics.GenerateExpression() + for i, cond := range hitRule.Conditions.Items { + expression, _ := cond.GenerateConditionExpression() + hitRule.Conditions.Items[i].Expression = strings.ReplaceAll(expression, "result", metricExpression) + } + hit.Source["conditions"] = hitRule.Conditions + } - w.Write(searchResult.Raw) + alertAPI.WriteJSON(w, searchRes, http.StatusOK) } func (alertAPI *AlertAPI) getRuleAlertMessageNumbers(ruleIDs []string) ( map[string]interface{},error) { From eaaf44bd7ad2a106132fce30133a61e5f2d2f6e3 Mon Sep 17 00:00:00 2001 From: liugq Date: Mon, 30 May 2022 18:16:06 +0800 Subject: [PATCH 35/36] add condition expression to alert info --- service/alerting/elasticsearch/engine.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/service/alerting/elasticsearch/engine.go b/service/alerting/elasticsearch/engine.go index 7e5eeb2b..1ca25cb2 100644 --- a/service/alerting/elasticsearch/engine.go +++ b/service/alerting/elasticsearch/engine.go @@ -594,6 +594,12 @@ func (engine *Engine) Do(rule *alerting.Rule) error { }() log.Tracef("start check condition of rule %s", rule.ID) + //todo do only once when rule not change + metricExpression, _ := rule.Metrics.GenerateExpression() + for i, cond := range rule.Conditions.Items { + expression, _ := cond.GenerateConditionExpression() + rule.Conditions.Items[i].Expression = strings.ReplaceAll(expression, "result", metricExpression) + } alertItem = &alerting.Alert{ ID: util.GetUUID(), Created: time.Now(), From ef3aa89a9c80a83ec73b714d34053956d9a3d6fc Mon Sep 17 00:00:00 2001 From: liugq Date: Wed, 1 Jun 2022 12:40:41 +0800 Subject: [PATCH 36/36] return query dsl after search error --- service/alerting/elasticsearch/engine.go | 142 ++++++++++++++--------- 1 file changed, 86 insertions(+), 56 deletions(-) diff --git 
a/service/alerting/elasticsearch/engine.go b/service/alerting/elasticsearch/engine.go index 1ca25cb2..0d44db3d 100644 --- a/service/alerting/elasticsearch/engine.go +++ b/service/alerting/elasticsearch/engine.go @@ -56,14 +56,6 @@ func (engine *Engine) GenerateQuery(rule *alerting.Rule, filterParam *alerting.F return nil, err } } - if len(filter) > 0 { - basicAggs = util.MapStr{ - "filter_agg": util.MapStr{ - "filter": filter, - "aggs": basicAggs, - }, - } - } targetESVersion := elastic.GetMetadata(rule.Resource.ID).Config.Version intervalField, err := elastic.GetDateHistogramIntervalField(targetESVersion) if err != nil { @@ -74,11 +66,22 @@ func (engine *Engine) GenerateQuery(rule *alerting.Rule, filterParam *alerting.F periodInterval = filterParam.BucketSize } timeAggs := util.MapStr{ - "date_histogram": util.MapStr{ - "field": rule.Resource.TimeField, - intervalField: periodInterval, + "time_buckets": util.MapStr{ + "date_histogram": util.MapStr{ + "field": rule.Resource.TimeField, + intervalField: periodInterval, + }, + "aggs": basicAggs, }, - "aggs": basicAggs, + } + + if len(filter) > 0 { + timeAggs = util.MapStr{ + "filter_agg": util.MapStr{ + "filter": filter, + "aggs": timeAggs, + }, + } } var rootAggs util.MapStr groups := rule.Metrics.Items[0].Group @@ -103,9 +106,7 @@ func (engine *Engine) GenerateQuery(rule *alerting.Rule, filterParam *alerting.F groupID: lastGroupAgg, } }else{ - groupAgg["aggs"] = util.MapStr{ - "time_buckets": timeAggs, - } + groupAgg["aggs"] = timeAggs } lastGroupAgg = groupAgg } @@ -113,9 +114,7 @@ func (engine *Engine) GenerateQuery(rule *alerting.Rule, filterParam *alerting.F util.GetUUID(): lastGroupAgg, } }else{ - rootAggs = util.MapStr{ - "time_buckets": timeAggs, - } + rootAggs = timeAggs } return util.MapStr{ @@ -310,8 +309,8 @@ func (engine *Engine) generateTimeFilter(rule *alerting.Rule, filterParam *alert if err != nil { return nil, err } - timeStart = time.Now().Add(-duration).Format(time.RFC3339Nano) - timeEnd = 
time.Now().Format(time.RFC3339Nano) + timeStart = time.Now().Add(-duration).UnixMilli() //.Format(time.RFC3339Nano) + timeEnd = time.Now().UnixMilli() } timeQuery := util.MapStr{ @@ -397,16 +396,42 @@ func (engine *Engine) ExecuteQuery(rule *alerting.Rule, filterParam *alerting.Fi queryResult.Nodata = true } if searchRes.StatusCode != 200 { - return nil, fmt.Errorf("search error: %s", string(searchRes.RawResult.Body)) + return queryResult, fmt.Errorf("search error: %s", string(searchRes.RawResult.Body)) } queryResult.Raw = string(searchRes.RawResult.Body) searchResult := map[string]interface{}{} err = util.FromJSONBytes(searchRes.RawResult.Body, &searchResult) if err != nil { - return nil, err + return queryResult, err } metricData := []alerting.MetricData{} collectMetricData(searchResult["aggregations"], "", &metricData) + //divide the rate (derivative) data by the bucket size (unit: /s) + //statisticM := map[string] string{} + //for _, mi := range rule.Metrics.Items { + // statisticM[mi.Name] = mi.Statistic + //} + //var periodInterval = rule.Metrics.PeriodInterval + //if filterParam != nil && filterParam.BucketSize != "" { + // periodInterval = filterParam.BucketSize + //} + //interval, err := time.ParseDuration(periodInterval) + //if err != nil { + // log.Error(err) + //} + //for i, _ := range metricData { + // for k, d := range metricData[i].Data { + // if statisticM[k] == "rate" { + // for _, td := range d { + // if len(td) > 1 { + // if v, ok := td[1].(float64); ok { + // td[1] = v / interval.Seconds() + // } + // } + // } + // } + // } + //} queryResult.MetricData = metricData return queryResult, nil } @@ -960,39 +985,39 @@ func collectMetricData(agg interface{}, groupValues string, metricData *[]alerti continue } //has filter - if k == "filter_agg" { - if filterM, ok := v.(map[string]interface{}); ok { - for fk, fv := range filterM { - if fk == "doc_count" { - continue - } - if vm, ok := fv.(map[string]interface{}); ok { - if metricVal, ok := vm["value"]; ok { - md.Data[fk] =
append(md.Data[fk], alerting.TimeMetricData{bkM["key"], metricVal}) - }else{ - //percentiles agg type - switch vm["values"].(type) { - case []interface{}: - for _, val := range vm["values"].([]interface{}) { - if valM, ok := val.(map[string]interface{}); ok { - md.Data[fk] = append(md.Data[fk], alerting.TimeMetricData{bkM["key"], valM["value"]}) - } - break - } - case map[string]interface{}: - for _, val := range vm["values"].(map[string]interface{}) { - md.Data[fk] = append(md.Data[fk], alerting.TimeMetricData{bkM["key"], val}) - break - } - } - - } - - } - } - } - continue - } + //if k == "filter_agg" { + // if filterM, ok := v.(map[string]interface{}); ok { + // for fk, fv := range filterM { + // if fk == "doc_count" { + // continue + // } + // if vm, ok := fv.(map[string]interface{}); ok { + // if metricVal, ok := vm["value"]; ok { + // md.Data[fk] = append(md.Data[fk], alerting.TimeMetricData{bkM["key"], metricVal}) + // }else{ + // //percentiles agg type + // switch vm["values"].(type) { + // case []interface{}: + // for _, val := range vm["values"].([]interface{}) { + // if valM, ok := val.(map[string]interface{}); ok { + // md.Data[fk] = append(md.Data[fk], alerting.TimeMetricData{bkM["key"], valM["value"]}) + // } + // break + // } + // case map[string]interface{}: + // for _, val := range vm["values"].(map[string]interface{}) { + // md.Data[fk] = append(md.Data[fk], alerting.TimeMetricData{bkM["key"], val}) + // break + // } + // } + // + // } + // + // } + // } + // } + // continue + //} if vm, ok := v.(map[string]interface{}); ok { if metricVal, ok := vm["value"]; ok { md.Data[k] = append(md.Data[k], alerting.TimeMetricData{bkM["key"], metricVal}) @@ -1038,7 +1063,12 @@ func collectMetricData(agg interface{}, groupValues string, metricData *[]alerti if groupValues != "" { newGroupValues = fmt.Sprintf("%s*%s", groupValues, currentGroup) } - collectMetricData(bk, newGroupValues, metricData) + if filterAgg, ok := 
bkVal["filter_agg"].(map[string]interface{}); ok { + collectMetricData(filterAgg, newGroupValues, metricData) + }else{ + collectMetricData(bk, newGroupValues, metricData) + } + } }