console/plugin/api/alerting/rule.go

581 lines
14 KiB
Go

/* Copyright © INFINI Ltd. All rights reserved.
* web: https://infinilabs.com
* mail: hello#infini.ltd */
package alerting
import (
"fmt"
log "github.com/cihub/seelog"
"infini.sh/console/model/alerting"
alerting2 "infini.sh/console/service/alerting"
_ "infini.sh/console/service/alerting/elasticsearch"
httprouter "infini.sh/framework/core/api/router"
"infini.sh/framework/core/elastic"
"infini.sh/framework/core/kv"
"infini.sh/framework/core/orm"
"infini.sh/framework/core/task"
"infini.sh/framework/core/util"
"infini.sh/framework/modules/elastic/api"
"net/http"
"time"
)
func (alertAPI *AlertAPI) createRule(w http.ResponseWriter, req *http.Request, ps httprouter.Params) {
rules := []alerting.Rule{}
err := alertAPI.DecodeJSON(req, &rules)
if err != nil {
log.Error(err)
alertAPI.WriteJSON(w, util.MapStr{
"error": err.Error(),
}, http.StatusInternalServerError)
return
}
var ids []string
for _, rule := range rules {
exists, err := checkResourceExists(&rule)
if err != nil || !exists {
log.Error(err)
alertAPI.WriteJSON(w, util.MapStr{
"error": err.Error(),
}, http.StatusInternalServerError)
return
}
rule.Metrics.Expression, err = rule.Metrics.GenerateExpression()
if err != nil {
log.Error(err)
alertAPI.WriteJSON(w, util.MapStr{
"error": err.Error(),
}, http.StatusInternalServerError)
return
}
rule.ID = util.GetUUID()
ids = append(ids, rule.ID)
rule.Created = time.Now()
rule.Updated = time.Now()
rule.Metrics.MaxPeriods = 15
if rule.Schedule.Interval == ""{
rule.Schedule.Interval = "1m"
}
err = orm.Save(rule)
if err != nil {
log.Error(err)
alertAPI.WriteJSON(w, util.MapStr{
"error": err.Error(),
}, http.StatusInternalServerError)
return
}
eng := alerting2.GetEngine(rule.Resource.Type)
if rule.Enabled {
ruleTask := task.ScheduleTask{
ID: rule.ID,
Interval: rule.Schedule.Interval,
Description: rule.Metrics.Expression,
Task: eng.GenerateTask(&rule),
}
task.RegisterScheduleTask(ruleTask)
task.StartTask(ruleTask.ID)
}
}
alertAPI.WriteJSON(w, util.MapStr{
"result": "created",
"ids": ids,
}, http.StatusOK)
}
func (alertAPI *AlertAPI) getRule(w http.ResponseWriter, req *http.Request, ps httprouter.Params) {
id := ps.MustGetParameter("rule_id")
obj := alerting.Rule{}
obj.ID = id
exists, err := orm.Get(&obj)
if !exists || err != nil {
log.Error(err)
alertAPI.WriteJSON(w, util.MapStr{
"_id": id,
"found": false,
}, http.StatusNotFound)
return
}
if err != nil {
log.Error(err)
alertAPI.WriteError(w, err.Error(), http.StatusInternalServerError)
log.Error(err)
return
}
alertAPI.WriteJSON(w, util.MapStr{
"found": true,
"_id": id,
"_source": obj,
}, 200)
}
func (alertAPI *AlertAPI) updateRule(w http.ResponseWriter, req *http.Request, ps httprouter.Params) {
id := ps.MustGetParameter("rule_id")
obj := &alerting.Rule{}
obj.ID = id
exists, err := orm.Get(obj)
if !exists || err != nil {
log.Error(err)
alertAPI.WriteJSON(w, util.MapStr{
"_id": id,
"result": "not_found",
}, http.StatusNotFound)
return
}
id = obj.ID
create := obj.Created
obj = &alerting.Rule{}
err = alertAPI.DecodeJSON(req, obj)
if err != nil {
alertAPI.WriteError(w, err.Error(), http.StatusInternalServerError)
log.Error(err)
return
}
//protect
obj.ID = id
obj.Created = create
obj.Updated = time.Now()
obj.Metrics.Expression, err = obj.Metrics.GenerateExpression()
if err != nil {
alertAPI.WriteError(w, err.Error(), http.StatusInternalServerError)
log.Error(err)
return
}
err = orm.Update(obj)
if err != nil {
alertAPI.WriteError(w, err.Error(), http.StatusInternalServerError)
log.Error(err)
return
}
if obj.Enabled {
exists, err = checkResourceExists(obj)
if err != nil || !exists {
log.Error(err)
alertAPI.WriteJSON(w, util.MapStr{
"error": err.Error(),
}, http.StatusInternalServerError)
return
}
//update task
task.StopTask(id)
eng := alerting2.GetEngine(obj.Resource.Type)
ruleTask := task.ScheduleTask{
ID: obj.ID,
Interval: obj.Schedule.Interval,
Description: obj.Metrics.Expression,
Task: eng.GenerateTask(obj),
}
task.RegisterScheduleTask(ruleTask)
task.StartTask(ruleTask.ID)
}else{
task.DeleteTask(id)
}
clearKV(obj.ID)
alertAPI.WriteJSON(w, util.MapStr{
"_id": obj.ID,
"result": "updated",
}, 200)
}
func clearKV(ruleID string){
_ = kv.DeleteKey(alerting2.KVLastNotificationTime, []byte(ruleID))
_ = kv.DeleteKey(alerting2.KVLastEscalationTime, []byte(ruleID))
}
func (alertAPI *AlertAPI) deleteRule(w http.ResponseWriter, req *http.Request, ps httprouter.Params) {
id := ps.MustGetParameter("rule_id")
obj := alerting.Rule{}
obj.ID = id
exists, err := orm.Get(&obj)
if !exists || err != nil {
log.Error(err)
alertAPI.WriteJSON(w, util.MapStr{
"_id": id,
"result": "not_found",
}, http.StatusNotFound)
return
}
err = orm.Delete(&obj)
if err != nil {
alertAPI.WriteError(w, err.Error(), http.StatusInternalServerError)
log.Error(err)
return
}
task.DeleteTask(obj.ID)
clearKV(obj.ID)
alertAPI.WriteJSON(w, util.MapStr{
"_id": obj.ID,
"result": "deleted",
}, 200)
}
func (alertAPI *AlertAPI) searchRule(w http.ResponseWriter, req *http.Request, ps httprouter.Params) {
var (
keyword = alertAPI.GetParameterOrDefault(req, "keyword", "")
from = alertAPI.GetIntOrDefault(req, "from", 0)
size = alertAPI.GetIntOrDefault(req, "size", 20)
)
mustQuery := []util.MapStr{
}
if keyword != "" {
mustQuery = append(mustQuery, util.MapStr{
"match": util.MapStr{
"search_text": util.MapStr{
"query": keyword,
"fuzziness": "AUTO",
"max_expansions": 10,
"prefix_length": 2,
"fuzzy_transpositions": true,
"boost": 50,
},
},
})
}
queryDSL := util.MapStr{
"from": from,
"size": size,
"query": util.MapStr{
"bool": util.MapStr{
"must": mustQuery,
},
},
}
q := &orm.Query{
RawQuery: util.MustToJSONBytes(queryDSL),
}
err, searchResult := orm.Search(alerting.Rule{}, q)
if err != nil {
alertAPI.WriteError(w, err.Error(), http.StatusInternalServerError)
log.Error(err)
return
}
w.Write(searchResult.Raw)
}
func (alertAPI *AlertAPI) getRuleAlertNumbers(ruleIDs []string) ( map[string]interface{},error) {
esClient := elastic.GetClient(alertAPI.Config.Elasticsearch)
queryDsl := util.MapStr{
"size": 0,
"query": util.MapStr{
"bool": util.MapStr{
"must": []util.MapStr{
{
"terms": util.MapStr{
"rule_id": ruleIDs,
},
},
{
"terms": util.MapStr{
"state": []string{alerting.AlertStateError, alerting.AlertStateActive},
},
},
},
},
},
"aggs": util.MapStr{
"terms_rule_id": util.MapStr{
"terms": util.MapStr{
"field": "rule_id",
},
},
},
}
searchRes, err := esClient.SearchWithRawQueryDSL(orm.GetWildcardIndexName(alerting.Alert{}), util.MustToJSONBytes(queryDsl) )
if err != nil {
return nil, err
}
ruleAlertNumbers := map[string]interface{}{}
if termRules, ok := searchRes.Aggregations["terms_rule_id"]; ok {
for _, bk := range termRules.Buckets {
key := bk["key"].(string)
ruleAlertNumbers[key] = bk["doc_count"]
}
}
return ruleAlertNumbers, nil
}
func (alertAPI *AlertAPI) fetchAlertInfos(w http.ResponseWriter, req *http.Request, ps httprouter.Params) {
var ruleIDs = []string{}
alertAPI.DecodeJSON(req, &ruleIDs)
if len(ruleIDs) == 0 {
alertAPI.WriteJSON(w, util.MapStr{}, http.StatusOK)
return
}
esClient := elastic.GetClient(alertAPI.Config.Elasticsearch)
queryDsl := util.MapStr{
"_source": []string{"state", "rule_id"},
"sort": []util.MapStr{
{
"created": util.MapStr{
"order": "desc",
},
},
},
"collapse": util.MapStr{
"field": "rule_id",
},
"query": util.MapStr{
"terms": util.MapStr{
"rule_id": ruleIDs,
},
},
}
searchRes, err := esClient.SearchWithRawQueryDSL(orm.GetWildcardIndexName(alerting.Alert{}), util.MustToJSONBytes(queryDsl) )
if err != nil {
log.Error(err)
alertAPI.WriteJSON(w, util.MapStr{
"error": err.Error(),
}, http.StatusInternalServerError)
return
}
if len(searchRes.Hits.Hits) == 0 {
alertAPI.WriteJSON(w, util.MapStr{}, http.StatusOK)
return
}
alertNumbers, err := alertAPI.getRuleAlertNumbers(ruleIDs)
if err != nil {
log.Error(err)
alertAPI.WriteJSON(w, util.MapStr{
"error": err.Error(),
}, http.StatusInternalServerError)
return
}
latestAlertInfos := map[string]util.MapStr{}
for _, hit := range searchRes.Hits.Hits {
if ruleID, ok := hit.Source["rule_id"].(string); ok {
latestAlertInfos[ruleID] = util.MapStr{
"status": hit.Source["state"],
"alert_count": alertNumbers[ruleID],
}
}
}
alertAPI.WriteJSON(w, latestAlertInfos, http.StatusOK)
}
func (alertAPI *AlertAPI) enableRule(w http.ResponseWriter, req *http.Request, ps httprouter.Params) {
reqObj := alerting.Rule{}
err := alertAPI.DecodeJSON(req, &reqObj)
if err != nil {
log.Error(err)
alertAPI.WriteError(w, fmt.Sprintf("request format error:%v", err), http.StatusInternalServerError)
return
}
id := ps.MustGetParameter("rule_id")
obj := alerting.Rule{}
obj.ID = id
exists, err := orm.Get(&obj)
if !exists || err != nil {
log.Error(err)
alertAPI.WriteJSON(w, util.MapStr{
"_id": id,
"result": "not_found",
}, http.StatusNotFound)
return
}
if reqObj.Enabled {
eng := alerting2.GetEngine(obj.Resource.Type)
ruleTask := task.ScheduleTask{
ID: obj.ID,
Interval: obj.Schedule.Interval,
Description: obj.Metrics.Expression,
Task: eng.GenerateTask(&obj),
}
task.RegisterScheduleTask(ruleTask)
task.StartTask(ruleTask.ID)
}else{
task.DeleteTask(id)
}
obj.Enabled = reqObj.Enabled
err = orm.Save(obj)
if err != nil {
log.Error(err)
alertAPI.WriteError(w, fmt.Sprintf("save rule error:%v", err), http.StatusInternalServerError)
return
}
alertAPI.WriteJSON(w, util.MapStr{
"result": "updated",
"_id": id,
}, http.StatusOK)
}
func (alertAPI *AlertAPI) sendTestMessage(w http.ResponseWriter, req *http.Request, ps httprouter.Params) {
rule := alerting.Rule{}
err := alertAPI.DecodeJSON(req, &rule)
if err != nil {
log.Error(err)
alertAPI.WriteJSON(w, util.MapStr{
"error": err.Error(),
}, http.StatusInternalServerError)
return
}
if rule.ID == "" {
rule.ID = util.GetUUID()
}
eng := alerting2.GetEngine(rule.Resource.Type)
actionResults, err := eng.Test(&rule)
if err != nil {
log.Error(err)
alertAPI.WriteJSON(w, util.MapStr{
"error": err.Error(),
}, http.StatusInternalServerError)
return
}
alertAPI.WriteJSON(w, util.MapStr{
"action_results": actionResults,
}, http.StatusOK)
}
func checkResourceExists(rule *alerting.Rule) (bool, error) {
if rule.Resource.ID == "" {
return false, fmt.Errorf("resource id can not be empty")
}
switch rule.Resource.Type {
case "elasticsearch":
obj := elastic.ElasticsearchConfig{}
obj.ID = rule.Resource.ID
ok, err := orm.Get(&obj)
if err != nil {
return false, err
}
if rule.Resource.Name == "" {
rule.Resource.Name = obj.Name
}
return ok && obj.Name != "", nil
default:
return false, fmt.Errorf("unsupport resource type: %s", rule.Resource.Type)
}
}
func (alertAPI *AlertAPI) getTemplateParams(w http.ResponseWriter, req *http.Request, ps httprouter.Params) {
alertAPI.WriteJSON(w, util.MapStr{
"template_params": alerting2.GetTemplateParameters(),
}, http.StatusOK)
}
func (alertAPI *AlertAPI) getMetricData(w http.ResponseWriter, req *http.Request, ps httprouter.Params) {
rule := &alerting.Rule{
ID: ps.ByName("rule_id"),
}
exists, err := orm.Get(rule)
if !exists || err != nil {
alertAPI.WriteJSON(w, util.MapStr{
"_id": rule.ID,
"found": false,
}, http.StatusNotFound)
return
}
var (
minStr = alertAPI.Get(req, "min", "")
maxStr = alertAPI.Get(req, "max", "")
)
bucketSize, min, max, err := api.GetMetricRangeAndBucketSize(minStr, maxStr, 60, 15)
filterParam := &alerting.FilterParam{
Start: min,
End: max,
BucketSize: fmt.Sprintf("%ds", bucketSize),
}
eng := alerting2.GetEngine(rule.Resource.Type)
metricData, err := eng.GetTargetMetricData(rule, true, filterParam)
if err != nil {
log.Error(err)
alertAPI.WriteJSON(w, util.MapStr{
"error": err.Error(),
}, http.StatusInternalServerError)
return
}
var filteredMetricData []alerting.MetricData
for _, md := range metricData {
if len(md.Data) == 0 {
continue
}
filteredMetricData = append(filteredMetricData, md)
}
alertAPI.WriteJSON(w, util.MapStr{
"metrics": filteredMetricData,
}, http.StatusOK)
}
//func (alertAPI *AlertAPI) testRule(w http.ResponseWriter, req *http.Request, ps httprouter.Params) {
// rule := alerting.Rule{
// ID: util.GetUUID(),
// Created: time.Now(),
// Updated: time.Now(),
// Enabled: true,
// Resource: alerting.Resource{
// ID: "c8i18llath2blrusdjng",
// Type: "elasticsearch",
// Objects: []string{".infini_metrics*"},
// TimeField: "timestamp",
// RawFilter: map[string]interface{}{
// "bool": util.MapStr{
// "must": []util.MapStr{},
// },
// },
// },
//
// Metrics: alerting.Metric{
// PeriodInterval: "1m",
// MaxPeriods: 15,
// Items: []alerting.MetricItem{
// {Name: "a", Field: "payload.elasticsearch.node_stats.os.cpu.percent", Statistic: "p99", Group: []string{"metadata.labels.cluster_id", "metadata.labels.node_id"}},
// },
// },
// Conditions: alerting.Condition{
// Operator: "any",
// Items: []alerting.ConditionItem{
// {MinimumPeriodMatch: 5, Operator: "gte", Values: []string{"90"}, Severity: "error", Message: "cpu使用率大于90%"},
// },
// },
//
// Channels: alerting.RuleChannel{
// Normal: []alerting.Channel{
// {Name: "钉钉", Type: alerting.ChannelWebhook, Webhook: &alerting.CustomWebhook{
// HeaderParams: map[string]string{
// "Content-Type": "application/json",
// },
// Body: `{"msgtype": "text","text": {"content":"告警通知: {{ctx.message}}"}}`,
// Method: http.MethodPost,
// URL: "https://oapi.dingtalk.com/robot/send?access_token=XXXXXX",
// }},
// },
// ThrottlePeriod: "1h",
// AcceptTimeRange: alerting.TimeRange{
// Start: "8:00",
// End: "21:00",
// },
// EscalationEnabled: true,
// EscalationThrottlePeriod: "30m",
// },
// }
// eng := alerting2.GetEngine(rule.Resource.Type)
// data, err := eng.ExecuteQuery(&rule)
// if err != nil {
// log.Error(err)
// }
// alertAPI.WriteJSON(w, data, http.StatusOK)
//}