update alert api
This commit is contained in:
parent
6c75b5aeba
commit
162cf120c9
9
main.go
9
main.go
|
@ -8,6 +8,7 @@ import (
|
||||||
"infini.sh/console/model/alerting"
|
"infini.sh/console/model/alerting"
|
||||||
"infini.sh/console/model/gateway"
|
"infini.sh/console/model/gateway"
|
||||||
_ "infini.sh/console/plugin"
|
_ "infini.sh/console/plugin"
|
||||||
|
alerting2 "infini.sh/console/service/alerting"
|
||||||
"infini.sh/framework"
|
"infini.sh/framework"
|
||||||
"infini.sh/framework/core/elastic"
|
"infini.sh/framework/core/elastic"
|
||||||
"infini.sh/framework/core/env"
|
"infini.sh/framework/core/env"
|
||||||
|
@ -27,6 +28,7 @@ import (
|
||||||
_ "infini.sh/framework/plugins"
|
_ "infini.sh/framework/plugins"
|
||||||
api2 "infini.sh/gateway/api"
|
api2 "infini.sh/gateway/api"
|
||||||
_ "infini.sh/gateway/proxy"
|
_ "infini.sh/gateway/proxy"
|
||||||
|
log "src/github.com/cihub/seelog"
|
||||||
)
|
)
|
||||||
|
|
||||||
var appConfig *config.AppConfig
|
var appConfig *config.AppConfig
|
||||||
|
@ -130,7 +132,12 @@ func main() {
|
||||||
|
|
||||||
api.RegisterSchema()
|
api.RegisterSchema()
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
err := alerting2.InitTasks()
|
||||||
|
if err != nil {
|
||||||
|
log.Errorf("init alerting task error: %v", err)
|
||||||
|
}
|
||||||
|
}()
|
||||||
}, nil) {
|
}, nil) {
|
||||||
app.Run()
|
app.Run()
|
||||||
}
|
}
|
||||||
|
|
|
@ -26,11 +26,12 @@ type Alert struct {
|
||||||
Error string `json:"error,omitempty"`
|
Error string `json:"error,omitempty"`
|
||||||
IsNotified bool `json:"is_notified" elastic_mapping:"is_notified: { type: boolean }"` //标识本次检测是否发送了告警通知
|
IsNotified bool `json:"is_notified" elastic_mapping:"is_notified: { type: boolean }"` //标识本次检测是否发送了告警通知
|
||||||
IsEscalated bool `json:"is_escalated" elastic_mapping:"is_escalated: { type: boolean }"` //标识本次检测是否发送了升级告警通知
|
IsEscalated bool `json:"is_escalated" elastic_mapping:"is_escalated: { type: boolean }"` //标识本次检测是否发送了升级告警通知
|
||||||
|
Conditions Condition `json:"condition"`
|
||||||
|
ConditionResult *ConditionResult `json:"condition_result,omitempty" elastic_mapping:"condition_result: { type: object,enabled:false }"`
|
||||||
SearchText string `json:"-" elastic_mapping:"search_text:{type:text,index_prefixes:{},index_phrases:true, analyzer:suggest_text_search }"`
|
SearchText string `json:"-" elastic_mapping:"search_text:{type:text,index_prefixes:{},index_phrases:true, analyzer:suggest_text_search }"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type ActionExecutionResult struct {
|
type ActionExecutionResult struct {
|
||||||
//ActionId string `json:"action_id"`
|
|
||||||
LastExecutionTime int `json:"last_execution_time"`
|
LastExecutionTime int `json:"last_execution_time"`
|
||||||
Error string `json:"error"`
|
Error string `json:"error"`
|
||||||
Result string `json:"result"`
|
Result string `json:"result"`
|
||||||
|
|
|
@ -19,6 +19,10 @@ type ConditionItem struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
type ConditionResult struct {
|
type ConditionResult struct {
|
||||||
|
ResultItems []ConditionResultItem `json:"result_items"`
|
||||||
|
QueryResult *QueryResult `json:"query_result"`
|
||||||
|
}
|
||||||
|
type ConditionResultItem struct {
|
||||||
GroupValues []string `json:"group_values"`
|
GroupValues []string `json:"group_values"`
|
||||||
ConditionItem *ConditionItem `json:"condition_item"`
|
ConditionItem *ConditionItem `json:"condition_item"`
|
||||||
}
|
}
|
||||||
|
|
|
@ -48,6 +48,12 @@ type MetricItem struct {
|
||||||
Group []string `json:"group"` //bucket group
|
Group []string `json:"group"` //bucket group
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type QueryResult struct {
|
||||||
|
Query string `json:"query"`
|
||||||
|
Raw string `json:"raw"`
|
||||||
|
MetricData []MetricData `json:"metric_data"`
|
||||||
|
}
|
||||||
|
|
||||||
type MetricData struct {
|
type MetricData struct {
|
||||||
GroupValues []string `json:"group_values"`
|
GroupValues []string `json:"group_values"`
|
||||||
Data map[string][]TimeMetricData `json:"data"`
|
Data map[string][]TimeMetricData `json:"data"`
|
||||||
|
|
|
@ -27,10 +27,10 @@ type Rule struct {
|
||||||
|
|
||||||
type RuleChannel struct {
|
type RuleChannel struct {
|
||||||
Normal []Channel `json:"normal"`
|
Normal []Channel `json:"normal"`
|
||||||
Escalation []Channel `json:"escalation"`
|
Escalation []Channel `json:"escalation,omitempty"`
|
||||||
ThrottlePeriod string `json:"throttle_period"` //沉默周期
|
ThrottlePeriod string `json:"throttle_period"` //沉默周期
|
||||||
AcceptTimeRange TimeRange `json:"accept_time_range"`
|
AcceptTimeRange TimeRange `json:"accept_time_range"`
|
||||||
EscalationThrottlePeriod string `json:"escalation_throttle_period"`
|
EscalationThrottlePeriod string `json:"escalation_throttle_period,omitempty"`
|
||||||
EscalationEnabled bool `json:"escalation_enabled"`
|
EscalationEnabled bool `json:"escalation_enabled"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -31,5 +31,6 @@ func (alert *AlertAPI) Init() {
|
||||||
|
|
||||||
//just for test
|
//just for test
|
||||||
//api.HandleAPIMethod(api.GET, "/alerting/rule/test", alert.testRule)
|
//api.HandleAPIMethod(api.GET, "/alerting/rule/test", alert.testRule)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -109,10 +109,10 @@ func (alertAPI *AlertAPI) getRule(w http.ResponseWriter, req *http.Request, ps h
|
||||||
|
|
||||||
func (alertAPI *AlertAPI) updateRule(w http.ResponseWriter, req *http.Request, ps httprouter.Params) {
|
func (alertAPI *AlertAPI) updateRule(w http.ResponseWriter, req *http.Request, ps httprouter.Params) {
|
||||||
id := ps.MustGetParameter("rule_id")
|
id := ps.MustGetParameter("rule_id")
|
||||||
obj := alerting.Rule{}
|
obj := &alerting.Rule{}
|
||||||
|
|
||||||
obj.ID = id
|
obj.ID = id
|
||||||
exists, err := orm.Get(&obj)
|
exists, err := orm.Get(obj)
|
||||||
if !exists || err != nil {
|
if !exists || err != nil {
|
||||||
alertAPI.WriteJSON(w, util.MapStr{
|
alertAPI.WriteJSON(w, util.MapStr{
|
||||||
"_id": id,
|
"_id": id,
|
||||||
|
@ -123,8 +123,8 @@ func (alertAPI *AlertAPI) updateRule(w http.ResponseWriter, req *http.Request, p
|
||||||
|
|
||||||
id = obj.ID
|
id = obj.ID
|
||||||
create := obj.Created
|
create := obj.Created
|
||||||
obj = alerting.Rule{}
|
obj = &alerting.Rule{}
|
||||||
err = alertAPI.DecodeJSON(req, &obj)
|
err = alertAPI.DecodeJSON(req, obj)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
alertAPI.WriteError(w, err.Error(), http.StatusInternalServerError)
|
alertAPI.WriteError(w, err.Error(), http.StatusInternalServerError)
|
||||||
log.Error(err)
|
log.Error(err)
|
||||||
|
@ -135,7 +135,13 @@ func (alertAPI *AlertAPI) updateRule(w http.ResponseWriter, req *http.Request, p
|
||||||
obj.ID = id
|
obj.ID = id
|
||||||
obj.Created = create
|
obj.Created = create
|
||||||
obj.Updated = time.Now()
|
obj.Updated = time.Now()
|
||||||
err = orm.Update(&obj)
|
err = obj.Metrics.RefreshExpression()
|
||||||
|
if err != nil {
|
||||||
|
alertAPI.WriteError(w, err.Error(), http.StatusInternalServerError)
|
||||||
|
log.Error(err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
err = orm.Update(obj)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
alertAPI.WriteError(w, err.Error(), http.StatusInternalServerError)
|
alertAPI.WriteError(w, err.Error(), http.StatusInternalServerError)
|
||||||
log.Error(err)
|
log.Error(err)
|
||||||
|
@ -143,6 +149,13 @@ func (alertAPI *AlertAPI) updateRule(w http.ResponseWriter, req *http.Request, p
|
||||||
}
|
}
|
||||||
|
|
||||||
if obj.Enabled {
|
if obj.Enabled {
|
||||||
|
exists, err = checkResourceExists(obj)
|
||||||
|
if err != nil || !exists {
|
||||||
|
alertAPI.WriteJSON(w, util.MapStr{
|
||||||
|
"error": err.Error(),
|
||||||
|
}, http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
//update task
|
//update task
|
||||||
task.StopTask(id)
|
task.StopTask(id)
|
||||||
eng := alerting2.GetEngine(obj.Resource.Type)
|
eng := alerting2.GetEngine(obj.Resource.Type)
|
||||||
|
@ -150,7 +163,7 @@ func (alertAPI *AlertAPI) updateRule(w http.ResponseWriter, req *http.Request, p
|
||||||
ID: obj.ID,
|
ID: obj.ID,
|
||||||
Interval: obj.Schedule.Interval,
|
Interval: obj.Schedule.Interval,
|
||||||
Description: obj.Metrics.Expression,
|
Description: obj.Metrics.Expression,
|
||||||
Task: eng.GenerateTask(&obj),
|
Task: eng.GenerateTask(obj),
|
||||||
}
|
}
|
||||||
task.RegisterScheduleTask(ruleTask)
|
task.RegisterScheduleTask(ruleTask)
|
||||||
task.StartTask(ruleTask.ID)
|
task.StartTask(ruleTask.ID)
|
||||||
|
|
|
@ -18,6 +18,8 @@ import (
|
||||||
"infini.sh/framework/core/orm"
|
"infini.sh/framework/core/orm"
|
||||||
"infini.sh/framework/core/util"
|
"infini.sh/framework/core/util"
|
||||||
"io"
|
"io"
|
||||||
|
"math"
|
||||||
|
"runtime/debug"
|
||||||
"sort"
|
"sort"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
@ -44,7 +46,10 @@ func (engine *Engine) GenerateQuery(rule *alerting.Rule) (interface{}, error) {
|
||||||
}
|
}
|
||||||
basicAggs := util.MapStr{}
|
basicAggs := util.MapStr{}
|
||||||
for _, metricItem := range rule.Metrics.Items {
|
for _, metricItem := range rule.Metrics.Items {
|
||||||
basicAggs[metricItem.Name] = engine.generateAgg(&metricItem)
|
metricAggs := engine.generateAgg(&metricItem)
|
||||||
|
if err = util.MergeFields(basicAggs, metricAggs, true); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
}
|
}
|
||||||
timeAggs := util.MapStr{
|
timeAggs := util.MapStr{
|
||||||
"date_histogram": util.MapStr{
|
"date_histogram": util.MapStr{
|
||||||
|
@ -95,7 +100,7 @@ func (engine *Engine) GenerateQuery(rule *alerting.Rule) (interface{}, error) {
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
//generateAgg convert statistic of metric item to elasticsearch aggregation
|
//generateAgg convert statistic of metric item to elasticsearch aggregation
|
||||||
func (engine *Engine) generateAgg(metricItem *alerting.MetricItem) interface{}{
|
func (engine *Engine) generateAgg(metricItem *alerting.MetricItem) map[string]interface{}{
|
||||||
var (
|
var (
|
||||||
aggType = "value_count"
|
aggType = "value_count"
|
||||||
field = metricItem.Field
|
field = metricItem.Field
|
||||||
|
@ -104,11 +109,15 @@ func (engine *Engine) generateAgg(metricItem *alerting.MetricItem) interface{}{
|
||||||
field = "_id"
|
field = "_id"
|
||||||
}
|
}
|
||||||
var percent = 0.0
|
var percent = 0.0
|
||||||
|
var isPipeline = false
|
||||||
switch metricItem.Statistic {
|
switch metricItem.Statistic {
|
||||||
case "max", "min", "sum", "avg":
|
case "max", "min", "sum", "avg":
|
||||||
aggType = metricItem.Statistic
|
aggType = metricItem.Statistic
|
||||||
case "count", "value_count":
|
case "count", "value_count":
|
||||||
aggType = "value_count"
|
aggType = "value_count"
|
||||||
|
case "rate":
|
||||||
|
aggType = "max"
|
||||||
|
isPipeline = true
|
||||||
case "medium":
|
case "medium":
|
||||||
aggType = "median_absolute_deviation"
|
aggType = "median_absolute_deviation"
|
||||||
case "p99", "p95","p90","p80","p50":
|
case "p99", "p95","p90","p80","p50":
|
||||||
|
@ -122,9 +131,22 @@ func (engine *Engine) generateAgg(metricItem *alerting.MetricItem) interface{}{
|
||||||
if aggType == "percentiles" {
|
if aggType == "percentiles" {
|
||||||
aggValue["percents"] = []interface{}{percent}
|
aggValue["percents"] = []interface{}{percent}
|
||||||
}
|
}
|
||||||
return util.MapStr{
|
aggs := util.MapStr{
|
||||||
aggType: aggValue,
|
metricItem.Name: util.MapStr{
|
||||||
|
aggType: aggValue,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
if !isPipeline{
|
||||||
|
return aggs
|
||||||
|
}
|
||||||
|
pipelineAggID := util.GetUUID()
|
||||||
|
aggs[pipelineAggID] = aggs[metricItem.Name]
|
||||||
|
aggs[metricItem.Name] = util.MapStr{
|
||||||
|
"derivative": util.MapStr{
|
||||||
|
"buckets_path": pipelineAggID,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
return aggs
|
||||||
}
|
}
|
||||||
|
|
||||||
func (engine *Engine) ConvertFilterQueryToDsl(fq *alerting.FilterQuery) (map[string]interface{}, error){
|
func (engine *Engine) ConvertFilterQueryToDsl(fq *alerting.FilterQuery) (map[string]interface{}, error){
|
||||||
|
@ -285,8 +307,10 @@ func (engine *Engine) GenerateRawFilter(rule *alerting.Rule) (map[string]interfa
|
||||||
must := []interface{}{
|
must := []interface{}{
|
||||||
timeQuery,
|
timeQuery,
|
||||||
}
|
}
|
||||||
if _, ok := query["match_all"]; !ok {
|
if len(query) > 0 {
|
||||||
must = append(must, query)
|
if _, ok = query["match_all"]; !ok {
|
||||||
|
must = append(must, query)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
query = util.MapStr{
|
query = util.MapStr{
|
||||||
"bool": util.MapStr{
|
"bool": util.MapStr{
|
||||||
|
@ -297,8 +321,9 @@ func (engine *Engine) GenerateRawFilter(rule *alerting.Rule) (map[string]interfa
|
||||||
return query, nil
|
return query, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (engine *Engine) ExecuteQuery(rule *alerting.Rule)([]alerting.MetricData, error){
|
func (engine *Engine) ExecuteQuery(rule *alerting.Rule)(*alerting.QueryResult, error){
|
||||||
esClient := elastic.GetClient(rule.Resource.ID)
|
esClient := elastic.GetClient(rule.Resource.ID)
|
||||||
|
queryResult := &alerting.QueryResult{}
|
||||||
indexName := strings.Join(rule.Resource.Objects, ",")
|
indexName := strings.Join(rule.Resource.Objects, ",")
|
||||||
queryDsl, err := engine.GenerateQuery(rule)
|
queryDsl, err := engine.GenerateQuery(rule)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -308,6 +333,7 @@ func (engine *Engine) ExecuteQuery(rule *alerting.Rule)([]alerting.MetricData, e
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
queryResult.Query = string(queryDslBytes)
|
||||||
searchRes, err := esClient.SearchWithRawQueryDSL(indexName, queryDslBytes)
|
searchRes, err := esClient.SearchWithRawQueryDSL(indexName, queryDslBytes)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
@ -315,6 +341,7 @@ func (engine *Engine) ExecuteQuery(rule *alerting.Rule)([]alerting.MetricData, e
|
||||||
if searchRes.StatusCode != 200 {
|
if searchRes.StatusCode != 200 {
|
||||||
return nil, fmt.Errorf("search error: %s", string(searchRes.RawResult.Body))
|
return nil, fmt.Errorf("search error: %s", string(searchRes.RawResult.Body))
|
||||||
}
|
}
|
||||||
|
queryResult.Raw = string(searchRes.RawResult.Body)
|
||||||
searchResult := map[string]interface{}{}
|
searchResult := map[string]interface{}{}
|
||||||
err = util.FromJSONBytes(searchRes.RawResult.Body, &searchResult)
|
err = util.FromJSONBytes(searchRes.RawResult.Body, &searchResult)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -322,18 +349,24 @@ func (engine *Engine) ExecuteQuery(rule *alerting.Rule)([]alerting.MetricData, e
|
||||||
}
|
}
|
||||||
metricData := []alerting.MetricData{}
|
metricData := []alerting.MetricData{}
|
||||||
collectMetricData(searchResult["aggregations"], "", &metricData)
|
collectMetricData(searchResult["aggregations"], "", &metricData)
|
||||||
return metricData, nil
|
queryResult.MetricData = metricData
|
||||||
|
return queryResult, nil
|
||||||
}
|
}
|
||||||
//CheckCondition check whether rule conditions triggered or not
|
//CheckCondition check whether rule conditions triggered or not
|
||||||
//if triggered returns an array of ConditionResult
|
//if triggered returns an array of ConditionResult
|
||||||
//sort conditions by severity desc before check , and then if condition is true, then continue check another group
|
//sort conditions by severity desc before check , and then if condition is true, then continue check another group
|
||||||
func (engine *Engine) CheckCondition(rule *alerting.Rule)([]alerting.ConditionResult, error){
|
func (engine *Engine) CheckCondition(rule *alerting.Rule)(*alerting.ConditionResult, error){
|
||||||
metricData, err := engine.ExecuteQuery(rule)
|
queryResult, err := engine.ExecuteQuery(rule)
|
||||||
if err != nil {
|
conditionResult := &alerting.ConditionResult{
|
||||||
return nil, err
|
QueryResult: queryResult,
|
||||||
}
|
}
|
||||||
var conditionResults []alerting.ConditionResult
|
if err != nil {
|
||||||
for _, md := range metricData {
|
return conditionResult, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var resultItems []alerting.ConditionResultItem
|
||||||
|
var targetMetricData []alerting.MetricData
|
||||||
|
for _, md := range queryResult.MetricData {
|
||||||
var targetData alerting.MetricData
|
var targetData alerting.MetricData
|
||||||
if len(rule.Metrics.Items) == 1 {
|
if len(rule.Metrics.Items) == 1 {
|
||||||
targetData = md
|
targetData = md
|
||||||
|
@ -344,7 +377,7 @@ func (engine *Engine) CheckCondition(rule *alerting.Rule)([]alerting.ConditionRe
|
||||||
}
|
}
|
||||||
expression, err := govaluate.NewEvaluableExpression(rule.Metrics.Formula)
|
expression, err := govaluate.NewEvaluableExpression(rule.Metrics.Formula)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return conditionResult, err
|
||||||
}
|
}
|
||||||
dataLength := 0
|
dataLength := 0
|
||||||
for _, v := range md.Data {
|
for _, v := range md.Data {
|
||||||
|
@ -357,8 +390,14 @@ func (engine *Engine) CheckCondition(rule *alerting.Rule)([]alerting.ConditionRe
|
||||||
}
|
}
|
||||||
var timestamp interface{}
|
var timestamp interface{}
|
||||||
for k, v := range md.Data {
|
for k, v := range md.Data {
|
||||||
|
if len(k) == 20 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
//drop nil value bucket
|
//drop nil value bucket
|
||||||
if v[i][1] == nil {
|
if len(v[i]) < 2 {
|
||||||
|
continue DataLoop
|
||||||
|
}
|
||||||
|
if _, ok := v[i][1].(float64); !ok {
|
||||||
continue DataLoop
|
continue DataLoop
|
||||||
}
|
}
|
||||||
parameters[k] = v[i][1]
|
parameters[k] = v[i][1]
|
||||||
|
@ -366,11 +405,18 @@ func (engine *Engine) CheckCondition(rule *alerting.Rule)([]alerting.ConditionRe
|
||||||
}
|
}
|
||||||
result, err := expression.Evaluate(parameters)
|
result, err := expression.Evaluate(parameters)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return conditionResult, err
|
||||||
}
|
}
|
||||||
|
if r, ok := result.(float64); ok {
|
||||||
|
if math.IsNaN(r){
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
targetData.Data["result"] = append(targetData.Data["result"], []interface{}{timestamp, result})
|
targetData.Data["result"] = append(targetData.Data["result"], []interface{}{timestamp, result})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
targetMetricData = append(targetMetricData, targetData)
|
||||||
sort.Slice(rule.Conditions.Items, func(i, j int) bool {
|
sort.Slice(rule.Conditions.Items, func(i, j int) bool {
|
||||||
return alerting.SeverityWeights[rule.Conditions.Items[i].Severity] > alerting.SeverityWeights[rule.Conditions.Items[j].Severity]
|
return alerting.SeverityWeights[rule.Conditions.Items[i].Severity] > alerting.SeverityWeights[rule.Conditions.Items[j].Severity]
|
||||||
})
|
})
|
||||||
|
@ -379,7 +425,7 @@ func (engine *Engine) CheckCondition(rule *alerting.Rule)([]alerting.ConditionRe
|
||||||
conditionExpression := ""
|
conditionExpression := ""
|
||||||
valueLength := len(cond.Values)
|
valueLength := len(cond.Values)
|
||||||
if valueLength == 0 {
|
if valueLength == 0 {
|
||||||
return nil, fmt.Errorf("condition values: %v should not be empty", cond.Values)
|
return conditionResult, fmt.Errorf("condition values: %v should not be empty", cond.Values)
|
||||||
}
|
}
|
||||||
switch cond.Operator {
|
switch cond.Operator {
|
||||||
case "equals":
|
case "equals":
|
||||||
|
@ -394,15 +440,15 @@ func (engine *Engine) CheckCondition(rule *alerting.Rule)([]alerting.ConditionRe
|
||||||
conditionExpression = fmt.Sprintf("result < %v", cond.Values[0])
|
conditionExpression = fmt.Sprintf("result < %v", cond.Values[0])
|
||||||
case "range":
|
case "range":
|
||||||
if valueLength != 2 {
|
if valueLength != 2 {
|
||||||
return nil, fmt.Errorf("length of %s condition values should be 2", cond.Operator)
|
return conditionResult, fmt.Errorf("length of %s condition values should be 2", cond.Operator)
|
||||||
}
|
}
|
||||||
conditionExpression = fmt.Sprintf("result >= %v && result <= %v", cond.Values[0], cond.Values[1])
|
conditionExpression = fmt.Sprintf("result >= %v && result <= %v", cond.Values[0], cond.Values[1])
|
||||||
default:
|
default:
|
||||||
return nil, fmt.Errorf("unsupport condition operator: %s", cond.Operator)
|
return conditionResult, fmt.Errorf("unsupport condition operator: %s", cond.Operator)
|
||||||
}
|
}
|
||||||
expression, err := govaluate.NewEvaluableExpression(conditionExpression)
|
expression, err := govaluate.NewEvaluableExpression(conditionExpression)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return conditionResult, err
|
||||||
}
|
}
|
||||||
dataLength := 0
|
dataLength := 0
|
||||||
dataKey := ""
|
dataKey := ""
|
||||||
|
@ -412,20 +458,20 @@ func (engine *Engine) CheckCondition(rule *alerting.Rule)([]alerting.ConditionRe
|
||||||
}
|
}
|
||||||
triggerCount := 0
|
triggerCount := 0
|
||||||
for i := 0; i < dataLength; i++ {
|
for i := 0; i < dataLength; i++ {
|
||||||
conditionResult, err := expression.Evaluate(map[string]interface{}{
|
evaluateResult, err := expression.Evaluate(map[string]interface{}{
|
||||||
"result": targetData.Data[dataKey][i][1],
|
"result": targetData.Data[dataKey][i][1],
|
||||||
})
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
if conditionResult == true {
|
if evaluateResult == true {
|
||||||
triggerCount += 1
|
triggerCount += 1
|
||||||
}else {
|
}else {
|
||||||
triggerCount = 0
|
triggerCount = 0
|
||||||
}
|
}
|
||||||
if triggerCount >= cond.MinimumPeriodMatch {
|
if triggerCount >= cond.MinimumPeriodMatch {
|
||||||
log.Debugf("triggered condition %v, groups: %v\n", cond, targetData.GroupValues)
|
log.Debugf("triggered condition %v, groups: %v\n", cond, targetData.GroupValues)
|
||||||
conditionResults = append(conditionResults, alerting.ConditionResult{
|
resultItems = append(resultItems, alerting.ConditionResultItem{
|
||||||
GroupValues: targetData.GroupValues,
|
GroupValues: targetData.GroupValues,
|
||||||
ConditionItem: &cond,
|
ConditionItem: &cond,
|
||||||
})
|
})
|
||||||
|
@ -435,7 +481,9 @@ func (engine *Engine) CheckCondition(rule *alerting.Rule)([]alerting.ConditionRe
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return conditionResults, nil
|
conditionResult.QueryResult.MetricData = targetMetricData
|
||||||
|
conditionResult.ResultItems = resultItems
|
||||||
|
return conditionResult, nil
|
||||||
}
|
}
|
||||||
func (engine *Engine) Do(rule *alerting.Rule) error {
|
func (engine *Engine) Do(rule *alerting.Rule) error {
|
||||||
|
|
||||||
|
@ -474,7 +522,18 @@ func (engine *Engine) Do(rule *alerting.Rule) error {
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
log.Tracef("start check condition of rule %s", rule.ID)
|
log.Tracef("start check condition of rule %s", rule.ID)
|
||||||
conditionResults, err := engine.CheckCondition(rule)
|
checkResults, err := engine.CheckCondition(rule)
|
||||||
|
alertItem = &alerting.Alert{
|
||||||
|
ID: util.GetUUID(),
|
||||||
|
Created: time.Now(),
|
||||||
|
Updated: time.Now(),
|
||||||
|
RuleID: rule.ID,
|
||||||
|
ResourceID: rule.Resource.ID,
|
||||||
|
Expression: rule.Metrics.Expression,
|
||||||
|
Objects: rule.Resource.Objects,
|
||||||
|
ConditionResult: checkResults,
|
||||||
|
Conditions: rule.Conditions,
|
||||||
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -483,21 +542,11 @@ func (engine *Engine) Do(rule *alerting.Rule) error {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
conditionResults := checkResults.ResultItems
|
||||||
if len(conditionResults) == 0 {
|
if len(conditionResults) == 0 {
|
||||||
if lastAlertItem.State != alerting.AlertStateNormal && lastAlertItem.ID != "" {
|
alertItem.Severity = "info"
|
||||||
alertItem = &alerting.Alert{
|
alertItem.Content = ""
|
||||||
ID: util.GetUUID(),
|
alertItem.State = alerting.AlertStateNormal
|
||||||
Created: time.Now(),
|
|
||||||
Updated: time.Now(),
|
|
||||||
RuleID: rule.ID,
|
|
||||||
ResourceID: rule.Resource.ID,
|
|
||||||
Expression: rule.Metrics.Expression,
|
|
||||||
Objects: rule.Resource.Objects,
|
|
||||||
Severity: "info",
|
|
||||||
Content: "",
|
|
||||||
State: alerting.AlertStateNormal,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
return nil
|
||||||
}else{
|
}else{
|
||||||
if lastAlertItem.State == "" || lastAlertItem.State == alerting.AlertStateNormal {
|
if lastAlertItem.State == "" || lastAlertItem.State == alerting.AlertStateNormal {
|
||||||
|
@ -511,22 +560,12 @@ func (engine *Engine) Do(rule *alerting.Rule) error {
|
||||||
for _, conditionResult := range conditionResults {
|
for _, conditionResult := range conditionResults {
|
||||||
if alerting.SeverityWeights[severity] < alerting.SeverityWeights[conditionResult.ConditionItem.Severity] {
|
if alerting.SeverityWeights[severity] < alerting.SeverityWeights[conditionResult.ConditionItem.Severity] {
|
||||||
severity = conditionResult.ConditionItem.Severity
|
severity = conditionResult.ConditionItem.Severity
|
||||||
|
content = conditionResult.ConditionItem.Message
|
||||||
}
|
}
|
||||||
content += conditionResult.ConditionItem.Message + ";"
|
|
||||||
}
|
|
||||||
alertItem = &alerting.Alert{
|
|
||||||
ID: util.GetUUID(),
|
|
||||||
Created: time.Now(),
|
|
||||||
Updated: time.Now(),
|
|
||||||
RuleID: rule.ID,
|
|
||||||
ResourceID: rule.Resource.ID,
|
|
||||||
ResourceName: rule.Resource.Name,
|
|
||||||
Expression: rule.Metrics.Expression,
|
|
||||||
Objects: rule.Resource.Objects,
|
|
||||||
Severity: severity,
|
|
||||||
Content: content,
|
|
||||||
State: alerting.AlertStateActive,
|
|
||||||
}
|
}
|
||||||
|
alertItem.Severity = severity
|
||||||
|
alertItem.Content = content
|
||||||
|
alertItem.State = alerting.AlertStateActive
|
||||||
}
|
}
|
||||||
|
|
||||||
if rule.Channels.AcceptTimeRange.Include(time.Now()) {
|
if rule.Channels.AcceptTimeRange.Include(time.Now()) {
|
||||||
|
@ -571,7 +610,7 @@ func (engine *Engine) Do(rule *alerting.Rule) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func performChannels(channels []alerting.Channel, conditionResults []alerting.ConditionResult) []alerting.ActionExecutionResult {
|
func performChannels(channels []alerting.Channel, conditionResults []alerting.ConditionResultItem) []alerting.ActionExecutionResult {
|
||||||
var message string
|
var message string
|
||||||
for _, conditionResult := range conditionResults {
|
for _, conditionResult := range conditionResults {
|
||||||
message += fmt.Sprintf("severity: %s\t message:%s\t groups:%v\t timestamp: %v;", conditionResult.ConditionItem.Severity, conditionResult.ConditionItem.Message, conditionResult.GroupValues, time.Now())
|
message += fmt.Sprintf("severity: %s\t message:%s\t groups:%v\t timestamp: %v;", conditionResult.ConditionItem.Severity, conditionResult.ConditionItem.Message, conditionResult.GroupValues, time.Now())
|
||||||
|
@ -632,6 +671,12 @@ func performChannel(channel *alerting.Channel, ctx []byte) ([]byte, error) {
|
||||||
}
|
}
|
||||||
func (engine *Engine) GenerateTask(rule *alerting.Rule) func(ctx context.Context) {
|
func (engine *Engine) GenerateTask(rule *alerting.Rule) func(ctx context.Context) {
|
||||||
return func(ctx context.Context) {
|
return func(ctx context.Context) {
|
||||||
|
defer func() {
|
||||||
|
if err := recover(); err != nil {
|
||||||
|
log.Error(err)
|
||||||
|
debug.PrintStack()
|
||||||
|
}
|
||||||
|
}()
|
||||||
err := engine.Do(rule)
|
err := engine.Do(rule)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error(err)
|
log.Error(err)
|
||||||
|
|
|
@ -126,6 +126,57 @@ func TestGenerateAgg(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestGeneratePercentilesAggQuery(t *testing.T) {
|
func TestGeneratePercentilesAggQuery(t *testing.T) {
|
||||||
|
//rule := alerting.Rule{
|
||||||
|
// ID: util.GetUUID(),
|
||||||
|
// Created: time.Now(),
|
||||||
|
// Updated: time.Now(),
|
||||||
|
// Enabled: true,
|
||||||
|
// Resource: alerting.Resource{
|
||||||
|
// ID: "c8i18llath2blrusdjng",
|
||||||
|
// Type: "elasticsearch",
|
||||||
|
// Objects: []string{".infini_metrics*"},
|
||||||
|
// TimeField: "timestamp",
|
||||||
|
// RawFilter: map[string]interface{}{
|
||||||
|
// "match_all": util.MapStr{
|
||||||
|
//
|
||||||
|
// },
|
||||||
|
// },
|
||||||
|
// },
|
||||||
|
//
|
||||||
|
// Metrics: alerting.Metric{
|
||||||
|
// PeriodInterval: "1m",
|
||||||
|
// MaxPeriods: 15,
|
||||||
|
// Items: []alerting.MetricItem{
|
||||||
|
// {Name: "a", Field: "payload.elasticsearch.node_stats.os.cpu.percent", Statistic: "p99", Group: []string{"metadata.labels.cluster_id", "metadata.labels.node_id"}},
|
||||||
|
// },
|
||||||
|
// },
|
||||||
|
// Conditions: alerting.Condition{
|
||||||
|
// Operator: "any",
|
||||||
|
// Items: []alerting.ConditionItem{
|
||||||
|
// {MinimumPeriodMatch: 5, Operator: "gte", Values: []string{"90"}, Severity: "error", Message: "cpu使用率大于90%"},
|
||||||
|
// },
|
||||||
|
// },
|
||||||
|
//
|
||||||
|
// Channels: alerting.RuleChannel{
|
||||||
|
// Normal: []alerting.Channel{
|
||||||
|
// {Name: "钉钉", Type: alerting.ChannelWebhook, Webhook: &alerting.CustomWebhook{
|
||||||
|
// HeaderParams: map[string]string{
|
||||||
|
// "Content-Type": "application/json",
|
||||||
|
// },
|
||||||
|
// Body: `{"msgtype": "text","text": {"content":"告警通知: {{ctx.message}}"}}`,
|
||||||
|
// Method: http.MethodPost,
|
||||||
|
// URL: "https://oapi.dingtalk.com/robot/send?access_token=XXXXXX",
|
||||||
|
// }},
|
||||||
|
// },
|
||||||
|
// ThrottlePeriod: "1h",
|
||||||
|
// AcceptTimeRange: alerting.TimeRange{
|
||||||
|
// Start: "8:00",
|
||||||
|
// End: "21:00",
|
||||||
|
// },
|
||||||
|
// EscalationEnabled: true,
|
||||||
|
// EscalationThrottlePeriod: "30m",
|
||||||
|
// },
|
||||||
|
//}
|
||||||
rule := alerting.Rule{
|
rule := alerting.Rule{
|
||||||
ID: util.GetUUID(),
|
ID: util.GetUUID(),
|
||||||
Created: time.Now(),
|
Created: time.Now(),
|
||||||
|
@ -137,8 +188,16 @@ func TestGeneratePercentilesAggQuery(t *testing.T) {
|
||||||
Objects: []string{".infini_metrics*"},
|
Objects: []string{".infini_metrics*"},
|
||||||
TimeField: "timestamp",
|
TimeField: "timestamp",
|
||||||
RawFilter: map[string]interface{}{
|
RawFilter: map[string]interface{}{
|
||||||
"match_all": util.MapStr{
|
"bool": map[string]interface{}{
|
||||||
|
"must": []interface{}{
|
||||||
|
util.MapStr{
|
||||||
|
"term": util.MapStr{
|
||||||
|
"metadata.name": util.MapStr{
|
||||||
|
"value": "index_stats",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
@ -147,13 +206,15 @@ func TestGeneratePercentilesAggQuery(t *testing.T) {
|
||||||
PeriodInterval: "1m",
|
PeriodInterval: "1m",
|
||||||
MaxPeriods: 15,
|
MaxPeriods: 15,
|
||||||
Items: []alerting.MetricItem{
|
Items: []alerting.MetricItem{
|
||||||
{Name: "a", Field: "payload.elasticsearch.node_stats.os.cpu.percent", Statistic: "p99", Group: []string{"metadata.labels.cluster_id", "metadata.labels.node_id"}},
|
{Name: "a", Field: "payload.elasticsearch.index_stats.total.search.query_total", Statistic: "rate", Group: []string{"metadata.labels.cluster_id"}},
|
||||||
|
{Name: "b", Field: "payload.elasticsearch.index_stats.total.search.query_time_in_millis", Statistic: "rate", Group: []string{"metadata.labels.cluster_id"}},
|
||||||
},
|
},
|
||||||
|
Formula: "b/a",
|
||||||
},
|
},
|
||||||
Conditions: alerting.Condition{
|
Conditions: alerting.Condition{
|
||||||
Operator: "any",
|
Operator: "any",
|
||||||
Items: []alerting.ConditionItem{
|
Items: []alerting.ConditionItem{
|
||||||
{MinimumPeriodMatch: 5, Operator: "gte", Values: []string{"90"}, Severity: "error", Message: "cpu使用率大于90%"},
|
{MinimumPeriodMatch: 1, Operator: "gte", Values: []string{"10"}, Severity: "warning", Message: "搜索延迟大于10ms"},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|
||||||
|
@ -170,7 +231,7 @@ func TestGeneratePercentilesAggQuery(t *testing.T) {
|
||||||
},
|
},
|
||||||
ThrottlePeriod: "1h",
|
ThrottlePeriod: "1h",
|
||||||
AcceptTimeRange: alerting.TimeRange{
|
AcceptTimeRange: alerting.TimeRange{
|
||||||
Start: "8:00",
|
Start: "08:00",
|
||||||
End: "21:00",
|
End: "21:00",
|
||||||
},
|
},
|
||||||
EscalationEnabled: true,
|
EscalationEnabled: true,
|
||||||
|
@ -209,11 +270,13 @@ func TestConvertFilterQuery(t *testing.T) {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
var targetDsl = `{"bool":{"must":[{"term":{"metadata.category":{"value":"elasticsearch"}}},{"terms":{"metadata.name":["index_stats","node_stats"]}},{"bool":{"must_not":[{"range":{"timestamp":{"gt":"2022-04-16T16:16:39.168605+08:00"}}}]}}]}}`
|
||||||
eng := &Engine{}
|
eng := &Engine{}
|
||||||
q, err := eng.ConvertFilterQueryToDsl(&fq)
|
q, err := eng.ConvertFilterQueryToDsl(&fq)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
fmt.Println(util.MustToJSON(q))
|
if dsl := util.MustToJSON(q); dsl != targetDsl {
|
||||||
|
t.Errorf("expect dsl %s but got %s", targetDsl, dsl)
|
||||||
|
}
|
||||||
}
|
}
|
|
@ -13,8 +13,8 @@ import (
|
||||||
|
|
||||||
type Engine interface {
|
type Engine interface {
|
||||||
GenerateQuery(rule *alerting.Rule) (interface{}, error)
|
GenerateQuery(rule *alerting.Rule) (interface{}, error)
|
||||||
ExecuteQuery(rule *alerting.Rule)([]alerting.MetricData, error)
|
ExecuteQuery(rule *alerting.Rule)(*alerting.QueryResult, error)
|
||||||
CheckCondition(rule *alerting.Rule)([]alerting.ConditionResult, error)
|
CheckCondition(rule *alerting.Rule)(*alerting.ConditionResult, error)
|
||||||
GenerateTask(rule *alerting.Rule) func(ctx context.Context)
|
GenerateTask(rule *alerting.Rule) func(ctx context.Context)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue