add alert recovery notification
This commit is contained in:
parent
464a2f9bea
commit
0f4bb211d5
|
@ -13,6 +13,8 @@ type Channel struct {
|
||||||
Priority int `json:"priority,omitempty"`
|
Priority int `json:"priority,omitempty"`
|
||||||
Webhook *CustomWebhook `json:"webhook,omitempty" elastic_mapping:"webhook:{type:object}"`
|
Webhook *CustomWebhook `json:"webhook,omitempty" elastic_mapping:"webhook:{type:object}"`
|
||||||
SearchText string `json:"-" elastic_mapping:"search_text:{type:text,index_prefixes:{},index_phrases:true, analyzer:suggest_text_search }"`
|
SearchText string `json:"-" elastic_mapping:"search_text:{type:text,index_prefixes:{},index_phrases:true, analyzer:suggest_text_search }"`
|
||||||
|
SubType string `json:"sub_type" elastic_mapping:"sub_type:{type:keyword,copy_to:search_text}"`
|
||||||
|
Email *Email `json:"email,omitempty" elastic_mapping:"email:{type:object}"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -13,9 +13,9 @@ import (
|
||||||
|
|
||||||
// Metric embeds insight.Metric and adds the alert expression plus the title
// and message text templates that Rule.GetNotificationTitleAndMessage returns
// when the rule has no NotificationConfig.
type Metric struct {
|
type Metric struct {
|
||||||
insight.Metric
|
insight.Metric
|
||||||
Expression string `json:"expression" elastic_mapping:"expression:{type:keyword,copy_to:search_text}"` // alert expression, generated automatically, e.g. avg(cpu) > 80
|
Title string `json:"title,omitempty"` // text template for the notification title
|
||||||
Title string `json:"title"` // text template for the notification title
|
Message string `json:"message,omitempty"` // text template for the notification message
|
||||||
Message string `json:"message"` // text template for the notification message
|
Expression string `json:"expression,omitempty" elastic_mapping:"expression:{type:keyword,copy_to:search_text}"` // alert expression, generated automatically, e.g. avg(cpu) > 80
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -17,9 +17,11 @@ type Rule struct {
|
||||||
Enabled bool `json:"enabled" elastic_mapping:"enabled:{type:keyword}"`
|
Enabled bool `json:"enabled" elastic_mapping:"enabled:{type:keyword}"`
|
||||||
Resource Resource `json:"resource" elastic_mapping:"resource:{type:object}"`
|
Resource Resource `json:"resource" elastic_mapping:"resource:{type:object}"`
|
||||||
Metrics Metric `json:"metrics" elastic_mapping:"metrics:{type:object}"`
|
Metrics Metric `json:"metrics" elastic_mapping:"metrics:{type:object}"`
|
||||||
Conditions Condition `json:"conditions" elastic_mapping:"conditions:{type:object}"`
|
Conditions Condition `json:"conditions" elastic_mapping:"conditions:{type:object}"`
|
||||||
Channels RuleChannel `json:"channels" elastic_mapping:"channels:{type:object}"`
|
Channels NotificationConfig `json:"channels,omitempty" elastic_mapping:"channels:{type:object}"`
|
||||||
Schedule Schedule `json:"schedule" elastic_mapping:"schedule:{type:object}"`
|
NotificationConfig *NotificationConfig `json:"notification_config,omitempty" elastic_mapping:"notification_config:{type:object}"`
|
||||||
|
RecoveryNotificationConfig *RecoveryNotificationConfig `json:"recovery_notification_config,omitempty" elastic_mapping:"recovery_notification_config:{type:object}"`
|
||||||
|
Schedule Schedule `json:"schedule" elastic_mapping:"schedule:{type:object}"`
|
||||||
LastNotificationTime time.Time `json:"-" elastic_mapping:"last_notification_time:{type:date}"`
|
LastNotificationTime time.Time `json:"-" elastic_mapping:"last_notification_time:{type:date}"`
|
||||||
LastTermStartTime time.Time `json:"-"` //标识最近一轮告警的开始时间
|
LastTermStartTime time.Time `json:"-"` //标识最近一轮告警的开始时间
|
||||||
LastEscalationTime time.Time `json:"-"` //标识最近一次告警升级发送通知的时间
|
LastEscalationTime time.Time `json:"-"` //标识最近一次告警升级发送通知的时间
|
||||||
|
@ -54,9 +56,24 @@ func (rule *Rule) GetOrInitExpression() (string, error){
|
||||||
rule.Expression = strings.ReplaceAll(sb.String(), "result", metricExp)
|
rule.Expression = strings.ReplaceAll(sb.String(), "result", metricExp)
|
||||||
return rule.Expression, nil
|
return rule.Expression, nil
|
||||||
}
|
}
|
||||||
|
// GetNotificationConfig returns rule.NotificationConfig when it is set.
// Otherwise it returns a pointer to the legacy rule.Channels field, which
// adapts rules that still use the old configuration layout.
// For a non-nil rule the result is never nil.
|
||||||
|
func (rule *Rule) GetNotificationConfig() *NotificationConfig {
|
||||||
|
if rule.NotificationConfig != nil {
|
||||||
|
return rule.NotificationConfig
|
||||||
|
}
|
||||||
|
// Fall back to the old-version field; the pointer aliases rule.Channels,
// so writes through it modify the rule itself.
return &rule.Channels
|
||||||
|
}
|
||||||
|
// GetNotificationTitleAndMessage returns the title and message text
// templates for alert notifications, in that order. Both values come from
// rule.NotificationConfig when it is non-nil, and from rule.Metrics otherwise.
//
// NOTE(review): an empty Title or Message inside a non-nil NotificationConfig
// is returned as-is; there is no per-field fallback to rule.Metrics. Confirm
// this is intended.
func (rule *Rule) GetNotificationTitleAndMessage() (string, string) {
|
||||||
|
if rule.NotificationConfig != nil {
|
||||||
|
return rule.NotificationConfig.Title, rule.NotificationConfig.Message
|
||||||
|
}
|
||||||
|
// Old-version rules keep their templates on the metric definition.
return rule.Metrics.Title, rule.Metrics.Message
|
||||||
|
}
|
||||||
|
|
||||||
type RuleChannel struct {
|
type NotificationConfig struct {
|
||||||
Enabled bool `json:"enabled"`
|
Enabled bool `json:"enabled"`
|
||||||
|
Title string `json:"title,omitempty"` //text template
|
||||||
|
Message string `json:"message,omitempty"` // text template
|
||||||
Normal []Channel `json:"normal,omitempty"`
|
Normal []Channel `json:"normal,omitempty"`
|
||||||
Escalation []Channel `json:"escalation,omitempty"`
|
Escalation []Channel `json:"escalation,omitempty"`
|
||||||
ThrottlePeriod string `json:"throttle_period,omitempty"` //沉默周期
|
ThrottlePeriod string `json:"throttle_period,omitempty"` //沉默周期
|
||||||
|
@ -65,6 +82,14 @@ type RuleChannel struct {
|
||||||
EscalationEnabled bool `json:"escalation_enabled,omitempty"`
|
EscalationEnabled bool `json:"escalation_enabled,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// RecoveryNotificationConfig configures the message sent to channels when an
// alert recovers. Engine.Do reads it from Rule.RecoveryNotificationConfig and
// skips the recovery notification when the pointer is nil or Enabled is false.
type RecoveryNotificationConfig struct {
|
||||||
|
Enabled bool `json:"enabled"` // recovery notifications are sent only when true
|
||||||
|
Title string `json:"title"` // text template for the recovery notification title
|
||||||
|
Message string `json:"message"` // text template for the recovery notification message
|
||||||
|
AcceptTimeRange TimeRange `json:"accept_time_range,omitempty"` // checked with Include(time.Now()) before a recovery notification is sent
|
||||||
|
Channels []Channel `json:"channels,omitempty"` // channels the recovery notification is performed on
|
||||||
|
}
|
||||||
|
|
||||||
type MessageTemplate struct{
|
type MessageTemplate struct{
|
||||||
Type string `json:"type"`
|
Type string `json:"type"`
|
||||||
Source string `json:"source"`
|
Source string `json:"source"`
|
||||||
|
|
|
@ -75,7 +75,7 @@ func TestCreateRule( t *testing.T) {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|
||||||
Channels: RuleChannel{
|
Channels: NotificationConfig{
|
||||||
Normal: []Channel{
|
Normal: []Channel{
|
||||||
{Name: "钉钉", Type: ChannelWebhook, Webhook: &CustomWebhook{
|
{Name: "钉钉", Type: ChannelWebhook, Webhook: &CustomWebhook{
|
||||||
HeaderParams: map[string]string{
|
HeaderParams: map[string]string{
|
||||||
|
|
|
@ -10,3 +10,14 @@ type CustomWebhook struct {
|
||||||
URL string `json:"url,omitempty"`
|
URL string `json:"url,omitempty"`
|
||||||
Body string `json:"body" elastic_mapping:"body:{type:text}"`
|
Body string `json:"body" elastic_mapping:"body:{type:text}"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Email is the e-mail specific configuration of a notification channel
// (Channel.Email).
type Email struct {
|
||||||
|
ServerID string `json:"server_id"` // presumably identifies the mail server to send through — TODO confirm
|
||||||
|
// Recipients groups the destination addresses by kind.
Recipients struct {
|
||||||
|
To []string `json:"to" elastic_mapping:"to:{type:keyword}"` // primary recipients
|
||||||
|
CC []string `json:"cc" elastic_mapping:"cc:{type:keyword}"` // carbon-copy recipients
|
||||||
|
BCC []string `json:"bcc" elastic_mapping:"bcc:{type:keyword}"` // blind-carbon-copy recipients
|
||||||
|
} `json:"recipients" elastic_mapping:"recipients:{type:object}"`
|
||||||
|
Subject string `json:"subject" elastic_mapping:"subject:{type:text}"` // subject template; resolved with resolveMessage in performChannel
|
||||||
|
Body string `json:"body" elastic_mapping:"body:{type:text}"` // body template; resolved with resolveMessage in performChannel
|
||||||
|
}
|
|
@ -0,0 +1,34 @@
|
||||||
|
/* Copyright © INFINI Ltd. All rights reserved.
|
||||||
|
* Web: https://infinilabs.com
|
||||||
|
* Email: hello#infini.ltd */
|
||||||
|
|
||||||
|
package action
|
||||||
|
|
||||||
|
import (
|
||||||
|
"infini.sh/console/model/alerting"
|
||||||
|
"infini.sh/framework/core/queue"
|
||||||
|
"infini.sh/framework/core/util"
|
||||||
|
)
|
||||||
|
|
||||||
|
// EmailAction is the channel action that enqueues an alert e-mail.
type EmailAction struct {
|
||||||
|
Data *alerting.Email // channel e-mail configuration; Execute reads Recipients.To from it
|
||||||
|
Subject string // subject text placed in the queued message
|
||||||
|
Body string // body text placed in the queued message
|
||||||
|
}
|
||||||
|
|
||||||
|
// EmailQueueName is the name of the queue that EmailAction.Execute pushes
// e-mail messages onto.
const EmailQueueName = "alert_email_messages"
|
||||||
|
|
||||||
|
// Execute builds a message from the action's recipients, subject and body,
// serializes it to JSON and pushes it onto the EmailQueueName queue. It does
// not deliver mail itself. The returned byte slice is always nil; the error
// is whatever queue.Push returns.
//
// NOTE(review): act.Data is dereferenced without a nil check, so an
// EmailAction with a nil Data panics here.
// NOTE(review): only Recipients.To is forwarded; CC, BCC and ServerID from
// act.Data are not included in the queued message. Confirm this is intended.
func (act *EmailAction) Execute()([]byte, error){
|
||||||
|
queueCfg := queue.GetOrInitConfig(EmailQueueName)
|
||||||
|
emailMsg := util.MapStr{
|
||||||
|
"email": act.Data.Recipients.To,
|
||||||
|
// presumably selects a pass-through template on the consumer side — TODO confirm
"template": "raw",
|
||||||
|
"variables": util.MapStr{
|
||||||
|
"subject": act.Subject,
|
||||||
|
"body": act.Body,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
// presumably panics on a serialization failure, given the Must prefix — confirm
emailMsgBytes := util.MustToJSONBytes(emailMsg)
|
||||||
|
err := queue.Push(queueCfg, emailMsgBytes)
|
||||||
|
return nil, err
|
||||||
|
}
|
|
@ -679,6 +679,22 @@ func (engine *Engine) Do(rule *alerting.Rule) error {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("save alert message error: %w", err)
|
return fmt.Errorf("save alert message error: %w", err)
|
||||||
}
|
}
|
||||||
|
// send recover message to channel
|
||||||
|
recoverCfg := rule.RecoveryNotificationConfig
|
||||||
|
if recoverCfg != nil && recoverCfg.Enabled {
|
||||||
|
if recoverCfg.AcceptTimeRange.Include(time.Now()) {
|
||||||
|
paramsCtx = newParameterCtx(rule, checkResults, util.MapStr{
|
||||||
|
alerting2.ParamEventID: alertItem.ID,
|
||||||
|
alerting2.ParamTimestamp: alertItem.Created.Unix(),
|
||||||
|
})
|
||||||
|
err = attachTitleMessageToCtx(recoverCfg.Title, recoverCfg.Message, paramsCtx)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
actionResults, _ := performChannels(recoverCfg.Channels, paramsCtx)
|
||||||
|
alertItem.ActionExecutionResults = actionResults
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -698,7 +714,8 @@ func (engine *Engine) Do(rule *alerting.Rule) error {
|
||||||
})
|
})
|
||||||
|
|
||||||
alertItem.Priority = priority
|
alertItem.Priority = priority
|
||||||
err = attachTitleMessageToCtx(rule, paramsCtx)
|
title, message := rule.GetNotificationTitleAndMessage()
|
||||||
|
err = attachTitleMessageToCtx(title, message, paramsCtx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -707,7 +724,7 @@ func (engine *Engine) Do(rule *alerting.Rule) error {
|
||||||
if alertMessage == nil || alertMessage.Status == alerting.MessageStateRecovered {
|
if alertMessage == nil || alertMessage.Status == alerting.MessageStateRecovered {
|
||||||
msg := &alerting.AlertMessage{
|
msg := &alerting.AlertMessage{
|
||||||
RuleID: rule.ID,
|
RuleID: rule.ID,
|
||||||
Created: time.Now(),
|
Created: alertItem.Created,
|
||||||
Updated: time.Now(),
|
Updated: time.Now(),
|
||||||
ID: util.GetUUID(),
|
ID: util.GetUUID(),
|
||||||
ResourceID: rule.Resource.ID,
|
ResourceID: rule.Resource.ID,
|
||||||
|
@ -756,12 +773,13 @@ func (engine *Engine) Do(rule *alerting.Rule) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
// if channel is not enabled return
|
// if channel is not enabled return
|
||||||
if !rule.Channels.Enabled {
|
notifyCfg := rule.GetNotificationConfig()
|
||||||
|
if notifyCfg == nil || !notifyCfg.Enabled {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if rule.Channels.AcceptTimeRange.Include(time.Now()) {
|
if notifyCfg.AcceptTimeRange.Include(time.Now()) {
|
||||||
periodDuration, err := time.ParseDuration(rule.Channels.ThrottlePeriod)
|
periodDuration, err := time.ParseDuration(notifyCfg.ThrottlePeriod)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
alertItem.Error = err.Error()
|
alertItem.Error = err.Error()
|
||||||
return err
|
return err
|
||||||
|
@ -787,7 +805,7 @@ func (engine *Engine) Do(rule *alerting.Rule) error {
|
||||||
}
|
}
|
||||||
|
|
||||||
if alertMessage == nil || period > periodDuration {
|
if alertMessage == nil || period > periodDuration {
|
||||||
actionResults, errCount := performChannels(rule.Channels.Normal, paramsCtx)
|
actionResults, errCount := performChannels(notifyCfg.Normal, paramsCtx)
|
||||||
alertItem.ActionExecutionResults = actionResults
|
alertItem.ActionExecutionResults = actionResults
|
||||||
//change and save last notification time in local kv store when action error count equals zero
|
//change and save last notification time in local kv store when action error count equals zero
|
||||||
if errCount == 0 {
|
if errCount == 0 {
|
||||||
|
@ -798,8 +816,8 @@ func (engine *Engine) Do(rule *alerting.Rule) error {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if rule.Channels.EscalationEnabled {
|
if notifyCfg.EscalationEnabled {
|
||||||
throttlePeriod, err := time.ParseDuration(rule.Channels.EscalationThrottlePeriod)
|
throttlePeriod, err := time.ParseDuration(notifyCfg.EscalationThrottlePeriod)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -819,7 +837,7 @@ func (engine *Engine) Do(rule *alerting.Rule) error {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if time.Now().Sub(rule.LastEscalationTime.Local()) > periodDuration {
|
if time.Now().Sub(rule.LastEscalationTime.Local()) > periodDuration {
|
||||||
actionResults, errCount := performChannels(rule.Channels.Escalation, paramsCtx)
|
actionResults, errCount := performChannels(notifyCfg.Escalation, paramsCtx)
|
||||||
alertItem.ActionExecutionResults = actionResults
|
alertItem.ActionExecutionResults = actionResults
|
||||||
//todo init last escalation time when create task (by last alert item is escalated)
|
//todo init last escalation time when create task (by last alert item is escalated)
|
||||||
if errCount == 0 {
|
if errCount == 0 {
|
||||||
|
@ -836,17 +854,17 @@ func (engine *Engine) Do(rule *alerting.Rule) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func attachTitleMessageToCtx(rule *alerting.Rule, paramsCtx map[string]interface{}) error{
|
func attachTitleMessageToCtx(title, message string, paramsCtx map[string]interface{}) error{
|
||||||
var (
|
var (
|
||||||
tplBytes []byte
|
tplBytes []byte
|
||||||
err error
|
err error
|
||||||
)
|
)
|
||||||
tplBytes, err = resolveMessage(rule.Metrics.Message, paramsCtx)
|
tplBytes, err = resolveMessage(message, paramsCtx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("resolve message template error: %w", err)
|
return fmt.Errorf("resolve message template error: %w", err)
|
||||||
}
|
}
|
||||||
paramsCtx[alerting2.ParamMessage] = string(tplBytes)
|
paramsCtx[alerting2.ParamMessage] = string(tplBytes)
|
||||||
tplBytes, err = resolveMessage(rule.Metrics.Title, paramsCtx)
|
tplBytes, err = resolveMessage(title, paramsCtx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("resolve title template error: %w", err)
|
return fmt.Errorf("resolve title template error: %w", err)
|
||||||
}
|
}
|
||||||
|
@ -927,7 +945,8 @@ func (engine *Engine) Test(rule *alerting.Rule) ([]alerting.ActionExecutionResul
|
||||||
alerting2.ParamEventID: util.GetUUID(),
|
alerting2.ParamEventID: util.GetUUID(),
|
||||||
alerting2.ParamTimestamp: time.Now().Unix(),
|
alerting2.ParamTimestamp: time.Now().Unix(),
|
||||||
} )
|
} )
|
||||||
err = attachTitleMessageToCtx(rule, paramsCtx)
|
title, message := rule.GetNotificationTitleAndMessage()
|
||||||
|
err = attachTitleMessageToCtx(title, message, paramsCtx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
@ -997,6 +1016,20 @@ func performChannel(channel *alerting.Channel, ctx map[string]interface{}) ([]by
|
||||||
Data: &wh,
|
Data: &wh,
|
||||||
Message: string(message),
|
Message: string(message),
|
||||||
}
|
}
|
||||||
|
case alerting.ChannelEmail:
|
||||||
|
message, err = resolveMessage(channel.Email.Body, ctx)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err, message
|
||||||
|
}
|
||||||
|
subjectBytes, err := resolveMessage(channel.Email.Subject, ctx)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err, nil
|
||||||
|
}
|
||||||
|
act = &action.EmailAction{
|
||||||
|
Data: channel.Email,
|
||||||
|
Subject: string(subjectBytes),
|
||||||
|
Body: string(message),
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
return nil, fmt.Errorf("unsupported action type: %s", channel.Type), message
|
return nil, fmt.Errorf("unsupported action type: %s", channel.Type), message
|
||||||
}
|
}
|
||||||
|
|
|
@ -70,7 +70,7 @@ func TestEngine( t *testing.T) {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|
||||||
Channels: alerting.RuleChannel{
|
Channels: alerting.NotificationConfig{
|
||||||
Normal: []alerting.Channel{
|
Normal: []alerting.Channel{
|
||||||
{Name: "钉钉", Type: alerting.ChannelWebhook, Webhook: &alerting.CustomWebhook{
|
{Name: "钉钉", Type: alerting.ChannelWebhook, Webhook: &alerting.CustomWebhook{
|
||||||
HeaderParams: map[string]string{
|
HeaderParams: map[string]string{
|
||||||
|
@ -156,7 +156,7 @@ func TestGeneratePercentilesAggQuery(t *testing.T) {
|
||||||
// },
|
// },
|
||||||
// },
|
// },
|
||||||
//
|
//
|
||||||
// Channels: alerting.RuleChannel{
|
// Channels: alerting.NotificationConfig{
|
||||||
// Normal: []alerting.Channel{
|
// Normal: []alerting.Channel{
|
||||||
// {Name: "钉钉", Type: alerting.ChannelWebhook, Webhook: &alerting.CustomWebhook{
|
// {Name: "钉钉", Type: alerting.ChannelWebhook, Webhook: &alerting.CustomWebhook{
|
||||||
// HeaderParams: map[string]string{
|
// HeaderParams: map[string]string{
|
||||||
|
@ -216,7 +216,7 @@ func TestGeneratePercentilesAggQuery(t *testing.T) {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|
||||||
Channels: alerting.RuleChannel{
|
Channels: alerting.NotificationConfig{
|
||||||
Normal: []alerting.Channel{
|
Normal: []alerting.Channel{
|
||||||
{Name: "钉钉", Type: alerting.ChannelWebhook, Webhook: &alerting.CustomWebhook{
|
{Name: "钉钉", Type: alerting.ChannelWebhook, Webhook: &alerting.CustomWebhook{
|
||||||
HeaderParams: map[string]string{
|
HeaderParams: map[string]string{
|
||||||
|
|
Loading…
Reference in New Issue