add alert recovery notification
This commit is contained in:
parent
464a2f9bea
commit
0f4bb211d5
|
@ -13,6 +13,8 @@ type Channel struct {
|
|||
Priority int `json:"priority,omitempty"`
|
||||
Webhook *CustomWebhook `json:"webhook,omitempty" elastic_mapping:"webhook:{type:object}"`
|
||||
SearchText string `json:"-" elastic_mapping:"search_text:{type:text,index_prefixes:{},index_phrases:true, analyzer:suggest_text_search }"`
|
||||
SubType string `json:"sub_type" elastic_mapping:"sub_type:{type:keyword,copy_to:search_text}"`
|
||||
Email *Email `json:"email,omitempty" elastic_mapping:"email:{type:object}"`
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -13,9 +13,9 @@ import (
|
|||
|
||||
type Metric struct {
|
||||
insight.Metric
|
||||
Expression string `json:"expression" elastic_mapping:"expression:{type:keyword,copy_to:search_text}"` //告警表达式,自动生成 eg: avg(cpu) > 80
|
||||
Title string `json:"title"` //text template
|
||||
Message string `json:"message"` // text template
|
||||
Title string `json:"title,omitempty"` //text template
|
||||
Message string `json:"message,omitempty"` // text template
|
||||
Expression string `json:"expression,omitempty" elastic_mapping:"expression:{type:keyword,copy_to:search_text}"` //告警表达式,自动生成 eg: avg(cpu) > 80
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -18,7 +18,9 @@ type Rule struct {
|
|||
Resource Resource `json:"resource" elastic_mapping:"resource:{type:object}"`
|
||||
Metrics Metric `json:"metrics" elastic_mapping:"metrics:{type:object}"`
|
||||
Conditions Condition `json:"conditions" elastic_mapping:"conditions:{type:object}"`
|
||||
Channels RuleChannel `json:"channels" elastic_mapping:"channels:{type:object}"`
|
||||
Channels NotificationConfig `json:"channels,omitempty" elastic_mapping:"channels:{type:object}"`
|
||||
NotificationConfig *NotificationConfig `json:"notification_config,omitempty" elastic_mapping:"notification_config:{type:object}"`
|
||||
RecoveryNotificationConfig *RecoveryNotificationConfig `json:"recovery_notification_config,omitempty" elastic_mapping:"recovery_notification_config:{type:object}"`
|
||||
Schedule Schedule `json:"schedule" elastic_mapping:"schedule:{type:object}"`
|
||||
LastNotificationTime time.Time `json:"-" elastic_mapping:"last_notification_time:{type:date}"`
|
||||
LastTermStartTime time.Time `json:"-"` //标识最近一轮告警的开始时间
|
||||
|
@ -54,9 +56,24 @@ func (rule *Rule) GetOrInitExpression() (string, error){
|
|||
rule.Expression = strings.ReplaceAll(sb.String(), "result", metricExp)
|
||||
return rule.Expression, nil
|
||||
}
|
||||
//GetNotificationConfig for adapter old version config
|
||||
func (rule *Rule) GetNotificationConfig() *NotificationConfig {
|
||||
if rule.NotificationConfig != nil {
|
||||
return rule.NotificationConfig
|
||||
}
|
||||
return &rule.Channels
|
||||
}
|
||||
func (rule *Rule) GetNotificationTitleAndMessage() (string, string) {
|
||||
if rule.NotificationConfig != nil {
|
||||
return rule.NotificationConfig.Title, rule.NotificationConfig.Message
|
||||
}
|
||||
return rule.Metrics.Title, rule.Metrics.Message
|
||||
}
|
||||
|
||||
type RuleChannel struct {
|
||||
type NotificationConfig struct {
|
||||
Enabled bool `json:"enabled"`
|
||||
Title string `json:"title,omitempty"` //text template
|
||||
Message string `json:"message,omitempty"` // text template
|
||||
Normal []Channel `json:"normal,omitempty"`
|
||||
Escalation []Channel `json:"escalation,omitempty"`
|
||||
ThrottlePeriod string `json:"throttle_period,omitempty"` //沉默周期
|
||||
|
@ -65,6 +82,14 @@ type RuleChannel struct {
|
|||
EscalationEnabled bool `json:"escalation_enabled,omitempty"`
|
||||
}
|
||||
|
||||
type RecoveryNotificationConfig struct {
|
||||
Enabled bool `json:"enabled"`
|
||||
Title string `json:"title"` //text template
|
||||
Message string `json:"message"` // text template
|
||||
AcceptTimeRange TimeRange `json:"accept_time_range,omitempty"`
|
||||
Channels []Channel `json:"channels,omitempty"`
|
||||
}
|
||||
|
||||
type MessageTemplate struct{
|
||||
Type string `json:"type"`
|
||||
Source string `json:"source"`
|
||||
|
|
|
@ -75,7 +75,7 @@ func TestCreateRule( t *testing.T) {
|
|||
},
|
||||
},
|
||||
|
||||
Channels: RuleChannel{
|
||||
Channels: NotificationConfig{
|
||||
Normal: []Channel{
|
||||
{Name: "钉钉", Type: ChannelWebhook, Webhook: &CustomWebhook{
|
||||
HeaderParams: map[string]string{
|
||||
|
|
|
@ -10,3 +10,14 @@ type CustomWebhook struct {
|
|||
URL string `json:"url,omitempty"`
|
||||
Body string `json:"body" elastic_mapping:"body:{type:text}"`
|
||||
}
|
||||
|
||||
type Email struct {
|
||||
ServerID string `json:"server_id"`
|
||||
Recipients struct {
|
||||
To []string `json:"to" elastic_mapping:"to:{type:keyword}"`
|
||||
CC []string `json:"cc" elastic_mapping:"cc:{type:keyword}"`
|
||||
BCC []string `json:"bcc" elastic_mapping:"bcc:{type:keyword}"`
|
||||
} `json:"recipients" elastic_mapping:"recipients:{type:object}"`
|
||||
Subject string `json:"subject" elastic_mapping:"subject:{type:text}"`
|
||||
Body string `json:"body" elastic_mapping:"body:{type:text}"`
|
||||
}
|
|
@ -0,0 +1,34 @@
|
|||
/* Copyright © INFINI Ltd. All rights reserved.
|
||||
* Web: https://infinilabs.com
|
||||
* Email: hello#infini.ltd */
|
||||
|
||||
package action
|
||||
|
||||
import (
|
||||
"infini.sh/console/model/alerting"
|
||||
"infini.sh/framework/core/queue"
|
||||
"infini.sh/framework/core/util"
|
||||
)
|
||||
|
||||
type EmailAction struct {
|
||||
Data *alerting.Email
|
||||
Subject string
|
||||
Body string
|
||||
}
|
||||
|
||||
const EmailQueueName = "alert_email_messages"
|
||||
|
||||
func (act *EmailAction) Execute()([]byte, error){
|
||||
queueCfg := queue.GetOrInitConfig(EmailQueueName)
|
||||
emailMsg := util.MapStr{
|
||||
"email": act.Data.Recipients.To,
|
||||
"template": "raw",
|
||||
"variables": util.MapStr{
|
||||
"subject": act.Subject,
|
||||
"body": act.Body,
|
||||
},
|
||||
}
|
||||
emailMsgBytes := util.MustToJSONBytes(emailMsg)
|
||||
err := queue.Push(queueCfg, emailMsgBytes)
|
||||
return nil, err
|
||||
}
|
|
@ -679,6 +679,22 @@ func (engine *Engine) Do(rule *alerting.Rule) error {
|
|||
if err != nil {
|
||||
return fmt.Errorf("save alert message error: %w", err)
|
||||
}
|
||||
// send recover message to channel
|
||||
recoverCfg := rule.RecoveryNotificationConfig
|
||||
if recoverCfg != nil && recoverCfg.Enabled {
|
||||
if recoverCfg.AcceptTimeRange.Include(time.Now()) {
|
||||
paramsCtx = newParameterCtx(rule, checkResults, util.MapStr{
|
||||
alerting2.ParamEventID: alertItem.ID,
|
||||
alerting2.ParamTimestamp: alertItem.Created.Unix(),
|
||||
})
|
||||
err = attachTitleMessageToCtx(recoverCfg.Title, recoverCfg.Message, paramsCtx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
actionResults, _ := performChannels(recoverCfg.Channels, paramsCtx)
|
||||
alertItem.ActionExecutionResults = actionResults
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
@ -698,7 +714,8 @@ func (engine *Engine) Do(rule *alerting.Rule) error {
|
|||
})
|
||||
|
||||
alertItem.Priority = priority
|
||||
err = attachTitleMessageToCtx(rule, paramsCtx)
|
||||
title, message := rule.GetNotificationTitleAndMessage()
|
||||
err = attachTitleMessageToCtx(title, message, paramsCtx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -707,7 +724,7 @@ func (engine *Engine) Do(rule *alerting.Rule) error {
|
|||
if alertMessage == nil || alertMessage.Status == alerting.MessageStateRecovered {
|
||||
msg := &alerting.AlertMessage{
|
||||
RuleID: rule.ID,
|
||||
Created: time.Now(),
|
||||
Created: alertItem.Created,
|
||||
Updated: time.Now(),
|
||||
ID: util.GetUUID(),
|
||||
ResourceID: rule.Resource.ID,
|
||||
|
@ -756,12 +773,13 @@ func (engine *Engine) Do(rule *alerting.Rule) error {
|
|||
return nil
|
||||
}
|
||||
// if channel is not enabled return
|
||||
if !rule.Channels.Enabled {
|
||||
notifyCfg := rule.GetNotificationConfig()
|
||||
if notifyCfg == nil || !notifyCfg.Enabled {
|
||||
return nil
|
||||
}
|
||||
|
||||
if rule.Channels.AcceptTimeRange.Include(time.Now()) {
|
||||
periodDuration, err := time.ParseDuration(rule.Channels.ThrottlePeriod)
|
||||
if notifyCfg.AcceptTimeRange.Include(time.Now()) {
|
||||
periodDuration, err := time.ParseDuration(notifyCfg.ThrottlePeriod)
|
||||
if err != nil {
|
||||
alertItem.Error = err.Error()
|
||||
return err
|
||||
|
@ -787,7 +805,7 @@ func (engine *Engine) Do(rule *alerting.Rule) error {
|
|||
}
|
||||
|
||||
if alertMessage == nil || period > periodDuration {
|
||||
actionResults, errCount := performChannels(rule.Channels.Normal, paramsCtx)
|
||||
actionResults, errCount := performChannels(notifyCfg.Normal, paramsCtx)
|
||||
alertItem.ActionExecutionResults = actionResults
|
||||
//change and save last notification time in local kv store when action error count equals zero
|
||||
if errCount == 0 {
|
||||
|
@ -798,8 +816,8 @@ func (engine *Engine) Do(rule *alerting.Rule) error {
|
|||
}
|
||||
}
|
||||
|
||||
if rule.Channels.EscalationEnabled {
|
||||
throttlePeriod, err := time.ParseDuration(rule.Channels.EscalationThrottlePeriod)
|
||||
if notifyCfg.EscalationEnabled {
|
||||
throttlePeriod, err := time.ParseDuration(notifyCfg.EscalationThrottlePeriod)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -819,7 +837,7 @@ func (engine *Engine) Do(rule *alerting.Rule) error {
|
|||
}
|
||||
}
|
||||
if time.Now().Sub(rule.LastEscalationTime.Local()) > periodDuration {
|
||||
actionResults, errCount := performChannels(rule.Channels.Escalation, paramsCtx)
|
||||
actionResults, errCount := performChannels(notifyCfg.Escalation, paramsCtx)
|
||||
alertItem.ActionExecutionResults = actionResults
|
||||
//todo init last escalation time when create task (by last alert item is escalated)
|
||||
if errCount == 0 {
|
||||
|
@ -836,17 +854,17 @@ func (engine *Engine) Do(rule *alerting.Rule) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func attachTitleMessageToCtx(rule *alerting.Rule, paramsCtx map[string]interface{}) error{
|
||||
func attachTitleMessageToCtx(title, message string, paramsCtx map[string]interface{}) error{
|
||||
var (
|
||||
tplBytes []byte
|
||||
err error
|
||||
)
|
||||
tplBytes, err = resolveMessage(rule.Metrics.Message, paramsCtx)
|
||||
tplBytes, err = resolveMessage(message, paramsCtx)
|
||||
if err != nil {
|
||||
return fmt.Errorf("resolve message template error: %w", err)
|
||||
}
|
||||
paramsCtx[alerting2.ParamMessage] = string(tplBytes)
|
||||
tplBytes, err = resolveMessage(rule.Metrics.Title, paramsCtx)
|
||||
tplBytes, err = resolveMessage(title, paramsCtx)
|
||||
if err != nil {
|
||||
return fmt.Errorf("resolve title template error: %w", err)
|
||||
}
|
||||
|
@ -927,7 +945,8 @@ func (engine *Engine) Test(rule *alerting.Rule) ([]alerting.ActionExecutionResul
|
|||
alerting2.ParamEventID: util.GetUUID(),
|
||||
alerting2.ParamTimestamp: time.Now().Unix(),
|
||||
} )
|
||||
err = attachTitleMessageToCtx(rule, paramsCtx)
|
||||
title, message := rule.GetNotificationTitleAndMessage()
|
||||
err = attachTitleMessageToCtx(title, message, paramsCtx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -997,6 +1016,20 @@ func performChannel(channel *alerting.Channel, ctx map[string]interface{}) ([]by
|
|||
Data: &wh,
|
||||
Message: string(message),
|
||||
}
|
||||
case alerting.ChannelEmail:
|
||||
message, err = resolveMessage(channel.Email.Body, ctx)
|
||||
if err != nil {
|
||||
return nil, err, message
|
||||
}
|
||||
subjectBytes, err := resolveMessage(channel.Email.Subject, ctx)
|
||||
if err != nil {
|
||||
return nil, err, nil
|
||||
}
|
||||
act = &action.EmailAction{
|
||||
Data: channel.Email,
|
||||
Subject: string(subjectBytes),
|
||||
Body: string(message),
|
||||
}
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported action type: %s", channel.Type), message
|
||||
}
|
||||
|
|
|
@ -70,7 +70,7 @@ func TestEngine( t *testing.T) {
|
|||
},
|
||||
},
|
||||
|
||||
Channels: alerting.RuleChannel{
|
||||
Channels: alerting.NotificationConfig{
|
||||
Normal: []alerting.Channel{
|
||||
{Name: "钉钉", Type: alerting.ChannelWebhook, Webhook: &alerting.CustomWebhook{
|
||||
HeaderParams: map[string]string{
|
||||
|
@ -156,7 +156,7 @@ func TestGeneratePercentilesAggQuery(t *testing.T) {
|
|||
// },
|
||||
// },
|
||||
//
|
||||
// Channels: alerting.RuleChannel{
|
||||
// Channels: alerting.NotificationConfig{
|
||||
// Normal: []alerting.Channel{
|
||||
// {Name: "钉钉", Type: alerting.ChannelWebhook, Webhook: &alerting.CustomWebhook{
|
||||
// HeaderParams: map[string]string{
|
||||
|
@ -216,7 +216,7 @@ func TestGeneratePercentilesAggQuery(t *testing.T) {
|
|||
},
|
||||
},
|
||||
|
||||
Channels: alerting.RuleChannel{
|
||||
Channels: alerting.NotificationConfig{
|
||||
Normal: []alerting.Channel{
|
||||
{Name: "钉钉", Type: alerting.ChannelWebhook, Webhook: &alerting.CustomWebhook{
|
||||
HeaderParams: map[string]string{
|
||||
|
|
Loading…
Reference in New Issue