Merge pull request 'alert' (#119) from zhangweiii/pcm-coordinator:master into master

Former-commit-id: ea1f5f0a8493fd0eca71323698e9ef255e16876f
This commit is contained in:
zhangweiii 2024-04-19 16:58:11 +08:00
commit 9b315b2e6a
18 changed files with 296 additions and 52 deletions

View File

@ -48,22 +48,25 @@ type (
)
type (
syncClusterLoadReq {
ClusterLoadRecords []ClusterLoadRecord `json:"clusterLoadRecords"`
}
ClusterLoadRecord {
AdapterId int64 `json:"adapterId"`
ClusterName string `json:"clusterName"`
CpuAvail float64 `json:"cpuAvail"`
CpuTotal float64 `json:"cpuTotal"`
CpuUtilisation float64 `json:"cpuUtilisation"`
MemoryAvail float64 `json:"memoryAvail"`
MemoryUtilisation float64 `json:"memoryUtilisation"`
MemoryTotal float64 `json:"memoryTotal"`
DiskAvail float64 `json:"diskAvail"`
DiskTotal float64 `json:"diskTotal"`
DiskUtilisation float64 `json:"diskUtilisation"`
}
syncClusterLoadReq {
clusterLoadRecords []ClusterLoadRecord `json:"clusterLoadRecords"`
}
ClusterLoadRecord {
AdapterId int64 `json:"adapterId,optional"`
ClusterName string `json:"clusterName,optional"`
CpuAvail float64 `json:"cpuAvail,optional"`
CpuTotal float64 `json:"cpuTotal,optional"`
CpuUtilisation float64 `json:"cpuUtilisation,optional"`
MemoryAvail float64 `json:"memoryAvail,optional"`
MemoryUtilisation float64 `json:"memoryUtilisation,optional"`
MemoryTotal float64 `json:"memoryTotal,optional"`
DiskAvail float64 `json:"diskAvail,optional"`
DiskTotal float64 `json:"diskTotal,optional"`
DiskUtilisation float64 `json:"diskUtilisation,optional"`
PodsUtilisation float64 `json:"podsUtilisation,optional"`
PodsCount int64 `json:"podsCount,optional"`
PodsTotal int64 `json:"podsTotal,optional"`
}
)
type (

View File

@ -1,7 +1,7 @@
syntax = "v1"
type CreateAlertRuleReq {
CLusterId int64 `json:"clusterId"`
CLusterId string `json:"clusterId"`
ClusterName string `json:"clusterName"`
Name string `json:"name"`
PromQL string `json:"promQL"`
@ -12,6 +12,9 @@ type CreateAlertRuleReq {
}
type (
AlertRulesReq {
AlertType string `form:"alertType"`
}
AlertRulesResp {
alertRules []AlertRule `json:"alertRules"`
}
@ -20,6 +23,7 @@ type (
Id int64 `json:"id"`
ClusterName string `json:"clusterName"`
Name string `json:"name"`
AlertType string `json:"alertType"`
PromQL string `json:"promQL"`
Duration string `json:"duration"`
Annotations string `json:"annotations"`
@ -37,4 +41,19 @@ type (
data interface{} `json:"data"`
msg string `json:"msg"`
}
)
)
type (
alertListReq {
alertType string `form:"alertType"`
adapterId string `form:"adapterId,optional"`
clusterId string `form:"clusterId,optional"`
}
alertListResp {
alertMap map[string]interface{} `json:"alertMap"`
}
)
type SyncClusterAlertReq {
AlertRecordsMap map[string]interface{} `json:"alertRecordsMap"`
}

View File

@ -975,7 +975,7 @@ service pcm {
@doc "alert rules"
@handler alertRulesHandler
get /monitoring/alert/rule returns (AlertRulesResp)
get /monitoring/alert/rule (AlertRulesReq)returns (AlertRulesResp)
@doc "cluster resource load"
@handler clustersLoadHandler
@ -984,4 +984,12 @@ service pcm {
@doc "node resource load"
@handler nodesLoadTopHandler
get /monitoring/node/top (nodesLoadTopReq) returns (nodesLoadTopResp)
@doc "alert list"
@handler alertListHandler
get /monitoring/alert/list (alertListReq) returns (alertListResp)
@doc "Synchronize Cluster alert Information"
@handler syncClusterAlertHandler
post /core/syncClusterAlert (SyncClusterAlertReq)
}

View File

@ -0,0 +1,25 @@
package monitoring
import (
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
"net/http"
"github.com/zeromicro/go-zero/rest/httpx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/monitoring"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
)
// AlertListHandler returns the HTTP handler for the alert list endpoint.
//
// The handler parses the incoming request into a types.AlertListReq, runs
// AlertListLogic.AlertList with the request's context, and hands the response
// and error to result.HttpResult. A request that fails to parse is answered
// through httpx.ErrorCtx and never reaches the logic layer.
func AlertListHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		ctx := r.Context()

		listReq := new(types.AlertListReq)
		if parseErr := httpx.Parse(r, listReq); parseErr != nil {
			httpx.ErrorCtx(ctx, w, parseErr)
			return
		}

		logic := monitoring.NewAlertListLogic(ctx, svcCtx)
		alerts, err := logic.AlertList(listReq)
		result.HttpResult(r, w, alerts, err)
	}
}

View File

@ -4,14 +4,22 @@ import (
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
"net/http"
"github.com/zeromicro/go-zero/rest/httpx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/monitoring"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
)
func AlertRulesHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
var req types.AlertRulesReq
if err := httpx.Parse(r, &req); err != nil {
httpx.ErrorCtx(r.Context(), w, err)
return
}
l := monitoring.NewAlertRulesLogic(r.Context(), svcCtx)
resp, err := l.AlertRules()
resp, err := l.AlertRules(&req)
result.HttpResult(r, w, resp, err)
}
}

View File

@ -1,6 +1,7 @@
package monitoring
import (
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
"net/http"
"github.com/zeromicro/go-zero/rest/httpx"
@ -19,10 +20,6 @@ func ClustersLoadHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
l := monitoring.NewClustersLoadLogic(r.Context(), svcCtx)
resp, err := l.ClustersLoad(&req)
if err != nil {
httpx.ErrorCtx(r.Context(), w, err)
} else {
httpx.OkJsonCtx(r.Context(), w, resp)
}
result.HttpResult(r, w, resp, err)
}
}

View File

@ -0,0 +1,28 @@
package monitoring
import (
"net/http"
"github.com/zeromicro/go-zero/rest/httpx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/monitoring"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
)
// SyncClusterAlertHandler returns the HTTP handler that accepts pushed
// cluster alert records.
//
// The handler parses the request into a types.SyncClusterAlertReq and passes
// it to SyncClusterAlertLogic.SyncClusterAlert. Parse and logic errors are
// both reported through httpx.ErrorCtx; on success an empty OK response is
// written with httpx.Ok.
func SyncClusterAlertHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		ctx := r.Context()

		payload := new(types.SyncClusterAlertReq)
		if parseErr := httpx.Parse(r, payload); parseErr != nil {
			httpx.ErrorCtx(ctx, w, parseErr)
			return
		}

		logic := monitoring.NewSyncClusterAlertLogic(ctx, svcCtx)
		if syncErr := logic.SyncClusterAlert(payload); syncErr != nil {
			httpx.ErrorCtx(ctx, w, syncErr)
			return
		}
		httpx.Ok(w)
	}
}

View File

@ -1237,6 +1237,16 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
Path: "/monitoring/node/top",
Handler: monitoring.NodesLoadTopHandler(serverCtx),
},
{
Method: http.MethodGet,
Path: "/monitoring/alert/list",
Handler: monitoring.AlertListHandler(serverCtx),
},
{
Method: http.MethodPost,
Path: "/core/syncClusterAlert",
Handler: monitoring.SyncClusterAlertHandler(serverCtx),
},
},
rest.WithPrefix("/pcm/v1"),
)

View File

@ -38,6 +38,10 @@ func (l *SyncClusterLoadLogic) SyncClusterLoad(req *types.SyncClusterLoadReq) er
tracker.ClusterDiskUtilisationGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.DiskUtilisation)
tracker.ClusterDiskAvailGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.DiskAvail)
tracker.ClusterDiskTotalGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.DiskTotal)
tracker.ClusterPodUtilisationGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.PodsUtilisation)
tracker.ClusterPodCountGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(float64(record.PodsCount))
tracker.ClusterPodTotalGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(float64(record.PodsTotal))
}
}
return nil

View File

@ -0,0 +1,62 @@
package monitoring
import (
"context"
"fmt"
v1 "github.com/prometheus/client_golang/api/prometheus/v1"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"k8s.io/apimachinery/pkg/util/json"
"github.com/zeromicro/go-zero/core/logx"
)
// AlertListLogic holds the per-request state used to answer an alert list
// query: an embedded logger, the request context, and the shared service
// context that exposes the database and Redis clients used by AlertList.
type AlertListLogic struct {
logx.Logger
ctx context.Context
svcCtx *svc.ServiceContext
}
// NewAlertListLogic creates an AlertListLogic for a single request. The
// embedded logger is derived from ctx via logx.WithContext, and both ctx and
// svcCtx are stored for later use by AlertList.
func NewAlertListLogic(ctx context.Context, svcCtx *svc.ServiceContext) *AlertListLogic {
	logic := new(AlertListLogic)
	logic.Logger = logx.WithContext(ctx)
	logic.ctx = ctx
	logic.svcCtx = svcCtx
	return logic
}
// AlertListResp is a response envelope with a code, a message and alerts
// grouped under string keys.
//
// Note that the Mode field is serialised under the JSON key "code".
// NOTE(review): AlertList in this file returns *types.AlertListResp rather
// than this type; confirm whether this declaration is still referenced
// anywhere in the package.
type AlertListResp struct {
Mode int `json:"code"`
Msg string `json:"msg"`
Data map[string][]*v1.Alert `json:"data"`
}
// AlertList returns the cached alerts of every Kubernetes cluster that matches
// the request.
//
// Cluster names are looked up in t_adapter/t_cluster, filtered by the adapter
// type given in req.AlertType and, optionally, by req.ClusterId or
// req.AdapterId. When both optional filters are supplied the cluster filter
// takes precedence and the adapter filter is not applied. For each resulting
// cluster name the value stored in Redis under that name is decoded as a JSON
// list of alerts and placed in resp.AlertMap keyed by the cluster name.
// Clusters without a cached value are omitted from the map.
//
// An error is returned only when the cluster lookup query fails. A cache entry
// that cannot be decoded is logged and skipped so that one corrupt entry does
// not hide the alerts of the remaining clusters.
func (l *AlertListLogic) AlertList(req *types.AlertListReq) (resp *types.AlertListResp, err error) {
	resp = &types.AlertListResp{
		AlertMap: make(map[string]interface{}),
	}

	// AdapterId and ClusterId come straight from the query string, so they are
	// bound as statement parameters instead of being formatted into the SQL.
	query := "select distinct tc.name from t_adapter ta,t_cluster tc where ta.id = tc.adapter_id and label = 'kubernetes' and ta.type = ?"
	args := []interface{}{req.AlertType}
	switch {
	case len(req.ClusterId) > 0:
		query += " and tc.id = ?"
		args = append(args, req.ClusterId)
	case len(req.AdapterId) > 0:
		query += " and ta.id = ?"
		args = append(args, req.AdapterId)
	}

	var clusterArray []string
	if tx := l.svcCtx.DbEngin.Raw(query, args...).Scan(&clusterArray); tx.Error != nil {
		return nil, fmt.Errorf("query clusters for alert list: %w", tx.Error)
	}

	for _, clusterName := range clusterArray {
		cached := l.svcCtx.RedisClient.Get(l.ctx, clusterName).Val()
		if len(cached) == 0 {
			// Nothing cached for this cluster (or the lookup failed): leave it out.
			continue
		}

		var alerts []v1.Alert
		if decodeErr := json.Unmarshal([]byte(cached), &alerts); decodeErr != nil {
			l.Errorf("decode cached alerts for cluster %q: %v", clusterName, decodeErr)
			continue
		}
		resp.AlertMap[clusterName] = alerts
	}
	return resp, nil
}

View File

@ -23,10 +23,10 @@ func NewAlertRulesLogic(ctx context.Context, svcCtx *svc.ServiceContext) *AlertR
}
}
func (l *AlertRulesLogic) AlertRules() (resp *types.AlertRulesResp, err error) {
func (l *AlertRulesLogic) AlertRules(req *types.AlertRulesReq) (resp *types.AlertRulesResp, err error) {
resp = &types.AlertRulesResp{}
var alertRules []types.AlertRule
l.svcCtx.DbEngin.Raw("SELECT ar.id,ar.*,GROUP_CONCAT(tc.`name` ORDER BY tc.`name` ASC SEPARATOR ',') as cluster_name FROM alert_rule ar JOIN t_cluster tc ON ar.cluster_id = tc.id WHERE ar.deleted_at IS NULL AND tc.deleted_at IS NULL GROUP BY ar.id").Scan(&alertRules)
l.svcCtx.DbEngin.Raw("SELECT ar.id,ar.*,GROUP_CONCAT(tc.`name` ORDER BY tc.`name` ASC SEPARATOR ',') as cluster_name FROM alert_rule ar JOIN t_cluster tc ON ar.cluster_id = tc.id WHERE ar.alert_type = ? AND ar.deleted_at IS NULL AND tc.deleted_at IS NULL GROUP BY ar.id", req.AlertType).Scan(&alertRules)
resp.AlertRules = alertRules
return resp, nil
}

View File

@ -27,7 +27,7 @@ func NewClustersLoadLogic(ctx context.Context, svcCtx *svc.ServiceContext) *Clus
func (l *ClustersLoadLogic) ClustersLoad(req *types.ClustersLoadReq) (resp *types.ClustersLoadResp, err error) {
resp = &types.ClustersLoadResp{}
metrics := []string{"cluster_cpu_utilisation", "cluster_cpu_avail", "cluster_cpu_total", "cluster_memory_total", "cluster_memory_avail", "cluster_memory_utilisation", "cluster_disk_utilisation", "cluster_disk_avail", "cluster_disk_total"}
metrics := []string{"cluster_cpu_utilisation", "cluster_cpu_avail", "cluster_cpu_total", "cluster_memory_total", "cluster_memory_avail", "cluster_memory_utilisation", "cluster_disk_utilisation", "cluster_disk_avail", "cluster_disk_total", "cluster_pod_utilisation"}
result := l.svcCtx.PromClient.GetNamedMetrics(metrics, time.Now(), tracker.ClusterOption{ClusterName: req.ClusterName})
resp.Data = result
return resp, nil

View File

@ -59,6 +59,7 @@ func (l *CreateAlertRuleLogic) CreateAlertRule(req *types.CreateAlertRuleReq) er
ForceContentType("application/json").
Get(server + "/api/v1/monitoring/rule/selector")
if err != nil || response.IsError() {
logx.Error(response)
return err
}
// Data Filling

View File

@ -0,0 +1,44 @@
package monitoring
import (
"context"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"k8s.io/apimachinery/pkg/util/json"
"time"
"github.com/zeromicro/go-zero/core/logx"
)
// SyncClusterAlertLogic holds the per-request state used to store pushed
// alert records: an embedded logger, the request context, and the shared
// service context that exposes the Redis client used by SyncClusterAlert.
type SyncClusterAlertLogic struct {
logx.Logger
ctx context.Context
svcCtx *svc.ServiceContext
}
// NewSyncClusterAlertLogic creates a SyncClusterAlertLogic for a single
// request. The embedded logger is derived from ctx via logx.WithContext, and
// both ctx and svcCtx are stored for later use by SyncClusterAlert.
func NewSyncClusterAlertLogic(ctx context.Context, svcCtx *svc.ServiceContext) *SyncClusterAlertLogic {
	logic := new(SyncClusterAlertLogic)
	logic.Logger = logx.WithContext(ctx)
	logic.ctx = ctx
	logic.svcCtx = svcCtx
	return logic
}
// SyncClusterAlert stores every entry of req.AlertRecordsMap in Redis.
//
// Each value is JSON-encoded and written under its map key, with one minute
// passed as the final argument of the Set call. A value that cannot be encoded
// aborts the call and returns the encoding error; entries written before that
// point stay in place. A failed Redis write is only logged, and processing
// continues with the remaining entries.
func (l *SyncClusterAlertLogic) SyncClusterAlert(req *types.SyncClusterAlertReq) error {
	// Ranging over an empty or nil map runs zero iterations, so no separate
	// emptiness check is required.
	for key, records := range req.AlertRecordsMap {
		encoded, marshalErr := json.Marshal(records)
		if marshalErr != nil {
			return marshalErr
		}

		writeErr := l.svcCtx.RedisClient.Set(l.ctx, key, encoded, 1*time.Minute).Err()
		if writeErr != nil {
			logx.Error(writeErr)
		}
	}
	return nil
}

View File

@ -72,6 +72,7 @@ func NewServiceContext(c config.Config) *ServiceContext {
panic("InitSnowflake err")
}
httpClient := resty.New()
httpClient.SetTimeout(1 * time.Second)
alertClient := tracker.NewAlertClient(c.Monitoring.AlertUrl)
if err != nil {
logx.Errorf("InitPrometheus err: %v", err)

View File

@ -44,17 +44,20 @@ type SyncClusterLoadReq struct {
}
type ClusterLoadRecord struct {
AdapterId int64 `json:"adapterId"`
ClusterName string `json:"clusterName"`
CpuAvail float64 `json:"cpuAvail"`
CpuTotal float64 `json:"cpuTotal"`
CpuUtilisation float64 `json:"cpuUtilisation"`
MemoryAvail float64 `json:"memoryAvail"`
MemoryUtilisation float64 `json:"memoryUtilisation"`
MemoryTotal float64 `json:"memoryTotal"`
DiskAvail float64 `json:"diskAvail"`
DiskTotal float64 `json:"diskTotal"`
DiskUtilisation float64 `json:"diskUtilisation"`
AdapterId int64 `json:"adapterId,optional"`
ClusterName string `json:"clusterName,optional"`
CpuAvail float64 `json:"cpuAvail,optional"`
CpuTotal float64 `json:"cpuTotal,optional"`
CpuUtilisation float64 `json:"cpuUtilisation,optional"`
MemoryAvail float64 `json:"memoryAvail,optional"`
MemoryUtilisation float64 `json:"memoryUtilisation,optional"`
MemoryTotal float64 `json:"memoryTotal,optional"`
DiskAvail float64 `json:"diskAvail,optional"`
DiskTotal float64 `json:"diskTotal,optional"`
DiskUtilisation float64 `json:"diskUtilisation,optional"`
PodsUtilisation float64 `json:"podsUtilisation,optional"`
PodsCount int64 `json:"podsCount,optional"`
PodsTotal int64 `json:"podsTotal,optional"`
}
type GetClusterListReq struct {
@ -1133,9 +1136,9 @@ type HpcResourceReq struct {
}
type HpcResourceResp struct {
Code int32 `json:"code"`
Msg string `json:"msg"`
Data HPCResource `json:"data"`
Code int32 `json:"code"`
Msg string `json:"msg"`
HPCResource HPCResource `json:"hpcResource"`
}
type HPCResource struct {
@ -5526,7 +5529,7 @@ type AiAlgorithmsResp struct {
}
type CreateAlertRuleReq struct {
CLusterId int64 `json:"clusterId"`
CLusterId string `json:"clusterId"`
ClusterName string `json:"clusterName"`
Name string `json:"name"`
PromQL string `json:"promQL"`
@ -5536,6 +5539,10 @@ type CreateAlertRuleReq struct {
AlertType string `json:"alertType"`
}
// AlertRulesReq is the request for listing alert rules. AlertType is read
// from the "alertType" form value and carries no "optional" marker in its tag.
type AlertRulesReq struct {
AlertType string `form:"alertType"`
}
type AlertRulesResp struct {
AlertRules []AlertRule `json:"alertRules"`
}
@ -5544,6 +5551,7 @@ type AlertRule struct {
Id int64 `json:"id"`
ClusterName string `json:"clusterName"`
Name string `json:"name"`
AlertType string `json:"alertType"`
PromQL string `json:"promQL"`
Duration string `json:"duration"`
Annotations string `json:"annotations"`
@ -5560,3 +5568,17 @@ type NodesLoadTopResp struct {
Data interface{} `json:"data"`
Msg string `json:"msg"`
}
// AlertListReq is the request for listing alerts. All fields are read from
// form values; AdapterId and ClusterId are tagged optional, AlertType is not.
type AlertListReq struct {
AlertType string `form:"alertType"`
AdapterId string `form:"adapterId,optional"`
ClusterId string `form:"clusterId,optional"`
}
// AlertListResp is the response for listing alerts. AlertMap is serialised
// under the JSON key "alertMap"; its values are untyped.
type AlertListResp struct {
AlertMap map[string]interface{} `json:"alertMap"`
}
// SyncClusterAlertReq is the request body for pushing alert records.
// AlertRecordsMap is read from the JSON key "alertRecordsMap"; its values are
// untyped.
type SyncClusterAlertReq struct {
AlertRecordsMap map[string]interface{} `json:"alertRecordsMap"`
}

View File

@ -27,9 +27,10 @@ var promQLTemplates = map[string]string{
"cluster_cpu_total": "cluster_cpu_total{$1}",
"cluster_memory_total": "cluster_memory_total{$1}",
"cluster_disk_total": "cluster_disk_total{$1}",
"cluster_cpu_avail": "cluster_cpu_total{$1}",
"cluster_memory_avail": "cluster_memory_total{$1}",
"cluster_disk_avail": "cluster_disk_total{$1}",
"cluster_cpu_avail": "cluster_cpu_avail{$1}",
"cluster_memory_avail": "cluster_memory_avail{$1}",
"cluster_disk_avail": "cluster_disk_avail{$1}",
"cluster_pod_utilisation": "cluster_pod_utilisation{$1}",
// center
"center_cpu_utilisation": "(sum by (adapter_id)(cluster_cpu_total{$1})-sum by (adapter_id)(cluster_cpu_avail{$1}))/sum by (adapter_id)(cluster_cpu_total{$1})",
@ -95,8 +96,8 @@ func makeExpr(metric string, opts QueryOptions) string {
func makeClusterMetricExpr(tmpl string, o QueryOptions) string {
var clusterSelector string
if o.AdapterId != 0 && o.ClusterName != "" {
clusterSelector = fmt.Sprintf(`adapter_id="%d",cluster_name="%s"`, o.AdapterId, o.ClusterName)
if o.ClusterName != "" {
clusterSelector = fmt.Sprintf(`cluster_name="%s"`, o.ClusterName)
}
return strings.Replace(tmpl, "$1", clusterSelector, -1)

View File

@ -66,6 +66,18 @@ var (
Name: "cluster_disk_total",
Help: "Cluster Disk Total.",
}, []string{"cluster_name", "adapter_id"})
ClusterPodUtilisationGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "cluster_pod_utilisation",
Help: "Cluster Pod Utilisation.",
}, []string{"cluster_name", "adapter_id"})
ClusterPodCountGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "cluster_pod_count",
Help: "Cluster Pod Count.",
}, []string{"cluster_name", "adapter_id"})
ClusterPodTotalGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "cluster_pod_total",
Help: "Cluster Pod total.",
}, []string{"cluster_name", "adapter_id"})
metrics = []prometheus.Collector{
ClusterCpuUtilisationGauge,
@ -77,6 +89,9 @@ var (
ClusterDiskUtilisationGauge,
ClusterDiskAvailGauge,
ClusterDiskTotalGauge,
ClusterPodUtilisationGauge,
ClusterPodCountGauge,
ClusterPodTotalGauge,
}
)
@ -275,7 +290,3 @@ func (p Prometheus) GetRawData(expr string, o QueryOption) (model.Value, error)
}
return value, nil
}
func AddAlertRule() {
}