diff --git a/api/desc/monitoring/pcm-monitoring.api b/api/desc/monitoring/pcm-monitoring.api
index befa84bb..9c5cee7c 100644
--- a/api/desc/monitoring/pcm-monitoring.api
+++ b/api/desc/monitoring/pcm-monitoring.api
@@ -11,6 +11,12 @@ type CreateAlertRuleReq {
 	AlertType string `json:"alertType"`
 }
 
+type DeleteAlertRuleReq {
+	Id int64 `form:"id"` // id of the alert rule row to delete
+	ClusterName string `form:"clusterName"` // cluster whose adapter server receives the delete request
+	Name string `form:"name"` // name of the prometheusrules resource to delete
+}
+
 type (
 	AlertRulesReq {
 		AlertType string `form:"alertType"`
@@ -73,36 +79,35 @@ type (
 )
 
 type (
-	adapterInfoReq{
+	adapterInfoReq {
 		clusterId string `form:"clusterId"`
 	}
-	adapterInfoResp{
+	adapterInfoResp {
 		name string `json:"name"`
 		version string `json:"version"`
 	}
 )
 
 type (
-scheduleSituationResp{
-	nodes []NodeRegion `json:"nodes"`
-	links []Link `json:"links"`
-	categories []Category `json:"categories"`
-}
+	scheduleSituationResp {
+		nodes []NodeRegion `json:"nodes"`
+		links []Link `json:"links"`
+		categories []Category `json:"categories"`
+	}
 
-NodeRegion{
-	id string `json:"id"`
-	name string `json:"name"`
-	category int `json:"category"`
-	value int `json:"value"`
-}
+	NodeRegion {
+		id string `json:"id"`
+		name string `json:"name"`
+		category int `json:"category"`
+		value int `json:"value"`
+	}
 
-Link{
-	source string `json:"source"`
-	target string `json:"target"`
-}
-
-Category{
-	name string `json:"name"`
-}
+	Link {
+		source string `json:"source"`
+		target string `json:"target"`
+	}
 
+	Category {
+		name string `json:"name"`
+	}
 )
\ No newline at end of file
diff --git a/api/desc/pcm.api b/api/desc/pcm.api
index 03779d46..7278fae0 100644
--- a/api/desc/pcm.api
+++ b/api/desc/pcm.api
@@ -1031,6 +1031,9 @@ service pcm {
 	@handler alertRulesHandler
 	get /monitoring/alert/rule (AlertRulesReq) returns (AlertRulesResp)
 
+	@handler DeleteAlertRuleHandler
+	delete /cloud/alert/rule (DeleteAlertRuleReq)
+
 	@doc "cluster resource load"
 	@handler clustersLoadHandler
 	get /monitoring/cluster/load (clustersLoadReq) returns (clustersLoadResp)
diff --git 
a/api/internal/handler/monitoring/deletealertrulehandler.go b/api/internal/handler/monitoring/deletealertrulehandler.go
new file mode 100644
index 00000000..e7593a92
--- /dev/null
+++ b/api/internal/handler/monitoring/deletealertrulehandler.go
@@ -0,0 +1,31 @@
+package monitoring
+
+import (
+	"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
+	"net/http"
+
+	"github.com/zeromicro/go-zero/rest/httpx"
+	"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/monitoring"
+	"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
+	"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
+)
+
+// DeleteAlertRuleHandler returns the HTTP handler that deletes an alert rule.
+// The handler parses the request into types.DeleteAlertRuleReq, runs
+// DeleteAlertRuleLogic.DeleteAlertRule with the request context and writes the
+// outcome through result.HttpResult.
+func DeleteAlertRuleHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		var req types.DeleteAlertRuleReq
+		if err := httpx.Parse(r, &req); err != nil {
+			// The request could not be parsed: report the error and stop.
+			httpx.ErrorCtx(r.Context(), w, err)
+			return
+		}
+
+		l := monitoring.NewDeleteAlertRuleLogic(r.Context(), svcCtx)
+		err := l.DeleteAlertRule(&req)
+		// There is no response payload, so nil data is passed together with err.
+		result.HttpResult(r, w, nil, err)
+	}
+}
diff --git a/api/internal/handler/routes.go b/api/internal/handler/routes.go
index d63d02e1..03a8dab5 100644
--- a/api/internal/handler/routes.go
+++ b/api/internal/handler/routes.go
@@ -1302,6 +1302,11 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
 				Path:    "/monitoring/alert/rule",
 				Handler: monitoring.AlertRulesHandler(serverCtx),
 			},
+			{
+				Method:  http.MethodDelete,
+				Path:    "/cloud/alert/rule", // NOTE(review): the neighbouring alert-rule route uses the /monitoring prefix — confirm /cloud is intended
+				Handler: monitoring.DeleteAlertRuleHandler(serverCtx),
+			},
 			{
 				Method:  http.MethodGet,
 				Path:    "/monitoring/cluster/load",
diff --git a/api/internal/logic/monitoring/createalertrulelogic.go b/api/internal/logic/monitoring/createalertrulelogic.go
index c9685c98..fb19d23f 100644
--- a/api/internal/logic/monitoring/createalertrulelogic.go
+++ b/api/internal/logic/monitoring/createalertrulelogic.go
@@ -46,7 +46,7 @@ func (l *CreateAlertRuleLogic) CreateAlertRule(req *types.CreateAlertRuleReq) er
 		return tx.Error
 	}
 
-	// query server 
http url.
+	// query cluster http url.
 	var server string
 	l.svcCtx.DbEngin.Raw("select ta.server from t_adapter ta,t_cluster tc where ta.id = tc.adapter_id and tc.name = ?", &req.ClusterName).Scan(&server)
 
diff --git a/api/internal/logic/monitoring/deletealertrulelogic.go b/api/internal/logic/monitoring/deletealertrulelogic.go
new file mode 100644
index 00000000..aa718829
--- /dev/null
+++ b/api/internal/logic/monitoring/deletealertrulelogic.go
@@ -0,0 +1,89 @@
+package monitoring
+
+import (
+	"context"
+	"fmt"
+
+	"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
+	"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
+
+	"github.com/zeromicro/go-zero/core/logx"
+)
+
+// DeleteAlertRuleLogic holds the request context and service dependencies
+// needed to delete an alert rule.
+type DeleteAlertRuleLogic struct {
+	logx.Logger
+	ctx    context.Context
+	svcCtx *svc.ServiceContext
+}
+
+// NewDeleteAlertRuleLogic builds a DeleteAlertRuleLogic whose logger is bound to ctx.
+func NewDeleteAlertRuleLogic(ctx context.Context, svcCtx *svc.ServiceContext) *DeleteAlertRuleLogic {
+	return &DeleteAlertRuleLogic{
+		Logger: logx.WithContext(ctx),
+		ctx:    ctx,
+		svcCtx: svcCtx,
+	}
+}
+
+// DeleteAlertRule deletes the alert rule row with id req.Id and asks the adapter
+// server of cluster req.ClusterName to delete the prometheusrules resource named req.Name.
+//
+// It returns an error when the adapter server cannot be resolved, when the
+// database delete fails, when the HTTP request cannot be sent, or when the
+// adapter answers with an error status.
+func (l *DeleteAlertRuleLogic) DeleteAlertRule(req *types.DeleteAlertRuleReq) error {
+	// Resolve the adapter's http url first so that an unknown cluster is
+	// rejected before anything is deleted.
+	var server string
+	tx := l.svcCtx.DbEngin.Raw("select ta.server from t_adapter ta,t_cluster tc where ta.id = tc.adapter_id and tc.name = ?", &req.ClusterName).Scan(&server)
+	if tx.Error != nil {
+		return tx.Error
+	}
+	if server == "" {
+		return fmt.Errorf("no adapter server found for cluster %q", req.ClusterName)
+	}
+
+	// Delete data from the database.
+	if tx = l.svcCtx.DbEngin.Delete(&types.AlertRule{}, "id = ?", req.Id); tx.Error != nil {
+		return tx.Error
+	}
+
+	// delete prometheus rule
+	response, err := l.svcCtx.HttpClient.R().
+		SetBody(&CrdStruct{
+			ClusterName: req.ClusterName,
+			Name:        req.Name,
+			Grv: Grv{
+				Group:    "monitoring.coreos.com",
+				Version:  "v1",
+				Resource: "prometheusrules",
+			},
+		}).
+		ForceContentType("application/json").
+		Delete(server + "/api/v1/crd")
+	if err != nil {
+		return err
+	}
+	// err is nil whenever a response was received, even a 4xx/5xx one, so the
+	// status has to be turned into an error explicitly.
+	if response.IsError() {
+		return fmt.Errorf("deleting prometheus rule %q on cluster %q: %s", req.Name, req.ClusterName, response.Status())
+	}
+	return nil
+}
+
+// Grv identifies a custom resource type by API group, version and resource name.
+type Grv struct {
+	Group    string `json:"group"`
+	Version  string `json:"version"`
+	Resource string `json:"resource"`
+}
+
+// CrdStruct is the JSON body sent to the adapter's /api/v1/crd endpoint.
+type CrdStruct struct {
+	ClusterName string `json:"clusterName"`
+	Grv         Grv    `json:"grv"`
+	Name        string `json:"name"`
+}
diff --git a/api/internal/logic/monitoring/schedulesituationlogic.go b/api/internal/logic/monitoring/schedulesituationlogic.go
index 3dc1b64a..d2b41697 100644
--- a/api/internal/logic/monitoring/schedulesituationlogic.go
+++ b/api/internal/logic/monitoring/schedulesituationlogic.go
@@ -33,28 +33,28 @@ func (l *ScheduleSituationLogic) ScheduleSituation() (resp *types.ScheduleSituat
 
 	// hpc
 	var hpcLinks []string
-	tx = l.svcCtx.DbEngin.Raw("SELECT GROUP_CONCAT(cluster_id SEPARATOR ',') as cluster_ids FROM task_hpc WHERE deleted_at IS NULL GROUP BY task_id HAVING COUNT(*) > 1;").Scan(&hpcLinks)
+	tx = l.svcCtx.DbEngin.Raw("SELECT distinct GROUP_CONCAT( distinct cluster_id SEPARATOR ',') as cluster_ids FROM task_hpc WHERE deleted_at IS NULL GROUP BY task_id HAVING COUNT(*) > 1;").Scan(&hpcLinks)
 	if tx.Error != nil {
 		return nil, tx.Error
 	}
 	LinksHandler(hpcLinks, resp)
 	// cloud
 	var cloudLinks []string
-	tx = l.svcCtx.DbEngin.Raw("SELECT GROUP_CONCAT(cluster_id SEPARATOR ',') as cluster_ids FROM task_cloud WHERE deleted_at IS NULL GROUP BY task_id HAVING COUNT(*) > 1;").Scan(&cloudLinks)
+	tx = l.svcCtx.DbEngin.Raw("SELECT distinct GROUP_CONCAT(distinct cluster_id SEPARATOR ',') as cluster_ids FROM task_cloud WHERE deleted_at IS NULL GROUP BY task_id HAVING COUNT(*) > 1;").Scan(&cloudLinks)
 	if tx.Error != nil {
 		return nil, tx.Error
 	}
 	LinksHandler(cloudLinks, resp)
 	// ai
 	var aiLinks []string
-	tx = l.svcCtx.DbEngin.Raw("SELECT GROUP_CONCAT(cluster_id SEPARATOR ',') as cluster_ids FROM task_ai WHERE deleted_at IS NULL GROUP BY task_id HAVING COUNT(*) > 1;").Scan(&aiLinks)
+	tx = 
l.svcCtx.DbEngin.Raw("SELECT distinct GROUP_CONCAT(distinct cluster_id SEPARATOR ',') as cluster_ids FROM task_ai WHERE deleted_at IS NULL GROUP BY task_id HAVING COUNT(*) > 1;").Scan(&aiLinks)
 	if tx.Error != nil {
 		return nil, tx.Error
 	}
 	LinksHandler(aiLinks, resp)
 	// vm
 	var vmLinks []string
-	tx = l.svcCtx.DbEngin.Raw("SELECT GROUP_CONCAT(cluster_id SEPARATOR ',') as cluster_ids FROM task_vm WHERE deleted_at IS NULL GROUP BY task_id HAVING COUNT(*) > 1;").Scan(&vmLinks)
+	tx = l.svcCtx.DbEngin.Raw("SELECT distinct GROUP_CONCAT(distinct cluster_id SEPARATOR ',') as cluster_ids FROM task_vm WHERE deleted_at IS NULL GROUP BY task_id HAVING COUNT(*) > 1;").Scan(&vmLinks) // NOTE(review): HAVING COUNT(*) counts rows, not distinct clusters, so after DISTINCT a task may yield a single cluster id — confirm LinksHandler tolerates that (same for the hpc/cloud/ai queries)
 	if tx.Error != nil {
 		return nil, tx.Error
 	}
diff --git a/api/internal/types/types.go b/api/internal/types/types.go
index fa258e8a..c66655db 100644
--- a/api/internal/types/types.go
+++ b/api/internal/types/types.go
@@ -1164,7 +1164,7 @@ type CommitHpcTaskReq struct {
 	Description string            `json:"description,optional"`
 	TenantId    int64             `json:"tenantId,optional"`
 	TaskId      int64             `json:"taskId,optional"`
-	AdapterIds  []string          `json:"adapterIds"`
+	AdapterIds  []string          `json:"adapterId"` // NOTE(review): JSON key changes from "adapterIds" to singular "adapterId" while the field stays a slice — confirm clients send the new key
 	MatchLabels map[string]string `json:"matchLabels,optional"`
 	CardCount   int64             `json:"cardCount,optional"`
 	WorkDir     string            `json:"workDir,optional"` //paratera:workingDir
@@ -5750,6 +5750,12 @@ type CreateAlertRuleReq struct {
 	AlertType string `json:"alertType"`
 }
 
+type DeleteAlertRuleReq struct {
+	Id          int64  `form:"id"`          // id of the alert rule row to delete
+	ClusterName string `form:"clusterName"` // cluster whose adapter server receives the delete request
+	Name        string `form:"name"`        // name of the prometheusrules resource to delete
+}
+
 type AlertRulesReq struct {
 	AlertType string `form:"alertType"`
 	AdapterId string `form:"adapterId,optional"`
diff --git a/pkg/tracker/tracker.go b/pkg/tracker/tracker.go
index fdc094c0..7941f74b 100644
--- a/pkg/tracker/tracker.go
+++ b/pkg/tracker/tracker.go
@@ -78,6 +78,18 @@ var (
 		Name: "cluster_pod_total",
 		Help: "Cluster Pod total.",
 	}, []string{"cluster_name", "adapter_id"})
+	ClusterCpuCoreHoursGauge = 
prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Name: "cluster_cpu_core_hours", + Help: "Cluster Cpu Core Hours.", + }, []string{"cluster_name", "adapter_id"}) + ClusterCardsAvailGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Name: "cluster_cards_avail", + Help: "Cluster Cards Available.", + }, []string{"cluster_name", "adapter_id"}) + ClusterGpuAvailGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Name: "cluster_gpu_avail", + Help: "Cluster Gpu Available.", + }, []string{"cluster_name", "adapter_id"}) metrics = []prometheus.Collector{ ClusterCpuUtilisationGauge,