Merge pull request 'delete alertrule' (#165) from zhangweiii/pcm-coordinator:master into master

Former-commit-id: d4bf35b8268451baae199a1ec005bd4c3dd88a83
This commit is contained in:
zhangweiii 2024-05-11 18:31:26 +08:00
commit 9a34b347d2
9 changed files with 149 additions and 27 deletions

View File

@ -11,6 +11,12 @@ type CreateAlertRuleReq {
AlertType string `json:"alertType"`
}
// DeleteAlertRuleReq is the request of the alert rule delete route.
// Every field is bound through a `form` tag.
type DeleteAlertRuleReq {
// Id is the id of the alert rule to delete.
Id int64 `form:"id"`
// ClusterName is the name of the cluster the alert rule belongs to.
ClusterName string `form:"clusterName"`
// Name is the name of the alert rule.
Name string `form:"name"`
}
type (
AlertRulesReq {
AlertType string `form:"alertType"`
@ -104,5 +110,4 @@ Link{
Category {
name string `json:"name"`
}
)

View File

@ -1031,6 +1031,9 @@ service pcm {
@handler alertRulesHandler
get /monitoring/alert/rule (AlertRulesReq) returns (AlertRulesResp)
@handler DeleteAlertRuleHandler
delete /cloud/alert/rule (DeleteAlertRuleReq)
@doc "cluster resource load"
@handler clustersLoadHandler
get /monitoring/cluster/load (clustersLoadReq) returns (clustersLoadResp)

View File

@ -0,0 +1,25 @@
package monitoring
import (
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
"net/http"
"github.com/zeromicro/go-zero/rest/httpx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/monitoring"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
)
// DeleteAlertRuleHandler returns the HTTP handler that deletes an alert rule.
//
// The handler parses the request into a types.DeleteAlertRuleReq. A parse
// failure is written back with httpx.ErrorCtx; otherwise the request is passed
// to DeleteAlertRuleLogic and the resulting error (if any) is rendered through
// result.HttpResult with no response payload.
func DeleteAlertRuleHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		ctx := r.Context()

		req := new(types.DeleteAlertRuleReq)
		if parseErr := httpx.Parse(r, req); parseErr != nil {
			httpx.ErrorCtx(ctx, w, parseErr)
			return
		}

		logic := monitoring.NewDeleteAlertRuleLogic(ctx, svcCtx)
		result.HttpResult(r, w, nil, logic.DeleteAlertRule(req))
	}
}

View File

@ -1302,6 +1302,11 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
Path: "/monitoring/alert/rule",
Handler: monitoring.AlertRulesHandler(serverCtx),
},
{
Method: http.MethodDelete,
Path: "/cloud/alert/rule",
Handler: monitoring.DeleteAlertRuleHandler(serverCtx),
},
{
Method: http.MethodGet,
Path: "/monitoring/cluster/load",

View File

@ -46,7 +46,7 @@ func (l *CreateAlertRuleLogic) CreateAlertRule(req *types.CreateAlertRuleReq) er
return tx.Error
}
// query server http url.
// query cluster http url.
var server string
l.svcCtx.DbEngin.Raw("select ta.server from t_adapter ta,t_cluster tc where ta.id = tc.adapter_id and tc.name = ?", &req.ClusterName).Scan(&server)

View File

@ -0,0 +1,66 @@
package monitoring
import (
	"context"
	"fmt"

	"github.com/zeromicro/go-zero/core/logx"
	"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
	"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
)
// DeleteAlertRuleLogic holds the per-request state used to delete an alert rule.
type DeleteAlertRuleLogic struct {
// Logger is the embedded go-zero logger; NewDeleteAlertRuleLogic binds it to the request context.
logx.Logger
// ctx is the context handed to NewDeleteAlertRuleLogic.
ctx context.Context
// svcCtx is the shared service context; DeleteAlertRule reads DbEngin and HttpClient from it.
svcCtx *svc.ServiceContext
}
// NewDeleteAlertRuleLogic builds a DeleteAlertRuleLogic for a single request.
// The embedded logger is created from ctx, and both ctx and svcCtx are stored
// on the returned value.
func NewDeleteAlertRuleLogic(ctx context.Context, svcCtx *svc.ServiceContext) *DeleteAlertRuleLogic {
	logic := new(DeleteAlertRuleLogic)
	logic.Logger = logx.WithContext(ctx)
	logic.ctx = ctx
	logic.svcCtx = svcCtx
	return logic
}
// DeleteAlertRule deletes the alert rule identified by req.Id from the
// database, then asks the adapter server registered for req.ClusterName to
// delete the "prometheusrules" custom resource named req.Name.
//
// It returns an error when the database delete fails, when the adapter server
// cannot be resolved for the cluster, when the HTTP request cannot be sent, or
// when the adapter answers with an error status. The database row is removed
// before the remote call, so a failure in a later step does not restore it.
func (l *DeleteAlertRuleLogic) DeleteAlertRule(req *types.DeleteAlertRuleReq) error {
	// Delete data from the database.
	if tx := l.svcCtx.DbEngin.Delete(&types.AlertRule{}, "id = ?", req.Id); tx.Error != nil {
		return fmt.Errorf("delete alert rule %d: %w", req.Id, tx.Error)
	}

	// Query the http url of the adapter server that manages the cluster.
	var server string
	tx := l.svcCtx.DbEngin.Raw("select ta.server from t_adapter ta,t_cluster tc where ta.id = tc.adapter_id and tc.name = ?", &req.ClusterName).Scan(&server)
	if tx.Error != nil {
		return fmt.Errorf("query adapter server for cluster %q: %w", req.ClusterName, tx.Error)
	}
	if server == "" {
		// Without a server the request below would go to a relative URL;
		// fail with a message that names the actual problem instead.
		return fmt.Errorf("no adapter server found for cluster %q", req.ClusterName)
	}

	// Delete the prometheus rule through the adapter.
	response, err := l.svcCtx.HttpClient.R().
		SetBody(&CrdStruct{
			ClusterName: req.ClusterName,
			Name:        req.Name,
			Grv: Grv{
				Group:    "monitoring.coreos.com",
				Version:  "v1",
				Resource: "prometheusrules",
			},
		}).
		ForceContentType("application/json").
		Delete(server + "/api/v1/crd")
	if err != nil {
		return fmt.Errorf("delete prometheus rule %q on cluster %q: %w", req.Name, req.ClusterName, err)
	}
	// A request that was sent successfully can still come back with an error
	// status; err is nil in that case, so it must be reported explicitly.
	// NOTE(review): assumes HttpClient is a resty client, whose Response
	// exposes IsError and Status — confirm against svc.ServiceContext.
	if response.IsError() {
		return fmt.Errorf("delete prometheus rule %q on cluster %q: adapter responded with %s", req.Name, req.ClusterName, response.Status())
	}
	return nil
}
// Grv is the group/version/resource triple serialized under the "grv" key of CrdStruct.
type Grv struct {
// Group is serialized as "group"; DeleteAlertRule sends "monitoring.coreos.com".
Group string `json:"group"`
// Version is serialized as "version"; DeleteAlertRule sends "v1".
Version string `json:"version"`
// Resource is serialized as "resource"; DeleteAlertRule sends "prometheusrules".
Resource string `json:"resource"`
}
// CrdStruct is the JSON body DeleteAlertRule sends to the adapter's /api/v1/crd endpoint.
type CrdStruct struct {
// ClusterName is serialized as "clusterName"; it is filled from the request's cluster name.
ClusterName string `json:"clusterName"`
// Grv is serialized as "grv" and carries the group, version and resource of the target.
Grv Grv `json:"grv"`
// Name is serialized as "name"; it is filled from the request's rule name.
Name string `json:"name"`
}

View File

@ -33,28 +33,28 @@ func (l *ScheduleSituationLogic) ScheduleSituation() (resp *types.ScheduleSituat
// hpc
var hpcLinks []string
tx = l.svcCtx.DbEngin.Raw("SELECT GROUP_CONCAT(cluster_id SEPARATOR ',') as cluster_ids FROM task_hpc WHERE deleted_at IS NULL GROUP BY task_id HAVING COUNT(*) > 1;").Scan(&hpcLinks)
tx = l.svcCtx.DbEngin.Raw("SELECT distinct GROUP_CONCAT( distinct cluster_id SEPARATOR ',') as cluster_ids FROM task_hpc WHERE deleted_at IS NULL GROUP BY task_id HAVING COUNT(*) > 1;").Scan(&hpcLinks)
if tx.Error != nil {
return nil, tx.Error
}
LinksHandler(hpcLinks, resp)
// cloud
var cloudLinks []string
tx = l.svcCtx.DbEngin.Raw("SELECT GROUP_CONCAT(cluster_id SEPARATOR ',') as cluster_ids FROM task_cloud WHERE deleted_at IS NULL GROUP BY task_id HAVING COUNT(*) > 1;").Scan(&cloudLinks)
tx = l.svcCtx.DbEngin.Raw("SELECT distinct GROUP_CONCAT(distinct cluster_id SEPARATOR ',') as cluster_ids FROM task_cloud WHERE deleted_at IS NULL GROUP BY task_id HAVING COUNT(*) > 1;").Scan(&cloudLinks)
if tx.Error != nil {
return nil, tx.Error
}
LinksHandler(cloudLinks, resp)
// ai
var aiLinks []string
tx = l.svcCtx.DbEngin.Raw("SELECT GROUP_CONCAT(cluster_id SEPARATOR ',') as cluster_ids FROM task_ai WHERE deleted_at IS NULL GROUP BY task_id HAVING COUNT(*) > 1;").Scan(&aiLinks)
tx = l.svcCtx.DbEngin.Raw("SELECT distinct GROUP_CONCAT(distinct cluster_id SEPARATOR ',') as cluster_ids FROM task_ai WHERE deleted_at IS NULL GROUP BY task_id HAVING COUNT(*) > 1;").Scan(&aiLinks)
if tx.Error != nil {
return nil, tx.Error
}
LinksHandler(aiLinks, resp)
// vm
var vmLinks []string
tx = l.svcCtx.DbEngin.Raw("SELECT GROUP_CONCAT(cluster_id SEPARATOR ',') as cluster_ids FROM task_vm WHERE deleted_at IS NULL GROUP BY task_id HAVING COUNT(*) > 1;").Scan(&vmLinks)
tx = l.svcCtx.DbEngin.Raw("SELECT distinct GROUP_CONCAT(distinct cluster_id SEPARATOR ',') as cluster_ids FROM task_vm WHERE deleted_at IS NULL GROUP BY task_id HAVING COUNT(*) > 1;").Scan(&vmLinks)
if tx.Error != nil {
return nil, tx.Error
}

View File

@ -1164,7 +1164,7 @@ type CommitHpcTaskReq struct {
Description string `json:"description,optional"`
TenantId int64 `json:"tenantId,optional"`
TaskId int64 `json:"taskId,optional"`
AdapterIds []string `json:"adapterIds"`
AdapterIds []string `json:"adapterId"`
MatchLabels map[string]string `json:"matchLabels,optional"`
CardCount int64 `json:"cardCount,optional"`
WorkDir string `json:"workDir,optional"` //paratera:workingDir
@ -5750,6 +5750,12 @@ type CreateAlertRuleReq struct {
AlertType string `json:"alertType"`
}
// DeleteAlertRuleReq is the request for deleting an alert rule.
// Every field is bound through a `form` tag.
type DeleteAlertRuleReq struct {
// Id is the id of the alert rule to delete.
Id int64 `form:"id"`
// ClusterName is the name of the cluster the alert rule belongs to.
ClusterName string `form:"clusterName"`
// Name is the name of the alert rule.
Name string `form:"name"`
}
type AlertRulesReq struct {
AlertType string `form:"alertType"`
AdapterId string `form:"adapterId,optional"`

View File

@ -78,6 +78,18 @@ var (
Name: "cluster_pod_total",
Help: "Cluster Pod total.",
}, []string{"cluster_name", "adapter_id"})
ClusterCpuCoreHoursGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "cluster_cpu_core_hours",
Help: "Cluster Cpu Core Hours.",
}, []string{"cluster_name", "adapter_id"})
ClusterCardsAvailGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "cluster_cards_avail",
Help: "Cluster Cards Available.",
}, []string{"cluster_name", "adapter_id"})
ClusterGpuAvailGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "cluster_gpu_avail",
Help: "Cluster Gpu Available.",
}, []string{"cluster_name", "adapter_id"})
metrics = []prometheus.Collector{
ClusterCpuUtilisationGauge,