diff --git a/api/client/task_impl.go b/api/client/task_impl.go index cb716d65..8b141cea 100644 --- a/api/client/task_impl.go +++ b/api/client/task_impl.go @@ -1,6 +1,7 @@ package client import ( + "github.com/jinzhu/copier" "github.com/zeromicro/go-zero/core/logx" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models" @@ -39,6 +40,18 @@ func (t task) PullTaskInfo(pullTaskInfoReq PullTaskInfoReq) (*PullTaskInfoResp, var hpcModelList []models.TaskHpc findModelList(pullTaskInfoReq.AdapterId, t.client.DbEngin, &hpcModelList) utils.Convert(hpcModelList, &result.HpcInfoList) + if len(result.HpcInfoList) > 0 { + for i, hpcInfo := range hpcModelList { + err := copier.CopyWithOption(result.HpcInfoList[i], hpcInfo, copier.Option{Converters: utils.Converters}) + if err != nil { + return nil, err + } + var clusterType string + t.client.DbEngin.Raw("SELECT label FROM `t_cluster` where id = ? ", hpcInfo.ClusterId).Scan(&clusterType) + + result.HpcInfoList[i].ClusterType = clusterType + } + } case 0: var cloudModelList []models.Cloud findModelList(pullTaskInfoReq.AdapterId, t.client.DbEngin, &cloudModelList) diff --git a/api/client/types.go b/api/client/types.go index 69e828ab..155ad628 100644 --- a/api/client/types.go +++ b/api/client/types.go @@ -1,35 +1,50 @@ package client +import ( + "database/sql" + "time" +) + type HpcInfo struct { - ParticipantId int64 `json:"participantId,omitempty"` - TaskId int64 `json:"taskId,omitempty"` - JobId string `json:"jobId,omitempty"` - Name string `json:"name,omitempty"` - Status string `json:"status,omitempty"` - StartTime string `json:"startTime,omitempty"` - RunningTime int64 `json:"runningTime,omitempty"` - Result string `json:"result,omitempty"` - WorkDir string `json:"workDir,omitempty"` - WallTime string `json:"wallTime,omitempty"` - CmdScript string `json:"cmdScript,omitempty"` - DerivedEs string `json:"derivedEs,omitempty"` - Cluster string `json:"cluster,omitempty"` - BlockId string `json:"blockId,omitempty"` - AllocNodes uint32 `json:"allocNodes,omitempty"` - AllocCpu uint32 `json:"allocCpu,omitempty"` - Version string `json:"version,omitempty"` - Account string `json:"account,omitempty"` - ExitCode uint32 `json:"exitCode,omitempty"` - AssocId uint32 `json:"assocId,omitempty"` - AppType string `json:"appType,omitempty"` - AppName string `json:"appName,omitempty"` - Queue string `json:"queue,omitempty"` - SubmitType string `json:"submitType,omitempty"` - NNode string `json:"nNode,omitempty"` - StdOutFile string `json:"stdOutFile,omitempty"` - StdErrFile string `json:"stdErrFile,omitempty"` - StdInput string `json:"stdInput,omitempty"` - Environment string `json:"environment,omitempty"` + Id int64 `json:"id"` // id + TaskId int64 `json:"task_id"` // 任务id + JobId string `json:"job_id"` // 作业id(在第三方系统中的作业id) + ClusterId int64 `json:"cluster_id"` // 执行任务的集群id + ClusterType string `json:"cluster_type"` // 执行任务的集群类型 + Name string `json:"name"` // 名称 + Status string `json:"status"` // 状态 + CmdScript string `json:"cmd_script"` + StartTime string `json:"start_time"` // 开始时间 + RunningTime int64 `json:"running_time"` // 运行时间 + DerivedEs string `json:"derived_es"` + Cluster string `json:"cluster"` + BlockId int64 `json:"block_id"` + AllocNodes int64 `json:"alloc_nodes"` + AllocCpu int64 `json:"alloc_cpu"` + CardCount int64 `json:"card_count"` // 卡数 + Version string `json:"version"` + Account string `json:"account"` + WorkDir string `json:"work_dir"` // 工作路径 + AssocId int64 `json:"assoc_id"` + ExitCode int64 `json:"exit_code"` + WallTime string `json:"wall_time"` // 最大运行时间 + Result string `json:"result"` // 运行结果 + DeletedAt sql.NullTime `json:"deleted_at"` // 删除时间 + YamlString string `json:"yaml_string"` + AppType string `json:"app_type"` // 应用类型 + AppName string `json:"app_name"` // 应用名称 + Queue string `json:"queue"` // 队列名称 + SubmitType string `json:"submit_type"` // cmd(命令行模式) + NNode string `json:"n_node"` // 节点个数(当指定该参数时,GAP_NODE_STRING必须为"") + StdOutFile string `json:"std_out_file"` // 工作路径/std.err.%j + StdErrFile string `json:"std_err_file"` // 工作路径/std.err.%j + StdInput string `json:"std_input"` + Environment string `json:"environment"` + DeletedFlag int64 `json:"deleted_flag"` // 是否删除(0-否,1-是) + CreatedBy int64 `json:"created_by"` // 创建人 + CreatedTime time.Time `json:"created_time"` // 创建时间 + UpdatedBy int64 `json:"updated_by"` // 更新人 + UpdatedTime time.Time `json:"updated_time"` // 更新时间 } type CloudInfo struct { diff --git a/api/desc/core/pcm-core.api b/api/desc/core/pcm-core.api index 6ec35622..e5334e24 100644 --- a/api/desc/core/pcm-core.api +++ b/api/desc/core/pcm-core.api @@ -22,7 +22,7 @@ type ( centerResourcesResp { CentersIndex []CenterIndex `json:"centersIndex"` } - CenterIndex{ + CenterIndex { name string `json:"name"` cpu float32 `json:"cpu"` memory float32 `json:"memory"` @@ -30,6 +30,20 @@ type ( centerType string `json:"centerType"` } ) +type ( + syncClusterLoadReq { + clusterLoadRecords []ClusterLoadRecord `json:"clusterLoadRecords"` + } + ClusterLoadRecord { + ClusterName string `json:"clusterName"` + CpuUsage float64 `json:"cpuUsage"` + MemoryUsage float64 `json:"memoryUsage"` + DiskUsage float64 `json:"diskUsage"` + } +) + + + type ( getClusterListReq { Id int64 `form:"id"` @@ -650,7 +664,7 @@ type clusterSumReq { } -type clusterSumReqResp{ +type clusterSumReqResp { ClusterSum int `json:"ClusterSum,omitempty"` AdapterSum int `json:"AdapterSum,omitempty"` TaskSum int `json:"TaskSum,omitempty"` diff --git a/api/desc/pcm.api b/api/desc/pcm.api index 5759332e..72d06db0 100644 --- a/api/desc/pcm.api +++ b/api/desc/pcm.api @@ -104,6 +104,14 @@ service pcm { @doc "Center Resources top3" @handler centerResourcesHandler get /core/centerResources returns (centerResourcesResp) + + @doc "Synchronize Cluster Load Information" + @handler syncClusterLoadHandler + post /core/syncClusterLoad (syncClusterLoadReq) + + @doc "metrics" + @handler metricsHandler + get /core/metrics } //hpc二级接口 diff --git a/api/internal/handler/core/metricshandler.go b/api/internal/handler/core/metricshandler.go new file mode 100644 index 00000000..fb157525 --- /dev/null +++ b/api/internal/handler/core/metricshandler.go @@ -0,0 +1,12 @@ +package core + +import ( + "github.com/prometheus/client_golang/prometheus/promhttp" + "net/http" + + "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc" +) + +func MetricsHandler(svcCtx *svc.ServiceContext) http.HandlerFunc { + return promhttp.Handler().ServeHTTP +} diff --git a/api/internal/handler/core/syncclusterloadhandler.go b/api/internal/handler/core/syncclusterloadhandler.go new file mode 100644 index 00000000..cd8de3a3 --- /dev/null +++ b/api/internal/handler/core/syncclusterloadhandler.go @@ -0,0 +1,25 @@ +package core + +import ( + "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result" + "net/http" + + "github.com/zeromicro/go-zero/rest/httpx" + "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/core" + "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc" + "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types" +) + +func SyncClusterLoadHandler(svcCtx *svc.ServiceContext) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + var req types.SyncClusterLoadReq + if err := httpx.Parse(r, &req); err != nil { + httpx.ErrorCtx(r.Context(), w, err) + return + } + + l := core.NewSyncClusterLoadLogic(r.Context(), svcCtx) + err := l.SyncClusterLoad(&req) + result.HttpResult(r, w, nil, err) + } +} diff --git a/api/internal/handler/routes.go b/api/internal/handler/routes.go index 25b362f8..799d3e73 100644 --- a/api/internal/handler/routes.go +++ b/api/internal/handler/routes.go @@ -124,6 +124,16 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) { Path: "/core/centerResources", Handler: core.CenterResourcesHandler(serverCtx), }, + { + Method: http.MethodPost, + Path: "/core/syncClusterLoad", + Handler: core.SyncClusterLoadHandler(serverCtx), + }, + { + Method: http.MethodGet, + Path: "/core/metrics", + Handler: core.MetricsHandler(serverCtx), + }, }, rest.WithPrefix("/pcm/v1"), ) diff --git a/api/internal/logic/core/metricslogic.go b/api/internal/logic/core/metricslogic.go new file mode 100644 index 00000000..9f947fe3 --- /dev/null +++ b/api/internal/logic/core/metricslogic.go @@ -0,0 +1,28 @@ +package core + +import ( + "context" + + "github.com/zeromicro/go-zero/core/logx" + "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc" +) + +type MetricsLogic struct { + logx.Logger + ctx context.Context + svcCtx *svc.ServiceContext +} + +func NewMetricsLogic(ctx context.Context, svcCtx *svc.ServiceContext) *MetricsLogic { + return &MetricsLogic{ + Logger: logx.WithContext(ctx), + ctx: ctx, + svcCtx: svcCtx, + } +} + +func (l *MetricsLogic) Metrics() error { + // todo: add your logic here and delete this line + + return nil +} diff --git a/api/internal/logic/core/syncclusterloadlogic.go b/api/internal/logic/core/syncclusterloadlogic.go new file mode 100644 index 00000000..2c0f87e9 --- /dev/null +++ b/api/internal/logic/core/syncclusterloadlogic.go @@ -0,0 +1,35 @@ +package core + +import ( + "context" + "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc" + "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types" + "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/tracker" + + "github.com/zeromicro/go-zero/core/logx" +) + +type SyncClusterLoadLogic struct { + logx.Logger + ctx context.Context + svcCtx *svc.ServiceContext +} + +func NewSyncClusterLoadLogic(ctx context.Context, svcCtx *svc.ServiceContext) *SyncClusterLoadLogic { + return &SyncClusterLoadLogic{ + Logger: logx.WithContext(ctx), + ctx: ctx, + svcCtx: svcCtx, + } +} + +func (l *SyncClusterLoadLogic) SyncClusterLoad(req *types.SyncClusterLoadReq) error { + if len(req.ClusterLoadRecords) != 0 { + for _, record := range req.ClusterLoadRecords { + tracker.ClusterCpuGauge.WithLabelValues(record.ClusterName).Set(record.CpuUsage) + tracker.ClusterMemoryGauge.WithLabelValues(record.ClusterName).Set(record.MemoryUsage) + tracker.ClusterDiskGauge.WithLabelValues(record.ClusterName).Set(record.DiskUsage) + } + } + return nil +} diff --git a/api/internal/types/types.go b/api/internal/types/types.go index cc26cc34..868f3109 100644 --- a/api/internal/types/types.go +++ b/api/internal/types/types.go @@ -24,6 +24,17 @@ type CenterIndex struct { CenterType string `json:"centerType"` } +type SyncClusterLoadReq struct { + ClusterLoadRecords []ClusterLoadRecord `json:"clusterLoadRecords"` +} + +type ClusterLoadRecord struct { + ClusterName string `json:"clusterName"` + CpuUsage float64 `json:"cpuUsage"` + MemoryUsage float64 `json:"memoryUsage"` + DiskUsage float64 `json:"diskUsage"` +} + type GetClusterListReq struct { Id int64 `form:"id"` } diff --git a/go.mod b/go.mod index cae9b25c..13dde252 100644 --- a/go.mod +++ b/go.mod @@ -27,7 +27,6 @@ require ( gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240307072630-6ff50727536a gitlink.org.cn/JointCloud/pcm-slurm v0.0.0-20240301080743-8b94bbaf57f5 gitlink.org.cn/jcce-pcm/pcm-ac v0.0.0-20240301085553-f6ad88fa357a - gitlink.org.cn/jcce-pcm/pcm-coordinator v0.1.19 gitlink.org.cn/jcce-pcm/pcm-participant-ceph v0.0.0-20230904090036-24fc730ec87d gitlink.org.cn/jcce-pcm/pcm-participant-modelarts v0.0.0-20231101085149-724c7c4cc090 gitlink.org.cn/jcce-pcm/pcm-participant-octopus v0.0.0-20240222124813-e275cfa342f4 @@ -44,6 +43,7 @@ require ( ) require ( + github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 // indirect github.com/Microsoft/go-winio v0.6.1 // indirect github.com/alibabacloud-go/debug v0.0.0-20190504072949-9472017b5c68 // indirect github.com/alibabacloud-go/tea v1.1.17 // indirect @@ -97,9 +97,6 @@ require ( github.com/prometheus/procfs v0.12.0 // indirect github.com/redis/go-redis/v9 v9.5.1 // indirect github.com/spaolacci/murmur3 v1.1.0 // indirect - gitlink.org.cn/jcce-pcm/pcm-participant-kubernetes v0.0.0-20231214084401-de9ac5db7246 // indirect - gitlink.org.cn/jcce-pcm/pcm-participant-openstack v0.0.0-20231102023739-81a3d353c10d // indirect - gitlink.org.cn/jcce-pcm/pcm-slurm v0.0.0-20231107115628-f74106c47dfa // indirect go.etcd.io/etcd/api/v3 v3.5.12 // indirect go.etcd.io/etcd/client/pkg/v3 v3.5.12 // indirect go.etcd.io/etcd/client/v3 v3.5.12 // indirect @@ -117,6 +114,7 @@ require ( go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.27.0 // indirect golang.org/x/crypto v0.21.0 // indirect + golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa // indirect golang.org/x/mod v0.15.0 // indirect golang.org/x/net v0.22.0 // indirect golang.org/x/oauth2 v0.18.0 // indirect diff --git a/go.sum b/go.sum index 6905bf91..ebd640ab 100644 --- a/go.sum +++ b/go.sum @@ -1006,20 +1006,12 @@ gitlink.org.cn/JointCloud/pcm-slurm v0.0.0-20240301080743-8b94bbaf57f5 h1:+/5vnz gitlink.org.cn/JointCloud/pcm-slurm v0.0.0-20240301080743-8b94bbaf57f5/go.mod h1:97AlUXN13g9UN3+9/DzCHpeoU5sbdyv0IQuTEHNexzQ= gitlink.org.cn/jcce-pcm/pcm-ac v0.0.0-20240301085553-f6ad88fa357a h1:fY1KmyZ6O7wVBvgt2HB+C9e1DncJdk/Wkv8m5Qz7abw= gitlink.org.cn/jcce-pcm/pcm-ac v0.0.0-20240301085553-f6ad88fa357a/go.mod h1:oMaWf5sEDFKTfCbIlT6/7IFI3f6PsuiRnWzzQruSF5Q= -gitlink.org.cn/jcce-pcm/pcm-coordinator v0.1.19 h1:qeBcLo7NTGPsowxxgc7dD+fdWHEOZBrt1vY26+3wv+k= -gitlink.org.cn/jcce-pcm/pcm-coordinator v0.1.19/go.mod h1:0dHxKCTjH3ud1qRQZjE6EqXSs3NTOpiHWTpaip4mrWE= gitlink.org.cn/jcce-pcm/pcm-participant-ceph v0.0.0-20230904090036-24fc730ec87d h1:DHjl/rLuH2gKYtY0MKMGNQDHFT12APg25RlMUQo+tHk= gitlink.org.cn/jcce-pcm/pcm-participant-ceph v0.0.0-20230904090036-24fc730ec87d/go.mod h1:r/KLzUpupCV5jdxSfgDhc2pVjP0fBi3VhAWRttsBn30= -gitlink.org.cn/jcce-pcm/pcm-participant-kubernetes v0.0.0-20231214084401-de9ac5db7246 h1:VVyI1H3hRv5tDWHt41jIlrucmxF10z3bMqv/hIwCcw0= -gitlink.org.cn/jcce-pcm/pcm-participant-kubernetes v0.0.0-20231214084401-de9ac5db7246/go.mod h1:LM+XeDayimN6b1AY7AhNbbhq9HJyS0u7tszMCNsNmAo= gitlink.org.cn/jcce-pcm/pcm-participant-modelarts v0.0.0-20231101085149-724c7c4cc090 h1:jztlHo72bcWM1jUwvG3Hfk2K+AJL0RvlsdIqlktH/MI= gitlink.org.cn/jcce-pcm/pcm-participant-modelarts v0.0.0-20231101085149-724c7c4cc090/go.mod h1:pisJKAI8FRFFUcBaH3Gob+ENXWRM97rpuYmv9s1raag= gitlink.org.cn/jcce-pcm/pcm-participant-octopus v0.0.0-20240222124813-e275cfa342f4 h1:NrxKAZ5uAzshB9EHcPw+XTOTzpxb5HslNRMYBrFC1Qo= gitlink.org.cn/jcce-pcm/pcm-participant-octopus v0.0.0-20240222124813-e275cfa342f4/go.mod h1:uyvpVqG1jHDXX+ubXI0RBwnWXzVykD/mliqGQIDvRoo= -gitlink.org.cn/jcce-pcm/pcm-participant-openstack v0.0.0-20231102023739-81a3d353c10d h1:hdSxVD+AN7W6j847/GsnNgOAX5IdRQRV1KLz+d4FlS0= -gitlink.org.cn/jcce-pcm/pcm-participant-openstack v0.0.0-20231102023739-81a3d353c10d/go.mod h1:m75SVNfNa1TUBlQtBfR0CeETQ0ez2RIUqlSCn1Mb/js= -gitlink.org.cn/jcce-pcm/pcm-slurm v0.0.0-20231107115628-f74106c47dfa h1:U0YV9ju5OPpUe8iUk4OEUtYJlINgpI0vgLC1IfZ2JUY= -gitlink.org.cn/jcce-pcm/pcm-slurm v0.0.0-20231107115628-f74106c47dfa/go.mod h1:tqj8GWoM2P21agWvJyUwN1U37CqfALwZTkRs9Ekgrbw= go.etcd.io/etcd/api/v3 v3.5.7/go.mod h1:9qew1gCdDDLu+VwmeG+iFpL+QlpHTo7iubavdVDgCAA= go.etcd.io/etcd/api/v3 v3.5.12 h1:W4sw5ZoU2Juc9gBWuLk5U6fHfNVyY1WC5g9uiXZio/c= go.etcd.io/etcd/api/v3 v3.5.12/go.mod h1:Ot+o0SWSyT6uHhA56al1oCED0JImsRiU9Dc26+C2a+4= diff --git a/pkg/tracker/queryoptions.go b/pkg/tracker/queryoptions.go index 06968907..bc0bc9ac 100644 --- a/pkg/tracker/queryoptions.go +++ b/pkg/tracker/queryoptions.go @@ -254,6 +254,7 @@ type ControllerOption struct { Namespace string Kind string WorkloadName string + PodsName string Level string } diff --git a/pkg/tracker/tracker.go b/pkg/tracker/tracker.go index c24b07f9..298d7635 100644 --- a/pkg/tracker/tracker.go +++ b/pkg/tracker/tracker.go @@ -18,6 +18,7 @@ import ( "context" "github.com/prometheus/client_golang/api" v1 "github.com/prometheus/client_golang/api/prometheus/v1" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" "strconv" "strings" @@ -25,6 +26,30 @@ import ( "time" ) +var ( + ClusterCpuGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Name: "cluster_cpu_usage", + Help: "Cluster CPU Utilization Rate.", + }, []string{"cluster_name"}) + ClusterMemoryGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Name: "cluster_memory_usage", + Help: "Cluster Memory Utilization Rate.", + }, []string{"cluster_name"}) + ClusterDiskGauge = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Name: "cluster_disk_usage", + Help: "Cluster Disk Utilization Rate.", + }, []string{"cluster_name"}) + metrics = []prometheus.Collector{ + ClusterCpuGauge, + ClusterMemoryGauge, + ClusterDiskGauge, + } +) + +func init() { + prometheus.MustRegister(metrics...) +} + type Prometheus struct { prometheus Interface client v1.API