From 58263827d12fe223633fa71c972646d5e6dba685 Mon Sep 17 00:00:00 2001 From: zhouqunjie Date: Fri, 19 Apr 2024 16:04:36 +0800 Subject: [PATCH] hpc resource overview Former-commit-id: cfb5c293ce9cb59518c3a202a7519b794cff2aeb --- api/desc/hpc/pcm-hpc.api | 224 ++++++++++++------------ api/internal/logic/hpc/resourcelogic.go | 32 ++-- api/internal/types/types.go | 18 +- pkg/models/tclusterresourcemodel.go | 29 +++ pkg/models/tclusterresourcemodel_gen.go | 93 ++++++++++ 5 files changed, 264 insertions(+), 132 deletions(-) create mode 100644 pkg/models/tclusterresourcemodel.go create mode 100644 pkg/models/tclusterresourcemodel_gen.go diff --git a/api/desc/hpc/pcm-hpc.api b/api/desc/hpc/pcm-hpc.api index d86f262c..d5f15c65 100644 --- a/api/desc/hpc/pcm-hpc.api +++ b/api/desc/hpc/pcm-hpc.api @@ -1,137 +1,137 @@ syntax = "v1" info( - title: "type title here" - desc: "type desc here" - author: "type author here" - email: "type email here" - version: "type version here" + title: "type title here" + desc: "type desc here" + author: "type author here" + email: "type email here" + version: "type version here" ) type ( - commitHpcTaskReq { - Name string `json:"name"` // paratera:jobName - Description string `json:"description,optional"` - tenantId int64 `json:"tenantId,optional"` - TaskId int64 `json:"taskId,optional"` - AdapterId string `json:"adapterId,optional"` - MatchLabels map[string]string `json:"matchLabels,optional"` - CardCount int64 `json:"cardCount,optional"` - WorkDir string `json:"workDir,optional"` //paratera:workingDir - WallTime string `json:"wallTime,optional"` - CmdScript string `json:"cmdScript,optional"` // paratera:bootScript - AppType string `json:"appType,optional"` - AppName string `json:"appName,optional"` // paratera:jobGroupName ac:appname - Queue string `json:"queue,optional"` - NNode string `json:"nNode,optional"` - SubmitType string `json:"submitType,optional"` - StdOutFile string `json:"stdOutFile,optional"` - StdErrFile string `json:"stdErrFile,optional"` - StdInput string `json:"stdInput,optional"` - Environment map[string]string `json:"environment,optional"` - ClusterType string `json:"clusterType,optional"` - } + commitHpcTaskReq { + Name string `json:"name"` // paratera:jobName + Description string `json:"description,optional"` + TenantId int64 `json:"tenantId,optional"` + TaskId int64 `json:"taskId,optional"` + AdapterId string `json:"adapterId,optional"` + MatchLabels map[string]string `json:"matchLabels,optional"` + CardCount int64 `json:"cardCount,optional"` + WorkDir string `json:"workDir,optional"` //paratera:workingDir + WallTime string `json:"wallTime,optional"` + CmdScript string `json:"cmdScript,optional"` // paratera:bootScript + AppType string `json:"appType,optional"` + AppName string `json:"appName,optional"` // paratera:jobGroupName ac:appname + Queue string `json:"queue,optional"` + NNode string `json:"nNode,optional"` + SubmitType string `json:"submitType,optional"` + StdOutFile string `json:"stdOutFile,optional"` + StdErrFile string `json:"stdErrFile,optional"` + StdInput string `json:"stdInput,optional"` + Environment map[string]string `json:"environment,optional"` + ClusterType string `json:"clusterType,optional"` + } - commitHpcTaskResp { - TaskId int64 `json:"taskId"` - Code int32 `json:"code"` - Msg string `json:"msg"` - } + commitHpcTaskResp { + TaskId int64 `json:"taskId"` + Code int32 `json:"code"` + Msg string `json:"msg"` + } ) type ( - hpcOverViewReq { - } - hpcOverViewResp { - Code int32 `json:"code"` - Msg string `json:"msg"` - Data 
HPCOverView `json:"data"` - } - HPCOverView { - AdapterCount int32 `json:"adapterCount"` - StackCount int32 `json:"stackCount"` - ClusterCount int32 `json:"clusterCount"` - TaskCount int32 `json:"taskCount"` - } + hpcOverViewReq { + } + hpcOverViewResp { + Code int32 `json:"code"` + Msg string `json:"msg"` + Data HPCOverView `json:"data"` + } + HPCOverView { + AdapterCount int32 `json:"adapterCount"` + StackCount int32 `json:"stackCount"` + ClusterCount int32 `json:"clusterCount"` + TaskCount int32 `json:"taskCount"` + } ) type ( - hpcAdapterSummaryReq { - } - hpcAdapterSummaryResp { - Code int32 `json:"code"` - Msg string `json:"msg"` - Data []HPCAdapterSummary `json:"data"` - } - HPCAdapterSummary { - AdapterName string `json:"adapterName"` - StackCount int32 `json:"stackCount"` - ClusterCount int32 `json:"clusterCount"` - TaskCount int32 `json:"taskCount"` - } + hpcAdapterSummaryReq { + } + hpcAdapterSummaryResp { + Code int32 `json:"code"` + Msg string `json:"msg"` + Data []HPCAdapterSummary `json:"data"` + } + HPCAdapterSummary { + AdapterName string `json:"adapterName"` + StackCount int32 `json:"stackCount"` + ClusterCount int32 `json:"clusterCount"` + TaskCount int32 `json:"taskCount"` + } ) type ( - hpcJobReq { - } - hpcJobResp { - Code int32 `json:"code"` - Msg string `json:"msg"` - Data []Job `json:"data"` - } - Job { - JobName string `json:"jobName"` - JobDesc string `json:"jobDesc"` - SubmitTime string `json:"submitTime"` - JobStatus string `json:"jobStatus"` - AdapterName string `json:"adapterName"` - ClusterName string `json:"clusterName"` - ClusterType string `json:"clusterType"` - } + hpcJobReq { + } + hpcJobResp { + Code int32 `json:"code"` + Msg string `json:"msg"` + Data []Job `json:"data"` + } + Job { + JobName string `json:"jobName"` + JobDesc string `json:"jobDesc"` + SubmitTime string `json:"submitTime"` + JobStatus string `json:"jobStatus"` + AdapterName string `json:"adapterName"` + ClusterName string `json:"clusterName"` + ClusterType string `json:"clusterType"` + } ) type ( - hpcResourceReq { - } - hpcResourceResp { - Code int32 `json:"code"` - Msg string `json:"msg"` - HPCResource HPCResource `json:"hpcResource"` - } - HPCResource { - GPUCardsTotal int32 `json:"gpuCoresTotal"` - CPUCoresTotal int32 `json:"cpuCoresTotal"` - RAMTotal int32 `json:"ramTotal"` - GPUCardsUsed int32 `json:"gpuCoresUsed"` - CPUCoresUsed int32 `json:"cpuCoresUsed"` - RAMUsed int32 `json:"ramUsed"` - GPURate float32 `json:"gpuRate"` - CPURate float32 `json:"cpuRate"` - RAMRate float32 `json:"ramRate"` - } + hpcResourceReq { + } + hpcResourceResp { + Code int32 `json:"code"` + Msg string `json:"msg"` + HPCResource HPCResource `json:"hpcResource"` + } + HPCResource { + GPUCardsTotal float64 `json:"gpuCoresTotal"` + CPUCoresTotal float64 `json:"cpuCoresTotal"` + RAMTotal float64 `json:"ramTotal"` + GPUCardsUsed float64 `json:"gpuCoresUsed"` + CPUCoresUsed float64 `json:"cpuCoresUsed"` + RAMUsed float64 `json:"ramUsed"` + GPURate float64 `json:"gpuRate"` + CPURate float64 `json:"cpuRate"` + RAMRate float64 `json:"ramRate"` + } ) type QueueAssetsResp { - QueueAssets []QueueAsset `json:"queueAsset"` + QueueAssets []QueueAsset `json:"queueAsset"` } type QueueAsset { - TenantName string `json:"tenantName"` //租户名称 - ParticipantId int64 `json:"participantId"` - AclHosts string `json:"aclHosts"` // 可用节点,多个节点用逗号隔开 - QueNodes string `json:"queNodes"` //队列节点总数 - QueMinNodect string `json:"queMinNodect,omitempty"` //队列最小节点数 - QueMaxNgpus string `json:"queMaxNgpus,omitempty"` //队列最大GPU卡数 - QueMaxPPN 
string `json:"queMaxPPN,omitempty"` //使用该队列作业最大CPU核心数 - QueChargeRate string `json:"queChargeRate,omitempty"` //费率 - QueMaxNcpus string `json:"queMaxNcpus,omitempty"` //用户最大可用核心数 - QueMaxNdcus string `json:"queMaxNdcus,omitempty"` //队列总DCU卡数 - QueueName string `json:"queueName,omitempty"` //队列名称 - QueMinNcpus string `json:"queMinNcpus,omitempty"` //队列最小CPU核数 - QueFreeNodes string `json:"queFreeNodes,omitempty"` //队列空闲节点数 - QueMaxNodect string `json:"queMaxNodect,omitempty"` //队列作业最大节点数 - QueMaxGpuPN string `json:"queMaxGpuPN,omitempty"` //队列单作业最大GPU卡数 - QueMaxWalltime string `json:"queMaxWalltime,omitempty"` //队列最大运行时间 - QueMaxDcuPN string `json:"queMaxDcuPN,omitempty"` //队列单作业最大DCU卡数 - QueFreeNcpus string `json:"queFreeNcpus"` //队列空闲cpu数 - QueNcpus string `json:"queNcpus"` //队列cpu数 + TenantName string `json:"tenantName"` //租户名称 + ParticipantId int64 `json:"participantId"` + AclHosts string `json:"aclHosts"` // 可用节点,多个节点用逗号隔开 + QueNodes string `json:"queNodes"` //队列节点总数 + QueMinNodect string `json:"queMinNodect,omitempty"` //队列最小节点数 + QueMaxNgpus string `json:"queMaxNgpus,omitempty"` //队列最大GPU卡数 + QueMaxPPN string `json:"queMaxPPN,omitempty"` //使用该队列作业最大CPU核心数 + QueChargeRate string `json:"queChargeRate,omitempty"` //费率 + QueMaxNcpus string `json:"queMaxNcpus,omitempty"` //用户最大可用核心数 + QueMaxNdcus string `json:"queMaxNdcus,omitempty"` //队列总DCU卡数 + QueueName string `json:"queueName,omitempty"` //队列名称 + QueMinNcpus string `json:"queMinNcpus,omitempty"` //队列最小CPU核数 + QueFreeNodes string `json:"queFreeNodes,omitempty"` //队列空闲节点数 + QueMaxNodect string `json:"queMaxNodect,omitempty"` //队列作业最大节点数 + QueMaxGpuPN string `json:"queMaxGpuPN,omitempty"` //队列单作业最大GPU卡数 + QueMaxWalltime string `json:"queMaxWalltime,omitempty"` //队列最大运行时间 + QueMaxDcuPN string `json:"queMaxDcuPN,omitempty"` //队列单作业最大DCU卡数 + QueFreeNcpus string `json:"queFreeNcpus"` //队列空闲cpu数 + QueNcpus string `json:"queNcpus"` //队列cpu数 } \ No newline at end of file diff --git a/api/internal/logic/hpc/resourcelogic.go b/api/internal/logic/hpc/resourcelogic.go index b88e94a1..fd620904 100644 --- a/api/internal/logic/hpc/resourcelogic.go +++ b/api/internal/logic/hpc/resourcelogic.go @@ -2,7 +2,6 @@ package hpc import ( "context" - "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types" @@ -25,18 +24,29 @@ func NewResourceLogic(ctx context.Context, svcCtx *svc.ServiceContext) *Resource func (l *ResourceLogic) Resource(req *types.HpcResourceReq) (resp *types.HpcResourceResp, err error) { - l.svcCtx.DbEngin.Raw("SELECT th.NAME as job_name,t.description as job_desc,t.commit_time as submit_time,th.STATUS as job_status,ta.name as adapter_name,tc.name as cluster_name,tc.label as cluster_type FROM task_hpc th LEFT JOIN task t ON t.id = th.task_id JOIN t_cluster tc on th.cluster_id = tc.id JOIN t_adapter ta on tc.adapter_id = ta.id") + type hpcResourceOV struct { + CpuAvail float64 `json:"cpu_avail"` + CpuTotal float64 `json:"cpu_total"` + MemAvail float64 `json:"mem_avail"` + MemTotal float64 `json:"mem_total"` + DiskAvail float64 `json:"disk_avail"` + DiskTotal float64 `json:"disk_total"` + GpuAvail float64 `json:"gpu_avail"` + GpuTotal float64 `json:"gpu_total"` + } + var hrov hpcResourceOV + l.svcCtx.DbEngin.Raw("SELECT sum(cpu_avail) as cpu_avail,sum(cpu_total) as cpu_total,sum(mem_avail) as mem_avail,sum(mem_total) as mem_total,sum(disk_avail) as disk_avail,sum(disk_total) as disk_total,sum(gpu_avail) as gpu_avail,sum(gpu_total) as gpu_total FROM t_cluster_resource where 
cluster_type = 2").Scan(&hrov) hpcResource := types.HPCResource{ - GPUCardsTotal: 0, - CPUCoresTotal: 0, - RAMTotal: 0, - GPUCardsUsed: 0, - CPUCoresUsed: 0, - RAMUsed: 0, - GPURate: 0, - CPURate: 0, - RAMRate: 0, + GPUCardsTotal: hrov.GpuTotal, + CPUCoresTotal: hrov.CpuTotal, + RAMTotal: hrov.MemTotal, + GPUCardsUsed: hrov.GpuTotal - hrov.GpuAvail, + CPUCoresUsed: hrov.CpuTotal - hrov.CpuAvail, + RAMUsed: hrov.MemTotal - hrov.MemAvail, + GPURate: (hrov.GpuTotal - hrov.GpuAvail) / hrov.GpuTotal, + CPURate: (hrov.CpuTotal - hrov.CpuAvail) / hrov.CpuTotal, + RAMRate: (hrov.MemTotal - hrov.MemAvail) / hrov.MemTotal, } resp = &types.HpcResourceResp{ diff --git a/api/internal/types/types.go b/api/internal/types/types.go index 55ad50af..cb489383 100644 --- a/api/internal/types/types.go +++ b/api/internal/types/types.go @@ -937,15 +937,15 @@ type HpcResourceResp struct { } type HPCResource struct { - GPUCardsTotal int32 `json:"gpuCoresTotal"` - CPUCoresTotal int32 `json:"cpuCoresTotal"` - RAMTotal int32 `json:"ramTotal"` - GPUCardsUsed int32 `json:"gpuCoresUsed"` - CPUCoresUsed int32 `json:"cpuCoresUsed"` - RAMUsed int32 `json:"ramUsed"` - GPURate float32 `json:"gpuRate"` - CPURate float32 `json:"cpuRate"` - RAMRate float32 `json:"ramRate"` + GPUCardsTotal float64 `json:"gpuCoresTotal"` + CPUCoresTotal float64 `json:"cpuCoresTotal"` + RAMTotal float64 `json:"ramTotal"` + GPUCardsUsed float64 `json:"gpuCoresUsed"` + CPUCoresUsed float64 `json:"cpuCoresUsed"` + RAMUsed float64 `json:"ramUsed"` + GPURate float64 `json:"gpuRate"` + CPURate float64 `json:"cpuRate"` + RAMRate float64 `json:"ramRate"` } type QueueAssetsResp struct { diff --git a/pkg/models/tclusterresourcemodel.go b/pkg/models/tclusterresourcemodel.go new file mode 100644 index 00000000..42c1fb2e --- /dev/null +++ b/pkg/models/tclusterresourcemodel.go @@ -0,0 +1,29 @@ +package models + +import "github.com/zeromicro/go-zero/core/stores/sqlx" + +var _ TClusterResourceModel = (*customTClusterResourceModel)(nil) + +type ( + // TClusterResourceModel is an interface to be customized, add more methods here, + // and implement the added methods in customTClusterResourceModel. + TClusterResourceModel interface { + tClusterResourceModel + withSession(session sqlx.Session) TClusterResourceModel + } + + customTClusterResourceModel struct { + *defaultTClusterResourceModel + } +) + +// NewTClusterResourceModel returns a model for the database table. +func NewTClusterResourceModel(conn sqlx.SqlConn) TClusterResourceModel { + return &customTClusterResourceModel{ + defaultTClusterResourceModel: newTClusterResourceModel(conn), + } +} + +func (m *customTClusterResourceModel) withSession(session sqlx.Session) TClusterResourceModel { + return NewTClusterResourceModel(sqlx.NewSqlConnFromSession(session)) +} diff --git a/pkg/models/tclusterresourcemodel_gen.go b/pkg/models/tclusterresourcemodel_gen.go new file mode 100644 index 00000000..bf4068e6 --- /dev/null +++ b/pkg/models/tclusterresourcemodel_gen.go @@ -0,0 +1,93 @@ +// Code generated by goctl. DO NOT EDIT. 
+ +package models + +import ( + "context" + "database/sql" + "fmt" + "strings" + + "github.com/zeromicro/go-zero/core/stores/builder" + "github.com/zeromicro/go-zero/core/stores/sqlc" + "github.com/zeromicro/go-zero/core/stores/sqlx" + "github.com/zeromicro/go-zero/core/stringx" +) + +var ( + tClusterResourceFieldNames = builder.RawFieldNames(&TClusterResource{}) + tClusterResourceRows = strings.Join(tClusterResourceFieldNames, ",") + tClusterResourceRowsExpectAutoSet = strings.Join(stringx.Remove(tClusterResourceFieldNames, "`create_at`", "`create_time`", "`created_at`", "`update_at`", "`update_time`", "`updated_at`"), ",") + tClusterResourceRowsWithPlaceHolder = strings.Join(stringx.Remove(tClusterResourceFieldNames, "`cluster_id`", "`create_at`", "`create_time`", "`created_at`", "`update_at`", "`update_time`", "`updated_at`"), "=?,") + "=?" +) + +type ( + tClusterResourceModel interface { + Insert(ctx context.Context, data *TClusterResource) (sql.Result, error) + FindOne(ctx context.Context, clusterId int64) (*TClusterResource, error) + Update(ctx context.Context, data *TClusterResource) error + Delete(ctx context.Context, clusterId int64) error + } + + defaultTClusterResourceModel struct { + conn sqlx.SqlConn + table string + } + + TClusterResource struct { + ClusterId int64 `db:"cluster_id"` + ClusterName string `db:"cluster_name"` + ClusterType int64 `db:"cluster_type"` // 类型0->容器,1->智算,2->超算,3-虚拟机 + CpuAvail float64 `db:"cpu_avail"` + CpuTotal float64 `db:"cpu_total"` + MemAvail float64 `db:"mem_avail"` + MemTotal float64 `db:"mem_total"` + DiskAvail float64 `db:"disk_avail"` + DiskTotal float64 `db:"disk_total"` + GpuAvail float64 `db:"gpu_avail"` + GpuTotal float64 `db:"gpu_total"` + } +) + +func newTClusterResourceModel(conn sqlx.SqlConn) *defaultTClusterResourceModel { + return &defaultTClusterResourceModel{ + conn: conn, + table: "`t_cluster_resource`", + } +} + +func (m *defaultTClusterResourceModel) Delete(ctx context.Context, clusterId int64) error { + query := fmt.Sprintf("delete from %s where `cluster_id` = ?", m.table) + _, err := m.conn.ExecCtx(ctx, query, clusterId) + return err +} + +func (m *defaultTClusterResourceModel) FindOne(ctx context.Context, clusterId int64) (*TClusterResource, error) { + query := fmt.Sprintf("select %s from %s where `cluster_id` = ? 
limit 1", tClusterResourceRows, m.table) + var resp TClusterResource + err := m.conn.QueryRowCtx(ctx, &resp, query, clusterId) + switch err { + case nil: + return &resp, nil + case sqlc.ErrNotFound: + return nil, ErrNotFound + default: + return nil, err + } +} + +func (m *defaultTClusterResourceModel) Insert(ctx context.Context, data *TClusterResource) (sql.Result, error) { + query := fmt.Sprintf("insert into %s (%s) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", m.table, tClusterResourceRowsExpectAutoSet) + ret, err := m.conn.ExecCtx(ctx, query, data.ClusterId, data.ClusterName, data.ClusterType, data.CpuAvail, data.CpuTotal, data.MemAvail, data.MemTotal, data.DiskAvail, data.DiskTotal, data.GpuAvail, data.GpuTotal) + return ret, err +} + +func (m *defaultTClusterResourceModel) Update(ctx context.Context, data *TClusterResource) error { + query := fmt.Sprintf("update %s set %s where `cluster_id` = ?", m.table, tClusterResourceRowsWithPlaceHolder) + _, err := m.conn.ExecCtx(ctx, query, data.ClusterName, data.ClusterType, data.CpuAvail, data.CpuTotal, data.MemAvail, data.MemTotal, data.DiskAvail, data.DiskTotal, data.GpuAvail, data.GpuTotal, data.ClusterId) + return err +} + +func (m *defaultTClusterResourceModel) tableName() string { + return m.table +}