hpc新增字段

Former-commit-id: 5958b28a8285b807d4cf693d64e9cf313be41235
This commit is contained in:
zhangwei 2024-09-04 16:56:22 +08:00
parent 37afd2a284
commit c9a754e79b
5 changed files with 123 additions and 127 deletions

View File

@ -1,137 +1,138 @@
syntax = "v1" syntax = "v1"
info( info(
title: "type title here" title: "type title here"
desc: "type desc here" desc: "type desc here"
author: "type author here" author: "type author here"
email: "type email here" email: "type email here"
version: "type version here" version: "type version here"
) )
type ( type (
commitHpcTaskReq { commitHpcTaskReq {
Name string `json:"name"` // paratera:jobName Name string `json:"name"` // paratera:jobName
Description string `json:"description,optional"` Description string `json:"description,optional"`
TenantId int64 `json:"tenantId,optional"` TenantId int64 `json:"tenantId,optional"`
TaskId int64 `json:"taskId,optional"` TaskId int64 `json:"taskId,optional"`
AdapterIds []string `json:"adapterIds"` AdapterIds []string `json:"adapterIds"`
MatchLabels map[string]string `json:"matchLabels,optional"` MatchLabels map[string]string `json:"matchLabels,optional"`
CardCount int64 `json:"cardCount,optional"` CardCount int64 `json:"cardCount,optional"`
WorkDir string `json:"workDir,optional"` //paratera:workingDir WorkDir string `json:"workDir,optional"` //paratera:workingDir
WallTime string `json:"wallTime,optional"` WallTime string `json:"wallTime,optional"`
CmdScript string `json:"cmdScript,optional"` // paratera:bootScript CmdScript string `json:"cmdScript,optional"` // paratera:bootScript
AppType string `json:"appType,optional"` AppType string `json:"appType,optional"`
AppName string `json:"appName,optional"` // paratera:jobGroupName ac:appname AppName string `json:"appName,optional"` // paratera:jobGroupName ac:appname
Queue string `json:"queue,optional"` Queue string `json:"queue,optional"`
NNode string `json:"nNode,optional"` NNode string `json:"nNode,optional"`
SubmitType string `json:"submitType,optional"` SubmitType string `json:"submitType,optional"`
StdOutFile string `json:"stdOutFile,optional"` StdOutFile string `json:"stdOutFile,optional"`
StdErrFile string `json:"stdErrFile,optional"` StdErrFile string `json:"stdErrFile,optional"`
StdInput string `json:"stdInput,optional"` StdInput string `json:"stdInput,optional"`
Environment map[string]string `json:"environment,optional"` Environment map[string]string `json:"environment,optional"`
ClusterType string `json:"clusterType,optional"` ClusterType string `json:"clusterType,optional"`
} Partition string `json:"partition"`
}
commitHpcTaskResp { commitHpcTaskResp {
TaskId int64 `json:"taskId"` TaskId int64 `json:"taskId"`
Code int32 `json:"code"` Code int32 `json:"code"`
Msg string `json:"msg"` Msg string `json:"msg"`
} }
) )
type ( type (
hpcOverViewReq { hpcOverViewReq {
} }
hpcOverViewResp { hpcOverViewResp {
Code int32 `json:"code"` Code int32 `json:"code"`
Msg string `json:"msg"` Msg string `json:"msg"`
Data HPCOverView `json:"data"` Data HPCOverView `json:"data"`
} }
HPCOverView { HPCOverView {
AdapterCount int32 `json:"adapterCount"` AdapterCount int32 `json:"adapterCount"`
StackCount int32 `json:"stackCount"` StackCount int32 `json:"stackCount"`
ClusterCount int32 `json:"clusterCount"` ClusterCount int32 `json:"clusterCount"`
TaskCount int32 `json:"taskCount"` TaskCount int32 `json:"taskCount"`
} }
) )
type ( type (
hpcAdapterSummaryReq { hpcAdapterSummaryReq {
} }
hpcAdapterSummaryResp { hpcAdapterSummaryResp {
Code int32 `json:"code"` Code int32 `json:"code"`
Msg string `json:"msg"` Msg string `json:"msg"`
Data []HPCAdapterSummary `json:"data"` Data []HPCAdapterSummary `json:"data"`
} }
HPCAdapterSummary { HPCAdapterSummary {
AdapterName string `json:"adapterName"` AdapterName string `json:"adapterName"`
StackCount int32 `json:"stackCount"` StackCount int32 `json:"stackCount"`
ClusterCount int32 `json:"clusterCount"` ClusterCount int32 `json:"clusterCount"`
TaskCount int32 `json:"taskCount"` TaskCount int32 `json:"taskCount"`
} }
) )
type ( type (
hpcJobReq { hpcJobReq {
} }
hpcJobResp { hpcJobResp {
Code int32 `json:"code"` Code int32 `json:"code"`
Msg string `json:"msg"` Msg string `json:"msg"`
Data []Job `json:"data"` Data []Job `json:"data"`
} }
Job { Job {
JobName string `json:"jobName"` JobName string `json:"jobName"`
JobDesc string `json:"jobDesc"` JobDesc string `json:"jobDesc"`
SubmitTime string `json:"submitTime"` SubmitTime string `json:"submitTime"`
JobStatus string `json:"jobStatus"` JobStatus string `json:"jobStatus"`
AdapterName string `json:"adapterName"` AdapterName string `json:"adapterName"`
ClusterName string `json:"clusterName"` ClusterName string `json:"clusterName"`
ClusterType string `json:"clusterType"` ClusterType string `json:"clusterType"`
} }
) )
type ( type (
hpcResourceReq { hpcResourceReq {
} }
hpcResourceResp { hpcResourceResp {
Code int32 `json:"code"` Code int32 `json:"code"`
Msg string `json:"msg"` Msg string `json:"msg"`
Data HPCResource `json:"data"` Data HPCResource `json:"data"`
} }
HPCResource { HPCResource {
GPUCardsTotal float64 `json:"gpuCoresTotal"` GPUCardsTotal float64 `json:"gpuCoresTotal"`
CPUCoresTotal float64 `json:"cpuCoresTotal"` CPUCoresTotal float64 `json:"cpuCoresTotal"`
RAMTotal float64 `json:"ramTotal"` RAMTotal float64 `json:"ramTotal"`
GPUCardsUsed float64 `json:"gpuCoresUsed"` GPUCardsUsed float64 `json:"gpuCoresUsed"`
CPUCoresUsed float64 `json:"cpuCoresUsed"` CPUCoresUsed float64 `json:"cpuCoresUsed"`
RAMUsed float64 `json:"ramUsed"` RAMUsed float64 `json:"ramUsed"`
GPURate float64 `json:"gpuRate"` GPURate float64 `json:"gpuRate"`
CPURate float64 `json:"cpuRate"` CPURate float64 `json:"cpuRate"`
RAMRate float64 `json:"ramRate"` RAMRate float64 `json:"ramRate"`
} }
) )
type QueueAssetsResp { type QueueAssetsResp {
QueueAssets []QueueAsset `json:"queueAsset"` QueueAssets []QueueAsset `json:"queueAsset"`
} }
type QueueAsset { type QueueAsset {
TenantName string `json:"tenantName"` //租户名称 TenantName string `json:"tenantName"` //租户名称
ParticipantId int64 `json:"participantId"` ParticipantId int64 `json:"participantId"`
AclHosts string `json:"aclHosts"` // 可用节点,多个节点用逗号隔开 AclHosts string `json:"aclHosts"` // 可用节点,多个节点用逗号隔开
QueNodes string `json:"queNodes"` //队列节点总数 QueNodes string `json:"queNodes"` //队列节点总数
QueMinNodect string `json:"queMinNodect,omitempty"` //队列最小节点数 QueMinNodect string `json:"queMinNodect,omitempty"` //队列最小节点数
QueMaxNgpus string `json:"queMaxNgpus,omitempty"` //队列最大GPU卡数 QueMaxNgpus string `json:"queMaxNgpus,omitempty"` //队列最大GPU卡数
QueMaxPPN string `json:"queMaxPPN,omitempty"` //使用该队列作业最大CPU核心数 QueMaxPPN string `json:"queMaxPPN,omitempty"` //使用该队列作业最大CPU核心数
QueChargeRate string `json:"queChargeRate,omitempty"` //费率 QueChargeRate string `json:"queChargeRate,omitempty"` //费率
QueMaxNcpus string `json:"queMaxNcpus,omitempty"` //用户最大可用核心数 QueMaxNcpus string `json:"queMaxNcpus,omitempty"` //用户最大可用核心数
QueMaxNdcus string `json:"queMaxNdcus,omitempty"` //队列总DCU卡数 QueMaxNdcus string `json:"queMaxNdcus,omitempty"` //队列总DCU卡数
QueueName string `json:"queueName,omitempty"` //队列名称 QueueName string `json:"queueName,omitempty"` //队列名称
QueMinNcpus string `json:"queMinNcpus,omitempty"` //队列最小CPU核数 QueMinNcpus string `json:"queMinNcpus,omitempty"` //队列最小CPU核数
QueFreeNodes string `json:"queFreeNodes,omitempty"` //队列空闲节点数 QueFreeNodes string `json:"queFreeNodes,omitempty"` //队列空闲节点数
QueMaxNodect string `json:"queMaxNodect,omitempty"` //队列作业最大节点数 QueMaxNodect string `json:"queMaxNodect,omitempty"` //队列作业最大节点数
QueMaxGpuPN string `json:"queMaxGpuPN,omitempty"` //队列单作业最大GPU卡数 QueMaxGpuPN string `json:"queMaxGpuPN,omitempty"` //队列单作业最大GPU卡数
QueMaxWalltime string `json:"queMaxWalltime,omitempty"` //队列最大运行时间 QueMaxWalltime string `json:"queMaxWalltime,omitempty"` //队列最大运行时间
QueMaxDcuPN string `json:"queMaxDcuPN,omitempty"` //队列单作业最大DCU卡数 QueMaxDcuPN string `json:"queMaxDcuPN,omitempty"` //队列单作业最大DCU卡数
QueFreeNcpus string `json:"queFreeNcpus"` //队列空闲cpu数 QueFreeNcpus string `json:"queFreeNcpus"` //队列空闲cpu数
QueNcpus string `json:"queNcpus"` //队列cpu数 QueNcpus string `json:"queNcpus"` //队列cpu数
} }

View File

@ -18,14 +18,12 @@ Monitoring:
#rpc #rpc
THRpcConf: THRpcConf:
target: nacos://10.206.0.12:8848/pcm.th.rpc?timeout=30s&namespaceid=test&groupname=DEFAULT_GROUP&appName=pcm.core.api
# Endpoints: # Endpoints:
# - 127.0.0.1:8888 # - 127.0.0.1:8888
NonBlock: true NonBlock: true
#rpc #rpc
ModelArtsRpcConf: ModelArtsRpcConf:
# target: nacos://10.206.0.12:8848/pcm.modelarts.rpc?timeout=30s&namespaceid=test&groupname=DEFAULT_GROUP&appName=pcm.core.api
Endpoints: Endpoints:
- 127.0.0.1:2002 - 127.0.0.1:2002
NonBlock: true NonBlock: true
@ -33,36 +31,31 @@ ModelArtsRpcConf:
#rpc #rpc
ModelArtsImgRpcConf: ModelArtsImgRpcConf:
target: nacos://10.206.0.12:8848/pcm.modelarts.rpc?timeout=30s&namespaceid=test&groupname=DEFAULT_GROUP&appName=pcm.core.api Endpoints:
# Endpoints: - 127.0.0.1:2007
# - 127.0.0.1:2002 NonBlock: true
NonBlock: true
#rpc #rpc
ACRpcConf: ACRpcConf:
target: nacos://10.206.0.12:8848/pcm.ac.rpc?timeout=30s&namespaceid=test&groupname=DEFAULT_GROUP&appName=pcm.core.api Endpoints:
# Endpoints: - 127.0.0.1:8888
# - 127.0.0.1:8888
NonBlock: true NonBlock: true
Timeout: 50000 Timeout: 50000
#rpc #rpc
CephRpcConf: CephRpcConf:
# target: nacos://10.206.0.12:8848/pcm.ceph.rpc?timeout=30s&namespaceid=test&groupname=DEFAULT_GROUP&appName=pcm.core.api
Endpoints: Endpoints:
- pcm-participant-ceph-service:2008 - pcm-participant-ceph-service:2008
NonBlock: true NonBlock: true
Timeout: 50000 Timeout: 50000
OctopusRpcConf: OctopusRpcConf:
target: nacos://10.206.0.12:8848/pcm.octopus.rpc?timeout=30s&namespaceid=test&groupname=DEFAULT_GROUP&appName=pcm.core.api Endpoints:
# Endpoints: - 127.0.0.1:8888
# - 127.0.0.1:8888
NonBlock: true NonBlock: true
Timeout: 20000 Timeout: 20000
OpenstackRpcConf: OpenstackRpcConf:
# target: nacos://10.206.0.12:8848/pcm.openstack.rpc?timeout=30s&namespaceid=test&groupname=DEFAULT_GROUP&appName=pcm.core.api
Endpoints: Endpoints:
- 127.0.0.1:2010 - 127.0.0.1:2010
NonBlock: true NonBlock: true
@ -70,7 +63,6 @@ OpenstackRpcConf:
# core rpc # core rpc
PcmCoreRpcConf: PcmCoreRpcConf:
# target: nacos://10.206.0.12:8848/pcm.core.rpc?timeout=30s&namespaceid=test&groupname=DEFAULT_GROUP&appName=pcm.core.api
Endpoints: Endpoints:
- pcm-core-rpc:2004 - pcm-core-rpc:2004
NonBlock: true NonBlock: true

View File

@ -90,6 +90,7 @@ func (l *CommitHpcTaskLogic) CommitHpcTask(req *types.CommitHpcTaskReq) (resp *t
StdOutFile: req.StdOutFile, StdOutFile: req.StdOutFile,
StdErrFile: req.StdErrFile, StdErrFile: req.StdErrFile,
StdInput: req.StdInput, StdInput: req.StdInput,
Partition: req.Partition,
DeletedFlag: 0, DeletedFlag: 0,
CreatedBy: 0, CreatedBy: 0,
CreatedTime: time.Now(), CreatedTime: time.Now(),

View File

@ -1242,6 +1242,7 @@ type CommitHpcTaskReq struct {
StdInput string `json:"stdInput,optional"` StdInput string `json:"stdInput,optional"`
Environment map[string]string `json:"environment,optional"` Environment map[string]string `json:"environment,optional"`
ClusterType string `json:"clusterType,optional"` ClusterType string `json:"clusterType,optional"`
Partition string `json:"partition"`
} }
type CommitHpcTaskResp struct { type CommitHpcTaskResp struct {

View File

@ -71,6 +71,7 @@ type (
StdOutFile string `db:"std_out_file"` // 工作路径/std.out.%j StdOutFile string `db:"std_out_file"` // 工作路径/std.out.%j
StdErrFile string `db:"std_err_file"` // 工作路径/std.err.%j StdErrFile string `db:"std_err_file"` // 工作路径/std.err.%j
StdInput string `db:"std_input"` StdInput string `db:"std_input"`
Partition string `db:"partition"`
Environment string `db:"environment"` Environment string `db:"environment"`
DeletedFlag int64 `db:"deleted_flag"` // 是否删除(0-否,1-是) DeletedFlag int64 `db:"deleted_flag"` // 是否删除(0-否,1-是)
CreatedBy int64 `db:"created_by"` // 创建人 CreatedBy int64 `db:"created_by"` // 创建人