diff --git a/desc/hpc/pcm-hpc.api b/desc/hpc/pcm-hpc.api index 8098b417..a2d8c5f7 100644 --- a/desc/hpc/pcm-hpc.api +++ b/desc/hpc/pcm-hpc.api @@ -1,137 +1,138 @@ syntax = "v1" info( - title: "type title here" - desc: "type desc here" - author: "type author here" - email: "type email here" - version: "type version here" + title: "type title here" + desc: "type desc here" + author: "type author here" + email: "type email here" + version: "type version here" ) type ( - commitHpcTaskReq { - Name string `json:"name"` // paratera:jobName - Description string `json:"description,optional"` - TenantId int64 `json:"tenantId,optional"` - TaskId int64 `json:"taskId,optional"` - AdapterIds []string `json:"adapterIds"` - MatchLabels map[string]string `json:"matchLabels,optional"` - CardCount int64 `json:"cardCount,optional"` - WorkDir string `json:"workDir,optional"` //paratera:workingDir - WallTime string `json:"wallTime,optional"` - CmdScript string `json:"cmdScript,optional"` // paratera:bootScript - AppType string `json:"appType,optional"` - AppName string `json:"appName,optional"` // paratera:jobGroupName ac:appname - Queue string `json:"queue,optional"` - NNode string `json:"nNode,optional"` - SubmitType string `json:"submitType,optional"` - StdOutFile string `json:"stdOutFile,optional"` - StdErrFile string `json:"stdErrFile,optional"` - StdInput string `json:"stdInput,optional"` - Environment map[string]string `json:"environment,optional"` - ClusterType string `json:"clusterType,optional"` - } + commitHpcTaskReq { + Name string `json:"name"` // paratera:jobName + Description string `json:"description,optional"` + TenantId int64 `json:"tenantId,optional"` + TaskId int64 `json:"taskId,optional"` + AdapterIds []string `json:"adapterIds"` + MatchLabels map[string]string `json:"matchLabels,optional"` + CardCount int64 `json:"cardCount,optional"` + WorkDir string `json:"workDir,optional"` //paratera:workingDir + WallTime string `json:"wallTime,optional"` + CmdScript string `json:"cmdScript,optional"` // paratera:bootScript + AppType string `json:"appType,optional"` + AppName string `json:"appName,optional"` // paratera:jobGroupName ac:appname + Queue string `json:"queue,optional"` + NNode string `json:"nNode,optional"` + SubmitType string `json:"submitType,optional"` + StdOutFile string `json:"stdOutFile,optional"` + StdErrFile string `json:"stdErrFile,optional"` + StdInput string `json:"stdInput,optional"` + Environment map[string]string `json:"environment,optional"` + ClusterType string `json:"clusterType,optional"` + Partition string `json:"partition"` + } - commitHpcTaskResp { - TaskId int64 `json:"taskId"` - Code int32 `json:"code"` - Msg string `json:"msg"` - } + commitHpcTaskResp { + TaskId int64 `json:"taskId"` + Code int32 `json:"code"` + Msg string `json:"msg"` + } ) type ( - hpcOverViewReq { - } - hpcOverViewResp { - Code int32 `json:"code"` - Msg string `json:"msg"` - Data HPCOverView `json:"data"` - } - HPCOverView { - AdapterCount int32 `json:"adapterCount"` - StackCount int32 `json:"stackCount"` - ClusterCount int32 `json:"clusterCount"` - TaskCount int32 `json:"taskCount"` - } + hpcOverViewReq { + } + hpcOverViewResp { + Code int32 `json:"code"` + Msg string `json:"msg"` + Data HPCOverView `json:"data"` + } + HPCOverView { + AdapterCount int32 `json:"adapterCount"` + StackCount int32 `json:"stackCount"` + ClusterCount int32 `json:"clusterCount"` + TaskCount int32 `json:"taskCount"` + } ) type ( - hpcAdapterSummaryReq { - } - hpcAdapterSummaryResp { - Code int32 `json:"code"` - Msg string `json:"msg"` - Data []HPCAdapterSummary `json:"data"` - } - HPCAdapterSummary { - AdapterName string `json:"adapterName"` - StackCount int32 `json:"stackCount"` - ClusterCount int32 `json:"clusterCount"` - TaskCount int32 `json:"taskCount"` - } + hpcAdapterSummaryReq { + } + hpcAdapterSummaryResp { + Code int32 `json:"code"` + Msg string `json:"msg"` + Data []HPCAdapterSummary `json:"data"` + } + HPCAdapterSummary { + AdapterName string `json:"adapterName"` + StackCount int32 `json:"stackCount"` + ClusterCount int32 `json:"clusterCount"` + TaskCount int32 `json:"taskCount"` + } ) type ( - hpcJobReq { - } - hpcJobResp { - Code int32 `json:"code"` - Msg string `json:"msg"` - Data []Job `json:"data"` - } - Job { - JobName string `json:"jobName"` - JobDesc string `json:"jobDesc"` - SubmitTime string `json:"submitTime"` - JobStatus string `json:"jobStatus"` - AdapterName string `json:"adapterName"` - ClusterName string `json:"clusterName"` - ClusterType string `json:"clusterType"` - } + hpcJobReq { + } + hpcJobResp { + Code int32 `json:"code"` + Msg string `json:"msg"` + Data []Job `json:"data"` + } + Job { + JobName string `json:"jobName"` + JobDesc string `json:"jobDesc"` + SubmitTime string `json:"submitTime"` + JobStatus string `json:"jobStatus"` + AdapterName string `json:"adapterName"` + ClusterName string `json:"clusterName"` + ClusterType string `json:"clusterType"` + } ) type ( - hpcResourceReq { - } - hpcResourceResp { - Code int32 `json:"code"` - Msg string `json:"msg"` - Data HPCResource `json:"data"` - } - HPCResource { - GPUCardsTotal float64 `json:"gpuCoresTotal"` - CPUCoresTotal float64 `json:"cpuCoresTotal"` - RAMTotal float64 `json:"ramTotal"` - GPUCardsUsed float64 `json:"gpuCoresUsed"` - CPUCoresUsed float64 `json:"cpuCoresUsed"` - RAMUsed float64 `json:"ramUsed"` - GPURate float64 `json:"gpuRate"` - CPURate float64 `json:"cpuRate"` - RAMRate float64 `json:"ramRate"` - } + hpcResourceReq { + } + hpcResourceResp { + Code int32 `json:"code"` + Msg string `json:"msg"` + Data HPCResource `json:"data"` + } + HPCResource { + GPUCardsTotal float64 `json:"gpuCoresTotal"` + CPUCoresTotal float64 `json:"cpuCoresTotal"` + RAMTotal float64 `json:"ramTotal"` + GPUCardsUsed float64 `json:"gpuCoresUsed"` + CPUCoresUsed float64 `json:"cpuCoresUsed"` + RAMUsed float64 `json:"ramUsed"` + GPURate float64 `json:"gpuRate"` + CPURate float64 `json:"cpuRate"` + RAMRate float64 `json:"ramRate"` + } ) type QueueAssetsResp { - QueueAssets []QueueAsset `json:"queueAsset"` + QueueAssets []QueueAsset `json:"queueAsset"` } type QueueAsset { - TenantName string `json:"tenantName"` //租户名称 - ParticipantId int64 `json:"participantId"` - AclHosts string `json:"aclHosts"` // 可用节点,多个节点用逗号隔开 - QueNodes string `json:"queNodes"` //队列节点总数 - QueMinNodect string `json:"queMinNodect,omitempty"` //队列最小节点数 - QueMaxNgpus string `json:"queMaxNgpus,omitempty"` //队列最大GPU卡数 - QueMaxPPN string `json:"queMaxPPN,omitempty"` //使用该队列作业最大CPU核心数 - QueChargeRate string `json:"queChargeRate,omitempty"` //费率 - QueMaxNcpus string `json:"queMaxNcpus,omitempty"` //用户最大可用核心数 - QueMaxNdcus string `json:"queMaxNdcus,omitempty"` //队列总DCU卡数 - QueueName string `json:"queueName,omitempty"` //队列名称 - QueMinNcpus string `json:"queMinNcpus,omitempty"` //队列最小CPU核数 - QueFreeNodes string `json:"queFreeNodes,omitempty"` //队列空闲节点数 - QueMaxNodect string `json:"queMaxNodect,omitempty"` //队列作业最大节点数 - QueMaxGpuPN string `json:"queMaxGpuPN,omitempty"` //队列单作业最大GPU卡数 - QueMaxWalltime string `json:"queMaxWalltime,omitempty"` //队列最大运行时间 - QueMaxDcuPN string `json:"queMaxDcuPN,omitempty"` //队列单作业最大DCU卡数 - QueFreeNcpus string `json:"queFreeNcpus"` //队列空闲cpu数 - QueNcpus string `json:"queNcpus"` //队列cpu数 + TenantName string `json:"tenantName"` //租户名称 + ParticipantId int64 `json:"participantId"` + AclHosts string `json:"aclHosts"` // 可用节点,多个节点用逗号隔开 + QueNodes string `json:"queNodes"` //队列节点总数 + QueMinNodect string `json:"queMinNodect,omitempty"` //队列最小节点数 + QueMaxNgpus string `json:"queMaxNgpus,omitempty"` //队列最大GPU卡数 + QueMaxPPN string `json:"queMaxPPN,omitempty"` //使用该队列作业最大CPU核心数 + QueChargeRate string `json:"queChargeRate,omitempty"` //费率 + QueMaxNcpus string `json:"queMaxNcpus,omitempty"` //用户最大可用核心数 + QueMaxNdcus string `json:"queMaxNdcus,omitempty"` //队列总DCU卡数 + QueueName string `json:"queueName,omitempty"` //队列名称 + QueMinNcpus string `json:"queMinNcpus,omitempty"` //队列最小CPU核数 + QueFreeNodes string `json:"queFreeNodes,omitempty"` //队列空闲节点数 + QueMaxNodect string `json:"queMaxNodect,omitempty"` //队列作业最大节点数 + QueMaxGpuPN string `json:"queMaxGpuPN,omitempty"` //队列单作业最大GPU卡数 + QueMaxWalltime string `json:"queMaxWalltime,omitempty"` //队列最大运行时间 + QueMaxDcuPN string `json:"queMaxDcuPN,omitempty"` //队列单作业最大DCU卡数 + QueFreeNcpus string `json:"queFreeNcpus"` //队列空闲cpu数 + QueNcpus string `json:"queNcpus"` //队列cpu数 } \ No newline at end of file diff --git a/etc/pcm.yaml b/etc/pcm.yaml index e2e14f99..11ef6519 100644 --- a/etc/pcm.yaml +++ b/etc/pcm.yaml @@ -18,14 +18,12 @@ Monitoring: #rpc THRpcConf: - target: nacos://10.206.0.12:8848/pcm.th.rpc?timeout=30s&namespaceid=test&groupname=DEFAULT_GROUP&appName=pcm.core.api # Endpoints: # - 127.0.0.1:8888 NonBlock: true #rpc ModelArtsRpcConf: - # target: nacos://10.206.0.12:8848/pcm.modelarts.rpc?timeout=30s&namespaceid=test&groupname=DEFAULT_GROUP&appName=pcm.core.api Endpoints: - 127.0.0.1:2002 NonBlock: true @@ -33,36 +31,31 @@ ModelArtsRpcConf: #rpc ModelArtsImgRpcConf: - target: nacos://10.206.0.12:8848/pcm.modelarts.rpc?timeout=30s&namespaceid=test&groupname=DEFAULT_GROUP&appName=pcm.core.api - # Endpoints: - # - 127.0.0.1:2002 - NonBlock: true + Endpoints: + - 127.0.0.1:2007 + NonBlock: true #rpc ACRpcConf: - target: nacos://10.206.0.12:8848/pcm.ac.rpc?timeout=30s&namespaceid=test&groupname=DEFAULT_GROUP&appName=pcm.core.api - # Endpoints: - # - 127.0.0.1:8888 + Endpoints: + - 127.0.0.1:8888 NonBlock: true Timeout: 50000 #rpc CephRpcConf: - # target: nacos://10.206.0.12:8848/pcm.ceph.rpc?timeout=30s&namespaceid=test&groupname=DEFAULT_GROUP&appName=pcm.core.api Endpoints: - pcm-participant-ceph-service:2008 NonBlock: true Timeout: 50000 OctopusRpcConf: - target: nacos://10.206.0.12:8848/pcm.octopus.rpc?timeout=30s&namespaceid=test&groupname=DEFAULT_GROUP&appName=pcm.core.api - # Endpoints: - # - 127.0.0.1:8888 + Endpoints: + - 127.0.0.1:8888 NonBlock: true Timeout: 20000 OpenstackRpcConf: - # target: nacos://10.206.0.12:8848/pcm.openstack.rpc?timeout=30s&namespaceid=test&groupname=DEFAULT_GROUP&appName=pcm.core.api Endpoints: - 127.0.0.1:2010 NonBlock: true @@ -70,7 +63,6 @@ OpenstackRpcConf: # core rpc PcmCoreRpcConf: - # target: nacos://10.206.0.12:8848/pcm.core.rpc?timeout=30s&namespaceid=test&groupname=DEFAULT_GROUP&appName=pcm.core.api Endpoints: - pcm-core-rpc:2004 NonBlock: true diff --git a/internal/logic/hpc/commithpctasklogic.go b/internal/logic/hpc/commithpctasklogic.go index 7ce450a1..4ec29f41 100644 --- a/internal/logic/hpc/commithpctasklogic.go +++ b/internal/logic/hpc/commithpctasklogic.go @@ -90,6 +90,7 @@ func (l *CommitHpcTaskLogic) CommitHpcTask(req *types.CommitHpcTaskReq) (resp *t StdOutFile: req.StdOutFile, StdErrFile: req.StdErrFile, StdInput: req.StdInput, + Partition: req.Partition, DeletedFlag: 0, CreatedBy: 0, CreatedTime: time.Now(), diff --git a/internal/types/types.go b/internal/types/types.go index 7d5da4ef..8d364247 100644 --- a/internal/types/types.go +++ b/internal/types/types.go @@ -1242,6 +1242,7 @@ type CommitHpcTaskReq struct { StdInput string `json:"stdInput,optional"` Environment map[string]string `json:"environment,optional"` ClusterType string `json:"clusterType,optional"` + Partition string `json:"partition"` } type CommitHpcTaskResp struct { diff --git a/pkg/models/taskhpcmodel_gen.go b/pkg/models/taskhpcmodel_gen.go index 05b41aca..4438709b 100644 --- a/pkg/models/taskhpcmodel_gen.go +++ b/pkg/models/taskhpcmodel_gen.go @@ -71,6 +71,7 @@ type ( StdOutFile string `db:"std_out_file"` // 工作路径/std.err.%j StdErrFile string `db:"std_err_file"` // 工作路径/std.err.%j StdInput string `db:"std_input"` + Partition string `db:"partition"` Environment string `db:"environment"` DeletedFlag int64 `db:"deleted_flag"` // 是否删除(0-否,1-是) CreatedBy int64 `db:"created_by"` // 创建人