hpc新增字段
Former-commit-id: 5958b28a8285b807d4cf693d64e9cf313be41235
This commit is contained in:
parent
37afd2a284
commit
c9a754e79b
|
@ -1,137 +1,138 @@
|
|||
syntax = "v1"
|
||||
|
||||
info(
|
||||
title: "type title here"
|
||||
desc: "type desc here"
|
||||
author: "type author here"
|
||||
email: "type email here"
|
||||
version: "type version here"
|
||||
title: "type title here"
|
||||
desc: "type desc here"
|
||||
author: "type author here"
|
||||
email: "type email here"
|
||||
version: "type version here"
|
||||
)
|
||||
|
||||
type (
|
||||
commitHpcTaskReq {
|
||||
Name string `json:"name"` // paratera:jobName
|
||||
Description string `json:"description,optional"`
|
||||
TenantId int64 `json:"tenantId,optional"`
|
||||
TaskId int64 `json:"taskId,optional"`
|
||||
AdapterIds []string `json:"adapterIds"`
|
||||
MatchLabels map[string]string `json:"matchLabels,optional"`
|
||||
CardCount int64 `json:"cardCount,optional"`
|
||||
WorkDir string `json:"workDir,optional"` //paratera:workingDir
|
||||
WallTime string `json:"wallTime,optional"`
|
||||
CmdScript string `json:"cmdScript,optional"` // paratera:bootScript
|
||||
AppType string `json:"appType,optional"`
|
||||
AppName string `json:"appName,optional"` // paratera:jobGroupName ac:appname
|
||||
Queue string `json:"queue,optional"`
|
||||
NNode string `json:"nNode,optional"`
|
||||
SubmitType string `json:"submitType,optional"`
|
||||
StdOutFile string `json:"stdOutFile,optional"`
|
||||
StdErrFile string `json:"stdErrFile,optional"`
|
||||
StdInput string `json:"stdInput,optional"`
|
||||
Environment map[string]string `json:"environment,optional"`
|
||||
ClusterType string `json:"clusterType,optional"`
|
||||
}
|
||||
commitHpcTaskReq {
|
||||
Name string `json:"name"` // paratera:jobName
|
||||
Description string `json:"description,optional"`
|
||||
TenantId int64 `json:"tenantId,optional"`
|
||||
TaskId int64 `json:"taskId,optional"`
|
||||
AdapterIds []string `json:"adapterIds"`
|
||||
MatchLabels map[string]string `json:"matchLabels,optional"`
|
||||
CardCount int64 `json:"cardCount,optional"`
|
||||
WorkDir string `json:"workDir,optional"` //paratera:workingDir
|
||||
WallTime string `json:"wallTime,optional"`
|
||||
CmdScript string `json:"cmdScript,optional"` // paratera:bootScript
|
||||
AppType string `json:"appType,optional"`
|
||||
AppName string `json:"appName,optional"` // paratera:jobGroupName ac:appname
|
||||
Queue string `json:"queue,optional"`
|
||||
NNode string `json:"nNode,optional"`
|
||||
SubmitType string `json:"submitType,optional"`
|
||||
StdOutFile string `json:"stdOutFile,optional"`
|
||||
StdErrFile string `json:"stdErrFile,optional"`
|
||||
StdInput string `json:"stdInput,optional"`
|
||||
Environment map[string]string `json:"environment,optional"`
|
||||
ClusterType string `json:"clusterType,optional"`
|
||||
Partition string `json:"partition"`
|
||||
}
|
||||
|
||||
commitHpcTaskResp {
|
||||
TaskId int64 `json:"taskId"`
|
||||
Code int32 `json:"code"`
|
||||
Msg string `json:"msg"`
|
||||
}
|
||||
commitHpcTaskResp {
|
||||
TaskId int64 `json:"taskId"`
|
||||
Code int32 `json:"code"`
|
||||
Msg string `json:"msg"`
|
||||
}
|
||||
)
|
||||
|
||||
type (
|
||||
hpcOverViewReq {
|
||||
}
|
||||
hpcOverViewResp {
|
||||
Code int32 `json:"code"`
|
||||
Msg string `json:"msg"`
|
||||
Data HPCOverView `json:"data"`
|
||||
}
|
||||
HPCOverView {
|
||||
AdapterCount int32 `json:"adapterCount"`
|
||||
StackCount int32 `json:"stackCount"`
|
||||
ClusterCount int32 `json:"clusterCount"`
|
||||
TaskCount int32 `json:"taskCount"`
|
||||
}
|
||||
hpcOverViewReq {
|
||||
}
|
||||
hpcOverViewResp {
|
||||
Code int32 `json:"code"`
|
||||
Msg string `json:"msg"`
|
||||
Data HPCOverView `json:"data"`
|
||||
}
|
||||
HPCOverView {
|
||||
AdapterCount int32 `json:"adapterCount"`
|
||||
StackCount int32 `json:"stackCount"`
|
||||
ClusterCount int32 `json:"clusterCount"`
|
||||
TaskCount int32 `json:"taskCount"`
|
||||
}
|
||||
)
|
||||
|
||||
type (
|
||||
hpcAdapterSummaryReq {
|
||||
}
|
||||
hpcAdapterSummaryResp {
|
||||
Code int32 `json:"code"`
|
||||
Msg string `json:"msg"`
|
||||
Data []HPCAdapterSummary `json:"data"`
|
||||
}
|
||||
HPCAdapterSummary {
|
||||
AdapterName string `json:"adapterName"`
|
||||
StackCount int32 `json:"stackCount"`
|
||||
ClusterCount int32 `json:"clusterCount"`
|
||||
TaskCount int32 `json:"taskCount"`
|
||||
}
|
||||
hpcAdapterSummaryReq {
|
||||
}
|
||||
hpcAdapterSummaryResp {
|
||||
Code int32 `json:"code"`
|
||||
Msg string `json:"msg"`
|
||||
Data []HPCAdapterSummary `json:"data"`
|
||||
}
|
||||
HPCAdapterSummary {
|
||||
AdapterName string `json:"adapterName"`
|
||||
StackCount int32 `json:"stackCount"`
|
||||
ClusterCount int32 `json:"clusterCount"`
|
||||
TaskCount int32 `json:"taskCount"`
|
||||
}
|
||||
)
|
||||
|
||||
type (
|
||||
hpcJobReq {
|
||||
}
|
||||
hpcJobResp {
|
||||
Code int32 `json:"code"`
|
||||
Msg string `json:"msg"`
|
||||
Data []Job `json:"data"`
|
||||
}
|
||||
Job {
|
||||
JobName string `json:"jobName"`
|
||||
JobDesc string `json:"jobDesc"`
|
||||
SubmitTime string `json:"submitTime"`
|
||||
JobStatus string `json:"jobStatus"`
|
||||
AdapterName string `json:"adapterName"`
|
||||
ClusterName string `json:"clusterName"`
|
||||
ClusterType string `json:"clusterType"`
|
||||
}
|
||||
hpcJobReq {
|
||||
}
|
||||
hpcJobResp {
|
||||
Code int32 `json:"code"`
|
||||
Msg string `json:"msg"`
|
||||
Data []Job `json:"data"`
|
||||
}
|
||||
Job {
|
||||
JobName string `json:"jobName"`
|
||||
JobDesc string `json:"jobDesc"`
|
||||
SubmitTime string `json:"submitTime"`
|
||||
JobStatus string `json:"jobStatus"`
|
||||
AdapterName string `json:"adapterName"`
|
||||
ClusterName string `json:"clusterName"`
|
||||
ClusterType string `json:"clusterType"`
|
||||
}
|
||||
)
|
||||
|
||||
type (
|
||||
hpcResourceReq {
|
||||
}
|
||||
hpcResourceResp {
|
||||
Code int32 `json:"code"`
|
||||
Msg string `json:"msg"`
|
||||
Data HPCResource `json:"data"`
|
||||
}
|
||||
HPCResource {
|
||||
GPUCardsTotal float64 `json:"gpuCoresTotal"`
|
||||
CPUCoresTotal float64 `json:"cpuCoresTotal"`
|
||||
RAMTotal float64 `json:"ramTotal"`
|
||||
GPUCardsUsed float64 `json:"gpuCoresUsed"`
|
||||
CPUCoresUsed float64 `json:"cpuCoresUsed"`
|
||||
RAMUsed float64 `json:"ramUsed"`
|
||||
GPURate float64 `json:"gpuRate"`
|
||||
CPURate float64 `json:"cpuRate"`
|
||||
RAMRate float64 `json:"ramRate"`
|
||||
}
|
||||
hpcResourceReq {
|
||||
}
|
||||
hpcResourceResp {
|
||||
Code int32 `json:"code"`
|
||||
Msg string `json:"msg"`
|
||||
Data HPCResource `json:"data"`
|
||||
}
|
||||
HPCResource {
|
||||
GPUCardsTotal float64 `json:"gpuCoresTotal"`
|
||||
CPUCoresTotal float64 `json:"cpuCoresTotal"`
|
||||
RAMTotal float64 `json:"ramTotal"`
|
||||
GPUCardsUsed float64 `json:"gpuCoresUsed"`
|
||||
CPUCoresUsed float64 `json:"cpuCoresUsed"`
|
||||
RAMUsed float64 `json:"ramUsed"`
|
||||
GPURate float64 `json:"gpuRate"`
|
||||
CPURate float64 `json:"cpuRate"`
|
||||
RAMRate float64 `json:"ramRate"`
|
||||
}
|
||||
)
|
||||
|
||||
type QueueAssetsResp {
|
||||
QueueAssets []QueueAsset `json:"queueAsset"`
|
||||
QueueAssets []QueueAsset `json:"queueAsset"`
|
||||
}
|
||||
type QueueAsset {
|
||||
TenantName string `json:"tenantName"` //租户名称
|
||||
ParticipantId int64 `json:"participantId"`
|
||||
AclHosts string `json:"aclHosts"` // 可用节点,多个节点用逗号隔开
|
||||
QueNodes string `json:"queNodes"` //队列节点总数
|
||||
QueMinNodect string `json:"queMinNodect,omitempty"` //队列最小节点数
|
||||
QueMaxNgpus string `json:"queMaxNgpus,omitempty"` //队列最大GPU卡数
|
||||
QueMaxPPN string `json:"queMaxPPN,omitempty"` //使用该队列作业最大CPU核心数
|
||||
QueChargeRate string `json:"queChargeRate,omitempty"` //费率
|
||||
QueMaxNcpus string `json:"queMaxNcpus,omitempty"` //用户最大可用核心数
|
||||
QueMaxNdcus string `json:"queMaxNdcus,omitempty"` //队列总DCU卡数
|
||||
QueueName string `json:"queueName,omitempty"` //队列名称
|
||||
QueMinNcpus string `json:"queMinNcpus,omitempty"` //队列最小CPU核数
|
||||
QueFreeNodes string `json:"queFreeNodes,omitempty"` //队列空闲节点数
|
||||
QueMaxNodect string `json:"queMaxNodect,omitempty"` //队列作业最大节点数
|
||||
QueMaxGpuPN string `json:"queMaxGpuPN,omitempty"` //队列单作业最大GPU卡数
|
||||
QueMaxWalltime string `json:"queMaxWalltime,omitempty"` //队列最大运行时间
|
||||
QueMaxDcuPN string `json:"queMaxDcuPN,omitempty"` //队列单作业最大DCU卡数
|
||||
QueFreeNcpus string `json:"queFreeNcpus"` //队列空闲cpu数
|
||||
QueNcpus string `json:"queNcpus"` //队列cpu数
|
||||
TenantName string `json:"tenantName"` //租户名称
|
||||
ParticipantId int64 `json:"participantId"`
|
||||
AclHosts string `json:"aclHosts"` // 可用节点,多个节点用逗号隔开
|
||||
QueNodes string `json:"queNodes"` //队列节点总数
|
||||
QueMinNodect string `json:"queMinNodect,omitempty"` //队列最小节点数
|
||||
QueMaxNgpus string `json:"queMaxNgpus,omitempty"` //队列最大GPU卡数
|
||||
QueMaxPPN string `json:"queMaxPPN,omitempty"` //使用该队列作业最大CPU核心数
|
||||
QueChargeRate string `json:"queChargeRate,omitempty"` //费率
|
||||
QueMaxNcpus string `json:"queMaxNcpus,omitempty"` //用户最大可用核心数
|
||||
QueMaxNdcus string `json:"queMaxNdcus,omitempty"` //队列总DCU卡数
|
||||
QueueName string `json:"queueName,omitempty"` //队列名称
|
||||
QueMinNcpus string `json:"queMinNcpus,omitempty"` //队列最小CPU核数
|
||||
QueFreeNodes string `json:"queFreeNodes,omitempty"` //队列空闲节点数
|
||||
QueMaxNodect string `json:"queMaxNodect,omitempty"` //队列作业最大节点数
|
||||
QueMaxGpuPN string `json:"queMaxGpuPN,omitempty"` //队列单作业最大GPU卡数
|
||||
QueMaxWalltime string `json:"queMaxWalltime,omitempty"` //队列最大运行时间
|
||||
QueMaxDcuPN string `json:"queMaxDcuPN,omitempty"` //队列单作业最大DCU卡数
|
||||
QueFreeNcpus string `json:"queFreeNcpus"` //队列空闲cpu数
|
||||
QueNcpus string `json:"queNcpus"` //队列cpu数
|
||||
}
|
22
etc/pcm.yaml
22
etc/pcm.yaml
|
@ -18,14 +18,12 @@ Monitoring:
|
|||
|
||||
#rpc
|
||||
THRpcConf:
|
||||
target: nacos://10.206.0.12:8848/pcm.th.rpc?timeout=30s&namespaceid=test&groupname=DEFAULT_GROUP&appName=pcm.core.api
|
||||
# Endpoints:
|
||||
# - 127.0.0.1:8888
|
||||
NonBlock: true
|
||||
|
||||
#rpc
|
||||
ModelArtsRpcConf:
|
||||
# target: nacos://10.206.0.12:8848/pcm.modelarts.rpc?timeout=30s&namespaceid=test&groupname=DEFAULT_GROUP&appName=pcm.core.api
|
||||
Endpoints:
|
||||
- 127.0.0.1:2002
|
||||
NonBlock: true
|
||||
|
@ -33,36 +31,31 @@ ModelArtsRpcConf:
|
|||
|
||||
#rpc
|
||||
ModelArtsImgRpcConf:
|
||||
target: nacos://10.206.0.12:8848/pcm.modelarts.rpc?timeout=30s&namespaceid=test&groupname=DEFAULT_GROUP&appName=pcm.core.api
|
||||
# Endpoints:
|
||||
# - 127.0.0.1:2002
|
||||
NonBlock: true
|
||||
Endpoints:
|
||||
- 127.0.0.1:2007
|
||||
NonBlock: true
|
||||
|
||||
#rpc
|
||||
ACRpcConf:
|
||||
target: nacos://10.206.0.12:8848/pcm.ac.rpc?timeout=30s&namespaceid=test&groupname=DEFAULT_GROUP&appName=pcm.core.api
|
||||
# Endpoints:
|
||||
# - 127.0.0.1:8888
|
||||
Endpoints:
|
||||
- 127.0.0.1:8888
|
||||
NonBlock: true
|
||||
Timeout: 50000
|
||||
|
||||
#rpc
|
||||
CephRpcConf:
|
||||
# target: nacos://10.206.0.12:8848/pcm.ceph.rpc?timeout=30s&namespaceid=test&groupname=DEFAULT_GROUP&appName=pcm.core.api
|
||||
Endpoints:
|
||||
- pcm-participant-ceph-service:2008
|
||||
NonBlock: true
|
||||
Timeout: 50000
|
||||
|
||||
OctopusRpcConf:
|
||||
target: nacos://10.206.0.12:8848/pcm.octopus.rpc?timeout=30s&namespaceid=test&groupname=DEFAULT_GROUP&appName=pcm.core.api
|
||||
# Endpoints:
|
||||
# - 127.0.0.1:8888
|
||||
Endpoints:
|
||||
- 127.0.0.1:8888
|
||||
NonBlock: true
|
||||
Timeout: 20000
|
||||
|
||||
OpenstackRpcConf:
|
||||
# target: nacos://10.206.0.12:8848/pcm.openstack.rpc?timeout=30s&namespaceid=test&groupname=DEFAULT_GROUP&appName=pcm.core.api
|
||||
Endpoints:
|
||||
- 127.0.0.1:2010
|
||||
NonBlock: true
|
||||
|
@ -70,7 +63,6 @@ OpenstackRpcConf:
|
|||
|
||||
# core rpc
|
||||
PcmCoreRpcConf:
|
||||
# target: nacos://10.206.0.12:8848/pcm.core.rpc?timeout=30s&namespaceid=test&groupname=DEFAULT_GROUP&appName=pcm.core.api
|
||||
Endpoints:
|
||||
- pcm-core-rpc:2004
|
||||
NonBlock: true
|
||||
|
|
|
@ -90,6 +90,7 @@ func (l *CommitHpcTaskLogic) CommitHpcTask(req *types.CommitHpcTaskReq) (resp *t
|
|||
StdOutFile: req.StdOutFile,
|
||||
StdErrFile: req.StdErrFile,
|
||||
StdInput: req.StdInput,
|
||||
Partition: req.Partition,
|
||||
DeletedFlag: 0,
|
||||
CreatedBy: 0,
|
||||
CreatedTime: time.Now(),
|
||||
|
|
|
@ -1242,6 +1242,7 @@ type CommitHpcTaskReq struct {
|
|||
StdInput string `json:"stdInput,optional"`
|
||||
Environment map[string]string `json:"environment,optional"`
|
||||
ClusterType string `json:"clusterType,optional"`
|
||||
Partition string `json:"partition"`
|
||||
}
|
||||
|
||||
type CommitHpcTaskResp struct {
|
||||
|
|
|
@ -71,6 +71,7 @@ type (
|
|||
StdOutFile string `db:"std_out_file"` // 工作路径/std.out.%j
|
||||
StdErrFile string `db:"std_err_file"` // 工作路径/std.err.%j
|
||||
StdInput string `db:"std_input"`
|
||||
Partition string `db:"partition"`
|
||||
Environment string `db:"environment"`
|
||||
DeletedFlag int64 `db:"deleted_flag"` // 是否删除(0-否,1-是)
|
||||
CreatedBy int64 `db:"created_by"` // 创建人
|
||||
|
|
Loading…
Reference in New Issue