存算联动调整2
Former-commit-id: 8bdfaa2320f3234cc5031da346a5b5490d1ce4d6
This commit is contained in:
parent
3915d1953a
commit
10e231544c
|
@ -6,6 +6,7 @@ import (
|
||||||
"gitlink.org.cn/jcce-pcm/pcm-ac/hpcAC"
|
"gitlink.org.cn/jcce-pcm/pcm-ac/hpcAC"
|
||||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
|
||||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/models"
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/models"
|
||||||
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils"
|
||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -16,9 +17,95 @@ type ShuguangHpc struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
const (
|
// Defaults used when building a Shuguang HPC job submission request.
const (
	// Job naming. JOBNAME is the placeholder substring that SubmitTask
	// replaces with the generated task name inside the path templates below.
	TASK_SHUGUANG_PREFIX = "ShuguangHPC"
	JOBNAME              = "JOBNAME"

	// Application / job-manager identification sent with every request.
	Apptype         = "BASIC"
	GAP_APPNAME     = "BASE"
	StrJobManagerID = 1637920656

	// Scheduling defaults.
	GAP_QUEUE         = "wzhdtest"
	GAP_WALL_TIME_24H = "24:00:00"
	GAP_CMD_FILE      = "cmd" // also used as the submit type

	// Resource defaults; the node, process and DCU counts are overridden
	// per resource id by updateRequestByResouceId.
	GAP_NNODE       = "1" // number of nodes
	GAP_NPROC       = "1"
	GAP_NDCU        = "1"
	GAP_NODE_STRING = ""
	GAP_EXCLUSIVE   = ""
	GAP_PPN         = ""
	GAP_NGPU        = ""
	GAP_MULTI_SUB   = ""

	// Path templates containing the JOBNAME placeholder.
	// NOTE(review): "%j" is presumably expanded by the scheduler to the
	// job id; confirm against the cluster's scheduler documentation.
	GAP_WORK_DIR     = "/work/home/acgnnmfbwo/BASE/JOBNAME"
	GAP_STD_OUT_FILE = "/work/home/acgnnmfbwo/BASE/JOBNAME/std.out.%j"
	GAP_STD_ERR_FILE = "/work/home/acgnnmfbwo/BASE/JOBNAME/std.err.%j"

	// Text fragments used to render environment variables as
	// "export KEY=VALUE" lines.
	EXPORT  = "export"
	NEWLINE = "\n"
)
|
||||||
|
|
||||||
|
var RESOURCEMAP = map[string]ResourceSpec{
|
||||||
|
"Nd99eGNoBFC2ZTycKDlqD37heWTOmrMS": {
|
||||||
|
GAP_NNODE: "1",
|
||||||
|
GAP_NPROC: "2",
|
||||||
|
GAP_NDCU: "1",
|
||||||
|
},
|
||||||
|
"uAmLkz6jgSZkC6o8JywG7Yo2aiFPPOBO": {
|
||||||
|
GAP_NNODE: "1",
|
||||||
|
GAP_NPROC: "4",
|
||||||
|
GAP_NDCU: "2",
|
||||||
|
},
|
||||||
|
"D71OZQYrRabJc2nfL2GDWOdLEfbiMzYH": {
|
||||||
|
GAP_NNODE: "1",
|
||||||
|
GAP_NPROC: "8",
|
||||||
|
GAP_NDCU: "4",
|
||||||
|
},
|
||||||
|
"sXUMrGmgMDFJaLi6dPiB9LkHjFb3lvL5": {
|
||||||
|
GAP_NNODE: "1",
|
||||||
|
GAP_NPROC: "16",
|
||||||
|
GAP_NDCU: "4",
|
||||||
|
},
|
||||||
|
"ZfCKQKbNbQl9RPwlSyWLah1Gf7Ti7uJA": {
|
||||||
|
GAP_NNODE: "1",
|
||||||
|
GAP_NPROC: "32",
|
||||||
|
GAP_NDCU: "4",
|
||||||
|
},
|
||||||
|
"cfEI4ulTNo2gYUozzdG59URByUjwLl3x": {
|
||||||
|
GAP_NNODE: "2",
|
||||||
|
GAP_NPROC: "4",
|
||||||
|
GAP_NDCU: "2",
|
||||||
|
},
|
||||||
|
"vtbkaks8bErhpLRkUDiPDUHq6ssotFpD": {
|
||||||
|
GAP_NNODE: "2",
|
||||||
|
GAP_NPROC: "8",
|
||||||
|
GAP_NDCU: "4",
|
||||||
|
},
|
||||||
|
"QJXZFJSReVWWQfkvQjGyEq1JpDHN55Oh": {
|
||||||
|
GAP_NNODE: "2",
|
||||||
|
GAP_NPROC: "16",
|
||||||
|
GAP_NDCU: "4",
|
||||||
|
},
|
||||||
|
"79xSdy48yLbVLl9DqEV6tQ2J6jaHe5KO": {
|
||||||
|
GAP_NNODE: "2",
|
||||||
|
GAP_NPROC: "32",
|
||||||
|
GAP_NDCU: "8",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
// RESOURCESPECS maps each supported Shuguang HPC resource id to a
// human-readable description of the specification. SubmitTask also uses it
// as the set of valid resource ids.
var RESOURCESPECS = map[string]string{
	// Single-node specifications.
	"Nd99eGNoBFC2ZTycKDlqD37heWTOmrMS": "1*NODE, CPU:2, 1*DCU",
	"uAmLkz6jgSZkC6o8JywG7Yo2aiFPPOBO": "1*NODE, CPU:4, 2*DCU",
	"D71OZQYrRabJc2nfL2GDWOdLEfbiMzYH": "1*NODE, CPU:8, 4*DCU",
	"sXUMrGmgMDFJaLi6dPiB9LkHjFb3lvL5": "1*NODE, CPU:16, 4*DCU",
	"ZfCKQKbNbQl9RPwlSyWLah1Gf7Ti7uJA": "1*NODE, CPU:32, 4*DCU",

	// Two-node specifications.
	"cfEI4ulTNo2gYUozzdG59URByUjwLl3x": "2*NODE, CPU:4, 2*DCU",
	"vtbkaks8bErhpLRkUDiPDUHq6ssotFpD": "2*NODE, CPU:8, 4*DCU",
	"QJXZFJSReVWWQfkvQjGyEq1JpDHN55Oh": "2*NODE, CPU:16, 4*DCU",
	"79xSdy48yLbVLl9DqEV6tQ2J6jaHe5KO": "2*NODE, CPU:32, 8*DCU",
}
|
||||||
|
|
||||||
|
// ResourceSpec holds the per-resource-id values copied into the matching
// GAP_* fields of a job submission request. All values are kept as strings
// because that is how the request carries them.
type ResourceSpec struct {
	// GAP_NNODE is the number of nodes.
	GAP_NNODE string
	// GAP_NPROC is the process count (listed as "CPU" in RESOURCESPECS).
	GAP_NPROC string
	// GAP_NDCU is the DCU count.
	GAP_NDCU string
}
|
||||||
|
|
||||||
func NewShuguangHpc(ctx context.Context, svcCtx *svc.ServiceContext, participant *models.StorelinkCenter) *ShuguangHpc {
|
func NewShuguangHpc(ctx context.Context, svcCtx *svc.ServiceContext, participant *models.StorelinkCenter) *ShuguangHpc {
|
||||||
return &ShuguangHpc{ctx: ctx, svcCtx: svcCtx, participant: participant}
|
return &ShuguangHpc{ctx: ctx, svcCtx: svcCtx, participant: participant}
|
||||||
}
|
}
|
||||||
|
@ -39,7 +126,8 @@ func (s ShuguangHpc) SubmitTask(imageId string, cmd string, envs []string, param
|
||||||
// shuguangHpc提交任务
|
// shuguangHpc提交任务
|
||||||
|
|
||||||
//判断是否resourceId匹配自定义资源Id
|
//判断是否resourceId匹配自定义资源Id
|
||||||
if resourceId != SHUGUANGAI_CUSTOM_RESOURCE_ID {
|
_, isMapContainsKey := RESOURCESPECS[resourceId]
|
||||||
|
if !isMapContainsKey {
|
||||||
return nil, errors.New("shuguangHpc资源Id不存在")
|
return nil, errors.New("shuguangHpc资源Id不存在")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -47,37 +135,43 @@ func (s ShuguangHpc) SubmitTask(imageId string, cmd string, envs []string, param
|
||||||
var env string
|
var env string
|
||||||
for _, e := range envs {
|
for _, e := range envs {
|
||||||
s := strings.Split(e, COMMA)
|
s := strings.Split(e, COMMA)
|
||||||
env += s[0] + "=" + s[1] + SPACE
|
env += EXPORT + SPACE + s[0] + EQUAL + s[1] + NEWLINE
|
||||||
}
|
}
|
||||||
|
|
||||||
//请求
|
//请求
|
||||||
|
taskName := TASK_SHUGUANG_PREFIX + UNDERSCORE + UNDERSCORE + utils.RandomString(10)
|
||||||
|
GAP_WORK_DIR := strings.Replace(GAP_WORK_DIR, JOBNAME, taskName, -1)
|
||||||
|
GAP_STD_OUT_FILE := strings.Replace(GAP_STD_OUT_FILE, JOBNAME, taskName, -1)
|
||||||
|
GAP_STD_ERR_FILE := strings.Replace(GAP_STD_ERR_FILE, JOBNAME, taskName, -1)
|
||||||
|
|
||||||
req := &hpcAC.SubmitJobReq{
|
req := &hpcAC.SubmitJobReq{
|
||||||
Apptype: "",
|
Apptype: Apptype,
|
||||||
Appname: "",
|
Appname: GAP_APPNAME,
|
||||||
StrJobManagerID: 0,
|
StrJobManagerID: StrJobManagerID,
|
||||||
MapAppJobInfo: &hpcAC.MapAppJobInfo{
|
MapAppJobInfo: &hpcAC.MapAppJobInfo{
|
||||||
GAP_CMD_FILE: "echo $TESTDIR; echo $TESTENV; sleep 30",
|
GAP_CMD_FILE: cmd,
|
||||||
GAP_NNODE: "1",
|
GAP_NNODE: GAP_NNODE,
|
||||||
GAP_NODE_STRING: "",
|
GAP_NODE_STRING: GAP_NODE_STRING,
|
||||||
GAP_SUBMIT_TYPE: "cmd",
|
GAP_SUBMIT_TYPE: GAP_CMD_FILE,
|
||||||
GAP_JOB_NAME: "testSlurmjob1",
|
GAP_JOB_NAME: taskName,
|
||||||
GAP_WORK_DIR: "/work/home/acgnnmfbwo/BASE/testSlurmjob1",
|
GAP_WORK_DIR: GAP_WORK_DIR,
|
||||||
GAP_QUEUE: "wzhdtest",
|
GAP_QUEUE: GAP_QUEUE,
|
||||||
GAP_NPROC: "1",
|
GAP_NPROC: GAP_NPROC,
|
||||||
GAP_PPN: "",
|
GAP_PPN: GAP_PPN,
|
||||||
GAP_NGPU: "",
|
GAP_NGPU: GAP_NGPU,
|
||||||
GAP_NDCU: "1",
|
GAP_NDCU: GAP_NDCU,
|
||||||
GAP_WALL_TIME: "01:00:00",
|
GAP_WALL_TIME: GAP_WALL_TIME_24H,
|
||||||
GAP_EXCLUSIVE: "",
|
GAP_EXCLUSIVE: GAP_EXCLUSIVE,
|
||||||
GAP_APPNAME: "BASE",
|
GAP_APPNAME: GAP_APPNAME,
|
||||||
GAP_MULTI_SUB: "",
|
GAP_MULTI_SUB: GAP_MULTI_SUB,
|
||||||
GAP_STD_OUT_FILE: "/work/home/acgnnmfbwo/BASE/testSlurmjob1/std.out.%j",
|
GAP_STD_OUT_FILE: GAP_STD_OUT_FILE,
|
||||||
GAP_STD_ERR_FILE: "/work/home/acgnnmfbwo/BASE/testSlurmjob1/std.err.%j",
|
GAP_STD_ERR_FILE: GAP_STD_ERR_FILE,
|
||||||
GAP_SCHEDULER_OPT_WEB: "export TESTDIR=/bin/emacs\nexport TESTENV=12345",
|
GAP_SCHEDULER_OPT_WEB: env,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
updateRequestByResouceId(resourceId, req)
|
||||||
|
|
||||||
resp, err := s.svcCtx.ACRpc.SubmitJob(s.ctx, req)
|
resp, err := s.svcCtx.ACRpc.SubmitJob(s.ctx, req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
@ -107,3 +201,10 @@ func (s ShuguangHpc) DeleteTask(taskId string) (interface{}, error) {
|
||||||
//TODO implement me
|
//TODO implement me
|
||||||
panic("implement me")
|
panic("implement me")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func updateRequestByResouceId(resourceId string, req *hpcAC.SubmitJobReq) {
|
||||||
|
spec := RESOURCEMAP[resourceId]
|
||||||
|
req.MapAppJobInfo.GAP_NNODE = spec.GAP_NNODE
|
||||||
|
req.MapAppJobInfo.GAP_NPROC = spec.GAP_NPROC
|
||||||
|
req.MapAppJobInfo.GAP_NDCU = spec.GAP_NDCU
|
||||||
|
}
|
||||||
|
|
|
@ -41,6 +41,7 @@ const (
|
||||||
PY_PARAM_PREFIX = "--"
|
PY_PARAM_PREFIX = "--"
|
||||||
SPACE = " "
|
SPACE = " "
|
||||||
UNDERSCORE = "_"
|
UNDERSCORE = "_"
|
||||||
|
EQUAL = "="
|
||||||
COMMA = ","
|
COMMA = ","
|
||||||
TYPE_OCTOPUS = "1"
|
TYPE_OCTOPUS = "1"
|
||||||
TYPE_MODELARTS = "2"
|
TYPE_MODELARTS = "2"
|
||||||
|
|
Loading…
Reference in New Issue