From 10e231544c549f6ec233e09b1702bd403cd50111 Mon Sep 17 00:00:00 2001
From: tzwang
Date: Thu, 7 Dec 2023 18:05:34 +0800
Subject: [PATCH] =?UTF-8?q?=E5=AD=98=E7=AE=97=E8=81=94=E5=8A=A8=E8=B0=83?=
 =?UTF-8?q?=E6=95=B42?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Former-commit-id: 8bdfaa2320f3234cc5031da346a5b5490d1ce4d6
---
 api/internal/storeLink/shuguangHpc.go | 162 ++++++++++++++++++++++----
 api/internal/storeLink/storeLink.go   |   1 +
 2 files changed, 139 insertions(+), 24 deletions(-)

diff --git a/api/internal/storeLink/shuguangHpc.go b/api/internal/storeLink/shuguangHpc.go
index 31d84123..0354b712 100644
--- a/api/internal/storeLink/shuguangHpc.go
+++ b/api/internal/storeLink/shuguangHpc.go
@@ -6,6 +6,7 @@ import (
 	"gitlink.org.cn/jcce-pcm/pcm-ac/hpcAC"
 	"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
 	"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/models"
+	"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils"
 	"strings"
 )
 
@@ -16,9 +17,98 @@ type ShuguangHpc struct {
 }
 
 const (
-	SHUGUANGHPC_CUSTOM_RESOURCE_ID = "10240 // 10G"
+	GAP_WALL_TIME_24H    = "24:00:00"
+	TASK_SHUGUANG_PREFIX = "ShuguangHPC"
+	NEWLINE              = "\n"
+	JOBNAME              = "JOBNAME"
+	GAP_CMD_FILE         = "cmd"
+	GAP_NNODE            = "1" // number of nodes
+	GAP_NODE_STRING      = ""
+	GAP_APPNAME          = "BASE"
+	GAP_QUEUE            = "wzhdtest"
+	GAP_WORK_DIR         = "/work/home/acgnnmfbwo/BASE/JOBNAME"
+	GAP_STD_OUT_FILE     = "/work/home/acgnnmfbwo/BASE/JOBNAME/std.out.%j"
+	GAP_STD_ERR_FILE     = "/work/home/acgnnmfbwo/BASE/JOBNAME/std.err.%j"
+	StrJobManagerID      = 1637920656
+	Apptype              = "BASIC"
+	EXPORT               = "export"
+	GAP_NPROC            = "1"
+	GAP_NDCU             = "1"
+	GAP_EXCLUSIVE        = ""
+	GAP_PPN              = ""
+	GAP_NGPU             = ""
+	GAP_MULTI_SUB        = ""
 )
 
+// RESOURCEMAP maps a resource id to the node, process and DCU counts submitted for it.
+var RESOURCEMAP = map[string]ResourceSpec{
+	"Nd99eGNoBFC2ZTycKDlqD37heWTOmrMS": {
+		GAP_NNODE: "1",
+		GAP_NPROC: "2",
+		GAP_NDCU:  "1",
+	},
+	"uAmLkz6jgSZkC6o8JywG7Yo2aiFPPOBO": {
+		GAP_NNODE: "1",
+		GAP_NPROC: "4",
+		GAP_NDCU:  "2",
+	},
+	"D71OZQYrRabJc2nfL2GDWOdLEfbiMzYH": {
+		GAP_NNODE: "1",
+		GAP_NPROC: "8",
+		GAP_NDCU:  "4",
+	},
+	"sXUMrGmgMDFJaLi6dPiB9LkHjFb3lvL5": {
+		GAP_NNODE: "1",
+		GAP_NPROC: "16",
+		GAP_NDCU:  "4",
+	},
+	"ZfCKQKbNbQl9RPwlSyWLah1Gf7Ti7uJA": {
+		GAP_NNODE: "1",
+		GAP_NPROC: "32",
+		GAP_NDCU:  "4",
+	},
+	"cfEI4ulTNo2gYUozzdG59URByUjwLl3x": {
+		GAP_NNODE: "2",
+		GAP_NPROC: "4",
+		GAP_NDCU:  "2",
+	},
+	"vtbkaks8bErhpLRkUDiPDUHq6ssotFpD": {
+		GAP_NNODE: "2",
+		GAP_NPROC: "8",
+		GAP_NDCU:  "4",
+	},
+	"QJXZFJSReVWWQfkvQjGyEq1JpDHN55Oh": {
+		GAP_NNODE: "2",
+		GAP_NPROC: "16",
+		GAP_NDCU:  "4",
+	},
+	"79xSdy48yLbVLl9DqEV6tQ2J6jaHe5KO": {
+		GAP_NNODE: "2",
+		GAP_NPROC: "32",
+		GAP_NDCU:  "8",
+	},
+}
+
+// RESOURCESPECS maps a resource id to a human-readable summary; its keys mirror RESOURCEMAP.
+var RESOURCESPECS = map[string]string{
+	"Nd99eGNoBFC2ZTycKDlqD37heWTOmrMS": "1*NODE, CPU:2, 1*DCU",
+	"uAmLkz6jgSZkC6o8JywG7Yo2aiFPPOBO": "1*NODE, CPU:4, 2*DCU",
+	"D71OZQYrRabJc2nfL2GDWOdLEfbiMzYH": "1*NODE, CPU:8, 4*DCU",
+	"sXUMrGmgMDFJaLi6dPiB9LkHjFb3lvL5": "1*NODE, CPU:16, 4*DCU",
+	"ZfCKQKbNbQl9RPwlSyWLah1Gf7Ti7uJA": "1*NODE, CPU:32, 4*DCU",
+	"cfEI4ulTNo2gYUozzdG59URByUjwLl3x": "2*NODE, CPU:4, 2*DCU",
+	"vtbkaks8bErhpLRkUDiPDUHq6ssotFpD": "2*NODE, CPU:8, 4*DCU",
+	"QJXZFJSReVWWQfkvQjGyEq1JpDHN55Oh": "2*NODE, CPU:16, 4*DCU",
+	"79xSdy48yLbVLl9DqEV6tQ2J6jaHe5KO": "2*NODE, CPU:32, 8*DCU",
+}
+
+// ResourceSpec holds the GAP_NNODE, GAP_NPROC and GAP_NDCU values for one resource id.
+type ResourceSpec struct {
+	GAP_NNODE string
+	GAP_NPROC string
+	GAP_NDCU  string
+}
+
 func NewShuguangHpc(ctx context.Context, svcCtx *svc.ServiceContext, participant *models.StorelinkCenter) *ShuguangHpc {
 	return &ShuguangHpc{ctx: ctx, svcCtx: svcCtx, participant: participant}
 }
@@ -39,7 +129,7 @@ func (s ShuguangHpc) SubmitTask(imageId string, cmd string, envs []string, param
 	// shuguangHpc提交任务
 
 	//判断是否resourceId匹配自定义资源Id
-	if resourceId != SHUGUANGAI_CUSTOM_RESOURCE_ID {
+	if _, ok := RESOURCEMAP[resourceId]; !ok {
 		return nil, errors.New("shuguangHpc资源Id不存在")
 	}
 
@@ -47,37 +137,49 @@ func (s ShuguangHpc) SubmitTask(imageId string, cmd string, envs []string, param
 	var env string
 	for _, e := range envs {
 		s := strings.Split(e, COMMA)
-		env += s[0] + "=" + s[1] + SPACE
+		// Each entry must be "name,value"; reject anything else instead of panicking on s[1].
+		if len(s) < 2 {
+			return nil, errors.New("shuguangHpc env entry must be name,value: " + e)
+		}
+		env += EXPORT + SPACE + s[0] + EQUAL + s[1] + NEWLINE
 	}
 
 	//请求
+	// Build the job name, then substitute it for the JOBNAME placeholder in the path templates.
+	taskName := TASK_SHUGUANG_PREFIX + UNDERSCORE + UNDERSCORE + utils.RandomString(10)
+	workDir := strings.ReplaceAll(GAP_WORK_DIR, JOBNAME, taskName)
+	stdOutFile := strings.ReplaceAll(GAP_STD_OUT_FILE, JOBNAME, taskName)
+	stdErrFile := strings.ReplaceAll(GAP_STD_ERR_FILE, JOBNAME, taskName)
 
 	req := &hpcAC.SubmitJobReq{
-		Apptype:         "",
-		Appname:         "",
-		StrJobManagerID: 0,
+		Apptype:         Apptype,
+		Appname:         GAP_APPNAME,
+		StrJobManagerID: StrJobManagerID,
 		MapAppJobInfo: &hpcAC.MapAppJobInfo{
-			GAP_CMD_FILE:          "echo $TESTDIR; echo $TESTENV; sleep 30",
-			GAP_NNODE:             "1",
-			GAP_NODE_STRING:       "",
-			GAP_SUBMIT_TYPE:       "cmd",
-			GAP_JOB_NAME:          "testSlurmjob1",
-			GAP_WORK_DIR:          "/work/home/acgnnmfbwo/BASE/testSlurmjob1",
-			GAP_QUEUE:             "wzhdtest",
-			GAP_NPROC:             "1",
-			GAP_PPN:               "",
-			GAP_NGPU:              "",
-			GAP_NDCU:              "1",
-			GAP_WALL_TIME:         "01:00:00",
-			GAP_EXCLUSIVE:         "",
-			GAP_APPNAME:           "BASE",
-			GAP_MULTI_SUB:         "",
-			GAP_STD_OUT_FILE:      "/work/home/acgnnmfbwo/BASE/testSlurmjob1/std.out.%j",
-			GAP_STD_ERR_FILE:      "/work/home/acgnnmfbwo/BASE/testSlurmjob1/std.err.%j",
-			GAP_SCHEDULER_OPT_WEB: "export TESTDIR=/bin/emacs\nexport TESTENV=12345",
+			GAP_CMD_FILE:          cmd,
+			GAP_NNODE:             GAP_NNODE,
+			GAP_NODE_STRING:       GAP_NODE_STRING,
+			GAP_SUBMIT_TYPE:       GAP_CMD_FILE,
+			GAP_JOB_NAME:          taskName,
+			GAP_WORK_DIR:          workDir,
+			GAP_QUEUE:             GAP_QUEUE,
+			GAP_NPROC:             GAP_NPROC,
+			GAP_PPN:               GAP_PPN,
+			GAP_NGPU:              GAP_NGPU,
+			GAP_NDCU:              GAP_NDCU,
+			GAP_WALL_TIME:         GAP_WALL_TIME_24H,
+			GAP_EXCLUSIVE:         GAP_EXCLUSIVE,
+			GAP_APPNAME:           GAP_APPNAME,
+			GAP_MULTI_SUB:         GAP_MULTI_SUB,
+			GAP_STD_OUT_FILE:      stdOutFile,
+			GAP_STD_ERR_FILE:      stdErrFile,
+			GAP_SCHEDULER_OPT_WEB: env,
 		},
 	}
 
+	// Overwrite the default node/process/DCU counts with the values for resourceId.
+	updateRequestByResouceId(resourceId, req)
+
 	resp, err := s.svcCtx.ACRpc.SubmitJob(s.ctx, req)
 	if err != nil {
 		return nil, err
@@ -107,3 +209,15 @@ func (s ShuguangHpc) DeleteTask(taskId string) (interface{}, error) {
 	//TODO implement me
 	panic("implement me")
 }
+
+// updateRequestByResouceId overwrites the node, process and DCU counts of req
+// with the RESOURCEMAP entry for resourceId; unknown ids leave req untouched.
+func updateRequestByResouceId(resourceId string, req *hpcAC.SubmitJobReq) {
+	spec, ok := RESOURCEMAP[resourceId]
+	if !ok || req == nil || req.MapAppJobInfo == nil {
+		return
+	}
+	req.MapAppJobInfo.GAP_NNODE = spec.GAP_NNODE
+	req.MapAppJobInfo.GAP_NPROC = spec.GAP_NPROC
+	req.MapAppJobInfo.GAP_NDCU = spec.GAP_NDCU
+}
diff --git a/api/internal/storeLink/storeLink.go b/api/internal/storeLink/storeLink.go
index c405145a..7acc1332 100644
--- a/api/internal/storeLink/storeLink.go
+++ b/api/internal/storeLink/storeLink.go
@@ -41,6 +41,7 @@ const (
 	PY_PARAM_PREFIX = "--"
 	SPACE           = " "
 	UNDERSCORE      = "_"
+	EQUAL           = "="
 	COMMA           = ","
 	TYPE_OCTOPUS    = "1"
 	TYPE_MODELARTS  = "2"