存算联动修改

Former-commit-id: 0af7b90c9109000228f809ddd63ce133e97bbde0
This commit is contained in:
tzwang 2023-10-31 15:24:05 +08:00
parent 1c900f24ba
commit 07bedcda4d
3 changed files with 17 additions and 10 deletions

View File

@@ -5,8 +5,10 @@ import (
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/models" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/models"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils/timeutils"
"gitlink.org.cn/jcce-pcm/pcm-participant-octopus/octopus" "gitlink.org.cn/jcce-pcm/pcm-participant-octopus/octopus"
"strings" "strings"
"time"
) )
type OctopusLink struct { type OctopusLink struct {
@@ -20,7 +22,7 @@ type OctopusLink struct {
const ( const (
IMG_NAME_PREFIX = "oct_" IMG_NAME_PREFIX = "oct_"
IMG_VERSION_PREFIX = "version_" IMG_VERSION_PREFIX = "version_"
TASK_NAME_PREFIX = "trainJob_" TASK_NAME_PREFIX = "trainJob"
RESOURCE_POOL = "common-pool" RESOURCE_POOL = "common-pool"
) )
@@ -120,11 +122,13 @@ func (o *OctopusLink) SubmitTask(imageId string, cmd string, params []string, re
prms = append(prms, &p) prms = append(prms, &p)
} }
dateStr := timeutils.UnixTimeToString(time.Now().Unix())
req := &octopus.CreateTrainJobReq{ req := &octopus.CreateTrainJobReq{
Platform: o.participant.Name, Platform: o.participant.Name,
Params: &octopus.CreateTrainJobParam{ Params: &octopus.CreateTrainJobParam{
ImageId: imageId, ImageId: imageId,
Name: TASK_NAME_PREFIX + utils.RandomString(7), Name: TASK_NAME_PREFIX + UNDERSCORE + utils.RandomString(7) + UNDERSCORE + dateStr,
ResourcePool: RESOURCE_POOL, ResourcePool: RESOURCE_POOL,
Config: []*octopus.Config{ Config: []*octopus.Config{
{ {

View File

@@ -23,8 +23,6 @@ const (
WORKER_NUMBER = 1 WORKER_NUMBER = 1
WORKER_CPU_NUMBER = 5 WORKER_CPU_NUMBER = 5
WORKER_GPU_NUMBER = 1 WORKER_GPU_NUMBER = 1
PY_PARAM_PREFIX = "--"
SPACE = " "
SHUGUANGAI_CUSTOM_RESOURCE_ID = "WodTB2rJ8SobMgQ1nrtR245jxOrsovFi" SHUGUANGAI_CUSTOM_RESOURCE_ID = "WodTB2rJ8SobMgQ1nrtR245jxOrsovFi"
SHUGUANGAI_CUSTOM_RESOURCE_NAME = "1*DCU, CPU:5, 内存:10GB" SHUGUANGAI_CUSTOM_RESOURCE_NAME = "1*DCU, CPU:5, 内存:10GB"
) )
@@ -87,7 +85,7 @@ func (s *ShuguangAi) SubmitTask(imageId string, cmd string, params []string, res
req := &hpcAC.SubmitPytorchTaskReq{ req := &hpcAC.SubmitPytorchTaskReq{
Params: &hpcAC.SubmitPytorchTaskParams{ Params: &hpcAC.SubmitPytorchTaskParams{
TaskName: TASK_PYTORCH_PREFIX + "_" + utils.RandomString(7) + dateStr, TaskName: TASK_PYTORCH_PREFIX + UNDERSCORE + utils.RandomString(7) + UNDERSCORE + dateStr,
WorkPath: WorkPath, WorkPath: WorkPath,
IsDistributed: false, IsDistributed: false,
IsHvd: false, IsHvd: false,

View File

@@ -25,6 +25,9 @@ type Linkage interface {
} }
const ( const (
PY_PARAM_PREFIX = "--"
SPACE = " "
UNDERSCORE = "_"
COMMA = "," COMMA = ","
TYPE_OCTOPUS = "1" TYPE_OCTOPUS = "1"
TYPE_MODELARTS = "2" TYPE_MODELARTS = "2"
@@ -248,11 +251,13 @@ func ConvertType[T any](in *T, participant *models.StorelinkCenter) (interface{}
inresp := (interface{})(in).(*hpcAC.GetPytorchTaskResp) inresp := (interface{})(in).(*hpcAC.GetPytorchTaskResp)
if inresp.Code == "0" { if inresp.Code == "0" {
resp.Success = true resp.Success = true
resp.Task.TaskId = inresp.Data.Id var task types.TaskSl
resp.Task.TaskName = inresp.Data.TaskName task.TaskId = inresp.Data.Id
resp.Task.TaskStatus = inresp.Data.Status task.TaskName = inresp.Data.TaskName
resp.Task.StartedAt = timeutils.StringToUnixTime(inresp.Data.StartTime) task.TaskStatus = inresp.Data.Status
resp.Task.CompletedAt = timeutils.StringToUnixTime(inresp.Data.EndTime) task.StartedAt = timeutils.StringToUnixTime(inresp.Data.StartTime)
task.CompletedAt = timeutils.StringToUnixTime(inresp.Data.EndTime)
resp.Task = &task
} else { } else {
resp.Success = false resp.Success = false
resp.ErrorMsg = inresp.Msg resp.ErrorMsg = inresp.Msg