存算联动修改

Former-commit-id: 0af7b90c9109000228f809ddd63ce133e97bbde0
This commit is contained in:
tzwang 2023-10-31 15:24:05 +08:00
parent 1c900f24ba
commit 07bedcda4d
3 changed files with 17 additions and 10 deletions

View File

@ -5,8 +5,10 @@ import (
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/models"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils/timeutils"
"gitlink.org.cn/jcce-pcm/pcm-participant-octopus/octopus"
"strings"
"time"
)
type OctopusLink struct {
@ -20,7 +22,7 @@ type OctopusLink struct {
const (
IMG_NAME_PREFIX = "oct_"
IMG_VERSION_PREFIX = "version_"
TASK_NAME_PREFIX = "trainJob_"
TASK_NAME_PREFIX = "trainJob"
RESOURCE_POOL = "common-pool"
)
@ -120,11 +122,13 @@ func (o *OctopusLink) SubmitTask(imageId string, cmd string, params []string, re
prms = append(prms, &p)
}
dateStr := timeutils.UnixTimeToString(time.Now().Unix())
req := &octopus.CreateTrainJobReq{
Platform: o.participant.Name,
Params: &octopus.CreateTrainJobParam{
ImageId: imageId,
Name: TASK_NAME_PREFIX + utils.RandomString(7),
Name: TASK_NAME_PREFIX + UNDERSCORE + utils.RandomString(7) + UNDERSCORE + dateStr,
ResourcePool: RESOURCE_POOL,
Config: []*octopus.Config{
{

View File

@ -23,8 +23,6 @@ const (
WORKER_NUMBER = 1
WORKER_CPU_NUMBER = 5
WORKER_GPU_NUMBER = 1
PY_PARAM_PREFIX = "--"
SPACE = " "
SHUGUANGAI_CUSTOM_RESOURCE_ID = "WodTB2rJ8SobMgQ1nrtR245jxOrsovFi"
SHUGUANGAI_CUSTOM_RESOURCE_NAME = "1*DCU, CPU:5, 内存:10GB"
)
@ -87,7 +85,7 @@ func (s *ShuguangAi) SubmitTask(imageId string, cmd string, params []string, res
req := &hpcAC.SubmitPytorchTaskReq{
Params: &hpcAC.SubmitPytorchTaskParams{
TaskName: TASK_PYTORCH_PREFIX + "_" + utils.RandomString(7) + dateStr,
TaskName: TASK_PYTORCH_PREFIX + UNDERSCORE + utils.RandomString(7) + UNDERSCORE + dateStr,
WorkPath: WorkPath,
IsDistributed: false,
IsHvd: false,

View File

@ -25,6 +25,9 @@ type Linkage interface {
}
const (
PY_PARAM_PREFIX = "--"
SPACE = " "
UNDERSCORE = "_"
COMMA = ","
TYPE_OCTOPUS = "1"
TYPE_MODELARTS = "2"
@ -248,11 +251,13 @@ func ConvertType[T any](in *T, participant *models.StorelinkCenter) (interface{}
inresp := (interface{})(in).(*hpcAC.GetPytorchTaskResp)
if inresp.Code == "0" {
resp.Success = true
resp.Task.TaskId = inresp.Data.Id
resp.Task.TaskName = inresp.Data.TaskName
resp.Task.TaskStatus = inresp.Data.Status
resp.Task.StartedAt = timeutils.StringToUnixTime(inresp.Data.StartTime)
resp.Task.CompletedAt = timeutils.StringToUnixTime(inresp.Data.EndTime)
var task types.TaskSl
task.TaskId = inresp.Data.Id
task.TaskName = inresp.Data.TaskName
task.TaskStatus = inresp.Data.Status
task.StartedAt = timeutils.StringToUnixTime(inresp.Data.StartTime)
task.CompletedAt = timeutils.StringToUnixTime(inresp.Data.EndTime)
resp.Task = &task
} else {
resp.Success = false
resp.ErrorMsg = inresp.Msg