From a27df05a723e83e615922c3de12009c234451e4f Mon Sep 17 00:00:00 2001 From: tzwang Date: Thu, 26 Oct 2023 11:26:23 +0800 Subject: [PATCH] =?UTF-8?q?=E5=AD=98=E7=AE=97=E8=81=94=E5=8A=A8=E4=BF=AE?= =?UTF-8?q?=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Former-commit-id: b5db884b426210a385394d5e4ed10bdbc4484e17 --- api/desc/storelink/pcm-storelink.api | 12 +-- api/internal/storeLink/shuguangai.go | 20 +++- api/internal/storeLink/storeLink.go | 136 ++++++++++++++++++--------- pkg/utils/timeutils/time.go | 8 ++ 4 files changed, 121 insertions(+), 55 deletions(-) diff --git a/api/desc/storelink/pcm-storelink.api b/api/desc/storelink/pcm-storelink.api index e6ecddec..021087b5 100644 --- a/api/desc/storelink/pcm-storelink.api +++ b/api/desc/storelink/pcm-storelink.api @@ -15,7 +15,7 @@ type ( UploadLinkImageResp { Success bool `json:"success"` - Image ImageSl `json:"image"` + Image *ImageSl `json:"image"` ErrorMsg string `json:"errorMsg"` } @@ -31,7 +31,7 @@ type ( GetLinkImageListResp { Success bool `json:"success"` - Images []ImageSl `json:"images"` + Images []*ImageSl `json:"images"` ErrorMsg string `json:"errorMsg"` } @@ -49,7 +49,7 @@ type ( PartId int64 `json:"partId"` ImageId string `json:"imageId"` Cmd string `json:"cmd"` - Params []ParamSl `json:"params"` + Params []*ParamSl `json:"params"` ResourceId string `json:"resourceId"` } @@ -71,7 +71,7 @@ type ( GetLinkTaskResp { Success bool `json:"success"` - Task TaskSl `json:"task"` + Task *TaskSl `json:"task"` ErrorMsg string `json:"errorMsg"` } @@ -99,7 +99,7 @@ type ( GetParticipantsResp { Success bool `json:"success"` - Participants []ParticipantSl `json:"participant"` + Participants []*ParticipantSl `json:"participant"` } GetResourceSpecsReq { @@ -108,7 +108,7 @@ type ( GetResourceSpecsResp { Success bool `json:"success"` - ResourceSpecs []ResourceSpecSl `json:"resourceSpecs"` + ResourceSpecs []*ResourceSpecSl `json:"resourceSpecs"` } ResourceSpecSl { diff --git a/api/internal/storeLink/shuguangai.go b/api/internal/storeLink/shuguangai.go index dc33569d..1c42ba3f 100644 --- a/api/internal/storeLink/shuguangai.go +++ b/api/internal/storeLink/shuguangai.go @@ -11,6 +11,13 @@ type ShuguangAi struct { svcCtx *svc.ServiceContext } +const ( + DCU = "dcu" + PYTORCH = "Pytorch" + TENSORFLOW = "Tensorflow" + Wzhdtest = "wzhdtest" +) + func NewShuguangAi(ctx context.Context, svcCtx *svc.ServiceContext) *ShuguangAi { return &ShuguangAi{ctx: ctx, svcCtx: svcCtx} } @@ -25,7 +32,10 @@ func (s *ShuguangAi) DeleteImage(imageId string) (interface{}, error) { func (s *ShuguangAi) QueryImageList() (interface{}, error) { // shuguangAi获取镜像列表 - req := &hpcAC.GetImageListAiReq{} + req := &hpcAC.GetImageListAiReq{ + AcceleratorType: DCU, + TaskType: PYTORCH, + } resp, err := s.svcCtx.ACRpc.GetImageListAi(s.ctx, req) if err != nil { return nil, err @@ -60,7 +70,9 @@ func (s *ShuguangAi) SubmitTask(imageId string, cmd string, params []string, res func (s *ShuguangAi) QueryTask(taskId string) (interface{}, error) { // shuguangAi获取任务 - req := &hpcAC.GetPytorchTaskReq{} + req := &hpcAC.GetPytorchTaskReq{ + Id: taskId, + } resp, err := s.svcCtx.ACRpc.GetPytorchTask(s.ctx, req) if err != nil { return nil, err @@ -77,7 +89,9 @@ func (s *ShuguangAi) QueryTask(taskId string) (interface{}, error) { func (s *ShuguangAi) DeleteTask(taskId string) (interface{}, error) { // shuguangAi删除任务 - req := &hpcAC.DeleteTaskAiReq{} + req := &hpcAC.DeleteTaskAiReq{ + Ids: taskId, + } resp, err := s.svcCtx.ACRpc.DeleteTaskAi(s.ctx, req) if err != nil { return nil, err diff --git a/api/internal/storeLink/storeLink.go b/api/internal/storeLink/storeLink.go index 8c75728f..6a0e7e9c 100644 --- a/api/internal/storeLink/storeLink.go +++ b/api/internal/storeLink/storeLink.go @@ -6,6 +6,8 @@ import ( "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/types" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/models" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils/timeutils" + "gitlink.org.cn/jcce-pcm/pcm-participant-ac/hpcAC" "gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/modelarts" "gitlink.org.cn/jcce-pcm/pcm-participant-octopus/octopus" "gorm.io/gorm" @@ -75,7 +77,6 @@ func ConvertType[T any](in *T) (interface{}, error) { } return resp, nil - case *octopus.GetUserImageListResp: var resp types.GetLinkImageListResp inresp := (interface{})(in).(*octopus.GetUserImageListResp) @@ -94,7 +95,37 @@ func ConvertType[T any](in *T) (interface{}, error) { resp.Images = append(resp.Images, image) } return resp, nil + case *modelarts.ListReposDetailsResp: + var resp types.GetLinkImageListResp + inresp := (interface{})(in).(*modelarts.ListReposDetailsResp) + if inresp.Errors != nil { + resp.Success = false + resp.ErrorMsg = inresp.Errors[0].ErrorMessage + resp.Images = nil + return resp, nil + } + + for _, v := range inresp.Items { + for _, r := range v.Tags { + var image types.ImageSl + image.ImageId = v.Namespace + "/" + v.Name + ":" + r + image.ImageName = v.Name + image.ImageStatus = "succeed" + resp.Images = append(resp.Images, image) + } + } + return resp, nil + case *hpcAC.GetImageListAiResp: + var resp types.GetLinkImageListResp + inresp := (interface{})(in).(*hpcAC.GetImageListAiResp) + + if inresp.Code == "0" { + resp.Success = true + } else { + resp.Success = false + } + return resp, nil case *octopus.DeleteImageResp: var resp types.DeleteLinkImageResp inresp := (interface{})(in).(*octopus.DeleteImageResp) @@ -118,7 +149,29 @@ func ConvertType[T any](in *T) (interface{}, error) { resp.TaskId = inresp.Payload.JobId return resp, nil + case *modelarts.CreateTrainingJobResp: + var resp types.SubmitLinkTaskResp + inresp := (interface{})(in).(*modelarts.CreateTrainingJobResp) + if inresp.ErrorMsg != "" { + resp.ErrorMsg = inresp.ErrorMsg + resp.Success = false + return resp, nil + } + + resp.TaskId = inresp.Metadata.Id + + return resp, nil + case *hpcAC.SubmitTaskAiResp: + var resp types.SubmitLinkTaskResp + inresp := (interface{})(in).(*hpcAC.SubmitTaskAiResp) + + if inresp.Code == "0" { + resp.Success = true + } else { + resp.Success = false + } + return resp, nil case *octopus.GetTrainJobResp: var resp types.GetLinkTaskResp inresp := (interface{})(in).(*octopus.GetTrainJobResp) @@ -134,51 +187,6 @@ func ConvertType[T any](in *T) (interface{}, error) { resp.Task.CompletedAt = inresp.Payload.TrainJob.CompletedAt resp.Task.TaskStatus = inresp.Payload.TrainJob.Status - return resp, nil - - case *octopus.DeleteTrainJobResp: - var resp types.DeleteLinkTaskResp - inresp := (interface{})(in).(*octopus.DeleteTrainJobResp) - resp.Success = inresp.Success - if !resp.Success { - resp.ErrorMsg = inresp.Error.Message - return resp, nil - } - - return resp, nil - case *modelarts.ListReposDetailsResp: - var resp types.GetLinkImageListResp - inresp := (interface{})(in).(*modelarts.ListReposDetailsResp) - - if inresp.Errors != nil { - resp.Success = false - resp.ErrorMsg = inresp.Errors[0].ErrorMessage - resp.Images = nil - return resp, nil - } - - for _, v := range inresp.Items { - for _, r := range v.Tags { - var image types.ImageSl - image.ImageId = v.Namespace + "/" + v.Name + ":" + r - image.ImageName = v.Name - image.ImageStatus = "succeed" - resp.Images = append(resp.Images, image) - } - } - return resp, nil - case *modelarts.CreateTrainingJobResp: - var resp types.SubmitLinkTaskResp - inresp := (interface{})(in).(*modelarts.CreateTrainingJobResp) - - if inresp.ErrorMsg != "" { - resp.ErrorMsg = inresp.ErrorMsg - resp.Success = false - return resp, nil - } - - resp.TaskId = inresp.Metadata.Id - return resp, nil case *modelarts.JobResponse: var resp types.GetLinkTaskResp @@ -195,6 +203,32 @@ func ConvertType[T any](in *T) (interface{}, error) { resp.Task.CompletedAt = int64(inresp.Status.Duration) resp.Task.TaskStatus = inresp.Status.Phase + return resp, nil + case *hpcAC.GetPytorchTaskResp: + var resp types.GetLinkTaskResp + inresp := (interface{})(in).(*hpcAC.GetPytorchTaskResp) + if inresp.Code == "0" { + resp.Success = true + resp.Task.TaskId = inresp.Data.Id + resp.Task.TaskName = inresp.Data.TaskName + resp.Task.TaskStatus = inresp.Data.Status + resp.Task.StartedAt = timeutils.StringToUnixTime(inresp.Data.StartTime) + resp.Task.CompletedAt = timeutils.StringToUnixTime(inresp.Data.EndTime) + } else { + resp.Success = false + resp.ErrorMsg = inresp.Msg + } + + return resp, nil + case *octopus.DeleteTrainJobResp: + var resp types.DeleteLinkTaskResp + inresp := (interface{})(in).(*octopus.DeleteTrainJobResp) + resp.Success = inresp.Success + if !resp.Success { + resp.ErrorMsg = inresp.Error.Message + return resp, nil + } + return resp, nil case *modelarts.DeleteTrainingJobResp: var resp types.DeleteLinkTaskResp @@ -205,6 +239,16 @@ func ConvertType[T any](in *T) (interface{}, error) { return resp, nil } return resp, nil + case *hpcAC.DeleteTaskAiResp: + var resp types.DeleteLinkTaskResp + inresp := (interface{})(in).(*hpcAC.DeleteTaskAiResp) + if inresp.Code == "0" { + resp.Success = true + } else { + resp.Success = false + resp.ErrorMsg = inresp.Msg + } + return resp, nil default: return nil, errors.New("type convert fail") } diff --git a/pkg/utils/timeutils/time.go b/pkg/utils/timeutils/time.go index 09b4d2c3..0cb1de81 100644 --- a/pkg/utils/timeutils/time.go +++ b/pkg/utils/timeutils/time.go @@ -47,3 +47,11 @@ func TimeRemoveZone(tm time.Time) time.Time { } return parse } + +func StringToUnixTime(str string) int64 { + dt, err := time.ParseInLocation("2006-01-02 15:04:05", str, time.Local) + if err != nil { + return 0 + } + return dt.Unix() +}