存算联动修改

Former-commit-id: b5db884b426210a385394d5e4ed10bdbc4484e17
This commit is contained in:
tzwang 2023-10-26 11:26:23 +08:00
parent f32a50d4b5
commit a27df05a72
4 changed files with 121 additions and 55 deletions

View File

@ -15,7 +15,7 @@ type (
UploadLinkImageResp {
Success bool `json:"success"`
Image ImageSl `json:"image"`
Image *ImageSl `json:"image"`
ErrorMsg string `json:"errorMsg"`
}
@ -31,7 +31,7 @@ type (
GetLinkImageListResp {
Success bool `json:"success"`
Images []ImageSl `json:"images"`
Images []*ImageSl `json:"images"`
ErrorMsg string `json:"errorMsg"`
}
@ -49,7 +49,7 @@ type (
PartId int64 `json:"partId"`
ImageId string `json:"imageId"`
Cmd string `json:"cmd"`
Params []ParamSl `json:"params"`
Params []*ParamSl `json:"params"`
ResourceId string `json:"resourceId"`
}
@ -71,7 +71,7 @@ type (
GetLinkTaskResp {
Success bool `json:"success"`
Task TaskSl `json:"task"`
Task *TaskSl `json:"task"`
ErrorMsg string `json:"errorMsg"`
}
@ -99,7 +99,7 @@ type (
GetParticipantsResp {
Success bool `json:"success"`
Participants []ParticipantSl `json:"participant"`
Participants []*ParticipantSl `json:"participant"`
}
GetResourceSpecsReq {
@ -108,7 +108,7 @@ type (
GetResourceSpecsResp {
Success bool `json:"success"`
ResourceSpecs []ResourceSpecSl `json:"resourceSpecs"`
ResourceSpecs []*ResourceSpecSl `json:"resourceSpecs"`
}
ResourceSpecSl {

View File

@ -11,6 +11,13 @@ type ShuguangAi struct {
svcCtx *svc.ServiceContext
}
// String constants used when building ShuguangAi requests.
const (
// DCU is the value sent as AcceleratorType when querying the image list.
DCU = "dcu"
// PYTORCH is the value sent as TaskType when querying the image list.
PYTORCH = "Pytorch"
// TENSORFLOW is presumably the TensorFlow counterpart of PYTORCH; it is not
// referenced in the visible code — TODO confirm where it is used.
TENSORFLOW = "Tensorflow"
// Wzhdtest is not referenced in the visible code; it looks like a test
// identifier — TODO confirm its purpose.
Wzhdtest = "wzhdtest"
)
// NewShuguangAi builds a ShuguangAi that keeps the given context and service
// context for use by its methods.
func NewShuguangAi(ctx context.Context, svcCtx *svc.ServiceContext) *ShuguangAi {
	var ai ShuguangAi
	ai.ctx = ctx
	ai.svcCtx = svcCtx
	return &ai
}
@ -25,7 +32,10 @@ func (s *ShuguangAi) DeleteImage(imageId string) (interface{}, error) {
func (s *ShuguangAi) QueryImageList() (interface{}, error) {
// shuguangAi获取镜像列表
req := &hpcAC.GetImageListAiReq{}
req := &hpcAC.GetImageListAiReq{
AcceleratorType: DCU,
TaskType: PYTORCH,
}
resp, err := s.svcCtx.ACRpc.GetImageListAi(s.ctx, req)
if err != nil {
return nil, err
@ -60,7 +70,9 @@ func (s *ShuguangAi) SubmitTask(imageId string, cmd string, params []string, res
func (s *ShuguangAi) QueryTask(taskId string) (interface{}, error) {
// shuguangAi获取任务
req := &hpcAC.GetPytorchTaskReq{}
req := &hpcAC.GetPytorchTaskReq{
Id: taskId,
}
resp, err := s.svcCtx.ACRpc.GetPytorchTask(s.ctx, req)
if err != nil {
return nil, err
@ -77,7 +89,9 @@ func (s *ShuguangAi) QueryTask(taskId string) (interface{}, error) {
func (s *ShuguangAi) DeleteTask(taskId string) (interface{}, error) {
// shuguangAi删除任务
req := &hpcAC.DeleteTaskAiReq{}
req := &hpcAC.DeleteTaskAiReq{
Ids: taskId,
}
resp, err := s.svcCtx.ACRpc.DeleteTaskAi(s.ctx, req)
if err != nil {
return nil, err

View File

@ -6,6 +6,8 @@ import (
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/types"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/models"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils/timeutils"
"gitlink.org.cn/jcce-pcm/pcm-participant-ac/hpcAC"
"gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/modelarts"
"gitlink.org.cn/jcce-pcm/pcm-participant-octopus/octopus"
"gorm.io/gorm"
@ -75,7 +77,6 @@ func ConvertType[T any](in *T) (interface{}, error) {
}
return resp, nil
case *octopus.GetUserImageListResp:
var resp types.GetLinkImageListResp
inresp := (interface{})(in).(*octopus.GetUserImageListResp)
@ -94,7 +95,37 @@ func ConvertType[T any](in *T) (interface{}, error) {
resp.Images = append(resp.Images, image)
}
return resp, nil
case *modelarts.ListReposDetailsResp:
var resp types.GetLinkImageListResp
inresp := (interface{})(in).(*modelarts.ListReposDetailsResp)
if inresp.Errors != nil {
resp.Success = false
resp.ErrorMsg = inresp.Errors[0].ErrorMessage
resp.Images = nil
return resp, nil
}
for _, v := range inresp.Items {
for _, r := range v.Tags {
var image types.ImageSl
image.ImageId = v.Namespace + "/" + v.Name + ":" + r
image.ImageName = v.Name
image.ImageStatus = "succeed"
resp.Images = append(resp.Images, image)
}
}
return resp, nil
case *hpcAC.GetImageListAiResp:
// Converts a GetImageListAiResp into a GetLinkImageListResp.
// Success is true exactly when Code equals "0".
// NOTE(review): this branch sets only Success; resp.Images and resp.ErrorMsg
// are left at their zero values on both paths — confirm whether inresp carries
// image data or a message that should be mapped here.
var resp types.GetLinkImageListResp
inresp := (interface{})(in).(*hpcAC.GetImageListAiResp)
if inresp.Code == "0" {
resp.Success = true
} else {
resp.Success = false
}
return resp, nil
case *octopus.DeleteImageResp:
var resp types.DeleteLinkImageResp
inresp := (interface{})(in).(*octopus.DeleteImageResp)
@ -118,7 +149,29 @@ func ConvertType[T any](in *T) (interface{}, error) {
resp.TaskId = inresp.Payload.JobId
return resp, nil
case *modelarts.CreateTrainingJobResp:
var resp types.SubmitLinkTaskResp
inresp := (interface{})(in).(*modelarts.CreateTrainingJobResp)
if inresp.ErrorMsg != "" {
resp.ErrorMsg = inresp.ErrorMsg
resp.Success = false
return resp, nil
}
resp.TaskId = inresp.Metadata.Id
return resp, nil
case *hpcAC.SubmitTaskAiResp:
// Converts a SubmitTaskAiResp into a SubmitLinkTaskResp.
// Success is true exactly when Code equals "0".
// NOTE(review): unlike the other SubmitLinkTaskResp branches, this one never
// sets resp.TaskId, and resp.ErrorMsg stays empty on failure — confirm whether
// inresp exposes a task id / message that should be copied.
var resp types.SubmitLinkTaskResp
inresp := (interface{})(in).(*hpcAC.SubmitTaskAiResp)
if inresp.Code == "0" {
resp.Success = true
} else {
resp.Success = false
}
return resp, nil
case *octopus.GetTrainJobResp:
var resp types.GetLinkTaskResp
inresp := (interface{})(in).(*octopus.GetTrainJobResp)
@ -134,51 +187,6 @@ func ConvertType[T any](in *T) (interface{}, error) {
resp.Task.CompletedAt = inresp.Payload.TrainJob.CompletedAt
resp.Task.TaskStatus = inresp.Payload.TrainJob.Status
return resp, nil
case *octopus.DeleteTrainJobResp:
var resp types.DeleteLinkTaskResp
inresp := (interface{})(in).(*octopus.DeleteTrainJobResp)
resp.Success = inresp.Success
if !resp.Success {
resp.ErrorMsg = inresp.Error.Message
return resp, nil
}
return resp, nil
case *modelarts.ListReposDetailsResp:
var resp types.GetLinkImageListResp
inresp := (interface{})(in).(*modelarts.ListReposDetailsResp)
if inresp.Errors != nil {
resp.Success = false
resp.ErrorMsg = inresp.Errors[0].ErrorMessage
resp.Images = nil
return resp, nil
}
for _, v := range inresp.Items {
for _, r := range v.Tags {
var image types.ImageSl
image.ImageId = v.Namespace + "/" + v.Name + ":" + r
image.ImageName = v.Name
image.ImageStatus = "succeed"
resp.Images = append(resp.Images, image)
}
}
return resp, nil
case *modelarts.CreateTrainingJobResp:
var resp types.SubmitLinkTaskResp
inresp := (interface{})(in).(*modelarts.CreateTrainingJobResp)
if inresp.ErrorMsg != "" {
resp.ErrorMsg = inresp.ErrorMsg
resp.Success = false
return resp, nil
}
resp.TaskId = inresp.Metadata.Id
return resp, nil
case *modelarts.JobResponse:
var resp types.GetLinkTaskResp
@ -195,6 +203,32 @@ func ConvertType[T any](in *T) (interface{}, error) {
resp.Task.CompletedAt = int64(inresp.Status.Duration)
resp.Task.TaskStatus = inresp.Status.Phase
return resp, nil
case *hpcAC.GetPytorchTaskResp:
// Converts a GetPytorchTaskResp into a GetLinkTaskResp.
// When Code equals "0" the task id, name and status are copied from Data, and
// StartTime/EndTime are converted with timeutils.StringToUnixTime.
// For any other Code, Success is false and ErrorMsg carries inresp.Msg.
// NOTE(review): resp.Task is written without being allocated first. If
// GetLinkTaskResp.Task is a pointer (the API definition declares it as
// *TaskSl), these assignments dereference nil — confirm against the
// generated types.
var resp types.GetLinkTaskResp
inresp := (interface{})(in).(*hpcAC.GetPytorchTaskResp)
if inresp.Code == "0" {
resp.Success = true
resp.Task.TaskId = inresp.Data.Id
resp.Task.TaskName = inresp.Data.TaskName
resp.Task.TaskStatus = inresp.Data.Status
resp.Task.StartedAt = timeutils.StringToUnixTime(inresp.Data.StartTime)
resp.Task.CompletedAt = timeutils.StringToUnixTime(inresp.Data.EndTime)
} else {
resp.Success = false
resp.ErrorMsg = inresp.Msg
}
return resp, nil
case *octopus.DeleteTrainJobResp:
var resp types.DeleteLinkTaskResp
inresp := (interface{})(in).(*octopus.DeleteTrainJobResp)
resp.Success = inresp.Success
if !resp.Success {
resp.ErrorMsg = inresp.Error.Message
return resp, nil
}
return resp, nil
case *modelarts.DeleteTrainingJobResp:
var resp types.DeleteLinkTaskResp
@ -205,6 +239,16 @@ func ConvertType[T any](in *T) (interface{}, error) {
return resp, nil
}
return resp, nil
case *hpcAC.DeleteTaskAiResp:
// Converts a DeleteTaskAiResp into a DeleteLinkTaskResp.
// Success is true exactly when Code equals "0"; otherwise Success is false
// and ErrorMsg carries inresp.Msg.
var resp types.DeleteLinkTaskResp
inresp := (interface{})(in).(*hpcAC.DeleteTaskAiResp)
if inresp.Code == "0" {
resp.Success = true
} else {
resp.Success = false
resp.ErrorMsg = inresp.Msg
}
return resp, nil
default:
return nil, errors.New("type convert fail")
}

View File

@ -47,3 +47,11 @@ func TimeRemoveZone(tm time.Time) time.Time {
}
return parse
}
// StringToUnixTime parses str using the layout "2006-01-02 15:04:05",
// interpreted in the local time zone, and returns the corresponding Unix
// timestamp in seconds. It returns 0 when str does not match the layout.
func StringToUnixTime(str string) int64 {
	const layout = "2006-01-02 15:04:05"
	if parsed, parseErr := time.ParseInLocation(layout, str, time.Local); parseErr == nil {
		return parsed.Unix()
	}
	return 0
}