From 61f7363302dce44e9fc100e08199cb35f36cba07 Mon Sep 17 00:00:00 2001 From: jagger Date: Mon, 20 May 2024 19:58:33 +0800 Subject: [PATCH] fix bug Signed-off-by: jagger Former-commit-id: adc07bd843d2938c09d0932a8fe9cf1031a30005 --- .../scheduler/schedulers/aiScheduler.go | 12 ++++ api/internal/scheduler/service/aiService.go | 2 +- api/internal/storeLink/modelarts.go | 61 ++++++++++++++++--- api/internal/storeLink/storeLink.go | 2 +- 4 files changed, 68 insertions(+), 9 deletions(-) diff --git a/api/internal/scheduler/schedulers/aiScheduler.go b/api/internal/scheduler/schedulers/aiScheduler.go index f3bd56e1..c033f9c9 100644 --- a/api/internal/scheduler/schedulers/aiScheduler.go +++ b/api/internal/scheduler/schedulers/aiScheduler.go @@ -31,6 +31,7 @@ import ( "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils" + "gitlink.org.cn/JointCloud/pcm-modelarts/client/modelartsservice" "gitlink.org.cn/JointCloud/pcm-octopus/octopus" "sync" ) @@ -340,6 +341,17 @@ func convertType(in interface{}) (*AiResult, error) { result.Msg = resp.Error.Message } + return &result, nil + case *modelartsservice.CreateTrainingJobResp: + resp := (in).(*modelartsservice.CreateTrainingJobResp) + + if resp.ErrorMsg != "" { + result.Msg = resp.ErrorMsg + } else { + + result.JobId = resp.Metadata.Id + } + return &result, nil default: return nil, errors.New("ai task response failed") diff --git a/api/internal/scheduler/service/aiService.go b/api/internal/scheduler/service/aiService.go index 45b6da6d..0567e4a3 100644 --- a/api/internal/scheduler/service/aiService.go +++ b/api/internal/scheduler/service/aiService.go @@ -64,7 +64,7 @@ func InitAiClusterMap(conf *config.Config, clusters []types.ClusterInfo) (map[st id, _ := strconv.ParseInt(c.Id, 10, 64) modelArtsRpc := modelartsservice.NewModelArtsService(zrpc.MustNewClient(conf.ModelArtsRpcConf)) modelArtsImgRpc := imagesservice.NewImagesService(zrpc.MustNewClient(conf.ModelArtsImgRpcConf)) - modelarts := storeLink.NewModelArtsLink(modelArtsRpc, modelArtsImgRpc, c.Nickname, id) + modelarts := storeLink.NewModelArtsLink(modelArtsRpc, modelArtsImgRpc, c.Name, id, c.Nickname) collectorMap[c.Id] = modelarts executorMap[c.Id] = modelarts case SHUGUANGAI: diff --git a/api/internal/storeLink/modelarts.go b/api/internal/storeLink/modelarts.go index ee8df706..c91df822 100644 --- a/api/internal/storeLink/modelarts.go +++ b/api/internal/storeLink/modelarts.go @@ -27,6 +27,10 @@ import ( "strings" ) +const ( + Ascend = "Ascend" +) + type ModelArtsLink struct { modelArtsRpc modelartsservice.ModelArtsService modelArtsImgRpc imagesservice.ImagesService @@ -36,8 +40,8 @@ type ModelArtsLink struct { pageSize int32 } -func NewModelArtsLink(modelArtsRpc modelartsservice.ModelArtsService, modelArtsImgRpc imagesservice.ImagesService, name string, id int64) *ModelArtsLink { - return &ModelArtsLink{modelArtsRpc: modelArtsRpc, modelArtsImgRpc: modelArtsImgRpc, platform: name, participantId: id, pageIndex: 1, pageSize: 100} +func NewModelArtsLink(modelArtsRpc modelartsservice.ModelArtsService, modelArtsImgRpc imagesservice.ImagesService, name string, id int64, nickname string) *ModelArtsLink { + return &ModelArtsLink{modelArtsRpc: modelArtsRpc, modelArtsImgRpc: modelArtsImgRpc, platform: nickname, participantId: id, pageIndex: 0, pageSize: 50} } func (m *ModelArtsLink) UploadImage(ctx context.Context, path string) (interface{}, error) { @@ -87,6 +91,7 @@ func (m *ModelArtsLink) SubmitTask(ctx context.Context, imageId string, cmd stri WorkspaceId: "0", }, Algorithm: &modelarts.Algorithms{ + Id: algorithmId, Engine: &modelarts.EngineCreateTraining{ ImageUrl: imageId, }, @@ -184,7 +189,9 @@ func (m *ModelArtsLink) GetAlgorithms(ctx context.Context) ([]*collector.Algorit } func (m *ModelArtsLink) GetComputeCards(ctx context.Context) ([]string, error) { - return nil, nil + var cards []string + cards = append(cards, Ascend) + return cards, nil } func (m *ModelArtsLink) GetUserBalance(ctx context.Context) (float64, error) { @@ -224,6 +231,10 @@ func (m *ModelArtsLink) GenerateSubmitParams(ctx context.Context, option *option if err != nil { return err } + err = m.generateAlgorithmId(ctx, option) + if err != nil { + return err + } err = m.generateImageId(option) if err != nil { return err @@ -244,10 +255,7 @@ func (m *ModelArtsLink) GenerateSubmitParams(ctx context.Context, option *option } func (m *ModelArtsLink) generateResourceId(ctx context.Context, option *option.AiOption) error { - _, err := m.QuerySpecs(ctx) - if err != nil { - return err - } + option.ResourceId = "modelarts.kat1.xlarge" return nil } @@ -270,3 +278,42 @@ func (m *ModelArtsLink) generateParams(option *option.AiOption) error { return nil } + +func (m *ModelArtsLink) generateAlgorithmId(ctx context.Context, option *option.AiOption) error { + req := &modelarts.ListAlgorithmsReq{ + Platform: m.platform, + Offset: m.pageIndex, + Limit: m.pageSize, + } + resp, err := m.modelArtsRpc.ListAlgorithms(ctx, req) + if err != nil { + return err + } + if resp.ErrorMsg != "" { + return errors.New("failed to get algorithmId") + } + + for _, algorithm := range resp.Items { + engVersion := algorithm.JobConfig.Engine.EngineVersion + if strings.Contains(engVersion, option.TaskType) { + ns := strings.Split(algorithm.Metadata.Name, DASH) + if ns[0] != option.TaskType { + continue + } + if ns[1] != option.DatasetsName { + continue + } + if ns[2] != option.AlgorithmName { + continue + } + option.AlgorithmId = algorithm.Metadata.Id + return nil + } + } + + if option.AlgorithmId == "" { + return errors.New("Algorithm does not exist") + } + + return errors.New("failed to get AlgorithmId") +} diff --git a/api/internal/storeLink/storeLink.go b/api/internal/storeLink/storeLink.go index e3e86a46..24958177 100644 --- a/api/internal/storeLink/storeLink.go +++ b/api/internal/storeLink/storeLink.go @@ -99,7 +99,7 @@ func NewStoreLink(octopusRpc octopusclient.Octopus, modelArtsRpc modelartsservic linkStruct := NewOctopusLink(octopusRpc, participant.Name, participant.Id) return &StoreLink{ILinkage: linkStruct} case TYPE_MODELARTS: - linkStruct := NewModelArtsLink(modelArtsRpc, modelArtsImgRpc, participant.Name, participant.Id) + linkStruct := NewModelArtsLink(modelArtsRpc, modelArtsImgRpc, participant.Name, participant.Id, "") return &StoreLink{ILinkage: linkStruct} case TYPE_SHUGUANGAI: linkStruct := NewShuguangAi(aCRpc, participant.Name, participant.Id)