From 5eda8232b15960e60327e5206fe064be95c29fcd Mon Sep 17 00:00:00 2001 From: tzwang Date: Fri, 30 Aug 2024 09:47:46 +0800 Subject: [PATCH 1/2] updated imageinference logics Former-commit-id: 99ca94792385319288589344217693282e5d3eff --- internal/logic/inference/imageinferencelogic.go | 7 +++++-- internal/scheduler/database/aiStorage.go | 8 ++++---- .../inference/imageInference/imageInference.go | 17 ++++++++++++++--- internal/storeLink/octopus.go | 9 +++++++++ 4 files changed, 32 insertions(+), 9 deletions(-) diff --git a/internal/logic/inference/imageinferencelogic.go b/internal/logic/inference/imageinferencelogic.go index 2b26e8b9..a1a4df53 100644 --- a/internal/logic/inference/imageinferencelogic.go +++ b/internal/logic/inference/imageinferencelogic.go @@ -46,11 +46,14 @@ func (l *ImageInferenceLogic) ImageInfer(r *http.Request, req *types.ImageInfere if err != nil { return nil, err } + if instance == nil { + return nil, errors.New("instance is empty ") + } instanceList = append(instanceList, instance) } if len(instanceList) == 0 { - return nil, errors.New("instances are empty") + return nil, errors.New("instanceList are empty") } // process uploaded images @@ -110,7 +113,7 @@ func (l *ImageInferenceLogic) ImageInfer(r *http.Request, req *types.ImageInfere } // set strategy - if opt.Strategy != "" { + if opt.Strategy == "" { return nil, errors.New("strategy is empty") } diff --git a/internal/scheduler/database/aiStorage.go b/internal/scheduler/database/aiStorage.go index 4bf9725d..603f5719 100644 --- a/internal/scheduler/database/aiStorage.go +++ b/internal/scheduler/database/aiStorage.go @@ -413,23 +413,23 @@ func (s *AiStorage) UpdateInferDeployInstance(instance *models.AiInferDeployInst } func (s *AiStorage) GetInferDeployInstanceById(id int64) (*models.AiInferDeployInstance, error) { - var deployIns models.AiInferDeployInstance + var deployIns *models.AiInferDeployInstance tx := s.DbEngin.Raw("select * from ai_infer_deploy_instance where `id` = ?", id).Scan(&deployIns) if tx.Error != nil { logx.Errorf(tx.Error.Error()) return nil, tx.Error } - return &deployIns, nil + return deployIns, nil } func (s *AiStorage) GetDeployTaskById(id int64) (*models.AiDeployInstanceTask, error) { - var task models.AiDeployInstanceTask + var task *models.AiDeployInstanceTask tx := s.DbEngin.Raw("select * from ai_deploy_instance_task where `id` = ?", id).Scan(&task) if tx.Error != nil { logx.Errorf(tx.Error.Error()) return nil, tx.Error } - return &task, nil + return task, nil } func (s *AiStorage) GetDeployTaskListByType(modelType string) ([]*models.AiDeployInstanceTask, error) { diff --git a/internal/scheduler/service/inference/imageInference/imageInference.go b/internal/scheduler/service/inference/imageInference/imageInference.go index 101ba45f..700eb864 100644 --- a/internal/scheduler/service/inference/imageInference/imageInference.go +++ b/internal/scheduler/service/inference/imageInference/imageInference.go @@ -198,6 +198,8 @@ func (i *ImageInference) filterClusters() ([]*FilteredCluster, error) { for _, cluster := range i.clusters { var inferurls []*inference.InferUrl var clustertype string + var clusterName string + for _, instance := range i.instances { clusterId := strconv.FormatInt(instance.ClusterId, 10) adapterId := strconv.FormatInt(instance.AdapterId, 10) @@ -214,6 +216,7 @@ func (i *ImageInference) filterClusters() ([]*FilteredCluster, error) { inferurls = append(inferurls, &url) clustertype = deployInstance.ClusterType + clusterName = deployInstance.ClusterName } } if len(inferurls) == 0 { @@ -224,7 +227,8 @@ func (i *ImageInference) filterClusters() ([]*FilteredCluster, error) { var f FilteredCluster f.urls = inferurls - f.clusterName = cluster.ClusterName + f.clusterId = cluster.ClusterId + f.clusterName = clusterName f.clusterType = clustertype f.imageNum = cluster.Replicas cs = append(cs, &f) @@ -448,12 +452,19 @@ func (i *ImageInference) saveAiSubTasks(id int64, aiTaskList []*models.TaskAi, c } func getInferResult(url string, file multipart.File, fileName string, clusterId string, clusterType string, inferAdapter map[string]map[string]inference.ICluster, adapterId string) (string, error) { - inferMap := inferAdapter[adapterId] + adapter, found := inferAdapter[adapterId] + if !found { + return "", errors.New("adapterId not found") + } + iCluster, found := adapter[clusterId] + if !found { + return "", errors.New("clusterId not found") + } switch clusterType { case storeLink.TYPE_OCTOPUS: r := http.Request{} - result, err := inferMap[clusterId].GetInferResult(r.Context(), url, file, fileName) + result, err := iCluster.GetInferResult(r.Context(), url, file, fileName) if err != nil { return "", err } diff --git a/internal/storeLink/octopus.go b/internal/storeLink/octopus.go index 6aa7cd6f..2317ad7a 100644 --- a/internal/storeLink/octopus.go +++ b/internal/storeLink/octopus.go @@ -1158,12 +1158,21 @@ func (o *OctopusLink) GetInferDeployInstance(ctx context.Context, id string) (*i url := strings.Replace(resp.Payload.Notebook.Tasks[0].Url, FORWARD_SLASH, "", -1) inferUrl := DOMAIN + url + var card string + if resp.Payload.Notebook.Desc != "" { + str := strings.Split(resp.Payload.Notebook.Desc, FORWARD_SLASH) + if len(str) == 3 { + card = str[2] + } + } + ins.InstanceName = resp.Payload.Notebook.Name ins.InstanceId = resp.Payload.Notebook.Id ins.ClusterName = o.platform ins.Status = resp.Payload.Notebook.Status ins.ClusterType = TYPE_OCTOPUS ins.InferUrl = inferUrl + ins.InferCard = card return ins, nil } From 837d9401c0dda457cf249a099b170d0a223e9b7d Mon Sep 17 00:00:00 2001 From: qiwang <1364512070@qq.com> Date: Fri, 30 Aug 2024 10:17:59 +0800 Subject: [PATCH 2/2] fix: Complete the modelarts createServeic of ai Former-commit-id: 1d51c0bcbdf224ed77fadc75ebf8d0a33cd0fbcc --- internal/storeLink/modelarts.go | 38 +++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/internal/storeLink/modelarts.go b/internal/storeLink/modelarts.go index 3c76bfcc..cbf03c53 100644 --- a/internal/storeLink/modelarts.go +++ b/internal/storeLink/modelarts.go @@ -261,13 +261,25 @@ func (m *ModelArtsLink) UploadAlgorithmCode(ctx context.Context, resourceType st } func (m *ModelArtsLink) GetModelId(ctx context.Context, option *option.AiOption, ifoption *option.InferOption) error { - req := &modelarts.ListModelReq{} - specResp, err := m.modelArtsRpc.ListModels(ctx, req) + req := &modelarts.ListModelReq{ + Platform: m.platform, + Limit: int64(m.pageIndex), + Offset: int64(m.pageSize), + ModelName: option.ModelName, + //ModelType: option.ModelType, + } + ListResp, err := m.modelArtsRpc.ListModels(ctx, req) + for _, Models := range ListResp.Models { + if Models.ModelName == option.ModelName { + ifoption.ModelId = Models.ModelId + return nil + } + } if err != nil { return err } - return nil + return errors.New("failed to set ModelId") } func (m *ModelArtsLink) GetTrainingTaskLog(ctx context.Context, taskId string, instanceNum string) (string, error) { @@ -347,7 +359,7 @@ func (m *ModelArtsLink) Execute(ctx context.Context, option *option.AiOption) (i } func (m *ModelArtsLink) GenerateSubmitParams(ctx context.Context, option *option.AiOption) error { - err := m.generateResourceId(ctx, option) + err := m.generateResourceId(ctx, option, nil) if err != nil { return err } @@ -552,8 +564,13 @@ func (m *ModelArtsLink) GetInferResult(ctx context.Context, url string, file mul func (m *ModelArtsLink) CreateInferDeployInstance(ctx context.Context, option *option.InferOption) (string, error) { + err := m.GetModelId(ctx, nil, option) + if err != nil { + return "", err + } + configParam := &modelarts.ServiceConfig{ - Specification: option.Specification, + Specification: "modelarts.kat1.xlarge.al", Weight: option.Weight, ModelId: option.ModelId, InstanceCount: option.InstanceCount, @@ -575,5 +592,14 @@ func (m *ModelArtsLink) CreateInferDeployInstance(ctx context.Context, option *o } func (m *ModelArtsLink) CheckModelExistence(ctx context.Context, name string, mtype string) bool { - return false + ifoption := &option.InferOption{ + ModelName: name, + ModelType: mtype, + } + err := m.GetModelId(ctx, nil, ifoption) + if err != nil { + return false + } + + return true }