From bcb664c704074d168090182746eb98b1b965975e Mon Sep 17 00:00:00 2001 From: tzwang Date: Mon, 2 Sep 2024 17:20:45 +0800 Subject: [PATCH 1/2] updated createdeploytask logic Former-commit-id: b90f79d81f32bc7f78b9978717a076e807e42d17 --- .../logic/inference/createdeploytasklogic.go | 2 +- .../logic/inference/deployinstancelistlogic.go | 17 ++++++++++++++--- internal/scheduler/database/aiStorage.go | 14 ++++++++++++-- internal/storeLink/octopus.go | 6 ++++++ 4 files changed, 33 insertions(+), 6 deletions(-) diff --git a/internal/logic/inference/createdeploytasklogic.go b/internal/logic/inference/createdeploytasklogic.go index 318950ec..27f561bc 100644 --- a/internal/logic/inference/createdeploytasklogic.go +++ b/internal/logic/inference/createdeploytasklogic.go @@ -95,7 +95,7 @@ func (l *CreateDeployTaskLogic) createDeployInstance(taskId int64, adapterId str return err } - _, err = l.svcCtx.Scheduler.AiStorages.SaveInferDeployInstance(taskId, ins.InstanceId, ins.InstanceName, aid, adapterName, cid, clusterName, ins.ModelName, ins.ModelType, ins.InferCard) + _, err = l.svcCtx.Scheduler.AiStorages.SaveInferDeployInstance(taskId, ins.InstanceId, ins.InstanceName, aid, adapterName, cid, clusterName, ins.ModelName, ins.ModelType, ins.InferCard, ins.ClusterType) if err != nil { return err } diff --git a/internal/logic/inference/deployinstancelistlogic.go b/internal/logic/inference/deployinstancelistlogic.go index 053d2fe7..af0c7abc 100644 --- a/internal/logic/inference/deployinstancelistlogic.go +++ b/internal/logic/inference/deployinstancelistlogic.go @@ -52,7 +52,10 @@ func (l *DeployInstanceListLogic) DeployInstanceList(req *types.DeployInstanceLi return nil, errors.New(err.Error()) } - deployTasks := l.GenerateDeployTasks(tasklist) + deployTasks, err := l.GenerateDeployTasks(tasklist) + if err != nil { + return nil, errors.New(err.Error()) + } slices := make([][]*models.AiInferDeployInstance, len(deployTasks)) for i := 0; i < len(deployTasks); i++ { slices[i] = deployTasks[i].Instances @@ -83,12 +86,20 @@ func (l *DeployInstanceListLogic) DeployInstanceList(req *types.DeployInstanceLi return } -func (l *DeployInstanceListLogic) GenerateDeployTasks(tasklist []*models.AiDeployInstanceTask) []*DeployTask { +func (l *DeployInstanceListLogic) GenerateDeployTasks(tasklist []*models.AiDeployInstanceTask) ([]*DeployTask, error) { var tasks []*DeployTask for _, t := range tasklist { list, err := l.svcCtx.Scheduler.AiStorages.GetInstanceListByDeployTaskId(t.Id) if err != nil { logx.Errorf("db GetInstanceListByDeployTaskId error") + return nil, errors.New(err.Error()) + } + if len(list) == 0 { + err := l.svcCtx.Scheduler.AiStorages.DeleteDeployTaskById(t.Id) + if err != nil { + logx.Errorf("db DeleteByDeployTaskId error") + return nil, errors.New(err.Error()) + } continue } deployTask := &DeployTask{ @@ -99,7 +110,7 @@ func (l *DeployInstanceListLogic) GenerateDeployTasks(tasklist []*models.AiDeplo } tasks = append(tasks, deployTask) } - return tasks + return tasks, nil } type DeployTask struct { diff --git a/internal/scheduler/database/aiStorage.go b/internal/scheduler/database/aiStorage.go index 603f5719..3cc70c7b 100644 --- a/internal/scheduler/database/aiStorage.go +++ b/internal/scheduler/database/aiStorage.go @@ -374,7 +374,7 @@ func (s *AiStorage) AddNoticeInfo(adapterId string, adapterName string, clusterI } func (s *AiStorage) SaveInferDeployInstance(taskId int64, instanceId string, instanceName string, adapterId int64, - adapterName string, clusterId int64, clusterName string, modelName string, modelType string, inferCard string) (int64, error) { + adapterName string, clusterId int64, clusterName string, modelName string, modelType string, inferCard string, clusterType string) (int64, error) { startTime := time.Now().Format(time.RFC3339) // 构建主任务结构体 insModel := models.AiInferDeployInstance{ @@ -388,7 +388,8 @@ func (s *AiStorage) SaveInferDeployInstance(taskId int64, instanceId string, ins ModelName: modelName, ModelType: modelType, InferCard: inferCard, - Status: constants.Stopped, + ClusterType: clusterType, + Status: constants.Deploying, CreateTime: startTime, UpdateTime: startTime, } @@ -464,6 +465,15 @@ func (s *AiStorage) UpdateDeployTask(task *models.AiDeployInstanceTask, needUpda return nil } +func (s *AiStorage) DeleteDeployTaskById(id int64) error { + tx := s.DbEngin.Delete(&models.AiDeployInstanceTask{}, id) + if tx.Error != nil { + logx.Errorf(tx.Error.Error()) + return tx.Error + } + return nil +} + func (s *AiStorage) UpdateDeployTaskById(id int64) error { task, err := s.GetDeployTaskById(id) if err != nil { diff --git a/internal/storeLink/octopus.go b/internal/storeLink/octopus.go index 2317ad7a..0cb68ac7 100644 --- a/internal/storeLink/octopus.go +++ b/internal/storeLink/octopus.go @@ -1158,10 +1158,14 @@ func (o *OctopusLink) GetInferDeployInstance(ctx context.Context, id string) (*i url := strings.Replace(resp.Payload.Notebook.Tasks[0].Url, FORWARD_SLASH, "", -1) inferUrl := DOMAIN + url + var modelType string + var modelName string var card string if resp.Payload.Notebook.Desc != "" { str := strings.Split(resp.Payload.Notebook.Desc, FORWARD_SLASH) if len(str) == 3 { + modelType = str[0] + modelName = str[1] card = str[2] } } @@ -1171,6 +1175,8 @@ func (o *OctopusLink) GetInferDeployInstance(ctx context.Context, id string) (*i ins.ClusterName = o.platform ins.Status = resp.Payload.Notebook.Status ins.ClusterType = TYPE_OCTOPUS + ins.ModelType = modelType + ins.ModelName = modelName ins.InferUrl = inferUrl ins.InferCard = card From 91bfc6e6b3f29ceb5c878cc7e73637d34341c32e Mon Sep 17 00:00:00 2001 From: tzwang Date: Mon, 2 Sep 2024 17:25:31 +0800 Subject: [PATCH 2/2] updated getadapterbymodel api Former-commit-id: 8a6175e6d646a05c9298b7d9747a671b37f43dd7 --- desc/inference/inference.api | 2 +- internal/types/types.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/desc/inference/inference.api b/desc/inference/inference.api index 07d8ee01..f6ac8568 100644 --- a/desc/inference/inference.api +++ b/desc/inference/inference.api @@ -200,7 +200,7 @@ type ( AdapterAvail { AdapterId string `json:"adapterId"` - AdapterName string `json:"taskName"` + AdapterName string `json:"adapterName"` Clusters []*ClusterAvail `json:"clusters"` } diff --git a/internal/types/types.go b/internal/types/types.go index 7d5da4ef..284d0383 100644 --- a/internal/types/types.go +++ b/internal/types/types.go @@ -6096,7 +6096,7 @@ type GetAdaptersByModelResp struct { type AdapterAvail struct { AdapterId string `json:"adapterId"` - AdapterName string `json:"taskName"` + AdapterName string `json:"adapterName"` Clusters []*ClusterAvail `json:"clusters"` }