From 13c3b60b57a7238ac3d3e746869a3dd8be48a769 Mon Sep 17 00:00:00 2001 From: tzwang Date: Thu, 29 Aug 2024 15:43:27 +0800 Subject: [PATCH 1/4] updated CreateDeployTaskReq types Former-commit-id: 078ffecc6a3009541eeae1614faaa6df81069956 --- desc/inference/inference.api | 2 +- internal/types/types.go | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/desc/inference/inference.api b/desc/inference/inference.api index 962b9700..07d8ee01 100644 --- a/desc/inference/inference.api +++ b/desc/inference/inference.api @@ -182,7 +182,7 @@ type ( TaskDesc string `form:"taskDesc"` ModelName string `form:"modelName"` ModelType string `form:"modelType"` - AdapterClusterMap map[string]string `form:"adapterClusterMap"` + AdapterClusterMap map[string][]string `form:"adapterClusterMap"` } CreateDeployTaskResp { diff --git a/internal/types/types.go b/internal/types/types.go index 5af8a866..7d5da4ef 100644 --- a/internal/types/types.go +++ b/internal/types/types.go @@ -6075,11 +6075,11 @@ type GetDeployTasksByTypeResp struct { } type CreateDeployTaskReq struct { - TaskName string `form:"taskName"` - TaskDesc string `form:"taskDesc"` - ModelName string `form:"modelName"` - ModelType string `form:"modelType"` - AdapterClusterMap map[string]string `form:"adapterClusterMap"` + TaskName string `form:"taskName"` + TaskDesc string `form:"taskDesc"` + ModelName string `form:"modelName"` + ModelType string `form:"modelType"` + AdapterClusterMap map[string][]string `form:"adapterClusterMap"` } type CreateDeployTaskResp struct { From 04e72f95998ad80b09b9d445de431f0b35b87616 Mon Sep 17 00:00:00 2001 From: tzwang Date: Thu, 29 Aug 2024 16:49:42 +0800 Subject: [PATCH 2/4] updated createDeployInstance logics Former-commit-id: ae411caf7a3a2f1f6ad11a55bdcff0130641ed4c --- .../logic/inference/createdeploytasklogic.go | 76 ++++++++++++++++++- internal/scheduler/database/aiStorage.go | 27 +++---- 2 files changed, 89 insertions(+), 14 deletions(-) diff --git a/internal/logic/inference/createdeploytasklogic.go b/internal/logic/inference/createdeploytasklogic.go index 1daa8fc5..831c5428 100644 --- a/internal/logic/inference/createdeploytasklogic.go +++ b/internal/logic/inference/createdeploytasklogic.go @@ -2,6 +2,9 @@ package inference import ( "context" + "errors" + "gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/schedulers/option" + "strconv" "gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc" "gitlink.org.cn/JointCloud/pcm-coordinator/internal/types" @@ -24,7 +27,78 @@ func NewCreateDeployTaskLogic(ctx context.Context, svcCtx *svc.ServiceContext) * } func (l *CreateDeployTaskLogic) CreateDeployTask(req *types.CreateDeployTaskReq) (resp *types.CreateDeployTaskResp, err error) { - // todo: add your logic here and delete this line + resp = &types.CreateDeployTaskResp{} + + if len(req.AdapterClusterMap) == 0 { + return nil, errors.New("adapters are empty") + } + + opt := &option.InferOption{ + TaskName: req.TaskName, + ModelType: req.ModelType, + ModelName: req.ModelName, + Cmd: "", + } + + taskId, err := l.svcCtx.Scheduler.AiStorages.SaveInferDeployTask(req.TaskName, req.ModelName, req.ModelType, req.TaskDesc) + if err != nil { + return nil, err + } + + for aid, v := range req.AdapterClusterMap { + for _, cid := range v { + err = l.createDeployInstance(taskId, aid, cid, opt) + if err != nil { + return nil, err + } + } + } return } + +func (l *CreateDeployTaskLogic) createDeployInstance(taskId int64, adapterId string, clusterId string, opt *option.InferOption) error { + cmap, found := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[adapterId] + if !found { + + } + iCluster, found := cmap[clusterId] + if !found { + + } + insId, err := iCluster.CreateInferDeployInstance(l.ctx, opt) + if err != nil { + return err + } + + aid, err := strconv.ParseInt(adapterId, 10, 64) + if err != nil { + return err + } + cid, err := strconv.ParseInt(clusterId, 10, 64) + if err != nil { + return err + } + + adapterName, err := l.svcCtx.Scheduler.AiStorages.GetAdapterNameById(adapterId) + if err != nil { + return err + } + + clusterName, err := l.svcCtx.Scheduler.AiStorages.GetClusterNameById(clusterId) + if err != nil { + return err + } + + ins, err := iCluster.GetInferDeployInstance(l.ctx, insId) + if err != nil { + return err + } + + _, err = l.svcCtx.Scheduler.AiStorages.SaveInferDeployInstance(taskId, ins.InstanceId, ins.InstanceName, aid, adapterName, cid, clusterName, ins.ModelName, ins.ModelType, ins.InferCard) + if err != nil { + return err + } + + return nil +} diff --git a/internal/scheduler/database/aiStorage.go b/internal/scheduler/database/aiStorage.go index 6f9c4285..4bf9725d 100644 --- a/internal/scheduler/database/aiStorage.go +++ b/internal/scheduler/database/aiStorage.go @@ -373,23 +373,24 @@ func (s *AiStorage) AddNoticeInfo(adapterId string, adapterName string, clusterI } } -func (s *AiStorage) SaveInferDeployInstance(instanceId string, instanceName string, adapterId int64, +func (s *AiStorage) SaveInferDeployInstance(taskId int64, instanceId string, instanceName string, adapterId int64, adapterName string, clusterId int64, clusterName string, modelName string, modelType string, inferCard string) (int64, error) { startTime := time.Now().Format(time.RFC3339) // 构建主任务结构体 insModel := models.AiInferDeployInstance{ - InstanceId: instanceId, - InstanceName: instanceName, - AdapterId: adapterId, - AdapterName: adapterName, - ClusterId: clusterId, - ClusterName: clusterName, - ModelName: modelName, - ModelType: modelType, - InferCard: inferCard, - Status: constants.Saved, - CreateTime: startTime, - UpdateTime: startTime, + DeployInstanceTaskId: taskId, + InstanceId: instanceId, + InstanceName: instanceName, + AdapterId: adapterId, + AdapterName: adapterName, + ClusterId: clusterId, + ClusterName: clusterName, + ModelName: modelName, + ModelType: modelType, + InferCard: inferCard, + Status: constants.Stopped, + CreateTime: startTime, + UpdateTime: startTime, } // 保存任务数据到数据库 tx := s.DbEngin.Table("ai_infer_deploy_instance").Create(&insModel) From 3ebe01f35d982153e5d7091f6f4ced8cb1dfff93 Mon Sep 17 00:00:00 2001 From: tzwang Date: Thu, 29 Aug 2024 16:54:09 +0800 Subject: [PATCH 3/4] updated CreateDeployTask api Former-commit-id: fb4fc498ca0bb78cbeb823d64e5ff39cb46e6838 --- desc/pcm.api | 2 +- internal/handler/routes.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/desc/pcm.api b/desc/pcm.api index f075d178..4e00a2d3 100644 --- a/desc/pcm.api +++ b/desc/pcm.api @@ -973,7 +973,7 @@ service pcm { get /inference/getDeployTasksByType (GetDeployTasksByTypeReq) returns (GetDeployTasksByTypeResp) @handler CreateDeployTask - get /inference/createDeployTask (CreateDeployTaskReq) returns (CreateDeployTaskResp) + post /inference/createDeployTask (CreateDeployTaskReq) returns (CreateDeployTaskResp) @handler GetAdaptersByModel get /inference/getAdaptersByModel (GetAdaptersByModelReq) returns (GetAdaptersByModelResp) diff --git a/internal/handler/routes.go b/internal/handler/routes.go index ceb90566..35dfa798 100644 --- a/internal/handler/routes.go +++ b/internal/handler/routes.go @@ -1234,7 +1234,7 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) { Handler: inference.GetDeployTasksByTypeHandler(serverCtx), }, { - Method: http.MethodGet, + Method: http.MethodPost, Path: "/inference/createDeployTask", Handler: inference.CreateDeployTaskHandler(serverCtx), }, From ef72b1014f2f83cbb9de5b2c67c7cb702212333b Mon Sep 17 00:00:00 2001 From: tzwang Date: Thu, 29 Aug 2024 17:21:10 +0800 Subject: [PATCH 4/4] updated createDeployInstance logics Former-commit-id: 5b8465ca651503401f6fd00de93268e6e6ae8d41 --- internal/logic/inference/createdeploytasklogic.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/logic/inference/createdeploytasklogic.go b/internal/logic/inference/createdeploytasklogic.go index 831c5428..318950ec 100644 --- a/internal/logic/inference/createdeploytasklogic.go +++ b/internal/logic/inference/createdeploytasklogic.go @@ -60,11 +60,11 @@ func (l *CreateDeployTaskLogic) CreateDeployTask(req *types.CreateDeployTaskReq) func (l *CreateDeployTaskLogic) createDeployInstance(taskId int64, adapterId string, clusterId string, opt *option.InferOption) error { cmap, found := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[adapterId] if !found { - + return errors.New("adapterId not exist: " + adapterId) } iCluster, found := cmap[clusterId] if !found { - + return errors.New("clusterId not exist: " + clusterId) } insId, err := iCluster.CreateInferDeployInstance(l.ctx, opt) if err != nil {