From ec761c8a2c66d05b2028580c8a5ac9c943b8c466 Mon Sep 17 00:00:00 2001 From: jagger Date: Fri, 29 Mar 2024 10:48:50 +0800 Subject: [PATCH 01/16] fix bugs Signed-off-by: jagger Former-commit-id: 8bb7ace86b617d878c0d83ce641d4b373cfcf349 --- api/internal/logic/core/pagelisttasklogic.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/api/internal/logic/core/pagelisttasklogic.go b/api/internal/logic/core/pagelisttasklogic.go index 976f4735..dd9293fe 100644 --- a/api/internal/logic/core/pagelisttasklogic.go +++ b/api/internal/logic/core/pagelisttasklogic.go @@ -30,14 +30,16 @@ func (l *PageListTaskLogic) PageListTask(req *types.PageTaskReq) (resp *types.Pa resp = &types.PageResult{} var list []types.TaskModel db := l.svcCtx.DbEngin.Model(&types.TaskModel{}).Table("task") + + db = db.Where("deleted_at is null") if req.Name != "" { db = db.Where("name LIKE ?", "%"+req.Name+"%") } - db = db.Where("deleted_at is null").Limit(limit).Offset(offset) //count total var total int64 err = db.Count(&total).Error + db.Limit(limit).Offset(offset) if err != nil { return resp, err From 96f3a16b71c4213df8f4a5a51a16e808d79a2b64 Mon Sep 17 00:00:00 2001 From: tzwang Date: Fri, 29 Mar 2024 17:45:28 +0800 Subject: [PATCH 02/16] modified all missing request context Former-commit-id: 9cfed9e9e30fc6e50f29784e6b6477911806c798 --- .../schedule/schedulegetalgorithmslogic.go | 11 ++- .../schedule/schedulegetdatasetslogic.go | 5 +- .../logic/storelink/deletelinkimagelogic.go | 4 +- .../logic/storelink/deletelinktasklogic.go | 4 +- .../logic/storelink/getaispecslogic.go | 4 +- .../logic/storelink/getlinkimagelistlogic.go | 4 +- .../logic/storelink/getlinktasklogic.go | 4 +- .../logic/storelink/submitlinktasklogic.go | 4 +- .../logic/storelink/uploadlinkimagelogic.go | 4 +- api/internal/mqs/ScheduleAi.go | 17 ++-- .../scheduler/schedulers/aiScheduler.go | 10 ++- api/internal/scheduler/service/aiService.go | 14 +-- .../scheduler/service/collector/collector.go | 8 +- .../scheduler/service/executor/aiExecutor.go | 3 +- api/internal/storeLink/modelarts.go | 63 +++++++------- api/internal/storeLink/octopus.go | 87 +++++++++---------- api/internal/storeLink/shuguangHpc.go | 31 ++++--- api/internal/storeLink/shuguangai.go | 81 +++++++++-------- api/internal/storeLink/storeLink.go | 32 +++---- api/internal/svc/servicecontext.go | 21 ++++- 20 files changed, 213 insertions(+), 198 deletions(-) diff --git a/api/internal/logic/schedule/schedulegetalgorithmslogic.go b/api/internal/logic/schedule/schedulegetalgorithmslogic.go index fe949b41..2c78efd3 100644 --- a/api/internal/logic/schedule/schedulegetalgorithmslogic.go +++ b/api/internal/logic/schedule/schedulegetalgorithmslogic.go @@ -2,6 +2,7 @@ package schedule import ( "context" + "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/storeLink" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types" @@ -24,7 +25,11 @@ func NewScheduleGetAlgorithmsLogic(ctx context.Context, svcCtx *svc.ServiceConte } func (l *ScheduleGetAlgorithmsLogic) ScheduleGetAlgorithms(req *types.AiAlgorithmsReq) (resp *types.AiAlgorithmsResp, err error) { - // todo: add your logic here and delete this line - - return + resp = &types.AiAlgorithmsResp{} + algorithms, err := storeLink.GetAlgorithms(l.ctx, l.svcCtx.Scheduler.ResourceCollector, req.ResourceType, req.TaskType, req.Dataset) + if err != nil { + return nil, err + } + resp.Algorithms = algorithms + return resp, nil } diff --git a/api/internal/logic/schedule/schedulegetdatasetslogic.go b/api/internal/logic/schedule/schedulegetdatasetslogic.go index 94005178..f7aeab14 100644 --- a/api/internal/logic/schedule/schedulegetdatasetslogic.go +++ b/api/internal/logic/schedule/schedulegetdatasetslogic.go @@ -2,9 +2,7 @@ package schedule import ( "context" - "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/storeLink" - "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types" @@ -27,8 +25,7 @@ func NewScheduleGetDatasetsLogic(ctx context.Context, svcCtx *svc.ServiceContext func (l *ScheduleGetDatasetsLogic) ScheduleGetDatasets() (resp *types.AiDatasetsResp, err error) { resp = &types.AiDatasetsResp{} - _, colMap := service.InitAiClusterMap(l.ctx, l.svcCtx) - names, err := storeLink.GetDatasetsNames(colMap) + names, err := storeLink.GetDatasetsNames(l.ctx, l.svcCtx.Scheduler.ResourceCollector) if err != nil { return nil, err } diff --git a/api/internal/logic/storelink/deletelinkimagelogic.go b/api/internal/logic/storelink/deletelinkimagelogic.go index 501d2596..25eb2f4d 100644 --- a/api/internal/logic/storelink/deletelinkimagelogic.go +++ b/api/internal/logic/storelink/deletelinkimagelogic.go @@ -47,12 +47,12 @@ func (l *DeleteLinkImageLogic) DeleteLinkImage(req *types.DeleteLinkImageReq) (r return resp, nil } - storelink := storeLink.NewStoreLink(l.ctx, l.svcCtx, participant) + storelink := storeLink.NewStoreLink(l.svcCtx, participant) if storelink == nil { return nil, nil } - img, err := storelink.ILinkage.DeleteImage(req.ImageId) + img, err := storelink.ILinkage.DeleteImage(l.ctx, req.ImageId) if err != nil { return nil, err } diff --git a/api/internal/logic/storelink/deletelinktasklogic.go b/api/internal/logic/storelink/deletelinktasklogic.go index c325a60b..8f377f62 100644 --- a/api/internal/logic/storelink/deletelinktasklogic.go +++ b/api/internal/logic/storelink/deletelinktasklogic.go @@ -47,12 +47,12 @@ func (l *DeleteLinkTaskLogic) DeleteLinkTask(req *types.DeleteLinkTaskReq) (resp return resp, nil } - storelink := storeLink.NewStoreLink(l.ctx, l.svcCtx, participant) + storelink := storeLink.NewStoreLink(l.svcCtx, participant) if storelink == nil { return nil, nil } - task, err := storelink.ILinkage.DeleteTask(req.TaskId) + task, err := storelink.ILinkage.DeleteTask(l.ctx, req.TaskId) if err != nil { return nil, err } diff --git a/api/internal/logic/storelink/getaispecslogic.go b/api/internal/logic/storelink/getaispecslogic.go index 2b0aa68f..62dc6a06 100644 --- a/api/internal/logic/storelink/getaispecslogic.go +++ b/api/internal/logic/storelink/getaispecslogic.go @@ -47,12 +47,12 @@ func (l *GetAISpecsLogic) GetAISpecs(req *types.GetResourceSpecsReq) (resp *type return resp, nil } - storelink := storeLink.NewStoreLink(l.ctx, l.svcCtx, participant) + storelink := storeLink.NewStoreLink(l.svcCtx, participant) if storelink == nil { return nil, nil } - specs, err := storelink.ILinkage.QuerySpecs() + specs, err := storelink.ILinkage.QuerySpecs(l.ctx) if err != nil { return nil, err } diff --git a/api/internal/logic/storelink/getlinkimagelistlogic.go b/api/internal/logic/storelink/getlinkimagelistlogic.go index 4fdae263..48e37d7b 100644 --- a/api/internal/logic/storelink/getlinkimagelistlogic.go +++ b/api/internal/logic/storelink/getlinkimagelistlogic.go @@ -47,12 +47,12 @@ func (l *GetLinkImageListLogic) GetLinkImageList(req *types.GetLinkImageListReq) return resp, nil } - storelink := storeLink.NewStoreLink(l.ctx, l.svcCtx, participant) + storelink := storeLink.NewStoreLink(l.svcCtx, participant) if storelink == nil { return nil, nil } - list, err := storelink.ILinkage.QueryImageList() + list, err := storelink.ILinkage.QueryImageList(l.ctx) if err != nil { return nil, err } diff --git a/api/internal/logic/storelink/getlinktasklogic.go b/api/internal/logic/storelink/getlinktasklogic.go index 301f7a46..24e7b3b7 100644 --- a/api/internal/logic/storelink/getlinktasklogic.go +++ b/api/internal/logic/storelink/getlinktasklogic.go @@ -48,12 +48,12 @@ func (l *GetLinkTaskLogic) GetLinkTask(req *types.GetLinkTaskReq) (resp *types.G return resp, nil } - storelink := storeLink.NewStoreLink(l.ctx, l.svcCtx, participant) + storelink := storeLink.NewStoreLink(l.svcCtx, participant) if storelink == nil { return nil, nil } - task, err := storelink.ILinkage.QueryTask(req.TaskId) + task, err := storelink.ILinkage.QueryTask(l.ctx, req.TaskId) if err != nil { return nil, err } diff --git a/api/internal/logic/storelink/submitlinktasklogic.go b/api/internal/logic/storelink/submitlinktasklogic.go index 53536ffe..971d75fc 100644 --- a/api/internal/logic/storelink/submitlinktasklogic.go +++ b/api/internal/logic/storelink/submitlinktasklogic.go @@ -48,7 +48,7 @@ func (l *SubmitLinkTaskLogic) SubmitLinkTask(req *types.SubmitLinkTaskReq) (resp return resp, nil } - storelink := storeLink.NewStoreLink(l.ctx, l.svcCtx, participant) + storelink := storeLink.NewStoreLink(l.svcCtx, participant) if storelink == nil { return nil, nil } @@ -67,7 +67,7 @@ func (l *SubmitLinkTaskLogic) SubmitLinkTask(req *types.SubmitLinkTaskReq) (resp envs = append(envs, env) } } - task, err := storelink.ILinkage.SubmitTask(req.ImageId, req.Cmd, envs, params, req.ResourceId, "", "", "pytorch") + task, err := storelink.ILinkage.SubmitTask(l.ctx, req.ImageId, req.Cmd, envs, params, req.ResourceId, "", "", "pytorch") if err != nil { return nil, err } diff --git a/api/internal/logic/storelink/uploadlinkimagelogic.go b/api/internal/logic/storelink/uploadlinkimagelogic.go index 95c4a9da..5bd7514a 100644 --- a/api/internal/logic/storelink/uploadlinkimagelogic.go +++ b/api/internal/logic/storelink/uploadlinkimagelogic.go @@ -48,12 +48,12 @@ func (l *UploadLinkImageLogic) UploadLinkImage(req *types.UploadLinkImageReq) (r return resp, nil } - storelink := storeLink.NewStoreLink(l.ctx, l.svcCtx, participant) + storelink := storeLink.NewStoreLink(l.svcCtx, participant) if storelink == nil { return nil, nil } - img, err := storelink.ILinkage.UploadImage(req.FilePath) + img, err := storelink.ILinkage.UploadImage(l.ctx, req.FilePath) if err != nil { return nil, err } diff --git a/api/internal/mqs/ScheduleAi.go b/api/internal/mqs/ScheduleAi.go index c090a795..61713ad2 100644 --- a/api/internal/mqs/ScheduleAi.go +++ b/api/internal/mqs/ScheduleAi.go @@ -16,9 +16,7 @@ package mqs import ( "context" - "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers" - "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc" ) @@ -27,25 +25,22 @@ import ( Listening to the payment flow status change notification message queue */ type AiQueue struct { - ctx context.Context - svcCtx *svc.ServiceContext - scheduler *scheduler.Scheduler + ctx context.Context + svcCtx *svc.ServiceContext } func NewAiMq(ctx context.Context, svcCtx *svc.ServiceContext) *AiQueue { - aiExecutorMap, aiCollectorMap := service.InitAiClusterMap(ctx, svcCtx) return &AiQueue{ - ctx: ctx, - svcCtx: svcCtx, - scheduler: scheduler.NewSchdlr(aiCollectorMap, nil, aiExecutorMap), + ctx: ctx, + svcCtx: svcCtx, } } func (l *AiQueue) Consume(val string) error { - aiSchdl, _ := schedulers.NewAiScheduler(val, l.scheduler, nil) + aiSchdl, _ := schedulers.NewAiScheduler(l.ctx, val, l.svcCtx.Scheduler, nil) // 调度算法 - err := l.scheduler.AssignAndSchedule(aiSchdl) + err := l.svcCtx.Scheduler.AssignAndSchedule(aiSchdl) if err != nil { return err } diff --git a/api/internal/scheduler/schedulers/aiScheduler.go b/api/internal/scheduler/schedulers/aiScheduler.go index 1667f8af..bbb2b59c 100644 --- a/api/internal/scheduler/schedulers/aiScheduler.go +++ b/api/internal/scheduler/schedulers/aiScheduler.go @@ -15,6 +15,7 @@ package schedulers import ( + "context" "errors" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option" @@ -32,10 +33,11 @@ type AiScheduler struct { task *response.TaskInfo *scheduler.Scheduler option *option.AiOption + ctx context.Context } -func NewAiScheduler(val string, scheduler *scheduler.Scheduler, option *option.AiOption) (*AiScheduler, error) { - return &AiScheduler{yamlString: val, Scheduler: scheduler, option: option}, nil +func NewAiScheduler(ctx context.Context, val string, scheduler *scheduler.Scheduler, option *option.AiOption) (*AiScheduler, error) { + return &AiScheduler{ctx: ctx, yamlString: val, Scheduler: scheduler, option: option}, nil } func (as *AiScheduler) GetNewStructForDb(task *response.TaskInfo, resource string, participantId int64) (interface{}, error) { @@ -104,7 +106,7 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) error { continue } go func() { - _, err := executorMap[c.Name].Execute(as.option) + _, err := executorMap[c.Name].Execute(as.ctx, as.option) if err != nil { // TODO: database operation } @@ -127,7 +129,7 @@ func (as *AiScheduler) findClustersWithResources() ([]*collector.ResourceStats, wg.Add(1) rc := resourceCollector go func() { - spec, err := rc.GetResourceStats() + spec, err := rc.GetResourceStats(as.ctx) if err != nil { errCh <- err wg.Done() diff --git a/api/internal/scheduler/service/aiService.go b/api/internal/scheduler/service/aiService.go index a3773562..5b21003b 100644 --- a/api/internal/scheduler/service/aiService.go +++ b/api/internal/scheduler/service/aiService.go @@ -1,11 +1,13 @@ package service import ( - "context" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/collector" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/executor" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/storeLink" - "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc" + "gitlink.org.cn/jcce-pcm/pcm-ac/hpcacclient" + "gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/client/imagesservice" + "gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/client/modelartsservice" + "gitlink.org.cn/jcce-pcm/pcm-participant-octopus/octopusclient" ) const ( @@ -24,21 +26,21 @@ var ( } ) -func InitAiClusterMap(ctx context.Context, svcCtx *svc.ServiceContext) (*map[string]executor.AiExecutor, *map[string]collector.AiCollector) { +func InitAiClusterMap(octopusRpc octopusclient.Octopus, modelArtsRpc modelartsservice.ModelArtsService, modelArtsImgRpc imagesservice.ImagesService, aCRpc hpcacclient.HpcAC) (*map[string]executor.AiExecutor, *map[string]collector.AiCollector) { executorMap := make(map[string]executor.AiExecutor) collectorMap := make(map[string]collector.AiCollector) for k, v := range AiTypeMap { switch v { case OCTOPUS: - octopus := storeLink.NewOctopusLink(ctx, svcCtx, k, 0) + octopus := storeLink.NewOctopusLink(octopusRpc, k, 0) collectorMap[k] = octopus executorMap[k] = octopus case MODELARTS: - modelarts := storeLink.NewModelArtsLink(ctx, svcCtx, k, 0) + modelarts := storeLink.NewModelArtsLink(modelArtsRpc, modelArtsImgRpc, k, 0) collectorMap[k] = modelarts executorMap[k] = modelarts case SHUGUANGAI: - sgai := storeLink.NewShuguangAi(ctx, svcCtx, k, 0) + sgai := storeLink.NewShuguangAi(aCRpc, k, 0) collectorMap[k] = sgai executorMap[k] = sgai } diff --git a/api/internal/scheduler/service/collector/collector.go b/api/internal/scheduler/service/collector/collector.go index 8d67175c..39a05e5e 100644 --- a/api/internal/scheduler/service/collector/collector.go +++ b/api/internal/scheduler/service/collector/collector.go @@ -1,9 +1,11 @@ package collector +import "context" + type AiCollector interface { - GetResourceStats() (*ResourceStats, error) - GetDatasetsSpecs() ([]*DatasetsSpecs, error) - GetAlgorithms() ([]*Algorithm, error) + GetResourceStats(ctx context.Context) (*ResourceStats, error) + GetDatasetsSpecs(ctx context.Context) ([]*DatasetsSpecs, error) + GetAlgorithms(ctx context.Context) ([]*Algorithm, error) } type ResourceStats struct { diff --git a/api/internal/scheduler/service/executor/aiExecutor.go b/api/internal/scheduler/service/executor/aiExecutor.go index fae0a90a..de3e49da 100644 --- a/api/internal/scheduler/service/executor/aiExecutor.go +++ b/api/internal/scheduler/service/executor/aiExecutor.go @@ -1,9 +1,10 @@ package executor import ( + "context" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option" ) type AiExecutor interface { - Execute(option *option.AiOption) (interface{}, error) + Execute(ctx context.Context, option *option.AiOption) (interface{}, error) } diff --git a/api/internal/storeLink/modelarts.go b/api/internal/storeLink/modelarts.go index 64c3437b..6dffc2ec 100644 --- a/api/internal/storeLink/modelarts.go +++ b/api/internal/storeLink/modelarts.go @@ -18,44 +18,45 @@ import ( "context" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/collector" - "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils" + "gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/client/imagesservice" + "gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/client/modelartsservice" "gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/modelarts" "strconv" "strings" ) type ModelArtsLink struct { - ctx context.Context - svcCtx *svc.ServiceContext - platform string - participantId int64 - pageIndex int32 - pageSize int32 + modelArtsRpc modelartsservice.ModelArtsService + modelArtsImgRpc imagesservice.ImagesService + platform string + participantId int64 + pageIndex int32 + pageSize int32 } -func NewModelArtsLink(ctx context.Context, svcCtx *svc.ServiceContext, name string, id int64) *ModelArtsLink { - return &ModelArtsLink{ctx: ctx, svcCtx: svcCtx, platform: name, participantId: id, pageIndex: 1, pageSize: 100} +func NewModelArtsLink(modelArtsRpc modelartsservice.ModelArtsService, modelArtsImgRpc imagesservice.ImagesService, name string, id int64) *ModelArtsLink { + return &ModelArtsLink{modelArtsRpc: modelArtsRpc, modelArtsImgRpc: modelArtsImgRpc, platform: name, participantId: id, pageIndex: 1, pageSize: 100} } -func (m *ModelArtsLink) UploadImage(path string) (interface{}, error) { +func (m *ModelArtsLink) UploadImage(ctx context.Context, path string) (interface{}, error) { //TODO modelArts上传镜像 return nil, nil } -func (m *ModelArtsLink) DeleteImage(imageId string) (interface{}, error) { +func (m *ModelArtsLink) DeleteImage(ctx context.Context, imageId string) (interface{}, error) { // TODO modelArts删除镜像 return nil, nil } -func (m *ModelArtsLink) QueryImageList() (interface{}, error) { +func (m *ModelArtsLink) QueryImageList(ctx context.Context) (interface{}, error) { // modelArts获取镜像列表 req := &modelarts.ListRepoReq{ Offset: "0", Limit: strconv.Itoa(int(m.pageSize)), Platform: m.platform, } - resp, err := m.svcCtx.ModelArtsImgRpc.ListReposDetails(m.ctx, req) + resp, err := m.modelArtsImgRpc.ListReposDetails(ctx, req) if err != nil { return nil, err } @@ -63,7 +64,7 @@ func (m *ModelArtsLink) QueryImageList() (interface{}, error) { return resp, nil } -func (m *ModelArtsLink) SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string, datasetsId string, algorithmId string, aiType string) (interface{}, error) { +func (m *ModelArtsLink) SubmitTask(ctx context.Context, imageId string, cmd string, envs []string, params []string, resourceId string, datasetsId string, algorithmId string, aiType string) (interface{}, error) { // modelArts提交任务 environments := make(map[string]string) parameters := make([]*modelarts.ParametersTrainJob, 0) @@ -100,7 +101,7 @@ func (m *ModelArtsLink) SubmitTask(imageId string, cmd string, envs []string, pa }, Platform: m.platform, } - resp, err := m.svcCtx.ModelArtsRpc.CreateTrainingJob(m.ctx, req) + resp, err := m.modelArtsRpc.CreateTrainingJob(ctx, req) if err != nil { return nil, err } @@ -108,13 +109,13 @@ func (m *ModelArtsLink) SubmitTask(imageId string, cmd string, envs []string, pa return resp, nil } -func (m *ModelArtsLink) QueryTask(taskId string) (interface{}, error) { +func (m *ModelArtsLink) QueryTask(ctx context.Context, taskId string) (interface{}, error) { // 获取任务 req := &modelarts.DetailTrainingJobsReq{ TrainingJobId: taskId, Platform: m.platform, } - resp, err := m.svcCtx.ModelArtsRpc.GetTrainingJobs(m.ctx, req) + resp, err := m.modelArtsRpc.GetTrainingJobs(ctx, req) if err != nil { return nil, err } @@ -122,13 +123,13 @@ func (m *ModelArtsLink) QueryTask(taskId string) (interface{}, error) { return resp, nil } -func (m *ModelArtsLink) DeleteTask(taskId string) (interface{}, error) { +func (m *ModelArtsLink) DeleteTask(ctx context.Context, taskId string) (interface{}, error) { // 删除任务 req := &modelarts.DeleteTrainingJobReq{ TrainingJobId: taskId, Platform: m.platform, } - resp, err := m.svcCtx.ModelArtsRpc.DeleteTrainingJob(m.ctx, req) + resp, err := m.modelArtsRpc.DeleteTrainingJob(ctx, req) if err != nil { return nil, err } @@ -136,12 +137,12 @@ func (m *ModelArtsLink) DeleteTask(taskId string) (interface{}, error) { return resp, nil } -func (m *ModelArtsLink) QuerySpecs() (interface{}, error) { +func (m *ModelArtsLink) QuerySpecs(ctx context.Context) (interface{}, error) { // octopus查询资源规格 req := &modelarts.TrainingJobFlavorsReq{ Platform: m.platform, } - resp, err := m.svcCtx.ModelArtsRpc.GetTrainingJobFlavors(m.ctx, req) + resp, err := m.modelArtsRpc.GetTrainingJobFlavors(ctx, req) if err != nil { return nil, err } @@ -149,32 +150,32 @@ func (m *ModelArtsLink) QuerySpecs() (interface{}, error) { return resp, nil } -func (m *ModelArtsLink) GetResourceStats() (*collector.ResourceStats, error) { +func (m *ModelArtsLink) GetResourceStats(ctx context.Context) (*collector.ResourceStats, error) { return nil, nil } -func (m *ModelArtsLink) GetDatasetsSpecs() ([]*collector.DatasetsSpecs, error) { +func (m *ModelArtsLink) GetDatasetsSpecs(ctx context.Context) ([]*collector.DatasetsSpecs, error) { return nil, nil } -func (m *ModelArtsLink) GetAlgorithms() ([]*collector.Algorithm, error) { +func (m *ModelArtsLink) GetAlgorithms(ctx context.Context) ([]*collector.Algorithm, error) { return nil, nil } -func (m *ModelArtsLink) Execute(option *option.AiOption) (interface{}, error) { - err := m.GenerateSubmitParams(option) +func (m *ModelArtsLink) Execute(ctx context.Context, option *option.AiOption) (interface{}, error) { + err := m.GenerateSubmitParams(ctx, option) if err != nil { return nil, err } - task, err := m.SubmitTask(option.ImageId, option.Cmd, option.Envs, option.Params, option.ResourceId, option.DatasetsId, option.AlgorithmId, option.TaskType) + task, err := m.SubmitTask(ctx, option.ImageId, option.Cmd, option.Envs, option.Params, option.ResourceId, option.DatasetsId, option.AlgorithmId, option.TaskType) if err != nil { return nil, err } return task, nil } -func (m *ModelArtsLink) GenerateSubmitParams(option *option.AiOption) error { - err := m.generateResourceId(option) +func (m *ModelArtsLink) GenerateSubmitParams(ctx context.Context, option *option.AiOption) error { + err := m.generateResourceId(ctx, option) if err != nil { return err } @@ -197,8 +198,8 @@ func (m *ModelArtsLink) GenerateSubmitParams(option *option.AiOption) error { return nil } -func (m *ModelArtsLink) generateResourceId(option *option.AiOption) error { - _, err := m.QuerySpecs() +func (m *ModelArtsLink) generateResourceId(ctx context.Context, option *option.AiOption) error { + _, err := m.QuerySpecs(ctx) if err != nil { return err } diff --git a/api/internal/storeLink/octopus.go b/api/internal/storeLink/octopus.go index b3bd546a..2422ada3 100644 --- a/api/internal/storeLink/octopus.go +++ b/api/internal/storeLink/octopus.go @@ -19,17 +19,16 @@ import ( "errors" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/collector" - "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils" "gitlink.org.cn/jcce-pcm/pcm-participant-octopus/octopus" + "gitlink.org.cn/jcce-pcm/pcm-participant-octopus/octopusclient" "math" "strconv" "strings" ) type OctopusLink struct { - ctx context.Context - svcCtx *svc.ServiceContext + octopusRpc octopusclient.Octopus pageIndex int32 pageSize int32 platform string @@ -66,11 +65,11 @@ var ( } ) -func NewOctopusLink(ctx context.Context, svcCtx *svc.ServiceContext, name string, id int64) *OctopusLink { - return &OctopusLink{ctx: ctx, svcCtx: svcCtx, platform: name, participantId: id, pageIndex: 1, pageSize: 100} +func NewOctopusLink(octopusRpc octopusclient.Octopus, name string, id int64) *OctopusLink { + return &OctopusLink{octopusRpc: octopusRpc, platform: name, participantId: id, pageIndex: 1, pageSize: 100} } -func (o *OctopusLink) UploadImage(path string) (interface{}, error) { +func (o *OctopusLink) UploadImage(ctx context.Context, path string) (interface{}, error) { // octopus创建镜像 createReq := &octopus.CreateImageReq{ Platform: o.platform, @@ -80,7 +79,7 @@ func (o *OctopusLink) UploadImage(path string) (interface{}, error) { ImageVersion: IMG_VERSION_PREFIX + utils.RandomString(7), }, } - createResp, err := o.svcCtx.OctopusRpc.CreateImage(o.ctx, createReq) + createResp, err := o.octopusRpc.CreateImage(ctx, createReq) if err != nil { return nil, err } @@ -94,7 +93,7 @@ func (o *OctopusLink) UploadImage(path string) (interface{}, error) { FileName: "", }, } - uploadResp, err := o.svcCtx.OctopusRpc.UploadImage(o.ctx, uploadReq) + uploadResp, err := o.octopusRpc.UploadImage(ctx, uploadReq) if err != nil { return nil, err } @@ -104,13 +103,13 @@ func (o *OctopusLink) UploadImage(path string) (interface{}, error) { return uploadResp, nil } -func (o *OctopusLink) DeleteImage(imageId string) (interface{}, error) { +func (o *OctopusLink) DeleteImage(ctx context.Context, imageId string) (interface{}, error) { // octopus删除镜像 req := &octopus.DeleteImageReq{ Platform: o.platform, ImageId: imageId, } - resp, err := o.svcCtx.OctopusRpc.DeleteImage(o.ctx, req) + resp, err := o.octopusRpc.DeleteImage(ctx, req) if err != nil { return nil, err } @@ -118,14 +117,14 @@ func (o *OctopusLink) DeleteImage(imageId string) (interface{}, error) { return resp, nil } -func (o *OctopusLink) QueryImageList() (interface{}, error) { +func (o *OctopusLink) QueryImageList(ctx context.Context) (interface{}, error) { // octopus获取镜像列表 req := &octopus.GetUserImageListReq{ Platform: o.platform, PageIndex: o.pageIndex, PageSize: o.pageSize, } - resp, err := o.svcCtx.OctopusRpc.GetUserImageList(o.ctx, req) + resp, err := o.octopusRpc.GetUserImageList(ctx, req) if err != nil { return nil, err } @@ -133,7 +132,7 @@ func (o *OctopusLink) QueryImageList() (interface{}, error) { return resp, nil } -func (o *OctopusLink) SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string, datasetsId string, algorithmId string, aiType string) (interface{}, error) { +func (o *OctopusLink) SubmitTask(ctx context.Context, imageId string, cmd string, envs []string, params []string, resourceId string, datasetsId string, algorithmId string, aiType string) (interface{}, error) { // octopus提交任务 // python参数 @@ -176,7 +175,7 @@ func (o *OctopusLink) SubmitTask(imageId string, cmd string, envs []string, para AlgorithmVersion: VERSION, }, } - resp, err := o.svcCtx.OctopusRpc.CreateTrainJob(o.ctx, req) + resp, err := o.octopusRpc.CreateTrainJob(ctx, req) if err != nil { return nil, err } @@ -184,13 +183,13 @@ func (o *OctopusLink) SubmitTask(imageId string, cmd string, envs []string, para return resp, nil } -func (o *OctopusLink) QueryTask(taskId string) (interface{}, error) { +func (o *OctopusLink) QueryTask(ctx context.Context, taskId string) (interface{}, error) { // octopus获取任务 req := &octopus.GetTrainJobReq{ Platform: o.platform, Id: taskId, } - resp, err := o.svcCtx.OctopusRpc.GetTrainJob(o.ctx, req) + resp, err := o.octopusRpc.GetTrainJob(ctx, req) if err != nil { return nil, err } @@ -198,13 +197,13 @@ func (o *OctopusLink) QueryTask(taskId string) (interface{}, error) { return resp, nil } -func (o *OctopusLink) DeleteTask(taskId string) (interface{}, error) { +func (o *OctopusLink) DeleteTask(ctx context.Context, taskId string) (interface{}, error) { // octopus删除任务 req := &octopus.DeleteTrainJobReq{ Platform: o.platform, JobIds: []string{taskId}, } - resp, err := o.svcCtx.OctopusRpc.DeleteTrainJob(o.ctx, req) + resp, err := o.octopusRpc.DeleteTrainJob(ctx, req) if err != nil { return nil, err } @@ -212,13 +211,13 @@ func (o *OctopusLink) DeleteTask(taskId string) (interface{}, error) { return resp, nil } -func (o *OctopusLink) QuerySpecs() (interface{}, error) { +func (o *OctopusLink) QuerySpecs(ctx context.Context) (interface{}, error) { // octopus查询资源规格 req := &octopus.GetResourceSpecsReq{ Platform: o.platform, ResourcePool: RESOURCE_POOL, } - resp, err := o.svcCtx.OctopusRpc.GetResourceSpecs(o.ctx, req) + resp, err := o.octopusRpc.GetResourceSpecs(ctx, req) if err != nil { return nil, err } @@ -226,12 +225,12 @@ func (o *OctopusLink) QuerySpecs() (interface{}, error) { return resp, nil } -func (o *OctopusLink) GetResourceStats() (*collector.ResourceStats, error) { +func (o *OctopusLink) GetResourceStats(ctx context.Context) (*collector.ResourceStats, error) { req := &octopus.GetResourceSpecsReq{ Platform: o.platform, ResourcePool: RESOURCE_POOL, } - specResp, err := o.svcCtx.OctopusRpc.GetResourceSpecs(o.ctx, req) + specResp, err := o.octopusRpc.GetResourceSpecs(ctx, req) if err != nil { return nil, err } @@ -241,7 +240,7 @@ func (o *OctopusLink) GetResourceStats() (*collector.ResourceStats, error) { balanceReq := &octopus.GetUserBalanceReq{ Platform: o.platform, } - balanceResp, err := o.svcCtx.OctopusRpc.GetUserBalance(o.ctx, balanceReq) + balanceResp, err := o.octopusRpc.GetUserBalance(ctx, balanceReq) if err != nil { return nil, err } @@ -294,13 +293,13 @@ func (o *OctopusLink) GetResourceStats() (*collector.ResourceStats, error) { return resourceStats, nil } -func (o *OctopusLink) GetDatasetsSpecs() ([]*collector.DatasetsSpecs, error) { +func (o *OctopusLink) GetDatasetsSpecs(ctx context.Context) ([]*collector.DatasetsSpecs, error) { req := &octopus.GetMyDatasetListReq{ Platform: o.platform, PageIndex: o.pageIndex, PageSize: o.pageSize, } - resp, err := o.svcCtx.OctopusRpc.GetMyDatasetList(o.ctx, req) + resp, err := o.octopusRpc.GetMyDatasetList(ctx, req) if err != nil { return nil, err } @@ -315,7 +314,7 @@ func (o *OctopusLink) GetDatasetsSpecs() ([]*collector.DatasetsSpecs, error) { return specs, nil } -func (o *OctopusLink) GetAlgorithms() ([]*collector.Algorithm, error) { +func (o *OctopusLink) GetAlgorithms(ctx context.Context) ([]*collector.Algorithm, error) { var algorithms []*collector.Algorithm req := &octopus.GetMyAlgorithmListReq{ @@ -323,7 +322,7 @@ func (o *OctopusLink) GetAlgorithms() ([]*collector.Algorithm, error) { PageIndex: o.pageIndex, PageSize: o.pageSize, } - resp, err := o.svcCtx.OctopusRpc.GetMyAlgorithmList(o.ctx, req) + resp, err := o.octopusRpc.GetMyAlgorithmList(ctx, req) if err != nil { return nil, err } @@ -338,32 +337,32 @@ func (o *OctopusLink) GetAlgorithms() ([]*collector.Algorithm, error) { return algorithms, nil } -func (o *OctopusLink) Execute(option *option.AiOption) (interface{}, error) { - err := o.GenerateSubmitParams(option) +func (o *OctopusLink) Execute(ctx context.Context, option *option.AiOption) (interface{}, error) { + err := o.GenerateSubmitParams(ctx, option) if err != nil { return nil, err } - task, err := o.SubmitTask(option.ImageId, option.Cmd, option.Envs, option.Params, option.ResourceId, option.DatasetsId, option.AlgorithmId, option.TaskType) + task, err := o.SubmitTask(ctx, option.ImageId, option.Cmd, option.Envs, option.Params, option.ResourceId, option.DatasetsId, option.AlgorithmId, option.TaskType) if err != nil { return nil, err } return task, nil } -func (o *OctopusLink) GenerateSubmitParams(option *option.AiOption) error { - err := o.generateResourceId(option) +func (o *OctopusLink) GenerateSubmitParams(ctx context.Context, option *option.AiOption) error { + err := o.generateResourceId(ctx, option) if err != nil { return err } - err = o.generateDatasetsId(option) + err = o.generateDatasetsId(ctx, option) if err != nil { return err } - err = o.generateImageId(option) + err = o.generateImageId(ctx, option) if err != nil { return err } - err = o.generateAlgorithmId(option) + err = o.generateAlgorithmId(ctx, option) if err != nil { return err } @@ -382,7 +381,7 @@ func (o *OctopusLink) GenerateSubmitParams(option *option.AiOption) error { return nil } -func (o *OctopusLink) generateResourceId(option *option.AiOption) error { +func (o *OctopusLink) generateResourceId(ctx context.Context, option *option.AiOption) error { if option.ResourceType == "" { return errors.New("ResourceType not set") } @@ -390,7 +389,7 @@ func (o *OctopusLink) generateResourceId(option *option.AiOption) error { Platform: o.platform, ResourcePool: RESOURCE_POOL, } - specResp, err := o.svcCtx.OctopusRpc.GetResourceSpecs(o.ctx, req) + specResp, err := o.octopusRpc.GetResourceSpecs(ctx, req) if err != nil { return err } @@ -418,7 +417,7 @@ func (o *OctopusLink) generateResourceId(option *option.AiOption) error { return errors.New("failed to get ResourceId") } -func (o *OctopusLink) generateDatasetsId(option *option.AiOption) error { +func (o *OctopusLink) generateDatasetsId(ctx context.Context, option *option.AiOption) error { if option.DatasetsName == "" { return errors.New("DatasetsName not set") } @@ -427,7 +426,7 @@ func (o *OctopusLink) generateDatasetsId(option *option.AiOption) error { PageIndex: o.pageIndex, PageSize: o.pageSize, } - resp, err := o.svcCtx.OctopusRpc.GetMyDatasetList(o.ctx, req) + resp, err := o.octopusRpc.GetMyDatasetList(ctx, req) if err != nil { return err } @@ -443,7 +442,7 @@ func (o *OctopusLink) generateDatasetsId(option *option.AiOption) error { return errors.New("failed to get DatasetsId") } -func (o *OctopusLink) generateImageId(option *option.AiOption) error { +func (o *OctopusLink) generateImageId(ctx context.Context, option *option.AiOption) error { if option.TaskType == "" { return errors.New("TaskType not set") } @@ -453,7 +452,7 @@ func (o *OctopusLink) generateImageId(option *option.AiOption) error { PageIndex: o.pageIndex, PageSize: o.pageSize, } - resp, err := o.svcCtx.OctopusRpc.GetUserImageList(o.ctx, req) + resp, err := o.octopusRpc.GetUserImageList(ctx, req) if err != nil { return err } @@ -475,7 +474,7 @@ func (o *OctopusLink) generateImageId(option *option.AiOption) error { PageIndex: o.pageIndex, PageSize: o.pageSize, } - preImgResp, err := o.svcCtx.OctopusRpc.GetPresetImageList(o.ctx, preImgReq) + preImgResp, err := o.octopusRpc.GetPresetImageList(ctx, preImgReq) if err != nil { return err } @@ -495,7 +494,7 @@ func (o *OctopusLink) generateImageId(option *option.AiOption) error { return errors.New("failed to get ImageId") } -func (o *OctopusLink) generateAlgorithmId(option *option.AiOption) error { +func (o *OctopusLink) generateAlgorithmId(ctx context.Context, option *option.AiOption) error { // temporarily set algorithm to cnn if option.AlgorithmName == "" { switch option.DatasetsName { @@ -511,7 +510,7 @@ func (o *OctopusLink) generateAlgorithmId(option *option.AiOption) error { PageIndex: o.pageIndex, PageSize: o.pageSize, } - resp, err := o.svcCtx.OctopusRpc.GetMyAlgorithmList(o.ctx, req) + resp, err := o.octopusRpc.GetMyAlgorithmList(ctx, req) if err != nil { return err } diff --git a/api/internal/storeLink/shuguangHpc.go b/api/internal/storeLink/shuguangHpc.go index b4a0768d..d5ffc8ec 100644 --- a/api/internal/storeLink/shuguangHpc.go +++ b/api/internal/storeLink/shuguangHpc.go @@ -4,17 +4,16 @@ import ( "context" "errors" "fmt" - "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils" "gitlink.org.cn/jcce-pcm/pcm-ac/hpcAC" + "gitlink.org.cn/jcce-pcm/pcm-ac/hpcacclient" "strconv" "strings" ) type ShuguangHpc struct { - ctx context.Context - svcCtx *svc.ServiceContext + aCRpc hpcacclient.HpcAC platform string participantId int64 } @@ -128,23 +127,23 @@ type ResourceSpecHpc struct { GAP_NDCU string } -func NewShuguangHpc(ctx context.Context, svcCtx *svc.ServiceContext, name string, id int64) *ShuguangHpc { - return &ShuguangHpc{ctx: ctx, svcCtx: svcCtx, platform: name, participantId: id} +func NewShuguangHpc(aCRpc hpcacclient.HpcAC, name string, id int64) *ShuguangHpc { + return &ShuguangHpc{aCRpc: aCRpc, platform: name, participantId: id} } -func (s ShuguangHpc) UploadImage(path string) (interface{}, error) { +func (s ShuguangHpc) UploadImage(ctx context.Context, path string) (interface{}, error) { return nil, nil } -func (s ShuguangHpc) DeleteImage(imageId string) (interface{}, error) { +func (s ShuguangHpc) DeleteImage(ctx context.Context, imageId string) (interface{}, error) { return nil, nil } -func (s ShuguangHpc) QueryImageList() (interface{}, error) { +func (s ShuguangHpc) QueryImageList(ctx context.Context) (interface{}, error) { return nil, nil } -func (s ShuguangHpc) SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string, datasetsId string, algorithmId string, aiType string) (interface{}, error) { +func (s ShuguangHpc) SubmitTask(ctx context.Context, imageId string, cmd string, envs []string, params []string, resourceId string, datasetsId string, algorithmId string, aiType string) (interface{}, error) { // shuguangHpc提交任务 //判断是否resourceId匹配自定义资源Id @@ -194,7 +193,7 @@ func (s ShuguangHpc) SubmitTask(imageId string, cmd string, envs []string, param updateSGHpcRequestByResourceId(resourceId, req) - resp, err := s.svcCtx.ACRpc.SubmitJob(s.ctx, req) + resp, err := s.aCRpc.SubmitJob(ctx, req) if err != nil { return nil, err } @@ -203,12 +202,12 @@ func (s ShuguangHpc) SubmitTask(imageId string, cmd string, envs []string, param } -func (s ShuguangHpc) QueryTask(taskId string) (interface{}, error) { +func (s ShuguangHpc) QueryTask(ctx context.Context, taskId string) (interface{}, error) { //实时作业 reqC := &hpcAC.JobDetailReq{ JobId: taskId, } - respC, err := s.svcCtx.ACRpc.GetJobDetail(s.ctx, reqC) + respC, err := s.aCRpc.GetJobDetail(ctx, reqC) if err != nil { return nil, err } @@ -223,7 +222,7 @@ func (s ShuguangHpc) QueryTask(taskId string) (interface{}, error) { JobmanagerId: strconv.Itoa(StrJobManagerID), } - respH, err := s.svcCtx.ACRpc.HistoryJobDetail(s.ctx, reqH) + respH, err := s.aCRpc.HistoryJobDetail(ctx, reqH) if err != nil { return nil, err } @@ -232,7 +231,7 @@ func (s ShuguangHpc) QueryTask(taskId string) (interface{}, error) { } } -func (s ShuguangHpc) QuerySpecs() (interface{}, error) { +func (s ShuguangHpc) QuerySpecs(ctx context.Context) (interface{}, error) { resp := &types.GetResourceSpecsResp{} for k, v := range RESOURCESPECSHPC { @@ -248,12 +247,12 @@ func (s ShuguangHpc) QuerySpecs() (interface{}, error) { return resp, nil } -func (s ShuguangHpc) DeleteTask(taskId string) (interface{}, error) { +func (s ShuguangHpc) DeleteTask(ctx context.Context, taskId string) (interface{}, error) { strJobInfoMap := fmt.Sprintf(StrJobInfoMap, StrJobManagerID, Username, taskId) req := &hpcAC.DeleteJobReq{ StrJobInfoMap: strJobInfoMap, } - resp, err := s.svcCtx.ACRpc.DeleteJob(s.ctx, req) + resp, err := s.aCRpc.DeleteJob(ctx, req) if err != nil { return nil, err } diff --git a/api/internal/storeLink/shuguangai.go b/api/internal/storeLink/shuguangai.go index 654e12b4..e7da3222 100644 --- a/api/internal/storeLink/shuguangai.go +++ b/api/internal/storeLink/shuguangai.go @@ -20,10 +20,10 @@ import ( "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/common" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/collector" - "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils" "gitlink.org.cn/jcce-pcm/pcm-ac/hpcAC" + "gitlink.org.cn/jcce-pcm/pcm-ac/hpcacclient" "strconv" "strings" ) @@ -91,31 +91,30 @@ type ResourceSpecSGAI struct { } type ShuguangAi struct { - ctx context.Context - svcCtx *svc.ServiceContext + aCRpc hpcacclient.HpcAC platform string participantId int64 } -func NewShuguangAi(ctx context.Context, svcCtx *svc.ServiceContext, name string, id int64) *ShuguangAi { - return &ShuguangAi{ctx: ctx, svcCtx: svcCtx, platform: name, participantId: id} +func NewShuguangAi(aCRpc hpcacclient.HpcAC, name string, id int64) *ShuguangAi { + return &ShuguangAi{aCRpc: aCRpc, platform: name, participantId: id} } -func (s *ShuguangAi) UploadImage(path string) (interface{}, error) { +func (s *ShuguangAi) UploadImage(ctx context.Context, path string) (interface{}, error) { return nil, nil } -func (s *ShuguangAi) DeleteImage(imageId string) (interface{}, error) { +func (s *ShuguangAi) DeleteImage(ctx context.Context, imageId string) (interface{}, error) { return nil, nil } -func (s *ShuguangAi) QueryImageList() (interface{}, error) { +func (s *ShuguangAi) QueryImageList(ctx context.Context) (interface{}, error) { // shuguangAi获取镜像列表 req := &hpcAC.GetImageListAiReq{ AcceleratorType: DCU, TaskType: PYTORCH, } - resp, err := s.svcCtx.ACRpc.GetImageListAi(s.ctx, req) + resp, err := s.aCRpc.GetImageListAi(ctx, req) if err != nil { return nil, err } @@ -123,7 +122,7 @@ func (s *ShuguangAi) QueryImageList() (interface{}, error) { return resp, nil } -func (s *ShuguangAi) SubmitPytorchTask(imageId string, cmd string, envs []string, params []string, resourceId string, datasetsId string, algorithmId string) (interface{}, error) { +func (s *ShuguangAi) SubmitPytorchTask(ctx context.Context, imageId string, cmd string, envs []string, params []string, resourceId string, datasetsId string, algorithmId string) (interface{}, error) { //判断是否resourceId匹配自定义资源Id _, isMapContainsKey := RESOURCESPECSAI[resourceId] if !isMapContainsKey { @@ -132,7 +131,7 @@ func (s *ShuguangAi) SubmitPytorchTask(imageId string, cmd string, envs []string //根据imageId获取imagePath, version imageReq := &hpcAC.GetImageAiByIdReq{ImageId: imageId} - imageResp, err := s.svcCtx.ACRpc.GetImageAiById(s.ctx, imageReq) + imageResp, err := s.aCRpc.GetImageAiById(ctx, imageReq) if err != nil { return nil, err } @@ -176,7 +175,7 @@ func (s *ShuguangAi) SubmitPytorchTask(imageId string, cmd string, envs []string updateSGAIRequestByResourceId(resourceId, req) - resp, err := s.svcCtx.ACRpc.SubmitPytorchTask(s.ctx, req) + resp, err := s.aCRpc.SubmitPytorchTask(ctx, req) if err != nil { return nil, err } @@ -191,7 +190,7 @@ func updateSGAIRequestByResourceId(resourceId string, req *hpcAC.SubmitPytorchTa req.Params.WorkerRamSize = spec.RAM } -func (s *ShuguangAi) SubmitTensorflowTask(imageId string, cmd string, envs []string, params []string, resourceId string, datasetsId string, algorithmId string) (interface{}, error) { +func (s *ShuguangAi) SubmitTensorflowTask(ctx context.Context, imageId string, cmd string, envs []string, params []string, resourceId string, datasetsId string, algorithmId string) (interface{}, error) { //req := &hpcAC.SubmitTensorflowTaskReq{ // Params: &hpcAC.SubmitTensorflowTaskParams{ // @@ -200,7 +199,7 @@ func (s *ShuguangAi) SubmitTensorflowTask(imageId string, cmd string, envs []str return nil, nil } -func (s *ShuguangAi) SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string, datasetsId string, algorithmId string, aiType string) (interface{}, error) { +func (s *ShuguangAi) SubmitTask(ctx context.Context, imageId string, cmd string, envs []string, params []string, resourceId string, datasetsId string, algorithmId string, aiType string) (interface{}, error) { // set algorithmId temporarily for storelink submit if algorithmId == "" { algorithmId = "pytorch-mnist-fcn" @@ -209,13 +208,13 @@ func (s *ShuguangAi) SubmitTask(imageId string, cmd string, envs []string, param // shuguangAi提交任务 switch aiType { case PYTORCH_TASK: - task, err := s.SubmitPytorchTask(imageId, cmd, envs, params, resourceId, datasetsId, algorithmId) + task, err := s.SubmitPytorchTask(ctx, imageId, cmd, envs, params, resourceId, datasetsId, algorithmId) if err != nil { return nil, err } return task, nil case TENSORFLOW_TASK: - task, err := s.SubmitTensorflowTask(imageId, cmd, envs, params, resourceId, datasetsId, algorithmId) + task, err := s.SubmitTensorflowTask(ctx, imageId, cmd, envs, params, resourceId, datasetsId, algorithmId) if err != nil { return nil, err } @@ -224,12 +223,12 @@ func (s *ShuguangAi) SubmitTask(imageId string, cmd string, envs []string, param return nil, errors.New("shuguangAi不支持的任务类型") } -func (s *ShuguangAi) QueryTask(taskId string) (interface{}, error) { +func (s *ShuguangAi) QueryTask(ctx context.Context, taskId string) (interface{}, error) { // shuguangAi获取任务 req := &hpcAC.GetPytorchTaskReq{ Id: taskId, } - resp, err := s.svcCtx.ACRpc.GetPytorchTask(s.ctx, req) + resp, err := s.aCRpc.GetPytorchTask(ctx, req) if err != nil { return nil, err } @@ -237,12 +236,12 @@ func (s *ShuguangAi) QueryTask(taskId string) (interface{}, error) { return resp, nil } -func (s *ShuguangAi) DeleteTask(taskId string) (interface{}, error) { +func (s *ShuguangAi) DeleteTask(ctx context.Context, taskId string) (interface{}, error) { // shuguangAi删除任务 req := &hpcAC.DeleteTaskAiReq{ Ids: taskId, } - resp, err := s.svcCtx.ACRpc.DeleteTaskAi(s.ctx, req) + resp, err := s.aCRpc.DeleteTaskAi(ctx, req) if err != nil { return nil, err } @@ -250,7 +249,7 @@ func (s *ShuguangAi) DeleteTask(taskId string) (interface{}, error) { return resp, nil } -func (s *ShuguangAi) QuerySpecs() (interface{}, error) { +func (s *ShuguangAi) QuerySpecs(ctx context.Context) (interface{}, error) { resp := &types.GetResourceSpecsResp{} for k, v := range RESOURCESPECSAI { @@ -266,10 +265,10 @@ func (s *ShuguangAi) QuerySpecs() (interface{}, error) { return resp, nil } -func (s *ShuguangAi) GetResourceStats() (*collector.ResourceStats, error) { +func (s *ShuguangAi) GetResourceStats(ctx context.Context) (*collector.ResourceStats, error) { //balance userReq := &hpcAC.GetUserInfoReq{} - userinfo, err := s.svcCtx.ACRpc.GetUserInfo(s.ctx, userReq) + userinfo, err := s.aCRpc.GetUserInfo(ctx, userReq) if err != nil { return nil, err } @@ -277,7 +276,7 @@ func (s *ShuguangAi) GetResourceStats() (*collector.ResourceStats, error) { //resource limit limitReq := &hpcAC.QueueReq{} - limitResp, err := s.svcCtx.ACRpc.QueryUserQuotasLimit(s.ctx, limitReq) + limitResp, err := s.aCRpc.QueryUserQuotasLimit(ctx, limitReq) if err != nil { return nil, err } @@ -286,7 +285,7 @@ func (s *ShuguangAi) GetResourceStats() (*collector.ResourceStats, error) { //disk diskReq := &hpcAC.ParaStorQuotaReq{} - diskResp, err := s.svcCtx.ACRpc.ParaStorQuota(s.ctx, diskReq) + diskResp, err := s.aCRpc.ParaStorQuota(ctx, diskReq) if err != nil { return nil, err } @@ -295,14 +294,14 @@ func (s *ShuguangAi) GetResourceStats() (*collector.ResourceStats, error) { availDisk := common.RoundFloat((diskResp.Data[0].Threshold-diskResp.Data[0].Usage)*KB*KB*KB, 3) //memory - nodeResp, err := s.svcCtx.ACRpc.GetNodeResources(s.ctx, nil) + nodeResp, err := s.aCRpc.GetNodeResources(ctx, nil) if err != nil { return nil, err } memSize := common.RoundFloat(float64(nodeResp.Data.MemorySize)*KB*KB, 3) // MB to BYTES //resources being occupied - memberJobResp, err := s.svcCtx.ACRpc.GetMemberJobs(s.ctx, nil) + memberJobResp, err := s.aCRpc.GetMemberJobs(ctx, nil) if err != nil { return nil, err } @@ -361,9 +360,9 @@ func (s *ShuguangAi) GetResourceStats() (*collector.ResourceStats, error) { return resourceStats, nil } -func (s *ShuguangAi) GetDatasetsSpecs() ([]*collector.DatasetsSpecs, error) { +func (s *ShuguangAi) GetDatasetsSpecs(ctx context.Context) ([]*collector.DatasetsSpecs, error) { req := &hpcAC.GetFileListReq{Limit: 100, Path: DATASETS_DIR, Start: 0} - list, err := s.svcCtx.ACRpc.GetFileList(s.ctx, req) + list, err := s.aCRpc.GetFileList(ctx, req) if err != nil { return nil, err } @@ -378,12 +377,12 @@ func (s *ShuguangAi) GetDatasetsSpecs() ([]*collector.DatasetsSpecs, error) { return specs, nil } -func (s *ShuguangAi) GetAlgorithms() ([]*collector.Algorithm, error) { +func (s *ShuguangAi) GetAlgorithms(ctx context.Context) ([]*collector.Algorithm, error) { var algorithms []*collector.Algorithm for _, t := range GetTaskTypes() { taskType := t req := &hpcAC.GetFileListReq{Limit: 100, Path: ALGORITHM_DIR + FORWARD_SLASH + taskType, Start: 0} - list, err := s.svcCtx.ACRpc.GetFileList(s.ctx, req) + list, err := s.aCRpc.GetFileList(ctx, req) if err != nil { return nil, err } @@ -398,28 +397,28 @@ func (s *ShuguangAi) GetAlgorithms() ([]*collector.Algorithm, error) { return algorithms, nil } -func (s *ShuguangAi) Execute(option *option.AiOption) (interface{}, error) { - err := s.GenerateSubmitParams(option) +func (s *ShuguangAi) Execute(ctx context.Context, option *option.AiOption) (interface{}, error) { + err := s.GenerateSubmitParams(ctx, option) if err != nil { return nil, err } - task, err := s.SubmitTask(option.ImageId, option.Cmd, option.Envs, option.Params, option.ResourceId, option.DatasetsId, option.AlgorithmId, option.TaskType) + task, err := s.SubmitTask(ctx, option.ImageId, option.Cmd, option.Envs, option.Params, option.ResourceId, option.DatasetsId, option.AlgorithmId, option.TaskType) if err != nil { return nil, err } return task, nil } -func (s *ShuguangAi) GenerateSubmitParams(option *option.AiOption) error { +func (s *ShuguangAi) GenerateSubmitParams(ctx context.Context, option *option.AiOption) error { err := s.generateResourceId(option) if err != nil { return err } - err = s.generateImageId(option) + err = s.generateImageId(ctx, option) if err != nil { return err } - err = s.generateAlgorithmId(option) + err = s.generateAlgorithmId(ctx, option) if err != nil { return err } @@ -473,7 +472,7 @@ func (s *ShuguangAi) generateResourceId(option *option.AiOption) error { return errors.New("failed to get ResourceId") } -func (s *ShuguangAi) generateImageId(option *option.AiOption) error { +func (s *ShuguangAi) generateImageId(ctx context.Context, option *option.AiOption) error { if option.TaskType == "" { return errors.New("TaskType not set") } @@ -482,7 +481,7 @@ func (s *ShuguangAi) generateImageId(option *option.AiOption) error { AcceleratorType: DCU, TaskType: taskType, } - resp, err := s.svcCtx.ACRpc.GetImageListAi(s.ctx, req) + resp, err := s.aCRpc.GetImageListAi(ctx, req) if err != nil { return err } @@ -502,13 +501,13 @@ func (s *ShuguangAi) generateImageId(option *option.AiOption) error { return errors.New("failed to get ImageId") } -func (s *ShuguangAi) generateAlgorithmId(option *option.AiOption) error { +func (s *ShuguangAi) generateAlgorithmId(ctx context.Context, option *option.AiOption) error { if option.DatasetsName == "" { return errors.New("DatasetsName not set") } req := &hpcAC.GetFileListReq{Limit: 100, Path: ALGORITHM_DIR + FORWARD_SLASH + option.TaskType, Start: 0} - list, err := s.svcCtx.ACRpc.GetFileList(s.ctx, req) + list, err := s.aCRpc.GetFileList(ctx, req) if err != nil { return err } diff --git a/api/internal/storeLink/storeLink.go b/api/internal/storeLink/storeLink.go index a0bb1919..62a1079d 100644 --- a/api/internal/storeLink/storeLink.go +++ b/api/internal/storeLink/storeLink.go @@ -32,13 +32,13 @@ import ( ) type Linkage interface { - UploadImage(path string) (interface{}, error) - DeleteImage(imageId string) (interface{}, error) - QueryImageList() (interface{}, error) - SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string, datasetsId string, algorithmId string, aiType string) (interface{}, error) - QueryTask(taskId string) (interface{}, error) - QuerySpecs() (interface{}, error) - DeleteTask(taskId string) (interface{}, error) + UploadImage(ctx context.Context, path string) (interface{}, error) + DeleteImage(ctx context.Context, imageId string) (interface{}, error) + QueryImageList(ctx context.Context) (interface{}, error) + SubmitTask(ctx context.Context, imageId string, cmd string, envs []string, params []string, resourceId string, datasetsId string, algorithmId string, aiType string) (interface{}, error) + QueryTask(ctx context.Context, taskId string) (interface{}, error) + QuerySpecs(ctx context.Context) (interface{}, error) + DeleteTask(ctx context.Context, taskId string) (interface{}, error) } const ( @@ -89,19 +89,19 @@ type StoreLink struct { ILinkage Linkage } -func NewStoreLink(ctx context.Context, svcCtx *svc.ServiceContext, participant *models.StorelinkCenter) *StoreLink { +func NewStoreLink(svcCtx *svc.ServiceContext, participant *models.StorelinkCenter) *StoreLink { switch participant.Type { case TYPE_OCTOPUS: - linkStruct := NewOctopusLink(ctx, svcCtx, participant.Name, participant.Id) + linkStruct := NewOctopusLink(svcCtx.OctopusRpc, participant.Name, participant.Id) return &StoreLink{ILinkage: linkStruct} case TYPE_MODELARTS: - linkStruct := NewModelArtsLink(ctx, svcCtx, participant.Name, participant.Id) + linkStruct := NewModelArtsLink(svcCtx.ModelArtsRpc, svcCtx.ModelArtsImgRpc, participant.Name, participant.Id) return &StoreLink{ILinkage: linkStruct} case TYPE_SHUGUANGAI: - linkStruct := NewShuguangAi(ctx, svcCtx, participant.Name, participant.Id) + linkStruct := NewShuguangAi(svcCtx.ACRpc, participant.Name, participant.Id) return &StoreLink{ILinkage: linkStruct} case TYPE_SHUGUANGHPC: - linkStruct := NewShuguangHpc(ctx, svcCtx, participant.Name, participant.Id) + linkStruct := NewShuguangHpc(svcCtx.ACRpc, participant.Name, participant.Id) return &StoreLink{ILinkage: linkStruct} default: return nil @@ -124,7 +124,7 @@ func GetResourceTypes() []string { return resourceTypes } -func GetDatasetsNames(collectorMap *map[string]collector.AiCollector) ([]string, error) { +func GetDatasetsNames(ctx context.Context, collectorMap *map[string]collector.AiCollector) ([]string, error) { var wg sync.WaitGroup var errCh = make(chan error, len(*collectorMap)) var errs []error @@ -136,7 +136,7 @@ func GetDatasetsNames(collectorMap *map[string]collector.AiCollector) ([]string, c := col go func() { var ns []string - specs, err := c.GetDatasetsSpecs() + specs, err := c.GetDatasetsSpecs(ctx) if err != nil { errCh <- err wg.Done() @@ -176,7 +176,7 @@ func GetDatasetsNames(collectorMap *map[string]collector.AiCollector) ([]string, return names, nil } -func GetAlgorithms(collectorMap *map[string]collector.AiCollector, resourceType string, taskType string, dataset string) ([]string, error) { +func GetAlgorithms(ctx context.Context, collectorMap *map[string]collector.AiCollector, resourceType string, taskType string, dataset string) ([]string, error) { var names []string var wg sync.WaitGroup var errCh = make(chan error, len(*collectorMap)) @@ -189,7 +189,7 @@ func GetAlgorithms(collectorMap *map[string]collector.AiCollector, resourceType c := col go func() { var ns []string - algorithms, err := c.GetAlgorithms() + algorithms, err := c.GetAlgorithms(ctx) if err != nil { errCh <- err wg.Done() diff --git a/api/internal/svc/servicecontext.go b/api/internal/svc/servicecontext.go index 5caebbf0..95dc0a46 100644 --- a/api/internal/svc/servicecontext.go +++ b/api/internal/svc/servicecontext.go @@ -22,6 +22,7 @@ import ( "github.com/zeromicro/go-zero/core/logx" "github.com/zeromicro/go-zero/zrpc" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/config" + "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/tracker" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils" "gitlink.org.cn/JointCloud/pcm-coordinator/rpc/client/participantservice" @@ -58,6 +59,7 @@ type ServiceContext struct { PromClient tracker.Prometheus AlertClient *alert.AlertmanagerAPI HttpClient *resty.Client + Scheduler *scheduler.Scheduler } func NewServiceContext(c config.Config) *ServiceContext { @@ -110,16 +112,26 @@ func NewServiceContext(c config.Config) *ServiceContext { Addr: c.Redis.Host, Password: c.Redis.Pass, }) + + // scheduler + octopusRpc := octopusclient.NewOctopus(zrpc.MustNewClient(c.OctopusRpcConf)) + aCRpc := hpcacclient.NewHpcAC(zrpc.MustNewClient(c.ACRpcConf)) + modelArtsRpc := modelartsservice.NewModelArtsService(zrpc.MustNewClient(c.ModelArtsRpcConf)) + modelArtsImgRpc := imagesservice.NewImagesService(zrpc.MustNewClient(c.ModelArtsImgRpcConf)) + //aiExecutor, resourceCollector := service2.InitAiClusterMap(octopusRpc, modelArtsRpc, modelArtsImgRpc, aCRpc) + //storage := &database.AiStorage{DbEngin: dbEngin} + scheduler := scheduler.NewSchdlr(nil, nil, nil) + return &ServiceContext{ Cron: cron.New(cron.WithSeconds()), DbEngin: dbEngin, Config: c, RedisClient: redisClient, - ModelArtsRpc: modelartsservice.NewModelArtsService(zrpc.MustNewClient(c.ModelArtsRpcConf)), - ModelArtsImgRpc: imagesservice.NewImagesService(zrpc.MustNewClient(c.ModelArtsImgRpcConf)), + ModelArtsRpc: modelArtsRpc, + ModelArtsImgRpc: modelArtsImgRpc, CephRpc: cephclient.NewCeph(zrpc.MustNewClient(c.CephRpcConf)), - ACRpc: hpcacclient.NewHpcAC(zrpc.MustNewClient(c.ACRpcConf)), - OctopusRpc: octopusclient.NewOctopus(zrpc.MustNewClient(c.OctopusRpcConf)), + ACRpc: aCRpc, + OctopusRpc: octopusRpc, OpenstackRpc: openstackclient.NewOpenstack(zrpc.MustNewClient(c.OpenstackRpcConf)), K8sRpc: kubernetesclient.NewKubernetes(zrpc.MustNewClient(c.K8sNativeConf)), MonitorClient: make(map[int64]tracker.Prometheus), @@ -127,5 +139,6 @@ func NewServiceContext(c config.Config) *ServiceContext { PromClient: promClient, AlertClient: alertClient, HttpClient: httpClient, + Scheduler: scheduler, } } From 6b2ae7c0f516256d8173fe12cec4acf280eb1374 Mon Sep 17 00:00:00 2001 From: tzwang Date: Sat, 30 Mar 2024 16:44:10 +0800 Subject: [PATCH 03/16] fix cycle import err Former-commit-id: 7b1ada2f3eeaf0d03d67d2b534de2eda17e840ff --- .../logic/schedule/schedulesubmitlogic.go | 27 ++++++++++++++++-- .../logic/storelink/deletelinkimagelogic.go | 2 +- .../logic/storelink/deletelinktasklogic.go | 2 +- .../logic/storelink/getaispecslogic.go | 2 +- .../logic/storelink/getlinkimagelistlogic.go | 2 +- .../logic/storelink/getlinktasklogic.go | 2 +- .../logic/storelink/submitlinktasklogic.go | 2 +- .../logic/storelink/uploadlinkimagelogic.go | 2 +- api/internal/scheduler/database/aiStorage.go | 28 ++++++++++++++++++- api/internal/scheduler/scheduler.go | 6 ++-- .../scheduler/schedulers/aiScheduler.go | 1 + api/internal/storeLink/storeLink.go | 15 ++++++---- api/internal/svc/servicecontext.go | 8 ++++-- 13 files changed, 77 insertions(+), 22 deletions(-) diff --git a/api/internal/logic/schedule/schedulesubmitlogic.go b/api/internal/logic/schedule/schedulesubmitlogic.go index fad58867..fc7469f9 100644 --- a/api/internal/logic/schedule/schedulesubmitlogic.go +++ b/api/internal/logic/schedule/schedulesubmitlogic.go @@ -2,6 +2,8 @@ package schedule import ( "context" + "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers" + "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types" @@ -24,7 +26,28 @@ func NewScheduleSubmitLogic(ctx context.Context, svcCtx *svc.ServiceContext) *Sc } func (l *ScheduleSubmitLogic) ScheduleSubmit(req *types.ScheduleReq) (resp *types.ScheduleResp, err error) { - // todo: add your logic here and delete this line + resp = &types.ScheduleResp{} + opt := &option.AiOption{ + ResourceType: req.AiOption.ResourceType, + Tops: 0, + TaskType: req.AiOption.TaskType, + DatasetsName: req.AiOption.Datasets, + AlgorithmName: "cnn", + StrategyName: req.AiOption.Strategy, + ClusterToStaticWeight: nil, + Params: []string{ + "epoch,1", + }, + } + aiSchdl, err := schedulers.NewAiScheduler(l.ctx, "", l.svcCtx.Scheduler, opt) + if err != nil { + return nil, err + } - return + err = l.svcCtx.Scheduler.AssignAndSchedule(aiSchdl) + if err != nil { + return nil, err + } + + return resp, nil } diff --git a/api/internal/logic/storelink/deletelinkimagelogic.go b/api/internal/logic/storelink/deletelinkimagelogic.go index 25eb2f4d..6b2f91e5 100644 --- a/api/internal/logic/storelink/deletelinkimagelogic.go +++ b/api/internal/logic/storelink/deletelinkimagelogic.go @@ -47,7 +47,7 @@ func (l *DeleteLinkImageLogic) DeleteLinkImage(req *types.DeleteLinkImageReq) (r return resp, nil } - storelink := storeLink.NewStoreLink(l.svcCtx, participant) + storelink := storeLink.NewStoreLink(l.svcCtx.OctopusRpc, l.svcCtx.ModelArtsRpc, l.svcCtx.ModelArtsImgRpc, l.svcCtx.ACRpc, participant) if storelink == nil { return nil, nil } diff --git a/api/internal/logic/storelink/deletelinktasklogic.go b/api/internal/logic/storelink/deletelinktasklogic.go index 8f377f62..1321f983 100644 --- a/api/internal/logic/storelink/deletelinktasklogic.go +++ b/api/internal/logic/storelink/deletelinktasklogic.go @@ -47,7 +47,7 @@ func (l *DeleteLinkTaskLogic) DeleteLinkTask(req *types.DeleteLinkTaskReq) (resp return resp, nil } - storelink := storeLink.NewStoreLink(l.svcCtx, participant) + storelink := storeLink.NewStoreLink(l.svcCtx.OctopusRpc, l.svcCtx.ModelArtsRpc, l.svcCtx.ModelArtsImgRpc, l.svcCtx.ACRpc, participant) if storelink == nil { return nil, nil } diff --git a/api/internal/logic/storelink/getaispecslogic.go b/api/internal/logic/storelink/getaispecslogic.go index 62dc6a06..2ba8ee38 100644 --- a/api/internal/logic/storelink/getaispecslogic.go +++ b/api/internal/logic/storelink/getaispecslogic.go @@ -47,7 +47,7 @@ func (l *GetAISpecsLogic) GetAISpecs(req *types.GetResourceSpecsReq) (resp *type return resp, nil } - storelink := storeLink.NewStoreLink(l.svcCtx, participant) + storelink := storeLink.NewStoreLink(l.svcCtx.OctopusRpc, l.svcCtx.ModelArtsRpc, l.svcCtx.ModelArtsImgRpc, l.svcCtx.ACRpc, participant) if storelink == nil { return nil, nil } diff --git a/api/internal/logic/storelink/getlinkimagelistlogic.go b/api/internal/logic/storelink/getlinkimagelistlogic.go index 48e37d7b..9e811da8 100644 --- a/api/internal/logic/storelink/getlinkimagelistlogic.go +++ b/api/internal/logic/storelink/getlinkimagelistlogic.go @@ -47,7 +47,7 @@ func (l *GetLinkImageListLogic) GetLinkImageList(req *types.GetLinkImageListReq) return resp, nil } - storelink := storeLink.NewStoreLink(l.svcCtx, participant) + storelink := storeLink.NewStoreLink(l.svcCtx.OctopusRpc, l.svcCtx.ModelArtsRpc, l.svcCtx.ModelArtsImgRpc, l.svcCtx.ACRpc, participant) if storelink == nil { return nil, nil } diff --git a/api/internal/logic/storelink/getlinktasklogic.go b/api/internal/logic/storelink/getlinktasklogic.go index 24e7b3b7..97aab7af 100644 --- a/api/internal/logic/storelink/getlinktasklogic.go +++ b/api/internal/logic/storelink/getlinktasklogic.go @@ -48,7 +48,7 @@ func (l *GetLinkTaskLogic) GetLinkTask(req *types.GetLinkTaskReq) (resp *types.G return resp, nil } - storelink := storeLink.NewStoreLink(l.svcCtx, participant) + storelink := storeLink.NewStoreLink(l.svcCtx.OctopusRpc, l.svcCtx.ModelArtsRpc, l.svcCtx.ModelArtsImgRpc, l.svcCtx.ACRpc, participant) if storelink == nil { return nil, nil } diff --git a/api/internal/logic/storelink/submitlinktasklogic.go b/api/internal/logic/storelink/submitlinktasklogic.go index 971d75fc..5e8f9d14 100644 --- a/api/internal/logic/storelink/submitlinktasklogic.go +++ b/api/internal/logic/storelink/submitlinktasklogic.go @@ -48,7 +48,7 @@ func (l *SubmitLinkTaskLogic) SubmitLinkTask(req *types.SubmitLinkTaskReq) (resp return resp, nil } - storelink := storeLink.NewStoreLink(l.svcCtx, participant) + storelink := storeLink.NewStoreLink(l.svcCtx.OctopusRpc, l.svcCtx.ModelArtsRpc, l.svcCtx.ModelArtsImgRpc, l.svcCtx.ACRpc, participant) if storelink == nil { return nil, nil } diff --git a/api/internal/logic/storelink/uploadlinkimagelogic.go b/api/internal/logic/storelink/uploadlinkimagelogic.go index 5bd7514a..60beade5 100644 --- a/api/internal/logic/storelink/uploadlinkimagelogic.go +++ b/api/internal/logic/storelink/uploadlinkimagelogic.go @@ -48,7 +48,7 @@ func (l *UploadLinkImageLogic) UploadLinkImage(req *types.UploadLinkImageReq) (r return resp, nil } - storelink := storeLink.NewStoreLink(l.svcCtx, participant) + storelink := storeLink.NewStoreLink(l.svcCtx.OctopusRpc, l.svcCtx.ModelArtsRpc, l.svcCtx.ModelArtsImgRpc, l.svcCtx.ACRpc, participant) if storelink == nil { return nil, nil } diff --git a/api/internal/scheduler/database/aiStorage.go b/api/internal/scheduler/database/aiStorage.go index aa984f16..edcc5969 100644 --- a/api/internal/scheduler/database/aiStorage.go +++ b/api/internal/scheduler/database/aiStorage.go @@ -2,18 +2,44 @@ package database import ( "github.com/zeromicro/go-zero/core/logx" + "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types" + "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants" + "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models" "gorm.io/gorm" + "time" ) type AiStorage struct { DbEngin *gorm.DB } -func (s *AiStorage) GetParticipants() { +func (s *AiStorage) GetParticipants() (*types.ClusterListResp, error) { var resp types.ClusterListResp tx := s.DbEngin.Raw("select * from t_cluster where `deleted_at` IS NULL ORDER BY create_time Desc").Scan(&resp.List) if tx.Error != nil { logx.Errorf(tx.Error.Error()) + return nil, tx.Error } + return &resp, nil +} + +func (s *AiStorage) SaveTask(cluster strategy.AssignedCluster) error { + // 构建主任务结构体 + taskModel := models.Task{ + Status: constants.Saved, + Description: "ai task", + Name: "testAi", + CommitTime: time.Now(), + } + // 保存任务数据到数据库 + tx := s.DbEngin.Create(&taskModel) + if tx.Error != nil { + return tx.Error + } + return nil +} + +func (s *AiStorage) UpdateTask() error { + return nil } diff --git a/api/internal/scheduler/scheduler.go b/api/internal/scheduler/scheduler.go index 8c3265db..281788e6 100644 --- a/api/internal/scheduler/scheduler.go +++ b/api/internal/scheduler/scheduler.go @@ -39,7 +39,7 @@ type Scheduler struct { result []string //pID:子任务yamlstring 键值对 participantRpc participantservice.ParticipantService ResourceCollector *map[string]collector.AiCollector - Storages database.Storage + AiStorages *database.AiStorage AiExecutor *map[string]executor.AiExecutor mu sync.RWMutex } @@ -59,8 +59,8 @@ func NewScheduler(subSchedule SubSchedule, val string, dbEngin *gorm.DB, partici return &Scheduler{task: task, subSchedule: subSchedule, dbEngin: dbEngin, participantRpc: participantRpc}, nil } -func NewSchdlr(resourceCollector *map[string]collector.AiCollector, storages database.Storage, aiExecutor *map[string]executor.AiExecutor) *Scheduler { - return &Scheduler{ResourceCollector: resourceCollector, Storages: storages, AiExecutor: aiExecutor} +func NewSchdlr(resourceCollector *map[string]collector.AiCollector, storages *database.AiStorage, aiExecutor *map[string]executor.AiExecutor) *Scheduler { + return &Scheduler{ResourceCollector: resourceCollector, AiStorages: storages, AiExecutor: aiExecutor} } func (s *Scheduler) SpecifyClusters() { diff --git a/api/internal/scheduler/schedulers/aiScheduler.go b/api/internal/scheduler/schedulers/aiScheduler.go index bbb2b59c..024ef7ae 100644 --- a/api/internal/scheduler/schedulers/aiScheduler.go +++ b/api/internal/scheduler/schedulers/aiScheduler.go @@ -58,6 +58,7 @@ func (as *AiScheduler) PickOptimalStrategy() (strategy.Strategy, error) { } resources, err := as.findClustersWithResources() + if err != nil { return nil, err } diff --git a/api/internal/storeLink/storeLink.go b/api/internal/storeLink/storeLink.go index 62a1079d..ce00a540 100644 --- a/api/internal/storeLink/storeLink.go +++ b/api/internal/storeLink/storeLink.go @@ -19,13 +19,16 @@ import ( "github.com/pkg/errors" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/common" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/collector" - "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils/timeutils" "gitlink.org.cn/jcce-pcm/pcm-ac/hpcAC" + "gitlink.org.cn/jcce-pcm/pcm-ac/hpcacclient" + "gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/client/imagesservice" + "gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/client/modelartsservice" "gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/modelarts" "gitlink.org.cn/jcce-pcm/pcm-participant-octopus/octopus" + "gitlink.org.cn/jcce-pcm/pcm-participant-octopus/octopusclient" "gorm.io/gorm" "strings" "sync" @@ -89,19 +92,19 @@ type StoreLink struct { ILinkage Linkage } -func NewStoreLink(svcCtx *svc.ServiceContext, participant *models.StorelinkCenter) *StoreLink { +func NewStoreLink(octopusRpc octopusclient.Octopus, modelArtsRpc modelartsservice.ModelArtsService, modelArtsImgRpc imagesservice.ImagesService, aCRpc hpcacclient.HpcAC, participant *models.StorelinkCenter) *StoreLink { switch participant.Type { case TYPE_OCTOPUS: - linkStruct := NewOctopusLink(svcCtx.OctopusRpc, participant.Name, participant.Id) + linkStruct := NewOctopusLink(octopusRpc, participant.Name, participant.Id) return &StoreLink{ILinkage: linkStruct} case TYPE_MODELARTS: - linkStruct := NewModelArtsLink(svcCtx.ModelArtsRpc, svcCtx.ModelArtsImgRpc, participant.Name, participant.Id) + linkStruct := NewModelArtsLink(modelArtsRpc, modelArtsImgRpc, participant.Name, participant.Id) return &StoreLink{ILinkage: linkStruct} case TYPE_SHUGUANGAI: - linkStruct := NewShuguangAi(svcCtx.ACRpc, participant.Name, participant.Id) + linkStruct := NewShuguangAi(aCRpc, participant.Name, participant.Id) return &StoreLink{ILinkage: linkStruct} case TYPE_SHUGUANGHPC: - linkStruct := NewShuguangHpc(svcCtx.ACRpc, participant.Name, participant.Id) + linkStruct := NewShuguangHpc(aCRpc, participant.Name, participant.Id) return &StoreLink{ILinkage: linkStruct} default: return nil diff --git a/api/internal/svc/servicecontext.go b/api/internal/svc/servicecontext.go index 95dc0a46..4bec99a7 100644 --- a/api/internal/svc/servicecontext.go +++ b/api/internal/svc/servicecontext.go @@ -23,6 +23,8 @@ import ( "github.com/zeromicro/go-zero/zrpc" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/config" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler" + "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/database" + "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/tracker" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils" "gitlink.org.cn/JointCloud/pcm-coordinator/rpc/client/participantservice" @@ -118,9 +120,9 @@ func NewServiceContext(c config.Config) *ServiceContext { aCRpc := hpcacclient.NewHpcAC(zrpc.MustNewClient(c.ACRpcConf)) modelArtsRpc := modelartsservice.NewModelArtsService(zrpc.MustNewClient(c.ModelArtsRpcConf)) modelArtsImgRpc := imagesservice.NewImagesService(zrpc.MustNewClient(c.ModelArtsImgRpcConf)) - //aiExecutor, resourceCollector := service2.InitAiClusterMap(octopusRpc, modelArtsRpc, modelArtsImgRpc, aCRpc) - //storage := &database.AiStorage{DbEngin: dbEngin} - scheduler := scheduler.NewSchdlr(nil, nil, nil) + aiExecutor, resourceCollector := service.InitAiClusterMap(octopusRpc, modelArtsRpc, modelArtsImgRpc, aCRpc) + storage := &database.AiStorage{DbEngin: dbEngin} + scheduler := scheduler.NewSchdlr(resourceCollector, storage, aiExecutor) return &ServiceContext{ Cron: cron.New(cron.WithSeconds()), From b09a45bd5bfcf8e0ed44318da5afe4a0af2d2044 Mon Sep 17 00:00:00 2001 From: tzwang Date: Mon, 1 Apr 2024 11:22:09 +0800 Subject: [PATCH 04/16] added scheduler api aioptions Former-commit-id: 634213a6182c1dc2e17417df9ceaf1289743e2ca --- api/desc/schedule/pcm-schedule.api | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/api/desc/schedule/pcm-schedule.api b/api/desc/schedule/pcm-schedule.api index d9946a33..0e425b2e 100644 --- a/api/desc/schedule/pcm-schedule.api +++ b/api/desc/schedule/pcm-schedule.api @@ -20,10 +20,18 @@ type ( } AiOption { + TaskName string `json:"taskName"` + AiClusterId string `json:"aiClusterId,optional"` ResourceType string `json:"resourceType"` + Tops float64 `json:"Tops,optional"` TaskType string `json:"taskType"` Datasets string `json:"datasets"` + Algorithm string `json:"algorithm"` Strategy string `json:"strategy"` + StaticWeightMap map[string]int32 `json:"staticWeightMap,optional"` + Params []string `json:"params,optional"` + Envs []string `json:"envs,optional"` + Cmd string `json:"cmd,optional"` } AiResourceTypesResp { From 81b5676559e45c2e575d1be9a4406871df948f02 Mon Sep 17 00:00:00 2001 From: qiwang <1364512070@qq.com> Date: Mon, 1 Apr 2024 12:25:51 +0800 Subject: [PATCH 05/16] feat:modify listserver interface Former-commit-id: 6e81bf49976c4aaa87228857c7e7c21f980d4c25 --- api/desc/core/pcm-core.api | 4 ++++ api/desc/vm/pcm-vm.api | 21 ++++++++++++--------- api/internal/types/types.go | 25 ++++++++++++++++--------- go.mod | 8 ++++---- go.sum | 8 ++++++++ 5 files changed, 44 insertions(+), 22 deletions(-) diff --git a/api/desc/core/pcm-core.api b/api/desc/core/pcm-core.api index 07987124..c3a938c0 100644 --- a/api/desc/core/pcm-core.api +++ b/api/desc/core/pcm-core.api @@ -157,7 +157,11 @@ type ( Block_device_mapping_v2Commit { uuid string `json:"uuid,optional"` } + commitVmTaskResp { + VmTask []VmTask `json:"vmTask" copier:"VmTask"` + } + VmTask{ Id string `json:"id" copier:"Id"` Links []VmLinks `json:"links" copier:"Links"` OSDCFDiskConfig string `json:"OS_DCF_diskConfig" copier:"OSDCFDiskConfig"` diff --git a/api/desc/vm/pcm-vm.api b/api/desc/vm/pcm-vm.api index 9822af39..b632439e 100644 --- a/api/desc/vm/pcm-vm.api +++ b/api/desc/vm/pcm-vm.api @@ -122,15 +122,18 @@ type ( ServersDetailed { //created string `json:"created" copier:"created"` - id string `json:"id" copier:"id"` - key_name string `json:"key_name" copier:"key_name"` - locked bool `json:"locked" copier:"locked"` - name string `json:"name" copier:"name"` - progress uint32 `json:"progress" copier:"progress"` - status string `json:"status" copier:"status"` - tenant_id string `json:"tenant_id" copier:"tenant_id"` - updated string `json:"updated" copier:"updated"` - user_id string `json:"user_id" copier:"user_id"` + Id string `json:"Id" copier:"Id"` + Name string `json:"Name" copier:"Name"` + OSTaskState uint32 `json:"OSTaskState" copier:"OSTaskState"` + Status string `json:"Status" copier:"Status"` + VmState string `json:"VmState" copier:"VmState"` + OS_EXT_SRV_ATTR_Instance_Name string `json:"OS_EXT_SRV_ATTR_Instance_Name" copier:"OS_EXT_SRV_ATTR_Instance_Name"` + Created string `json:"Created" copier:"Created"` + HostId string `json:"HostId" copier:"HostId"` + Ip string `json:"Ip" copier:"Ip"` + Image string `json:"Image" copier:"Image"` + Updated string `json:"Updated" copier:"Updated"` + Flavor string `json:"Flavor" copier:"Flavor"` } ) diff --git a/api/internal/types/types.go b/api/internal/types/types.go index 88271634..d691b37c 100644 --- a/api/internal/types/types.go +++ b/api/internal/types/types.go @@ -147,6 +147,10 @@ type Block_device_mapping_v2Commit struct { } type CommitVmTaskResp struct { + VmTask []VmTask `json:"vmTask" copier:"VmTask"` +} + +type VmTask struct { Id string `json:"id" copier:"Id"` Links []VmLinks `json:"links" copier:"Links"` OSDCFDiskConfig string `json:"OS_DCF_diskConfig" copier:"OSDCFDiskConfig"` @@ -2707,15 +2711,18 @@ type ListServersDetailedResp struct { } type ServersDetailed struct { - Id string `json:"id" copier:"id"` - Key_name string `json:"key_name" copier:"key_name"` - Locked bool `json:"locked" copier:"locked"` - Name string `json:"name" copier:"name"` - Progress uint32 `json:"progress" copier:"progress"` - Status string `json:"status" copier:"status"` - Tenant_id string `json:"tenant_id" copier:"tenant_id"` - Updated string `json:"updated" copier:"updated"` - User_id string `json:"user_id" copier:"user_id"` + Id string `json:"Id" copier:"Id"` + Name string `json:"Name" copier:"Name"` + OSTaskState uint32 `json:"OSTaskState" copier:"OSTaskState"` + Status string `json:"Status" copier:"Status"` + VmState string `json:"VmState" copier:"VmState"` + OS_EXT_SRV_ATTR_Instance_Name string `json:"OS_EXT_SRV_ATTR_Instance_Name" copier:"OS_EXT_SRV_ATTR_Instance_Name"` + Created string `json:"Created" copier:"Created"` + HostId string `json:"HostId" copier:"HostId"` + Ip string `json:"Ip" copier:"Ip"` + Image string `json:"Image" copier:"Image"` + Updated string `json:"Updated" copier:"Updated"` + Flavor string `json:"Flavor" copier:"Flavor"` } type GetServersDetailedByIdReq struct { diff --git a/go.mod b/go.mod index d50b93a2..50582b8b 100644 --- a/go.mod +++ b/go.mod @@ -24,7 +24,7 @@ require ( github.com/rs/zerolog v1.28.0 github.com/zeromicro/go-zero v1.6.3 gitlink.org.cn/JointCloud/pcm-kubernetes v0.0.0-20240301071143-347480abff2c - gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240328020739-cbdd8f5b226b + gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240401022404-2f1425735f0d gitlink.org.cn/JointCloud/pcm-slurm v0.0.0-20240301080743-8b94bbaf57f5 gitlink.org.cn/jcce-pcm/pcm-ac v0.0.0-20240301085553-f6ad88fa357a gitlink.org.cn/jcce-pcm/pcm-participant-ceph v0.0.0-20230904090036-24fc730ec87d @@ -136,9 +136,9 @@ require ( github.com/spaolacci/murmur3 v1.1.0 // indirect github.com/xhit/go-str2duration/v2 v2.1.0 // indirect github.com/xlab/treeprint v1.2.0 // indirect - go.etcd.io/etcd/api/v3 v3.5.12 // indirect - go.etcd.io/etcd/client/pkg/v3 v3.5.12 // indirect - go.etcd.io/etcd/client/v3 v3.5.12 // indirect + go.etcd.io/etcd/api/v3 v3.5.13 // indirect + go.etcd.io/etcd/client/pkg/v3 v3.5.13 // indirect + go.etcd.io/etcd/client/v3 v3.5.13 // indirect go.mongodb.org/mongo-driver v1.13.1 // indirect go.opentelemetry.io/otel v1.24.0 // indirect go.opentelemetry.io/otel/exporters/jaeger v1.17.0 // indirect diff --git a/go.sum b/go.sum index 948fc031..419d417e 100644 --- a/go.sum +++ b/go.sum @@ -1105,6 +1105,8 @@ gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240327072142-008a85e3c315 h1:km gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240327072142-008a85e3c315/go.mod h1:i2rrbMQ+Fve345BY9Heh4MUqVTAimZQElQhzzRee5B8= gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240328020739-cbdd8f5b226b h1:suRANMHQPhKKmgdJOZcbFYDJ0NUQkUGgVvMKxw75BQI= gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240328020739-cbdd8f5b226b/go.mod h1:i2rrbMQ+Fve345BY9Heh4MUqVTAimZQElQhzzRee5B8= +gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240401022404-2f1425735f0d h1:ZX/Kg8eKdaAfDsTd+Y+TrJsUvxp/DpbWUp+Ij4CtR+s= +gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240401022404-2f1425735f0d/go.mod h1:i2rrbMQ+Fve345BY9Heh4MUqVTAimZQElQhzzRee5B8= gitlink.org.cn/JointCloud/pcm-slurm v0.0.0-20240301080743-8b94bbaf57f5 h1:+/5vnzkJBfMRnya1NrhOzlroUtRa5ePiYbPKlHLoLV0= gitlink.org.cn/JointCloud/pcm-slurm v0.0.0-20240301080743-8b94bbaf57f5/go.mod h1:97AlUXN13g9UN3+9/DzCHpeoU5sbdyv0IQuTEHNexzQ= gitlink.org.cn/jcce-pcm/pcm-ac v0.0.0-20240301085553-f6ad88fa357a h1:fY1KmyZ6O7wVBvgt2HB+C9e1DncJdk/Wkv8m5Qz7abw= @@ -1118,12 +1120,18 @@ gitlink.org.cn/jcce-pcm/pcm-participant-octopus v0.0.0-20240222124813-e275cfa342 go.etcd.io/etcd/api/v3 v3.5.7/go.mod h1:9qew1gCdDDLu+VwmeG+iFpL+QlpHTo7iubavdVDgCAA= go.etcd.io/etcd/api/v3 v3.5.12 h1:W4sw5ZoU2Juc9gBWuLk5U6fHfNVyY1WC5g9uiXZio/c= go.etcd.io/etcd/api/v3 v3.5.12/go.mod h1:Ot+o0SWSyT6uHhA56al1oCED0JImsRiU9Dc26+C2a+4= +go.etcd.io/etcd/api/v3 v3.5.13 h1:8WXU2/NBge6AUF1K1gOexB6e07NgsN1hXK0rSTtgSp4= +go.etcd.io/etcd/api/v3 v3.5.13/go.mod h1:gBqlqkcMMZMVTMm4NDZloEVJzxQOQIls8splbqBDa0c= go.etcd.io/etcd/client/pkg/v3 v3.5.7/go.mod h1:o0Abi1MK86iad3YrWhgUsbGx1pmTS+hrORWc2CamuhY= go.etcd.io/etcd/client/pkg/v3 v3.5.12 h1:EYDL6pWwyOsylrQyLp2w+HkQ46ATiOvoEdMarindU2A= go.etcd.io/etcd/client/pkg/v3 v3.5.12/go.mod h1:seTzl2d9APP8R5Y2hFL3NVlD6qC/dOT+3kvrqPyTas4= +go.etcd.io/etcd/client/pkg/v3 v3.5.13 h1:RVZSAnWWWiI5IrYAXjQorajncORbS0zI48LQlE2kQWg= +go.etcd.io/etcd/client/pkg/v3 v3.5.13/go.mod h1:XxHT4u1qU12E2+po+UVPrEeL94Um6zL58ppuJWXSAB8= go.etcd.io/etcd/client/v3 v3.5.7/go.mod h1:sOWmj9DZUMyAngS7QQwCyAXXAL6WhgTOPLNS/NabQgw= go.etcd.io/etcd/client/v3 v3.5.12 h1:v5lCPXn1pf1Uu3M4laUE2hp/geOTc5uPcYYsNe1lDxg= go.etcd.io/etcd/client/v3 v3.5.12/go.mod h1:tSbBCakoWmmddL+BKVAJHa9km+O/E+bumDe9mSbPiqw= +go.etcd.io/etcd/client/v3 v3.5.13 h1:o0fHTNJLeO0MyVbc7I3fsCf6nrOqn5d+diSarKnB2js= +go.etcd.io/etcd/client/v3 v3.5.13/go.mod h1:cqiAeY8b5DEEcpxvgWKsbLIWNM/8Wy2xJSDMtioMcoI= go.mongodb.org/mongo-driver v1.11.4/go.mod h1:PTSz5yu21bkT/wXpkS7WR5f0ddqw5quethTUn9WM+2g= go.mongodb.org/mongo-driver v1.13.1 h1:YIc7HTYsKndGK4RFzJ3covLz1byri52x0IoMB0Pt/vk= go.mongodb.org/mongo-driver v1.13.1/go.mod h1:wcDf1JBCXy2mOW0bWHwO/IOYqdca1MPCwDtFu/Z9+eo= From ac80f68dc91308cf2275770bbeff5a33904cfd73 Mon Sep 17 00:00:00 2001 From: qiwang <1364512070@qq.com> Date: Mon, 1 Apr 2024 12:39:07 +0800 Subject: [PATCH 06/16] feat:modify listserver interface Former-commit-id: 0a1f53f6faac1ed7de0db11738cc657bb52a0bd0 --- api/etc/pcm.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/api/etc/pcm.yaml b/api/etc/pcm.yaml index df1c585d..b2b4766e 100644 --- a/api/etc/pcm.yaml +++ b/api/etc/pcm.yaml @@ -6,7 +6,6 @@ Timeout: 50000 DB: DataSource: root:uJpLd6u-J?HC1@(10.206.0.12:3306)/pcm?parseTime=true&loc=Local - # DataSource: root:uJpLd6u-J?HC1@(47.92.88.143:3306)/pcm?parseTime=true&loc=Local Redis: Host: 10.206.0.12:6379 Pass: redisPW123 From 69757c9f5b965222dc71e38b9be07977a5a51fa4 Mon Sep 17 00:00:00 2001 From: tzwang Date: Mon, 1 Apr 2024 17:53:42 +0800 Subject: [PATCH 07/16] updated return type of aischeduler submit Former-commit-id: c8de6c78b18e2159f4c6ea4abcac77221f7eae48 --- api/desc/schedule/pcm-schedule.api | 2 -- .../logic/schedule/schedulesubmitlogic.go | 2 +- api/internal/mqs/ScheduleAi.go | 2 +- api/internal/scheduler/scheduler.go | 14 ++++---- .../scheduler/schedulers/aiScheduler.go | 33 ++++++++++++++++--- .../scheduler/schedulers/cloudScheduler.go | 4 +-- .../scheduler/schedulers/hpcScheduler.go | 4 +-- .../scheduler/schedulers/vmScheduler.go | 2 +- 8 files changed, 43 insertions(+), 20 deletions(-) diff --git a/api/desc/schedule/pcm-schedule.api b/api/desc/schedule/pcm-schedule.api index 0e425b2e..72ba71a7 100644 --- a/api/desc/schedule/pcm-schedule.api +++ b/api/desc/schedule/pcm-schedule.api @@ -14,8 +14,6 @@ type ( ScheduleResp { Success bool `json:"success"` - TaskId string `json:"taskId"` - ClusterId string `json:"clusterId"` ErrorMsg string `json:"errorMsg"` } diff --git a/api/internal/logic/schedule/schedulesubmitlogic.go b/api/internal/logic/schedule/schedulesubmitlogic.go index fc7469f9..a4704def 100644 --- a/api/internal/logic/schedule/schedulesubmitlogic.go +++ b/api/internal/logic/schedule/schedulesubmitlogic.go @@ -44,7 +44,7 @@ func (l *ScheduleSubmitLogic) ScheduleSubmit(req *types.ScheduleReq) (resp *type return nil, err } - err = l.svcCtx.Scheduler.AssignAndSchedule(aiSchdl) + _, err = l.svcCtx.Scheduler.AssignAndSchedule(aiSchdl) if err != nil { return nil, err } diff --git a/api/internal/mqs/ScheduleAi.go b/api/internal/mqs/ScheduleAi.go index 61713ad2..2e47695a 100644 --- a/api/internal/mqs/ScheduleAi.go +++ b/api/internal/mqs/ScheduleAi.go @@ -40,7 +40,7 @@ func (l *AiQueue) Consume(val string) error { aiSchdl, _ := schedulers.NewAiScheduler(l.ctx, val, l.svcCtx.Scheduler, nil) // 调度算法 - err := l.svcCtx.Scheduler.AssignAndSchedule(aiSchdl) + _, err := l.svcCtx.Scheduler.AssignAndSchedule(aiSchdl) if err != nil { return err } diff --git a/api/internal/scheduler/scheduler.go b/api/internal/scheduler/scheduler.go index 281788e6..2ed27228 100644 --- a/api/internal/scheduler/scheduler.go +++ b/api/internal/scheduler/scheduler.go @@ -47,7 +47,7 @@ type Scheduler struct { type SubSchedule interface { GetNewStructForDb(task *response.TaskInfo, resource string, participantId int64) (interface{}, error) PickOptimalStrategy() (strategy.Strategy, error) - AssignTask(clusters []*strategy.AssignedCluster) error + AssignTask(clusters []*strategy.AssignedCluster) ([]interface{}, error) } func NewScheduler(subSchedule SubSchedule, val string, dbEngin *gorm.DB, participantRpc participantservice.ParticipantService) (*Scheduler, error) { @@ -130,7 +130,7 @@ func (s *Scheduler) TempAssign() error { return nil } -func (s *Scheduler) AssignAndSchedule(ss SubSchedule) error { +func (s *Scheduler) AssignAndSchedule(ss SubSchedule) ([]interface{}, error) { //// 已指定 ParticipantId //if s.task.ParticipantId != 0 { // return nil @@ -153,12 +153,12 @@ func (s *Scheduler) AssignAndSchedule(ss SubSchedule) error { strategy, err := ss.PickOptimalStrategy() if err != nil { - return err + return nil, err } clusters, err := strategy.Schedule() if err != nil { - return err + return nil, err } //集群数量不满足,指定到标签匹配后第一个集群 @@ -167,12 +167,12 @@ func (s *Scheduler) AssignAndSchedule(ss SubSchedule) error { // return nil //} - err = ss.AssignTask(clusters) + resp, err := ss.AssignTask(clusters) if err != nil { - return err + return nil, err } - return nil + return resp, nil } func (s *Scheduler) SaveToDb() error { diff --git a/api/internal/scheduler/schedulers/aiScheduler.go b/api/internal/scheduler/schedulers/aiScheduler.go index 024ef7ae..4b62f427 100644 --- a/api/internal/scheduler/schedulers/aiScheduler.go +++ b/api/internal/scheduler/schedulers/aiScheduler.go @@ -95,27 +95,52 @@ func (as *AiScheduler) PickOptimalStrategy() (strategy.Strategy, error) { return nil, errors.New("no strategy has been chosen") } -func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) error { +func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) ([]interface{}, error) { if clusters == nil { - return errors.New("clusters is nil") + return nil, errors.New("clusters is nil") } + var wg sync.WaitGroup + var result []interface{} + var errs []error + var ch = make(chan interface{}, len(clusters)) + var errCh = make(chan error, len(clusters)) + executorMap := *as.AiExecutor for _, cluster := range clusters { c := cluster if cluster.Replicas == 0 { continue } + wg.Add(1) go func() { - _, err := executorMap[c.Name].Execute(as.ctx, as.option) + resp, err := executorMap[c.Name].Execute(as.ctx, as.option) if err != nil { // TODO: database operation + errCh <- err + wg.Done() + return } // TODO: database operation + ch <- resp + wg.Done() }() } + wg.Wait() - return nil + for s := range ch { + result = append(result, s) + } + + for e := range errCh { + errs = append(errs, e) + } + + if len(errs) != 0 { + return nil, errors.New("submit task failed") + } + + return result, nil } func (as *AiScheduler) findClustersWithResources() ([]*collector.ResourceStats, error) { diff --git a/api/internal/scheduler/schedulers/cloudScheduler.go b/api/internal/scheduler/schedulers/cloudScheduler.go index df5d91b5..097635ab 100644 --- a/api/internal/scheduler/schedulers/cloudScheduler.go +++ b/api/internal/scheduler/schedulers/cloudScheduler.go @@ -115,6 +115,6 @@ func (cs *CloudScheduler) genTaskAndProviders() (*providerPricing.Task, []*provi return nil, providerList, nil } -func (cs *CloudScheduler) AssignTask(clusters []*strategy.AssignedCluster) error { - return nil +func (cs *CloudScheduler) AssignTask(clusters []*strategy.AssignedCluster) ([]interface{}, error) { + return nil, nil } diff --git a/api/internal/scheduler/schedulers/hpcScheduler.go b/api/internal/scheduler/schedulers/hpcScheduler.go index 1a305302..319b8183 100644 --- a/api/internal/scheduler/schedulers/hpcScheduler.go +++ b/api/internal/scheduler/schedulers/hpcScheduler.go @@ -50,6 +50,6 @@ func (h *HpcScheduler) genTaskAndProviders(task *response.TaskInfo) (*providerPr return nil, nil } -func (h *HpcScheduler) AssignTask(clusters []*strategy.AssignedCluster) error { - return nil +func (h *HpcScheduler) AssignTask(clusters []*strategy.AssignedCluster) ([]interface{}, error) { + return nil, nil } diff --git a/api/internal/scheduler/schedulers/vmScheduler.go b/api/internal/scheduler/schedulers/vmScheduler.go index d2a3bb91..80e89c4e 100644 --- a/api/internal/scheduler/schedulers/vmScheduler.go +++ b/api/internal/scheduler/schedulers/vmScheduler.go @@ -64,7 +64,7 @@ func (vm *VmScheduler) genTaskAndProviders() (*providerPricing.Task, []*provider return nil, providerList, nil } -func (v VmScheduler) AssignTask(clusters []*strategy.AssignedCluster) error { +func (v VmScheduler) AssignTask(clusters []*strategy.AssignedCluster) ([]interface{}, error) { //TODO implement me panic("implement me") } From 0c4ba98073cb452e019ea77cce9710f27a4ccec0 Mon Sep 17 00:00:00 2001 From: Jake <450705171@qq.com> Date: Tue, 2 Apr 2024 11:33:19 +0800 Subject: [PATCH 08/16] fix:ai option params Former-commit-id: 365e08ce5e2d3d18f21415e38c60983ae3ea378f --- api/desc/hpc/pcm-hpc.api | 2 +- api/internal/types/types.go | 141 ++++-------------------------------- 2 files changed, 16 insertions(+), 127 deletions(-) diff --git a/api/desc/hpc/pcm-hpc.api b/api/desc/hpc/pcm-hpc.api index 9ba36b70..d86f262c 100644 --- a/api/desc/hpc/pcm-hpc.api +++ b/api/desc/hpc/pcm-hpc.api @@ -14,7 +14,7 @@ type ( Description string `json:"description,optional"` tenantId int64 `json:"tenantId,optional"` TaskId int64 `json:"taskId,optional"` - AdapterId int64 `json:"adapterId,optional"` + AdapterId string `json:"adapterId,optional"` MatchLabels map[string]string `json:"matchLabels,optional"` CardCount int64 `json:"cardCount,optional"` WorkDir string `json:"workDir,optional"` //paratera:workingDir diff --git a/api/internal/types/types.go b/api/internal/types/types.go index d6214767..bc35feeb 100644 --- a/api/internal/types/types.go +++ b/api/internal/types/types.go @@ -870,7 +870,7 @@ type CommitHpcTaskReq struct { Description string `json:"description,optional"` TenantId int64 `json:"tenantId,optional"` TaskId int64 `json:"taskId,optional"` - AdapterId int64 `json:"adapterId,optional"` + AdapterId string `json:"adapterId,optional"` MatchLabels map[string]string `json:"matchLabels,optional"` CardCount int64 `json:"cardCount,optional"` WorkDir string `json:"workDir,optional"` //paratera:workingDir @@ -5226,17 +5226,23 @@ type ScheduleReq struct { } type ScheduleResp struct { - Success bool `json:"success"` - TaskId string `json:"taskId"` - ClusterId string `json:"clusterId"` - ErrorMsg string `json:"errorMsg"` + Success bool `json:"success"` + ErrorMsg string `json:"errorMsg"` } type AiOption struct { - ResourceType string `json:"resourceType"` - TaskType string `json:"taskType"` - Datasets string `json:"datasets"` - Strategy string `json:"strategy"` + TaskName string `json:"taskName"` + AiClusterId string `json:"aiClusterId,optional"` + ResourceType string `json:"resourceType"` + Tops float64 `json:"Tops,optional"` + TaskType string `json:"taskType"` + Datasets string `json:"datasets"` + Algorithm string `json:"algorithm"` + Strategy string `json:"strategy"` + StaticWeightMap map[string]int32 `json:"staticWeightMap,optional"` + Params []string `json:"params,optional"` + Envs []string `json:"envs,optional"` + Cmd string `json:"cmd,optional"` } type AiResourceTypesResp struct { @@ -5265,123 +5271,6 @@ type AiAlgorithmsResp struct { Algorithms []string `json:"algorithms"` } -type PullTaskInfoReq struct { - AdapterId int64 `form:"adapterId"` -} - -type PullTaskInfoResp struct { - HpcInfoList []*HpcInfo `json:"HpcInfoList,omitempty"` - CloudInfoList []*CloudInfo `json:"CloudInfoList,omitempty"` - AiInfoList []*AiInfo `json:"AiInfoList,omitempty"` - VmInfoList []*VmInfo `json:"VmInfoList,omitempty"` -} - -type HpcInfo struct { - Id int64 `json:"id"` // id - TaskId int64 `json:"task_id"` // 任务id - JobId string `json:"job_id"` // 作业id(在第三方系统中的作业id) - AdapterId int64 `json:"adapter_id"` // 执行任务的适配器id - ClusterId int64 `json:"cluster_id"` // 执行任务的集群id - ClusterType string `json:"cluster_type"` // 执行任务的集群类型 - Name string `json:"name"` // 名称 - Status string `json:"status"` // 状态 - CmdScript string `json:"cmd_script"` - StartTime string `json:"start_time"` // 开始时间 - RunningTime int64 `json:"running_time"` // 运行时间 - DerivedEs string `json:"derived_es"` - Cluster string `json:"cluster"` - BlockId int64 `json:"block_id"` - AllocNodes int64 `json:"alloc_nodes"` - AllocCpu int64 `json:"alloc_cpu"` - CardCount int64 `json:"card_count"` // 卡数 - Version string `json:"version"` - Account string `json:"account"` - WorkDir string `json:"work_dir"` // 工作路径 - AssocId int64 `json:"assoc_id"` - ExitCode int64 `json:"exit_code"` - WallTime string `json:"wall_time"` // 最大运行时间 - Result string `json:"result"` // 运行结果 - DeletedAt string `json:"deleted_at"` // 删除时间 - YamlString string `json:"yaml_string"` - AppType string `json:"app_type"` // 应用类型 - AppName string `json:"app_name"` // 应用名称 - Queue string `json:"queue"` // 队列名称 - SubmitType string `json:"submit_type"` // cmd(命令行模式) - NNode string `json:"n_node"` // 节点个数(当指定该参数时,GAP_NODE_STRING必须为"") - StdOutFile string `json:"std_out_file"` // 工作路径/std.err.%j - StdErrFile string `json:"std_err_file"` // 工作路径/std.err.%j - StdInput string `json:"std_input"` - Environment string `json:"environment"` - DeletedFlag int64 `json:"deleted_flag"` // 是否删除(0-否,1-是) - CreatedBy int64 `json:"created_by"` // 创建人 - CreatedTime string `json:"created_time"` // 创建时间 - UpdatedBy int64 `json:"updated_by"` // 更新人 - UpdatedTime string `json:"updated_time"` // 更新时间 -} - -type CloudInfo struct { - Participant int64 `json:"participant,omitempty"` - Id int64 `json:"id,omitempty"` - TaskId int64 `json:"taskId,omitempty"` - ApiVersion string `json:"apiVersion,omitempty"` - Kind string `json:"kind,omitempty"` - Namespace string `json:"namespace,omitempty"` - Name string `json:"name,omitempty"` - Status string `json:"status,omitempty"` - StartTime string `json:"startTime,omitempty"` - RunningTime int64 `json:"runningTime,omitempty"` - Result string `json:"result,omitempty"` - YamlString string `json:"yamlString,omitempty"` -} - -type AiInfo struct { - ParticipantId int64 `json:"participantId,omitempty"` - TaskId int64 `json:"taskId,omitempty"` - ProjectId string `json:"project_id,omitempty"` - Name string `json:"name,omitempty"` - Status string `json:"status,omitempty"` - StartTime string `json:"startTime,omitempty"` - RunningTime int64 `json:"runningTime,omitempty"` - Result string `json:"result,omitempty"` - JobId string `json:"jobId,omitempty"` - CreateTime string `json:"createTime,omitempty"` - ImageUrl string `json:"imageUrl,omitempty"` - Command string `json:"command,omitempty"` - FlavorId string `json:"flavorId,omitempty"` - SubscriptionId string `json:"subscriptionId,omitempty"` - ItemVersionId string `json:"itemVersionId,omitempty"` -} - -type VmInfo struct { - ParticipantId int64 `json:"participantId,omitempty"` - TaskId int64 `json:"taskId,omitempty"` - Name string `json:"name,omitempty"` - FlavorRef string `json:"flavor_ref,omitempty"` - ImageRef string `json:"image_ref,omitempty"` - NetworkUuid string `json:"network_uuid,omitempty"` - BlockUuid string `json:"block_uuid,omitempty"` - SourceType string `json:"source_type,omitempty"` - DeleteOnTermination bool `json:"delete_on_termination,omitempty"` - State string `json:"state,omitempty"` -} - -type PushTaskInfoReq struct { - AdapterId int64 `json:"adapterId"` - HpcInfoList []*HpcInfo `json:"hpcInfoList"` - CloudInfoList []*CloudInfo `json:"cloudInfoList"` - AiInfoList []*AiInfo `json:"aiInfoList"` - VmInfoList []*VmInfo `json:"vmInfoList"` -} - -type PushTaskInfoResp struct { - Code int64 `json:"code"` - Msg string `json:"msg"` -} - -type PushResourceInfoReq struct { - AdapterId int64 `json:"adapterId"` -} - type CreateAlertRuleReq struct { ClusterName string `json:"clusterName"` Namespace string `json:"namespace"` From 2809aef72f412d059c2ff399af20660daf0866b2 Mon Sep 17 00:00:00 2001 From: qiwang <1364512070@qq.com> Date: Tue, 2 Apr 2024 16:24:00 +0800 Subject: [PATCH 09/16] feat:add create mulserver interface Former-commit-id: daa650029ae7bf4beeebfe34b1bec5c686c91229 --- api/desc/pcm.api | 4 + api/desc/vm/pcm-vm.api | 61 +++++++++++--- api/etc/pcm.yaml | 3 +- api/internal/handler/routes.go | 5 ++ .../handler/vm/createmulserverhandler.go | 28 +++++++ .../logic/core/commitvmtasktemplogic.go | 21 ++++- api/internal/logic/vm/createmulserverlogic.go | 82 +++++++++++++++++++ api/internal/types/types.go | 63 +++++++++++--- go.mod | 10 +-- go.sum | 16 ++-- 10 files changed, 252 insertions(+), 41 deletions(-) create mode 100644 api/internal/handler/vm/createmulserverhandler.go create mode 100644 api/internal/logic/vm/createmulserverlogic.go diff --git a/api/desc/pcm.api b/api/desc/pcm.api index 24fd3043..ab6669c7 100644 --- a/api/desc/pcm.api +++ b/api/desc/pcm.api @@ -405,6 +405,10 @@ service pcm { @handler CreateServerHandler post /vm/createServer (CreateServerReq) returns (CreateServerResp) + @doc "跨域创建虚拟机" + @handler CreateMulServerHandler + post /vm/createMulServer (CreateMulServerReq) returns (CreateMulServerResp) + @doc "根据ID查询虚拟机详情" @handler GetServersDetailedByIdHandler get /vm/getServersDetailedById (GetServersDetailedByIdReq) returns (GetServersDetailedByIdResp) diff --git a/api/desc/vm/pcm-vm.api b/api/desc/vm/pcm-vm.api index b632439e..fb21c68c 100644 --- a/api/desc/vm/pcm-vm.api +++ b/api/desc/vm/pcm-vm.api @@ -121,19 +121,19 @@ type ( } ServersDetailed { - //created string `json:"created" copier:"created"` - Id string `json:"Id" copier:"Id"` - Name string `json:"Name" copier:"Name"` - OSTaskState uint32 `json:"OSTaskState" copier:"OSTaskState"` - Status string `json:"Status" copier:"Status"` - VmState string `json:"VmState" copier:"VmState"` - OS_EXT_SRV_ATTR_Instance_Name string `json:"OS_EXT_SRV_ATTR_Instance_Name" copier:"OS_EXT_SRV_ATTR_Instance_Name"` - Created string `json:"Created" copier:"Created"` - HostId string `json:"HostId" copier:"HostId"` - Ip string `json:"Ip" copier:"Ip"` - Image string `json:"Image" copier:"Image"` - Updated string `json:"Updated" copier:"Updated"` - Flavor string `json:"Flavor" copier:"Flavor"` + Id string `json:"id" copier:"Id"` + Name string `json:"name" copier:"Name"` + OSTaskState uint32 `json:"os_task_state" copier:"OSTaskState"` + Status string `json:"status" copier:"Status"` + VmState string `json:"vm_state" copier:"VmState"` + OS_EXT_SRV_ATTR_Instance_Name string `json:"os_ext_srv_attr_instance_name" copier:"OS_EXT_SRV_ATTR_Instance_Name"` + Created string `json:"created" copier:"Created"` + HostId string `json:"hostId" copier:"HostId"` + Ip string `json:"ip" copier:"Ip"` + Image string `json:"image" copier:"Image"` + Updated string `json:"updated" copier:"Updated"` + Flavor string `json:"flavor" copier:"Flavor"` + Key_name string `json:"key_name" copier:"Key_name"` } ) @@ -368,6 +368,41 @@ type ( } ) +type ( + CreateMulServerReq { + CreateMulServer []CreateMulServer `json:"createMulServer,optional"` + } + CreateMulServer { + Platform string `json:"platform,optional"` + CrServer MulCrServer `json:"crserver" copier:"CrServer"` + } + MulCrServer { + Server MulServer `json:"server" copier:"Server"` + } + MulServer { + AvailabilityZone string `json:"availability_zone" copier:"AvailabilityZone"` + Name string `json:"name,optional" copier:"Name"` + FlavorRef string `json:"flavorRef,optional" copier:"FlavorRef"` + Description string `json:"description,optional" copier:"Description"` + ImageRef string `json:"imageRef,optional" copier:"ImageRef"` + Networks []CreNetwork `json:"networks,optional" copier:"Networks"` + MinCount int32 `json:"min_count,optional" copier:"MinCount"` + } + CreateMulServerResp { + Server []MulServerResp `json:"server" copier:"Server"` + Code int32 `json:"code,omitempty"` + Msg string `json:"msg,omitempty"` + ErrorMsg string `json:"errorMsg,omitempty"` + } + MulServerResp { + Id string `json:"id" copier:"Id"` + Links []Links `json:"links" copier:"Links"` + OSDCFDiskConfig string `json:"OS_DCF_diskConfig" copier:"OSDCFDiskConfig"` + SecurityGroups []Security_groups_server `json:"security_groups" copier:"SecurityGroups"` + AdminPass string `json:"adminPass" copier:"AdminPass"` + } +) + type( RebuildServerReq{ ServerId string `json:"server_id" copier:"ServerId"` diff --git a/api/etc/pcm.yaml b/api/etc/pcm.yaml index b2b4766e..1be194eb 100644 --- a/api/etc/pcm.yaml +++ b/api/etc/pcm.yaml @@ -5,7 +5,8 @@ Port: 8999 Timeout: 50000 DB: - DataSource: root:uJpLd6u-J?HC1@(10.206.0.12:3306)/pcm?parseTime=true&loc=Local + # DataSource: root:uJpLd6u-J?HC1@(10.206.0.12:3306)/pcm?parseTime=true&loc=Local + DataSource: root:uJpLd6u-J?HC1@(47.92.88.143:3306)/pcm?parseTime=true&loc=Local Redis: Host: 10.206.0.12:6379 Pass: redisPW123 diff --git a/api/internal/handler/routes.go b/api/internal/handler/routes.go index 45e701c2..91736dbf 100644 --- a/api/internal/handler/routes.go +++ b/api/internal/handler/routes.go @@ -476,6 +476,11 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) { Path: "/vm/createServer", Handler: vm.CreateServerHandler(serverCtx), }, + { + Method: http.MethodPost, + Path: "/vm/createMulServer", + Handler: vm.CreateMulServerHandler(serverCtx), + }, { Method: http.MethodGet, Path: "/vm/getServersDetailedById", diff --git a/api/internal/handler/vm/createmulserverhandler.go b/api/internal/handler/vm/createmulserverhandler.go new file mode 100644 index 00000000..613f1ca2 --- /dev/null +++ b/api/internal/handler/vm/createmulserverhandler.go @@ -0,0 +1,28 @@ +package vm + +import ( + "net/http" + + "github.com/zeromicro/go-zero/rest/httpx" + "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/vm" + "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc" + "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types" +) + +func CreateMulServerHandler(svcCtx *svc.ServiceContext) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + var req types.CreateMulServerReq + if err := httpx.Parse(r, &req); err != nil { + httpx.ErrorCtx(r.Context(), w, err) + return + } + + l := vm.NewCreateMulServerLogic(r.Context(), svcCtx) + resp, err := l.CreateMulServer(&req) + if err != nil { + httpx.ErrorCtx(r.Context(), w, err) + } else { + httpx.OkJsonCtx(r.Context(), w, resp) + } + } +} diff --git a/api/internal/logic/core/commitvmtasktemplogic.go b/api/internal/logic/core/commitvmtasktemplogic.go index f9501d46..85137007 100644 --- a/api/internal/logic/core/commitvmtasktemplogic.go +++ b/api/internal/logic/core/commitvmtasktemplogic.go @@ -2,6 +2,13 @@ package core import ( "context" + "github.com/jinzhu/copier" + "github.com/pkg/errors" + "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/helper/xerr" + "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result" + "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils" + "gitlink.org.cn/JointCloud/pcm-openstack/openstack" + "k8s.io/apimachinery/pkg/util/json" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types" @@ -25,6 +32,18 @@ func NewCommitVmTaskTempLogic(ctx context.Context, svcCtx *svc.ServiceContext) * func (l *CommitVmTaskTempLogic) CommitVmTaskTemp(req *types.CommitVmTaskReq) (resp *types.CommitVmTaskResp, err error) { // todo: add your logic here and delete this line - + CreateServerReq := &openstack.CreateServerReq{} + err = copier.CopyWithOption(CreateServerReq, req, copier.Option{Converters: utils.Converters}) + CreateServerResp, err := l.svcCtx.OpenstackRpc.CreateServer(l.ctx, CreateServerReq) + if err != nil { + return nil, errors.Wrapf(xerr.NewErrMsg("Failed to get Servers list"), "Failed to get db Servers list err : %v ,req:%+v", err, req) + } + marshal, err := json.Marshal(&CreateServerResp) + if err != nil { + return nil, result.NewDefaultError(err.Error()) + } + json.Unmarshal(marshal, &resp) + err = copier.CopyWithOption(&resp, &CreateServerResp, copier.Option{Converters: utils.Converters}) + return resp, err return } diff --git a/api/internal/logic/vm/createmulserverlogic.go b/api/internal/logic/vm/createmulserverlogic.go new file mode 100644 index 00000000..b59f575c --- /dev/null +++ b/api/internal/logic/vm/createmulserverlogic.go @@ -0,0 +1,82 @@ +package vm + +import ( + "context" + "fmt" + "github.com/jinzhu/copier" + "github.com/pkg/errors" + "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/helper/xerr" + "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result" + "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils" + "gitlink.org.cn/JointCloud/pcm-openstack/openstack" + "k8s.io/apimachinery/pkg/util/json" + + "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc" + "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types" + + "github.com/zeromicro/go-zero/core/logx" +) + +type CreateMulServerLogic struct { + logx.Logger + ctx context.Context + svcCtx *svc.ServiceContext +} + +func NewCreateMulServerLogic(ctx context.Context, svcCtx *svc.ServiceContext) *CreateMulServerLogic { + return &CreateMulServerLogic{ + Logger: logx.WithContext(ctx), + ctx: ctx, + svcCtx: svcCtx, + } +} + +// ServerLinks 表示服务器链接的结构体 +type ServerLinks struct { + Href string `json:"href"` // 注意:在JSON中,"href "有一个额外的空格,需要移除 + Rel string `json:"rel"` +} + +// SecurityGroup 表示安全组的结构体 +type SecurityGroup struct { + Name string `json:"name"` +} + +// Server 表示服务器的结构体 +type Server struct { + ID string `json:"id"` + Links []ServerLinks `json:"links"` + OSDCFDiskConfig string `json:"OS_DCF_diskConfig"` + SecurityGroups []SecurityGroup `json:"security_groups"` + AdminPass string `json:"adminPass"` +} + +// Response 表示整个响应的结构体 +type Response struct { + Server Server `json:"server"` +} + +func (l *CreateMulServerLogic) CreateMulServer(req *types.CreateMulServerReq) (resp *types.CreateMulServerResp, err error) { + // todo: add your logic here and delete this line + CreateServerReq := &openstack.CreateServerReq{} + var response Response + fmt.Println("请求入参:", req) + for _, server := range req.CreateMulServer { + fmt.Println("入参参数:", server) + err = copier.CopyWithOption(CreateServerReq, server, copier.Option{Converters: utils.Converters}) + CreateServerResp, err := l.svcCtx.OpenstackRpc.CreateServer(l.ctx, CreateServerReq) + fmt.Println("返回结果:", CreateServerResp) + if err != nil { + return nil, errors.Wrapf(xerr.NewErrMsg("Failed to create Server list"), "Failed to get db Server list err : %v ,req:%+v", err, req) + } + marshal, err := json.Marshal(&CreateServerResp) + fmt.Println("返回结果b:", marshal) + if err != nil { + return nil, result.NewDefaultError(err.Error()) + } + json.Unmarshal(marshal, &response) + } + err = copier.CopyWithOption(&resp, &response, copier.Option{Converters: utils.Converters}) + fmt.Println("返回结果c:", resp) + return resp, err +} diff --git a/api/internal/types/types.go b/api/internal/types/types.go index d6214767..2fa9ef29 100644 --- a/api/internal/types/types.go +++ b/api/internal/types/types.go @@ -2735,18 +2735,19 @@ type ListServersDetailedResp struct { } type ServersDetailed struct { - Id string `json:"Id" copier:"Id"` - Name string `json:"Name" copier:"Name"` - OSTaskState uint32 `json:"OSTaskState" copier:"OSTaskState"` - Status string `json:"Status" copier:"Status"` - VmState string `json:"VmState" copier:"VmState"` - OS_EXT_SRV_ATTR_Instance_Name string `json:"OS_EXT_SRV_ATTR_Instance_Name" copier:"OS_EXT_SRV_ATTR_Instance_Name"` - Created string `json:"Created" copier:"Created"` - HostId string `json:"HostId" copier:"HostId"` - Ip string `json:"Ip" copier:"Ip"` - Image string `json:"Image" copier:"Image"` - Updated string `json:"Updated" copier:"Updated"` - Flavor string `json:"Flavor" copier:"Flavor"` + Id string `json:"id" copier:"Id"` + Name string `json:"name" copier:"Name"` + OSTaskState uint32 `json:"os_task_state" copier:"OSTaskState"` + Status string `json:"status" copier:"Status"` + VmState string `json:"vm_state" copier:"VmState"` + OS_EXT_SRV_ATTR_Instance_Name string `json:"os_ext_srv_attr_instance_name" copier:"OS_EXT_SRV_ATTR_Instance_Name"` + Created string `json:"created" copier:"Created"` + HostId string `json:"hostId" copier:"HostId"` + Ip string `json:"ip" copier:"Ip"` + Image string `json:"image" copier:"Image"` + Updated string `json:"updated" copier:"Updated"` + Flavor string `json:"flavor" copier:"Flavor"` + Key_name string `json:"key_name" copier:"Key_name"` } type GetServersDetailedByIdReq struct { @@ -2983,6 +2984,44 @@ type ServerResp struct { AdminPass string `json:"adminPass" copier:"AdminPass"` } +type CreateMulServerReq struct { + CreateMulServer []CreateMulServer `json:"createMulServer,optional"` +} + +type CreateMulServer struct { + Platform string `json:"platform,optional"` + CrServer MulCrServer `json:"crserver" copier:"CrServer"` +} + +type MulCrServer struct { + Server MulServer `json:"server" copier:"Server"` +} + +type MulServer struct { + AvailabilityZone string `json:"availability_zone" copier:"AvailabilityZone"` + Name string `json:"name,optional" copier:"Name"` + FlavorRef string `json:"flavorRef,optional" copier:"FlavorRef"` + Description string `json:"description,optional" copier:"Description"` + ImageRef string `json:"imageRef,optional" copier:"ImageRef"` + Networks []CreNetwork `json:"networks,optional" copier:"Networks"` + MinCount int32 `json:"min_count,optional" copier:"MinCount"` +} + +type CreateMulServerResp struct { + Server []MulServerResp `json:"server" copier:"Server"` + Code int32 `json:"code,omitempty"` + Msg string `json:"msg,omitempty"` + ErrorMsg string `json:"errorMsg,omitempty"` +} + +type MulServerResp struct { + Id string `json:"id" copier:"Id"` + Links []Links `json:"links" copier:"Links"` + OSDCFDiskConfig string `json:"OS_DCF_diskConfig" copier:"OSDCFDiskConfig"` + SecurityGroups []Security_groups_server `json:"security_groups" copier:"SecurityGroups"` + AdminPass string `json:"adminPass" copier:"AdminPass"` +} + type RebuildServerReq struct { ServerId string `json:"server_id" copier:"ServerId"` Platform string `form:"platform,optional"` diff --git a/go.mod b/go.mod index 1717af8a..03bfbc16 100644 --- a/go.mod +++ b/go.mod @@ -1,8 +1,6 @@ module gitlink.org.cn/JointCloud/pcm-coordinator -go 1.22 - -toolchain go1.22.1 +go 1.21 require ( github.com/JCCE-nudt/zero-contrib/zrpc/registry/nacos v0.0.0-20230419021610-13bbc83fbc3c @@ -25,7 +23,7 @@ require ( github.com/rs/zerolog v1.28.0 github.com/zeromicro/go-zero v1.6.3 gitlink.org.cn/JointCloud/pcm-kubernetes v0.0.0-20240301071143-347480abff2c - gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240401022404-2f1425735f0d + gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240402074843-46c7d05954e6 gitlink.org.cn/JointCloud/pcm-slurm v0.0.0-20240301080743-8b94bbaf57f5 gitlink.org.cn/jcce-pcm/pcm-ac v0.0.0-20240301085553-f6ad88fa357a gitlink.org.cn/jcce-pcm/pcm-participant-ceph v0.0.0-20230904090036-24fc730ec87d @@ -164,8 +162,8 @@ require ( golang.org/x/time v0.5.0 // indirect golang.org/x/tools v0.18.0 // indirect google.golang.org/appengine v1.6.8 // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20240325203815-454cdb8f5daa // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20240325203815-454cdb8f5daa // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20240401170217-c3f982113cda // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240401170217-c3f982113cda // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/ini.v1 v1.67.0 // indirect gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect diff --git a/go.sum b/go.sum index 4c2e07cc..60c36c6f 100644 --- a/go.sum +++ b/go.sum @@ -464,6 +464,7 @@ github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyY github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/census-instrumentation/opencensus-proto v0.3.0/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/census-instrumentation/opencensus-proto v0.4.1/go.mod h1:4T9NM4+4Vw91VeyqjLS6ao50K5bOcLKN6Q42XnYaRYw= +github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko= github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= @@ -728,6 +729,7 @@ github.com/gorilla/sessions v1.2.1/go.mod h1:dk2InVEVJ0sfLlnXv9EAgkf6ecYs/i80K/z github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= +github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0/go.mod h1:hgWBS7lorOAVIJEQMi4ZsPv9hVvWI6+ch50m39Pf2Ks= github.com/grpc-ecosystem/grpc-gateway/v2 v2.11.3/go.mod h1:o//XUCC/F+yRGJoPO/VU0GSB0f8Nhgmxx0VIRUvaC0w= @@ -1079,10 +1081,10 @@ github.com/zeromicro/go-zero v1.6.3 h1:OL0NnHD5LdRNDolfcK9vUkJt7K8TcBE3RkzfM8poO github.com/zeromicro/go-zero v1.6.3/go.mod h1:XZL435ZxVi9MSXXtw2MRQhHgx6OoX3++MRMOE9xU70c= gitlink.org.cn/JointCloud/pcm-kubernetes v0.0.0-20240301071143-347480abff2c h1:2Wl/hvaSFjh6fmCSIQhjkr9llMRREQeqcXNLZ/HPY18= gitlink.org.cn/JointCloud/pcm-kubernetes v0.0.0-20240301071143-347480abff2c/go.mod h1:lSRfGs+PxFvw7CcndHWRd6UlLlGrZn0b0hp5cfaMNGw= -gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240328020739-cbdd8f5b226b h1:suRANMHQPhKKmgdJOZcbFYDJ0NUQkUGgVvMKxw75BQI= -gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240328020739-cbdd8f5b226b/go.mod h1:i2rrbMQ+Fve345BY9Heh4MUqVTAimZQElQhzzRee5B8= gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240401022404-2f1425735f0d h1:ZX/Kg8eKdaAfDsTd+Y+TrJsUvxp/DpbWUp+Ij4CtR+s= gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240401022404-2f1425735f0d/go.mod h1:i2rrbMQ+Fve345BY9Heh4MUqVTAimZQElQhzzRee5B8= +gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240402074843-46c7d05954e6 h1:d40gT5SaARH82SWJMMOao9iJ4QxrnjswjHFmU3tCPac= +gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240402074843-46c7d05954e6/go.mod h1:i2rrbMQ+Fve345BY9Heh4MUqVTAimZQElQhzzRee5B8= gitlink.org.cn/JointCloud/pcm-slurm v0.0.0-20240301080743-8b94bbaf57f5 h1:+/5vnzkJBfMRnya1NrhOzlroUtRa5ePiYbPKlHLoLV0= gitlink.org.cn/JointCloud/pcm-slurm v0.0.0-20240301080743-8b94bbaf57f5/go.mod h1:97AlUXN13g9UN3+9/DzCHpeoU5sbdyv0IQuTEHNexzQ= gitlink.org.cn/jcce-pcm/pcm-ac v0.0.0-20240301085553-f6ad88fa357a h1:fY1KmyZ6O7wVBvgt2HB+C9e1DncJdk/Wkv8m5Qz7abw= @@ -1094,18 +1096,12 @@ gitlink.org.cn/jcce-pcm/pcm-participant-modelarts v0.0.0-20231101085149-724c7c4c gitlink.org.cn/jcce-pcm/pcm-participant-octopus v0.0.0-20240222124813-e275cfa342f4 h1:NrxKAZ5uAzshB9EHcPw+XTOTzpxb5HslNRMYBrFC1Qo= gitlink.org.cn/jcce-pcm/pcm-participant-octopus v0.0.0-20240222124813-e275cfa342f4/go.mod h1:uyvpVqG1jHDXX+ubXI0RBwnWXzVykD/mliqGQIDvRoo= go.etcd.io/etcd/api/v3 v3.5.7/go.mod h1:9qew1gCdDDLu+VwmeG+iFpL+QlpHTo7iubavdVDgCAA= -go.etcd.io/etcd/api/v3 v3.5.12 h1:W4sw5ZoU2Juc9gBWuLk5U6fHfNVyY1WC5g9uiXZio/c= -go.etcd.io/etcd/api/v3 v3.5.12/go.mod h1:Ot+o0SWSyT6uHhA56al1oCED0JImsRiU9Dc26+C2a+4= go.etcd.io/etcd/api/v3 v3.5.13 h1:8WXU2/NBge6AUF1K1gOexB6e07NgsN1hXK0rSTtgSp4= go.etcd.io/etcd/api/v3 v3.5.13/go.mod h1:gBqlqkcMMZMVTMm4NDZloEVJzxQOQIls8splbqBDa0c= go.etcd.io/etcd/client/pkg/v3 v3.5.7/go.mod h1:o0Abi1MK86iad3YrWhgUsbGx1pmTS+hrORWc2CamuhY= -go.etcd.io/etcd/client/pkg/v3 v3.5.12 h1:EYDL6pWwyOsylrQyLp2w+HkQ46ATiOvoEdMarindU2A= -go.etcd.io/etcd/client/pkg/v3 v3.5.12/go.mod h1:seTzl2d9APP8R5Y2hFL3NVlD6qC/dOT+3kvrqPyTas4= go.etcd.io/etcd/client/pkg/v3 v3.5.13 h1:RVZSAnWWWiI5IrYAXjQorajncORbS0zI48LQlE2kQWg= go.etcd.io/etcd/client/pkg/v3 v3.5.13/go.mod h1:XxHT4u1qU12E2+po+UVPrEeL94Um6zL58ppuJWXSAB8= go.etcd.io/etcd/client/v3 v3.5.7/go.mod h1:sOWmj9DZUMyAngS7QQwCyAXXAL6WhgTOPLNS/NabQgw= -go.etcd.io/etcd/client/v3 v3.5.12 h1:v5lCPXn1pf1Uu3M4laUE2hp/geOTc5uPcYYsNe1lDxg= -go.etcd.io/etcd/client/v3 v3.5.12/go.mod h1:tSbBCakoWmmddL+BKVAJHa9km+O/E+bumDe9mSbPiqw= go.etcd.io/etcd/client/v3 v3.5.13 h1:o0fHTNJLeO0MyVbc7I3fsCf6nrOqn5d+diSarKnB2js= go.etcd.io/etcd/client/v3 v3.5.13/go.mod h1:cqiAeY8b5DEEcpxvgWKsbLIWNM/8Wy2xJSDMtioMcoI= go.mongodb.org/mongo-driver v1.11.4/go.mod h1:PTSz5yu21bkT/wXpkS7WR5f0ddqw5quethTUn9WM+2g= @@ -1755,8 +1751,12 @@ google.golang.org/genproto v0.0.0-20230110181048-76db0878b65f/go.mod h1:RGgjbofJ google.golang.org/genproto v0.0.0-20230123190316-2c411cf9d197/go.mod h1:RGgjbofJ8xD9Sq1VVhDM1Vok1vRONV+rg+CjzG4SZKM= google.golang.org/genproto/googleapis/api v0.0.0-20240325203815-454cdb8f5daa h1:Jt1XW5PaLXF1/ePZrznsh/aAUvI7Adfc3LY1dAKlzRs= google.golang.org/genproto/googleapis/api v0.0.0-20240325203815-454cdb8f5daa/go.mod h1:K4kfzHtI0kqWA79gecJarFtDn/Mls+GxQcg3Zox91Ac= +google.golang.org/genproto/googleapis/api v0.0.0-20240401170217-c3f982113cda h1:b6F6WIV4xHHD0FA4oIyzU6mHWg2WI2X1RBehwa5QN38= +google.golang.org/genproto/googleapis/api v0.0.0-20240401170217-c3f982113cda/go.mod h1:AHcE/gZH76Bk/ROZhQphlRoWo5xKDEtz3eVEO1LfA8c= google.golang.org/genproto/googleapis/rpc v0.0.0-20240325203815-454cdb8f5daa h1:RBgMaUMP+6soRkik4VoN8ojR2nex2TqZwjSSogic+eo= google.golang.org/genproto/googleapis/rpc v0.0.0-20240325203815-454cdb8f5daa/go.mod h1:WtryC6hu0hhx87FDGxWCDptyssuo68sk10vYjF+T9fY= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240401170217-c3f982113cda h1:LI5DOvAxUPMv/50agcLLoo+AdWc1irS9Rzz4vPuD1V4= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240401170217-c3f982113cda/go.mod h1:WtryC6hu0hhx87FDGxWCDptyssuo68sk10vYjF+T9fY= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= From 2599f8a564afc085272faed85e661e1502f32fee Mon Sep 17 00:00:00 2001 From: qiwang <1364512070@qq.com> Date: Tue, 2 Apr 2024 16:28:47 +0800 Subject: [PATCH 10/16] feat:add create mulserver interface Former-commit-id: b917936a2027f599e1f71088de8c808a607c272d --- api/etc/pcm.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/etc/pcm.yaml b/api/etc/pcm.yaml index 1be194eb..79bde9f1 100644 --- a/api/etc/pcm.yaml +++ b/api/etc/pcm.yaml @@ -5,8 +5,8 @@ Port: 8999 Timeout: 50000 DB: - # DataSource: root:uJpLd6u-J?HC1@(10.206.0.12:3306)/pcm?parseTime=true&loc=Local - DataSource: root:uJpLd6u-J?HC1@(47.92.88.143:3306)/pcm?parseTime=true&loc=Local + DataSource: root:uJpLd6u-J?HC1@(10.206.0.12:3306)/pcm?parseTime=true&loc=Local + # DataSource: root:uJpLd6u-J?HC1@(47.92.88.143:3306)/pcm?parseTime=true&loc=Local Redis: Host: 10.206.0.12:6379 Pass: redisPW123 From db55f5e5a3f585c7f466416d6f533bb01db9a593 Mon Sep 17 00:00:00 2001 From: tzwang Date: Tue, 2 Apr 2024 16:53:27 +0800 Subject: [PATCH 11/16] fix getresources and dynamicResources strategy bugs Former-commit-id: fb05379230be7708ed91f5c21254af271a2b9237 --- .../logic/schedule/schedulesubmitlogic.go | 10 +++--- api/internal/scheduler/database/aiStorage.go | 5 ++- .../scheduler/schedulers/aiScheduler.go | 31 ++++++++++++++++--- .../scheduler/strategy/dynamicResources.go | 2 +- api/internal/storeLink/shuguangai.go | 30 +++++++++--------- 5 files changed, 49 insertions(+), 29 deletions(-) diff --git a/api/internal/logic/schedule/schedulesubmitlogic.go b/api/internal/logic/schedule/schedulesubmitlogic.go index a4704def..d5105390 100644 --- a/api/internal/logic/schedule/schedulesubmitlogic.go +++ b/api/internal/logic/schedule/schedulesubmitlogic.go @@ -28,11 +28,11 @@ func NewScheduleSubmitLogic(ctx context.Context, svcCtx *svc.ServiceContext) *Sc func (l *ScheduleSubmitLogic) ScheduleSubmit(req *types.ScheduleReq) (resp *types.ScheduleResp, err error) { resp = &types.ScheduleResp{} opt := &option.AiOption{ - ResourceType: req.AiOption.ResourceType, - Tops: 0, - TaskType: req.AiOption.TaskType, - DatasetsName: req.AiOption.Datasets, - AlgorithmName: "cnn", + ResourceType: req.AiOption.ResourceType, + Tops: 0, + TaskType: req.AiOption.TaskType, + DatasetsName: req.AiOption.Datasets, + //AlgorithmName: "cnn", StrategyName: req.AiOption.Strategy, ClusterToStaticWeight: nil, Params: []string{ diff --git a/api/internal/scheduler/database/aiStorage.go b/api/internal/scheduler/database/aiStorage.go index edcc5969..670aec91 100644 --- a/api/internal/scheduler/database/aiStorage.go +++ b/api/internal/scheduler/database/aiStorage.go @@ -2,7 +2,6 @@ package database import ( "github.com/zeromicro/go-zero/core/logx" - "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models" @@ -24,12 +23,12 @@ func (s *AiStorage) GetParticipants() (*types.ClusterListResp, error) { return &resp, nil } -func (s *AiStorage) SaveTask(cluster strategy.AssignedCluster) error { +func (s *AiStorage) SaveTask(name string) error { // 构建主任务结构体 taskModel := models.Task{ Status: constants.Saved, Description: "ai task", - Name: "testAi", + Name: name, CommitTime: time.Now(), } // 保存任务数据到数据库 diff --git a/api/internal/scheduler/schedulers/aiScheduler.go b/api/internal/scheduler/schedulers/aiScheduler.go index 4b62f427..8ff45161 100644 --- a/api/internal/scheduler/schedulers/aiScheduler.go +++ b/api/internal/scheduler/schedulers/aiScheduler.go @@ -100,6 +100,8 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) ([]inter return nil, errors.New("clusters is nil") } + //res := struct { + //}{} var wg sync.WaitGroup var result []interface{} var errs []error @@ -115,6 +117,7 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) ([]inter wg.Add(1) go func() { resp, err := executorMap[c.Name].Execute(as.ctx, as.option) + if err != nil { // TODO: database operation errCh <- err @@ -122,15 +125,20 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) ([]inter return } // TODO: database operation - ch <- resp + data := struct { + Resp interface{} + ClusterId int64 + }{ + Resp: resp, + ClusterId: c.ParticipantId, + } + ch <- data wg.Done() }() } wg.Wait() - - for s := range ch { - result = append(result, s) - } + close(ch) + close(errCh) for e := range errCh { errs = append(errs, e) @@ -140,6 +148,19 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) ([]inter return nil, errors.New("submit task failed") } + for s := range ch { + data := (s).(struct { + Resp interface{} + ClusterId int64 + }) + + result = append(result, data.Resp) + } + + err := as.AiStorages.SaveTask(as.option.TaskName) + if err != nil { + return nil, err + } return result, nil } diff --git a/api/internal/scheduler/strategy/dynamicResources.go b/api/internal/scheduler/strategy/dynamicResources.go index c8d4052f..bfb78263 100644 --- a/api/internal/scheduler/strategy/dynamicResources.go +++ b/api/internal/scheduler/strategy/dynamicResources.go @@ -49,7 +49,7 @@ func (ps *DynamicResourcesStrategy) Schedule() ([]*AssignedCluster, error) { if opt.ResourceType == "computeCard" { var maxCurrentCardHours float64 for _, card := range res.CardsAvail { - cardHours := common.RoundFloat(card.TOpsAtFp16*card.CardHours, 3) + cardHours := common.RoundFloat( /*card.TOpsAtFp16**/ card.CardHours, 3) if cardHours > maxCurrentCardHours { maxCurrentCardHours = cardHours } diff --git a/api/internal/storeLink/shuguangai.go b/api/internal/storeLink/shuguangai.go index e7da3222..a108d8b7 100644 --- a/api/internal/storeLink/shuguangai.go +++ b/api/internal/storeLink/shuguangai.go @@ -284,14 +284,14 @@ func (s *ShuguangAi) GetResourceStats(ctx context.Context) (*collector.ResourceS totalDcu := limitResp.Data.AccountMaxDcu //disk - diskReq := &hpcAC.ParaStorQuotaReq{} - diskResp, err := s.aCRpc.ParaStorQuota(ctx, diskReq) - if err != nil { - return nil, err - } - - totalDisk := common.RoundFloat(diskResp.Data[0].Threshold*KB*KB*KB, 3) - availDisk := common.RoundFloat((diskResp.Data[0].Threshold-diskResp.Data[0].Usage)*KB*KB*KB, 3) + //diskReq := &hpcAC.ParaStorQuotaReq{} + //diskResp, err := s.aCRpc.ParaStorQuota(ctx, diskReq) + //if err != nil { + // return nil, err + //} + // + //totalDisk := common.RoundFloat(diskResp.Data[0].Threshold*KB*KB*KB, 3) + //availDisk := common.RoundFloat((diskResp.Data[0].Threshold-diskResp.Data[0].Usage)*KB*KB*KB, 3) //memory nodeResp, err := s.aCRpc.GetNodeResources(ctx, nil) @@ -349,12 +349,12 @@ func (s *ShuguangAi) GetResourceStats(ctx context.Context) (*collector.ResourceS Balance: balance, CpuCoreTotal: totalCpu, CpuCoreAvail: CpuCoreAvail, - DiskTotal: totalDisk, - DiskAvail: availDisk, - MemTotal: memSize, - MemAvail: MemAvail, - CpuCoreHours: cpuHours, - CardsAvail: cards, + //DiskTotal: totalDisk, + //DiskAvail: availDisk, + MemTotal: memSize, + MemAvail: MemAvail, + CpuCoreHours: cpuHours, + CardsAvail: cards, } return resourceStats, nil @@ -381,7 +381,7 @@ func (s *ShuguangAi) GetAlgorithms(ctx context.Context) ([]*collector.Algorithm, var algorithms []*collector.Algorithm for _, t := range GetTaskTypes() { taskType := t - req := &hpcAC.GetFileListReq{Limit: 100, Path: ALGORITHM_DIR + FORWARD_SLASH + taskType, Start: 0} + req := &hpcAC.GetFileListReq{Limit: 100, Path: ALGORITHM_DIR + FORWARD_SLASH + taskType, Start: 0, Order: "asc", OrderBy: "name", KeyWord: ""} list, err := s.aCRpc.GetFileList(ctx, req) if err != nil { return nil, err From dbe2363339b60661a000b0cfb1dcaf561c3ca5fd Mon Sep 17 00:00:00 2001 From: tzwang Date: Tue, 2 Apr 2024 18:35:42 +0800 Subject: [PATCH 12/16] updated type convert function Former-commit-id: c1a500ed22fc2922eb4d37d3651cca180c73f948 --- .../logic/schedule/schedulesubmitlogic.go | 43 ++++++++++++++----- .../scheduler/schedulers/aiScheduler.go | 13 ++---- api/internal/storeLink/storeLink.go | 17 ++++++++ 3 files changed, 54 insertions(+), 19 deletions(-) diff --git a/api/internal/logic/schedule/schedulesubmitlogic.go b/api/internal/logic/schedule/schedulesubmitlogic.go index d5105390..8dbccdae 100644 --- a/api/internal/logic/schedule/schedulesubmitlogic.go +++ b/api/internal/logic/schedule/schedulesubmitlogic.go @@ -4,6 +4,7 @@ import ( "context" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option" + "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/storeLink" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types" @@ -28,26 +29,48 @@ func NewScheduleSubmitLogic(ctx context.Context, svcCtx *svc.ServiceContext) *Sc func (l *ScheduleSubmitLogic) ScheduleSubmit(req *types.ScheduleReq) (resp *types.ScheduleResp, err error) { resp = &types.ScheduleResp{} opt := &option.AiOption{ - ResourceType: req.AiOption.ResourceType, - Tops: 0, - TaskType: req.AiOption.TaskType, - DatasetsName: req.AiOption.Datasets, - //AlgorithmName: "cnn", + ResourceType: req.AiOption.ResourceType, + Tops: req.AiOption.Tops, + TaskType: req.AiOption.TaskType, + DatasetsName: req.AiOption.Datasets, + AlgorithmName: req.AiOption.Algorithm, StrategyName: req.AiOption.Strategy, - ClusterToStaticWeight: nil, - Params: []string{ - "epoch,1", - }, + ClusterToStaticWeight: req.AiOption.StaticWeightMap, + Params: req.AiOption.Params, + Envs: req.AiOption.Envs, + Cmd: req.AiOption.Cmd, } aiSchdl, err := schedulers.NewAiScheduler(l.ctx, "", l.svcCtx.Scheduler, opt) if err != nil { return nil, err } - _, err = l.svcCtx.Scheduler.AssignAndSchedule(aiSchdl) + results, err := l.svcCtx.Scheduler.AssignAndSchedule(aiSchdl) if err != nil { return nil, err } + //转换成统一返回类型 + for _, r := range results { + sResp, err := storeLink.ConvertType(r, resp, nil) + if err != nil { + return nil, err + } + + if sResp.(*types.ScheduleResp).ErrorMsg != "" { + resp.ErrorMsg = sResp.(*types.ScheduleResp).ErrorMsg + "\n" + } + } + + if resp.ErrorMsg == "" { + resp.Success = true + err = l.svcCtx.Scheduler.AiStorages.SaveTask(req.AiOption.TaskName) + if err != nil { + return nil, err + } + } else { + resp.Success = false + } + return resp, nil } diff --git a/api/internal/scheduler/schedulers/aiScheduler.go b/api/internal/scheduler/schedulers/aiScheduler.go index 8ff45161..a9e73f17 100644 --- a/api/internal/scheduler/schedulers/aiScheduler.go +++ b/api/internal/scheduler/schedulers/aiScheduler.go @@ -100,8 +100,6 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) ([]inter return nil, errors.New("clusters is nil") } - //res := struct { - //}{} var wg sync.WaitGroup var result []interface{} var errs []error @@ -116,15 +114,16 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) ([]inter } wg.Add(1) go func() { + //var resp interface{} + //var err error resp, err := executorMap[c.Name].Execute(as.ctx, as.option) if err != nil { - // TODO: database operation errCh <- err wg.Done() return } - // TODO: database operation + data := struct { Resp interface{} ClusterId int64 @@ -153,14 +152,10 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) ([]inter Resp interface{} ClusterId int64 }) - + // TODO: database operation result = append(result, data.Resp) } - err := as.AiStorages.SaveTask(as.option.TaskName) - if err != nil { - return nil, err - } return result, nil } diff --git a/api/internal/storeLink/storeLink.go b/api/internal/storeLink/storeLink.go index ce00a540..b3c0ba2d 100644 --- a/api/internal/storeLink/storeLink.go +++ b/api/internal/storeLink/storeLink.go @@ -370,6 +370,14 @@ func ConvertType(in interface{}, out interface{}, participant *models.StorelinkC resp.TaskId = inresp.Payload.JobId + return resp, nil + case *types.ScheduleResp: + resp := (interface{})(out).(*types.ScheduleResp) + resp.Success = inresp.Success + if !resp.Success { + resp.ErrorMsg = inresp.Error.Message + return resp, nil + } return resp, nil } return nil, nil @@ -404,6 +412,15 @@ func ConvertType(in interface{}, out interface{}, participant *models.StorelinkC resp.ErrorMsg = inresp.Msg } return resp, nil + case *types.ScheduleResp: + resp := (interface{})(out).(*types.ScheduleResp) + if inresp.Code == "0" { + resp.Success = true + } else { + resp.Success = false + resp.ErrorMsg = inresp.Msg + } + return resp, nil } return nil, nil From 6ad1cbfcfbc32ee4db73a4e639c49bbcd0072076 Mon Sep 17 00:00:00 2001 From: qiwang <1364512070@qq.com> Date: Wed, 3 Apr 2024 14:48:44 +0800 Subject: [PATCH 13/16] feat:Add the survival_time field Former-commit-id: fbb71d90bcbd61e6fd109f49bcd077c9bb9fc3c2 --- api/desc/vm/pcm-vm.api | 1 + api/etc/pcm.yaml | 4 ++-- api/internal/logic/vm/listserversdetailedlogic.go | 13 +++---------- api/internal/types/types.go | 1 + go.mod | 2 +- go.sum | 2 ++ 6 files changed, 10 insertions(+), 13 deletions(-) diff --git a/api/desc/vm/pcm-vm.api b/api/desc/vm/pcm-vm.api index fb21c68c..28062302 100644 --- a/api/desc/vm/pcm-vm.api +++ b/api/desc/vm/pcm-vm.api @@ -134,6 +134,7 @@ type ( Updated string `json:"updated" copier:"Updated"` Flavor string `json:"flavor" copier:"Flavor"` Key_name string `json:"key_name" copier:"Key_name"` + Survival_time int32 `json:"survival_time" copier:"Survival_time"` } ) diff --git a/api/etc/pcm.yaml b/api/etc/pcm.yaml index 79bde9f1..2cb4aef9 100644 --- a/api/etc/pcm.yaml +++ b/api/etc/pcm.yaml @@ -5,8 +5,8 @@ Port: 8999 Timeout: 50000 DB: - DataSource: root:uJpLd6u-J?HC1@(10.206.0.12:3306)/pcm?parseTime=true&loc=Local - # DataSource: root:uJpLd6u-J?HC1@(47.92.88.143:3306)/pcm?parseTime=true&loc=Local + #DataSource: root:uJpLd6u-J?HC1@(10.206.0.12:3306)/pcm?parseTime=true&loc=Local + DataSource: root:uJpLd6u-J?HC1@(47.92.88.143:3306)/pcm?parseTime=true&loc=Local Redis: Host: 10.206.0.12:6379 Pass: redisPW123 diff --git a/api/internal/logic/vm/listserversdetailedlogic.go b/api/internal/logic/vm/listserversdetailedlogic.go index 63fdbe80..79b217e5 100644 --- a/api/internal/logic/vm/listserversdetailedlogic.go +++ b/api/internal/logic/vm/listserversdetailedlogic.go @@ -18,15 +18,12 @@ import ( "context" "github.com/jinzhu/copier" "github.com/pkg/errors" + "github.com/zeromicro/go-zero/core/logx" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/helper/xerr" - error2 "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/error" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils" "gitlink.org.cn/JointCloud/pcm-openstack/openstack" - "k8s.io/apimachinery/pkg/util/json" - - "github.com/zeromicro/go-zero/core/logx" ) type ListServersDetailedLogic struct { @@ -44,18 +41,14 @@ func NewListServersDetailedLogic(ctx context.Context, svcCtx *svc.ServiceContext } func (l *ListServersDetailedLogic) ListServersDetailed(req *types.ListServersDetailedReq) (resp *types.ListServersDetailedResp, err error) { - // todo: add your logic here and delete this line + resp = &types.ListServersDetailedResp{} ListServersDetailedReq := &openstack.ListServersDetailedReq{} err = copier.CopyWithOption(ListServersDetailedReq, req, copier.Option{Converters: utils.Converters}) ListServersDetailedResp, err := l.svcCtx.OpenstackRpc.ListServersDetailed(l.ctx, ListServersDetailedReq) if err != nil { return nil, errors.Wrapf(xerr.NewErrMsg("Failed to get Servers list"), "Failed to get db Servers list err : %v ,req:%+v", err, req) } - marshal, err := json.Marshal(&ListServersDetailedResp) - if err != nil { - return nil, error2.NewDefaultError(err.Error()) - } - json.Unmarshal(marshal, &resp) + utils.Convert(&ListServersDetailedResp.Servers, &resp.ServersDetailed) err = copier.CopyWithOption(&resp, &ListServersDetailedResp, copier.Option{Converters: utils.Converters}) return resp, err diff --git a/api/internal/types/types.go b/api/internal/types/types.go index 2fa9ef29..819b0367 100644 --- a/api/internal/types/types.go +++ b/api/internal/types/types.go @@ -2748,6 +2748,7 @@ type ServersDetailed struct { Updated string `json:"updated" copier:"Updated"` Flavor string `json:"flavor" copier:"Flavor"` Key_name string `json:"key_name" copier:"Key_name"` + Survival_time int32 `json:"survival_time" copier:"Survival_time"` } type GetServersDetailedByIdReq struct { diff --git a/go.mod b/go.mod index 03bfbc16..53068e7a 100644 --- a/go.mod +++ b/go.mod @@ -23,7 +23,7 @@ require ( github.com/rs/zerolog v1.28.0 github.com/zeromicro/go-zero v1.6.3 gitlink.org.cn/JointCloud/pcm-kubernetes v0.0.0-20240301071143-347480abff2c - gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240402074843-46c7d05954e6 + gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240403033338-e7edabad4203 gitlink.org.cn/JointCloud/pcm-slurm v0.0.0-20240301080743-8b94bbaf57f5 gitlink.org.cn/jcce-pcm/pcm-ac v0.0.0-20240301085553-f6ad88fa357a gitlink.org.cn/jcce-pcm/pcm-participant-ceph v0.0.0-20230904090036-24fc730ec87d diff --git a/go.sum b/go.sum index 60c36c6f..e4a29585 100644 --- a/go.sum +++ b/go.sum @@ -1085,6 +1085,8 @@ gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240401022404-2f1425735f0d h1:ZX gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240401022404-2f1425735f0d/go.mod h1:i2rrbMQ+Fve345BY9Heh4MUqVTAimZQElQhzzRee5B8= gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240402074843-46c7d05954e6 h1:d40gT5SaARH82SWJMMOao9iJ4QxrnjswjHFmU3tCPac= gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240402074843-46c7d05954e6/go.mod h1:i2rrbMQ+Fve345BY9Heh4MUqVTAimZQElQhzzRee5B8= +gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240403033338-e7edabad4203 h1:s6PsZ1+bev294IWdZRlV7mnOwI1+UzFcldVW/BqhQzI= +gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240403033338-e7edabad4203/go.mod h1:i2rrbMQ+Fve345BY9Heh4MUqVTAimZQElQhzzRee5B8= gitlink.org.cn/JointCloud/pcm-slurm v0.0.0-20240301080743-8b94bbaf57f5 h1:+/5vnzkJBfMRnya1NrhOzlroUtRa5ePiYbPKlHLoLV0= gitlink.org.cn/JointCloud/pcm-slurm v0.0.0-20240301080743-8b94bbaf57f5/go.mod h1:97AlUXN13g9UN3+9/DzCHpeoU5sbdyv0IQuTEHNexzQ= gitlink.org.cn/jcce-pcm/pcm-ac v0.0.0-20240301085553-f6ad88fa357a h1:fY1KmyZ6O7wVBvgt2HB+C9e1DncJdk/Wkv8m5Qz7abw= From c57cbc89ed046cba1ba615c3aec45e0adbe8fe5e Mon Sep 17 00:00:00 2001 From: qiwang <1364512070@qq.com> Date: Wed, 3 Apr 2024 14:52:32 +0800 Subject: [PATCH 14/16] feat:Add the survival_time field Former-commit-id: 60101077836282000110003c176979638ad5a6ad --- api/etc/pcm.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/etc/pcm.yaml b/api/etc/pcm.yaml index 2cb4aef9..79bde9f1 100644 --- a/api/etc/pcm.yaml +++ b/api/etc/pcm.yaml @@ -5,8 +5,8 @@ Port: 8999 Timeout: 50000 DB: - #DataSource: root:uJpLd6u-J?HC1@(10.206.0.12:3306)/pcm?parseTime=true&loc=Local - DataSource: root:uJpLd6u-J?HC1@(47.92.88.143:3306)/pcm?parseTime=true&loc=Local + DataSource: root:uJpLd6u-J?HC1@(10.206.0.12:3306)/pcm?parseTime=true&loc=Local + # DataSource: root:uJpLd6u-J?HC1@(47.92.88.143:3306)/pcm?parseTime=true&loc=Local Redis: Host: 10.206.0.12:6379 Pass: redisPW123 From cdd7fe9dca810ce3653fbbe862ba115be6187ac2 Mon Sep 17 00:00:00 2001 From: tzwang Date: Sun, 7 Apr 2024 16:50:30 +0800 Subject: [PATCH 15/16] updated schedule submit task api logic Former-commit-id: 72fe516fb25e2feb760391799265642341d16277 --- api/desc/schedule/pcm-schedule.api | 10 ++- .../logic/schedule/schedulesubmitlogic.go | 15 +--- .../scheduler/schedulers/aiScheduler.go | 81 ++++++++++++++----- .../schedulers/option/cloudOption.go | 9 +++ .../scheduler/strategy/dynamicResources.go | 2 + 5 files changed, 85 insertions(+), 32 deletions(-) create mode 100644 api/internal/scheduler/schedulers/option/cloudOption.go diff --git a/api/desc/schedule/pcm-schedule.api b/api/desc/schedule/pcm-schedule.api index 72ba71a7..eafba3de 100644 --- a/api/desc/schedule/pcm-schedule.api +++ b/api/desc/schedule/pcm-schedule.api @@ -13,8 +13,14 @@ type ( } ScheduleResp { - Success bool `json:"success"` - ErrorMsg string `json:"errorMsg"` + Results []*ScheduleResult `json:"results"` + } + + ScheduleResult { + ClusterId string `json:"clusterId"` + TaskId string `json:"taskId"` + Replica int32 `json:"replica"` + Msg string `json:"msg"` } AiOption { diff --git a/api/internal/logic/schedule/schedulesubmitlogic.go b/api/internal/logic/schedule/schedulesubmitlogic.go index 8dbccdae..074d31c0 100644 --- a/api/internal/logic/schedule/schedulesubmitlogic.go +++ b/api/internal/logic/schedule/schedulesubmitlogic.go @@ -4,8 +4,6 @@ import ( "context" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option" - "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/storeLink" - "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types" @@ -50,15 +48,10 @@ func (l *ScheduleSubmitLogic) ScheduleSubmit(req *types.ScheduleReq) (resp *type return nil, err } - //转换成统一返回类型 - for _, r := range results { - sResp, err := storeLink.ConvertType(r, resp, nil) - if err != nil { - return nil, err - } - - if sResp.(*types.ScheduleResp).ErrorMsg != "" { - resp.ErrorMsg = sResp.(*types.ScheduleResp).ErrorMsg + "\n" + switch opt.GetOptionType() { + case option.AI: + for _, result := range results { + _ = (result).(*schedulers.AiResult) } } diff --git a/api/internal/scheduler/schedulers/aiScheduler.go b/api/internal/scheduler/schedulers/aiScheduler.go index a9e73f17..0533f22c 100644 --- a/api/internal/scheduler/schedulers/aiScheduler.go +++ b/api/internal/scheduler/schedulers/aiScheduler.go @@ -16,6 +16,7 @@ package schedulers import ( "context" + "encoding/json" "errors" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option" @@ -25,6 +26,9 @@ import ( "gitlink.org.cn/JointCloud/pcm-coordinator/api/pkg/response" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils" + "gitlink.org.cn/jcce-pcm/pcm-ac/hpcAC" + "gitlink.org.cn/jcce-pcm/pcm-participant-octopus/octopus" + "strconv" "sync" ) @@ -36,6 +40,13 @@ type AiScheduler struct { ctx context.Context } +type AiResult struct { + taskId string + clusterId string + replica int32 + msg string +} + func NewAiScheduler(ctx context.Context, val string, scheduler *scheduler.Scheduler, option *option.AiOption) (*AiScheduler, error) { return &AiScheduler{ctx: ctx, yamlString: val, Scheduler: scheduler, option: option}, nil } @@ -101,9 +112,9 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) ([]inter } var wg sync.WaitGroup - var result []interface{} + var results []interface{} var errs []error - var ch = make(chan interface{}, len(clusters)) + var ch = make(chan *AiResult, len(clusters)) var errCh = make(chan error, len(clusters)) executorMap := *as.AiExecutor @@ -114,9 +125,8 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) ([]inter } wg.Add(1) go func() { - //var resp interface{} - //var err error - resp, err := executorMap[c.Name].Execute(as.ctx, as.option) + opt, _ := cloneAiOption(as.option) + resp, err := executorMap[c.Name].Execute(as.ctx, opt) if err != nil { errCh <- err @@ -124,14 +134,11 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) ([]inter return } - data := struct { - Resp interface{} - ClusterId int64 - }{ - Resp: resp, - ClusterId: c.ParticipantId, - } - ch <- data + result, _ := convertType(resp) + result.replica = c.Replicas + result.clusterId = strconv.FormatInt(c.ParticipantId, 10) + + ch <- result wg.Done() }() } @@ -148,15 +155,11 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) ([]inter } for s := range ch { - data := (s).(struct { - Resp interface{} - ClusterId int64 - }) // TODO: database operation - result = append(result, data.Resp) + results = append(results, s) } - return result, nil + return results, nil } func (as *AiScheduler) findClustersWithResources() ([]*collector.ResourceStats, error) { @@ -202,3 +205,43 @@ func (as *AiScheduler) findClustersWithResources() ([]*collector.ResourceStats, } return resourceSpecs, nil } + +func convertType(in interface{}) (*AiResult, error) { + var result AiResult + switch (in).(type) { + case *hpcAC.SubmitTaskAiResp: + resp := (in).(*hpcAC.SubmitTaskAiResp) + if resp.Code == "0" { + result.taskId = resp.Data + } else { + result.msg = resp.Msg + } + return &result, nil + case *octopus.CreateTrainJobResp: + resp := (in).(*octopus.CreateTrainJobResp) + + if resp.Success { + result.taskId = resp.Payload.JobId + } else { + result.msg = resp.Error.Message + } + + return &result, nil + default: + return nil, errors.New("ai task response failed") + } +} + +func cloneAiOption(opt *option.AiOption) (*option.AiOption, error) { + origJSON, err := json.Marshal(opt) + if err != nil { + return nil, err + } + + clone := option.AiOption{} + if err = json.Unmarshal(origJSON, &clone); err != nil { + return nil, err + } + + return &clone, nil +} diff --git a/api/internal/scheduler/schedulers/option/cloudOption.go b/api/internal/scheduler/schedulers/option/cloudOption.go new file mode 100644 index 00000000..fe6274ed --- /dev/null +++ b/api/internal/scheduler/schedulers/option/cloudOption.go @@ -0,0 +1,9 @@ +package option + +type CloudOption struct { + task interface{} +} + +func (c CloudOption) GetOptionType() string { + return CLOUD +} diff --git a/api/internal/scheduler/strategy/dynamicResources.go b/api/internal/scheduler/strategy/dynamicResources.go index bfb78263..cb1d4922 100644 --- a/api/internal/scheduler/strategy/dynamicResources.go +++ b/api/internal/scheduler/strategy/dynamicResources.go @@ -64,6 +64,8 @@ func (ps *DynamicResourcesStrategy) Schedule() ([]*AssignedCluster, error) { } results = append(results, &assignedCluster) return results, nil + case option.CLOUD: + } return nil, errors.New("failed to apply DynamicResourcesStrategy") From 74789e940daaa852fa120202b9164fd6d198d4bd Mon Sep 17 00:00:00 2001 From: Jake <450705171@qq.com> Date: Sun, 7 Apr 2024 16:57:54 +0800 Subject: [PATCH 16/16] goctl generate types Former-commit-id: 36cf4966933f78d170ae6a43f4af3f4d100c803f --- api/internal/types/types.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/api/internal/types/types.go b/api/internal/types/types.go index d055cc65..65aa7de6 100644 --- a/api/internal/types/types.go +++ b/api/internal/types/types.go @@ -5266,8 +5266,14 @@ type ScheduleReq struct { } type ScheduleResp struct { - Success bool `json:"success"` - ErrorMsg string `json:"errorMsg"` + Results []*ScheduleResult `json:"results"` +} + +type ScheduleResult struct { + ClusterId string `json:"clusterId"` + TaskId string `json:"taskId"` + Replica int32 `json:"replica"` + Msg string `json:"msg"` } type AiOption struct {