From c6b6692a27d8be1d06c27f1b909e6cba58e32b2c Mon Sep 17 00:00:00 2001 From: tzwang Date: Wed, 17 Jan 2024 17:07:04 +0800 Subject: [PATCH 01/18] scheduler refactor updated3 Former-commit-id: 651ce567abb9ad90e4d037eb9dca493d2fbf1d3a --- pkg/scheduler/database/cloudStorage.go | 22 +++++++++------ pkg/scheduler/database/storage.go | 6 ++-- pkg/scheduler/entity/entity.go | 13 +++++++++ .../strategies/replicationStrategy.go | 28 +++++++++++++++++-- 4 files changed, 56 insertions(+), 13 deletions(-) create mode 100644 pkg/scheduler/entity/entity.go diff --git a/pkg/scheduler/database/cloudStorage.go b/pkg/scheduler/database/cloudStorage.go index 929ba159..d6e0e022 100644 --- a/pkg/scheduler/database/cloudStorage.go +++ b/pkg/scheduler/database/cloudStorage.go @@ -1,6 +1,9 @@ package database -import "gorm.io/gorm" +import ( + "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/entity" + "gorm.io/gorm" +) type CloudStorage struct { dbEngin *gorm.DB @@ -10,8 +13,8 @@ func NewCloudStorage(dbEngin *gorm.DB) *CloudStorage { return &CloudStorage{dbEngin: dbEngin} } -func (c *CloudStorage) GetProviderParams() ([]providerParams, error) { - var proParams []providerParams +func (c *CloudStorage) GetProviderParams() ([]entity.ProviderParams, error) { + var proParams []entity.ProviderParams sqlstr := "SELECT SUM(a.disk_avail) as disk_avail,SUM(a.mem_avail) as mem_avail,SUM(a.cpu_total * a.cpu_usable) as cpu_avail,participant_id from (SELECT * from sc_node_avail_info where created_time in (SELECT MAX(created_time) as time from sc_node_avail_info where deleted_flag = 0 GROUP BY participant_id,node_name)) a GROUP BY a.participant_id" c.dbEngin.Raw(sqlstr).Scan(&proParams) if len(proParams) == 0 { @@ -20,9 +23,12 @@ func (c *CloudStorage) GetProviderParams() ([]providerParams, error) { return proParams, nil } -type providerParams struct { - Disk_avail float64 - Mem_avail float64 - Cpu_avail float64 - Participant_id int64 +func (c *CloudStorage) FindAvailableParticipants() 
([]entity.Participant, error) { + var parts []entity.Participant + sqlstr := "select id as participant_id, name as name from sc_participant_phy_info" + c.dbEngin.Raw(sqlstr).Scan(&parts) + if len(parts) == 0 { + return nil, nil + } + return parts, nil } diff --git a/pkg/scheduler/database/storage.go b/pkg/scheduler/database/storage.go index 7c293d77..d2371f59 100644 --- a/pkg/scheduler/database/storage.go +++ b/pkg/scheduler/database/storage.go @@ -1,8 +1,10 @@ package database +import "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/entity" + type Storage interface { - GetProviderParams() ([]providerParams, error) - FindAvailableCluster() + GetProviderParams() ([]entity.ProviderParams, error) + FindAvailableParticipants() ([]entity.Participant, error) } type NSIDSpecified interface { diff --git a/pkg/scheduler/entity/entity.go b/pkg/scheduler/entity/entity.go new file mode 100644 index 00000000..33e48dba --- /dev/null +++ b/pkg/scheduler/entity/entity.go @@ -0,0 +1,13 @@ +package entity + +type ProviderParams struct { + Disk_avail float64 + Mem_avail float64 + Cpu_avail float64 + Participant_id int64 +} + +type Participant struct { + Name string + Participant_id int64 +} diff --git a/pkg/scheduler/strategies/replicationStrategy.go b/pkg/scheduler/strategies/replicationStrategy.go index 569fad69..2a699c5a 100644 --- a/pkg/scheduler/strategies/replicationStrategy.go +++ b/pkg/scheduler/strategies/replicationStrategy.go @@ -1,12 +1,34 @@ package strategies +import ( + "github.com/pkg/errors" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/entity" +) + type ReplicationStrategy struct { + replicas int32 + participants []entity.Participant } -func (ps *ReplicationStrategy) findAvailableCLuster() error { - return nil +func NewReplicationStrategy(participants []entity.Participant, replicas int32) *ReplicationStrategy { + return &ReplicationStrategy{replicas: replicas, + participants: participants, + } } func (ps *ReplicationStrategy) Schedule() 
([]*AssignedCluster, error) { - return nil, nil + if ps.replicas < 1 { + return nil, errors.New("replicas must be greater than 0") + } + + if ps.participants == nil { + return nil, errors.New("participantId must be set") + } + + var results []*AssignedCluster + for _, p := range ps.participants { + cluster := &AssignedCluster{ParticipantId: p.Participant_id, Name: p.Name, Replicas: ps.replicas} + results = append(results, cluster) + } + return results, nil } From aac4ee3c67620982154968f6cb6aa8b2c555302a Mon Sep 17 00:00:00 2001 From: tzwang Date: Fri, 19 Jan 2024 17:31:57 +0800 Subject: [PATCH 02/18] scheduler refactor updated Former-commit-id: 3c4f190ce82f0a0c48533548a67fd23bf7f1c3ea --- pkg/scheduler/aiScheduler.go | 4 ++++ pkg/scheduler/cloudScheduler.go | 4 ++++ pkg/scheduler/collector/acCollector.go | 13 +++++++++++++ pkg/scheduler/collector/aiCollector.go | 8 -------- pkg/scheduler/collector/collector.go | 13 ++++++++++++- pkg/scheduler/common.go | 1 + pkg/scheduler/hpcScheduler.go | 4 ++++ pkg/scheduler/scheduler.go | 7 ++++++- .../{replicationStrategy.go => replication.go} | 0 .../{priceBasedStrategy.go => resourcePricing.go} | 0 pkg/scheduler/strategies/staticWeight.go | 10 ++++++++++ 11 files changed, 54 insertions(+), 10 deletions(-) create mode 100644 pkg/scheduler/collector/acCollector.go delete mode 100644 pkg/scheduler/collector/aiCollector.go rename pkg/scheduler/strategies/{replicationStrategy.go => replication.go} (100%) rename pkg/scheduler/strategies/{priceBasedStrategy.go => resourcePricing.go} (100%) create mode 100644 pkg/scheduler/strategies/staticWeight.go diff --git a/pkg/scheduler/aiScheduler.go b/pkg/scheduler/aiScheduler.go index cbec54f2..52708f30 100644 --- a/pkg/scheduler/aiScheduler.go +++ b/pkg/scheduler/aiScheduler.go @@ -52,3 +52,7 @@ func (as *AiScheduler) pickOptimalStrategy() (strategies.Strategy, error) { func (as *AiScheduler) genTaskAndProviders() (*providerPricing.Task, []*providerPricing.Provider) { return nil, nil } + 
+func (as *AiScheduler) assignTask(clusters []*strategies.AssignedCluster) error { + return nil +} diff --git a/pkg/scheduler/cloudScheduler.go b/pkg/scheduler/cloudScheduler.go index 37bd9ca6..95121182 100644 --- a/pkg/scheduler/cloudScheduler.go +++ b/pkg/scheduler/cloudScheduler.go @@ -116,3 +116,7 @@ func (cs *CloudScheduler) genTaskAndProviders() (*providerPricing.Task, []*provi return nil, providerList, nil } + +func (cs *CloudScheduler) assignTask(clusters []*strategies.AssignedCluster) error { + return nil +} diff --git a/pkg/scheduler/collector/acCollector.go b/pkg/scheduler/collector/acCollector.go new file mode 100644 index 00000000..fdeee694 --- /dev/null +++ b/pkg/scheduler/collector/acCollector.go @@ -0,0 +1,13 @@ +package collector + +//单条作业费=作业运行秒数×(CPU核心数*CPU单价+GPU卡数×GPU单价+DCU卡数×DCU单价)/3600 +//CPU单价=队列CPU费率×计算中心CPU单价 +//GPU单价=队列GPU费率×计算中心GPU单价 +//DCU单价=队列DCU费率×计算中心DCU单价 + +type ShuguangAiCollector struct { +} + +func (a *ShuguangAiCollector) getResourceSpecs() { + +} diff --git a/pkg/scheduler/collector/aiCollector.go b/pkg/scheduler/collector/aiCollector.go deleted file mode 100644 index e383cba3..00000000 --- a/pkg/scheduler/collector/aiCollector.go +++ /dev/null @@ -1,8 +0,0 @@ -package collector - -type AiCollector struct { -} - -func (a *AiCollector) getResourceSpecs() { - -} diff --git a/pkg/scheduler/collector/collector.go b/pkg/scheduler/collector/collector.go index 7e0739c5..73bcffd0 100644 --- a/pkg/scheduler/collector/collector.go +++ b/pkg/scheduler/collector/collector.go @@ -1,8 +1,19 @@ package collector type ResourceCollector interface { - getResourceSpecs() + getResourceSpecs() ([]ResourceSpecs, error) } type ResourceSpecs struct { + CpuAvail float64 + MemAvail float64 + DiskAvail float64 + GpuAvail float64 + CardAvail []Card +} + +type Card struct { + Type string + Name string + TOpsAtFp16 float64 } diff --git a/pkg/scheduler/common.go b/pkg/scheduler/common.go index 62270a92..7b2cab8e 100644 --- a/pkg/scheduler/common.go +++ 
b/pkg/scheduler/common.go @@ -24,6 +24,7 @@ import ( type scheduleService interface { getNewStructForDb(task *response.TaskInfo, resource string, participantId int64) (interface{}, error) pickOptimalStrategy() (strategies.Strategy, error) + assignTask(clusters []*strategies.AssignedCluster) error } // 求交集 diff --git a/pkg/scheduler/hpcScheduler.go b/pkg/scheduler/hpcScheduler.go index bf964bb7..af6416e6 100644 --- a/pkg/scheduler/hpcScheduler.go +++ b/pkg/scheduler/hpcScheduler.go @@ -49,3 +49,7 @@ func (h *HpcScheduler) pickOptimalStrategy() (strategies.Strategy, error) { func (h *HpcScheduler) genTaskAndProviders(task *response.TaskInfo) (*providerPricing.Task, []*providerPricing.Provider) { return nil, nil } + +func (h *HpcScheduler) assignTask(clusters []*strategies.AssignedCluster) error { + return nil +} diff --git a/pkg/scheduler/scheduler.go b/pkg/scheduler/scheduler.go index e05351c8..e04952a9 100644 --- a/pkg/scheduler/scheduler.go +++ b/pkg/scheduler/scheduler.go @@ -136,7 +136,7 @@ func (s *scheduler) AssignAndSchedule() error { return err } - _, err = strategy.Schedule() + clusters, err := strategy.Schedule() if err != nil { return err } @@ -147,6 +147,11 @@ func (s *scheduler) AssignAndSchedule() error { // return nil //} + err = s.scheduleService.assignTask(clusters) + if err != nil { + return err + } + return nil } diff --git a/pkg/scheduler/strategies/replicationStrategy.go b/pkg/scheduler/strategies/replication.go similarity index 100% rename from pkg/scheduler/strategies/replicationStrategy.go rename to pkg/scheduler/strategies/replication.go diff --git a/pkg/scheduler/strategies/priceBasedStrategy.go b/pkg/scheduler/strategies/resourcePricing.go similarity index 100% rename from pkg/scheduler/strategies/priceBasedStrategy.go rename to pkg/scheduler/strategies/resourcePricing.go diff --git a/pkg/scheduler/strategies/staticWeight.go b/pkg/scheduler/strategies/staticWeight.go new file mode 100644 index 00000000..8e08d219 --- /dev/null +++ 
b/pkg/scheduler/strategies/staticWeight.go @@ -0,0 +1,10 @@ +package strategies + +type StaticWeightStrategy struct { + // TODO: add fields +} + +func (ps *StaticWeightStrategy) Schedule() ([]*AssignedCluster, error) { + // TODO: implement the scheduling logic return nil, nil + return nil, nil +} From 6c8dae6a780261b279e91e0e8fecd182843a75db Mon Sep 17 00:00:00 2001 From: tzwang Date: Mon, 22 Jan 2024 23:15:01 +0800 Subject: [PATCH 03/18] modified ai scheduler struct Former-commit-id: 761d378c4d78dd20bc749e077c44a3b9ef772412 --- pkg/scheduler/collector/acCollector.go | 7 +++++-- pkg/scheduler/collector/collector.go | 3 ++- pkg/scheduler/scheduler.go | 15 +++++++++------ 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/pkg/scheduler/collector/acCollector.go b/pkg/scheduler/collector/acCollector.go index fdeee694..8107a4c4 100644 --- a/pkg/scheduler/collector/acCollector.go +++ b/pkg/scheduler/collector/acCollector.go @@ -1,13 +1,16 @@ package collector +import "gitlink.org.cn/jcce-pcm/pcm-ac/hpcacclient" + //单条作业费=作业运行秒数×(CPU核心数*CPU单价+GPU卡数×GPU单价+DCU卡数×DCU单价)/3600 //CPU单价=队列CPU费率×计算中心CPU单价 //GPU单价=队列GPU费率×计算中心GPU单价 //DCU单价=队列DCU费率×计算中心DCU单价 type ShuguangAiCollector struct { + ACRpc hpcacclient.HpcAC } -func (a *ShuguangAiCollector) getResourceSpecs() { - +func (a *ShuguangAiCollector) getResourceSpecs() (*ResourceSpecs, error) { + return nil, nil } diff --git a/pkg/scheduler/collector/collector.go b/pkg/scheduler/collector/collector.go index 73bcffd0..06f31d24 100644 --- a/pkg/scheduler/collector/collector.go +++ b/pkg/scheduler/collector/collector.go @@ -1,7 +1,7 @@ package collector type ResourceCollector interface { - getResourceSpecs() ([]ResourceSpecs, error) + getResourceSpecs() (*ResourceSpecs, error) } type ResourceSpecs struct { @@ -10,6 +10,7 @@ type ResourceSpecs struct { DiskAvail float64 GpuAvail float64 CardAvail []Card + Balance float64 } type Card struct { diff --git a/pkg/scheduler/scheduler.go b/pkg/scheduler/scheduler.go index 
e04952a9..b981f37a 100644 --- a/pkg/scheduler/scheduler.go +++ b/pkg/scheduler/scheduler.go @@ -19,6 +19,7 @@ import ( "github.com/pkg/errors" "github.com/zeromicro/go-zero/core/logx" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/pkg/response" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/collector" "gitlink.org.cn/jcce-pcm/pcm-coordinator/rpc/client/participantservice" "gorm.io/gorm" "sigs.k8s.io/yaml" @@ -26,12 +27,14 @@ import ( ) type scheduler struct { - task *response.TaskInfo - participantIds []int64 - scheduleService scheduleService - dbEngin *gorm.DB - result []string //pID:子任务yamlstring 键值对 - participantRpc participantservice.ParticipantService + task *response.TaskInfo + participantIds []int64 + scheduleService scheduleService + dbEngin *gorm.DB + result []string //pID:子任务yamlstring 键值对 + participantRpc participantservice.ParticipantService + resourceCollectors []collector.ResourceCollector + //storelink } func NewScheduler(scheduleService scheduleService, val string, dbEngin *gorm.DB, participantRpc participantservice.ParticipantService) (*scheduler, error) { From 0c87a541c51d2b4468643cb45a4d465481ab7c89 Mon Sep 17 00:00:00 2001 From: tzwang Date: Tue, 23 Jan 2024 11:42:10 +0800 Subject: [PATCH 04/18] scheduler refactor updated Former-commit-id: 95e2b32695d5f67ecdee979ccc2e483fc8847f40 --- api/internal/mqs/ScheduleAi.go | 31 ++++++------- pkg/scheduler/aiScheduler.go | 15 ++++--- pkg/scheduler/collector/collector.go | 13 +++--- pkg/scheduler/scheduler.go | 66 +++++++++++++++------------- 4 files changed, 65 insertions(+), 60 deletions(-) diff --git a/api/internal/mqs/ScheduleAi.go b/api/internal/mqs/ScheduleAi.go index ec5c942e..7498a513 100644 --- a/api/internal/mqs/ScheduleAi.go +++ b/api/internal/mqs/ScheduleAi.go @@ -17,7 +17,8 @@ package mqs import ( "context" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc" - scheduler2 "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler" + 
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/collector" ) /* @@ -25,36 +26,32 @@ import ( Listening to the payment flow status change notification message queue */ type AiQueue struct { - ctx context.Context - svcCtx *svc.ServiceContext + ctx context.Context + svcCtx *svc.ServiceContext + scheduler *scheduler.Scheduler } func NewAiMq(ctx context.Context, svcCtx *svc.ServiceContext) *AiQueue { + acCollector := &collector.ShuguangAiCollector{ACRpc: svcCtx.ACRpc} + resourceCollectiors := []collector.ResourceCollector{acCollector} return &AiQueue{ - ctx: ctx, - svcCtx: svcCtx, + ctx: ctx, + svcCtx: svcCtx, + scheduler: scheduler.NewScheduler2(resourceCollectiors, nil), } } func (l *AiQueue) Consume(val string) error { // 接受消息, 根据标签筛选过滤 - aiSchdl := scheduler2.NewAiScheduler(val) - schdl, err := scheduler2.NewScheduler(aiSchdl, val, l.svcCtx.DbEngin, nil) - if err != nil { - return err - } - schdl.MatchLabels() + aiSchdl := scheduler.NewAiScheduler(val, nil) + + //schdl.MatchLabels() // 调度算法 - err = schdl.AssignAndSchedule() + err := l.scheduler.AssignAndSchedule(aiSchdl) if err != nil { return err } - // 存储数据 - err = schdl.SaveToDb() - if err != nil { - return err - } return nil } diff --git a/pkg/scheduler/aiScheduler.go b/pkg/scheduler/aiScheduler.go index 52708f30..8f63c39c 100644 --- a/pkg/scheduler/aiScheduler.go +++ b/pkg/scheduler/aiScheduler.go @@ -24,12 +24,14 @@ import ( ) type AiScheduler struct { - yamlString string - collector collector.ResourceCollector + yamlString string + resourceCollectors []collector.ResourceCollector + task *response.TaskInfo + //storelink } -func NewAiScheduler(val string) *AiScheduler { - return &AiScheduler{yamlString: val} +func NewAiScheduler(val string, resourceCollectors []collector.ResourceCollector) *AiScheduler { + return &AiScheduler{yamlString: val, resourceCollectors: resourceCollectors} } func (as *AiScheduler) getNewStructForDb(task 
*response.TaskInfo, resource string, participantId int64) (interface{}, error) { @@ -44,9 +46,8 @@ func (as *AiScheduler) getNewStructForDb(task *response.TaskInfo, resource strin } func (as *AiScheduler) pickOptimalStrategy() (strategies.Strategy, error) { - //a, b := as.genTaskAndProviders() - - return nil, nil + strategy := strategies.NewReplicationStrategy(nil, 0) + return strategy, nil } func (as *AiScheduler) genTaskAndProviders() (*providerPricing.Task, []*providerPricing.Provider) { diff --git a/pkg/scheduler/collector/collector.go b/pkg/scheduler/collector/collector.go index 06f31d24..5ce28493 100644 --- a/pkg/scheduler/collector/collector.go +++ b/pkg/scheduler/collector/collector.go @@ -5,12 +5,13 @@ type ResourceCollector interface { } type ResourceSpecs struct { - CpuAvail float64 - MemAvail float64 - DiskAvail float64 - GpuAvail float64 - CardAvail []Card - Balance float64 + ParticipantId int64 + CpuAvail float64 + MemAvail float64 + DiskAvail float64 + GpuAvail float64 + CardAvail []Card + Balance float64 } type Card struct { diff --git a/pkg/scheduler/scheduler.go b/pkg/scheduler/scheduler.go index b981f37a..3aae57e9 100644 --- a/pkg/scheduler/scheduler.go +++ b/pkg/scheduler/scheduler.go @@ -20,13 +20,14 @@ import ( "github.com/zeromicro/go-zero/core/logx" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/pkg/response" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/collector" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/database" "gitlink.org.cn/jcce-pcm/pcm-coordinator/rpc/client/participantservice" "gorm.io/gorm" "sigs.k8s.io/yaml" "strings" ) -type scheduler struct { +type Scheduler struct { task *response.TaskInfo participantIds []int64 scheduleService scheduleService @@ -34,19 +35,24 @@ type scheduler struct { result []string //pID:子任务yamlstring 键值对 participantRpc participantservice.ParticipantService resourceCollectors []collector.ResourceCollector + storages []database.Storage //storelink } -func NewScheduler(scheduleService 
scheduleService, val string, dbEngin *gorm.DB, participantRpc participantservice.ParticipantService) (*scheduler, error) { +func NewScheduler(scheduleService scheduleService, val string, dbEngin *gorm.DB, participantRpc participantservice.ParticipantService) (*Scheduler, error) { var task *response.TaskInfo err := json.Unmarshal([]byte(val), &task) if err != nil { return nil, errors.New("create scheduler failed : " + err.Error()) } - return &scheduler{task: task, scheduleService: scheduleService, dbEngin: dbEngin, participantRpc: participantRpc}, nil + return &Scheduler{task: task, scheduleService: scheduleService, dbEngin: dbEngin, participantRpc: participantRpc}, nil } -func (s *scheduler) SpecifyClusters() { +func NewScheduler2(resourceCollectors []collector.ResourceCollector, storages []database.Storage) *Scheduler { + return &Scheduler{resourceCollectors: resourceCollectors, storages: storages} +} + +func (s *Scheduler) SpecifyClusters() { // 如果已指定集群名,通过数据库查询后返回p端ip列表 if len(s.task.Clusters) != 0 { s.dbEngin.Raw("select id from sc_participant_phy_info where `name` in (?)", s.task.Clusters).Scan(&s.participantIds) @@ -54,7 +60,7 @@ func (s *scheduler) SpecifyClusters() { } } -func (s *scheduler) SpecifyNsID() { +func (s *Scheduler) SpecifyNsID() { // 未指定集群名,只指定nsID if len(s.task.Clusters) == 0 { if len(s.task.NsID) != 0 { @@ -70,7 +76,7 @@ func (s *scheduler) SpecifyNsID() { } } -func (s *scheduler) MatchLabels() { +func (s *Scheduler) MatchLabels() { var ids []int64 count := 0 @@ -93,7 +99,7 @@ func (s *scheduler) MatchLabels() { } // TempAssign todo 屏蔽原调度算法 -func (s *scheduler) TempAssign() error { +func (s *Scheduler) TempAssign() error { //需要判断task中的资源类型,针对metadata中的多个kind做不同处理 //输入副本数和集群列表,最终结果输出为pID对应副本数量列表,针对多个kind需要做拆分和重新拼接组合 @@ -113,28 +119,28 @@ func (s *scheduler) TempAssign() error { return nil } -func (s *scheduler) AssignAndSchedule() error { - // 已指定 ParticipantId - if s.task.ParticipantId != 0 { - return nil - } - // 标签匹配以及后,未找到ParticipantIds - 
if len(s.participantIds) == 0 { - return errors.New("未找到匹配的ParticipantIds") - } +func (s *Scheduler) AssignAndSchedule(ss scheduleService) error { + //// 已指定 ParticipantId + //if s.task.ParticipantId != 0 { + // return nil + //} + //// 标签匹配以及后,未找到ParticipantIds + //if len(s.participantIds) == 0 { + // return errors.New("未找到匹配的ParticipantIds") + //} + // + //// 指定或者标签匹配的结果只有一个集群,给任务信息指定 + //if len(s.participantIds) == 1 { + // s.task.ParticipantId = s.participantIds[0] + // //replicas := s.task.Metadata.(map[string]interface{})["spec"].(map[string]interface{})["replicas"].(float64) + // //result := make(map[int64]string) + // //result[s.participantIds[0]] = strconv.FormatFloat(replicas, 'f', 2, 64) + // //s.result = result + // + // return nil + //} - // 指定或者标签匹配的结果只有一个集群,给任务信息指定 - if len(s.participantIds) == 1 { - s.task.ParticipantId = s.participantIds[0] - //replicas := s.task.Metadata.(map[string]interface{})["spec"].(map[string]interface{})["replicas"].(float64) - //result := make(map[int64]string) - //result[s.participantIds[0]] = strconv.FormatFloat(replicas, 'f', 2, 64) - //s.result = result - - return nil - } - - strategy, err := s.scheduleService.pickOptimalStrategy() + strategy, err := ss.pickOptimalStrategy() if err != nil { return err } @@ -150,7 +156,7 @@ func (s *scheduler) AssignAndSchedule() error { // return nil //} - err = s.scheduleService.assignTask(clusters) + err = ss.assignTask(clusters) if err != nil { return err } @@ -158,7 +164,7 @@ func (s *scheduler) AssignAndSchedule() error { return nil } -func (s *scheduler) SaveToDb() error { +func (s *Scheduler) SaveToDb() error { for _, participantId := range s.participantIds { From 73c2c4346897af56912645f073bd27c111772753 Mon Sep 17 00:00:00 2001 From: tzwang Date: Tue, 23 Jan 2024 17:51:11 +0800 Subject: [PATCH 05/18] scheduler refactor updated Former-commit-id: 8d2fffb6f6b50350549876e18a36527231d12dc2 --- api/internal/mqs/ScheduleAi.go | 11 ++-- api/internal/mqs/ScheduleHpc.go | 20 ------- 
pkg/scheduler/aiScheduler.go | 58 ++++++++++++++++--- .../{ => proxy}/collector/acCollector.go | 3 +- .../{ => proxy}/collector/collector.go | 3 +- pkg/scheduler/proxy/executor/acExecutor.go | 32 ++++++++++ pkg/scheduler/proxy/executor/executor.go | 17 ++++++ pkg/scheduler/scheduler.go | 9 +-- 8 files changed, 114 insertions(+), 39 deletions(-) rename pkg/scheduler/{ => proxy}/collector/acCollector.go (84%) rename pkg/scheduler/{ => proxy}/collector/collector.go (83%) create mode 100644 pkg/scheduler/proxy/executor/acExecutor.go create mode 100644 pkg/scheduler/proxy/executor/executor.go diff --git a/api/internal/mqs/ScheduleAi.go b/api/internal/mqs/ScheduleAi.go index 7498a513..45ef84df 100644 --- a/api/internal/mqs/ScheduleAi.go +++ b/api/internal/mqs/ScheduleAi.go @@ -18,7 +18,8 @@ import ( "context" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/collector" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/proxy/collector" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/proxy/executor" ) /* @@ -34,18 +35,18 @@ type AiQueue struct { func NewAiMq(ctx context.Context, svcCtx *svc.ServiceContext) *AiQueue { acCollector := &collector.ShuguangAiCollector{ACRpc: svcCtx.ACRpc} resourceCollectiors := []collector.ResourceCollector{acCollector} + executorMap := make(map[string]executor.Executor) + executorMap["ai"] = &executor.ShuguangAiExecutor{ACRpc: svcCtx.ACRpc} return &AiQueue{ ctx: ctx, svcCtx: svcCtx, - scheduler: scheduler.NewScheduler2(resourceCollectiors, nil), + scheduler: scheduler.NewScheduler2(resourceCollectiors, nil, executorMap), } } func (l *AiQueue) Consume(val string) error { // 接受消息, 根据标签筛选过滤 - aiSchdl := scheduler.NewAiScheduler(val, nil) - - //schdl.MatchLabels() + aiSchdl, _ := scheduler.NewAiScheduler(val, l.scheduler) // 调度算法 err := l.scheduler.AssignAndSchedule(aiSchdl) diff --git 
a/api/internal/mqs/ScheduleHpc.go b/api/internal/mqs/ScheduleHpc.go index 1e188e92..f0b56aee 100644 --- a/api/internal/mqs/ScheduleHpc.go +++ b/api/internal/mqs/ScheduleHpc.go @@ -17,7 +17,6 @@ package mqs import ( "context" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc" - scheduler2 "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler" ) /* @@ -37,24 +36,5 @@ func NewHpcMq(ctx context.Context, svcCtx *svc.ServiceContext) *HpcMq { } func (l *HpcMq) Consume(val string) error { - // 接受消息, 根据标签筛选过滤 - hpcSchdl := scheduler2.NewHpcScheduler(val) - schdl, err := scheduler2.NewScheduler(hpcSchdl, val, l.svcCtx.DbEngin, nil) - if err != nil { - return err - } - schdl.MatchLabels() - - // 调度算法 - err = schdl.AssignAndSchedule() - if err != nil { - return err - } - - // 存储数据 - err = schdl.SaveToDb() - if err != nil { - return err - } return nil } diff --git a/pkg/scheduler/aiScheduler.go b/pkg/scheduler/aiScheduler.go index 8f63c39c..d5771faf 100644 --- a/pkg/scheduler/aiScheduler.go +++ b/pkg/scheduler/aiScheduler.go @@ -15,23 +15,24 @@ package scheduler import ( + "errors" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/pkg/response" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/models" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/algorithm/providerPricing" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/collector" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/entity" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/proxy/collector" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/strategies" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils" ) type AiScheduler struct { - yamlString string - resourceCollectors []collector.ResourceCollector - task *response.TaskInfo - //storelink + yamlString string + task *response.TaskInfo + *Scheduler } -func NewAiScheduler(val string, resourceCollectors []collector.ResourceCollector) *AiScheduler { - return &AiScheduler{yamlString: val, resourceCollectors: resourceCollectors} +func 
NewAiScheduler(val string, scheduler *Scheduler) (*AiScheduler, error) { + return &AiScheduler{yamlString: val, Scheduler: scheduler}, nil } func (as *AiScheduler) getNewStructForDb(task *response.TaskInfo, resource string, participantId int64) (interface{}, error) { @@ -46,14 +47,55 @@ func (as *AiScheduler) getNewStructForDb(task *response.TaskInfo, resource strin } func (as *AiScheduler) pickOptimalStrategy() (strategies.Strategy, error) { - strategy := strategies.NewReplicationStrategy(nil, 0) + resources, err := as.findProvidersWithResource() + if err != nil { + return nil, err + } + + if len(resources) < 2 /*|| as.task */ { + var pros []entity.Participant + for _, resource := range resources { + pros = append(pros, entity.Participant{ + Participant_id: resource.ParticipantId, + Name: resource.Name, + }) + } + strategy := strategies.NewReplicationStrategy(nil, 0) + return strategy, nil + } + + task, providerList := as.genTaskAndProviders() + if err != nil { + return nil, nil + } + strategy := strategies.NewPricingStrategy(task, providerList...) 
return strategy, nil } func (as *AiScheduler) genTaskAndProviders() (*providerPricing.Task, []*providerPricing.Provider) { + return nil, nil } func (as *AiScheduler) assignTask(clusters []*strategies.AssignedCluster) error { + if clusters == nil { + return errors.New("clusters is nil") + } + return nil } + +func (as *AiScheduler) findProvidersWithResource() ([]*collector.ResourceSpecs, error) { + var resourceSpecs []*collector.ResourceSpecs + for _, resourceCollector := range as.resourceCollectors { + spec, err := resourceCollector.GetResourceSpecs() + if err != nil { + continue + } + resourceSpecs = append(resourceSpecs, spec) + } + if len(resourceSpecs) == 0 { + return nil, errors.New("no resource found") + } + return resourceSpecs, nil +} diff --git a/pkg/scheduler/collector/acCollector.go b/pkg/scheduler/proxy/collector/acCollector.go similarity index 84% rename from pkg/scheduler/collector/acCollector.go rename to pkg/scheduler/proxy/collector/acCollector.go index 8107a4c4..587639cc 100644 --- a/pkg/scheduler/collector/acCollector.go +++ b/pkg/scheduler/proxy/collector/acCollector.go @@ -8,9 +8,10 @@ import "gitlink.org.cn/jcce-pcm/pcm-ac/hpcacclient" //DCU单价=队列DCU费率×计算中心DCU单价 type ShuguangAiCollector struct { + Name string ACRpc hpcacclient.HpcAC } -func (a *ShuguangAiCollector) getResourceSpecs() (*ResourceSpecs, error) { +func (a *ShuguangAiCollector) GetResourceSpecs() (*ResourceSpecs, error) { return nil, nil } diff --git a/pkg/scheduler/collector/collector.go b/pkg/scheduler/proxy/collector/collector.go similarity index 83% rename from pkg/scheduler/collector/collector.go rename to pkg/scheduler/proxy/collector/collector.go index 5ce28493..0f1d5720 100644 --- a/pkg/scheduler/collector/collector.go +++ b/pkg/scheduler/proxy/collector/collector.go @@ -1,11 +1,12 @@ package collector type ResourceCollector interface { - getResourceSpecs() (*ResourceSpecs, error) + GetResourceSpecs() (*ResourceSpecs, error) } type ResourceSpecs struct { ParticipantId int64 + 
Name string CpuAvail float64 MemAvail float64 DiskAvail float64 diff --git a/pkg/scheduler/proxy/executor/acExecutor.go b/pkg/scheduler/proxy/executor/acExecutor.go new file mode 100644 index 00000000..cd7b6916 --- /dev/null +++ b/pkg/scheduler/proxy/executor/acExecutor.go @@ -0,0 +1,32 @@ +package executor + +import "gitlink.org.cn/jcce-pcm/pcm-ac/hpcacclient" + +type ShuguangAiExecutor struct { + Name string + ACRpc hpcacclient.HpcAC +} + +func (s ShuguangAiExecutor) QueryImageList() ([]Image, error) { + //TODO implement me + panic("implement me") +} + +func (s ShuguangAiExecutor) SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string) (Task, error) { + //TODO implement me + panic("implement me") +} + +func (s ShuguangAiExecutor) QueryTask(taskId string) (Task, error) { + //TODO implement me + panic("implement me") +} + +func (s ShuguangAiExecutor) QuerySpecs() (Spec, error) { + //TODO implement me + panic("implement me") +} + +func NewShuguangAiExecutor(name string, acRpc hpcacclient.HpcAC) *ShuguangAiExecutor { + return &ShuguangAiExecutor{Name: name, ACRpc: acRpc} +} diff --git a/pkg/scheduler/proxy/executor/executor.go b/pkg/scheduler/proxy/executor/executor.go new file mode 100644 index 00000000..61055927 --- /dev/null +++ b/pkg/scheduler/proxy/executor/executor.go @@ -0,0 +1,17 @@ +package executor + +type Executor interface { + QueryImageList() ([]Image, error) + SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string) (Task, error) + QueryTask(taskId string) (Task, error) + QuerySpecs() (Spec, error) +} + +type Image struct { +} + +type Task struct { +} + +type Spec struct { +} diff --git a/pkg/scheduler/scheduler.go b/pkg/scheduler/scheduler.go index 3aae57e9..3efebaba 100644 --- a/pkg/scheduler/scheduler.go +++ b/pkg/scheduler/scheduler.go @@ -19,8 +19,9 @@ import ( "github.com/pkg/errors" "github.com/zeromicro/go-zero/core/logx" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/pkg/response" 
- "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/collector" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/database" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/proxy/collector" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/proxy/executor" "gitlink.org.cn/jcce-pcm/pcm-coordinator/rpc/client/participantservice" "gorm.io/gorm" "sigs.k8s.io/yaml" @@ -36,7 +37,7 @@ type Scheduler struct { participantRpc participantservice.ParticipantService resourceCollectors []collector.ResourceCollector storages []database.Storage - //storelink + aiExecutor map[string]executor.Executor } func NewScheduler(scheduleService scheduleService, val string, dbEngin *gorm.DB, participantRpc participantservice.ParticipantService) (*Scheduler, error) { @@ -48,8 +49,8 @@ func NewScheduler(scheduleService scheduleService, val string, dbEngin *gorm.DB, return &Scheduler{task: task, scheduleService: scheduleService, dbEngin: dbEngin, participantRpc: participantRpc}, nil } -func NewScheduler2(resourceCollectors []collector.ResourceCollector, storages []database.Storage) *Scheduler { - return &Scheduler{resourceCollectors: resourceCollectors, storages: storages} +func NewScheduler2(resourceCollectors []collector.ResourceCollector, storages []database.Storage, aiExecutor map[string]executor.Executor) *Scheduler { + return &Scheduler{resourceCollectors: resourceCollectors, storages: storages, aiExecutor: aiExecutor} } func (s *Scheduler) SpecifyClusters() { From 9039fe2daae5c675db551d5d772a489f54863967 Mon Sep 17 00:00:00 2001 From: jagger Date: Wed, 24 Jan 2024 09:56:49 +0800 Subject: [PATCH 06/18] fix: ci/cd bug Closes #10 Signed-off-by: jagger Former-commit-id: d47ff7976541a396699a31066e8d753386a63904 --- .devops/api.yml | 12 ++---------- .devops/rpc.yml | 12 ++---------- api/etc/pcm.yaml | 10 +++++----- api/pcm-coordinator-api.yaml | 4 ---- rpc/pcm-coordinator-rpc.yaml | 4 ---- 5 files changed, 9 insertions(+), 33 deletions(-) diff --git a/.devops/api.yml 
b/.devops/api.yml index d8bcc618..ecf4f08a 100644 --- a/.devops/api.yml +++ b/.devops/api.yml @@ -10,12 +10,6 @@ global: required: false type: STRING hidden: false - - ref: nacos_host - name: nacos_host - value: '"10.206.0.12"' - required: false - type: STRING - hidden: false - ref: secret_name name: "" value: '"jcce-aliyuncs"' @@ -45,8 +39,8 @@ workflow: name: git clone task: git_clone@1.2.6 input: - remote_url: '"https://gitlink.org.cn/jcce-pcm/pcm-coordinator.git"' - ref: '"refs/heads/JCOS"' + remote_url: '"https://gitlink.org.cn/JointCloud/pcm-coordinator.git"' + ref: '"refs/heads/master"' commit_id: '""' depth: 1 needs: @@ -93,14 +87,12 @@ workflow: IMAGE_NAME: '"registry.cn-hangzhou.aliyuncs.com/jcce/pcm-coordinator-api"' IMAGE_TAG: git_clone_0.commit_time SECRET_NAME: global.secret_name - NACOS_HOST: global.nacos_host PROJECT_NAME: global.project_name PROJECT_PATH: git_clone_0.git_path script: - cd ${PROJECT_PATH}/api - sed -i "s#image_name#${IMAGE_NAME}:${IMAGE_TAG}#" ${PROJECT_NAME}.yaml - sed -i "s#secret_name#${SECRET_NAME}#" ${PROJECT_NAME}.yaml - - sed -i "s#nacos_host#${NACOS_HOST}#" ${PROJECT_NAME}.yaml - cat ${PROJECT_NAME}.yaml needs: - git_clone_0 diff --git a/.devops/rpc.yml b/.devops/rpc.yml index 2881c8e2..f95e1bbc 100644 --- a/.devops/rpc.yml +++ b/.devops/rpc.yml @@ -10,12 +10,6 @@ global: required: false type: STRING hidden: false - - ref: nacos_host - name: nacos_host - value: '"10.206.0.12"' - required: false - type: STRING - hidden: false - ref: secret_name name: "" value: '"jcce-aliyuncs"' @@ -45,8 +39,8 @@ workflow: name: git clone task: git_clone@1.2.6 input: - remote_url: '"https://gitlink.org.cn/jcce-pcm/pcm-coordinator.git"' - ref: '"refs/heads/JCOS"' + remote_url: '"https://gitlink.org.cn/JointCloud/pcm-coordinator.git"' + ref: '"refs/heads/master"' commit_id: '""' depth: 1 needs: @@ -93,14 +87,12 @@ workflow: IMAGE_NAME: '"registry.cn-hangzhou.aliyuncs.com/jcce/pcm-coordinator-rpc"' IMAGE_TAG: git_clone_0.commit_time SECRET_NAME: 
global.secret_name - NACOS_HOST: global.nacos_host PROJECT_NAME: global.project_name PROJECT_PATH: git_clone_0.git_path script: - cd ${PROJECT_PATH}/rpc - sed -i "s#image_name#${IMAGE_NAME}:${IMAGE_TAG}#" ${PROJECT_NAME}.yaml - sed -i "s#secret_name#${SECRET_NAME}#" ${PROJECT_NAME}.yaml - - sed -i "s#nacos_host#${NACOS_HOST}#" ${PROJECT_NAME}.yaml - cat ${PROJECT_NAME}.yaml needs: - git_clone_0 diff --git a/api/etc/pcm.yaml b/api/etc/pcm.yaml index a8ff97f9..11d07339 100644 --- a/api/etc/pcm.yaml +++ b/api/etc/pcm.yaml @@ -5,7 +5,7 @@ Port: 8999 Timeout: 50000 DB: - DataSource: root:uJpLd6u-J?HC1@(119.45.100.73:3306)/pcm?parseTime=true&loc=Local + DataSource: root:uJpLd6u-J?HC1@(10.206.0.12:3306)/pcm?parseTime=true&loc=Local Redis: Host: 10.206.0.12:6379 @@ -19,7 +19,7 @@ Cache: K8sNativeConf: # target: nacos://10.206.0.12:8848/pcm.kubenative.rpc?timeout=30s&namespaceid=test&groupname=DEFAULT_GROUP&appName=pcm.core.api Endpoints: - - pcm-participant-kubernetes-service:2003 + - 127.0.0.1:2003 NonBlock: true #rpc @@ -53,9 +53,9 @@ ACRpcConf: #rpc CephRpcConf: - target: nacos://10.206.0.12:8848/pcm.ceph.rpc?timeout=30s&namespaceid=test&groupname=DEFAULT_GROUP&appName=pcm.core.api - # Endpoints: - # - 127.0.0.1:8888 + # target: nacos://10.206.0.12:8848/pcm.ceph.rpc?timeout=30s&namespaceid=test&groupname=DEFAULT_GROUP&appName=pcm.core.api + Endpoints: + - pcm-participant-ceph-service:2008 NonBlock: true Timeout: 50000 diff --git a/api/pcm-coordinator-api.yaml b/api/pcm-coordinator-api.yaml index 7c5d835d..7cfcedbc 100644 --- a/api/pcm-coordinator-api.yaml +++ b/api/pcm-coordinator-api.yaml @@ -16,10 +16,6 @@ spec: labels: k8s-app: pcm-coordinator-api spec: - hostAliases: - - hostnames: - - nacos.jcce.dev - ip: nacos_host imagePullSecrets: - name: secret_name containers: diff --git a/rpc/pcm-coordinator-rpc.yaml b/rpc/pcm-coordinator-rpc.yaml index bd853bb6..769467d3 100644 --- a/rpc/pcm-coordinator-rpc.yaml +++ b/rpc/pcm-coordinator-rpc.yaml @@ -16,10 +16,6 @@ spec: 
labels: k8s-app: pcm-coordinator-rpc spec: - hostAliases: - - hostnames: - - nacos.jcce.dev - ip: nacos_host imagePullSecrets: - name: secret_name containers: From b9e79dc671f13752fa561245e972f2e540da56e9 Mon Sep 17 00:00:00 2001 From: tzwang Date: Wed, 24 Jan 2024 11:16:59 +0800 Subject: [PATCH 07/18] scheduler refactor modified Former-commit-id: 1e70763e7f5008da3ad10efcebb0d5338d8665ca --- api/internal/mqs/ScheduleAi.go | 7 +++-- api/internal/mqs/ScheduleCloud.go | 3 +- pkg/scheduler/{ => common}/common.go | 16 +++++----- pkg/scheduler/scheduler.go | 29 ++++++++++--------- pkg/scheduler/{ => schedulers}/aiScheduler.go | 23 ++++++++------- .../{ => schedulers}/cloudScheduler.go | 12 ++++---- .../{ => schedulers}/hpcScheduler.go | 10 +++---- pkg/scheduler/schedulers/vmScheduler.go | 24 +++++++++++++++ .../collector/acCollector.go | 0 .../{proxy => service}/collector/collector.go | 0 .../{proxy => service}/executor/acExecutor.go | 0 .../{proxy => service}/executor/executor.go | 0 .../{strategies => strategy}/replication.go | 2 +- .../resourcePricing.go | 2 +- .../{strategies => strategy}/staticWeight.go | 2 +- .../{strategies => strategy}/strategy.go | 2 +- 16 files changed, 80 insertions(+), 52 deletions(-) rename pkg/scheduler/{ => common}/common.go (76%) rename pkg/scheduler/{ => schedulers}/aiScheduler.go (76%) rename pkg/scheduler/{ => schedulers}/cloudScheduler.go (90%) rename pkg/scheduler/{ => schedulers}/hpcScheduler.go (82%) create mode 100644 pkg/scheduler/schedulers/vmScheduler.go rename pkg/scheduler/{proxy => service}/collector/acCollector.go (100%) rename pkg/scheduler/{proxy => service}/collector/collector.go (100%) rename pkg/scheduler/{proxy => service}/executor/acExecutor.go (100%) rename pkg/scheduler/{proxy => service}/executor/executor.go (100%) rename pkg/scheduler/{strategies => strategy}/replication.go (97%) rename pkg/scheduler/{strategies => strategy}/resourcePricing.go (99%) rename pkg/scheduler/{strategies => strategy}/staticWeight.go 
(91%) rename pkg/scheduler/{strategies => strategy}/strategy.go (90%) diff --git a/api/internal/mqs/ScheduleAi.go b/api/internal/mqs/ScheduleAi.go index 45ef84df..311bee00 100644 --- a/api/internal/mqs/ScheduleAi.go +++ b/api/internal/mqs/ScheduleAi.go @@ -18,8 +18,9 @@ import ( "context" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/proxy/collector" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/proxy/executor" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/schedulers" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service/collector" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service/executor" ) /* @@ -46,7 +47,7 @@ func NewAiMq(ctx context.Context, svcCtx *svc.ServiceContext) *AiQueue { func (l *AiQueue) Consume(val string) error { // 接受消息, 根据标签筛选过滤 - aiSchdl, _ := scheduler.NewAiScheduler(val, l.scheduler) + aiSchdl, _ := schedulers.NewAiScheduler(val, l.scheduler) // 调度算法 err := l.scheduler.AssignAndSchedule(aiSchdl) diff --git a/api/internal/mqs/ScheduleCloud.go b/api/internal/mqs/ScheduleCloud.go index c40a5e9b..cb7c9d7b 100644 --- a/api/internal/mqs/ScheduleCloud.go +++ b/api/internal/mqs/ScheduleCloud.go @@ -18,6 +18,7 @@ import ( "context" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/schedulers" ) /* @@ -37,7 +38,7 @@ func NewCloudMq(ctx context.Context, svcCtx *svc.ServiceContext) *CloudMq { func (l *CloudMq) Consume(val string) error { // 接受消息, 根据标签筛选过滤 - cloudScheduler := scheduler.NewCloudScheduler() + cloudScheduler := schedulers.NewCloudScheduler() schdl, err := scheduler.NewScheduler(cloudScheduler, val, l.svcCtx.DbEngin, l.svcCtx.ParticipantRpc) if err != nil { return err diff --git a/pkg/scheduler/common.go b/pkg/scheduler/common/common.go similarity index 
76% rename from pkg/scheduler/common.go rename to pkg/scheduler/common/common.go index 7b2cab8e..fb71cc8c 100644 --- a/pkg/scheduler/common.go +++ b/pkg/scheduler/common/common.go @@ -12,23 +12,23 @@ */ -package scheduler +package common import ( "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/pkg/response" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/strategies" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/strategy" "math/rand" "time" ) -type scheduleService interface { - getNewStructForDb(task *response.TaskInfo, resource string, participantId int64) (interface{}, error) - pickOptimalStrategy() (strategies.Strategy, error) - assignTask(clusters []*strategies.AssignedCluster) error +type SubSchedule interface { + GetNewStructForDb(task *response.TaskInfo, resource string, participantId int64) (interface{}, error) + PickOptimalStrategy() (strategy.Strategy, error) + AssignTask(clusters []*strategy.AssignedCluster) error } // 求交集 -func intersect(slice1, slice2 []int64) []int64 { +func Intersect(slice1, slice2 []int64) []int64 { m := make(map[int64]int) nn := make([]int64, 0) for _, v := range slice1 { @@ -44,7 +44,7 @@ func intersect(slice1, slice2 []int64) []int64 { return nn } -func micsSlice(origin []int64, count int) []int64 { +func MicsSlice(origin []int64, count int) []int64 { tmpOrigin := make([]int64, len(origin)) copy(tmpOrigin, origin) //一定要seed diff --git a/pkg/scheduler/scheduler.go b/pkg/scheduler/scheduler.go index 3efebaba..1407568e 100644 --- a/pkg/scheduler/scheduler.go +++ b/pkg/scheduler/scheduler.go @@ -19,9 +19,10 @@ import ( "github.com/pkg/errors" "github.com/zeromicro/go-zero/core/logx" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/pkg/response" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/common" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/database" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/proxy/collector" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/proxy/executor" + 
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service/collector" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service/executor" "gitlink.org.cn/jcce-pcm/pcm-coordinator/rpc/client/participantservice" "gorm.io/gorm" "sigs.k8s.io/yaml" @@ -31,26 +32,26 @@ import ( type Scheduler struct { task *response.TaskInfo participantIds []int64 - scheduleService scheduleService + subSchedule common.SubSchedule dbEngin *gorm.DB result []string //pID:子任务yamlstring 键值对 participantRpc participantservice.ParticipantService - resourceCollectors []collector.ResourceCollector - storages []database.Storage - aiExecutor map[string]executor.Executor + ResourceCollectors []collector.ResourceCollector + Storages []database.Storage + AiExecutor map[string]executor.Executor } -func NewScheduler(scheduleService scheduleService, val string, dbEngin *gorm.DB, participantRpc participantservice.ParticipantService) (*Scheduler, error) { +func NewScheduler(subSchedule common.SubSchedule, val string, dbEngin *gorm.DB, participantRpc participantservice.ParticipantService) (*Scheduler, error) { var task *response.TaskInfo err := json.Unmarshal([]byte(val), &task) if err != nil { return nil, errors.New("create scheduler failed : " + err.Error()) } - return &Scheduler{task: task, scheduleService: scheduleService, dbEngin: dbEngin, participantRpc: participantRpc}, nil + return &Scheduler{task: task, subSchedule: subSchedule, dbEngin: dbEngin, participantRpc: participantRpc}, nil } func NewScheduler2(resourceCollectors []collector.ResourceCollector, storages []database.Storage, aiExecutor map[string]executor.Executor) *Scheduler { - return &Scheduler{resourceCollectors: resourceCollectors, storages: storages, aiExecutor: aiExecutor} + return &Scheduler{ResourceCollectors: resourceCollectors, Storages: storages, AiExecutor: aiExecutor} } func (s *Scheduler) SpecifyClusters() { @@ -90,7 +91,7 @@ func (s *Scheduler) MatchLabels() { if count == 0 { ids = participantIds } - ids = 
intersect(ids, participantIds) + ids = common.Intersect(ids, participantIds) count++ } s.participantIds = ids @@ -120,7 +121,7 @@ func (s *Scheduler) TempAssign() error { return nil } -func (s *Scheduler) AssignAndSchedule(ss scheduleService) error { +func (s *Scheduler) AssignAndSchedule(ss common.SubSchedule) error { //// 已指定 ParticipantId //if s.task.ParticipantId != 0 { // return nil @@ -141,7 +142,7 @@ func (s *Scheduler) AssignAndSchedule(ss scheduleService) error { // return nil //} - strategy, err := ss.pickOptimalStrategy() + strategy, err := ss.PickOptimalStrategy() if err != nil { return err } @@ -157,7 +158,7 @@ func (s *Scheduler) AssignAndSchedule(ss scheduleService) error { // return nil //} - err = ss.assignTask(clusters) + err = ss.AssignTask(clusters) if err != nil { return err } @@ -170,7 +171,7 @@ func (s *Scheduler) SaveToDb() error { for _, participantId := range s.participantIds { for _, resource := range s.task.Metadata { - structForDb, err := s.scheduleService.getNewStructForDb(s.task, resource, participantId) + structForDb, err := s.subSchedule.GetNewStructForDb(s.task, resource, participantId) if err != nil { return err } diff --git a/pkg/scheduler/aiScheduler.go b/pkg/scheduler/schedulers/aiScheduler.go similarity index 76% rename from pkg/scheduler/aiScheduler.go rename to pkg/scheduler/schedulers/aiScheduler.go index d5771faf..feef4e93 100644 --- a/pkg/scheduler/aiScheduler.go +++ b/pkg/scheduler/schedulers/aiScheduler.go @@ -12,30 +12,31 @@ */ -package scheduler +package schedulers import ( "errors" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/pkg/response" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/models" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/algorithm/providerPricing" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/entity" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/proxy/collector" - 
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/strategies" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service/collector" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/strategy" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils" ) type AiScheduler struct { yamlString string task *response.TaskInfo - *Scheduler + *scheduler.Scheduler } -func NewAiScheduler(val string, scheduler *Scheduler) (*AiScheduler, error) { +func NewAiScheduler(val string, scheduler *scheduler.Scheduler) (*AiScheduler, error) { return &AiScheduler{yamlString: val, Scheduler: scheduler}, nil } -func (as *AiScheduler) getNewStructForDb(task *response.TaskInfo, resource string, participantId int64) (interface{}, error) { +func (as *AiScheduler) GetNewStructForDb(task *response.TaskInfo, resource string, participantId int64) (interface{}, error) { ai := models.Ai{ ParticipantId: participantId, TaskId: task.TaskId, @@ -46,7 +47,7 @@ func (as *AiScheduler) getNewStructForDb(task *response.TaskInfo, resource strin return ai, nil } -func (as *AiScheduler) pickOptimalStrategy() (strategies.Strategy, error) { +func (as *AiScheduler) PickOptimalStrategy() (strategy.Strategy, error) { resources, err := as.findProvidersWithResource() if err != nil { return nil, err @@ -60,7 +61,7 @@ func (as *AiScheduler) pickOptimalStrategy() (strategies.Strategy, error) { Name: resource.Name, }) } - strategy := strategies.NewReplicationStrategy(nil, 0) + strategy := strategy.NewReplicationStrategy(nil, 0) return strategy, nil } @@ -68,7 +69,7 @@ func (as *AiScheduler) pickOptimalStrategy() (strategies.Strategy, error) { if err != nil { return nil, nil } - strategy := strategies.NewPricingStrategy(task, providerList...) + strategy := strategy.NewPricingStrategy(task, providerList...) 
return strategy, nil } @@ -77,7 +78,7 @@ func (as *AiScheduler) genTaskAndProviders() (*providerPricing.Task, []*provider return nil, nil } -func (as *AiScheduler) assignTask(clusters []*strategies.AssignedCluster) error { +func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) error { if clusters == nil { return errors.New("clusters is nil") } @@ -87,7 +88,7 @@ func (as *AiScheduler) assignTask(clusters []*strategies.AssignedCluster) error func (as *AiScheduler) findProvidersWithResource() ([]*collector.ResourceSpecs, error) { var resourceSpecs []*collector.ResourceSpecs - for _, resourceCollector := range as.resourceCollectors { + for _, resourceCollector := range as.ResourceCollectors { spec, err := resourceCollector.GetResourceSpecs() if err != nil { continue diff --git a/pkg/scheduler/cloudScheduler.go b/pkg/scheduler/schedulers/cloudScheduler.go similarity index 90% rename from pkg/scheduler/cloudScheduler.go rename to pkg/scheduler/schedulers/cloudScheduler.go index 95121182..89d46795 100644 --- a/pkg/scheduler/cloudScheduler.go +++ b/pkg/scheduler/schedulers/cloudScheduler.go @@ -12,7 +12,7 @@ */ -package scheduler +package schedulers import ( "bytes" @@ -20,7 +20,7 @@ import ( "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/models" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/algorithm/providerPricing" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/database" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/strategies" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/strategy" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils" "io" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" @@ -37,17 +37,17 @@ func NewCloudScheduler() *CloudScheduler { return &CloudScheduler{} } -func (cs *CloudScheduler) pickOptimalStrategy() (strategies.Strategy, error) { +func (cs *CloudScheduler) PickOptimalStrategy() (strategy.Strategy, error) { task, providerList, err := cs.genTaskAndProviders() if err != nil { return nil, nil 
} //调度算法 - strategy := strategies.NewPricingStrategy(task, providerList...) + strategy := strategy.NewPricingStrategy(task, providerList...) return strategy, nil } -func (cs *CloudScheduler) getNewStructForDb(task *response.TaskInfo, resource string, participantId int64) (interface{}, error) { +func (cs *CloudScheduler) GetNewStructForDb(task *response.TaskInfo, resource string, participantId int64) (interface{}, error) { cloud := cs.UnMarshalK8sStruct(resource, task.TaskId, task.NsID) cloud.Id = utils.GenSnowflakeID() cloud.NsID = task.NsID @@ -117,6 +117,6 @@ func (cs *CloudScheduler) genTaskAndProviders() (*providerPricing.Task, []*provi return nil, providerList, nil } -func (cs *CloudScheduler) assignTask(clusters []*strategies.AssignedCluster) error { +func (cs *CloudScheduler) AssignTask(clusters []*strategy.AssignedCluster) error { return nil } diff --git a/pkg/scheduler/hpcScheduler.go b/pkg/scheduler/schedulers/hpcScheduler.go similarity index 82% rename from pkg/scheduler/hpcScheduler.go rename to pkg/scheduler/schedulers/hpcScheduler.go index af6416e6..92d49d84 100644 --- a/pkg/scheduler/hpcScheduler.go +++ b/pkg/scheduler/schedulers/hpcScheduler.go @@ -12,14 +12,14 @@ */ -package scheduler +package schedulers import ( "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/pkg/response" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/constants" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/models" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/algorithm/providerPricing" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/strategies" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/strategy" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils" ) @@ -31,7 +31,7 @@ func NewHpcScheduler(val string) *HpcScheduler { return &HpcScheduler{yamlString: val} } -func (h *HpcScheduler) getNewStructForDb(task *response.TaskInfo, resource string, participantId int64) (interface{}, error) { +func (h *HpcScheduler) GetNewStructForDb(task *response.TaskInfo, 
resource string, participantId int64) (interface{}, error) { hpc := models.Hpc{} utils.Convert(task.Metadata, &hpc) hpc.Id = utils.GenSnowflakeID() @@ -42,7 +42,7 @@ func (h *HpcScheduler) getNewStructForDb(task *response.TaskInfo, resource strin return hpc, nil } -func (h *HpcScheduler) pickOptimalStrategy() (strategies.Strategy, error) { +func (h *HpcScheduler) PickOptimalStrategy() (strategy.Strategy, error) { return nil, nil } @@ -50,6 +50,6 @@ func (h *HpcScheduler) genTaskAndProviders(task *response.TaskInfo) (*providerPr return nil, nil } -func (h *HpcScheduler) assignTask(clusters []*strategies.AssignedCluster) error { +func (h *HpcScheduler) AssignTask(clusters []*strategy.AssignedCluster) error { return nil } diff --git a/pkg/scheduler/schedulers/vmScheduler.go b/pkg/scheduler/schedulers/vmScheduler.go new file mode 100644 index 00000000..ad4b7de0 --- /dev/null +++ b/pkg/scheduler/schedulers/vmScheduler.go @@ -0,0 +1,24 @@ +package schedulers + +import ( + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/pkg/response" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/strategy" +) + +type VmScheduler struct { +} + +func (v VmScheduler) GetNewStructForDb(task *response.TaskInfo, resource string, participantId int64) (interface{}, error) { + //TODO implement me + panic("implement me") +} + +func (v VmScheduler) PickOptimalStrategy() (strategy.Strategy, error) { + //TODO implement me + panic("implement me") +} + +func (v VmScheduler) AssignTask(clusters []*strategy.AssignedCluster) error { + //TODO implement me + panic("implement me") +} diff --git a/pkg/scheduler/proxy/collector/acCollector.go b/pkg/scheduler/service/collector/acCollector.go similarity index 100% rename from pkg/scheduler/proxy/collector/acCollector.go rename to pkg/scheduler/service/collector/acCollector.go diff --git a/pkg/scheduler/proxy/collector/collector.go b/pkg/scheduler/service/collector/collector.go similarity index 100% rename from pkg/scheduler/proxy/collector/collector.go 
rename to pkg/scheduler/service/collector/collector.go diff --git a/pkg/scheduler/proxy/executor/acExecutor.go b/pkg/scheduler/service/executor/acExecutor.go similarity index 100% rename from pkg/scheduler/proxy/executor/acExecutor.go rename to pkg/scheduler/service/executor/acExecutor.go diff --git a/pkg/scheduler/proxy/executor/executor.go b/pkg/scheduler/service/executor/executor.go similarity index 100% rename from pkg/scheduler/proxy/executor/executor.go rename to pkg/scheduler/service/executor/executor.go diff --git a/pkg/scheduler/strategies/replication.go b/pkg/scheduler/strategy/replication.go similarity index 97% rename from pkg/scheduler/strategies/replication.go rename to pkg/scheduler/strategy/replication.go index 2a699c5a..08d7e29f 100644 --- a/pkg/scheduler/strategies/replication.go +++ b/pkg/scheduler/strategy/replication.go @@ -1,4 +1,4 @@ -package strategies +package strategy import ( "github.com/pkg/errors" diff --git a/pkg/scheduler/strategies/resourcePricing.go b/pkg/scheduler/strategy/resourcePricing.go similarity index 99% rename from pkg/scheduler/strategies/resourcePricing.go rename to pkg/scheduler/strategy/resourcePricing.go index 5c620fa3..f909f62f 100644 --- a/pkg/scheduler/strategies/resourcePricing.go +++ b/pkg/scheduler/strategy/resourcePricing.go @@ -12,7 +12,7 @@ */ -package strategies +package strategy import ( "errors" diff --git a/pkg/scheduler/strategies/staticWeight.go b/pkg/scheduler/strategy/staticWeight.go similarity index 91% rename from pkg/scheduler/strategies/staticWeight.go rename to pkg/scheduler/strategy/staticWeight.go index 8e08d219..3aa5d769 100644 --- a/pkg/scheduler/strategies/staticWeight.go +++ b/pkg/scheduler/strategy/staticWeight.go @@ -1,4 +1,4 @@ -package strategies +package strategy type StaticWeightStrategy struct { // TODO: add fields diff --git a/pkg/scheduler/strategies/strategy.go b/pkg/scheduler/strategy/strategy.go similarity index 90% rename from pkg/scheduler/strategies/strategy.go rename to 
pkg/scheduler/strategy/strategy.go index e265acdd..1502dc21 100644 --- a/pkg/scheduler/strategies/strategy.go +++ b/pkg/scheduler/strategy/strategy.go @@ -1,4 +1,4 @@ -package strategies +package strategy type Strategy interface { Schedule() ([]*AssignedCluster, error) From 8882790591817ee675dca816f1eac53d70150de0 Mon Sep 17 00:00:00 2001 From: tzwang Date: Wed, 24 Jan 2024 17:59:33 +0800 Subject: [PATCH 08/18] scheduler refactor modified Former-commit-id: dbf14eb6b8c151587c8f48319a3d3a7cedbedb06 --- api/internal/mqs/ScheduleAi.go | 10 ++-- pkg/scheduler/scheduler.go | 22 ++++----- pkg/scheduler/schedulers/aiScheduler.go | 2 +- pkg/scheduler/service/aiService.go | 49 +++++++++++++++++++ .../service/collector/acCollector.go | 17 ------- pkg/scheduler/service/executor/acExecutor.go | 32 ------------ pkg/scheduler/service/impl/modelarts.go | 44 +++++++++++++++++ pkg/scheduler/service/impl/octopus.go | 42 ++++++++++++++++ pkg/scheduler/service/impl/shuguangAi.go | 45 +++++++++++++++++ pkg/scheduler/strategy/dynamicResources.go | 4 ++ 10 files changed, 199 insertions(+), 68 deletions(-) create mode 100644 pkg/scheduler/service/aiService.go delete mode 100644 pkg/scheduler/service/collector/acCollector.go delete mode 100644 pkg/scheduler/service/executor/acExecutor.go create mode 100644 pkg/scheduler/service/impl/modelarts.go create mode 100644 pkg/scheduler/service/impl/octopus.go create mode 100644 pkg/scheduler/service/impl/shuguangAi.go create mode 100644 pkg/scheduler/strategy/dynamicResources.go diff --git a/api/internal/mqs/ScheduleAi.go b/api/internal/mqs/ScheduleAi.go index 311bee00..6cc7dc2f 100644 --- a/api/internal/mqs/ScheduleAi.go +++ b/api/internal/mqs/ScheduleAi.go @@ -19,8 +19,7 @@ import ( "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/schedulers" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service/collector" - 
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service/executor" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service" ) /* @@ -34,14 +33,11 @@ type AiQueue struct { } func NewAiMq(ctx context.Context, svcCtx *svc.ServiceContext) *AiQueue { - acCollector := &collector.ShuguangAiCollector{ACRpc: svcCtx.ACRpc} - resourceCollectiors := []collector.ResourceCollector{acCollector} - executorMap := make(map[string]executor.Executor) - executorMap["ai"] = &executor.ShuguangAiExecutor{ACRpc: svcCtx.ACRpc} + aiExecutorMap, aiCollectorMap := service.InitAiClusterMap(svcCtx.ACRpc, svcCtx.ModelArtsRpc, svcCtx.ModelArtsImgRpc, svcCtx.OctopusRpc) return &AiQueue{ ctx: ctx, svcCtx: svcCtx, - scheduler: scheduler.NewScheduler2(resourceCollectiors, nil, executorMap), + scheduler: scheduler.NewScheduler2(aiCollectorMap, nil, aiExecutorMap), } } diff --git a/pkg/scheduler/scheduler.go b/pkg/scheduler/scheduler.go index 1407568e..49d3a150 100644 --- a/pkg/scheduler/scheduler.go +++ b/pkg/scheduler/scheduler.go @@ -30,15 +30,15 @@ import ( ) type Scheduler struct { - task *response.TaskInfo - participantIds []int64 - subSchedule common.SubSchedule - dbEngin *gorm.DB - result []string //pID:子任务yamlstring 键值对 - participantRpc participantservice.ParticipantService - ResourceCollectors []collector.ResourceCollector - Storages []database.Storage - AiExecutor map[string]executor.Executor + task *response.TaskInfo + participantIds []int64 + subSchedule common.SubSchedule + dbEngin *gorm.DB + result []string //pID:子任务yamlstring 键值对 + participantRpc participantservice.ParticipantService + ResourceCollector *map[string]collector.ResourceCollector + Storages database.Storage + AiExecutor *map[string]executor.Executor } func NewScheduler(subSchedule common.SubSchedule, val string, dbEngin *gorm.DB, participantRpc participantservice.ParticipantService) (*Scheduler, error) { @@ -50,8 +50,8 @@ func NewScheduler(subSchedule common.SubSchedule, val string, dbEngin *gorm.DB, return 
&Scheduler{task: task, subSchedule: subSchedule, dbEngin: dbEngin, participantRpc: participantRpc}, nil } -func NewScheduler2(resourceCollectors []collector.ResourceCollector, storages []database.Storage, aiExecutor map[string]executor.Executor) *Scheduler { - return &Scheduler{ResourceCollectors: resourceCollectors, Storages: storages, AiExecutor: aiExecutor} +func NewScheduler2(resourceCollector *map[string]collector.ResourceCollector, storages database.Storage, aiExecutor *map[string]executor.Executor) *Scheduler { + return &Scheduler{ResourceCollector: resourceCollector, Storages: storages, AiExecutor: aiExecutor} } func (s *Scheduler) SpecifyClusters() { diff --git a/pkg/scheduler/schedulers/aiScheduler.go b/pkg/scheduler/schedulers/aiScheduler.go index feef4e93..d127b266 100644 --- a/pkg/scheduler/schedulers/aiScheduler.go +++ b/pkg/scheduler/schedulers/aiScheduler.go @@ -88,7 +88,7 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) error { func (as *AiScheduler) findProvidersWithResource() ([]*collector.ResourceSpecs, error) { var resourceSpecs []*collector.ResourceSpecs - for _, resourceCollector := range as.ResourceCollectors { + for _, resourceCollector := range *as.ResourceCollector { spec, err := resourceCollector.GetResourceSpecs() if err != nil { continue diff --git a/pkg/scheduler/service/aiService.go b/pkg/scheduler/service/aiService.go new file mode 100644 index 00000000..9fa919dd --- /dev/null +++ b/pkg/scheduler/service/aiService.go @@ -0,0 +1,49 @@ +package service + +import ( + "gitlink.org.cn/jcce-pcm/pcm-ac/hpcacclient" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service/collector" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service/executor" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service/impl" + "gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/client/imagesservice" + "gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/client/modelartsservice" + 
"gitlink.org.cn/jcce-pcm/pcm-participant-octopus/octopusclient" +) + +const ( + OCTOPUS = "Octopus" + MODELARTS = "Modelarts" + SHUGUANGAI = "ShuguangAi" +) + +var ( + AiTypeMap = map[string]string{ + "Hanwuji": OCTOPUS, + "Suiyan": OCTOPUS, + "Sailingsi": OCTOPUS, + "modelarts-CloudBrain2": MODELARTS, + "ShuguangAi": SHUGUANGAI, + } +) + +func InitAiClusterMap(ACRpc hpcacclient.HpcAC, ModelArtsRpc modelartsservice.ModelArtsService, ModelArtsImgRpc imagesservice.ImagesService, OctopusRpc octopusclient.Octopus) (*map[string]executor.Executor, *map[string]collector.ResourceCollector) { + executorMap := make(map[string]executor.Executor) + collectorMap := make(map[string]collector.ResourceCollector) + for k, v := range AiTypeMap { + switch v { + case OCTOPUS: + octopus := impl.NewOctopusExecutor(OctopusRpc, k) + collectorMap[k] = octopus + executorMap[k] = octopus + case MODELARTS: + modelarts := impl.NewModelartsExecutor(ModelArtsRpc, ModelArtsImgRpc, k) + collectorMap[k] = modelarts + executorMap[k] = modelarts + case SHUGUANGAI: + sgai := impl.NewShuguangAiExecutor(ACRpc, k) + collectorMap[k] = sgai + executorMap[k] = sgai + } + } + return &executorMap, &collectorMap +} diff --git a/pkg/scheduler/service/collector/acCollector.go b/pkg/scheduler/service/collector/acCollector.go deleted file mode 100644 index 587639cc..00000000 --- a/pkg/scheduler/service/collector/acCollector.go +++ /dev/null @@ -1,17 +0,0 @@ -package collector - -import "gitlink.org.cn/jcce-pcm/pcm-ac/hpcacclient" - -//单条作业费=作业运行秒数×(CPU核心数*CPU单价+GPU卡数×GPU单价+DCU卡数×DCU单价)/3600 -//CPU单价=队列CPU费率×计算中心CPU单价 -//GPU单价=队列GPU费率×计算中心GPU单价 -//DCU单价=队列DCU费率×计算中心DCU单价 - -type ShuguangAiCollector struct { - Name string - ACRpc hpcacclient.HpcAC -} - -func (a *ShuguangAiCollector) GetResourceSpecs() (*ResourceSpecs, error) { - return nil, nil -} diff --git a/pkg/scheduler/service/executor/acExecutor.go b/pkg/scheduler/service/executor/acExecutor.go deleted file mode 100644 index cd7b6916..00000000 --- 
a/pkg/scheduler/service/executor/acExecutor.go +++ /dev/null @@ -1,32 +0,0 @@ -package executor - -import "gitlink.org.cn/jcce-pcm/pcm-ac/hpcacclient" - -type ShuguangAiExecutor struct { - Name string - ACRpc hpcacclient.HpcAC -} - -func (s ShuguangAiExecutor) QueryImageList() ([]Image, error) { - //TODO implement me - panic("implement me") -} - -func (s ShuguangAiExecutor) SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string) (Task, error) { - //TODO implement me - panic("implement me") -} - -func (s ShuguangAiExecutor) QueryTask(taskId string) (Task, error) { - //TODO implement me - panic("implement me") -} - -func (s ShuguangAiExecutor) QuerySpecs() (Spec, error) { - //TODO implement me - panic("implement me") -} - -func NewShuguangAiExecutor(name string, acRpc hpcacclient.HpcAC) *ShuguangAiExecutor { - return &ShuguangAiExecutor{Name: name, ACRpc: acRpc} -} diff --git a/pkg/scheduler/service/impl/modelarts.go b/pkg/scheduler/service/impl/modelarts.go new file mode 100644 index 00000000..76f702b7 --- /dev/null +++ b/pkg/scheduler/service/impl/modelarts.go @@ -0,0 +1,44 @@ +package impl + +import ( + "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service/collector" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service/executor" + "gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/client/imagesservice" + "gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/client/modelartsservice" +) + +type ModelArtsExecutor struct { + Name string + pageIndex int32 + pageSize int32 + ModelArtsRpc modelartsservice.ModelArtsService + ModelArtsImgRpc imagesservice.ImagesService +} + +func NewModelartsExecutor(modelArtsRpc modelartsservice.ModelArtsService, modelArtsImgRpc imagesservice.ImagesService, name string) *ModelArtsExecutor { + return &ModelArtsExecutor{Name: name, ModelArtsRpc: modelArtsRpc, ModelArtsImgRpc: modelArtsImgRpc, pageIndex: 1, pageSize: 100} +} + +func (m ModelArtsExecutor) QueryImageList() ([]executor.Image, 
error) { + //TODO implement me + panic("implement me") +} + +func (m ModelArtsExecutor) SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string) (executor.Task, error) { + //TODO implement me + panic("implement me") +} + +func (m ModelArtsExecutor) QueryTask(taskId string) (executor.Task, error) { + //TODO implement me + panic("implement me") +} + +func (m ModelArtsExecutor) QuerySpecs() (executor.Spec, error) { + //TODO implement me + panic("implement me") +} + +func (a *ModelArtsExecutor) GetResourceSpecs() (*collector.ResourceSpecs, error) { + return nil, nil +} diff --git a/pkg/scheduler/service/impl/octopus.go b/pkg/scheduler/service/impl/octopus.go new file mode 100644 index 00000000..a4b6c5b8 --- /dev/null +++ b/pkg/scheduler/service/impl/octopus.go @@ -0,0 +1,42 @@ +package impl + +import ( + "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service/collector" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service/executor" + "gitlink.org.cn/jcce-pcm/pcm-participant-octopus/octopusclient" +) + +type OctopusExecutor struct { + Name string + pageIndex int32 + pageSize int32 + OctopusRpc octopusclient.Octopus +} + +func NewOctopusExecutor(OctopusRpc octopusclient.Octopus, name string) *OctopusExecutor { + return &OctopusExecutor{OctopusRpc: OctopusRpc, Name: name, pageIndex: 1, pageSize: 100} +} + +func (o OctopusExecutor) QueryImageList() ([]executor.Image, error) { + //TODO implement me + panic("implement me") +} + +func (o OctopusExecutor) SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string) (executor.Task, error) { + //TODO implement me + panic("implement me") +} + +func (o OctopusExecutor) QueryTask(taskId string) (executor.Task, error) { + //TODO implement me + panic("implement me") +} + +func (o OctopusExecutor) QuerySpecs() (executor.Spec, error) { + //TODO implement me + panic("implement me") +} + +func (a *OctopusExecutor) GetResourceSpecs() 
(*collector.ResourceSpecs, error) { + return nil, nil +} diff --git a/pkg/scheduler/service/impl/shuguangAi.go b/pkg/scheduler/service/impl/shuguangAi.go new file mode 100644 index 00000000..049455c6 --- /dev/null +++ b/pkg/scheduler/service/impl/shuguangAi.go @@ -0,0 +1,45 @@ +package impl + +import ( + "gitlink.org.cn/jcce-pcm/pcm-ac/hpcacclient" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service/collector" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service/executor" +) + +//单条作业费=作业运行秒数×(CPU核心数*CPU单价+GPU卡数×GPU单价+DCU卡数×DCU单价)/3600 +//CPU单价=队列CPU费率×计算中心CPU单价 +//GPU单价=队列GPU费率×计算中心GPU单价 +//DCU单价=队列DCU费率×计算中心DCU单价 + +type ShuguangAiExecutor struct { + Name string + ACRpc hpcacclient.HpcAC +} + +func NewShuguangAiExecutor(acRpc hpcacclient.HpcAC, name string) *ShuguangAiExecutor { + return &ShuguangAiExecutor{Name: name, ACRpc: acRpc} +} + +func (s ShuguangAiExecutor) QueryImageList() ([]executor.Image, error) { + //TODO implement me + panic("implement me") +} + +func (s ShuguangAiExecutor) SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string) (executor.Task, error) { + //TODO implement me + panic("implement me") +} + +func (s ShuguangAiExecutor) QueryTask(taskId string) (executor.Task, error) { + //TODO implement me + panic("implement me") +} + +func (s ShuguangAiExecutor) QuerySpecs() (executor.Spec, error) { + //TODO implement me + panic("implement me") +} + +func (a *ShuguangAiExecutor) GetResourceSpecs() (*collector.ResourceSpecs, error) { + return nil, nil +} diff --git a/pkg/scheduler/strategy/dynamicResources.go b/pkg/scheduler/strategy/dynamicResources.go new file mode 100644 index 00000000..579333f2 --- /dev/null +++ b/pkg/scheduler/strategy/dynamicResources.go @@ -0,0 +1,4 @@ +package strategy + +type DynamicResourcesStrategy struct { +} From e5a37df3c7ae46b714b4cd7a872fcc699446aeec Mon Sep 17 00:00:00 2001 From: tzwang Date: Thu, 25 Jan 2024 18:06:11 +0800 Subject: [PATCH 09/18] moved the 
scheduler module api internal Former-commit-id: eb408269108d5d32b47da2f6d4ccf92096487237 --- .../algorithm/providerPricing/pricing.go | 0 .../algorithm/providerPricing/provider.go | 0 .../algorithm/providerPricing/strategy.go | 0 .../algorithm/providerPricing/task.go | 0 .../internal}/scheduler/common/common.go | 2 +- .../internal}/scheduler/database/aiStorage.go | 0 .../scheduler/database/cloudStorage.go | 2 +- .../internal}/scheduler/database/storage.go | 2 +- .../internal}/scheduler/entity/entity.go | 0 {pkg => api/internal}/scheduler/scheduler.go | 8 +- .../scheduler/schedulers/aiScheduler.go | 10 +- .../scheduler/schedulers/cloudScheduler.go | 6 +- .../scheduler/schedulers/hpcScheduler.go | 4 +- .../scheduler/schedulers/vmScheduler.go | 2 +- .../internal}/scheduler/service/aiService.go | 6 +- .../scheduler/service/collector/collector.go | 0 .../scheduler/service/executor/executor.go | 0 .../scheduler/service/impl/modelarts.go | 4 +- .../scheduler/service/impl/octopus.go | 4 +- .../scheduler/service/impl/shuguangAi.go | 4 +- .../scheduler/strategy/dynamicResources.go | 0 .../scheduler/strategy/replication.go | 2 +- .../scheduler/strategy/resourcePricing.go | 2 +- .../scheduler/strategy/staticWeight.go | 0 .../internal}/scheduler/strategy/strategy.go | 0 api/internal/storeLink/storeLink.go | 344 ++++++++++++++++++ 26 files changed, 373 insertions(+), 29 deletions(-) rename {pkg => api/internal}/scheduler/algorithm/providerPricing/pricing.go (100%) rename {pkg => api/internal}/scheduler/algorithm/providerPricing/provider.go (100%) rename {pkg => api/internal}/scheduler/algorithm/providerPricing/strategy.go (100%) rename {pkg => api/internal}/scheduler/algorithm/providerPricing/task.go (100%) rename {pkg => api/internal}/scheduler/common/common.go (95%) rename {pkg => api/internal}/scheduler/database/aiStorage.go (100%) rename {pkg => api/internal}/scheduler/database/cloudStorage.go (93%) rename {pkg => api/internal}/scheduler/database/storage.go (73%) rename {pkg 
=> api/internal}/scheduler/entity/entity.go (100%) rename {pkg => api/internal}/scheduler/scheduler.go (94%) rename {pkg => api/internal}/scheduler/schedulers/aiScheduler.go (87%) rename {pkg => api/internal}/scheduler/schedulers/cloudScheduler.go (93%) rename {pkg => api/internal}/scheduler/schedulers/hpcScheduler.go (90%) rename {pkg => api/internal}/scheduler/schedulers/vmScheduler.go (87%) rename {pkg => api/internal}/scheduler/service/aiService.go (85%) rename {pkg => api/internal}/scheduler/service/collector/collector.go (100%) rename {pkg => api/internal}/scheduler/service/executor/executor.go (100%) rename {pkg => api/internal}/scheduler/service/impl/modelarts.go (88%) rename {pkg => api/internal}/scheduler/service/impl/octopus.go (86%) rename {pkg => api/internal}/scheduler/service/impl/shuguangAi.go (87%) rename {pkg => api/internal}/scheduler/strategy/dynamicResources.go (100%) rename {pkg => api/internal}/scheduler/strategy/replication.go (91%) rename {pkg => api/internal}/scheduler/strategy/resourcePricing.go (98%) rename {pkg => api/internal}/scheduler/strategy/staticWeight.go (100%) rename {pkg => api/internal}/scheduler/strategy/strategy.go (100%) diff --git a/pkg/scheduler/algorithm/providerPricing/pricing.go b/api/internal/scheduler/algorithm/providerPricing/pricing.go similarity index 100% rename from pkg/scheduler/algorithm/providerPricing/pricing.go rename to api/internal/scheduler/algorithm/providerPricing/pricing.go diff --git a/pkg/scheduler/algorithm/providerPricing/provider.go b/api/internal/scheduler/algorithm/providerPricing/provider.go similarity index 100% rename from pkg/scheduler/algorithm/providerPricing/provider.go rename to api/internal/scheduler/algorithm/providerPricing/provider.go diff --git a/pkg/scheduler/algorithm/providerPricing/strategy.go b/api/internal/scheduler/algorithm/providerPricing/strategy.go similarity index 100% rename from pkg/scheduler/algorithm/providerPricing/strategy.go rename to 
api/internal/scheduler/algorithm/providerPricing/strategy.go diff --git a/pkg/scheduler/algorithm/providerPricing/task.go b/api/internal/scheduler/algorithm/providerPricing/task.go similarity index 100% rename from pkg/scheduler/algorithm/providerPricing/task.go rename to api/internal/scheduler/algorithm/providerPricing/task.go diff --git a/pkg/scheduler/common/common.go b/api/internal/scheduler/common/common.go similarity index 95% rename from pkg/scheduler/common/common.go rename to api/internal/scheduler/common/common.go index fb71cc8c..3da3609c 100644 --- a/pkg/scheduler/common/common.go +++ b/api/internal/scheduler/common/common.go @@ -15,8 +15,8 @@ package common import ( + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/strategy" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/pkg/response" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/strategy" "math/rand" "time" ) diff --git a/pkg/scheduler/database/aiStorage.go b/api/internal/scheduler/database/aiStorage.go similarity index 100% rename from pkg/scheduler/database/aiStorage.go rename to api/internal/scheduler/database/aiStorage.go diff --git a/pkg/scheduler/database/cloudStorage.go b/api/internal/scheduler/database/cloudStorage.go similarity index 93% rename from pkg/scheduler/database/cloudStorage.go rename to api/internal/scheduler/database/cloudStorage.go index d6e0e022..c8329954 100644 --- a/pkg/scheduler/database/cloudStorage.go +++ b/api/internal/scheduler/database/cloudStorage.go @@ -1,7 +1,7 @@ package database import ( - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/entity" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/entity" "gorm.io/gorm" ) diff --git a/pkg/scheduler/database/storage.go b/api/internal/scheduler/database/storage.go similarity index 73% rename from pkg/scheduler/database/storage.go rename to api/internal/scheduler/database/storage.go index d2371f59..ed545d54 100644 --- a/pkg/scheduler/database/storage.go +++ 
b/api/internal/scheduler/database/storage.go @@ -1,6 +1,6 @@ package database -import "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/entity" +import "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/entity" type Storage interface { GetProviderParams() ([]entity.ProviderParams, error) diff --git a/pkg/scheduler/entity/entity.go b/api/internal/scheduler/entity/entity.go similarity index 100% rename from pkg/scheduler/entity/entity.go rename to api/internal/scheduler/entity/entity.go diff --git a/pkg/scheduler/scheduler.go b/api/internal/scheduler/scheduler.go similarity index 94% rename from pkg/scheduler/scheduler.go rename to api/internal/scheduler/scheduler.go index 49d3a150..e63a3873 100644 --- a/pkg/scheduler/scheduler.go +++ b/api/internal/scheduler/scheduler.go @@ -18,11 +18,11 @@ import ( "encoding/json" "github.com/pkg/errors" "github.com/zeromicro/go-zero/core/logx" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/common" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/database" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/collector" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/executor" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/pkg/response" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/common" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/database" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service/collector" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service/executor" "gitlink.org.cn/jcce-pcm/pcm-coordinator/rpc/client/participantservice" "gorm.io/gorm" "sigs.k8s.io/yaml" diff --git a/pkg/scheduler/schedulers/aiScheduler.go b/api/internal/scheduler/schedulers/aiScheduler.go similarity index 87% rename from pkg/scheduler/schedulers/aiScheduler.go rename to api/internal/scheduler/schedulers/aiScheduler.go index d127b266..ab9f4eb0 100644 --- a/pkg/scheduler/schedulers/aiScheduler.go +++ 
b/api/internal/scheduler/schedulers/aiScheduler.go @@ -16,13 +16,13 @@ package schedulers import ( "errors" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/algorithm/providerPricing" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/entity" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/collector" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/strategy" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/pkg/response" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/models" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/algorithm/providerPricing" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/entity" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service/collector" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/strategy" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils" ) diff --git a/pkg/scheduler/schedulers/cloudScheduler.go b/api/internal/scheduler/schedulers/cloudScheduler.go similarity index 93% rename from pkg/scheduler/schedulers/cloudScheduler.go rename to api/internal/scheduler/schedulers/cloudScheduler.go index 89d46795..6d9fcbe1 100644 --- a/pkg/scheduler/schedulers/cloudScheduler.go +++ b/api/internal/scheduler/schedulers/cloudScheduler.go @@ -16,11 +16,11 @@ package schedulers import ( "bytes" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/algorithm/providerPricing" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/database" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/strategy" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/pkg/response" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/models" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/algorithm/providerPricing" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/database" - 
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/strategy" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils" "io" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" diff --git a/pkg/scheduler/schedulers/hpcScheduler.go b/api/internal/scheduler/schedulers/hpcScheduler.go similarity index 90% rename from pkg/scheduler/schedulers/hpcScheduler.go rename to api/internal/scheduler/schedulers/hpcScheduler.go index 92d49d84..97620c91 100644 --- a/pkg/scheduler/schedulers/hpcScheduler.go +++ b/api/internal/scheduler/schedulers/hpcScheduler.go @@ -15,11 +15,11 @@ package schedulers import ( + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/algorithm/providerPricing" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/strategy" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/pkg/response" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/constants" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/models" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/algorithm/providerPricing" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/strategy" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils" ) diff --git a/pkg/scheduler/schedulers/vmScheduler.go b/api/internal/scheduler/schedulers/vmScheduler.go similarity index 87% rename from pkg/scheduler/schedulers/vmScheduler.go rename to api/internal/scheduler/schedulers/vmScheduler.go index ad4b7de0..7020ad7b 100644 --- a/pkg/scheduler/schedulers/vmScheduler.go +++ b/api/internal/scheduler/schedulers/vmScheduler.go @@ -1,8 +1,8 @@ package schedulers import ( + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/strategy" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/pkg/response" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/strategy" ) type VmScheduler struct { diff --git a/pkg/scheduler/service/aiService.go b/api/internal/scheduler/service/aiService.go similarity index 85% rename from pkg/scheduler/service/aiService.go rename to api/internal/scheduler/service/aiService.go 
index 9fa919dd..1a6949b7 100644 --- a/pkg/scheduler/service/aiService.go +++ b/api/internal/scheduler/service/aiService.go @@ -2,9 +2,9 @@ package service import ( "gitlink.org.cn/jcce-pcm/pcm-ac/hpcacclient" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service/collector" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service/executor" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service/impl" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/collector" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/executor" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/impl" "gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/client/imagesservice" "gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/client/modelartsservice" "gitlink.org.cn/jcce-pcm/pcm-participant-octopus/octopusclient" diff --git a/pkg/scheduler/service/collector/collector.go b/api/internal/scheduler/service/collector/collector.go similarity index 100% rename from pkg/scheduler/service/collector/collector.go rename to api/internal/scheduler/service/collector/collector.go diff --git a/pkg/scheduler/service/executor/executor.go b/api/internal/scheduler/service/executor/executor.go similarity index 100% rename from pkg/scheduler/service/executor/executor.go rename to api/internal/scheduler/service/executor/executor.go diff --git a/pkg/scheduler/service/impl/modelarts.go b/api/internal/scheduler/service/impl/modelarts.go similarity index 88% rename from pkg/scheduler/service/impl/modelarts.go rename to api/internal/scheduler/service/impl/modelarts.go index 76f702b7..eb4eae84 100644 --- a/pkg/scheduler/service/impl/modelarts.go +++ b/api/internal/scheduler/service/impl/modelarts.go @@ -1,8 +1,8 @@ package impl import ( - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service/collector" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service/executor" + 
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/collector" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/executor" "gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/client/imagesservice" "gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/client/modelartsservice" ) diff --git a/pkg/scheduler/service/impl/octopus.go b/api/internal/scheduler/service/impl/octopus.go similarity index 86% rename from pkg/scheduler/service/impl/octopus.go rename to api/internal/scheduler/service/impl/octopus.go index a4b6c5b8..a0bf944b 100644 --- a/pkg/scheduler/service/impl/octopus.go +++ b/api/internal/scheduler/service/impl/octopus.go @@ -1,8 +1,8 @@ package impl import ( - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service/collector" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service/executor" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/collector" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/executor" "gitlink.org.cn/jcce-pcm/pcm-participant-octopus/octopusclient" ) diff --git a/pkg/scheduler/service/impl/shuguangAi.go b/api/internal/scheduler/service/impl/shuguangAi.go similarity index 87% rename from pkg/scheduler/service/impl/shuguangAi.go rename to api/internal/scheduler/service/impl/shuguangAi.go index 049455c6..a54c55f6 100644 --- a/pkg/scheduler/service/impl/shuguangAi.go +++ b/api/internal/scheduler/service/impl/shuguangAi.go @@ -2,8 +2,8 @@ package impl import ( "gitlink.org.cn/jcce-pcm/pcm-ac/hpcacclient" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service/collector" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service/executor" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/collector" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/executor" ) //单条作业费=作业运行秒数×(CPU核心数*CPU单价+GPU卡数×GPU单价+DCU卡数×DCU单价)/3600 diff --git a/pkg/scheduler/strategy/dynamicResources.go 
b/api/internal/scheduler/strategy/dynamicResources.go similarity index 100% rename from pkg/scheduler/strategy/dynamicResources.go rename to api/internal/scheduler/strategy/dynamicResources.go diff --git a/pkg/scheduler/strategy/replication.go b/api/internal/scheduler/strategy/replication.go similarity index 91% rename from pkg/scheduler/strategy/replication.go rename to api/internal/scheduler/strategy/replication.go index 08d7e29f..88ecd6fb 100644 --- a/pkg/scheduler/strategy/replication.go +++ b/api/internal/scheduler/strategy/replication.go @@ -2,7 +2,7 @@ package strategy import ( "github.com/pkg/errors" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/entity" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/entity" ) type ReplicationStrategy struct { diff --git a/pkg/scheduler/strategy/resourcePricing.go b/api/internal/scheduler/strategy/resourcePricing.go similarity index 98% rename from pkg/scheduler/strategy/resourcePricing.go rename to api/internal/scheduler/strategy/resourcePricing.go index f909f62f..2abf2af6 100644 --- a/pkg/scheduler/strategy/resourcePricing.go +++ b/api/internal/scheduler/strategy/resourcePricing.go @@ -16,7 +16,7 @@ package strategy import ( "errors" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/algorithm/providerPricing" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/algorithm/providerPricing" ) type PricingStrategy struct { diff --git a/pkg/scheduler/strategy/staticWeight.go b/api/internal/scheduler/strategy/staticWeight.go similarity index 100% rename from pkg/scheduler/strategy/staticWeight.go rename to api/internal/scheduler/strategy/staticWeight.go diff --git a/pkg/scheduler/strategy/strategy.go b/api/internal/scheduler/strategy/strategy.go similarity index 100% rename from pkg/scheduler/strategy/strategy.go rename to api/internal/scheduler/strategy/strategy.go diff --git a/api/internal/storeLink/storeLink.go b/api/internal/storeLink/storeLink.go index cac8d642..596b70d8 
100644 --- a/api/internal/storeLink/storeLink.go +++ b/api/internal/storeLink/storeLink.go @@ -102,6 +102,350 @@ func GetParticipantById(partId int64, dbEngin *gorm.DB) *models.StorelinkCenter return &participant } +func ConvertType2[T any, RESP any](in *T, out *RESP, participant *models.StorelinkCenter) (interface{}, error) { + + switch (interface{})(in).(type) { + case *octopus.UploadImageResp: + inresp := (interface{})(in).(*octopus.UploadImageResp) + switch (interface{})(out).(type) { + case *types.UploadLinkImageResp: + resp := (interface{})(out).(*types.UploadLinkImageResp) + resp.Success = inresp.Success + if !resp.Success { + resp.ErrorMsg = inresp.Error.Message + return resp, nil + } + return resp, nil + } + return nil, nil + + case *octopus.DeleteImageResp: + inresp := (interface{})(in).(*octopus.DeleteImageResp) + var resp types.DeleteLinkImageResp + resp.Success = inresp.Success + if !resp.Success { + resp.ErrorMsg = inresp.Error.Message + return resp, nil + } + + return resp, nil + + case *octopus.GetUserImageListResp: + inresp := (interface{})(in).(*octopus.GetUserImageListResp) + var resp types.GetLinkImageListResp + resp.Success = inresp.Success + if !resp.Success { + resp.ErrorMsg = inresp.Error.Message + resp.Images = nil + return resp, nil + } + + for _, v := range inresp.Payload.Images { + var image types.ImageSl + image.ImageId = v.Image.Id + image.ImageName = v.Image.ImageName + image.ImageStatus = OctImgStatus[v.Image.ImageStatus] + resp.Images = append(resp.Images, &image) + } + return resp, nil + case *modelarts.ListReposDetailsResp: + inresp := (interface{})(in).(*modelarts.ListReposDetailsResp) + var resp types.GetLinkImageListResp + + if inresp.Errors != nil { + resp.Success = false + resp.ErrorMsg = inresp.Errors[0].ErrorMessage + resp.Images = nil + return resp, nil + } + + resp.Success = true + for _, v := range inresp.Items { + for _, r := range v.Tags { + var image types.ImageSl + image.ImageId = v.Namespace + "/" + v.Name + ":" + r 
+ image.ImageName = v.Name + image.ImageStatus = "created" + resp.Images = append(resp.Images, &image) + } + } + return resp, nil + case *hpcAC.GetImageListAiResp: + inresp := (interface{})(in).(*hpcAC.GetImageListAiResp) + var resp types.GetLinkImageListResp + + if inresp.Code == "0" { + resp.Success = true + for _, img := range inresp.Data { + var image types.ImageSl + image.ImageId = img.ImageId + image.ImageName = img.Version + image.ImageStatus = "created" + resp.Images = append(resp.Images, &image) + } + } else { + resp.Success = false + resp.ErrorMsg = inresp.Msg + resp.Images = nil + } + return resp, nil + + case *octopus.CreateTrainJobResp: + inresp := (interface{})(in).(*octopus.CreateTrainJobResp) + var resp types.SubmitLinkTaskResp + + resp.Success = inresp.Success + if !resp.Success { + resp.ErrorMsg = inresp.Error.Message + return resp, nil + } + + resp.TaskId = inresp.Payload.JobId + + return resp, nil + case *modelarts.CreateTrainingJobResp: + inresp := (interface{})(in).(*modelarts.CreateTrainingJobResp) + var resp types.SubmitLinkTaskResp + + if inresp.ErrorMsg != "" { + resp.ErrorMsg = inresp.ErrorMsg + resp.Success = false + return resp, nil + } + resp.Success = true + resp.TaskId = inresp.Metadata.Id + + return resp, nil + case *hpcAC.SubmitTaskAiResp: + inresp := (interface{})(in).(*hpcAC.SubmitTaskAiResp) + var resp types.SubmitLinkTaskResp + + if inresp.Code == "0" { + resp.Success = true + resp.TaskId = inresp.Data + } else { + resp.Success = false + resp.ErrorMsg = inresp.Msg + } + return resp, nil + case *hpcAC.SubmitJobResp: + inresp := (interface{})(in).(*hpcAC.SubmitJobResp) + var resp types.SubmitLinkTaskResp + + if inresp.Code == "0" { + resp.Success = true + resp.TaskId = inresp.Data + } else { + resp.Success = false + resp.ErrorMsg = inresp.Msg + } + return resp, nil + case *octopus.GetTrainJobResp: + inresp := (interface{})(in).(*octopus.GetTrainJobResp) + var resp types.GetLinkTaskResp + + resp.Success = inresp.Success + if 
!resp.Success { + resp.ErrorMsg = inresp.Error.Message + return resp, nil + } + + var task types.TaskSl + task.TaskId = inresp.Payload.TrainJob.Id + task.TaskName = inresp.Payload.TrainJob.Name + task.StartedAt = inresp.Payload.TrainJob.StartedAt + task.CompletedAt = inresp.Payload.TrainJob.CompletedAt + task.TaskStatus = inresp.Payload.TrainJob.Status + + resp.Task = &task + return resp, nil + case *modelarts.JobResponse: + inresp := (interface{})(in).(*modelarts.JobResponse) + var resp types.GetLinkTaskResp + + if inresp.ErrorMsg != "" { + resp.ErrorMsg = inresp.ErrorMsg + resp.Success = false + return resp, nil + } + resp.Success = true + resp.Task = &types.TaskSl{} + resp.Task.TaskId = inresp.Metadata.Id + resp.Task.TaskName = inresp.Metadata.Name + resp.Task.StartedAt = int64(inresp.Status.StartTime) + resp.Task.CompletedAt = int64(inresp.Status.Duration) + resp.Task.TaskStatus = inresp.Status.Phase + + return resp, nil + case *hpcAC.GetPytorchTaskResp: + inresp := (interface{})(in).(*hpcAC.GetPytorchTaskResp) + var resp types.GetLinkTaskResp + + if inresp.Code == "0" { + resp.Success = true + var task types.TaskSl + task.TaskId = inresp.Data.Id + task.TaskName = inresp.Data.TaskName + task.TaskStatus = inresp.Data.Status + task.StartedAt = timeutils.StringToUnixTime(inresp.Data.StartTime) + task.CompletedAt = timeutils.StringToUnixTime(inresp.Data.EndTime) + resp.Task = &task + } else { + resp.Success = false + resp.ErrorMsg = inresp.Msg + resp.Task = nil + } + + return resp, nil + case *hpcAC.GetJobDetailResp: + inresp := (interface{})(in).(*hpcAC.GetJobDetailResp) + var resp types.GetLinkTaskResp + + if inresp.Code == "0" { + resp.Success = true + var task types.TaskSl + task.TaskId = inresp.Data.JobId + task.TaskName = inresp.Data.JobName + task.TaskStatus = AcStatus[inresp.Data.JobStatus] + task.StartedAt = timeutils.StringToUnixTime(inresp.Data.JobStartTime) + task.CompletedAt = timeutils.StringToUnixTime(inresp.Data.JobEndTime) + resp.Task = &task + } 
else { + resp.Success = false + resp.ErrorMsg = inresp.Msg + resp.Task = nil + } + + return resp, nil + case *hpcAC.HistoryJobDetailResp: + inresp := (interface{})(in).(*hpcAC.HistoryJobDetailResp) + var resp types.GetLinkTaskResp + + if inresp.Code == "0" { + resp.Success = true + var task types.TaskSl + task.TaskId = inresp.Data.JobId + task.TaskName = inresp.Data.JobName + task.TaskStatus = AcStatus[inresp.Data.JobState] + task.StartedAt = timeutils.StringToUnixTime(inresp.Data.JobStartTime) + task.CompletedAt = timeutils.StringToUnixTime(inresp.Data.JobEndTime) + resp.Task = &task + } else { + resp.Success = false + resp.ErrorMsg = inresp.Msg + resp.Task = nil + } + + return resp, nil + case *octopus.DeleteTrainJobResp: + inresp := (interface{})(in).(*octopus.DeleteTrainJobResp) + var resp types.DeleteLinkTaskResp + + resp.Success = inresp.Success + if !resp.Success { + resp.ErrorMsg = inresp.Error.Message + return resp, nil + } + + return resp, nil + case *modelarts.DeleteTrainingJobResp: + inresp := (interface{})(in).(*modelarts.DeleteTrainingJobResp) + var resp types.DeleteLinkTaskResp + + if inresp.ErrorMsg != "" { + resp.ErrorMsg = inresp.ErrorMsg + resp.Success = false + return resp, nil + } + resp.Success = true + return resp, nil + case *hpcAC.DeleteTaskAiResp: + inresp := (interface{})(in).(*hpcAC.DeleteTaskAiResp) + var resp types.DeleteLinkTaskResp + + if inresp.Code == "0" { + resp.Success = true + } else { + resp.Success = false + resp.ErrorMsg = inresp.Msg + } + return resp, nil + case *hpcAC.DeleteJobResp: + inresp := (interface{})(in).(*hpcAC.DeleteJobResp) + var resp types.DeleteLinkTaskResp + + if inresp.Code == "0" { + resp.Success = true + } else { + resp.Success = false + resp.ErrorMsg = inresp.Msg + } + return resp, nil + case *octopus.GetResourceSpecsResp: + inresp := (interface{})(in).(*octopus.GetResourceSpecsResp) + var resp types.GetResourceSpecsResp + + resp.Success = inresp.Success + if !resp.Success { + resp.ResourceSpecs = nil + 
return resp, nil + } + + for _, spec := range inresp.TrainResourceSpecs { + var respec types.ResourceSpecSl + respec.SpecId = spec.Id + respec.SpecName = spec.Name + respec.ParticipantId = participant.Id + respec.ParticipantName = participant.Name + respec.SpecPrice = spec.Price + resp.ResourceSpecs = append(resp.ResourceSpecs, &respec) + } + + return resp, nil + case *hpcAC.GetResourceSpecResp: + inresp := (interface{})(in).(*hpcAC.GetResourceSpecResp) + var resp types.GetResourceSpecsResp + + if inresp.Code != "0" { + resp.Success = false + resp.ResourceSpecs = nil + } else { + var spec types.ResourceSpecSl + resp.Success = true + spec.ParticipantName = participant.Name + spec.ParticipantId = participant.Id + spec.SpecName = SHUGUANGAI_CUSTOM_RESOURCE_NAME + spec.SpecId = SHUGUANGAI_CUSTOM_RESOURCE_ID + resp.ResourceSpecs = append(resp.ResourceSpecs, &spec) + } + return resp, nil + case *modelarts.TrainingJobFlavorsResp: + inresp := (interface{})(in).(*modelarts.TrainingJobFlavorsResp) + var resp types.GetResourceSpecsResp + resp.Success = true + + if inresp.Flavors == nil { + resp.Success = false + resp.ResourceSpecs = nil + return resp, nil + } + + for _, spec := range inresp.Flavors { + var respec types.ResourceSpecSl + respec.SpecId = spec.FlavorId + respec.SpecName = spec.FlavorName + respec.ParticipantId = participant.Id + respec.ParticipantName = participant.Name + respec.SpecPrice = 0 + resp.ResourceSpecs = append(resp.ResourceSpecs, &respec) + } + + return resp, nil + default: + return nil, errors.New("type convert fail") + } +} + func ConvertType[T any](in *T, participant *models.StorelinkCenter) (interface{}, error) { switch (interface{})(in).(type) { From 53213a85259f123c8155f3aac2a77cf5a8b526e1 Mon Sep 17 00:00:00 2001 From: tzwang Date: Fri, 26 Jan 2024 09:30:12 +0800 Subject: [PATCH 10/18] refactor the scheduler Former-commit-id: 326d505dc13aa4fd61c43ae368f4ab829a0abd9b --- api/internal/mqs/ScheduleAi.go | 6 +++--- api/internal/mqs/ScheduleCloud.go 
| 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/api/internal/mqs/ScheduleAi.go b/api/internal/mqs/ScheduleAi.go index 6cc7dc2f..836d0170 100644 --- a/api/internal/mqs/ScheduleAi.go +++ b/api/internal/mqs/ScheduleAi.go @@ -16,10 +16,10 @@ package mqs import ( "context" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/schedulers" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/schedulers" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service" ) /* diff --git a/api/internal/mqs/ScheduleCloud.go b/api/internal/mqs/ScheduleCloud.go index cb7c9d7b..4c419c5c 100644 --- a/api/internal/mqs/ScheduleCloud.go +++ b/api/internal/mqs/ScheduleCloud.go @@ -16,9 +16,9 @@ package mqs import ( "context" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/schedulers" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/schedulers" ) /* From 449f3d7eec81ac88662bd5f0d678bb0abc30e09a Mon Sep 17 00:00:00 2001 From: tzwang Date: Fri, 26 Jan 2024 17:18:09 +0800 Subject: [PATCH 11/18] refactor the scheduler and storelink modules Former-commit-id: b72e4e416a990958fe6210629a461434614ceca4 --- .../logic/storelink/deletelinkimagelogic.go | 15 +- .../logic/storelink/deletelinktasklogic.go | 15 +- .../logic/storelink/getaispecslogic.go | 15 +- .../logic/storelink/getlinkimagelistlogic.go | 15 +- .../logic/storelink/getlinktasklogic.go | 15 +- .../logic/storelink/submitlinktasklogic.go | 13 +- .../logic/storelink/uploadlinkimagelogic.go | 15 +- api/internal/mqs/ScheduleAi.go | 2 +- 
.../scheduler/schedulers/aiScheduler.go | 2 + api/internal/scheduler/service/aiService.go | 16 +- .../scheduler/service/executor/executor.go | 17 +- .../scheduler/service/impl/modelarts.go | 44 -- .../scheduler/service/impl/octopus.go | 42 -- .../scheduler/service/impl/shuguangAi.go | 45 -- api/internal/storeLink/modelarts.go | 62 +- api/internal/storeLink/octopus.go | 93 +-- api/internal/storeLink/shuguangHpc.go | 61 +- api/internal/storeLink/shuguangai.go | 57 +- api/internal/storeLink/storeLink.go | 548 ++++++++++-------- 19 files changed, 494 insertions(+), 598 deletions(-) delete mode 100644 api/internal/scheduler/service/impl/modelarts.go delete mode 100644 api/internal/scheduler/service/impl/octopus.go delete mode 100644 api/internal/scheduler/service/impl/shuguangAi.go diff --git a/api/internal/logic/storelink/deletelinkimagelogic.go b/api/internal/logic/storelink/deletelinkimagelogic.go index 3e092aaa..1a730f0a 100644 --- a/api/internal/logic/storelink/deletelinkimagelogic.go +++ b/api/internal/logic/storelink/deletelinkimagelogic.go @@ -57,9 +57,18 @@ func (l *DeleteLinkImageLogic) DeleteLinkImage(req *types.DeleteLinkImageReq) (r return nil, err } if img == nil { - return nil, nil + return nil, storeLink.ERROR_RESP_EMPTY } - imgResp := img.(types.DeleteLinkImageResp) - return &imgResp, nil + resp = &types.DeleteLinkImageResp{} + //转换成统一返回类型 + imgResp, err := storeLink.ConvertType(img, resp, participant) + if err != nil { + return nil, err + } + if imgResp == nil { + return nil, storeLink.ERROR_CONVERT_EMPTY + } + + return imgResp.(*types.DeleteLinkImageResp), nil } diff --git a/api/internal/logic/storelink/deletelinktasklogic.go b/api/internal/logic/storelink/deletelinktasklogic.go index 420dfad5..d4bfce4f 100644 --- a/api/internal/logic/storelink/deletelinktasklogic.go +++ b/api/internal/logic/storelink/deletelinktasklogic.go @@ -57,9 +57,18 @@ func (l *DeleteLinkTaskLogic) DeleteLinkTask(req *types.DeleteLinkTaskReq) (resp return nil, err } if task == 
nil { - return nil, nil + return nil, storeLink.ERROR_RESP_EMPTY } - taskResp := task.(types.DeleteLinkTaskResp) - return &taskResp, nil + resp = &types.DeleteLinkTaskResp{} + //转换成统一返回类型 + taskResp, err := storeLink.ConvertType(task, resp, participant) + if err != nil { + return nil, err + } + if taskResp == nil { + return nil, storeLink.ERROR_CONVERT_EMPTY + } + + return taskResp.(*types.DeleteLinkTaskResp), nil } diff --git a/api/internal/logic/storelink/getaispecslogic.go b/api/internal/logic/storelink/getaispecslogic.go index e358f5d8..ec49eb44 100644 --- a/api/internal/logic/storelink/getaispecslogic.go +++ b/api/internal/logic/storelink/getaispecslogic.go @@ -57,9 +57,18 @@ func (l *GetAISpecsLogic) GetAISpecs(req *types.GetResourceSpecsReq) (resp *type return nil, err } if specs == nil { - return nil, nil + return nil, storeLink.ERROR_RESP_EMPTY } - specsResp := specs.(types.GetResourceSpecsResp) - return &specsResp, nil + resp = &types.GetResourceSpecsResp{} + //转换成统一返回类型 + specsResp, err := storeLink.ConvertType(specs, resp, participant) + if err != nil { + return nil, err + } + if specsResp == nil { + return nil, storeLink.ERROR_CONVERT_EMPTY + } + + return specsResp.(*types.GetResourceSpecsResp), nil } diff --git a/api/internal/logic/storelink/getlinkimagelistlogic.go b/api/internal/logic/storelink/getlinkimagelistlogic.go index 99250ca2..d6d51c40 100644 --- a/api/internal/logic/storelink/getlinkimagelistlogic.go +++ b/api/internal/logic/storelink/getlinkimagelistlogic.go @@ -57,9 +57,18 @@ func (l *GetLinkImageListLogic) GetLinkImageList(req *types.GetLinkImageListReq) return nil, err } if list == nil { - return nil, nil + return nil, storeLink.ERROR_RESP_EMPTY } - listResp := list.(types.GetLinkImageListResp) - return &listResp, nil + resp = &types.GetLinkImageListResp{} + //转换成统一返回类型 + listResp, err := storeLink.ConvertType(list, resp, participant) + if err != nil { + return nil, err + } + if listResp == nil { + return nil, 
storeLink.ERROR_CONVERT_EMPTY + } + + return listResp.(*types.GetLinkImageListResp), nil } diff --git a/api/internal/logic/storelink/getlinktasklogic.go b/api/internal/logic/storelink/getlinktasklogic.go index b1a04c35..f7da753f 100644 --- a/api/internal/logic/storelink/getlinktasklogic.go +++ b/api/internal/logic/storelink/getlinktasklogic.go @@ -58,9 +58,18 @@ func (l *GetLinkTaskLogic) GetLinkTask(req *types.GetLinkTaskReq) (resp *types.G return nil, err } if task == nil { - return nil, nil + return nil, storeLink.ERROR_RESP_EMPTY } - taskResp := task.(types.GetLinkTaskResp) - return &taskResp, nil + resp = &types.GetLinkTaskResp{} + //转换成统一返回类型 + taskResp, err := storeLink.ConvertType(task, resp, participant) + if err != nil { + return nil, err + } + if taskResp == nil { + return nil, storeLink.ERROR_CONVERT_EMPTY + } + + return taskResp.(*types.GetLinkTaskResp), nil } diff --git a/api/internal/logic/storelink/submitlinktasklogic.go b/api/internal/logic/storelink/submitlinktasklogic.go index 5fc3bae3..98f77f85 100644 --- a/api/internal/logic/storelink/submitlinktasklogic.go +++ b/api/internal/logic/storelink/submitlinktasklogic.go @@ -72,6 +72,15 @@ func (l *SubmitLinkTaskLogic) SubmitLinkTask(req *types.SubmitLinkTaskReq) (resp return nil, err } - taskResp := task.(types.SubmitLinkTaskResp) - return &taskResp, nil + resp = &types.SubmitLinkTaskResp{} + //转换成统一返回类型 + taskResp, err := storeLink.ConvertType(task, resp, participant) + if err != nil { + return nil, err + } + if taskResp == nil { + return nil, storeLink.ERROR_CONVERT_EMPTY + } + + return taskResp.(*types.SubmitLinkTaskResp), nil } diff --git a/api/internal/logic/storelink/uploadlinkimagelogic.go b/api/internal/logic/storelink/uploadlinkimagelogic.go index 93c579d4..92e89958 100644 --- a/api/internal/logic/storelink/uploadlinkimagelogic.go +++ b/api/internal/logic/storelink/uploadlinkimagelogic.go @@ -58,9 +58,18 @@ func (l *UploadLinkImageLogic) UploadLinkImage(req *types.UploadLinkImageReq) (r 
return nil, err } if img == nil { - return nil, nil + return nil, storeLink.ERROR_RESP_EMPTY } - imgResp := img.(types.UploadLinkImageResp) - return &imgResp, nil + resp = &types.UploadLinkImageResp{} + //转换成统一返回类型 + imgResp, err := storeLink.ConvertType(img, resp, participant) + if err != nil { + return nil, err + } + if imgResp == nil { + return nil, storeLink.ERROR_CONVERT_EMPTY + } + + return imgResp.(*types.UploadLinkImageResp), nil } diff --git a/api/internal/mqs/ScheduleAi.go b/api/internal/mqs/ScheduleAi.go index 836d0170..17fd5520 100644 --- a/api/internal/mqs/ScheduleAi.go +++ b/api/internal/mqs/ScheduleAi.go @@ -33,7 +33,7 @@ type AiQueue struct { } func NewAiMq(ctx context.Context, svcCtx *svc.ServiceContext) *AiQueue { - aiExecutorMap, aiCollectorMap := service.InitAiClusterMap(svcCtx.ACRpc, svcCtx.ModelArtsRpc, svcCtx.ModelArtsImgRpc, svcCtx.OctopusRpc) + aiExecutorMap, aiCollectorMap := service.InitAiClusterMap(ctx, svcCtx) return &AiQueue{ ctx: ctx, svcCtx: svcCtx, diff --git a/api/internal/scheduler/schedulers/aiScheduler.go b/api/internal/scheduler/schedulers/aiScheduler.go index ab9f4eb0..5f9f2d22 100644 --- a/api/internal/scheduler/schedulers/aiScheduler.go +++ b/api/internal/scheduler/schedulers/aiScheduler.go @@ -83,6 +83,8 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) error { return errors.New("clusters is nil") } + _ = *as.AiExecutor + return nil } diff --git a/api/internal/scheduler/service/aiService.go b/api/internal/scheduler/service/aiService.go index 1a6949b7..3b46596b 100644 --- a/api/internal/scheduler/service/aiService.go +++ b/api/internal/scheduler/service/aiService.go @@ -1,13 +1,11 @@ package service import ( - "gitlink.org.cn/jcce-pcm/pcm-ac/hpcacclient" + "context" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/collector" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/executor" - 
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/impl" - "gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/client/imagesservice" - "gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/client/modelartsservice" - "gitlink.org.cn/jcce-pcm/pcm-participant-octopus/octopusclient" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/storeLink" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc" ) const ( @@ -26,21 +24,21 @@ var ( } ) -func InitAiClusterMap(ACRpc hpcacclient.HpcAC, ModelArtsRpc modelartsservice.ModelArtsService, ModelArtsImgRpc imagesservice.ImagesService, OctopusRpc octopusclient.Octopus) (*map[string]executor.Executor, *map[string]collector.ResourceCollector) { +func InitAiClusterMap(ctx context.Context, svcCtx *svc.ServiceContext) (*map[string]executor.Executor, *map[string]collector.ResourceCollector) { executorMap := make(map[string]executor.Executor) collectorMap := make(map[string]collector.ResourceCollector) for k, v := range AiTypeMap { switch v { case OCTOPUS: - octopus := impl.NewOctopusExecutor(OctopusRpc, k) + octopus := storeLink.NewOctopusLink(ctx, svcCtx, k, 0) collectorMap[k] = octopus executorMap[k] = octopus case MODELARTS: - modelarts := impl.NewModelartsExecutor(ModelArtsRpc, ModelArtsImgRpc, k) + modelarts := storeLink.NewModelArtsLink(ctx, svcCtx, k, 0) collectorMap[k] = modelarts executorMap[k] = modelarts case SHUGUANGAI: - sgai := impl.NewShuguangAiExecutor(ACRpc, k) + sgai := storeLink.NewShuguangAi(ctx, svcCtx, k, 0) collectorMap[k] = sgai executorMap[k] = sgai } diff --git a/api/internal/scheduler/service/executor/executor.go b/api/internal/scheduler/service/executor/executor.go index 61055927..917e45ca 100644 --- a/api/internal/scheduler/service/executor/executor.go +++ b/api/internal/scheduler/service/executor/executor.go @@ -1,17 +1,8 @@ package executor type Executor interface { - QueryImageList() ([]Image, error) - SubmitTask(imageId string, cmd string, envs []string, params []string, 
resourceId string) (Task, error) - QueryTask(taskId string) (Task, error) - QuerySpecs() (Spec, error) -} - -type Image struct { -} - -type Task struct { -} - -type Spec struct { + QueryImageList() (interface{}, error) + SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string) (interface{}, error) + QueryTask(taskId string) (interface{}, error) + QuerySpecs() (interface{}, error) } diff --git a/api/internal/scheduler/service/impl/modelarts.go b/api/internal/scheduler/service/impl/modelarts.go deleted file mode 100644 index eb4eae84..00000000 --- a/api/internal/scheduler/service/impl/modelarts.go +++ /dev/null @@ -1,44 +0,0 @@ -package impl - -import ( - "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/collector" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/executor" - "gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/client/imagesservice" - "gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/client/modelartsservice" -) - -type ModelArtsExecutor struct { - Name string - pageIndex int32 - pageSize int32 - ModelArtsRpc modelartsservice.ModelArtsService - ModelArtsImgRpc imagesservice.ImagesService -} - -func NewModelartsExecutor(modelArtsRpc modelartsservice.ModelArtsService, modelArtsImgRpc imagesservice.ImagesService, name string) *ModelArtsExecutor { - return &ModelArtsExecutor{Name: name, ModelArtsRpc: modelArtsRpc, ModelArtsImgRpc: modelArtsImgRpc, pageIndex: 1, pageSize: 100} -} - -func (m ModelArtsExecutor) QueryImageList() ([]executor.Image, error) { - //TODO implement me - panic("implement me") -} - -func (m ModelArtsExecutor) SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string) (executor.Task, error) { - //TODO implement me - panic("implement me") -} - -func (m ModelArtsExecutor) QueryTask(taskId string) (executor.Task, error) { - //TODO implement me - panic("implement me") -} - -func (m ModelArtsExecutor) QuerySpecs() (executor.Spec, 
error) { - //TODO implement me - panic("implement me") -} - -func (a *ModelArtsExecutor) GetResourceSpecs() (*collector.ResourceSpecs, error) { - return nil, nil -} diff --git a/api/internal/scheduler/service/impl/octopus.go b/api/internal/scheduler/service/impl/octopus.go deleted file mode 100644 index a0bf944b..00000000 --- a/api/internal/scheduler/service/impl/octopus.go +++ /dev/null @@ -1,42 +0,0 @@ -package impl - -import ( - "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/collector" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/executor" - "gitlink.org.cn/jcce-pcm/pcm-participant-octopus/octopusclient" -) - -type OctopusExecutor struct { - Name string - pageIndex int32 - pageSize int32 - OctopusRpc octopusclient.Octopus -} - -func NewOctopusExecutor(OctopusRpc octopusclient.Octopus, name string) *OctopusExecutor { - return &OctopusExecutor{OctopusRpc: OctopusRpc, Name: name, pageIndex: 1, pageSize: 100} -} - -func (o OctopusExecutor) QueryImageList() ([]executor.Image, error) { - //TODO implement me - panic("implement me") -} - -func (o OctopusExecutor) SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string) (executor.Task, error) { - //TODO implement me - panic("implement me") -} - -func (o OctopusExecutor) QueryTask(taskId string) (executor.Task, error) { - //TODO implement me - panic("implement me") -} - -func (o OctopusExecutor) QuerySpecs() (executor.Spec, error) { - //TODO implement me - panic("implement me") -} - -func (a *OctopusExecutor) GetResourceSpecs() (*collector.ResourceSpecs, error) { - return nil, nil -} diff --git a/api/internal/scheduler/service/impl/shuguangAi.go b/api/internal/scheduler/service/impl/shuguangAi.go deleted file mode 100644 index a54c55f6..00000000 --- a/api/internal/scheduler/service/impl/shuguangAi.go +++ /dev/null @@ -1,45 +0,0 @@ -package impl - -import ( - "gitlink.org.cn/jcce-pcm/pcm-ac/hpcacclient" - 
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/collector" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/executor" -) - -//单条作业费=作业运行秒数×(CPU核心数*CPU单价+GPU卡数×GPU单价+DCU卡数×DCU单价)/3600 -//CPU单价=队列CPU费率×计算中心CPU单价 -//GPU单价=队列GPU费率×计算中心GPU单价 -//DCU单价=队列DCU费率×计算中心DCU单价 - -type ShuguangAiExecutor struct { - Name string - ACRpc hpcacclient.HpcAC -} - -func NewShuguangAiExecutor(acRpc hpcacclient.HpcAC, name string) *ShuguangAiExecutor { - return &ShuguangAiExecutor{Name: name, ACRpc: acRpc} -} - -func (s ShuguangAiExecutor) QueryImageList() ([]executor.Image, error) { - //TODO implement me - panic("implement me") -} - -func (s ShuguangAiExecutor) SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string) (executor.Task, error) { - //TODO implement me - panic("implement me") -} - -func (s ShuguangAiExecutor) QueryTask(taskId string) (executor.Task, error) { - //TODO implement me - panic("implement me") -} - -func (s ShuguangAiExecutor) QuerySpecs() (executor.Spec, error) { - //TODO implement me - panic("implement me") -} - -func (a *ShuguangAiExecutor) GetResourceSpecs() (*collector.ResourceSpecs, error) { - return nil, nil -} diff --git a/api/internal/storeLink/modelarts.go b/api/internal/storeLink/modelarts.go index 77aa7f07..350bd0ec 100644 --- a/api/internal/storeLink/modelarts.go +++ b/api/internal/storeLink/modelarts.go @@ -16,8 +16,8 @@ package storeLink import ( "context" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/collector" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/models" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils" "gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/modelarts" "strconv" @@ -25,16 +25,16 @@ import ( ) type ModelArtsLink struct { - ctx context.Context - svcCtx *svc.ServiceContext - platform string - pageIndex int32 - pageSize int32 - participant *models.StorelinkCenter + 
ctx context.Context + svcCtx *svc.ServiceContext + platform string + participantId int64 + pageIndex int32 + pageSize int32 } -func NewModelArtsLink(ctx context.Context, svcCtx *svc.ServiceContext, participant *models.StorelinkCenter) *ModelArtsLink { - return &ModelArtsLink{ctx: ctx, svcCtx: svcCtx, participant: participant, platform: participant.Name, pageIndex: 1, pageSize: 100} +func NewModelArtsLink(ctx context.Context, svcCtx *svc.ServiceContext, name string, id int64) *ModelArtsLink { + return &ModelArtsLink{ctx: ctx, svcCtx: svcCtx, platform: name, participantId: id, pageIndex: 1, pageSize: 100} } func (o *ModelArtsLink) UploadImage(path string) (interface{}, error) { @@ -59,13 +59,7 @@ func (o *ModelArtsLink) QueryImageList() (interface{}, error) { return nil, err } - //转换成统一返回类型 - imgListResp, err := ConvertType[modelarts.ListReposDetailsResp](resp, nil) - if err != nil { - return nil, err - } - - return imgListResp, nil + return resp, nil } func (o *ModelArtsLink) SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string) (interface{}, error) { @@ -110,13 +104,7 @@ func (o *ModelArtsLink) SubmitTask(imageId string, cmd string, envs []string, pa return nil, err } - //转换成统一返回类型 - submitResp, err := ConvertType[modelarts.CreateTrainingJobResp](resp, nil) - if err != nil { - return nil, err - } - - return submitResp, nil + return resp, nil } func (o *ModelArtsLink) QueryTask(taskId string) (interface{}, error) { @@ -130,13 +118,7 @@ func (o *ModelArtsLink) QueryTask(taskId string) (interface{}, error) { return nil, err } - //转换成统一返回类型 - taskResp, err := ConvertType[modelarts.JobResponse](resp, o.participant) - if err != nil { - return nil, err - } - - return taskResp, nil + return resp, nil } func (o *ModelArtsLink) DeleteTask(taskId string) (interface{}, error) { @@ -150,13 +132,7 @@ func (o *ModelArtsLink) DeleteTask(taskId string) (interface{}, error) { return nil, err } - //转换成统一返回类型 - deleteResp, err := 
ConvertType[modelarts.DeleteTrainingJobResp](resp, nil) - if err != nil { - return nil, err - } - - return deleteResp, nil + return resp, nil } func (o *ModelArtsLink) QuerySpecs() (interface{}, error) { @@ -169,11 +145,9 @@ func (o *ModelArtsLink) QuerySpecs() (interface{}, error) { return nil, err } - //转换成统一返回类型 - specsResp, err := ConvertType[modelarts.TrainingJobFlavorsResp](resp, o.participant) - if err != nil { - return nil, err - } - - return specsResp, nil + return resp, nil +} + +func (o *ModelArtsLink) GetResourceSpecs() (*collector.ResourceSpecs, error) { + return nil, nil } diff --git a/api/internal/storeLink/octopus.go b/api/internal/storeLink/octopus.go index ea935db2..370ec23c 100644 --- a/api/internal/storeLink/octopus.go +++ b/api/internal/storeLink/octopus.go @@ -16,19 +16,20 @@ package storeLink import ( "context" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/collector" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/models" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils" "gitlink.org.cn/jcce-pcm/pcm-participant-octopus/octopus" "strings" ) type OctopusLink struct { - ctx context.Context - svcCtx *svc.ServiceContext - pageIndex int32 - pageSize int32 - participant *models.StorelinkCenter + ctx context.Context + svcCtx *svc.ServiceContext + pageIndex int32 + pageSize int32 + platform string + participantId int64 } const ( @@ -38,14 +39,14 @@ const ( RESOURCE_POOL = "common-pool" ) -func NewOctopusLink(ctx context.Context, svcCtx *svc.ServiceContext, participant *models.StorelinkCenter) *OctopusLink { - return &OctopusLink{ctx: ctx, svcCtx: svcCtx, participant: participant, pageIndex: 1, pageSize: 100} +func NewOctopusLink(ctx context.Context, svcCtx *svc.ServiceContext, name string, id int64) *OctopusLink { + return &OctopusLink{ctx: ctx, svcCtx: svcCtx, platform: name, participantId: id, pageIndex: 1, pageSize: 100} } func (o *OctopusLink) UploadImage(path 
string) (interface{}, error) { // octopus创建镜像 createReq := &octopus.CreateImageReq{ - Platform: o.participant.Name, + Platform: o.platform, CreateImage: &octopus.CreateImage{ SourceType: 1, ImageName: IMG_NAME_PREFIX + utils.RandomString(7), @@ -59,7 +60,7 @@ func (o *OctopusLink) UploadImage(path string) (interface{}, error) { // octopus上传镜像 uploadReq := &octopus.UploadImageReq{ - Platform: o.participant.Name, + Platform: o.platform, ImageId: createResp.Payload.ImageId, Params: &octopus.UploadImageParam{ Domain: "", @@ -73,19 +74,13 @@ func (o *OctopusLink) UploadImage(path string) (interface{}, error) { // Todo 实际上传 - //转换成统一返回类型 - resp, err := ConvertType[octopus.UploadImageResp](uploadResp, nil) - if err != nil { - return nil, err - } - - return resp, nil + return uploadResp, nil } func (o *OctopusLink) DeleteImage(imageId string) (interface{}, error) { // octopus删除镜像 req := &octopus.DeleteImageReq{ - Platform: o.participant.Name, + Platform: o.platform, ImageId: imageId, } resp, err := o.svcCtx.OctopusRpc.DeleteImage(o.ctx, req) @@ -93,19 +88,13 @@ func (o *OctopusLink) DeleteImage(imageId string) (interface{}, error) { return nil, err } - //转换成统一返回类型 - deleteResp, err := ConvertType[octopus.DeleteImageResp](resp, nil) - if err != nil { - return nil, err - } - - return deleteResp, nil + return resp, nil } func (o *OctopusLink) QueryImageList() (interface{}, error) { // octopus获取镜像列表 req := &octopus.GetUserImageListReq{ - Platform: o.participant.Name, + Platform: o.platform, PageIndex: o.pageIndex, PageSize: o.pageSize, } @@ -114,13 +103,7 @@ func (o *OctopusLink) QueryImageList() (interface{}, error) { return nil, err } - //转换成统一返回类型 - imgListResp, err := ConvertType[octopus.GetUserImageListResp](resp, nil) - if err != nil { - return nil, err - } - - return imgListResp, nil + return resp, nil } func (o *OctopusLink) SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string) (interface{}, error) { @@ -144,7 +127,7 @@ func (o 
*OctopusLink) SubmitTask(imageId string, cmd string, envs []string, para } req := &octopus.CreateTrainJobReq{ - Platform: o.participant.Name, + Platform: o.platform, Params: &octopus.CreateTrainJobParam{ ImageId: imageId, Name: TASK_NAME_PREFIX + UNDERSCORE + utils.RandomString(10), @@ -167,19 +150,13 @@ func (o *OctopusLink) SubmitTask(imageId string, cmd string, envs []string, para return nil, err } - //转换成统一返回类型 - submitResp, err := ConvertType[octopus.CreateTrainJobResp](resp, nil) - if err != nil { - return nil, err - } - - return submitResp, nil + return resp, nil } func (o *OctopusLink) QueryTask(taskId string) (interface{}, error) { // octopus获取任务 req := &octopus.GetTrainJobReq{ - Platform: o.participant.Name, + Platform: o.platform, Id: taskId, } resp, err := o.svcCtx.OctopusRpc.GetTrainJob(o.ctx, req) @@ -187,19 +164,13 @@ func (o *OctopusLink) QueryTask(taskId string) (interface{}, error) { return nil, err } - //转换成统一返回类型 - taskResp, err := ConvertType[octopus.GetTrainJobResp](resp, nil) - if err != nil { - return nil, err - } - - return taskResp, nil + return resp, nil } func (o *OctopusLink) DeleteTask(taskId string) (interface{}, error) { // octopus删除任务 req := &octopus.DeleteTrainJobReq{ - Platform: o.participant.Name, + Platform: o.platform, JobIds: []string{taskId}, } resp, err := o.svcCtx.OctopusRpc.DeleteTrainJob(o.ctx, req) @@ -207,19 +178,13 @@ func (o *OctopusLink) DeleteTask(taskId string) (interface{}, error) { return nil, err } - //转换成统一返回类型 - deleteResp, err := ConvertType[octopus.DeleteTrainJobResp](resp, nil) - if err != nil { - return nil, err - } - - return deleteResp, nil + return resp, nil } func (o *OctopusLink) QuerySpecs() (interface{}, error) { // octopus查询资源规格 req := &octopus.GetResourceSpecsReq{ - Platform: o.participant.Name, + Platform: o.platform, ResourcePool: "common-pool", } resp, err := o.svcCtx.OctopusRpc.GetResourceSpecs(o.ctx, req) @@ -227,11 +192,9 @@ func (o *OctopusLink) QuerySpecs() (interface{}, error) { return 
nil, err } - //转换成统一返回类型 - specsResp, err := ConvertType[octopus.GetResourceSpecsResp](resp, o.participant) - if err != nil { - return nil, err - } - - return specsResp, nil + return resp, nil +} + +func (o *OctopusLink) GetResourceSpecs() (*collector.ResourceSpecs, error) { + return nil, nil } diff --git a/api/internal/storeLink/shuguangHpc.go b/api/internal/storeLink/shuguangHpc.go index 8e226478..7c80b456 100644 --- a/api/internal/storeLink/shuguangHpc.go +++ b/api/internal/storeLink/shuguangHpc.go @@ -7,16 +7,16 @@ import ( "gitlink.org.cn/jcce-pcm/pcm-ac/hpcAC" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/types" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/models" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils" "strconv" "strings" ) type ShuguangHpc struct { - ctx context.Context - svcCtx *svc.ServiceContext - participant *models.StorelinkCenter + ctx context.Context + svcCtx *svc.ServiceContext + platform string + participantId int64 } const ( @@ -128,8 +128,8 @@ type ResourceSpec struct { GAP_NDCU string } -func NewShuguangHpc(ctx context.Context, svcCtx *svc.ServiceContext, participant *models.StorelinkCenter) *ShuguangHpc { - return &ShuguangHpc{ctx: ctx, svcCtx: svcCtx, participant: participant} +func NewShuguangHpc(ctx context.Context, svcCtx *svc.ServiceContext, name string, id int64) *ShuguangHpc { + return &ShuguangHpc{ctx: ctx, svcCtx: svcCtx, platform: name, participantId: id} } func (s ShuguangHpc) UploadImage(path string) (interface{}, error) { @@ -199,13 +199,7 @@ func (s ShuguangHpc) SubmitTask(imageId string, cmd string, envs []string, param return nil, err } - //转换成统一返回类型 - submitResp, err := ConvertType[hpcAC.SubmitJobResp](resp, nil) - if err != nil { - return nil, err - } - - return submitResp, nil + return resp, nil } @@ -221,30 +215,21 @@ func (s ShuguangHpc) QueryTask(taskId string) (interface{}, error) { //实时作业检查是否成功 if respC.Data != nil && respC.Data.JobEndTime 
!= "" { - taskRespC, err := ConvertType[hpcAC.GetJobDetailResp](respC, nil) + return respC, nil + } else { + //历史作业 + reqH := &hpcAC.HistoryJobDetailReq{ + JobId: taskId, + JobmanagerId: strconv.Itoa(StrJobManagerID), + } + + respH, err := s.svcCtx.ACRpc.HistoryJobDetail(s.ctx, reqH) if err != nil { return nil, err } - return taskRespC, nil - } - //历史作业 - reqH := &hpcAC.HistoryJobDetailReq{ - JobId: taskId, - JobmanagerId: strconv.Itoa(StrJobManagerID), + return respH, nil } - - respH, err := s.svcCtx.ACRpc.HistoryJobDetail(s.ctx, reqH) - if err != nil { - return nil, err - } - - taskRespH, err := ConvertType[hpcAC.HistoryJobDetailResp](respH, nil) - if err != nil { - return nil, err - } - - return taskRespH, nil } func (s ShuguangHpc) QuerySpecs() (interface{}, error) { @@ -254,8 +239,8 @@ func (s ShuguangHpc) QuerySpecs() (interface{}, error) { var respec types.ResourceSpecSl respec.SpecId = k respec.SpecName = v - respec.ParticipantId = s.participant.Id - respec.ParticipantName = s.participant.Name + respec.ParticipantId = s.participantId + respec.ParticipantName = s.platform resp.ResourceSpecs = append(resp.ResourceSpecs, &respec) } @@ -273,13 +258,7 @@ func (s ShuguangHpc) DeleteTask(taskId string) (interface{}, error) { return nil, err } - //转换成统一返回类型 - taskResp, err := ConvertType[hpcAC.DeleteJobResp](resp, nil) - if err != nil { - return nil, err - } - - return taskResp, nil + return resp, nil } func updateRequestByResourceId(resourceId string, req *hpcAC.SubmitJobReq) { diff --git a/api/internal/storeLink/shuguangai.go b/api/internal/storeLink/shuguangai.go index d243f7bb..49b2edf2 100644 --- a/api/internal/storeLink/shuguangai.go +++ b/api/internal/storeLink/shuguangai.go @@ -18,16 +18,17 @@ import ( "context" "errors" "gitlink.org.cn/jcce-pcm/pcm-ac/hpcAC" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/collector" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/models" 
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils" "strings" ) type ShuguangAi struct { - ctx context.Context - svcCtx *svc.ServiceContext - participant *models.StorelinkCenter + ctx context.Context + svcCtx *svc.ServiceContext + platform string + participantId int64 } const ( @@ -47,8 +48,8 @@ const ( PythonCodePath = "/work/home/acgnnmfbwo/111111/py/test.py" ) -func NewShuguangAi(ctx context.Context, svcCtx *svc.ServiceContext, participant *models.StorelinkCenter) *ShuguangAi { - return &ShuguangAi{ctx: ctx, svcCtx: svcCtx, participant: participant} +func NewShuguangAi(ctx context.Context, svcCtx *svc.ServiceContext, name string, id int64) *ShuguangAi { + return &ShuguangAi{ctx: ctx, svcCtx: svcCtx, platform: name, participantId: id} } func (s *ShuguangAi) UploadImage(path string) (interface{}, error) { @@ -70,13 +71,7 @@ func (s *ShuguangAi) QueryImageList() (interface{}, error) { return nil, err } - //转换成统一返回类型 - imgListResp, err := ConvertType[hpcAC.GetImageListAiResp](resp, nil) - if err != nil { - return nil, err - } - - return imgListResp, nil + return resp, nil } func (s *ShuguangAi) SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string) (interface{}, error) { @@ -133,13 +128,7 @@ func (s *ShuguangAi) SubmitTask(imageId string, cmd string, envs []string, param return nil, err } - //转换成统一返回类型 - submitResp, err := ConvertType[hpcAC.SubmitTaskAiResp](resp, nil) - if err != nil { - return nil, err - } - - return submitResp, nil + return resp, nil } func (s *ShuguangAi) QueryTask(taskId string) (interface{}, error) { @@ -152,13 +141,7 @@ func (s *ShuguangAi) QueryTask(taskId string) (interface{}, error) { return nil, err } - //转换成统一返回类型 - taskResp, err := ConvertType[hpcAC.GetPytorchTaskResp](resp, nil) - if err != nil { - return nil, err - } - - return taskResp, nil + return resp, nil } func (s *ShuguangAi) DeleteTask(taskId string) (interface{}, error) { @@ -171,13 +154,7 @@ func (s *ShuguangAi) DeleteTask(taskId string) 
(interface{}, error) { return nil, err } - //转换成统一返回类型 - deleteResp, err := ConvertType[hpcAC.DeleteTaskAiResp](resp, nil) - if err != nil { - return nil, err - } - - return deleteResp, nil + return resp, nil } func (o *ShuguangAi) QuerySpecs() (interface{}, error) { @@ -191,11 +168,9 @@ func (o *ShuguangAi) QuerySpecs() (interface{}, error) { return nil, err } - //转换成统一返回类型 - specsResp, err := ConvertType[hpcAC.GetResourceSpecResp](specs, o.participant) - if err != nil { - return nil, err - } - - return specsResp, nil + return specs, nil +} + +func (o *ShuguangAi) GetResourceSpecs() (*collector.ResourceSpecs, error) { + return nil, nil } diff --git a/api/internal/storeLink/storeLink.go b/api/internal/storeLink/storeLink.go index 596b70d8..18046da4 100644 --- a/api/internal/storeLink/storeLink.go +++ b/api/internal/storeLink/storeLink.go @@ -65,6 +65,8 @@ var ( "3": SHUGUANGAI, "4": SHUGUANGHPC, } + ERROR_RESP_EMPTY = errors.New("resp empty error") + ERROR_CONVERT_EMPTY = errors.New("convert empty error") ) type StoreLink struct { @@ -74,16 +76,16 @@ type StoreLink struct { func NewStoreLink(ctx context.Context, svcCtx *svc.ServiceContext, participant *models.StorelinkCenter) *StoreLink { switch participant.Type { case TYPE_OCTOPUS: - linkStruct := NewOctopusLink(ctx, svcCtx, participant) + linkStruct := NewOctopusLink(ctx, svcCtx, participant.Name, participant.Id) return &StoreLink{ILinkage: linkStruct} case TYPE_MODELARTS: - linkStruct := NewModelArtsLink(ctx, svcCtx, participant) + linkStruct := NewModelArtsLink(ctx, svcCtx, participant.Name, participant.Id) return &StoreLink{ILinkage: linkStruct} case TYPE_SHUGUANGAI: - linkStruct := NewShuguangAi(ctx, svcCtx, participant) + linkStruct := NewShuguangAi(ctx, svcCtx, participant.Name, participant.Id) return &StoreLink{ILinkage: linkStruct} case TYPE_SHUGUANGHPC: - linkStruct := NewShuguangHpc(ctx, svcCtx, participant) + linkStruct := NewShuguangHpc(ctx, svcCtx, participant.Name, participant.Id) return 
&StoreLink{ILinkage: linkStruct} default: return nil @@ -102,7 +104,7 @@ func GetParticipantById(partId int64, dbEngin *gorm.DB) *models.StorelinkCenter return &participant } -func ConvertType2[T any, RESP any](in *T, out *RESP, participant *models.StorelinkCenter) (interface{}, error) { +func ConvertType(in interface{}, out interface{}, participant *models.StorelinkCenter) (interface{}, error) { switch (interface{})(in).(type) { case *octopus.UploadImageResp: @@ -121,332 +123,412 @@ func ConvertType2[T any, RESP any](in *T, out *RESP, participant *models.Storeli case *octopus.DeleteImageResp: inresp := (interface{})(in).(*octopus.DeleteImageResp) - var resp types.DeleteLinkImageResp - resp.Success = inresp.Success - if !resp.Success { - resp.ErrorMsg = inresp.Error.Message + switch (interface{})(out).(type) { + case *types.DeleteLinkImageResp: + resp := (interface{})(out).(*types.DeleteLinkImageResp) + resp.Success = inresp.Success + if !resp.Success { + resp.ErrorMsg = inresp.Error.Message + return resp, nil + } return resp, nil } - - return resp, nil + return nil, nil case *octopus.GetUserImageListResp: inresp := (interface{})(in).(*octopus.GetUserImageListResp) - var resp types.GetLinkImageListResp - resp.Success = inresp.Success - if !resp.Success { - resp.ErrorMsg = inresp.Error.Message - resp.Images = nil + switch (interface{})(out).(type) { + case *types.GetLinkImageListResp: + resp := (interface{})(out).(*types.GetLinkImageListResp) + resp.Success = inresp.Success + if !resp.Success { + resp.ErrorMsg = inresp.Error.Message + resp.Images = nil + return resp, nil + } + + for _, v := range inresp.Payload.Images { + var image types.ImageSl + image.ImageId = v.Image.Id + image.ImageName = v.Image.ImageName + image.ImageStatus = OctImgStatus[v.Image.ImageStatus] + resp.Images = append(resp.Images, &image) + } return resp, nil } + return nil, nil - for _, v := range inresp.Payload.Images { - var image types.ImageSl - image.ImageId = v.Image.Id - image.ImageName = 
v.Image.ImageName - image.ImageStatus = OctImgStatus[v.Image.ImageStatus] - resp.Images = append(resp.Images, &image) - } - return resp, nil case *modelarts.ListReposDetailsResp: inresp := (interface{})(in).(*modelarts.ListReposDetailsResp) - var resp types.GetLinkImageListResp + switch (interface{})(out).(type) { + case *types.GetLinkImageListResp: + resp := (interface{})(out).(*types.GetLinkImageListResp) + if inresp.Errors != nil { + resp.Success = false + resp.ErrorMsg = inresp.Errors[0].ErrorMessage + resp.Images = nil + return resp, nil + } - if inresp.Errors != nil { - resp.Success = false - resp.ErrorMsg = inresp.Errors[0].ErrorMessage - resp.Images = nil + resp.Success = true + for _, v := range inresp.Items { + for _, r := range v.Tags { + var image types.ImageSl + image.ImageId = v.Namespace + "/" + v.Name + ":" + r + image.ImageName = v.Name + image.ImageStatus = "created" + resp.Images = append(resp.Images, &image) + } + } return resp, nil } + return nil, nil - resp.Success = true - for _, v := range inresp.Items { - for _, r := range v.Tags { - var image types.ImageSl - image.ImageId = v.Namespace + "/" + v.Name + ":" + r - image.ImageName = v.Name - image.ImageStatus = "created" - resp.Images = append(resp.Images, &image) - } - } - return resp, nil case *hpcAC.GetImageListAiResp: inresp := (interface{})(in).(*hpcAC.GetImageListAiResp) - var resp types.GetLinkImageListResp - - if inresp.Code == "0" { - resp.Success = true - for _, img := range inresp.Data { - var image types.ImageSl - image.ImageId = img.ImageId - image.ImageName = img.Version - image.ImageStatus = "created" - resp.Images = append(resp.Images, &image) + switch (interface{})(out).(type) { + case *types.GetLinkImageListResp: + resp := (interface{})(out).(*types.GetLinkImageListResp) + if inresp.Code == "0" { + resp.Success = true + for _, img := range inresp.Data { + var image types.ImageSl + image.ImageId = img.ImageId + image.ImageName = img.Version + image.ImageStatus = "created" + 
resp.Images = append(resp.Images, &image) + } + } else { + resp.Success = false + resp.ErrorMsg = inresp.Msg + resp.Images = nil } - } else { - resp.Success = false - resp.ErrorMsg = inresp.Msg - resp.Images = nil + return resp, nil } - return resp, nil + return nil, nil case *octopus.CreateTrainJobResp: inresp := (interface{})(in).(*octopus.CreateTrainJobResp) - var resp types.SubmitLinkTaskResp + switch (interface{})(out).(type) { + case *types.SubmitLinkTaskResp: + resp := (interface{})(out).(*types.SubmitLinkTaskResp) + resp.Success = inresp.Success + if !resp.Success { + resp.ErrorMsg = inresp.Error.Message + return resp, nil + } + + resp.TaskId = inresp.Payload.JobId - resp.Success = inresp.Success - if !resp.Success { - resp.ErrorMsg = inresp.Error.Message return resp, nil } + return nil, nil - resp.TaskId = inresp.Payload.JobId - - return resp, nil case *modelarts.CreateTrainingJobResp: inresp := (interface{})(in).(*modelarts.CreateTrainingJobResp) - var resp types.SubmitLinkTaskResp + switch (interface{})(out).(type) { + case *types.SubmitLinkTaskResp: + resp := (interface{})(out).(*types.SubmitLinkTaskResp) + if inresp.ErrorMsg != "" { + resp.ErrorMsg = inresp.ErrorMsg + resp.Success = false + return resp, nil + } + resp.Success = true + resp.TaskId = inresp.Metadata.Id - if inresp.ErrorMsg != "" { - resp.ErrorMsg = inresp.ErrorMsg - resp.Success = false return resp, nil } - resp.Success = true - resp.TaskId = inresp.Metadata.Id + return nil, nil - return resp, nil case *hpcAC.SubmitTaskAiResp: inresp := (interface{})(in).(*hpcAC.SubmitTaskAiResp) - var resp types.SubmitLinkTaskResp - - if inresp.Code == "0" { - resp.Success = true - resp.TaskId = inresp.Data - } else { - resp.Success = false - resp.ErrorMsg = inresp.Msg + switch (interface{})(out).(type) { + case *types.SubmitLinkTaskResp: + resp := (interface{})(out).(*types.SubmitLinkTaskResp) + if inresp.Code == "0" { + resp.Success = true + resp.TaskId = inresp.Data + } else { + resp.Success = false 
+ resp.ErrorMsg = inresp.Msg + } + return resp, nil } - return resp, nil + return nil, nil + case *hpcAC.SubmitJobResp: inresp := (interface{})(in).(*hpcAC.SubmitJobResp) - var resp types.SubmitLinkTaskResp - - if inresp.Code == "0" { - resp.Success = true - resp.TaskId = inresp.Data - } else { - resp.Success = false - resp.ErrorMsg = inresp.Msg + switch (interface{})(out).(type) { + case *types.SubmitLinkTaskResp: + resp := (interface{})(out).(*types.SubmitLinkTaskResp) + if inresp.Code == "0" { + resp.Success = true + resp.TaskId = inresp.Data + } else { + resp.Success = false + resp.ErrorMsg = inresp.Msg + } + return resp, nil } - return resp, nil + return nil, nil + case *octopus.GetTrainJobResp: inresp := (interface{})(in).(*octopus.GetTrainJobResp) - var resp types.GetLinkTaskResp + switch (interface{})(out).(type) { + case *types.GetLinkTaskResp: + resp := (interface{})(out).(*types.GetLinkTaskResp) + resp.Success = inresp.Success + if !resp.Success { + resp.ErrorMsg = inresp.Error.Message + return resp, nil + } - resp.Success = inresp.Success - if !resp.Success { - resp.ErrorMsg = inresp.Error.Message + var task types.TaskSl + task.TaskId = inresp.Payload.TrainJob.Id + task.TaskName = inresp.Payload.TrainJob.Name + task.StartedAt = inresp.Payload.TrainJob.StartedAt + task.CompletedAt = inresp.Payload.TrainJob.CompletedAt + task.TaskStatus = inresp.Payload.TrainJob.Status + + resp.Task = &task return resp, nil } + return nil, nil - var task types.TaskSl - task.TaskId = inresp.Payload.TrainJob.Id - task.TaskName = inresp.Payload.TrainJob.Name - task.StartedAt = inresp.Payload.TrainJob.StartedAt - task.CompletedAt = inresp.Payload.TrainJob.CompletedAt - task.TaskStatus = inresp.Payload.TrainJob.Status - - resp.Task = &task - return resp, nil case *modelarts.JobResponse: inresp := (interface{})(in).(*modelarts.JobResponse) - var resp types.GetLinkTaskResp + switch (interface{})(out).(type) { + case *types.GetLinkTaskResp: + resp := 
(interface{})(out).(*types.GetLinkTaskResp) + if inresp.ErrorMsg != "" { + resp.ErrorMsg = inresp.ErrorMsg + resp.Success = false + return resp, nil + } + resp.Success = true + resp.Task = &types.TaskSl{} + resp.Task.TaskId = inresp.Metadata.Id + resp.Task.TaskName = inresp.Metadata.Name + resp.Task.StartedAt = int64(inresp.Status.StartTime) + resp.Task.CompletedAt = int64(inresp.Status.Duration) + resp.Task.TaskStatus = inresp.Status.Phase - if inresp.ErrorMsg != "" { - resp.ErrorMsg = inresp.ErrorMsg - resp.Success = false return resp, nil } - resp.Success = true - resp.Task = &types.TaskSl{} - resp.Task.TaskId = inresp.Metadata.Id - resp.Task.TaskName = inresp.Metadata.Name - resp.Task.StartedAt = int64(inresp.Status.StartTime) - resp.Task.CompletedAt = int64(inresp.Status.Duration) - resp.Task.TaskStatus = inresp.Status.Phase + return nil, nil - return resp, nil case *hpcAC.GetPytorchTaskResp: inresp := (interface{})(in).(*hpcAC.GetPytorchTaskResp) - var resp types.GetLinkTaskResp + switch (interface{})(out).(type) { + case *types.GetLinkTaskResp: + resp := (interface{})(out).(*types.GetLinkTaskResp) + if inresp.Code == "0" { + resp.Success = true + var task types.TaskSl + task.TaskId = inresp.Data.Id + task.TaskName = inresp.Data.TaskName + task.TaskStatus = inresp.Data.Status + task.StartedAt = timeutils.StringToUnixTime(inresp.Data.StartTime) + task.CompletedAt = timeutils.StringToUnixTime(inresp.Data.EndTime) + resp.Task = &task + } else { + resp.Success = false + resp.ErrorMsg = inresp.Msg + resp.Task = nil + } - if inresp.Code == "0" { - resp.Success = true - var task types.TaskSl - task.TaskId = inresp.Data.Id - task.TaskName = inresp.Data.TaskName - task.TaskStatus = inresp.Data.Status - task.StartedAt = timeutils.StringToUnixTime(inresp.Data.StartTime) - task.CompletedAt = timeutils.StringToUnixTime(inresp.Data.EndTime) - resp.Task = &task - } else { - resp.Success = false - resp.ErrorMsg = inresp.Msg - resp.Task = nil + return resp, nil } + return 
nil, nil - return resp, nil case *hpcAC.GetJobDetailResp: inresp := (interface{})(in).(*hpcAC.GetJobDetailResp) - var resp types.GetLinkTaskResp + switch (interface{})(out).(type) { + case *types.GetLinkTaskResp: + resp := (interface{})(out).(*types.GetLinkTaskResp) + if inresp.Code == "0" { + resp.Success = true + var task types.TaskSl + task.TaskId = inresp.Data.JobId + task.TaskName = inresp.Data.JobName + task.TaskStatus = AcStatus[inresp.Data.JobStatus] + task.StartedAt = timeutils.StringToUnixTime(inresp.Data.JobStartTime) + task.CompletedAt = timeutils.StringToUnixTime(inresp.Data.JobEndTime) + resp.Task = &task + } else { + resp.Success = false + resp.ErrorMsg = inresp.Msg + resp.Task = nil + } - if inresp.Code == "0" { - resp.Success = true - var task types.TaskSl - task.TaskId = inresp.Data.JobId - task.TaskName = inresp.Data.JobName - task.TaskStatus = AcStatus[inresp.Data.JobStatus] - task.StartedAt = timeutils.StringToUnixTime(inresp.Data.JobStartTime) - task.CompletedAt = timeutils.StringToUnixTime(inresp.Data.JobEndTime) - resp.Task = &task - } else { - resp.Success = false - resp.ErrorMsg = inresp.Msg - resp.Task = nil + return resp, nil } + return nil, nil - return resp, nil case *hpcAC.HistoryJobDetailResp: inresp := (interface{})(in).(*hpcAC.HistoryJobDetailResp) - var resp types.GetLinkTaskResp + switch (interface{})(out).(type) { + case *types.GetLinkTaskResp: + resp := (interface{})(out).(*types.GetLinkTaskResp) + if inresp.Code == "0" { + resp.Success = true + var task types.TaskSl + task.TaskId = inresp.Data.JobId + task.TaskName = inresp.Data.JobName + task.TaskStatus = AcStatus[inresp.Data.JobState] + task.StartedAt = timeutils.StringToUnixTime(inresp.Data.JobStartTime) + task.CompletedAt = timeutils.StringToUnixTime(inresp.Data.JobEndTime) + resp.Task = &task + } else { + resp.Success = false + resp.ErrorMsg = inresp.Msg + resp.Task = nil + } - if inresp.Code == "0" { - resp.Success = true - var task types.TaskSl - task.TaskId = 
inresp.Data.JobId - task.TaskName = inresp.Data.JobName - task.TaskStatus = AcStatus[inresp.Data.JobState] - task.StartedAt = timeutils.StringToUnixTime(inresp.Data.JobStartTime) - task.CompletedAt = timeutils.StringToUnixTime(inresp.Data.JobEndTime) - resp.Task = &task - } else { - resp.Success = false - resp.ErrorMsg = inresp.Msg - resp.Task = nil + return resp, nil } + return nil, nil - return resp, nil case *octopus.DeleteTrainJobResp: inresp := (interface{})(in).(*octopus.DeleteTrainJobResp) - var resp types.DeleteLinkTaskResp + switch (interface{})(out).(type) { + case *types.DeleteLinkTaskResp: + resp := (interface{})(out).(*types.DeleteLinkTaskResp) + resp.Success = inresp.Success + if !resp.Success { + resp.ErrorMsg = inresp.Error.Message + return resp, nil + } - resp.Success = inresp.Success - if !resp.Success { - resp.ErrorMsg = inresp.Error.Message return resp, nil } + return nil, nil - return resp, nil case *modelarts.DeleteTrainingJobResp: inresp := (interface{})(in).(*modelarts.DeleteTrainingJobResp) - var resp types.DeleteLinkTaskResp - - if inresp.ErrorMsg != "" { - resp.ErrorMsg = inresp.ErrorMsg - resp.Success = false + switch (interface{})(out).(type) { + case *types.DeleteLinkTaskResp: + resp := (interface{})(out).(*types.DeleteLinkTaskResp) + if inresp.ErrorMsg != "" { + resp.ErrorMsg = inresp.ErrorMsg + resp.Success = false + return resp, nil + } + resp.Success = true return resp, nil } - resp.Success = true - return resp, nil + return nil, nil + case *hpcAC.DeleteTaskAiResp: inresp := (interface{})(in).(*hpcAC.DeleteTaskAiResp) - var resp types.DeleteLinkTaskResp - - if inresp.Code == "0" { - resp.Success = true - } else { - resp.Success = false - resp.ErrorMsg = inresp.Msg + switch (interface{})(out).(type) { + case *types.DeleteLinkTaskResp: + resp := (interface{})(out).(*types.DeleteLinkTaskResp) + if inresp.Code == "0" { + resp.Success = true + } else { + resp.Success = false + resp.ErrorMsg = inresp.Msg + } + return resp, nil } - return 
resp, nil + return nil, nil + case *hpcAC.DeleteJobResp: inresp := (interface{})(in).(*hpcAC.DeleteJobResp) - var resp types.DeleteLinkTaskResp - - if inresp.Code == "0" { - resp.Success = true - } else { - resp.Success = false - resp.ErrorMsg = inresp.Msg + switch (interface{})(out).(type) { + case *types.DeleteLinkTaskResp: + resp := (interface{})(out).(*types.DeleteLinkTaskResp) + if inresp.Code == "0" { + resp.Success = true + } else { + resp.Success = false + resp.ErrorMsg = inresp.Msg + } + return resp, nil } - return resp, nil + return nil, nil + case *octopus.GetResourceSpecsResp: inresp := (interface{})(in).(*octopus.GetResourceSpecsResp) - var resp types.GetResourceSpecsResp + switch (interface{})(out).(type) { + case *types.GetResourceSpecsResp: + resp := (interface{})(out).(*types.GetResourceSpecsResp) + resp.Success = inresp.Success + if !resp.Success { + resp.ResourceSpecs = nil + return resp, nil + } + + for _, spec := range inresp.TrainResourceSpecs { + var respec types.ResourceSpecSl + respec.SpecId = spec.Id + respec.SpecName = spec.Name + respec.ParticipantId = participant.Id + respec.ParticipantName = participant.Name + respec.SpecPrice = spec.Price + resp.ResourceSpecs = append(resp.ResourceSpecs, &respec) + } - resp.Success = inresp.Success - if !resp.Success { - resp.ResourceSpecs = nil return resp, nil } + return nil, nil - for _, spec := range inresp.TrainResourceSpecs { - var respec types.ResourceSpecSl - respec.SpecId = spec.Id - respec.SpecName = spec.Name - respec.ParticipantId = participant.Id - respec.ParticipantName = participant.Name - respec.SpecPrice = spec.Price - resp.ResourceSpecs = append(resp.ResourceSpecs, &respec) - } - - return resp, nil case *hpcAC.GetResourceSpecResp: inresp := (interface{})(in).(*hpcAC.GetResourceSpecResp) - var resp types.GetResourceSpecsResp - - if inresp.Code != "0" { - resp.Success = false - resp.ResourceSpecs = nil - } else { - var spec types.ResourceSpecSl - resp.Success = true - 
spec.ParticipantName = participant.Name - spec.ParticipantId = participant.Id - spec.SpecName = SHUGUANGAI_CUSTOM_RESOURCE_NAME - spec.SpecId = SHUGUANGAI_CUSTOM_RESOURCE_ID - resp.ResourceSpecs = append(resp.ResourceSpecs, &spec) - } - return resp, nil - case *modelarts.TrainingJobFlavorsResp: - inresp := (interface{})(in).(*modelarts.TrainingJobFlavorsResp) - var resp types.GetResourceSpecsResp - resp.Success = true - - if inresp.Flavors == nil { - resp.Success = false - resp.ResourceSpecs = nil + switch (interface{})(out).(type) { + case *types.GetResourceSpecsResp: + resp := (interface{})(out).(*types.GetResourceSpecsResp) + if inresp.Code != "0" { + resp.Success = false + resp.ResourceSpecs = nil + } else { + var spec types.ResourceSpecSl + resp.Success = true + spec.ParticipantName = participant.Name + spec.ParticipantId = participant.Id + spec.SpecName = SHUGUANGAI_CUSTOM_RESOURCE_NAME + spec.SpecId = SHUGUANGAI_CUSTOM_RESOURCE_ID + resp.ResourceSpecs = append(resp.ResourceSpecs, &spec) + } return resp, nil } + return nil, nil - for _, spec := range inresp.Flavors { - var respec types.ResourceSpecSl - respec.SpecId = spec.FlavorId - respec.SpecName = spec.FlavorName - respec.ParticipantId = participant.Id - respec.ParticipantName = participant.Name - respec.SpecPrice = 0 - resp.ResourceSpecs = append(resp.ResourceSpecs, &respec) + case *modelarts.TrainingJobFlavorsResp: + inresp := (interface{})(in).(*modelarts.TrainingJobFlavorsResp) + switch (interface{})(out).(type) { + case *types.GetResourceSpecsResp: + resp := (interface{})(out).(*types.GetResourceSpecsResp) + resp.Success = true + + if inresp.Flavors == nil { + resp.Success = false + resp.ResourceSpecs = nil + return resp, nil + } + + for _, spec := range inresp.Flavors { + var respec types.ResourceSpecSl + respec.SpecId = spec.FlavorId + respec.SpecName = spec.FlavorName + respec.ParticipantId = participant.Id + respec.ParticipantName = participant.Name + respec.SpecPrice = 0 + resp.ResourceSpecs = 
append(resp.ResourceSpecs, &respec) + } + + return resp, nil } + return nil, nil - return resp, nil default: return nil, errors.New("type convert fail") } } -func ConvertType[T any](in *T, participant *models.StorelinkCenter) (interface{}, error) { +func ConvertTypeOld[T any](in *T, participant *models.StorelinkCenter) (interface{}, error) { switch (interface{})(in).(type) { case *octopus.UploadImageResp: From 4cf1e23cf96aca1b53c6d471f2b7c1a20e4929f7 Mon Sep 17 00:00:00 2001 From: Jo Yang Date: Mon, 29 Jan 2024 08:54:43 +0800 Subject: [PATCH 12/18] static weight algorithm Former-commit-id: 2c1e37f6fb9226e9069ee1e1d4ec77c3cf91c11c --- api/internal/scheduler/entity/entity.go | 6 ++ .../scheduler/strategy/staticWeight.go | 61 ++++++++++++++++++- 2 files changed, 66 insertions(+), 1 deletion(-) diff --git a/api/internal/scheduler/entity/entity.go b/api/internal/scheduler/entity/entity.go index 33e48dba..7f5f6951 100644 --- a/api/internal/scheduler/entity/entity.go +++ b/api/internal/scheduler/entity/entity.go @@ -11,3 +11,9 @@ type Participant struct { Name string Participant_id int64 } + +type WeightP struct { + Participant_id int64 + Weight int32 + Name string +} diff --git a/api/internal/scheduler/strategy/staticWeight.go b/api/internal/scheduler/strategy/staticWeight.go index 3aa5d769..b028baaa 100644 --- a/api/internal/scheduler/strategy/staticWeight.go +++ b/api/internal/scheduler/strategy/staticWeight.go @@ -1,10 +1,69 @@ package strategy +import ( + "errors" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/entity" +) + type StaticWeightStrategy struct { // TODO: add fields + + //每个 + num int32 + weights []entity.WeightP } func (ps *StaticWeightStrategy) Schedule() ([]*AssignedCluster, error) { // TODO: implement the scheduling logic return nil, nil - return nil, nil + + if ps.num < 1 { + return nil, errors.New("numbers must be greater than 0") + } + + if ps.weights == nil { + return nil, errors.New("weight must be set") + } + + var weightSum 
int32 + weightSum = 0 + for _, w := range ps.weights { + weightSum += w.Weight + } + + weightRatio := make([]float64, len(ps.weights)) + for i, w := range ps.weights { + weightRatio[i] = float64(w.Weight) / float64(weightSum) + } + + var rest = ps.num + var results []*AssignedCluster + + for i := 0; i < len(ps.weights); i++ { + + var n = int(float64(ps.num) * weightRatio[i]) + rest -= int32(n) + + cluster := &AssignedCluster{ParticipantId: ps.weights[i].Participant_id, Name: ps.weights[i].Name, Replicas: int32(n)} + results = append(results, cluster) + } + + if rest != 0 { + if rest < 0 { // 如果差值小于0,需要增加某些元素的值 + for i := len(ps.weights) - 1; rest < 0 && i >= 0; i-- { + if results[i].Replicas < ps.weights[i].Weight { + results[i].Replicas++ + rest++ + } + } + } else { + for i := len(ps.weights) - 1; rest > 0 && i >= 0; i-- { + if results[i].Replicas < ps.weights[i].Weight { + results[i].Replicas-- + rest-- + } + } + } + } + + return results, nil } From 7d91e83a4a4be6593a79733848eb34e770a4ca6c Mon Sep 17 00:00:00 2001 From: tzwang Date: Mon, 29 Jan 2024 17:42:07 +0800 Subject: [PATCH 13/18] added options for the strategy and the scheduler Former-commit-id: a81ff31b7c3b6634496fa453bdd439542e964848 --- api/internal/scheduler/scheduler.go | 4 ++-- .../scheduler/schedulers/aiScheduler.go | 15 ++++++++++++--- .../scheduler/schedulers/option/aiOption.go | 17 +++++++++++++++++ .../scheduler/service/executor/aiExecutor.go | 11 +++++++++++ .../scheduler/service/executor/executor.go | 8 -------- .../scheduler/strategy/dynamicResources.go | 4 ++++ .../scheduler/strategy/option/option.go | 5 +++++ .../strategy/option/resourcePricingOption.go | 4 ++++ api/internal/scheduler/strategy/strategy.go | 3 --- api/internal/storeLink/modelarts.go | 5 +++++ api/internal/storeLink/octopus.go | 5 +++++ api/internal/storeLink/shuguangai.go | 5 +++++ 12 files changed, 70 insertions(+), 16 deletions(-) create mode 100644 api/internal/scheduler/schedulers/option/aiOption.go create mode 100644 
api/internal/scheduler/service/executor/aiExecutor.go delete mode 100644 api/internal/scheduler/service/executor/executor.go create mode 100644 api/internal/scheduler/strategy/option/option.go create mode 100644 api/internal/scheduler/strategy/option/resourcePricingOption.go diff --git a/api/internal/scheduler/scheduler.go b/api/internal/scheduler/scheduler.go index e63a3873..5788b8b3 100644 --- a/api/internal/scheduler/scheduler.go +++ b/api/internal/scheduler/scheduler.go @@ -38,7 +38,7 @@ type Scheduler struct { participantRpc participantservice.ParticipantService ResourceCollector *map[string]collector.ResourceCollector Storages database.Storage - AiExecutor *map[string]executor.Executor + AiExecutor *map[string]executor.AiExecutor } func NewScheduler(subSchedule common.SubSchedule, val string, dbEngin *gorm.DB, participantRpc participantservice.ParticipantService) (*Scheduler, error) { @@ -50,7 +50,7 @@ func NewScheduler(subSchedule common.SubSchedule, val string, dbEngin *gorm.DB, return &Scheduler{task: task, subSchedule: subSchedule, dbEngin: dbEngin, participantRpc: participantRpc}, nil } -func NewScheduler2(resourceCollector *map[string]collector.ResourceCollector, storages database.Storage, aiExecutor *map[string]executor.Executor) *Scheduler { +func NewScheduler2(resourceCollector *map[string]collector.ResourceCollector, storages database.Storage, aiExecutor *map[string]executor.AiExecutor) *Scheduler { return &Scheduler{ResourceCollector: resourceCollector, Storages: storages, AiExecutor: aiExecutor} } diff --git a/api/internal/scheduler/schedulers/aiScheduler.go b/api/internal/scheduler/schedulers/aiScheduler.go index 5f9f2d22..300c6613 100644 --- a/api/internal/scheduler/schedulers/aiScheduler.go +++ b/api/internal/scheduler/schedulers/aiScheduler.go @@ -19,6 +19,7 @@ import ( "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/algorithm/providerPricing" 
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/entity" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/schedulers/option" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/collector" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/strategy" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/pkg/response" @@ -30,6 +31,7 @@ type AiScheduler struct { yamlString string task *response.TaskInfo *scheduler.Scheduler + option option.AiOption } func NewAiScheduler(val string, scheduler *scheduler.Scheduler) (*AiScheduler, error) { @@ -48,7 +50,7 @@ func (as *AiScheduler) GetNewStructForDb(task *response.TaskInfo, resource strin } func (as *AiScheduler) PickOptimalStrategy() (strategy.Strategy, error) { - resources, err := as.findProvidersWithResource() + resources, err := as.findClustersWithResource() if err != nil { return nil, err } @@ -83,12 +85,19 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) error { return errors.New("clusters is nil") } - _ = *as.AiExecutor + executorMap := *as.AiExecutor + for _, cluster := range clusters { + _, err := executorMap[cluster.Name].Execute(option.AiOption{}) + if err != nil { + // TODO: database operation + } + // TODO: database operation + } return nil } -func (as *AiScheduler) findProvidersWithResource() ([]*collector.ResourceSpecs, error) { +func (as *AiScheduler) findClustersWithResource() ([]*collector.ResourceSpecs, error) { var resourceSpecs []*collector.ResourceSpecs for _, resourceCollector := range *as.ResourceCollector { spec, err := resourceCollector.GetResourceSpecs() diff --git a/api/internal/scheduler/schedulers/option/aiOption.go b/api/internal/scheduler/schedulers/option/aiOption.go new file mode 100644 index 00000000..2d45383c --- /dev/null +++ b/api/internal/scheduler/schedulers/option/aiOption.go @@ -0,0 +1,17 @@ +package option + +type AiOption struct { + aiType string // shuguangAi/octopus + resourceType string // 
cpu/gpu/compute card + taskType string // pytorch/tensorflow + + imageId string + specId string + datasetsId string + codeId string + + cmd string + + datasets string + code string +} diff --git a/api/internal/scheduler/service/executor/aiExecutor.go b/api/internal/scheduler/service/executor/aiExecutor.go new file mode 100644 index 00000000..a52ab062 --- /dev/null +++ b/api/internal/scheduler/service/executor/aiExecutor.go @@ -0,0 +1,11 @@ +package executor + +import ( + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/schedulers/option" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/storeLink" +) + +type AiExecutor interface { + Execute(option option.AiOption) (interface{}, error) + storeLink.Linkage +} diff --git a/api/internal/scheduler/service/executor/executor.go b/api/internal/scheduler/service/executor/executor.go deleted file mode 100644 index 917e45ca..00000000 --- a/api/internal/scheduler/service/executor/executor.go +++ /dev/null @@ -1,8 +0,0 @@ -package executor - -type Executor interface { - QueryImageList() (interface{}, error) - SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string) (interface{}, error) - QueryTask(taskId string) (interface{}, error) - QuerySpecs() (interface{}, error) -} diff --git a/api/internal/scheduler/strategy/dynamicResources.go b/api/internal/scheduler/strategy/dynamicResources.go index 579333f2..4bb4a83e 100644 --- a/api/internal/scheduler/strategy/dynamicResources.go +++ b/api/internal/scheduler/strategy/dynamicResources.go @@ -2,3 +2,7 @@ package strategy type DynamicResourcesStrategy struct { } + +func (ps *DynamicResourcesStrategy) Schedule() ([]*AssignedCluster, error) { + return nil, nil +} diff --git a/api/internal/scheduler/strategy/option/option.go b/api/internal/scheduler/strategy/option/option.go new file mode 100644 index 00000000..9b6328f5 --- /dev/null +++ b/api/internal/scheduler/strategy/option/option.go @@ -0,0 +1,5 @@ +package option + +type Option 
interface { + GetOption() (interface{}, error) +} diff --git a/api/internal/scheduler/strategy/option/resourcePricingOption.go b/api/internal/scheduler/strategy/option/resourcePricingOption.go new file mode 100644 index 00000000..dcf2920a --- /dev/null +++ b/api/internal/scheduler/strategy/option/resourcePricingOption.go @@ -0,0 +1,4 @@ +package option + +type ResourcePricingOption struct { +} diff --git a/api/internal/scheduler/strategy/strategy.go b/api/internal/scheduler/strategy/strategy.go index 1502dc21..af23fbf2 100644 --- a/api/internal/scheduler/strategy/strategy.go +++ b/api/internal/scheduler/strategy/strategy.go @@ -9,6 +9,3 @@ type AssignedCluster struct { Name string Replicas int32 } - -type Options struct { -} diff --git a/api/internal/storeLink/modelarts.go b/api/internal/storeLink/modelarts.go index 350bd0ec..31489205 100644 --- a/api/internal/storeLink/modelarts.go +++ b/api/internal/storeLink/modelarts.go @@ -16,6 +16,7 @@ package storeLink import ( "context" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/schedulers/option" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/collector" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils" @@ -151,3 +152,7 @@ func (o *ModelArtsLink) QuerySpecs() (interface{}, error) { func (o *ModelArtsLink) GetResourceSpecs() (*collector.ResourceSpecs, error) { return nil, nil } + +func (o *ModelArtsLink) Execute(option option.AiOption) (interface{}, error) { + return nil, nil +} diff --git a/api/internal/storeLink/octopus.go b/api/internal/storeLink/octopus.go index 370ec23c..b40da2ee 100644 --- a/api/internal/storeLink/octopus.go +++ b/api/internal/storeLink/octopus.go @@ -16,6 +16,7 @@ package storeLink import ( "context" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/schedulers/option" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/collector" 
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils" @@ -198,3 +199,7 @@ func (o *OctopusLink) QuerySpecs() (interface{}, error) { func (o *OctopusLink) GetResourceSpecs() (*collector.ResourceSpecs, error) { return nil, nil } + +func (o *OctopusLink) Execute(option option.AiOption) (interface{}, error) { + return nil, nil +} diff --git a/api/internal/storeLink/shuguangai.go b/api/internal/storeLink/shuguangai.go index 49b2edf2..ad84315d 100644 --- a/api/internal/storeLink/shuguangai.go +++ b/api/internal/storeLink/shuguangai.go @@ -18,6 +18,7 @@ import ( "context" "errors" "gitlink.org.cn/jcce-pcm/pcm-ac/hpcAC" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/schedulers/option" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/collector" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils" @@ -174,3 +175,7 @@ func (o *ShuguangAi) QuerySpecs() (interface{}, error) { func (o *ShuguangAi) GetResourceSpecs() (*collector.ResourceSpecs, error) { return nil, nil } + +func (o *ShuguangAi) Execute(option option.AiOption) (interface{}, error) { + return nil, nil +} From 125b94cf9c42af614918fa987abbfebd4acc90f7 Mon Sep 17 00:00:00 2001 From: tzwang Date: Tue, 30 Jan 2024 17:33:48 +0800 Subject: [PATCH 14/18] updated strategies of the scheduler and their params Former-commit-id: c301ea7fef461e20cbe801947fa9551cbcff0caf --- api/internal/scheduler/scheduler.go | 2 + .../scheduler/schedulers/aiScheduler.go | 2 +- api/internal/scheduler/service/aiService.go | 4 +- .../scheduler/strategy/option/option.go | 5 -- .../strategy/option/resourcePricingOption.go | 4 - .../scheduler/strategy/params/params.go | 9 ++ .../strategy/params/replicationParams.go | 16 ++++ .../strategy/params/resourcePricingParams.go | 26 ++++++ .../scheduler/strategy/replication.go | 7 +- .../scheduler/strategy/staticWeight.go | 6 ++ 
.../scheduler/strategy/test/strategy_test.go | 90 +++++++++++++++++++ 11 files changed, 156 insertions(+), 15 deletions(-) delete mode 100644 api/internal/scheduler/strategy/option/option.go delete mode 100644 api/internal/scheduler/strategy/option/resourcePricingOption.go create mode 100644 api/internal/scheduler/strategy/params/params.go create mode 100644 api/internal/scheduler/strategy/params/replicationParams.go create mode 100644 api/internal/scheduler/strategy/params/resourcePricingParams.go create mode 100644 api/internal/scheduler/strategy/test/strategy_test.go diff --git a/api/internal/scheduler/scheduler.go b/api/internal/scheduler/scheduler.go index 5788b8b3..e791c9a0 100644 --- a/api/internal/scheduler/scheduler.go +++ b/api/internal/scheduler/scheduler.go @@ -27,6 +27,7 @@ import ( "gorm.io/gorm" "sigs.k8s.io/yaml" "strings" + "sync" ) type Scheduler struct { @@ -39,6 +40,7 @@ type Scheduler struct { ResourceCollector *map[string]collector.ResourceCollector Storages database.Storage AiExecutor *map[string]executor.AiExecutor + mu sync.RWMutex } func NewScheduler(subSchedule common.SubSchedule, val string, dbEngin *gorm.DB, participantRpc participantservice.ParticipantService) (*Scheduler, error) { diff --git a/api/internal/scheduler/schedulers/aiScheduler.go b/api/internal/scheduler/schedulers/aiScheduler.go index 300c6613..68fc15f1 100644 --- a/api/internal/scheduler/schedulers/aiScheduler.go +++ b/api/internal/scheduler/schedulers/aiScheduler.go @@ -63,7 +63,7 @@ func (as *AiScheduler) PickOptimalStrategy() (strategy.Strategy, error) { Name: resource.Name, }) } - strategy := strategy.NewReplicationStrategy(nil, 0) + strategy := strategy.NewReplicationStrategy(nil) return strategy, nil } diff --git a/api/internal/scheduler/service/aiService.go b/api/internal/scheduler/service/aiService.go index 3b46596b..29ab4653 100644 --- a/api/internal/scheduler/service/aiService.go +++ b/api/internal/scheduler/service/aiService.go @@ -24,8 +24,8 @@ var ( } ) 
-func InitAiClusterMap(ctx context.Context, svcCtx *svc.ServiceContext) (*map[string]executor.Executor, *map[string]collector.ResourceCollector) { - executorMap := make(map[string]executor.Executor) +func InitAiClusterMap(ctx context.Context, svcCtx *svc.ServiceContext) (*map[string]executor.AiExecutor, *map[string]collector.ResourceCollector) { + executorMap := make(map[string]executor.AiExecutor) collectorMap := make(map[string]collector.ResourceCollector) for k, v := range AiTypeMap { switch v { diff --git a/api/internal/scheduler/strategy/option/option.go b/api/internal/scheduler/strategy/option/option.go deleted file mode 100644 index 9b6328f5..00000000 --- a/api/internal/scheduler/strategy/option/option.go +++ /dev/null @@ -1,5 +0,0 @@ -package option - -type Option interface { - GetOption() (interface{}, error) -} diff --git a/api/internal/scheduler/strategy/option/resourcePricingOption.go b/api/internal/scheduler/strategy/option/resourcePricingOption.go deleted file mode 100644 index dcf2920a..00000000 --- a/api/internal/scheduler/strategy/option/resourcePricingOption.go +++ /dev/null @@ -1,4 +0,0 @@ -package option - -type ResourcePricingOption struct { -} diff --git a/api/internal/scheduler/strategy/params/params.go b/api/internal/scheduler/strategy/params/params.go new file mode 100644 index 00000000..44c29f13 --- /dev/null +++ b/api/internal/scheduler/strategy/params/params.go @@ -0,0 +1,9 @@ +package params + +import ( + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/collector" +) + +type Params struct { + resources []*collector.ResourceSpecs +} diff --git a/api/internal/scheduler/strategy/params/replicationParams.go b/api/internal/scheduler/strategy/params/replicationParams.go new file mode 100644 index 00000000..7adfaea0 --- /dev/null +++ b/api/internal/scheduler/strategy/params/replicationParams.go @@ -0,0 +1,16 @@ +package params + +import "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/entity" + +type 
ReplicationOption struct { + replicas int32 + participants []entity.Participant +} + +func (o *ReplicationOption) GetReplicas() int32 { + return o.replicas +} + +func (o *ReplicationOption) GetParticipants() []entity.Participant { + return o.participants +} diff --git a/api/internal/scheduler/strategy/params/resourcePricingParams.go b/api/internal/scheduler/strategy/params/resourcePricingParams.go new file mode 100644 index 00000000..10fe7bd6 --- /dev/null +++ b/api/internal/scheduler/strategy/params/resourcePricingParams.go @@ -0,0 +1,26 @@ +package params + +import ( + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/algorithm/providerPricing" +) + +type ResourcePricingOption struct { + replicas int32 + task *providerPricing.Task + providers []*providerPricing.Provider + *Params +} + +func NewResourcePricingOption(params *Params) *ResourcePricingOption { + return &ResourcePricingOption{ + Params: params, + } +} + +func (r *ResourcePricingOption) GetReplicas() int32 { + return r.replicas +} + +func (r *ResourcePricingOption) GetProviders() []*providerPricing.Provider { + return r.providers +} diff --git a/api/internal/scheduler/strategy/replication.go b/api/internal/scheduler/strategy/replication.go index 88ecd6fb..3f9549b9 100644 --- a/api/internal/scheduler/strategy/replication.go +++ b/api/internal/scheduler/strategy/replication.go @@ -3,6 +3,7 @@ package strategy import ( "github.com/pkg/errors" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/entity" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/strategy/params" ) type ReplicationStrategy struct { @@ -10,9 +11,9 @@ type ReplicationStrategy struct { participants []entity.Participant } -func NewReplicationStrategy(participants []entity.Participant, replicas int32) *ReplicationStrategy { - return &ReplicationStrategy{replicas: replicas, - participants: participants, +func NewReplicationStrategy(params *params.ReplicationOption) *ReplicationStrategy { + return 
&ReplicationStrategy{replicas: params.GetReplicas(), + participants: params.GetParticipants(), } } diff --git a/api/internal/scheduler/strategy/staticWeight.go b/api/internal/scheduler/strategy/staticWeight.go index b028baaa..5215771d 100644 --- a/api/internal/scheduler/strategy/staticWeight.go +++ b/api/internal/scheduler/strategy/staticWeight.go @@ -13,6 +13,12 @@ type StaticWeightStrategy struct { weights []entity.WeightP } +func NewStaticWeightStrategy(weights []entity.WeightP, replicas int32) *StaticWeightStrategy { + return &StaticWeightStrategy{weights: weights, + num: replicas, + } +} + func (ps *StaticWeightStrategy) Schedule() ([]*AssignedCluster, error) { // TODO: implement the scheduling logic return nil, nil diff --git a/api/internal/scheduler/strategy/test/strategy_test.go b/api/internal/scheduler/strategy/test/strategy_test.go new file mode 100644 index 00000000..767d71c4 --- /dev/null +++ b/api/internal/scheduler/strategy/test/strategy_test.go @@ -0,0 +1,90 @@ +package test + +import ( + "fmt" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/entity" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/strategy" + "testing" +) + +func TestReplication(t *testing.T) { + parts := []entity.Participant{ + {Name: "test1", Participant_id: 1}, + {Name: "test2", Participant_id: 2}, + {Name: "test3", Participant_id: 3}, + } + tests := []struct { + name string + replica int32 + ps []entity.Participant + }{ + { + name: "test1", + replica: 1, + ps: parts, + }, + { + name: "test2", + replica: 2, + ps: parts, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + repl := strategy.NewReplicationStrategy(nil) + schedule, err := repl.Schedule() + if err != nil { + return + } + for _, cluster := range schedule { + fmt.Println(cluster) + } + + }) + } + +} + +func TestStaticWeight(t *testing.T) { + parts := []entity.WeightP{ + {Name: "p1", Participant_id: 1, Weight: 3}, + {Name: "p2", Participant_id: 2, Weight: 5}, + 
{Name: "p3", Participant_id: 3, Weight: 2}, + } + tests := []struct { + name string + replica int32 + ps []entity.WeightP + }{ + { + name: "test1", + replica: 1, + ps: parts, + }, + { + name: "test2", + replica: 5, + ps: parts, + }, + { + name: "test2", + replica: 6, + ps: parts, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + repl := strategy.NewStaticWeightStrategy(tt.ps, tt.replica) + schedule, err := repl.Schedule() + if err != nil { + return + } + for _, cluster := range schedule { + fmt.Println(cluster) + } + + }) + } +} From b04a3f1255a418b9d812959c2d5a424a1f53fd0d Mon Sep 17 00:00:00 2001 From: tzwang Date: Wed, 31 Jan 2024 11:24:00 +0800 Subject: [PATCH 15/18] updated aiScheduler Former-commit-id: 9b8d271bb13c6cfb7273ec76084ce8c123fbe3be --- .../scheduler/schedulers/aiScheduler.go | 31 ++++++------------ .../scheduler/schedulers/cloudScheduler.go | 8 ++--- .../strategy/{params => param}/params.go | 4 +-- .../scheduler/strategy/param/replication.go | 23 +++++++++++++ .../strategy/param/resourcePricing.go | 32 +++++++++++++++++++ .../strategy/params/replicationParams.go | 16 ---------- .../strategy/params/resourcePricingParams.go | 26 --------------- .../scheduler/strategy/replication.go | 6 ++-- .../scheduler/strategy/resourcePricing.go | 5 ++- .../scheduler/strategy/test/strategy_test.go | 18 ++++++++++- 10 files changed, 93 insertions(+), 76 deletions(-) rename api/internal/scheduler/strategy/{params => param}/params.go (69%) create mode 100644 api/internal/scheduler/strategy/param/replication.go create mode 100644 api/internal/scheduler/strategy/param/resourcePricing.go delete mode 100644 api/internal/scheduler/strategy/params/replicationParams.go delete mode 100644 api/internal/scheduler/strategy/params/resourcePricingParams.go diff --git a/api/internal/scheduler/schedulers/aiScheduler.go b/api/internal/scheduler/schedulers/aiScheduler.go index 68fc15f1..4310bd46 100644 --- 
a/api/internal/scheduler/schedulers/aiScheduler.go +++ b/api/internal/scheduler/schedulers/aiScheduler.go @@ -17,11 +17,10 @@ package schedulers import ( "errors" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/algorithm/providerPricing" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/entity" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/schedulers/option" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/collector" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/strategy" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/strategy/param" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/pkg/response" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/models" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils" @@ -50,36 +49,24 @@ func (as *AiScheduler) GetNewStructForDb(task *response.TaskInfo, resource strin } func (as *AiScheduler) PickOptimalStrategy() (strategy.Strategy, error) { - resources, err := as.findClustersWithResource() + resources, err := as.findClustersWithResources() if err != nil { return nil, err } + if len(resources) == 0 { + return nil, errors.New("no cluster has resources") + } + params := &param.Params{Resources: resources} if len(resources) < 2 /*|| as.task */ { - var pros []entity.Participant - for _, resource := range resources { - pros = append(pros, entity.Participant{ - Participant_id: resource.ParticipantId, - Name: resource.Name, - }) - } - strategy := strategy.NewReplicationStrategy(nil) + strategy := strategy.NewReplicationStrategy(&param.ReplicationParams{Params: params /*, Replicas: 1*/}) return strategy, nil } - task, providerList := as.genTaskAndProviders() - if err != nil { - return nil, nil - } - strategy := strategy.NewPricingStrategy(task, providerList...) 
+ strategy := strategy.NewPricingStrategy(&param.ResourcePricingParams{Params: params}) return strategy, nil } -func (as *AiScheduler) genTaskAndProviders() (*providerPricing.Task, []*providerPricing.Provider) { - - return nil, nil -} - func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) error { if clusters == nil { return errors.New("clusters is nil") @@ -97,7 +84,7 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) error { return nil } -func (as *AiScheduler) findClustersWithResource() ([]*collector.ResourceSpecs, error) { +func (as *AiScheduler) findClustersWithResources() ([]*collector.ResourceSpecs, error) { var resourceSpecs []*collector.ResourceSpecs for _, resourceCollector := range *as.ResourceCollector { spec, err := resourceCollector.GetResourceSpecs() diff --git a/api/internal/scheduler/schedulers/cloudScheduler.go b/api/internal/scheduler/schedulers/cloudScheduler.go index 6d9fcbe1..6024be02 100644 --- a/api/internal/scheduler/schedulers/cloudScheduler.go +++ b/api/internal/scheduler/schedulers/cloudScheduler.go @@ -19,6 +19,7 @@ import ( "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/algorithm/providerPricing" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/database" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/strategy" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/strategy/param" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/pkg/response" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/models" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils" @@ -38,12 +39,9 @@ func NewCloudScheduler() *CloudScheduler { } func (cs *CloudScheduler) PickOptimalStrategy() (strategy.Strategy, error) { - task, providerList, err := cs.genTaskAndProviders() - if err != nil { - return nil, nil - } + //获取所有计算中心 //调度算法 - strategy := strategy.NewPricingStrategy(task, providerList...) 
+ strategy := strategy.NewPricingStrategy(&param.ResourcePricingParams{}) return strategy, nil } diff --git a/api/internal/scheduler/strategy/params/params.go b/api/internal/scheduler/strategy/param/params.go similarity index 69% rename from api/internal/scheduler/strategy/params/params.go rename to api/internal/scheduler/strategy/param/params.go index 44c29f13..78270fc0 100644 --- a/api/internal/scheduler/strategy/params/params.go +++ b/api/internal/scheduler/strategy/param/params.go @@ -1,9 +1,9 @@ -package params +package param import ( "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/collector" ) type Params struct { - resources []*collector.ResourceSpecs + Resources []*collector.ResourceSpecs } diff --git a/api/internal/scheduler/strategy/param/replication.go b/api/internal/scheduler/strategy/param/replication.go new file mode 100644 index 00000000..6699ce6b --- /dev/null +++ b/api/internal/scheduler/strategy/param/replication.go @@ -0,0 +1,23 @@ +package param + +import "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/entity" + +type ReplicationParams struct { + Replicas int32 + *Params +} + +func (r *ReplicationParams) GetReplicas() int32 { + return r.Replicas +} + +func (r *ReplicationParams) GetParticipants() []*entity.Participant { + var participants []*entity.Participant + for _, resource := range r.Resources { + participants = append(participants, &entity.Participant{ + Participant_id: resource.ParticipantId, + Name: resource.Name, + }) + } + return participants +} diff --git a/api/internal/scheduler/strategy/param/resourcePricing.go b/api/internal/scheduler/strategy/param/resourcePricing.go new file mode 100644 index 00000000..570e4422 --- /dev/null +++ b/api/internal/scheduler/strategy/param/resourcePricing.go @@ -0,0 +1,32 @@ +package param + +import ( + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/algorithm/providerPricing" +) + +type ResourcePricingParams struct { + replicas int32 + task 
*providerPricing.Task + *Params +} + +func (r *ResourcePricingParams) GetReplicas() int32 { + return r.replicas +} + +func (r *ResourcePricingParams) GetTask() *providerPricing.Task { + return r.task +} + +func (r *ResourcePricingParams) GetProviders() []*providerPricing.Provider { + var providerList []*providerPricing.Provider + for _, resource := range r.Resources { + provider := providerPricing.NewProvider( + resource.ParticipantId, + resource.CpuAvail, + resource.MemAvail, + resource.DiskAvail, 0.0, 0.0, 0.0) + providerList = append(providerList, provider) + } + return providerList +} diff --git a/api/internal/scheduler/strategy/params/replicationParams.go b/api/internal/scheduler/strategy/params/replicationParams.go deleted file mode 100644 index 7adfaea0..00000000 --- a/api/internal/scheduler/strategy/params/replicationParams.go +++ /dev/null @@ -1,16 +0,0 @@ -package params - -import "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/entity" - -type ReplicationOption struct { - replicas int32 - participants []entity.Participant -} - -func (o *ReplicationOption) GetReplicas() int32 { - return o.replicas -} - -func (o *ReplicationOption) GetParticipants() []entity.Participant { - return o.participants -} diff --git a/api/internal/scheduler/strategy/params/resourcePricingParams.go b/api/internal/scheduler/strategy/params/resourcePricingParams.go deleted file mode 100644 index 10fe7bd6..00000000 --- a/api/internal/scheduler/strategy/params/resourcePricingParams.go +++ /dev/null @@ -1,26 +0,0 @@ -package params - -import ( - "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/algorithm/providerPricing" -) - -type ResourcePricingOption struct { - replicas int32 - task *providerPricing.Task - providers []*providerPricing.Provider - *Params -} - -func NewResourcePricingOption(params *Params) *ResourcePricingOption { - return &ResourcePricingOption{ - Params: params, - } -} - -func (r *ResourcePricingOption) GetReplicas() int32 { - return 
r.replicas -} - -func (r *ResourcePricingOption) GetProviders() []*providerPricing.Provider { - return r.providers -} diff --git a/api/internal/scheduler/strategy/replication.go b/api/internal/scheduler/strategy/replication.go index 3f9549b9..fad6fcde 100644 --- a/api/internal/scheduler/strategy/replication.go +++ b/api/internal/scheduler/strategy/replication.go @@ -3,15 +3,15 @@ package strategy import ( "github.com/pkg/errors" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/entity" - "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/strategy/params" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/strategy/param" ) type ReplicationStrategy struct { replicas int32 - participants []entity.Participant + participants []*entity.Participant } -func NewReplicationStrategy(params *params.ReplicationOption) *ReplicationStrategy { +func NewReplicationStrategy(params *param.ReplicationParams) *ReplicationStrategy { return &ReplicationStrategy{replicas: params.GetReplicas(), participants: params.GetParticipants(), } diff --git a/api/internal/scheduler/strategy/resourcePricing.go b/api/internal/scheduler/strategy/resourcePricing.go index 2abf2af6..e1614164 100644 --- a/api/internal/scheduler/strategy/resourcePricing.go +++ b/api/internal/scheduler/strategy/resourcePricing.go @@ -17,6 +17,7 @@ package strategy import ( "errors" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/algorithm/providerPricing" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/strategy/param" ) type PricingStrategy struct { @@ -25,7 +26,9 @@ type PricingStrategy struct { StrategyList []*providerPricing.Strategy } -func NewPricingStrategy(task *providerPricing.Task, providers ...*providerPricing.Provider) *PricingStrategy { +func NewPricingStrategy(params *param.ResourcePricingParams) *PricingStrategy { + providers := params.GetProviders() + task := params.GetTask() var providerList []*providerPricing.Provider var res [][]int 
diff --git a/api/internal/scheduler/strategy/test/strategy_test.go b/api/internal/scheduler/strategy/test/strategy_test.go index 767d71c4..53f24cb9 100644 --- a/api/internal/scheduler/strategy/test/strategy_test.go +++ b/api/internal/scheduler/strategy/test/strategy_test.go @@ -3,7 +3,9 @@ package test import ( "fmt" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/entity" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/collector" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/strategy" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/strategy/param" "testing" ) @@ -13,10 +15,23 @@ func TestReplication(t *testing.T) { {Name: "test2", Participant_id: 2}, {Name: "test3", Participant_id: 3}, } + res := []*collector.ResourceSpecs{ + { + ParticipantId: 1, + Name: "test1", + }, + { + ParticipantId: 1, + Name: "test2"}, + { + ParticipantId: 1, + Name: "test3"}, + } tests := []struct { name string replica int32 ps []entity.Participant + res []*collector.ResourceSpecs }{ { name: "test1", @@ -32,7 +47,8 @@ func TestReplication(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - repl := strategy.NewReplicationStrategy(nil) + params := &param.Params{Resources: res} + repl := strategy.NewReplicationStrategy(&param.ReplicationParams{Params: params, Replicas: tt.replica}) schedule, err := repl.Schedule() if err != nil { return From 3cc16100fa60f947d8f1881923eedb005967c268 Mon Sep 17 00:00:00 2001 From: tzwang Date: Wed, 31 Jan 2024 17:45:14 +0800 Subject: [PATCH 16/18] added shuguangai cluster resources function Former-commit-id: 3f03d5c3a29e5e12f399e3da7087ab92e5f802e5 --- api/internal/scheduler/service/aiService.go | 2 +- .../scheduler/strategy/test/strategy_test.go | 4 ++-- api/internal/storeLink/shuguangai.go | 23 +++++++++++++++++++ 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/api/internal/scheduler/service/aiService.go 
b/api/internal/scheduler/service/aiService.go index 29ab4653..2ea57cec 100644 --- a/api/internal/scheduler/service/aiService.go +++ b/api/internal/scheduler/service/aiService.go @@ -19,7 +19,7 @@ var ( "Hanwuji": OCTOPUS, "Suiyan": OCTOPUS, "Sailingsi": OCTOPUS, - "modelarts-CloudBrain2": MODELARTS, + "Modelarts-CloudBrain2": MODELARTS, "ShuguangAi": SHUGUANGAI, } ) diff --git a/api/internal/scheduler/strategy/test/strategy_test.go b/api/internal/scheduler/strategy/test/strategy_test.go index 53f24cb9..6331d136 100644 --- a/api/internal/scheduler/strategy/test/strategy_test.go +++ b/api/internal/scheduler/strategy/test/strategy_test.go @@ -15,7 +15,7 @@ func TestReplication(t *testing.T) { {Name: "test2", Participant_id: 2}, {Name: "test3", Participant_id: 3}, } - res := []*collector.ResourceSpecs{ + rsc := []*collector.ResourceSpecs{ { ParticipantId: 1, Name: "test1", @@ -47,7 +47,7 @@ func TestReplication(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - params := &param.Params{Resources: res} + params := &param.Params{Resources: rsc} repl := strategy.NewReplicationStrategy(&param.ReplicationParams{Params: params, Replicas: tt.replica}) schedule, err := repl.Schedule() if err != nil { diff --git a/api/internal/storeLink/shuguangai.go b/api/internal/storeLink/shuguangai.go index ad84315d..3d027d41 100644 --- a/api/internal/storeLink/shuguangai.go +++ b/api/internal/storeLink/shuguangai.go @@ -22,6 +22,7 @@ import ( "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/collector" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils" + "strconv" "strings" ) @@ -173,6 +174,28 @@ func (o *ShuguangAi) QuerySpecs() (interface{}, error) { } func (o *ShuguangAi) GetResourceSpecs() (*collector.ResourceSpecs, error) { + userReq := &hpcAC.GetUserInfoReq{} + userinfo, err := o.svcCtx.ACRpc.GetUserInfo(o.ctx, userReq) + if err != nil { + return nil, err + } + limitReq := 
&hpcAC.QueueReq{} + _, err = o.svcCtx.ACRpc.QueryUserQuotasLimit(o.ctx, limitReq) + if err != nil { + return nil, err + } + diskReq := &hpcAC.ParaStorQuotaReq{} + _, err = o.svcCtx.ACRpc.ParaStorQuota(o.ctx, diskReq) + if err != nil { + return nil, err + } + + balance, _ := strconv.ParseFloat(userinfo.Data.AccountBalance, 64) + _ = &collector.ResourceSpecs{ + ParticipantId: o.participantId, + Name: o.platform, + Balance: balance, + } return nil, nil } From 3d3e2e80bc4697fbe1e7e19cd6811849c37ca13a Mon Sep 17 00:00:00 2001 From: jagger Date: Wed, 31 Jan 2024 18:48:48 +0800 Subject: [PATCH 17/18] add adapter impl Signed-off-by: jagger Former-commit-id: a6090b9e548b1e708441d4e8524186927500b57c --- api/desc/core/pcm-core.api | 94 ++++++++++++++++++- api/desc/pcm.api | 38 ++++++++ .../handler/adapters/adapterslisthandler.go | 24 +++++ .../handler/adapters/clusterlisthandler.go | 24 +++++ .../handler/adapters/createadapterhandler.go | 24 +++++ .../handler/adapters/createclusterhandler.go | 24 +++++ .../handler/adapters/deleteadapterhandler.go | 24 +++++ .../handler/adapters/deleteclusterhandler.go | 24 +++++ .../handler/adapters/getadapterhandler.go | 24 +++++ .../handler/adapters/getclusterhandler.go | 24 +++++ .../handler/adapters/updateadapterhandler.go | 24 +++++ .../handler/adapters/updateclusterhandler.go | 24 +++++ api/internal/handler/routes.go | 57 +++++++++++ .../logic/adapters/adapterslistlogic.go | 34 +++++++ .../logic/adapters/clusterlistlogic.go | 34 +++++++ .../logic/adapters/createadapterlogic.go | 35 +++++++ .../logic/adapters/createclusterlogic.go | 41 ++++++++ .../logic/adapters/deleteadapterlogic.go | 44 +++++++++ .../logic/adapters/deleteclusterlogic.go | 39 ++++++++ .../logic/adapters/getadapterlogic.go | 35 +++++++ .../logic/adapters/getclusterlogic.go | 36 +++++++ .../logic/adapters/updateadapterlogic.go | 30 ++++++ .../logic/adapters/updateclusterlogic.go | 30 ++++++ api/internal/types/types.go | 94 +++++++++++++++++++ pkg/utils/snowflake.go | 5 + 
25 files changed, 885 insertions(+), 1 deletion(-) create mode 100644 api/internal/handler/adapters/adapterslisthandler.go create mode 100644 api/internal/handler/adapters/clusterlisthandler.go create mode 100644 api/internal/handler/adapters/createadapterhandler.go create mode 100644 api/internal/handler/adapters/createclusterhandler.go create mode 100644 api/internal/handler/adapters/deleteadapterhandler.go create mode 100644 api/internal/handler/adapters/deleteclusterhandler.go create mode 100644 api/internal/handler/adapters/getadapterhandler.go create mode 100644 api/internal/handler/adapters/getclusterhandler.go create mode 100644 api/internal/handler/adapters/updateadapterhandler.go create mode 100644 api/internal/handler/adapters/updateclusterhandler.go create mode 100644 api/internal/logic/adapters/adapterslistlogic.go create mode 100644 api/internal/logic/adapters/clusterlistlogic.go create mode 100644 api/internal/logic/adapters/createadapterlogic.go create mode 100644 api/internal/logic/adapters/createclusterlogic.go create mode 100644 api/internal/logic/adapters/deleteadapterlogic.go create mode 100644 api/internal/logic/adapters/deleteclusterlogic.go create mode 100644 api/internal/logic/adapters/getadapterlogic.go create mode 100644 api/internal/logic/adapters/getclusterlogic.go create mode 100644 api/internal/logic/adapters/updateadapterlogic.go create mode 100644 api/internal/logic/adapters/updateclusterlogic.go diff --git a/api/desc/core/pcm-core.api b/api/desc/core/pcm-core.api index 771ec7e0..5d3a2c6f 100644 --- a/api/desc/core/pcm-core.api +++ b/api/desc/core/pcm-core.api @@ -704,4 +704,96 @@ type ( Msg string `json:"msg,omitempty"` Data interface{} `json:"data,omitempty"` } -) \ No newline at end of file +) + +type ( + AdapterReq { + Id string `json:"id,optional" db:"id"` + Name string `json:"name,optional"` + Type string `json:"type,optional"` + Nickname string `json:"nickname,optional"` + Version string `json:"version,optional"` + Server 
string `json:"server,optional"` + } + AdapterDelReq { + Id string `form:"id,optional" db:"id"` + } + AdapterInfo { + Id string `json:"id,omitempty" db:"id"` + Name string `json:"name,omitempty" db:"name"` + Type string `json:"type,omitempty" db:"type"` + Nickname string `json:"nickname,omitempty" db:"nickname"` + Version string `json:"version,omitempty" db:"version"` + Server string `json:"server,omitempty" db:"server"` + CreateTime string `json:"createTime,omitempty" db:"create_time" gorm:"autoCreateTime"` + } + AdapterResp { + Code int `json:"code,omitempty"` + Msg string `json:"msg,omitempty"` + Data AdapterInfo `json:"data,omitempty"` + } + AdapterListResp { + Code int `json:"code,omitempty"` + Msg string `json:"msg,omitempty"` + Data []AdapterInfo `json:"data,omitempty"` + } +) + +type ClusterReq { + Id string `json:"id,optional"` + AdapterId string `json:"adapterId,optional"` + Name string `json:"name,optional"` + Nickname string `json:"nickname,optional"` + Description string `json:"description,optional"` + Server string `json:"server,optional"` + MonitorServer string `json:"monitorServer,optional"` + Username string `json:"username,optional"` + Password string `json:"password,optional"` + Token string `json:"token,optional"` + Ak string `json:"ak,optional"` + Sk string `json:"sk,optional"` + Region string `json:"region,optional"` + ProjectId string `json:"projectId,optional"` + Version string `json:"version,optional"` + Label string `json:"label,optional"` + OwnerId string `json:"ownerId,omitempty,optional"` + AuthType string `json:"authType,optional"` +} + +type ClusterDelReq { + Id string `form:"id,optional"` +} + +type ClusterInfo { + Id string `json:"id,omitempty" db:"id"` + AdapterId string `json:"adapterId,omitempty" db:"adapter_id"` + Name string `json:"name,omitempty" db:"name"` + Nickname string `json:"nickname,omitempty" db:"nickname"` + Description string `json:"description,omitempty" db:"description"` + Server string `json:"server,omitempty" 
db:"server"` + MonitorServer string `json:"monitorServer,omitempty" db:"monitor_server"` + Username string `json:"username,omitempty" db:"username"` + Password string `json:"password,omitempty" db:"password"` + Token string `json:"token,omitempty" db:"token"` + Ak string `json:"ak,omitempty" db:"ak"` + Sk string `json:"sk,omitempty" db:"sk"` + Region string `json:"region,omitempty" db:"region"` + ProjectId string `json:"projectId,omitempty" db:"project_id"` + Version string `json:"version,omitempty" db:"version"` + Label string `json:"label,omitempty" db:"label"` + OwnerId string `json:"ownerId,omitempty" db:"owner_id"` + AuthType string `json:"authType,omitempty" db:"auth_type"` + CreateTime string `json:"createTime,omitempty" db:"created_time" gorm:"autoCreateTime"` +} + +type ClusterResp { + Code int `json:"code,omitempty"` + Msg string `json:"msg,omitempty"` + Data ClusterInfo `json:"data,omitempty"` +} + +type ClusterListResp { + Code int `json:"code,omitempty"` + Msg string `json:"msg,omitempty"` + Data []ClusterInfo `json:"data,omitempty"` +} \ No newline at end of file diff --git a/api/desc/pcm.api b/api/desc/pcm.api index 2e1b3817..1f264fe9 100644 --- a/api/desc/pcm.api +++ b/api/desc/pcm.api @@ -570,4 +570,42 @@ service pcm { @doc "启动应用" @handler StartAppByAppName put /apps/startApp (DeleteAppReq) returns (AppResp) +} + +// 接口 +@server( + prefix: pcm/v1 + group : adapters +) + +service pcm { + @handler AdaptersListHandler + get /adapter/list (AdapterReq) returns (AdapterListResp) + + @handler CreateAdapterHandler + post /adapter/create (AdapterReq) returns (AdapterResp) + + @handler UpdateAdapterHandler + put /adapter/update (AdapterReq) returns (AdapterResp) + + @handler DeleteAdapterHandler + delete /adapter/delete (AdapterDelReq) returns (AdapterResp) + + @handler GetAdapterHandler + get /adapter/get (AdapterDelReq) returns (AdapterResp) + + @handler ClusterListHandler + get /adapter/cluster/list (ClusterReq) returns (ClusterListResp) + + @handler 
CreateClusterHandler + post /adapter/cluster/create (ClusterReq) returns (ClusterResp) + + @handler UpdateClusterHandler + put /adapter/cluster/update (ClusterReq) returns (ClusterResp) + + @handler DeleteClusterHandler + delete /adapter/cluster/delete (ClusterDelReq) returns (ClusterResp) + + @handler GetClusterHandler + get /adapter/cluster/get (ClusterDelReq) returns (ClusterResp) } \ No newline at end of file diff --git a/api/internal/handler/adapters/adapterslisthandler.go b/api/internal/handler/adapters/adapterslisthandler.go new file mode 100644 index 00000000..b2b17e5e --- /dev/null +++ b/api/internal/handler/adapters/adapterslisthandler.go @@ -0,0 +1,24 @@ +package adapters + +import ( + "github.com/zeromicro/go-zero/rest/httpx" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/logic/adapters" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/types" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/repository/result" + "net/http" +) + +func AdaptersListHandler(svcCtx *svc.ServiceContext) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + var req types.AdapterReq + if err := httpx.Parse(r, &req); err != nil { + result.ParamErrorResult(r, w, err) + return + } + + l := adapters.NewAdaptersListLogic(r.Context(), svcCtx) + resp, err := l.AdaptersList(&req) + result.HttpResult(r, w, resp, err) + } +} diff --git a/api/internal/handler/adapters/clusterlisthandler.go b/api/internal/handler/adapters/clusterlisthandler.go new file mode 100644 index 00000000..c2612fa8 --- /dev/null +++ b/api/internal/handler/adapters/clusterlisthandler.go @@ -0,0 +1,24 @@ +package adapters + +import ( + "github.com/zeromicro/go-zero/rest/httpx" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/logic/adapters" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/types" + 
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/repository/result" + "net/http" +) + +func ClusterListHandler(svcCtx *svc.ServiceContext) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + var req types.ClusterReq + if err := httpx.Parse(r, &req); err != nil { + result.ParamErrorResult(r, w, err) + return + } + + l := adapters.NewClusterListLogic(r.Context(), svcCtx) + resp, err := l.ClusterList(&req) + result.HttpResult(r, w, resp, err) + } +} diff --git a/api/internal/handler/adapters/createadapterhandler.go b/api/internal/handler/adapters/createadapterhandler.go new file mode 100644 index 00000000..e99d001e --- /dev/null +++ b/api/internal/handler/adapters/createadapterhandler.go @@ -0,0 +1,24 @@ +package adapters + +import ( + "github.com/zeromicro/go-zero/rest/httpx" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/logic/adapters" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/types" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/repository/result" + "net/http" +) + +func CreateAdapterHandler(svcCtx *svc.ServiceContext) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + var req types.AdapterReq + if err := httpx.Parse(r, &req); err != nil { + result.ParamErrorResult(r, w, err) + return + } + + l := adapters.NewCreateAdapterLogic(r.Context(), svcCtx) + resp, err := l.CreateAdapter(&req) + result.HttpResult(r, w, resp, err) + } +} diff --git a/api/internal/handler/adapters/createclusterhandler.go b/api/internal/handler/adapters/createclusterhandler.go new file mode 100644 index 00000000..7938a705 --- /dev/null +++ b/api/internal/handler/adapters/createclusterhandler.go @@ -0,0 +1,24 @@ +package adapters + +import ( + "github.com/zeromicro/go-zero/rest/httpx" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/logic/adapters" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc" + 
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/types" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/repository/result" + "net/http" +) + +func CreateClusterHandler(svcCtx *svc.ServiceContext) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + var req types.ClusterReq + if err := httpx.Parse(r, &req); err != nil { + result.ParamErrorResult(r, w, err) + return + } + + l := adapters.NewCreateClusterLogic(r.Context(), svcCtx) + resp, err := l.CreateCluster(&req) + result.HttpResult(r, w, resp, err) + } +} diff --git a/api/internal/handler/adapters/deleteadapterhandler.go b/api/internal/handler/adapters/deleteadapterhandler.go new file mode 100644 index 00000000..958ef721 --- /dev/null +++ b/api/internal/handler/adapters/deleteadapterhandler.go @@ -0,0 +1,24 @@ +package adapters + +import ( + "github.com/zeromicro/go-zero/rest/httpx" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/logic/adapters" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/types" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/repository/result" + "net/http" +) + +func DeleteAdapterHandler(svcCtx *svc.ServiceContext) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + var req types.AdapterDelReq + if err := httpx.Parse(r, &req); err != nil { + result.ParamErrorResult(r, w, err) + return + } + + l := adapters.NewDeleteAdapterLogic(r.Context(), svcCtx) + resp, err := l.DeleteAdapter(&req) + result.HttpResult(r, w, resp, err) + } +} diff --git a/api/internal/handler/adapters/deleteclusterhandler.go b/api/internal/handler/adapters/deleteclusterhandler.go new file mode 100644 index 00000000..643d822e --- /dev/null +++ b/api/internal/handler/adapters/deleteclusterhandler.go @@ -0,0 +1,24 @@ +package adapters + +import ( + "github.com/zeromicro/go-zero/rest/httpx" + "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/logic/adapters" + 
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
+	"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/types"
+	"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/repository/result"
+	"net/http"
+)
+
+func DeleteClusterHandler(svcCtx *svc.ServiceContext) http.HandlerFunc { // DeleteClusterHandler returns the handler that parses a types.ClusterDelReq and delegates to DeleteClusterLogic.DeleteCluster.
+	return func(w http.ResponseWriter, r *http.Request) {
+		var req types.ClusterDelReq
+		if err := httpx.Parse(r, &req); err != nil {
+			result.ParamErrorResult(r, w, err) // the request could not be parsed: report it and stop
+			return
+		}
+
+		l := adapters.NewDeleteClusterLogic(r.Context(), svcCtx) // logic instance bound to this request's context
+		resp, err := l.DeleteCluster(&req)
+		result.HttpResult(r, w, resp, err) // resp and err are both handed to the shared result writer
+	}
+}
diff --git a/api/internal/handler/adapters/getadapterhandler.go b/api/internal/handler/adapters/getadapterhandler.go
new file mode 100644
index 00000000..9e8274f0
--- /dev/null
+++ b/api/internal/handler/adapters/getadapterhandler.go
@@ -0,0 +1,24 @@
+package adapters
+
+import (
+	"github.com/zeromicro/go-zero/rest/httpx"
+	"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/logic/adapters"
+	"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
+	"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/types"
+	"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/repository/result"
+	"net/http"
+)
+
+func GetAdapterHandler(svcCtx *svc.ServiceContext) http.HandlerFunc { // GetAdapterHandler returns the handler that parses a types.AdapterDelReq (the lookup reuses the delete request type) and delegates to GetAdapterLogic.GetAdapter.
+	return func(w http.ResponseWriter, r *http.Request) {
+		var req types.AdapterDelReq
+		if err := httpx.Parse(r, &req); err != nil {
+			result.ParamErrorResult(r, w, err) // the request could not be parsed: report it and stop
+			return
+		}
+
+		l := adapters.NewGetAdapterLogic(r.Context(), svcCtx) // logic instance bound to this request's context
+		resp, err := l.GetAdapter(&req)
+		result.HttpResult(r, w, resp, err) // resp and err are both handed to the shared result writer
+	}
+}
diff --git a/api/internal/handler/adapters/getclusterhandler.go b/api/internal/handler/adapters/getclusterhandler.go
new file mode 100644
index 00000000..c1f271e4
--- /dev/null
+++ b/api/internal/handler/adapters/getclusterhandler.go
@@ -0,0 +1,24 @@
+package adapters
+
+import (
+	"github.com/zeromicro/go-zero/rest/httpx"
+	"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/logic/adapters"
+
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
+	"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/types"
+	"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/repository/result"
+	"net/http"
+)
+
+func GetClusterHandler(svcCtx *svc.ServiceContext) http.HandlerFunc { // GetClusterHandler returns the handler that parses a types.ClusterDelReq (the lookup reuses the delete request type) and delegates to GetClusterLogic.GetCluster.
+	return func(w http.ResponseWriter, r *http.Request) {
+		var req types.ClusterDelReq
+		if err := httpx.Parse(r, &req); err != nil {
+			result.ParamErrorResult(r, w, err) // the request could not be parsed: report it and stop
+			return
+		}
+
+		l := adapters.NewGetClusterLogic(r.Context(), svcCtx) // logic instance bound to this request's context
+		resp, err := l.GetCluster(&req)
+		result.HttpResult(r, w, resp, err) // resp and err are both handed to the shared result writer
+	}
+}
diff --git a/api/internal/handler/adapters/updateadapterhandler.go b/api/internal/handler/adapters/updateadapterhandler.go
new file mode 100644
index 00000000..9e991b8a
--- /dev/null
+++ b/api/internal/handler/adapters/updateadapterhandler.go
@@ -0,0 +1,24 @@
+package adapters
+
+import (
+	"github.com/zeromicro/go-zero/rest/httpx"
+	"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/logic/adapters"
+	"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
+	"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/types"
+	"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/repository/result"
+	"net/http"
+)
+
+func UpdateAdapterHandler(svcCtx *svc.ServiceContext) http.HandlerFunc { // UpdateAdapterHandler returns the handler that parses a types.AdapterReq and delegates to UpdateAdapterLogic.UpdateAdapter.
+	return func(w http.ResponseWriter, r *http.Request) {
+		var req types.AdapterReq
+		if err := httpx.Parse(r, &req); err != nil {
+			result.ParamErrorResult(r, w, err) // the request could not be parsed: report it and stop
+			return
+		}
+
+		l := adapters.NewUpdateAdapterLogic(r.Context(), svcCtx) // logic instance bound to this request's context
+		resp, err := l.UpdateAdapter(&req)
+		result.HttpResult(r, w, resp, err) // resp and err are both handed to the shared result writer
+	}
+}
diff --git a/api/internal/handler/adapters/updateclusterhandler.go b/api/internal/handler/adapters/updateclusterhandler.go
new file mode 100644
index 00000000..90320e23
--- /dev/null
+++ b/api/internal/handler/adapters/updateclusterhandler.go
@@ -0,0 +1,24 @@
+package adapters
+
+import (
+	"github.com/zeromicro/go-zero/rest/httpx"
+
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/logic/adapters"
+	"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
+	"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/types"
+	"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/repository/result"
+	"net/http"
+)
+
+func UpdateClusterHandler(svcCtx *svc.ServiceContext) http.HandlerFunc { // UpdateClusterHandler returns the handler that parses a types.ClusterReq and delegates to UpdateClusterLogic.UpdateCluster.
+	return func(w http.ResponseWriter, r *http.Request) {
+		var req types.ClusterReq
+		if err := httpx.Parse(r, &req); err != nil {
+			result.ParamErrorResult(r, w, err)
+			return
+		}
+
+		l := adapters.NewUpdateClusterLogic(r.Context(), svcCtx)
+		resp, err := l.UpdateCluster(&req)
+		result.HttpResult(r, w, resp, err)
+	}
+}
diff --git a/api/internal/handler/routes.go b/api/internal/handler/routes.go
index 67994398..bdda439d 100644
--- a/api/internal/handler/routes.go
+++ b/api/internal/handler/routes.go
@@ -4,6 +4,7 @@ package handler
 import (
 	"net/http"
 
+	adapters "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/handler/adapters"
 	ai "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/handler/ai"
 	apps "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/handler/apps"
 	cloud "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/handler/cloud"
@@ -692,4 +693,60 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
 		},
 		rest.WithPrefix("/pcm/v1"),
 	)
+
+	server.AddRoutes(
+		[]rest.Route{
+			{
+				Method:  http.MethodGet,
+				Path:    "/adapter/list",
+				Handler: adapters.AdaptersListHandler(serverCtx),
+			},
+			{
+				Method:  http.MethodPost,
+				Path:    "/adapter/create",
+				Handler: adapters.CreateAdapterHandler(serverCtx),
+			},
+			{
+				Method:  http.MethodPut,
+				Path:    "/adapter/update",
+				Handler: adapters.UpdateAdapterHandler(serverCtx),
+			},
+			{
+				Method:  http.MethodDelete,
+				Path:    "/adapter/delete",
+				Handler: adapters.DeleteAdapterHandler(serverCtx),
+			},
+			{
+				Method:  http.MethodGet,
+				Path:    "/adapter/get",
+				Handler: adapters.GetAdapterHandler(serverCtx),
+			},
+			{
+				Method:  http.MethodGet,
+				Path:    "/adapter/cluster/list",
+				Handler: adapters.ClusterListHandler(serverCtx),
+			},
+			{
+				Method:  http.MethodPost,
+				Path:    "/adapter/cluster/create",
+				Handler: adapters.CreateClusterHandler(serverCtx),
+			},
+			{
+				Method:  http.MethodPut,
+				Path:    "/adapter/cluster/update",
+				Handler: adapters.UpdateClusterHandler(serverCtx),
+			},
+			{
+				Method:  http.MethodDelete,
+				Path:    "/adapter/cluster/delete",
+				Handler: adapters.DeleteClusterHandler(serverCtx),
+			},
+			{
+				Method:  http.MethodGet,
+				Path:    "/adapter/cluster/get",
+				Handler: adapters.GetClusterHandler(serverCtx),
+			},
+		},
+		rest.WithPrefix("/pcm/v1"),
+	)
 }
diff --git a/api/internal/logic/adapters/adapterslistlogic.go b/api/internal/logic/adapters/adapterslistlogic.go
new file mode 100644
index 00000000..6a11b7c3
--- /dev/null
+++ b/api/internal/logic/adapters/adapterslistlogic.go
@@ -0,0 +1,39 @@
+package adapters
+
+import (
+	"context"
+
+	"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
+	"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/types"
+
+	"github.com/zeromicro/go-zero/core/logx"
+)
+
+// AdaptersListLogic handles the adapter list request.
+type AdaptersListLogic struct {
+	logx.Logger
+	ctx    context.Context
+	svcCtx *svc.ServiceContext
+}
+
+// NewAdaptersListLogic returns an AdaptersListLogic bound to ctx and svcCtx.
+func NewAdaptersListLogic(ctx context.Context, svcCtx *svc.ServiceContext) *AdaptersListLogic {
+	return &AdaptersListLogic{
+		Logger: logx.WithContext(ctx),
+		ctx:    ctx,
+		svcCtx: svcCtx,
+	}
+}
+
+// AdaptersList returns the rows of t_adapter whose deleted_at is NULL, ordered
+// by create_time descending. req is not consulted by the query.
+func (l *AdaptersListLogic) AdaptersList(req *types.AdapterReq) (resp *types.AdapterListResp, err error) {
+	resp = &types.AdapterListResp{}
+	tx := l.svcCtx.DbEngin.Raw("select * from t_adapter where `deleted_at` IS NULL ORDER BY create_time Desc").Scan(&resp.Data)
+	if tx.Error != nil {
+		// The error text is passed as an argument, never as the format string.
+		logx.Errorf("err %v", tx.Error.Error())
+		return nil, tx.Error
+	}
+	return resp, nil
+}
diff --git a/api/internal/logic/adapters/clusterlistlogic.go b/api/internal/logic/adapters/clusterlistlogic.go
new file mode 100644
index 00000000..b97c087d
--- /dev/null
+++ b/api/internal/logic/adapters/clusterlistlogic.go
@@ -0,0 +1,39 @@
+package adapters
+
+import (
+	"context"
+
+	"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
+	"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/types"
+
+	"github.com/zeromicro/go-zero/core/logx"
+)
+
+// ClusterListLogic handles the cluster list request.
+type ClusterListLogic struct {
+	logx.Logger
+	ctx    context.Context
+	svcCtx *svc.ServiceContext
+}
+
+// NewClusterListLogic returns a ClusterListLogic bound to ctx and svcCtx.
+func NewClusterListLogic(ctx context.Context, svcCtx *svc.ServiceContext) *ClusterListLogic {
+	return &ClusterListLogic{
+		Logger: logx.WithContext(ctx),
+		ctx:    ctx,
+		svcCtx: svcCtx,
+	}
+}
+
+// ClusterList returns the rows of t_cluster whose deleted_at is NULL, ordered
+// by create_time descending. req is not consulted by the query.
+func (l *ClusterListLogic) ClusterList(req *types.ClusterReq) (resp *types.ClusterListResp, err error) {
+	resp = &types.ClusterListResp{}
+	tx := l.svcCtx.DbEngin.Raw("select * from t_cluster where `deleted_at` IS NULL ORDER BY create_time Desc").Scan(&resp.Data)
+	if tx.Error != nil {
+		// The error text is passed as an argument, never as the format string.
+		logx.Errorf("err %v", tx.Error.Error())
+		return nil, tx.Error
+	}
+	return resp, nil
+}
diff --git a/api/internal/logic/adapters/createadapterlogic.go b/api/internal/logic/adapters/createadapterlogic.go
new file mode 100644
index 00000000..38b12cec
--- /dev/null
+++ b/api/internal/logic/adapters/createadapterlogic.go
@@ -0,0 +1,43 @@
+package adapters
+
+import (
+	"context"
+	"errors"
+	"github.com/zeromicro/go-zero/core/logx"
+	"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
+	"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/types"
+	"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils"
+	"time"
+)
+
+// CreateAdapterLogic handles the adapter create request.
+type CreateAdapterLogic struct {
+	logx.Logger
+	ctx    context.Context
+	svcCtx *svc.ServiceContext
+}
+
+// NewCreateAdapterLogic returns a CreateAdapterLogic bound to ctx and svcCtx.
+func NewCreateAdapterLogic(ctx context.Context, svcCtx *svc.ServiceContext) *CreateAdapterLogic {
+	return &CreateAdapterLogic{
+		Logger: logx.WithContext(ctx),
+		ctx:    ctx,
+		svcCtx: svcCtx,
+	}
+}
+
+// CreateAdapter copies req into an AdapterInfo, assigns a snowflake id and the
+// current time, and inserts it into t_adapter. It returns (nil, nil) on
+// success; a failed insert is logged and reported as an error.
+func (l *CreateAdapterLogic) CreateAdapter(req *types.AdapterReq) (resp *types.AdapterResp, err error) {
+	adapter := types.AdapterInfo{}
+	utils.Convert(req, &adapter)
+	adapter.Id = utils.GenSnowflakeIDStr()
+	adapter.CreateTime = time.Now().Format("2006-01-02 15:04:05")
+	tx := l.svcCtx.DbEngin.Table("t_adapter").Create(&adapter)
+	if tx.Error != nil {
+		logx.Errorf("err %v", tx.Error.Error())
+		return nil, errors.New("adapter create failed")
+	}
+	return nil, nil
+}
diff --git a/api/internal/logic/adapters/createclusterlogic.go b/api/internal/logic/adapters/createclusterlogic.go
new file mode 100644
index 00000000..60f84a57
--- /dev/null
+++ b/api/internal/logic/adapters/createclusterlogic.go
@@ -0,0 +1,46 @@
+package adapters
+
+import (
+	"context"
+	"errors"
+	"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils"
+	"time"
+
+	"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
+	"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/types"
+
+	"github.com/zeromicro/go-zero/core/logx"
+)
+
+// CreateClusterLogic handles the cluster create request.
+type CreateClusterLogic struct {
+	logx.Logger
+	ctx    context.Context
+	svcCtx *svc.ServiceContext
+}
+
+// NewCreateClusterLogic returns a CreateClusterLogic bound to ctx and svcCtx.
+func NewCreateClusterLogic(ctx context.Context, svcCtx *svc.ServiceContext) *CreateClusterLogic {
+	return &CreateClusterLogic{
+		Logger: logx.WithContext(ctx),
+		ctx:    ctx,
+		svcCtx: svcCtx,
+	}
+}
+
+// CreateCluster copies req into a ClusterInfo, assigns a snowflake id, the
+// current time and owner id "0", and inserts it into t_cluster. It returns
+// (nil, nil) on success; a failed insert is logged and reported as an error.
+func (l *CreateClusterLogic) CreateCluster(req *types.ClusterReq) (resp *types.ClusterResp, err error) {
+	cluster := types.ClusterInfo{}
+	utils.Convert(req, &cluster)
+	cluster.Id = utils.GenSnowflakeIDStr()
+	cluster.CreateTime = time.Now().Format("2006-01-02 15:04:05")
+	cluster.OwnerId = "0"
+	tx := l.svcCtx.DbEngin.Table("t_cluster").Create(&cluster)
+	if tx.Error != nil {
+		logx.Errorf("err %v", tx.Error.Error())
+		return nil, errors.New("cluster create failed")
+	}
+	return nil, nil
+}
diff --git a/api/internal/logic/adapters/deleteadapterlogic.go b/api/internal/logic/adapters/deleteadapterlogic.go
new file mode 100644
index 00000000..f3030bce
--- /dev/null
+++ b/api/internal/logic/adapters/deleteadapterlogic.go
@@ -0,0 +1,55 @@
+package adapters
+
+import (
+	"context"
+	"github.com/pkg/errors"
+
+	"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
+	"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/types"
+
+	"github.com/zeromicro/go-zero/core/logx"
+)
+
+// DeleteAdapterLogic handles the adapter delete request.
+type DeleteAdapterLogic struct {
+	logx.Logger
+	ctx    context.Context
+	svcCtx *svc.ServiceContext
+}
+
+// NewDeleteAdapterLogic returns a DeleteAdapterLogic bound to ctx and svcCtx.
+func NewDeleteAdapterLogic(ctx context.Context, svcCtx *svc.ServiceContext) *DeleteAdapterLogic {
+	return &DeleteAdapterLogic{
+		Logger: logx.WithContext(ctx),
+		ctx:    ctx,
+		svcCtx: svcCtx,
+	}
+}
+
+// DeleteAdapter removes the adapter identified by req.Id from t_adapter. It
+// refuses when a cluster still references the adapter or when the adapter
+// cannot be found. It returns (nil, nil) on success.
+func (l *DeleteAdapterLogic) DeleteAdapter(req *types.AdapterDelReq) (resp *types.AdapterResp, err error) {
+	var sId int64
+	assoc := l.svcCtx.DbEngin.Table("t_adapter").Raw("select a.id from t_cluster c left join t_adapter a on c.adapter_id=a.id where a.id = ? ", req.Id).Scan(&sId)
+	if assoc.Error != nil {
+		logx.Errorf("err %v", assoc.Error.Error())
+		return nil, errors.New("Delete adapter failed")
+	}
+	if sId != 0 {
+		return nil, errors.New("Delete failed,The adapter is associated with a cluster")
+	}
+	db := l.svcCtx.DbEngin.Table("t_adapter").Where("id = ?", req.Id).First(&types.AdapterInfo{})
+	if db.Error != nil {
+		logx.Errorf("err %v", db.Error.Error())
+		return nil, errors.New("Adapter does not exist")
+	}
+	// Bind the id as a query parameter: gorm treats a non-numeric string that is
+	// passed as an inline Delete condition as raw SQL.
+	tx := l.svcCtx.DbEngin.Table("t_adapter").Where("id = ?", req.Id).Delete(&types.AdapterInfo{})
+	if tx.Error != nil {
+		logx.Errorf("err %v", tx.Error.Error())
+		return nil, errors.New("Delete adapter failed")
+	}
+	return nil, nil
+}
diff --git a/api/internal/logic/adapters/deleteclusterlogic.go b/api/internal/logic/adapters/deleteclusterlogic.go
new file mode 100644
index 00000000..f2b75209
--- /dev/null
+++ b/api/internal/logic/adapters/deleteclusterlogic.go
@@ -0,0 +1,45 @@
+package adapters
+
+import (
+	"context"
+	"github.com/pkg/errors"
+
+	"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
+	"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/types"
+
+	"github.com/zeromicro/go-zero/core/logx"
+)
+
+// DeleteClusterLogic handles the cluster delete request.
+type DeleteClusterLogic struct {
+	logx.Logger
+	ctx    context.Context
+	svcCtx *svc.ServiceContext
+}
+
+// NewDeleteClusterLogic returns a DeleteClusterLogic bound to ctx and svcCtx.
+func NewDeleteClusterLogic(ctx context.Context, svcCtx *svc.ServiceContext) *DeleteClusterLogic {
+	return &DeleteClusterLogic{
+		Logger: logx.WithContext(ctx),
+		ctx:    ctx,
+		svcCtx: svcCtx,
+	}
+}
+
+// DeleteCluster removes the cluster identified by req.Id from t_cluster after
+// checking that it can be found. It returns (nil, nil) on success.
+func (l *DeleteClusterLogic) DeleteCluster(req *types.ClusterDelReq) (resp *types.ClusterResp, err error) {
+	db := l.svcCtx.DbEngin.Table("t_cluster").Where("id = ?", req.Id).First(&types.ClusterInfo{})
+	if db.Error != nil {
+		logx.Errorf("err %v", db.Error.Error())
+		return nil, errors.New("Cluster does not exist")
+	}
+	// Bind the id as a query parameter: gorm treats a non-numeric string that is
+	// passed as an inline Delete condition as raw SQL.
+	tx := l.svcCtx.DbEngin.Table("t_cluster").Where("id = ?", req.Id).Delete(&types.ClusterInfo{})
+	if tx.Error != nil {
+		logx.Errorf("err %v", tx.Error.Error())
+		return nil, errors.New("Delete Cluster failed")
+	}
+	return nil, nil
+}
diff --git a/api/internal/logic/adapters/getadapterlogic.go b/api/internal/logic/adapters/getadapterlogic.go
new file mode 100644
index 00000000..06aef43d
--- /dev/null
+++ b/api/internal/logic/adapters/getadapterlogic.go
@@ -0,0 +1,39 @@
+package adapters
+
+import (
+	"context"
+	"github.com/pkg/errors"
+
+	"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
+	"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/types"
+
+	"github.com/zeromicro/go-zero/core/logx"
+)
+
+// GetAdapterLogic handles the single-adapter lookup request.
+type GetAdapterLogic struct {
+	logx.Logger
+	ctx    context.Context
+	svcCtx *svc.ServiceContext
+}
+
+// NewGetAdapterLogic returns a GetAdapterLogic bound to ctx and svcCtx.
+func NewGetAdapterLogic(ctx context.Context, svcCtx *svc.ServiceContext) *GetAdapterLogic {
+	return &GetAdapterLogic{
+		Logger: logx.WithContext(ctx),
+		ctx:    ctx,
+		svcCtx: svcCtx,
+	}
+}
+
+// GetAdapter loads the t_adapter row whose id equals req.Id into resp.Data.
+// Any lookup error is logged and reported as "Adapter does not exist".
+func (l *GetAdapterLogic) GetAdapter(req *types.AdapterDelReq) (resp *types.AdapterResp, err error) {
+	resp = &types.AdapterResp{}
+	db := l.svcCtx.DbEngin.Table("t_adapter").Where("id = ?", req.Id).First(&resp.Data)
+	if db.Error != nil {
+		logx.Errorf("err %v", db.Error.Error())
+		return nil, errors.New("Adapter does not exist")
+	}
+	return resp, nil
+}
diff --git a/api/internal/logic/adapters/getclusterlogic.go b/api/internal/logic/adapters/getclusterlogic.go
new file mode 100644
index 00000000..bf554ed0
--- /dev/null
+++ b/api/internal/logic/adapters/getclusterlogic.go
@@ -0,0 +1,39 @@
+package adapters
+
+import (
+	"context"
+	"github.com/pkg/errors"
+
+	"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
+	"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/types"
+
+	"github.com/zeromicro/go-zero/core/logx"
+)
+
+// GetClusterLogic handles the single-cluster lookup request.
+type GetClusterLogic struct {
+	logx.Logger
+	ctx    context.Context
+	svcCtx *svc.ServiceContext
+}
+
+// NewGetClusterLogic returns a GetClusterLogic bound to ctx and svcCtx.
+func NewGetClusterLogic(ctx context.Context, svcCtx *svc.ServiceContext) *GetClusterLogic {
+	return &GetClusterLogic{
+		Logger: logx.WithContext(ctx),
+		ctx:    ctx,
+		svcCtx: svcCtx,
+	}
+}
+
+// GetCluster loads the t_cluster row whose id equals req.Id into resp.Data.
+// Any lookup error is logged and reported as "Cluster does not exist".
+func (l *GetClusterLogic) GetCluster(req *types.ClusterDelReq) (resp *types.ClusterResp, err error) {
+	resp = &types.ClusterResp{}
+	db := l.svcCtx.DbEngin.Table("t_cluster").Where("id = ?", req.Id).First(&resp.Data)
+	if db.Error != nil {
+		logx.Errorf("err %v", db.Error.Error())
+		return nil, errors.New("Cluster does not exist")
+	}
+	return resp, nil
+}
diff --git a/api/internal/logic/adapters/updateadapterlogic.go b/api/internal/logic/adapters/updateadapterlogic.go
new file mode 100644
index 00000000..511a8be7
--- /dev/null
+++ b/api/internal/logic/adapters/updateadapterlogic.go
@@ -0,0 +1,34 @@
+package adapters
+
+import (
+	"context"
+
+	"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
+	"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/types"
+
+	"github.com/zeromicro/go-zero/core/logx"
+)
+
+// UpdateAdapterLogic handles the adapter update request.
+type UpdateAdapterLogic struct {
+	logx.Logger
+	ctx    context.Context
+	svcCtx *svc.ServiceContext
+}
+
+// NewUpdateAdapterLogic returns an UpdateAdapterLogic bound to ctx and svcCtx.
+func NewUpdateAdapterLogic(ctx context.Context, svcCtx *svc.ServiceContext) *UpdateAdapterLogic {
+	return &UpdateAdapterLogic{
+		Logger: logx.WithContext(ctx),
+		ctx:    ctx,
+		svcCtx: svcCtx,
+	}
+}
+
+// UpdateAdapter is not implemented yet: it ignores req, changes nothing and
+// returns (nil, nil).
+func (l *UpdateAdapterLogic) UpdateAdapter(req *types.AdapterReq) (resp *types.AdapterResp, err error) {
+	// todo: add your logic here and delete this line
+
+	return
+}
diff --git a/api/internal/logic/adapters/updateclusterlogic.go b/api/internal/logic/adapters/updateclusterlogic.go
new file mode 100644
index 00000000..2bab3ce2
--- /dev/null
+++ b/api/internal/logic/adapters/updateclusterlogic.go
@@ -0,0 +1,30 @@
+package adapters
+
+import (
+	"context"
+
+	"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
+	"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/types"
+
+
"github.com/zeromicro/go-zero/core/logx"
+)
+
+type UpdateClusterLogic struct {
+	logx.Logger
+	ctx    context.Context
+	svcCtx *svc.ServiceContext
+}
+
+func NewUpdateClusterLogic(ctx context.Context, svcCtx *svc.ServiceContext) *UpdateClusterLogic {
+	return &UpdateClusterLogic{
+		Logger: logx.WithContext(ctx),
+		ctx:    ctx,
+		svcCtx: svcCtx,
+	}
+}
+
+func (l *UpdateClusterLogic) UpdateCluster(req *types.ClusterReq) (resp *types.ClusterResp, err error) { // not implemented yet: ignores req and returns (nil, nil)
+	// todo: add your logic here and delete this line
+
+	return
+}
diff --git a/api/internal/types/types.go b/api/internal/types/types.go
index 91420796..ad9e8536 100644
--- a/api/internal/types/types.go
+++ b/api/internal/types/types.go
@@ -668,6 +668,112 @@ type AppResp struct {
 	Data interface{} `json:"data,omitempty"`
 }
 
+// AdapterReq is the JSON request shared by the adapter list, create and update endpoints.
+type AdapterReq struct {
+	Id       string `json:"id,optional" db:"id"`
+	Name     string `json:"name,optional"`
+	Type     string `json:"type,optional"`
+	Nickname string `json:"nickname,optional"`
+	Version  string `json:"version,optional"`
+	Server   string `json:"server,optional"`
+}
+
+// AdapterDelReq carries an adapter id as a form parameter; the delete and get endpoints use it.
+type AdapterDelReq struct {
+	Id string `form:"id,optional" db:"id"`
+}
+
+// AdapterInfo is the adapter record read from and written to t_adapter.
+type AdapterInfo struct {
+	Id         string `json:"id,omitempty" db:"id"`
+	Name       string `json:"name,omitempty" db:"name"`
+	Type       string `json:"type,omitempty" db:"type"`
+	Nickname   string `json:"nickname,omitempty" db:"nickname"`
+	Version    string `json:"version,omitempty" db:"version"`
+	Server     string `json:"server,omitempty" db:"server"`
+	CreateTime string `json:"createTime,omitempty" db:"create_time" gorm:"autoCreateTime"`
+}
+
+// AdapterResp is the response envelope carrying a single adapter.
+type AdapterResp struct {
+	Code int         `json:"code,omitempty"`
+	Msg  string      `json:"msg,omitempty"`
+	Data AdapterInfo `json:"data,omitempty"`
+}
+
+// AdapterListResp is the response envelope carrying a list of adapters.
+type AdapterListResp struct {
+	Code int           `json:"code,omitempty"`
+	Msg  string        `json:"msg,omitempty"`
+	Data []AdapterInfo `json:"data,omitempty"`
+}
+
+// ClusterReq is the JSON request shared by the cluster list, create and update endpoints.
+type ClusterReq struct {
+	Id            string `json:"id,optional"`
+	AdapterId     string `json:"adapterId,optional"`
+	Name          string `json:"name,optional"`
+	Nickname      string `json:"nickname,optional"`
+	Description   string `json:"description,optional"`
+	Server        string `json:"server,optional"`
+	MonitorServer string `json:"monitorServer,optional"`
+	Username      string `json:"username,optional"`
+	Password      string `json:"password,optional"`
+	Token         string `json:"token,optional"`
+	Ak            string `json:"ak,optional"`
+	Sk            string `json:"sk,optional"`
+	Region        string `json:"region,optional"`
+	ProjectId     string `json:"projectId,optional"`
+	Version       string `json:"version,optional"`
+	Label         string `json:"label,optional"`
+	OwnerId       string `json:"ownerId,omitempty,optional"`
+	AuthType      string `json:"authType,optional"`
+}
+
+// ClusterDelReq carries a cluster id as a form parameter; the delete and get endpoints use it.
+type ClusterDelReq struct {
+	Id string `form:"id,optional"`
+}
+
+// ClusterInfo is the cluster record read from and written to t_cluster.
+// NOTE(review): Password, Token, Ak and Sk carry JSON tags, so they are
+// serialized wherever a ClusterInfo is returned — confirm this is intended.
+type ClusterInfo struct {
+	Id            string `json:"id,omitempty" db:"id"`
+	AdapterId     string `json:"adapterId,omitempty" db:"adapter_id"`
+	Name          string `json:"name,omitempty" db:"name"`
+	Nickname      string `json:"nickname,omitempty" db:"nickname"`
+	Description   string `json:"description,omitempty" db:"description"`
+	Server        string `json:"server,omitempty" db:"server"`
+	MonitorServer string `json:"monitorServer,omitempty" db:"monitor_server"`
+	Username      string `json:"username,omitempty" db:"username"`
+	Password      string `json:"password,omitempty" db:"password"`
+	Token         string `json:"token,omitempty" db:"token"`
+	Ak            string `json:"ak,omitempty" db:"ak"`
+	Sk            string `json:"sk,omitempty" db:"sk"`
+	Region        string `json:"region,omitempty" db:"region"`
+	ProjectId     string `json:"projectId,omitempty" db:"project_id"`
+	Version       string `json:"version,omitempty" db:"version"`
+	Label         string `json:"label,omitempty" db:"label"`
+	OwnerId       string `json:"ownerId,omitempty" db:"owner_id"`
+	AuthType      string `json:"authType,omitempty" db:"auth_type"`
+	CreateTime    string `json:"createTime,omitempty" db:"create_time" gorm:"autoCreateTime"`
+}
+
+// ClusterResp is the response envelope carrying a single cluster.
+type ClusterResp struct {
+	Code int         `json:"code,omitempty"`
+	Msg  string      `json:"msg,omitempty"`
+	Data ClusterInfo `json:"data,omitempty"`
+}
+
+// ClusterListResp is the response envelope carrying a list of clusters.
+type ClusterListResp struct {
+	Code int           `json:"code,omitempty"`
+	Msg  string        `json:"msg,omitempty"`
+	Data []ClusterInfo `json:"data,omitempty"`
+}
+
 type Job struct {
 	SlurmVersion string `json:"slurmVersion"`
 	Name         string `json:"name"`
diff --git a/pkg/utils/snowflake.go b/pkg/utils/snowflake.go
index fb727d43..9e677f3c 100644
--- a/pkg/utils/snowflake.go
+++ b/pkg/utils/snowflake.go
@@ -36,3 +36,8 @@ func InitSnowflake(machineID int64) (err error) {
 func GenSnowflakeID() int64 {
 	return node.Generate().Int64()
 }
+
+// GenSnowflakeIDStr generates a new snowflake ID and returns its string form.
+func GenSnowflakeIDStr() string {
+	return node.Generate().String()
+}
From c18e5a484cbf4e052fefcc20e9c97235fad18a5d Mon Sep 17 00:00:00 2001
From: tzwang
Date: Thu, 1 Feb 2024 16:29:55 +0800
Subject: [PATCH 18/18] modified executor implementations

Former-commit-id: 4cf3a7804d5668bb0e1c3bd09891225b310598b9
---
 .../logic/storelink/submitlinktasklogic.go    |  2 +-
 .../scheduler/schedulers/aiScheduler.go       |  4 ++--
 .../scheduler/schedulers/option/aiOption.go   | 23 ++++++++++--------
 .../scheduler/service/executor/aiExecutor.go  |  2 +-
 api/internal/storeLink/modelarts.go           | 10 +++++---
 api/internal/storeLink/octopus.go             | 10 +++++---
 api/internal/storeLink/shuguangHpc.go         |  2 +-
 api/internal/storeLink/shuguangai.go          | 24 +++++++++++++++----
 api/internal/storeLink/storeLink.go           |  2 +-
 9 files changed, 52 insertions(+), 27 deletions(-)

diff --git a/api/internal/logic/storelink/submitlinktasklogic.go b/api/internal/logic/storelink/submitlinktasklogic.go
index 98f77f85..80dfab96 100644
--- a/api/internal/logic/storelink/submitlinktasklogic.go
+++ b/api/internal/logic/storelink/submitlinktasklogic.go
@@ -67,7 +67,7 @@ func (l *SubmitLinkTaskLogic) SubmitLinkTask(req *types.SubmitLinkTaskReq) (resp
 				envs = append(envs, env)
 			}
 		}
-		task, err := storelink.ILinkage.SubmitTask(req.ImageId, req.Cmd, envs, params, req.ResourceId)
+		task, err := storelink.ILinkage.SubmitTask(req.ImageId, req.Cmd, envs, params, req.ResourceId, "")
 		if err != nil {
 			return nil, err
 		}
diff --git a/api/internal/scheduler/schedulers/aiScheduler.go b/api/internal/scheduler/schedulers/aiScheduler.go
index 4310bd46..8b11faf5 100644
--- a/api/internal/scheduler/schedulers/aiScheduler.go
+++ b/api/internal/scheduler/schedulers/aiScheduler.go
@@ -30,7 +30,7 @@ type AiScheduler struct {
 	yamlString string
 	task       *response.TaskInfo
 	*scheduler.Scheduler
-	option option.AiOption
+	option *option.AiOption
 }
 
 func NewAiScheduler(val string, scheduler *scheduler.Scheduler) (*AiScheduler, error) {
@@ -74,7 +74,7 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) error {
 
 	executorMap := *as.AiExecutor
 	for _, cluster := range clusters {
-		_, err := executorMap[cluster.Name].Execute(option.AiOption{})
+		_, err := executorMap[cluster.Name].Execute(as.option)
 		if err != nil {
 			// TODO: database operation
 		}
diff --git a/api/internal/scheduler/schedulers/option/aiOption.go b/api/internal/scheduler/schedulers/option/aiOption.go
index 2d45383c..b1029d37 100644
--- a/api/internal/scheduler/schedulers/option/aiOption.go
+++ b/api/internal/scheduler/schedulers/option/aiOption.go
@@ -1,17 +1,20 @@
 package option
 
 type AiOption struct {
-	aiType       string // shuguangAi/octopus
-	resourceType string // cpu/gpu/compute card
-	taskType     string // pytorch/tensorflow
+	AiType       string // shuguangAi/octopus
+	ResourceType string // cpu/gpu/compute card
+	TaskType     string // pytorch/tensorflow
 
-	imageId    string
-	specId     string
-	datasetsId string
-	codeId     string
+	ImageId    string
+	SpecId     string
+	DatasetsId string
+	CodeId     string
+	ResourceId string
 
-	cmd string
+	Cmd    string
+	Envs   []string
+	Params []string
 
-	datasets string
-	code     string
+	Datasets string
+	Code     string
 }
diff --git a/api/internal/scheduler/service/executor/aiExecutor.go b/api/internal/scheduler/service/executor/aiExecutor.go
index a52ab062..abe91b0c 100644
--- a/api/internal/scheduler/service/executor/aiExecutor.go
+++ b/api/internal/scheduler/service/executor/aiExecutor.go
@@ -6,6 +6,6 @@ import (
 )
 
 type AiExecutor interface {
-	Execute(option option.AiOption) (interface{}, error)
+	Execute(option *option.AiOption) (interface{}, error)
 	storeLink.Linkage
 }
diff --git a/api/internal/storeLink/modelarts.go b/api/internal/storeLink/modelarts.go
index 31489205..14a8a181 100644
--- a/api/internal/storeLink/modelarts.go
+++ b/api/internal/storeLink/modelarts.go
@@ -63,7 +63,7 @@ func (o *ModelArtsLink) QueryImageList() (interface{}, error) {
 	return resp, nil
 }
 
-func (o *ModelArtsLink) SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string) (interface{}, error) {
+func (o *ModelArtsLink) SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string, aiType string) (interface{}, error) {
 	// modelArts提交任务
 	environments := make(map[string]string)
 	parameters := make([]*modelarts.ParametersTrainJob, 0)
@@ -153,6 +153,10 @@ func (o *ModelArtsLink) GetResourceSpecs() (*collector.ResourceSpecs, error) {
 	return nil, nil
 }
 
-func (o *ModelArtsLink) Execute(option option.AiOption) (interface{}, error) {
-	return nil, nil
+func (o *ModelArtsLink) Execute(option *option.AiOption) (interface{}, error) { // Execute submits the task described by option through SubmitTask.
+	task, err := o.SubmitTask(option.ImageId, option.Cmd, option.Envs, option.Params, option.ResourceId, option.AiType)
+	if err != nil {
+		return nil, err
+	}
+	return task, nil
 }
diff --git a/api/internal/storeLink/octopus.go b/api/internal/storeLink/octopus.go
index b40da2ee..cdc97ea9 100644
--- a/api/internal/storeLink/octopus.go
+++ b/api/internal/storeLink/octopus.go
@@ -107,7 +107,7 @@ func (o *OctopusLink) QueryImageList() (interface{}, error) {
 	return resp, nil
 }
 
-func (o *OctopusLink) SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string) (interface{}, error) {
+func (o *OctopusLink) SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string, aiType string) (interface{}, error) {
 	// octopus提交任务
 
 	// python参数
@@ -200,6 +200,10 @@ func (o *OctopusLink) GetResourceSpecs() (*collector.ResourceSpecs, error) {
 	return nil, nil
 }
 
-func (o *OctopusLink) Execute(option option.AiOption) (interface{}, error) {
-	return nil, nil
+func (o *OctopusLink) Execute(option *option.AiOption) (interface{}, error) { // Execute submits the task described by option through SubmitTask.
+	task, err := o.SubmitTask(option.ImageId, option.Cmd, option.Envs, option.Params, option.ResourceId, option.AiType)
+	if err != nil {
+		return nil, err
+	}
+	return task, nil
 }
diff --git a/api/internal/storeLink/shuguangHpc.go b/api/internal/storeLink/shuguangHpc.go
index 7c80b456..f7f0af82 100644
--- a/api/internal/storeLink/shuguangHpc.go
+++ b/api/internal/storeLink/shuguangHpc.go
@@ -144,7 +144,7 @@ func (s ShuguangHpc) QueryImageList() (interface{}, error) {
 	return nil, nil
 }
 
-func (s ShuguangHpc) SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string) (interface{}, error) {
+func (s ShuguangHpc) SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string, aiType string) (interface{}, error) {
 	// shuguangHpc提交任务
 
 	//判断是否resourceId匹配自定义资源Id
diff --git a/api/internal/storeLink/shuguangai.go b/api/internal/storeLink/shuguangai.go
index 3d027d41..57fecfc6 100644
--- a/api/internal/storeLink/shuguangai.go
+++ b/api/internal/storeLink/shuguangai.go
@@ -76,9 +76,7 @@ func (s *ShuguangAi) QueryImageList() (interface{}, error) {
 	return resp, nil
 }
 
-func (s *ShuguangAi) SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string) (interface{}, error) {
-	// shuguangAi提交任务
-
+func (s *ShuguangAi) SubmitPytorchTask(imageId string, cmd string, envs []string, params []string, resourceId string) (interface{}, error) {
 	//判断是否resourceId匹配自定义资源Id
 	if resourceId != SHUGUANGAI_CUSTOM_RESOURCE_ID {
 		return nil, errors.New("shuguangAi资源Id不存在")
@@ -133,6 +131,18 @@ func (s *ShuguangAi) SubmitTask(imageId string, cmd string, envs []string, param
 	return resp, nil
 }
 
+// SubmitTask submits by aiType; "" (callers that do not set it) means PYTORCH.
+func (s *ShuguangAi) SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string, aiType string) (interface{}, error) {
+	if aiType != "" && aiType != PYTORCH {
+		return nil, errors.New("shuguangAi不支持的任务类型")
+	}
+	task, err := s.SubmitPytorchTask(imageId, cmd, envs, params, resourceId)
+	if err != nil {
+		return nil, err
+	}
+	return task, nil
+}
+
 func (s *ShuguangAi) QueryTask(taskId string) (interface{}, error) {
 	// shuguangAi获取任务
 	req := &hpcAC.GetPytorchTaskReq{
@@ -199,6 +209,10 @@ func (o *ShuguangAi) GetResourceSpecs() (*collector.ResourceSpecs, error) {
 	return nil, nil
 }
 
-func (o *ShuguangAi) Execute(option option.AiOption) (interface{}, error) {
-	return nil, nil
+func (o *ShuguangAi) Execute(option *option.AiOption) (interface{}, error) { // Execute submits the task described by option through SubmitTask.
+	task, err := o.SubmitTask(option.ImageId, option.Cmd, option.Envs, option.Params, option.ResourceId, option.AiType) // NOTE(review): AiOption documents AiType as shuguangAi/octopus and TaskType as pytorch/tensorflow, while SubmitTask compares this value with PYTORCH — confirm which field is intended
+	if err != nil {
+		return nil, err
+	}
+	return task, nil
 }
diff --git a/api/internal/storeLink/storeLink.go b/api/internal/storeLink/storeLink.go
index 18046da4..3a644d8e 100644
--- a/api/internal/storeLink/storeLink.go
+++ b/api/internal/storeLink/storeLink.go
@@ -31,7 +31,7 @@ type Linkage interface {
 	UploadImage(path string) (interface{}, error)
 	DeleteImage(imageId string) (interface{}, error)
 	QueryImageList() (interface{}, error)
-	SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string) (interface{}, error)
+	SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string, aiType string) (interface{}, error)
 	QueryTask(taskId string) (interface{}, error)
 	QuerySpecs() (interface{}, error)
 	DeleteTask(taskId string) (interface{}, error)