From aac4ee3c67620982154968f6cb6aa8b2c555302a Mon Sep 17 00:00:00 2001 From: tzwang Date: Fri, 19 Jan 2024 17:31:57 +0800 Subject: [PATCH] scheduler refactor updated Former-commit-id: 3c4f190ce82f0a0c48533548a67fd23bf7f1c3ea --- pkg/scheduler/aiScheduler.go | 4 ++++ pkg/scheduler/cloudScheduler.go | 4 ++++ pkg/scheduler/collector/acCollector.go | 13 +++++++++++++ pkg/scheduler/collector/aiCollector.go | 8 -------- pkg/scheduler/collector/collector.go | 13 ++++++++++++- pkg/scheduler/common.go | 1 + pkg/scheduler/hpcScheduler.go | 4 ++++ pkg/scheduler/scheduler.go | 7 ++++++- .../{replicationStrategy.go => replication.go} | 0 .../{priceBasedStrategy.go => resourcePricing.go} | 0 pkg/scheduler/strategies/staticWeight.go | 10 ++++++++++ 11 files changed, 54 insertions(+), 10 deletions(-) create mode 100644 pkg/scheduler/collector/acCollector.go delete mode 100644 pkg/scheduler/collector/aiCollector.go rename pkg/scheduler/strategies/{replicationStrategy.go => replication.go} (100%) rename pkg/scheduler/strategies/{priceBasedStrategy.go => resourcePricing.go} (100%) create mode 100644 pkg/scheduler/strategies/staticWeight.go diff --git a/pkg/scheduler/aiScheduler.go b/pkg/scheduler/aiScheduler.go index cbec54f2..52708f30 100644 --- a/pkg/scheduler/aiScheduler.go +++ b/pkg/scheduler/aiScheduler.go @@ -52,3 +52,7 @@ func (as *AiScheduler) pickOptimalStrategy() (strategies.Strategy, error) { func (as *AiScheduler) genTaskAndProviders() (*providerPricing.Task, []*providerPricing.Provider) { return nil, nil } + +func (as *AiScheduler) assignTask(clusters []*strategies.AssignedCluster) error { + return nil +} diff --git a/pkg/scheduler/cloudScheduler.go b/pkg/scheduler/cloudScheduler.go index 37bd9ca6..95121182 100644 --- a/pkg/scheduler/cloudScheduler.go +++ b/pkg/scheduler/cloudScheduler.go @@ -116,3 +116,7 @@ func (cs *CloudScheduler) genTaskAndProviders() (*providerPricing.Task, []*provi return nil, providerList, nil } + +func (cs *CloudScheduler) assignTask(clusters []*strategies.AssignedCluster) error { + return nil +} diff --git a/pkg/scheduler/collector/acCollector.go b/pkg/scheduler/collector/acCollector.go new file mode 100644 index 00000000..fdeee694 --- /dev/null +++ b/pkg/scheduler/collector/acCollector.go @@ -0,0 +1,13 @@ +package collector + +//单条作业费=作业运行秒数×(CPU核心数*CPU单价+GPU卡数×GPU单价+DCU卡数×DCU单价)/3600 +//CPU单价=队列CPU费率×计算中心CPU单价 +//GPU单价=队列GPU费率×计算中心GPU单价 +//DCU单价=队列DCU费率×计算中心DCU单价 + +type ShuguangAiCollector struct { +} + +func (a *ShuguangAiCollector) getResourceSpecs() { + +} diff --git a/pkg/scheduler/collector/aiCollector.go b/pkg/scheduler/collector/aiCollector.go deleted file mode 100644 index e383cba3..00000000 --- a/pkg/scheduler/collector/aiCollector.go +++ /dev/null @@ -1,8 +0,0 @@ -package collector - -type AiCollector struct { -} - -func (a *AiCollector) getResourceSpecs() { - -} diff --git a/pkg/scheduler/collector/collector.go b/pkg/scheduler/collector/collector.go index 7e0739c5..73bcffd0 100644 --- a/pkg/scheduler/collector/collector.go +++ b/pkg/scheduler/collector/collector.go @@ -1,8 +1,19 @@ package collector type ResourceCollector interface { - getResourceSpecs() + getResourceSpecs() ([]ResourceSpecs, error) } type ResourceSpecs struct { + CpuAvail float64 + MemAvail float64 + DiskAvail float64 + GpuAvail float64 + CardAvail []Card +} + +type Card struct { + Type string + Name string + TOpsAtFp16 float64 } diff --git a/pkg/scheduler/common.go b/pkg/scheduler/common.go index 62270a92..7b2cab8e 100644 --- a/pkg/scheduler/common.go +++ b/pkg/scheduler/common.go @@ -24,6 +24,7 @@ import ( type scheduleService interface { getNewStructForDb(task *response.TaskInfo, resource string, participantId int64) (interface{}, error) pickOptimalStrategy() (strategies.Strategy, error) + assignTask(clusters []*strategies.AssignedCluster) error } // 求交集 diff --git a/pkg/scheduler/hpcScheduler.go b/pkg/scheduler/hpcScheduler.go index bf964bb7..af6416e6 100644 --- a/pkg/scheduler/hpcScheduler.go +++ b/pkg/scheduler/hpcScheduler.go @@ -49,3 +49,7 @@ func (h *HpcScheduler) pickOptimalStrategy() (strategies.Strategy, error) { func (h *HpcScheduler) genTaskAndProviders(task *response.TaskInfo) (*providerPricing.Task, []*providerPricing.Provider) { return nil, nil } + +func (h *HpcScheduler) assignTask(clusters []*strategies.AssignedCluster) error { + return nil +} diff --git a/pkg/scheduler/scheduler.go b/pkg/scheduler/scheduler.go index e05351c8..e04952a9 100644 --- a/pkg/scheduler/scheduler.go +++ b/pkg/scheduler/scheduler.go @@ -136,7 +136,7 @@ func (s *scheduler) AssignAndSchedule() error { return err } - _, err = strategy.Schedule() + clusters, err := strategy.Schedule() if err != nil { return err } @@ -147,6 +147,11 @@ func (s *scheduler) AssignAndSchedule() error { // return nil //} + err = s.scheduleService.assignTask(clusters) + if err != nil { + return err + } + return nil } diff --git a/pkg/scheduler/strategies/replicationStrategy.go b/pkg/scheduler/strategies/replication.go similarity index 100% rename from pkg/scheduler/strategies/replicationStrategy.go rename to pkg/scheduler/strategies/replication.go diff --git a/pkg/scheduler/strategies/priceBasedStrategy.go b/pkg/scheduler/strategies/resourcePricing.go similarity index 100% rename from pkg/scheduler/strategies/priceBasedStrategy.go rename to pkg/scheduler/strategies/resourcePricing.go diff --git a/pkg/scheduler/strategies/staticWeight.go b/pkg/scheduler/strategies/staticWeight.go new file mode 100644 index 00000000..8e08d219 --- /dev/null +++ b/pkg/scheduler/strategies/staticWeight.go @@ -0,0 +1,10 @@ +package strategies + +type StaticWeightStrategy struct { + // TODO: add fields +} + +func (ps *StaticWeightStrategy) Schedule() ([]*AssignedCluster, error) { + // TODO: implement the scheduling logic return nil, nil + return nil, nil +}