Merge pull request 'scheduler module modified' (#12) from tzwang/pcm-coordinator:master into master

Former-commit-id: 7cceae0438dce12298a59c312b1ffd05db791083
This commit is contained in:
tzwang 2024-01-24 18:10:59 +08:00
commit bf27a2d75c
23 changed files with 435 additions and 189 deletions

View File

@ -17,7 +17,9 @@ package mqs
import (
"context"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
scheduler2 "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/schedulers"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service"
)
/*
@ -25,36 +27,29 @@ import (
Listening to the payment flow status change notification message queue
*/
type AiQueue struct {
ctx context.Context
svcCtx *svc.ServiceContext
ctx context.Context
svcCtx *svc.ServiceContext
scheduler *scheduler.Scheduler
}
func NewAiMq(ctx context.Context, svcCtx *svc.ServiceContext) *AiQueue {
aiExecutorMap, aiCollectorMap := service.InitAiClusterMap(svcCtx.ACRpc, svcCtx.ModelArtsRpc, svcCtx.ModelArtsImgRpc, svcCtx.OctopusRpc)
return &AiQueue{
ctx: ctx,
svcCtx: svcCtx,
ctx: ctx,
svcCtx: svcCtx,
scheduler: scheduler.NewScheduler2(aiCollectorMap, nil, aiExecutorMap),
}
}
func (l *AiQueue) Consume(val string) error {
// 接受消息, 根据标签筛选过滤
aiSchdl := scheduler2.NewAiScheduler(val)
schdl, err := scheduler2.NewScheduler(aiSchdl, val, l.svcCtx.DbEngin, nil)
if err != nil {
return err
}
schdl.MatchLabels()
aiSchdl, _ := schedulers.NewAiScheduler(val, l.scheduler)
// 调度算法
err = schdl.AssignAndSchedule()
err := l.scheduler.AssignAndSchedule(aiSchdl)
if err != nil {
return err
}
// 存储数据
err = schdl.SaveToDb()
if err != nil {
return err
}
return nil
}

View File

@ -18,6 +18,7 @@ import (
"context"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/schedulers"
)
/*
@ -37,7 +38,7 @@ func NewCloudMq(ctx context.Context, svcCtx *svc.ServiceContext) *CloudMq {
func (l *CloudMq) Consume(val string) error {
// 接受消息, 根据标签筛选过滤
cloudScheduler := scheduler.NewCloudScheduler()
cloudScheduler := schedulers.NewCloudScheduler()
schdl, err := scheduler.NewScheduler(cloudScheduler, val, l.svcCtx.DbEngin, l.svcCtx.ParticipantRpc)
if err != nil {
return err

View File

@ -17,7 +17,6 @@ package mqs
import (
"context"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
scheduler2 "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler"
)
/*
@ -37,24 +36,5 @@ func NewHpcMq(ctx context.Context, svcCtx *svc.ServiceContext) *HpcMq {
}
func (l *HpcMq) Consume(val string) error {
// 接受消息, 根据标签筛选过滤
hpcSchdl := scheduler2.NewHpcScheduler(val)
schdl, err := scheduler2.NewScheduler(hpcSchdl, val, l.svcCtx.DbEngin, nil)
if err != nil {
return err
}
schdl.MatchLabels()
// 调度算法
err = schdl.AssignAndSchedule()
if err != nil {
return err
}
// 存储数据
err = schdl.SaveToDb()
if err != nil {
return err
}
return nil
}

View File

@ -1,58 +0,0 @@
/*
Copyright (c) [2023] [pcm]
[pcm-coordinator] is licensed under Mulan PSL v2.
You can use this software according to the terms and conditions of the Mulan PSL v2.
You may obtain a copy of Mulan PSL v2 at:
http://license.coscl.org.cn/MulanPSL2
THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
EITHER EXPaRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
See the Mulan PSL v2 for more details.
*/
package scheduler
import (
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/pkg/response"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/models"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/algorithm/providerPricing"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/collector"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/strategies"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils"
)
type AiScheduler struct {
yamlString string
collector collector.ResourceCollector
}
func NewAiScheduler(val string) *AiScheduler {
return &AiScheduler{yamlString: val}
}
func (as *AiScheduler) getNewStructForDb(task *response.TaskInfo, resource string, participantId int64) (interface{}, error) {
ai := models.Ai{
ParticipantId: participantId,
TaskId: task.TaskId,
Status: "Saved",
YamlString: as.yamlString,
}
utils.Convert(task.Metadata, &ai)
return ai, nil
}
func (as *AiScheduler) pickOptimalStrategy() (strategies.Strategy, error) {
//a, b := as.genTaskAndProviders()
return nil, nil
}
func (as *AiScheduler) genTaskAndProviders() (*providerPricing.Task, []*providerPricing.Provider) {
return nil, nil
}
func (as *AiScheduler) assignTask(clusters []*strategies.AssignedCluster) error {
return nil
}

View File

@ -1,13 +0,0 @@
package collector
//单条作业费=作业运行秒数×(CPU核心数*CPU单价+GPU卡数×GPU单价+DCU卡数×DCU单价)/3600
//CPU单价=队列CPU费率×计算中心CPU单价
//GPU单价=队列GPU费率×计算中心GPU单价
//DCU单价=队列DCU费率×计算中心DCU单价
type ShuguangAiCollector struct {
}
func (a *ShuguangAiCollector) getResourceSpecs() {
}

View File

@ -1,19 +0,0 @@
package collector
type ResourceCollector interface {
getResourceSpecs() ([]ResourceSpecs, error)
}
type ResourceSpecs struct {
CpuAvail float64
MemAvail float64
DiskAvail float64
GpuAvail float64
CardAvail []Card
}
type Card struct {
Type string
Name string
TOpsAtFp16 float64
}

View File

@ -12,23 +12,23 @@
*/
package scheduler
package common
import (
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/pkg/response"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/strategies"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/strategy"
"math/rand"
"time"
)
type scheduleService interface {
getNewStructForDb(task *response.TaskInfo, resource string, participantId int64) (interface{}, error)
pickOptimalStrategy() (strategies.Strategy, error)
assignTask(clusters []*strategies.AssignedCluster) error
type SubSchedule interface {
GetNewStructForDb(task *response.TaskInfo, resource string, participantId int64) (interface{}, error)
PickOptimalStrategy() (strategy.Strategy, error)
AssignTask(clusters []*strategy.AssignedCluster) error
}
// 求交集
func intersect(slice1, slice2 []int64) []int64 {
func Intersect(slice1, slice2 []int64) []int64 {
m := make(map[int64]int)
nn := make([]int64, 0)
for _, v := range slice1 {
@ -44,7 +44,7 @@ func intersect(slice1, slice2 []int64) []int64 {
return nn
}
func micsSlice(origin []int64, count int) []int64 {
func MicsSlice(origin []int64, count int) []int64 {
tmpOrigin := make([]int64, len(origin))
copy(tmpOrigin, origin)
//一定要seed

View File

@ -19,31 +19,42 @@ import (
"github.com/pkg/errors"
"github.com/zeromicro/go-zero/core/logx"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/pkg/response"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/common"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/database"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service/collector"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service/executor"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/rpc/client/participantservice"
"gorm.io/gorm"
"sigs.k8s.io/yaml"
"strings"
)
type scheduler struct {
task *response.TaskInfo
participantIds []int64
scheduleService scheduleService
dbEngin *gorm.DB
result []string //pID:子任务yamlstring 键值对
participantRpc participantservice.ParticipantService
type Scheduler struct {
task *response.TaskInfo
participantIds []int64
subSchedule common.SubSchedule
dbEngin *gorm.DB
result []string //pID:子任务yamlstring 键值对
participantRpc participantservice.ParticipantService
ResourceCollector *map[string]collector.ResourceCollector
Storages database.Storage
AiExecutor *map[string]executor.Executor
}
func NewScheduler(scheduleService scheduleService, val string, dbEngin *gorm.DB, participantRpc participantservice.ParticipantService) (*scheduler, error) {
func NewScheduler(subSchedule common.SubSchedule, val string, dbEngin *gorm.DB, participantRpc participantservice.ParticipantService) (*Scheduler, error) {
var task *response.TaskInfo
err := json.Unmarshal([]byte(val), &task)
if err != nil {
return nil, errors.New("create scheduler failed : " + err.Error())
}
return &scheduler{task: task, scheduleService: scheduleService, dbEngin: dbEngin, participantRpc: participantRpc}, nil
return &Scheduler{task: task, subSchedule: subSchedule, dbEngin: dbEngin, participantRpc: participantRpc}, nil
}
func (s *scheduler) SpecifyClusters() {
func NewScheduler2(resourceCollector *map[string]collector.ResourceCollector, storages database.Storage, aiExecutor *map[string]executor.Executor) *Scheduler {
return &Scheduler{ResourceCollector: resourceCollector, Storages: storages, AiExecutor: aiExecutor}
}
func (s *Scheduler) SpecifyClusters() {
// 如果已指定集群名通过数据库查询后返回p端ip列表
if len(s.task.Clusters) != 0 {
s.dbEngin.Raw("select id from sc_participant_phy_info where `name` in (?)", s.task.Clusters).Scan(&s.participantIds)
@ -51,7 +62,7 @@ func (s *scheduler) SpecifyClusters() {
}
}
func (s *scheduler) SpecifyNsID() {
func (s *Scheduler) SpecifyNsID() {
// 未指定集群名只指定nsID
if len(s.task.Clusters) == 0 {
if len(s.task.NsID) != 0 {
@ -67,7 +78,7 @@ func (s *scheduler) SpecifyNsID() {
}
}
func (s *scheduler) MatchLabels() {
func (s *Scheduler) MatchLabels() {
var ids []int64
count := 0
@ -80,7 +91,7 @@ func (s *scheduler) MatchLabels() {
if count == 0 {
ids = participantIds
}
ids = intersect(ids, participantIds)
ids = common.Intersect(ids, participantIds)
count++
}
s.participantIds = ids
@ -90,7 +101,7 @@ func (s *scheduler) MatchLabels() {
}
// TempAssign todo 屏蔽原调度算法
func (s *scheduler) TempAssign() error {
func (s *Scheduler) TempAssign() error {
//需要判断task中的资源类型针对metadata中的多个kind做不同处理
//输入副本数和集群列表最终结果输出为pID对应副本数量列表针对多个kind需要做拆分和重新拼接组合
@ -110,28 +121,28 @@ func (s *scheduler) TempAssign() error {
return nil
}
func (s *scheduler) AssignAndSchedule() error {
// 已指定 ParticipantId
if s.task.ParticipantId != 0 {
return nil
}
// 标签匹配以及后未找到ParticipantIds
if len(s.participantIds) == 0 {
return errors.New("未找到匹配的ParticipantIds")
}
func (s *Scheduler) AssignAndSchedule(ss common.SubSchedule) error {
//// 已指定 ParticipantId
//if s.task.ParticipantId != 0 {
// return nil
//}
//// 标签匹配以及后未找到ParticipantIds
//if len(s.participantIds) == 0 {
// return errors.New("未找到匹配的ParticipantIds")
//}
//
//// 指定或者标签匹配的结果只有一个集群,给任务信息指定
//if len(s.participantIds) == 1 {
// s.task.ParticipantId = s.participantIds[0]
// //replicas := s.task.Metadata.(map[string]interface{})["spec"].(map[string]interface{})["replicas"].(float64)
// //result := make(map[int64]string)
// //result[s.participantIds[0]] = strconv.FormatFloat(replicas, 'f', 2, 64)
// //s.result = result
//
// return nil
//}
// 指定或者标签匹配的结果只有一个集群,给任务信息指定
if len(s.participantIds) == 1 {
s.task.ParticipantId = s.participantIds[0]
//replicas := s.task.Metadata.(map[string]interface{})["spec"].(map[string]interface{})["replicas"].(float64)
//result := make(map[int64]string)
//result[s.participantIds[0]] = strconv.FormatFloat(replicas, 'f', 2, 64)
//s.result = result
return nil
}
strategy, err := s.scheduleService.pickOptimalStrategy()
strategy, err := ss.PickOptimalStrategy()
if err != nil {
return err
}
@ -147,7 +158,7 @@ func (s *scheduler) AssignAndSchedule() error {
// return nil
//}
err = s.scheduleService.assignTask(clusters)
err = ss.AssignTask(clusters)
if err != nil {
return err
}
@ -155,12 +166,12 @@ func (s *scheduler) AssignAndSchedule() error {
return nil
}
func (s *scheduler) SaveToDb() error {
func (s *Scheduler) SaveToDb() error {
for _, participantId := range s.participantIds {
for _, resource := range s.task.Metadata {
structForDb, err := s.scheduleService.getNewStructForDb(s.task, resource, participantId)
structForDb, err := s.subSchedule.GetNewStructForDb(s.task, resource, participantId)
if err != nil {
return err
}

View File

@ -0,0 +1,102 @@
/*
Copyright (c) [2023] [pcm]
[pcm-coordinator] is licensed under Mulan PSL v2.
You can use this software according to the terms and conditions of the Mulan PSL v2.
You may obtain a copy of Mulan PSL v2 at:
http://license.coscl.org.cn/MulanPSL2
THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
EITHER EXPaRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
See the Mulan PSL v2 for more details.
*/
package schedulers
import (
"errors"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/pkg/response"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/models"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/algorithm/providerPricing"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/entity"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service/collector"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/strategy"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils"
)
type AiScheduler struct {
yamlString string
task *response.TaskInfo
*scheduler.Scheduler
}
func NewAiScheduler(val string, scheduler *scheduler.Scheduler) (*AiScheduler, error) {
return &AiScheduler{yamlString: val, Scheduler: scheduler}, nil
}
func (as *AiScheduler) GetNewStructForDb(task *response.TaskInfo, resource string, participantId int64) (interface{}, error) {
ai := models.Ai{
ParticipantId: participantId,
TaskId: task.TaskId,
Status: "Saved",
YamlString: as.yamlString,
}
utils.Convert(task.Metadata, &ai)
return ai, nil
}
func (as *AiScheduler) PickOptimalStrategy() (strategy.Strategy, error) {
resources, err := as.findProvidersWithResource()
if err != nil {
return nil, err
}
if len(resources) < 2 /*|| as.task */ {
var pros []entity.Participant
for _, resource := range resources {
pros = append(pros, entity.Participant{
Participant_id: resource.ParticipantId,
Name: resource.Name,
})
}
strategy := strategy.NewReplicationStrategy(nil, 0)
return strategy, nil
}
task, providerList := as.genTaskAndProviders()
if err != nil {
return nil, nil
}
strategy := strategy.NewPricingStrategy(task, providerList...)
return strategy, nil
}
func (as *AiScheduler) genTaskAndProviders() (*providerPricing.Task, []*providerPricing.Provider) {
return nil, nil
}
func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) error {
if clusters == nil {
return errors.New("clusters is nil")
}
return nil
}
func (as *AiScheduler) findProvidersWithResource() ([]*collector.ResourceSpecs, error) {
var resourceSpecs []*collector.ResourceSpecs
for _, resourceCollector := range *as.ResourceCollector {
spec, err := resourceCollector.GetResourceSpecs()
if err != nil {
continue
}
resourceSpecs = append(resourceSpecs, spec)
}
if len(resourceSpecs) == 0 {
return nil, errors.New("no resource found")
}
return resourceSpecs, nil
}

View File

@ -12,7 +12,7 @@
*/
package scheduler
package schedulers
import (
"bytes"
@ -20,7 +20,7 @@ import (
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/models"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/algorithm/providerPricing"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/database"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/strategies"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/strategy"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils"
"io"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
@ -37,17 +37,17 @@ func NewCloudScheduler() *CloudScheduler {
return &CloudScheduler{}
}
func (cs *CloudScheduler) pickOptimalStrategy() (strategies.Strategy, error) {
func (cs *CloudScheduler) PickOptimalStrategy() (strategy.Strategy, error) {
task, providerList, err := cs.genTaskAndProviders()
if err != nil {
return nil, nil
}
//调度算法
strategy := strategies.NewPricingStrategy(task, providerList...)
strategy := strategy.NewPricingStrategy(task, providerList...)
return strategy, nil
}
func (cs *CloudScheduler) getNewStructForDb(task *response.TaskInfo, resource string, participantId int64) (interface{}, error) {
func (cs *CloudScheduler) GetNewStructForDb(task *response.TaskInfo, resource string, participantId int64) (interface{}, error) {
cloud := cs.UnMarshalK8sStruct(resource, task.TaskId, task.NsID)
cloud.Id = utils.GenSnowflakeID()
cloud.NsID = task.NsID
@ -117,6 +117,6 @@ func (cs *CloudScheduler) genTaskAndProviders() (*providerPricing.Task, []*provi
return nil, providerList, nil
}
func (cs *CloudScheduler) assignTask(clusters []*strategies.AssignedCluster) error {
func (cs *CloudScheduler) AssignTask(clusters []*strategy.AssignedCluster) error {
return nil
}

View File

@ -12,14 +12,14 @@
*/
package scheduler
package schedulers
import (
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/pkg/response"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/constants"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/models"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/algorithm/providerPricing"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/strategies"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/strategy"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils"
)
@ -31,7 +31,7 @@ func NewHpcScheduler(val string) *HpcScheduler {
return &HpcScheduler{yamlString: val}
}
func (h *HpcScheduler) getNewStructForDb(task *response.TaskInfo, resource string, participantId int64) (interface{}, error) {
func (h *HpcScheduler) GetNewStructForDb(task *response.TaskInfo, resource string, participantId int64) (interface{}, error) {
hpc := models.Hpc{}
utils.Convert(task.Metadata, &hpc)
hpc.Id = utils.GenSnowflakeID()
@ -42,7 +42,7 @@ func (h *HpcScheduler) getNewStructForDb(task *response.TaskInfo, resource strin
return hpc, nil
}
func (h *HpcScheduler) pickOptimalStrategy() (strategies.Strategy, error) {
func (h *HpcScheduler) PickOptimalStrategy() (strategy.Strategy, error) {
return nil, nil
}
@ -50,6 +50,6 @@ func (h *HpcScheduler) genTaskAndProviders(task *response.TaskInfo) (*providerPr
return nil, nil
}
func (h *HpcScheduler) assignTask(clusters []*strategies.AssignedCluster) error {
func (h *HpcScheduler) AssignTask(clusters []*strategy.AssignedCluster) error {
return nil
}

View File

@ -0,0 +1,24 @@
package schedulers
import (
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/pkg/response"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/strategy"
)
type VmScheduler struct {
}
func (v VmScheduler) GetNewStructForDb(task *response.TaskInfo, resource string, participantId int64) (interface{}, error) {
//TODO implement me
panic("implement me")
}
func (v VmScheduler) PickOptimalStrategy() (strategy.Strategy, error) {
//TODO implement me
panic("implement me")
}
func (v VmScheduler) AssignTask(clusters []*strategy.AssignedCluster) error {
//TODO implement me
panic("implement me")
}

View File

@ -0,0 +1,49 @@
package service
import (
"gitlink.org.cn/jcce-pcm/pcm-ac/hpcacclient"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service/collector"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service/executor"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service/impl"
"gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/client/imagesservice"
"gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/client/modelartsservice"
"gitlink.org.cn/jcce-pcm/pcm-participant-octopus/octopusclient"
)
const (
OCTOPUS = "Octopus"
MODELARTS = "Modelarts"
SHUGUANGAI = "ShuguangAi"
)
var (
AiTypeMap = map[string]string{
"Hanwuji": OCTOPUS,
"Suiyan": OCTOPUS,
"Sailingsi": OCTOPUS,
"modelarts-CloudBrain2": MODELARTS,
"ShuguangAi": SHUGUANGAI,
}
)
func InitAiClusterMap(ACRpc hpcacclient.HpcAC, ModelArtsRpc modelartsservice.ModelArtsService, ModelArtsImgRpc imagesservice.ImagesService, OctopusRpc octopusclient.Octopus) (*map[string]executor.Executor, *map[string]collector.ResourceCollector) {
executorMap := make(map[string]executor.Executor)
collectorMap := make(map[string]collector.ResourceCollector)
for k, v := range AiTypeMap {
switch v {
case OCTOPUS:
octopus := impl.NewOctopusExecutor(OctopusRpc, k)
collectorMap[k] = octopus
executorMap[k] = octopus
case MODELARTS:
modelarts := impl.NewModelartsExecutor(ModelArtsRpc, ModelArtsImgRpc, k)
collectorMap[k] = modelarts
executorMap[k] = modelarts
case SHUGUANGAI:
sgai := impl.NewShuguangAiExecutor(ACRpc, k)
collectorMap[k] = sgai
executorMap[k] = sgai
}
}
return &executorMap, &collectorMap
}

View File

@ -0,0 +1,22 @@
package collector
type ResourceCollector interface {
GetResourceSpecs() (*ResourceSpecs, error)
}
type ResourceSpecs struct {
ParticipantId int64
Name string
CpuAvail float64
MemAvail float64
DiskAvail float64
GpuAvail float64
CardAvail []Card
Balance float64
}
type Card struct {
Type string
Name string
TOpsAtFp16 float64
}

View File

@ -0,0 +1,17 @@
package executor
type Executor interface {
QueryImageList() ([]Image, error)
SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string) (Task, error)
QueryTask(taskId string) (Task, error)
QuerySpecs() (Spec, error)
}
type Image struct {
}
type Task struct {
}
type Spec struct {
}

View File

@ -0,0 +1,44 @@
package impl
import (
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service/collector"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service/executor"
"gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/client/imagesservice"
"gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/client/modelartsservice"
)
type ModelArtsExecutor struct {
Name string
pageIndex int32
pageSize int32
ModelArtsRpc modelartsservice.ModelArtsService
ModelArtsImgRpc imagesservice.ImagesService
}
func NewModelartsExecutor(modelArtsRpc modelartsservice.ModelArtsService, modelArtsImgRpc imagesservice.ImagesService, name string) *ModelArtsExecutor {
return &ModelArtsExecutor{Name: name, ModelArtsRpc: modelArtsRpc, ModelArtsImgRpc: modelArtsImgRpc, pageIndex: 1, pageSize: 100}
}
func (m ModelArtsExecutor) QueryImageList() ([]executor.Image, error) {
//TODO implement me
panic("implement me")
}
func (m ModelArtsExecutor) SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string) (executor.Task, error) {
//TODO implement me
panic("implement me")
}
func (m ModelArtsExecutor) QueryTask(taskId string) (executor.Task, error) {
//TODO implement me
panic("implement me")
}
func (m ModelArtsExecutor) QuerySpecs() (executor.Spec, error) {
//TODO implement me
panic("implement me")
}
func (a *ModelArtsExecutor) GetResourceSpecs() (*collector.ResourceSpecs, error) {
return nil, nil
}

View File

@ -0,0 +1,42 @@
package impl
import (
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service/collector"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service/executor"
"gitlink.org.cn/jcce-pcm/pcm-participant-octopus/octopusclient"
)
type OctopusExecutor struct {
Name string
pageIndex int32
pageSize int32
OctopusRpc octopusclient.Octopus
}
func NewOctopusExecutor(OctopusRpc octopusclient.Octopus, name string) *OctopusExecutor {
return &OctopusExecutor{OctopusRpc: OctopusRpc, Name: name, pageIndex: 1, pageSize: 100}
}
func (o OctopusExecutor) QueryImageList() ([]executor.Image, error) {
//TODO implement me
panic("implement me")
}
func (o OctopusExecutor) SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string) (executor.Task, error) {
//TODO implement me
panic("implement me")
}
func (o OctopusExecutor) QueryTask(taskId string) (executor.Task, error) {
//TODO implement me
panic("implement me")
}
func (o OctopusExecutor) QuerySpecs() (executor.Spec, error) {
//TODO implement me
panic("implement me")
}
func (a *OctopusExecutor) GetResourceSpecs() (*collector.ResourceSpecs, error) {
return nil, nil
}

View File

@ -0,0 +1,45 @@
package impl
import (
"gitlink.org.cn/jcce-pcm/pcm-ac/hpcacclient"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service/collector"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/scheduler/service/executor"
)
//单条作业费=作业运行秒数×(CPU核心数*CPU单价+GPU卡数×GPU单价+DCU卡数×DCU单价)/3600
//CPU单价=队列CPU费率×计算中心CPU单价
//GPU单价=队列GPU费率×计算中心GPU单价
//DCU单价=队列DCU费率×计算中心DCU单价
type ShuguangAiExecutor struct {
Name string
ACRpc hpcacclient.HpcAC
}
func NewShuguangAiExecutor(acRpc hpcacclient.HpcAC, name string) *ShuguangAiExecutor {
return &ShuguangAiExecutor{Name: name, ACRpc: acRpc}
}
func (s ShuguangAiExecutor) QueryImageList() ([]executor.Image, error) {
//TODO implement me
panic("implement me")
}
func (s ShuguangAiExecutor) SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string) (executor.Task, error) {
//TODO implement me
panic("implement me")
}
func (s ShuguangAiExecutor) QueryTask(taskId string) (executor.Task, error) {
//TODO implement me
panic("implement me")
}
func (s ShuguangAiExecutor) QuerySpecs() (executor.Spec, error) {
//TODO implement me
panic("implement me")
}
func (a *ShuguangAiExecutor) GetResourceSpecs() (*collector.ResourceSpecs, error) {
return nil, nil
}

View File

@ -0,0 +1,4 @@
package strategy
type DynamicResourcesStrategy struct {
}

View File

@ -1,4 +1,4 @@
package strategies
package strategy
import (
"github.com/pkg/errors"

View File

@ -12,7 +12,7 @@
*/
package strategies
package strategy
import (
"errors"

View File

@ -1,4 +1,4 @@
package strategies
package strategy
type StaticWeightStrategy struct {
// TODO: add fields

View File

@ -1,4 +1,4 @@
package strategies
package strategy
type Strategy interface {
Schedule() ([]*AssignedCluster, error)