Merge pull request 'updated strategy parameters' (#17) from tzwang/pcm-coordinator:master into master
Former-commit-id: 44a3dfd14d9adbdab690b2c7c1e8be324ecf842f
This commit is contained in:
commit
75543eacae
|
@ -27,6 +27,7 @@ import (
|
||||||
"gorm.io/gorm"
|
"gorm.io/gorm"
|
||||||
"sigs.k8s.io/yaml"
|
"sigs.k8s.io/yaml"
|
||||||
"strings"
|
"strings"
|
||||||
|
"sync"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Scheduler struct {
|
type Scheduler struct {
|
||||||
|
@ -38,7 +39,8 @@ type Scheduler struct {
|
||||||
participantRpc participantservice.ParticipantService
|
participantRpc participantservice.ParticipantService
|
||||||
ResourceCollector *map[string]collector.ResourceCollector
|
ResourceCollector *map[string]collector.ResourceCollector
|
||||||
Storages database.Storage
|
Storages database.Storage
|
||||||
AiExecutor *map[string]executor.Executor
|
AiExecutor *map[string]executor.AiExecutor
|
||||||
|
mu sync.RWMutex
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewScheduler(subSchedule common.SubSchedule, val string, dbEngin *gorm.DB, participantRpc participantservice.ParticipantService) (*Scheduler, error) {
|
func NewScheduler(subSchedule common.SubSchedule, val string, dbEngin *gorm.DB, participantRpc participantservice.ParticipantService) (*Scheduler, error) {
|
||||||
|
@ -50,7 +52,7 @@ func NewScheduler(subSchedule common.SubSchedule, val string, dbEngin *gorm.DB,
|
||||||
return &Scheduler{task: task, subSchedule: subSchedule, dbEngin: dbEngin, participantRpc: participantRpc}, nil
|
return &Scheduler{task: task, subSchedule: subSchedule, dbEngin: dbEngin, participantRpc: participantRpc}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewScheduler2(resourceCollector *map[string]collector.ResourceCollector, storages database.Storage, aiExecutor *map[string]executor.Executor) *Scheduler {
|
func NewScheduler2(resourceCollector *map[string]collector.ResourceCollector, storages database.Storage, aiExecutor *map[string]executor.AiExecutor) *Scheduler {
|
||||||
return &Scheduler{ResourceCollector: resourceCollector, Storages: storages, AiExecutor: aiExecutor}
|
return &Scheduler{ResourceCollector: resourceCollector, Storages: storages, AiExecutor: aiExecutor}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -19,6 +19,7 @@ import (
|
||||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler"
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler"
|
||||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/algorithm/providerPricing"
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/algorithm/providerPricing"
|
||||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/entity"
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/entity"
|
||||||
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/schedulers/option"
|
||||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/collector"
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/collector"
|
||||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/strategy"
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/strategy"
|
||||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/pkg/response"
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/pkg/response"
|
||||||
|
@ -30,6 +31,7 @@ type AiScheduler struct {
|
||||||
yamlString string
|
yamlString string
|
||||||
task *response.TaskInfo
|
task *response.TaskInfo
|
||||||
*scheduler.Scheduler
|
*scheduler.Scheduler
|
||||||
|
option option.AiOption
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewAiScheduler(val string, scheduler *scheduler.Scheduler) (*AiScheduler, error) {
|
func NewAiScheduler(val string, scheduler *scheduler.Scheduler) (*AiScheduler, error) {
|
||||||
|
@ -48,7 +50,7 @@ func (as *AiScheduler) GetNewStructForDb(task *response.TaskInfo, resource strin
|
||||||
}
|
}
|
||||||
|
|
||||||
func (as *AiScheduler) PickOptimalStrategy() (strategy.Strategy, error) {
|
func (as *AiScheduler) PickOptimalStrategy() (strategy.Strategy, error) {
|
||||||
resources, err := as.findProvidersWithResource()
|
resources, err := as.findClustersWithResource()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
@ -61,7 +63,7 @@ func (as *AiScheduler) PickOptimalStrategy() (strategy.Strategy, error) {
|
||||||
Name: resource.Name,
|
Name: resource.Name,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
strategy := strategy.NewReplicationStrategy(nil, 0)
|
strategy := strategy.NewReplicationStrategy(nil)
|
||||||
return strategy, nil
|
return strategy, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -83,12 +85,19 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) error {
|
||||||
return errors.New("clusters is nil")
|
return errors.New("clusters is nil")
|
||||||
}
|
}
|
||||||
|
|
||||||
_ = *as.AiExecutor
|
executorMap := *as.AiExecutor
|
||||||
|
for _, cluster := range clusters {
|
||||||
|
_, err := executorMap[cluster.Name].Execute(option.AiOption{})
|
||||||
|
if err != nil {
|
||||||
|
// TODO: database operation
|
||||||
|
}
|
||||||
|
// TODO: database operation
|
||||||
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (as *AiScheduler) findProvidersWithResource() ([]*collector.ResourceSpecs, error) {
|
func (as *AiScheduler) findClustersWithResource() ([]*collector.ResourceSpecs, error) {
|
||||||
var resourceSpecs []*collector.ResourceSpecs
|
var resourceSpecs []*collector.ResourceSpecs
|
||||||
for _, resourceCollector := range *as.ResourceCollector {
|
for _, resourceCollector := range *as.ResourceCollector {
|
||||||
spec, err := resourceCollector.GetResourceSpecs()
|
spec, err := resourceCollector.GetResourceSpecs()
|
||||||
|
|
|
@ -0,0 +1,17 @@
|
||||||
|
package option
|
||||||
|
|
||||||
|
type AiOption struct {
|
||||||
|
aiType string // shuguangAi/octopus
|
||||||
|
resourceType string // cpu/gpu/compute card
|
||||||
|
taskType string // pytorch/tensorflow
|
||||||
|
|
||||||
|
imageId string
|
||||||
|
specId string
|
||||||
|
datasetsId string
|
||||||
|
codeId string
|
||||||
|
|
||||||
|
cmd string
|
||||||
|
|
||||||
|
datasets string
|
||||||
|
code string
|
||||||
|
}
|
|
@ -24,8 +24,8 @@ var (
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
func InitAiClusterMap(ctx context.Context, svcCtx *svc.ServiceContext) (*map[string]executor.Executor, *map[string]collector.ResourceCollector) {
|
func InitAiClusterMap(ctx context.Context, svcCtx *svc.ServiceContext) (*map[string]executor.AiExecutor, *map[string]collector.ResourceCollector) {
|
||||||
executorMap := make(map[string]executor.Executor)
|
executorMap := make(map[string]executor.AiExecutor)
|
||||||
collectorMap := make(map[string]collector.ResourceCollector)
|
collectorMap := make(map[string]collector.ResourceCollector)
|
||||||
for k, v := range AiTypeMap {
|
for k, v := range AiTypeMap {
|
||||||
switch v {
|
switch v {
|
||||||
|
|
|
@ -0,0 +1,11 @@
|
||||||
|
package executor
|
||||||
|
|
||||||
|
import (
|
||||||
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/schedulers/option"
|
||||||
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/storeLink"
|
||||||
|
)
|
||||||
|
|
||||||
|
type AiExecutor interface {
|
||||||
|
Execute(option option.AiOption) (interface{}, error)
|
||||||
|
storeLink.Linkage
|
||||||
|
}
|
|
@ -1,8 +0,0 @@
|
||||||
package executor
|
|
||||||
|
|
||||||
type Executor interface {
|
|
||||||
QueryImageList() (interface{}, error)
|
|
||||||
SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string) (interface{}, error)
|
|
||||||
QueryTask(taskId string) (interface{}, error)
|
|
||||||
QuerySpecs() (interface{}, error)
|
|
||||||
}
|
|
|
@ -2,3 +2,7 @@ package strategy
|
||||||
|
|
||||||
type DynamicResourcesStrategy struct {
|
type DynamicResourcesStrategy struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (ps *DynamicResourcesStrategy) Schedule() ([]*AssignedCluster, error) {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,9 @@
|
||||||
|
package params
|
||||||
|
|
||||||
|
import (
|
||||||
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/collector"
|
||||||
|
)
|
||||||
|
|
||||||
|
type Params struct {
|
||||||
|
resources []*collector.ResourceSpecs
|
||||||
|
}
|
|
@ -0,0 +1,16 @@
|
||||||
|
package params
|
||||||
|
|
||||||
|
import "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/entity"
|
||||||
|
|
||||||
|
type ReplicationOption struct {
|
||||||
|
replicas int32
|
||||||
|
participants []entity.Participant
|
||||||
|
}
|
||||||
|
|
||||||
|
func (o *ReplicationOption) GetReplicas() int32 {
|
||||||
|
return o.replicas
|
||||||
|
}
|
||||||
|
|
||||||
|
func (o *ReplicationOption) GetParticipants() []entity.Participant {
|
||||||
|
return o.participants
|
||||||
|
}
|
|
@ -0,0 +1,26 @@
|
||||||
|
package params
|
||||||
|
|
||||||
|
import (
|
||||||
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/algorithm/providerPricing"
|
||||||
|
)
|
||||||
|
|
||||||
|
type ResourcePricingOption struct {
|
||||||
|
replicas int32
|
||||||
|
task *providerPricing.Task
|
||||||
|
providers []*providerPricing.Provider
|
||||||
|
*Params
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewResourcePricingOption(params *Params) *ResourcePricingOption {
|
||||||
|
return &ResourcePricingOption{
|
||||||
|
Params: params,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *ResourcePricingOption) GetReplicas() int32 {
|
||||||
|
return r.replicas
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *ResourcePricingOption) GetProviders() []*providerPricing.Provider {
|
||||||
|
return r.providers
|
||||||
|
}
|
|
@ -3,6 +3,7 @@ package strategy
|
||||||
import (
|
import (
|
||||||
"github.com/pkg/errors"
|
"github.com/pkg/errors"
|
||||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/entity"
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/entity"
|
||||||
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/strategy/params"
|
||||||
)
|
)
|
||||||
|
|
||||||
type ReplicationStrategy struct {
|
type ReplicationStrategy struct {
|
||||||
|
@ -10,9 +11,9 @@ type ReplicationStrategy struct {
|
||||||
participants []entity.Participant
|
participants []entity.Participant
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewReplicationStrategy(participants []entity.Participant, replicas int32) *ReplicationStrategy {
|
func NewReplicationStrategy(params *params.ReplicationOption) *ReplicationStrategy {
|
||||||
return &ReplicationStrategy{replicas: replicas,
|
return &ReplicationStrategy{replicas: params.GetReplicas(),
|
||||||
participants: participants,
|
participants: params.GetParticipants(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -13,6 +13,12 @@ type StaticWeightStrategy struct {
|
||||||
weights []entity.WeightP
|
weights []entity.WeightP
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func NewStaticWeightStrategy(weights []entity.WeightP, replicas int32) *StaticWeightStrategy {
|
||||||
|
return &StaticWeightStrategy{weights: weights,
|
||||||
|
num: replicas,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (ps *StaticWeightStrategy) Schedule() ([]*AssignedCluster, error) {
|
func (ps *StaticWeightStrategy) Schedule() ([]*AssignedCluster, error) {
|
||||||
// TODO: implement the scheduling logic return nil, nil
|
// TODO: implement the scheduling logic return nil, nil
|
||||||
|
|
||||||
|
|
|
@ -9,6 +9,3 @@ type AssignedCluster struct {
|
||||||
Name string
|
Name string
|
||||||
Replicas int32
|
Replicas int32
|
||||||
}
|
}
|
||||||
|
|
||||||
type Options struct {
|
|
||||||
}
|
|
||||||
|
|
|
@ -0,0 +1,90 @@
|
||||||
|
package test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/entity"
|
||||||
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/strategy"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestReplication(t *testing.T) {
|
||||||
|
parts := []entity.Participant{
|
||||||
|
{Name: "test1", Participant_id: 1},
|
||||||
|
{Name: "test2", Participant_id: 2},
|
||||||
|
{Name: "test3", Participant_id: 3},
|
||||||
|
}
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
replica int32
|
||||||
|
ps []entity.Participant
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "test1",
|
||||||
|
replica: 1,
|
||||||
|
ps: parts,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "test2",
|
||||||
|
replica: 2,
|
||||||
|
ps: parts,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
repl := strategy.NewReplicationStrategy(nil)
|
||||||
|
schedule, err := repl.Schedule()
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for _, cluster := range schedule {
|
||||||
|
fmt.Println(cluster)
|
||||||
|
}
|
||||||
|
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStaticWeight(t *testing.T) {
|
||||||
|
parts := []entity.WeightP{
|
||||||
|
{Name: "p1", Participant_id: 1, Weight: 3},
|
||||||
|
{Name: "p2", Participant_id: 2, Weight: 5},
|
||||||
|
{Name: "p3", Participant_id: 3, Weight: 2},
|
||||||
|
}
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
replica int32
|
||||||
|
ps []entity.WeightP
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "test1",
|
||||||
|
replica: 1,
|
||||||
|
ps: parts,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "test2",
|
||||||
|
replica: 5,
|
||||||
|
ps: parts,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "test2",
|
||||||
|
replica: 6,
|
||||||
|
ps: parts,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
repl := strategy.NewStaticWeightStrategy(tt.ps, tt.replica)
|
||||||
|
schedule, err := repl.Schedule()
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for _, cluster := range schedule {
|
||||||
|
fmt.Println(cluster)
|
||||||
|
}
|
||||||
|
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
|
@ -16,6 +16,7 @@ package storeLink
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/schedulers/option"
|
||||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/collector"
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/collector"
|
||||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
|
||||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils"
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils"
|
||||||
|
@ -151,3 +152,7 @@ func (o *ModelArtsLink) QuerySpecs() (interface{}, error) {
|
||||||
func (o *ModelArtsLink) GetResourceSpecs() (*collector.ResourceSpecs, error) {
|
func (o *ModelArtsLink) GetResourceSpecs() (*collector.ResourceSpecs, error) {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (o *ModelArtsLink) Execute(option option.AiOption) (interface{}, error) {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
|
@ -16,6 +16,7 @@ package storeLink
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/schedulers/option"
|
||||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/collector"
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/collector"
|
||||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
|
||||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils"
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils"
|
||||||
|
@ -198,3 +199,7 @@ func (o *OctopusLink) QuerySpecs() (interface{}, error) {
|
||||||
func (o *OctopusLink) GetResourceSpecs() (*collector.ResourceSpecs, error) {
|
func (o *OctopusLink) GetResourceSpecs() (*collector.ResourceSpecs, error) {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (o *OctopusLink) Execute(option option.AiOption) (interface{}, error) {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
|
@ -18,6 +18,7 @@ import (
|
||||||
"context"
|
"context"
|
||||||
"errors"
|
"errors"
|
||||||
"gitlink.org.cn/jcce-pcm/pcm-ac/hpcAC"
|
"gitlink.org.cn/jcce-pcm/pcm-ac/hpcAC"
|
||||||
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/schedulers/option"
|
||||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/collector"
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/collector"
|
||||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
|
||||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils"
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils"
|
||||||
|
@ -174,3 +175,7 @@ func (o *ShuguangAi) QuerySpecs() (interface{}, error) {
|
||||||
func (o *ShuguangAi) GetResourceSpecs() (*collector.ResourceSpecs, error) {
|
func (o *ShuguangAi) GetResourceSpecs() (*collector.ResourceSpecs, error) {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (o *ShuguangAi) Execute(option option.AiOption) (interface{}, error) {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue