Merge pull request 'updated acquire resources concurrently' (#86) from tzwang/pcm-coordinator:master into master

Former-commit-id: 2559e61a8deed328a47f5e640d2096ce11e8be9f
This commit is contained in:
tzwang 2024-03-28 17:37:49 +08:00
commit 3065d16756
2 changed files with 144 additions and 58 deletions

View File

@ -24,6 +24,7 @@ import (
"gitlink.org.cn/JointCloud/pcm-coordinator/api/pkg/response" "gitlink.org.cn/JointCloud/pcm-coordinator/api/pkg/response"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
"sync"
) )
type AiScheduler struct { type AiScheduler struct {
@ -98,25 +99,60 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) error {
executorMap := *as.AiExecutor executorMap := *as.AiExecutor
for _, cluster := range clusters { for _, cluster := range clusters {
_, err := executorMap[cluster.Name].Execute(as.option) c := cluster
if cluster.Replicas == 0 {
continue
}
go func() {
_, err := executorMap[c.Name].Execute(as.option)
if err != nil { if err != nil {
// TODO: database operation // TODO: database operation
} }
// TODO: database operation // TODO: database operation
}()
} }
return nil return nil
} }
func (as *AiScheduler) findClustersWithResources() ([]*collector.ResourceStats, error) { func (as *AiScheduler) findClustersWithResources() ([]*collector.ResourceStats, error) {
var wg sync.WaitGroup
var ch = make(chan *collector.ResourceStats, len(*as.ResourceCollector))
var errCh = make(chan error, len(*as.ResourceCollector))
var resourceSpecs []*collector.ResourceStats var resourceSpecs []*collector.ResourceStats
var errs []error
for _, resourceCollector := range *as.ResourceCollector { for _, resourceCollector := range *as.ResourceCollector {
spec, err := resourceCollector.GetResourceStats() wg.Add(1)
rc := resourceCollector
go func() {
spec, err := rc.GetResourceStats()
if err != nil { if err != nil {
continue errCh <- err
wg.Done()
return
} }
resourceSpecs = append(resourceSpecs, spec) ch <- spec
wg.Done()
}()
} }
wg.Wait()
close(ch)
close(errCh)
for s := range ch {
resourceSpecs = append(resourceSpecs, s)
}
for e := range errCh {
errs = append(errs, e)
}
if len(errs) != 0 {
return nil, errors.New("get resources failed")
}
if len(resourceSpecs) == 0 { if len(resourceSpecs) == 0 {
return nil, errors.New("no resource found") return nil, errors.New("no resource found")
} }

View File

@ -28,6 +28,7 @@ import (
"gitlink.org.cn/jcce-pcm/pcm-participant-octopus/octopus" "gitlink.org.cn/jcce-pcm/pcm-participant-octopus/octopus"
"gorm.io/gorm" "gorm.io/gorm"
"strings" "strings"
"sync"
) )
type Linkage interface { type Linkage interface {
@ -124,43 +125,75 @@ func GetResourceTypes() []string {
} }
func GetDatasetsNames(collectorMap *map[string]collector.AiCollector) ([]string, error) { func GetDatasetsNames(collectorMap *map[string]collector.AiCollector) ([]string, error) {
var wg sync.WaitGroup
var errCh = make(chan error, len(*collectorMap))
var errs []error
var names []string var names []string
//errCount := 0 var mu sync.Mutex
colMap := *collectorMap colMap := *collectorMap
for _, col := range colMap { for _, col := range colMap {
wg.Add(1)
c := col
go func() {
var ns []string var ns []string
specs, err := col.GetDatasetsSpecs() specs, err := c.GetDatasetsSpecs()
if err != nil { if err != nil {
return nil, errors.New("failed to acquire datasets list") errCh <- err
wg.Done()
return
} }
for _, spec := range specs { for _, spec := range specs {
ns = append(ns, spec.Name) ns = append(ns, spec.Name)
} }
if len(ns) == 0 { if len(ns) == 0 {
continue wg.Done()
return
} }
mu.Lock()
if len(names) == 0 { if len(names) == 0 {
names = ns names = ns
continue wg.Done()
mu.Unlock()
return
}
names = common.IntersectString(names, ns)
wg.Done()
mu.Unlock()
}()
}
wg.Wait()
close(errCh)
for e := range errCh {
errs = append(errs, e)
} }
names = common.IntersectString(names, ns) if len(errs) != 0 {
return nil, errors.New("get DatasetsNames failed")
} }
//if (len(*collectorMap) - errCount) < 2 {
//
//}
names = common.RemoveDuplicates(names) names = common.RemoveDuplicates(names)
return names, nil return names, nil
} }
func GetAlgorithms(collectorMap *map[string]collector.AiCollector, resourceType string, taskType string, dataset string) ([]string, error) { func GetAlgorithms(collectorMap *map[string]collector.AiCollector, resourceType string, taskType string, dataset string) ([]string, error) {
var names []string var names []string
var wg sync.WaitGroup
var errCh = make(chan error, len(*collectorMap))
var errs []error
var mu sync.Mutex
colMap := *collectorMap colMap := *collectorMap
for _, col := range colMap { for _, col := range colMap {
wg.Add(1)
c := col
go func() {
var ns []string var ns []string
algorithms, err := col.GetAlgorithms() algorithms, err := c.GetAlgorithms()
if err != nil { if err != nil {
return nil, err errCh <- err
wg.Done()
return
} }
for _, algorithm := range algorithms { for _, algorithm := range algorithms {
if algorithm.TaskType != taskType { if algorithm.TaskType != taskType {
@ -182,15 +215,32 @@ func GetAlgorithms(collectorMap *map[string]collector.AiCollector, resourceType
} }
} }
if len(ns) == 0 { if len(ns) == 0 {
continue wg.Done()
return
} }
mu.Lock()
if len(names) == 0 { if len(names) == 0 {
names = ns names = ns
continue wg.Done()
mu.Unlock()
return
}
names = common.IntersectString(names, ns)
wg.Done()
mu.Unlock()
}()
}
wg.Wait()
close(errCh)
for e := range errCh {
errs = append(errs, e)
} }
names = common.IntersectString(names, ns) if len(errs) != 0 {
return nil, errors.New("get Algorithms failed")
} }
names = common.RemoveDuplicates(names) names = common.RemoveDuplicates(names)
return names, nil return names, nil
} }