Merge pull request 'updated acquire resources concurrently' (#86) from tzwang/pcm-coordinator:master into master
Former-commit-id: 2559e61a8deed328a47f5e640d2096ce11e8be9f
This commit is contained in:
commit
3065d16756
|
@ -24,6 +24,7 @@ import (
|
|||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/pkg/response"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
|
||||
"sync"
|
||||
)
|
||||
|
||||
type AiScheduler struct {
|
||||
|
@ -98,25 +99,60 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) error {
|
|||
|
||||
executorMap := *as.AiExecutor
|
||||
for _, cluster := range clusters {
|
||||
_, err := executorMap[cluster.Name].Execute(as.option)
|
||||
if err != nil {
|
||||
// TODO: database operation
|
||||
c := cluster
|
||||
if cluster.Replicas == 0 {
|
||||
continue
|
||||
}
|
||||
// TODO: database operation
|
||||
go func() {
|
||||
_, err := executorMap[c.Name].Execute(as.option)
|
||||
if err != nil {
|
||||
// TODO: database operation
|
||||
}
|
||||
// TODO: database operation
|
||||
}()
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (as *AiScheduler) findClustersWithResources() ([]*collector.ResourceStats, error) {
|
||||
var wg sync.WaitGroup
|
||||
var ch = make(chan *collector.ResourceStats, len(*as.ResourceCollector))
|
||||
var errCh = make(chan error, len(*as.ResourceCollector))
|
||||
|
||||
var resourceSpecs []*collector.ResourceStats
|
||||
var errs []error
|
||||
|
||||
for _, resourceCollector := range *as.ResourceCollector {
|
||||
spec, err := resourceCollector.GetResourceStats()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
resourceSpecs = append(resourceSpecs, spec)
|
||||
wg.Add(1)
|
||||
rc := resourceCollector
|
||||
go func() {
|
||||
spec, err := rc.GetResourceStats()
|
||||
if err != nil {
|
||||
errCh <- err
|
||||
wg.Done()
|
||||
return
|
||||
}
|
||||
ch <- spec
|
||||
wg.Done()
|
||||
}()
|
||||
}
|
||||
wg.Wait()
|
||||
close(ch)
|
||||
close(errCh)
|
||||
|
||||
for s := range ch {
|
||||
resourceSpecs = append(resourceSpecs, s)
|
||||
}
|
||||
|
||||
for e := range errCh {
|
||||
errs = append(errs, e)
|
||||
}
|
||||
|
||||
if len(errs) != 0 {
|
||||
return nil, errors.New("get resources failed")
|
||||
}
|
||||
|
||||
if len(resourceSpecs) == 0 {
|
||||
return nil, errors.New("no resource found")
|
||||
}
|
||||
|
|
|
@ -28,6 +28,7 @@ import (
|
|||
"gitlink.org.cn/jcce-pcm/pcm-participant-octopus/octopus"
|
||||
"gorm.io/gorm"
|
||||
"strings"
|
||||
"sync"
|
||||
)
|
||||
|
||||
type Linkage interface {
|
||||
|
@ -124,73 +125,122 @@ func GetResourceTypes() []string {
|
|||
}
|
||||
|
||||
func GetDatasetsNames(collectorMap *map[string]collector.AiCollector) ([]string, error) {
|
||||
var wg sync.WaitGroup
|
||||
var errCh = make(chan error, len(*collectorMap))
|
||||
var errs []error
|
||||
var names []string
|
||||
//errCount := 0
|
||||
var mu sync.Mutex
|
||||
colMap := *collectorMap
|
||||
for _, col := range colMap {
|
||||
var ns []string
|
||||
specs, err := col.GetDatasetsSpecs()
|
||||
if err != nil {
|
||||
return nil, errors.New("failed to acquire datasets list")
|
||||
}
|
||||
for _, spec := range specs {
|
||||
ns = append(ns, spec.Name)
|
||||
}
|
||||
if len(ns) == 0 {
|
||||
continue
|
||||
}
|
||||
if len(names) == 0 {
|
||||
names = ns
|
||||
continue
|
||||
}
|
||||
|
||||
names = common.IntersectString(names, ns)
|
||||
wg.Add(1)
|
||||
c := col
|
||||
go func() {
|
||||
var ns []string
|
||||
specs, err := c.GetDatasetsSpecs()
|
||||
if err != nil {
|
||||
errCh <- err
|
||||
wg.Done()
|
||||
return
|
||||
}
|
||||
for _, spec := range specs {
|
||||
ns = append(ns, spec.Name)
|
||||
}
|
||||
if len(ns) == 0 {
|
||||
wg.Done()
|
||||
return
|
||||
}
|
||||
mu.Lock()
|
||||
if len(names) == 0 {
|
||||
names = ns
|
||||
wg.Done()
|
||||
mu.Unlock()
|
||||
return
|
||||
}
|
||||
names = common.IntersectString(names, ns)
|
||||
wg.Done()
|
||||
mu.Unlock()
|
||||
}()
|
||||
}
|
||||
//if (len(*collectorMap) - errCount) < 2 {
|
||||
//
|
||||
//}
|
||||
wg.Wait()
|
||||
close(errCh)
|
||||
|
||||
for e := range errCh {
|
||||
errs = append(errs, e)
|
||||
}
|
||||
|
||||
if len(errs) != 0 {
|
||||
return nil, errors.New("get DatasetsNames failed")
|
||||
}
|
||||
|
||||
names = common.RemoveDuplicates(names)
|
||||
return names, nil
|
||||
}
|
||||
|
||||
func GetAlgorithms(collectorMap *map[string]collector.AiCollector, resourceType string, taskType string, dataset string) ([]string, error) {
|
||||
var names []string
|
||||
var wg sync.WaitGroup
|
||||
var errCh = make(chan error, len(*collectorMap))
|
||||
var errs []error
|
||||
var mu sync.Mutex
|
||||
|
||||
colMap := *collectorMap
|
||||
for _, col := range colMap {
|
||||
var ns []string
|
||||
algorithms, err := col.GetAlgorithms()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for _, algorithm := range algorithms {
|
||||
if algorithm.TaskType != taskType {
|
||||
continue
|
||||
wg.Add(1)
|
||||
c := col
|
||||
go func() {
|
||||
var ns []string
|
||||
algorithms, err := c.GetAlgorithms()
|
||||
if err != nil {
|
||||
errCh <- err
|
||||
wg.Done()
|
||||
return
|
||||
}
|
||||
switch algorithm.Platform {
|
||||
case OCTOPUS:
|
||||
splitns := strings.Split(algorithm.Name, UNDERSCORE)
|
||||
if dataset != splitns[0] || len(splitns) == 1 {
|
||||
for _, algorithm := range algorithms {
|
||||
if algorithm.TaskType != taskType {
|
||||
continue
|
||||
}
|
||||
ns = append(ns, splitns[1])
|
||||
case SHUGUANGAI:
|
||||
splitns := strings.Split(algorithm.Name, DASH)
|
||||
if dataset != splitns[0] || len(splitns) == 1 {
|
||||
continue
|
||||
switch algorithm.Platform {
|
||||
case OCTOPUS:
|
||||
splitns := strings.Split(algorithm.Name, UNDERSCORE)
|
||||
if dataset != splitns[0] || len(splitns) == 1 {
|
||||
continue
|
||||
}
|
||||
ns = append(ns, splitns[1])
|
||||
case SHUGUANGAI:
|
||||
splitns := strings.Split(algorithm.Name, DASH)
|
||||
if dataset != splitns[0] || len(splitns) == 1 {
|
||||
continue
|
||||
}
|
||||
ns = append(ns, splitns[1])
|
||||
}
|
||||
ns = append(ns, splitns[1])
|
||||
}
|
||||
}
|
||||
if len(ns) == 0 {
|
||||
continue
|
||||
}
|
||||
if len(names) == 0 {
|
||||
names = ns
|
||||
continue
|
||||
}
|
||||
|
||||
names = common.IntersectString(names, ns)
|
||||
if len(ns) == 0 {
|
||||
wg.Done()
|
||||
return
|
||||
}
|
||||
mu.Lock()
|
||||
if len(names) == 0 {
|
||||
names = ns
|
||||
wg.Done()
|
||||
mu.Unlock()
|
||||
return
|
||||
}
|
||||
names = common.IntersectString(names, ns)
|
||||
wg.Done()
|
||||
mu.Unlock()
|
||||
}()
|
||||
}
|
||||
wg.Wait()
|
||||
close(errCh)
|
||||
|
||||
for e := range errCh {
|
||||
errs = append(errs, e)
|
||||
}
|
||||
|
||||
if len(errs) != 0 {
|
||||
return nil, errors.New("get Algorithms failed")
|
||||
}
|
||||
|
||||
names = common.RemoveDuplicates(names)
|
||||
return names, nil
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue