Fix bugs in GetResourceStats and the DynamicResources scheduling strategy
Former-commit-id: fb05379230be7708ed91f5c21254af271a2b9237
This commit is contained in:
parent
cc0b729639
commit
db55f5e5a3
|
@ -28,11 +28,11 @@ func NewScheduleSubmitLogic(ctx context.Context, svcCtx *svc.ServiceContext) *Sc
|
|||
func (l *ScheduleSubmitLogic) ScheduleSubmit(req *types.ScheduleReq) (resp *types.ScheduleResp, err error) {
|
||||
resp = &types.ScheduleResp{}
|
||||
opt := &option.AiOption{
|
||||
ResourceType: req.AiOption.ResourceType,
|
||||
Tops: 0,
|
||||
TaskType: req.AiOption.TaskType,
|
||||
DatasetsName: req.AiOption.Datasets,
|
||||
AlgorithmName: "cnn",
|
||||
ResourceType: req.AiOption.ResourceType,
|
||||
Tops: 0,
|
||||
TaskType: req.AiOption.TaskType,
|
||||
DatasetsName: req.AiOption.Datasets,
|
||||
//AlgorithmName: "cnn",
|
||||
StrategyName: req.AiOption.Strategy,
|
||||
ClusterToStaticWeight: nil,
|
||||
Params: []string{
|
||||
|
|
|
@ -2,7 +2,6 @@ package database
|
|||
|
||||
import (
|
||||
"github.com/zeromicro/go-zero/core/logx"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
|
||||
|
@ -24,12 +23,12 @@ func (s *AiStorage) GetParticipants() (*types.ClusterListResp, error) {
|
|||
return &resp, nil
|
||||
}
|
||||
|
||||
func (s *AiStorage) SaveTask(cluster strategy.AssignedCluster) error {
|
||||
func (s *AiStorage) SaveTask(name string) error {
|
||||
// 构建主任务结构体
|
||||
taskModel := models.Task{
|
||||
Status: constants.Saved,
|
||||
Description: "ai task",
|
||||
Name: "testAi",
|
||||
Name: name,
|
||||
CommitTime: time.Now(),
|
||||
}
|
||||
// 保存任务数据到数据库
|
||||
|
|
|
@ -100,6 +100,8 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) ([]inter
|
|||
return nil, errors.New("clusters is nil")
|
||||
}
|
||||
|
||||
//res := struct {
|
||||
//}{}
|
||||
var wg sync.WaitGroup
|
||||
var result []interface{}
|
||||
var errs []error
|
||||
|
@ -115,6 +117,7 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) ([]inter
|
|||
wg.Add(1)
|
||||
go func() {
|
||||
resp, err := executorMap[c.Name].Execute(as.ctx, as.option)
|
||||
|
||||
if err != nil {
|
||||
// TODO: database operation
|
||||
errCh <- err
|
||||
|
@ -122,15 +125,20 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) ([]inter
|
|||
return
|
||||
}
|
||||
// TODO: database operation
|
||||
ch <- resp
|
||||
data := struct {
|
||||
Resp interface{}
|
||||
ClusterId int64
|
||||
}{
|
||||
Resp: resp,
|
||||
ClusterId: c.ParticipantId,
|
||||
}
|
||||
ch <- data
|
||||
wg.Done()
|
||||
}()
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
for s := range ch {
|
||||
result = append(result, s)
|
||||
}
|
||||
close(ch)
|
||||
close(errCh)
|
||||
|
||||
for e := range errCh {
|
||||
errs = append(errs, e)
|
||||
|
@ -140,6 +148,19 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) ([]inter
|
|||
return nil, errors.New("submit task failed")
|
||||
}
|
||||
|
||||
for s := range ch {
|
||||
data := (s).(struct {
|
||||
Resp interface{}
|
||||
ClusterId int64
|
||||
})
|
||||
|
||||
result = append(result, data.Resp)
|
||||
}
|
||||
|
||||
err := as.AiStorages.SaveTask(as.option.TaskName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
|
|
|
@ -49,7 +49,7 @@ func (ps *DynamicResourcesStrategy) Schedule() ([]*AssignedCluster, error) {
|
|||
if opt.ResourceType == "computeCard" {
|
||||
var maxCurrentCardHours float64
|
||||
for _, card := range res.CardsAvail {
|
||||
cardHours := common.RoundFloat(card.TOpsAtFp16*card.CardHours, 3)
|
||||
cardHours := common.RoundFloat( /*card.TOpsAtFp16**/ card.CardHours, 3)
|
||||
if cardHours > maxCurrentCardHours {
|
||||
maxCurrentCardHours = cardHours
|
||||
}
|
||||
|
|
|
@ -284,14 +284,14 @@ func (s *ShuguangAi) GetResourceStats(ctx context.Context) (*collector.ResourceS
|
|||
totalDcu := limitResp.Data.AccountMaxDcu
|
||||
|
||||
//disk
|
||||
diskReq := &hpcAC.ParaStorQuotaReq{}
|
||||
diskResp, err := s.aCRpc.ParaStorQuota(ctx, diskReq)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
totalDisk := common.RoundFloat(diskResp.Data[0].Threshold*KB*KB*KB, 3)
|
||||
availDisk := common.RoundFloat((diskResp.Data[0].Threshold-diskResp.Data[0].Usage)*KB*KB*KB, 3)
|
||||
//diskReq := &hpcAC.ParaStorQuotaReq{}
|
||||
//diskResp, err := s.aCRpc.ParaStorQuota(ctx, diskReq)
|
||||
//if err != nil {
|
||||
// return nil, err
|
||||
//}
|
||||
//
|
||||
//totalDisk := common.RoundFloat(diskResp.Data[0].Threshold*KB*KB*KB, 3)
|
||||
//availDisk := common.RoundFloat((diskResp.Data[0].Threshold-diskResp.Data[0].Usage)*KB*KB*KB, 3)
|
||||
|
||||
//memory
|
||||
nodeResp, err := s.aCRpc.GetNodeResources(ctx, nil)
|
||||
|
@ -349,12 +349,12 @@ func (s *ShuguangAi) GetResourceStats(ctx context.Context) (*collector.ResourceS
|
|||
Balance: balance,
|
||||
CpuCoreTotal: totalCpu,
|
||||
CpuCoreAvail: CpuCoreAvail,
|
||||
DiskTotal: totalDisk,
|
||||
DiskAvail: availDisk,
|
||||
MemTotal: memSize,
|
||||
MemAvail: MemAvail,
|
||||
CpuCoreHours: cpuHours,
|
||||
CardsAvail: cards,
|
||||
//DiskTotal: totalDisk,
|
||||
//DiskAvail: availDisk,
|
||||
MemTotal: memSize,
|
||||
MemAvail: MemAvail,
|
||||
CpuCoreHours: cpuHours,
|
||||
CardsAvail: cards,
|
||||
}
|
||||
|
||||
return resourceStats, nil
|
||||
|
@ -381,7 +381,7 @@ func (s *ShuguangAi) GetAlgorithms(ctx context.Context) ([]*collector.Algorithm,
|
|||
var algorithms []*collector.Algorithm
|
||||
for _, t := range GetTaskTypes() {
|
||||
taskType := t
|
||||
req := &hpcAC.GetFileListReq{Limit: 100, Path: ALGORITHM_DIR + FORWARD_SLASH + taskType, Start: 0}
|
||||
req := &hpcAC.GetFileListReq{Limit: 100, Path: ALGORITHM_DIR + FORWARD_SLASH + taskType, Start: 0, Order: "asc", OrderBy: "name", KeyWord: ""}
|
||||
list, err := s.aCRpc.GetFileList(ctx, req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
|
Loading…
Reference in New Issue