diff --git a/api/internal/logic/schedule/schedulesubmitlogic.go b/api/internal/logic/schedule/schedulesubmitlogic.go index a4704def..d5105390 100644 --- a/api/internal/logic/schedule/schedulesubmitlogic.go +++ b/api/internal/logic/schedule/schedulesubmitlogic.go @@ -28,11 +28,11 @@ func NewScheduleSubmitLogic(ctx context.Context, svcCtx *svc.ServiceContext) *Sc func (l *ScheduleSubmitLogic) ScheduleSubmit(req *types.ScheduleReq) (resp *types.ScheduleResp, err error) { resp = &types.ScheduleResp{} opt := &option.AiOption{ - ResourceType: req.AiOption.ResourceType, - Tops: 0, - TaskType: req.AiOption.TaskType, - DatasetsName: req.AiOption.Datasets, - AlgorithmName: "cnn", + ResourceType: req.AiOption.ResourceType, + Tops: 0, + TaskType: req.AiOption.TaskType, + DatasetsName: req.AiOption.Datasets, + //AlgorithmName: "cnn", StrategyName: req.AiOption.Strategy, ClusterToStaticWeight: nil, Params: []string{ diff --git a/api/internal/scheduler/database/aiStorage.go b/api/internal/scheduler/database/aiStorage.go index edcc5969..670aec91 100644 --- a/api/internal/scheduler/database/aiStorage.go +++ b/api/internal/scheduler/database/aiStorage.go @@ -2,7 +2,6 @@ package database import ( "github.com/zeromicro/go-zero/core/logx" - "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models" @@ -24,12 +23,12 @@ func (s *AiStorage) GetParticipants() (*types.ClusterListResp, error) { return &resp, nil } -func (s *AiStorage) SaveTask(cluster strategy.AssignedCluster) error { +func (s *AiStorage) SaveTask(name string) error { // 构建主任务结构体 taskModel := models.Task{ Status: constants.Saved, Description: "ai task", - Name: "testAi", + Name: name, CommitTime: time.Now(), } // 保存任务数据到数据库 diff --git a/api/internal/scheduler/schedulers/aiScheduler.go b/api/internal/scheduler/schedulers/aiScheduler.go index 4b62f427..8ff45161 100644 --- a/api/internal/scheduler/schedulers/aiScheduler.go +++ b/api/internal/scheduler/schedulers/aiScheduler.go @@ -100,6 +100,8 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) ([]inter return nil, errors.New("clusters is nil") } + //res := struct { + //}{} var wg sync.WaitGroup var result []interface{} var errs []error @@ -115,6 +117,7 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) ([]inter wg.Add(1) go func() { resp, err := executorMap[c.Name].Execute(as.ctx, as.option) + if err != nil { // TODO: database operation errCh <- err @@ -122,15 +125,20 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) ([]inter return } // TODO: database operation - ch <- resp + data := struct { + Resp interface{} + ClusterId int64 + }{ + Resp: resp, + ClusterId: c.ParticipantId, + } + ch <- data wg.Done() }() } wg.Wait() - - for s := range ch { - result = append(result, s) - } + close(ch) + close(errCh) for e := range errCh { errs = append(errs, e) @@ -140,6 +148,19 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) ([]inter return nil, errors.New("submit task failed") } + for s := range ch { + data := (s).(struct { + Resp interface{} + ClusterId int64 + }) + + result = append(result, data.Resp) + } + + err := as.AiStorages.SaveTask(as.option.TaskName) + if err != nil { + return nil, err + } return result, nil } diff --git a/api/internal/scheduler/strategy/dynamicResources.go b/api/internal/scheduler/strategy/dynamicResources.go index c8d4052f..bfb78263 100644 --- a/api/internal/scheduler/strategy/dynamicResources.go +++ b/api/internal/scheduler/strategy/dynamicResources.go @@ -49,7 +49,7 @@ func (ps *DynamicResourcesStrategy) Schedule() ([]*AssignedCluster, error) { if opt.ResourceType == "computeCard" { var maxCurrentCardHours float64 for _, card := range res.CardsAvail { - cardHours := common.RoundFloat(card.TOpsAtFp16*card.CardHours, 3) + cardHours := common.RoundFloat( /*card.TOpsAtFp16**/ card.CardHours, 3) if cardHours > maxCurrentCardHours { maxCurrentCardHours = cardHours } diff --git a/api/internal/storeLink/shuguangai.go b/api/internal/storeLink/shuguangai.go index e7da3222..a108d8b7 100644 --- a/api/internal/storeLink/shuguangai.go +++ b/api/internal/storeLink/shuguangai.go @@ -284,14 +284,14 @@ func (s *ShuguangAi) GetResourceStats(ctx context.Context) (*collector.ResourceS totalDcu := limitResp.Data.AccountMaxDcu //disk - diskReq := &hpcAC.ParaStorQuotaReq{} - diskResp, err := s.aCRpc.ParaStorQuota(ctx, diskReq) - if err != nil { - return nil, err - } - - totalDisk := common.RoundFloat(diskResp.Data[0].Threshold*KB*KB*KB, 3) - availDisk := common.RoundFloat((diskResp.Data[0].Threshold-diskResp.Data[0].Usage)*KB*KB*KB, 3) + //diskReq := &hpcAC.ParaStorQuotaReq{} + //diskResp, err := s.aCRpc.ParaStorQuota(ctx, diskReq) + //if err != nil { + // return nil, err + //} + // + //totalDisk := common.RoundFloat(diskResp.Data[0].Threshold*KB*KB*KB, 3) + //availDisk := common.RoundFloat((diskResp.Data[0].Threshold-diskResp.Data[0].Usage)*KB*KB*KB, 3) //memory nodeResp, err := s.aCRpc.GetNodeResources(ctx, nil) @@ -349,12 +349,12 @@ func (s *ShuguangAi) GetResourceStats(ctx context.Context) (*collector.ResourceS Balance: balance, CpuCoreTotal: totalCpu, CpuCoreAvail: CpuCoreAvail, - DiskTotal: totalDisk, - DiskAvail: availDisk, - MemTotal: memSize, - MemAvail: MemAvail, - CpuCoreHours: cpuHours, - CardsAvail: cards, + //DiskTotal: totalDisk, + //DiskAvail: availDisk, + MemTotal: memSize, + MemAvail: MemAvail, + CpuCoreHours: cpuHours, + CardsAvail: cards, } return resourceStats, nil @@ -381,7 +381,7 @@ func (s *ShuguangAi) GetAlgorithms(ctx context.Context) ([]*collector.Algorithm, var algorithms []*collector.Algorithm for _, t := range GetTaskTypes() { taskType := t - req := &hpcAC.GetFileListReq{Limit: 100, Path: ALGORITHM_DIR + FORWARD_SLASH + taskType, Start: 0} + req := &hpcAC.GetFileListReq{Limit: 100, Path: ALGORITHM_DIR + FORWARD_SLASH + taskType, Start: 0, Order: "asc", OrderBy: "name", KeyWord: ""} list, err := s.aCRpc.GetFileList(ctx, req) if err != nil { return nil, err