From 9bd725380e77ab2a8c421459dddac308fa245f9f Mon Sep 17 00:00:00 2001 From: tzwang Date: Sun, 28 Apr 2024 18:02:29 +0800 Subject: [PATCH] added ai center overview apis Former-commit-id: 02ea4ae3c93275c3c76f29aecba6ac7be32ef555 --- api/desc/ai/pcm-ai.api | 38 +++++++++++++++++++++++++++++ api/desc/pcm.api | 19 +++++++++++++++ api/desc/schedule/pcm-schedule.api | 3 +++ api/internal/scheduler/scheduler.go | 29 +++------------------- 4 files changed, 63 insertions(+), 26 deletions(-) diff --git a/api/desc/ai/pcm-ai.api b/api/desc/ai/pcm-ai.api index fcaf9d30..23cfff29 100644 --- a/api/desc/ai/pcm-ai.api +++ b/api/desc/ai/pcm-ai.api @@ -1697,6 +1697,44 @@ PayloadCreateTrainJob{ jobId string `json:"jobId,optional"` } ********************/ + + /******************Ai Center overview*************************/ + CenterOverviewResp { + CenterNum int32 `json:"totalCenters,optional"` + TaskNum int32 `json:"totalTasks,optional"` + CardNum int32 `json:"totalCards,optional"` + PowerInTops float64 `json:"totalPower,optional"` + } + + CenterQueueingResp { + Current []*CenterQueue `json:"current,optional"` + History []*CenterQueue `json:"history,optional"` + } + + CenterQueue { + Name string `json:"name,optional"` + QueueingNum int32 `json:"num,optional"` + } + + CenterListResp { + List []*Center `json:"centerList,optional"` + } + + Center { + Name string `json:"name,optional"` + StackName string `json:"stack,optional"` + Version string `json:"version,optional"` + } + + CenterTaskListResp { + List []*AiTask `json:"taskList,optional"` + } + + AiTask { + Name string `json:"name,optional"` + Status string `json:"status,optional"` + TimeElapsed int32 `json:"elapsed,optional"` + } ) /******************create TrainIngJob end*************************/ diff --git a/api/desc/pcm.api b/api/desc/pcm.api index 12d338a0..c6734f1f 100644 --- a/api/desc/pcm.api +++ b/api/desc/pcm.api @@ -219,6 +219,22 @@ service pcm { group: ai ) service pcm { + @doc "智算中心概览" + @handler getCenterOverviewHandler + 
get /ai/getCenterOverview returns (CenterOverviewResp) + + @doc "智算中心排队状况" + @handler getCenterQueueingHandler + get /ai/getCenterQueueing returns (CenterQueueingResp) + + @doc "智算中心列表" + @handler getCenterListHandler + get /ai/getCenterList returns (CenterListResp) + + @doc "智算中心任务列表" + @handler getCenterTaskListHandler + get /ai/getCenterTaskList returns (CenterTaskListResp) + @doc "查询数据集列表" @handler listDataSetHandler get /ai/listDataSet/:projectId (DataSetReq) returns (DataSetResp) @@ -927,6 +943,9 @@ service pcm { @handler ScheduleSubmitHandler post /schedule/submit (ScheduleReq) returns (ScheduleResp) + + @handler ScheduleGetOverviewHandler + post /schedule/getOverview returns (ScheduleOverviewResp) } @server( diff --git a/api/desc/schedule/pcm-schedule.api b/api/desc/schedule/pcm-schedule.api index 02783746..a3068a25 100644 --- a/api/desc/schedule/pcm-schedule.api +++ b/api/desc/schedule/pcm-schedule.api @@ -24,6 +24,9 @@ type ( Msg string `json:"msg"` } + ScheduleOverviewResp { + } + AiOption { TaskName string `json:"taskName"` AdapterId string `json:"adapterId"` diff --git a/api/internal/scheduler/scheduler.go b/api/internal/scheduler/scheduler.go index d214e76a..bbdb1f23 100644 --- a/api/internal/scheduler/scheduler.go +++ b/api/internal/scheduler/scheduler.go @@ -129,42 +129,19 @@ func (s *Scheduler) TempAssign() error { } func (s *Scheduler) AssignAndSchedule(ss SubSchedule) (interface{}, error) { - //// 已指定 ParticipantId - //if s.task.ParticipantId != 0 { - // return nil - //} - //// 标签匹配以及后,未找到ParticipantIds - //if len(s.participantIds) == 0 { - // return errors.New("未找到匹配的ParticipantIds") - //} - // - //// 指定或者标签匹配的结果只有一个集群,给任务信息指定 - //if len(s.participantIds) == 1 { - // s.task.ParticipantId = s.participantIds[0] - // //replicas := s.task.Metadata.(map[string]interface{})["spec"].(map[string]interface{})["replicas"].(float64) - // //result := make(map[int64]string) - // //result[s.participantIds[0]] = strconv.FormatFloat(replicas, 'f', 2, 64) - // 
//s.result = result - // - // return nil - //} - + //choose strategy strategy, err := ss.PickOptimalStrategy() if err != nil { return nil, err } + //schedule clusters, err := strategy.Schedule() if err != nil { return nil, err } - //集群数量不满足,指定到标签匹配后第一个集群 - //if len(providerList) < 2 { - // s.task.ParticipantId = s.participantIds[0] - // return nil - //} - + //assign tasks to clusters resp, err := ss.AssignTask(clusters) if err != nil { return nil, err