added ai center overview apis

Former-commit-id: 02ea4ae3c93275c3c76f29aecba6ac7be32ef555
This commit is contained in:
tzwang 2024-04-28 18:02:29 +08:00
parent 190c158322
commit 9bd725380e
4 changed files with 63 additions and 26 deletions

View File

@ -1697,6 +1697,44 @@ PayloadCreateTrainJob{
jobId string `json:"jobId,optional"`
}
********************/
/******************Ai Center overview*************************/
CenterOverviewResp {
CenterNum int32 `json:"totalCenters,optional"`
TaskNum int32 `json:"totalTasks,optional"`
CardNum int32 `json:"totalCards,optional"`
PowerInTops float64 `json:"totalPower,optional"`
}
CenterQueueingResp {
Current []*CenterQueue `json:"current,optional"`
History []*CenterQueue `json:"history,optional"`
}
CenterQueue {
Name string `json:"name,optional"`
QueueingNum int32 `json:"num,optional"`
}
CenterListResp {
List []*Center `json:"centerList,optional"`
}
Center {
Name string `json:"name,optional"`
StackName string `json:"stack,optional"`
Version string `json:"version,optional"`
}
CenterTaskListResp {
List []*AiTask `json:"taskList,optional"`
}
AiTask {
Name string `json:"name,optional"`
status string `json:"status,optional"`
TimeElapsed int32 `json:"elapsed,optional"`
}
)
/******************create TrainIngJob end*************************/

View File

@ -219,6 +219,22 @@ service pcm {
group: ai
)
service pcm {
@doc "智算中心概览"
@handler getCenterOverviewHandler
get /ai/getCenterOverview returns (CenterOverviewResp)
@doc "智算中心排队状况"
@handler getCenterQueueingHandler
get /ai/getCenterQueueing returns (CenterQueueingResp)
@doc "智算中心列表"
@handler getCenterListHandler
get /ai/getCenterList returns (CenterListResp)
@doc "智算中心任务列表"
@handler getCenterTaskListHandler
get /ai/getCenterTaskList returns (CenterTaskListResp)
@doc "查询数据集列表"
@handler listDataSetHandler
get /ai/listDataSet/:projectId (DataSetReq) returns (DataSetResp)
@ -927,6 +943,9 @@ service pcm {
@handler ScheduleSubmitHandler
post /schedule/submit (ScheduleReq) returns (ScheduleResp)
@handler ScheduleGetOverviewHandler
post /schedule/getOverview returns (ScheduleOverviewResp)
}
@server(

View File

@ -24,6 +24,9 @@ type (
Msg string `json:"msg"`
}
ScheduleOverviewResp {
}
AiOption {
TaskName string `json:"taskName"`
AdapterId string `json:"adapterId"`

View File

@ -129,42 +129,19 @@ func (s *Scheduler) TempAssign() error {
}
func (s *Scheduler) AssignAndSchedule(ss SubSchedule) (interface{}, error) {
//// 已指定 ParticipantId
//if s.task.ParticipantId != 0 {
// return nil
//}
//// 标签匹配以及后未找到ParticipantIds
//if len(s.participantIds) == 0 {
// return errors.New("未找到匹配的ParticipantIds")
//}
//
//// 指定或者标签匹配的结果只有一个集群,给任务信息指定
//if len(s.participantIds) == 1 {
// s.task.ParticipantId = s.participantIds[0]
// //replicas := s.task.Metadata.(map[string]interface{})["spec"].(map[string]interface{})["replicas"].(float64)
// //result := make(map[int64]string)
// //result[s.participantIds[0]] = strconv.FormatFloat(replicas, 'f', 2, 64)
// //s.result = result
//
// return nil
//}
//choose strategy
strategy, err := ss.PickOptimalStrategy()
if err != nil {
return nil, err
}
//schedule
clusters, err := strategy.Schedule()
if err != nil {
return nil, err
}
//集群数量不满足,指定到标签匹配后第一个集群
//if len(providerList) < 2 {
// s.task.ParticipantId = s.participantIds[0]
// return nil
//}
//assign tasks to clusters
resp, err := ss.AssignTask(clusters)
if err != nil {
return nil, err