From cf5d5792ee2cc61f7bbff1eb25fea66035408d13 Mon Sep 17 00:00:00 2001 From: tzwang Date: Wed, 17 Apr 2024 06:17:13 -0400 Subject: [PATCH] updated ai scheduler api Former-commit-id: ebe3c21d6d702b4caccc8c7ba9eccf2012d151bb --- api/desc/pcm.api | 4 +- api/desc/schedule/pcm-schedule.api | 8 +- api/internal/handler/routes.go | 4 +- .../schedule/schedulegetalgorithmslogic.go | 2 +- .../schedule/schedulegetdatasetslogic.go | 5 +- .../logic/schedule/schedulesubmitlogic.go | 1 + api/internal/types/types.go | 158 +++++++++++++++++- 7 files changed, 173 insertions(+), 9 deletions(-) diff --git a/api/desc/pcm.api b/api/desc/pcm.api index 39ec7845..786c5151 100644 --- a/api/desc/pcm.api +++ b/api/desc/pcm.api @@ -899,13 +899,13 @@ service pcm { get /schedule/ai/getTaskTypes returns (AiTaskTypesResp) @handler ScheduleGetDatasetsHandler - get /schedule/ai/getDatasets returns (AiDatasetsResp) + get /schedule/ai/getDatasets/:adapterId (AiDatasetsReq) returns (AiDatasetsResp) @handler ScheduleGetStrategyHandler get /schedule/ai/getStrategies returns (AiStrategyResp) @handler ScheduleGetAlgorithmsHandler - get /schedule/ai/getAlgorithms/:resourceType/:taskType/:dataset (AiAlgorithmsReq) returns (AiAlgorithmsResp) + get /schedule/ai/getAlgorithms/:adapterId/:resourceType/:taskType/:dataset (AiAlgorithmsReq) returns (AiAlgorithmsResp) @handler ScheduleSubmitHandler post /schedule/submit (ScheduleReq) returns (ScheduleResp) diff --git a/api/desc/schedule/pcm-schedule.api b/api/desc/schedule/pcm-schedule.api index d3537c1c..3eccf3e5 100644 --- a/api/desc/schedule/pcm-schedule.api +++ b/api/desc/schedule/pcm-schedule.api @@ -26,7 +26,8 @@ type ( AiOption { TaskName string `json:"taskName"` - AiClusterId string `json:"aiClusterId,optional"` + AdapterId string `json:"adapterId"` + AiClusterIds []string `json:"aiClusterIds"` ResourceType string `json:"resourceType"` Tops float64 `json:"Tops,optional"` TaskType string `json:"taskType"` @@ -47,6 +48,10 @@ type ( TaskTypes []string `json:"taskTypes"` } + AiDatasetsReq { + AdapterId string `path:"adapterId"` + } + AiDatasetsResp { Datasets []string `json:"datasets"` } @@ -56,6 +61,7 @@ type ( } AiAlgorithmsReq { + AdapterId string `path:"adapterId"` ResourceType string `path:"resourceType"` TaskType string `path:"taskType"` Dataset string `path:"dataset"` diff --git a/api/internal/handler/routes.go b/api/internal/handler/routes.go index e4316643..431b66dc 100644 --- a/api/internal/handler/routes.go +++ b/api/internal/handler/routes.go @@ -1112,7 +1112,7 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) { }, { Method: http.MethodGet, - Path: "/schedule/ai/getDatasets", + Path: "/schedule/ai/getDatasets/:adapterId", Handler: schedule.ScheduleGetDatasetsHandler(serverCtx), }, { @@ -1122,7 +1122,7 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) { }, { Method: http.MethodGet, - Path: "/schedule/ai/getAlgorithms/:resourceType/:taskType/:dataset", + Path: "/schedule/ai/getAlgorithms/:adapterId/:resourceType/:taskType/:dataset", Handler: schedule.ScheduleGetAlgorithmsHandler(serverCtx), }, { diff --git a/api/internal/logic/schedule/schedulegetalgorithmslogic.go b/api/internal/logic/schedule/schedulegetalgorithmslogic.go index 0f26d789..009c44e0 100644 --- a/api/internal/logic/schedule/schedulegetalgorithmslogic.go +++ b/api/internal/logic/schedule/schedulegetalgorithmslogic.go @@ -26,7 +26,7 @@ func NewScheduleGetAlgorithmsLogic(ctx context.Context, svcCtx *svc.ServiceConte func (l *ScheduleGetAlgorithmsLogic) ScheduleGetAlgorithms(req *types.AiAlgorithmsReq) (resp *types.AiAlgorithmsResp, err error) { resp = &types.AiAlgorithmsResp{} - algorithms, err := storeLink.GetAlgorithms(l.ctx, l.svcCtx.Scheduler.AiService.AiCollectorAdapterMap["1777144940459986944"], req.ResourceType, req.TaskType, req.Dataset) + algorithms, err := storeLink.GetAlgorithms(l.ctx, l.svcCtx.Scheduler.AiService.AiCollectorAdapterMap[req.AdapterId], req.ResourceType, req.TaskType, req.Dataset) if err != nil { return nil, err } diff --git a/api/internal/logic/schedule/schedulegetdatasetslogic.go b/api/internal/logic/schedule/schedulegetdatasetslogic.go index 777d7435..196f9a1a 100644 --- a/api/internal/logic/schedule/schedulegetdatasetslogic.go +++ b/api/internal/logic/schedule/schedulegetdatasetslogic.go @@ -3,6 +3,7 @@ package schedule import ( "context" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/storeLink" + "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types" @@ -23,9 +24,9 @@ func NewScheduleGetDatasetsLogic(ctx context.Context, svcCtx *svc.ServiceContext } } -func (l *ScheduleGetDatasetsLogic) ScheduleGetDatasets() (resp *types.AiDatasetsResp, err error) { +func (l *ScheduleGetDatasetsLogic) ScheduleGetDatasets(req *types.AiDatasetsReq) (resp *types.AiDatasetsResp, err error) { resp = &types.AiDatasetsResp{} - names, err := storeLink.GetDatasetsNames(l.ctx, l.svcCtx.Scheduler.AiService.AiCollectorAdapterMap["1777144940459986944"]) + names, err := storeLink.GetDatasetsNames(l.ctx, l.svcCtx.Scheduler.AiService.AiCollectorAdapterMap[req.AdapterId]) if err != nil { return nil, err } diff --git a/api/internal/logic/schedule/schedulesubmitlogic.go b/api/internal/logic/schedule/schedulesubmitlogic.go index 6d070d4d..2b9956d1 100644 --- a/api/internal/logic/schedule/schedulesubmitlogic.go +++ b/api/internal/logic/schedule/schedulesubmitlogic.go @@ -27,6 +27,7 @@ func NewScheduleSubmitLogic(ctx context.Context, svcCtx *svc.ServiceContext) *Sc func (l *ScheduleSubmitLogic) ScheduleSubmit(req *types.ScheduleReq) (resp *types.ScheduleResp, err error) { resp = &types.ScheduleResp{} opt := &option.AiOption{ + AdapterId: req.AiOption.AdapterId, ResourceType: req.AiOption.ResourceType, Tops: req.AiOption.Tops, TaskType: req.AiOption.TaskType, diff --git a/api/internal/types/types.go b/api/internal/types/types.go index be9a3177..55ad50af 100644 --- a/api/internal/types/types.go +++ b/api/internal/types/types.go @@ -5278,7 +5278,8 @@ type ScheduleResult struct { type AiOption struct { TaskName string `json:"taskName"` - AiClusterId string `json:"aiClusterId,optional"` + AdapterId string `json:"adapterId"` + AiClusterIds []string `json:"aiClusterIds"` ResourceType string `json:"resourceType"` Tops float64 `json:"Tops,optional"` TaskType string `json:"taskType"` @@ -5299,6 +5300,10 @@ type AiTaskTypesResp struct { TaskTypes []string `json:"taskTypes"` } +type AiDatasetsReq struct { + AdapterId string `path:"adapterId"` +} + type AiDatasetsResp struct { Datasets []string `json:"datasets"` } @@ -5308,6 +5313,7 @@ type AiStrategyResp struct { } type AiAlgorithmsReq struct { + AdapterId string `path:"adapterId"` ResourceType string `path:"resourceType"` TaskType string `path:"taskType"` Dataset string `path:"dataset"` @@ -5317,6 +5323,156 @@ type AiAlgorithmsResp struct { Algorithms []string `json:"algorithms"` } +type PullTaskInfoReq struct { + AdapterId int64 `form:"adapterId"` +} + +type PullTaskInfoResp struct { + HpcInfoList []*HpcInfo `json:"HpcInfoList,omitempty"` + CloudInfoList []*CloudInfo `json:"CloudInfoList,omitempty"` + AiInfoList []*AiInfo `json:"AiInfoList,omitempty"` + VmInfoList []*VmInfo `json:"VmInfoList,omitempty"` +} + +type HpcInfo struct { + Id int64 `json:"id"` // id + TaskId int64 `json:"task_id"` // 任务id + JobId string `json:"job_id"` // 作业id(在第三方系统中的作业id) + AdapterId int64 `json:"adapter_id"` // 执行任务的适配器id + ClusterId int64 `json:"cluster_id"` // 执行任务的集群id + ClusterType string `json:"cluster_type"` // 执行任务的集群类型 + Name string `json:"name"` // 名称 + Status string `json:"status"` // 状态 + CmdScript string `json:"cmd_script"` + StartTime string `json:"start_time"` // 开始时间 + RunningTime int64 `json:"running_time"` // 运行时间 + DerivedEs string `json:"derived_es"` + Cluster string `json:"cluster"` + BlockId int64 `json:"block_id"` + AllocNodes int64 `json:"alloc_nodes"` + AllocCpu int64 `json:"alloc_cpu"` + CardCount int64 `json:"card_count"` // 卡数 + Version string `json:"version"` + Account string `json:"account"` + WorkDir string `json:"work_dir"` // 工作路径 + AssocId int64 `json:"assoc_id"` + ExitCode int64 `json:"exit_code"` + WallTime string `json:"wall_time"` // 最大运行时间 + Result string `json:"result"` // 运行结果 + DeletedAt string `json:"deleted_at"` // 删除时间 + YamlString string `json:"yaml_string"` + AppType string `json:"app_type"` // 应用类型 + AppName string `json:"app_name"` // 应用名称 + Queue string `json:"queue"` // 队列名称 + SubmitType string `json:"submit_type"` // cmd(命令行模式) + NNode string `json:"n_node"` // 节点个数(当指定该参数时,GAP_NODE_STRING必须为"") + StdOutFile string `json:"std_out_file"` // 工作路径/std.err.%j + StdErrFile string `json:"std_err_file"` // 工作路径/std.err.%j + StdInput string `json:"std_input"` + Environment string `json:"environment"` + DeletedFlag int64 `json:"deleted_flag"` // 是否删除(0-否,1-是) + CreatedBy int64 `json:"created_by"` // 创建人 + CreatedTime string `json:"created_time"` // 创建时间 + UpdatedBy int64 `json:"updated_by"` // 更新人 + UpdatedTime string `json:"updated_time"` // 更新时间 +} + +type CloudInfo struct { + Participant int64 `json:"participant,omitempty"` + Id int64 `json:"id,omitempty"` + TaskId int64 `json:"taskId,omitempty"` + ApiVersion string `json:"apiVersion,omitempty"` + Kind string `json:"kind,omitempty"` + Namespace string `json:"namespace,omitempty"` + Name string `json:"name,omitempty"` + Status string `json:"status,omitempty"` + StartTime string `json:"startTime,omitempty"` + RunningTime int64 `json:"runningTime,omitempty"` + Result string `json:"result,omitempty"` + YamlString string `json:"yamlString,omitempty"` +} + +type AiInfo struct { + ParticipantId int64 `json:"participantId,omitempty"` + TaskId int64 `json:"taskId,omitempty"` + ProjectId string `json:"project_id,omitempty"` + Name string `json:"name,omitempty"` + Status string `json:"status,omitempty"` + StartTime string `json:"startTime,omitempty"` + RunningTime int64 `json:"runningTime,omitempty"` + Result string `json:"result,omitempty"` + JobId string `json:"jobId,omitempty"` + CreateTime string `json:"createTime,omitempty"` + ImageUrl string `json:"imageUrl,omitempty"` + Command string `json:"command,omitempty"` + FlavorId string `json:"flavorId,omitempty"` + SubscriptionId string `json:"subscriptionId,omitempty"` + ItemVersionId string `json:"itemVersionId,omitempty"` +} + +type VmInfo struct { + ParticipantId int64 `json:"participantId,omitempty"` + TaskId int64 `json:"taskId,omitempty"` + Name string `json:"name,omitempty"` + FlavorRef string `json:"flavor_ref,omitempty"` + ImageRef string `json:"image_ref,omitempty"` + NetworkUuid string `json:"network_uuid,omitempty"` + BlockUuid string `json:"block_uuid,omitempty"` + SourceType string `json:"source_type,omitempty"` + DeleteOnTermination bool `json:"delete_on_termination,omitempty"` + Status string `json:"status,omitempty"` + MinCount string `json:"min_count,omitempty"` + Platform string `json:"platform,omitempty"` + Uuid string `json:"uuid,omitempty"` +} + +type PushTaskInfoReq struct { + AdapterId int64 `json:"adapterId"` + HpcInfoList []*HpcInfo `json:"hpcInfoList"` + CloudInfoList []*CloudInfo `json:"cloudInfoList"` + AiInfoList []*AiInfo `json:"aiInfoList"` + VmInfoList []*VmInfo `json:"vmInfoList"` +} + +type PushTaskInfoResp struct { + Code int64 `json:"code"` + Msg string `json:"msg"` +} + +type PushResourceInfoReq struct { + AdapterId int64 `json:"adapterId"` + ResourceStats []ResourceStats `json:"resourceStats"` +} + +type PushResourceInfoResp struct { + Code int64 `json:"code"` + Msg string `json:"msg"` +} + +type ResourceStats struct { + ClusterId int64 `json:"clusterId"` + Name string `json:"name"` + CpuCoreAvail int64 `json:"cpuCoreAvail"` + CpuCoreTotal int64 `json:"cpuCoreTotal"` + MemAvail float64 `json:"memAvail"` + MemTotal float64 `json:"memTotal"` + DiskAvail float64 `json:"diskAvail"` + DiskTotal float64 `json:"diskTotal"` + GpuAvail int64 `json:"gpuAvail"` + CardsAvail []*Card `json:"cardsAvail"` + CpuCoreHours float64 `json:"cpuCoreHours"` + Balance float64 `json:"balance"` +} + +type Card struct { + Platform string `json:"platform"` + Type string `json:"type"` + Name string `json:"name"` + TOpsAtFp16 float64 `json:"TOpsAtFp16"` + CardHours float64 `json:"cardHours"` + CardNum int32 `json:"cardNum"` +} + type CreateAlertRuleReq struct { CLusterId int64 `json:"clusterId"` ClusterName string `json:"clusterName"`