updated ai scheduler api

Former-commit-id: ebe3c21d6d702b4caccc8c7ba9eccf2012d151bb
This commit is contained in:
tzwang 2024-04-17 06:17:13 -04:00
parent ea7216e153
commit cf5d5792ee
7 changed files with 173 additions and 9 deletions

View File

@ -899,13 +899,13 @@ service pcm {
get /schedule/ai/getTaskTypes returns (AiTaskTypesResp) get /schedule/ai/getTaskTypes returns (AiTaskTypesResp)
@handler ScheduleGetDatasetsHandler @handler ScheduleGetDatasetsHandler
get /schedule/ai/getDatasets returns (AiDatasetsResp) get /schedule/ai/getDatasets/:adapterId (AiDatasetsReq) returns (AiDatasetsResp)
@handler ScheduleGetStrategyHandler @handler ScheduleGetStrategyHandler
get /schedule/ai/getStrategies returns (AiStrategyResp) get /schedule/ai/getStrategies returns (AiStrategyResp)
@handler ScheduleGetAlgorithmsHandler @handler ScheduleGetAlgorithmsHandler
get /schedule/ai/getAlgorithms/:resourceType/:taskType/:dataset (AiAlgorithmsReq) returns (AiAlgorithmsResp) get /schedule/ai/getAlgorithms/:adapterId/:resourceType/:taskType/:dataset (AiAlgorithmsReq) returns (AiAlgorithmsResp)
@handler ScheduleSubmitHandler @handler ScheduleSubmitHandler
post /schedule/submit (ScheduleReq) returns (ScheduleResp) post /schedule/submit (ScheduleReq) returns (ScheduleResp)

View File

@ -26,7 +26,8 @@ type (
AiOption { AiOption {
TaskName string `json:"taskName"` TaskName string `json:"taskName"`
AiClusterId string `json:"aiClusterId,optional"` AdapterId string `json:"adapterId"`
AiClusterIds []string `json:"aiClusterIds"`
ResourceType string `json:"resourceType"` ResourceType string `json:"resourceType"`
Tops float64 `json:"Tops,optional"` Tops float64 `json:"Tops,optional"`
TaskType string `json:"taskType"` TaskType string `json:"taskType"`
@ -47,6 +48,10 @@ type (
TaskTypes []string `json:"taskTypes"` TaskTypes []string `json:"taskTypes"`
} }
AiDatasetsReq {
AdapterId string `path:"adapterId"`
}
AiDatasetsResp { AiDatasetsResp {
Datasets []string `json:"datasets"` Datasets []string `json:"datasets"`
} }
@ -56,6 +61,7 @@ type (
} }
AiAlgorithmsReq { AiAlgorithmsReq {
AdapterId string `path:"adapterId"`
ResourceType string `path:"resourceType"` ResourceType string `path:"resourceType"`
TaskType string `path:"taskType"` TaskType string `path:"taskType"`
Dataset string `path:"dataset"` Dataset string `path:"dataset"`

View File

@ -1112,7 +1112,7 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
}, },
{ {
Method: http.MethodGet, Method: http.MethodGet,
Path: "/schedule/ai/getDatasets", Path: "/schedule/ai/getDatasets/:adapterId",
Handler: schedule.ScheduleGetDatasetsHandler(serverCtx), Handler: schedule.ScheduleGetDatasetsHandler(serverCtx),
}, },
{ {
@ -1122,7 +1122,7 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
}, },
{ {
Method: http.MethodGet, Method: http.MethodGet,
Path: "/schedule/ai/getAlgorithms/:resourceType/:taskType/:dataset", Path: "/schedule/ai/getAlgorithms/:adapterId/:resourceType/:taskType/:dataset",
Handler: schedule.ScheduleGetAlgorithmsHandler(serverCtx), Handler: schedule.ScheduleGetAlgorithmsHandler(serverCtx),
}, },
{ {

View File

@ -26,7 +26,7 @@ func NewScheduleGetAlgorithmsLogic(ctx context.Context, svcCtx *svc.ServiceConte
func (l *ScheduleGetAlgorithmsLogic) ScheduleGetAlgorithms(req *types.AiAlgorithmsReq) (resp *types.AiAlgorithmsResp, err error) { func (l *ScheduleGetAlgorithmsLogic) ScheduleGetAlgorithms(req *types.AiAlgorithmsReq) (resp *types.AiAlgorithmsResp, err error) {
resp = &types.AiAlgorithmsResp{} resp = &types.AiAlgorithmsResp{}
algorithms, err := storeLink.GetAlgorithms(l.ctx, l.svcCtx.Scheduler.AiService.AiCollectorAdapterMap["1777144940459986944"], req.ResourceType, req.TaskType, req.Dataset) algorithms, err := storeLink.GetAlgorithms(l.ctx, l.svcCtx.Scheduler.AiService.AiCollectorAdapterMap[req.AdapterId], req.ResourceType, req.TaskType, req.Dataset)
if err != nil { if err != nil {
return nil, err return nil, err
} }

View File

@ -3,6 +3,7 @@ package schedule
import ( import (
"context" "context"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/storeLink" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/storeLink"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
@ -23,9 +24,9 @@ func NewScheduleGetDatasetsLogic(ctx context.Context, svcCtx *svc.ServiceContext
} }
} }
func (l *ScheduleGetDatasetsLogic) ScheduleGetDatasets() (resp *types.AiDatasetsResp, err error) { func (l *ScheduleGetDatasetsLogic) ScheduleGetDatasets(req *types.AiDatasetsReq) (resp *types.AiDatasetsResp, err error) {
resp = &types.AiDatasetsResp{} resp = &types.AiDatasetsResp{}
names, err := storeLink.GetDatasetsNames(l.ctx, l.svcCtx.Scheduler.AiService.AiCollectorAdapterMap["1777144940459986944"]) names, err := storeLink.GetDatasetsNames(l.ctx, l.svcCtx.Scheduler.AiService.AiCollectorAdapterMap[req.AdapterId])
if err != nil { if err != nil {
return nil, err return nil, err
} }

View File

@ -27,6 +27,7 @@ func NewScheduleSubmitLogic(ctx context.Context, svcCtx *svc.ServiceContext) *Sc
func (l *ScheduleSubmitLogic) ScheduleSubmit(req *types.ScheduleReq) (resp *types.ScheduleResp, err error) { func (l *ScheduleSubmitLogic) ScheduleSubmit(req *types.ScheduleReq) (resp *types.ScheduleResp, err error) {
resp = &types.ScheduleResp{} resp = &types.ScheduleResp{}
opt := &option.AiOption{ opt := &option.AiOption{
AdapterId: req.AiOption.AdapterId,
ResourceType: req.AiOption.ResourceType, ResourceType: req.AiOption.ResourceType,
Tops: req.AiOption.Tops, Tops: req.AiOption.Tops,
TaskType: req.AiOption.TaskType, TaskType: req.AiOption.TaskType,

View File

@ -5278,7 +5278,8 @@ type ScheduleResult struct {
type AiOption struct { type AiOption struct {
TaskName string `json:"taskName"` TaskName string `json:"taskName"`
AiClusterId string `json:"aiClusterId,optional"` AdapterId string `json:"adapterId"`
AiClusterIds []string `json:"aiClusterIds"`
ResourceType string `json:"resourceType"` ResourceType string `json:"resourceType"`
Tops float64 `json:"Tops,optional"` Tops float64 `json:"Tops,optional"`
TaskType string `json:"taskType"` TaskType string `json:"taskType"`
@ -5299,6 +5300,10 @@ type AiTaskTypesResp struct {
TaskTypes []string `json:"taskTypes"` TaskTypes []string `json:"taskTypes"`
} }
type AiDatasetsReq struct {
AdapterId string `path:"adapterId"`
}
type AiDatasetsResp struct { type AiDatasetsResp struct {
Datasets []string `json:"datasets"` Datasets []string `json:"datasets"`
} }
@ -5308,6 +5313,7 @@ type AiStrategyResp struct {
} }
type AiAlgorithmsReq struct { type AiAlgorithmsReq struct {
AdapterId string `path:"adapterId"`
ResourceType string `path:"resourceType"` ResourceType string `path:"resourceType"`
TaskType string `path:"taskType"` TaskType string `path:"taskType"`
Dataset string `path:"dataset"` Dataset string `path:"dataset"`
@ -5317,6 +5323,156 @@ type AiAlgorithmsResp struct {
Algorithms []string `json:"algorithms"` Algorithms []string `json:"algorithms"`
} }
type PullTaskInfoReq struct {
AdapterId int64 `form:"adapterId"`
}
type PullTaskInfoResp struct {
HpcInfoList []*HpcInfo `json:"HpcInfoList,omitempty"`
CloudInfoList []*CloudInfo `json:"CloudInfoList,omitempty"`
AiInfoList []*AiInfo `json:"AiInfoList,omitempty"`
VmInfoList []*VmInfo `json:"VmInfoList,omitempty"`
}
type HpcInfo struct {
Id int64 `json:"id"` // id
TaskId int64 `json:"task_id"` // 任务id
JobId string `json:"job_id"` // 作业id(在第三方系统中的作业id)
AdapterId int64 `json:"adapter_id"` // 执行任务的适配器id
ClusterId int64 `json:"cluster_id"` // 执行任务的集群id
ClusterType string `json:"cluster_type"` // 执行任务的集群类型
Name string `json:"name"` // 名称
Status string `json:"status"` // 状态
CmdScript string `json:"cmd_script"`
StartTime string `json:"start_time"` // 开始时间
RunningTime int64 `json:"running_time"` // 运行时间
DerivedEs string `json:"derived_es"`
Cluster string `json:"cluster"`
BlockId int64 `json:"block_id"`
AllocNodes int64 `json:"alloc_nodes"`
AllocCpu int64 `json:"alloc_cpu"`
CardCount int64 `json:"card_count"` // 卡数
Version string `json:"version"`
Account string `json:"account"`
WorkDir string `json:"work_dir"` // 工作路径
AssocId int64 `json:"assoc_id"`
ExitCode int64 `json:"exit_code"`
WallTime string `json:"wall_time"` // 最大运行时间
Result string `json:"result"` // 运行结果
DeletedAt string `json:"deleted_at"` // 删除时间
YamlString string `json:"yaml_string"`
AppType string `json:"app_type"` // 应用类型
AppName string `json:"app_name"` // 应用名称
Queue string `json:"queue"` // 队列名称
SubmitType string `json:"submit_type"` // cmd命令行模式
NNode string `json:"n_node"` // 节点个数当指定该参数时GAP_NODE_STRING必须为""
StdOutFile string `json:"std_out_file"` // 工作路径/std.err.%j
StdErrFile string `json:"std_err_file"` // 工作路径/std.err.%j
StdInput string `json:"std_input"`
Environment string `json:"environment"`
DeletedFlag int64 `json:"deleted_flag"` // 是否删除0-否1-是)
CreatedBy int64 `json:"created_by"` // 创建人
CreatedTime string `json:"created_time"` // 创建时间
UpdatedBy int64 `json:"updated_by"` // 更新人
UpdatedTime string `json:"updated_time"` // 更新时间
}
type CloudInfo struct {
Participant int64 `json:"participant,omitempty"`
Id int64 `json:"id,omitempty"`
TaskId int64 `json:"taskId,omitempty"`
ApiVersion string `json:"apiVersion,omitempty"`
Kind string `json:"kind,omitempty"`
Namespace string `json:"namespace,omitempty"`
Name string `json:"name,omitempty"`
Status string `json:"status,omitempty"`
StartTime string `json:"startTime,omitempty"`
RunningTime int64 `json:"runningTime,omitempty"`
Result string `json:"result,omitempty"`
YamlString string `json:"yamlString,omitempty"`
}
type AiInfo struct {
ParticipantId int64 `json:"participantId,omitempty"`
TaskId int64 `json:"taskId,omitempty"`
ProjectId string `json:"project_id,omitempty"`
Name string `json:"name,omitempty"`
Status string `json:"status,omitempty"`
StartTime string `json:"startTime,omitempty"`
RunningTime int64 `json:"runningTime,omitempty"`
Result string `json:"result,omitempty"`
JobId string `json:"jobId,omitempty"`
CreateTime string `json:"createTime,omitempty"`
ImageUrl string `json:"imageUrl,omitempty"`
Command string `json:"command,omitempty"`
FlavorId string `json:"flavorId,omitempty"`
SubscriptionId string `json:"subscriptionId,omitempty"`
ItemVersionId string `json:"itemVersionId,omitempty"`
}
type VmInfo struct {
ParticipantId int64 `json:"participantId,omitempty"`
TaskId int64 `json:"taskId,omitempty"`
Name string `json:"name,omitempty"`
FlavorRef string `json:"flavor_ref,omitempty"`
ImageRef string `json:"image_ref,omitempty"`
NetworkUuid string `json:"network_uuid,omitempty"`
BlockUuid string `json:"block_uuid,omitempty"`
SourceType string `json:"source_type,omitempty"`
DeleteOnTermination bool `json:"delete_on_termination,omitempty"`
Status string `json:"status,omitempty"`
MinCount string `json:"min_count,omitempty"`
Platform string `json:"platform,omitempty"`
Uuid string `json:"uuid,omitempty"`
}
type PushTaskInfoReq struct {
AdapterId int64 `json:"adapterId"`
HpcInfoList []*HpcInfo `json:"hpcInfoList"`
CloudInfoList []*CloudInfo `json:"cloudInfoList"`
AiInfoList []*AiInfo `json:"aiInfoList"`
VmInfoList []*VmInfo `json:"vmInfoList"`
}
type PushTaskInfoResp struct {
Code int64 `json:"code"`
Msg string `json:"msg"`
}
type PushResourceInfoReq struct {
AdapterId int64 `json:"adapterId"`
ResourceStats []ResourceStats `json:"resourceStats"`
}
type PushResourceInfoResp struct {
Code int64 `json:"code"`
Msg string `json:"msg"`
}
type ResourceStats struct {
ClusterId int64 `json:"clusterId"`
Name string `json:"name"`
CpuCoreAvail int64 `json:"cpuCoreAvail"`
CpuCoreTotal int64 `json:"cpuCoreTotal"`
MemAvail float64 `json:"memAvail"`
MemTotal float64 `json:"memTotal"`
DiskAvail float64 `json:"diskAvail"`
DiskTotal float64 `json:"diskTotal"`
GpuAvail int64 `json:"gpuAvail"`
CardsAvail []*Card `json:"cardsAvail"`
CpuCoreHours float64 `json:"cpuCoreHours"`
Balance float64 `json:"balance"`
}
type Card struct {
Platform string `json:"platform"`
Type string `json:"type"`
Name string `json:"name"`
TOpsAtFp16 float64 `json:"TOpsAtFp16"`
CardHours float64 `json:"cardHours"`
CardNum int32 `json:"cardNum"`
}
type CreateAlertRuleReq struct { type CreateAlertRuleReq struct {
CLusterId int64 `json:"clusterId"` CLusterId int64 `json:"clusterId"`
ClusterName string `json:"clusterName"` ClusterName string `json:"clusterName"`