diff --git a/api/desc/pcm.api b/api/desc/pcm.api index 08959e26..561ceb67 100644 --- a/api/desc/pcm.api +++ b/api/desc/pcm.api @@ -906,13 +906,13 @@ service pcm { get /schedule/ai/getTaskTypes returns (AiTaskTypesResp) @handler ScheduleGetDatasetsHandler - get /schedule/ai/getDatasets returns (AiDatasetsResp) + get /schedule/ai/getDatasets/:adapterId (AiDatasetsReq) returns (AiDatasetsResp) @handler ScheduleGetStrategyHandler get /schedule/ai/getStrategies returns (AiStrategyResp) @handler ScheduleGetAlgorithmsHandler - get /schedule/ai/getAlgorithms/:resourceType/:taskType/:dataset (AiAlgorithmsReq) returns (AiAlgorithmsResp) + get /schedule/ai/getAlgorithms/:adapterId/:resourceType/:taskType/:dataset (AiAlgorithmsReq) returns (AiAlgorithmsResp) @handler ScheduleSubmitHandler post /schedule/submit (ScheduleReq) returns (ScheduleResp) diff --git a/api/desc/schedule/pcm-schedule.api b/api/desc/schedule/pcm-schedule.api index d3537c1c..3eccf3e5 100644 --- a/api/desc/schedule/pcm-schedule.api +++ b/api/desc/schedule/pcm-schedule.api @@ -26,7 +26,8 @@ type ( AiOption { TaskName string `json:"taskName"` - AiClusterId string `json:"aiClusterId,optional"` + AdapterId string `json:"adapterId"` + AiClusterIds []string `json:"aiClusterIds"` ResourceType string `json:"resourceType"` Tops float64 `json:"Tops,optional"` TaskType string `json:"taskType"` @@ -47,6 +48,10 @@ type ( TaskTypes []string `json:"taskTypes"` } + AiDatasetsReq { + AdapterId string `path:"adapterId"` + } + AiDatasetsResp { Datasets []string `json:"datasets"` } @@ -56,6 +61,7 @@ type ( } AiAlgorithmsReq { + AdapterId string `path:"adapterId"` ResourceType string `path:"resourceType"` TaskType string `path:"taskType"` Dataset string `path:"dataset"` diff --git a/api/internal/handler/routes.go b/api/internal/handler/routes.go index 7669d1bd..0e330b0a 100644 --- a/api/internal/handler/routes.go +++ b/api/internal/handler/routes.go @@ -1122,7 +1122,7 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) { }, { Method: http.MethodGet, - Path: "/schedule/ai/getDatasets", + Path: "/schedule/ai/getDatasets/:adapterId", Handler: schedule.ScheduleGetDatasetsHandler(serverCtx), }, { @@ -1132,7 +1132,7 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) { }, { Method: http.MethodGet, - Path: "/schedule/ai/getAlgorithms/:resourceType/:taskType/:dataset", + Path: "/schedule/ai/getAlgorithms/:adapterId/:resourceType/:taskType/:dataset", Handler: schedule.ScheduleGetAlgorithmsHandler(serverCtx), }, { diff --git a/api/internal/handler/schedule/schedulegetdatasetshandler.go b/api/internal/handler/schedule/schedulegetdatasetshandler.go index 5dc32bb7..6dae2970 100644 --- a/api/internal/handler/schedule/schedulegetdatasetshandler.go +++ b/api/internal/handler/schedule/schedulegetdatasetshandler.go @@ -1,16 +1,24 @@ package schedule import ( + "github.com/zeromicro/go-zero/rest/httpx" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/schedule" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc" + "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result" "net/http" ) func ScheduleGetDatasetsHandler(svcCtx *svc.ServiceContext) http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { + var req types.AiDatasetsReq + if err := httpx.Parse(r, &req); err != nil { + result.ParamErrorResult(r, w, err) + return + } + l := schedule.NewScheduleGetDatasetsLogic(r.Context(), svcCtx) - resp, err := l.ScheduleGetDatasets() + resp, err := l.ScheduleGetDatasets(&req) result.HttpResult(r, w, resp, err) } } diff --git a/api/internal/logic/schedule/schedulegetalgorithmslogic.go b/api/internal/logic/schedule/schedulegetalgorithmslogic.go index 2c78efd3..009c44e0 100644 --- a/api/internal/logic/schedule/schedulegetalgorithmslogic.go +++ b/api/internal/logic/schedule/schedulegetalgorithmslogic.go @@ -26,7 +26,7 @@ func NewScheduleGetAlgorithmsLogic(ctx context.Context, svcCtx *svc.ServiceConte func (l *ScheduleGetAlgorithmsLogic) ScheduleGetAlgorithms(req *types.AiAlgorithmsReq) (resp *types.AiAlgorithmsResp, err error) { resp = &types.AiAlgorithmsResp{} - algorithms, err := storeLink.GetAlgorithms(l.ctx, l.svcCtx.Scheduler.ResourceCollector, req.ResourceType, req.TaskType, req.Dataset) + algorithms, err := storeLink.GetAlgorithms(l.ctx, l.svcCtx.Scheduler.AiService.AiCollectorAdapterMap[req.AdapterId], req.ResourceType, req.TaskType, req.Dataset) if err != nil { return nil, err } diff --git a/api/internal/logic/schedule/schedulegetdatasetslogic.go b/api/internal/logic/schedule/schedulegetdatasetslogic.go index f7aeab14..196f9a1a 100644 --- a/api/internal/logic/schedule/schedulegetdatasetslogic.go +++ b/api/internal/logic/schedule/schedulegetdatasetslogic.go @@ -3,6 +3,7 @@ package schedule import ( "context" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/storeLink" + "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types" @@ -23,9 +24,9 @@ func NewScheduleGetDatasetsLogic(ctx context.Context, svcCtx *svc.ServiceContext } } -func (l *ScheduleGetDatasetsLogic) ScheduleGetDatasets() (resp *types.AiDatasetsResp, err error) { +func (l *ScheduleGetDatasetsLogic) ScheduleGetDatasets(req *types.AiDatasetsReq) (resp *types.AiDatasetsResp, err error) { resp = &types.AiDatasetsResp{} - names, err := storeLink.GetDatasetsNames(l.ctx, l.svcCtx.Scheduler.ResourceCollector) + names, err := storeLink.GetDatasetsNames(l.ctx, l.svcCtx.Scheduler.AiService.AiCollectorAdapterMap[req.AdapterId]) if err != nil { return nil, err } diff --git a/api/internal/logic/schedule/schedulesubmitlogic.go b/api/internal/logic/schedule/schedulesubmitlogic.go index 6d070d4d..2b9956d1 100644 --- a/api/internal/logic/schedule/schedulesubmitlogic.go +++ b/api/internal/logic/schedule/schedulesubmitlogic.go @@ -27,6 +27,7 @@ func NewScheduleSubmitLogic(ctx context.Context, svcCtx *svc.ServiceContext) *Sc func (l *ScheduleSubmitLogic) ScheduleSubmit(req *types.ScheduleReq) (resp *types.ScheduleResp, err error) { resp = &types.ScheduleResp{} opt := &option.AiOption{ + AdapterId: req.AiOption.AdapterId, ResourceType: req.AiOption.ResourceType, Tops: req.AiOption.Tops, TaskType: req.AiOption.TaskType, diff --git a/api/internal/scheduler/database/aiStorage.go b/api/internal/scheduler/database/aiStorage.go index c0b706f2..2cf648aa 100644 --- a/api/internal/scheduler/database/aiStorage.go +++ b/api/internal/scheduler/database/aiStorage.go @@ -33,6 +33,21 @@ func (s *AiStorage) GetClustersByAdapterId(id string) (*types.ClusterListResp, e return &resp, nil } +func (s *AiStorage) GetAdapterIdsByType(adapterType string) ([]string, error) { + var list []types.AdapterInfo + var ids []string + db := s.DbEngin.Model(&types.AdapterInfo{}).Table("t_adapter") + db = db.Where("type = ?", adapterType) + err := db.Order("create_time desc").Find(&list).Error + if err != nil { + return nil, err + } + for _, info := range list { + ids = append(ids, info.Id) + } + return ids, nil +} + func (s *AiStorage) SaveTask(name string) error { // 构建主任务结构体 taskModel := models.Task{ diff --git a/api/internal/scheduler/scheduler.go b/api/internal/scheduler/scheduler.go index 75aa115f..d214e76a 100644 --- a/api/internal/scheduler/scheduler.go +++ b/api/internal/scheduler/scheduler.go @@ -20,8 +20,7 @@ import ( "github.com/zeromicro/go-zero/core/logx" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/common" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/database" - "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/collector" - "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/executor" + "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy" "gitlink.org.cn/JointCloud/pcm-coordinator/api/pkg/response" "gitlink.org.cn/JointCloud/pcm-coordinator/rpc/client/participantservice" @@ -32,16 +31,15 @@ import ( ) type Scheduler struct { - task *response.TaskInfo - participantIds []int64 - subSchedule SubSchedule - dbEngin *gorm.DB - result []string //pID:子任务yamlstring 键值对 - participantRpc participantservice.ParticipantService - ResourceCollector *map[string]collector.AiCollector - AiStorages *database.AiStorage - AiExecutor *map[string]executor.AiExecutor - mu sync.RWMutex + task *response.TaskInfo + participantIds []int64 + subSchedule SubSchedule + dbEngin *gorm.DB + result []string //pID:子任务yamlstring 键值对 + participantRpc participantservice.ParticipantService + AiStorages *database.AiStorage + AiService *service.AiService + mu sync.RWMutex } type SubSchedule interface { @@ -59,8 +57,8 @@ func NewScheduler(subSchedule SubSchedule, val string, dbEngin *gorm.DB, partici return &Scheduler{task: task, subSchedule: subSchedule, dbEngin: dbEngin, participantRpc: participantRpc}, nil } -func NewSchdlr(resourceCollector *map[string]collector.AiCollector, storages *database.AiStorage, aiExecutor *map[string]executor.AiExecutor) *Scheduler { - return &Scheduler{ResourceCollector: resourceCollector, AiStorages: storages, AiExecutor: aiExecutor} +func NewSchdlr(aiService *service.AiService, storages *database.AiStorage) *Scheduler { + return &Scheduler{AiService: aiService, AiStorages: storages} } func (s *Scheduler) SpecifyClusters() { diff --git a/api/internal/scheduler/schedulers/aiScheduler.go b/api/internal/scheduler/schedulers/aiScheduler.go index 7026f7e5..a3e3e366 100644 --- a/api/internal/scheduler/schedulers/aiScheduler.go +++ b/api/internal/scheduler/schedulers/aiScheduler.go @@ -64,9 +64,8 @@ func (as *AiScheduler) GetNewStructForDb(task *response.TaskInfo, resource strin } func (as *AiScheduler) PickOptimalStrategy() (strategy.Strategy, error) { - if as.option.AiClusterId != "" { - // TODO database operation Find - return &strategy.SingleAssignment{Cluster: &strategy.AssignedCluster{ClusterId: "", Replicas: 1}}, nil + if len(as.option.ClusterIds) == 1 { + return &strategy.SingleAssignment{Cluster: &strategy.AssignedCluster{ClusterId: as.option.ClusterIds[0], Replicas: 1}}, nil } resources, err := as.findClustersWithResources() @@ -131,7 +130,7 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) (interfa var ch = make(chan *AiResult, len(clusters)) var errCh = make(chan interface{}, len(clusters)) - executorMap := *as.AiExecutor + executorMap := as.AiService.AiExecutorAdapterMap[as.option.AdapterId] for _, cluster := range clusters { c := cluster wg.Add(1) @@ -202,13 +201,14 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) (interfa func (as *AiScheduler) findClustersWithResources() ([]*collector.ResourceStats, error) { var wg sync.WaitGroup - var ch = make(chan *collector.ResourceStats, len(*as.ResourceCollector)) - var errCh = make(chan interface{}, len(*as.ResourceCollector)) + var clustersNum = len(as.AiService.AiCollectorAdapterMap[as.option.AdapterId]) + var ch = make(chan *collector.ResourceStats, clustersNum) + var errCh = make(chan interface{}, clustersNum) var resourceSpecs []*collector.ResourceStats var errs []interface{} - for s, resourceCollector := range *as.ResourceCollector { + for s, resourceCollector := range as.AiService.AiCollectorAdapterMap[as.option.AdapterId] { wg.Add(1) rc := resourceCollector id := s @@ -242,7 +242,7 @@ func (as *AiScheduler) findClustersWithResources() ([]*collector.ResourceStats, errs = append(errs, e) } - if len(errs) == len(*as.ResourceCollector) { + if len(errs) == clustersNum { return nil, errors.New("get resources failed") } diff --git a/api/internal/scheduler/schedulers/option/aiOption.go b/api/internal/scheduler/schedulers/option/aiOption.go index 735a8610..f8a6495f 100644 --- a/api/internal/scheduler/schedulers/option/aiOption.go +++ b/api/internal/scheduler/schedulers/option/aiOption.go @@ -1,7 +1,8 @@ package option type AiOption struct { - AiClusterId string // shuguangAi /octopus ClusterId + AdapterId string + ClusterIds []string TaskName string ResourceType string // cpu/gpu/compute card CpuCoreNum int64 diff --git a/api/internal/scheduler/service/aiService.go b/api/internal/scheduler/service/aiService.go index 9fa480c7..93188f9d 100644 --- a/api/internal/scheduler/service/aiService.go +++ b/api/internal/scheduler/service/aiService.go @@ -1,11 +1,14 @@ package service import ( + "github.com/zeromicro/go-zero/zrpc" "gitlink.org.cn/JointCloud/pcm-ac/hpcacclient" + "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/config" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/database" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/collector" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/executor" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/storeLink" + "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types" "gitlink.org.cn/JointCloud/pcm-octopus/octopusclient" "gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/client/imagesservice" "gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/client/modelartsservice" @@ -18,30 +21,60 @@ const ( SHUGUANGAI = "shuguangAi" ) -func InitAiClusterMap(octopusRpc octopusclient.Octopus, modelArtsRpc modelartsservice.ModelArtsService, modelArtsImgRpc imagesservice.ImagesService, aCRpc hpcacclient.HpcAC, storages *database.AiStorage) (*map[string]executor.AiExecutor, *map[string]collector.AiCollector) { - clusters, _ := storages.GetClustersByAdapterId("1777144940459986944") +type AiService struct { + AiExecutorAdapterMap map[string]map[string]executor.AiExecutor + AiCollectorAdapterMap map[string]map[string]collector.AiCollector +} +func NewAiService(conf *config.Config, storages *database.AiStorage) (*AiService, error) { + var aiType = "1" + adapterIds, err := storages.GetAdapterIdsByType(aiType) + if err != nil { + return nil, err + } + aiService := &AiService{ + AiExecutorAdapterMap: make(map[string]map[string]executor.AiExecutor), + AiCollectorAdapterMap: make(map[string]map[string]collector.AiCollector), + } + for _, id := range adapterIds { + clusters, err := storages.GetClustersByAdapterId(id) + if err != nil { + return nil, err + } + exeClusterMap, colClusterMap := InitAiClusterMap(conf, clusters.List) + aiService.AiExecutorAdapterMap[id] = exeClusterMap + aiService.AiCollectorAdapterMap[id] = colClusterMap + } + + return aiService, nil +} + +func InitAiClusterMap(conf *config.Config, clusters []types.ClusterInfo) (map[string]executor.AiExecutor, map[string]collector.AiCollector) { executorMap := make(map[string]executor.AiExecutor) collectorMap := make(map[string]collector.AiCollector) - for _, c := range clusters.List { + for _, c := range clusters { switch c.Name { case OCTOPUS: id, _ := strconv.ParseInt(c.Id, 10, 64) + octopusRpc := octopusclient.NewOctopus(zrpc.MustNewClient(conf.OctopusRpcConf)) octopus := storeLink.NewOctopusLink(octopusRpc, c.Nickname, id) collectorMap[c.Id] = octopus executorMap[c.Id] = octopus case MODELARTS: id, _ := strconv.ParseInt(c.Id, 10, 64) + modelArtsRpc := modelartsservice.NewModelArtsService(zrpc.MustNewClient(conf.ModelArtsRpcConf)) + modelArtsImgRpc := imagesservice.NewImagesService(zrpc.MustNewClient(conf.ModelArtsImgRpcConf)) modelarts := storeLink.NewModelArtsLink(modelArtsRpc, modelArtsImgRpc, c.Nickname, id) collectorMap[c.Id] = modelarts executorMap[c.Id] = modelarts case SHUGUANGAI: id, _ := strconv.ParseInt(c.Id, 10, 64) + aCRpc := hpcacclient.NewHpcAC(zrpc.MustNewClient(conf.ACRpcConf)) sgai := storeLink.NewShuguangAi(aCRpc, c.Nickname, id) collectorMap[c.Id] = sgai executorMap[c.Id] = sgai } } - return &executorMap, &collectorMap + return executorMap, collectorMap } diff --git a/api/internal/storeLink/storeLink.go b/api/internal/storeLink/storeLink.go index 2cda06f6..55e51b35 100644 --- a/api/internal/storeLink/storeLink.go +++ b/api/internal/storeLink/storeLink.go @@ -128,13 +128,13 @@ func GetResourceTypes() []string { return resourceTypes } -func GetDatasetsNames(ctx context.Context, collectorMap *map[string]collector.AiCollector) ([]string, error) { +func GetDatasetsNames(ctx context.Context, collectorMap map[string]collector.AiCollector) ([]string, error) { var wg sync.WaitGroup - var errCh = make(chan interface{}, len(*collectorMap)) + var errCh = make(chan interface{}, len(collectorMap)) var errs []interface{} var names []string var mu sync.Mutex - colMap := *collectorMap + colMap := collectorMap for s, col := range colMap { wg.Add(1) c := col @@ -200,14 +200,14 @@ func GetDatasetsNames(ctx context.Context, collectorMap *map[string]collector.Ai return names, nil } -func GetAlgorithms(ctx context.Context, collectorMap *map[string]collector.AiCollector, resourceType string, taskType string, dataset string) ([]string, error) { +func GetAlgorithms(ctx context.Context, collectorMap map[string]collector.AiCollector, resourceType string, taskType string, dataset string) ([]string, error) { var names []string var wg sync.WaitGroup - var errCh = make(chan interface{}, len(*collectorMap)) + var errCh = make(chan interface{}, len(collectorMap)) var errs []interface{} var mu sync.Mutex - colMap := *collectorMap + colMap := collectorMap for s, col := range colMap { wg.Add(1) c := col diff --git a/api/internal/svc/servicecontext.go b/api/internal/svc/servicecontext.go index ee6fc50f..c4291a22 100644 --- a/api/internal/svc/servicecontext.go +++ b/api/internal/svc/servicecontext.go @@ -116,24 +116,28 @@ func NewServiceContext(c config.Config) *ServiceContext { }) // scheduler - octopusRpc := octopusclient.NewOctopus(zrpc.MustNewClient(c.OctopusRpcConf)) - aCRpc := hpcacclient.NewHpcAC(zrpc.MustNewClient(c.ACRpcConf)) - modelArtsRpc := modelartsservice.NewModelArtsService(zrpc.MustNewClient(c.ModelArtsRpcConf)) - modelArtsImgRpc := imagesservice.NewImagesService(zrpc.MustNewClient(c.ModelArtsImgRpcConf)) + //octopusRpc := octopusclient.NewOctopus(zrpc.MustNewClient(c.OctopusRpcConf)) + //aCRpc := hpcacclient.NewHpcAC(zrpc.MustNewClient(c.ACRpcConf)) + //modelArtsRpc := modelartsservice.NewModelArtsService(zrpc.MustNewClient(c.ModelArtsRpcConf)) + //modelArtsImgRpc := imagesservice.NewImagesService(zrpc.MustNewClient(c.ModelArtsImgRpcConf)) storage := &database.AiStorage{DbEngin: dbEngin} - aiExecutor, resourceCollector := service.InitAiClusterMap(octopusRpc, modelArtsRpc, modelArtsImgRpc, aCRpc, storage) - scheduler := scheduler.NewSchdlr(resourceCollector, storage, aiExecutor) + aiService, err := service.NewAiService(&c, storage) + if err != nil { + logx.Error(err.Error()) + return nil + } + scheduler := scheduler.NewSchdlr(aiService, storage) return &ServiceContext{ Cron: cron.New(cron.WithSeconds()), DbEngin: dbEngin, Config: c, RedisClient: redisClient, - ModelArtsRpc: modelArtsRpc, - ModelArtsImgRpc: modelArtsImgRpc, + ModelArtsRpc: modelartsservice.NewModelArtsService(zrpc.MustNewClient(c.ModelArtsRpcConf)), + ModelArtsImgRpc: imagesservice.NewImagesService(zrpc.MustNewClient(c.ModelArtsImgRpcConf)), CephRpc: cephclient.NewCeph(zrpc.MustNewClient(c.CephRpcConf)), - ACRpc: aCRpc, - OctopusRpc: octopusRpc, + ACRpc: hpcacclient.NewHpcAC(zrpc.MustNewClient(c.ACRpcConf)), + OctopusRpc: octopusclient.NewOctopus(zrpc.MustNewClient(c.OctopusRpcConf)), OpenstackRpc: openstackclient.NewOpenstack(zrpc.MustNewClient(c.OpenstackRpcConf)), K8sRpc: kubernetesclient.NewKubernetes(zrpc.MustNewClient(c.K8sNativeConf)), MonitorClient: make(map[int64]tracker.Prometheus), diff --git a/api/internal/types/types.go b/api/internal/types/types.go index be9a3177..55ad50af 100644 --- a/api/internal/types/types.go +++ b/api/internal/types/types.go @@ -5278,7 +5278,8 @@ type ScheduleResult struct { type AiOption struct { TaskName string `json:"taskName"` - AiClusterId string `json:"aiClusterId,optional"` + AdapterId string `json:"adapterId"` + AiClusterIds []string `json:"aiClusterIds"` ResourceType string `json:"resourceType"` Tops float64 `json:"Tops,optional"` TaskType string `json:"taskType"` @@ -5299,6 +5300,10 @@ type AiTaskTypesResp struct { TaskTypes []string `json:"taskTypes"` } +type AiDatasetsReq struct { + AdapterId string `path:"adapterId"` +} + type AiDatasetsResp struct { Datasets []string `json:"datasets"` } @@ -5308,6 +5313,7 @@ type AiStrategyResp struct { } type AiAlgorithmsReq struct { + AdapterId string `path:"adapterId"` ResourceType string `path:"resourceType"` TaskType string `path:"taskType"` Dataset string `path:"dataset"` @@ -5317,6 +5323,156 @@ type AiAlgorithmsResp struct { Algorithms []string `json:"algorithms"` } +type PullTaskInfoReq struct { + AdapterId int64 `form:"adapterId"` +} + +type PullTaskInfoResp struct { + HpcInfoList []*HpcInfo `json:"HpcInfoList,omitempty"` + CloudInfoList []*CloudInfo `json:"CloudInfoList,omitempty"` + AiInfoList []*AiInfo `json:"AiInfoList,omitempty"` + VmInfoList []*VmInfo `json:"VmInfoList,omitempty"` +} + +type HpcInfo struct { + Id int64 `json:"id"` // id + TaskId int64 `json:"task_id"` // 任务id + JobId string `json:"job_id"` // 作业id(在第三方系统中的作业id) + AdapterId int64 `json:"adapter_id"` // 执行任务的适配器id + ClusterId int64 `json:"cluster_id"` // 执行任务的集群id + ClusterType string `json:"cluster_type"` // 执行任务的集群类型 + Name string `json:"name"` // 名称 + Status string `json:"status"` // 状态 + CmdScript string `json:"cmd_script"` + StartTime string `json:"start_time"` // 开始时间 + RunningTime int64 `json:"running_time"` // 运行时间 + DerivedEs string `json:"derived_es"` + Cluster string `json:"cluster"` + BlockId int64 `json:"block_id"` + AllocNodes int64 `json:"alloc_nodes"` + AllocCpu int64 `json:"alloc_cpu"` + CardCount int64 `json:"card_count"` // 卡数 + Version string `json:"version"` + Account string `json:"account"` + WorkDir string `json:"work_dir"` // 工作路径 + AssocId int64 `json:"assoc_id"` + ExitCode int64 `json:"exit_code"` + WallTime string `json:"wall_time"` // 最大运行时间 + Result string `json:"result"` // 运行结果 + DeletedAt string `json:"deleted_at"` // 删除时间 + YamlString string `json:"yaml_string"` + AppType string `json:"app_type"` // 应用类型 + AppName string `json:"app_name"` // 应用名称 + Queue string `json:"queue"` // 队列名称 + SubmitType string `json:"submit_type"` // cmd(命令行模式) + NNode string `json:"n_node"` // 节点个数(当指定该参数时,GAP_NODE_STRING必须为"") + StdOutFile string `json:"std_out_file"` // 工作路径/std.err.%j + StdErrFile string `json:"std_err_file"` // 工作路径/std.err.%j + StdInput string `json:"std_input"` + Environment string `json:"environment"` + DeletedFlag int64 `json:"deleted_flag"` // 是否删除(0-否,1-是) + CreatedBy int64 `json:"created_by"` // 创建人 + CreatedTime string `json:"created_time"` // 创建时间 + UpdatedBy int64 `json:"updated_by"` // 更新人 + UpdatedTime string `json:"updated_time"` // 更新时间 +} + +type CloudInfo struct { + Participant int64 `json:"participant,omitempty"` + Id int64 `json:"id,omitempty"` + TaskId int64 `json:"taskId,omitempty"` + ApiVersion string `json:"apiVersion,omitempty"` + Kind string `json:"kind,omitempty"` + Namespace string `json:"namespace,omitempty"` + Name string `json:"name,omitempty"` + Status string `json:"status,omitempty"` + StartTime string `json:"startTime,omitempty"` + RunningTime int64 `json:"runningTime,omitempty"` + Result string `json:"result,omitempty"` + YamlString string `json:"yamlString,omitempty"` +} + +type AiInfo struct { + ParticipantId int64 `json:"participantId,omitempty"` + TaskId int64 `json:"taskId,omitempty"` + ProjectId string `json:"project_id,omitempty"` + Name string `json:"name,omitempty"` + Status string `json:"status,omitempty"` + StartTime string `json:"startTime,omitempty"` + RunningTime int64 `json:"runningTime,omitempty"` + Result string `json:"result,omitempty"` + JobId string `json:"jobId,omitempty"` + CreateTime string `json:"createTime,omitempty"` + ImageUrl string `json:"imageUrl,omitempty"` + Command string `json:"command,omitempty"` + FlavorId string `json:"flavorId,omitempty"` + SubscriptionId string `json:"subscriptionId,omitempty"` + ItemVersionId string `json:"itemVersionId,omitempty"` +} + +type VmInfo struct { + ParticipantId int64 `json:"participantId,omitempty"` + TaskId int64 `json:"taskId,omitempty"` + Name string `json:"name,omitempty"` + FlavorRef string `json:"flavor_ref,omitempty"` + ImageRef string `json:"image_ref,omitempty"` + NetworkUuid string `json:"network_uuid,omitempty"` + BlockUuid string `json:"block_uuid,omitempty"` + SourceType string `json:"source_type,omitempty"` + DeleteOnTermination bool `json:"delete_on_termination,omitempty"` + Status string `json:"status,omitempty"` + MinCount string `json:"min_count,omitempty"` + Platform string `json:"platform,omitempty"` + Uuid string `json:"uuid,omitempty"` +} + +type PushTaskInfoReq struct { + AdapterId int64 `json:"adapterId"` + HpcInfoList []*HpcInfo `json:"hpcInfoList"` + CloudInfoList []*CloudInfo `json:"cloudInfoList"` + AiInfoList []*AiInfo `json:"aiInfoList"` + VmInfoList []*VmInfo `json:"vmInfoList"` +} + +type PushTaskInfoResp struct { + Code int64 `json:"code"` + Msg string `json:"msg"` +} + +type PushResourceInfoReq struct { + AdapterId int64 `json:"adapterId"` + ResourceStats []ResourceStats `json:"resourceStats"` +} + +type PushResourceInfoResp struct { + Code int64 `json:"code"` + Msg string `json:"msg"` +} + +type ResourceStats struct { + ClusterId int64 `json:"clusterId"` + Name string `json:"name"` + CpuCoreAvail int64 `json:"cpuCoreAvail"` + CpuCoreTotal int64 `json:"cpuCoreTotal"` + MemAvail float64 `json:"memAvail"` + MemTotal float64 `json:"memTotal"` + DiskAvail float64 `json:"diskAvail"` + DiskTotal float64 `json:"diskTotal"` + GpuAvail int64 `json:"gpuAvail"` + CardsAvail []*Card `json:"cardsAvail"` + CpuCoreHours float64 `json:"cpuCoreHours"` + Balance float64 `json:"balance"` +} + +type Card struct { + Platform string `json:"platform"` + Type string `json:"type"` + Name string `json:"name"` + TOpsAtFp16 float64 `json:"TOpsAtFp16"` + CardHours float64 `json:"cardHours"` + CardNum int32 `json:"cardNum"` +} + type CreateAlertRuleReq struct { CLusterId int64 `json:"clusterId"` ClusterName string `json:"clusterName"`