Merge pull request 'updated scheduler submit function' (#130) from tzwang/pcm-coordinator:master into master

Former-commit-id: 340a25e6729207ab132ba1e13a5e6c795f354220
This commit is contained in:
tzwang 2024-04-29 17:56:11 +08:00
commit 16a23a8d77
29 changed files with 667 additions and 47 deletions

View File

@ -1697,6 +1697,44 @@ PayloadCreateTrainJob{
jobId string `json:"jobId,optional"`
}
********************/
/******************Ai Center overview*************************/
CenterOverviewResp {
CenterNum int32 `json:"totalCenters,optional"`
TaskNum int32 `json:"totalTasks,optional"`
CardNum int32 `json:"totalCards,optional"`
PowerInTops float64 `json:"totalPower,optional"`
}
CenterQueueingResp {
Current []*CenterQueue `json:"current,optional"`
History []*CenterQueue `json:"history,optional"`
}
CenterQueue {
Name string `json:"name,optional"`
QueueingNum int32 `json:"num,optional"`
}
CenterListResp {
List []*AiCenter `json:"centerList,optional"`
}
AiCenter {
Name string `json:"name,optional"`
StackName string `json:"stack,optional"`
Version string `json:"version,optional"`
}
CenterTaskListResp {
List []*AiTask `json:"taskList,optional"`
}
AiTask {
Name string `json:"name,optional"`
status string `json:"status,optional"`
TimeElapsed int32 `json:"elapsed,optional"`
}
)
/******************create TrainIngJob end*************************/

View File

@ -227,6 +227,22 @@ service pcm {
group: ai
)
service pcm {
@doc "智算中心概览"
@handler getCenterOverviewHandler
get /ai/getCenterOverview returns (CenterOverviewResp)
@doc "智算中心排队状况"
@handler getCenterQueueingHandler
get /ai/getCenterQueueing returns (CenterQueueingResp)
@doc "智算中心列表"
@handler getCenterListHandler
get /ai/getCenterList returns (CenterListResp)
@doc "智算中心任务列表"
@handler getCenterTaskListHandler
get /ai/getCenterTaskList returns (CenterTaskListResp)
@doc "查询数据集列表"
@handler listDataSetHandler
get /ai/listDataSet/:projectId (DataSetReq) returns (DataSetResp)
@ -935,6 +951,9 @@ service pcm {
@handler ScheduleSubmitHandler
post /schedule/submit (ScheduleReq) returns (ScheduleResp)
@handler ScheduleGetOverviewHandler
post /schedule/getOverview returns (ScheduleOverviewResp)
}
@server(

View File

@ -24,6 +24,9 @@ type (
Msg string `json:"msg"`
}
ScheduleOverviewResp {
}
AiOption {
TaskName string `json:"taskName"`
AdapterId string `json:"adapterId"`
@ -81,4 +84,20 @@ type (
AiJobLogResp {
Log string `json:"log"`
}
AiTaskDb {
Id string `json:"id,omitempty" db:"id"`
TaskId string `json:"taskId,omitempty" db:"task_id"`
AdapterId string `json:"adapterId,omitempty" db:"adapter_id"`
ClusterId string `json:"clusterId,omitempty" db:"cluster_id"`
Name string `json:"name,omitempty" db:"name"`
Replica string `json:"replica,omitempty" db:"replica"`
ClusterTaskId string `json:"clusterTaskId,omitempty" db:"c_task_id"`
Strategy string `json:"strategy,omitempty" db:"strategy"`
Status string `json:"status,omitempty" db:"status"`
Msg string `json:"msg,omitempty" db:"msg"`
CommitTime string `json:"commitTime,omitempty" db:"commit_time"`
StartTime string `json:"startTime,omitempty" db:"start_time"`
EndTime string `json:"endTime,omitempty" db:"end_time"`
}
)

View File

@ -0,0 +1,21 @@
package ai
import (
"net/http"
"github.com/zeromicro/go-zero/rest/httpx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/ai"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
)
func GetCenterListHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
l := ai.NewGetCenterListLogic(r.Context(), svcCtx)
resp, err := l.GetCenterList()
if err != nil {
httpx.ErrorCtx(r.Context(), w, err)
} else {
httpx.OkJsonCtx(r.Context(), w, resp)
}
}
}

View File

@ -0,0 +1,21 @@
package ai
import (
"net/http"
"github.com/zeromicro/go-zero/rest/httpx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/ai"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
)
func GetCenterOverviewHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
l := ai.NewGetCenterOverviewLogic(r.Context(), svcCtx)
resp, err := l.GetCenterOverview()
if err != nil {
httpx.ErrorCtx(r.Context(), w, err)
} else {
httpx.OkJsonCtx(r.Context(), w, resp)
}
}
}

View File

@ -0,0 +1,21 @@
package ai
import (
"net/http"
"github.com/zeromicro/go-zero/rest/httpx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/ai"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
)
func GetCenterQueueingHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
l := ai.NewGetCenterQueueingLogic(r.Context(), svcCtx)
resp, err := l.GetCenterQueueing()
if err != nil {
httpx.ErrorCtx(r.Context(), w, err)
} else {
httpx.OkJsonCtx(r.Context(), w, resp)
}
}
}

View File

@ -0,0 +1,21 @@
package ai
import (
"net/http"
"github.com/zeromicro/go-zero/rest/httpx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/ai"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
)
func GetCenterTaskListHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
l := ai.NewGetCenterTaskListLogic(r.Context(), svcCtx)
resp, err := l.GetCenterTaskList()
if err != nil {
httpx.ErrorCtx(r.Context(), w, err)
} else {
httpx.OkJsonCtx(r.Context(), w, resp)
}
}
}

View File

@ -268,6 +268,26 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
server.AddRoutes(
[]rest.Route{
{
Method: http.MethodGet,
Path: "/ai/getCenterOverview",
Handler: ai.GetCenterOverviewHandler(serverCtx),
},
{
Method: http.MethodGet,
Path: "/ai/getCenterQueueing",
Handler: ai.GetCenterQueueingHandler(serverCtx),
},
{
Method: http.MethodGet,
Path: "/ai/getCenterList",
Handler: ai.GetCenterListHandler(serverCtx),
},
{
Method: http.MethodGet,
Path: "/ai/getCenterTaskList",
Handler: ai.GetCenterTaskListHandler(serverCtx),
},
{
Method: http.MethodGet,
Path: "/ai/listDataSet/:projectId",
@ -1165,6 +1185,11 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
Path: "/schedule/submit",
Handler: schedule.ScheduleSubmitHandler(serverCtx),
},
{
Method: http.MethodPost,
Path: "/schedule/getOverview",
Handler: schedule.ScheduleGetOverviewHandler(serverCtx),
},
},
rest.WithPrefix("/pcm/v1"),
)

View File

@ -0,0 +1,21 @@
package schedule
import (
"net/http"
"github.com/zeromicro/go-zero/rest/httpx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/schedule"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
)
func ScheduleGetOverviewHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
l := schedule.NewScheduleGetOverviewLogic(r.Context(), svcCtx)
resp, err := l.ScheduleGetOverview()
if err != nil {
httpx.ErrorCtx(r.Context(), w, err)
} else {
httpx.OkJsonCtx(r.Context(), w, resp)
}
}
}

View File

@ -0,0 +1,30 @@
package ai
import (
"context"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"github.com/zeromicro/go-zero/core/logx"
)
type GetCenterListLogic struct {
logx.Logger
ctx context.Context
svcCtx *svc.ServiceContext
}
func NewGetCenterListLogic(ctx context.Context, svcCtx *svc.ServiceContext) *GetCenterListLogic {
return &GetCenterListLogic{
Logger: logx.WithContext(ctx),
ctx: ctx,
svcCtx: svcCtx,
}
}
func (l *GetCenterListLogic) GetCenterList() (resp *types.CenterListResp, err error) {
// todo: add your logic here and delete this line
return
}

View File

@ -0,0 +1,30 @@
package ai
import (
"context"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"github.com/zeromicro/go-zero/core/logx"
)
type GetCenterOverviewLogic struct {
logx.Logger
ctx context.Context
svcCtx *svc.ServiceContext
}
func NewGetCenterOverviewLogic(ctx context.Context, svcCtx *svc.ServiceContext) *GetCenterOverviewLogic {
return &GetCenterOverviewLogic{
Logger: logx.WithContext(ctx),
ctx: ctx,
svcCtx: svcCtx,
}
}
func (l *GetCenterOverviewLogic) GetCenterOverview() (resp *types.CenterOverviewResp, err error) {
// todo: add your logic here and delete this line
return
}

View File

@ -0,0 +1,30 @@
package ai
import (
"context"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"github.com/zeromicro/go-zero/core/logx"
)
type GetCenterQueueingLogic struct {
logx.Logger
ctx context.Context
svcCtx *svc.ServiceContext
}
func NewGetCenterQueueingLogic(ctx context.Context, svcCtx *svc.ServiceContext) *GetCenterQueueingLogic {
return &GetCenterQueueingLogic{
Logger: logx.WithContext(ctx),
ctx: ctx,
svcCtx: svcCtx,
}
}
func (l *GetCenterQueueingLogic) GetCenterQueueing() (resp *types.CenterQueueingResp, err error) {
// todo: add your logic here and delete this line
return
}

View File

@ -0,0 +1,30 @@
package ai
import (
"context"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"github.com/zeromicro/go-zero/core/logx"
)
type GetCenterTaskListLogic struct {
logx.Logger
ctx context.Context
svcCtx *svc.ServiceContext
}
func NewGetCenterTaskListLogic(ctx context.Context, svcCtx *svc.ServiceContext) *GetCenterTaskListLogic {
return &GetCenterTaskListLogic{
Logger: logx.WithContext(ctx),
ctx: ctx,
svcCtx: svcCtx,
}
}
func (l *GetCenterTaskListLogic) GetCenterTaskList() (resp *types.CenterTaskListResp, err error) {
// todo: add your logic here and delete this line
return
}

View File

@ -26,7 +26,11 @@ func NewScheduleGetAiJobLogLogLogic(ctx context.Context, svcCtx *svc.ServiceCont
func (l *ScheduleGetAiJobLogLogLogic) ScheduleGetAiJobLogLog(req *types.AiJobLogReq) (resp *types.AiJobLogResp, err error) {
resp = &types.AiJobLogResp{}
log, err := l.svcCtx.Scheduler.AiService.AiCollectorAdapterMap[req.AdapterId][req.ClusterId].GetTrainingTaskLog(l.ctx, req.TaskId, req.InstanceNum)
id, err := l.svcCtx.Scheduler.AiStorages.GetAiTaskIdByClusterIdAndTaskId(req.ClusterId, req.TaskId)
if err != nil {
return nil, err
}
log, err := l.svcCtx.Scheduler.AiService.AiCollectorAdapterMap[req.AdapterId][req.ClusterId].GetTrainingTaskLog(l.ctx, id, req.InstanceNum)
if err != nil {
return nil, err
}

View File

@ -0,0 +1,30 @@
package schedule
import (
"context"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"github.com/zeromicro/go-zero/core/logx"
)
type ScheduleGetOverviewLogic struct {
logx.Logger
ctx context.Context
svcCtx *svc.ServiceContext
}
func NewScheduleGetOverviewLogic(ctx context.Context, svcCtx *svc.ServiceContext) *ScheduleGetOverviewLogic {
return &ScheduleGetOverviewLogic{
Logger: logx.WithContext(ctx),
ctx: ctx,
svcCtx: svcCtx,
}
}
func (l *ScheduleGetOverviewLogic) ScheduleGetOverview() (resp *types.ScheduleOverviewResp, err error) {
// todo: add your logic here and delete this line
return
}

View File

@ -6,6 +6,7 @@ import (
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
"github.com/zeromicro/go-zero/core/logx"
)
@ -51,6 +52,10 @@ func (l *ScheduleSubmitLogic) ScheduleSubmit(req *types.ScheduleReq) (resp *type
switch opt.GetOptionType() {
case option.AI:
id, err := l.svcCtx.Scheduler.AiStorages.SaveTask(req.AiOption.TaskName)
if err != nil {
return nil, err
}
rs := (results).([]*schedulers.AiResult)
for _, r := range rs {
scheResult := &types.ScheduleResult{}
@ -59,12 +64,13 @@ func (l *ScheduleSubmitLogic) ScheduleSubmit(req *types.ScheduleReq) (resp *type
scheResult.Strategy = r.Strategy
scheResult.Replica = r.Replica
scheResult.Msg = r.Msg
err := l.svcCtx.Scheduler.AiStorages.SaveAiTask(id, opt, r.ClusterId, r.TaskId, constants.Running, r.Msg)
if err != nil {
return nil, err
}
resp.Results = append(resp.Results, scheResult)
}
err = l.svcCtx.Scheduler.AiStorages.SaveTask(req.AiOption.TaskName)
if err != nil {
return nil, err
}
}
return resp, nil

View File

@ -2,10 +2,12 @@ package database
import (
"github.com/zeromicro/go-zero/core/logx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
"gorm.io/gorm"
"strconv"
"time"
)
@ -48,7 +50,17 @@ func (s *AiStorage) GetAdapterIdsByType(adapterType string) ([]string, error) {
return ids, nil
}
func (s *AiStorage) SaveTask(name string) error {
func (s *AiStorage) GetAiTasks() ([]*types.AiTaskDb, error) {
var resp []*types.AiTaskDb
tx := s.DbEngin.Raw("select * from task_ai").Scan(&resp)
if tx.Error != nil {
logx.Errorf(tx.Error.Error())
return nil, tx.Error
}
return resp, nil
}
func (s *AiStorage) SaveTask(name string) (int64, error) {
// 构建主任务结构体
taskModel := models.Task{
Status: constants.Saved,
@ -58,12 +70,52 @@ func (s *AiStorage) SaveTask(name string) error {
}
// 保存任务数据到数据库
tx := s.DbEngin.Create(&taskModel)
if tx.Error != nil {
return 0, tx.Error
}
return taskModel.Id, nil
}
func (s *AiStorage) SaveAiTask(taskId int64, option *option.AiOption, clusterId string, jobId string, status string, msg string) error {
// 构建主任务结构体
aId, err := strconv.ParseInt(option.AdapterId, 10, 64)
if err != nil {
return err
}
cId, err := strconv.ParseInt(clusterId, 10, 64)
if err != nil {
return err
}
aiTaskModel := models.TaskAi{
TaskId: taskId,
AdapterId: aId,
ClusterId: cId,
Name: option.TaskName,
Replica: option.Replica,
JobId: jobId,
Strategy: option.StrategyName,
Status: status,
Msg: msg,
CommitTime: time.Now(),
}
// 保存任务数据到数据库
tx := s.DbEngin.Create(&aiTaskModel)
if tx.Error != nil {
return tx.Error
}
return nil
}
func (s *AiStorage) GetAiTaskIdByClusterIdAndTaskId(clusterId string, taskId string) (string, error) {
var aiTask models.TaskAi
tx := s.DbEngin.Raw("select * from task_ai where `cluster_id` = ? and `task_id` = ?", clusterId, taskId).Scan(&aiTask)
if tx.Error != nil {
logx.Errorf(tx.Error.Error())
return "", tx.Error
}
return aiTask.JobId, nil
}
func (s *AiStorage) UpdateTask() error {
return nil
}

View File

@ -129,42 +129,19 @@ func (s *Scheduler) TempAssign() error {
}
func (s *Scheduler) AssignAndSchedule(ss SubSchedule) (interface{}, error) {
//// 已指定 ParticipantId
//if s.task.ParticipantId != 0 {
// return nil
//}
//// 标签匹配以及后未找到ParticipantIds
//if len(s.participantIds) == 0 {
// return errors.New("未找到匹配的ParticipantIds")
//}
//
//// 指定或者标签匹配的结果只有一个集群,给任务信息指定
//if len(s.participantIds) == 1 {
// s.task.ParticipantId = s.participantIds[0]
// //replicas := s.task.Metadata.(map[string]interface{})["spec"].(map[string]interface{})["replicas"].(float64)
// //result := make(map[int64]string)
// //result[s.participantIds[0]] = strconv.FormatFloat(replicas, 'f', 2, 64)
// //s.result = result
//
// return nil
//}
//choose strategy
strategy, err := ss.PickOptimalStrategy()
if err != nil {
return nil, err
}
//schedule
clusters, err := strategy.Schedule()
if err != nil {
return nil, err
}
//集群数量不满足,指定到标签匹配后第一个集群
//if len(providerList) < 2 {
// s.task.ParticipantId = s.participantIds[0]
// return nil
//}
//assign tasks to clusters
resp, err := ss.AssignTask(clusters)
if err != nil {
return nil, err

View File

@ -26,6 +26,7 @@ import (
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy/param"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/pkg/response"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
"gitlink.org.cn/JointCloud/pcm-octopus/octopus"
@ -168,32 +169,46 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) (interfa
errs = append(errs, e)
}
if len(errs) == len(clusters) {
return nil, errors.New("submit task failed")
for s := range ch {
results = append(results, s)
}
if len(errs) != 0 {
var msg string
taskId, err := as.AiStorages.SaveTask(as.option.TaskName)
if err != nil {
return nil, err
}
var errmsg string
for _, err := range errs {
e := (err).(struct {
err error
clusterId string
})
msg += fmt.Sprintf("clusterId: %v , error: %v \n", e.clusterId, e.err.Error())
msg := fmt.Sprintf("clusterId: %v , error: %v \n", e.clusterId, e.err.Error())
errmsg += msg
err := as.AiStorages.SaveAiTask(taskId, as.option, e.clusterId, "", constants.Failed, msg)
if err != nil {
return nil, err
}
}
for s := range ch {
if s.Msg != "" {
msg += fmt.Sprintf("clusterId: %v , error: %v \n", s.ClusterId, s.Msg)
msg := fmt.Sprintf("clusterId: %v , error: %v \n", s.ClusterId, s.Msg)
errmsg += msg
err := as.AiStorages.SaveAiTask(taskId, as.option, s.ClusterId, "", constants.Failed, msg)
if err != nil {
return nil, err
}
} else {
msg += fmt.Sprintf("clusterId: %v , submitted successfully, taskId: %v \n", s.ClusterId, s.TaskId)
msg := fmt.Sprintf("clusterId: %v , submitted successfully, taskId: %v \n", s.ClusterId, s.TaskId)
errmsg += msg
err := as.AiStorages.SaveAiTask(taskId, as.option, s.ClusterId, s.TaskId, constants.Succeeded, msg)
if err != nil {
return nil, err
}
}
}
return nil, errors.New(msg)
}
for s := range ch {
// TODO: database operation
results = append(results, s)
return nil, errors.New(errmsg)
}
return results, nil

View File

@ -4,6 +4,7 @@ type AiOption struct {
AdapterId string
ClusterIds []string
TaskName string
Replica int64
ResourceType string // cpu/gpu/compute card
CpuCoreNum int64
TaskType string // pytorch/tensorflow/mindspore

View File

@ -7,6 +7,8 @@ type AiCollector interface {
GetDatasetsSpecs(ctx context.Context) ([]*DatasetsSpecs, error)
GetAlgorithms(ctx context.Context) ([]*Algorithm, error)
GetTrainingTaskLog(ctx context.Context, taskId string, instanceNum string) (string, error)
DownloadAlgorithmCode(ctx context.Context, resourceType string, taskType string, dataset string, algorithm string) (string, error)
UploadAlgorithmCode(ctx context.Context, resourceType string, taskType string, dataset string, algorithm string, code string) error
}
type ResourceStats struct {

View File

@ -162,6 +162,14 @@ func (m *ModelArtsLink) GetAlgorithms(ctx context.Context) ([]*collector.Algorit
return nil, nil
}
func (m *ModelArtsLink) DownloadAlgorithmCode(ctx context.Context, resourceType string, taskType string, dataset string, algorithm string) (string, error) {
return "", nil
}
func (m *ModelArtsLink) UploadAlgorithmCode(ctx context.Context, resourceType string, taskType string, dataset string, algorithm string, code string) error {
return nil
}
func (m *ModelArtsLink) GetTrainingTaskLog(ctx context.Context, taskId string, instanceNum string) (string, error) {
return "", nil
}

View File

@ -337,6 +337,14 @@ func (o *OctopusLink) GetAlgorithms(ctx context.Context) ([]*collector.Algorithm
return algorithms, nil
}
func (o *OctopusLink) DownloadAlgorithmCode(ctx context.Context, resourceType string, taskType string, dataset string, algorithm string) (string, error) {
return "", nil
}
func (o *OctopusLink) UploadAlgorithmCode(ctx context.Context, resourceType string, taskType string, dataset string, algorithm string, code string) error {
return nil
}
func (o *OctopusLink) GetTrainingTaskLog(ctx context.Context, taskId string, instanceNum string) (string, error) {
instance, err := strconv.ParseInt(instanceNum, 10, 32)
if err != nil {

View File

@ -447,6 +447,14 @@ func (s *ShuguangAi) GetAlgorithms(ctx context.Context) ([]*collector.Algorithm,
return algorithms, nil
}
func (s *ShuguangAi) DownloadAlgorithmCode(ctx context.Context, resourceType string, taskType string, dataset string, algorithm string) (string, error) {
return "", nil
}
func (s *ShuguangAi) UploadAlgorithmCode(ctx context.Context, resourceType string, taskType string, dataset string, algorithm string, code string) error {
return nil
}
func (s *ShuguangAi) GetTrainingTaskLog(ctx context.Context, taskId string, instanceNum string) (string, error) {
req := &hpcAC.GetInstanceLogReq{
TaskId: taskId,

View File

@ -2722,6 +2722,43 @@ type Nfs struct {
ReadOnly bool `json:"readOnly,optional"`
}
type CenterOverviewResp struct {
CenterNum int32 `json:"totalCenters,optional"`
TaskNum int32 `json:"totalTasks,optional"`
CardNum int32 `json:"totalCards,optional"`
PowerInTops float64 `json:"totalPower,optional"`
}
type CenterQueueingResp struct {
Current []*CenterQueue `json:"current,optional"`
History []*CenterQueue `json:"history,optional"`
}
type CenterQueue struct {
Name string `json:"name,optional"`
QueueingNum int32 `json:"num,optional"`
}
type CenterListResp struct {
List []*AiCenter `json:"centerList,optional"`
}
type AiCenter struct {
Name string `json:"name,optional"`
StackName string `json:"stack,optional"`
Version string `json:"version,optional"`
}
type CenterTaskListResp struct {
List []*AiTask `json:"taskList,optional"`
}
type AiTask struct {
Name string `json:"name,optional"`
Status string `json:"status,optional"`
TimeElapsed int32 `json:"elapsed,optional"`
}
type StorageScreenReq struct {
}
@ -5503,6 +5540,9 @@ type ScheduleResult struct {
Msg string `json:"msg"`
}
type ScheduleOverviewResp struct {
}
type AiOption struct {
TaskName string `json:"taskName"`
AdapterId string `json:"adapterId"`
@ -5561,6 +5601,22 @@ type AiJobLogResp struct {
Log string `json:"log"`
}
type AiTaskDb struct {
Id string `json:"id,omitempty" db:"id"`
TaskId string `json:"taskId,omitempty" db:"task_id"`
AdapterId string `json:"adapterId,omitempty" db:"adapter_id"`
ClusterId string `json:"clusterId,omitempty" db:"cluster_id"`
Name string `json:"name,omitempty" db:"name"`
Replica string `json:"replica,omitempty" db:"replica"`
ClusterTaskId string `json:"clusterTaskId,omitempty" db:"c_task_id"`
Strategy string `json:"strategy,omitempty" db:"strategy"`
Status string `json:"status,omitempty" db:"status"`
Msg string `json:"msg,omitempty" db:"msg"`
CommitTime string `json:"commitTime,omitempty" db:"commit_time"`
StartTime string `json:"startTime,omitempty" db:"start_time"`
EndTime string `json:"endTime,omitempty" db:"end_time"`
}
type CreateAlertRuleReq struct {
CLusterId string `json:"clusterId"`
ClusterName string `json:"clusterName"`

2
go.mod
View File

@ -24,7 +24,7 @@ require (
github.com/robfig/cron/v3 v3.0.1
github.com/rs/zerolog v1.28.0
github.com/zeromicro/go-zero v1.6.3
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240420083915-58d6e2958aeb
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240426095603-549fefd8bece
gitlink.org.cn/JointCloud/pcm-kubernetes v0.0.0-20240301071143-347480abff2c
gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240424085753-6899615e9142
gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240403033338-e7edabad4203

4
go.sum
View File

@ -1078,8 +1078,8 @@ github.com/yuin/gopher-lua v1.1.0/go.mod h1:GBR0iDaNXjAgGg9zfCvksxSRnQx76gclCIb7
github.com/zeromicro/go-zero v1.5.1/go.mod h1:bGYm4XWsGN9GhDsO2O2BngpVoWjf3Eog2a5hUOMhlXs=
github.com/zeromicro/go-zero v1.6.3 h1:OL0NnHD5LdRNDolfcK9vUkJt7K8TcBE3RkzfM8poOVw=
github.com/zeromicro/go-zero v1.6.3/go.mod h1:XZL435ZxVi9MSXXtw2MRQhHgx6OoX3++MRMOE9xU70c=
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240420083915-58d6e2958aeb h1:k6mNEWKp+haQUaK2dWs/rI9OKgzJHY1/9KNKuBDN0Vw=
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240420083915-58d6e2958aeb/go.mod h1:w3Nb5TNymCItQ7K3x4Q0JLuoq9OerwAzAWT2zsPE9Xo=
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240426095603-549fefd8bece h1:W3yBnvAVV8dlRNQKYD6Mf8ySRrYsP0tPk7JjvqZzNHQ=
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240426095603-549fefd8bece/go.mod h1:w3Nb5TNymCItQ7K3x4Q0JLuoq9OerwAzAWT2zsPE9Xo=
gitlink.org.cn/JointCloud/pcm-kubernetes v0.0.0-20240301071143-347480abff2c h1:2Wl/hvaSFjh6fmCSIQhjkr9llMRREQeqcXNLZ/HPY18=
gitlink.org.cn/JointCloud/pcm-kubernetes v0.0.0-20240301071143-347480abff2c/go.mod h1:lSRfGs+PxFvw7CcndHWRd6UlLlGrZn0b0hp5cfaMNGw=
gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240424085753-6899615e9142 h1:+po0nesBDSWsgCySBG7eEXk7i9Ytd58wqvjL1M9y6d8=

24
pkg/models/taskaimodel.go Normal file
View File

@ -0,0 +1,24 @@
package models
import "github.com/zeromicro/go-zero/core/stores/sqlx"
var _ TaskAiModel = (*customTaskAiModel)(nil)
type (
// TaskAiModel is an interface to be customized, add more methods here,
// and implement the added methods in customTaskAiModel.
TaskAiModel interface {
taskAiModel
}
customTaskAiModel struct {
*defaultTaskAiModel
}
)
// NewTaskAiModel returns a model for the database table.
func NewTaskAiModel(conn sqlx.SqlConn) TaskAiModel {
return &customTaskAiModel{
defaultTaskAiModel: newTaskAiModel(conn),
}
}

View File

@ -0,0 +1,103 @@
// Code generated by goctl. DO NOT EDIT.
package models
import (
"context"
"database/sql"
"fmt"
"strings"
"time"
"github.com/zeromicro/go-zero/core/stores/builder"
"github.com/zeromicro/go-zero/core/stores/sqlc"
"github.com/zeromicro/go-zero/core/stores/sqlx"
"github.com/zeromicro/go-zero/core/stringx"
)
var (
taskAiFieldNames = builder.RawFieldNames(&TaskAi{})
taskAiRows = strings.Join(taskAiFieldNames, ",")
taskAiRowsExpectAutoSet = strings.Join(stringx.Remove(taskAiFieldNames, "`id`", "`create_at`", "`create_time`", "`created_at`", "`update_at`", "`update_time`", "`updated_at`"), ",")
taskAiRowsWithPlaceHolder = strings.Join(stringx.Remove(taskAiFieldNames, "`id`", "`create_at`", "`create_time`", "`created_at`", "`update_at`", "`update_time`", "`updated_at`"), "=?,") + "=?"
)
type (
taskAiModel interface {
Insert(ctx context.Context, data *TaskAi) (sql.Result, error)
FindOne(ctx context.Context, id int64) (*TaskAi, error)
Update(ctx context.Context, data *TaskAi) error
Delete(ctx context.Context, id int64) error
}
defaultTaskAiModel struct {
conn sqlx.SqlConn
table string
}
TaskAi struct {
Id int64 `db:"id"` // id
TaskId int64 `db:"task_id"` // 任务id
AdapterId int64 `db:"adapter_id"` // 设配器id
ClusterId int64 `db:"cluster_id"` // 集群id
Name string `db:"name"` // 任务名
Replica int64 `db:"replica"` // 执行数
JobId string `db:"job_id"` // 集群返回任务id
Strategy string `db:"strategy"` // 主任务使用策略
Status string `db:"status"` // 任务状态
Msg string `db:"msg"` // 集群返回任务信息
CommitTime time.Time `db:"commit_time"` // 提交时间
StartTime string `db:"start_time"` // 开始时间
EndTime string `db:"end_time"` // 结束时间
}
)
func newTaskAiModel(conn sqlx.SqlConn) *defaultTaskAiModel {
return &defaultTaskAiModel{
conn: conn,
table: "`task_ai`",
}
}
func (m *defaultTaskAiModel) withSession(session sqlx.Session) *defaultTaskAiModel {
return &defaultTaskAiModel{
conn: sqlx.NewSqlConnFromSession(session),
table: "`task_ai`",
}
}
func (m *defaultTaskAiModel) Delete(ctx context.Context, id int64) error {
query := fmt.Sprintf("delete from %s where `id` = ?", m.table)
_, err := m.conn.ExecCtx(ctx, query, id)
return err
}
func (m *defaultTaskAiModel) FindOne(ctx context.Context, id int64) (*TaskAi, error) {
query := fmt.Sprintf("select %s from %s where `id` = ? limit 1", taskAiRows, m.table)
var resp TaskAi
err := m.conn.QueryRowCtx(ctx, &resp, query, id)
switch err {
case nil:
return &resp, nil
case sqlc.ErrNotFound:
return nil, ErrNotFound
default:
return nil, err
}
}
func (m *defaultTaskAiModel) Insert(ctx context.Context, data *TaskAi) (sql.Result, error) {
query := fmt.Sprintf("insert into %s (%s) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", m.table, taskAiRowsExpectAutoSet)
ret, err := m.conn.ExecCtx(ctx, query, data.TaskId, data.AdapterId, data.ClusterId, data.Name, data.Replica, data.JobId, data.Strategy, data.Status, data.Msg, data.CommitTime, data.StartTime, data.EndTime)
return ret, err
}
func (m *defaultTaskAiModel) Update(ctx context.Context, data *TaskAi) error {
query := fmt.Sprintf("update %s set %s where `id` = ?", m.table, taskAiRowsWithPlaceHolder)
_, err := m.conn.ExecCtx(ctx, query, data.TaskId, data.AdapterId, data.ClusterId, data.Name, data.Replica, data.JobId, data.Strategy, data.Status, data.Msg, data.CommitTime, data.StartTime, data.EndTime, data.Id)
return err
}
func (m *defaultTaskAiModel) tableName() string {
return m.table
}