Merge pull request 'fix aiOverview timeout bugs' (#145) from tzwang/pcm-coordinator:master into master

Former-commit-id: 38786fb3fe33c433b06ae25214c691d8d7600d78
This commit is contained in:
tzwang 2024-05-09 16:17:40 +08:00
commit fa2c7b1aa9
10 changed files with 125 additions and 14 deletions

View File

@ -966,6 +966,12 @@ service pcm {
@handler ScheduleGetOverviewHandler @handler ScheduleGetOverviewHandler
post /schedule/getOverview returns (ScheduleOverviewResp) post /schedule/getOverview returns (ScheduleOverviewResp)
@handler DownloadAlgothmCodeHandler
get /schedule/getDownloadAlgothmCode (DownloadAlgorithmCodeReq) returns (DownloadAlgorithmCodeResp)
@handler UploadAlgothmCodeHandler
post /schedule/getDownloadAlgothmCode (UploadAlgorithmCodeReq) returns (UploadAlgorithmCodeResp)
} }
@server( @server(

View File

@ -100,4 +100,33 @@ type (
StartTime string `json:"startTime,omitempty" db:"start_time"` StartTime string `json:"startTime,omitempty" db:"start_time"`
EndTime string `json:"endTime,omitempty" db:"end_time"` EndTime string `json:"endTime,omitempty" db:"end_time"`
} }
// DownloadAlgorithmCodeReq is the request type of the
// `get /schedule/getDownloadAlgothmCode` route. Every field carries a `form` tag.
// ResourceType, Card, TaskType, Dataset and Algorithm share their names with the
// parameters of AiCollector.DownloadAlgorithmCode.
DownloadAlgorithmCodeReq {
AdapterId string `form:"adapterId"`
ClusterId string `form:"clusterId"`
ResourceType string `form:"resourceType"`
Card string `form:"card"`
TaskType string `form:"taskType"`
Dataset string `form:"dataset"`
Algorithm string `form:"algorithm"`
// NOTE(review): a Code field on a download request looks unusual; confirm it is needed.
Code string `form:"code"`
}
// DownloadAlgorithmCodeResp is the response type of the
// `get /schedule/getDownloadAlgothmCode` route.
DownloadAlgorithmCodeResp {
// NOTE(review): the field is named Code but is serialized under the JSON key
// "algorithms"; confirm the key is intended before clients depend on it.
Code string `json:"algorithms"`
}
// UploadAlgorithmCodeReq is the request type of the
// `post /schedule/getDownloadAlgothmCode` route. Every field carries a `json` tag.
// NOTE(review): the upload route reuses the download path and differs only by
// HTTP method; confirm the path is intended.
UploadAlgorithmCodeReq {
AdapterId string `json:"adapterId"`
ClusterId string `json:"clusterId"`
ResourceType string `json:"resourceType"`
Card string `json:"card"`
TaskType string `json:"taskType"`
Dataset string `json:"dataset"`
Algorithm string `json:"algorithm"`
Code string `json:"code"`
}
// UploadAlgorithmCodeResp is the response type of the upload route; it declares no fields.
UploadAlgorithmCodeResp {
}
) )

View File

@ -1190,6 +1190,11 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
Path: "/schedule/ai/getAlgorithms/:adapterId/:resourceType/:taskType/:dataset", Path: "/schedule/ai/getAlgorithms/:adapterId/:resourceType/:taskType/:dataset",
Handler: schedule.ScheduleGetAlgorithmsHandler(serverCtx), Handler: schedule.ScheduleGetAlgorithmsHandler(serverCtx),
}, },
{
Method: http.MethodGet,
Path: "/schedule/ai/getJobLog/:adapterId/:clusterId/:taskId/:instanceNum",
Handler: schedule.ScheduleGetAiJobLogLogHandler(serverCtx),
},
{ {
Method: http.MethodPost, Method: http.MethodPost,
Path: "/schedule/submit", Path: "/schedule/submit",
@ -1200,6 +1205,16 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
Path: "/schedule/getOverview", Path: "/schedule/getOverview",
Handler: schedule.ScheduleGetOverviewHandler(serverCtx), Handler: schedule.ScheduleGetOverviewHandler(serverCtx),
}, },
{
Method: http.MethodGet,
Path: "/schedule/getDownloadAlgothmCode",
Handler: schedule.DownloadAlgothmCodeHandler(serverCtx),
},
{
Method: http.MethodPost,
Path: "/schedule/getDownloadAlgothmCode",
Handler: schedule.UploadAlgothmCodeHandler(serverCtx),
},
}, },
rest.WithPrefix("/pcm/v1"), rest.WithPrefix("/pcm/v1"),
) )
@ -1294,7 +1309,7 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
}, },
{ {
Method: http.MethodPost, Method: http.MethodPost,
Path: "/core/syncClusterAlert", Path: "/monitoring/syncClusterAlert",
Handler: monitoring.SyncClusterAlertHandler(serverCtx), Handler: monitoring.SyncClusterAlertHandler(serverCtx),
}, },
{ {

View File

@ -8,6 +8,7 @@ import (
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
"strconv" "strconv"
"sync" "sync"
"time"
) )
type GetCenterOverviewLogic struct { type GetCenterOverviewLogic struct {
@ -71,9 +72,14 @@ func (l *GetCenterOverviewLogic) GetCenterOverview() (resp *types.CenterOverview
} }
resp.CardNum = cardNum resp.CardNum = cardNum
resp.PowerInTops = totalTops resp.PowerInTops = totalTops
<-ch
select {
case _ = <-ch:
return resp, nil return resp, nil
case <-time.After(2 * time.Second):
return resp, nil
}
} }
func (l *GetCenterOverviewLogic) updateClusterResource(mu *sync.RWMutex, ch chan<- struct{}, list []*types.AdapterInfo) { func (l *GetCenterOverviewLogic) updateClusterResource(mu *sync.RWMutex, ch chan<- struct{}, list []*types.AdapterInfo) {

View File

@ -73,9 +73,14 @@ func (l *GetCenterTaskListLogic) GetCenterTaskList() (resp *types.CenterTaskList
resp.List = append(resp.List, t) resp.List = append(resp.List, t)
} }
} }
<-ch
select {
case _ = <-ch:
return resp, nil return resp, nil
case <-time.After(1 * time.Second):
return resp, nil
}
} }
func (l *GetCenterTaskListLogic) updateAiTaskStatus(mu *sync.RWMutex, ch chan<- struct{}, list []*types.AdapterInfo) { func (l *GetCenterTaskListLogic) updateAiTaskStatus(mu *sync.RWMutex, ch chan<- struct{}, list []*types.AdapterInfo) {

View File

@ -8,8 +8,8 @@ type AiCollector interface {
GetAlgorithms(ctx context.Context) ([]*Algorithm, error) GetAlgorithms(ctx context.Context) ([]*Algorithm, error)
GetTrainingTaskLog(ctx context.Context, taskId string, instanceNum string) (string, error) GetTrainingTaskLog(ctx context.Context, taskId string, instanceNum string) (string, error)
GetTrainingTask(ctx context.Context, taskId string) (*Task, error) GetTrainingTask(ctx context.Context, taskId string) (*Task, error)
DownloadAlgorithmCode(ctx context.Context, resourceType string, taskType string, dataset string, algorithm string) (string, error) DownloadAlgorithmCode(ctx context.Context, resourceType string, card string, taskType string, dataset string, algorithm string) (string, error)
UploadAlgorithmCode(ctx context.Context, resourceType string, taskType string, dataset string, algorithm string, code string) error UploadAlgorithmCode(ctx context.Context, resourceType string, card string, taskType string, dataset string, algorithm string, code string) error
} }
type ResourceStats struct { type ResourceStats struct {

View File

@ -162,11 +162,11 @@ func (m *ModelArtsLink) GetAlgorithms(ctx context.Context) ([]*collector.Algorit
return nil, nil return nil, nil
} }
func (m *ModelArtsLink) DownloadAlgorithmCode(ctx context.Context, resourceType string, taskType string, dataset string, algorithm string) (string, error) { func (m *ModelArtsLink) DownloadAlgorithmCode(ctx context.Context, resourceType string, card string, taskType string, dataset string, algorithm string) (string, error) {
return "", nil return "", nil
} }
func (m *ModelArtsLink) UploadAlgorithmCode(ctx context.Context, resourceType string, taskType string, dataset string, algorithm string, code string) error { func (m *ModelArtsLink) UploadAlgorithmCode(ctx context.Context, resourceType string, card string, taskType string, dataset string, algorithm string, code string) error {
return nil return nil
} }

View File

@ -339,11 +339,11 @@ func (o *OctopusLink) GetAlgorithms(ctx context.Context) ([]*collector.Algorithm
return algorithms, nil return algorithms, nil
} }
func (o *OctopusLink) DownloadAlgorithmCode(ctx context.Context, resourceType string, taskType string, dataset string, algorithm string) (string, error) { func (o *OctopusLink) DownloadAlgorithmCode(ctx context.Context, resourceType string, card string, taskType string, dataset string, algorithm string) (string, error) {
return "", nil return "", nil
} }
func (o *OctopusLink) UploadAlgorithmCode(ctx context.Context, resourceType string, taskType string, dataset string, algorithm string, code string) error { func (o *OctopusLink) UploadAlgorithmCode(ctx context.Context, resourceType string, card string, taskType string, dataset string, algorithm string, code string) error {
return nil return nil
} }

View File

@ -447,11 +447,32 @@ func (s *ShuguangAi) GetAlgorithms(ctx context.Context) ([]*collector.Algorithm,
return algorithms, nil return algorithms, nil
} }
func (s *ShuguangAi) DownloadAlgorithmCode(ctx context.Context, resourceType string, taskType string, dataset string, algorithm string) (string, error) { func (s *ShuguangAi) DownloadAlgorithmCode(ctx context.Context, resourceType string, card string, taskType string, dataset string, algorithm string) (string, error) {
return "", nil algoName := dataset + DASH + algorithm
req := &hpcAC.GetFileReq{
Path: ALGORITHM_DIR + FORWARD_SLASH + taskType + FORWARD_SLASH + algoName + FORWARD_SLASH + TRAIN_FILE,
}
resp, err := s.aCRpc.GetFile(ctx, req)
if err != nil {
return "", err
}
return resp.Content, nil
} }
func (s *ShuguangAi) UploadAlgorithmCode(ctx context.Context, resourceType string, taskType string, dataset string, algorithm string, code string) error { func (s *ShuguangAi) UploadAlgorithmCode(ctx context.Context, resourceType string, card string, taskType string, dataset string, algorithm string, code string) error {
algoName := dataset + DASH + algorithm
req := &hpcAC.UploadFileReq{
Path: ALGORITHM_DIR + FORWARD_SLASH + taskType + FORWARD_SLASH + algoName + FORWARD_SLASH,
Cover: "cover",
File: code,
}
_, err := s.aCRpc.UploadFile(ctx, req)
if err != nil {
return err
}
return nil return nil
} }

View File

@ -5676,6 +5676,35 @@ type AiTaskDb struct {
EndTime string `json:"endTime,omitempty" db:"end_time"` EndTime string `json:"endTime,omitempty" db:"end_time"`
} }
// DownloadAlgorithmCodeReq is the request type of the
// GET /schedule/getDownloadAlgothmCode route. Every field carries a `form` tag.
// ResourceType, Card, TaskType, Dataset and Algorithm share their names with the
// parameters of AiCollector.DownloadAlgorithmCode.
type DownloadAlgorithmCodeReq struct {
AdapterId string `form:"adapterId"`
ClusterId string `form:"clusterId"`
ResourceType string `form:"resourceType"`
Card string `form:"card"`
TaskType string `form:"taskType"`
Dataset string `form:"dataset"`
Algorithm string `form:"algorithm"`
// NOTE(review): a Code field on a download request looks unusual; confirm it is needed.
Code string `form:"code"`
}
// DownloadAlgorithmCodeResp is the response type of the
// GET /schedule/getDownloadAlgothmCode route.
type DownloadAlgorithmCodeResp struct {
// NOTE(review): the field is named Code but is serialized under the JSON key
// "algorithms"; confirm the key is intended before clients depend on it.
Code string `json:"algorithms"`
}
// UploadAlgorithmCodeReq is the request type of the
// POST /schedule/getDownloadAlgothmCode route. Every field carries a `json` tag.
// NOTE(review): the upload route reuses the download path and differs only by
// HTTP method; confirm the path is intended.
type UploadAlgorithmCodeReq struct {
AdapterId string `json:"adapterId"`
ClusterId string `json:"clusterId"`
ResourceType string `json:"resourceType"`
Card string `json:"card"`
TaskType string `json:"taskType"`
Dataset string `json:"dataset"`
Algorithm string `json:"algorithm"`
Code string `json:"code"`
}
// UploadAlgorithmCodeResp is the response type of the upload route; it declares no fields.
type UploadAlgorithmCodeResp struct {
}
type CreateAlertRuleReq struct { type CreateAlertRuleReq struct {
CLusterId string `json:"clusterId"` CLusterId string `json:"clusterId"`
ClusterName string `json:"clusterName"` ClusterName string `json:"clusterName"`