Merge pull request 'fix aiOverview timeout bugs' (#145) from tzwang/pcm-coordinator:master into master
Former-commit-id: 38786fb3fe33c433b06ae25214c691d8d7600d78
This commit is contained in:
commit
fa2c7b1aa9
|
@ -966,6 +966,12 @@ service pcm {
|
||||||
|
|
||||||
@handler ScheduleGetOverviewHandler
|
@handler ScheduleGetOverviewHandler
|
||||||
post /schedule/getOverview returns (ScheduleOverviewResp)
|
post /schedule/getOverview returns (ScheduleOverviewResp)
|
||||||
|
|
||||||
|
@handler DownloadAlgothmCodeHandler
|
||||||
|
get /schedule/getDownloadAlgothmCode (DownloadAlgorithmCodeReq) returns (DownloadAlgorithmCodeResp)
|
||||||
|
|
||||||
|
@handler UploadAlgothmCodeHandler
|
||||||
|
post /schedule/getDownloadAlgothmCode (UploadAlgorithmCodeReq) returns (UploadAlgorithmCodeResp)
|
||||||
}
|
}
|
||||||
|
|
||||||
@server(
|
@server(
|
||||||
|
|
|
@ -100,4 +100,33 @@ type (
|
||||||
StartTime string `json:"startTime,omitempty" db:"start_time"`
|
StartTime string `json:"startTime,omitempty" db:"start_time"`
|
||||||
EndTime string `json:"endTime,omitempty" db:"end_time"`
|
EndTime string `json:"endTime,omitempty" db:"end_time"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
DownloadAlgorithmCodeReq {
|
||||||
|
AdapterId string `form:"adapterId"`
|
||||||
|
ClusterId string `form:"clusterId"`
|
||||||
|
ResourceType string `form:"resourceType"`
|
||||||
|
Card string `form:"card"`
|
||||||
|
TaskType string `form:"taskType"`
|
||||||
|
Dataset string `form:"dataset"`
|
||||||
|
Algorithm string `form:"algorithm"`
|
||||||
|
Code string `form:"code"`
|
||||||
|
}
|
||||||
|
|
||||||
|
DownloadAlgorithmCodeResp {
|
||||||
|
Code string `json:"algorithms"`
|
||||||
|
}
|
||||||
|
|
||||||
|
UploadAlgorithmCodeReq {
|
||||||
|
AdapterId string `json:"adapterId"`
|
||||||
|
ClusterId string `json:"clusterId"`
|
||||||
|
ResourceType string `json:"resourceType"`
|
||||||
|
Card string `json:"card"`
|
||||||
|
TaskType string `json:"taskType"`
|
||||||
|
Dataset string `json:"dataset"`
|
||||||
|
Algorithm string `json:"algorithm"`
|
||||||
|
Code string `json:"code"`
|
||||||
|
}
|
||||||
|
|
||||||
|
UploadAlgorithmCodeResp {
|
||||||
|
}
|
||||||
)
|
)
|
|
@ -1190,6 +1190,11 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
|
||||||
Path: "/schedule/ai/getAlgorithms/:adapterId/:resourceType/:taskType/:dataset",
|
Path: "/schedule/ai/getAlgorithms/:adapterId/:resourceType/:taskType/:dataset",
|
||||||
Handler: schedule.ScheduleGetAlgorithmsHandler(serverCtx),
|
Handler: schedule.ScheduleGetAlgorithmsHandler(serverCtx),
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
Method: http.MethodGet,
|
||||||
|
Path: "/schedule/ai/getJobLog/:adapterId/:clusterId/:taskId/:instanceNum",
|
||||||
|
Handler: schedule.ScheduleGetAiJobLogLogHandler(serverCtx),
|
||||||
|
},
|
||||||
{
|
{
|
||||||
Method: http.MethodPost,
|
Method: http.MethodPost,
|
||||||
Path: "/schedule/submit",
|
Path: "/schedule/submit",
|
||||||
|
@ -1200,6 +1205,16 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
|
||||||
Path: "/schedule/getOverview",
|
Path: "/schedule/getOverview",
|
||||||
Handler: schedule.ScheduleGetOverviewHandler(serverCtx),
|
Handler: schedule.ScheduleGetOverviewHandler(serverCtx),
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
Method: http.MethodGet,
|
||||||
|
Path: "/schedule/getDownloadAlgothmCode",
|
||||||
|
Handler: schedule.DownloadAlgothmCodeHandler(serverCtx),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Method: http.MethodPost,
|
||||||
|
Path: "/schedule/getDownloadAlgothmCode",
|
||||||
|
Handler: schedule.UploadAlgothmCodeHandler(serverCtx),
|
||||||
|
},
|
||||||
},
|
},
|
||||||
rest.WithPrefix("/pcm/v1"),
|
rest.WithPrefix("/pcm/v1"),
|
||||||
)
|
)
|
||||||
|
@ -1294,7 +1309,7 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Method: http.MethodPost,
|
Method: http.MethodPost,
|
||||||
Path: "/core/syncClusterAlert",
|
Path: "/monitoring/syncClusterAlert",
|
||||||
Handler: monitoring.SyncClusterAlertHandler(serverCtx),
|
Handler: monitoring.SyncClusterAlertHandler(serverCtx),
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
|
@ -8,6 +8,7 @@ import (
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
|
||||||
"strconv"
|
"strconv"
|
||||||
"sync"
|
"sync"
|
||||||
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
type GetCenterOverviewLogic struct {
|
type GetCenterOverviewLogic struct {
|
||||||
|
@ -71,9 +72,14 @@ func (l *GetCenterOverviewLogic) GetCenterOverview() (resp *types.CenterOverview
|
||||||
}
|
}
|
||||||
resp.CardNum = cardNum
|
resp.CardNum = cardNum
|
||||||
resp.PowerInTops = totalTops
|
resp.PowerInTops = totalTops
|
||||||
<-ch
|
|
||||||
|
|
||||||
return resp, nil
|
select {
|
||||||
|
case _ = <-ch:
|
||||||
|
return resp, nil
|
||||||
|
case <-time.After(2 * time.Second):
|
||||||
|
return resp, nil
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (l *GetCenterOverviewLogic) updateClusterResource(mu *sync.RWMutex, ch chan<- struct{}, list []*types.AdapterInfo) {
|
func (l *GetCenterOverviewLogic) updateClusterResource(mu *sync.RWMutex, ch chan<- struct{}, list []*types.AdapterInfo) {
|
||||||
|
|
|
@ -73,9 +73,14 @@ func (l *GetCenterTaskListLogic) GetCenterTaskList() (resp *types.CenterTaskList
|
||||||
resp.List = append(resp.List, t)
|
resp.List = append(resp.List, t)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
<-ch
|
|
||||||
|
|
||||||
return resp, nil
|
select {
|
||||||
|
case _ = <-ch:
|
||||||
|
return resp, nil
|
||||||
|
case <-time.After(1 * time.Second):
|
||||||
|
return resp, nil
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (l *GetCenterTaskListLogic) updateAiTaskStatus(mu *sync.RWMutex, ch chan<- struct{}, list []*types.AdapterInfo) {
|
func (l *GetCenterTaskListLogic) updateAiTaskStatus(mu *sync.RWMutex, ch chan<- struct{}, list []*types.AdapterInfo) {
|
||||||
|
|
|
@ -8,8 +8,8 @@ type AiCollector interface {
|
||||||
GetAlgorithms(ctx context.Context) ([]*Algorithm, error)
|
GetAlgorithms(ctx context.Context) ([]*Algorithm, error)
|
||||||
GetTrainingTaskLog(ctx context.Context, taskId string, instanceNum string) (string, error)
|
GetTrainingTaskLog(ctx context.Context, taskId string, instanceNum string) (string, error)
|
||||||
GetTrainingTask(ctx context.Context, taskId string) (*Task, error)
|
GetTrainingTask(ctx context.Context, taskId string) (*Task, error)
|
||||||
DownloadAlgorithmCode(ctx context.Context, resourceType string, taskType string, dataset string, algorithm string) (string, error)
|
DownloadAlgorithmCode(ctx context.Context, resourceType string, card string, taskType string, dataset string, algorithm string) (string, error)
|
||||||
UploadAlgorithmCode(ctx context.Context, resourceType string, taskType string, dataset string, algorithm string, code string) error
|
UploadAlgorithmCode(ctx context.Context, resourceType string, card string, taskType string, dataset string, algorithm string, code string) error
|
||||||
}
|
}
|
||||||
|
|
||||||
type ResourceStats struct {
|
type ResourceStats struct {
|
||||||
|
|
|
@ -162,11 +162,11 @@ func (m *ModelArtsLink) GetAlgorithms(ctx context.Context) ([]*collector.Algorit
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *ModelArtsLink) DownloadAlgorithmCode(ctx context.Context, resourceType string, taskType string, dataset string, algorithm string) (string, error) {
|
func (m *ModelArtsLink) DownloadAlgorithmCode(ctx context.Context, resourceType string, card string, taskType string, dataset string, algorithm string) (string, error) {
|
||||||
return "", nil
|
return "", nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *ModelArtsLink) UploadAlgorithmCode(ctx context.Context, resourceType string, taskType string, dataset string, algorithm string, code string) error {
|
func (m *ModelArtsLink) UploadAlgorithmCode(ctx context.Context, resourceType string, card string, taskType string, dataset string, algorithm string, code string) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -339,11 +339,11 @@ func (o *OctopusLink) GetAlgorithms(ctx context.Context) ([]*collector.Algorithm
|
||||||
return algorithms, nil
|
return algorithms, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o *OctopusLink) DownloadAlgorithmCode(ctx context.Context, resourceType string, taskType string, dataset string, algorithm string) (string, error) {
|
func (o *OctopusLink) DownloadAlgorithmCode(ctx context.Context, resourceType string, card string, taskType string, dataset string, algorithm string) (string, error) {
|
||||||
return "", nil
|
return "", nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o *OctopusLink) UploadAlgorithmCode(ctx context.Context, resourceType string, taskType string, dataset string, algorithm string, code string) error {
|
func (o *OctopusLink) UploadAlgorithmCode(ctx context.Context, resourceType string, card string, taskType string, dataset string, algorithm string, code string) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -447,11 +447,32 @@ func (s *ShuguangAi) GetAlgorithms(ctx context.Context) ([]*collector.Algorithm,
|
||||||
return algorithms, nil
|
return algorithms, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *ShuguangAi) DownloadAlgorithmCode(ctx context.Context, resourceType string, taskType string, dataset string, algorithm string) (string, error) {
|
func (s *ShuguangAi) DownloadAlgorithmCode(ctx context.Context, resourceType string, card string, taskType string, dataset string, algorithm string) (string, error) {
|
||||||
return "", nil
|
algoName := dataset + DASH + algorithm
|
||||||
|
req := &hpcAC.GetFileReq{
|
||||||
|
Path: ALGORITHM_DIR + FORWARD_SLASH + taskType + FORWARD_SLASH + algoName + FORWARD_SLASH + TRAIN_FILE,
|
||||||
|
}
|
||||||
|
resp, err := s.aCRpc.GetFile(ctx, req)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
return resp.Content, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *ShuguangAi) UploadAlgorithmCode(ctx context.Context, resourceType string, taskType string, dataset string, algorithm string, code string) error {
|
func (s *ShuguangAi) UploadAlgorithmCode(ctx context.Context, resourceType string, card string, taskType string, dataset string, algorithm string, code string) error {
|
||||||
|
algoName := dataset + DASH + algorithm
|
||||||
|
req := &hpcAC.UploadFileReq{
|
||||||
|
Path: ALGORITHM_DIR + FORWARD_SLASH + taskType + FORWARD_SLASH + algoName + FORWARD_SLASH,
|
||||||
|
Cover: "cover",
|
||||||
|
File: code,
|
||||||
|
}
|
||||||
|
|
||||||
|
_, err := s.aCRpc.UploadFile(ctx, req)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -5676,6 +5676,35 @@ type AiTaskDb struct {
|
||||||
EndTime string `json:"endTime,omitempty" db:"end_time"`
|
EndTime string `json:"endTime,omitempty" db:"end_time"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type DownloadAlgorithmCodeReq struct {
|
||||||
|
AdapterId string `form:"adapterId"`
|
||||||
|
ClusterId string `form:"clusterId"`
|
||||||
|
ResourceType string `form:"resourceType"`
|
||||||
|
Card string `form:"card"`
|
||||||
|
TaskType string `form:"taskType"`
|
||||||
|
Dataset string `form:"dataset"`
|
||||||
|
Algorithm string `form:"algorithm"`
|
||||||
|
Code string `form:"code"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type DownloadAlgorithmCodeResp struct {
|
||||||
|
Code string `json:"algorithms"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type UploadAlgorithmCodeReq struct {
|
||||||
|
AdapterId string `json:"adapterId"`
|
||||||
|
ClusterId string `json:"clusterId"`
|
||||||
|
ResourceType string `json:"resourceType"`
|
||||||
|
Card string `json:"card"`
|
||||||
|
TaskType string `json:"taskType"`
|
||||||
|
Dataset string `json:"dataset"`
|
||||||
|
Algorithm string `json:"algorithm"`
|
||||||
|
Code string `json:"code"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type UploadAlgorithmCodeResp struct {
|
||||||
|
}
|
||||||
|
|
||||||
type CreateAlertRuleReq struct {
|
type CreateAlertRuleReq struct {
|
||||||
CLusterId string `json:"clusterId"`
|
CLusterId string `json:"clusterId"`
|
||||||
ClusterName string `json:"clusterName"`
|
ClusterName string `json:"clusterName"`
|
||||||
|
|
Loading…
Reference in New Issue