From 1a2e4b031acbb6b2bd527f64803e1c27662f4704 Mon Sep 17 00:00:00 2001
From: jagger
Date: Mon, 20 May 2024 21:13:06 +0800
Subject: [PATCH] fix bug

Signed-off-by: jagger

Former-commit-id: b2946b0f6946b3c587f6b1f3ced3068cc005459c
---
 api/internal/storeLink/modelarts.go | 56 +++++++++++++++++++++++++++--
 1 file changed, 54 insertions(+), 2 deletions(-)

diff --git a/api/internal/storeLink/modelarts.go b/api/internal/storeLink/modelarts.go
index c91df822..d19b0b82 100644
--- a/api/internal/storeLink/modelarts.go
+++ b/api/internal/storeLink/modelarts.go
@@ -19,12 +19,14 @@ import (
 	"github.com/pkg/errors"
 	"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option"
 	"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/collector"
+	"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
 	"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
 	"gitlink.org.cn/JointCloud/pcm-modelarts/client/imagesservice"
 	"gitlink.org.cn/JointCloud/pcm-modelarts/client/modelartsservice"
 	"gitlink.org.cn/JointCloud/pcm-modelarts/modelarts"
 	"strconv"
 	"strings"
+	"time"
 )
 
 const (
@@ -207,11 +209,61 @@ func (m *ModelArtsLink) UploadAlgorithmCode(ctx context.Context, resourceType st
 }
 
 func (m *ModelArtsLink) GetTrainingTaskLog(ctx context.Context, taskId string, instanceNum string) (string, error) {
-	return "", nil
+	req := &modelartsservice.GetTrainingJobLogsPreviewReq{
+		Platform:      m.platform,
+		TaskId:        "worker-0",
+		TrainingJobId: taskId,
+	}
+	resp, err := m.modelArtsRpc.GetTrainingJobLogsPreview(ctx, req)
+	if err != nil {
+		return "", err
+	}
+
+	if strings.Contains(resp.Content, "404 Not Found") {
+		resp.Content = "waiting for logs..."
+	}
+	return resp.Content, nil
 }
 
 func (m *ModelArtsLink) GetTrainingTask(ctx context.Context, taskId string) (*collector.Task, error) {
-	return nil, nil
+	resp, err := m.QueryTask(ctx, taskId)
+	if err != nil {
+		return nil, err
+	}
+	jobresp, ok := (resp).(*modelartsservice.JobResponse)
+	// Check the assertion result first: when it fails jobresp is nil and reading its fields panics.
+	if !ok || jobresp == nil {
+		return nil, errors.New("get training task failed, empty error returned")
+	}
+	if jobresp.ErrorMsg != "" {
+		return nil, errors.New(jobresp.ErrorMsg)
+	}
+	var task collector.Task
+	task.Id = jobresp.Metadata.Id
+
+	switch strings.ToLower(jobresp.Status.Phase) {
+	case "completed":
+		task.Start = time.Unix(int64(jobresp.Status.StartTime)/1000, 0).Format(constants.Layout)
+		duration := jobresp.Status.Duration
+		task.End = time.Unix(int64(jobresp.Status.StartTime)/1000+int64(duration/1000), 0).Format(constants.Layout)
+		task.Status = constants.Completed
+	case "failed":
+		task.Status = constants.Failed
+	case "running":
+		task.Start = time.Unix(int64(jobresp.Status.StartTime)/1000, 0).Format(constants.Layout)
+		task.Status = constants.Running
+	case "stopped":
+		task.Status = constants.Stopped
+	case "pending":
+		task.Status = constants.Pending
+	case "terminated":
+		// TODO: confirm that "terminated" should be reported as Failed.
+		task.Status = constants.Failed
+	default:
+		task.Status = "undefined"
+	}
+
+	return &task, nil
 }
 
 func (m *ModelArtsLink) Execute(ctx context.Context, option *option.AiOption) (interface{}, error) {