updated startall apis

Former-commit-id: fb95b3126f6f99e0a5de9ca0f12d410b97f1757b
This commit is contained in:
tzwang 2024-08-13 15:39:29 +08:00
parent f78083133b
commit 31d0096029
6 changed files with 88 additions and 6 deletions

View File

@ -4,6 +4,8 @@ import (
"context"
"errors"
"github.com/zeromicro/go-zero/core/logx"
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/inference"
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/storeLink"
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
"strconv"
@ -34,9 +36,15 @@ func (l *StartAllByDeployTaskIdLogic) StartAllByDeployTaskId(req *types.StartAll
}
for _, ins := range list {
success := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[strconv.FormatInt(ins.AdapterId, 10)][strconv.FormatInt(ins.ClusterId, 10)].StartInferDeployInstance(l.ctx, ins.InstanceId)
if !success {
return nil, errors.New(ins.InstanceName + " start failed")
in, err := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[strconv.FormatInt(ins.AdapterId, 10)][strconv.FormatInt(ins.ClusterId, 10)].GetInferDeployInstance(l.ctx, ins.InstanceId)
if err != nil {
return nil, err
}
if checkStopStatus(in) {
success := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[strconv.FormatInt(ins.AdapterId, 10)][strconv.FormatInt(ins.ClusterId, 10)].StartInferDeployInstance(l.ctx, ins.InstanceId)
if !success {
return nil, errors.New(ins.InstanceName + " start failed")
}
}
}
@ -47,3 +55,31 @@ func (l *StartAllByDeployTaskIdLogic) StartAllByDeployTaskId(req *types.StartAll
return resp, nil
}
// checkStopStatus reports whether the given deploy instance is in the
// status that this file treats as "stopped" for its cluster type:
//   - storeLink.TYPE_OCTOPUS and storeLink.TYPE_MODELARTS: "stopped"
//   - storeLink.TYPE_SHUGUANGAI: "Terminated"
//
// Any other cluster type, or any other status, yields false. The status
// comparison is exact (case-sensitive).
func checkStopStatus(in *inference.DeployInstance) bool {
	switch in.ClusterType {
	case storeLink.TYPE_OCTOPUS:
		return in.Status == "stopped"
	case storeLink.TYPE_MODELARTS:
		return in.Status == "stopped"
	case storeLink.TYPE_SHUGUANGAI:
		return in.Status == "Terminated"
	}
	// Unrecognized cluster type: never considered stopped.
	return false
}

View File

@ -3,6 +3,8 @@ package inference
import (
"context"
"errors"
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/inference"
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/storeLink"
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
"strconv"
@ -35,9 +37,15 @@ func (l *StopAllByDeployTaskIdLogic) StopAllByDeployTaskId(req *types.StopAllByD
}
for _, ins := range list {
success := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[strconv.FormatInt(ins.AdapterId, 10)][strconv.FormatInt(ins.ClusterId, 10)].StopInferDeployInstance(l.ctx, ins.InstanceId)
if !success {
return nil, errors.New(ins.InstanceName + " stop failed")
in, err := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[strconv.FormatInt(ins.AdapterId, 10)][strconv.FormatInt(ins.ClusterId, 10)].GetInferDeployInstance(l.ctx, ins.InstanceId)
if err != nil {
return nil, err
}
if checkStatus(in) {
success := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[strconv.FormatInt(ins.AdapterId, 10)][strconv.FormatInt(ins.ClusterId, 10)].StopInferDeployInstance(l.ctx, ins.InstanceId)
if !success {
return nil, errors.New(ins.InstanceName + " stop failed")
}
}
}
@ -48,3 +56,31 @@ func (l *StopAllByDeployTaskIdLogic) StopAllByDeployTaskId(req *types.StopAllByD
return resp, nil
}
// checkStatus reports whether the given deploy instance is in the status
// that this file treats as "running" for its cluster type:
//   - storeLink.TYPE_OCTOPUS and storeLink.TYPE_MODELARTS: "running"
//   - storeLink.TYPE_SHUGUANGAI: "Running"
//
// Any other cluster type, or any other status, yields false. The status
// comparison is exact (case-sensitive).
func checkStatus(in *inference.DeployInstance) bool {
	switch in.ClusterType {
	case storeLink.TYPE_OCTOPUS:
		return in.Status == "running"
	case storeLink.TYPE_MODELARTS:
		return in.Status == "running"
	case storeLink.TYPE_SHUGUANGAI:
		return in.Status == "Running"
	}
	// Unrecognized cluster type: never considered running.
	return false
}

View File

@ -38,6 +38,7 @@ type DeployInstance struct {
ModelType string
InferCard string
ClusterName string
ClusterType string
Status string
CreatedTime string
}

View File

@ -471,6 +471,7 @@ func (m *ModelArtsLink) GetInferDeployInstanceList(ctx context.Context) ([]*infe
ins.InferCard = "NPU"
ins.ClusterName = m.platform
ins.CreatedTime = string(services.StartTime)
ins.ClusterType = TYPE_MODELARTS
insList = append(insList, ins)
}
@ -525,5 +526,6 @@ func (m *ModelArtsLink) GetInferDeployInstance(ctx context.Context, id string) (
ins.InferCard = "NPU"
ins.ClusterName = m.platform
ins.CreatedTime = string(resp.StartTime)
ins.ClusterType = TYPE_MODELARTS
return ins, nil
}

View File

@ -926,6 +926,7 @@ func (o *OctopusLink) GetInferDeployInstanceList(ctx context.Context) ([]*infere
ins.InstanceId = notebook.Id
ins.ClusterName = o.platform
ins.Status = notebook.Status
ins.ClusterType = TYPE_OCTOPUS
insList = append(insList, ins)
}
return insList, nil
@ -974,5 +975,7 @@ func (o *OctopusLink) GetInferDeployInstance(ctx context.Context, id string) (*i
ins.InstanceId = resp.Payload.Notebook.Id
ins.ClusterName = o.platform
ins.Status = resp.Payload.Notebook.Status
ins.ClusterType = TYPE_OCTOPUS
return ins, nil
}

View File

@ -815,6 +815,8 @@ func (s *ShuguangAi) GetInferDeployInstanceList(ctx context.Context) ([]*inferen
ins.Status = datum.Status
ins.InferCard = DCU
ins.CreatedTime = datum.CreateTime
ins.ClusterType = TYPE_SHUGUANGAI
insList = append(insList, ins)
}
@ -865,5 +867,7 @@ func (s *ShuguangAi) GetInferDeployInstance(ctx context.Context, id string) (*in
ins.Status = resp.Data.Status
ins.InferCard = DCU
ins.CreatedTime = resp.Data.CreateTime
ins.ClusterType = TYPE_SHUGUANGAI
return ins, nil
}