updated deployinstance status files
This commit is contained in:
parent
97d2af6f2d
commit
5153b6a31a
|
@ -16,7 +16,8 @@ package cron
|
|||
|
||||
import (
|
||||
"github.com/zeromicro/go-zero/core/logx"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/updater"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/utils/stat"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/utils/status"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
|
||||
)
|
||||
|
||||
|
@ -28,8 +29,8 @@ func AddCronGroup(svc *svc.ServiceContext) {
|
|||
logx.Errorf(err.Error())
|
||||
return
|
||||
}
|
||||
updater.UpdateTaskStatus(svc, list)
|
||||
updater.UpdateAiTaskStatus(svc, list)
|
||||
status.UpdateTaskStatus(svc, list)
|
||||
status.UpdateAiTaskStatus(svc, list)
|
||||
})
|
||||
|
||||
svc.Cron.AddFunc("*/5 * * * * ?", func() {
|
||||
|
@ -42,6 +43,6 @@ func AddCronGroup(svc *svc.ServiceContext) {
|
|||
logx.Errorf(err.Error())
|
||||
return
|
||||
}
|
||||
updater.UpdateClusterResources(svc, adapterList)
|
||||
stat.UpdateClusterResources(svc, adapterList)
|
||||
})
|
||||
}
|
||||
|
|
|
@ -3,7 +3,7 @@ package ai
|
|||
import (
|
||||
"context"
|
||||
"github.com/zeromicro/go-zero/core/logx"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/updater"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/utils/stat"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
|
||||
)
|
||||
|
@ -37,7 +37,7 @@ func (l *GetCenterOverviewLogic) GetCenterOverview() (resp *types.CenterOverview
|
|||
centerNum = int32(len(adapterList))
|
||||
resp.CenterNum = centerNum
|
||||
|
||||
go updater.UpdateClusterResources(l.svcCtx, adapterList)
|
||||
go stat.UpdateClusterResources(l.svcCtx, adapterList)
|
||||
|
||||
for _, adapter := range adapterList {
|
||||
taskList, err := l.svcCtx.Scheduler.AiStorages.GetAiTasksByAdapterId(adapter.Id)
|
||||
|
|
|
@ -3,7 +3,7 @@ package ai
|
|||
import (
|
||||
"context"
|
||||
"github.com/zeromicro/go-zero/core/logx"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/updater"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/utils/status"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
|
||||
|
@ -32,7 +32,7 @@ func (l *GetCenterTaskListLogic) GetCenterTaskList() (resp *types.CenterTaskList
|
|||
return nil, err
|
||||
}
|
||||
|
||||
go updater.UpdateTrainingTaskStatus(l.svcCtx, adapterList)
|
||||
go status.UpdateTrainingTaskStatus(l.svcCtx, adapterList)
|
||||
|
||||
for _, adapter := range adapterList {
|
||||
taskList, err := l.svcCtx.Scheduler.AiStorages.GetAiTasksByAdapterId(adapter.Id)
|
||||
|
|
|
@ -2,7 +2,7 @@ package core
|
|||
|
||||
import (
|
||||
"context"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/updater"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/utils/status"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
|
||||
|
@ -52,8 +52,8 @@ func (l *PageListTaskLogic) PageListTask(req *types.PageTaskReq) (resp *types.Pa
|
|||
}
|
||||
|
||||
// 更新智算任务状态
|
||||
go updater.UpdateTaskStatus(l.svcCtx, list)
|
||||
go updater.UpdateAiTaskStatus(l.svcCtx, list)
|
||||
go status.UpdateTaskStatus(l.svcCtx, list)
|
||||
go status.UpdateAiTaskStatus(l.svcCtx, list)
|
||||
|
||||
for _, model := range list {
|
||||
if model.StartTime != "" && model.EndTime == "" {
|
||||
|
|
|
@ -5,7 +5,7 @@ import (
|
|||
"errors"
|
||||
"github.com/zeromicro/go-zero/core/logx"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/common"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/updater"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/utils/status"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
|
||||
|
@ -71,7 +71,7 @@ func (l *DeployInstanceListLogic) DeployInstanceList(req *types.DeployInstanceLi
|
|||
list := common.ConcatMultipleSlices(slices)
|
||||
|
||||
if len(list) != 0 {
|
||||
go updater.UpdateDeployInstanceStatusBatch(l.svcCtx, list)
|
||||
go status.UpdateDeployInstanceStatusBatch(l.svcCtx, list, true)
|
||||
|
||||
ins := list[0]
|
||||
for i := range list {
|
||||
|
@ -82,8 +82,8 @@ func (l *DeployInstanceListLogic) DeployInstanceList(req *types.DeployInstanceLi
|
|||
}
|
||||
}
|
||||
|
||||
go updater.UpdateDeployInstanceStatus(l.svcCtx, ins, true)
|
||||
go updater.UpdateDeployTaskStatus(l.svcCtx)
|
||||
go status.UpdateDeployInstanceStatus(l.svcCtx, ins, true)
|
||||
go status.UpdateDeployTaskStatus(l.svcCtx)
|
||||
}
|
||||
|
||||
resp.List = &deployTasks
|
||||
|
|
|
@ -5,8 +5,7 @@ import (
|
|||
"errors"
|
||||
"fmt"
|
||||
"github.com/zeromicro/go-zero/core/logx"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/inference"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/storeLink"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/utils/status"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
|
||||
|
@ -84,7 +83,7 @@ func (l *StartAllByDeployTaskIdLogic) startAll(list []*models.AiInferDeployInsta
|
|||
<-buf
|
||||
return
|
||||
}
|
||||
if checkStopStatus(in) {
|
||||
if status.CheckStopStatus(in) {
|
||||
success := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[strconv.FormatInt(ins.AdapterId, 10)][strconv.FormatInt(ins.ClusterId, 10)].StartInferDeployInstance(l.ctx, ins.InstanceId)
|
||||
if !success {
|
||||
e := struct {
|
||||
|
@ -136,31 +135,3 @@ func (l *StartAllByDeployTaskIdLogic) startAll(list []*models.AiInferDeployInsta
|
|||
|
||||
return nil
|
||||
}
|
||||
|
||||
func checkStopStatus(in *inference.DeployInstance) bool {
|
||||
switch in.ClusterType {
|
||||
case storeLink.TYPE_OCTOPUS:
|
||||
switch in.Status {
|
||||
case "stopped":
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
case storeLink.TYPE_MODELARTS:
|
||||
switch in.Status {
|
||||
case "stopped":
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
case storeLink.TYPE_SHUGUANGAI:
|
||||
switch in.Status {
|
||||
case "Terminated":
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,7 +4,7 @@ import (
|
|||
"context"
|
||||
"errors"
|
||||
"github.com/zeromicro/go-zero/core/logx"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/updater"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/utils/status"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
|
||||
"strconv"
|
||||
|
@ -33,12 +33,16 @@ func (l *StartDeployInstanceListLogic) StartDeployInstanceList(req *types.StartD
|
|||
return nil, err
|
||||
}
|
||||
|
||||
in, err := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[strconv.FormatInt(ins.AdapterId, 10)][strconv.FormatInt(ins.ClusterId, 10)].GetInferDeployInstance(l.ctx, ins.InstanceId)
|
||||
|
||||
if status.CheckStopStatus(in) {
|
||||
success := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[req.AdapterId][req.ClusterId].StartInferDeployInstance(l.ctx, req.InstanceId)
|
||||
if !success {
|
||||
return nil, errors.New("start instance failed")
|
||||
}
|
||||
}
|
||||
|
||||
go updater.UpdateDeployInstanceStatus(l.svcCtx, ins, true)
|
||||
go status.UpdateDeployInstanceStatus(l.svcCtx, ins, true)
|
||||
|
||||
return resp, nil
|
||||
}
|
||||
|
|
|
@ -4,8 +4,7 @@ import (
|
|||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/inference"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/storeLink"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/utils/status"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
|
||||
|
@ -85,7 +84,7 @@ func (l *StopAllByDeployTaskIdLogic) stopAll(list []*models.AiInferDeployInstanc
|
|||
<-buf
|
||||
return
|
||||
}
|
||||
if checkStatus(in) {
|
||||
if status.CheckRunningStatus(in) {
|
||||
success := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[strconv.FormatInt(ins.AdapterId, 10)][strconv.FormatInt(ins.ClusterId, 10)].StopInferDeployInstance(l.ctx, ins.InstanceId)
|
||||
if !success {
|
||||
e := struct {
|
||||
|
@ -137,31 +136,3 @@ func (l *StopAllByDeployTaskIdLogic) stopAll(list []*models.AiInferDeployInstanc
|
|||
|
||||
return nil
|
||||
}
|
||||
|
||||
func checkStatus(in *inference.DeployInstance) bool {
|
||||
switch in.ClusterType {
|
||||
case storeLink.TYPE_OCTOPUS:
|
||||
switch in.Status {
|
||||
case "running":
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
case storeLink.TYPE_MODELARTS:
|
||||
switch in.Status {
|
||||
case "running":
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
case storeLink.TYPE_SHUGUANGAI:
|
||||
switch in.Status {
|
||||
case "Running":
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,7 +4,7 @@ import (
|
|||
"context"
|
||||
"errors"
|
||||
"github.com/zeromicro/go-zero/core/logx"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/updater"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/utils/status"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
|
||||
"strconv"
|
||||
|
@ -33,12 +33,16 @@ func (l *StopDeployInstanceLogic) StopDeployInstance(req *types.StopDeployInstan
|
|||
return nil, err
|
||||
}
|
||||
|
||||
in, err := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[strconv.FormatInt(ins.AdapterId, 10)][strconv.FormatInt(ins.ClusterId, 10)].GetInferDeployInstance(l.ctx, ins.InstanceId)
|
||||
|
||||
if status.CheckRunningStatus(in) {
|
||||
success := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[req.AdapterId][req.ClusterId].StopInferDeployInstance(l.ctx, req.InstanceId)
|
||||
if !success {
|
||||
return nil, errors.New("stop instance failed")
|
||||
}
|
||||
}
|
||||
|
||||
go updater.UpdateDeployInstanceStatus(l.svcCtx, ins, true)
|
||||
go status.UpdateDeployInstanceStatus(l.svcCtx, ins, true)
|
||||
|
||||
return resp, nil
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
package updater
|
||||
package stat
|
||||
|
||||
import (
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
|
|
@ -1,6 +1,7 @@
|
|||
package updater
|
||||
package status
|
||||
|
||||
import (
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/inference"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/storeLink"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
|
||||
|
@ -10,14 +11,17 @@ import (
|
|||
"time"
|
||||
)
|
||||
|
||||
func UpdateDeployInstanceStatusBatch(svc *svc.ServiceContext, insList []*models.AiInferDeployInstance) {
|
||||
func UpdateDeployInstanceStatusBatch(svc *svc.ServiceContext, insList []*models.AiInferDeployInstance, needfilter bool) {
|
||||
list := make([]*models.AiInferDeployInstance, len(insList))
|
||||
copy(list, insList)
|
||||
|
||||
if needfilter {
|
||||
for i := len(list) - 1; i >= 0; i-- {
|
||||
if list[i].Status == constants.Running || list[i].Status == constants.Stopped {
|
||||
list = append(list[:i], list[i+1:]...)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(list) == 0 {
|
||||
return
|
||||
|
@ -119,3 +123,72 @@ func UpdateDeployInstanceStatus(svc *svc.ServiceContext, instance *models.AiInfe
|
|||
return
|
||||
}
|
||||
}
|
||||
|
||||
func UpdateAutoStoppedInstance(svc *svc.ServiceContext) {
|
||||
list, err := svc.Scheduler.AiStorages.GetInferDeployInstanceList()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
if len(list) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
UpdateDeployInstanceStatusBatch(svc, list, false)
|
||||
}
|
||||
|
||||
func CheckStopStatus(in *inference.DeployInstance) bool {
|
||||
switch in.ClusterType {
|
||||
case storeLink.TYPE_OCTOPUS:
|
||||
switch in.Status {
|
||||
case "stopped":
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
case storeLink.TYPE_MODELARTS:
|
||||
switch in.Status {
|
||||
case "stopped":
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
case storeLink.TYPE_SHUGUANGAI:
|
||||
switch in.Status {
|
||||
case "Terminated":
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func CheckRunningStatus(in *inference.DeployInstance) bool {
|
||||
switch in.ClusterType {
|
||||
case storeLink.TYPE_OCTOPUS:
|
||||
switch in.Status {
|
||||
case "running":
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
case storeLink.TYPE_MODELARTS:
|
||||
switch in.Status {
|
||||
case "running":
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
case storeLink.TYPE_SHUGUANGAI:
|
||||
switch in.Status {
|
||||
case "Running":
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
|
@ -1,4 +1,4 @@
|
|||
package updater
|
||||
package status
|
||||
|
||||
import (
|
||||
"errors"
|
Loading…
Reference in New Issue