diff --git a/internal/cron/cron.go b/internal/cron/cron.go index d143333e..726057d7 100644 --- a/internal/cron/cron.go +++ b/internal/cron/cron.go @@ -45,4 +45,8 @@ func AddCronGroup(svc *svc.ServiceContext) { } stat.UpdateClusterResources(svc, adapterList) }) + + svc.Cron.AddFunc("*/30 * * * * ?", func() { + status.UpdateAutoStoppedInstance(svc) + }) } diff --git a/internal/logic/inference/deployinstancelistlogic.go b/internal/logic/inference/deployinstancelistlogic.go index b6a4b280..794673cf 100644 --- a/internal/logic/inference/deployinstancelistlogic.go +++ b/internal/logic/inference/deployinstancelistlogic.go @@ -82,7 +82,7 @@ func (l *DeployInstanceListLogic) DeployInstanceList(req *types.DeployInstanceLi } } - go status.UpdateDeployInstanceStatus(l.svcCtx, ins, true) + go status.UpdateDeployInstanceStatus(l.svcCtx, ins, true, nil) go status.UpdateDeployTaskStatus(l.svcCtx) } diff --git a/internal/logic/inference/startdeployinstancelistlogic.go b/internal/logic/inference/startdeployinstancelistlogic.go index db13d6cb..bcdb18a0 100644 --- a/internal/logic/inference/startdeployinstancelistlogic.go +++ b/internal/logic/inference/startdeployinstancelistlogic.go @@ -34,15 +34,18 @@ func (l *StartDeployInstanceListLogic) StartDeployInstanceList(req *types.StartD } in, err := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[strconv.FormatInt(ins.AdapterId, 10)][strconv.FormatInt(ins.ClusterId, 10)].GetInferDeployInstance(l.ctx, ins.InstanceId) + if err != nil { + return nil, err + } if status.CheckStopStatus(in) { - success := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[req.AdapterId][req.ClusterId].StartInferDeployInstance(l.ctx, req.InstanceId) + success := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[req.AdapterId][req.ClusterId].StartInferDeployInstance(l.ctx, in.InstanceId) if !success { return nil, errors.New("start instance failed") } } - go status.UpdateDeployInstanceStatus(l.svcCtx, ins, true) + go status.UpdateDeployInstanceStatus(l.svcCtx, ins, true, nil) return resp, nil } diff --git a/internal/logic/inference/stopdeployinstancelogic.go b/internal/logic/inference/stopdeployinstancelogic.go index 6678d6c1..3255a7dc 100644 --- a/internal/logic/inference/stopdeployinstancelogic.go +++ b/internal/logic/inference/stopdeployinstancelogic.go @@ -34,15 +34,18 @@ func (l *StopDeployInstanceLogic) StopDeployInstance(req *types.StopDeployInstan } in, err := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[strconv.FormatInt(ins.AdapterId, 10)][strconv.FormatInt(ins.ClusterId, 10)].GetInferDeployInstance(l.ctx, ins.InstanceId) + if err != nil { + return nil, err + } if status.CheckRunningStatus(in) { - success := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[req.AdapterId][req.ClusterId].StopInferDeployInstance(l.ctx, req.InstanceId) + success := l.svcCtx.Scheduler.AiService.InferenceAdapterMap[req.AdapterId][req.ClusterId].StopInferDeployInstance(l.ctx, in.InstanceId) if !success { return nil, errors.New("stop instance failed") } } - go status.UpdateDeployInstanceStatus(l.svcCtx, ins, true) + go status.UpdateDeployInstanceStatus(l.svcCtx, ins, true, nil) return resp, nil } diff --git a/internal/scheduler/service/utils/status/deployInstance.go b/internal/scheduler/service/utils/status/deployInstance.go index 40b69d48..f82e66dd 100644 --- a/internal/scheduler/service/utils/status/deployInstance.go +++ b/internal/scheduler/service/utils/status/deployInstance.go @@ -27,8 +27,10 @@ func UpdateDeployInstanceStatusBatch(svc *svc.ServiceContext, insList []*models. return } + buffer := make(chan bool, 3) for _, instance := range list { - go UpdateDeployInstanceStatus(svc, instance, false) + buffer <- true + go UpdateDeployInstanceStatus(svc, instance, false, buffer) } } @@ -51,23 +53,37 @@ func UpdateDeployTaskStatus(svc *svc.ServiceContext) { return } + buffer := make(chan bool, 2) for _, instance := range inslist { - go UpdateDeployInstanceStatus(svc, instance, false) + buffer <- true + go UpdateDeployInstanceStatus(svc, instance, false, buffer) } } -func UpdateDeployInstanceStatus(svc *svc.ServiceContext, instance *models.AiInferDeployInstance, updatetime bool) { +func UpdateDeployInstanceStatus(svc *svc.ServiceContext, instance *models.AiInferDeployInstance, updatetime bool, ch chan bool) { amap, found := svc.Scheduler.AiService.InferenceAdapterMap[strconv.FormatInt(instance.AdapterId, 10)] if !found { + if ch != nil { + <-ch + return + } return } cmap, found := amap[strconv.FormatInt(instance.ClusterId, 10)] if !found { + if ch != nil { + <-ch + return + } return } h := http.Request{} ins, err := cmap.GetInferDeployInstance(h.Context(), instance.InstanceId) if err != nil { + if ch != nil { + <-ch + return + } return } switch instance.ClusterType { @@ -75,11 +91,19 @@ func UpdateDeployInstanceStatus(svc *svc.ServiceContext, instance *models.AiInfe switch ins.Status { case "running": if instance.Status == constants.Running { + if ch != nil { + <-ch + return + } return } instance.Status = constants.Running case "stopped": if instance.Status == constants.Stopped { + if ch != nil { + <-ch + return + } return } instance.Status = constants.Stopped @@ -90,11 +114,19 @@ func UpdateDeployInstanceStatus(svc *svc.ServiceContext, instance *models.AiInfe switch ins.Status { case "running": if instance.Status == constants.Running { + if ch != nil { + <-ch + return + } return } instance.Status = constants.Running case "stopped": if instance.Status == constants.Stopped { + if ch != nil { + <-ch + return + } return } instance.Status = constants.Stopped @@ -105,11 +137,19 @@ func UpdateDeployInstanceStatus(svc *svc.ServiceContext, instance *models.AiInfe switch ins.Status { case "Running": if instance.Status == constants.Running { + if ch != nil { + <-ch + return + } return } instance.Status = constants.Running case "Terminated": if instance.Status == constants.Stopped { + if ch != nil { + <-ch + return + } return } instance.Status = constants.Stopped @@ -120,6 +160,15 @@ func UpdateDeployInstanceStatus(svc *svc.ServiceContext, instance *models.AiInfe err = svc.Scheduler.AiStorages.UpdateInferDeployInstance(instance, updatetime) if err != nil { + if ch != nil { + <-ch + return + } + return + } + + if ch != nil { + <-ch return } }