From 7487707f3d63e7baa973f1f430f7e1e7a809e6d1 Mon Sep 17 00:00:00 2001 From: tzwang Date: Fri, 1 Mar 2024 17:07:41 +0800 Subject: [PATCH] modified ai option Former-commit-id: dc33df489a557efeecabb26e01f422b97311212a --- .../scheduler/schedulers/option/aiOption.go | 9 +- api/internal/storeLink/octopus.go | 170 +++++++++--------- api/internal/storeLink/shuguangai.go | 29 ++- 3 files changed, 118 insertions(+), 90 deletions(-) diff --git a/api/internal/scheduler/schedulers/option/aiOption.go b/api/internal/scheduler/schedulers/option/aiOption.go index 72bf9b3c..9024d907 100644 --- a/api/internal/scheduler/schedulers/option/aiOption.go +++ b/api/internal/scheduler/schedulers/option/aiOption.go @@ -2,6 +2,7 @@ package option type AiOption struct { AiClusterId string // shuguangAi /octopus ClusterId + TaskName string ResourceType string // cpu/gpu/compute card TaskType string // pytorch/tensorflow/mindspore DatasetsName string // mnist/imageNet/iris @@ -10,6 +11,7 @@ type AiOption struct { Tops float64 ComputeCard string CodeType string + AlgorithmName string ImageId string SpecId string @@ -22,7 +24,8 @@ type AiOption struct { Envs []string Params []string - Datasets string - Code string - Model interface{} + Datasets string + AlgorithmCode string + Image string + Model interface{} } diff --git a/api/internal/storeLink/octopus.go b/api/internal/storeLink/octopus.go index afdc0037..20d5a138 100644 --- a/api/internal/storeLink/octopus.go +++ b/api/internal/storeLink/octopus.go @@ -351,91 +351,7 @@ func (o *OctopusLink) generateResourceId(option *option.AiOption) error { } } - return nil -} - -func setResourceIdByCard(option *option.AiOption, specs *octopus.GetResourceSpecsResp, computeCard string) error { - if option.Tops == 0 { - for _, spec := range specs.TrainResourceSpecs { - if spec.Price == 1 { - ns := strings.Split(spec.Name, COMMA) - cardSpecs := strings.Split(ns[0], STAR) - if cardSpecs[1] == computeCard { - option.ResourceId = spec.Id - option.ComputeCard = computeCard - return nil - } - } else { - continue - } - } - } else { - cardNum := math.Ceil(option.Tops / float64(BASE_TOPS)) - for _, spec := range specs.TrainResourceSpecs { - if option.Tops < BASE_TOPS { - if spec.Price == 1 { - ns := strings.Split(spec.Name, COMMA) - cardSpecs := strings.Split(ns[0], STAR) - if cardSpecs[1] == computeCard { - option.ResourceId = spec.Id - option.ComputeCard = computeCard - return nil - } - } else { - continue - } - } else { - ns := strings.Split(spec.Name, COMMA) - if len(ns) != 4 { - continue - } - cardSpecs := strings.Split(ns[0], STAR) - if cardSpecs[1] != computeCard { - continue - } - s, err := strconv.ParseFloat(cardSpecs[0], 64) - if err != nil { - return err - } - switch computeCard { - case GCU: - if cardNum == s { // 1, 4, 8 - option.ResourceId = spec.Id - option.ComputeCard = computeCard - return nil - } - if 1 < cardNum && cardNum <= 4 && s == 4 { - option.ResourceId = spec.Id - option.ComputeCard = computeCard - return nil - } - if 4 < cardNum && s == 8 { - option.ResourceId = spec.Id - option.ComputeCard = computeCard - return nil - } - - case MLU: // 1, 2, 4 - if cardNum/2 == s { - option.ResourceId = spec.Id - option.ComputeCard = computeCard - return nil - } - if 1 < cardNum/2 && cardNum/2 <= 2 && s == 2 { - option.ResourceId = spec.Id - option.ComputeCard = computeCard - return nil - } - if 2 < cardNum/2 && s == 4 { - option.ResourceId = spec.Id - option.ComputeCard = computeCard - return nil - } - } - } - } - } - return errors.New("set ResourceId error") + return errors.New("failed to get ResourceId") } func (o *OctopusLink) generateDatasetsId(option *option.AiOption) error { @@ -552,3 +468,87 @@ func (o *OctopusLink) generateParams(option *option.AiOption) error { return nil } + +func setResourceIdByCard(option *option.AiOption, specs *octopus.GetResourceSpecsResp, computeCard string) error { + if option.Tops == 0 { + for _, spec := range specs.TrainResourceSpecs { + if spec.Price == 1 { + ns := strings.Split(spec.Name, COMMA) + cardSpecs := strings.Split(ns[0], STAR) + if cardSpecs[1] == computeCard { + option.ResourceId = spec.Id + option.ComputeCard = computeCard + return nil + } + } else { + continue + } + } + } else { + cardNum := math.Ceil(option.Tops / float64(BASE_TOPS)) + for _, spec := range specs.TrainResourceSpecs { + if option.Tops < BASE_TOPS { + if spec.Price == 1 { + ns := strings.Split(spec.Name, COMMA) + cardSpecs := strings.Split(ns[0], STAR) + if cardSpecs[1] == computeCard { + option.ResourceId = spec.Id + option.ComputeCard = computeCard + return nil + } + } else { + continue + } + } else { + ns := strings.Split(spec.Name, COMMA) + if len(ns) != 4 { + continue + } + cardSpecs := strings.Split(ns[0], STAR) + if cardSpecs[1] != computeCard { + continue + } + s, err := strconv.ParseFloat(cardSpecs[0], 64) + if err != nil { + return err + } + switch computeCard { + case GCU: + if cardNum == s { // 1, 4, 8 + option.ResourceId = spec.Id + option.ComputeCard = computeCard + return nil + } + if 1 < cardNum && cardNum <= 4 && s == 4 { + option.ResourceId = spec.Id + option.ComputeCard = computeCard + return nil + } + if 4 < cardNum && s == 8 { + option.ResourceId = spec.Id + option.ComputeCard = computeCard + return nil + } + + case MLU: // 1, 2, 4 + if cardNum/2 == s { + option.ResourceId = spec.Id + option.ComputeCard = computeCard + return nil + } + if 1 < cardNum/2 && cardNum/2 <= 2 && s == 2 { + option.ResourceId = spec.Id + option.ComputeCard = computeCard + return nil + } + if 2 < cardNum/2 && s == 4 { + option.ResourceId = spec.Id + option.ComputeCard = computeCard + return nil + } + } + } + } + } + return errors.New("set ResourceId error") +} diff --git a/api/internal/storeLink/shuguangai.go b/api/internal/storeLink/shuguangai.go index 3e6af2d3..d97fd96c 100644 --- a/api/internal/storeLink/shuguangai.go +++ b/api/internal/storeLink/shuguangai.go @@ -31,6 +31,7 @@ const ( RAM_SIZE_1G = 1024 // 1G WORKER_NUMBER = 1 DCU = "dcu" + DCU_TOPS = 24.5 PYTORCH = "Pytorch" TASK_PYTORCH_PREFIX = "PytorchTask" TENSORFLOW = "Tensorflow" @@ -345,8 +346,27 @@ func (s *ShuguangAi) GenerateSubmitParams(option *option.AiOption) error { } func (s *ShuguangAi) generateResourceId(option *option.AiOption) error { + if option.ResourceType == "" { + return errors.New("ResourceType not set") + } - return nil + if option.ResourceType == CPU { + option.ResourceId = "WodTB2rJ8SobMgQ1nrtR245jxOrsovFi" + } + + if option.ResourceType == CARD { + if option.Tops == 0 { + option.ResourceId = "WodTB2rJ8SobMgQ1nrtR245jxOrsovFi" + } + + if option.Tops > DCU_TOPS { + option.ResourceId = "jeYBVPwyIALjVYNzHvysh2o5CsBpBLp2" + } + + //Todo add more dcu specs + } + + return errors.New("failed to get ResourceId") } func (s *ShuguangAi) generateImageId(option *option.AiOption) error { @@ -365,7 +385,12 @@ func (s *ShuguangAi) generateImageId(option *option.AiOption) error { if resp.Code != "0" { return errors.New("failed to get imageId") } - return nil + + if option.ResourceType == CPU { + + } + + return errors.New("failed to get ImageId") } func (s *ShuguangAi) generateAlgorithmId(option *option.AiOption) error {