From 6f181c313a76fa047c9e4f9a566819442579b37d Mon Sep 17 00:00:00 2001 From: tzwang Date: Thu, 7 Mar 2024 17:28:43 +0800 Subject: [PATCH 1/3] modified octopus functions generateAlgorithmId Former-commit-id: 0440af0e370819df4d58399d89574ac29c62c44f --- api/internal/storeLink/octopus.go | 39 ++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/api/internal/storeLink/octopus.go b/api/internal/storeLink/octopus.go index 5851f0d1..8060ca05 100644 --- a/api/internal/storeLink/octopus.go +++ b/api/internal/storeLink/octopus.go @@ -341,11 +341,10 @@ func (o *OctopusLink) generateResourceId(option *option.AiOption) error { return nil } } - } if option.ResourceType == CARD { - err = setResourceIdByCard(option, specResp, MLU) + err = setResourceIdByCard(option, specResp, GCU) if err != nil { return err } @@ -418,16 +417,23 @@ func (o *OctopusLink) generateImageId(option *option.AiOption) error { if !preImgResp.Success { return errors.New("failed to get PresetImages") } - for _, image := range preImgResp.Payload.Images { - if strings.Contains(image.ImageName, option.TaskType) && strings.Contains(image.ImageName, cardAliasMap[option.ComputeCard]) { - option.ImageId = image.Id - return nil + + if option.ResourceType == CARD { + for _, image := range preImgResp.Payload.Images { + if strings.Contains(image.ImageName, cardAliasMap[option.ComputeCard]) { + option.ImageId = image.Id + return nil + } } } + return errors.New("failed to get ImageId") } func (o *OctopusLink) generateAlgorithmId(option *option.AiOption) error { + // temporarily set algorithm to cnn + option.AlgorithmName = "cnn" + req := &octopus.GetMyAlgorithmListReq{ Platform: o.platform, PageIndex: o.pageIndex, @@ -442,18 +448,33 @@ func (o *OctopusLink) generateAlgorithmId(option *option.AiOption) error { } for _, algorithm := range resp.Payload.Algorithms { - if algorithm.FrameworkName == strings.Title(option.TaskType) && strings.Contains(algorithm.AlgorithmName, option.DatasetsName) { + if algorithm.FrameworkName == strings.Title(option.TaskType) { + ns := strings.Split(algorithm.AlgorithmName, DASH) + if ns[0] != option.DatasetsName { + continue + } + if ns[1] != option.AlgorithmName { + continue + } + if ns[2] != option.ResourceType { + continue + } option.AlgorithmId = algorithm.AlgorithmId return nil } } - return nil + return errors.New("failed to get AlgorithmId") } func (o *OctopusLink) generateCmd(option *option.AiOption) error { if option.Cmd == "" { - option.Cmd = TRAIN_CMD + switch option.ComputeCard { + case GCU: + option.Cmd = "cd /code; python3 train.py" + default: + option.Cmd = TRAIN_CMD + } } return nil From 2f9462965ebcca7695b8952eb801d8b9291b108b Mon Sep 17 00:00:00 2001 From: tzwang Date: Tue, 12 Mar 2024 17:47:00 +0800 Subject: [PATCH 2/3] modified submit shuguangai implementation Former-commit-id: 840111b6bb589bec04417f70e3d943f2fa999025 --- api/internal/storeLink/shuguangai.go | 30 ++++++++++++++++++++++++++-- api/internal/storeLink/storeLink.go | 1 + 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/api/internal/storeLink/shuguangai.go b/api/internal/storeLink/shuguangai.go index 242d07ec..36c9311b 100644 --- a/api/internal/storeLink/shuguangai.go +++ b/api/internal/storeLink/shuguangai.go @@ -352,15 +352,18 @@ func (s *ShuguangAi) generateResourceId(option *option.AiOption) error { if option.ResourceType == CPU { option.ResourceId = "WodTB2rJ8SobMgQ1nrtR245jxOrsovFi" + return nil } if option.ResourceType == CARD { if option.Tops == 0 { option.ResourceId = "WodTB2rJ8SobMgQ1nrtR245jxOrsovFi" + return nil } if option.Tops > DCU_TOPS { option.ResourceId = "jeYBVPwyIALjVYNzHvysh2o5CsBpBLp2" + return nil } //Todo add more dcu specs @@ -386,7 +389,12 @@ func (s *ShuguangAi) generateImageId(option *option.AiOption) error { return errors.New("failed to get imageId") } - if option.ResourceType == CPU { + for _, datum := range resp.Data { + ns := strings.Split(datum.Version, COLON) + if ns[0] == "jupyterlab-pytorch" { + option.ImageId = datum.ImageId + return nil + } } @@ -412,6 +420,7 @@ func (s *ShuguangAi) generateAlgorithmId(option *option.AiOption) error { if ns[0] == option.DatasetsName { algorithmId = option.TaskType + DASH + file.Name option.AlgorithmId = algorithmId + option.AlgorithmName = ns[1] return nil } } @@ -430,6 +439,23 @@ func (s *ShuguangAi) generateEnv(option *option.AiOption) error { } func (s *ShuguangAi) generateParams(option *option.AiOption) error { + if option.ResourceType == "" { + return errors.New("ResourceType not set") + } - return nil + epoch := "epoch" + COMMA + "1" + option.Params = append(option.Params, epoch) + + switch option.ResourceType { + case CPU: + card := "card" + COMMA + CPU + option.Params = append(option.Params, card) + return nil + case GPU: + card := "card" + COMMA + "cuda:0" + option.Params = append(option.Params, card) + return nil + } + + return errors.New("failed to set params") } diff --git a/api/internal/storeLink/storeLink.go b/api/internal/storeLink/storeLink.go index 3693d58c..c77d3df2 100644 --- a/api/internal/storeLink/storeLink.go +++ b/api/internal/storeLink/storeLink.go @@ -40,6 +40,7 @@ type Linkage interface { } const ( + COLON = ":" PY_PARAM_PREFIX = "--" SPACE = " " UNDERSCORE = "_" From a32ff30ff082082d2d6e433abd163e3d1dcfaf12 Mon Sep 17 00:00:00 2001 From: tzwang Date: Wed, 13 Mar 2024 17:33:09 +0800 Subject: [PATCH 3/3] modified aitask submit options Former-commit-id: 1eb8398a899ff5bb1129dfdf6b2559cf1ec0130c --- api/internal/storeLink/octopus.go | 7 ++--- api/internal/storeLink/shuguangai.go | 38 +++++++++++++++++----------- 2 files changed, 27 insertions(+), 18 deletions(-) diff --git a/api/internal/storeLink/octopus.go b/api/internal/storeLink/octopus.go index 8060ca05..7dc97026 100644 --- a/api/internal/storeLink/octopus.go +++ b/api/internal/storeLink/octopus.go @@ -46,7 +46,8 @@ const ( SAILINGSI = "sailingsi" MLU = "MLU" CAMBRICONMLU290 = 256 - GCU = "enflame" + GCU = "GCU" + ENFLAME = "enflame" EnflameT20 = 128 BASE_TOPS = 128 CAMBRICON = "cambricon" @@ -57,7 +58,7 @@ const ( var ( cardAliasMap = map[string]string{ MLU: CAMBRICON, - GCU: GCU, + GCU: ENFLAME, } ) @@ -449,7 +450,7 @@ func (o *OctopusLink) generateAlgorithmId(option *option.AiOption) error { for _, algorithm := range resp.Payload.Algorithms { if algorithm.FrameworkName == strings.Title(option.TaskType) { - ns := strings.Split(algorithm.AlgorithmName, DASH) + ns := strings.Split(algorithm.AlgorithmName, UNDERSCORE) if ns[0] != option.DatasetsName { continue } diff --git a/api/internal/storeLink/shuguangai.go b/api/internal/storeLink/shuguangai.go index 36c9311b..1bd86443 100644 --- a/api/internal/storeLink/shuguangai.go +++ b/api/internal/storeLink/shuguangai.go @@ -57,17 +57,17 @@ var RESOURCESGAIMAP = map[string]ResourceSpecSGAI{ }, "OBtVaaXAv9n9FbLR7pWAoa3yR13jXwNc": { CPU: 2, - GPU: 1, + GPU: 3, RAM: 4 * RAM_SIZE_1G, }, "sBWfpkntUzsWYly11kdwEHZOYYIsFmve": { - CPU: 5, - GPU: 1, - RAM: 10 * RAM_SIZE_1G, + CPU: 4, + GPU: 4, + RAM: 8 * RAM_SIZE_1G, }, "jeYBVPwyIALjVYNzHvysh2o5CsBpBLp2": { CPU: 5, - GPU: 2, + GPU: 5, RAM: 10 * RAM_SIZE_1G, }, } @@ -75,9 +75,9 @@ var RESOURCESGAIMAP = map[string]ResourceSpecSGAI{ var RESOURCESPECSAI = map[string]string{ "WodTB2rJ8SobMgQ1nrtR245jxOrsovFi": "CPU:1, DCU:1, RAM:2G", "6d41v1XV53MQPmQOJ5kNatIck9yl8nWZ": "CPU:1, DCU:2, RAM:2G", - "OBtVaaXAv9n9FbLR7pWAoa3yR13jXwNc": "CPU:2, DCU:1, RAM:4G", - "sBWfpkntUzsWYly11kdwEHZOYYIsFmve": "CPU:5, DCU:1, RAM:10G", - "jeYBVPwyIALjVYNzHvysh2o5CsBpBLp2": "CPU:5, DCU:2, RAM:10G", + "OBtVaaXAv9n9FbLR7pWAoa3yR13jXwNc": "CPU:2, DCU:3, RAM:4G", + "sBWfpkntUzsWYly11kdwEHZOYYIsFmve": "CPU:4, DCU:4, RAM:8G", + "jeYBVPwyIALjVYNzHvysh2o5CsBpBLp2": "CPU:5, DCU:5, RAM:10G", } type ResourceSpecSGAI struct { @@ -356,17 +356,25 @@ func (s *ShuguangAi) generateResourceId(option *option.AiOption) error { } if option.ResourceType == CARD { - if option.Tops == 0 { + if 0 <= option.Tops && option.Tops <= DCU_TOPS { option.ResourceId = "WodTB2rJ8SobMgQ1nrtR245jxOrsovFi" return nil } - if option.Tops > DCU_TOPS { + cardNum := 5 + for k, v := range RESOURCESGAIMAP { + for i := 1; i <= cardNum; i++ { + if float64(i)*DCU_TOPS <= option.Tops && option.Tops <= float64(v.GPU)*DCU_TOPS { + option.ResourceId = k + return nil + } + } + } + + if option.Tops > float64(cardNum)*DCU_TOPS { option.ResourceId = "jeYBVPwyIALjVYNzHvysh2o5CsBpBLp2" return nil } - - //Todo add more dcu specs } return errors.New("failed to get ResourceId") @@ -443,15 +451,15 @@ func (s *ShuguangAi) generateParams(option *option.AiOption) error { return errors.New("ResourceType not set") } - epoch := "epoch" + COMMA + "1" - option.Params = append(option.Params, epoch) + //epoch := "epoch" + COMMA + "1" + //option.Params = append(option.Params, epoch) switch option.ResourceType { case CPU: card := "card" + COMMA + CPU option.Params = append(option.Params, card) return nil - case GPU: + case CARD: card := "card" + COMMA + "cuda:0" option.Params = append(option.Params, card) return nil