Modified ShuguangAi and Octopus task submit options to handle an empty AlgorithmName

Former-commit-id: a5e9379e51b049114753623f610c61b2afe9a312
This commit is contained in:
tzwang 2024-03-14 17:19:08 +08:00
parent 9e3188afe6
commit f2cf784a7d
2 changed files with 59 additions and 12 deletions

View File

@ -349,6 +349,7 @@ func (o *OctopusLink) generateResourceId(option *option.AiOption) error {
if err != nil {
return err
}
return nil
}
return errors.New("failed to get ResourceId")
@ -433,7 +434,14 @@ func (o *OctopusLink) generateImageId(option *option.AiOption) error {
func (o *OctopusLink) generateAlgorithmId(option *option.AiOption) error {
// temporarily set algorithm to cnn
if option.AlgorithmName == "" {
switch option.DatasetsName {
case "cifar10":
option.AlgorithmName = "cnn"
case "mnist":
option.AlgorithmName = "fcn"
}
}
req := &octopus.GetMyAlgorithmListReq{
Platform: o.platform,
@ -457,14 +465,26 @@ func (o *OctopusLink) generateAlgorithmId(option *option.AiOption) error {
if ns[1] != option.AlgorithmName {
continue
}
if ns[2] != option.ResourceType {
switch option.ResourceType {
case CPU:
if ns[2] != CPU {
continue
}
case CARD:
if ns[2] != strings.ToLower(option.ComputeCard) {
continue
}
}
option.AlgorithmId = algorithm.AlgorithmId
return nil
}
}
if option.AlgorithmId == "" {
return errors.New("Algorithm does not exist")
}
return errors.New("failed to get AlgorithmId")
}
@ -487,7 +507,10 @@ func (o *OctopusLink) generateEnv(option *option.AiOption) error {
}
// generateParams supplies a default training parameter when none were given.
//
// If option.Params already has at least one entry it is left untouched.
// Otherwise a single entry built from "epoch", COMMA and "1" is appended.
// The function always returns nil.
func (o *OctopusLink) generateParams(option *option.AiOption) error {
	// Caller-provided parameters take precedence over the default.
	if len(option.Params) > 0 {
		return nil
	}

	option.Params = append(option.Params, "epoch"+COMMA+"1")
	return nil
}

View File

@ -197,9 +197,9 @@ func (s *ShuguangAi) SubmitTensorflowTask(imageId string, cmd string, envs []str
}
func (s *ShuguangAi) SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string, datasetsId string, algorithmId string, aiType string) (interface{}, error) {
// set algorithmId temporarily
// set algorithmId temporarily for storelink submit
if algorithmId == "" {
algorithmId = "pytorch-mnist-fully_connected_network"
algorithmId = "pytorch-mnist-fcn"
}
// shuguangAi提交任务
@ -413,6 +413,7 @@ func (s *ShuguangAi) generateAlgorithmId(option *option.AiOption) error {
if option.DatasetsName == "" {
return errors.New("DatasetsName not set")
}
req := &hpcAC.GetFileListReq{Limit: 100, Path: ALGORITHM_DIR + FORWARD_SLASH + option.TaskType, Start: 0}
list, err := s.svcCtx.ACRpc.GetFileList(s.ctx, req)
if err != nil {
@ -426,11 +427,32 @@ func (s *ShuguangAi) generateAlgorithmId(option *option.AiOption) error {
for _, file := range list.Data.FileList {
ns := strings.Split(file.Name, DASH)
if ns[0] == option.DatasetsName {
algorithmId = option.TaskType + DASH + file.Name
algoName := ns[1]
if option.AlgorithmName == "" {
switch option.DatasetsName {
case "cifar10":
algorithmId = option.TaskType + DASH + option.DatasetsName + DASH + "cnn"
option.AlgorithmId = algorithmId
option.AlgorithmName = ns[1]
option.AlgorithmName = algoName
return nil
case "mnist":
algorithmId = option.TaskType + DASH + option.DatasetsName + DASH + "fcn"
option.AlgorithmId = algorithmId
option.AlgorithmName = algoName
return nil
}
} else {
if algoName == option.AlgorithmName {
algorithmId = option.TaskType + DASH + option.DatasetsName + DASH + algoName
option.AlgorithmId = algorithmId
return nil
}
}
}
}
if algorithmId == "" {
return errors.New("Algorithm does not exist")
}
return errors.New("failed to get AlgorithmId")
@ -451,8 +473,10 @@ func (s *ShuguangAi) generateParams(option *option.AiOption) error {
return errors.New("ResourceType not set")
}
//epoch := "epoch" + COMMA + "1"
//option.Params = append(option.Params, epoch)
if len(option.Params) == 0 {
epoch := "epoch" + COMMA + "1"
option.Params = append(option.Params, epoch)
}
switch option.ResourceType {
case CPU: