Merge pull request 'fix aitask status' (#174) from tzwang/pcm-coordinator:master into master

Former-commit-id: a67aa626fc6d38b2670a6062beb55bbfd9653916
This commit is contained in:
tzwang 2024-05-14 17:51:35 +08:00
commit 8f5d161236
3 changed files with 28 additions and 8 deletions

View File

@ -122,6 +122,11 @@ func (l *PageListTaskLogic) updateAitaskStatus(tasks []*types.TaskModel, ch chan
break
}
if a.Status == constants.Pending {
status = a.Status
continue
}
if a.Status == constants.Running {
status = a.Status
continue

View File

@ -508,6 +508,8 @@ func (o *OctopusLink) GetTrainingTask(ctx context.Context, taskId string) (*coll
task.Status = constants.Running
case "stopped":
task.Status = constants.Stopped
case "pending":
task.Status = constants.Pending
default:
task.Status = "undefined"
}
@ -585,7 +587,7 @@ func (o *OctopusLink) generateResourceId(ctx context.Context, option *option.AiO
}
if option.ResourceType == CARD {
- err = setResourceIdByCard(option, specResp, GCU)
+ err = setResourceIdByCard(option, specResp, option.ComputeCard)
if err != nil {
return err
}
@ -662,10 +664,23 @@ func (o *OctopusLink) generateImageId(ctx context.Context, option *option.AiOpti
if option.ResourceType == CARD {
for _, image := range preImgResp.Payload.Images {
- if strings.Contains(image.ImageName, cardAliasMap[option.ComputeCard]) {
+ if strings.Contains(image.ImageName, cardAliasMap[strings.ToUpper(option.ComputeCard)]) {
switch strings.ToUpper(option.ComputeCard) {
case GCU:
if strings.HasPrefix(image.ImageVersion, "t20_") {
option.ImageId = image.Id
return nil
}
case BIV100:
if strings.HasPrefix(image.ImageVersion, "bi_") {
option.ImageId = image.Id
return nil
}
case MLU:
option.ImageId = image.Id
return nil
}
}
}
}
@ -750,7 +765,7 @@ func setResourceIdByCard(option *option.AiOption, specs *octopus.GetResourceSpec
if spec.Price == 1 {
ns := strings.Split(spec.Name, COMMA)
cardSpecs := strings.Split(ns[0], STAR)
- if cardSpecs[1] == cardCnMap[computeCard] {
+ if cardSpecs[1] == cardCnMap[strings.ToUpper(computeCard)] {
option.ResourceId = spec.Id
option.ComputeCard = computeCard
return nil
@ -766,7 +781,7 @@ func setResourceIdByCard(option *option.AiOption, specs *octopus.GetResourceSpec
if spec.Price == 1 {
ns := strings.Split(spec.Name, COMMA)
cardSpecs := strings.Split(ns[0], STAR)
- if cardSpecs[1] == cardCnMap[computeCard] {
+ if cardSpecs[1] == cardCnMap[strings.ToUpper(computeCard)] {
option.ResourceId = spec.Id
option.ComputeCard = computeCard
return nil
@ -780,7 +795,7 @@ func setResourceIdByCard(option *option.AiOption, specs *octopus.GetResourceSpec
continue
}
cardSpecs := strings.Split(ns[0], STAR)
- if cardSpecs[1] != cardCnMap[computeCard] {
+ if cardSpecs[1] != cardCnMap[strings.ToUpper(computeCard)] {
continue
}
s, err := strconv.ParseFloat(cardSpecs[0], 64)

View File

@ -83,7 +83,7 @@ var (
"4": SHUGUANGHPC,
}
resourceTypes = []string{CPU, CARD}
- taskTypes = []string{PYTORCH_TASK, TENSORFLOW_TASK}
+ taskTypes = []string{PYTORCH_TASK}
ERROR_RESP_EMPTY = errors.New("resp empty error")
ERROR_CONVERT_EMPTY = errors.New("convert empty error")