Merge pull request 'fix aitask status' (#174) from tzwang/pcm-coordinator:master into master
Former-commit-id: a67aa626fc6d38b2670a6062beb55bbfd9653916
This commit is contained in:
commit
8f5d161236
|
@@ -122,6 +122,11 @@ func (l *PageListTaskLogic) updateAitaskStatus(tasks []*types.TaskModel, ch chan
|
|||
break
|
||||
}
|
||||
|
||||
if a.Status == constants.Pending {
|
||||
status = a.Status
|
||||
continue
|
||||
}
|
||||
|
||||
if a.Status == constants.Running {
|
||||
status = a.Status
|
||||
continue
|
||||
|
|
|
@@ -508,6 +508,8 @@ func (o *OctopusLink) GetTrainingTask(ctx context.Context, taskId string) (*coll
|
|||
task.Status = constants.Running
|
||||
case "stopped":
|
||||
task.Status = constants.Stopped
|
||||
case "pending":
|
||||
task.Status = constants.Pending
|
||||
default:
|
||||
task.Status = "undefined"
|
||||
}
|
||||
|
@@ -585,7 +587,7 @@ func (o *OctopusLink) generateResourceId(ctx context.Context, option *option.AiO
|
|||
}
|
||||
|
||||
if option.ResourceType == CARD {
|
||||
err = setResourceIdByCard(option, specResp, GCU)
|
||||
err = setResourceIdByCard(option, specResp, option.ComputeCard)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@@ -662,9 +664,22 @@ func (o *OctopusLink) generateImageId(ctx context.Context, option *option.AiOpti
|
|||
|
||||
if option.ResourceType == CARD {
|
||||
for _, image := range preImgResp.Payload.Images {
|
||||
if strings.Contains(image.ImageName, cardAliasMap[option.ComputeCard]) {
|
||||
option.ImageId = image.Id
|
||||
return nil
|
||||
if strings.Contains(image.ImageName, cardAliasMap[strings.ToUpper(option.ComputeCard)]) {
|
||||
switch strings.ToUpper(option.ComputeCard) {
|
||||
case GCU:
|
||||
if strings.HasPrefix(image.ImageVersion, "t20_") {
|
||||
option.ImageId = image.Id
|
||||
return nil
|
||||
}
|
||||
case BIV100:
|
||||
if strings.HasPrefix(image.ImageVersion, "bi_") {
|
||||
option.ImageId = image.Id
|
||||
return nil
|
||||
}
|
||||
case MLU:
|
||||
option.ImageId = image.Id
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -750,7 +765,7 @@ func setResourceIdByCard(option *option.AiOption, specs *octopus.GetResourceSpec
|
|||
if spec.Price == 1 {
|
||||
ns := strings.Split(spec.Name, COMMA)
|
||||
cardSpecs := strings.Split(ns[0], STAR)
|
||||
if cardSpecs[1] == cardCnMap[computeCard] {
|
||||
if cardSpecs[1] == cardCnMap[strings.ToUpper(computeCard)] {
|
||||
option.ResourceId = spec.Id
|
||||
option.ComputeCard = computeCard
|
||||
return nil
|
||||
|
@@ -766,7 +781,7 @@ func setResourceIdByCard(option *option.AiOption, specs *octopus.GetResourceSpec
|
|||
if spec.Price == 1 {
|
||||
ns := strings.Split(spec.Name, COMMA)
|
||||
cardSpecs := strings.Split(ns[0], STAR)
|
||||
if cardSpecs[1] == cardCnMap[computeCard] {
|
||||
if cardSpecs[1] == cardCnMap[strings.ToUpper(computeCard)] {
|
||||
option.ResourceId = spec.Id
|
||||
option.ComputeCard = computeCard
|
||||
return nil
|
||||
|
@@ -780,7 +795,7 @@ func setResourceIdByCard(option *option.AiOption, specs *octopus.GetResourceSpec
|
|||
continue
|
||||
}
|
||||
cardSpecs := strings.Split(ns[0], STAR)
|
||||
if cardSpecs[1] != cardCnMap[computeCard] {
|
||||
if cardSpecs[1] != cardCnMap[strings.ToUpper(computeCard)] {
|
||||
continue
|
||||
}
|
||||
s, err := strconv.ParseFloat(cardSpecs[0], 64)
|
||||
|
|
|
@@ -83,7 +83,7 @@ var (
|
|||
"4": SHUGUANGHPC,
|
||||
}
|
||||
resourceTypes = []string{CPU, CARD}
|
||||
taskTypes = []string{PYTORCH_TASK, TENSORFLOW_TASK}
|
||||
taskTypes = []string{PYTORCH_TASK}
|
||||
|
||||
ERROR_RESP_EMPTY = errors.New("resp empty error")
|
||||
ERROR_CONVERT_EMPTY = errors.New("convert empty error")
|
||||
|
|
Loading…
Reference in New Issue