updated schedule submit task api logic

Former-commit-id: 72fe516fb25e2feb760391799265642341d16277
This commit is contained in:
tzwang 2024-04-07 16:50:30 +08:00
parent dbe2363339
commit cdd7fe9dca
5 changed files with 85 additions and 32 deletions

View File

@ -13,8 +13,14 @@ type (
} }
ScheduleResp { ScheduleResp {
Success bool `json:"success"` Results []*ScheduleResult `json:"results"`
ErrorMsg string `json:"errorMsg"` }
ScheduleResult {
ClusterId string `json:"clusterId"`
TaskId string `json:"taskId"`
Replica int32 `json:"replica"`
Msg string `json:"msg"`
} }
AiOption { AiOption {

View File

@ -4,8 +4,6 @@ import (
"context" "context"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/storeLink"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
@ -50,15 +48,10 @@ func (l *ScheduleSubmitLogic) ScheduleSubmit(req *types.ScheduleReq) (resp *type
return nil, err return nil, err
} }
//转换成统一返回类型 switch opt.GetOptionType() {
for _, r := range results { case option.AI:
sResp, err := storeLink.ConvertType(r, resp, nil) for _, result := range results {
if err != nil { _ = (result).(*schedulers.AiResult)
return nil, err
}
if sResp.(*types.ScheduleResp).ErrorMsg != "" {
resp.ErrorMsg = sResp.(*types.ScheduleResp).ErrorMsg + "\n"
} }
} }

View File

@ -16,6 +16,7 @@ package schedulers
import ( import (
"context" "context"
"encoding/json"
"errors" "errors"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option"
@ -25,6 +26,9 @@ import (
"gitlink.org.cn/JointCloud/pcm-coordinator/api/pkg/response" "gitlink.org.cn/JointCloud/pcm-coordinator/api/pkg/response"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
"gitlink.org.cn/jcce-pcm/pcm-ac/hpcAC"
"gitlink.org.cn/jcce-pcm/pcm-participant-octopus/octopus"
"strconv"
"sync" "sync"
) )
@ -36,6 +40,13 @@ type AiScheduler struct {
ctx context.Context ctx context.Context
} }
type AiResult struct {
taskId string
clusterId string
replica int32
msg string
}
func NewAiScheduler(ctx context.Context, val string, scheduler *scheduler.Scheduler, option *option.AiOption) (*AiScheduler, error) { func NewAiScheduler(ctx context.Context, val string, scheduler *scheduler.Scheduler, option *option.AiOption) (*AiScheduler, error) {
return &AiScheduler{ctx: ctx, yamlString: val, Scheduler: scheduler, option: option}, nil return &AiScheduler{ctx: ctx, yamlString: val, Scheduler: scheduler, option: option}, nil
} }
@ -101,9 +112,9 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) ([]inter
} }
var wg sync.WaitGroup var wg sync.WaitGroup
var result []interface{} var results []interface{}
var errs []error var errs []error
var ch = make(chan interface{}, len(clusters)) var ch = make(chan *AiResult, len(clusters))
var errCh = make(chan error, len(clusters)) var errCh = make(chan error, len(clusters))
executorMap := *as.AiExecutor executorMap := *as.AiExecutor
@ -114,9 +125,8 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) ([]inter
} }
wg.Add(1) wg.Add(1)
go func() { go func() {
//var resp interface{} opt, _ := cloneAiOption(as.option)
//var err error resp, err := executorMap[c.Name].Execute(as.ctx, opt)
resp, err := executorMap[c.Name].Execute(as.ctx, as.option)
if err != nil { if err != nil {
errCh <- err errCh <- err
@ -124,14 +134,11 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) ([]inter
return return
} }
data := struct { result, _ := convertType(resp)
Resp interface{} result.replica = c.Replicas
ClusterId int64 result.clusterId = strconv.FormatInt(c.ParticipantId, 10)
}{
Resp: resp, ch <- result
ClusterId: c.ParticipantId,
}
ch <- data
wg.Done() wg.Done()
}() }()
} }
@ -148,15 +155,11 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) ([]inter
} }
for s := range ch { for s := range ch {
data := (s).(struct {
Resp interface{}
ClusterId int64
})
// TODO: database operation // TODO: database operation
result = append(result, data.Resp) results = append(results, s)
} }
return result, nil return results, nil
} }
func (as *AiScheduler) findClustersWithResources() ([]*collector.ResourceStats, error) { func (as *AiScheduler) findClustersWithResources() ([]*collector.ResourceStats, error) {
@ -202,3 +205,43 @@ func (as *AiScheduler) findClustersWithResources() ([]*collector.ResourceStats,
} }
return resourceSpecs, nil return resourceSpecs, nil
} }
func convertType(in interface{}) (*AiResult, error) {
var result AiResult
switch (in).(type) {
case *hpcAC.SubmitTaskAiResp:
resp := (in).(*hpcAC.SubmitTaskAiResp)
if resp.Code == "0" {
result.taskId = resp.Data
} else {
result.msg = resp.Msg
}
return &result, nil
case *octopus.CreateTrainJobResp:
resp := (in).(*octopus.CreateTrainJobResp)
if resp.Success {
result.taskId = resp.Payload.JobId
} else {
result.msg = resp.Error.Message
}
return &result, nil
default:
return nil, errors.New("ai task response failed")
}
}
func cloneAiOption(opt *option.AiOption) (*option.AiOption, error) {
origJSON, err := json.Marshal(opt)
if err != nil {
return nil, err
}
clone := option.AiOption{}
if err = json.Unmarshal(origJSON, &clone); err != nil {
return nil, err
}
return &clone, nil
}

View File

@ -0,0 +1,9 @@
package option
type CloudOption struct {
task interface{}
}
func (c CloudOption) GetOptionType() string {
return CLOUD
}

View File

@ -64,6 +64,8 @@ func (ps *DynamicResourcesStrategy) Schedule() ([]*AssignedCluster, error) {
} }
results = append(results, &assignedCluster) results = append(results, &assignedCluster)
return results, nil return results, nil
case option.CLOUD:
} }
return nil, errors.New("failed to apply DynamicResourcesStrategy") return nil, errors.New("failed to apply DynamicResourcesStrategy")