|
|
|
@ -561,7 +561,7 @@ func (o *OctopusLink) Execute(ctx context.Context, option *option.AiOption) (int
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (o *OctopusLink) GenerateSubmitParams(ctx context.Context, option *option.AiOption) error {
|
|
|
|
|
err := o.generateResourceId(ctx, option)
|
|
|
|
|
err := o.generateResourceId(ctx, option, nil)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
@ -569,15 +569,15 @@ func (o *OctopusLink) GenerateSubmitParams(ctx context.Context, option *option.A
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
err = o.generateImageId(ctx, option)
|
|
|
|
|
err = o.generateImageId(ctx, option, nil)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
err = o.generateAlgorithmId(ctx, option)
|
|
|
|
|
err = o.generateAlgorithmId(ctx, option, nil)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
err = o.generateCmd(option)
|
|
|
|
|
err = o.generateCmd(option, nil)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
@ -592,10 +592,7 @@ func (o *OctopusLink) GenerateSubmitParams(ctx context.Context, option *option.A
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (o *OctopusLink) generateResourceId(ctx context.Context, option *option.AiOption) error {
|
|
|
|
|
if option.ResourceType == "" {
|
|
|
|
|
return errors.New("ResourceType not set")
|
|
|
|
|
}
|
|
|
|
|
func (o *OctopusLink) generateResourceId(ctx context.Context, option *option.AiOption, ifoption *option.InferOption) error {
|
|
|
|
|
req := &octopus.GetResourceSpecsReq{
|
|
|
|
|
Platform: o.platform,
|
|
|
|
|
ResourcePool: RESOURCE_POOL,
|
|
|
|
@ -608,6 +605,30 @@ func (o *OctopusLink) generateResourceId(ctx context.Context, option *option.AiO
|
|
|
|
|
return errors.New(specResp.Error.Message)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if option != nil {
|
|
|
|
|
err = generateResourceIdForTraining(option, specResp)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if ifoption != nil {
|
|
|
|
|
err = generateResourceIdForInferDeployInstance(ifoption, specResp)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return errors.New("failed to set ResourceId")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func generateResourceIdForTraining(option *option.AiOption, specResp *octopus.GetResourceSpecsResp) error {
|
|
|
|
|
if option.ResourceType == "" {
|
|
|
|
|
return errors.New("ResourceType not set")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if option.ResourceType == CPU {
|
|
|
|
|
for _, spec := range specResp.TrainResourceSpecs {
|
|
|
|
|
if spec.Price == 0 {
|
|
|
|
@ -621,14 +642,38 @@ func (o *OctopusLink) generateResourceId(ctx context.Context, option *option.AiO
|
|
|
|
|
if option.ComputeCard == "" {
|
|
|
|
|
option.ComputeCard = GCU
|
|
|
|
|
}
|
|
|
|
|
err = setResourceIdByCard(option, specResp, option.ComputeCard)
|
|
|
|
|
err := setResourceIdByCard(option, specResp, option.ComputeCard)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return errors.New("failed to get ResourceId")
|
|
|
|
|
return errors.New("ResourceType not set")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func generateResourceIdForInferDeployInstance(option *option.InferOption, specResp *octopus.GetResourceSpecsResp) error {
|
|
|
|
|
// temporarily use bi-v100
|
|
|
|
|
cardName := cardCnMap[BIV100]
|
|
|
|
|
|
|
|
|
|
for _, spec := range specResp.TrainResourceSpecs {
|
|
|
|
|
names := strings.Split(spec.Name, COMMA)
|
|
|
|
|
if len(names) != 4 {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ns := strings.Split(names[0], STAR)
|
|
|
|
|
if len(ns) != 2 {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if ns[0] == "1" && ns[1] == cardName {
|
|
|
|
|
option.ResourceId = spec.Id
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return errors.New("failed to set ResourceId")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (o *OctopusLink) generateDatasetsId(ctx context.Context, option *option.AiOption) error {
|
|
|
|
@ -656,7 +701,7 @@ func (o *OctopusLink) generateDatasetsId(ctx context.Context, option *option.AiO
|
|
|
|
|
return errors.New("failed to get DatasetsId")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (o *OctopusLink) generateImageId(ctx context.Context, option *option.AiOption) error {
|
|
|
|
|
func (o *OctopusLink) generateImageId(ctx context.Context, option *option.AiOption, ifoption *option.InferOption) error {
|
|
|
|
|
if option.TaskType == "" {
|
|
|
|
|
return errors.New("TaskType not set")
|
|
|
|
|
}
|
|
|
|
@ -696,6 +741,26 @@ func (o *OctopusLink) generateImageId(ctx context.Context, option *option.AiOpti
|
|
|
|
|
return errors.New("failed to get PresetImages")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if option != nil {
|
|
|
|
|
err = generateImageIdForTraining(option, preImgResp)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if ifoption != nil {
|
|
|
|
|
err = generateImageIdForInferDeployInstance(ifoption, preImgResp)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return errors.New("failed to get ImageId")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func generateImageIdForTraining(option *option.AiOption, preImgResp *octopus.GetPresetImageListResp) error {
|
|
|
|
|
if option.ResourceType == CARD {
|
|
|
|
|
for _, image := range preImgResp.Payload.Images {
|
|
|
|
|
if strings.Contains(image.ImageName, cardAliasMap[strings.ToUpper(option.ComputeCard)]) {
|
|
|
|
@ -717,11 +782,35 @@ func (o *OctopusLink) generateImageId(ctx context.Context, option *option.AiOpti
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return errors.New("failed to get ImageId")
|
|
|
|
|
return errors.New("failed to set ImageId")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (o *OctopusLink) generateAlgorithmId(ctx context.Context, option *option.AiOption) error {
|
|
|
|
|
func generateImageIdForInferDeployInstance(option *option.InferOption, preImgResp *octopus.GetPresetImageListResp) error {
|
|
|
|
|
for _, image := range preImgResp.Payload.Images {
|
|
|
|
|
// temporarily use bi-v100
|
|
|
|
|
if strings.Contains(image.ImageName, cardAliasMap[strings.ToUpper(BIV100)]) {
|
|
|
|
|
switch strings.ToUpper(BIV100) {
|
|
|
|
|
case GCU:
|
|
|
|
|
if strings.HasPrefix(image.ImageVersion, "t20_") {
|
|
|
|
|
option.ImageId = image.Id
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
case BIV100:
|
|
|
|
|
if strings.HasPrefix(image.ImageVersion, "bi_") {
|
|
|
|
|
option.ImageId = image.Id
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
case MLU:
|
|
|
|
|
option.ImageId = image.Id
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return errors.New("failed to set ImageId")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (o *OctopusLink) generateAlgorithmId(ctx context.Context, option *option.AiOption, ifoption *option.InferOption) error {
|
|
|
|
|
req := &octopus.GetMyAlgorithmListReq{
|
|
|
|
|
Platform: o.platform,
|
|
|
|
|
PageIndex: o.pageIndex,
|
|
|
|
@ -735,6 +824,26 @@ func (o *OctopusLink) generateAlgorithmId(ctx context.Context, option *option.Ai
|
|
|
|
|
return errors.New("failed to get algorithmId")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if option != nil {
|
|
|
|
|
err = generateAlgorithmIdForTraining(option, resp)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if ifoption != nil {
|
|
|
|
|
err = generateAlgorithmIdForInferDeployInstance(ifoption, resp)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return errors.New("failed to set AlgorithmId")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func generateAlgorithmIdForTraining(option *option.AiOption, resp *octopus.GetMyAlgorithmListResp) error {
|
|
|
|
|
for _, algorithm := range resp.Payload.Algorithms {
|
|
|
|
|
if algorithm.FrameworkName == strings.Title(option.TaskType) {
|
|
|
|
|
ns := strings.Split(algorithm.AlgorithmName, UNDERSCORE)
|
|
|
|
@ -760,14 +869,40 @@ func (o *OctopusLink) generateAlgorithmId(ctx context.Context, option *option.Ai
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if option.AlgorithmId == "" {
|
|
|
|
|
return errors.New("Algorithm does not exist")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return errors.New("failed to get AlgorithmId")
|
|
|
|
|
return errors.New("Algorithm does not exist")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (o *OctopusLink) generateCmd(option *option.AiOption) error {
|
|
|
|
|
func generateAlgorithmIdForInferDeployInstance(option *option.InferOption, resp *octopus.GetMyAlgorithmListResp) error {
|
|
|
|
|
for _, algorithm := range resp.Payload.Algorithms {
|
|
|
|
|
if strings.Contains(algorithm.AlgorithmName, option.ModelName) {
|
|
|
|
|
option.AlgorithmId = algorithm.AlgorithmId
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return errors.New("Algorithm does not exist")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (o *OctopusLink) generateCmd(option *option.AiOption, ifoption *option.InferOption) error {
|
|
|
|
|
if option != nil {
|
|
|
|
|
err := generateCmdForTraining(option)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if ifoption != nil {
|
|
|
|
|
err := generateCmdForInferDeployInstance(ifoption)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return errors.New("failed to set cmd")
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func generateCmdForTraining(option *option.AiOption) error {
|
|
|
|
|
if option.Cmd == "" {
|
|
|
|
|
switch option.ComputeCard {
|
|
|
|
|
case GCU:
|
|
|
|
@ -782,6 +917,14 @@ func (o *OctopusLink) generateCmd(option *option.AiOption) error {
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func generateCmdForInferDeployInstance(option *option.InferOption) error {
|
|
|
|
|
if option.Cmd == "" {
|
|
|
|
|
option.Cmd = "su root; pip install fastapi uvicorn[standard]; cd /code/infer; python infer_biv100.py"
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (o *OctopusLink) generateEnv(option *option.AiOption) error {
|
|
|
|
|
|
|
|
|
|
return nil
|
|
|
|
@ -1020,3 +1163,55 @@ func (o *OctopusLink) GetInferResult(ctx context.Context, url string, file multi
|
|
|
|
|
|
|
|
|
|
return recv.Result, nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (o *OctopusLink) CreateInferDeployInstance(ctx context.Context, option *option.InferOption) (string, error) {
|
|
|
|
|
err := o.generateResourceId(ctx, nil, option)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return "", err
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
err = o.generateAlgorithmId(ctx, nil, option)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return "", err
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
err = o.generateImageId(ctx, nil, option)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return "", err
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
err = o.generateCmd(nil, option)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return "", err
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
desc := option.ModelType + FORWARD_SLASH + option.ModelName + FORWARD_SLASH + BIV100
|
|
|
|
|
param := &octopus.CreateNotebookParam{
|
|
|
|
|
Name: option.TaskName,
|
|
|
|
|
ResourcePool: RESOURCE_POOL,
|
|
|
|
|
ResourceSpecId: option.ResourceId,
|
|
|
|
|
AlgorithmId: option.AlgorithmId,
|
|
|
|
|
AlgorithmVersion: VERSION,
|
|
|
|
|
ImageId: option.ImageId,
|
|
|
|
|
DatasetId: "",
|
|
|
|
|
DatasetVersion: "",
|
|
|
|
|
Command: option.Cmd,
|
|
|
|
|
Desc: desc,
|
|
|
|
|
TaskNumber: 1,
|
|
|
|
|
}
|
|
|
|
|
req := &octopus.CreateNotebookReq{
|
|
|
|
|
Platform: o.platform,
|
|
|
|
|
Params: param,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
resp, err := o.octopusRpc.CreateNotebook(ctx, req)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return "", err
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if !resp.Success {
|
|
|
|
|
return "", errors.New(resp.Error.Message)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return resp.Payload.Id, nil
|
|
|
|
|
}
|
|
|
|
|