Merge branch 'master' of https://gitlink.org.cn/JointCloud/pcm-coordinator
Former-commit-id: b7838d8f6bb284644e04e80c09eb3f99af8d0d07
This commit is contained in:
commit
58af849beb
|
@ -20,7 +20,7 @@ type (
|
||||||
TaskDesc string `form:"taskDesc"`
|
TaskDesc string `form:"taskDesc"`
|
||||||
ModelName string `form:"modelName"`
|
ModelName string `form:"modelName"`
|
||||||
ModelType string `form:"modelType"`
|
ModelType string `form:"modelType"`
|
||||||
AdapterId string `form:"adapterId"`
|
AdapterIds []string `form:"adapterIds"`
|
||||||
AiClusterIds []string `form:"aiClusterIds,optional"`
|
AiClusterIds []string `form:"aiClusterIds,optional"`
|
||||||
ResourceType string `form:"resourceType,optional"`
|
ResourceType string `form:"resourceType,optional"`
|
||||||
ComputeCard string `form:"card,optional"`
|
ComputeCard string `form:"card,optional"`
|
||||||
|
@ -76,6 +76,18 @@ type (
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/******************TextToImage inference*************************/
|
||||||
|
// TextToImageInferenceReq carries the form parameters of a text-to-image
// inference request: task name and description, the model to use and the
// AI clusters it is addressed to.
TextToImageInferenceReq {
	TaskName     string   `form:"taskName"`
	TaskDesc     string   `form:"taskDesc"`
	ModelName    string   `form:"modelName"`
	ModelType    string   `form:"modelType"`
	AiClusterIds []string `form:"aiClusterIds"`
}
|
||||||
|
// TextToImageInferenceResp wraps the raw bytes produced by a text-to-image
// inference request.
TextToImageInferenceResp {
	Result []byte
}
|
||||||
|
|
||||||
/******************Deploy instance*************************/
|
/******************Deploy instance*************************/
|
||||||
DeployInstanceListReq{
|
DeployInstanceListReq{
|
||||||
PageInfo
|
PageInfo
|
||||||
|
@ -144,4 +156,13 @@ type (
|
||||||
GetDeployTasksResp {
|
GetDeployTasksResp {
|
||||||
PageResult
|
PageResult
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetRunningInstanceReq selects running deploy instances by adapter, model
// type and model name.
//
// All fields are bound from the query string. The route that uses this
// request (/inference/getInstanceByModel) declares no path parameters, so
// `path` tags could never be satisfied and every request would be rejected
// during parameter parsing.
GetRunningInstanceReq {
	AdapterIds []string `form:"adapterIds"`
	ModelType  string   `form:"modelType"`
	ModelName  string   `form:"modelName"`
}
|
||||||
|
// GetRunningInstanceResp returns the matching instances under "list"; the
// key is omitted from the JSON when the list is empty.
GetRunningInstanceResp {
	List interface{} `json:"list,omitempty"`
}
|
||||||
)
|
)
|
||||||
|
|
|
@ -968,6 +968,9 @@ service pcm {
|
||||||
|
|
||||||
@handler GetDeployTasks
|
@handler GetDeployTasks
|
||||||
get /inference/getDeployTasks (GetDeployTasksReq) returns (GetDeployTasksResp)
|
get /inference/getDeployTasks (GetDeployTasksReq) returns (GetDeployTasksResp)
|
||||||
|
|
||||||
|
@handler GetRunningInstanceByModel
|
||||||
|
get /inference/getInstanceByModel (GetRunningInstanceReq) returns (GetRunningInstanceResp)
|
||||||
}
|
}
|
||||||
|
|
||||||
@server(
|
@server(
|
||||||
|
|
|
@ -0,0 +1,25 @@
|
||||||
|
package inference
|
||||||
|
|
||||||
|
import (
|
||||||
|
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
|
||||||
|
"net/http"
|
||||||
|
|
||||||
|
"github.com/zeromicro/go-zero/rest/httpx"
|
||||||
|
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/logic/inference"
|
||||||
|
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
|
||||||
|
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
|
||||||
|
)
|
||||||
|
|
||||||
|
func GetRunningInstanceByModelHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
var req types.GetRunningInstanceReq
|
||||||
|
if err := httpx.Parse(r, &req); err != nil {
|
||||||
|
result.ParamErrorResult(r, w, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
l := inference.NewGetRunningInstanceByModelLogic(r.Context(), svcCtx)
|
||||||
|
resp, err := l.GetRunningInstanceByModel(&req)
|
||||||
|
result.HttpResult(r, w, resp, err)
|
||||||
|
}
|
||||||
|
}
|
|
@ -1228,6 +1228,11 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
|
||||||
Path: "/inference/getDeployTasks",
|
Path: "/inference/getDeployTasks",
|
||||||
Handler: inference.GetDeployTasksHandler(serverCtx),
|
Handler: inference.GetDeployTasksHandler(serverCtx),
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
Method: http.MethodGet,
|
||||||
|
Path: "/inference/getInstanceByModel",
|
||||||
|
Handler: inference.GetRunningInstanceByModelHandler(serverCtx),
|
||||||
|
},
|
||||||
},
|
},
|
||||||
rest.WithPrefix("/pcm/v1"),
|
rest.WithPrefix("/pcm/v1"),
|
||||||
)
|
)
|
||||||
|
|
|
@ -4,6 +4,7 @@ import (
|
||||||
"context"
|
"context"
|
||||||
"errors"
|
"errors"
|
||||||
"github.com/zeromicro/go-zero/core/logx"
|
"github.com/zeromicro/go-zero/core/logx"
|
||||||
|
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/common"
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/updater"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/scheduler/service/updater"
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
|
||||||
|
@ -30,14 +31,34 @@ func (l *DeployInstanceListLogic) DeployInstanceList(req *types.DeployInstanceLi
|
||||||
offset := req.PageSize * (req.PageNum - 1)
|
offset := req.PageSize * (req.PageNum - 1)
|
||||||
resp = &types.DeployInstanceListResp{}
|
resp = &types.DeployInstanceListResp{}
|
||||||
|
|
||||||
var list []*models.AiInferDeployInstance
|
var tasklist []*models.AiDeployInstanceTask
|
||||||
|
tx := l.svcCtx.DbEngin.Raw("select * from ai_deploy_instance_task").Scan(&tasklist)
|
||||||
tx := l.svcCtx.DbEngin.Raw("select * from ai_infer_deploy_instance").Scan(&list)
|
|
||||||
if tx.Error != nil {
|
if tx.Error != nil {
|
||||||
logx.Errorf(tx.Error.Error())
|
logx.Errorf(tx.Error.Error())
|
||||||
return nil, tx.Error
|
return nil, tx.Error
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//count total
|
||||||
|
var total int64
|
||||||
|
err = tx.Count(&total).Error
|
||||||
|
tx.Limit(limit).Offset(offset)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return resp, err
|
||||||
|
}
|
||||||
|
|
||||||
|
err = tx.Order("create_time desc").Find(&tasklist).Error
|
||||||
|
if err != nil {
|
||||||
|
return nil, errors.New(err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
deployTasks := l.GenerateDeployTasks(tasklist)
|
||||||
|
slices := make([][]*models.AiInferDeployInstance, len(deployTasks))
|
||||||
|
for i := 0; i < len(deployTasks); i++ {
|
||||||
|
slices[i] = deployTasks[i].Instances
|
||||||
|
}
|
||||||
|
list := common.ConcatMultipleSlices(slices)
|
||||||
|
|
||||||
if len(list) == 0 {
|
if len(list) == 0 {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -55,23 +76,35 @@ func (l *DeployInstanceListLogic) DeployInstanceList(req *types.DeployInstanceLi
|
||||||
|
|
||||||
go updater.UpdateDeployInstanceStatus(l.svcCtx, ins, true)
|
go updater.UpdateDeployInstanceStatus(l.svcCtx, ins, true)
|
||||||
go updater.UpdateDeployTaskStatus(l.svcCtx)
|
go updater.UpdateDeployTaskStatus(l.svcCtx)
|
||||||
//count total
|
|
||||||
var total int64
|
|
||||||
err = tx.Count(&total).Error
|
|
||||||
tx.Limit(limit).Offset(offset)
|
|
||||||
|
|
||||||
if err != nil {
|
resp.List = &deployTasks
|
||||||
return resp, err
|
|
||||||
}
|
|
||||||
|
|
||||||
err = tx.Order("create_time desc").Find(&list).Error
|
|
||||||
if err != nil {
|
|
||||||
return nil, errors.New(err.Error())
|
|
||||||
}
|
|
||||||
resp.List = &list
|
|
||||||
resp.PageSize = req.PageSize
|
resp.PageSize = req.PageSize
|
||||||
resp.PageNum = req.PageNum
|
resp.PageNum = req.PageNum
|
||||||
resp.Total = total
|
resp.Total = total
|
||||||
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (l *DeployInstanceListLogic) GenerateDeployTasks(tasklist []*models.AiDeployInstanceTask) []*DeployTask {
|
||||||
|
var tasks []*DeployTask
|
||||||
|
for _, t := range tasklist {
|
||||||
|
list, err := l.svcCtx.Scheduler.AiStorages.GetInstanceListByDeployTaskId(t.Id)
|
||||||
|
if err != nil {
|
||||||
|
logx.Errorf("db GetInstanceListByDeployTaskId error")
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
deployTask := &DeployTask{
|
||||||
|
Id: t.Id,
|
||||||
|
Name: t.Name,
|
||||||
|
Instances: list,
|
||||||
|
}
|
||||||
|
tasks = append(tasks, deployTask)
|
||||||
|
}
|
||||||
|
return tasks
|
||||||
|
}
|
||||||
|
|
||||||
|
type DeployTask struct {
|
||||||
|
Id int64 `json:"id,string"`
|
||||||
|
Name string `json:"name,string"`
|
||||||
|
Instances []*models.AiInferDeployInstance `json:"instances,string"`
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,30 @@
|
||||||
|
package inference
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
|
||||||
|
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/svc"
|
||||||
|
"gitlink.org.cn/JointCloud/pcm-coordinator/internal/types"
|
||||||
|
|
||||||
|
"github.com/zeromicro/go-zero/core/logx"
|
||||||
|
)
|
||||||
|
|
||||||
|
type GetRunningInstanceByModelLogic struct {
|
||||||
|
logx.Logger
|
||||||
|
ctx context.Context
|
||||||
|
svcCtx *svc.ServiceContext
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewGetRunningInstanceByModelLogic(ctx context.Context, svcCtx *svc.ServiceContext) *GetRunningInstanceByModelLogic {
|
||||||
|
return &GetRunningInstanceByModelLogic{
|
||||||
|
Logger: logx.WithContext(ctx),
|
||||||
|
ctx: ctx,
|
||||||
|
svcCtx: svcCtx,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (l *GetRunningInstanceByModelLogic) GetRunningInstanceByModel(req *types.GetRunningInstanceReq) (resp *types.GetRunningInstanceResp, err error) {
|
||||||
|
resp = &types.GetRunningInstanceResp{}
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
|
@ -97,3 +97,21 @@ func Contains(s []string, e string) bool {
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ConcatMultipleSlices flattens slices into a single newly allocated slice,
// keeping the elements in their original order. The result is never nil; it
// is empty when slices is empty or holds only empty slices.
func ConcatMultipleSlices[T any](slices [][]T) []T {
	// First pass: size the destination so it is allocated exactly once.
	size := 0
	for idx := range slices {
		size += len(slices[idx])
	}

	merged := make([]T, 0, size)
	for _, part := range slices {
		merged = append(merged, part...)
	}
	return merged
}
|
||||||
|
|
|
@ -485,6 +485,16 @@ func (s *AiStorage) GetInferDeployInstanceList() ([]*models.AiInferDeployInstanc
|
||||||
return list, nil
|
return list, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *AiStorage) GetDeployTaskList() ([]*models.AiDeployInstanceTask, error) {
|
||||||
|
var list []*models.AiDeployInstanceTask
|
||||||
|
tx := s.DbEngin.Raw("select * from ai_deploy_instance_task").Scan(&list)
|
||||||
|
if tx.Error != nil {
|
||||||
|
logx.Errorf(tx.Error.Error())
|
||||||
|
return nil, tx.Error
|
||||||
|
}
|
||||||
|
return list, nil
|
||||||
|
}
|
||||||
|
|
||||||
func (s *AiStorage) GetInferDeployInstanceTotalNum() (int32, error) {
|
func (s *AiStorage) GetInferDeployInstanceTotalNum() (int32, error) {
|
||||||
var total int32
|
var total int32
|
||||||
tx := s.DbEngin.Raw("select count(*) from ai_infer_deploy_instance").Scan(&total)
|
tx := s.DbEngin.Raw("select count(*) from ai_infer_deploy_instance").Scan(&total)
|
||||||
|
@ -544,3 +554,32 @@ func (s *AiStorage) GetTrainingTaskRunningNum() (int32, error) {
|
||||||
}
|
}
|
||||||
return total, nil
|
return total, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *AiStorage) SaveInferDeployTask(taskName string, modelName string, modelType string, desc string) (int64, error) {
|
||||||
|
startTime := time.Now().Format(time.RFC3339)
|
||||||
|
// 构建主任务结构体
|
||||||
|
taskModel := models.AiDeployInstanceTask{
|
||||||
|
Name: taskName,
|
||||||
|
ModelName: modelName,
|
||||||
|
ModelType: modelType,
|
||||||
|
Desc: desc,
|
||||||
|
CreateTime: startTime,
|
||||||
|
UpdateTime: startTime,
|
||||||
|
}
|
||||||
|
// 保存任务数据到数据库
|
||||||
|
tx := s.DbEngin.Table("ai_deploy_instance_task").Create(&taskModel)
|
||||||
|
if tx.Error != nil {
|
||||||
|
return 0, tx.Error
|
||||||
|
}
|
||||||
|
return taskModel.Id, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *AiStorage) GetRunningDeployInstanceByModelNameAndAdapterId(modelType string, modelName string, adapterId string) ([]*models.AiInferDeployInstance, error) {
|
||||||
|
var list []*models.AiInferDeployInstance
|
||||||
|
tx := s.DbEngin.Raw("select * from ai_infer_deploy_instance where `model_type` = ? and `model_name` = ? and `adapter_id` = ? and `status` = 'Running'", modelType, modelName, adapterId).Scan(&list)
|
||||||
|
if tx.Error != nil {
|
||||||
|
logx.Errorf(tx.Error.Error())
|
||||||
|
return nil, tx.Error
|
||||||
|
}
|
||||||
|
return list, nil
|
||||||
|
}
|
||||||
|
|
|
@ -15,6 +15,10 @@ type InferOption struct {
|
||||||
Envs []string `json:"envs,optional"`
|
Envs []string `json:"envs,optional"`
|
||||||
Cmd string `json:"cmd,optional"`
|
Cmd string `json:"cmd,optional"`
|
||||||
Replica int32 `json:"replicas,optional"`
|
Replica int32 `json:"replicas,optional"`
|
||||||
|
|
||||||
|
ResourceId string
|
||||||
|
AlgorithmId string
|
||||||
|
ImageId string
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a InferOption) GetOptionType() string {
|
func (a InferOption) GetOptionType() string {
|
||||||
|
|
|
@ -17,6 +17,7 @@ type ICluster interface {
|
||||||
StartInferDeployInstance(ctx context.Context, id string) bool
|
StartInferDeployInstance(ctx context.Context, id string) bool
|
||||||
StopInferDeployInstance(ctx context.Context, id string) bool
|
StopInferDeployInstance(ctx context.Context, id string) bool
|
||||||
GetInferDeployInstance(ctx context.Context, id string) (*DeployInstance, error)
|
GetInferDeployInstance(ctx context.Context, id string) (*DeployInstance, error)
|
||||||
|
CreateInferDeployInstance(ctx context.Context, option *option.InferOption) (string, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
type IInference interface {
|
type IInference interface {
|
||||||
|
|
|
@ -539,3 +539,7 @@ func (m *ModelArtsLink) GetInferDeployInstance(ctx context.Context, id string) (
|
||||||
func (m *ModelArtsLink) GetInferResult(ctx context.Context, url string, file multipart.File, fileName string) (string, error) {
|
func (m *ModelArtsLink) GetInferResult(ctx context.Context, url string, file multipart.File, fileName string) (string, error) {
|
||||||
return "", nil
|
return "", nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (m *ModelArtsLink) CreateInferDeployInstance(ctx context.Context, option *option.InferOption) (string, error) {
|
||||||
|
return "", nil
|
||||||
|
}
|
||||||
|
|
|
@ -81,6 +81,14 @@ var (
|
||||||
MLU: CAMBRICONMLU290,
|
MLU: CAMBRICONMLU290,
|
||||||
GCU: EnflameT20,
|
GCU: EnflameT20,
|
||||||
}
|
}
|
||||||
|
// CardModelNameCmdMap maps a compute card to a table of start commands,
// keyed by model name, for inference deploy instances. Some entries hold an
// empty string, i.e. no command text for that card/model pair.
CardModelNameCmdMap = map[string]map[string]string{
	BIV100: {
		"blip-image-captioning-base": "pip install -U transformers; pip install fastapi uvicorn[standard]; pip install python-multipart; cd /code; python infer_biv100.py",
		"imagenet_resnet50":          "pip install -U transformers; pip install fastapi uvicorn[standard]; pip install python-multipart; cd /code/infer; python infer_biv100.py",
		"chatGLM_6B":                 "su root; pip install transformers==4.33.2; pip install fastapi uvicorn[standard]; cd /code; python infer_biv100.py",
	},
	MLU: {
		"blip-image-captioning-base": "",
		"imagenet_resnet50":          "su root; . /torch/venv3/pytorch/bin/activate; pip install fastapi uvicorn[standard]; pip install python-multipart; cd /code/infer; python infer_mlu.py",
		"chatGLM_6B":                 "",
	},
}
|
||||||
)
|
)
|
||||||
|
|
||||||
func NewOctopusLink(octopusRpc octopusclient.Octopus, name string, id int64) *OctopusLink {
|
func NewOctopusLink(octopusRpc octopusclient.Octopus, name string, id int64) *OctopusLink {
|
||||||
|
@ -561,7 +569,7 @@ func (o *OctopusLink) Execute(ctx context.Context, option *option.AiOption) (int
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o *OctopusLink) GenerateSubmitParams(ctx context.Context, option *option.AiOption) error {
|
func (o *OctopusLink) GenerateSubmitParams(ctx context.Context, option *option.AiOption) error {
|
||||||
err := o.generateResourceId(ctx, option)
|
err := o.generateResourceId(ctx, option, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -569,15 +577,15 @@ func (o *OctopusLink) GenerateSubmitParams(ctx context.Context, option *option.A
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
err = o.generateImageId(ctx, option)
|
err = o.generateImageId(ctx, option, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
err = o.generateAlgorithmId(ctx, option)
|
err = o.generateAlgorithmId(ctx, option, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
err = o.generateCmd(option)
|
err = o.generateCmd(option, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -592,10 +600,7 @@ func (o *OctopusLink) GenerateSubmitParams(ctx context.Context, option *option.A
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o *OctopusLink) generateResourceId(ctx context.Context, option *option.AiOption) error {
|
func (o *OctopusLink) generateResourceId(ctx context.Context, option *option.AiOption, ifoption *option.InferOption) error {
|
||||||
if option.ResourceType == "" {
|
|
||||||
return errors.New("ResourceType not set")
|
|
||||||
}
|
|
||||||
req := &octopus.GetResourceSpecsReq{
|
req := &octopus.GetResourceSpecsReq{
|
||||||
Platform: o.platform,
|
Platform: o.platform,
|
||||||
ResourcePool: RESOURCE_POOL,
|
ResourcePool: RESOURCE_POOL,
|
||||||
|
@ -608,6 +613,30 @@ func (o *OctopusLink) generateResourceId(ctx context.Context, option *option.AiO
|
||||||
return errors.New(specResp.Error.Message)
|
return errors.New(specResp.Error.Message)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if option != nil {
|
||||||
|
err = generateResourceIdForTraining(option, specResp)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if ifoption != nil {
|
||||||
|
err = generateResourceIdForInferDeployInstance(ifoption, specResp)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return errors.New("failed to set ResourceId")
|
||||||
|
}
|
||||||
|
|
||||||
|
func generateResourceIdForTraining(option *option.AiOption, specResp *octopus.GetResourceSpecsResp) error {
|
||||||
|
if option.ResourceType == "" {
|
||||||
|
return errors.New("ResourceType not set")
|
||||||
|
}
|
||||||
|
|
||||||
if option.ResourceType == CPU {
|
if option.ResourceType == CPU {
|
||||||
for _, spec := range specResp.TrainResourceSpecs {
|
for _, spec := range specResp.TrainResourceSpecs {
|
||||||
if spec.Price == 0 {
|
if spec.Price == 0 {
|
||||||
|
@ -621,14 +650,44 @@ func (o *OctopusLink) generateResourceId(ctx context.Context, option *option.AiO
|
||||||
if option.ComputeCard == "" {
|
if option.ComputeCard == "" {
|
||||||
option.ComputeCard = GCU
|
option.ComputeCard = GCU
|
||||||
}
|
}
|
||||||
err = setResourceIdByCard(option, specResp, option.ComputeCard)
|
err := setResourceIdByCard(option, specResp, option.ComputeCard)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
return errors.New("failed to get ResourceId")
|
return errors.New("ResourceType not set")
|
||||||
|
}
|
||||||
|
|
||||||
|
func generateResourceIdForInferDeployInstance(option *option.InferOption, specResp *octopus.GetResourceSpecsResp) error {
|
||||||
|
// temporarily use bi-v100
|
||||||
|
cardName, ok := cardCnMap[BIV100]
|
||||||
|
if !ok {
|
||||||
|
errors.New("computeCard not set")
|
||||||
|
}
|
||||||
|
|
||||||
|
// set computeCard
|
||||||
|
option.ComputeCard = BIV100
|
||||||
|
|
||||||
|
for _, spec := range specResp.TrainResourceSpecs {
|
||||||
|
names := strings.Split(spec.Name, COMMA)
|
||||||
|
if len(names) != 4 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
ns := strings.Split(names[0], STAR)
|
||||||
|
if len(ns) != 2 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if ns[0] == "1" && ns[1] == cardName {
|
||||||
|
option.ResourceId = spec.Id
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return errors.New("failed to set ResourceId")
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o *OctopusLink) generateDatasetsId(ctx context.Context, option *option.AiOption) error {
|
func (o *OctopusLink) generateDatasetsId(ctx context.Context, option *option.AiOption) error {
|
||||||
|
@ -656,33 +715,7 @@ func (o *OctopusLink) generateDatasetsId(ctx context.Context, option *option.AiO
|
||||||
return errors.New("failed to get DatasetsId")
|
return errors.New("failed to get DatasetsId")
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o *OctopusLink) generateImageId(ctx context.Context, option *option.AiOption) error {
|
func (o *OctopusLink) generateImageId(ctx context.Context, option *option.AiOption, ifoption *option.InferOption) error {
|
||||||
if option.TaskType == "" {
|
|
||||||
return errors.New("TaskType not set")
|
|
||||||
}
|
|
||||||
|
|
||||||
req := &octopus.GetUserImageListReq{
|
|
||||||
Platform: o.platform,
|
|
||||||
PageIndex: o.pageIndex,
|
|
||||||
PageSize: o.pageSize,
|
|
||||||
}
|
|
||||||
resp, err := o.octopusRpc.GetUserImageList(ctx, req)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if !resp.Success {
|
|
||||||
return errors.New("failed to get imageId")
|
|
||||||
}
|
|
||||||
|
|
||||||
if option.ResourceType == CPU {
|
|
||||||
for _, img := range resp.Payload.Images {
|
|
||||||
if img.Image.ImageName == "test-image" {
|
|
||||||
option.ImageId = img.Image.Id
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
preImgReq := &octopus.GetPresetImageListReq{
|
preImgReq := &octopus.GetPresetImageListReq{
|
||||||
Platform: o.platform,
|
Platform: o.platform,
|
||||||
PageIndex: o.pageIndex,
|
PageIndex: o.pageIndex,
|
||||||
|
@ -696,6 +729,52 @@ func (o *OctopusLink) generateImageId(ctx context.Context, option *option.AiOpti
|
||||||
return errors.New("failed to get PresetImages")
|
return errors.New("failed to get PresetImages")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if option != nil {
|
||||||
|
if option.TaskType == "" {
|
||||||
|
return errors.New("TaskType not set")
|
||||||
|
}
|
||||||
|
|
||||||
|
req := &octopus.GetUserImageListReq{
|
||||||
|
Platform: o.platform,
|
||||||
|
PageIndex: o.pageIndex,
|
||||||
|
PageSize: o.pageSize,
|
||||||
|
}
|
||||||
|
resp, err := o.octopusRpc.GetUserImageList(ctx, req)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if !resp.Success {
|
||||||
|
return errors.New("failed to get imageId")
|
||||||
|
}
|
||||||
|
|
||||||
|
if option.ResourceType == CPU {
|
||||||
|
for _, img := range resp.Payload.Images {
|
||||||
|
if img.Image.ImageName == "test-image" {
|
||||||
|
option.ImageId = img.Image.Id
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
err = generateImageIdForTraining(option, preImgResp)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if ifoption != nil {
|
||||||
|
err = generateImageIdForInferDeployInstance(ifoption, preImgResp)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return errors.New("failed to get ImageId")
|
||||||
|
}
|
||||||
|
|
||||||
|
func generateImageIdForTraining(option *option.AiOption, preImgResp *octopus.GetPresetImageListResp) error {
|
||||||
if option.ResourceType == CARD {
|
if option.ResourceType == CARD {
|
||||||
for _, image := range preImgResp.Payload.Images {
|
for _, image := range preImgResp.Payload.Images {
|
||||||
if strings.Contains(image.ImageName, cardAliasMap[strings.ToUpper(option.ComputeCard)]) {
|
if strings.Contains(image.ImageName, cardAliasMap[strings.ToUpper(option.ComputeCard)]) {
|
||||||
|
@ -717,11 +796,35 @@ func (o *OctopusLink) generateImageId(ctx context.Context, option *option.AiOpti
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return errors.New("failed to set ImageId")
|
||||||
return errors.New("failed to get ImageId")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o *OctopusLink) generateAlgorithmId(ctx context.Context, option *option.AiOption) error {
|
func generateImageIdForInferDeployInstance(option *option.InferOption, preImgResp *octopus.GetPresetImageListResp) error {
|
||||||
|
for _, image := range preImgResp.Payload.Images {
|
||||||
|
// temporarily use bi-v100
|
||||||
|
if strings.Contains(image.ImageName, cardAliasMap[strings.ToUpper(BIV100)]) {
|
||||||
|
switch strings.ToUpper(BIV100) {
|
||||||
|
case GCU:
|
||||||
|
if strings.HasPrefix(image.ImageVersion, "t20_") {
|
||||||
|
option.ImageId = image.Id
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
case BIV100:
|
||||||
|
if strings.HasPrefix(image.ImageVersion, "bi_") {
|
||||||
|
option.ImageId = image.Id
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
case MLU:
|
||||||
|
option.ImageId = image.Id
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return errors.New("failed to set ImageId")
|
||||||
|
}
|
||||||
|
|
||||||
|
func (o *OctopusLink) generateAlgorithmId(ctx context.Context, option *option.AiOption, ifoption *option.InferOption) error {
|
||||||
req := &octopus.GetMyAlgorithmListReq{
|
req := &octopus.GetMyAlgorithmListReq{
|
||||||
Platform: o.platform,
|
Platform: o.platform,
|
||||||
PageIndex: o.pageIndex,
|
PageIndex: o.pageIndex,
|
||||||
|
@ -735,6 +838,26 @@ func (o *OctopusLink) generateAlgorithmId(ctx context.Context, option *option.Ai
|
||||||
return errors.New("failed to get algorithmId")
|
return errors.New("failed to get algorithmId")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if option != nil {
|
||||||
|
err = generateAlgorithmIdForTraining(option, resp)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if ifoption != nil {
|
||||||
|
err = generateAlgorithmIdForInferDeployInstance(ifoption, resp)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return errors.New("failed to set AlgorithmId")
|
||||||
|
}
|
||||||
|
|
||||||
|
func generateAlgorithmIdForTraining(option *option.AiOption, resp *octopus.GetMyAlgorithmListResp) error {
|
||||||
for _, algorithm := range resp.Payload.Algorithms {
|
for _, algorithm := range resp.Payload.Algorithms {
|
||||||
if algorithm.FrameworkName == strings.Title(option.TaskType) {
|
if algorithm.FrameworkName == strings.Title(option.TaskType) {
|
||||||
ns := strings.Split(algorithm.AlgorithmName, UNDERSCORE)
|
ns := strings.Split(algorithm.AlgorithmName, UNDERSCORE)
|
||||||
|
@ -760,14 +883,48 @@ func (o *OctopusLink) generateAlgorithmId(ctx context.Context, option *option.Ai
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if option.AlgorithmId == "" {
|
return errors.New("Algorithm does not exist")
|
||||||
return errors.New("Algorithm does not exist")
|
|
||||||
}
|
|
||||||
|
|
||||||
return errors.New("failed to get AlgorithmId")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o *OctopusLink) generateCmd(option *option.AiOption) error {
|
func generateAlgorithmIdForInferDeployInstance(option *option.InferOption, resp *octopus.GetMyAlgorithmListResp) error {
|
||||||
|
if option.ModelType == "" {
|
||||||
|
return errors.New("ModelType not set")
|
||||||
|
}
|
||||||
|
|
||||||
|
if option.ModelName == "" {
|
||||||
|
return errors.New("ModelName not set")
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, algorithm := range resp.Payload.Algorithms {
|
||||||
|
if strings.Contains(algorithm.AlgorithmName, option.ModelName) {
|
||||||
|
option.AlgorithmId = algorithm.AlgorithmId
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return errors.New("ModelName does not exist")
|
||||||
|
}
|
||||||
|
|
||||||
|
func (o *OctopusLink) generateCmd(option *option.AiOption, ifoption *option.InferOption) error {
|
||||||
|
if option != nil {
|
||||||
|
err := generateCmdForTraining(option)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if ifoption != nil {
|
||||||
|
err := generateCmdForInferDeployInstance(ifoption)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return errors.New("failed to set cmd")
|
||||||
|
}
|
||||||
|
|
||||||
|
func generateCmdForTraining(option *option.AiOption) error {
|
||||||
if option.Cmd == "" {
|
if option.Cmd == "" {
|
||||||
switch option.ComputeCard {
|
switch option.ComputeCard {
|
||||||
case GCU:
|
case GCU:
|
||||||
|
@ -782,6 +939,23 @@ func (o *OctopusLink) generateCmd(option *option.AiOption) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func generateCmdForInferDeployInstance(option *option.InferOption) error {
|
||||||
|
if option.Cmd == "" {
|
||||||
|
nameCmd, ok := CardModelNameCmdMap[option.ComputeCard]
|
||||||
|
if !ok {
|
||||||
|
return errors.New("failed to set cmd, ComputeCard not exist")
|
||||||
|
}
|
||||||
|
cmd, ok := nameCmd[option.ModelName]
|
||||||
|
if !ok {
|
||||||
|
return errors.New("failed to set cmd, ModelName not exist")
|
||||||
|
}
|
||||||
|
option.Cmd = cmd
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func (o *OctopusLink) generateEnv(option *option.AiOption) error {
|
func (o *OctopusLink) generateEnv(option *option.AiOption) error {
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
|
@ -1020,3 +1194,55 @@ func (o *OctopusLink) GetInferResult(ctx context.Context, url string, file multi
|
||||||
|
|
||||||
return recv.Result, nil
|
return recv.Result, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (o *OctopusLink) CreateInferDeployInstance(ctx context.Context, option *option.InferOption) (string, error) {
|
||||||
|
err := o.generateResourceId(ctx, nil, option)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
err = o.generateAlgorithmId(ctx, nil, option)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
err = o.generateImageId(ctx, nil, option)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
err = o.generateCmd(nil, option)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
desc := option.ModelType + FORWARD_SLASH + option.ModelName + FORWARD_SLASH + strings.ToLower(BIV100)
|
||||||
|
param := &octopus.CreateNotebookParam{
|
||||||
|
Name: option.TaskName,
|
||||||
|
ResourcePool: RESOURCE_POOL,
|
||||||
|
ResourceSpecId: option.ResourceId,
|
||||||
|
AlgorithmId: option.AlgorithmId,
|
||||||
|
AlgorithmVersion: VERSION,
|
||||||
|
ImageId: option.ImageId,
|
||||||
|
DatasetId: "",
|
||||||
|
DatasetVersion: "",
|
||||||
|
Command: option.Cmd,
|
||||||
|
Desc: desc,
|
||||||
|
TaskNumber: 1,
|
||||||
|
}
|
||||||
|
req := &octopus.CreateNotebookReq{
|
||||||
|
Platform: o.platform,
|
||||||
|
Params: param,
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := o.octopusRpc.CreateNotebook(ctx, req)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
if !resp.Success {
|
||||||
|
return "", errors.New(resp.Error.Message)
|
||||||
|
}
|
||||||
|
|
||||||
|
return resp.Payload.Id, nil
|
||||||
|
}
|
||||||
|
|
|
@ -881,3 +881,7 @@ func (s *ShuguangAi) GetInferDeployInstance(ctx context.Context, id string) (*in
|
||||||
func (s *ShuguangAi) GetInferResult(ctx context.Context, url string, file multipart.File, fileName string) (string, error) {
|
func (s *ShuguangAi) GetInferResult(ctx context.Context, url string, file multipart.File, fileName string) (string, error) {
|
||||||
return "", nil
|
return "", nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *ShuguangAi) CreateInferDeployInstance(ctx context.Context, option *option.InferOption) (string, error) {
|
||||||
|
return "", nil
|
||||||
|
}
|
||||||
|
|
|
@ -82,6 +82,7 @@ var (
|
||||||
"image_classification": {"imagenet_resnet50"},
|
"image_classification": {"imagenet_resnet50"},
|
||||||
"text_to_text": {"chatGLM_6B"},
|
"text_to_text": {"chatGLM_6B"},
|
||||||
"image_to_text": {"blip-image-captioning-base"},
|
"image_to_text": {"blip-image-captioning-base"},
|
||||||
|
"text_to_image": {"stable-diffusion-xl-base-1.0"},
|
||||||
}
|
}
|
||||||
AITYPE = map[string]string{
|
AITYPE = map[string]string{
|
||||||
"1": OCTOPUS,
|
"1": OCTOPUS,
|
||||||
|
|
|
@ -6041,3 +6041,12 @@ type GetDeployTasksReq struct {
|
||||||
type GetDeployTasksResp struct {
|
type GetDeployTasksResp struct {
|
||||||
PageResult
|
PageResult
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetRunningInstanceReq selects running deploy instances by adapter, model
// type and model name.
//
// Every field is bound from the query string. The route serving this request
// (/inference/getInstanceByModel) has no path parameters, so `path` tags
// could never be filled and parsing would reject every request. AdapterIds
// mirrors the field declared for this type in the API definition.
type GetRunningInstanceReq struct {
	AdapterIds []string `form:"adapterIds"`
	ModelType  string   `form:"modelType"`
	ModelName  string   `form:"modelName"`
}
|
||||||
|
|
||||||
|
// GetRunningInstanceResp returns the matching instances under the JSON key
// "list"; the key is omitted when List is nil.
type GetRunningInstanceResp struct {
	List any `json:"list,omitempty"`
}
|
||||||
|
|
Loading…
Reference in New Issue