From 4f7314a2cd0142993380779b6a8214c1617c37f9 Mon Sep 17 00:00:00 2001 From: tzwang Date: Thu, 22 Aug 2024 11:01:22 +0800 Subject: [PATCH 1/3] updated CreateInferDeployInstance logic Former-commit-id: 3bbf914d3906131d8a71aeac3df0389ad8646d45 --- internal/storeLink/octopus.go | 91 +++++++++++++++++++++++------------ 1 file changed, 61 insertions(+), 30 deletions(-) diff --git a/internal/storeLink/octopus.go b/internal/storeLink/octopus.go index 2651930a..f2c4a3aa 100644 --- a/internal/storeLink/octopus.go +++ b/internal/storeLink/octopus.go @@ -81,6 +81,14 @@ var ( MLU: CAMBRICONMLU290, GCU: EnflameT20, } + CardModelNameCmdMap = map[string]map[string]string{ + BIV100: {"blip-image-captioning-base": "pip install -U transformers; pip install fastapi uvicorn[standard]; pip install python-multipart; cd /code; python infer_biv100.py", + "imagenet_resnet50": "pip install -U transformers; pip install fastapi uvicorn[standard]; pip install python-multipart; cd /code/infer; python infer_biv100.py", + "chatGLM_6B": "su root; pip install transformers==4.33.2; pip install fastapi uvicorn[standard]; cd /code; python infer_biv100.py"}, + MLU: {"blip-image-captioning-base": "", + "imagenet_resnet50": "su root; . 
/torch/venv3/pytorch/bin/activate; pip install fastapi uvicorn[standard]; pip install python-multipart; cd /code/infer; python infer_mlu.py", + "chatGLM_6B": ""}, + } ) func NewOctopusLink(octopusRpc octopusclient.Octopus, name string, id int64) *OctopusLink { @@ -654,7 +662,13 @@ func generateResourceIdForTraining(option *option.AiOption, specResp *octopus.Ge func generateResourceIdForInferDeployInstance(option *option.InferOption, specResp *octopus.GetResourceSpecsResp) error { // temporarily use bi-v100 - cardName := cardCnMap[BIV100] + cardName, ok := cardCnMap[BIV100] + if !ok { + return errors.New("computeCard not set") + } + + // record the card so generateCmdForInferDeployInstance can look up its launch command + option.ComputeCard = BIV100 for _, spec := range specResp.TrainResourceSpecs { names := strings.Split(spec.Name, COMMA) @@ -702,32 +716,6 @@ func (o *OctopusLink) generateDatasetsId(ctx context.Context, option *option.AiO } func (o *OctopusLink) generateImageId(ctx context.Context, option *option.AiOption, ifoption *option.InferOption) error { - if option.TaskType == "" { - return errors.New("TaskType not set") - } - - req := &octopus.GetUserImageListReq{ - Platform: o.platform, - PageIndex: o.pageIndex, - PageSize: o.pageSize, - } - resp, err := o.octopusRpc.GetUserImageList(ctx, req) - if err != nil { - return err - } - if !resp.Success { - return errors.New("failed to get imageId") - } - - if option.ResourceType == CPU { - for _, img := range resp.Payload.Images { - if img.Image.ImageName == "test-image" { - option.ImageId = img.Image.Id - return nil - } - } - } - preImgReq := &octopus.GetPresetImageListReq{ Platform: o.platform, PageIndex: o.pageIndex, @@ -742,6 +730,32 @@ func (o *OctopusLink) generateImageId(ctx context.Context, option *option.AiOpti } if option != nil { + if option.TaskType == "" { + return errors.New("TaskType not set") + } + + req := &octopus.GetUserImageListReq{ + Platform: o.platform, + PageIndex: o.pageIndex, + PageSize: o.pageSize, + } + resp, err := o.octopusRpc.GetUserImageList(ctx, req) + 
if err != nil { + return err + } + if !resp.Success { + return errors.New("failed to get imageId") + } + + if option.ResourceType == CPU { + for _, img := range resp.Payload.Images { + if img.Image.ImageName == "test-image" { + option.ImageId = img.Image.Id + return nil + } + } + } + err = generateImageIdForTraining(option, preImgResp) if err != nil { return err @@ -873,13 +887,21 @@ func generateAlgorithmIdForTraining(option *option.AiOption, resp *octopus.GetMy } func generateAlgorithmIdForInferDeployInstance(option *option.InferOption, resp *octopus.GetMyAlgorithmListResp) error { + if option.ModelType == "" { + return errors.New("ModelType not set") + } + + if option.ModelName == "" { + return errors.New("ModelName not set") + } + for _, algorithm := range resp.Payload.Algorithms { if strings.Contains(algorithm.AlgorithmName, option.ModelName) { option.AlgorithmId = algorithm.AlgorithmId return nil } } - return errors.New("Algorithm does not exist") + return errors.New("ModelName does not exist") } func (o *OctopusLink) generateCmd(option *option.AiOption, ifoption *option.InferOption) error { @@ -919,7 +941,16 @@ func generateCmdForTraining(option *option.AiOption) error { func generateCmdForInferDeployInstance(option *option.InferOption) error { if option.Cmd == "" { - option.Cmd = "su root; pip install fastapi uvicorn[standard]; cd /code/infer; python infer_biv100.py" + nameCmd, ok := CardModelNameCmdMap[option.ComputeCard] + if !ok { + return errors.New("failed to set cmd, ComputeCard not exist") + } + cmd, ok := nameCmd[option.ModelName] + if !ok || cmd == "" { + return errors.New("failed to set cmd, ModelName not exist") + } + option.Cmd = cmd + return nil } return nil @@ -1185,7 +1216,7 @@ func (o *OctopusLink) CreateInferDeployInstance(ctx context.Context, option *opt return "", err } - desc := option.ModelType + FORWARD_SLASH + option.ModelName + FORWARD_SLASH + BIV100 + desc := option.ModelType + FORWARD_SLASH + option.ModelName + FORWARD_SLASH + 
strings.ToLower(BIV100) param := &octopus.CreateNotebookParam{ Name: option.TaskName, ResourcePool: RESOURCE_POOL, From 3f21737673b95cd85ecf63c2833861bd7fc064b1 Mon Sep 17 00:00:00 2001 From: tzwang Date: Fri, 23 Aug 2024 16:55:32 +0800 Subject: [PATCH 2/3] added getRunningInstanceByModel api Former-commit-id: 9660ea240592d8c646ecf99804e147cfbad02a75 --- desc/inference/inference.api | 8 ++++++++ desc/pcm.api | 3 +++ internal/handler/routes.go | 5 +++++ internal/types/types.go | 9 +++++++++ 4 files changed, 25 insertions(+) diff --git a/desc/inference/inference.api b/desc/inference/inference.api index c46054d3..a77c0e98 100644 --- a/desc/inference/inference.api +++ b/desc/inference/inference.api @@ -144,4 +144,12 @@ type ( GetDeployTasksResp { PageResult } + + GetRunningInstanceReq { + ModelType string `form:"modelType"` + ModelName string `form:"modelName"` + } + GetRunningInstanceResp { + List interface{} `json:"list,omitempty"` + } ) diff --git a/desc/pcm.api b/desc/pcm.api index 5a956664..6eacc710 100644 --- a/desc/pcm.api +++ b/desc/pcm.api @@ -968,6 +968,9 @@ service pcm { @handler GetDeployTasks get /inference/getDeployTasks (GetDeployTasksReq) returns (GetDeployTasksResp) + + @handler GetRunningInstanceByModel + get /inference/getInstanceByModel (GetRunningInstanceReq) returns (GetRunningInstanceResp) } @server( diff --git a/internal/handler/routes.go b/internal/handler/routes.go index c735cd3a..c317a876 100644 --- a/internal/handler/routes.go +++ b/internal/handler/routes.go @@ -1228,6 +1228,11 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) { Path: "/inference/getDeployTasks", Handler: inference.GetDeployTasksHandler(serverCtx), }, + { + Method: http.MethodGet, + Path: "/inference/getInstanceByModel", + Handler: inference.GetRunningInstanceByModelHandler(serverCtx), + }, }, rest.WithPrefix("/pcm/v1"), ) diff --git a/internal/types/types.go b/internal/types/types.go index 73949414..fcfe999e 100644 --- a/internal/types/types.go 
+++ b/internal/types/types.go @@ -6034,3 +6034,12 @@ type GetDeployTasksReq struct { type GetDeployTasksResp struct { PageResult } + +type GetRunningInstanceReq struct { + ModelType string `form:"modelType"` + ModelName string `form:"modelName"` +} + +type GetRunningInstanceResp struct { + List interface{} `json:"list,omitempty"` +} From da674642ea60da5d56f486510f05eb9409a4be90 Mon Sep 17 00:00:00 2001 From: tzwang Date: Fri, 23 Aug 2024 17:18:04 +0800 Subject: [PATCH 3/3] updated getRunningInstanceByModel api Former-commit-id: 2d48dac7551546d0d677c91a29776e7df511074b --- .../getrunninginstancebymodelhandler.go | 28 +++++++++++++++++ .../getrunninginstancebymodellogic.go | 30 +++++++++++++++++++ 2 files changed, 58 insertions(+) create mode 100644 internal/handler/inference/getrunninginstancebymodelhandler.go create mode 100644 internal/logic/inference/getrunninginstancebymodellogic.go diff --git a/internal/handler/inference/getrunninginstancebymodelhandler.go b/internal/handler/inference/getrunninginstancebymodelhandler.go new file mode 100644 index 00000000..534796c9 --- /dev/null +++ b/internal/handler/inference/getrunninginstancebymodelhandler.go @@ -0,0 +1,28 @@ +package inference + +import ( + "net/http" + + "github.com/zeromicro/go-zero/rest/httpx" + "gitlink.org.cn/tzwang/pcm-coordinator/internal/logic/inference" + "gitlink.org.cn/tzwang/pcm-coordinator/internal/svc" + "gitlink.org.cn/tzwang/pcm-coordinator/internal/types" +) + +func GetRunningInstanceByModelHandler(svcCtx *svc.ServiceContext) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + var req types.GetRunningInstanceReq + if err := httpx.Parse(r, &req); err != nil { + httpx.ErrorCtx(r.Context(), w, err) + return + } + + l := inference.NewGetRunningInstanceByModelLogic(r.Context(), svcCtx) + resp, err := l.GetRunningInstanceByModel(&req) + if err != nil { + httpx.ErrorCtx(r.Context(), w, err) + } else { + httpx.OkJsonCtx(r.Context(), w, resp) + } + } +} diff --git 
a/internal/logic/inference/getrunninginstancebymodellogic.go b/internal/logic/inference/getrunninginstancebymodellogic.go new file mode 100644 index 00000000..7ffef4c7 --- /dev/null +++ b/internal/logic/inference/getrunninginstancebymodellogic.go @@ -0,0 +1,30 @@ +package inference + +import ( + "context" + + "gitlink.org.cn/tzwang/pcm-coordinator/internal/svc" + "gitlink.org.cn/tzwang/pcm-coordinator/internal/types" + + "github.com/zeromicro/go-zero/core/logx" +) + +type GetRunningInstanceByModelLogic struct { + logx.Logger + ctx context.Context + svcCtx *svc.ServiceContext +} + +func NewGetRunningInstanceByModelLogic(ctx context.Context, svcCtx *svc.ServiceContext) *GetRunningInstanceByModelLogic { + return &GetRunningInstanceByModelLogic{ + Logger: logx.WithContext(ctx), + ctx: ctx, + svcCtx: svcCtx, + } +} + +func (l *GetRunningInstanceByModelLogic) GetRunningInstanceByModel(req *types.GetRunningInstanceReq) (resp *types.GetRunningInstanceResp, err error) { + // TODO: not implemented; the bare return yields the zero values of resp and err (nil, nil). + + return +}