Merge pull request 'added getrunninginstance api' (#286) from tzwang/pcm-coordinator:master into master

Former-commit-id: 7e7eb6bcb9ad332df724c6385597ec03ee082552
This commit is contained in:
tzwang 2024-08-23 17:26:57 +08:00
commit e58d00a980
7 changed files with 144 additions and 30 deletions

View File

@ -144,4 +144,12 @@ type (
GetDeployTasksResp {
PageResult
}
// GetRunningInstanceReq identifies a deployed model by its type and name,
// both bound from the URL path (/inference/getInstanceByModel route).
GetRunningInstanceReq {
ModelType string `path:"modelType"`
ModelName string `path:"modelName"`
}
// GetRunningInstanceResp carries the running instances found for the model.
// List is an opaque payload; omitted from the JSON body when empty.
GetRunningInstanceResp {
List interface{} `json:"list,omitempty"`
}
)

View File

@ -968,6 +968,9 @@ service pcm {
@handler GetDeployTasks
get /inference/getDeployTasks (GetDeployTasksReq) returns (GetDeployTasksResp)
@handler GetRunningInstanceByModel
get /inference/getInstanceByModel (GetRunningInstanceReq) returns (GetRunningInstanceResp)
}
@server(

View File

@ -0,0 +1,28 @@
package inference
import (
"net/http"
"github.com/zeromicro/go-zero/rest/httpx"
"gitlink.org.cn/tzwang/pcm-coordinator/internal/logic/inference"
"gitlink.org.cn/tzwang/pcm-coordinator/internal/svc"
"gitlink.org.cn/tzwang/pcm-coordinator/internal/types"
)
// GetRunningInstanceByModelHandler returns the HTTP handler for
// GET /inference/getInstanceByModel. It parses the path parameters into a
// GetRunningInstanceReq, delegates to the inference logic layer, and writes
// either the JSON result or the error to the response.
func GetRunningInstanceByModelHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		ctx := r.Context()

		var req types.GetRunningInstanceReq
		if err := httpx.Parse(r, &req); err != nil {
			httpx.ErrorCtx(ctx, w, err)
			return
		}

		logic := inference.NewGetRunningInstanceByModelLogic(ctx, svcCtx)
		resp, err := logic.GetRunningInstanceByModel(&req)
		if err != nil {
			httpx.ErrorCtx(ctx, w, err)
			return
		}
		httpx.OkJsonCtx(ctx, w, resp)
	}
}

View File

@ -1228,6 +1228,11 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
Path: "/inference/getDeployTasks",
Handler: inference.GetDeployTasksHandler(serverCtx),
},
{
Method: http.MethodGet,
Path: "/inference/getInstanceByModel",
Handler: inference.GetRunningInstanceByModelHandler(serverCtx),
},
},
rest.WithPrefix("/pcm/v1"),
)

View File

@ -0,0 +1,30 @@
package inference
import (
"context"
"gitlink.org.cn/tzwang/pcm-coordinator/internal/svc"
"gitlink.org.cn/tzwang/pcm-coordinator/internal/types"
"github.com/zeromicro/go-zero/core/logx"
)
// GetRunningInstanceByModelLogic holds the request-scoped context and the
// shared service dependencies used to look up running inference instances
// for a given model. One instance is created per request by the handler.
type GetRunningInstanceByModelLogic struct {
logx.Logger
ctx context.Context
svcCtx *svc.ServiceContext
}
// NewGetRunningInstanceByModelLogic builds a request-scoped logic instance,
// attaching a context-aware logger and the shared service context.
func NewGetRunningInstanceByModelLogic(ctx context.Context, svcCtx *svc.ServiceContext) *GetRunningInstanceByModelLogic {
	l := &GetRunningInstanceByModelLogic{
		ctx:    ctx,
		svcCtx: svcCtx,
	}
	l.Logger = logx.WithContext(ctx)
	return l
}
// GetRunningInstanceByModel returns the running inference instances for the
// model identified in req.
// NOTE(review): this is the generated go-zero stub — the lookup logic has not
// been implemented yet, so it currently returns (nil, nil) for every request.
func (l *GetRunningInstanceByModelLogic) GetRunningInstanceByModel(req *types.GetRunningInstanceReq) (resp *types.GetRunningInstanceResp, err error) {
// todo: add your logic here and delete this line
return
}

View File

@ -81,6 +81,14 @@ var (
MLU: CAMBRICONMLU290,
GCU: EnflameT20,
}
// CardModelNameCmdMap maps a compute-card type (e.g. BIV100, MLU) to a
// per-model shell command that installs the runtime dependencies and starts
// the inference server for that model on that card. An empty command string
// means the model is not yet supported on that card — callers should treat
// it as "no command available" rather than execute it.
CardModelNameCmdMap = map[string]map[string]string{
BIV100: {"blip-image-captioning-base": "pip install -U transformers; pip install fastapi uvicorn[standard]; pip install python-multipart; cd /code; python infer_biv100.py",
"imagenet_resnet50": "pip install -U transformers; pip install fastapi uvicorn[standard]; pip install python-multipart; cd /code/infer; python infer_biv100.py",
"chatGLM_6B": "su root; pip install transformers==4.33.2; pip install fastapi uvicorn[standard]; cd /code; python infer_biv100.py"},
MLU: {"blip-image-captioning-base": "",
"imagenet_resnet50": "su root; . /torch/venv3/pytorch/bin/activate; pip install fastapi uvicorn[standard]; pip install python-multipart; cd /code/infer; python infer_mlu.py",
"chatGLM_6B": ""},
}
)
func NewOctopusLink(octopusRpc octopusclient.Octopus, name string, id int64) *OctopusLink {
@ -654,7 +662,13 @@ func generateResourceIdForTraining(option *option.AiOption, specResp *octopus.Ge
func generateResourceIdForInferDeployInstance(option *option.InferOption, specResp *octopus.GetResourceSpecsResp) error {
// temporarily use bi-v100
cardName := cardCnMap[BIV100]
cardName, ok := cardCnMap[BIV100]
if !ok {
errors.New("computeCard not set")
}
// set computeCard
option.ComputeCard = BIV100
for _, spec := range specResp.TrainResourceSpecs {
names := strings.Split(spec.Name, COMMA)
@ -702,32 +716,6 @@ func (o *OctopusLink) generateDatasetsId(ctx context.Context, option *option.AiO
}
func (o *OctopusLink) generateImageId(ctx context.Context, option *option.AiOption, ifoption *option.InferOption) error {
if option.TaskType == "" {
return errors.New("TaskType not set")
}
req := &octopus.GetUserImageListReq{
Platform: o.platform,
PageIndex: o.pageIndex,
PageSize: o.pageSize,
}
resp, err := o.octopusRpc.GetUserImageList(ctx, req)
if err != nil {
return err
}
if !resp.Success {
return errors.New("failed to get imageId")
}
if option.ResourceType == CPU {
for _, img := range resp.Payload.Images {
if img.Image.ImageName == "test-image" {
option.ImageId = img.Image.Id
return nil
}
}
}
preImgReq := &octopus.GetPresetImageListReq{
Platform: o.platform,
PageIndex: o.pageIndex,
@ -742,6 +730,32 @@ func (o *OctopusLink) generateImageId(ctx context.Context, option *option.AiOpti
}
if option != nil {
if option.TaskType == "" {
return errors.New("TaskType not set")
}
req := &octopus.GetUserImageListReq{
Platform: o.platform,
PageIndex: o.pageIndex,
PageSize: o.pageSize,
}
resp, err := o.octopusRpc.GetUserImageList(ctx, req)
if err != nil {
return err
}
if !resp.Success {
return errors.New("failed to get imageId")
}
if option.ResourceType == CPU {
for _, img := range resp.Payload.Images {
if img.Image.ImageName == "test-image" {
option.ImageId = img.Image.Id
return nil
}
}
}
err = generateImageIdForTraining(option, preImgResp)
if err != nil {
return err
@ -873,13 +887,21 @@ func generateAlgorithmIdForTraining(option *option.AiOption, resp *octopus.GetMy
}
// generateAlgorithmIdForInferDeployInstance resolves option.AlgorithmId by
// scanning the user's Octopus algorithm list for the first entry whose name
// contains option.ModelName. ModelType and ModelName must be set beforehand.
// Returns an error when either field is empty or no matching algorithm exists.
func generateAlgorithmIdForInferDeployInstance(option *option.InferOption, resp *octopus.GetMyAlgorithmListResp) error {
	if option.ModelType == "" {
		return errors.New("ModelType not set")
	}
	if option.ModelName == "" {
		return errors.New("ModelName not set")
	}
	for _, algorithm := range resp.Payload.Algorithms {
		// Substring match: algorithm names embed the model name alongside
		// other metadata, so an exact-equality check would never match.
		if strings.Contains(algorithm.AlgorithmName, option.ModelName) {
			option.AlgorithmId = algorithm.AlgorithmId
			return nil
		}
	}
	// Defect fixed: the original text carried two consecutive returns (a
	// stale duplicate of the final return); only the current message remains.
	return errors.New("ModelName does not exist")
}
func (o *OctopusLink) generateCmd(option *option.AiOption, ifoption *option.InferOption) error {
@ -919,7 +941,16 @@ func generateCmdForTraining(option *option.AiOption) error {
func generateCmdForInferDeployInstance(option *option.InferOption) error {
if option.Cmd == "" {
option.Cmd = "su root; pip install fastapi uvicorn[standard]; cd /code/infer; python infer_biv100.py"
nameCmd, ok := CardModelNameCmdMap[option.ComputeCard]
if !ok {
return errors.New("failed to set cmd, ComputeCard not exist")
}
cmd, ok := nameCmd[option.ModelName]
if !ok {
return errors.New("failed to set cmd, ModelName not exist")
}
option.Cmd = cmd
return nil
}
return nil
@ -1185,7 +1216,7 @@ func (o *OctopusLink) CreateInferDeployInstance(ctx context.Context, option *opt
return "", err
}
desc := option.ModelType + FORWARD_SLASH + option.ModelName + FORWARD_SLASH + BIV100
desc := option.ModelType + FORWARD_SLASH + option.ModelName + FORWARD_SLASH + strings.ToLower(BIV100)
param := &octopus.CreateNotebookParam{
Name: option.TaskName,
ResourcePool: RESOURCE_POOL,

View File

@ -6034,3 +6034,12 @@ type GetDeployTasksReq struct {
type GetDeployTasksResp struct {
PageResult
}
// GetRunningInstanceReq identifies a deployed model by its type and name;
// both fields are bound from the request URL path.
type GetRunningInstanceReq struct {
ModelType string `path:"modelType"`
ModelName string `path:"modelName"`
}
// GetRunningInstanceResp carries the running instances found for the model.
// List is an opaque payload and is omitted from the JSON body when empty.
type GetRunningInstanceResp struct {
List interface{} `json:"list,omitempty"`
}