Merge pull request 'added getrunninginstance api' (#286) from tzwang/pcm-coordinator:master into master
Former-commit-id: 7e7eb6bcb9ad332df724c6385597ec03ee082552
This commit is contained in:
commit
e58d00a980
|
@ -144,4 +144,12 @@ type (
|
||||||
GetDeployTasksResp {
|
GetDeployTasksResp {
|
||||||
PageResult
|
PageResult
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetRunningInstanceReq selects running inference instances by model.
// The route /inference/getInstanceByModel declares no path variables, so
// both selectors are bound from the query string (form tag) rather than
// from the URL path.
GetRunningInstanceReq {
	ModelType string `form:"modelType"` // model category to filter by
	ModelName string `form:"modelName"` // model name to filter by
}
|
||||||
|
// GetRunningInstanceResp is the payload returned by
// /inference/getInstanceByModel. List is left out of the JSON body when
// it is empty (omitempty).
GetRunningInstanceResp {
	List interface{} `json:"list,omitempty"`
}
|
||||||
)
|
)
|
||||||
|
|
|
@ -968,6 +968,9 @@ service pcm {
|
||||||
|
|
||||||
@handler GetDeployTasks
|
@handler GetDeployTasks
|
||||||
get /inference/getDeployTasks (GetDeployTasksReq) returns (GetDeployTasksResp)
|
get /inference/getDeployTasks (GetDeployTasksReq) returns (GetDeployTasksResp)
|
||||||
|
|
||||||
|
@handler GetRunningInstanceByModel
|
||||||
|
get /inference/getInstanceByModel (GetRunningInstanceReq) returns (GetRunningInstanceResp)
|
||||||
}
|
}
|
||||||
|
|
||||||
@server(
|
@server(
|
||||||
|
|
|
@ -0,0 +1,28 @@
|
||||||
|
package inference
|
||||||
|
|
||||||
|
import (
|
||||||
|
"net/http"
|
||||||
|
|
||||||
|
"github.com/zeromicro/go-zero/rest/httpx"
|
||||||
|
"gitlink.org.cn/tzwang/pcm-coordinator/internal/logic/inference"
|
||||||
|
"gitlink.org.cn/tzwang/pcm-coordinator/internal/svc"
|
||||||
|
"gitlink.org.cn/tzwang/pcm-coordinator/internal/types"
|
||||||
|
)
|
||||||
|
|
||||||
|
func GetRunningInstanceByModelHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
var req types.GetRunningInstanceReq
|
||||||
|
if err := httpx.Parse(r, &req); err != nil {
|
||||||
|
httpx.ErrorCtx(r.Context(), w, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
l := inference.NewGetRunningInstanceByModelLogic(r.Context(), svcCtx)
|
||||||
|
resp, err := l.GetRunningInstanceByModel(&req)
|
||||||
|
if err != nil {
|
||||||
|
httpx.ErrorCtx(r.Context(), w, err)
|
||||||
|
} else {
|
||||||
|
httpx.OkJsonCtx(r.Context(), w, resp)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -1228,6 +1228,11 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
|
||||||
Path: "/inference/getDeployTasks",
|
Path: "/inference/getDeployTasks",
|
||||||
Handler: inference.GetDeployTasksHandler(serverCtx),
|
Handler: inference.GetDeployTasksHandler(serverCtx),
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
Method: http.MethodGet,
|
||||||
|
Path: "/inference/getInstanceByModel",
|
||||||
|
Handler: inference.GetRunningInstanceByModelHandler(serverCtx),
|
||||||
|
},
|
||||||
},
|
},
|
||||||
rest.WithPrefix("/pcm/v1"),
|
rest.WithPrefix("/pcm/v1"),
|
||||||
)
|
)
|
||||||
|
|
|
@ -0,0 +1,30 @@
|
||||||
|
package inference
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
|
||||||
|
"gitlink.org.cn/tzwang/pcm-coordinator/internal/svc"
|
||||||
|
"gitlink.org.cn/tzwang/pcm-coordinator/internal/types"
|
||||||
|
|
||||||
|
"github.com/zeromicro/go-zero/core/logx"
|
||||||
|
)
|
||||||
|
|
||||||
|
type GetRunningInstanceByModelLogic struct {
|
||||||
|
logx.Logger
|
||||||
|
ctx context.Context
|
||||||
|
svcCtx *svc.ServiceContext
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewGetRunningInstanceByModelLogic(ctx context.Context, svcCtx *svc.ServiceContext) *GetRunningInstanceByModelLogic {
|
||||||
|
return &GetRunningInstanceByModelLogic{
|
||||||
|
Logger: logx.WithContext(ctx),
|
||||||
|
ctx: ctx,
|
||||||
|
svcCtx: svcCtx,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (l *GetRunningInstanceByModelLogic) GetRunningInstanceByModel(req *types.GetRunningInstanceReq) (resp *types.GetRunningInstanceResp, err error) {
|
||||||
|
// todo: add your logic here and delete this line
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
|
@ -81,6 +81,14 @@ var (
|
||||||
MLU: CAMBRICONMLU290,
|
MLU: CAMBRICONMLU290,
|
||||||
GCU: EnflameT20,
|
GCU: EnflameT20,
|
||||||
}
|
}
|
||||||
|
// CardModelNameCmdMap holds, per compute card and then per model name, the
// shell command assigned to an inference deploy instance when the caller
// did not supply one.
// NOTE(review): several MLU entries are empty strings; a lookup on them
// succeeds and yields an empty command — confirm this is intended.
CardModelNameCmdMap = map[string]map[string]string{
	BIV100: {
		"blip-image-captioning-base": "pip install -U transformers; pip install fastapi uvicorn[standard]; pip install python-multipart; cd /code; python infer_biv100.py",
		"imagenet_resnet50":          "pip install -U transformers; pip install fastapi uvicorn[standard]; pip install python-multipart; cd /code/infer; python infer_biv100.py",
		"chatGLM_6B":                 "su root; pip install transformers==4.33.2; pip install fastapi uvicorn[standard]; cd /code; python infer_biv100.py",
	},
	MLU: {
		"blip-image-captioning-base": "",
		"imagenet_resnet50":          "su root; . /torch/venv3/pytorch/bin/activate; pip install fastapi uvicorn[standard]; pip install python-multipart; cd /code/infer; python infer_mlu.py",
		"chatGLM_6B":                 "",
	},
}
|
||||||
)
|
)
|
||||||
|
|
||||||
func NewOctopusLink(octopusRpc octopusclient.Octopus, name string, id int64) *OctopusLink {
|
func NewOctopusLink(octopusRpc octopusclient.Octopus, name string, id int64) *OctopusLink {
|
||||||
|
@ -654,7 +662,13 @@ func generateResourceIdForTraining(option *option.AiOption, specResp *octopus.Ge
|
||||||
|
|
||||||
func generateResourceIdForInferDeployInstance(option *option.InferOption, specResp *octopus.GetResourceSpecsResp) error {
|
func generateResourceIdForInferDeployInstance(option *option.InferOption, specResp *octopus.GetResourceSpecsResp) error {
|
||||||
// temporarily use bi-v100
|
// temporarily use bi-v100
|
||||||
cardName := cardCnMap[BIV100]
|
cardName, ok := cardCnMap[BIV100]
|
||||||
|
if !ok {
|
||||||
|
errors.New("computeCard not set")
|
||||||
|
}
|
||||||
|
|
||||||
|
// set computeCard
|
||||||
|
option.ComputeCard = BIV100
|
||||||
|
|
||||||
for _, spec := range specResp.TrainResourceSpecs {
|
for _, spec := range specResp.TrainResourceSpecs {
|
||||||
names := strings.Split(spec.Name, COMMA)
|
names := strings.Split(spec.Name, COMMA)
|
||||||
|
@ -702,32 +716,6 @@ func (o *OctopusLink) generateDatasetsId(ctx context.Context, option *option.AiO
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o *OctopusLink) generateImageId(ctx context.Context, option *option.AiOption, ifoption *option.InferOption) error {
|
func (o *OctopusLink) generateImageId(ctx context.Context, option *option.AiOption, ifoption *option.InferOption) error {
|
||||||
if option.TaskType == "" {
|
|
||||||
return errors.New("TaskType not set")
|
|
||||||
}
|
|
||||||
|
|
||||||
req := &octopus.GetUserImageListReq{
|
|
||||||
Platform: o.platform,
|
|
||||||
PageIndex: o.pageIndex,
|
|
||||||
PageSize: o.pageSize,
|
|
||||||
}
|
|
||||||
resp, err := o.octopusRpc.GetUserImageList(ctx, req)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if !resp.Success {
|
|
||||||
return errors.New("failed to get imageId")
|
|
||||||
}
|
|
||||||
|
|
||||||
if option.ResourceType == CPU {
|
|
||||||
for _, img := range resp.Payload.Images {
|
|
||||||
if img.Image.ImageName == "test-image" {
|
|
||||||
option.ImageId = img.Image.Id
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
preImgReq := &octopus.GetPresetImageListReq{
|
preImgReq := &octopus.GetPresetImageListReq{
|
||||||
Platform: o.platform,
|
Platform: o.platform,
|
||||||
PageIndex: o.pageIndex,
|
PageIndex: o.pageIndex,
|
||||||
|
@ -742,6 +730,32 @@ func (o *OctopusLink) generateImageId(ctx context.Context, option *option.AiOpti
|
||||||
}
|
}
|
||||||
|
|
||||||
if option != nil {
|
if option != nil {
|
||||||
|
if option.TaskType == "" {
|
||||||
|
return errors.New("TaskType not set")
|
||||||
|
}
|
||||||
|
|
||||||
|
req := &octopus.GetUserImageListReq{
|
||||||
|
Platform: o.platform,
|
||||||
|
PageIndex: o.pageIndex,
|
||||||
|
PageSize: o.pageSize,
|
||||||
|
}
|
||||||
|
resp, err := o.octopusRpc.GetUserImageList(ctx, req)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if !resp.Success {
|
||||||
|
return errors.New("failed to get imageId")
|
||||||
|
}
|
||||||
|
|
||||||
|
if option.ResourceType == CPU {
|
||||||
|
for _, img := range resp.Payload.Images {
|
||||||
|
if img.Image.ImageName == "test-image" {
|
||||||
|
option.ImageId = img.Image.Id
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
err = generateImageIdForTraining(option, preImgResp)
|
err = generateImageIdForTraining(option, preImgResp)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
|
@ -873,13 +887,21 @@ func generateAlgorithmIdForTraining(option *option.AiOption, resp *octopus.GetMy
|
||||||
}
|
}
|
||||||
|
|
||||||
func generateAlgorithmIdForInferDeployInstance(option *option.InferOption, resp *octopus.GetMyAlgorithmListResp) error {
|
func generateAlgorithmIdForInferDeployInstance(option *option.InferOption, resp *octopus.GetMyAlgorithmListResp) error {
|
||||||
|
if option.ModelType == "" {
|
||||||
|
return errors.New("ModelType not set")
|
||||||
|
}
|
||||||
|
|
||||||
|
if option.ModelName == "" {
|
||||||
|
return errors.New("ModelName not set")
|
||||||
|
}
|
||||||
|
|
||||||
for _, algorithm := range resp.Payload.Algorithms {
|
for _, algorithm := range resp.Payload.Algorithms {
|
||||||
if strings.Contains(algorithm.AlgorithmName, option.ModelName) {
|
if strings.Contains(algorithm.AlgorithmName, option.ModelName) {
|
||||||
option.AlgorithmId = algorithm.AlgorithmId
|
option.AlgorithmId = algorithm.AlgorithmId
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return errors.New("Algorithm does not exist")
|
return errors.New("ModelName does not exist")
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o *OctopusLink) generateCmd(option *option.AiOption, ifoption *option.InferOption) error {
|
func (o *OctopusLink) generateCmd(option *option.AiOption, ifoption *option.InferOption) error {
|
||||||
|
@ -919,7 +941,16 @@ func generateCmdForTraining(option *option.AiOption) error {
|
||||||
|
|
||||||
func generateCmdForInferDeployInstance(option *option.InferOption) error {
|
func generateCmdForInferDeployInstance(option *option.InferOption) error {
|
||||||
if option.Cmd == "" {
|
if option.Cmd == "" {
|
||||||
option.Cmd = "su root; pip install fastapi uvicorn[standard]; cd /code/infer; python infer_biv100.py"
|
nameCmd, ok := CardModelNameCmdMap[option.ComputeCard]
|
||||||
|
if !ok {
|
||||||
|
return errors.New("failed to set cmd, ComputeCard not exist")
|
||||||
|
}
|
||||||
|
cmd, ok := nameCmd[option.ModelName]
|
||||||
|
if !ok {
|
||||||
|
return errors.New("failed to set cmd, ModelName not exist")
|
||||||
|
}
|
||||||
|
option.Cmd = cmd
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
|
@ -1185,7 +1216,7 @@ func (o *OctopusLink) CreateInferDeployInstance(ctx context.Context, option *opt
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
|
||||||
desc := option.ModelType + FORWARD_SLASH + option.ModelName + FORWARD_SLASH + BIV100
|
desc := option.ModelType + FORWARD_SLASH + option.ModelName + FORWARD_SLASH + strings.ToLower(BIV100)
|
||||||
param := &octopus.CreateNotebookParam{
|
param := &octopus.CreateNotebookParam{
|
||||||
Name: option.TaskName,
|
Name: option.TaskName,
|
||||||
ResourcePool: RESOURCE_POOL,
|
ResourcePool: RESOURCE_POOL,
|
||||||
|
|
|
@ -6034,3 +6034,12 @@ type GetDeployTasksReq struct {
|
||||||
type GetDeployTasksResp struct {
|
type GetDeployTasksResp struct {
|
||||||
PageResult
|
PageResult
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetRunningInstanceReq selects running inference instances by model.
//
// The route /inference/getInstanceByModel declares no path variables, so
// both selectors are bound from the query string (form tag). With path tags
// the fields could never be populated and request parsing would reject
// every call.
type GetRunningInstanceReq struct {
	ModelType string `form:"modelType"` // model category to filter by
	ModelName string `form:"modelName"` // model name to filter by
}
|
||||||
|
|
||||||
|
// GetRunningInstanceResp is the response body for the
// running-instance-by-model query.
type GetRunningInstanceResp struct {
	// List holds the query result; it is dropped from the JSON output when
	// nil (omitempty).
	List interface{} `json:"list,omitempty"`
}
|
||||||
|
|
Loading…
Reference in New Issue