Added deploy instance logic

Former-commit-id: 84e32bcb695a2a89210a0a912f7bd42649f1dcf3
This commit is contained in:
tzwang 2024-07-22 15:47:20 +08:00
parent d24f5dd7a8
commit 61d4ea3af7
7 changed files with 185 additions and 50 deletions

4
go.mod
View File

@ -18,9 +18,9 @@ require (
github.com/prometheus/common v0.54.0
github.com/robfig/cron/v3 v3.0.1
github.com/zeromicro/go-zero v1.6.5
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240712090657-cfba062e68e1
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240722032933-7bd6ab00d249
gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240620065702-5dcad373c1fe
gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240510133934-6a5526289b35
gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240718073732-bc5d687f6330
gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240403033338-e7edabad4203
gitlink.org.cn/JointCloud/pcm-slurm v0.0.0-20240301080743-8b94bbaf57f5
gitlink.org.cn/jcce-pcm/pcm-participant-ceph v0.0.0-20230904090036-24fc730ec87d

8
go.sum
View File

@ -471,12 +471,12 @@ github.com/yuin/gopher-lua v1.1.1 h1:kYKnWBjvbNP4XLT3+bPEwAXJx262OhaHDWDVOPjL46M
github.com/yuin/gopher-lua v1.1.1/go.mod h1:GBR0iDaNXjAgGg9zfCvksxSRnQx76gclCIb7kdAd1Pw=
github.com/zeromicro/go-zero v1.6.5 h1:JgsBa25/knnEL7+KQksbwktudIkNQvaAin0nisVgnSA=
github.com/zeromicro/go-zero v1.6.5/go.mod h1:XjbssEVEzFKueAh0Fie5kNf+cRqFlQQk46fY9WgEGaM=
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240712090657-cfba062e68e1 h1:Wc9M/vq+9Iw49KZb6mgHj85sysGHjVY+QlHJeZKlx4w=
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240712090657-cfba062e68e1/go.mod h1:3eECiw9O2bIFkkePlloKyLNXiqBAhOxNrDoGaaGseGY=
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240722032933-7bd6ab00d249 h1:bHJGq5P+8w4fP62PZhIiq/fvOhvDPRtkM4pcmU8OZ1w=
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240722032933-7bd6ab00d249/go.mod h1:3eECiw9O2bIFkkePlloKyLNXiqBAhOxNrDoGaaGseGY=
gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240620065702-5dcad373c1fe h1:teAWL7sJszDb1ZA7uptrzPSwJ1OIV840Q1/nrrDsx7E=
gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240620065702-5dcad373c1fe/go.mod h1:/eOmBFZKWGoabG3sRVkVvIbLwsd2631k4jkUBR6x1AA=
gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240510133934-6a5526289b35 h1:E2QfpS3Y0FjR8Zyv5l2Ti/2NetQFqHG66c8+T/+J1u0=
gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240510133934-6a5526289b35/go.mod h1:QOD5+/l2D+AYBjF2h5T0mdJyfGAmF78QmeKdbBXbjLQ=
gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240718073732-bc5d687f6330 h1:WxPrFSO6LjDCr+k7nmNFlPst8CtoTHQ2iSjv+D2rNnM=
gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240718073732-bc5d687f6330/go.mod h1:QOD5+/l2D+AYBjF2h5T0mdJyfGAmF78QmeKdbBXbjLQ=
gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240403033338-e7edabad4203 h1:s6PsZ1+bev294IWdZRlV7mnOwI1+UzFcldVW/BqhQzI=
gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240403033338-e7edabad4203/go.mod h1:i2rrbMQ+Fve345BY9Heh4MUqVTAimZQElQhzzRee5B8=
gitlink.org.cn/JointCloud/pcm-slurm v0.0.0-20240301080743-8b94bbaf57f5 h1:+/5vnzkJBfMRnya1NrhOzlroUtRa5ePiYbPKlHLoLV0=

View File

@ -11,7 +11,10 @@ const (
// ICluster lists the inference operations a cluster backend provides:
// inference URL lookup plus listing, starting, stopping and fetching
// deploy instances.
type ICluster interface {
GetInferUrl(ctx context.Context, option *option.InferOption) ([]*InferUrl, error)
GetInferDeployInstanceList(ctx context.Context, option *option.InferOption) ([]*DeployInstance, error)
// GetInferDeployInstanceList returns the deploy instances known to the cluster.
GetInferDeployInstanceList(ctx context.Context) ([]*DeployInstance, error)
// StartInferDeployInstance starts the deploy instance identified by id and
// reports whether the request succeeded; no error detail is returned.
StartInferDeployInstance(ctx context.Context, id string) bool
// StopInferDeployInstance stops the deploy instance identified by id and
// reports whether the request succeeded; no error detail is returned.
StopInferDeployInstance(ctx context.Context, id string) bool
// GetInferDeployInstance returns the deploy instance identified by id.
GetInferDeployInstance(ctx context.Context, id string) (*DeployInstance, error)
}
type IInference interface {
@ -29,4 +32,12 @@ type InferUrl struct {
}
// DeployInstance describes one inference deploy instance as reported by a
// cluster backend. Backends fill in only the fields they can provide.
type DeployInstance struct {
// InstanceName is the backend's name for the instance.
InstanceName string
// InstanceId is the backend's identifier; it is the id passed to the
// start/stop operations.
InstanceId string
// ModelName is the name of the deployed model.
// NOTE(review): not populated by the backends visible here — confirm the source.
ModelName string
// ModelType is the type of the deployed model.
// NOTE(review): not populated by the backends visible here — confirm the source.
ModelType string
// InferCard names the accelerator card used for inference (e.g. "DCU").
InferCard string
// ClusterName is the platform name of the cluster hosting the instance.
ClusterName string
// Status is the status string exactly as returned by the backend.
Status string
// CreatedTime is the creation time exactly as returned by the backend.
CreatedTime string
}

View File

@ -399,6 +399,18 @@ func (m *ModelArtsLink) GetInferUrl(ctx context.Context, option *option.InferOpt
return imageUrls, nil
}
func (m *ModelArtsLink) GetInferDeployInstanceList(ctx context.Context, option *option.InferOption) ([]*inference.DeployInstance, error) {
// GetInferDeployInstanceList is a placeholder for ModelArts: it performs no
// lookup and always returns (nil, nil).
func (m *ModelArtsLink) GetInferDeployInstanceList(ctx context.Context) ([]*inference.DeployInstance, error) {
return nil, nil
}
// StartInferDeployInstance is a placeholder for ModelArts: it ignores id,
// starts nothing and always returns false.
func (m *ModelArtsLink) StartInferDeployInstance(ctx context.Context, id string) bool {
return false
}
// StopInferDeployInstance is a placeholder for ModelArts: it ignores id,
// stops nothing and always returns false.
func (m *ModelArtsLink) StopInferDeployInstance(ctx context.Context, id string) bool {
return false
}
// GetInferDeployInstance is a placeholder for ModelArts: it ignores id and
// always returns (nil, nil), so callers must check the instance for nil even
// when the error is nil.
func (m *ModelArtsLink) GetInferDeployInstance(ctx context.Context, id string) (*inference.DeployInstance, error) {
return nil, nil
}

View File

@ -904,6 +904,56 @@ func (o *OctopusLink) GetInferUrl(ctx context.Context, option *option.InferOptio
return imageUrls, nil
}
func (o *OctopusLink) GetInferDeployInstanceList(ctx context.Context, option *option.InferOption) ([]*inference.DeployInstance, error) {
// GetInferDeployInstanceList asks Octopus for the notebooks matching
// DEPLOY_INSTANCE_PREFIEX (sent as SearchKey) and converts each one into a
// DeployInstance.
//
// Only the page selected by o.pageIndex / o.pageSize is requested. The RPC
// error is returned unchanged; a response carrying a non-nil Error is turned
// into an error holding its Message. The result is a nil slice when no
// notebooks are returned.
func (o *OctopusLink) GetInferDeployInstanceList(ctx context.Context) ([]*inference.DeployInstance, error) {
	resp, err := o.octopusRpc.GetNotebookList(ctx, &octopus.GetNotebookListReq{
		Platform:  o.platform,
		PageIndex: o.pageIndex,
		PageSize:  o.pageSize,
		SearchKey: DEPLOY_INSTANCE_PREFIEX,
	})
	if err != nil {
		return nil, err
	}
	if resp.Error != nil {
		return nil, errors.New(resp.Error.Message)
	}

	var instances []*inference.DeployInstance
	for _, nb := range resp.Payload.Notebooks {
		instances = append(instances, &inference.DeployInstance{
			InstanceName: nb.Name,
			InstanceId:   nb.Id,
			ClusterName:  o.platform,
			Status:       nb.Status,
		})
	}
	return instances, nil
}
// StartInferDeployInstance asks Octopus to start the notebook identified by
// id on o.platform. It returns true only when the RPC returns no error and
// the response reports Success; the error itself is not surfaced.
func (o *OctopusLink) StartInferDeployInstance(ctx context.Context, id string) bool {
	resp, err := o.octopusRpc.StartNotebook(ctx, &octopus.StartNotebookReq{
		Platform: o.platform,
		Id:       id,
	})
	// Short-circuit keeps resp untouched when the call itself failed.
	return err == nil && resp.Success
}
// StopInferDeployInstance asks Octopus to stop the notebook identified by id
// on o.platform. It returns true only when the RPC returns no error and the
// response reports Success; the error itself is not surfaced.
func (o *OctopusLink) StopInferDeployInstance(ctx context.Context, id string) bool {
	resp, err := o.octopusRpc.StopNotebook(ctx, &octopus.StopNotebookReq{
		Platform: o.platform,
		Id:       id,
	})
	// Short-circuit keeps resp untouched when the call itself failed.
	return err == nil && resp.Success
}
// GetInferDeployInstance is a placeholder for Octopus: it ignores id and
// always returns (nil, nil), so callers must check the instance for nil even
// when the error is nil.
func (o *OctopusLink) GetInferDeployInstance(ctx context.Context, id string) (*inference.DeployInstance, error) {
return nil, nil
}

View File

@ -32,24 +32,25 @@ import (
)
// Constants used by the ShuguangAi cluster code in this file.
const (
RAM_SIZE_1G = 1024 // 1G
WORKER_NUMBER = 1
DCU = "DCU"
DCU_TOPS = 24.5
PYTORCH = "Pytorch"
TASK_PYTORCH_PREFIX = "PytorchTask"
TENSORFLOW = "Tensorflow"
RESOURCE_GROUP = "wzhdtest"
WorkPath = "/work/home/acgnnmfbwo/pcmv1/"
TimeoutLimit = "10:00:00"
PythonCodePath = "/work/home/acgnnmfbwo/111111/py/test.py"
DATASETS_DIR = "/work/home/acgnnmfbwo/pcmv1/dataset"
ALGORITHM_DIR = "/work/home/acgnnmfbwo/pcmv1/algorithm"
TRAIN_FILE = "train.py"
CPUCOREPRICEPERHOUR = 0.09
DCUPRICEPERHOUR = 2.0
KB = 1024
TIMEOUT = 20
RAM_SIZE_1G = 1024 // 1G
WORKER_NUMBER = 1
DCU = "DCU"
DCU_TOPS = 24.5
PYTORCH = "Pytorch"
TASK_PYTORCH_PREFIX = "PytorchTask"
TENSORFLOW = "Tensorflow"
RESOURCE_GROUP = "wzhdtest"
WorkPath = "/work/home/acgnnmfbwo/pcmv1/"
TimeoutLimit = "10:00:00"
PythonCodePath = "/work/home/acgnnmfbwo/111111/py/test.py"
DATASETS_DIR = "/work/home/acgnnmfbwo/pcmv1/dataset"
ALGORITHM_DIR = "/work/home/acgnnmfbwo/pcmv1/algorithm"
TRAIN_FILE = "train.py"
CPUCOREPRICEPERHOUR = 0.09
DCUPRICEPERHOUR = 2.0
KB = 1024
TIMEOUT = 20
// DEPLOY_INSTANCE_LIMIT is the Limit sent with the instance-service list
// request, i.e. the maximum number of deploy instances fetched in one call.
DEPLOY_INSTANCE_LIMIT = 100
)
var RESOURCESGAIMAP = map[string]ResourceSpecSGAI{
@ -789,6 +790,66 @@ func (s *ShuguangAi) GetInferUrl(ctx context.Context, option *option.InferOption
return imageUrls, nil
}
func (s *ShuguangAi) GetInferDeployInstanceList(ctx context.Context, option *option.InferOption) ([]*inference.DeployInstance, error) {
// GetInferDeployInstanceList asks the AC service for instance services whose
// name matches DEPLOY_INSTANCE_PREFIEX and converts each one into a
// DeployInstance with InferCard set to DCU.
//
// A single request is made, starting at offset 0 and limited to
// DEPLOY_INSTANCE_LIMIT entries. The RPC error is returned unchanged; a
// response whose Code is not "0" is turned into an error holding its Msg.
// The result is a nil slice when no entries are returned.
func (s *ShuguangAi) GetInferDeployInstanceList(ctx context.Context) ([]*inference.DeployInstance, error) {
	resp, err := s.aCRpc.GetInstanceServiceList(ctx, &hpcacclient.GetInstanceServiceListReq{
		Param: &hpcAC.GetInstanceServiceListReqParam{
			InstanceServiceName: DEPLOY_INSTANCE_PREFIEX,
			Start:               0,
			Limit:               DEPLOY_INSTANCE_LIMIT,
		},
	})
	if err != nil {
		return nil, err
	}
	if resp.Code != "0" {
		return nil, errors.New(resp.Msg)
	}

	var instances []*inference.DeployInstance
	for _, svc := range resp.Data {
		instances = append(instances, &inference.DeployInstance{
			InstanceName: svc.InstanceServiceName,
			InstanceId:   svc.Id,
			ClusterName:  s.platform,
			Status:       svc.Status,
			InferCard:    DCU,
			CreatedTime:  svc.CreateTime,
		})
	}
	return instances, nil
}
// StartInferDeployInstance asks the AC service to start the instance service
// identified by id. It returns true only when the RPC returns no error, the
// response Code is "0" and the response Data echoes the requested id; the
// error itself is not surfaced.
func (s *ShuguangAi) StartInferDeployInstance(ctx context.Context, id string) bool {
	resp, err := s.aCRpc.StartInstanceService(ctx, &hpcAC.StartInstanceServiceReq{
		InstanceServiceId: id,
	})
	if err != nil || resp.Code != "0" {
		return false
	}
	// Code is already known to be "0" here, so only the echoed id remains
	// to be verified.
	return resp.Data == id
}
// StopInferDeployInstance asks the AC service to stop the instance service
// identified by id (sent as a one-element Ids list). It returns true only
// when the RPC returns no error and the response Code is "0"; the error
// itself is not surfaced.
func (s *ShuguangAi) StopInferDeployInstance(ctx context.Context, id string) bool {
	resp, err := s.aCRpc.StopInstanceService(ctx, &hpcAC.StopInstanceServiceReq{
		Ids: []string{id},
	})
	// Short-circuit keeps resp untouched when the call itself failed.
	return err == nil && resp.Code == "0"
}
// GetInferDeployInstance is a placeholder for ShuguangAi: it ignores id and
// always returns (nil, nil), so callers must check the instance for nil even
// when the error is nil.
func (s *ShuguangAi) GetInferDeployInstance(ctx context.Context, id string) (*inference.DeployInstance, error) {
return nil, nil
}

View File

@ -46,28 +46,29 @@ type Linkage interface {
}
// Shared string constants: separators, cluster type codes and names,
// resource kinds, task kinds and the deploy-instance name filter.
const (
COLON = ":"
PY_PARAM_PREFIX = "--"
SPACE = " "
UNDERSCORE = "_"
EQUAL = "="
DASH = "-"
FORWARD_SLASH = "/"
COMMA = ","
STAR = "*"
TYPE_OCTOPUS = "1"
TYPE_MODELARTS = "2"
TYPE_SHUGUANGAI = "3"
TYPE_SHUGUANGHPC = "4"
OCTOPUS = "Octopus"
MODELARTS = "Modelarts"
SHUGUANGAI = "ShuguangAi"
SHUGUANGHPC = "ShuguangHpc"
CPU = "cpu"
GPU = "gpu"
CARD = "computeCard"
PYTORCH_TASK = "pytorch"
TENSORFLOW_TASK = "tensorflow"
COLON = ":"
PY_PARAM_PREFIX = "--"
SPACE = " "
UNDERSCORE = "_"
EQUAL = "="
DASH = "-"
FORWARD_SLASH = "/"
COMMA = ","
STAR = "*"
TYPE_OCTOPUS = "1"
TYPE_MODELARTS = "2"
TYPE_SHUGUANGAI = "3"
TYPE_SHUGUANGHPC = "4"
OCTOPUS = "Octopus"
MODELARTS = "Modelarts"
SHUGUANGAI = "ShuguangAi"
SHUGUANGHPC = "ShuguangHpc"
CPU = "cpu"
GPU = "gpu"
CARD = "computeCard"
PYTORCH_TASK = "pytorch"
TENSORFLOW_TASK = "tensorflow"
// DEPLOY_INSTANCE_PREFIEX is the name filter sent to the cluster backends
// when listing deploy instances (as the search key / instance service name).
// NOTE(review): "PREFIEX" looks like a misspelling of "PREFIX"; renaming it
// requires updating every reference. Whether the backends match it as a
// prefix or as a substring is not visible here — confirm.
DEPLOY_INSTANCE_PREFIEX = "infer"
)
var (