Added deploy instance logic
Former-commit-id: 84e32bcb695a2a89210a0a912f7bd42649f1dcf3
This commit is contained in:
parent
d24f5dd7a8
commit
61d4ea3af7
4
go.mod
4
go.mod
|
@ -18,9 +18,9 @@ require (
|
|||
github.com/prometheus/common v0.54.0
|
||||
github.com/robfig/cron/v3 v3.0.1
|
||||
github.com/zeromicro/go-zero v1.6.5
|
||||
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240712090657-cfba062e68e1
|
||||
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240722032933-7bd6ab00d249
|
||||
gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240620065702-5dcad373c1fe
|
||||
gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240510133934-6a5526289b35
|
||||
gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240718073732-bc5d687f6330
|
||||
gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240403033338-e7edabad4203
|
||||
gitlink.org.cn/JointCloud/pcm-slurm v0.0.0-20240301080743-8b94bbaf57f5
|
||||
gitlink.org.cn/jcce-pcm/pcm-participant-ceph v0.0.0-20230904090036-24fc730ec87d
|
||||
|
|
8
go.sum
8
go.sum
|
@ -471,12 +471,12 @@ github.com/yuin/gopher-lua v1.1.1 h1:kYKnWBjvbNP4XLT3+bPEwAXJx262OhaHDWDVOPjL46M
|
|||
github.com/yuin/gopher-lua v1.1.1/go.mod h1:GBR0iDaNXjAgGg9zfCvksxSRnQx76gclCIb7kdAd1Pw=
|
||||
github.com/zeromicro/go-zero v1.6.5 h1:JgsBa25/knnEL7+KQksbwktudIkNQvaAin0nisVgnSA=
|
||||
github.com/zeromicro/go-zero v1.6.5/go.mod h1:XjbssEVEzFKueAh0Fie5kNf+cRqFlQQk46fY9WgEGaM=
|
||||
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240712090657-cfba062e68e1 h1:Wc9M/vq+9Iw49KZb6mgHj85sysGHjVY+QlHJeZKlx4w=
|
||||
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240712090657-cfba062e68e1/go.mod h1:3eECiw9O2bIFkkePlloKyLNXiqBAhOxNrDoGaaGseGY=
|
||||
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240722032933-7bd6ab00d249 h1:bHJGq5P+8w4fP62PZhIiq/fvOhvDPRtkM4pcmU8OZ1w=
|
||||
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240722032933-7bd6ab00d249/go.mod h1:3eECiw9O2bIFkkePlloKyLNXiqBAhOxNrDoGaaGseGY=
|
||||
gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240620065702-5dcad373c1fe h1:teAWL7sJszDb1ZA7uptrzPSwJ1OIV840Q1/nrrDsx7E=
|
||||
gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240620065702-5dcad373c1fe/go.mod h1:/eOmBFZKWGoabG3sRVkVvIbLwsd2631k4jkUBR6x1AA=
|
||||
gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240510133934-6a5526289b35 h1:E2QfpS3Y0FjR8Zyv5l2Ti/2NetQFqHG66c8+T/+J1u0=
|
||||
gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240510133934-6a5526289b35/go.mod h1:QOD5+/l2D+AYBjF2h5T0mdJyfGAmF78QmeKdbBXbjLQ=
|
||||
gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240718073732-bc5d687f6330 h1:WxPrFSO6LjDCr+k7nmNFlPst8CtoTHQ2iSjv+D2rNnM=
|
||||
gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240718073732-bc5d687f6330/go.mod h1:QOD5+/l2D+AYBjF2h5T0mdJyfGAmF78QmeKdbBXbjLQ=
|
||||
gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240403033338-e7edabad4203 h1:s6PsZ1+bev294IWdZRlV7mnOwI1+UzFcldVW/BqhQzI=
|
||||
gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240403033338-e7edabad4203/go.mod h1:i2rrbMQ+Fve345BY9Heh4MUqVTAimZQElQhzzRee5B8=
|
||||
gitlink.org.cn/JointCloud/pcm-slurm v0.0.0-20240301080743-8b94bbaf57f5 h1:+/5vnzkJBfMRnya1NrhOzlroUtRa5ePiYbPKlHLoLV0=
|
||||
|
|
|
@ -11,7 +11,10 @@ const (
|
|||
|
||||
type ICluster interface {
|
||||
GetInferUrl(ctx context.Context, option *option.InferOption) ([]*InferUrl, error)
|
||||
GetInferDeployInstanceList(ctx context.Context, option *option.InferOption) ([]*DeployInstance, error)
|
||||
GetInferDeployInstanceList(ctx context.Context) ([]*DeployInstance, error)
|
||||
StartInferDeployInstance(ctx context.Context, id string) bool
|
||||
StopInferDeployInstance(ctx context.Context, id string) bool
|
||||
GetInferDeployInstance(ctx context.Context, id string) (*DeployInstance, error)
|
||||
}
|
||||
|
||||
type IInference interface {
|
||||
|
@ -29,4 +32,12 @@ type InferUrl struct {
|
|||
}
|
||||
|
||||
// DeployInstance is the cluster-independent description of one deployed
// inference instance, as returned by the ICluster deploy-instance methods.
// All fields are plain strings; the adapters in this change copy them from
// the backend response without conversion.
type DeployInstance struct {
|
||||
// InstanceName is the backend's name for the instance.
InstanceName string
|
||||
// InstanceId is the backend's identifier; it is the id passed to the
// start/stop methods.
InstanceId string
|
||||
// ModelName is not populated by the adapters visible in this change.
ModelName string
|
||||
// ModelType is not populated by the adapters visible in this change.
ModelType string
|
||||
// InferCard names the accelerator type (the ShuguangAi adapter sets DCU).
InferCard string
|
||||
// ClusterName is filled with the adapter's platform name.
ClusterName string
|
||||
// Status is the backend-reported status string, passed through unchanged.
Status string
|
||||
// CreatedTime is the backend-reported creation time, passed through as a
// string. NOTE(review): format is backend-specific — confirm before parsing.
CreatedTime string
|
||||
}
|
||||
|
|
|
@ -399,6 +399,18 @@ func (m *ModelArtsLink) GetInferUrl(ctx context.Context, option *option.InferOpt
|
|||
return imageUrls, nil
|
||||
}
|
||||
|
||||
func (m *ModelArtsLink) GetInferDeployInstanceList(ctx context.Context, option *option.InferOption) ([]*inference.DeployInstance, error) {
|
||||
// GetInferDeployInstanceList is a stub: it ignores ctx and always returns a
// nil slice with a nil error.
// NOTE(review): callers cannot tell "not implemented" from "no instances".
func (m *ModelArtsLink) GetInferDeployInstanceList(ctx context.Context) ([]*inference.DeployInstance, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// StartInferDeployInstance is a stub: it ignores ctx and id and always
// reports false.
func (m *ModelArtsLink) StartInferDeployInstance(ctx context.Context, id string) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// StopInferDeployInstance is a stub: it ignores ctx and id and always
// reports false.
func (m *ModelArtsLink) StopInferDeployInstance(ctx context.Context, id string) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
// GetInferDeployInstance is a stub: it ignores ctx and id and always returns
// a nil instance with a nil error.
// NOTE(review): callers must nil-check the instance even when err is nil.
func (m *ModelArtsLink) GetInferDeployInstance(ctx context.Context, id string) (*inference.DeployInstance, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
|
|
@ -904,6 +904,56 @@ func (o *OctopusLink) GetInferUrl(ctx context.Context, option *option.InferOptio
|
|||
return imageUrls, nil
|
||||
}
|
||||
|
||||
func (o *OctopusLink) GetInferDeployInstanceList(ctx context.Context, option *option.InferOption) ([]*inference.DeployInstance, error) {
|
||||
func (o *OctopusLink) GetInferDeployInstanceList(ctx context.Context) ([]*inference.DeployInstance, error) {
|
||||
var insList []*inference.DeployInstance
|
||||
req := &octopus.GetNotebookListReq{
|
||||
Platform: o.platform,
|
||||
PageIndex: o.pageIndex,
|
||||
PageSize: o.pageSize,
|
||||
SearchKey: DEPLOY_INSTANCE_PREFIEX,
|
||||
}
|
||||
list, err := o.octopusRpc.GetNotebookList(ctx, req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if list.Error != nil {
|
||||
return nil, errors.New(list.Error.Message)
|
||||
}
|
||||
for _, notebook := range list.Payload.Notebooks {
|
||||
ins := &inference.DeployInstance{}
|
||||
ins.InstanceName = notebook.Name
|
||||
ins.InstanceId = notebook.Id
|
||||
ins.ClusterName = o.platform
|
||||
ins.Status = notebook.Status
|
||||
insList = append(insList, ins)
|
||||
}
|
||||
return insList, nil
|
||||
}
|
||||
|
||||
func (o *OctopusLink) StartInferDeployInstance(ctx context.Context, id string) bool {
|
||||
req := &octopus.StartNotebookReq{
|
||||
Platform: o.platform,
|
||||
Id: id,
|
||||
}
|
||||
resp, err := o.octopusRpc.StartNotebook(ctx, req)
|
||||
if err != nil || !resp.Success {
|
||||
return false
|
||||
}
|
||||
return resp.Success
|
||||
}
|
||||
|
||||
func (o *OctopusLink) StopInferDeployInstance(ctx context.Context, id string) bool {
|
||||
req := &octopus.StopNotebookReq{
|
||||
Platform: o.platform,
|
||||
Id: id,
|
||||
}
|
||||
resp, err := o.octopusRpc.StopNotebook(ctx, req)
|
||||
if err != nil || !resp.Success {
|
||||
return false
|
||||
}
|
||||
return resp.Success
|
||||
}
|
||||
|
||||
// GetInferDeployInstance is a stub: it ignores ctx and id and always returns
// a nil instance with a nil error.
// NOTE(review): callers must nil-check the instance even when err is nil.
func (o *OctopusLink) GetInferDeployInstance(ctx context.Context, id string) (*inference.DeployInstance, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
|
|
@ -32,24 +32,25 @@ import (
|
|||
)
|
||||
|
||||
const (
|
||||
RAM_SIZE_1G = 1024 // 1G
|
||||
WORKER_NUMBER = 1
|
||||
DCU = "DCU"
|
||||
DCU_TOPS = 24.5
|
||||
PYTORCH = "Pytorch"
|
||||
TASK_PYTORCH_PREFIX = "PytorchTask"
|
||||
TENSORFLOW = "Tensorflow"
|
||||
RESOURCE_GROUP = "wzhdtest"
|
||||
WorkPath = "/work/home/acgnnmfbwo/pcmv1/"
|
||||
TimeoutLimit = "10:00:00"
|
||||
PythonCodePath = "/work/home/acgnnmfbwo/111111/py/test.py"
|
||||
DATASETS_DIR = "/work/home/acgnnmfbwo/pcmv1/dataset"
|
||||
ALGORITHM_DIR = "/work/home/acgnnmfbwo/pcmv1/algorithm"
|
||||
TRAIN_FILE = "train.py"
|
||||
CPUCOREPRICEPERHOUR = 0.09
|
||||
DCUPRICEPERHOUR = 2.0
|
||||
KB = 1024
|
||||
TIMEOUT = 20
|
||||
RAM_SIZE_1G = 1024 // 1G
|
||||
WORKER_NUMBER = 1
|
||||
DCU = "DCU"
|
||||
DCU_TOPS = 24.5
|
||||
PYTORCH = "Pytorch"
|
||||
TASK_PYTORCH_PREFIX = "PytorchTask"
|
||||
TENSORFLOW = "Tensorflow"
|
||||
RESOURCE_GROUP = "wzhdtest"
|
||||
WorkPath = "/work/home/acgnnmfbwo/pcmv1/"
|
||||
TimeoutLimit = "10:00:00"
|
||||
PythonCodePath = "/work/home/acgnnmfbwo/111111/py/test.py"
|
||||
DATASETS_DIR = "/work/home/acgnnmfbwo/pcmv1/dataset"
|
||||
ALGORITHM_DIR = "/work/home/acgnnmfbwo/pcmv1/algorithm"
|
||||
TRAIN_FILE = "train.py"
|
||||
CPUCOREPRICEPERHOUR = 0.09
|
||||
DCUPRICEPERHOUR = 2.0
|
||||
KB = 1024
|
||||
TIMEOUT = 20
|
||||
DEPLOY_INSTANCE_LIMIT = 100
|
||||
)
|
||||
|
||||
var RESOURCESGAIMAP = map[string]ResourceSpecSGAI{
|
||||
|
@ -789,6 +790,66 @@ func (s *ShuguangAi) GetInferUrl(ctx context.Context, option *option.InferOption
|
|||
return imageUrls, nil
|
||||
}
|
||||
|
||||
func (s *ShuguangAi) GetInferDeployInstanceList(ctx context.Context, option *option.InferOption) ([]*inference.DeployInstance, error) {
|
||||
func (s *ShuguangAi) GetInferDeployInstanceList(ctx context.Context) ([]*inference.DeployInstance, error) {
|
||||
var insList []*inference.DeployInstance
|
||||
params := &hpcAC.GetInstanceServiceListReqParam{
|
||||
InstanceServiceName: DEPLOY_INSTANCE_PREFIEX,
|
||||
Start: 0,
|
||||
Limit: DEPLOY_INSTANCE_LIMIT,
|
||||
}
|
||||
req := &hpcacclient.GetInstanceServiceListReq{
|
||||
Param: params,
|
||||
}
|
||||
list, err := s.aCRpc.GetInstanceServiceList(ctx, req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if list.Code != "0" {
|
||||
return nil, errors.New(list.Msg)
|
||||
}
|
||||
for _, datum := range list.Data {
|
||||
ins := &inference.DeployInstance{}
|
||||
ins.InstanceName = datum.InstanceServiceName
|
||||
ins.InstanceId = datum.Id
|
||||
ins.ClusterName = s.platform
|
||||
ins.Status = datum.Status
|
||||
ins.InferCard = DCU
|
||||
ins.CreatedTime = datum.CreateTime
|
||||
insList = append(insList, ins)
|
||||
}
|
||||
|
||||
return insList, nil
|
||||
}
|
||||
|
||||
func (s *ShuguangAi) StartInferDeployInstance(ctx context.Context, id string) bool {
|
||||
req := &hpcAC.StartInstanceServiceReq{
|
||||
InstanceServiceId: id,
|
||||
}
|
||||
resp, err := s.aCRpc.StartInstanceService(ctx, req)
|
||||
if err != nil || resp.Code != "0" {
|
||||
return false
|
||||
}
|
||||
if resp.Data == id && resp.Code == "0" {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (s *ShuguangAi) StopInferDeployInstance(ctx context.Context, id string) bool {
|
||||
ids := []string{id}
|
||||
req := &hpcAC.StopInstanceServiceReq{
|
||||
Ids: ids,
|
||||
}
|
||||
resp, err := s.aCRpc.StopInstanceService(ctx, req)
|
||||
if err != nil || resp.Code != "0" {
|
||||
return false
|
||||
}
|
||||
if resp.Code == "0" {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// GetInferDeployInstance is a stub: it ignores ctx and id and always returns
// a nil instance with a nil error.
// NOTE(review): callers must nil-check the instance even when err is nil.
func (s *ShuguangAi) GetInferDeployInstance(ctx context.Context, id string) (*inference.DeployInstance, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
|
|
@ -46,28 +46,29 @@ type Linkage interface {
|
|||
}
|
||||
|
||||
const (
|
||||
COLON = ":"
|
||||
PY_PARAM_PREFIX = "--"
|
||||
SPACE = " "
|
||||
UNDERSCORE = "_"
|
||||
EQUAL = "="
|
||||
DASH = "-"
|
||||
FORWARD_SLASH = "/"
|
||||
COMMA = ","
|
||||
STAR = "*"
|
||||
TYPE_OCTOPUS = "1"
|
||||
TYPE_MODELARTS = "2"
|
||||
TYPE_SHUGUANGAI = "3"
|
||||
TYPE_SHUGUANGHPC = "4"
|
||||
OCTOPUS = "Octopus"
|
||||
MODELARTS = "Modelarts"
|
||||
SHUGUANGAI = "ShuguangAi"
|
||||
SHUGUANGHPC = "ShuguangHpc"
|
||||
CPU = "cpu"
|
||||
GPU = "gpu"
|
||||
CARD = "computeCard"
|
||||
PYTORCH_TASK = "pytorch"
|
||||
TENSORFLOW_TASK = "tensorflow"
|
||||
COLON = ":"
|
||||
PY_PARAM_PREFIX = "--"
|
||||
SPACE = " "
|
||||
UNDERSCORE = "_"
|
||||
EQUAL = "="
|
||||
DASH = "-"
|
||||
FORWARD_SLASH = "/"
|
||||
COMMA = ","
|
||||
STAR = "*"
|
||||
TYPE_OCTOPUS = "1"
|
||||
TYPE_MODELARTS = "2"
|
||||
TYPE_SHUGUANGAI = "3"
|
||||
TYPE_SHUGUANGHPC = "4"
|
||||
OCTOPUS = "Octopus"
|
||||
MODELARTS = "Modelarts"
|
||||
SHUGUANGAI = "ShuguangAi"
|
||||
SHUGUANGHPC = "ShuguangHpc"
|
||||
CPU = "cpu"
|
||||
GPU = "gpu"
|
||||
CARD = "computeCard"
|
||||
PYTORCH_TASK = "pytorch"
|
||||
TENSORFLOW_TASK = "tensorflow"
|
||||
DEPLOY_INSTANCE_PREFIEX = "infer"
|
||||
)
|
||||
|
||||
var (
|
||||
|
|
Loading…
Reference in New Issue