added deployinstance logics

Former-commit-id: 84e32bcb695a2a89210a0a912f7bd42649f1dcf3
This commit is contained in:
tzwang 2024-07-22 15:47:20 +08:00
parent d24f5dd7a8
commit 61d4ea3af7
7 changed files with 185 additions and 50 deletions

4
go.mod
View File

@ -18,9 +18,9 @@ require (
github.com/prometheus/common v0.54.0 github.com/prometheus/common v0.54.0
github.com/robfig/cron/v3 v3.0.1 github.com/robfig/cron/v3 v3.0.1
github.com/zeromicro/go-zero v1.6.5 github.com/zeromicro/go-zero v1.6.5
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240712090657-cfba062e68e1 gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240722032933-7bd6ab00d249
gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240620065702-5dcad373c1fe gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240620065702-5dcad373c1fe
gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240510133934-6a5526289b35 gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240718073732-bc5d687f6330
gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240403033338-e7edabad4203 gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240403033338-e7edabad4203
gitlink.org.cn/JointCloud/pcm-slurm v0.0.0-20240301080743-8b94bbaf57f5 gitlink.org.cn/JointCloud/pcm-slurm v0.0.0-20240301080743-8b94bbaf57f5
gitlink.org.cn/jcce-pcm/pcm-participant-ceph v0.0.0-20230904090036-24fc730ec87d gitlink.org.cn/jcce-pcm/pcm-participant-ceph v0.0.0-20230904090036-24fc730ec87d

8
go.sum
View File

@ -471,12 +471,12 @@ github.com/yuin/gopher-lua v1.1.1 h1:kYKnWBjvbNP4XLT3+bPEwAXJx262OhaHDWDVOPjL46M
github.com/yuin/gopher-lua v1.1.1/go.mod h1:GBR0iDaNXjAgGg9zfCvksxSRnQx76gclCIb7kdAd1Pw= github.com/yuin/gopher-lua v1.1.1/go.mod h1:GBR0iDaNXjAgGg9zfCvksxSRnQx76gclCIb7kdAd1Pw=
github.com/zeromicro/go-zero v1.6.5 h1:JgsBa25/knnEL7+KQksbwktudIkNQvaAin0nisVgnSA= github.com/zeromicro/go-zero v1.6.5 h1:JgsBa25/knnEL7+KQksbwktudIkNQvaAin0nisVgnSA=
github.com/zeromicro/go-zero v1.6.5/go.mod h1:XjbssEVEzFKueAh0Fie5kNf+cRqFlQQk46fY9WgEGaM= github.com/zeromicro/go-zero v1.6.5/go.mod h1:XjbssEVEzFKueAh0Fie5kNf+cRqFlQQk46fY9WgEGaM=
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240712090657-cfba062e68e1 h1:Wc9M/vq+9Iw49KZb6mgHj85sysGHjVY+QlHJeZKlx4w= gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240722032933-7bd6ab00d249 h1:bHJGq5P+8w4fP62PZhIiq/fvOhvDPRtkM4pcmU8OZ1w=
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240712090657-cfba062e68e1/go.mod h1:3eECiw9O2bIFkkePlloKyLNXiqBAhOxNrDoGaaGseGY= gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240722032933-7bd6ab00d249/go.mod h1:3eECiw9O2bIFkkePlloKyLNXiqBAhOxNrDoGaaGseGY=
gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240620065702-5dcad373c1fe h1:teAWL7sJszDb1ZA7uptrzPSwJ1OIV840Q1/nrrDsx7E= gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240620065702-5dcad373c1fe h1:teAWL7sJszDb1ZA7uptrzPSwJ1OIV840Q1/nrrDsx7E=
gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240620065702-5dcad373c1fe/go.mod h1:/eOmBFZKWGoabG3sRVkVvIbLwsd2631k4jkUBR6x1AA= gitlink.org.cn/JointCloud/pcm-modelarts v0.0.0-20240620065702-5dcad373c1fe/go.mod h1:/eOmBFZKWGoabG3sRVkVvIbLwsd2631k4jkUBR6x1AA=
gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240510133934-6a5526289b35 h1:E2QfpS3Y0FjR8Zyv5l2Ti/2NetQFqHG66c8+T/+J1u0= gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240718073732-bc5d687f6330 h1:WxPrFSO6LjDCr+k7nmNFlPst8CtoTHQ2iSjv+D2rNnM=
gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240510133934-6a5526289b35/go.mod h1:QOD5+/l2D+AYBjF2h5T0mdJyfGAmF78QmeKdbBXbjLQ= gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240718073732-bc5d687f6330/go.mod h1:QOD5+/l2D+AYBjF2h5T0mdJyfGAmF78QmeKdbBXbjLQ=
gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240403033338-e7edabad4203 h1:s6PsZ1+bev294IWdZRlV7mnOwI1+UzFcldVW/BqhQzI= gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240403033338-e7edabad4203 h1:s6PsZ1+bev294IWdZRlV7mnOwI1+UzFcldVW/BqhQzI=
gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240403033338-e7edabad4203/go.mod h1:i2rrbMQ+Fve345BY9Heh4MUqVTAimZQElQhzzRee5B8= gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240403033338-e7edabad4203/go.mod h1:i2rrbMQ+Fve345BY9Heh4MUqVTAimZQElQhzzRee5B8=
gitlink.org.cn/JointCloud/pcm-slurm v0.0.0-20240301080743-8b94bbaf57f5 h1:+/5vnzkJBfMRnya1NrhOzlroUtRa5ePiYbPKlHLoLV0= gitlink.org.cn/JointCloud/pcm-slurm v0.0.0-20240301080743-8b94bbaf57f5 h1:+/5vnzkJBfMRnya1NrhOzlroUtRa5ePiYbPKlHLoLV0=

View File

@ -11,7 +11,10 @@ const (
type ICluster interface { type ICluster interface {
GetInferUrl(ctx context.Context, option *option.InferOption) ([]*InferUrl, error) GetInferUrl(ctx context.Context, option *option.InferOption) ([]*InferUrl, error)
GetInferDeployInstanceList(ctx context.Context, option *option.InferOption) ([]*DeployInstance, error) GetInferDeployInstanceList(ctx context.Context) ([]*DeployInstance, error)
StartInferDeployInstance(ctx context.Context, id string) bool
StopInferDeployInstance(ctx context.Context, id string) bool
GetInferDeployInstance(ctx context.Context, id string) (*DeployInstance, error)
} }
type IInference interface { type IInference interface {
@ -29,4 +32,12 @@ type InferUrl struct {
} }
type DeployInstance struct { type DeployInstance struct {
InstanceName string
InstanceId string
ModelName string
ModelType string
InferCard string
ClusterName string
Status string
CreatedTime string
} }

View File

@ -399,6 +399,18 @@ func (m *ModelArtsLink) GetInferUrl(ctx context.Context, option *option.InferOpt
return imageUrls, nil return imageUrls, nil
} }
func (m *ModelArtsLink) GetInferDeployInstanceList(ctx context.Context, option *option.InferOption) ([]*inference.DeployInstance, error) { func (m *ModelArtsLink) GetInferDeployInstanceList(ctx context.Context) ([]*inference.DeployInstance, error) {
return nil, nil
}
// StartInferDeployInstance is currently a stub for ModelArts: it issues no
// remote call, ignores ctx and id, and always returns false.
func (m *ModelArtsLink) StartInferDeployInstance(ctx context.Context, id string) bool {
return false
}
// StopInferDeployInstance is currently a stub for ModelArts: it issues no
// remote call, ignores ctx and id, and always returns false.
func (m *ModelArtsLink) StopInferDeployInstance(ctx context.Context, id string) bool {
return false
}
func (m *ModelArtsLink) GetInferDeployInstance(ctx context.Context, id string) (*inference.DeployInstance, error) {
return nil, nil return nil, nil
} }

View File

@ -904,6 +904,56 @@ func (o *OctopusLink) GetInferUrl(ctx context.Context, option *option.InferOptio
return imageUrls, nil return imageUrls, nil
} }
func (o *OctopusLink) GetInferDeployInstanceList(ctx context.Context, option *option.InferOption) ([]*inference.DeployInstance, error) { func (o *OctopusLink) GetInferDeployInstanceList(ctx context.Context) ([]*inference.DeployInstance, error) {
var insList []*inference.DeployInstance
req := &octopus.GetNotebookListReq{
Platform: o.platform,
PageIndex: o.pageIndex,
PageSize: o.pageSize,
SearchKey: DEPLOY_INSTANCE_PREFIEX,
}
list, err := o.octopusRpc.GetNotebookList(ctx, req)
if err != nil {
return nil, err
}
if list.Error != nil {
return nil, errors.New(list.Error.Message)
}
for _, notebook := range list.Payload.Notebooks {
ins := &inference.DeployInstance{}
ins.InstanceName = notebook.Name
ins.InstanceId = notebook.Id
ins.ClusterName = o.platform
ins.Status = notebook.Status
insList = append(insList, ins)
}
return insList, nil
}
// StartInferDeployInstance sends a StartNotebook request for the notebook
// identified by id on this link's platform. It returns true only when the RPC
// completes without error and the response's Success flag is set.
func (o *OctopusLink) StartInferDeployInstance(ctx context.Context, id string) bool {
	startReq := &octopus.StartNotebookReq{Platform: o.platform, Id: id}
	startResp, err := o.octopusRpc.StartNotebook(ctx, startReq)
	if err != nil {
		// The response is not inspected when the call itself failed.
		return false
	}
	return startResp.Success
}
// StopInferDeployInstance sends a StopNotebook request for the notebook
// identified by id on this link's platform. It returns true only when the RPC
// completes without error and the response's Success flag is set.
func (o *OctopusLink) StopInferDeployInstance(ctx context.Context, id string) bool {
	stopReq := &octopus.StopNotebookReq{Platform: o.platform, Id: id}
	stopResp, err := o.octopusRpc.StopNotebook(ctx, stopReq)
	if err != nil {
		// The response is not inspected when the call itself failed.
		return false
	}
	return stopResp.Success
}
func (o *OctopusLink) GetInferDeployInstance(ctx context.Context, id string) (*inference.DeployInstance, error) {
return nil, nil return nil, nil
} }

View File

@ -50,6 +50,7 @@ const (
DCUPRICEPERHOUR = 2.0 DCUPRICEPERHOUR = 2.0
KB = 1024 KB = 1024
TIMEOUT = 20 TIMEOUT = 20
DEPLOY_INSTANCE_LIMIT = 100
) )
var RESOURCESGAIMAP = map[string]ResourceSpecSGAI{ var RESOURCESGAIMAP = map[string]ResourceSpecSGAI{
@ -789,6 +790,66 @@ func (s *ShuguangAi) GetInferUrl(ctx context.Context, option *option.InferOption
return imageUrls, nil return imageUrls, nil
} }
func (s *ShuguangAi) GetInferDeployInstanceList(ctx context.Context, option *option.InferOption) ([]*inference.DeployInstance, error) { func (s *ShuguangAi) GetInferDeployInstanceList(ctx context.Context) ([]*inference.DeployInstance, error) {
var insList []*inference.DeployInstance
params := &hpcAC.GetInstanceServiceListReqParam{
InstanceServiceName: DEPLOY_INSTANCE_PREFIEX,
Start: 0,
Limit: DEPLOY_INSTANCE_LIMIT,
}
req := &hpcacclient.GetInstanceServiceListReq{
Param: params,
}
list, err := s.aCRpc.GetInstanceServiceList(ctx, req)
if err != nil {
return nil, err
}
if list.Code != "0" {
return nil, errors.New(list.Msg)
}
for _, datum := range list.Data {
ins := &inference.DeployInstance{}
ins.InstanceName = datum.InstanceServiceName
ins.InstanceId = datum.Id
ins.ClusterName = s.platform
ins.Status = datum.Status
ins.InferCard = DCU
ins.CreatedTime = datum.CreateTime
insList = append(insList, ins)
}
return insList, nil
}
// StartInferDeployInstance sends a StartInstanceService request for the
// instance service identified by id. It returns true only when the RPC
// completes without error, the response code is "0", and the response Data
// field equals the requested id.
func (s *ShuguangAi) StartInferDeployInstance(ctx context.Context, id string) bool {
	startResp, err := s.aCRpc.StartInstanceService(ctx, &hpcAC.StartInstanceServiceReq{
		InstanceServiceId: id,
	})
	if err != nil {
		// The response is not inspected when the call itself failed.
		return false
	}
	return startResp.Code == "0" && startResp.Data == id
}
// StopInferDeployInstance sends a StopInstanceService request containing the
// single given id. It returns true only when the RPC completes without error
// and the response code is "0".
func (s *ShuguangAi) StopInferDeployInstance(ctx context.Context, id string) bool {
	stopReq := &hpcAC.StopInstanceServiceReq{Ids: []string{id}}
	stopResp, err := s.aCRpc.StopInstanceService(ctx, stopReq)
	if err != nil {
		// The response is not inspected when the call itself failed.
		return false
	}
	return stopResp.Code == "0"
}
func (s *ShuguangAi) GetInferDeployInstance(ctx context.Context, id string) (*inference.DeployInstance, error) {
return nil, nil return nil, nil
} }

View File

@ -68,6 +68,7 @@ const (
CARD = "computeCard" CARD = "computeCard"
PYTORCH_TASK = "pytorch" PYTORCH_TASK = "pytorch"
TENSORFLOW_TASK = "tensorflow" TENSORFLOW_TASK = "tensorflow"
DEPLOY_INSTANCE_PREFIEX = "infer"
) )
var ( var (