Modified storelink QuerySpecs implementations

Former-commit-id: 41c7236f5274563cfd4f251805a9a366a6814f58
This commit is contained in:
tzwang 2024-02-27 17:46:10 +08:00
parent 40bb2d8f04
commit e2ffaeee37
3 changed files with 71 additions and 23 deletions

View File

@ -45,7 +45,7 @@ const (
Username = "acgnnmfbwo" Username = "acgnnmfbwo"
) )
var RESOURCEMAP = map[string]ResourceSpec{ var RESOURCEMAP = map[string]ResourceSpecHpc{
"FPOqD5Cx8iNYqawEgDrAxLdrszp4Tmhl": { "FPOqD5Cx8iNYqawEgDrAxLdrszp4Tmhl": {
GAP_NNODE: "1", GAP_NNODE: "1",
GAP_NPROC: "1", GAP_NPROC: "1",
@ -98,7 +98,7 @@ var RESOURCEMAP = map[string]ResourceSpec{
}, },
} }
var RESOURCESPECS = map[string]string{ var RESOURCESPECSHPC = map[string]string{
"FPOqD5Cx8iNYqawEgDrAxLdrszp4Tmhl": "1*NODE, CPU:1, 1*DCU", "FPOqD5Cx8iNYqawEgDrAxLdrszp4Tmhl": "1*NODE, CPU:1, 1*DCU",
"Nd99eGNoBFC2ZTycKDlqD37heWTOmrMS": "1*NODE, CPU:2, 1*DCU", "Nd99eGNoBFC2ZTycKDlqD37heWTOmrMS": "1*NODE, CPU:2, 1*DCU",
"uAmLkz6jgSZkC6o8JywG7Yo2aiFPPOBO": "1*NODE, CPU:4, 2*DCU", "uAmLkz6jgSZkC6o8JywG7Yo2aiFPPOBO": "1*NODE, CPU:4, 2*DCU",
@ -122,7 +122,7 @@ var AcStatus = map[string]string{
"statX": "Other", "statX": "Other",
} }
type ResourceSpec struct { type ResourceSpecHpc struct {
GAP_NNODE string GAP_NNODE string
GAP_NPROC string GAP_NPROC string
GAP_NDCU string GAP_NDCU string
@ -148,7 +148,7 @@ func (s ShuguangHpc) SubmitTask(imageId string, cmd string, envs []string, param
// shuguangHpc提交任务 // shuguangHpc提交任务
//判断是否resourceId匹配自定义资源Id //判断是否resourceId匹配自定义资源Id
_, isMapContainsKey := RESOURCESPECS[resourceId] _, isMapContainsKey := RESOURCESPECSHPC[resourceId]
if !isMapContainsKey { if !isMapContainsKey {
return nil, errors.New("shuguangHpc资源Id不存在") return nil, errors.New("shuguangHpc资源Id不存在")
} }
@ -233,9 +233,9 @@ func (s ShuguangHpc) QueryTask(taskId string) (interface{}, error) {
} }
func (s ShuguangHpc) QuerySpecs() (interface{}, error) { func (s ShuguangHpc) QuerySpecs() (interface{}, error) {
var resp types.GetResourceSpecsResp resp := &types.GetResourceSpecsResp{}
for k, v := range RESOURCESPECS { for k, v := range RESOURCESPECSHPC {
var respec types.ResourceSpecSl var respec types.ResourceSpecSl
respec.SpecId = k respec.SpecId = k
respec.SpecName = v respec.SpecName = v

View File

@ -21,19 +21,14 @@ import (
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/schedulers/option" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/schedulers/option"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/collector" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/collector"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/types"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils"
"strconv" "strconv"
"strings" "strings"
) )
type ShuguangAi struct {
ctx context.Context
svcCtx *svc.ServiceContext
platform string
participantId int64
}
const ( const (
RAM_SIZE_1G = 1024 // 1G
WORKER_RAM_SIZE = 10240 // 10G WORKER_RAM_SIZE = 10240 // 10G
WORKER_NUMBER = 1 WORKER_NUMBER = 1
WORKER_CPU_NUMBER = 5 WORKER_CPU_NUMBER = 5
@ -45,12 +40,61 @@ const (
TASK_PYTORCH_PREFIX = "PytorchTask" TASK_PYTORCH_PREFIX = "PytorchTask"
TENSORFLOW = "Tensorflow" TENSORFLOW = "Tensorflow"
RESOURCE_GROUP = "wzhdtest" RESOURCE_GROUP = "wzhdtest"
WorkPath = "/work/home/acgnnmfbwo/111111/py/" WorkPath = "/work/home/acgnnmfbwo/pcmv1/"
TimeoutLimit = "10:00:00" TimeoutLimit = "10:00:00"
PythonCodePath = "/work/home/acgnnmfbwo/111111/py/test.py" PythonCodePath = "/work/home/acgnnmfbwo/111111/py/test.py"
DATASETS_DIR = "/work/home/acgnnmfbwo/pcmv1/dataset" DATASETS_DIR = "/work/home/acgnnmfbwo/pcmv1/dataset"
) )
// RESOURCESGMAP maps a resource spec ID to its CPU, GPU and RAM amounts.
// The keys are the same IDs used by RESOURCESPECSAI, whose labels present
// the GPU value as the "DCU" count. RAM is written as a multiple of
// RAM_SIZE_1G.
var RESOURCESGMAP = map[string]ResourceSpecSG{
	"WodTB2rJ8SobMgQ1nrtR245jxOrsovFi": {CPU: 1, GPU: 1, RAM: 2 * RAM_SIZE_1G},
	"6d41v1XV53MQPmQOJ5kNatIck9yl8nWZ": {CPU: 1, GPU: 2, RAM: 2 * RAM_SIZE_1G},
	"OBtVaaXAv9n9FbLR7pWAoa3yR13jXwNc": {CPU: 2, GPU: 1, RAM: 4 * RAM_SIZE_1G},
	"sBWfpkntUzsWYly11kdwEHZOYYIsFmve": {CPU: 5, GPU: 1, RAM: 10 * RAM_SIZE_1G},
	"jeYBVPwyIALjVYNzHvysh2o5CsBpBLp2": {CPU: 5, GPU: 2, RAM: 10 * RAM_SIZE_1G},
}
// RESOURCESPECSAI maps a resource spec ID to its human-readable label.
// ShuguangAi.QuerySpecs returns each key as SpecId and each value as
// SpecName.
var RESOURCESPECSAI = map[string]string{
	"WodTB2rJ8SobMgQ1nrtR245jxOrsovFi": "CPU:1, DCU:1, RAM:2G",
	"6d41v1XV53MQPmQOJ5kNatIck9yl8nWZ": "CPU:1, DCU:2, RAM:2G",
	"OBtVaaXAv9n9FbLR7pWAoa3yR13jXwNc": "CPU:2, DCU:1, RAM:4G",
	"sBWfpkntUzsWYly11kdwEHZOYYIsFmve": "CPU:5, DCU:1, RAM:10G",
	"jeYBVPwyIALjVYNzHvysh2o5CsBpBLp2": "CPU:5, DCU:2, RAM:10G",
}
// ResourceSpecSG holds the CPU, GPU and RAM amounts of one resource spec;
// it is the value type of RESOURCESGMAP.
type ResourceSpecSG struct {
	CPU, GPU, RAM int64
}
// ShuguangAi carries the state shared by the ShuguangAi methods. All four
// fields are populated by NewShuguangAi.
type ShuguangAi struct {
// ctx is the context passed to NewShuguangAi.
ctx context.Context
// svcCtx is the service context passed to NewShuguangAi.
svcCtx *svc.ServiceContext
// platform is NewShuguangAi's name argument; QuerySpecs reports it as ParticipantName.
platform string
// participantId is NewShuguangAi's id argument; QuerySpecs reports it as ParticipantId.
participantId int64
}
func NewShuguangAi(ctx context.Context, svcCtx *svc.ServiceContext, name string, id int64) *ShuguangAi { func NewShuguangAi(ctx context.Context, svcCtx *svc.ServiceContext, name string, id int64) *ShuguangAi {
return &ShuguangAi{ctx: ctx, svcCtx: svcCtx, platform: name, participantId: id} return &ShuguangAi{ctx: ctx, svcCtx: svcCtx, platform: name, participantId: id}
} }
@ -187,17 +231,19 @@ func (s *ShuguangAi) DeleteTask(taskId string) (interface{}, error) {
} }
func (s *ShuguangAi) QuerySpecs() (interface{}, error) { func (s *ShuguangAi) QuerySpecs() (interface{}, error) {
// ShuguangAi查询资源规格 resp := &types.GetResourceSpecsResp{}
req := &hpcAC.GetResourceSpecReq{
AcceleratorType: DCU, for k, v := range RESOURCESPECSAI {
ResourceGroup: RESOURCE_GROUP, var respec types.ResourceSpecSl
} respec.SpecId = k
specs, err := s.svcCtx.ACRpc.GetResourceSpec(s.ctx, req) respec.SpecName = v
if err != nil { respec.ParticipantId = s.participantId
return nil, err respec.ParticipantName = s.platform
resp.ResourceSpecs = append(resp.ResourceSpecs, &respec)
} }
return specs, nil resp.Success = true
return resp, nil
} }
func (s *ShuguangAi) GetResourceStats() (*collector.ResourceStats, error) { func (s *ShuguangAi) GetResourceStats() (*collector.ResourceStats, error) {

View File

@ -156,6 +156,8 @@ func GetTaskTypes() []string {
func ConvertType(in interface{}, out interface{}, participant *models.StorelinkCenter) (interface{}, error) { func ConvertType(in interface{}, out interface{}, participant *models.StorelinkCenter) (interface{}, error) {
switch (interface{})(in).(type) { switch (interface{})(in).(type) {
case *types.GetResourceSpecsResp:
return in, nil
case *octopus.UploadImageResp: case *octopus.UploadImageResp:
inresp := (interface{})(in).(*octopus.UploadImageResp) inresp := (interface{})(in).(*octopus.UploadImageResp)
switch (interface{})(out).(type) { switch (interface{})(out).(type) {