Modified storelink QuerySpecs implementations
Former-commit-id: 41c7236f5274563cfd4f251805a9a366a6814f58
This commit is contained in:
parent
40bb2d8f04
commit
e2ffaeee37
|
@ -45,7 +45,7 @@ const (
|
|||
Username = "acgnnmfbwo"
|
||||
)
|
||||
|
||||
var RESOURCEMAP = map[string]ResourceSpec{
|
||||
var RESOURCEMAP = map[string]ResourceSpecHpc{
|
||||
"FPOqD5Cx8iNYqawEgDrAxLdrszp4Tmhl": {
|
||||
GAP_NNODE: "1",
|
||||
GAP_NPROC: "1",
|
||||
|
@ -98,7 +98,7 @@ var RESOURCEMAP = map[string]ResourceSpec{
|
|||
},
|
||||
}
|
||||
|
||||
var RESOURCESPECS = map[string]string{
|
||||
var RESOURCESPECSHPC = map[string]string{
|
||||
"FPOqD5Cx8iNYqawEgDrAxLdrszp4Tmhl": "1*NODE, CPU:1, 1*DCU",
|
||||
"Nd99eGNoBFC2ZTycKDlqD37heWTOmrMS": "1*NODE, CPU:2, 1*DCU",
|
||||
"uAmLkz6jgSZkC6o8JywG7Yo2aiFPPOBO": "1*NODE, CPU:4, 2*DCU",
|
||||
|
@ -122,7 +122,7 @@ var AcStatus = map[string]string{
|
|||
"statX": "Other",
|
||||
}
|
||||
|
||||
type ResourceSpec struct {
|
||||
type ResourceSpecHpc struct {
|
||||
GAP_NNODE string
|
||||
GAP_NPROC string
|
||||
GAP_NDCU string
|
||||
|
@ -148,7 +148,7 @@ func (s ShuguangHpc) SubmitTask(imageId string, cmd string, envs []string, param
|
|||
// Submit a task to shuguangHpc
|
||||
|
||||
// Check whether resourceId matches one of the custom resource IDs
|
||||
_, isMapContainsKey := RESOURCESPECS[resourceId]
|
||||
_, isMapContainsKey := RESOURCESPECSHPC[resourceId]
|
||||
if !isMapContainsKey {
|
||||
return nil, errors.New("shuguangHpc资源Id不存在")
|
||||
}
|
||||
|
@ -233,9 +233,9 @@ func (s ShuguangHpc) QueryTask(taskId string) (interface{}, error) {
|
|||
}
|
||||
|
||||
func (s ShuguangHpc) QuerySpecs() (interface{}, error) {
|
||||
var resp types.GetResourceSpecsResp
|
||||
resp := &types.GetResourceSpecsResp{}
|
||||
|
||||
for k, v := range RESOURCESPECS {
|
||||
for k, v := range RESOURCESPECSHPC {
|
||||
var respec types.ResourceSpecSl
|
||||
respec.SpecId = k
|
||||
respec.SpecName = v
|
||||
|
|
|
@ -21,19 +21,14 @@ import (
|
|||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/schedulers/option"
|
||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/collector"
|
||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
|
||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/types"
|
||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// ShuguangAi bundles the state its methods work with: the context and
// service context used for RPC calls, plus the platform name and participant
// ID that are copied into resource-spec responses.
type ShuguangAi struct {
|
||||
// ctx is passed to RPC calls made through svcCtx.
ctx context.Context
|
||||
// svcCtx exposes the RPC clients (for example ACRpc).
svcCtx *svc.ServiceContext
|
||||
// platform is reported as ParticipantName in spec responses.
platform string
|
||||
// participantId is reported as ParticipantId in spec responses.
participantId int64
|
||||
}
|
||||
|
||||
const (
|
||||
RAM_SIZE_1G = 1024 // 1G
|
||||
WORKER_RAM_SIZE = 10240 // 10G
|
||||
WORKER_NUMBER = 1
|
||||
WORKER_CPU_NUMBER = 5
|
||||
|
@ -45,12 +40,61 @@ const (
|
|||
TASK_PYTORCH_PREFIX = "PytorchTask"
|
||||
TENSORFLOW = "Tensorflow"
|
||||
RESOURCE_GROUP = "wzhdtest"
|
||||
WorkPath = "/work/home/acgnnmfbwo/111111/py/"
|
||||
WorkPath = "/work/home/acgnnmfbwo/pcmv1/"
|
||||
TimeoutLimit = "10:00:00"
|
||||
PythonCodePath = "/work/home/acgnnmfbwo/111111/py/test.py"
|
||||
DATASETS_DIR = "/work/home/acgnnmfbwo/pcmv1/dataset"
|
||||
)
|
||||
|
||||
// RESOURCESGMAP maps a resource spec ID to its numeric CPU/GPU/RAM values.
// The keys are the same IDs used in RESOURCESPECSAI, and each entry's values
// agree with the description string stored there (the GPU field lines up with
// the "DCU" count in those descriptions). RAM is written as a multiple of
// RAM_SIZE_1G.
var RESOURCESGMAP = map[string]ResourceSpecSG{
|
||||
"WodTB2rJ8SobMgQ1nrtR245jxOrsovFi": {
|
||||
CPU: 1,
|
||||
GPU: 1,
|
||||
RAM: 2 * RAM_SIZE_1G,
|
||||
},
|
||||
"6d41v1XV53MQPmQOJ5kNatIck9yl8nWZ": {
|
||||
CPU: 1,
|
||||
GPU: 2,
|
||||
RAM: 2 * RAM_SIZE_1G,
|
||||
},
|
||||
"OBtVaaXAv9n9FbLR7pWAoa3yR13jXwNc": {
|
||||
CPU: 2,
|
||||
GPU: 1,
|
||||
RAM: 4 * RAM_SIZE_1G,
|
||||
},
|
||||
"sBWfpkntUzsWYly11kdwEHZOYYIsFmve": {
|
||||
CPU: 5,
|
||||
GPU: 1,
|
||||
RAM: 10 * RAM_SIZE_1G,
|
||||
},
|
||||
"jeYBVPwyIALjVYNzHvysh2o5CsBpBLp2": {
|
||||
CPU: 5,
|
||||
GPU: 2,
|
||||
RAM: 10 * RAM_SIZE_1G,
|
||||
},
|
||||
}
|
||||
|
||||
// RESOURCESPECSAI maps a resource spec ID to a human-readable description of
// that spec ("CPU:<n>, DCU:<n>, RAM:<n>G"). ShuguangAi.QuerySpecs iterates
// this table, using the key as SpecId and the value as SpecName. The keys are
// the same IDs used in RESOURCESGMAP.
var RESOURCESPECSAI = map[string]string{
|
||||
"WodTB2rJ8SobMgQ1nrtR245jxOrsovFi": "CPU:1, DCU:1, RAM:2G",
|
||||
"6d41v1XV53MQPmQOJ5kNatIck9yl8nWZ": "CPU:1, DCU:2, RAM:2G",
|
||||
"OBtVaaXAv9n9FbLR7pWAoa3yR13jXwNc": "CPU:2, DCU:1, RAM:4G",
|
||||
"sBWfpkntUzsWYly11kdwEHZOYYIsFmve": "CPU:5, DCU:1, RAM:10G",
|
||||
"jeYBVPwyIALjVYNzHvysh2o5CsBpBLp2": "CPU:5, DCU:2, RAM:10G",
|
||||
}
|
||||
|
||||
// ResourceSpecSG is the value type of RESOURCESGMAP: the numeric resource
// amounts that make up one spec.
type ResourceSpecSG struct {
|
||||
// CPU is the CPU count for the spec.
CPU int64
|
||||
// GPU is the accelerator count for the spec.
GPU int64
|
||||
// RAM is the memory amount; RESOURCESGMAP fills it with multiples of
// RAM_SIZE_1G.
RAM int64
|
||||
}
|
||||
|
||||
// ShuguangAi bundles the state its methods work with: the context and
// service context used for RPC calls, plus the platform name and participant
// ID that are copied into resource-spec responses. Instances are built by
// NewShuguangAi.
type ShuguangAi struct {
|
||||
// ctx is passed to RPC calls made through svcCtx.
ctx context.Context
|
||||
// svcCtx exposes the RPC clients (for example ACRpc).
svcCtx *svc.ServiceContext
|
||||
// platform is reported as ParticipantName in spec responses.
platform string
|
||||
// participantId is reported as ParticipantId in spec responses.
participantId int64
|
||||
}
|
||||
|
||||
// NewShuguangAi returns a *ShuguangAi populated from its arguments: ctx and
// svcCtx are stored as given, name becomes the platform field and id becomes
// the participantId field. No validation is performed.
func NewShuguangAi(ctx context.Context, svcCtx *svc.ServiceContext, name string, id int64) *ShuguangAi {
|
||||
return &ShuguangAi{ctx: ctx, svcCtx: svcCtx, platform: name, participantId: id}
|
||||
}
|
||||
|
@ -187,17 +231,19 @@ func (s *ShuguangAi) DeleteTask(taskId string) (interface{}, error) {
|
|||
}
|
||||
|
||||
// QuerySpecs returns the resource specs for this ShuguangAi participant.
//
// NOTE(review): the lines below interleave two variants of the body. One
// calls ACRpc.GetResourceSpec with the DCU accelerator type and
// RESOURCE_GROUP and returns the RPC result; the other walks RESOURCESPECSAI,
// builds one types.ResourceSpecSl per entry (tagged with the participant ID
// and platform name), and returns a *types.GetResourceSpecsResp with Success
// set to true. Confirm which lines belong to the current version before
// relying on either path.
func (s *ShuguangAi) QuerySpecs() (interface{}, error) {
|
||||
// Query resource specs for ShuguangAi
|
||||
req := &hpcAC.GetResourceSpecReq{
|
||||
AcceleratorType: DCU,
|
||||
ResourceGroup: RESOURCE_GROUP,
|
||||
}
|
||||
specs, err := s.svcCtx.ACRpc.GetResourceSpec(s.ctx, req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
resp := &types.GetResourceSpecsResp{}
|
||||
|
||||
// Map iteration order is unspecified in Go, so the order of
// resp.ResourceSpecs can differ between calls.
for k, v := range RESOURCESPECSAI {
|
||||
var respec types.ResourceSpecSl
|
||||
respec.SpecId = k
|
||||
respec.SpecName = v
|
||||
respec.ParticipantId = s.participantId
|
||||
respec.ParticipantName = s.platform
|
||||
resp.ResourceSpecs = append(resp.ResourceSpecs, &respec)
|
||||
}
|
||||
|
||||
return specs, nil
|
||||
resp.Success = true
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
func (s *ShuguangAi) GetResourceStats() (*collector.ResourceStats, error) {
|
||||
|
|
|
@ -156,6 +156,8 @@ func GetTaskTypes() []string {
|
|||
func ConvertType(in interface{}, out interface{}, participant *models.StorelinkCenter) (interface{}, error) {
|
||||
|
||||
switch (interface{})(in).(type) {
|
||||
case *types.GetResourceSpecsResp:
|
||||
return in, nil
|
||||
case *octopus.UploadImageResp:
|
||||
inresp := (interface{})(in).(*octopus.UploadImageResp)
|
||||
switch (interface{})(out).(type) {
|
||||
|
|
Loading…
Reference in New Issue