modified storelink QuerySpecs implementations

Former-commit-id: 41c7236f5274563cfd4f251805a9a366a6814f58
This commit is contained in:
tzwang 2024-02-27 17:46:10 +08:00
parent 40bb2d8f04
commit e2ffaeee37
3 changed files with 71 additions and 23 deletions

View File

@ -45,7 +45,7 @@ const (
Username = "acgnnmfbwo"
)
var RESOURCEMAP = map[string]ResourceSpec{
var RESOURCEMAP = map[string]ResourceSpecHpc{
"FPOqD5Cx8iNYqawEgDrAxLdrszp4Tmhl": {
GAP_NNODE: "1",
GAP_NPROC: "1",
@ -98,7 +98,7 @@ var RESOURCEMAP = map[string]ResourceSpec{
},
}
var RESOURCESPECS = map[string]string{
var RESOURCESPECSHPC = map[string]string{
"FPOqD5Cx8iNYqawEgDrAxLdrszp4Tmhl": "1*NODE, CPU:1, 1*DCU",
"Nd99eGNoBFC2ZTycKDlqD37heWTOmrMS": "1*NODE, CPU:2, 1*DCU",
"uAmLkz6jgSZkC6o8JywG7Yo2aiFPPOBO": "1*NODE, CPU:4, 2*DCU",
@ -122,7 +122,7 @@ var AcStatus = map[string]string{
"statX": "Other",
}
type ResourceSpec struct {
type ResourceSpecHpc struct {
GAP_NNODE string
GAP_NPROC string
GAP_NDCU string
@ -148,7 +148,7 @@ func (s ShuguangHpc) SubmitTask(imageId string, cmd string, envs []string, param
// shuguangHpc提交任务
//判断是否resourceId匹配自定义资源Id
_, isMapContainsKey := RESOURCESPECS[resourceId]
_, isMapContainsKey := RESOURCESPECSHPC[resourceId]
if !isMapContainsKey {
return nil, errors.New("shuguangHpc资源Id不存在")
}
@ -233,9 +233,9 @@ func (s ShuguangHpc) QueryTask(taskId string) (interface{}, error) {
}
func (s ShuguangHpc) QuerySpecs() (interface{}, error) {
var resp types.GetResourceSpecsResp
resp := &types.GetResourceSpecsResp{}
for k, v := range RESOURCESPECS {
for k, v := range RESOURCESPECSHPC {
var respec types.ResourceSpecSl
respec.SpecId = k
respec.SpecName = v

View File

@ -21,19 +21,14 @@ import (
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/schedulers/option"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/collector"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/types"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils"
"strconv"
"strings"
)
// ShuguangAi carries the per-participant state used by the ShuguangAi
// storelink methods: the request context, the service context, and the
// participant's name and ID.
type ShuguangAi struct {
	// ctx is the context handed to the constructor.
	ctx context.Context
	// svcCtx is the shared service context handed to the constructor.
	svcCtx *svc.ServiceContext
	// platform is the participant (platform) name.
	platform string
	// participantId is the participant's numeric ID.
	participantId int64
}
const (
RAM_SIZE_1G = 1024 // 1G
WORKER_RAM_SIZE = 10240 // 10G
WORKER_NUMBER = 1
WORKER_CPU_NUMBER = 5
@ -45,12 +40,61 @@ const (
TASK_PYTORCH_PREFIX = "PytorchTask"
TENSORFLOW = "Tensorflow"
RESOURCE_GROUP = "wzhdtest"
WorkPath = "/work/home/acgnnmfbwo/111111/py/"
WorkPath = "/work/home/acgnnmfbwo/pcmv1/"
TimeoutLimit = "10:00:00"
PythonCodePath = "/work/home/acgnnmfbwo/111111/py/test.py"
DATASETS_DIR = "/work/home/acgnnmfbwo/pcmv1/dataset"
)
// RESOURCESGMAP maps each ShuguangAi resource-spec ID to the CPU, GPU and RAM
// amounts it stands for. RAM is expressed as a multiple of RAM_SIZE_1G.
var RESOURCESGMAP = map[string]ResourceSpecSG{
	"WodTB2rJ8SobMgQ1nrtR245jxOrsovFi": {CPU: 1, GPU: 1, RAM: 2 * RAM_SIZE_1G},
	"6d41v1XV53MQPmQOJ5kNatIck9yl8nWZ": {CPU: 1, GPU: 2, RAM: 2 * RAM_SIZE_1G},
	"OBtVaaXAv9n9FbLR7pWAoa3yR13jXwNc": {CPU: 2, GPU: 1, RAM: 4 * RAM_SIZE_1G},
	"sBWfpkntUzsWYly11kdwEHZOYYIsFmve": {CPU: 5, GPU: 1, RAM: 10 * RAM_SIZE_1G},
	"jeYBVPwyIALjVYNzHvysh2o5CsBpBLp2": {CPU: 5, GPU: 2, RAM: 10 * RAM_SIZE_1G},
}
// RESOURCESPECSAI maps each ShuguangAi resource-spec ID to its display name,
// a string of the form "CPU:<n>, DCU:<n>, RAM:<n>G".
var RESOURCESPECSAI = map[string]string{
	"WodTB2rJ8SobMgQ1nrtR245jxOrsovFi": "CPU:1, DCU:1, RAM:2G",
	"6d41v1XV53MQPmQOJ5kNatIck9yl8nWZ": "CPU:1, DCU:2, RAM:2G",
	"OBtVaaXAv9n9FbLR7pWAoa3yR13jXwNc": "CPU:2, DCU:1, RAM:4G",
	"sBWfpkntUzsWYly11kdwEHZOYYIsFmve": "CPU:5, DCU:1, RAM:10G",
	"jeYBVPwyIALjVYNzHvysh2o5CsBpBLp2": "CPU:5, DCU:2, RAM:10G",
}
// ResourceSpecSG holds the CPU, GPU and RAM amounts of one resource
// specification. It is the value type of RESOURCESGMAP.
type ResourceSpecSG struct {
	CPU, GPU, RAM int64
}
// ShuguangAi carries the per-participant state used by the ShuguangAi
// storelink methods: the request context, the service context, and the
// participant's name and ID.
type ShuguangAi struct {
	// ctx is the context handed to the constructor.
	ctx context.Context
	// svcCtx is the shared service context handed to the constructor.
	svcCtx *svc.ServiceContext
	// platform is the participant (platform) name.
	platform string
	// participantId is the participant's numeric ID.
	participantId int64
}
// NewShuguangAi returns a ShuguangAi bound to the given context and service
// context. name is stored as the platform name and id as the participant ID.
func NewShuguangAi(ctx context.Context, svcCtx *svc.ServiceContext, name string, id int64) *ShuguangAi {
	ai := &ShuguangAi{
		ctx:           ctx,
		svcCtx:        svcCtx,
		platform:      name,
		participantId: id,
	}
	return ai
}
@ -187,17 +231,19 @@ func (s *ShuguangAi) DeleteTask(taskId string) (interface{}, error) {
}
func (s *ShuguangAi) QuerySpecs() (interface{}, error) {
// ShuguangAi查询资源规格
req := &hpcAC.GetResourceSpecReq{
AcceleratorType: DCU,
ResourceGroup: RESOURCE_GROUP,
}
specs, err := s.svcCtx.ACRpc.GetResourceSpec(s.ctx, req)
if err != nil {
return nil, err
resp := &types.GetResourceSpecsResp{}
for k, v := range RESOURCESPECSAI {
var respec types.ResourceSpecSl
respec.SpecId = k
respec.SpecName = v
respec.ParticipantId = s.participantId
respec.ParticipantName = s.platform
resp.ResourceSpecs = append(resp.ResourceSpecs, &respec)
}
return specs, nil
resp.Success = true
return resp, nil
}
func (s *ShuguangAi) GetResourceStats() (*collector.ResourceStats, error) {

View File

@ -156,6 +156,8 @@ func GetTaskTypes() []string {
func ConvertType(in interface{}, out interface{}, participant *models.StorelinkCenter) (interface{}, error) {
switch (interface{})(in).(type) {
case *types.GetResourceSpecsResp:
return in, nil
case *octopus.UploadImageResp:
inresp := (interface{})(in).(*octopus.UploadImageResp)
switch (interface{})(out).(type) {