modified pcm.api

Former-commit-id: 4ad39580a9edd828447227c920639ae40b477650
This commit is contained in:
tzwang 2024-02-22 17:20:58 +08:00
parent c1855def29
commit 05aa692150
4 changed files with 40 additions and 11 deletions

View File

@@ -634,5 +634,5 @@ service pcm {
get /schedule/ai/getStrategies returns (AiStrategyResp) get /schedule/ai/getStrategies returns (AiStrategyResp)
@handler ScheduleSubmitHandler @handler ScheduleSubmitHandler
post /schedule/submit (ScheduleResp) returns (ScheduleResp) post /schedule/submit (ScheduleReq) returns (ScheduleResp)
} }

View File

@@ -6,20 +6,22 @@ type AiCollector interface {
} }
type ResourceStats struct { type ResourceStats struct {
ParticipantId int64 ParticipantId int64
Name string Name string
CpuAvail float64 CpuAvail float64
MemAvail float64 MemAvail float64
DiskAvail float64 DiskAvail float64
GpuAvail float64 GpuAvail float64
CardAvail []Card TopsToHoursAvail map[float64]float64
Balance float64 Cards []Card
Balance float64
} }
type Card struct { type Card struct {
Type string Type string
Name string Name string
TOpsAtFp16 float64 TOpsAtFp16 float64
Price int32
} }
type DatasetsSpecs struct { type DatasetsSpecs struct {

View File

@@ -39,6 +39,11 @@ const (
IMG_VERSION_PREFIX = "version_" IMG_VERSION_PREFIX = "version_"
TASK_NAME_PREFIX = "trainJob" TASK_NAME_PREFIX = "trainJob"
RESOURCE_POOL = "common-pool" RESOURCE_POOL = "common-pool"
HANWUJI = "hanwuji"
SUIYUAN = "suiyuan"
SAILINGSI = "sailingsi"
MLU = "mlu"
CAMBRICONMLU290 = 256
) )
func NewOctopusLink(ctx context.Context, svcCtx *svc.ServiceContext, name string, id int64) *OctopusLink { func NewOctopusLink(ctx context.Context, svcCtx *svc.ServiceContext, name string, id int64) *OctopusLink {
@@ -187,7 +192,7 @@ func (o *OctopusLink) QuerySpecs() (interface{}, error) {
// octopus查询资源规格 // octopus查询资源规格
req := &octopus.GetResourceSpecsReq{ req := &octopus.GetResourceSpecsReq{
Platform: o.platform, Platform: o.platform,
ResourcePool: "common-pool", ResourcePool: RESOURCE_POOL,
} }
resp, err := o.svcCtx.OctopusRpc.GetResourceSpecs(o.ctx, req) resp, err := o.svcCtx.OctopusRpc.GetResourceSpecs(o.ctx, req)
if err != nil { if err != nil {
@@ -198,6 +203,27 @@ func (o *OctopusLink) QuerySpecs() (interface{}, error) {
} }
func (o *OctopusLink) GetResourceStats() (*collector.ResourceStats, error) { func (o *OctopusLink) GetResourceStats() (*collector.ResourceStats, error) {
req := &octopus.GetResourceSpecsReq{
Platform: o.platform,
ResourcePool: RESOURCE_POOL,
}
specResp, err := o.svcCtx.OctopusRpc.GetResourceSpecs(o.ctx, req)
if err != nil {
return nil, err
}
if !specResp.Success {
return nil, errors.New(specResp.Error.Message)
}
//_, err = o.svcCtx.OctopusRpc.GetUserBalance(o.ctx, req)
//if err != nil {
// return nil, err
//}
//resourceStat := collector.ResourceStats{}
//for _, spec := range specResp.TrainResourceSpecs {
//}
return nil, nil return nil, nil
} }
@@ -263,6 +289,7 @@ func (o *OctopusLink) GenerateSubmitParams(option *option.AiOption) error {
} }
func (o *OctopusLink) generateResourceId(option *option.AiOption) error { func (o *OctopusLink) generateResourceId(option *option.AiOption) error {
return nil return nil
} }

View File

@@ -72,7 +72,7 @@ var (
"3": SHUGUANGAI, "3": SHUGUANGAI,
"4": SHUGUANGHPC, "4": SHUGUANGHPC,
} }
resourceTypes = []string{CPU, GPU, CARD} resourceTypes = []string{CPU, CARD}
taskTypes = []string{PYTORCH_TASK, TENSORFLOW_TASK} taskTypes = []string{PYTORCH_TASK, TENSORFLOW_TASK}
ERROR_RESP_EMPTY = errors.New("resp empty error") ERROR_RESP_EMPTY = errors.New("resp empty error")