modified octopus implementations
Former-commit-id: 163c0a8fd054e1dcf77ad3262d48e70e314f1337
This commit is contained in:
parent
e62f734614
commit
c7597592f9
|
@ -7,6 +7,8 @@ type AiOption struct {
|
|||
DatasetsName string // mnist/imageNet/iris
|
||||
StrategyName string
|
||||
ClusterToStaticWeight map[string]int32
|
||||
Tops float64
|
||||
ComputeCard string
|
||||
CodeType string
|
||||
|
||||
ImageId string
|
||||
|
|
|
@ -12,8 +12,8 @@ type ResourceStats struct {
|
|||
MemAvail float64
|
||||
DiskAvail float64
|
||||
GpuAvail float64
|
||||
TopsToHoursAvail map[float64]float64
|
||||
Cards []Card
|
||||
CardToHours map[Card]float64
|
||||
CpuToHours map[int]float64
|
||||
Balance float64
|
||||
}
|
||||
|
||||
|
|
|
@ -22,6 +22,8 @@ import (
|
|||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
|
||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils"
|
||||
"gitlink.org.cn/jcce-pcm/pcm-participant-octopus/octopus"
|
||||
"math"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
|
@ -42,8 +44,11 @@ const (
|
|||
HANWUJI = "hanwuji"
|
||||
SUIYUAN = "suiyuan"
|
||||
SAILINGSI = "sailingsi"
|
||||
MLU = "mlu"
|
||||
MLU = "MLU"
|
||||
CAMBRICONMLU290 = 256
|
||||
GCU = "GCU"
|
||||
EnflameT20 = 128
|
||||
BASE_TOPS = 128
|
||||
)
|
||||
|
||||
func NewOctopusLink(ctx context.Context, svcCtx *svc.ServiceContext, name string, id int64) *OctopusLink {
|
||||
|
@ -226,8 +231,9 @@ func (o *OctopusLink) GetResourceStats() (*collector.ResourceStats, error) {
|
|||
}
|
||||
|
||||
//resourceStat := collector.ResourceStats{}
|
||||
|
||||
//
|
||||
//for _, spec := range specResp.TrainResourceSpecs {
|
||||
//
|
||||
//}
|
||||
|
||||
return nil, nil
|
||||
|
@ -295,10 +301,113 @@ func (o *OctopusLink) GenerateSubmitParams(option *option.AiOption) error {
|
|||
}
|
||||
|
||||
func (o *OctopusLink) generateResourceId(option *option.AiOption) error {
|
||||
if option.ResourceType == "" {
|
||||
return errors.New("ResourceType not set")
|
||||
}
|
||||
req := &octopus.GetResourceSpecsReq{
|
||||
Platform: o.platform,
|
||||
ResourcePool: RESOURCE_POOL,
|
||||
}
|
||||
specResp, err := o.svcCtx.OctopusRpc.GetResourceSpecs(o.ctx, req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !specResp.Success {
|
||||
return errors.New(specResp.Error.Message)
|
||||
}
|
||||
|
||||
if option.ResourceType == CPU {
|
||||
for _, spec := range specResp.TrainResourceSpecs {
|
||||
if spec.Price == 0 {
|
||||
option.ResourceId = spec.Id
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if option.ResourceType == CARD {
|
||||
err = setResourceIdByCard(option, specResp, GCU)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func setResourceIdByCard(option *option.AiOption, specs *octopus.GetResourceSpecsResp, computeCard string) error {
|
||||
if option.Tops == 0 {
|
||||
for _, spec := range specs.TrainResourceSpecs {
|
||||
if spec.Price == 1 {
|
||||
ns := strings.Split(spec.Name, COMMA)
|
||||
cardSpecs := strings.Split(ns[0], STAR)
|
||||
if cardSpecs[1] == computeCard {
|
||||
option.ResourceId = spec.Id
|
||||
option.ComputeCard = computeCard
|
||||
return nil
|
||||
}
|
||||
} else {
|
||||
continue
|
||||
}
|
||||
}
|
||||
} else {
|
||||
cardNum := math.Ceil(option.Tops / float64(BASE_TOPS))
|
||||
for _, spec := range specs.TrainResourceSpecs {
|
||||
if option.Tops < BASE_TOPS {
|
||||
if spec.Price == 1 {
|
||||
ns := strings.Split(spec.Name, COMMA)
|
||||
cardSpecs := strings.Split(ns[0], STAR)
|
||||
if cardSpecs[1] == computeCard {
|
||||
option.ResourceId = spec.Id
|
||||
option.ComputeCard = computeCard
|
||||
return nil
|
||||
}
|
||||
} else {
|
||||
continue
|
||||
}
|
||||
} else {
|
||||
ns := strings.Split(spec.Name, COMMA)
|
||||
if len(ns) != 4 {
|
||||
continue
|
||||
}
|
||||
cardSpecs := strings.Split(ns[0], STAR)
|
||||
if cardSpecs[1] != computeCard {
|
||||
continue
|
||||
}
|
||||
s, err := strconv.ParseFloat(cardSpecs[0], 64)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
switch computeCard {
|
||||
case GCU:
|
||||
if cardNum == s { // 1, 4, 8
|
||||
option.ResourceId = spec.Id
|
||||
option.ComputeCard = computeCard
|
||||
return nil
|
||||
}
|
||||
if 1 < cardNum && cardNum <= 4 && s == 4 {
|
||||
option.ResourceId = spec.Id
|
||||
option.ComputeCard = computeCard
|
||||
return nil
|
||||
}
|
||||
if 4 < cardNum && s == 8 {
|
||||
option.ResourceId = spec.Id
|
||||
option.ComputeCard = computeCard
|
||||
return nil
|
||||
}
|
||||
|
||||
case MLU: // 1, 2, 4
|
||||
if cardNum*2 == s {
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return errors.New("set ResourceId error")
|
||||
}
|
||||
|
||||
func (o *OctopusLink) generateDatasetsId(option *option.AiOption) error {
|
||||
if option.DatasetsName == "" {
|
||||
return errors.New("DatasetsName not set")
|
||||
|
|
|
@ -45,6 +45,7 @@ const (
|
|||
UNDERSCORE = "_"
|
||||
EQUAL = "="
|
||||
COMMA = ","
|
||||
STAR = "*"
|
||||
TYPE_OCTOPUS = "1"
|
||||
TYPE_MODELARTS = "2"
|
||||
TYPE_SHUGUANGAI = "3"
|
||||
|
|
Loading…
Reference in New Issue