modified octopus implementations

Former-commit-id: 163c0a8fd054e1dcf77ad3262d48e70e314f1337
This commit is contained in:
tzwang 2024-02-23 17:52:02 +08:00
parent e62f734614
commit c7597592f9
4 changed files with 123 additions and 11 deletions

View File

@ -7,6 +7,8 @@ type AiOption struct {
DatasetsName string // mnist/imageNet/iris
StrategyName string
ClusterToStaticWeight map[string]int32
Tops float64
ComputeCard string
CodeType string
ImageId string

View File

@ -6,15 +6,15 @@ type AiCollector interface {
}
type ResourceStats struct {
ParticipantId int64
Name string
CpuAvail float64
MemAvail float64
DiskAvail float64
GpuAvail float64
TopsToHoursAvail map[float64]float64
Cards []Card
Balance float64
ParticipantId int64
Name string
CpuAvail float64
MemAvail float64
DiskAvail float64
GpuAvail float64
CardToHours map[Card]float64
CpuToHours map[int]float64
Balance float64
}
type Card struct {

View File

@ -22,6 +22,8 @@ import (
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils"
"gitlink.org.cn/jcce-pcm/pcm-participant-octopus/octopus"
"math"
"strconv"
"strings"
)
@ -42,8 +44,11 @@ const (
HANWUJI = "hanwuji"
SUIYUAN = "suiyuan"
SAILINGSI = "sailingsi"
MLU = "mlu"
MLU = "MLU"
CAMBRICONMLU290 = 256
GCU = "GCU"
EnflameT20 = 128
BASE_TOPS = 128
)
func NewOctopusLink(ctx context.Context, svcCtx *svc.ServiceContext, name string, id int64) *OctopusLink {
@ -226,8 +231,9 @@ func (o *OctopusLink) GetResourceStats() (*collector.ResourceStats, error) {
}
//resourceStat := collector.ResourceStats{}
//
//for _, spec := range specResp.TrainResourceSpecs {
//
//}
return nil, nil
@ -295,10 +301,113 @@ func (o *OctopusLink) GenerateSubmitParams(option *option.AiOption) error {
}
// generateResourceId resolves option.ResourceId from the training resource
// specs that the Octopus platform reports for RESOURCE_POOL.
//
// Selection depends on option.ResourceType:
//   - CPU:  the first spec whose Price is 0 is used.
//   - CARD: selection is delegated to setResourceIdByCard using GCU cards.
//
// An error is returned when ResourceType is empty, when the spec query fails
// or reports Success == false, and when no spec could be selected for the
// requested ResourceType (instead of returning nil with ResourceId left
// unset).
func (o *OctopusLink) generateResourceId(option *option.AiOption) error {
	if option.ResourceType == "" {
		return errors.New("ResourceType not set")
	}

	req := &octopus.GetResourceSpecsReq{
		Platform:     o.platform,
		ResourcePool: RESOURCE_POOL,
	}
	specResp, err := o.svcCtx.OctopusRpc.GetResourceSpecs(o.ctx, req)
	if err != nil {
		return err
	}
	if !specResp.Success {
		return errors.New(specResp.Error.Message)
	}

	switch option.ResourceType {
	case CPU:
		for _, spec := range specResp.TrainResourceSpecs {
			if spec.Price == 0 {
				option.ResourceId = spec.Id
				return nil
			}
		}
	case CARD:
		// setResourceIdByCard already reports an error when nothing matches.
		return setResourceIdByCard(option, specResp, GCU)
	}

	// Either the ResourceType is not one this function knows how to resolve,
	// or no CPU spec was found; report it rather than succeeding silently.
	return errors.New("failed to get ResourceId")
}
// setResourceIdByCard selects a training resource spec for the given
// computeCard and stores its id in option.ResourceId, also recording
// computeCard in option.ComputeCard.
//
// A spec name is split on COMMA and its first field is split on STAR; the
// first STAR-separated piece is parsed as the card count and the second is
// compared against computeCard. Specs whose first field has no STAR are
// skipped.
//
// Selection rules:
//   - option.Tops below BASE_TOPS (this includes the unset value 0): the first
//     spec with Price == 1 whose card matches computeCard.
//   - otherwise cardNum = ceil(option.Tops / BASE_TOPS) and only specs whose
//     name has exactly four COMMA-separated fields are considered:
//   - GCU: the first spec whose count equals cardNum, or a 4-card spec when
//     1 < cardNum <= 4, or an 8-card spec when cardNum > 4.
//   - MLU: no spec is selected (matching is not implemented).
//
// It returns an error when the card count of a matching spec cannot be parsed
// or when no spec satisfies the rules above.
func setResourceIdByCard(option *option.AiOption, specs *octopus.GetResourceSpecsResp, computeCard string) error {
	// Requests below one BASE_TOPS unit are served by the Price == 1 spec of
	// the requested card type. The test is loop-invariant, so it is decided
	// once up front.
	if option.Tops < BASE_TOPS {
		for _, spec := range specs.TrainResourceSpecs {
			if spec.Price != 1 {
				continue
			}
			ns := strings.Split(spec.Name, COMMA)
			cardSpecs := strings.Split(ns[0], STAR)
			// Guard against names without a STAR separator, which would
			// otherwise make cardSpecs[1] panic.
			if len(cardSpecs) < 2 || cardSpecs[1] != computeCard {
				continue
			}
			option.ResourceId = spec.Id
			option.ComputeCard = computeCard
			return nil
		}
		return errors.New("set ResourceId error")
	}

	cardNum := math.Ceil(option.Tops / float64(BASE_TOPS))
	for _, spec := range specs.TrainResourceSpecs {
		ns := strings.Split(spec.Name, COMMA)
		if len(ns) != 4 {
			continue
		}
		cardSpecs := strings.Split(ns[0], STAR)
		if len(cardSpecs) < 2 || cardSpecs[1] != computeCard {
			continue
		}
		s, err := strconv.ParseFloat(cardSpecs[0], 64)
		if err != nil {
			return err
		}

		switch computeCard {
		case GCU:
			// The original comment lists card counts 1, 4 and 8: take an
			// exact match, otherwise the 4-card spec for 2..4 cards and the
			// 8-card spec for anything larger.
			exact := cardNum == s
			upToFour := 1 < cardNum && cardNum <= 4 && s == 4
			aboveFour := 4 < cardNum && s == 8
			if exact || upToFour || aboveFour {
				option.ResourceId = spec.Id
				option.ComputeCard = computeCard
				return nil
			}
		case MLU:
			// TODO: MLU selection (card counts 1, 2, 4 per the original
			// comment) is not implemented; no MLU spec is ever chosen, so
			// the function falls through to the error below.
		}
	}

	return errors.New("set ResourceId error")
}
func (o *OctopusLink) generateDatasetsId(option *option.AiOption) error {
if option.DatasetsName == "" {
return errors.New("DatasetsName not set")

View File

@ -45,6 +45,7 @@ const (
UNDERSCORE = "_"
EQUAL = "="
COMMA = ","
STAR = "*"
TYPE_OCTOPUS = "1"
TYPE_MODELARTS = "2"
TYPE_SHUGUANGAI = "3"