Modify Octopus implementation: select the resource spec by CPU/card type and requested TOPS

Former-commit-id: 163c0a8fd054e1dcf77ad3262d48e70e314f1337
This commit is contained in:
tzwang 2024-02-23 17:52:02 +08:00
parent e62f734614
commit c7597592f9
4 changed files with 123 additions and 11 deletions

View File

@ -7,6 +7,8 @@ type AiOption struct {
DatasetsName string // mnist/imageNet/iris DatasetsName string // mnist/imageNet/iris
StrategyName string StrategyName string
ClusterToStaticWeight map[string]int32 ClusterToStaticWeight map[string]int32
Tops float64
ComputeCard string
CodeType string CodeType string
ImageId string ImageId string

View File

@ -6,15 +6,15 @@ type AiCollector interface {
} }
type ResourceStats struct { type ResourceStats struct {
ParticipantId int64 ParticipantId int64
Name string Name string
CpuAvail float64 CpuAvail float64
MemAvail float64 MemAvail float64
DiskAvail float64 DiskAvail float64
GpuAvail float64 GpuAvail float64
TopsToHoursAvail map[float64]float64 CardToHours map[Card]float64
Cards []Card CpuToHours map[int]float64
Balance float64 Balance float64
} }
type Card struct { type Card struct {

View File

@ -22,6 +22,8 @@ import (
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc" "gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils" "gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils"
"gitlink.org.cn/jcce-pcm/pcm-participant-octopus/octopus" "gitlink.org.cn/jcce-pcm/pcm-participant-octopus/octopus"
"math"
"strconv"
"strings" "strings"
) )
@ -42,8 +44,11 @@ const (
HANWUJI = "hanwuji" HANWUJI = "hanwuji"
SUIYUAN = "suiyuan" SUIYUAN = "suiyuan"
SAILINGSI = "sailingsi" SAILINGSI = "sailingsi"
MLU = "mlu" MLU = "MLU"
CAMBRICONMLU290 = 256 CAMBRICONMLU290 = 256
GCU = "GCU"
EnflameT20 = 128
BASE_TOPS = 128
) )
func NewOctopusLink(ctx context.Context, svcCtx *svc.ServiceContext, name string, id int64) *OctopusLink { func NewOctopusLink(ctx context.Context, svcCtx *svc.ServiceContext, name string, id int64) *OctopusLink {
@ -226,8 +231,9 @@ func (o *OctopusLink) GetResourceStats() (*collector.ResourceStats, error) {
} }
//resourceStat := collector.ResourceStats{} //resourceStat := collector.ResourceStats{}
//
//for _, spec := range specResp.TrainResourceSpecs { //for _, spec := range specResp.TrainResourceSpecs {
//
//} //}
return nil, nil return nil, nil
@ -295,10 +301,113 @@ func (o *OctopusLink) GenerateSubmitParams(option *option.AiOption) error {
} }
// generateResourceId fills option.ResourceId from the train resource specs
// returned by the Octopus RPC for this link's platform and RESOURCE_POOL.
// It returns an error when ResourceType is empty, when the RPC call fails,
// when the response reports Success == false, or when card selection fails.
func (o *OctopusLink) generateResourceId(option *option.AiOption) error { func (o *OctopusLink) generateResourceId(option *option.AiOption) error {
// A resource type is required to decide which spec to pick.
if option.ResourceType == "" {
return errors.New("ResourceType not set")
}
req := &octopus.GetResourceSpecsReq{
Platform: o.platform,
ResourcePool: RESOURCE_POOL,
}
specResp, err := o.svcCtx.OctopusRpc.GetResourceSpecs(o.ctx, req)
if err != nil {
return err
}
// NOTE(review): specResp.Error is dereferenced without a nil check — confirm
// the RPC always populates it when Success is false.
if !specResp.Success {
return errors.New(specResp.Error.Message)
}
// CPU requests take the first spec whose Price is 0.
// NOTE(review): if no such spec exists, control falls through to the final
// return nil with ResourceId left unset — confirm this is intended.
if option.ResourceType == CPU {
for _, spec := range specResp.TrainResourceSpecs {
if spec.Price == 0 {
option.ResourceId = spec.Id
return nil
}
}
}
// Card requests are delegated to setResourceIdByCard; the card kind is
// currently hard-coded to GCU.
if option.ResourceType == CARD {
err = setResourceIdByCard(option, specResp, GCU)
if err != nil {
return err
}
}
return nil return nil
} }
// setResourceIdByCard selects a train resource spec for the given compute card
// and stores its Id in option.ResourceId (also recording the card kind in
// option.ComputeCard).
//
// Selection rules:
//   - When option.Tops is below BASE_TOPS (this includes the unset value 0),
//     the first spec with Price == 1 whose card name equals computeCard wins.
//   - Otherwise the number of cards needed is ceil(Tops / BASE_TOPS) and only
//     specs whose name has exactly four COMMA-separated fields are considered.
//     The first field must look like "<count>STAR<card>".
//     For GCU a spec matches when its count equals the needed number, or is 4
//     for a need of 2..4 cards, or is 8 for a need of more than 4 cards.
//
// Spec names whose first field does not contain a STAR separator are skipped
// instead of causing an index-out-of-range panic.
//
// It returns an error when a matching spec's card count cannot be parsed as a
// number, or when no spec matches.
func setResourceIdByCard(option *option.AiOption, specs *octopus.GetResourceSpecsResp, computeCard string) error {
	// The threshold test does not depend on the spec being examined, so it is
	// decided once up front rather than on every loop iteration.
	if option.Tops < BASE_TOPS {
		for _, spec := range specs.TrainResourceSpecs {
			// NOTE(review): Price == 1 is what the code uses to recognise the
			// single-card spec; confirm this matches the platform's pricing.
			if spec.Price != 1 {
				continue
			}
			_, card, ok := splitCardField(spec.Name)
			if !ok || card != computeCard {
				continue
			}
			option.ResourceId = spec.Id
			option.ComputeCard = computeCard
			return nil
		}
		return errors.New("set ResourceId error")
	}

	cardNum := math.Ceil(option.Tops / float64(BASE_TOPS))
	for _, spec := range specs.TrainResourceSpecs {
		if len(strings.Split(spec.Name, COMMA)) != 4 {
			continue
		}
		countText, card, ok := splitCardField(spec.Name)
		if !ok || card != computeCard {
			continue
		}
		count, err := strconv.ParseFloat(countText, 64)
		if err != nil {
			return err
		}

		switch computeCard {
		case GCU: // spec sizes: 1, 4, 8
			exact := cardNum == count
			roundToFour := 1 < cardNum && cardNum <= 4 && count == 4
			roundToEight := 4 < cardNum && count == 8
			if exact || roundToFour || roundToEight {
				option.ResourceId = spec.Id
				option.ComputeCard = computeCard
				return nil
			}
		case MLU: // spec sizes: 1, 2, 4
			// TODO: MLU selection is not implemented yet; no spec is ever
			// chosen here, so MLU requests end with the error below.
		}
	}
	return errors.New("set ResourceId error")
}

// splitCardField splits the first COMMA-separated field of a spec name on STAR
// and returns its first two parts (card count text and card name). ok is false
// when that field has fewer than two STAR-separated parts.
func splitCardField(specName string) (count, card string, ok bool) {
	fields := strings.Split(specName, COMMA)
	parts := strings.Split(fields[0], STAR)
	if len(parts) < 2 {
		return "", "", false
	}
	return parts[0], parts[1], true
}
func (o *OctopusLink) generateDatasetsId(option *option.AiOption) error { func (o *OctopusLink) generateDatasetsId(option *option.AiOption) error {
if option.DatasetsName == "" { if option.DatasetsName == "" {
return errors.New("DatasetsName not set") return errors.New("DatasetsName not set")

View File

@ -45,6 +45,7 @@ const (
UNDERSCORE = "_" UNDERSCORE = "_"
EQUAL = "=" EQUAL = "="
COMMA = "," COMMA = ","
STAR = "*"
TYPE_OCTOPUS = "1" TYPE_OCTOPUS = "1"
TYPE_MODELARTS = "2" TYPE_MODELARTS = "2"
TYPE_SHUGUANGAI = "3" TYPE_SHUGUANGAI = "3"