added api getcomputecards

Former-commit-id: 4514161574188e63305fe9066ef4ddad473a56b2
This commit is contained in:
tzwang 2024-05-11 11:22:29 +08:00
parent eaf1b1b953
commit 6e82702876
6 changed files with 70 additions and 17 deletions

View File

@ -2,6 +2,7 @@ package schedule
import ( import (
"context" "context"
"strings"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
@ -26,7 +27,7 @@ func NewDownloadAlgothmCodeLogic(ctx context.Context, svcCtx *svc.ServiceContext
func (l *DownloadAlgothmCodeLogic) DownloadAlgorithmCode(req *types.DownloadAlgorithmCodeReq) (resp *types.DownloadAlgorithmCodeResp, err error) { func (l *DownloadAlgothmCodeLogic) DownloadAlgorithmCode(req *types.DownloadAlgorithmCodeReq) (resp *types.DownloadAlgorithmCodeResp, err error) {
resp = &types.DownloadAlgorithmCodeResp{} resp = &types.DownloadAlgorithmCodeResp{}
code, err := l.svcCtx.Scheduler.AiService.AiCollectorAdapterMap[req.AdapterId][req.ClusterId].DownloadAlgorithmCode(l.ctx, code, err := l.svcCtx.Scheduler.AiService.AiCollectorAdapterMap[req.AdapterId][req.ClusterId].DownloadAlgorithmCode(l.ctx,
req.ResourceType, req.Card, req.TaskType, req.Dataset, req.Algorithm) req.ResourceType, strings.ToLower(req.Card), req.TaskType, req.Dataset, req.Algorithm)
if err != nil { if err != nil {
return nil, err return nil, err
} }

View File

@ -24,7 +24,12 @@ func NewGetComputeCardsByClusterLogic(ctx context.Context, svcCtx *svc.ServiceCo
} }
// GetComputeCardsByCluster returns the compute cards reported by the collector
// registered under req.AdapterId and req.ClusterId.
//
// The collector's GetComputeCards result is assigned to resp.Cards; if the
// collector returns an error, that error is returned with a nil response.
//
// NOTE(review): neither map lookup uses the comma-ok form. If
// AiCollectorAdapterMap holds interface values (presumably AiCollector —
// confirm), an unknown adapter or cluster ID yields a nil value and the method
// call would panic; consider validating the IDs before the call.
func (l *GetComputeCardsByClusterLogic) GetComputeCardsByCluster(req *types.GetComputeCardsByClusterReq) (resp *types.GetComputeCardsByClusterResp, err error) { func (l *GetComputeCardsByClusterLogic) GetComputeCardsByCluster(req *types.GetComputeCardsByClusterReq) (resp *types.GetComputeCardsByClusterResp, err error) {
// todo: add your logic here and delete this line resp = &types.GetComputeCardsByClusterResp{}
cards, err := l.svcCtx.Scheduler.AiService.AiCollectorAdapterMap[req.AdapterId][req.ClusterId].GetComputeCards(l.ctx)
return if err != nil {
return nil, err
}
resp.Cards = cards
return resp, nil
} }

View File

@ -10,6 +10,7 @@ type AiCollector interface {
GetTrainingTask(ctx context.Context, taskId string) (*Task, error) GetTrainingTask(ctx context.Context, taskId string) (*Task, error)
DownloadAlgorithmCode(ctx context.Context, resourceType string, card string, taskType string, dataset string, algorithm string) (string, error) DownloadAlgorithmCode(ctx context.Context, resourceType string, card string, taskType string, dataset string, algorithm string) (string, error)
UploadAlgorithmCode(ctx context.Context, resourceType string, card string, taskType string, dataset string, algorithm string, code string) error UploadAlgorithmCode(ctx context.Context, resourceType string, card string, taskType string, dataset string, algorithm string, code string) error
GetComputeCards(ctx context.Context) ([]string, error)
} }
type ResourceStats struct { type ResourceStats struct {

View File

@ -162,6 +162,10 @@ func (m *ModelArtsLink) GetAlgorithms(ctx context.Context) ([]*collector.Algorit
return nil, nil return nil, nil
} }
// GetComputeCards is currently a placeholder for ModelArtsLink: it performs no
// lookup and returns a nil slice together with a nil error.
func (m *ModelArtsLink) GetComputeCards(ctx context.Context) ([]string, error) {
return nil, nil
}
func (m *ModelArtsLink) DownloadAlgorithmCode(ctx context.Context, resourceType string, card string, taskType string, dataset string, algorithm string) (string, error) { func (m *ModelArtsLink) DownloadAlgorithmCode(ctx context.Context, resourceType string, card string, taskType string, dataset string, algorithm string) (string, error) {
return "", nil return "", nil
} }

View File

@ -46,12 +46,14 @@ const (
SUIYUAN = "suiyuan" SUIYUAN = "suiyuan"
SAILINGSI = "sailingsi" SAILINGSI = "sailingsi"
MLU = "MLU" MLU = "MLU"
BIV100 = "BI-V100"
CAMBRICONMLU290 = 256 CAMBRICONMLU290 = 256
GCU = "GCU" GCU = "GCU"
ENFLAME = "enflame" ENFLAME = "enflame"
EnflameT20 = 128 EnflameT20 = 128
BASE_TOPS = 128 BASE_TOPS = 128
CAMBRICON = "cambricon" CAMBRICON = "cambricon"
TIANSHU = "天数"
TRAIN_CMD = "cd /code; python train.py" TRAIN_CMD = "cd /code; python train.py"
VERSION = "V1" VERSION = "V1"
DOMAIN = "http://192.168.242.41:8001/" DOMAIN = "http://192.168.242.41:8001/"
@ -61,6 +63,7 @@ var (
cardAliasMap = map[string]string{ cardAliasMap = map[string]string{
MLU: CAMBRICON, MLU: CAMBRICON,
GCU: ENFLAME, GCU: ENFLAME,
BIV100: TIANSHU,
} }
cardTopsMap = map[string]float64{ cardTopsMap = map[string]float64{
MLU: CAMBRICONMLU290, MLU: CAMBRICONMLU290,
@ -340,11 +343,54 @@ func (o *OctopusLink) GetAlgorithms(ctx context.Context) ([]*collector.Algorithm
return algorithms, nil return algorithms, nil
} }
// GetComputeCards returns the card names known to OctopusLink, i.e. the keys
// of cardAliasMap.
//
// The order of the returned names is unspecified because Go does not define
// map iteration order; callers that need a stable order must sort the result.
// The returned error is always nil.
func (o *OctopusLink) GetComputeCards(ctx context.Context) ([]string, error) {
	// The final length is exactly the number of map keys, so size the slice
	// once instead of letting append grow it.
	cards := make([]string, 0, len(cardAliasMap))
	for card := range cardAliasMap {
		cards = append(cards, card)
	}
	return cards, nil
}
func (o *OctopusLink) DownloadAlgorithmCode(ctx context.Context, resourceType string, card string, taskType string, dataset string, algorithm string) (string, error) { func (o *OctopusLink) DownloadAlgorithmCode(ctx context.Context, resourceType string, card string, taskType string, dataset string, algorithm string) (string, error) {
var name string
if resourceType == CARD {
name = dataset + UNDERSCORE + algorithm + UNDERSCORE + card
} else {
name = dataset + UNDERSCORE + algorithm + UNDERSCORE + CPU
}
req := &octopus.GetMyAlgorithmListReq{
Platform: o.platform,
PageIndex: o.pageIndex,
PageSize: o.pageSize,
}
resp, err := o.octopusRpc.GetMyAlgorithmList(ctx, req)
if err != nil {
return "", err
}
if !resp.Success {
return "", errors.New("failed to get algorithmList")
}
var algorithmId string
for _, a := range resp.Payload.Algorithms {
if strings.ToLower(a.FrameworkName) != taskType {
continue
}
if a.AlgorithmName == name {
algorithmId = a.AlgorithmId
break
}
}
if algorithmId == "" {
return "", errors.New("algorithmId not found")
}
dcReq := &octopus.DownloadCompressReq{ dcReq := &octopus.DownloadCompressReq{
Platform: o.platform, Platform: o.platform,
Version: VERSION, Version: VERSION,
AlgorithmId: "", AlgorithmId: algorithmId,
} }
dcResp, err := o.octopusRpc.DownloadCompress(ctx, dcReq) dcResp, err := o.octopusRpc.DownloadCompress(ctx, dcReq)
if err != nil { if err != nil {
@ -358,7 +404,7 @@ func (o *OctopusLink) DownloadAlgorithmCode(ctx context.Context, resourceType st
daReq := &octopus.DownloadAlgorithmReq{ daReq := &octopus.DownloadAlgorithmReq{
Platform: o.platform, Platform: o.platform,
Version: VERSION, Version: VERSION,
AlgorithmId: "", AlgorithmId: algorithmId,
CompressAt: dcResp.Payload.CompressAt, CompressAt: dcResp.Payload.CompressAt,
Domain: DOMAIN, Domain: DOMAIN,
} }
@ -591,16 +637,6 @@ func (o *OctopusLink) generateImageId(ctx context.Context, option *option.AiOpti
} }
func (o *OctopusLink) generateAlgorithmId(ctx context.Context, option *option.AiOption) error { func (o *OctopusLink) generateAlgorithmId(ctx context.Context, option *option.AiOption) error {
// temporarily set algorithm to cnn
if option.AlgorithmName == "" {
switch option.DatasetsName {
case "cifar10":
option.AlgorithmName = "cnn"
case "mnist":
option.AlgorithmName = "fcn"
}
}
req := &octopus.GetMyAlgorithmListReq{ req := &octopus.GetMyAlgorithmListReq{
Platform: o.platform, Platform: o.platform,
PageIndex: o.pageIndex, PageIndex: o.pageIndex,

View File

@ -447,6 +447,12 @@ func (s *ShuguangAi) GetAlgorithms(ctx context.Context) ([]*collector.Algorithm,
return algorithms, nil return algorithms, nil
} }
// GetComputeCards reports the compute cards offered by ShuguangAi: a
// single-element list containing DCU. The returned error is always nil.
func (s *ShuguangAi) GetComputeCards(ctx context.Context) ([]string, error) {
	return []string{DCU}, nil
}
func (s *ShuguangAi) DownloadAlgorithmCode(ctx context.Context, resourceType string, card string, taskType string, dataset string, algorithm string) (string, error) { func (s *ShuguangAi) DownloadAlgorithmCode(ctx context.Context, resourceType string, card string, taskType string, dataset string, algorithm string) (string, error) {
algoName := dataset + DASH + algorithm algoName := dataset + DASH + algorithm
req := &hpcAC.GetFileReq{ req := &hpcAC.GetFileReq{