added api getcomputecards
Former-commit-id: 4514161574188e63305fe9066ef4ddad473a56b2
This commit is contained in:
parent
eaf1b1b953
commit
6e82702876
|
@ -2,6 +2,7 @@ package schedule
|
|||
|
||||
import (
|
||||
"context"
|
||||
"strings"
|
||||
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
|
||||
|
@ -26,7 +27,7 @@ func NewDownloadAlgothmCodeLogic(ctx context.Context, svcCtx *svc.ServiceContext
|
|||
func (l *DownloadAlgothmCodeLogic) DownloadAlgorithmCode(req *types.DownloadAlgorithmCodeReq) (resp *types.DownloadAlgorithmCodeResp, err error) {
|
||||
resp = &types.DownloadAlgorithmCodeResp{}
|
||||
code, err := l.svcCtx.Scheduler.AiService.AiCollectorAdapterMap[req.AdapterId][req.ClusterId].DownloadAlgorithmCode(l.ctx,
|
||||
req.ResourceType, req.Card, req.TaskType, req.Dataset, req.Algorithm)
|
||||
req.ResourceType, strings.ToLower(req.Card), req.TaskType, req.Dataset, req.Algorithm)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
|
|
@ -24,7 +24,12 @@ func NewGetComputeCardsByClusterLogic(ctx context.Context, svcCtx *svc.ServiceCo
|
|||
}
|
||||
|
||||
func (l *GetComputeCardsByClusterLogic) GetComputeCardsByCluster(req *types.GetComputeCardsByClusterReq) (resp *types.GetComputeCardsByClusterResp, err error) {
|
||||
// todo: add your logic here and delete this line
|
||||
resp = &types.GetComputeCardsByClusterResp{}
|
||||
cards, err := l.svcCtx.Scheduler.AiService.AiCollectorAdapterMap[req.AdapterId][req.ClusterId].GetComputeCards(l.ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
resp.Cards = cards
|
||||
|
||||
return
|
||||
return resp, nil
|
||||
}
|
||||
|
|
|
@ -10,6 +10,7 @@ type AiCollector interface {
|
|||
GetTrainingTask(ctx context.Context, taskId string) (*Task, error)
|
||||
DownloadAlgorithmCode(ctx context.Context, resourceType string, card string, taskType string, dataset string, algorithm string) (string, error)
|
||||
UploadAlgorithmCode(ctx context.Context, resourceType string, card string, taskType string, dataset string, algorithm string, code string) error
|
||||
GetComputeCards(ctx context.Context) ([]string, error)
|
||||
}
|
||||
|
||||
type ResourceStats struct {
|
||||
|
|
|
@ -162,6 +162,10 @@ func (m *ModelArtsLink) GetAlgorithms(ctx context.Context) ([]*collector.Algorit
|
|||
return nil, nil
|
||||
}
|
||||
|
||||
func (m *ModelArtsLink) GetComputeCards(ctx context.Context) ([]string, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (m *ModelArtsLink) DownloadAlgorithmCode(ctx context.Context, resourceType string, card string, taskType string, dataset string, algorithm string) (string, error) {
|
||||
return "", nil
|
||||
}
|
||||
|
|
|
@ -46,12 +46,14 @@ const (
|
|||
SUIYUAN = "suiyuan"
|
||||
SAILINGSI = "sailingsi"
|
||||
MLU = "MLU"
|
||||
BIV100 = "BI-V100"
|
||||
CAMBRICONMLU290 = 256
|
||||
GCU = "GCU"
|
||||
ENFLAME = "enflame"
|
||||
EnflameT20 = 128
|
||||
BASE_TOPS = 128
|
||||
CAMBRICON = "cambricon"
|
||||
TIANSHU = "天数"
|
||||
TRAIN_CMD = "cd /code; python train.py"
|
||||
VERSION = "V1"
|
||||
DOMAIN = "http://192.168.242.41:8001/"
|
||||
|
@ -59,8 +61,9 @@ const (
|
|||
|
||||
var (
|
||||
cardAliasMap = map[string]string{
|
||||
MLU: CAMBRICON,
|
||||
GCU: ENFLAME,
|
||||
MLU: CAMBRICON,
|
||||
GCU: ENFLAME,
|
||||
BIV100: TIANSHU,
|
||||
}
|
||||
cardTopsMap = map[string]float64{
|
||||
MLU: CAMBRICONMLU290,
|
||||
|
@ -340,11 +343,54 @@ func (o *OctopusLink) GetAlgorithms(ctx context.Context) ([]*collector.Algorithm
|
|||
return algorithms, nil
|
||||
}
|
||||
|
||||
func (o *OctopusLink) GetComputeCards(ctx context.Context) ([]string, error) {
|
||||
var cards []string
|
||||
for s, _ := range cardAliasMap {
|
||||
cards = append(cards, s)
|
||||
}
|
||||
return cards, nil
|
||||
}
|
||||
|
||||
func (o *OctopusLink) DownloadAlgorithmCode(ctx context.Context, resourceType string, card string, taskType string, dataset string, algorithm string) (string, error) {
|
||||
var name string
|
||||
if resourceType == CARD {
|
||||
name = dataset + UNDERSCORE + algorithm + UNDERSCORE + card
|
||||
} else {
|
||||
name = dataset + UNDERSCORE + algorithm + UNDERSCORE + CPU
|
||||
}
|
||||
|
||||
req := &octopus.GetMyAlgorithmListReq{
|
||||
Platform: o.platform,
|
||||
PageIndex: o.pageIndex,
|
||||
PageSize: o.pageSize,
|
||||
}
|
||||
resp, err := o.octopusRpc.GetMyAlgorithmList(ctx, req)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if !resp.Success {
|
||||
return "", errors.New("failed to get algorithmList")
|
||||
}
|
||||
|
||||
var algorithmId string
|
||||
for _, a := range resp.Payload.Algorithms {
|
||||
if strings.ToLower(a.FrameworkName) != taskType {
|
||||
continue
|
||||
}
|
||||
if a.AlgorithmName == name {
|
||||
algorithmId = a.AlgorithmId
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if algorithmId == "" {
|
||||
return "", errors.New("algorithmId not found")
|
||||
}
|
||||
|
||||
dcReq := &octopus.DownloadCompressReq{
|
||||
Platform: o.platform,
|
||||
Version: VERSION,
|
||||
AlgorithmId: "",
|
||||
AlgorithmId: algorithmId,
|
||||
}
|
||||
dcResp, err := o.octopusRpc.DownloadCompress(ctx, dcReq)
|
||||
if err != nil {
|
||||
|
@ -358,7 +404,7 @@ func (o *OctopusLink) DownloadAlgorithmCode(ctx context.Context, resourceType st
|
|||
daReq := &octopus.DownloadAlgorithmReq{
|
||||
Platform: o.platform,
|
||||
Version: VERSION,
|
||||
AlgorithmId: "",
|
||||
AlgorithmId: algorithmId,
|
||||
CompressAt: dcResp.Payload.CompressAt,
|
||||
Domain: DOMAIN,
|
||||
}
|
||||
|
@ -591,16 +637,6 @@ func (o *OctopusLink) generateImageId(ctx context.Context, option *option.AiOpti
|
|||
}
|
||||
|
||||
func (o *OctopusLink) generateAlgorithmId(ctx context.Context, option *option.AiOption) error {
|
||||
// temporarily set algorithm to cnn
|
||||
if option.AlgorithmName == "" {
|
||||
switch option.DatasetsName {
|
||||
case "cifar10":
|
||||
option.AlgorithmName = "cnn"
|
||||
case "mnist":
|
||||
option.AlgorithmName = "fcn"
|
||||
}
|
||||
}
|
||||
|
||||
req := &octopus.GetMyAlgorithmListReq{
|
||||
Platform: o.platform,
|
||||
PageIndex: o.pageIndex,
|
||||
|
|
|
@ -447,6 +447,12 @@ func (s *ShuguangAi) GetAlgorithms(ctx context.Context) ([]*collector.Algorithm,
|
|||
return algorithms, nil
|
||||
}
|
||||
|
||||
func (s *ShuguangAi) GetComputeCards(ctx context.Context) ([]string, error) {
|
||||
var cards []string
|
||||
cards = append(cards, DCU)
|
||||
return cards, nil
|
||||
}
|
||||
|
||||
func (s *ShuguangAi) DownloadAlgorithmCode(ctx context.Context, resourceType string, card string, taskType string, dataset string, algorithm string) (string, error) {
|
||||
algoName := dataset + DASH + algorithm
|
||||
req := &hpcAC.GetFileReq{
|
||||
|
|
Loading…
Reference in New Issue