Merge pull request 'added schedule.api into pcm.api' (#22) from tzwang/pcm-coordinator:master into master
Former-commit-id: 9652fc452b4046735f46279911690deb3735fdaf
This commit is contained in:
commit
a878233ccf
|
@ -8,6 +8,7 @@ import (
|
||||||
"vm/pcm-vm.api"
|
"vm/pcm-vm.api"
|
||||||
"cloud/pcm-cloud.api"
|
"cloud/pcm-cloud.api"
|
||||||
"storelink/pcm-storelink.api"
|
"storelink/pcm-storelink.api"
|
||||||
|
"schedule/pcm-schedule.api"
|
||||||
)
|
)
|
||||||
|
|
||||||
info(
|
info(
|
||||||
|
@ -617,4 +618,26 @@ service pcm {
|
||||||
|
|
||||||
@handler GetClusterHandler
|
@handler GetClusterHandler
|
||||||
get /adapter/cluster/get (ClusterDelReq) returns (ClusterResp)
|
get /adapter/cluster/get (ClusterDelReq) returns (ClusterResp)
|
||||||
|
}
|
||||||
|
|
||||||
|
@server(
|
||||||
|
prefix: pcm/v1
|
||||||
|
group : schedule
|
||||||
|
)
|
||||||
|
|
||||||
|
service pcm {
|
||||||
|
@handler ScheduleGetAiResourceTypesHandler
|
||||||
|
get /schedule/ai/getResourceTypes returns (AiResourceTypesResp)
|
||||||
|
|
||||||
|
@handler ScheduleGetAiTaskTypesHandler
|
||||||
|
get /schedule/ai/getTaskTypes returns (AiTaskTypesResp)
|
||||||
|
|
||||||
|
@handler ScheduleGetDatasetsHandler
|
||||||
|
get /schedule/ai/getDatasets returns (AiDatasetsResp)
|
||||||
|
|
||||||
|
@handler ScheduleGetStrategyHandler
|
||||||
|
get /schedule/ai/getStrategies returns (AiStrategyResp)
|
||||||
|
|
||||||
|
@handler ScheduleSubmitHandler
|
||||||
|
post /schedule/submit (ScheduleResp) returns (ScheduleResp)
|
||||||
}
|
}
|
|
@ -0,0 +1,44 @@
|
||||||
|
syntax = "v1"
|
||||||
|
|
||||||
|
info(
|
||||||
|
title: "schedule"
|
||||||
|
desc: "调度服务"
|
||||||
|
author: "tzwang"
|
||||||
|
email: "tzwang@qq.com"
|
||||||
|
)
|
||||||
|
|
||||||
|
type (
|
||||||
|
ScheduleReq {
|
||||||
|
AiOption *AiOption `json:"aiOption,optional"`
|
||||||
|
}
|
||||||
|
|
||||||
|
ScheduleResp {
|
||||||
|
Success bool `json:"success"`
|
||||||
|
TaskId string `json:"taskId"`
|
||||||
|
ClusterId string `json:"clusterId"`
|
||||||
|
ErrorMsg string `json:"errorMsg"`
|
||||||
|
}
|
||||||
|
|
||||||
|
AiOption {
|
||||||
|
ResourceType string `json:"resourceType"`
|
||||||
|
TaskType string `json:"taskType"`
|
||||||
|
Datasets string `json:"datasets"`
|
||||||
|
Strategy string `json:"strategy"`
|
||||||
|
}
|
||||||
|
|
||||||
|
AiResourceTypesResp {
|
||||||
|
ResourceTypes []string `json:"resourceTypes"`
|
||||||
|
}
|
||||||
|
|
||||||
|
AiTaskTypesResp {
|
||||||
|
TaskTypes []string `json:"taskTypes"`
|
||||||
|
}
|
||||||
|
|
||||||
|
AiDatasetsResp {
|
||||||
|
Datasets []string `json:"datasets"`
|
||||||
|
}
|
||||||
|
|
||||||
|
AiStrategyResp {
|
||||||
|
Strategies []string `json:"strategies"`
|
||||||
|
}
|
||||||
|
)
|
|
@ -67,7 +67,7 @@ func (l *SubmitLinkTaskLogic) SubmitLinkTask(req *types.SubmitLinkTaskReq) (resp
|
||||||
envs = append(envs, env)
|
envs = append(envs, env)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
task, err := storelink.ILinkage.SubmitTask(req.ImageId, req.Cmd, envs, params, req.ResourceId, "")
|
task, err := storelink.ILinkage.SubmitTask(req.ImageId, req.Cmd, envs, params, req.ResourceId, "pytorch")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
|
@ -37,13 +37,12 @@ func NewAiMq(ctx context.Context, svcCtx *svc.ServiceContext) *AiQueue {
|
||||||
return &AiQueue{
|
return &AiQueue{
|
||||||
ctx: ctx,
|
ctx: ctx,
|
||||||
svcCtx: svcCtx,
|
svcCtx: svcCtx,
|
||||||
scheduler: scheduler.NewScheduler2(aiCollectorMap, nil, aiExecutorMap),
|
scheduler: scheduler.NewSchdlr(aiCollectorMap, nil, aiExecutorMap),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (l *AiQueue) Consume(val string) error {
|
func (l *AiQueue) Consume(val string) error {
|
||||||
// 接受消息, 根据标签筛选过滤
|
aiSchdl, _ := schedulers.NewAiScheduler(val, l.scheduler, nil)
|
||||||
aiSchdl, _ := schedulers.NewAiScheduler(val, l.scheduler)
|
|
||||||
|
|
||||||
// 调度算法
|
// 调度算法
|
||||||
err := l.scheduler.AssignAndSchedule(aiSchdl)
|
err := l.scheduler.AssignAndSchedule(aiSchdl)
|
||||||
|
|
|
@ -44,6 +44,34 @@ func Intersect(slice1, slice2 []int64) []int64 {
|
||||||
return nn
|
return nn
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func IntersectString(slice1, slice2 []string) []string {
|
||||||
|
k := make(map[string]int)
|
||||||
|
for _, num := range slice1 {
|
||||||
|
k[num]++
|
||||||
|
}
|
||||||
|
var output []string
|
||||||
|
for _, num := range slice2 {
|
||||||
|
if k[num] > 0 {
|
||||||
|
output = append(output, num)
|
||||||
|
k[num]--
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return output
|
||||||
|
}
|
||||||
|
|
||||||
|
func RemoveDuplicates(slc []string) []string {
|
||||||
|
keys := make(map[string]bool)
|
||||||
|
list := []string{}
|
||||||
|
|
||||||
|
for _, entry := range slc {
|
||||||
|
if _, value := keys[entry]; !value {
|
||||||
|
keys[entry] = true
|
||||||
|
list = append(list, entry)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return list
|
||||||
|
}
|
||||||
|
|
||||||
func MicsSlice(origin []int64, count int) []int64 {
|
func MicsSlice(origin []int64, count int) []int64 {
|
||||||
tmpOrigin := make([]int64, len(origin))
|
tmpOrigin := make([]int64, len(origin))
|
||||||
copy(tmpOrigin, origin)
|
copy(tmpOrigin, origin)
|
||||||
|
|
|
@ -1,8 +1,19 @@
|
||||||
package database
|
package database
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/zeromicro/go-zero/core/logx"
|
||||||
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/types"
|
||||||
|
"gorm.io/gorm"
|
||||||
|
)
|
||||||
|
|
||||||
type AiStorage struct {
|
type AiStorage struct {
|
||||||
|
DbEngin *gorm.DB
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *AiStorage) getParticipants() {
|
func (s *AiStorage) GetParticipants() {
|
||||||
|
var resp types.ClusterListResp
|
||||||
|
tx := s.DbEngin.Raw("select * from t_cluster where `deleted_at` IS NULL ORDER BY create_time Desc").Scan(&resp.Data)
|
||||||
|
if tx.Error != nil {
|
||||||
|
logx.Errorf(tx.Error.Error())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -37,7 +37,7 @@ type Scheduler struct {
|
||||||
dbEngin *gorm.DB
|
dbEngin *gorm.DB
|
||||||
result []string //pID:子任务yamlstring 键值对
|
result []string //pID:子任务yamlstring 键值对
|
||||||
participantRpc participantservice.ParticipantService
|
participantRpc participantservice.ParticipantService
|
||||||
ResourceCollector *map[string]collector.ResourceCollector
|
ResourceCollector *map[string]collector.AiCollector
|
||||||
Storages database.Storage
|
Storages database.Storage
|
||||||
AiExecutor *map[string]executor.AiExecutor
|
AiExecutor *map[string]executor.AiExecutor
|
||||||
mu sync.RWMutex
|
mu sync.RWMutex
|
||||||
|
@ -52,7 +52,7 @@ func NewScheduler(subSchedule common.SubSchedule, val string, dbEngin *gorm.DB,
|
||||||
return &Scheduler{task: task, subSchedule: subSchedule, dbEngin: dbEngin, participantRpc: participantRpc}, nil
|
return &Scheduler{task: task, subSchedule: subSchedule, dbEngin: dbEngin, participantRpc: participantRpc}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewScheduler2(resourceCollector *map[string]collector.ResourceCollector, storages database.Storage, aiExecutor *map[string]executor.AiExecutor) *Scheduler {
|
func NewSchdlr(resourceCollector *map[string]collector.AiCollector, storages database.Storage, aiExecutor *map[string]executor.AiExecutor) *Scheduler {
|
||||||
return &Scheduler{ResourceCollector: resourceCollector, Storages: storages, AiExecutor: aiExecutor}
|
return &Scheduler{ResourceCollector: resourceCollector, Storages: storages, AiExecutor: aiExecutor}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -33,8 +33,8 @@ type AiScheduler struct {
|
||||||
option *option.AiOption
|
option *option.AiOption
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewAiScheduler(val string, scheduler *scheduler.Scheduler) (*AiScheduler, error) {
|
func NewAiScheduler(val string, scheduler *scheduler.Scheduler, option *option.AiOption) (*AiScheduler, error) {
|
||||||
return &AiScheduler{yamlString: val, Scheduler: scheduler}, nil
|
return &AiScheduler{yamlString: val, Scheduler: scheduler, option: option}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (as *AiScheduler) GetNewStructForDb(task *response.TaskInfo, resource string, participantId int64) (interface{}, error) {
|
func (as *AiScheduler) GetNewStructForDb(task *response.TaskInfo, resource string, participantId int64) (interface{}, error) {
|
||||||
|
@ -49,6 +49,11 @@ func (as *AiScheduler) GetNewStructForDb(task *response.TaskInfo, resource strin
|
||||||
}
|
}
|
||||||
|
|
||||||
func (as *AiScheduler) PickOptimalStrategy() (strategy.Strategy, error) {
|
func (as *AiScheduler) PickOptimalStrategy() (strategy.Strategy, error) {
|
||||||
|
if as.option.AiClusterId != "" {
|
||||||
|
// TODO database operation Find
|
||||||
|
return &strategy.SingleAssignment{Cluster: &strategy.AssignedCluster{ParticipantId: 0, Name: "", Replicas: 1}}, nil
|
||||||
|
}
|
||||||
|
|
||||||
resources, err := as.findClustersWithResources()
|
resources, err := as.findClustersWithResources()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
@ -58,13 +63,20 @@ func (as *AiScheduler) PickOptimalStrategy() (strategy.Strategy, error) {
|
||||||
}
|
}
|
||||||
params := ¶m.Params{Resources: resources}
|
params := ¶m.Params{Resources: resources}
|
||||||
|
|
||||||
if len(resources) < 2 /*|| as.task */ {
|
if len(resources) == 1 {
|
||||||
strategy := strategy.NewReplicationStrategy(¶m.ReplicationParams{Params: params /*, Replicas: 1*/})
|
return &strategy.SingleAssignment{Cluster: &strategy.AssignedCluster{ParticipantId: 0, Name: "", Replicas: 1}}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
switch as.option.StrategyName {
|
||||||
|
case strategy.REPLICATION:
|
||||||
|
strategy := strategy.NewReplicationStrategy(¶m.ReplicationParams{Params: params, Replicas: 1})
|
||||||
|
return strategy, nil
|
||||||
|
case strategy.RESOURCE_PRICING:
|
||||||
|
strategy := strategy.NewPricingStrategy(¶m.ResourcePricingParams{Params: params, Replicas: 1})
|
||||||
return strategy, nil
|
return strategy, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
strategy := strategy.NewPricingStrategy(¶m.ResourcePricingParams{Params: params})
|
return nil, errors.New("no strategy has been chosen")
|
||||||
return strategy, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) error {
|
func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) error {
|
||||||
|
@ -84,10 +96,10 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (as *AiScheduler) findClustersWithResources() ([]*collector.ResourceSpecs, error) {
|
func (as *AiScheduler) findClustersWithResources() ([]*collector.ResourceStats, error) {
|
||||||
var resourceSpecs []*collector.ResourceSpecs
|
var resourceSpecs []*collector.ResourceStats
|
||||||
for _, resourceCollector := range *as.ResourceCollector {
|
for _, resourceCollector := range *as.ResourceCollector {
|
||||||
spec, err := resourceCollector.GetResourceSpecs()
|
spec, err := resourceCollector.GetResourceStats()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,13 +1,17 @@
|
||||||
package option
|
package option
|
||||||
|
|
||||||
type AiOption struct {
|
type AiOption struct {
|
||||||
AiType string // shuguangAi/octopus
|
AiClusterId string // shuguangAi /octopus ClusterId
|
||||||
ResourceType string // cpu/gpu/compute card
|
ResourceType string // cpu/gpu/compute card
|
||||||
TaskType string // pytorch/tensorflow
|
TaskType string // pytorch/tensorflow/mindspore
|
||||||
|
DatasetsName string // mnist/imageNet/iris
|
||||||
|
StrategyName string
|
||||||
|
ClusterToStaticWeight map[string]int32
|
||||||
|
CodeType string
|
||||||
|
|
||||||
ImageId string
|
ImageId string
|
||||||
SpecId string
|
SpecId string
|
||||||
DatasetsId string
|
//DatasetsId string
|
||||||
CodeId string
|
CodeId string
|
||||||
ResourceId string
|
ResourceId string
|
||||||
|
|
||||||
|
@ -17,4 +21,5 @@ type AiOption struct {
|
||||||
|
|
||||||
Datasets string
|
Datasets string
|
||||||
Code string
|
Code string
|
||||||
|
Model interface{}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,5 @@
|
||||||
|
package option
|
||||||
|
|
||||||
|
type Option struct {
|
||||||
|
Name string
|
||||||
|
}
|
|
@ -9,24 +9,24 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
OCTOPUS = "Octopus"
|
OCTOPUS = "octopus"
|
||||||
MODELARTS = "Modelarts"
|
MODELARTS = "modelarts"
|
||||||
SHUGUANGAI = "ShuguangAi"
|
SHUGUANGAI = "shuguangAi"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
AiTypeMap = map[string]string{
|
AiTypeMap = map[string]string{
|
||||||
"Hanwuji": OCTOPUS,
|
"hanwuji": OCTOPUS,
|
||||||
"Suiyan": OCTOPUS,
|
"suiyan": OCTOPUS,
|
||||||
"Sailingsi": OCTOPUS,
|
"sailingsi": OCTOPUS,
|
||||||
"Modelarts-CloudBrain2": MODELARTS,
|
"modelarts-CloudBrain2": MODELARTS,
|
||||||
"ShuguangAi": SHUGUANGAI,
|
"shuguangAi": SHUGUANGAI,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
func InitAiClusterMap(ctx context.Context, svcCtx *svc.ServiceContext) (*map[string]executor.AiExecutor, *map[string]collector.ResourceCollector) {
|
func InitAiClusterMap(ctx context.Context, svcCtx *svc.ServiceContext) (*map[string]executor.AiExecutor, *map[string]collector.AiCollector) {
|
||||||
executorMap := make(map[string]executor.AiExecutor)
|
executorMap := make(map[string]executor.AiExecutor)
|
||||||
collectorMap := make(map[string]collector.ResourceCollector)
|
collectorMap := make(map[string]collector.AiCollector)
|
||||||
for k, v := range AiTypeMap {
|
for k, v := range AiTypeMap {
|
||||||
switch v {
|
switch v {
|
||||||
case OCTOPUS:
|
case OCTOPUS:
|
||||||
|
|
|
@ -1,10 +1,11 @@
|
||||||
package collector
|
package collector
|
||||||
|
|
||||||
type ResourceCollector interface {
|
type AiCollector interface {
|
||||||
GetResourceSpecs() (*ResourceSpecs, error)
|
GetResourceStats() (*ResourceStats, error)
|
||||||
|
GetDatasetsSpecs() ([]*DatasetsSpecs, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
type ResourceSpecs struct {
|
type ResourceStats struct {
|
||||||
ParticipantId int64
|
ParticipantId int64
|
||||||
Name string
|
Name string
|
||||||
CpuAvail float64
|
CpuAvail float64
|
||||||
|
@ -20,3 +21,8 @@ type Card struct {
|
||||||
Name string
|
Name string
|
||||||
TOpsAtFp16 float64
|
TOpsAtFp16 float64
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type DatasetsSpecs struct {
|
||||||
|
Name string
|
||||||
|
Size string
|
||||||
|
}
|
||||||
|
|
|
@ -2,10 +2,8 @@ package executor
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/schedulers/option"
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/schedulers/option"
|
||||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/storeLink"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type AiExecutor interface {
|
type AiExecutor interface {
|
||||||
Execute(option *option.AiOption) (interface{}, error)
|
Execute(option *option.AiOption) (interface{}, error)
|
||||||
storeLink.Linkage
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,5 +5,5 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
type Params struct {
|
type Params struct {
|
||||||
Resources []*collector.ResourceSpecs
|
Resources []*collector.ResourceStats
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,13 +5,13 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
type ResourcePricingParams struct {
|
type ResourcePricingParams struct {
|
||||||
replicas int32
|
Replicas int32
|
||||||
task *providerPricing.Task
|
task *providerPricing.Task
|
||||||
*Params
|
*Params
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *ResourcePricingParams) GetReplicas() int32 {
|
func (r *ResourcePricingParams) GetReplicas() int32 {
|
||||||
return r.replicas
|
return r.Replicas
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *ResourcePricingParams) GetTask() *providerPricing.Task {
|
func (r *ResourcePricingParams) GetTask() *providerPricing.Task {
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
package strategy
|
package strategy
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/pkg/errors"
|
"errors"
|
||||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/entity"
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/entity"
|
||||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/strategy/param"
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/strategy/param"
|
||||||
)
|
)
|
||||||
|
|
|
@ -0,0 +1,11 @@
|
||||||
|
package strategy
|
||||||
|
|
||||||
|
type SingleAssignment struct {
|
||||||
|
Cluster *AssignedCluster
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *SingleAssignment) Schedule() ([]*AssignedCluster, error) {
|
||||||
|
var results []*AssignedCluster
|
||||||
|
results = append(results, s.Cluster)
|
||||||
|
return results, nil
|
||||||
|
}
|
|
@ -1,5 +1,16 @@
|
||||||
package strategy
|
package strategy
|
||||||
|
|
||||||
|
const (
|
||||||
|
REPLICATION = "replication"
|
||||||
|
RESOURCE_PRICING = "resourcePricing"
|
||||||
|
STATIC_WEIGHT = "staticWeight"
|
||||||
|
DYNAMIC_WEIGHT = "dynamicWeight"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
strategyNames = []string{REPLICATION, RESOURCE_PRICING}
|
||||||
|
)
|
||||||
|
|
||||||
type Strategy interface {
|
type Strategy interface {
|
||||||
Schedule() ([]*AssignedCluster, error)
|
Schedule() ([]*AssignedCluster, error)
|
||||||
}
|
}
|
||||||
|
@ -9,3 +20,7 @@ type AssignedCluster struct {
|
||||||
Name string
|
Name string
|
||||||
Replicas int32
|
Replicas int32
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func GetStrategyNames() []string {
|
||||||
|
return strategyNames
|
||||||
|
}
|
||||||
|
|
|
@ -38,24 +38,24 @@ func NewModelArtsLink(ctx context.Context, svcCtx *svc.ServiceContext, name stri
|
||||||
return &ModelArtsLink{ctx: ctx, svcCtx: svcCtx, platform: name, participantId: id, pageIndex: 1, pageSize: 100}
|
return &ModelArtsLink{ctx: ctx, svcCtx: svcCtx, platform: name, participantId: id, pageIndex: 1, pageSize: 100}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o *ModelArtsLink) UploadImage(path string) (interface{}, error) {
|
func (m *ModelArtsLink) UploadImage(path string) (interface{}, error) {
|
||||||
//TODO modelArts上传镜像
|
//TODO modelArts上传镜像
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o *ModelArtsLink) DeleteImage(imageId string) (interface{}, error) {
|
func (m *ModelArtsLink) DeleteImage(imageId string) (interface{}, error) {
|
||||||
// TODO modelArts删除镜像
|
// TODO modelArts删除镜像
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o *ModelArtsLink) QueryImageList() (interface{}, error) {
|
func (m *ModelArtsLink) QueryImageList() (interface{}, error) {
|
||||||
// modelArts获取镜像列表
|
// modelArts获取镜像列表
|
||||||
req := &modelarts.ListRepoReq{
|
req := &modelarts.ListRepoReq{
|
||||||
Offset: "0",
|
Offset: "0",
|
||||||
Limit: strconv.Itoa(int(o.pageSize)),
|
Limit: strconv.Itoa(int(m.pageSize)),
|
||||||
Platform: o.platform,
|
Platform: m.platform,
|
||||||
}
|
}
|
||||||
resp, err := o.svcCtx.ModelArtsImgRpc.ListReposDetails(o.ctx, req)
|
resp, err := m.svcCtx.ModelArtsImgRpc.ListReposDetails(m.ctx, req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
@ -63,7 +63,7 @@ func (o *ModelArtsLink) QueryImageList() (interface{}, error) {
|
||||||
return resp, nil
|
return resp, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o *ModelArtsLink) SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string, aiType string) (interface{}, error) {
|
func (m *ModelArtsLink) SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string, aiType string) (interface{}, error) {
|
||||||
// modelArts提交任务
|
// modelArts提交任务
|
||||||
environments := make(map[string]string)
|
environments := make(map[string]string)
|
||||||
parameters := make([]*modelarts.ParametersTrainJob, 0)
|
parameters := make([]*modelarts.ParametersTrainJob, 0)
|
||||||
|
@ -98,9 +98,9 @@ func (o *ModelArtsLink) SubmitTask(imageId string, cmd string, envs []string, pa
|
||||||
NodeCount: 1,
|
NodeCount: 1,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
Platform: o.platform,
|
Platform: m.platform,
|
||||||
}
|
}
|
||||||
resp, err := o.svcCtx.ModelArtsRpc.CreateTrainingJob(o.ctx, req)
|
resp, err := m.svcCtx.ModelArtsRpc.CreateTrainingJob(m.ctx, req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
@ -108,13 +108,13 @@ func (o *ModelArtsLink) SubmitTask(imageId string, cmd string, envs []string, pa
|
||||||
return resp, nil
|
return resp, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o *ModelArtsLink) QueryTask(taskId string) (interface{}, error) {
|
func (m *ModelArtsLink) QueryTask(taskId string) (interface{}, error) {
|
||||||
// 获取任务
|
// 获取任务
|
||||||
req := &modelarts.DetailTrainingJobsReq{
|
req := &modelarts.DetailTrainingJobsReq{
|
||||||
TrainingJobId: taskId,
|
TrainingJobId: taskId,
|
||||||
Platform: o.platform,
|
Platform: m.platform,
|
||||||
}
|
}
|
||||||
resp, err := o.svcCtx.ModelArtsRpc.GetTrainingJobs(o.ctx, req)
|
resp, err := m.svcCtx.ModelArtsRpc.GetTrainingJobs(m.ctx, req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
@ -122,13 +122,13 @@ func (o *ModelArtsLink) QueryTask(taskId string) (interface{}, error) {
|
||||||
return resp, nil
|
return resp, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o *ModelArtsLink) DeleteTask(taskId string) (interface{}, error) {
|
func (m *ModelArtsLink) DeleteTask(taskId string) (interface{}, error) {
|
||||||
// 删除任务
|
// 删除任务
|
||||||
req := &modelarts.DeleteTrainingJobReq{
|
req := &modelarts.DeleteTrainingJobReq{
|
||||||
TrainingJobId: taskId,
|
TrainingJobId: taskId,
|
||||||
Platform: o.platform,
|
Platform: m.platform,
|
||||||
}
|
}
|
||||||
resp, err := o.svcCtx.ModelArtsRpc.DeleteTrainingJob(o.ctx, req)
|
resp, err := m.svcCtx.ModelArtsRpc.DeleteTrainingJob(m.ctx, req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
@ -136,12 +136,12 @@ func (o *ModelArtsLink) DeleteTask(taskId string) (interface{}, error) {
|
||||||
return resp, nil
|
return resp, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o *ModelArtsLink) QuerySpecs() (interface{}, error) {
|
func (m *ModelArtsLink) QuerySpecs() (interface{}, error) {
|
||||||
// octopus查询资源规格
|
// octopus查询资源规格
|
||||||
req := &modelarts.TrainingJobFlavorsReq{
|
req := &modelarts.TrainingJobFlavorsReq{
|
||||||
Platform: o.platform,
|
Platform: m.platform,
|
||||||
}
|
}
|
||||||
resp, err := o.svcCtx.ModelArtsRpc.GetTrainingJobFlavors(o.ctx, req)
|
resp, err := m.svcCtx.ModelArtsRpc.GetTrainingJobFlavors(m.ctx, req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
@ -149,14 +149,74 @@ func (o *ModelArtsLink) QuerySpecs() (interface{}, error) {
|
||||||
return resp, nil
|
return resp, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o *ModelArtsLink) GetResourceSpecs() (*collector.ResourceSpecs, error) {
|
func (m *ModelArtsLink) GetResourceStats() (*collector.ResourceStats, error) {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o *ModelArtsLink) Execute(option *option.AiOption) (interface{}, error) {
|
func (m *ModelArtsLink) GetDatasetsSpecs() ([]*collector.DatasetsSpecs, error) {
|
||||||
task, err := o.SubmitTask(option.ImageId, option.Cmd, option.Envs, option.Params, option.ResourceId, option.AiType)
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *ModelArtsLink) Execute(option *option.AiOption) (interface{}, error) {
|
||||||
|
err := m.GenerateSubmitParams(option)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
task, err := m.SubmitTask(option.ImageId, option.Cmd, option.Envs, option.Params, option.ResourceId, option.TaskType)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
return task, nil
|
return task, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (m *ModelArtsLink) GenerateSubmitParams(option *option.AiOption) error {
|
||||||
|
err := m.generateResourceId(option)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
err = m.generateImageId(option)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
err = m.generateCmd(option)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
err = m.generateEnv(option)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
err = m.generateParams(option)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *ModelArtsLink) generateResourceId(option *option.AiOption) error {
|
||||||
|
_, err := m.QuerySpecs()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *ModelArtsLink) generateImageId(option *option.AiOption) error {
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *ModelArtsLink) generateCmd(option *option.AiOption) error {
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *ModelArtsLink) generateEnv(option *option.AiOption) error {
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *ModelArtsLink) generateParams(option *option.AiOption) error {
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
|
@ -16,6 +16,7 @@ package storeLink
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"errors"
|
||||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/schedulers/option"
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/schedulers/option"
|
||||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/collector"
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/collector"
|
||||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
|
||||||
|
@ -196,14 +197,88 @@ func (o *OctopusLink) QuerySpecs() (interface{}, error) {
|
||||||
return resp, nil
|
return resp, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o *OctopusLink) GetResourceSpecs() (*collector.ResourceSpecs, error) {
|
func (o *OctopusLink) GetResourceStats() (*collector.ResourceStats, error) {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (o *OctopusLink) GetDatasetsSpecs() ([]*collector.DatasetsSpecs, error) {
|
||||||
|
req := &octopus.GetMyDatasetListReq{
|
||||||
|
Platform: o.platform,
|
||||||
|
PageIndex: o.pageIndex,
|
||||||
|
PageSize: o.pageSize,
|
||||||
|
}
|
||||||
|
resp, err := o.svcCtx.OctopusRpc.GetMyDatasetList(o.ctx, req)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if !resp.Success {
|
||||||
|
return nil, errors.New(resp.Error.Message)
|
||||||
|
}
|
||||||
|
specs := []*collector.DatasetsSpecs{}
|
||||||
|
for _, dataset := range resp.Payload.Datasets {
|
||||||
|
spec := &collector.DatasetsSpecs{Name: dataset.Name}
|
||||||
|
specs = append(specs, spec)
|
||||||
|
}
|
||||||
|
return specs, nil
|
||||||
|
}
|
||||||
|
|
||||||
func (o *OctopusLink) Execute(option *option.AiOption) (interface{}, error) {
|
func (o *OctopusLink) Execute(option *option.AiOption) (interface{}, error) {
|
||||||
task, err := o.SubmitTask(option.ImageId, option.Cmd, option.Envs, option.Params, option.ResourceId, option.AiType)
|
err := o.GenerateSubmitParams(option)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
task, err := o.SubmitTask(option.ImageId, option.Cmd, option.Envs, option.Params, option.ResourceId, option.TaskType)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
return task, nil
|
return task, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (o *OctopusLink) GenerateSubmitParams(option *option.AiOption) error {
|
||||||
|
err := o.generateResourceId(option)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
err = o.generateImageId(option)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
err = o.generateCmd(option)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
err = o.generateEnv(option)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
err = o.generateParams(option)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (o *OctopusLink) generateResourceId(option *option.AiOption) error {
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (o *OctopusLink) generateImageId(option *option.AiOption) error {
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (o *OctopusLink) generateCmd(option *option.AiOption) error {
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (o *OctopusLink) generateEnv(option *option.AiOption) error {
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (o *OctopusLink) generateParams(option *option.AiOption) error {
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
|
@ -48,6 +48,7 @@ const (
|
||||||
WorkPath = "/work/home/acgnnmfbwo/111111/py/"
|
WorkPath = "/work/home/acgnnmfbwo/111111/py/"
|
||||||
TimeoutLimit = "10:00:00"
|
TimeoutLimit = "10:00:00"
|
||||||
PythonCodePath = "/work/home/acgnnmfbwo/111111/py/test.py"
|
PythonCodePath = "/work/home/acgnnmfbwo/111111/py/test.py"
|
||||||
|
DATASETS_DIR = "/work/home/acgnnmfbwo/pcmv1/dataset"
|
||||||
)
|
)
|
||||||
|
|
||||||
func NewShuguangAi(ctx context.Context, svcCtx *svc.ServiceContext, name string, id int64) *ShuguangAi {
|
func NewShuguangAi(ctx context.Context, svcCtx *svc.ServiceContext, name string, id int64) *ShuguangAi {
|
||||||
|
@ -131,14 +132,30 @@ func (s *ShuguangAi) SubmitPytorchTask(imageId string, cmd string, envs []string
|
||||||
return resp, nil
|
return resp, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *ShuguangAi) SubmitTensorflowTask(imageId string, cmd string, envs []string, params []string, resourceId string) (interface{}, error) {
|
||||||
|
//req := &hpcAC.SubmitTensorflowTaskReq{
|
||||||
|
// Params: &hpcAC.SubmitTensorflowTaskParams{
|
||||||
|
//
|
||||||
|
// }
|
||||||
|
//}
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
func (s *ShuguangAi) SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string, aiType string) (interface{}, error) {
|
func (s *ShuguangAi) SubmitTask(imageId string, cmd string, envs []string, params []string, resourceId string, aiType string) (interface{}, error) {
|
||||||
// shuguangAi提交任务
|
// shuguangAi提交任务
|
||||||
if aiType == PYTORCH {
|
switch aiType {
|
||||||
|
case PYTORCH_TASK:
|
||||||
task, err := s.SubmitPytorchTask(imageId, cmd, envs, params, resourceId)
|
task, err := s.SubmitPytorchTask(imageId, cmd, envs, params, resourceId)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
return task, nil
|
return task, nil
|
||||||
|
case TENSORFLOW_TASK:
|
||||||
|
task, err := s.SubmitTensorflowTask(imageId, cmd, envs, params, resourceId)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return task, nil
|
||||||
}
|
}
|
||||||
return nil, errors.New("shuguangAi不支持的任务类型")
|
return nil, errors.New("shuguangAi不支持的任务类型")
|
||||||
}
|
}
|
||||||
|
@ -169,13 +186,13 @@ func (s *ShuguangAi) DeleteTask(taskId string) (interface{}, error) {
|
||||||
return resp, nil
|
return resp, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o *ShuguangAi) QuerySpecs() (interface{}, error) {
|
func (s *ShuguangAi) QuerySpecs() (interface{}, error) {
|
||||||
// ShuguangAi查询资源规格
|
// ShuguangAi查询资源规格
|
||||||
req := &hpcAC.GetResourceSpecReq{
|
req := &hpcAC.GetResourceSpecReq{
|
||||||
AcceleratorType: DCU,
|
AcceleratorType: DCU,
|
||||||
ResourceGroup: RESOURCE_GROUP,
|
ResourceGroup: RESOURCE_GROUP,
|
||||||
}
|
}
|
||||||
specs, err := o.svcCtx.ACRpc.GetResourceSpec(o.ctx, req)
|
specs, err := s.svcCtx.ACRpc.GetResourceSpec(s.ctx, req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
@ -183,36 +200,106 @@ func (o *ShuguangAi) QuerySpecs() (interface{}, error) {
|
||||||
return specs, nil
|
return specs, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o *ShuguangAi) GetResourceSpecs() (*collector.ResourceSpecs, error) {
|
func (s *ShuguangAi) GetResourceStats() (*collector.ResourceStats, error) {
|
||||||
userReq := &hpcAC.GetUserInfoReq{}
|
userReq := &hpcAC.GetUserInfoReq{}
|
||||||
userinfo, err := o.svcCtx.ACRpc.GetUserInfo(o.ctx, userReq)
|
userinfo, err := s.svcCtx.ACRpc.GetUserInfo(s.ctx, userReq)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
limitReq := &hpcAC.QueueReq{}
|
limitReq := &hpcAC.QueueReq{}
|
||||||
_, err = o.svcCtx.ACRpc.QueryUserQuotasLimit(o.ctx, limitReq)
|
_, err = s.svcCtx.ACRpc.QueryUserQuotasLimit(s.ctx, limitReq)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
diskReq := &hpcAC.ParaStorQuotaReq{}
|
diskReq := &hpcAC.ParaStorQuotaReq{}
|
||||||
_, err = o.svcCtx.ACRpc.ParaStorQuota(o.ctx, diskReq)
|
_, err = s.svcCtx.ACRpc.ParaStorQuota(s.ctx, diskReq)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
balance, _ := strconv.ParseFloat(userinfo.Data.AccountBalance, 64)
|
balance, _ := strconv.ParseFloat(userinfo.Data.AccountBalance, 64)
|
||||||
_ = &collector.ResourceSpecs{
|
_ = &collector.ResourceStats{
|
||||||
ParticipantId: o.participantId,
|
ParticipantId: s.participantId,
|
||||||
Name: o.platform,
|
Name: s.platform,
|
||||||
Balance: balance,
|
Balance: balance,
|
||||||
}
|
}
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (o *ShuguangAi) Execute(option *option.AiOption) (interface{}, error) {
|
func (s *ShuguangAi) GetDatasetsSpecs() ([]*collector.DatasetsSpecs, error) {
|
||||||
task, err := o.SubmitTask(option.ImageId, option.Cmd, option.Envs, option.Params, option.ResourceId, option.AiType)
|
req := &hpcAC.GetFileListReq{Limit: 100, Path: DATASETS_DIR, Start: 0}
|
||||||
|
list, err := s.svcCtx.ACRpc.GetFileList(s.ctx, req)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if list.Code != "0" {
|
||||||
|
return nil, errors.New(list.Msg)
|
||||||
|
}
|
||||||
|
specs := []*collector.DatasetsSpecs{}
|
||||||
|
for _, file := range list.Data.FileList {
|
||||||
|
spec := &collector.DatasetsSpecs{Name: file.Name, Size: strconv.FormatInt(file.Size, 10)}
|
||||||
|
specs = append(specs, spec)
|
||||||
|
}
|
||||||
|
return specs, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *ShuguangAi) Execute(option *option.AiOption) (interface{}, error) {
|
||||||
|
err := s.GenerateSubmitParams(option)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
task, err := s.SubmitTask(option.ImageId, option.Cmd, option.Envs, option.Params, option.ResourceId, option.TaskType)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
return task, nil
|
return task, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *ShuguangAi) GenerateSubmitParams(option *option.AiOption) error {
|
||||||
|
err := s.generateResourceId(option)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
err = s.generateImageId(option)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
err = s.generateCmd(option)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
err = s.generateEnv(option)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
err = s.generateParams(option)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *ShuguangAi) generateResourceId(option *option.AiOption) error {
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *ShuguangAi) generateImageId(option *option.AiOption) error {
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *ShuguangAi) generateCmd(option *option.AiOption) error {
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *ShuguangAi) generateEnv(option *option.AiOption) error {
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *ShuguangAi) generateParams(option *option.AiOption) error {
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
|
@ -18,6 +18,8 @@ import (
|
||||||
"context"
|
"context"
|
||||||
"github.com/pkg/errors"
|
"github.com/pkg/errors"
|
||||||
"gitlink.org.cn/jcce-pcm/pcm-ac/hpcAC"
|
"gitlink.org.cn/jcce-pcm/pcm-ac/hpcAC"
|
||||||
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/common"
|
||||||
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/scheduler/service/collector"
|
||||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
|
||||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/types"
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/types"
|
||||||
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/models"
|
"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/models"
|
||||||
|
@ -51,6 +53,11 @@ const (
|
||||||
MODELARTS = "Modelarts"
|
MODELARTS = "Modelarts"
|
||||||
SHUGUANGAI = "ShuguangAi"
|
SHUGUANGAI = "ShuguangAi"
|
||||||
SHUGUANGHPC = "ShuguangHpc"
|
SHUGUANGHPC = "ShuguangHpc"
|
||||||
|
CPU = "cpu"
|
||||||
|
GPU = "gpu"
|
||||||
|
CARD = "computeCard"
|
||||||
|
PYTORCH_TASK = "pytorch"
|
||||||
|
TENSORFLOW_TASK = "tensorflow"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
@ -65,6 +72,9 @@ var (
|
||||||
"3": SHUGUANGAI,
|
"3": SHUGUANGAI,
|
||||||
"4": SHUGUANGHPC,
|
"4": SHUGUANGHPC,
|
||||||
}
|
}
|
||||||
|
resourceTypes = []string{CPU, GPU, CARD}
|
||||||
|
taskTypes = []string{PYTORCH_TASK, TENSORFLOW_TASK}
|
||||||
|
|
||||||
ERROR_RESP_EMPTY = errors.New("resp empty error")
|
ERROR_RESP_EMPTY = errors.New("resp empty error")
|
||||||
ERROR_CONVERT_EMPTY = errors.New("convert empty error")
|
ERROR_CONVERT_EMPTY = errors.New("convert empty error")
|
||||||
)
|
)
|
||||||
|
@ -104,6 +114,44 @@ func GetParticipantById(partId int64, dbEngin *gorm.DB) *models.StorelinkCenter
|
||||||
return &participant
|
return &participant
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func GetResourceTypes() []string {
|
||||||
|
return resourceTypes
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetDatasetsNames(collectorMap *map[string]collector.AiCollector) ([]string, error) {
|
||||||
|
var names []string
|
||||||
|
//errCount := 0
|
||||||
|
colMap := *collectorMap
|
||||||
|
for _, col := range colMap {
|
||||||
|
var ns []string
|
||||||
|
specs, err := col.GetDatasetsSpecs()
|
||||||
|
if err != nil {
|
||||||
|
return nil, errors.New("failed to acquire datasets list")
|
||||||
|
}
|
||||||
|
for _, spec := range specs {
|
||||||
|
ns = append(ns, spec.Name)
|
||||||
|
}
|
||||||
|
if len(ns) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if len(names) == 0 {
|
||||||
|
names = ns
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
names = common.IntersectString(names, ns)
|
||||||
|
}
|
||||||
|
//if (len(*collectorMap) - errCount) < 2 {
|
||||||
|
//
|
||||||
|
//}
|
||||||
|
names = common.RemoveDuplicates(names)
|
||||||
|
return names, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func GetTaskTypes() []string {
|
||||||
|
return taskTypes
|
||||||
|
}
|
||||||
|
|
||||||
func ConvertType(in interface{}, out interface{}, participant *models.StorelinkCenter) (interface{}, error) {
|
func ConvertType(in interface{}, out interface{}, participant *models.StorelinkCenter) (interface{}, error) {
|
||||||
|
|
||||||
switch (interface{})(in).(type) {
|
switch (interface{})(in).(type) {
|
||||||
|
|
2
go.mod
2
go.mod
|
@ -26,7 +26,7 @@ require (
|
||||||
github.com/rs/zerolog v1.28.0
|
github.com/rs/zerolog v1.28.0
|
||||||
github.com/shopspring/decimal v1.3.1
|
github.com/shopspring/decimal v1.3.1
|
||||||
github.com/zeromicro/go-zero v1.6.0
|
github.com/zeromicro/go-zero v1.6.0
|
||||||
gitlink.org.cn/jcce-pcm/pcm-ac v0.0.0-20231207111119-cdecc6b118c8
|
gitlink.org.cn/jcce-pcm/pcm-ac v0.0.0-20240201033409-2d4e27a90c39
|
||||||
gitlink.org.cn/jcce-pcm/pcm-participant-ceph v0.0.0-20230904090036-24fc730ec87d
|
gitlink.org.cn/jcce-pcm/pcm-participant-ceph v0.0.0-20230904090036-24fc730ec87d
|
||||||
gitlink.org.cn/jcce-pcm/pcm-participant-kubernetes v0.0.0-20231214084401-de9ac5db7246
|
gitlink.org.cn/jcce-pcm/pcm-participant-kubernetes v0.0.0-20231214084401-de9ac5db7246
|
||||||
gitlink.org.cn/jcce-pcm/pcm-participant-modelarts v0.0.0-20231101085149-724c7c4cc090
|
gitlink.org.cn/jcce-pcm/pcm-participant-modelarts v0.0.0-20231101085149-724c7c4cc090
|
||||||
|
|
Loading…
Reference in New Issue