Merge branch 'refs/heads/upstream'
Former-commit-id: aabbc1719d3a845983eefacd0614322f32bd2246
This commit is contained in:
commit
e195bb4d89
|
@ -6,6 +6,7 @@ type Options struct {
|
||||||
}
|
}
|
||||||
type Client interface {
|
type Client interface {
|
||||||
Task(TaskOptions) (Task, error)
|
Task(TaskOptions) (Task, error)
|
||||||
|
Notice(NoticeOptions) (Notice, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewClient(options Options) (Client, error) {
|
func NewClient(options Options) (Client, error) {
|
||||||
|
|
|
@ -19,6 +19,11 @@ func (c *client) Task(options TaskOptions) (Task, error) {
|
||||||
return task, nil
|
return task, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c *client) Notice(options NoticeOptions) (Notice, error) {
|
||||||
|
notice, _ := newNotice(c, &options)
|
||||||
|
return notice, nil
|
||||||
|
}
|
||||||
|
|
||||||
func newClient(options Options) (Client, error) {
|
func newClient(options Options) (Client, error) {
|
||||||
//init dbEngine
|
//init dbEngine
|
||||||
dbEngin, _ := gorm.Open(mysql.Open(options.DataSource), &gorm.Config{
|
dbEngin, _ := gorm.Open(mysql.Open(options.DataSource), &gorm.Config{
|
||||||
|
|
|
@ -0,0 +1,9 @@
|
||||||
|
package client
|
||||||
|
|
||||||
|
type NoticeOptions struct {
|
||||||
|
pushNoticeReq PushNoticeReq
|
||||||
|
}
|
||||||
|
|
||||||
|
type Notice interface {
|
||||||
|
PushNotice(pushNoticeReq PushNoticeReq) (*PushNoticeResp, error)
|
||||||
|
}
|
|
@ -0,0 +1,46 @@
|
||||||
|
package client
|
||||||
|
|
||||||
|
import (
|
||||||
|
"io/ioutil"
|
||||||
|
"k8s.io/apimachinery/pkg/util/json"
|
||||||
|
"log"
|
||||||
|
"net/http"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
)
|
||||||
|
|
||||||
|
type notice struct {
|
||||||
|
sync.RWMutex
|
||||||
|
client *client
|
||||||
|
options *NoticeOptions
|
||||||
|
log log.Logger
|
||||||
|
}
|
||||||
|
|
||||||
|
func newNotice(client *client, options *NoticeOptions) (*notice, error) {
|
||||||
|
notice := ¬ice{
|
||||||
|
RWMutex: sync.RWMutex{},
|
||||||
|
client: client,
|
||||||
|
options: options,
|
||||||
|
log: log.Logger{},
|
||||||
|
}
|
||||||
|
return notice, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (n *notice) PushNotice(pushNoticeReq PushNoticeReq) (*PushNoticeResp, error) {
|
||||||
|
|
||||||
|
url := n.client.url + "/pcm/v1/core/pushNotice"
|
||||||
|
method := "GET"
|
||||||
|
jsonStr, _ := json.Marshal(pushNoticeReq)
|
||||||
|
payload := strings.NewReader(string(jsonStr))
|
||||||
|
|
||||||
|
client := &http.Client{}
|
||||||
|
req, _ := http.NewRequest(method, url, payload)
|
||||||
|
req.Header.Add("Content-Type", "application/json")
|
||||||
|
res, _ := client.Do(req)
|
||||||
|
defer res.Body.Close()
|
||||||
|
|
||||||
|
body, _ := ioutil.ReadAll(res.Body)
|
||||||
|
var resp PushNoticeResp
|
||||||
|
json.Unmarshal(body, &resp)
|
||||||
|
return &resp, nil
|
||||||
|
}
|
|
@ -9,5 +9,5 @@ type TaskOptions struct {
|
||||||
type Task interface {
|
type Task interface {
|
||||||
PullTaskInfo(pullTaskInfoReq PullTaskInfoReq) (*PullTaskInfoResp, error)
|
PullTaskInfo(pullTaskInfoReq PullTaskInfoReq) (*PullTaskInfoResp, error)
|
||||||
PushTaskInfo(pushTaskInfoReq PushTaskInfoReq) (*PushTaskInfoResp, error)
|
PushTaskInfo(pushTaskInfoReq PushTaskInfoReq) (*PushTaskInfoResp, error)
|
||||||
PushResourceInfo(pushResourceInfoReq PushResourceInfoReq) error
|
PushResourceInfo(pushResourceInfoReq PushResourceInfoReq) (*PushResourceInfoResp, error)
|
||||||
}
|
}
|
||||||
|
|
|
@ -50,8 +50,8 @@ func (t *task) PushTaskInfo(pushTaskInfoReq PushTaskInfoReq) (*PushTaskInfoResp,
|
||||||
|
|
||||||
url := t.client.url + "/pcm/v1/core/pushTaskInfo"
|
url := t.client.url + "/pcm/v1/core/pushTaskInfo"
|
||||||
method := "POST"
|
method := "POST"
|
||||||
infoReq := PullTaskInfoReq{AdapterId: pushTaskInfoReq.AdapterId}
|
//infoReq := PullTaskInfoReq{AdapterId: pushTaskInfoReq.AdapterId}
|
||||||
jsonStr, _ := json.Marshal(infoReq)
|
jsonStr, _ := json.Marshal(pushTaskInfoReq)
|
||||||
payload := strings.NewReader(string(jsonStr))
|
payload := strings.NewReader(string(jsonStr))
|
||||||
|
|
||||||
client := &http.Client{}
|
client := &http.Client{}
|
||||||
|
@ -66,7 +66,22 @@ func (t *task) PushTaskInfo(pushTaskInfoReq PushTaskInfoReq) (*PushTaskInfoResp,
|
||||||
return &resp, nil
|
return &resp, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *task) PushResourceInfo(pushResourceInfoReq PushResourceInfoReq) error {
|
func (t *task) PushResourceInfo(pushResourceInfoReq PushResourceInfoReq) (*PushResourceInfoResp, error) {
|
||||||
//TODO implement me
|
|
||||||
panic("implement me")
|
url := t.client.url + "/pcm/v1/core/pushResourceInfo"
|
||||||
|
method := "POST"
|
||||||
|
//infoReq := PushResourceInfoReq{AdapterId: pushResourceInfoReq.AdapterId}
|
||||||
|
jsonStr, _ := json.Marshal(pushResourceInfoReq)
|
||||||
|
payload := strings.NewReader(string(jsonStr))
|
||||||
|
|
||||||
|
client := &http.Client{}
|
||||||
|
req, _ := http.NewRequest(method, url, payload)
|
||||||
|
req.Header.Add("Content-Type", "application/json")
|
||||||
|
res, _ := client.Do(req)
|
||||||
|
defer res.Body.Close()
|
||||||
|
|
||||||
|
body, _ := ioutil.ReadAll(res.Body)
|
||||||
|
var resp PushResourceInfoResp
|
||||||
|
json.Unmarshal(body, &resp)
|
||||||
|
return &resp, nil
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,12 +25,46 @@ type PushTaskInfoReq struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
type PushTaskInfoResp struct {
|
type PushTaskInfoResp struct {
|
||||||
Code int64
|
Code int64 `json:"code"`
|
||||||
Msg string
|
Msg string `json:"msg"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type PushResourceInfoReq struct {
|
type PushResourceInfoReq struct {
|
||||||
AdapterId int64 `json:"adapterId"`
|
AdapterId int64 `json:"adapterId"`
|
||||||
|
ResourceStats []ResourceStats `json:"resourceStats"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type PushResourceInfoResp struct {
|
||||||
|
Code int64 `json:"code"`
|
||||||
|
Msg string `json:"msg"`
|
||||||
|
}
|
||||||
|
type NoticeInfo struct {
|
||||||
|
AdapterId int64 `json:"adapterId"`
|
||||||
|
AdapterName string `json:"adapterName"`
|
||||||
|
ClusterId int64 `json:"clusterId"`
|
||||||
|
ClusterName string `json:"clusterName"`
|
||||||
|
NoticeType string `json:"noticeType"`
|
||||||
|
TaskName string `json:"taskName"`
|
||||||
|
Incident string `json:"incident"`
|
||||||
|
CreatedTime time.Time `json:"createdTime"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type ListNoticeReq struct {
|
||||||
|
}
|
||||||
|
|
||||||
|
type ListNoticeResp struct {
|
||||||
|
Code int64 `json:"code"`
|
||||||
|
Msg string `json:"msg"`
|
||||||
|
Data []NoticeInfo `json:"data"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type PushNoticeReq struct {
|
||||||
|
NoticeInfo NoticeInfo `json:"noticeInfo"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type PushNoticeResp struct {
|
||||||
|
Code int64 `json:"code"`
|
||||||
|
Msg string `json:"msg"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type HpcInfo struct {
|
type HpcInfo struct {
|
||||||
|
@ -119,5 +153,30 @@ type VmInfo struct {
|
||||||
BlockUuid string `json:"block_uuid,omitempty"`
|
BlockUuid string `json:"block_uuid,omitempty"`
|
||||||
SourceType string `json:"source_type,omitempty"`
|
SourceType string `json:"source_type,omitempty"`
|
||||||
DeleteOnTermination bool `json:"delete_on_termination,omitempty"`
|
DeleteOnTermination bool `json:"delete_on_termination,omitempty"`
|
||||||
State string `json:"state,omitempty"`
|
Status string `json:"Status,omitempty"`
|
||||||
|
StartTime string `json:"startTime,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type ResourceStats struct {
|
||||||
|
ClusterId int64 `json:"clusterId"`
|
||||||
|
Name string `json:"name"`
|
||||||
|
CpuCoreAvail int64 `json:"cpuCoreAvail"`
|
||||||
|
CpuCoreTotal int64 `json:"cpuCoreTotal"`
|
||||||
|
MemAvail float64 `json:"memAvail"`
|
||||||
|
MemTotal float64 `json:"memTotal"`
|
||||||
|
DiskAvail float64 `json:"diskAvail"`
|
||||||
|
DiskTotal float64 `json:"diskTotal"`
|
||||||
|
GpuAvail int64 `json:"gpuAvail"`
|
||||||
|
CardsAvail []*Card `json:"cardsAvail"`
|
||||||
|
CpuCoreHours float64 `json:"cpuCoreHours"`
|
||||||
|
Balance float64 `json:"balance"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type Card struct {
|
||||||
|
Platform string `json:"platform"`
|
||||||
|
Type string `json:"type"`
|
||||||
|
Name string `json:"name"`
|
||||||
|
TOpsAtFp16 float64 `json:"TOpsAtFp16"`
|
||||||
|
CardHours float64 `json:"cardHours"`
|
||||||
|
CardNum int32 `json:"cardNum"`
|
||||||
}
|
}
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,126 +0,0 @@
|
||||||
syntax = "v1"
|
|
||||||
|
|
||||||
info(
|
|
||||||
title: "type title here"
|
|
||||||
desc: "type desc here"
|
|
||||||
author: "type author here"
|
|
||||||
email: "type email here"
|
|
||||||
version: "type version here"
|
|
||||||
)
|
|
||||||
|
|
||||||
type PullTaskInfoReq {
|
|
||||||
AdapterId int64 `form:"adapterId"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type PullTaskInfoResp struct {
|
|
||||||
HpcInfoList []*HpcInfo `json:"HpcInfoList,omitempty"`
|
|
||||||
CloudInfoList []*CloudInfo `json:"CloudInfoList,omitempty"`
|
|
||||||
AiInfoList []*AiInfo `json:"AiInfoList,omitempty"`
|
|
||||||
VmInfoList []*VmInfo `json:"VmInfoList,omitempty"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type HpcInfo struct {
|
|
||||||
Id int64 `json:"id"` // id
|
|
||||||
TaskId int64 `json:"task_id"` // 任务id
|
|
||||||
JobId string `json:"job_id"` // 作业id(在第三方系统中的作业id)
|
|
||||||
AdapterId int64 `json:"adapter_id"` // 执行任务的适配器id
|
|
||||||
ClusterId int64 `json:"cluster_id"` // 执行任务的集群id
|
|
||||||
ClusterType string `json:"cluster_type"` // 执行任务的集群类型
|
|
||||||
Name string `json:"name"` // 名称
|
|
||||||
Status string `json:"status"` // 状态
|
|
||||||
CmdScript string `json:"cmd_script"`
|
|
||||||
StartTime string `json:"start_time"` // 开始时间
|
|
||||||
RunningTime int64 `json:"running_time"` // 运行时间
|
|
||||||
DerivedEs string `json:"derived_es"`
|
|
||||||
Cluster string `json:"cluster"`
|
|
||||||
BlockId int64 `json:"block_id"`
|
|
||||||
AllocNodes int64 `json:"alloc_nodes"`
|
|
||||||
AllocCpu int64 `json:"alloc_cpu"`
|
|
||||||
CardCount int64 `json:"card_count"` // 卡数
|
|
||||||
Version string `json:"version"`
|
|
||||||
Account string `json:"account"`
|
|
||||||
WorkDir string `json:"work_dir"` // 工作路径
|
|
||||||
AssocId int64 `json:"assoc_id"`
|
|
||||||
ExitCode int64 `json:"exit_code"`
|
|
||||||
WallTime string `json:"wall_time"` // 最大运行时间
|
|
||||||
Result string `json:"result"` // 运行结果
|
|
||||||
DeletedAt string `json:"deleted_at"` // 删除时间
|
|
||||||
YamlString string `json:"yaml_string"`
|
|
||||||
AppType string `json:"app_type"` // 应用类型
|
|
||||||
AppName string `json:"app_name"` // 应用名称
|
|
||||||
Queue string `json:"queue"` // 队列名称
|
|
||||||
SubmitType string `json:"submit_type"` // cmd(命令行模式)
|
|
||||||
NNode string `json:"n_node"` // 节点个数(当指定该参数时,GAP_NODE_STRING必须为"")
|
|
||||||
StdOutFile string `json:"std_out_file"` // 工作路径/std.err.%j
|
|
||||||
StdErrFile string `json:"std_err_file"` // 工作路径/std.err.%j
|
|
||||||
StdInput string `json:"std_input"`
|
|
||||||
Environment string `json:"environment"`
|
|
||||||
DeletedFlag int64 `json:"deleted_flag"` // 是否删除(0-否,1-是)
|
|
||||||
CreatedBy int64 `json:"created_by"` // 创建人
|
|
||||||
CreatedTime string `json:"created_time"` // 创建时间
|
|
||||||
UpdatedBy int64 `json:"updated_by"` // 更新人
|
|
||||||
UpdatedTime string `json:"updated_time"` // 更新时间
|
|
||||||
}
|
|
||||||
|
|
||||||
type CloudInfo struct {
|
|
||||||
Participant int64 `json:"participant,omitempty"`
|
|
||||||
Id int64 `json:"id,omitempty"`
|
|
||||||
TaskId int64 `json:"taskId,omitempty"`
|
|
||||||
ApiVersion string `json:"apiVersion,omitempty"`
|
|
||||||
Kind string `json:"kind,omitempty"`
|
|
||||||
Namespace string `json:"namespace,omitempty"`
|
|
||||||
Name string `json:"name,omitempty"`
|
|
||||||
Status string `json:"status,omitempty"`
|
|
||||||
StartTime string `json:"startTime,omitempty"`
|
|
||||||
RunningTime int64 `json:"runningTime,omitempty"`
|
|
||||||
Result string `json:"result,omitempty"`
|
|
||||||
YamlString string `json:"yamlString,omitempty"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type AiInfo struct {
|
|
||||||
ParticipantId int64 `json:"participantId,omitempty"`
|
|
||||||
TaskId int64 `json:"taskId,omitempty"`
|
|
||||||
ProjectId string `json:"project_id,omitempty"`
|
|
||||||
Name string `json:"name,omitempty"`
|
|
||||||
Status string `json:"status,omitempty"`
|
|
||||||
StartTime string `json:"startTime,omitempty"`
|
|
||||||
RunningTime int64 `json:"runningTime,omitempty"`
|
|
||||||
Result string `json:"result,omitempty"`
|
|
||||||
JobId string `json:"jobId,omitempty"`
|
|
||||||
CreateTime string `json:"createTime,omitempty"`
|
|
||||||
ImageUrl string `json:"imageUrl,omitempty"`
|
|
||||||
Command string `json:"command,omitempty"`
|
|
||||||
FlavorId string `json:"flavorId,omitempty"`
|
|
||||||
SubscriptionId string `json:"subscriptionId,omitempty"`
|
|
||||||
ItemVersionId string `json:"itemVersionId,omitempty"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type VmInfo struct {
|
|
||||||
ParticipantId int64 `json:"participantId,omitempty"`
|
|
||||||
TaskId int64 `json:"taskId,omitempty"`
|
|
||||||
Name string `json:"name,omitempty"`
|
|
||||||
FlavorRef string `json:"flavor_ref,omitempty"`
|
|
||||||
ImageRef string `json:"image_ref,omitempty"`
|
|
||||||
NetworkUuid string `json:"network_uuid,omitempty"`
|
|
||||||
BlockUuid string `json:"block_uuid,omitempty"`
|
|
||||||
SourceType string `json:"source_type,omitempty"`
|
|
||||||
DeleteOnTermination bool `json:"delete_on_termination,omitempty"`
|
|
||||||
State string `json:"state,omitempty"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type PushTaskInfoReq struct {
|
|
||||||
AdapterId int64 `json:"adapterId"`
|
|
||||||
HpcInfoList []*HpcInfo `json:"hpcInfoList"`
|
|
||||||
CloudInfoList []*CloudInfo `json:"cloudInfoList"`
|
|
||||||
AiInfoList []*AiInfo `json:"aiInfoList"`
|
|
||||||
VmInfoList []*VmInfo `json:"vmInfoList"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type PushTaskInfoResp struct {
|
|
||||||
Code int64 `json:"code"`
|
|
||||||
Msg string `json:"msg"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type PushResourceInfoReq struct {
|
|
||||||
AdapterId int64 `json:"adapterId"`
|
|
||||||
}
|
|
|
@ -9,7 +9,6 @@ import (
|
||||||
"cloud/pcm-cloud.api"
|
"cloud/pcm-cloud.api"
|
||||||
"storelink/pcm-storelink.api"
|
"storelink/pcm-storelink.api"
|
||||||
"schedule/pcm-schedule.api"
|
"schedule/pcm-schedule.api"
|
||||||
"participant/pcm-participant.api"
|
|
||||||
"monitoring/pcm-monitoring.api"
|
"monitoring/pcm-monitoring.api"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -111,14 +110,26 @@ service pcm {
|
||||||
@handler metricsHandler
|
@handler metricsHandler
|
||||||
get /core/metrics
|
get /core/metrics
|
||||||
|
|
||||||
@doc "provided to participant to pull task info from core"
|
@doc "provide for adapter to pull task info from core"
|
||||||
@handler pullTaskInfoHandler
|
@handler pullTaskInfoHandler
|
||||||
get /core/pullTaskInfo (PullTaskInfoReq) returns (PullTaskInfoResp)
|
get /core/pullTaskInfo (PullTaskInfoReq) returns (PullTaskInfoResp)
|
||||||
|
|
||||||
@doc "provided to participant to push task info to core"
|
@doc "provide for adapter to push task info to core"
|
||||||
@handler pushTaskInfoHandler
|
@handler pushTaskInfoHandler
|
||||||
post /core/pushTaskInfo (PushTaskInfoReq) returns (PushTaskInfoResp)
|
post /core/pushTaskInfo (PushTaskInfoReq) returns (PushTaskInfoResp)
|
||||||
|
|
||||||
|
@doc "provide for adapter to push resource info to core"
|
||||||
|
@handler pushResourceInfoHandler
|
||||||
|
post /core/pushResourceInfo (PushResourceInfoReq) returns (PushResourceInfoResp)
|
||||||
|
|
||||||
|
@doc "provide for adapter to push notice info to core"
|
||||||
|
@handler pushNoticeHandler
|
||||||
|
post /core/pushNotice (PushNoticeReq) returns (PushNoticeResp)
|
||||||
|
|
||||||
|
@doc "list notice"
|
||||||
|
@handler listNoticeHandler
|
||||||
|
get /core/listNotice (ListNoticeReq) returns (ListNoticeResp)
|
||||||
|
|
||||||
@doc "paging queries the task list"
|
@doc "paging queries the task list"
|
||||||
@handler pageListTaskHandler
|
@handler pageListTaskHandler
|
||||||
get /core/task/list (pageTaskReq) returns(PageResult)
|
get /core/task/list (pageTaskReq) returns(PageResult)
|
||||||
|
@ -146,6 +157,10 @@ service pcm {
|
||||||
@handler jobHandler
|
@handler jobHandler
|
||||||
get /hpc/job (hpcJobReq) returns (hpcJobResp)
|
get /hpc/job (hpcJobReq) returns (hpcJobResp)
|
||||||
|
|
||||||
|
@doc "超算资源总览"
|
||||||
|
@handler resourceHandler
|
||||||
|
get /hpc/resource (hpcResourceReq) returns (hpcResourceResp)
|
||||||
|
|
||||||
@doc "超算查询资产列表"
|
@doc "超算查询资产列表"
|
||||||
@handler queueAssetsHandler
|
@handler queueAssetsHandler
|
||||||
get /hpc/queueAssets returns (QueueAssetsResp)
|
get /hpc/queueAssets returns (QueueAssetsResp)
|
||||||
|
@ -895,13 +910,13 @@ service pcm {
|
||||||
get /schedule/ai/getTaskTypes returns (AiTaskTypesResp)
|
get /schedule/ai/getTaskTypes returns (AiTaskTypesResp)
|
||||||
|
|
||||||
@handler ScheduleGetDatasetsHandler
|
@handler ScheduleGetDatasetsHandler
|
||||||
get /schedule/ai/getDatasets returns (AiDatasetsResp)
|
get /schedule/ai/getDatasets/:adapterId (AiDatasetsReq) returns (AiDatasetsResp)
|
||||||
|
|
||||||
@handler ScheduleGetStrategyHandler
|
@handler ScheduleGetStrategyHandler
|
||||||
get /schedule/ai/getStrategies returns (AiStrategyResp)
|
get /schedule/ai/getStrategies returns (AiStrategyResp)
|
||||||
|
|
||||||
@handler ScheduleGetAlgorithmsHandler
|
@handler ScheduleGetAlgorithmsHandler
|
||||||
get /schedule/ai/getAlgorithms/:resourceType/:taskType/:dataset (AiAlgorithmsReq) returns (AiAlgorithmsResp)
|
get /schedule/ai/getAlgorithms/:adapterId/:resourceType/:taskType/:dataset (AiAlgorithmsReq) returns (AiAlgorithmsResp)
|
||||||
|
|
||||||
@handler ScheduleSubmitHandler
|
@handler ScheduleSubmitHandler
|
||||||
post /schedule/submit (ScheduleReq) returns (ScheduleResp)
|
post /schedule/submit (ScheduleReq) returns (ScheduleResp)
|
||||||
|
|
|
@ -19,13 +19,15 @@ type (
|
||||||
ScheduleResult {
|
ScheduleResult {
|
||||||
ClusterId string `json:"clusterId"`
|
ClusterId string `json:"clusterId"`
|
||||||
TaskId string `json:"taskId"`
|
TaskId string `json:"taskId"`
|
||||||
|
Strategy string `json:"strategy"`
|
||||||
Replica int32 `json:"replica"`
|
Replica int32 `json:"replica"`
|
||||||
Msg string `json:"msg"`
|
Msg string `json:"msg"`
|
||||||
}
|
}
|
||||||
|
|
||||||
AiOption {
|
AiOption {
|
||||||
TaskName string `json:"taskName"`
|
TaskName string `json:"taskName"`
|
||||||
AiClusterId string `json:"aiClusterId,optional"`
|
AdapterId string `json:"adapterId"`
|
||||||
|
AiClusterIds []string `json:"aiClusterIds"`
|
||||||
ResourceType string `json:"resourceType"`
|
ResourceType string `json:"resourceType"`
|
||||||
Tops float64 `json:"Tops,optional"`
|
Tops float64 `json:"Tops,optional"`
|
||||||
TaskType string `json:"taskType"`
|
TaskType string `json:"taskType"`
|
||||||
|
@ -46,6 +48,10 @@ type (
|
||||||
TaskTypes []string `json:"taskTypes"`
|
TaskTypes []string `json:"taskTypes"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
AiDatasetsReq {
|
||||||
|
AdapterId string `path:"adapterId"`
|
||||||
|
}
|
||||||
|
|
||||||
AiDatasetsResp {
|
AiDatasetsResp {
|
||||||
Datasets []string `json:"datasets"`
|
Datasets []string `json:"datasets"`
|
||||||
}
|
}
|
||||||
|
@ -55,6 +61,7 @@ type (
|
||||||
}
|
}
|
||||||
|
|
||||||
AiAlgorithmsReq {
|
AiAlgorithmsReq {
|
||||||
|
AdapterId string `path:"adapterId"`
|
||||||
ResourceType string `path:"resourceType"`
|
ResourceType string `path:"resourceType"`
|
||||||
TaskType string `path:"taskType"`
|
TaskType string `path:"taskType"`
|
||||||
Dataset string `path:"dataset"`
|
Dataset string `path:"dataset"`
|
||||||
|
|
|
@ -0,0 +1,28 @@
|
||||||
|
package core
|
||||||
|
|
||||||
|
import (
|
||||||
|
clientCore "gitlink.org.cn/JointCloud/pcm-coordinator/api/client"
|
||||||
|
"net/http"
|
||||||
|
|
||||||
|
"github.com/zeromicro/go-zero/rest/httpx"
|
||||||
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/core"
|
||||||
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||||
|
)
|
||||||
|
|
||||||
|
func ListNoticeHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
var req clientCore.ListNoticeReq
|
||||||
|
if err := httpx.Parse(r, &req); err != nil {
|
||||||
|
httpx.ErrorCtx(r.Context(), w, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
l := core.NewListNoticeLogic(r.Context(), svcCtx)
|
||||||
|
resp, err := l.ListNotice(&req)
|
||||||
|
if err != nil {
|
||||||
|
httpx.ErrorCtx(r.Context(), w, err)
|
||||||
|
} else {
|
||||||
|
httpx.OkJsonCtx(r.Context(), w, resp)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,28 @@
|
||||||
|
package core
|
||||||
|
|
||||||
|
import (
|
||||||
|
clientCore "gitlink.org.cn/JointCloud/pcm-coordinator/api/client"
|
||||||
|
"net/http"
|
||||||
|
|
||||||
|
"github.com/zeromicro/go-zero/rest/httpx"
|
||||||
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/core"
|
||||||
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||||
|
)
|
||||||
|
|
||||||
|
func PushNoticeHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
var req clientCore.PushNoticeReq
|
||||||
|
if err := httpx.Parse(r, &req); err != nil {
|
||||||
|
httpx.ErrorCtx(r.Context(), w, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
l := core.NewPushNoticeLogic(r.Context(), svcCtx)
|
||||||
|
resp, err := l.PushNotice(&req)
|
||||||
|
if err != nil {
|
||||||
|
httpx.ErrorCtx(r.Context(), w, err)
|
||||||
|
} else {
|
||||||
|
httpx.OkJsonCtx(r.Context(), w, resp)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,28 @@
|
||||||
|
package core
|
||||||
|
|
||||||
|
import (
|
||||||
|
clientCore "gitlink.org.cn/JointCloud/pcm-coordinator/api/client"
|
||||||
|
"net/http"
|
||||||
|
|
||||||
|
"github.com/zeromicro/go-zero/rest/httpx"
|
||||||
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/core"
|
||||||
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||||
|
)
|
||||||
|
|
||||||
|
func PushResourceInfoHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
var req clientCore.PushResourceInfoReq
|
||||||
|
if err := httpx.Parse(r, &req); err != nil {
|
||||||
|
httpx.ErrorCtx(r.Context(), w, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
l := core.NewPushResourceInfoLogic(r.Context(), svcCtx)
|
||||||
|
resp, err := l.PushResourceInfo(&req)
|
||||||
|
if err != nil {
|
||||||
|
httpx.ErrorCtx(r.Context(), w, err)
|
||||||
|
} else {
|
||||||
|
httpx.OkJsonCtx(r.Context(), w, resp)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,28 @@
|
||||||
|
package hpc
|
||||||
|
|
||||||
|
import (
|
||||||
|
"net/http"
|
||||||
|
|
||||||
|
"github.com/zeromicro/go-zero/rest/httpx"
|
||||||
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/hpc"
|
||||||
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||||
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
|
||||||
|
)
|
||||||
|
|
||||||
|
func ResourceHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
var req types.HpcResourceReq
|
||||||
|
if err := httpx.Parse(r, &req); err != nil {
|
||||||
|
httpx.ErrorCtx(r.Context(), w, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
l := hpc.NewResourceLogic(r.Context(), svcCtx)
|
||||||
|
resp, err := l.Resource(&req)
|
||||||
|
if err != nil {
|
||||||
|
httpx.ErrorCtx(r.Context(), w, err)
|
||||||
|
} else {
|
||||||
|
httpx.OkJsonCtx(r.Context(), w, resp)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -140,6 +140,21 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
|
||||||
Path: "/core/pushTaskInfo",
|
Path: "/core/pushTaskInfo",
|
||||||
Handler: core.PushTaskInfoHandler(serverCtx),
|
Handler: core.PushTaskInfoHandler(serverCtx),
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
Method: http.MethodPost,
|
||||||
|
Path: "/core/pushResourceInfo",
|
||||||
|
Handler: core.PushResourceInfoHandler(serverCtx),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Method: http.MethodPost,
|
||||||
|
Path: "/core/pushNotice",
|
||||||
|
Handler: core.PushNoticeHandler(serverCtx),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Method: http.MethodGet,
|
||||||
|
Path: "/core/listNotice",
|
||||||
|
Handler: core.ListNoticeHandler(serverCtx),
|
||||||
|
},
|
||||||
{
|
{
|
||||||
Method: http.MethodGet,
|
Method: http.MethodGet,
|
||||||
Path: "/core/task/list",
|
Path: "/core/task/list",
|
||||||
|
@ -171,6 +186,11 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
|
||||||
Path: "/hpc/job",
|
Path: "/hpc/job",
|
||||||
Handler: hpc.JobHandler(serverCtx),
|
Handler: hpc.JobHandler(serverCtx),
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
Method: http.MethodGet,
|
||||||
|
Path: "/hpc/resource",
|
||||||
|
Handler: hpc.ResourceHandler(serverCtx),
|
||||||
|
},
|
||||||
{
|
{
|
||||||
Method: http.MethodGet,
|
Method: http.MethodGet,
|
||||||
Path: "/hpc/queueAssets",
|
Path: "/hpc/queueAssets",
|
||||||
|
@ -1107,7 +1127,7 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Method: http.MethodGet,
|
Method: http.MethodGet,
|
||||||
Path: "/schedule/ai/getDatasets",
|
Path: "/schedule/ai/getDatasets/:adapterId",
|
||||||
Handler: schedule.ScheduleGetDatasetsHandler(serverCtx),
|
Handler: schedule.ScheduleGetDatasetsHandler(serverCtx),
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -1117,7 +1137,7 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Method: http.MethodGet,
|
Method: http.MethodGet,
|
||||||
Path: "/schedule/ai/getAlgorithms/:resourceType/:taskType/:dataset",
|
Path: "/schedule/ai/getAlgorithms/:adapterId/:resourceType/:taskType/:dataset",
|
||||||
Handler: schedule.ScheduleGetAlgorithmsHandler(serverCtx),
|
Handler: schedule.ScheduleGetAlgorithmsHandler(serverCtx),
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
|
@ -1,16 +1,24 @@
|
||||||
package schedule
|
package schedule
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"github.com/zeromicro/go-zero/rest/httpx"
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/schedule"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/schedule"
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||||
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
|
||||||
"net/http"
|
"net/http"
|
||||||
)
|
)
|
||||||
|
|
||||||
func ScheduleGetDatasetsHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
|
func ScheduleGetDatasetsHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
var req types.AiDatasetsReq
|
||||||
|
if err := httpx.Parse(r, &req); err != nil {
|
||||||
|
result.ParamErrorResult(r, w, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
l := schedule.NewScheduleGetDatasetsLogic(r.Context(), svcCtx)
|
l := schedule.NewScheduleGetDatasetsLogic(r.Context(), svcCtx)
|
||||||
resp, err := l.ScheduleGetDatasets()
|
resp, err := l.ScheduleGetDatasets(&req)
|
||||||
result.HttpResult(r, w, resp, err)
|
result.HttpResult(r, w, resp, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,13 +2,12 @@ package core
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/mqs"
|
"fmt"
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/pkg/response"
|
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
|
||||||
tool "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
|
"math/rand"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/zeromicro/go-zero/core/logx"
|
"github.com/zeromicro/go-zero/core/logx"
|
||||||
|
@ -35,7 +34,6 @@ func (l *CommitVmTaskLogic) CommitVmTask(req *types.CommitVmTaskReq) (resp *type
|
||||||
Status: constants.Saved,
|
Status: constants.Saved,
|
||||||
Name: req.Name,
|
Name: req.Name,
|
||||||
CommitTime: time.Now(),
|
CommitTime: time.Now(),
|
||||||
NsID: req.NsID,
|
|
||||||
}
|
}
|
||||||
// Save task data to database
|
// Save task data to database
|
||||||
tx := l.svcCtx.DbEngin.Create(&taskModel)
|
tx := l.svcCtx.DbEngin.Create(&taskModel)
|
||||||
|
@ -43,28 +41,38 @@ func (l *CommitVmTaskLogic) CommitVmTask(req *types.CommitVmTaskReq) (resp *type
|
||||||
return nil, tx.Error
|
return nil, tx.Error
|
||||||
}
|
}
|
||||||
|
|
||||||
var clusterIds []int64
|
for _, CreateMulServer := range req.CreateMulServer {
|
||||||
l.svcCtx.DbEngin.Raw("SELECT id FROM `t_cluster` where adapter_id = ? and label = ?", req.AdapterId, req.ClusterType).Scan(&clusterIds)
|
fmt.Println("", req.CreateMulServer)
|
||||||
|
var clusterIds []int64
|
||||||
|
l.svcCtx.DbEngin.Raw("SELECT id FROM `t_cluster` where adapter_id = ? and label = ?", req.AdapterId, req.ClusterType).Scan(&clusterIds)
|
||||||
|
|
||||||
if len(clusterIds) == 0 || clusterIds == nil {
|
if len(clusterIds) == 0 || clusterIds == nil {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
vmInfo := models.TaskVm{
|
||||||
|
TaskId: taskModel.Id,
|
||||||
|
ClusterId: clusterIds[rand.Intn(len(clusterIds))],
|
||||||
|
Name: taskModel.Name,
|
||||||
|
Status: "Saved",
|
||||||
|
StartTime: time.Now().String(),
|
||||||
|
MinCount: CreateMulServer.Min_count,
|
||||||
|
ImageRef: CreateMulServer.ImageRef,
|
||||||
|
FlavorRef: CreateMulServer.FlavorRef,
|
||||||
|
Uuid: CreateMulServer.Uuid,
|
||||||
|
Platform: CreateMulServer.Platform,
|
||||||
|
}
|
||||||
|
|
||||||
|
tx = l.svcCtx.DbEngin.Create(&vmInfo)
|
||||||
|
if tx.Error != nil {
|
||||||
|
return nil, tx.Error
|
||||||
|
}
|
||||||
|
resp = &types.CommitVmTaskResp{
|
||||||
|
Code: 200,
|
||||||
|
Msg: "success",
|
||||||
|
TaskId: taskModel.Id,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
vm := models.Vm{}
|
|
||||||
tool.Convert(req, &vm)
|
|
||||||
mqInfo := response.TaskInfo{
|
|
||||||
TaskId: taskModel.Id,
|
|
||||||
TaskType: "vm",
|
|
||||||
MatchLabels: req.MatchLabels,
|
|
||||||
NsID: req.NsID,
|
|
||||||
}
|
|
||||||
//req.TaskId = taskModel.Id
|
|
||||||
mqs.InsQueue.Beta.Add(&mqInfo)
|
|
||||||
tx = l.svcCtx.DbEngin.Create(&mqInfo)
|
|
||||||
resp = &types.CommitVmTaskResp{
|
|
||||||
Code: 200,
|
|
||||||
Msg: "success",
|
|
||||||
TaskId: taskModel.Id,
|
|
||||||
}
|
|
||||||
return resp, nil
|
return resp, nil
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,36 @@
|
||||||
|
package core
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"github.com/zeromicro/go-zero/core/logx"
|
||||||
|
clientCore "gitlink.org.cn/JointCloud/pcm-coordinator/api/client"
|
||||||
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||||
|
)
|
||||||
|
|
||||||
|
type ListNoticeLogic struct {
|
||||||
|
logx.Logger
|
||||||
|
ctx context.Context
|
||||||
|
svcCtx *svc.ServiceContext
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewListNoticeLogic(ctx context.Context, svcCtx *svc.ServiceContext) *ListNoticeLogic {
|
||||||
|
return &ListNoticeLogic{
|
||||||
|
Logger: logx.WithContext(ctx),
|
||||||
|
ctx: ctx,
|
||||||
|
svcCtx: svcCtx,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (l *ListNoticeLogic) ListNotice(req *clientCore.ListNoticeReq) (*clientCore.ListNoticeResp, error) {
|
||||||
|
var notices []clientCore.NoticeInfo
|
||||||
|
|
||||||
|
var resp clientCore.ListNoticeResp
|
||||||
|
|
||||||
|
l.svcCtx.DbEngin.Raw("select * from t_notice order by created_time desc").Scan(¬ices)
|
||||||
|
for _, notice := range notices {
|
||||||
|
resp.Data = append(resp.Data, notice)
|
||||||
|
}
|
||||||
|
resp.Code = 200
|
||||||
|
resp.Msg = "success"
|
||||||
|
return &resp, nil
|
||||||
|
}
|
|
@ -67,6 +67,13 @@ func (l *PullTaskInfoLogic) PullTaskInfo(req *clientCore.PullTaskInfoReq) (*clie
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
utils.Convert(aiModelList, &resp.AiInfoList)
|
utils.Convert(aiModelList, &resp.AiInfoList)
|
||||||
|
case 3:
|
||||||
|
var vmModelList []models.TaskVm
|
||||||
|
err := findModelList(req.AdapterId, l.svcCtx.DbEngin, &vmModelList)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
utils.Convert(vmModelList, &resp.VmInfoList)
|
||||||
}
|
}
|
||||||
return &resp, nil
|
return &resp, nil
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,31 @@
|
||||||
|
package core
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"github.com/zeromicro/go-zero/core/logx"
|
||||||
|
clientCore "gitlink.org.cn/JointCloud/pcm-coordinator/api/client"
|
||||||
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||||
|
)
|
||||||
|
|
||||||
|
type PushNoticeLogic struct {
|
||||||
|
logx.Logger
|
||||||
|
ctx context.Context
|
||||||
|
svcCtx *svc.ServiceContext
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewPushNoticeLogic(ctx context.Context, svcCtx *svc.ServiceContext) *PushNoticeLogic {
|
||||||
|
return &PushNoticeLogic{
|
||||||
|
Logger: logx.WithContext(ctx),
|
||||||
|
ctx: ctx,
|
||||||
|
svcCtx: svcCtx,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (l *PushNoticeLogic) PushNotice(req *clientCore.PushNoticeReq) (resp *clientCore.PushNoticeResp, err error) {
|
||||||
|
result := l.svcCtx.DbEngin.Table("t_notice").Create(&req.NoticeInfo)
|
||||||
|
if result.Error != nil {
|
||||||
|
return nil, result.Error
|
||||||
|
}
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
|
@ -0,0 +1,28 @@
|
||||||
|
package core
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
clientCore "gitlink.org.cn/JointCloud/pcm-coordinator/api/client"
|
||||||
|
|
||||||
|
"github.com/zeromicro/go-zero/core/logx"
|
||||||
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||||
|
)
|
||||||
|
|
||||||
|
type PushResourceInfoLogic struct {
|
||||||
|
logx.Logger
|
||||||
|
ctx context.Context
|
||||||
|
svcCtx *svc.ServiceContext
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewPushResourceInfoLogic(ctx context.Context, svcCtx *svc.ServiceContext) *PushResourceInfoLogic {
|
||||||
|
return &PushResourceInfoLogic{
|
||||||
|
Logger: logx.WithContext(ctx),
|
||||||
|
ctx: ctx,
|
||||||
|
svcCtx: svcCtx,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (l *PushResourceInfoLogic) PushResourceInfo(req *clientCore.PushResourceInfoReq) (resp *clientCore.PushResourceInfoResp, err error) {
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
|
@ -49,6 +49,12 @@ func (l *PushTaskInfoLogic) PushTaskInfo(req *clientCore.PushTaskInfoReq) (*clie
|
||||||
aiInfo.Status, aiInfo.StartTime, aiInfo.ProjectId, aiInfo.JobId, req.AdapterId, aiInfo.TaskId, aiInfo.Name)
|
aiInfo.Status, aiInfo.StartTime, aiInfo.ProjectId, aiInfo.JobId, req.AdapterId, aiInfo.TaskId, aiInfo.Name)
|
||||||
syncTask(l.svcCtx.DbEngin, aiInfo.TaskId)
|
syncTask(l.svcCtx.DbEngin, aiInfo.TaskId)
|
||||||
}
|
}
|
||||||
|
case 3:
|
||||||
|
for _, vmInfo := range req.VmInfoList {
|
||||||
|
l.svcCtx.DbEngin.Exec("update task_vm set status = ?,start_time = ? where participant_id = ? and task_id = ? and name = ?",
|
||||||
|
vmInfo.Status, vmInfo.StartTime, req.AdapterId, vmInfo.TaskId, vmInfo.Name)
|
||||||
|
syncTask(l.svcCtx.DbEngin, vmInfo.TaskId)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return &resp, nil
|
return &resp, nil
|
||||||
|
|
|
@ -0,0 +1,48 @@
|
||||||
|
package hpc
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
|
||||||
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||||
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
|
||||||
|
|
||||||
|
"github.com/zeromicro/go-zero/core/logx"
|
||||||
|
)
|
||||||
|
|
||||||
|
type ResourceLogic struct {
|
||||||
|
logx.Logger
|
||||||
|
ctx context.Context
|
||||||
|
svcCtx *svc.ServiceContext
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewResourceLogic(ctx context.Context, svcCtx *svc.ServiceContext) *ResourceLogic {
|
||||||
|
return &ResourceLogic{
|
||||||
|
Logger: logx.WithContext(ctx),
|
||||||
|
ctx: ctx,
|
||||||
|
svcCtx: svcCtx,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (l *ResourceLogic) Resource(req *types.HpcResourceReq) (resp *types.HpcResourceResp, err error) {
|
||||||
|
|
||||||
|
l.svcCtx.DbEngin.Raw("SELECT th.NAME as job_name,t.description as job_desc,t.commit_time as submit_time,th.STATUS as job_status,ta.name as adapter_name,tc.name as cluster_name,tc.label as cluster_type FROM task_hpc th LEFT JOIN task t ON t.id = th.task_id JOIN t_cluster tc on th.cluster_id = tc.id JOIN t_adapter ta on tc.adapter_id = ta.id")
|
||||||
|
|
||||||
|
hpcResource := types.HPCResource{
|
||||||
|
GPUCardsTotal: 0,
|
||||||
|
CPUCoresTotal: 0,
|
||||||
|
RAMTotal: 0,
|
||||||
|
GPUCardsUsed: 0,
|
||||||
|
CPUCoresUsed: 0,
|
||||||
|
RAMUsed: 0,
|
||||||
|
GPURate: 0,
|
||||||
|
CPURate: 0,
|
||||||
|
RAMRate: 0,
|
||||||
|
}
|
||||||
|
|
||||||
|
resp = &types.HpcResourceResp{
|
||||||
|
Code: 200,
|
||||||
|
Msg: "success",
|
||||||
|
HPCResource: hpcResource,
|
||||||
|
}
|
||||||
|
return resp, nil
|
||||||
|
}
|
|
@ -26,7 +26,7 @@ func NewScheduleGetAlgorithmsLogic(ctx context.Context, svcCtx *svc.ServiceConte
|
||||||
|
|
||||||
func (l *ScheduleGetAlgorithmsLogic) ScheduleGetAlgorithms(req *types.AiAlgorithmsReq) (resp *types.AiAlgorithmsResp, err error) {
|
func (l *ScheduleGetAlgorithmsLogic) ScheduleGetAlgorithms(req *types.AiAlgorithmsReq) (resp *types.AiAlgorithmsResp, err error) {
|
||||||
resp = &types.AiAlgorithmsResp{}
|
resp = &types.AiAlgorithmsResp{}
|
||||||
algorithms, err := storeLink.GetAlgorithms(l.ctx, l.svcCtx.Scheduler.ResourceCollector, req.ResourceType, req.TaskType, req.Dataset)
|
algorithms, err := storeLink.GetAlgorithms(l.ctx, l.svcCtx.Scheduler.AiService.AiCollectorAdapterMap[req.AdapterId], req.ResourceType, req.TaskType, req.Dataset)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,6 +3,7 @@ package schedule
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/storeLink"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/storeLink"
|
||||||
|
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
|
||||||
|
|
||||||
|
@ -23,9 +24,9 @@ func NewScheduleGetDatasetsLogic(ctx context.Context, svcCtx *svc.ServiceContext
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (l *ScheduleGetDatasetsLogic) ScheduleGetDatasets() (resp *types.AiDatasetsResp, err error) {
|
func (l *ScheduleGetDatasetsLogic) ScheduleGetDatasets(req *types.AiDatasetsReq) (resp *types.AiDatasetsResp, err error) {
|
||||||
resp = &types.AiDatasetsResp{}
|
resp = &types.AiDatasetsResp{}
|
||||||
names, err := storeLink.GetDatasetsNames(l.ctx, l.svcCtx.Scheduler.ResourceCollector)
|
names, err := storeLink.GetDatasetsNames(l.ctx, l.svcCtx.Scheduler.AiService.AiCollectorAdapterMap[req.AdapterId])
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
|
@ -27,6 +27,7 @@ func NewScheduleSubmitLogic(ctx context.Context, svcCtx *svc.ServiceContext) *Sc
|
||||||
func (l *ScheduleSubmitLogic) ScheduleSubmit(req *types.ScheduleReq) (resp *types.ScheduleResp, err error) {
|
func (l *ScheduleSubmitLogic) ScheduleSubmit(req *types.ScheduleReq) (resp *types.ScheduleResp, err error) {
|
||||||
resp = &types.ScheduleResp{}
|
resp = &types.ScheduleResp{}
|
||||||
opt := &option.AiOption{
|
opt := &option.AiOption{
|
||||||
|
AdapterId: req.AiOption.AdapterId,
|
||||||
ResourceType: req.AiOption.ResourceType,
|
ResourceType: req.AiOption.ResourceType,
|
||||||
Tops: req.AiOption.Tops,
|
Tops: req.AiOption.Tops,
|
||||||
TaskType: req.AiOption.TaskType,
|
TaskType: req.AiOption.TaskType,
|
||||||
|
@ -55,6 +56,7 @@ func (l *ScheduleSubmitLogic) ScheduleSubmit(req *types.ScheduleReq) (resp *type
|
||||||
scheResult := &types.ScheduleResult{}
|
scheResult := &types.ScheduleResult{}
|
||||||
scheResult.ClusterId = r.ClusterId
|
scheResult.ClusterId = r.ClusterId
|
||||||
scheResult.TaskId = r.TaskId
|
scheResult.TaskId = r.TaskId
|
||||||
|
scheResult.Strategy = r.Strategy
|
||||||
scheResult.Replica = r.Replica
|
scheResult.Replica = r.Replica
|
||||||
scheResult.Msg = r.Msg
|
scheResult.Msg = r.Msg
|
||||||
resp.Results = append(resp.Results, scheResult)
|
resp.Results = append(resp.Results, scheResult)
|
||||||
|
|
|
@ -5,9 +5,8 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
type Weight struct {
|
type Weight struct {
|
||||||
Id int64
|
Id string
|
||||||
Weight int32
|
Weight int32
|
||||||
Name string
|
|
||||||
Replica int32
|
Replica int32
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -33,6 +33,21 @@ func (s *AiStorage) GetClustersByAdapterId(id string) (*types.ClusterListResp, e
|
||||||
return &resp, nil
|
return &resp, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *AiStorage) GetAdapterIdsByType(adapterType string) ([]string, error) {
|
||||||
|
var list []types.AdapterInfo
|
||||||
|
var ids []string
|
||||||
|
db := s.DbEngin.Model(&types.AdapterInfo{}).Table("t_adapter")
|
||||||
|
db = db.Where("type = ?", adapterType)
|
||||||
|
err := db.Order("create_time desc").Find(&list).Error
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
for _, info := range list {
|
||||||
|
ids = append(ids, info.Id)
|
||||||
|
}
|
||||||
|
return ids, nil
|
||||||
|
}
|
||||||
|
|
||||||
func (s *AiStorage) SaveTask(name string) error {
|
func (s *AiStorage) SaveTask(name string) error {
|
||||||
// 构建主任务结构体
|
// 构建主任务结构体
|
||||||
taskModel := models.Task{
|
taskModel := models.Task{
|
||||||
|
|
|
@ -20,8 +20,7 @@ import (
|
||||||
"github.com/zeromicro/go-zero/core/logx"
|
"github.com/zeromicro/go-zero/core/logx"
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/common"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/common"
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/database"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/database"
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/collector"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service"
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/executor"
|
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy"
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/pkg/response"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/pkg/response"
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/rpc/client/participantservice"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/rpc/client/participantservice"
|
||||||
|
@ -32,16 +31,15 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
type Scheduler struct {
|
type Scheduler struct {
|
||||||
task *response.TaskInfo
|
task *response.TaskInfo
|
||||||
participantIds []int64
|
participantIds []int64
|
||||||
subSchedule SubSchedule
|
subSchedule SubSchedule
|
||||||
dbEngin *gorm.DB
|
dbEngin *gorm.DB
|
||||||
result []string //pID:子任务yamlstring 键值对
|
result []string //pID:子任务yamlstring 键值对
|
||||||
participantRpc participantservice.ParticipantService
|
participantRpc participantservice.ParticipantService
|
||||||
ResourceCollector *map[string]collector.AiCollector
|
AiStorages *database.AiStorage
|
||||||
AiStorages *database.AiStorage
|
AiService *service.AiService
|
||||||
AiExecutor *map[string]executor.AiExecutor
|
mu sync.RWMutex
|
||||||
mu sync.RWMutex
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type SubSchedule interface {
|
type SubSchedule interface {
|
||||||
|
@ -59,8 +57,8 @@ func NewScheduler(subSchedule SubSchedule, val string, dbEngin *gorm.DB, partici
|
||||||
return &Scheduler{task: task, subSchedule: subSchedule, dbEngin: dbEngin, participantRpc: participantRpc}, nil
|
return &Scheduler{task: task, subSchedule: subSchedule, dbEngin: dbEngin, participantRpc: participantRpc}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewSchdlr(resourceCollector *map[string]collector.AiCollector, storages *database.AiStorage, aiExecutor *map[string]executor.AiExecutor) *Scheduler {
|
func NewSchdlr(aiService *service.AiService, storages *database.AiStorage) *Scheduler {
|
||||||
return &Scheduler{ResourceCollector: resourceCollector, AiStorages: storages, AiExecutor: aiExecutor}
|
return &Scheduler{AiService: aiService, AiStorages: storages}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Scheduler) SpecifyClusters() {
|
func (s *Scheduler) SpecifyClusters() {
|
||||||
|
|
|
@ -18,6 +18,7 @@ import (
|
||||||
"context"
|
"context"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"errors"
|
"errors"
|
||||||
|
"fmt"
|
||||||
"gitlink.org.cn/JointCloud/pcm-ac/hpcAC"
|
"gitlink.org.cn/JointCloud/pcm-ac/hpcAC"
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler"
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option"
|
||||||
|
@ -28,7 +29,6 @@ import (
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
|
||||||
"gitlink.org.cn/JointCloud/pcm-octopus/octopus"
|
"gitlink.org.cn/JointCloud/pcm-octopus/octopus"
|
||||||
"strconv"
|
|
||||||
"sync"
|
"sync"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -43,6 +43,7 @@ type AiScheduler struct {
|
||||||
type AiResult struct {
|
type AiResult struct {
|
||||||
TaskId string
|
TaskId string
|
||||||
ClusterId string
|
ClusterId string
|
||||||
|
Strategy string
|
||||||
Replica int32
|
Replica int32
|
||||||
Msg string
|
Msg string
|
||||||
}
|
}
|
||||||
|
@ -63,9 +64,8 @@ func (as *AiScheduler) GetNewStructForDb(task *response.TaskInfo, resource strin
|
||||||
}
|
}
|
||||||
|
|
||||||
func (as *AiScheduler) PickOptimalStrategy() (strategy.Strategy, error) {
|
func (as *AiScheduler) PickOptimalStrategy() (strategy.Strategy, error) {
|
||||||
if as.option.AiClusterId != "" {
|
if len(as.option.ClusterIds) == 1 {
|
||||||
// TODO database operation Find
|
return &strategy.SingleAssignment{Cluster: &strategy.AssignedCluster{ClusterId: as.option.ClusterIds[0], Replicas: 1}}, nil
|
||||||
return &strategy.SingleAssignment{Cluster: &strategy.AssignedCluster{ParticipantId: 0, Name: "", Replicas: 1}}, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
resources, err := as.findClustersWithResources()
|
resources, err := as.findClustersWithResources()
|
||||||
|
@ -79,8 +79,7 @@ func (as *AiScheduler) PickOptimalStrategy() (strategy.Strategy, error) {
|
||||||
|
|
||||||
if len(resources) == 1 {
|
if len(resources) == 1 {
|
||||||
var cluster strategy.AssignedCluster
|
var cluster strategy.AssignedCluster
|
||||||
cluster.ParticipantId = resources[0].ParticipantId
|
cluster.ClusterId = resources[0].ClusterId
|
||||||
cluster.Name = resources[0].Name
|
|
||||||
cluster.Replicas = 1
|
cluster.Replicas = 1
|
||||||
return &strategy.SingleAssignment{Cluster: &cluster}, nil
|
return &strategy.SingleAssignment{Cluster: &cluster}, nil
|
||||||
}
|
}
|
||||||
|
@ -89,7 +88,11 @@ func (as *AiScheduler) PickOptimalStrategy() (strategy.Strategy, error) {
|
||||||
|
|
||||||
switch as.option.StrategyName {
|
switch as.option.StrategyName {
|
||||||
case strategy.REPLICATION:
|
case strategy.REPLICATION:
|
||||||
strategy := strategy.NewReplicationStrategy(¶m.ReplicationParams{Params: params, Replicas: 1})
|
var clusterIds []string
|
||||||
|
for _, resource := range resources {
|
||||||
|
clusterIds = append(clusterIds, resource.ClusterId)
|
||||||
|
}
|
||||||
|
strategy := strategy.NewReplicationStrategy(clusterIds, 1)
|
||||||
return strategy, nil
|
return strategy, nil
|
||||||
case strategy.RESOURCES_PRICING:
|
case strategy.RESOURCES_PRICING:
|
||||||
strategy := strategy.NewPricingStrategy(¶m.ResourcePricingParams{Params: params, Replicas: 1})
|
strategy := strategy.NewPricingStrategy(¶m.ResourcePricingParams{Params: params, Replicas: 1})
|
||||||
|
@ -111,32 +114,47 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) (interfa
|
||||||
return nil, errors.New("clusters is nil")
|
return nil, errors.New("clusters is nil")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for i := len(clusters) - 1; i >= 0; i-- {
|
||||||
|
if clusters[i].Replicas == 0 {
|
||||||
|
clusters = append(clusters[:i], clusters[i+1:]...)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(clusters) == 0 {
|
||||||
|
return nil, errors.New("clusters is nil")
|
||||||
|
}
|
||||||
|
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
var results []*AiResult
|
var results []*AiResult
|
||||||
var errs []error
|
var errs []interface{}
|
||||||
var ch = make(chan *AiResult, len(clusters))
|
var ch = make(chan *AiResult, len(clusters))
|
||||||
var errCh = make(chan error, len(clusters))
|
var errCh = make(chan interface{}, len(clusters))
|
||||||
|
|
||||||
executorMap := *as.AiExecutor
|
executorMap := as.AiService.AiExecutorAdapterMap[as.option.AdapterId]
|
||||||
for _, cluster := range clusters {
|
for _, cluster := range clusters {
|
||||||
c := cluster
|
c := cluster
|
||||||
if cluster.Replicas == 0 {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
go func() {
|
go func() {
|
||||||
opt, _ := cloneAiOption(as.option)
|
opt, _ := cloneAiOption(as.option)
|
||||||
resp, err := executorMap[c.Name].Execute(as.ctx, opt)
|
resp, err := executorMap[c.ClusterId].Execute(as.ctx, opt)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
errCh <- err
|
e := struct {
|
||||||
|
err error
|
||||||
|
clusterId string
|
||||||
|
}{
|
||||||
|
err: err,
|
||||||
|
clusterId: c.ClusterId,
|
||||||
|
}
|
||||||
|
errCh <- e
|
||||||
wg.Done()
|
wg.Done()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
result, _ := convertType(resp)
|
result, _ := convertType(resp)
|
||||||
result.Replica = c.Replicas
|
result.Replica = c.Replicas
|
||||||
result.ClusterId = strconv.FormatInt(c.ParticipantId, 10)
|
result.ClusterId = c.ClusterId
|
||||||
|
result.Strategy = as.option.StrategyName
|
||||||
|
|
||||||
ch <- result
|
ch <- result
|
||||||
wg.Done()
|
wg.Done()
|
||||||
|
@ -150,10 +168,29 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) (interfa
|
||||||
errs = append(errs, e)
|
errs = append(errs, e)
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(errs) != 0 {
|
if len(errs) == len(clusters) {
|
||||||
return nil, errors.New("submit task failed")
|
return nil, errors.New("submit task failed")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if len(errs) != 0 {
|
||||||
|
var msg string
|
||||||
|
for _, err := range errs {
|
||||||
|
e := (err).(struct {
|
||||||
|
err error
|
||||||
|
clusterId string
|
||||||
|
})
|
||||||
|
msg += fmt.Sprintf("clusterId: %v , error: %v \n", e.clusterId, e.err.Error())
|
||||||
|
}
|
||||||
|
for s := range ch {
|
||||||
|
if s.Msg != "" {
|
||||||
|
msg += fmt.Sprintf("clusterId: %v , error: %v \n", s.ClusterId, s.Msg)
|
||||||
|
} else {
|
||||||
|
msg += fmt.Sprintf("clusterId: %v , submitted successfully, taskId: %v \n", s.ClusterId, s.TaskId)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil, errors.New(msg)
|
||||||
|
}
|
||||||
|
|
||||||
for s := range ch {
|
for s := range ch {
|
||||||
// TODO: database operation
|
// TODO: database operation
|
||||||
results = append(results, s)
|
results = append(results, s)
|
||||||
|
@ -164,19 +201,28 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) (interfa
|
||||||
|
|
||||||
func (as *AiScheduler) findClustersWithResources() ([]*collector.ResourceStats, error) {
|
func (as *AiScheduler) findClustersWithResources() ([]*collector.ResourceStats, error) {
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
var ch = make(chan *collector.ResourceStats, len(*as.ResourceCollector))
|
var clustersNum = len(as.AiService.AiCollectorAdapterMap[as.option.AdapterId])
|
||||||
var errCh = make(chan error, len(*as.ResourceCollector))
|
var ch = make(chan *collector.ResourceStats, clustersNum)
|
||||||
|
var errCh = make(chan interface{}, clustersNum)
|
||||||
|
|
||||||
var resourceSpecs []*collector.ResourceStats
|
var resourceSpecs []*collector.ResourceStats
|
||||||
var errs []error
|
var errs []interface{}
|
||||||
|
|
||||||
for _, resourceCollector := range *as.ResourceCollector {
|
for s, resourceCollector := range as.AiService.AiCollectorAdapterMap[as.option.AdapterId] {
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
rc := resourceCollector
|
rc := resourceCollector
|
||||||
|
id := s
|
||||||
go func() {
|
go func() {
|
||||||
spec, err := rc.GetResourceStats(as.ctx)
|
spec, err := rc.GetResourceStats(as.ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
errCh <- err
|
e := struct {
|
||||||
|
err error
|
||||||
|
clusterId string
|
||||||
|
}{
|
||||||
|
err: err,
|
||||||
|
clusterId: id,
|
||||||
|
}
|
||||||
|
errCh <- e
|
||||||
wg.Done()
|
wg.Done()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -196,13 +242,22 @@ func (as *AiScheduler) findClustersWithResources() ([]*collector.ResourceStats,
|
||||||
errs = append(errs, e)
|
errs = append(errs, e)
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(errs) != 0 {
|
if len(errs) == clustersNum {
|
||||||
return nil, errors.New("get resources failed")
|
return nil, errors.New("get resources failed")
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(resourceSpecs) == 0 {
|
if len(errs) != 0 {
|
||||||
return nil, errors.New("no resource found")
|
var msg string
|
||||||
|
for _, err := range errs {
|
||||||
|
e := (err).(struct {
|
||||||
|
err error
|
||||||
|
clusterId string
|
||||||
|
})
|
||||||
|
msg += fmt.Sprintf("clusterId: %v , error: %v \n", e.clusterId, e.err.Error())
|
||||||
|
}
|
||||||
|
return nil, errors.New(msg)
|
||||||
}
|
}
|
||||||
|
|
||||||
return resourceSpecs, nil
|
return resourceSpecs, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,8 @@
|
||||||
package option
|
package option
|
||||||
|
|
||||||
type AiOption struct {
|
type AiOption struct {
|
||||||
AiClusterId string // shuguangAi /octopus ClusterId
|
AdapterId string
|
||||||
|
ClusterIds []string
|
||||||
TaskName string
|
TaskName string
|
||||||
ResourceType string // cpu/gpu/compute card
|
ResourceType string // cpu/gpu/compute card
|
||||||
CpuCoreNum int64
|
CpuCoreNum int64
|
||||||
|
|
|
@ -1,11 +1,14 @@
|
||||||
package service
|
package service
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"github.com/zeromicro/go-zero/zrpc"
|
||||||
"gitlink.org.cn/JointCloud/pcm-ac/hpcacclient"
|
"gitlink.org.cn/JointCloud/pcm-ac/hpcacclient"
|
||||||
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/config"
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/database"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/database"
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/collector"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/collector"
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/executor"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/executor"
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/storeLink"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/storeLink"
|
||||||
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
|
||||||
"gitlink.org.cn/JointCloud/pcm-octopus/octopusclient"
|
"gitlink.org.cn/JointCloud/pcm-octopus/octopusclient"
|
||||||
"gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/client/imagesservice"
|
"gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/client/imagesservice"
|
||||||
"gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/client/modelartsservice"
|
"gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/client/modelartsservice"
|
||||||
|
@ -18,30 +21,60 @@ const (
|
||||||
SHUGUANGAI = "shuguangAi"
|
SHUGUANGAI = "shuguangAi"
|
||||||
)
|
)
|
||||||
|
|
||||||
func InitAiClusterMap(octopusRpc octopusclient.Octopus, modelArtsRpc modelartsservice.ModelArtsService, modelArtsImgRpc imagesservice.ImagesService, aCRpc hpcacclient.HpcAC, storages *database.AiStorage) (*map[string]executor.AiExecutor, *map[string]collector.AiCollector) {
|
type AiService struct {
|
||||||
clusters, _ := storages.GetClustersByAdapterId("1777144940459986944")
|
AiExecutorAdapterMap map[string]map[string]executor.AiExecutor
|
||||||
|
AiCollectorAdapterMap map[string]map[string]collector.AiCollector
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewAiService(conf *config.Config, storages *database.AiStorage) (*AiService, error) {
|
||||||
|
var aiType = "1"
|
||||||
|
adapterIds, err := storages.GetAdapterIdsByType(aiType)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
aiService := &AiService{
|
||||||
|
AiExecutorAdapterMap: make(map[string]map[string]executor.AiExecutor),
|
||||||
|
AiCollectorAdapterMap: make(map[string]map[string]collector.AiCollector),
|
||||||
|
}
|
||||||
|
for _, id := range adapterIds {
|
||||||
|
clusters, err := storages.GetClustersByAdapterId(id)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
exeClusterMap, colClusterMap := InitAiClusterMap(conf, clusters.List)
|
||||||
|
aiService.AiExecutorAdapterMap[id] = exeClusterMap
|
||||||
|
aiService.AiCollectorAdapterMap[id] = colClusterMap
|
||||||
|
}
|
||||||
|
|
||||||
|
return aiService, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func InitAiClusterMap(conf *config.Config, clusters []types.ClusterInfo) (map[string]executor.AiExecutor, map[string]collector.AiCollector) {
|
||||||
executorMap := make(map[string]executor.AiExecutor)
|
executorMap := make(map[string]executor.AiExecutor)
|
||||||
collectorMap := make(map[string]collector.AiCollector)
|
collectorMap := make(map[string]collector.AiCollector)
|
||||||
for _, c := range clusters.List {
|
for _, c := range clusters {
|
||||||
switch c.Name {
|
switch c.Name {
|
||||||
case OCTOPUS:
|
case OCTOPUS:
|
||||||
id, _ := strconv.ParseInt(c.Id, 10, 64)
|
id, _ := strconv.ParseInt(c.Id, 10, 64)
|
||||||
|
octopusRpc := octopusclient.NewOctopus(zrpc.MustNewClient(conf.OctopusRpcConf))
|
||||||
octopus := storeLink.NewOctopusLink(octopusRpc, c.Nickname, id)
|
octopus := storeLink.NewOctopusLink(octopusRpc, c.Nickname, id)
|
||||||
collectorMap[c.Nickname] = octopus
|
collectorMap[c.Id] = octopus
|
||||||
executorMap[c.Nickname] = octopus
|
executorMap[c.Id] = octopus
|
||||||
case MODELARTS:
|
case MODELARTS:
|
||||||
id, _ := strconv.ParseInt(c.Id, 10, 64)
|
id, _ := strconv.ParseInt(c.Id, 10, 64)
|
||||||
|
modelArtsRpc := modelartsservice.NewModelArtsService(zrpc.MustNewClient(conf.ModelArtsRpcConf))
|
||||||
|
modelArtsImgRpc := imagesservice.NewImagesService(zrpc.MustNewClient(conf.ModelArtsImgRpcConf))
|
||||||
modelarts := storeLink.NewModelArtsLink(modelArtsRpc, modelArtsImgRpc, c.Nickname, id)
|
modelarts := storeLink.NewModelArtsLink(modelArtsRpc, modelArtsImgRpc, c.Nickname, id)
|
||||||
collectorMap[c.Nickname] = modelarts
|
collectorMap[c.Id] = modelarts
|
||||||
executorMap[c.Nickname] = modelarts
|
executorMap[c.Id] = modelarts
|
||||||
case SHUGUANGAI:
|
case SHUGUANGAI:
|
||||||
id, _ := strconv.ParseInt(c.Id, 10, 64)
|
id, _ := strconv.ParseInt(c.Id, 10, 64)
|
||||||
|
aCRpc := hpcacclient.NewHpcAC(zrpc.MustNewClient(conf.ACRpcConf))
|
||||||
sgai := storeLink.NewShuguangAi(aCRpc, c.Nickname, id)
|
sgai := storeLink.NewShuguangAi(aCRpc, c.Nickname, id)
|
||||||
collectorMap[c.Nickname] = sgai
|
collectorMap[c.Id] = sgai
|
||||||
executorMap[c.Nickname] = sgai
|
executorMap[c.Id] = sgai
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return &executorMap, &collectorMap
|
return executorMap, collectorMap
|
||||||
}
|
}
|
||||||
|
|
|
@ -9,18 +9,18 @@ type AiCollector interface {
|
||||||
}
|
}
|
||||||
|
|
||||||
type ResourceStats struct {
|
type ResourceStats struct {
|
||||||
ParticipantId int64
|
ClusterId string
|
||||||
Name string
|
Name string
|
||||||
CpuCoreAvail int64
|
CpuCoreAvail int64
|
||||||
CpuCoreTotal int64
|
CpuCoreTotal int64
|
||||||
MemAvail float64
|
MemAvail float64
|
||||||
MemTotal float64
|
MemTotal float64
|
||||||
DiskAvail float64
|
DiskAvail float64
|
||||||
DiskTotal float64
|
DiskTotal float64
|
||||||
GpuAvail int64
|
GpuAvail int64
|
||||||
CardsAvail []*Card
|
CardsAvail []*Card
|
||||||
CpuCoreHours float64
|
CpuCoreHours float64
|
||||||
Balance float64
|
Balance float64
|
||||||
}
|
}
|
||||||
|
|
||||||
type Card struct {
|
type Card struct {
|
||||||
|
|
|
@ -33,15 +33,14 @@ func (ps *DynamicResourcesStrategy) Schedule() ([]*AssignedCluster, error) {
|
||||||
for _, res := range ps.resources {
|
for _, res := range ps.resources {
|
||||||
if opt.ResourceType == "cpu" {
|
if opt.ResourceType == "cpu" {
|
||||||
if res.CpuCoreHours <= 0 {
|
if res.CpuCoreHours <= 0 {
|
||||||
cluster := &AssignedCluster{ParticipantId: res.ParticipantId, Name: res.Name, Replicas: ps.replicas}
|
cluster := &AssignedCluster{ClusterId: res.ClusterId, Replicas: ps.replicas}
|
||||||
results = append(results, cluster)
|
results = append(results, cluster)
|
||||||
return results, nil
|
return results, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if res.CpuCoreHours > maxCpuCoreHoursAvailable {
|
if res.CpuCoreHours > maxCpuCoreHoursAvailable {
|
||||||
maxCpuCoreHoursAvailable = res.CpuCoreHours
|
maxCpuCoreHoursAvailable = res.CpuCoreHours
|
||||||
assignedCluster.Name = res.Name
|
assignedCluster.ClusterId = res.ClusterId
|
||||||
assignedCluster.ParticipantId = res.ParticipantId
|
|
||||||
assignedCluster.Replicas = ps.replicas
|
assignedCluster.Replicas = ps.replicas
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -56,8 +55,7 @@ func (ps *DynamicResourcesStrategy) Schedule() ([]*AssignedCluster, error) {
|
||||||
}
|
}
|
||||||
if maxCurrentCardHours > maxCardHoursAvailable {
|
if maxCurrentCardHours > maxCardHoursAvailable {
|
||||||
maxCardHoursAvailable = maxCurrentCardHours
|
maxCardHoursAvailable = maxCurrentCardHours
|
||||||
assignedCluster.Name = res.Name
|
assignedCluster.ClusterId = res.ClusterId
|
||||||
assignedCluster.ParticipantId = res.ParticipantId
|
|
||||||
assignedCluster.Replicas = ps.replicas
|
assignedCluster.Replicas = ps.replicas
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,23 +0,0 @@
|
||||||
package param
|
|
||||||
|
|
||||||
import "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/entity"
|
|
||||||
|
|
||||||
type ReplicationParams struct {
|
|
||||||
Replicas int32
|
|
||||||
*Params
|
|
||||||
}
|
|
||||||
|
|
||||||
func (r *ReplicationParams) GetReplicas() int32 {
|
|
||||||
return r.Replicas
|
|
||||||
}
|
|
||||||
|
|
||||||
func (r *ReplicationParams) GetParticipants() []*entity.Participant {
|
|
||||||
var participants []*entity.Participant
|
|
||||||
for _, resource := range r.Resources {
|
|
||||||
participants = append(participants, &entity.Participant{
|
|
||||||
Participant_id: resource.ParticipantId,
|
|
||||||
Name: resource.Name,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
return participants
|
|
||||||
}
|
|
|
@ -2,6 +2,7 @@ package param
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/algorithm/providerPricing"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/algorithm/providerPricing"
|
||||||
|
"strconv"
|
||||||
)
|
)
|
||||||
|
|
||||||
type ResourcePricingParams struct {
|
type ResourcePricingParams struct {
|
||||||
|
@ -21,8 +22,9 @@ func (r *ResourcePricingParams) GetTask() *providerPricing.Task {
|
||||||
func (r *ResourcePricingParams) GetProviders() []*providerPricing.Provider {
|
func (r *ResourcePricingParams) GetProviders() []*providerPricing.Provider {
|
||||||
var providerList []*providerPricing.Provider
|
var providerList []*providerPricing.Provider
|
||||||
for _, resource := range r.Resources {
|
for _, resource := range r.Resources {
|
||||||
|
id, _ := strconv.ParseInt(resource.ClusterId, 10, 64)
|
||||||
provider := providerPricing.NewProvider(
|
provider := providerPricing.NewProvider(
|
||||||
resource.ParticipantId,
|
id,
|
||||||
float64(resource.CpuCoreAvail),
|
float64(resource.CpuCoreAvail),
|
||||||
resource.MemAvail,
|
resource.MemAvail,
|
||||||
resource.DiskAvail, 0.0, 0.0, 0.0)
|
resource.DiskAvail, 0.0, 0.0, 0.0)
|
||||||
|
|
|
@ -2,33 +2,31 @@ package strategy
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"errors"
|
"errors"
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/entity"
|
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy/param"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type ReplicationStrategy struct {
|
type ReplicationStrategy struct {
|
||||||
replicas int32
|
replicas int32
|
||||||
participants []*entity.Participant
|
clusterIds []string
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewReplicationStrategy(params *param.ReplicationParams) *ReplicationStrategy {
|
func NewReplicationStrategy(clusterIds []string, replicas int32) *ReplicationStrategy {
|
||||||
return &ReplicationStrategy{replicas: params.GetReplicas(),
|
return &ReplicationStrategy{clusterIds: clusterIds,
|
||||||
participants: params.GetParticipants(),
|
replicas: replicas,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (ps *ReplicationStrategy) Schedule() ([]*AssignedCluster, error) {
|
func (r *ReplicationStrategy) Schedule() ([]*AssignedCluster, error) {
|
||||||
if ps.replicas < 1 {
|
if r.replicas < 1 {
|
||||||
return nil, errors.New("replicas must be greater than 0")
|
return nil, errors.New("replicas must be greater than 0")
|
||||||
}
|
}
|
||||||
|
|
||||||
if ps.participants == nil {
|
if len(r.clusterIds) == 0 {
|
||||||
return nil, errors.New("participantId must be set")
|
return nil, errors.New("clusterIds must be set")
|
||||||
}
|
}
|
||||||
|
|
||||||
var results []*AssignedCluster
|
var results []*AssignedCluster
|
||||||
for _, p := range ps.participants {
|
for _, c := range r.clusterIds {
|
||||||
cluster := &AssignedCluster{ParticipantId: p.Participant_id, Name: p.Name, Replicas: ps.replicas}
|
cluster := &AssignedCluster{ClusterId: c, Replicas: r.replicas}
|
||||||
results = append(results, cluster)
|
results = append(results, cluster)
|
||||||
}
|
}
|
||||||
return results, nil
|
return results, nil
|
||||||
|
|
|
@ -18,6 +18,7 @@ import (
|
||||||
"errors"
|
"errors"
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/algorithm/providerPricing"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/algorithm/providerPricing"
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy/param"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy/param"
|
||||||
|
"strconv"
|
||||||
)
|
)
|
||||||
|
|
||||||
type PricingStrategy struct {
|
type PricingStrategy struct {
|
||||||
|
@ -154,7 +155,7 @@ func (ps *PricingStrategy) Schedule() ([]*AssignedCluster, error) {
|
||||||
if e == 0 {
|
if e == 0 {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
cluster := &AssignedCluster{ParticipantId: ps.ProviderList[i].Pid, Replicas: int32(e)}
|
cluster := &AssignedCluster{ClusterId: strconv.FormatInt(ps.ProviderList[i].Pid, 10), Replicas: int32(e)}
|
||||||
results = append(results, cluster)
|
results = append(results, cluster)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -29,7 +29,7 @@ func (s *StaticWeightStrategy) Schedule() ([]*AssignedCluster, error) {
|
||||||
weights := make([]*weightDistributing.Weight, 0)
|
weights := make([]*weightDistributing.Weight, 0)
|
||||||
for k, v := range s.staticWeightMap {
|
for k, v := range s.staticWeightMap {
|
||||||
weight := &weightDistributing.Weight{
|
weight := &weightDistributing.Weight{
|
||||||
Name: k,
|
Id: k,
|
||||||
Weight: v,
|
Weight: v,
|
||||||
}
|
}
|
||||||
weights = append(weights, weight)
|
weights = append(weights, weight)
|
||||||
|
@ -39,7 +39,7 @@ func (s *StaticWeightStrategy) Schedule() ([]*AssignedCluster, error) {
|
||||||
|
|
||||||
var results []*AssignedCluster
|
var results []*AssignedCluster
|
||||||
for _, weight := range weights {
|
for _, weight := range weights {
|
||||||
cluster := &AssignedCluster{ParticipantId: weight.Id, Name: weight.Name, Replicas: weight.Replica}
|
cluster := &AssignedCluster{ClusterId: weight.Id, Replicas: weight.Replica}
|
||||||
results = append(results, cluster)
|
results = append(results, cluster)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -18,9 +18,8 @@ type Strategy interface {
|
||||||
}
|
}
|
||||||
|
|
||||||
type AssignedCluster struct {
|
type AssignedCluster struct {
|
||||||
ParticipantId int64
|
ClusterId string
|
||||||
Name string
|
Replicas int32
|
||||||
Replicas int32
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func GetStrategyNames() []string {
|
func GetStrategyNames() []string {
|
||||||
|
|
|
@ -5,7 +5,6 @@ import (
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/entity"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/entity"
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/collector"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/collector"
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy"
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy/param"
|
|
||||||
"testing"
|
"testing"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -17,15 +16,15 @@ func TestReplication(t *testing.T) {
|
||||||
}
|
}
|
||||||
rsc := []*collector.ResourceStats{
|
rsc := []*collector.ResourceStats{
|
||||||
{
|
{
|
||||||
ParticipantId: 1,
|
ClusterId: "1",
|
||||||
Name: "test1",
|
Name: "test1",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
ParticipantId: 1,
|
ClusterId: "2",
|
||||||
Name: "test2"},
|
Name: "test2"},
|
||||||
{
|
{
|
||||||
ParticipantId: 1,
|
ClusterId: "3",
|
||||||
Name: "test3"},
|
Name: "test3"},
|
||||||
}
|
}
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
|
@ -47,8 +46,11 @@ func TestReplication(t *testing.T) {
|
||||||
|
|
||||||
for _, tt := range tests {
|
for _, tt := range tests {
|
||||||
t.Run(tt.name, func(t *testing.T) {
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
params := ¶m.Params{Resources: rsc}
|
var clusterIds []string
|
||||||
repl := strategy.NewReplicationStrategy(¶m.ReplicationParams{Params: params, Replicas: tt.replica})
|
for _, stats := range rsc {
|
||||||
|
clusterIds = append(clusterIds, stats.ClusterId)
|
||||||
|
}
|
||||||
|
repl := strategy.NewReplicationStrategy(clusterIds, 0)
|
||||||
schedule, err := repl.Schedule()
|
schedule, err := repl.Schedule()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return
|
||||||
|
|
|
@ -283,11 +283,11 @@ func (o *OctopusLink) GetResourceStats(ctx context.Context) (*collector.Resource
|
||||||
}
|
}
|
||||||
|
|
||||||
resourceStats := &collector.ResourceStats{
|
resourceStats := &collector.ResourceStats{
|
||||||
ParticipantId: o.participantId,
|
ClusterId: strconv.FormatInt(o.participantId, 10),
|
||||||
Name: o.platform,
|
Name: o.platform,
|
||||||
Balance: balance,
|
Balance: balance,
|
||||||
CardsAvail: cards,
|
CardsAvail: cards,
|
||||||
CpuCoreHours: cpuHours,
|
CpuCoreHours: cpuHours,
|
||||||
}
|
}
|
||||||
|
|
||||||
return resourceStats, nil
|
return resourceStats, nil
|
||||||
|
|
|
@ -26,6 +26,8 @@ import (
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
@ -266,96 +268,144 @@ func (s *ShuguangAi) QuerySpecs(ctx context.Context) (interface{}, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *ShuguangAi) GetResourceStats(ctx context.Context) (*collector.ResourceStats, error) {
|
func (s *ShuguangAi) GetResourceStats(ctx context.Context) (*collector.ResourceStats, error) {
|
||||||
//balance
|
var wg sync.WaitGroup
|
||||||
userReq := &hpcAC.GetUserInfoReq{}
|
wg.Add(4)
|
||||||
userinfo, err := s.aCRpc.GetUserInfo(ctx, userReq)
|
var cBalance = make(chan float64)
|
||||||
if err != nil {
|
var cMemTotal = make(chan float64)
|
||||||
return nil, err
|
var cTotalCpu = make(chan int64)
|
||||||
}
|
|
||||||
balance, _ := strconv.ParseFloat(userinfo.Data.AccountBalance, 64)
|
|
||||||
|
|
||||||
//resource limit
|
resourceStats := &collector.ResourceStats{
|
||||||
limitReq := &hpcAC.QueueReq{}
|
ClusterId: strconv.FormatInt(s.participantId, 10),
|
||||||
limitResp, err := s.aCRpc.QueryUserQuotasLimit(ctx, limitReq)
|
Name: s.platform,
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
}
|
||||||
totalCpu := limitResp.Data.AccountMaxCpu
|
|
||||||
totalDcu := limitResp.Data.AccountMaxDcu
|
|
||||||
|
|
||||||
//disk
|
|
||||||
//diskReq := &hpcAC.ParaStorQuotaReq{}
|
|
||||||
//diskResp, err := s.aCRpc.ParaStorQuota(ctx, diskReq)
|
|
||||||
//if err != nil {
|
|
||||||
// return nil, err
|
|
||||||
//}
|
|
||||||
//
|
|
||||||
//totalDisk := common.RoundFloat(diskResp.Data[0].Threshold*KB*KB*KB, 3)
|
|
||||||
//availDisk := common.RoundFloat((diskResp.Data[0].Threshold-diskResp.Data[0].Usage)*KB*KB*KB, 3)
|
|
||||||
|
|
||||||
//memory
|
|
||||||
nodeResp, err := s.aCRpc.GetNodeResources(ctx, nil)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
memSize := common.RoundFloat(float64(nodeResp.Data.MemorySize)*KB*KB, 3) // MB to BYTES
|
|
||||||
|
|
||||||
//resources being occupied
|
|
||||||
memberJobResp, err := s.aCRpc.GetMemberJobs(ctx, nil)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
var CpuCoreAvail int64
|
|
||||||
var MemAvail float64
|
|
||||||
if len(memberJobResp.Data) != 0 {
|
|
||||||
CpuCoreAvail = totalCpu
|
|
||||||
MemAvail = memSize
|
|
||||||
} else {
|
|
||||||
var cpuCoreUsed int64
|
|
||||||
var memUsed float64
|
|
||||||
for _, datum := range memberJobResp.Data {
|
|
||||||
cpuCoreUsed += datum.CpuCore
|
|
||||||
}
|
|
||||||
memUsed = float64(cpuCoreUsed * 2 * KB * KB * KB) // 2 GB per cpu core
|
|
||||||
if cpuCoreUsed > totalCpu {
|
|
||||||
CpuCoreAvail = 0
|
|
||||||
} else {
|
|
||||||
CpuCoreAvail = totalCpu - cpuCoreUsed
|
|
||||||
}
|
|
||||||
if memUsed > memSize {
|
|
||||||
MemAvail = 0
|
|
||||||
} else {
|
|
||||||
MemAvail = memSize - memUsed
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//usable hours
|
|
||||||
var cards []*collector.Card
|
|
||||||
cardHours := common.RoundFloat(balance/DCUPRICEPERHOUR, 3)
|
|
||||||
cpuHours := common.RoundFloat(balance/CPUCOREPRICEPERHOUR, 3)
|
|
||||||
|
|
||||||
dcu := &collector.Card{
|
dcu := &collector.Card{
|
||||||
Platform: SHUGUANGAI,
|
Platform: SHUGUANGAI,
|
||||||
Type: CARD,
|
Type: CARD,
|
||||||
Name: DCU,
|
Name: DCU,
|
||||||
TOpsAtFp16: DCU_TOPS,
|
TOpsAtFp16: DCU_TOPS,
|
||||||
CardHours: cardHours,
|
|
||||||
CardNum: int32(totalDcu),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//balance
|
||||||
|
go func() {
|
||||||
|
userReq := &hpcAC.GetUserInfoReq{}
|
||||||
|
userinfo, err := s.aCRpc.GetUserInfo(ctx, userReq)
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
balance, _ := strconv.ParseFloat(userinfo.Data.AccountBalance, 64)
|
||||||
|
resourceStats.Balance = balance
|
||||||
|
|
||||||
|
cBalance <- balance
|
||||||
|
}()
|
||||||
|
|
||||||
|
//resource limit
|
||||||
|
go func() {
|
||||||
|
limitReq := &hpcAC.QueueReq{}
|
||||||
|
limitResp, err := s.aCRpc.QueryUserQuotasLimit(ctx, limitReq)
|
||||||
|
if err != nil {
|
||||||
|
wg.Done()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
totalCpu := limitResp.Data.AccountMaxCpu
|
||||||
|
totalDcu := limitResp.Data.AccountMaxDcu
|
||||||
|
|
||||||
|
dcu.CardNum = int32(totalDcu)
|
||||||
|
resourceStats.CpuCoreTotal = totalCpu
|
||||||
|
|
||||||
|
cTotalCpu <- totalCpu
|
||||||
|
wg.Done()
|
||||||
|
}()
|
||||||
|
|
||||||
|
//disk
|
||||||
|
go func() {
|
||||||
|
diskReq := &hpcAC.ParaStorQuotaReq{}
|
||||||
|
diskResp, err := s.aCRpc.ParaStorQuota(ctx, diskReq)
|
||||||
|
if err != nil {
|
||||||
|
wg.Done()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
totalDisk := common.RoundFloat(diskResp.Data[0].Threshold*KB*KB*KB, 3)
|
||||||
|
availDisk := common.RoundFloat((diskResp.Data[0].Threshold-diskResp.Data[0].Usage)*KB*KB*KB, 3)
|
||||||
|
|
||||||
|
resourceStats.DiskTotal = totalDisk
|
||||||
|
resourceStats.DiskAvail = availDisk
|
||||||
|
wg.Done()
|
||||||
|
}()
|
||||||
|
|
||||||
|
//memory
|
||||||
|
go func() {
|
||||||
|
nodeResp, err := s.aCRpc.GetNodeResources(ctx, nil)
|
||||||
|
if err != nil {
|
||||||
|
wg.Done()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
memSize := common.RoundFloat(float64(nodeResp.Data.MemorySize)*KB*KB, 3) // MB to BYTES
|
||||||
|
|
||||||
|
resourceStats.MemTotal = memSize
|
||||||
|
cMemTotal <- memSize
|
||||||
|
wg.Done()
|
||||||
|
}()
|
||||||
|
|
||||||
|
//resources being occupied
|
||||||
|
go func() {
|
||||||
|
memSize := <-cMemTotal
|
||||||
|
totalCpu := <-cTotalCpu
|
||||||
|
memberJobResp, err := s.aCRpc.GetMemberJobs(ctx, nil)
|
||||||
|
if err != nil {
|
||||||
|
wg.Done()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
var cpuCoreAvail int64
|
||||||
|
var memAvail float64
|
||||||
|
if len(memberJobResp.Data) != 0 {
|
||||||
|
cpuCoreAvail = totalCpu
|
||||||
|
memAvail = memSize
|
||||||
|
} else {
|
||||||
|
var cpuCoreUsed int64
|
||||||
|
var memUsed float64
|
||||||
|
for _, datum := range memberJobResp.Data {
|
||||||
|
cpuCoreUsed += datum.CpuCore
|
||||||
|
}
|
||||||
|
memUsed = float64(cpuCoreUsed * 2 * KB * KB * KB) // 2 GB per cpu core
|
||||||
|
if cpuCoreUsed > totalCpu {
|
||||||
|
cpuCoreAvail = 0
|
||||||
|
} else {
|
||||||
|
cpuCoreAvail = totalCpu - cpuCoreUsed
|
||||||
|
}
|
||||||
|
if memUsed > memSize {
|
||||||
|
memAvail = 0
|
||||||
|
} else {
|
||||||
|
memAvail = memSize - memUsed
|
||||||
|
}
|
||||||
|
}
|
||||||
|
resourceStats.CpuCoreAvail = cpuCoreAvail
|
||||||
|
resourceStats.MemAvail = memAvail
|
||||||
|
wg.Done()
|
||||||
|
}()
|
||||||
|
|
||||||
|
//usable hours
|
||||||
|
var balance float64
|
||||||
|
|
||||||
|
select {
|
||||||
|
case v := <-cBalance:
|
||||||
|
balance = v
|
||||||
|
case <-time.After(2 * time.Second):
|
||||||
|
return nil, errors.New("get balance rpc call failed")
|
||||||
|
}
|
||||||
|
|
||||||
|
var cards []*collector.Card
|
||||||
|
cardHours := common.RoundFloat(balance/DCUPRICEPERHOUR, 3)
|
||||||
|
cpuHours := common.RoundFloat(balance/CPUCOREPRICEPERHOUR, 3)
|
||||||
|
|
||||||
|
dcu.CardHours = cardHours
|
||||||
|
resourceStats.CpuCoreHours = cpuHours
|
||||||
|
|
||||||
|
wg.Wait()
|
||||||
|
|
||||||
cards = append(cards, dcu)
|
cards = append(cards, dcu)
|
||||||
resourceStats := &collector.ResourceStats{
|
resourceStats.CardsAvail = cards
|
||||||
ParticipantId: s.participantId,
|
|
||||||
Name: s.platform,
|
|
||||||
Balance: balance,
|
|
||||||
CpuCoreTotal: totalCpu,
|
|
||||||
CpuCoreAvail: CpuCoreAvail,
|
|
||||||
//DiskTotal: totalDisk,
|
|
||||||
//DiskAvail: availDisk,
|
|
||||||
MemTotal: memSize,
|
|
||||||
MemAvail: MemAvail,
|
|
||||||
CpuCoreHours: cpuHours,
|
|
||||||
CardsAvail: cards,
|
|
||||||
}
|
|
||||||
|
|
||||||
return resourceStats, nil
|
return resourceStats, nil
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,6 +16,7 @@ package storeLink
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"fmt"
|
||||||
"github.com/pkg/errors"
|
"github.com/pkg/errors"
|
||||||
"gitlink.org.cn/JointCloud/pcm-ac/hpcAC"
|
"gitlink.org.cn/JointCloud/pcm-ac/hpcAC"
|
||||||
"gitlink.org.cn/JointCloud/pcm-ac/hpcacclient"
|
"gitlink.org.cn/JointCloud/pcm-ac/hpcacclient"
|
||||||
|
@ -127,21 +128,29 @@ func GetResourceTypes() []string {
|
||||||
return resourceTypes
|
return resourceTypes
|
||||||
}
|
}
|
||||||
|
|
||||||
func GetDatasetsNames(ctx context.Context, collectorMap *map[string]collector.AiCollector) ([]string, error) {
|
func GetDatasetsNames(ctx context.Context, collectorMap map[string]collector.AiCollector) ([]string, error) {
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
var errCh = make(chan error, len(*collectorMap))
|
var errCh = make(chan interface{}, len(collectorMap))
|
||||||
var errs []error
|
var errs []interface{}
|
||||||
var names []string
|
var names []string
|
||||||
var mu sync.Mutex
|
var mu sync.Mutex
|
||||||
colMap := *collectorMap
|
colMap := collectorMap
|
||||||
for _, col := range colMap {
|
for s, col := range colMap {
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
c := col
|
c := col
|
||||||
|
id := s
|
||||||
go func() {
|
go func() {
|
||||||
var ns []string
|
var ns []string
|
||||||
specs, err := c.GetDatasetsSpecs(ctx)
|
specs, err := c.GetDatasetsSpecs(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
errCh <- err
|
e := struct {
|
||||||
|
err error
|
||||||
|
clusterId string
|
||||||
|
}{
|
||||||
|
err: err,
|
||||||
|
clusterId: id,
|
||||||
|
}
|
||||||
|
errCh <- e
|
||||||
wg.Done()
|
wg.Done()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -167,34 +176,54 @@ func GetDatasetsNames(ctx context.Context, collectorMap *map[string]collector.Ai
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
close(errCh)
|
close(errCh)
|
||||||
|
|
||||||
|
if len(errs) == len(colMap) {
|
||||||
|
return nil, errors.New("get DatasetsNames failed")
|
||||||
|
}
|
||||||
|
|
||||||
for e := range errCh {
|
for e := range errCh {
|
||||||
errs = append(errs, e)
|
errs = append(errs, e)
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(errs) != 0 {
|
if len(errs) != 0 {
|
||||||
return nil, errors.New("get DatasetsNames failed")
|
var msg string
|
||||||
|
for _, err := range errs {
|
||||||
|
e := (err).(struct {
|
||||||
|
err error
|
||||||
|
clusterId string
|
||||||
|
})
|
||||||
|
msg += fmt.Sprintf("clusterId: %v , error: %v \n", e.clusterId, e.err.Error())
|
||||||
|
}
|
||||||
|
return nil, errors.New(msg)
|
||||||
}
|
}
|
||||||
|
|
||||||
names = common.RemoveDuplicates(names)
|
names = common.RemoveDuplicates(names)
|
||||||
return names, nil
|
return names, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func GetAlgorithms(ctx context.Context, collectorMap *map[string]collector.AiCollector, resourceType string, taskType string, dataset string) ([]string, error) {
|
func GetAlgorithms(ctx context.Context, collectorMap map[string]collector.AiCollector, resourceType string, taskType string, dataset string) ([]string, error) {
|
||||||
var names []string
|
var names []string
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
var errCh = make(chan error, len(*collectorMap))
|
var errCh = make(chan interface{}, len(collectorMap))
|
||||||
var errs []error
|
var errs []interface{}
|
||||||
var mu sync.Mutex
|
var mu sync.Mutex
|
||||||
|
|
||||||
colMap := *collectorMap
|
colMap := collectorMap
|
||||||
for _, col := range colMap {
|
for s, col := range colMap {
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
c := col
|
c := col
|
||||||
|
id := s
|
||||||
go func() {
|
go func() {
|
||||||
var ns []string
|
var ns []string
|
||||||
algorithms, err := c.GetAlgorithms(ctx)
|
algorithms, err := c.GetAlgorithms(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
errCh <- err
|
e := struct {
|
||||||
|
err error
|
||||||
|
clusterId string
|
||||||
|
}{
|
||||||
|
err: err,
|
||||||
|
clusterId: id,
|
||||||
|
}
|
||||||
|
errCh <- e
|
||||||
wg.Done()
|
wg.Done()
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
@ -240,10 +269,22 @@ func GetAlgorithms(ctx context.Context, collectorMap *map[string]collector.AiCol
|
||||||
errs = append(errs, e)
|
errs = append(errs, e)
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(errs) != 0 {
|
if len(errs) == len(colMap) {
|
||||||
return nil, errors.New("get Algorithms failed")
|
return nil, errors.New("get Algorithms failed")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if len(errs) != 0 {
|
||||||
|
var msg string
|
||||||
|
for _, err := range errs {
|
||||||
|
e := (err).(struct {
|
||||||
|
err error
|
||||||
|
clusterId string
|
||||||
|
})
|
||||||
|
msg += fmt.Sprintf("clusterId: %v , error: %v \n", e.clusterId, e.err.Error())
|
||||||
|
}
|
||||||
|
return nil, errors.New(msg)
|
||||||
|
}
|
||||||
|
|
||||||
names = common.RemoveDuplicates(names)
|
names = common.RemoveDuplicates(names)
|
||||||
return names, nil
|
return names, nil
|
||||||
}
|
}
|
||||||
|
|
|
@ -116,24 +116,28 @@ func NewServiceContext(c config.Config) *ServiceContext {
|
||||||
})
|
})
|
||||||
|
|
||||||
// scheduler
|
// scheduler
|
||||||
octopusRpc := octopusclient.NewOctopus(zrpc.MustNewClient(c.OctopusRpcConf))
|
//octopusRpc := octopusclient.NewOctopus(zrpc.MustNewClient(c.OctopusRpcConf))
|
||||||
aCRpc := hpcacclient.NewHpcAC(zrpc.MustNewClient(c.ACRpcConf))
|
//aCRpc := hpcacclient.NewHpcAC(zrpc.MustNewClient(c.ACRpcConf))
|
||||||
modelArtsRpc := modelartsservice.NewModelArtsService(zrpc.MustNewClient(c.ModelArtsRpcConf))
|
//modelArtsRpc := modelartsservice.NewModelArtsService(zrpc.MustNewClient(c.ModelArtsRpcConf))
|
||||||
modelArtsImgRpc := imagesservice.NewImagesService(zrpc.MustNewClient(c.ModelArtsImgRpcConf))
|
//modelArtsImgRpc := imagesservice.NewImagesService(zrpc.MustNewClient(c.ModelArtsImgRpcConf))
|
||||||
storage := &database.AiStorage{DbEngin: dbEngin}
|
storage := &database.AiStorage{DbEngin: dbEngin}
|
||||||
aiExecutor, resourceCollector := service.InitAiClusterMap(octopusRpc, modelArtsRpc, modelArtsImgRpc, aCRpc, storage)
|
aiService, err := service.NewAiService(&c, storage)
|
||||||
scheduler := scheduler.NewSchdlr(resourceCollector, storage, aiExecutor)
|
if err != nil {
|
||||||
|
logx.Error(err.Error())
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
scheduler := scheduler.NewSchdlr(aiService, storage)
|
||||||
|
|
||||||
return &ServiceContext{
|
return &ServiceContext{
|
||||||
Cron: cron.New(cron.WithSeconds()),
|
Cron: cron.New(cron.WithSeconds()),
|
||||||
DbEngin: dbEngin,
|
DbEngin: dbEngin,
|
||||||
Config: c,
|
Config: c,
|
||||||
RedisClient: redisClient,
|
RedisClient: redisClient,
|
||||||
ModelArtsRpc: modelArtsRpc,
|
ModelArtsRpc: modelartsservice.NewModelArtsService(zrpc.MustNewClient(c.ModelArtsRpcConf)),
|
||||||
ModelArtsImgRpc: modelArtsImgRpc,
|
ModelArtsImgRpc: imagesservice.NewImagesService(zrpc.MustNewClient(c.ModelArtsImgRpcConf)),
|
||||||
CephRpc: cephclient.NewCeph(zrpc.MustNewClient(c.CephRpcConf)),
|
CephRpc: cephclient.NewCeph(zrpc.MustNewClient(c.CephRpcConf)),
|
||||||
ACRpc: aCRpc,
|
ACRpc: hpcacclient.NewHpcAC(zrpc.MustNewClient(c.ACRpcConf)),
|
||||||
OctopusRpc: octopusRpc,
|
OctopusRpc: octopusclient.NewOctopus(zrpc.MustNewClient(c.OctopusRpcConf)),
|
||||||
OpenstackRpc: openstackclient.NewOpenstack(zrpc.MustNewClient(c.OpenstackRpcConf)),
|
OpenstackRpc: openstackclient.NewOpenstack(zrpc.MustNewClient(c.OpenstackRpcConf)),
|
||||||
K8sRpc: kubernetesclient.NewKubernetes(zrpc.MustNewClient(c.K8sNativeConf)),
|
K8sRpc: kubernetesclient.NewKubernetes(zrpc.MustNewClient(c.K8sNativeConf)),
|
||||||
MonitorClient: make(map[int64]tracker.Prometheus),
|
MonitorClient: make(map[int64]tracker.Prometheus),
|
||||||
|
|
|
@ -131,40 +131,22 @@ type TaskYaml struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
type CommitVmTaskReq struct {
|
type CommitVmTaskReq struct {
|
||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
NsID string `json:"nsID"`
|
NsID string `json:"nsID"`
|
||||||
Replicas int64 `json:"replicas,optional"`
|
Replicas int64 `json:"replicas,optional"`
|
||||||
MatchLabels map[string]string `json:"matchLabels,optional"`
|
MatchLabels map[string]string `json:"matchLabels,optional"`
|
||||||
Servers []ServerCommit `json:"servers,optional"`
|
AdapterId string `json:"adapterId,optional"`
|
||||||
Platform string `json:"platform,optional"`
|
ClusterType string `json:"clusterType,optional"`
|
||||||
AdapterId string `json:"adapterId,optional"`
|
CreateMulServer []CreateMulDomainServer `json:"createMulServer,optional"`
|
||||||
ClusterType string `json:"clusterType,optional"`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type ServerCommit struct {
|
type CreateMulDomainServer struct {
|
||||||
AllCardRunTime string `json:"allCardRunTime"`
|
Platform string `json:"platform,optional"`
|
||||||
FlavorRef string `json:"flavorRef,optional"`
|
Name string `json:"name,optional"`
|
||||||
Name string `json:"name,optional"`
|
Min_count int64 `json:"min_count,optional"`
|
||||||
ImageRef string `json:"imageRef,optional"`
|
ImageRef string `json:"imageRef,optional"`
|
||||||
AccessIPv4 string `json:"accessIPv4,optional"`
|
FlavorRef string `json:"flavorRef,optional"`
|
||||||
AccessIPv6 string `json:"accessIPv6,optional"`
|
Uuid string `json:"uuid,optional"`
|
||||||
AdminPass string `json:"adminPass,optional"`
|
|
||||||
Availability_zone string `json:"availability_zone,optional"`
|
|
||||||
Key_name string `json:"key_name,optional"`
|
|
||||||
Hostname string `json:"hostname,optional"`
|
|
||||||
Host string `json:"host,optional"`
|
|
||||||
Networks []Networks `json:"networks,optional"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type Networks struct {
|
|
||||||
Uuid string `json:"uuid,optional"`
|
|
||||||
Port string `json:"port,optional"`
|
|
||||||
Fixed_ip string `json:"fixed_ip,optional"`
|
|
||||||
Tag string `json:"tag,optional"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type Block_device_mapping_v2Commit struct {
|
|
||||||
Uuid string `json:"uuid,optional"`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type CommitVmTaskResp struct {
|
type CommitVmTaskResp struct {
|
||||||
|
@ -5309,13 +5291,15 @@ type ScheduleResp struct {
|
||||||
type ScheduleResult struct {
|
type ScheduleResult struct {
|
||||||
ClusterId string `json:"clusterId"`
|
ClusterId string `json:"clusterId"`
|
||||||
TaskId string `json:"taskId"`
|
TaskId string `json:"taskId"`
|
||||||
|
Strategy string `json:"strategy"`
|
||||||
Replica int32 `json:"replica"`
|
Replica int32 `json:"replica"`
|
||||||
Msg string `json:"msg"`
|
Msg string `json:"msg"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type AiOption struct {
|
type AiOption struct {
|
||||||
TaskName string `json:"taskName"`
|
TaskName string `json:"taskName"`
|
||||||
AiClusterId string `json:"aiClusterId,optional"`
|
AdapterId string `json:"adapterId"`
|
||||||
|
AiClusterIds []string `json:"aiClusterIds"`
|
||||||
ResourceType string `json:"resourceType"`
|
ResourceType string `json:"resourceType"`
|
||||||
Tops float64 `json:"Tops,optional"`
|
Tops float64 `json:"Tops,optional"`
|
||||||
TaskType string `json:"taskType"`
|
TaskType string `json:"taskType"`
|
||||||
|
@ -5336,6 +5320,10 @@ type AiTaskTypesResp struct {
|
||||||
TaskTypes []string `json:"taskTypes"`
|
TaskTypes []string `json:"taskTypes"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type AiDatasetsReq struct {
|
||||||
|
AdapterId string `path:"adapterId"`
|
||||||
|
}
|
||||||
|
|
||||||
type AiDatasetsResp struct {
|
type AiDatasetsResp struct {
|
||||||
Datasets []string `json:"datasets"`
|
Datasets []string `json:"datasets"`
|
||||||
}
|
}
|
||||||
|
@ -5345,6 +5333,7 @@ type AiStrategyResp struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
type AiAlgorithmsReq struct {
|
type AiAlgorithmsReq struct {
|
||||||
|
AdapterId string `path:"adapterId"`
|
||||||
ResourceType string `path:"resourceType"`
|
ResourceType string `path:"resourceType"`
|
||||||
TaskType string `path:"taskType"`
|
TaskType string `path:"taskType"`
|
||||||
Dataset string `path:"dataset"`
|
Dataset string `path:"dataset"`
|
||||||
|
@ -5451,7 +5440,10 @@ type VmInfo struct {
|
||||||
BlockUuid string `json:"block_uuid,omitempty"`
|
BlockUuid string `json:"block_uuid,omitempty"`
|
||||||
SourceType string `json:"source_type,omitempty"`
|
SourceType string `json:"source_type,omitempty"`
|
||||||
DeleteOnTermination bool `json:"delete_on_termination,omitempty"`
|
DeleteOnTermination bool `json:"delete_on_termination,omitempty"`
|
||||||
State string `json:"state,omitempty"`
|
Status string `json:"status,omitempty"`
|
||||||
|
MinCount string `json:"min_count,omitempty"`
|
||||||
|
Platform string `json:"platform,omitempty"`
|
||||||
|
Uuid string `json:"uuid,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type PushTaskInfoReq struct {
|
type PushTaskInfoReq struct {
|
||||||
|
@ -5468,7 +5460,37 @@ type PushTaskInfoResp struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
type PushResourceInfoReq struct {
|
type PushResourceInfoReq struct {
|
||||||
AdapterId int64 `json:"adapterId"`
|
AdapterId int64 `json:"adapterId"`
|
||||||
|
ResourceStats []ResourceStats `json:"resourceStats"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type PushResourceInfoResp struct {
|
||||||
|
Code int64 `json:"code"`
|
||||||
|
Msg string `json:"msg"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type ResourceStats struct {
|
||||||
|
ClusterId int64 `json:"clusterId"`
|
||||||
|
Name string `json:"name"`
|
||||||
|
CpuCoreAvail int64 `json:"cpuCoreAvail"`
|
||||||
|
CpuCoreTotal int64 `json:"cpuCoreTotal"`
|
||||||
|
MemAvail float64 `json:"memAvail"`
|
||||||
|
MemTotal float64 `json:"memTotal"`
|
||||||
|
DiskAvail float64 `json:"diskAvail"`
|
||||||
|
DiskTotal float64 `json:"diskTotal"`
|
||||||
|
GpuAvail int64 `json:"gpuAvail"`
|
||||||
|
CardsAvail []*Card `json:"cardsAvail"`
|
||||||
|
CpuCoreHours float64 `json:"cpuCoreHours"`
|
||||||
|
Balance float64 `json:"balance"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type Card struct {
|
||||||
|
Platform string `json:"platform"`
|
||||||
|
Type string `json:"type"`
|
||||||
|
Name string `json:"name"`
|
||||||
|
TOpsAtFp16 float64 `json:"TOpsAtFp16"`
|
||||||
|
CardHours float64 `json:"cardHours"`
|
||||||
|
CardNum int32 `json:"cardNum"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type CreateAlertRuleReq struct {
|
type CreateAlertRuleReq struct {
|
||||||
|
|
2
go.mod
2
go.mod
|
@ -2,6 +2,8 @@ module gitlink.org.cn/JointCloud/pcm-coordinator
|
||||||
|
|
||||||
go 1.21
|
go 1.21
|
||||||
|
|
||||||
|
retract v0.1.20-0.20240319015239-6ae13da05255
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/JCCE-nudt/zero-contrib/zrpc/registry/nacos v0.0.0-20230419021610-13bbc83fbc3c
|
github.com/JCCE-nudt/zero-contrib/zrpc/registry/nacos v0.0.0-20230419021610-13bbc83fbc3c
|
||||||
github.com/Masterminds/squirrel v1.5.4
|
github.com/Masterminds/squirrel v1.5.4
|
||||||
|
|
|
@ -0,0 +1,24 @@
|
||||||
|
package models

import "github.com/zeromicro/go-zero/core/stores/sqlx"

// Compile-time assertion that customTaskVmModel satisfies TaskVmModel.
var _ TaskVmModel = (*customTaskVmModel)(nil)

type (
	// TaskVmModel is an interface to be customized, add more methods here,
	// and implement the added methods in customTaskVmModel.
	TaskVmModel interface {
		taskVmModel
	}

	// customTaskVmModel embeds the generated default model so custom
	// methods can be added without touching generated code.
	customTaskVmModel struct {
		*defaultTaskVmModel
	}
)

// NewTaskVmModel returns a model for the database table.
func NewTaskVmModel(conn sqlx.SqlConn) TaskVmModel {
	return &customTaskVmModel{
		defaultTaskVmModel: newTaskVmModel(conn),
	}
}
|
@ -0,0 +1,107 @@
|
||||||
|
// Code generated by goctl. DO NOT EDIT.
|
||||||
|
|
||||||
|
package models
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"database/sql"
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/zeromicro/go-zero/core/stores/builder"
|
||||||
|
"github.com/zeromicro/go-zero/core/stores/sqlc"
|
||||||
|
"github.com/zeromicro/go-zero/core/stores/sqlx"
|
||||||
|
"github.com/zeromicro/go-zero/core/stringx"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Column-name helpers derived from the TaskVm struct's `db` tags:
// the full column list, the insert list without auto-managed columns
// (id and create/update timestamps), and the "col=?" placeholder
// string used by Update.
var (
	taskVmFieldNames          = builder.RawFieldNames(&TaskVm{})
	taskVmRows                = strings.Join(taskVmFieldNames, ",")
	taskVmRowsExpectAutoSet   = strings.Join(stringx.Remove(taskVmFieldNames, "`id`", "`create_at`", "`create_time`", "`created_at`", "`update_at`", "`update_time`", "`updated_at`"), ",")
	taskVmRowsWithPlaceHolder = strings.Join(stringx.Remove(taskVmFieldNames, "`id`", "`create_at`", "`create_time`", "`created_at`", "`update_at`", "`update_time`", "`updated_at`"), "=?,") + "=?"
)
type (
	// taskVmModel lists the basic CRUD operations generated for the
	// `task_vm` table.
	taskVmModel interface {
		Insert(ctx context.Context, data *TaskVm) (sql.Result, error)
		FindOne(ctx context.Context, id int64) (*TaskVm, error)
		Update(ctx context.Context, data *TaskVm) error
		Delete(ctx context.Context, id int64) error
	}

	// defaultTaskVmModel is the generated implementation backed by a
	// raw sqlx connection and a fixed table name.
	defaultTaskVmModel struct {
		conn  sqlx.SqlConn
		table string
	}

	// TaskVm mirrors one row of the `task_vm` table.
	TaskVm struct {
		Id               int64  `db:"id"`                // id
		ParticipantId    int64  `db:"participant_id"`    // participant-side id
		TaskId           int64  `db:"task_id"`           // task id
		Name             string `db:"name"`              // virtual machine name
		AdapterId        int64  `db:"adapter_id"`        // id of the adapter executing the task
		ClusterId        int64  `db:"cluster_id"`        // id of the cluster executing the task
		FlavorRef        string `db:"flavor_ref"`        // flavor reference
		ImageRef         string `db:"image_ref"`         // image reference
		Status           string `db:"status"`            // status
		Platform         string `db:"platform"`          // platform
		Description      string `db:"description"`       // description
		AvailabilityZone string `db:"availability_zone"` // availability zone
		MinCount         int64  `db:"min_count"`         // count
		Uuid             string `db:"uuid"`              // network id
		StartTime        string `db:"start_time"`        // start time
		RunningTime      string `db:"running_time"`      // running time
		Result           string `db:"result"`            // run result
		DeletedAt        string `db:"deleted_at"`        // deletion time
	}
)
// newTaskVmModel builds the default model bound to the `task_vm` table.
func newTaskVmModel(conn sqlx.SqlConn) *defaultTaskVmModel {
	return &defaultTaskVmModel{
		conn:  conn,
		table: "`task_vm`",
	}
}
// withSession returns a copy of the model whose queries run on the given
// session (e.g. inside a transaction) instead of the pooled connection.
func (m *defaultTaskVmModel) withSession(session sqlx.Session) *defaultTaskVmModel {
	return &defaultTaskVmModel{
		conn:  sqlx.NewSqlConnFromSession(session),
		table: "`task_vm`",
	}
}
// Delete removes the row with the given primary-key id.
func (m *defaultTaskVmModel) Delete(ctx context.Context, id int64) error {
	query := fmt.Sprintf("delete from %s where `id` = ?", m.table)
	_, err := m.conn.ExecCtx(ctx, query, id)
	return err
}
// FindOne fetches the row with the given primary-key id, translating the
// driver's not-found error into the package-level ErrNotFound sentinel.
func (m *defaultTaskVmModel) FindOne(ctx context.Context, id int64) (*TaskVm, error) {
	query := fmt.Sprintf("select %s from %s where `id` = ? limit 1", taskVmRows, m.table)
	var resp TaskVm
	err := m.conn.QueryRowCtx(ctx, &resp, query, id)
	switch err {
	case nil:
		return &resp, nil
	case sqlc.ErrNotFound:
		return nil, ErrNotFound
	default:
		return nil, err
	}
}
// Insert writes a new row. Auto-managed columns (id and the create/update
// timestamps) are omitted; the 17 placeholders match the 17 remaining
// TaskVm fields in declaration order.
func (m *defaultTaskVmModel) Insert(ctx context.Context, data *TaskVm) (sql.Result, error) {
	query := fmt.Sprintf("insert into %s (%s) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", m.table, taskVmRowsExpectAutoSet)
	ret, err := m.conn.ExecCtx(ctx, query, data.ParticipantId, data.TaskId, data.Name, data.AdapterId, data.ClusterId, data.FlavorRef, data.ImageRef, data.Status, data.Platform, data.Description, data.AvailabilityZone, data.MinCount, data.Uuid, data.StartTime, data.RunningTime, data.Result, data.DeletedAt)
	return ret, err
}
// Update rewrites every non-auto column of the row identified by data.Id.
func (m *defaultTaskVmModel) Update(ctx context.Context, data *TaskVm) error {
	query := fmt.Sprintf("update %s set %s where `id` = ?", m.table, taskVmRowsWithPlaceHolder)
	_, err := m.conn.ExecCtx(ctx, query, data.ParticipantId, data.TaskId, data.Name, data.AdapterId, data.ClusterId, data.FlavorRef, data.ImageRef, data.Status, data.Platform, data.Description, data.AvailabilityZone, data.MinCount, data.Uuid, data.StartTime, data.RunningTime, data.Result, data.DeletedAt, data.Id)
	return err
}
// tableName reports the backing table name, including backquotes.
func (m *defaultTaskVmModel) tableName() string {
	return m.table
}
Loading…
Reference in New Issue