Merge branch 'refs/heads/upstream'
Former-commit-id: aabbc1719d3a845983eefacd0614322f32bd2246
This commit is contained in:
commit
e195bb4d89
|
@ -6,6 +6,7 @@ type Options struct {
|
|||
}
|
||||
type Client interface {
|
||||
Task(TaskOptions) (Task, error)
|
||||
Notice(NoticeOptions) (Notice, error)
|
||||
}
|
||||
|
||||
func NewClient(options Options) (Client, error) {
|
||||
|
|
|
@ -19,6 +19,11 @@ func (c *client) Task(options TaskOptions) (Task, error) {
|
|||
return task, nil
|
||||
}
|
||||
|
||||
func (c *client) Notice(options NoticeOptions) (Notice, error) {
|
||||
notice, _ := newNotice(c, &options)
|
||||
return notice, nil
|
||||
}
|
||||
|
||||
func newClient(options Options) (Client, error) {
|
||||
//init dbEngine
|
||||
dbEngin, _ := gorm.Open(mysql.Open(options.DataSource), &gorm.Config{
|
||||
|
|
|
@ -0,0 +1,9 @@
|
|||
package client
|
||||
|
||||
type NoticeOptions struct {
|
||||
pushNoticeReq PushNoticeReq
|
||||
}
|
||||
|
||||
type Notice interface {
|
||||
PushNotice(pushNoticeReq PushNoticeReq) (*PushNoticeResp, error)
|
||||
}
|
|
@ -0,0 +1,46 @@
|
|||
package client
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"k8s.io/apimachinery/pkg/util/json"
|
||||
"log"
|
||||
"net/http"
|
||||
"strings"
|
||||
"sync"
|
||||
)
|
||||
|
||||
type notice struct {
|
||||
sync.RWMutex
|
||||
client *client
|
||||
options *NoticeOptions
|
||||
log log.Logger
|
||||
}
|
||||
|
||||
func newNotice(client *client, options *NoticeOptions) (*notice, error) {
|
||||
notice := ¬ice{
|
||||
RWMutex: sync.RWMutex{},
|
||||
client: client,
|
||||
options: options,
|
||||
log: log.Logger{},
|
||||
}
|
||||
return notice, nil
|
||||
}
|
||||
|
||||
func (n *notice) PushNotice(pushNoticeReq PushNoticeReq) (*PushNoticeResp, error) {
|
||||
|
||||
url := n.client.url + "/pcm/v1/core/pushNotice"
|
||||
method := "GET"
|
||||
jsonStr, _ := json.Marshal(pushNoticeReq)
|
||||
payload := strings.NewReader(string(jsonStr))
|
||||
|
||||
client := &http.Client{}
|
||||
req, _ := http.NewRequest(method, url, payload)
|
||||
req.Header.Add("Content-Type", "application/json")
|
||||
res, _ := client.Do(req)
|
||||
defer res.Body.Close()
|
||||
|
||||
body, _ := ioutil.ReadAll(res.Body)
|
||||
var resp PushNoticeResp
|
||||
json.Unmarshal(body, &resp)
|
||||
return &resp, nil
|
||||
}
|
|
@ -9,5 +9,5 @@ type TaskOptions struct {
|
|||
type Task interface {
|
||||
PullTaskInfo(pullTaskInfoReq PullTaskInfoReq) (*PullTaskInfoResp, error)
|
||||
PushTaskInfo(pushTaskInfoReq PushTaskInfoReq) (*PushTaskInfoResp, error)
|
||||
PushResourceInfo(pushResourceInfoReq PushResourceInfoReq) error
|
||||
PushResourceInfo(pushResourceInfoReq PushResourceInfoReq) (*PushResourceInfoResp, error)
|
||||
}
|
||||
|
|
|
@ -50,8 +50,8 @@ func (t *task) PushTaskInfo(pushTaskInfoReq PushTaskInfoReq) (*PushTaskInfoResp,
|
|||
|
||||
url := t.client.url + "/pcm/v1/core/pushTaskInfo"
|
||||
method := "POST"
|
||||
infoReq := PullTaskInfoReq{AdapterId: pushTaskInfoReq.AdapterId}
|
||||
jsonStr, _ := json.Marshal(infoReq)
|
||||
//infoReq := PullTaskInfoReq{AdapterId: pushTaskInfoReq.AdapterId}
|
||||
jsonStr, _ := json.Marshal(pushTaskInfoReq)
|
||||
payload := strings.NewReader(string(jsonStr))
|
||||
|
||||
client := &http.Client{}
|
||||
|
@ -66,7 +66,22 @@ func (t *task) PushTaskInfo(pushTaskInfoReq PushTaskInfoReq) (*PushTaskInfoResp,
|
|||
return &resp, nil
|
||||
}
|
||||
|
||||
func (t *task) PushResourceInfo(pushResourceInfoReq PushResourceInfoReq) error {
|
||||
//TODO implement me
|
||||
panic("implement me")
|
||||
func (t *task) PushResourceInfo(pushResourceInfoReq PushResourceInfoReq) (*PushResourceInfoResp, error) {
|
||||
|
||||
url := t.client.url + "/pcm/v1/core/pushResourceInfo"
|
||||
method := "POST"
|
||||
//infoReq := PushResourceInfoReq{AdapterId: pushResourceInfoReq.AdapterId}
|
||||
jsonStr, _ := json.Marshal(pushResourceInfoReq)
|
||||
payload := strings.NewReader(string(jsonStr))
|
||||
|
||||
client := &http.Client{}
|
||||
req, _ := http.NewRequest(method, url, payload)
|
||||
req.Header.Add("Content-Type", "application/json")
|
||||
res, _ := client.Do(req)
|
||||
defer res.Body.Close()
|
||||
|
||||
body, _ := ioutil.ReadAll(res.Body)
|
||||
var resp PushResourceInfoResp
|
||||
json.Unmarshal(body, &resp)
|
||||
return &resp, nil
|
||||
}
|
||||
|
|
|
@ -25,12 +25,46 @@ type PushTaskInfoReq struct {
|
|||
}
|
||||
|
||||
type PushTaskInfoResp struct {
|
||||
Code int64
|
||||
Msg string
|
||||
Code int64 `json:"code"`
|
||||
Msg string `json:"msg"`
|
||||
}
|
||||
|
||||
type PushResourceInfoReq struct {
|
||||
AdapterId int64 `json:"adapterId"`
|
||||
AdapterId int64 `json:"adapterId"`
|
||||
ResourceStats []ResourceStats `json:"resourceStats"`
|
||||
}
|
||||
|
||||
// PushResourceInfoResp is the reply returned by the pushResourceInfo endpoint.
type PushResourceInfoResp struct {
	Code int64  `json:"code"` // status code reported by the server
	Msg  string `json:"msg"`  // message accompanying the code
}
|
||||
// NoticeInfo is a single notice record as exchanged over the notice endpoints.
type NoticeInfo struct {
	AdapterId   int64     `json:"adapterId"`
	AdapterName string    `json:"adapterName"`
	ClusterId   int64     `json:"clusterId"`
	ClusterName string    `json:"clusterName"`
	NoticeType  string    `json:"noticeType"`
	TaskName    string    `json:"taskName"`
	Incident    string    `json:"incident"`
	CreatedTime time.Time `json:"createdTime"`
}
|
||||
|
||||
// ListNoticeReq is the (currently parameterless) request of the listNotice endpoint.
type ListNoticeReq struct{}
|
||||
|
||||
type ListNoticeResp struct {
|
||||
Code int64 `json:"code"`
|
||||
Msg string `json:"msg"`
|
||||
Data []NoticeInfo `json:"data"`
|
||||
}
|
||||
|
||||
type PushNoticeReq struct {
|
||||
NoticeInfo NoticeInfo `json:"noticeInfo"`
|
||||
}
|
||||
|
||||
// PushNoticeResp is the reply returned by the pushNotice endpoint.
type PushNoticeResp struct {
	Code int64  `json:"code"` // status code reported by the server
	Msg  string `json:"msg"`  // message accompanying the code
}
|
||||
|
||||
type HpcInfo struct {
|
||||
|
@ -119,5 +153,30 @@ type VmInfo struct {
|
|||
BlockUuid string `json:"block_uuid,omitempty"`
|
||||
SourceType string `json:"source_type,omitempty"`
|
||||
DeleteOnTermination bool `json:"delete_on_termination,omitempty"`
|
||||
State string `json:"state,omitempty"`
|
||||
Status string `json:"Status,omitempty"`
|
||||
StartTime string `json:"startTime,omitempty"`
|
||||
}
|
||||
|
||||
type ResourceStats struct {
|
||||
ClusterId int64 `json:"clusterId"`
|
||||
Name string `json:"name"`
|
||||
CpuCoreAvail int64 `json:"cpuCoreAvail"`
|
||||
CpuCoreTotal int64 `json:"cpuCoreTotal"`
|
||||
MemAvail float64 `json:"memAvail"`
|
||||
MemTotal float64 `json:"memTotal"`
|
||||
DiskAvail float64 `json:"diskAvail"`
|
||||
DiskTotal float64 `json:"diskTotal"`
|
||||
GpuAvail int64 `json:"gpuAvail"`
|
||||
CardsAvail []*Card `json:"cardsAvail"`
|
||||
CpuCoreHours float64 `json:"cpuCoreHours"`
|
||||
Balance float64 `json:"balance"`
|
||||
}
|
||||
|
||||
// Card describes one accelerator card entry listed in ResourceStats.CardsAvail.
type Card struct {
	Platform string `json:"platform"`
	Type     string `json:"type"`
	Name     string `json:"name"`
	// TOpsAtFp16 keeps its capitalised JSON key; changing it would alter the wire format.
	TOpsAtFp16 float64 `json:"TOpsAtFp16"`
	CardHours  float64 `json:"cardHours"`
	CardNum    int32   `json:"cardNum"`
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,126 +0,0 @@
|
|||
syntax = "v1"
|
||||
|
||||
info(
|
||||
title: "type title here"
|
||||
desc: "type desc here"
|
||||
author: "type author here"
|
||||
email: "type email here"
|
||||
version: "type version here"
|
||||
)
|
||||
|
||||
type PullTaskInfoReq {
|
||||
AdapterId int64 `form:"adapterId"`
|
||||
}
|
||||
|
||||
type PullTaskInfoResp struct {
|
||||
HpcInfoList []*HpcInfo `json:"HpcInfoList,omitempty"`
|
||||
CloudInfoList []*CloudInfo `json:"CloudInfoList,omitempty"`
|
||||
AiInfoList []*AiInfo `json:"AiInfoList,omitempty"`
|
||||
VmInfoList []*VmInfo `json:"VmInfoList,omitempty"`
|
||||
}
|
||||
|
||||
type HpcInfo struct {
|
||||
Id int64 `json:"id"` // id
|
||||
TaskId int64 `json:"task_id"` // 任务id
|
||||
JobId string `json:"job_id"` // 作业id(在第三方系统中的作业id)
|
||||
AdapterId int64 `json:"adapter_id"` // 执行任务的适配器id
|
||||
ClusterId int64 `json:"cluster_id"` // 执行任务的集群id
|
||||
ClusterType string `json:"cluster_type"` // 执行任务的集群类型
|
||||
Name string `json:"name"` // 名称
|
||||
Status string `json:"status"` // 状态
|
||||
CmdScript string `json:"cmd_script"`
|
||||
StartTime string `json:"start_time"` // 开始时间
|
||||
RunningTime int64 `json:"running_time"` // 运行时间
|
||||
DerivedEs string `json:"derived_es"`
|
||||
Cluster string `json:"cluster"`
|
||||
BlockId int64 `json:"block_id"`
|
||||
AllocNodes int64 `json:"alloc_nodes"`
|
||||
AllocCpu int64 `json:"alloc_cpu"`
|
||||
CardCount int64 `json:"card_count"` // 卡数
|
||||
Version string `json:"version"`
|
||||
Account string `json:"account"`
|
||||
WorkDir string `json:"work_dir"` // 工作路径
|
||||
AssocId int64 `json:"assoc_id"`
|
||||
ExitCode int64 `json:"exit_code"`
|
||||
WallTime string `json:"wall_time"` // 最大运行时间
|
||||
Result string `json:"result"` // 运行结果
|
||||
DeletedAt string `json:"deleted_at"` // 删除时间
|
||||
YamlString string `json:"yaml_string"`
|
||||
AppType string `json:"app_type"` // 应用类型
|
||||
AppName string `json:"app_name"` // 应用名称
|
||||
Queue string `json:"queue"` // 队列名称
|
||||
SubmitType string `json:"submit_type"` // cmd(命令行模式)
|
||||
NNode string `json:"n_node"` // 节点个数(当指定该参数时,GAP_NODE_STRING必须为"")
|
||||
StdOutFile string `json:"std_out_file"` // 工作路径/std.err.%j
|
||||
StdErrFile string `json:"std_err_file"` // 工作路径/std.err.%j
|
||||
StdInput string `json:"std_input"`
|
||||
Environment string `json:"environment"`
|
||||
DeletedFlag int64 `json:"deleted_flag"` // 是否删除(0-否,1-是)
|
||||
CreatedBy int64 `json:"created_by"` // 创建人
|
||||
CreatedTime string `json:"created_time"` // 创建时间
|
||||
UpdatedBy int64 `json:"updated_by"` // 更新人
|
||||
UpdatedTime string `json:"updated_time"` // 更新时间
|
||||
}
|
||||
|
||||
type CloudInfo struct {
|
||||
Participant int64 `json:"participant,omitempty"`
|
||||
Id int64 `json:"id,omitempty"`
|
||||
TaskId int64 `json:"taskId,omitempty"`
|
||||
ApiVersion string `json:"apiVersion,omitempty"`
|
||||
Kind string `json:"kind,omitempty"`
|
||||
Namespace string `json:"namespace,omitempty"`
|
||||
Name string `json:"name,omitempty"`
|
||||
Status string `json:"status,omitempty"`
|
||||
StartTime string `json:"startTime,omitempty"`
|
||||
RunningTime int64 `json:"runningTime,omitempty"`
|
||||
Result string `json:"result,omitempty"`
|
||||
YamlString string `json:"yamlString,omitempty"`
|
||||
}
|
||||
|
||||
type AiInfo struct {
|
||||
ParticipantId int64 `json:"participantId,omitempty"`
|
||||
TaskId int64 `json:"taskId,omitempty"`
|
||||
ProjectId string `json:"project_id,omitempty"`
|
||||
Name string `json:"name,omitempty"`
|
||||
Status string `json:"status,omitempty"`
|
||||
StartTime string `json:"startTime,omitempty"`
|
||||
RunningTime int64 `json:"runningTime,omitempty"`
|
||||
Result string `json:"result,omitempty"`
|
||||
JobId string `json:"jobId,omitempty"`
|
||||
CreateTime string `json:"createTime,omitempty"`
|
||||
ImageUrl string `json:"imageUrl,omitempty"`
|
||||
Command string `json:"command,omitempty"`
|
||||
FlavorId string `json:"flavorId,omitempty"`
|
||||
SubscriptionId string `json:"subscriptionId,omitempty"`
|
||||
ItemVersionId string `json:"itemVersionId,omitempty"`
|
||||
}
|
||||
|
||||
type VmInfo struct {
|
||||
ParticipantId int64 `json:"participantId,omitempty"`
|
||||
TaskId int64 `json:"taskId,omitempty"`
|
||||
Name string `json:"name,omitempty"`
|
||||
FlavorRef string `json:"flavor_ref,omitempty"`
|
||||
ImageRef string `json:"image_ref,omitempty"`
|
||||
NetworkUuid string `json:"network_uuid,omitempty"`
|
||||
BlockUuid string `json:"block_uuid,omitempty"`
|
||||
SourceType string `json:"source_type,omitempty"`
|
||||
DeleteOnTermination bool `json:"delete_on_termination,omitempty"`
|
||||
State string `json:"state,omitempty"`
|
||||
}
|
||||
|
||||
type PushTaskInfoReq struct {
|
||||
AdapterId int64 `json:"adapterId"`
|
||||
HpcInfoList []*HpcInfo `json:"hpcInfoList"`
|
||||
CloudInfoList []*CloudInfo `json:"cloudInfoList"`
|
||||
AiInfoList []*AiInfo `json:"aiInfoList"`
|
||||
VmInfoList []*VmInfo `json:"vmInfoList"`
|
||||
}
|
||||
|
||||
type PushTaskInfoResp struct {
|
||||
Code int64 `json:"code"`
|
||||
Msg string `json:"msg"`
|
||||
}
|
||||
|
||||
type PushResourceInfoReq struct {
|
||||
AdapterId int64 `json:"adapterId"`
|
||||
}
|
|
@ -9,7 +9,6 @@ import (
|
|||
"cloud/pcm-cloud.api"
|
||||
"storelink/pcm-storelink.api"
|
||||
"schedule/pcm-schedule.api"
|
||||
"participant/pcm-participant.api"
|
||||
"monitoring/pcm-monitoring.api"
|
||||
)
|
||||
|
||||
|
@ -111,14 +110,26 @@ service pcm {
|
|||
@handler metricsHandler
|
||||
get /core/metrics
|
||||
|
||||
@doc "provided to participant to pull task info from core"
|
||||
@doc "provide for adapter to pull task info from core"
|
||||
@handler pullTaskInfoHandler
|
||||
get /core/pullTaskInfo (PullTaskInfoReq) returns (PullTaskInfoResp)
|
||||
|
||||
@doc "provided to participant to push task info to core"
|
||||
@doc "provide for adapter to push task info to core"
|
||||
@handler pushTaskInfoHandler
|
||||
post /core/pushTaskInfo (PushTaskInfoReq) returns (PushTaskInfoResp)
|
||||
|
||||
@doc "provide for adapter to push resource info to core"
|
||||
@handler pushResourceInfoHandler
|
||||
post /core/pushResourceInfo (PushResourceInfoReq) returns (PushResourceInfoResp)
|
||||
|
||||
@doc "provide for adapter to push notice info to core"
|
||||
@handler pushNoticeHandler
|
||||
post /core/pushNotice (PushNoticeReq) returns (PushNoticeResp)
|
||||
|
||||
@doc "list notice"
|
||||
@handler listNoticeHandler
|
||||
get /core/listNotice (ListNoticeReq) returns (ListNoticeResp)
|
||||
|
||||
@doc "paging queries the task list"
|
||||
@handler pageListTaskHandler
|
||||
get /core/task/list (pageTaskReq) returns(PageResult)
|
||||
|
@ -146,6 +157,10 @@ service pcm {
|
|||
@handler jobHandler
|
||||
get /hpc/job (hpcJobReq) returns (hpcJobResp)
|
||||
|
||||
@doc "超算资源总览"
|
||||
@handler resourceHandler
|
||||
get /hpc/resource (hpcResourceReq) returns (hpcResourceResp)
|
||||
|
||||
@doc "超算查询资产列表"
|
||||
@handler queueAssetsHandler
|
||||
get /hpc/queueAssets returns (QueueAssetsResp)
|
||||
|
@ -895,13 +910,13 @@ service pcm {
|
|||
get /schedule/ai/getTaskTypes returns (AiTaskTypesResp)
|
||||
|
||||
@handler ScheduleGetDatasetsHandler
|
||||
get /schedule/ai/getDatasets returns (AiDatasetsResp)
|
||||
get /schedule/ai/getDatasets/:adapterId (AiDatasetsReq) returns (AiDatasetsResp)
|
||||
|
||||
@handler ScheduleGetStrategyHandler
|
||||
get /schedule/ai/getStrategies returns (AiStrategyResp)
|
||||
|
||||
@handler ScheduleGetAlgorithmsHandler
|
||||
get /schedule/ai/getAlgorithms/:resourceType/:taskType/:dataset (AiAlgorithmsReq) returns (AiAlgorithmsResp)
|
||||
get /schedule/ai/getAlgorithms/:adapterId/:resourceType/:taskType/:dataset (AiAlgorithmsReq) returns (AiAlgorithmsResp)
|
||||
|
||||
@handler ScheduleSubmitHandler
|
||||
post /schedule/submit (ScheduleReq) returns (ScheduleResp)
|
||||
|
|
|
@ -19,13 +19,15 @@ type (
|
|||
ScheduleResult {
|
||||
ClusterId string `json:"clusterId"`
|
||||
TaskId string `json:"taskId"`
|
||||
Strategy string `json:"strategy"`
|
||||
Replica int32 `json:"replica"`
|
||||
Msg string `json:"msg"`
|
||||
}
|
||||
|
||||
AiOption {
|
||||
TaskName string `json:"taskName"`
|
||||
AiClusterId string `json:"aiClusterId,optional"`
|
||||
AdapterId string `json:"adapterId"`
|
||||
AiClusterIds []string `json:"aiClusterIds"`
|
||||
ResourceType string `json:"resourceType"`
|
||||
Tops float64 `json:"Tops,optional"`
|
||||
TaskType string `json:"taskType"`
|
||||
|
@ -46,6 +48,10 @@ type (
|
|||
TaskTypes []string `json:"taskTypes"`
|
||||
}
|
||||
|
||||
AiDatasetsReq {
|
||||
AdapterId string `path:"adapterId"`
|
||||
}
|
||||
|
||||
AiDatasetsResp {
|
||||
Datasets []string `json:"datasets"`
|
||||
}
|
||||
|
@ -55,6 +61,7 @@ type (
|
|||
}
|
||||
|
||||
AiAlgorithmsReq {
|
||||
AdapterId string `path:"adapterId"`
|
||||
ResourceType string `path:"resourceType"`
|
||||
TaskType string `path:"taskType"`
|
||||
Dataset string `path:"dataset"`
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
package core
|
||||
|
||||
import (
|
||||
clientCore "gitlink.org.cn/JointCloud/pcm-coordinator/api/client"
|
||||
"net/http"
|
||||
|
||||
"github.com/zeromicro/go-zero/rest/httpx"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/core"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||
)
|
||||
|
||||
func ListNoticeHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
var req clientCore.ListNoticeReq
|
||||
if err := httpx.Parse(r, &req); err != nil {
|
||||
httpx.ErrorCtx(r.Context(), w, err)
|
||||
return
|
||||
}
|
||||
|
||||
l := core.NewListNoticeLogic(r.Context(), svcCtx)
|
||||
resp, err := l.ListNotice(&req)
|
||||
if err != nil {
|
||||
httpx.ErrorCtx(r.Context(), w, err)
|
||||
} else {
|
||||
httpx.OkJsonCtx(r.Context(), w, resp)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,28 @@
|
|||
package core
|
||||
|
||||
import (
|
||||
clientCore "gitlink.org.cn/JointCloud/pcm-coordinator/api/client"
|
||||
"net/http"
|
||||
|
||||
"github.com/zeromicro/go-zero/rest/httpx"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/core"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||
)
|
||||
|
||||
func PushNoticeHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
var req clientCore.PushNoticeReq
|
||||
if err := httpx.Parse(r, &req); err != nil {
|
||||
httpx.ErrorCtx(r.Context(), w, err)
|
||||
return
|
||||
}
|
||||
|
||||
l := core.NewPushNoticeLogic(r.Context(), svcCtx)
|
||||
resp, err := l.PushNotice(&req)
|
||||
if err != nil {
|
||||
httpx.ErrorCtx(r.Context(), w, err)
|
||||
} else {
|
||||
httpx.OkJsonCtx(r.Context(), w, resp)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,28 @@
|
|||
package core
|
||||
|
||||
import (
|
||||
clientCore "gitlink.org.cn/JointCloud/pcm-coordinator/api/client"
|
||||
"net/http"
|
||||
|
||||
"github.com/zeromicro/go-zero/rest/httpx"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/core"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||
)
|
||||
|
||||
func PushResourceInfoHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
var req clientCore.PushResourceInfoReq
|
||||
if err := httpx.Parse(r, &req); err != nil {
|
||||
httpx.ErrorCtx(r.Context(), w, err)
|
||||
return
|
||||
}
|
||||
|
||||
l := core.NewPushResourceInfoLogic(r.Context(), svcCtx)
|
||||
resp, err := l.PushResourceInfo(&req)
|
||||
if err != nil {
|
||||
httpx.ErrorCtx(r.Context(), w, err)
|
||||
} else {
|
||||
httpx.OkJsonCtx(r.Context(), w, resp)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,28 @@
|
|||
package hpc
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/zeromicro/go-zero/rest/httpx"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/hpc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
|
||||
)
|
||||
|
||||
func ResourceHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
var req types.HpcResourceReq
|
||||
if err := httpx.Parse(r, &req); err != nil {
|
||||
httpx.ErrorCtx(r.Context(), w, err)
|
||||
return
|
||||
}
|
||||
|
||||
l := hpc.NewResourceLogic(r.Context(), svcCtx)
|
||||
resp, err := l.Resource(&req)
|
||||
if err != nil {
|
||||
httpx.ErrorCtx(r.Context(), w, err)
|
||||
} else {
|
||||
httpx.OkJsonCtx(r.Context(), w, resp)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -140,6 +140,21 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
|
|||
Path: "/core/pushTaskInfo",
|
||||
Handler: core.PushTaskInfoHandler(serverCtx),
|
||||
},
|
||||
{
|
||||
Method: http.MethodPost,
|
||||
Path: "/core/pushResourceInfo",
|
||||
Handler: core.PushResourceInfoHandler(serverCtx),
|
||||
},
|
||||
{
|
||||
Method: http.MethodPost,
|
||||
Path: "/core/pushNotice",
|
||||
Handler: core.PushNoticeHandler(serverCtx),
|
||||
},
|
||||
{
|
||||
Method: http.MethodGet,
|
||||
Path: "/core/listNotice",
|
||||
Handler: core.ListNoticeHandler(serverCtx),
|
||||
},
|
||||
{
|
||||
Method: http.MethodGet,
|
||||
Path: "/core/task/list",
|
||||
|
@ -171,6 +186,11 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
|
|||
Path: "/hpc/job",
|
||||
Handler: hpc.JobHandler(serverCtx),
|
||||
},
|
||||
{
|
||||
Method: http.MethodGet,
|
||||
Path: "/hpc/resource",
|
||||
Handler: hpc.ResourceHandler(serverCtx),
|
||||
},
|
||||
{
|
||||
Method: http.MethodGet,
|
||||
Path: "/hpc/queueAssets",
|
||||
|
@ -1107,7 +1127,7 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
|
|||
},
|
||||
{
|
||||
Method: http.MethodGet,
|
||||
Path: "/schedule/ai/getDatasets",
|
||||
Path: "/schedule/ai/getDatasets/:adapterId",
|
||||
Handler: schedule.ScheduleGetDatasetsHandler(serverCtx),
|
||||
},
|
||||
{
|
||||
|
@ -1117,7 +1137,7 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
|
|||
},
|
||||
{
|
||||
Method: http.MethodGet,
|
||||
Path: "/schedule/ai/getAlgorithms/:resourceType/:taskType/:dataset",
|
||||
Path: "/schedule/ai/getAlgorithms/:adapterId/:resourceType/:taskType/:dataset",
|
||||
Handler: schedule.ScheduleGetAlgorithmsHandler(serverCtx),
|
||||
},
|
||||
{
|
||||
|
|
|
@ -1,16 +1,24 @@
|
|||
package schedule
|
||||
|
||||
import (
|
||||
"github.com/zeromicro/go-zero/rest/httpx"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/schedule"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
|
||||
"net/http"
|
||||
)
|
||||
|
||||
func ScheduleGetDatasetsHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
var req types.AiDatasetsReq
|
||||
if err := httpx.Parse(r, &req); err != nil {
|
||||
result.ParamErrorResult(r, w, err)
|
||||
return
|
||||
}
|
||||
|
||||
l := schedule.NewScheduleGetDatasetsLogic(r.Context(), svcCtx)
|
||||
resp, err := l.ScheduleGetDatasets()
|
||||
resp, err := l.ScheduleGetDatasets(&req)
|
||||
result.HttpResult(r, w, resp, err)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,13 +2,12 @@ package core
|
|||
|
||||
import (
|
||||
"context"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/mqs"
|
||||
"fmt"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/pkg/response"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
|
||||
tool "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
|
||||
"math/rand"
|
||||
"time"
|
||||
|
||||
"github.com/zeromicro/go-zero/core/logx"
|
||||
|
@ -35,7 +34,6 @@ func (l *CommitVmTaskLogic) CommitVmTask(req *types.CommitVmTaskReq) (resp *type
|
|||
Status: constants.Saved,
|
||||
Name: req.Name,
|
||||
CommitTime: time.Now(),
|
||||
NsID: req.NsID,
|
||||
}
|
||||
// Save task data to database
|
||||
tx := l.svcCtx.DbEngin.Create(&taskModel)
|
||||
|
@ -43,28 +41,38 @@ func (l *CommitVmTaskLogic) CommitVmTask(req *types.CommitVmTaskReq) (resp *type
|
|||
return nil, tx.Error
|
||||
}
|
||||
|
||||
var clusterIds []int64
|
||||
l.svcCtx.DbEngin.Raw("SELECT id FROM `t_cluster` where adapter_id = ? and label = ?", req.AdapterId, req.ClusterType).Scan(&clusterIds)
|
||||
for _, CreateMulServer := range req.CreateMulServer {
|
||||
fmt.Println("", req.CreateMulServer)
|
||||
var clusterIds []int64
|
||||
l.svcCtx.DbEngin.Raw("SELECT id FROM `t_cluster` where adapter_id = ? and label = ?", req.AdapterId, req.ClusterType).Scan(&clusterIds)
|
||||
|
||||
if len(clusterIds) == 0 || clusterIds == nil {
|
||||
return nil, nil
|
||||
if len(clusterIds) == 0 || clusterIds == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
vmInfo := models.TaskVm{
|
||||
TaskId: taskModel.Id,
|
||||
ClusterId: clusterIds[rand.Intn(len(clusterIds))],
|
||||
Name: taskModel.Name,
|
||||
Status: "Saved",
|
||||
StartTime: time.Now().String(),
|
||||
MinCount: CreateMulServer.Min_count,
|
||||
ImageRef: CreateMulServer.ImageRef,
|
||||
FlavorRef: CreateMulServer.FlavorRef,
|
||||
Uuid: CreateMulServer.Uuid,
|
||||
Platform: CreateMulServer.Platform,
|
||||
}
|
||||
|
||||
tx = l.svcCtx.DbEngin.Create(&vmInfo)
|
||||
if tx.Error != nil {
|
||||
return nil, tx.Error
|
||||
}
|
||||
resp = &types.CommitVmTaskResp{
|
||||
Code: 200,
|
||||
Msg: "success",
|
||||
TaskId: taskModel.Id,
|
||||
}
|
||||
}
|
||||
|
||||
vm := models.Vm{}
|
||||
tool.Convert(req, &vm)
|
||||
mqInfo := response.TaskInfo{
|
||||
TaskId: taskModel.Id,
|
||||
TaskType: "vm",
|
||||
MatchLabels: req.MatchLabels,
|
||||
NsID: req.NsID,
|
||||
}
|
||||
//req.TaskId = taskModel.Id
|
||||
mqs.InsQueue.Beta.Add(&mqInfo)
|
||||
tx = l.svcCtx.DbEngin.Create(&mqInfo)
|
||||
resp = &types.CommitVmTaskResp{
|
||||
Code: 200,
|
||||
Msg: "success",
|
||||
TaskId: taskModel.Id,
|
||||
}
|
||||
return resp, nil
|
||||
}
|
||||
|
|
|
@ -0,0 +1,36 @@
|
|||
package core
|
||||
|
||||
import (
|
||||
"context"
|
||||
"github.com/zeromicro/go-zero/core/logx"
|
||||
clientCore "gitlink.org.cn/JointCloud/pcm-coordinator/api/client"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||
)
|
||||
|
||||
type ListNoticeLogic struct {
|
||||
logx.Logger
|
||||
ctx context.Context
|
||||
svcCtx *svc.ServiceContext
|
||||
}
|
||||
|
||||
func NewListNoticeLogic(ctx context.Context, svcCtx *svc.ServiceContext) *ListNoticeLogic {
|
||||
return &ListNoticeLogic{
|
||||
Logger: logx.WithContext(ctx),
|
||||
ctx: ctx,
|
||||
svcCtx: svcCtx,
|
||||
}
|
||||
}
|
||||
|
||||
func (l *ListNoticeLogic) ListNotice(req *clientCore.ListNoticeReq) (*clientCore.ListNoticeResp, error) {
|
||||
var notices []clientCore.NoticeInfo
|
||||
|
||||
var resp clientCore.ListNoticeResp
|
||||
|
||||
l.svcCtx.DbEngin.Raw("select * from t_notice order by created_time desc").Scan(¬ices)
|
||||
for _, notice := range notices {
|
||||
resp.Data = append(resp.Data, notice)
|
||||
}
|
||||
resp.Code = 200
|
||||
resp.Msg = "success"
|
||||
return &resp, nil
|
||||
}
|
|
@ -67,6 +67,13 @@ func (l *PullTaskInfoLogic) PullTaskInfo(req *clientCore.PullTaskInfoReq) (*clie
|
|||
return nil, err
|
||||
}
|
||||
utils.Convert(aiModelList, &resp.AiInfoList)
|
||||
case 3:
|
||||
var vmModelList []models.TaskVm
|
||||
err := findModelList(req.AdapterId, l.svcCtx.DbEngin, &vmModelList)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
utils.Convert(vmModelList, &resp.VmInfoList)
|
||||
}
|
||||
return &resp, nil
|
||||
}
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
package core
|
||||
|
||||
import (
|
||||
"context"
|
||||
"github.com/zeromicro/go-zero/core/logx"
|
||||
clientCore "gitlink.org.cn/JointCloud/pcm-coordinator/api/client"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||
)
|
||||
|
||||
type PushNoticeLogic struct {
|
||||
logx.Logger
|
||||
ctx context.Context
|
||||
svcCtx *svc.ServiceContext
|
||||
}
|
||||
|
||||
func NewPushNoticeLogic(ctx context.Context, svcCtx *svc.ServiceContext) *PushNoticeLogic {
|
||||
return &PushNoticeLogic{
|
||||
Logger: logx.WithContext(ctx),
|
||||
ctx: ctx,
|
||||
svcCtx: svcCtx,
|
||||
}
|
||||
}
|
||||
|
||||
func (l *PushNoticeLogic) PushNotice(req *clientCore.PushNoticeReq) (resp *clientCore.PushNoticeResp, err error) {
|
||||
result := l.svcCtx.DbEngin.Table("t_notice").Create(&req.NoticeInfo)
|
||||
if result.Error != nil {
|
||||
return nil, result.Error
|
||||
}
|
||||
|
||||
return
|
||||
}
|
|
@ -0,0 +1,28 @@
|
|||
package core
|
||||
|
||||
import (
|
||||
"context"
|
||||
clientCore "gitlink.org.cn/JointCloud/pcm-coordinator/api/client"
|
||||
|
||||
"github.com/zeromicro/go-zero/core/logx"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||
)
|
||||
|
||||
type PushResourceInfoLogic struct {
|
||||
logx.Logger
|
||||
ctx context.Context
|
||||
svcCtx *svc.ServiceContext
|
||||
}
|
||||
|
||||
func NewPushResourceInfoLogic(ctx context.Context, svcCtx *svc.ServiceContext) *PushResourceInfoLogic {
|
||||
return &PushResourceInfoLogic{
|
||||
Logger: logx.WithContext(ctx),
|
||||
ctx: ctx,
|
||||
svcCtx: svcCtx,
|
||||
}
|
||||
}
|
||||
|
||||
func (l *PushResourceInfoLogic) PushResourceInfo(req *clientCore.PushResourceInfoReq) (resp *clientCore.PushResourceInfoResp, err error) {
|
||||
|
||||
return
|
||||
}
|
|
@ -49,6 +49,12 @@ func (l *PushTaskInfoLogic) PushTaskInfo(req *clientCore.PushTaskInfoReq) (*clie
|
|||
aiInfo.Status, aiInfo.StartTime, aiInfo.ProjectId, aiInfo.JobId, req.AdapterId, aiInfo.TaskId, aiInfo.Name)
|
||||
syncTask(l.svcCtx.DbEngin, aiInfo.TaskId)
|
||||
}
|
||||
case 3:
|
||||
for _, vmInfo := range req.VmInfoList {
|
||||
l.svcCtx.DbEngin.Exec("update task_vm set status = ?,start_time = ? where participant_id = ? and task_id = ? and name = ?",
|
||||
vmInfo.Status, vmInfo.StartTime, req.AdapterId, vmInfo.TaskId, vmInfo.Name)
|
||||
syncTask(l.svcCtx.DbEngin, vmInfo.TaskId)
|
||||
}
|
||||
}
|
||||
|
||||
return &resp, nil
|
||||
|
|
|
@ -0,0 +1,48 @@
|
|||
package hpc
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
|
||||
|
||||
"github.com/zeromicro/go-zero/core/logx"
|
||||
)
|
||||
|
||||
type ResourceLogic struct {
|
||||
logx.Logger
|
||||
ctx context.Context
|
||||
svcCtx *svc.ServiceContext
|
||||
}
|
||||
|
||||
func NewResourceLogic(ctx context.Context, svcCtx *svc.ServiceContext) *ResourceLogic {
|
||||
return &ResourceLogic{
|
||||
Logger: logx.WithContext(ctx),
|
||||
ctx: ctx,
|
||||
svcCtx: svcCtx,
|
||||
}
|
||||
}
|
||||
|
||||
func (l *ResourceLogic) Resource(req *types.HpcResourceReq) (resp *types.HpcResourceResp, err error) {
|
||||
|
||||
l.svcCtx.DbEngin.Raw("SELECT th.NAME as job_name,t.description as job_desc,t.commit_time as submit_time,th.STATUS as job_status,ta.name as adapter_name,tc.name as cluster_name,tc.label as cluster_type FROM task_hpc th LEFT JOIN task t ON t.id = th.task_id JOIN t_cluster tc on th.cluster_id = tc.id JOIN t_adapter ta on tc.adapter_id = ta.id")
|
||||
|
||||
hpcResource := types.HPCResource{
|
||||
GPUCardsTotal: 0,
|
||||
CPUCoresTotal: 0,
|
||||
RAMTotal: 0,
|
||||
GPUCardsUsed: 0,
|
||||
CPUCoresUsed: 0,
|
||||
RAMUsed: 0,
|
||||
GPURate: 0,
|
||||
CPURate: 0,
|
||||
RAMRate: 0,
|
||||
}
|
||||
|
||||
resp = &types.HpcResourceResp{
|
||||
Code: 200,
|
||||
Msg: "success",
|
||||
HPCResource: hpcResource,
|
||||
}
|
||||
return resp, nil
|
||||
}
|
|
@ -26,7 +26,7 @@ func NewScheduleGetAlgorithmsLogic(ctx context.Context, svcCtx *svc.ServiceConte
|
|||
|
||||
func (l *ScheduleGetAlgorithmsLogic) ScheduleGetAlgorithms(req *types.AiAlgorithmsReq) (resp *types.AiAlgorithmsResp, err error) {
|
||||
resp = &types.AiAlgorithmsResp{}
|
||||
algorithms, err := storeLink.GetAlgorithms(l.ctx, l.svcCtx.Scheduler.ResourceCollector, req.ResourceType, req.TaskType, req.Dataset)
|
||||
algorithms, err := storeLink.GetAlgorithms(l.ctx, l.svcCtx.Scheduler.AiService.AiCollectorAdapterMap[req.AdapterId], req.ResourceType, req.TaskType, req.Dataset)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
|
|
@ -3,6 +3,7 @@ package schedule
|
|||
import (
|
||||
"context"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/storeLink"
|
||||
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
|
||||
|
||||
|
@ -23,9 +24,9 @@ func NewScheduleGetDatasetsLogic(ctx context.Context, svcCtx *svc.ServiceContext
|
|||
}
|
||||
}
|
||||
|
||||
func (l *ScheduleGetDatasetsLogic) ScheduleGetDatasets() (resp *types.AiDatasetsResp, err error) {
|
||||
func (l *ScheduleGetDatasetsLogic) ScheduleGetDatasets(req *types.AiDatasetsReq) (resp *types.AiDatasetsResp, err error) {
|
||||
resp = &types.AiDatasetsResp{}
|
||||
names, err := storeLink.GetDatasetsNames(l.ctx, l.svcCtx.Scheduler.ResourceCollector)
|
||||
names, err := storeLink.GetDatasetsNames(l.ctx, l.svcCtx.Scheduler.AiService.AiCollectorAdapterMap[req.AdapterId])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
|
|
@ -27,6 +27,7 @@ func NewScheduleSubmitLogic(ctx context.Context, svcCtx *svc.ServiceContext) *Sc
|
|||
func (l *ScheduleSubmitLogic) ScheduleSubmit(req *types.ScheduleReq) (resp *types.ScheduleResp, err error) {
|
||||
resp = &types.ScheduleResp{}
|
||||
opt := &option.AiOption{
|
||||
AdapterId: req.AiOption.AdapterId,
|
||||
ResourceType: req.AiOption.ResourceType,
|
||||
Tops: req.AiOption.Tops,
|
||||
TaskType: req.AiOption.TaskType,
|
||||
|
@ -55,6 +56,7 @@ func (l *ScheduleSubmitLogic) ScheduleSubmit(req *types.ScheduleReq) (resp *type
|
|||
scheResult := &types.ScheduleResult{}
|
||||
scheResult.ClusterId = r.ClusterId
|
||||
scheResult.TaskId = r.TaskId
|
||||
scheResult.Strategy = r.Strategy
|
||||
scheResult.Replica = r.Replica
|
||||
scheResult.Msg = r.Msg
|
||||
resp.Results = append(resp.Results, scheResult)
|
||||
|
|
|
@ -5,9 +5,8 @@ import (
|
|||
)
|
||||
|
||||
type Weight struct {
|
||||
Id int64
|
||||
Id string
|
||||
Weight int32
|
||||
Name string
|
||||
Replica int32
|
||||
}
|
||||
|
||||
|
|
|
@ -33,6 +33,21 @@ func (s *AiStorage) GetClustersByAdapterId(id string) (*types.ClusterListResp, e
|
|||
return &resp, nil
|
||||
}
|
||||
|
||||
func (s *AiStorage) GetAdapterIdsByType(adapterType string) ([]string, error) {
|
||||
var list []types.AdapterInfo
|
||||
var ids []string
|
||||
db := s.DbEngin.Model(&types.AdapterInfo{}).Table("t_adapter")
|
||||
db = db.Where("type = ?", adapterType)
|
||||
err := db.Order("create_time desc").Find(&list).Error
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for _, info := range list {
|
||||
ids = append(ids, info.Id)
|
||||
}
|
||||
return ids, nil
|
||||
}
|
||||
|
||||
func (s *AiStorage) SaveTask(name string) error {
|
||||
// 构建主任务结构体
|
||||
taskModel := models.Task{
|
||||
|
|
|
@ -20,8 +20,7 @@ import (
|
|||
"github.com/zeromicro/go-zero/core/logx"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/common"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/database"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/collector"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/executor"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/pkg/response"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/rpc/client/participantservice"
|
||||
|
@ -32,16 +31,15 @@ import (
|
|||
)
|
||||
|
||||
type Scheduler struct {
|
||||
task *response.TaskInfo
|
||||
participantIds []int64
|
||||
subSchedule SubSchedule
|
||||
dbEngin *gorm.DB
|
||||
result []string //pID:子任务yamlstring 键值对
|
||||
participantRpc participantservice.ParticipantService
|
||||
ResourceCollector *map[string]collector.AiCollector
|
||||
AiStorages *database.AiStorage
|
||||
AiExecutor *map[string]executor.AiExecutor
|
||||
mu sync.RWMutex
|
||||
task *response.TaskInfo
|
||||
participantIds []int64
|
||||
subSchedule SubSchedule
|
||||
dbEngin *gorm.DB
|
||||
result []string //pID:子任务yamlstring 键值对
|
||||
participantRpc participantservice.ParticipantService
|
||||
AiStorages *database.AiStorage
|
||||
AiService *service.AiService
|
||||
mu sync.RWMutex
|
||||
}
|
||||
|
||||
type SubSchedule interface {
|
||||
|
@ -59,8 +57,8 @@ func NewScheduler(subSchedule SubSchedule, val string, dbEngin *gorm.DB, partici
|
|||
return &Scheduler{task: task, subSchedule: subSchedule, dbEngin: dbEngin, participantRpc: participantRpc}, nil
|
||||
}
|
||||
|
||||
func NewSchdlr(resourceCollector *map[string]collector.AiCollector, storages *database.AiStorage, aiExecutor *map[string]executor.AiExecutor) *Scheduler {
|
||||
return &Scheduler{ResourceCollector: resourceCollector, AiStorages: storages, AiExecutor: aiExecutor}
|
||||
func NewSchdlr(aiService *service.AiService, storages *database.AiStorage) *Scheduler {
|
||||
return &Scheduler{AiService: aiService, AiStorages: storages}
|
||||
}
|
||||
|
||||
func (s *Scheduler) SpecifyClusters() {
|
||||
|
|
|
@ -18,6 +18,7 @@ import (
|
|||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"gitlink.org.cn/JointCloud/pcm-ac/hpcAC"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option"
|
||||
|
@ -28,7 +29,6 @@ import (
|
|||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
|
||||
"gitlink.org.cn/JointCloud/pcm-octopus/octopus"
|
||||
"strconv"
|
||||
"sync"
|
||||
)
|
||||
|
||||
|
@ -43,6 +43,7 @@ type AiScheduler struct {
|
|||
type AiResult struct {
|
||||
TaskId string
|
||||
ClusterId string
|
||||
Strategy string
|
||||
Replica int32
|
||||
Msg string
|
||||
}
|
||||
|
@ -63,9 +64,8 @@ func (as *AiScheduler) GetNewStructForDb(task *response.TaskInfo, resource strin
|
|||
}
|
||||
|
||||
func (as *AiScheduler) PickOptimalStrategy() (strategy.Strategy, error) {
|
||||
if as.option.AiClusterId != "" {
|
||||
// TODO database operation Find
|
||||
return &strategy.SingleAssignment{Cluster: &strategy.AssignedCluster{ParticipantId: 0, Name: "", Replicas: 1}}, nil
|
||||
if len(as.option.ClusterIds) == 1 {
|
||||
return &strategy.SingleAssignment{Cluster: &strategy.AssignedCluster{ClusterId: as.option.ClusterIds[0], Replicas: 1}}, nil
|
||||
}
|
||||
|
||||
resources, err := as.findClustersWithResources()
|
||||
|
@ -79,8 +79,7 @@ func (as *AiScheduler) PickOptimalStrategy() (strategy.Strategy, error) {
|
|||
|
||||
if len(resources) == 1 {
|
||||
var cluster strategy.AssignedCluster
|
||||
cluster.ParticipantId = resources[0].ParticipantId
|
||||
cluster.Name = resources[0].Name
|
||||
cluster.ClusterId = resources[0].ClusterId
|
||||
cluster.Replicas = 1
|
||||
return &strategy.SingleAssignment{Cluster: &cluster}, nil
|
||||
}
|
||||
|
@ -89,7 +88,11 @@ func (as *AiScheduler) PickOptimalStrategy() (strategy.Strategy, error) {
|
|||
|
||||
switch as.option.StrategyName {
|
||||
case strategy.REPLICATION:
|
||||
strategy := strategy.NewReplicationStrategy(¶m.ReplicationParams{Params: params, Replicas: 1})
|
||||
var clusterIds []string
|
||||
for _, resource := range resources {
|
||||
clusterIds = append(clusterIds, resource.ClusterId)
|
||||
}
|
||||
strategy := strategy.NewReplicationStrategy(clusterIds, 1)
|
||||
return strategy, nil
|
||||
case strategy.RESOURCES_PRICING:
|
||||
strategy := strategy.NewPricingStrategy(¶m.ResourcePricingParams{Params: params, Replicas: 1})
|
||||
|
@ -111,32 +114,47 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) (interfa
|
|||
return nil, errors.New("clusters is nil")
|
||||
}
|
||||
|
||||
for i := len(clusters) - 1; i >= 0; i-- {
|
||||
if clusters[i].Replicas == 0 {
|
||||
clusters = append(clusters[:i], clusters[i+1:]...)
|
||||
}
|
||||
}
|
||||
|
||||
if len(clusters) == 0 {
|
||||
return nil, errors.New("clusters is nil")
|
||||
}
|
||||
|
||||
var wg sync.WaitGroup
|
||||
var results []*AiResult
|
||||
var errs []error
|
||||
var errs []interface{}
|
||||
var ch = make(chan *AiResult, len(clusters))
|
||||
var errCh = make(chan error, len(clusters))
|
||||
var errCh = make(chan interface{}, len(clusters))
|
||||
|
||||
executorMap := *as.AiExecutor
|
||||
executorMap := as.AiService.AiExecutorAdapterMap[as.option.AdapterId]
|
||||
for _, cluster := range clusters {
|
||||
c := cluster
|
||||
if cluster.Replicas == 0 {
|
||||
continue
|
||||
}
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
opt, _ := cloneAiOption(as.option)
|
||||
resp, err := executorMap[c.Name].Execute(as.ctx, opt)
|
||||
resp, err := executorMap[c.ClusterId].Execute(as.ctx, opt)
|
||||
|
||||
if err != nil {
|
||||
errCh <- err
|
||||
e := struct {
|
||||
err error
|
||||
clusterId string
|
||||
}{
|
||||
err: err,
|
||||
clusterId: c.ClusterId,
|
||||
}
|
||||
errCh <- e
|
||||
wg.Done()
|
||||
return
|
||||
}
|
||||
|
||||
result, _ := convertType(resp)
|
||||
result.Replica = c.Replicas
|
||||
result.ClusterId = strconv.FormatInt(c.ParticipantId, 10)
|
||||
result.ClusterId = c.ClusterId
|
||||
result.Strategy = as.option.StrategyName
|
||||
|
||||
ch <- result
|
||||
wg.Done()
|
||||
|
@ -150,10 +168,29 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) (interfa
|
|||
errs = append(errs, e)
|
||||
}
|
||||
|
||||
if len(errs) != 0 {
|
||||
if len(errs) == len(clusters) {
|
||||
return nil, errors.New("submit task failed")
|
||||
}
|
||||
|
||||
if len(errs) != 0 {
|
||||
var msg string
|
||||
for _, err := range errs {
|
||||
e := (err).(struct {
|
||||
err error
|
||||
clusterId string
|
||||
})
|
||||
msg += fmt.Sprintf("clusterId: %v , error: %v \n", e.clusterId, e.err.Error())
|
||||
}
|
||||
for s := range ch {
|
||||
if s.Msg != "" {
|
||||
msg += fmt.Sprintf("clusterId: %v , error: %v \n", s.ClusterId, s.Msg)
|
||||
} else {
|
||||
msg += fmt.Sprintf("clusterId: %v , submitted successfully, taskId: %v \n", s.ClusterId, s.TaskId)
|
||||
}
|
||||
}
|
||||
return nil, errors.New(msg)
|
||||
}
|
||||
|
||||
for s := range ch {
|
||||
// TODO: database operation
|
||||
results = append(results, s)
|
||||
|
@ -164,19 +201,28 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) (interfa
|
|||
|
||||
func (as *AiScheduler) findClustersWithResources() ([]*collector.ResourceStats, error) {
|
||||
var wg sync.WaitGroup
|
||||
var ch = make(chan *collector.ResourceStats, len(*as.ResourceCollector))
|
||||
var errCh = make(chan error, len(*as.ResourceCollector))
|
||||
var clustersNum = len(as.AiService.AiCollectorAdapterMap[as.option.AdapterId])
|
||||
var ch = make(chan *collector.ResourceStats, clustersNum)
|
||||
var errCh = make(chan interface{}, clustersNum)
|
||||
|
||||
var resourceSpecs []*collector.ResourceStats
|
||||
var errs []error
|
||||
var errs []interface{}
|
||||
|
||||
for _, resourceCollector := range *as.ResourceCollector {
|
||||
for s, resourceCollector := range as.AiService.AiCollectorAdapterMap[as.option.AdapterId] {
|
||||
wg.Add(1)
|
||||
rc := resourceCollector
|
||||
id := s
|
||||
go func() {
|
||||
spec, err := rc.GetResourceStats(as.ctx)
|
||||
if err != nil {
|
||||
errCh <- err
|
||||
e := struct {
|
||||
err error
|
||||
clusterId string
|
||||
}{
|
||||
err: err,
|
||||
clusterId: id,
|
||||
}
|
||||
errCh <- e
|
||||
wg.Done()
|
||||
return
|
||||
}
|
||||
|
@ -196,13 +242,22 @@ func (as *AiScheduler) findClustersWithResources() ([]*collector.ResourceStats,
|
|||
errs = append(errs, e)
|
||||
}
|
||||
|
||||
if len(errs) != 0 {
|
||||
if len(errs) == clustersNum {
|
||||
return nil, errors.New("get resources failed")
|
||||
}
|
||||
|
||||
if len(resourceSpecs) == 0 {
|
||||
return nil, errors.New("no resource found")
|
||||
if len(errs) != 0 {
|
||||
var msg string
|
||||
for _, err := range errs {
|
||||
e := (err).(struct {
|
||||
err error
|
||||
clusterId string
|
||||
})
|
||||
msg += fmt.Sprintf("clusterId: %v , error: %v \n", e.clusterId, e.err.Error())
|
||||
}
|
||||
return nil, errors.New(msg)
|
||||
}
|
||||
|
||||
return resourceSpecs, nil
|
||||
}
|
||||
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
package option
|
||||
|
||||
type AiOption struct {
|
||||
AiClusterId string // shuguangAi /octopus ClusterId
|
||||
AdapterId string
|
||||
ClusterIds []string
|
||||
TaskName string
|
||||
ResourceType string // cpu/gpu/compute card
|
||||
CpuCoreNum int64
|
||||
|
|
|
@ -1,11 +1,14 @@
|
|||
package service
|
||||
|
||||
import (
|
||||
"github.com/zeromicro/go-zero/zrpc"
|
||||
"gitlink.org.cn/JointCloud/pcm-ac/hpcacclient"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/config"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/database"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/collector"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/executor"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/storeLink"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
|
||||
"gitlink.org.cn/JointCloud/pcm-octopus/octopusclient"
|
||||
"gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/client/imagesservice"
|
||||
"gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/client/modelartsservice"
|
||||
|
@ -18,30 +21,60 @@ const (
|
|||
SHUGUANGAI = "shuguangAi"
|
||||
)
|
||||
|
||||
func InitAiClusterMap(octopusRpc octopusclient.Octopus, modelArtsRpc modelartsservice.ModelArtsService, modelArtsImgRpc imagesservice.ImagesService, aCRpc hpcacclient.HpcAC, storages *database.AiStorage) (*map[string]executor.AiExecutor, *map[string]collector.AiCollector) {
|
||||
clusters, _ := storages.GetClustersByAdapterId("1777144940459986944")
|
||||
type AiService struct {
|
||||
AiExecutorAdapterMap map[string]map[string]executor.AiExecutor
|
||||
AiCollectorAdapterMap map[string]map[string]collector.AiCollector
|
||||
}
|
||||
|
||||
func NewAiService(conf *config.Config, storages *database.AiStorage) (*AiService, error) {
|
||||
var aiType = "1"
|
||||
adapterIds, err := storages.GetAdapterIdsByType(aiType)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
aiService := &AiService{
|
||||
AiExecutorAdapterMap: make(map[string]map[string]executor.AiExecutor),
|
||||
AiCollectorAdapterMap: make(map[string]map[string]collector.AiCollector),
|
||||
}
|
||||
for _, id := range adapterIds {
|
||||
clusters, err := storages.GetClustersByAdapterId(id)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
exeClusterMap, colClusterMap := InitAiClusterMap(conf, clusters.List)
|
||||
aiService.AiExecutorAdapterMap[id] = exeClusterMap
|
||||
aiService.AiCollectorAdapterMap[id] = colClusterMap
|
||||
}
|
||||
|
||||
return aiService, nil
|
||||
}
|
||||
|
||||
func InitAiClusterMap(conf *config.Config, clusters []types.ClusterInfo) (map[string]executor.AiExecutor, map[string]collector.AiCollector) {
|
||||
executorMap := make(map[string]executor.AiExecutor)
|
||||
collectorMap := make(map[string]collector.AiCollector)
|
||||
for _, c := range clusters.List {
|
||||
for _, c := range clusters {
|
||||
switch c.Name {
|
||||
case OCTOPUS:
|
||||
id, _ := strconv.ParseInt(c.Id, 10, 64)
|
||||
octopusRpc := octopusclient.NewOctopus(zrpc.MustNewClient(conf.OctopusRpcConf))
|
||||
octopus := storeLink.NewOctopusLink(octopusRpc, c.Nickname, id)
|
||||
collectorMap[c.Nickname] = octopus
|
||||
executorMap[c.Nickname] = octopus
|
||||
collectorMap[c.Id] = octopus
|
||||
executorMap[c.Id] = octopus
|
||||
case MODELARTS:
|
||||
id, _ := strconv.ParseInt(c.Id, 10, 64)
|
||||
modelArtsRpc := modelartsservice.NewModelArtsService(zrpc.MustNewClient(conf.ModelArtsRpcConf))
|
||||
modelArtsImgRpc := imagesservice.NewImagesService(zrpc.MustNewClient(conf.ModelArtsImgRpcConf))
|
||||
modelarts := storeLink.NewModelArtsLink(modelArtsRpc, modelArtsImgRpc, c.Nickname, id)
|
||||
collectorMap[c.Nickname] = modelarts
|
||||
executorMap[c.Nickname] = modelarts
|
||||
collectorMap[c.Id] = modelarts
|
||||
executorMap[c.Id] = modelarts
|
||||
case SHUGUANGAI:
|
||||
id, _ := strconv.ParseInt(c.Id, 10, 64)
|
||||
aCRpc := hpcacclient.NewHpcAC(zrpc.MustNewClient(conf.ACRpcConf))
|
||||
sgai := storeLink.NewShuguangAi(aCRpc, c.Nickname, id)
|
||||
collectorMap[c.Nickname] = sgai
|
||||
executorMap[c.Nickname] = sgai
|
||||
collectorMap[c.Id] = sgai
|
||||
executorMap[c.Id] = sgai
|
||||
}
|
||||
}
|
||||
|
||||
return &executorMap, &collectorMap
|
||||
return executorMap, collectorMap
|
||||
}
|
||||
|
|
|
@ -9,18 +9,18 @@ type AiCollector interface {
|
|||
}
|
||||
|
||||
type ResourceStats struct {
|
||||
ParticipantId int64
|
||||
Name string
|
||||
CpuCoreAvail int64
|
||||
CpuCoreTotal int64
|
||||
MemAvail float64
|
||||
MemTotal float64
|
||||
DiskAvail float64
|
||||
DiskTotal float64
|
||||
GpuAvail int64
|
||||
CardsAvail []*Card
|
||||
CpuCoreHours float64
|
||||
Balance float64
|
||||
ClusterId string
|
||||
Name string
|
||||
CpuCoreAvail int64
|
||||
CpuCoreTotal int64
|
||||
MemAvail float64
|
||||
MemTotal float64
|
||||
DiskAvail float64
|
||||
DiskTotal float64
|
||||
GpuAvail int64
|
||||
CardsAvail []*Card
|
||||
CpuCoreHours float64
|
||||
Balance float64
|
||||
}
|
||||
|
||||
type Card struct {
|
||||
|
|
|
@ -33,15 +33,14 @@ func (ps *DynamicResourcesStrategy) Schedule() ([]*AssignedCluster, error) {
|
|||
for _, res := range ps.resources {
|
||||
if opt.ResourceType == "cpu" {
|
||||
if res.CpuCoreHours <= 0 {
|
||||
cluster := &AssignedCluster{ParticipantId: res.ParticipantId, Name: res.Name, Replicas: ps.replicas}
|
||||
cluster := &AssignedCluster{ClusterId: res.ClusterId, Replicas: ps.replicas}
|
||||
results = append(results, cluster)
|
||||
return results, nil
|
||||
}
|
||||
|
||||
if res.CpuCoreHours > maxCpuCoreHoursAvailable {
|
||||
maxCpuCoreHoursAvailable = res.CpuCoreHours
|
||||
assignedCluster.Name = res.Name
|
||||
assignedCluster.ParticipantId = res.ParticipantId
|
||||
assignedCluster.ClusterId = res.ClusterId
|
||||
assignedCluster.Replicas = ps.replicas
|
||||
}
|
||||
}
|
||||
|
@ -56,8 +55,7 @@ func (ps *DynamicResourcesStrategy) Schedule() ([]*AssignedCluster, error) {
|
|||
}
|
||||
if maxCurrentCardHours > maxCardHoursAvailable {
|
||||
maxCardHoursAvailable = maxCurrentCardHours
|
||||
assignedCluster.Name = res.Name
|
||||
assignedCluster.ParticipantId = res.ParticipantId
|
||||
assignedCluster.ClusterId = res.ClusterId
|
||||
assignedCluster.Replicas = ps.replicas
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,23 +0,0 @@
|
|||
package param
|
||||
|
||||
import "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/entity"
|
||||
|
||||
type ReplicationParams struct {
|
||||
Replicas int32
|
||||
*Params
|
||||
}
|
||||
|
||||
func (r *ReplicationParams) GetReplicas() int32 {
|
||||
return r.Replicas
|
||||
}
|
||||
|
||||
func (r *ReplicationParams) GetParticipants() []*entity.Participant {
|
||||
var participants []*entity.Participant
|
||||
for _, resource := range r.Resources {
|
||||
participants = append(participants, &entity.Participant{
|
||||
Participant_id: resource.ParticipantId,
|
||||
Name: resource.Name,
|
||||
})
|
||||
}
|
||||
return participants
|
||||
}
|
|
@ -2,6 +2,7 @@ package param
|
|||
|
||||
import (
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/algorithm/providerPricing"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
type ResourcePricingParams struct {
|
||||
|
@ -21,8 +22,9 @@ func (r *ResourcePricingParams) GetTask() *providerPricing.Task {
|
|||
func (r *ResourcePricingParams) GetProviders() []*providerPricing.Provider {
|
||||
var providerList []*providerPricing.Provider
|
||||
for _, resource := range r.Resources {
|
||||
id, _ := strconv.ParseInt(resource.ClusterId, 10, 64)
|
||||
provider := providerPricing.NewProvider(
|
||||
resource.ParticipantId,
|
||||
id,
|
||||
float64(resource.CpuCoreAvail),
|
||||
resource.MemAvail,
|
||||
resource.DiskAvail, 0.0, 0.0, 0.0)
|
||||
|
|
|
@ -2,33 +2,31 @@ package strategy
|
|||
|
||||
import (
|
||||
"errors"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/entity"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy/param"
|
||||
)
|
||||
|
||||
type ReplicationStrategy struct {
|
||||
replicas int32
|
||||
participants []*entity.Participant
|
||||
replicas int32
|
||||
clusterIds []string
|
||||
}
|
||||
|
||||
func NewReplicationStrategy(params *param.ReplicationParams) *ReplicationStrategy {
|
||||
return &ReplicationStrategy{replicas: params.GetReplicas(),
|
||||
participants: params.GetParticipants(),
|
||||
func NewReplicationStrategy(clusterIds []string, replicas int32) *ReplicationStrategy {
|
||||
return &ReplicationStrategy{clusterIds: clusterIds,
|
||||
replicas: replicas,
|
||||
}
|
||||
}
|
||||
|
||||
func (ps *ReplicationStrategy) Schedule() ([]*AssignedCluster, error) {
|
||||
if ps.replicas < 1 {
|
||||
func (r *ReplicationStrategy) Schedule() ([]*AssignedCluster, error) {
|
||||
if r.replicas < 1 {
|
||||
return nil, errors.New("replicas must be greater than 0")
|
||||
}
|
||||
|
||||
if ps.participants == nil {
|
||||
return nil, errors.New("participantId must be set")
|
||||
if len(r.clusterIds) == 0 {
|
||||
return nil, errors.New("clusterIds must be set")
|
||||
}
|
||||
|
||||
var results []*AssignedCluster
|
||||
for _, p := range ps.participants {
|
||||
cluster := &AssignedCluster{ParticipantId: p.Participant_id, Name: p.Name, Replicas: ps.replicas}
|
||||
for _, c := range r.clusterIds {
|
||||
cluster := &AssignedCluster{ClusterId: c, Replicas: r.replicas}
|
||||
results = append(results, cluster)
|
||||
}
|
||||
return results, nil
|
||||
|
|
|
@ -18,6 +18,7 @@ import (
|
|||
"errors"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/algorithm/providerPricing"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy/param"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
type PricingStrategy struct {
|
||||
|
@ -154,7 +155,7 @@ func (ps *PricingStrategy) Schedule() ([]*AssignedCluster, error) {
|
|||
if e == 0 {
|
||||
continue
|
||||
}
|
||||
cluster := &AssignedCluster{ParticipantId: ps.ProviderList[i].Pid, Replicas: int32(e)}
|
||||
cluster := &AssignedCluster{ClusterId: strconv.FormatInt(ps.ProviderList[i].Pid, 10), Replicas: int32(e)}
|
||||
results = append(results, cluster)
|
||||
}
|
||||
|
||||
|
|
|
@ -29,7 +29,7 @@ func (s *StaticWeightStrategy) Schedule() ([]*AssignedCluster, error) {
|
|||
weights := make([]*weightDistributing.Weight, 0)
|
||||
for k, v := range s.staticWeightMap {
|
||||
weight := &weightDistributing.Weight{
|
||||
Name: k,
|
||||
Id: k,
|
||||
Weight: v,
|
||||
}
|
||||
weights = append(weights, weight)
|
||||
|
@ -39,7 +39,7 @@ func (s *StaticWeightStrategy) Schedule() ([]*AssignedCluster, error) {
|
|||
|
||||
var results []*AssignedCluster
|
||||
for _, weight := range weights {
|
||||
cluster := &AssignedCluster{ParticipantId: weight.Id, Name: weight.Name, Replicas: weight.Replica}
|
||||
cluster := &AssignedCluster{ClusterId: weight.Id, Replicas: weight.Replica}
|
||||
results = append(results, cluster)
|
||||
}
|
||||
|
||||
|
|
|
@ -18,9 +18,8 @@ type Strategy interface {
|
|||
}
|
||||
|
||||
type AssignedCluster struct {
|
||||
ParticipantId int64
|
||||
Name string
|
||||
Replicas int32
|
||||
ClusterId string
|
||||
Replicas int32
|
||||
}
|
||||
|
||||
func GetStrategyNames() []string {
|
||||
|
|
|
@ -5,7 +5,6 @@ import (
|
|||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/entity"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/collector"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy/param"
|
||||
"testing"
|
||||
)
|
||||
|
||||
|
@ -17,15 +16,15 @@ func TestReplication(t *testing.T) {
|
|||
}
|
||||
rsc := []*collector.ResourceStats{
|
||||
{
|
||||
ParticipantId: 1,
|
||||
Name: "test1",
|
||||
ClusterId: "1",
|
||||
Name: "test1",
|
||||
},
|
||||
{
|
||||
ParticipantId: 1,
|
||||
Name: "test2"},
|
||||
ClusterId: "2",
|
||||
Name: "test2"},
|
||||
{
|
||||
ParticipantId: 1,
|
||||
Name: "test3"},
|
||||
ClusterId: "3",
|
||||
Name: "test3"},
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
|
@ -47,8 +46,11 @@ func TestReplication(t *testing.T) {
|
|||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
params := ¶m.Params{Resources: rsc}
|
||||
repl := strategy.NewReplicationStrategy(¶m.ReplicationParams{Params: params, Replicas: tt.replica})
|
||||
var clusterIds []string
|
||||
for _, stats := range rsc {
|
||||
clusterIds = append(clusterIds, stats.ClusterId)
|
||||
}
|
||||
repl := strategy.NewReplicationStrategy(clusterIds, 0)
|
||||
schedule, err := repl.Schedule()
|
||||
if err != nil {
|
||||
return
|
||||
|
|
|
@ -283,11 +283,11 @@ func (o *OctopusLink) GetResourceStats(ctx context.Context) (*collector.Resource
|
|||
}
|
||||
|
||||
resourceStats := &collector.ResourceStats{
|
||||
ParticipantId: o.participantId,
|
||||
Name: o.platform,
|
||||
Balance: balance,
|
||||
CardsAvail: cards,
|
||||
CpuCoreHours: cpuHours,
|
||||
ClusterId: strconv.FormatInt(o.participantId, 10),
|
||||
Name: o.platform,
|
||||
Balance: balance,
|
||||
CardsAvail: cards,
|
||||
CpuCoreHours: cpuHours,
|
||||
}
|
||||
|
||||
return resourceStats, nil
|
||||
|
|
|
@ -26,6 +26,8 @@ import (
|
|||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
const (
|
||||
|
@ -266,96 +268,144 @@ func (s *ShuguangAi) QuerySpecs(ctx context.Context) (interface{}, error) {
|
|||
}
|
||||
|
||||
func (s *ShuguangAi) GetResourceStats(ctx context.Context) (*collector.ResourceStats, error) {
|
||||
//balance
|
||||
userReq := &hpcAC.GetUserInfoReq{}
|
||||
userinfo, err := s.aCRpc.GetUserInfo(ctx, userReq)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
balance, _ := strconv.ParseFloat(userinfo.Data.AccountBalance, 64)
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(4)
|
||||
var cBalance = make(chan float64)
|
||||
var cMemTotal = make(chan float64)
|
||||
var cTotalCpu = make(chan int64)
|
||||
|
||||
//resource limit
|
||||
limitReq := &hpcAC.QueueReq{}
|
||||
limitResp, err := s.aCRpc.QueryUserQuotasLimit(ctx, limitReq)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
resourceStats := &collector.ResourceStats{
|
||||
ClusterId: strconv.FormatInt(s.participantId, 10),
|
||||
Name: s.platform,
|
||||
}
|
||||
totalCpu := limitResp.Data.AccountMaxCpu
|
||||
totalDcu := limitResp.Data.AccountMaxDcu
|
||||
|
||||
//disk
|
||||
//diskReq := &hpcAC.ParaStorQuotaReq{}
|
||||
//diskResp, err := s.aCRpc.ParaStorQuota(ctx, diskReq)
|
||||
//if err != nil {
|
||||
// return nil, err
|
||||
//}
|
||||
//
|
||||
//totalDisk := common.RoundFloat(diskResp.Data[0].Threshold*KB*KB*KB, 3)
|
||||
//availDisk := common.RoundFloat((diskResp.Data[0].Threshold-diskResp.Data[0].Usage)*KB*KB*KB, 3)
|
||||
|
||||
//memory
|
||||
nodeResp, err := s.aCRpc.GetNodeResources(ctx, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
memSize := common.RoundFloat(float64(nodeResp.Data.MemorySize)*KB*KB, 3) // MB to BYTES
|
||||
|
||||
//resources being occupied
|
||||
memberJobResp, err := s.aCRpc.GetMemberJobs(ctx, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var CpuCoreAvail int64
|
||||
var MemAvail float64
|
||||
if len(memberJobResp.Data) != 0 {
|
||||
CpuCoreAvail = totalCpu
|
||||
MemAvail = memSize
|
||||
} else {
|
||||
var cpuCoreUsed int64
|
||||
var memUsed float64
|
||||
for _, datum := range memberJobResp.Data {
|
||||
cpuCoreUsed += datum.CpuCore
|
||||
}
|
||||
memUsed = float64(cpuCoreUsed * 2 * KB * KB * KB) // 2 GB per cpu core
|
||||
if cpuCoreUsed > totalCpu {
|
||||
CpuCoreAvail = 0
|
||||
} else {
|
||||
CpuCoreAvail = totalCpu - cpuCoreUsed
|
||||
}
|
||||
if memUsed > memSize {
|
||||
MemAvail = 0
|
||||
} else {
|
||||
MemAvail = memSize - memUsed
|
||||
}
|
||||
}
|
||||
|
||||
//usable hours
|
||||
var cards []*collector.Card
|
||||
cardHours := common.RoundFloat(balance/DCUPRICEPERHOUR, 3)
|
||||
cpuHours := common.RoundFloat(balance/CPUCOREPRICEPERHOUR, 3)
|
||||
|
||||
dcu := &collector.Card{
|
||||
Platform: SHUGUANGAI,
|
||||
Type: CARD,
|
||||
Name: DCU,
|
||||
TOpsAtFp16: DCU_TOPS,
|
||||
CardHours: cardHours,
|
||||
CardNum: int32(totalDcu),
|
||||
}
|
||||
|
||||
//balance
|
||||
go func() {
|
||||
userReq := &hpcAC.GetUserInfoReq{}
|
||||
userinfo, err := s.aCRpc.GetUserInfo(ctx, userReq)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
balance, _ := strconv.ParseFloat(userinfo.Data.AccountBalance, 64)
|
||||
resourceStats.Balance = balance
|
||||
|
||||
cBalance <- balance
|
||||
}()
|
||||
|
||||
//resource limit
|
||||
go func() {
|
||||
limitReq := &hpcAC.QueueReq{}
|
||||
limitResp, err := s.aCRpc.QueryUserQuotasLimit(ctx, limitReq)
|
||||
if err != nil {
|
||||
wg.Done()
|
||||
return
|
||||
}
|
||||
totalCpu := limitResp.Data.AccountMaxCpu
|
||||
totalDcu := limitResp.Data.AccountMaxDcu
|
||||
|
||||
dcu.CardNum = int32(totalDcu)
|
||||
resourceStats.CpuCoreTotal = totalCpu
|
||||
|
||||
cTotalCpu <- totalCpu
|
||||
wg.Done()
|
||||
}()
|
||||
|
||||
//disk
|
||||
go func() {
|
||||
diskReq := &hpcAC.ParaStorQuotaReq{}
|
||||
diskResp, err := s.aCRpc.ParaStorQuota(ctx, diskReq)
|
||||
if err != nil {
|
||||
wg.Done()
|
||||
return
|
||||
}
|
||||
|
||||
totalDisk := common.RoundFloat(diskResp.Data[0].Threshold*KB*KB*KB, 3)
|
||||
availDisk := common.RoundFloat((diskResp.Data[0].Threshold-diskResp.Data[0].Usage)*KB*KB*KB, 3)
|
||||
|
||||
resourceStats.DiskTotal = totalDisk
|
||||
resourceStats.DiskAvail = availDisk
|
||||
wg.Done()
|
||||
}()
|
||||
|
||||
//memory
|
||||
go func() {
|
||||
nodeResp, err := s.aCRpc.GetNodeResources(ctx, nil)
|
||||
if err != nil {
|
||||
wg.Done()
|
||||
return
|
||||
}
|
||||
memSize := common.RoundFloat(float64(nodeResp.Data.MemorySize)*KB*KB, 3) // MB to BYTES
|
||||
|
||||
resourceStats.MemTotal = memSize
|
||||
cMemTotal <- memSize
|
||||
wg.Done()
|
||||
}()
|
||||
|
||||
//resources being occupied
|
||||
go func() {
|
||||
memSize := <-cMemTotal
|
||||
totalCpu := <-cTotalCpu
|
||||
memberJobResp, err := s.aCRpc.GetMemberJobs(ctx, nil)
|
||||
if err != nil {
|
||||
wg.Done()
|
||||
return
|
||||
}
|
||||
var cpuCoreAvail int64
|
||||
var memAvail float64
|
||||
if len(memberJobResp.Data) != 0 {
|
||||
cpuCoreAvail = totalCpu
|
||||
memAvail = memSize
|
||||
} else {
|
||||
var cpuCoreUsed int64
|
||||
var memUsed float64
|
||||
for _, datum := range memberJobResp.Data {
|
||||
cpuCoreUsed += datum.CpuCore
|
||||
}
|
||||
memUsed = float64(cpuCoreUsed * 2 * KB * KB * KB) // 2 GB per cpu core
|
||||
if cpuCoreUsed > totalCpu {
|
||||
cpuCoreAvail = 0
|
||||
} else {
|
||||
cpuCoreAvail = totalCpu - cpuCoreUsed
|
||||
}
|
||||
if memUsed > memSize {
|
||||
memAvail = 0
|
||||
} else {
|
||||
memAvail = memSize - memUsed
|
||||
}
|
||||
}
|
||||
resourceStats.CpuCoreAvail = cpuCoreAvail
|
||||
resourceStats.MemAvail = memAvail
|
||||
wg.Done()
|
||||
}()
|
||||
|
||||
//usable hours
|
||||
var balance float64
|
||||
|
||||
select {
|
||||
case v := <-cBalance:
|
||||
balance = v
|
||||
case <-time.After(2 * time.Second):
|
||||
return nil, errors.New("get balance rpc call failed")
|
||||
}
|
||||
|
||||
var cards []*collector.Card
|
||||
cardHours := common.RoundFloat(balance/DCUPRICEPERHOUR, 3)
|
||||
cpuHours := common.RoundFloat(balance/CPUCOREPRICEPERHOUR, 3)
|
||||
|
||||
dcu.CardHours = cardHours
|
||||
resourceStats.CpuCoreHours = cpuHours
|
||||
|
||||
wg.Wait()
|
||||
|
||||
cards = append(cards, dcu)
|
||||
resourceStats := &collector.ResourceStats{
|
||||
ParticipantId: s.participantId,
|
||||
Name: s.platform,
|
||||
Balance: balance,
|
||||
CpuCoreTotal: totalCpu,
|
||||
CpuCoreAvail: CpuCoreAvail,
|
||||
//DiskTotal: totalDisk,
|
||||
//DiskAvail: availDisk,
|
||||
MemTotal: memSize,
|
||||
MemAvail: MemAvail,
|
||||
CpuCoreHours: cpuHours,
|
||||
CardsAvail: cards,
|
||||
}
|
||||
resourceStats.CardsAvail = cards
|
||||
|
||||
return resourceStats, nil
|
||||
}
|
||||
|
|
|
@ -16,6 +16,7 @@ package storeLink
|
|||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"github.com/pkg/errors"
|
||||
"gitlink.org.cn/JointCloud/pcm-ac/hpcAC"
|
||||
"gitlink.org.cn/JointCloud/pcm-ac/hpcacclient"
|
||||
|
@ -127,21 +128,29 @@ func GetResourceTypes() []string {
|
|||
return resourceTypes
|
||||
}
|
||||
|
||||
func GetDatasetsNames(ctx context.Context, collectorMap *map[string]collector.AiCollector) ([]string, error) {
|
||||
func GetDatasetsNames(ctx context.Context, collectorMap map[string]collector.AiCollector) ([]string, error) {
|
||||
var wg sync.WaitGroup
|
||||
var errCh = make(chan error, len(*collectorMap))
|
||||
var errs []error
|
||||
var errCh = make(chan interface{}, len(collectorMap))
|
||||
var errs []interface{}
|
||||
var names []string
|
||||
var mu sync.Mutex
|
||||
colMap := *collectorMap
|
||||
for _, col := range colMap {
|
||||
colMap := collectorMap
|
||||
for s, col := range colMap {
|
||||
wg.Add(1)
|
||||
c := col
|
||||
id := s
|
||||
go func() {
|
||||
var ns []string
|
||||
specs, err := c.GetDatasetsSpecs(ctx)
|
||||
if err != nil {
|
||||
errCh <- err
|
||||
e := struct {
|
||||
err error
|
||||
clusterId string
|
||||
}{
|
||||
err: err,
|
||||
clusterId: id,
|
||||
}
|
||||
errCh <- e
|
||||
wg.Done()
|
||||
return
|
||||
}
|
||||
|
@ -167,34 +176,54 @@ func GetDatasetsNames(ctx context.Context, collectorMap *map[string]collector.Ai
|
|||
wg.Wait()
|
||||
close(errCh)
|
||||
|
||||
if len(errs) == len(colMap) {
|
||||
return nil, errors.New("get DatasetsNames failed")
|
||||
}
|
||||
|
||||
for e := range errCh {
|
||||
errs = append(errs, e)
|
||||
}
|
||||
|
||||
if len(errs) != 0 {
|
||||
return nil, errors.New("get DatasetsNames failed")
|
||||
var msg string
|
||||
for _, err := range errs {
|
||||
e := (err).(struct {
|
||||
err error
|
||||
clusterId string
|
||||
})
|
||||
msg += fmt.Sprintf("clusterId: %v , error: %v \n", e.clusterId, e.err.Error())
|
||||
}
|
||||
return nil, errors.New(msg)
|
||||
}
|
||||
|
||||
names = common.RemoveDuplicates(names)
|
||||
return names, nil
|
||||
}
|
||||
|
||||
func GetAlgorithms(ctx context.Context, collectorMap *map[string]collector.AiCollector, resourceType string, taskType string, dataset string) ([]string, error) {
|
||||
func GetAlgorithms(ctx context.Context, collectorMap map[string]collector.AiCollector, resourceType string, taskType string, dataset string) ([]string, error) {
|
||||
var names []string
|
||||
var wg sync.WaitGroup
|
||||
var errCh = make(chan error, len(*collectorMap))
|
||||
var errs []error
|
||||
var errCh = make(chan interface{}, len(collectorMap))
|
||||
var errs []interface{}
|
||||
var mu sync.Mutex
|
||||
|
||||
colMap := *collectorMap
|
||||
for _, col := range colMap {
|
||||
colMap := collectorMap
|
||||
for s, col := range colMap {
|
||||
wg.Add(1)
|
||||
c := col
|
||||
id := s
|
||||
go func() {
|
||||
var ns []string
|
||||
algorithms, err := c.GetAlgorithms(ctx)
|
||||
if err != nil {
|
||||
errCh <- err
|
||||
e := struct {
|
||||
err error
|
||||
clusterId string
|
||||
}{
|
||||
err: err,
|
||||
clusterId: id,
|
||||
}
|
||||
errCh <- e
|
||||
wg.Done()
|
||||
return
|
||||
}
|
||||
|
@ -240,10 +269,22 @@ func GetAlgorithms(ctx context.Context, collectorMap *map[string]collector.AiCol
|
|||
errs = append(errs, e)
|
||||
}
|
||||
|
||||
if len(errs) != 0 {
|
||||
if len(errs) == len(colMap) {
|
||||
return nil, errors.New("get Algorithms failed")
|
||||
}
|
||||
|
||||
if len(errs) != 0 {
|
||||
var msg string
|
||||
for _, err := range errs {
|
||||
e := (err).(struct {
|
||||
err error
|
||||
clusterId string
|
||||
})
|
||||
msg += fmt.Sprintf("clusterId: %v , error: %v \n", e.clusterId, e.err.Error())
|
||||
}
|
||||
return nil, errors.New(msg)
|
||||
}
|
||||
|
||||
names = common.RemoveDuplicates(names)
|
||||
return names, nil
|
||||
}
|
||||
|
|
|
@ -116,24 +116,28 @@ func NewServiceContext(c config.Config) *ServiceContext {
|
|||
})
|
||||
|
||||
// scheduler
|
||||
octopusRpc := octopusclient.NewOctopus(zrpc.MustNewClient(c.OctopusRpcConf))
|
||||
aCRpc := hpcacclient.NewHpcAC(zrpc.MustNewClient(c.ACRpcConf))
|
||||
modelArtsRpc := modelartsservice.NewModelArtsService(zrpc.MustNewClient(c.ModelArtsRpcConf))
|
||||
modelArtsImgRpc := imagesservice.NewImagesService(zrpc.MustNewClient(c.ModelArtsImgRpcConf))
|
||||
//octopusRpc := octopusclient.NewOctopus(zrpc.MustNewClient(c.OctopusRpcConf))
|
||||
//aCRpc := hpcacclient.NewHpcAC(zrpc.MustNewClient(c.ACRpcConf))
|
||||
//modelArtsRpc := modelartsservice.NewModelArtsService(zrpc.MustNewClient(c.ModelArtsRpcConf))
|
||||
//modelArtsImgRpc := imagesservice.NewImagesService(zrpc.MustNewClient(c.ModelArtsImgRpcConf))
|
||||
storage := &database.AiStorage{DbEngin: dbEngin}
|
||||
aiExecutor, resourceCollector := service.InitAiClusterMap(octopusRpc, modelArtsRpc, modelArtsImgRpc, aCRpc, storage)
|
||||
scheduler := scheduler.NewSchdlr(resourceCollector, storage, aiExecutor)
|
||||
aiService, err := service.NewAiService(&c, storage)
|
||||
if err != nil {
|
||||
logx.Error(err.Error())
|
||||
return nil
|
||||
}
|
||||
scheduler := scheduler.NewSchdlr(aiService, storage)
|
||||
|
||||
return &ServiceContext{
|
||||
Cron: cron.New(cron.WithSeconds()),
|
||||
DbEngin: dbEngin,
|
||||
Config: c,
|
||||
RedisClient: redisClient,
|
||||
ModelArtsRpc: modelArtsRpc,
|
||||
ModelArtsImgRpc: modelArtsImgRpc,
|
||||
ModelArtsRpc: modelartsservice.NewModelArtsService(zrpc.MustNewClient(c.ModelArtsRpcConf)),
|
||||
ModelArtsImgRpc: imagesservice.NewImagesService(zrpc.MustNewClient(c.ModelArtsImgRpcConf)),
|
||||
CephRpc: cephclient.NewCeph(zrpc.MustNewClient(c.CephRpcConf)),
|
||||
ACRpc: aCRpc,
|
||||
OctopusRpc: octopusRpc,
|
||||
ACRpc: hpcacclient.NewHpcAC(zrpc.MustNewClient(c.ACRpcConf)),
|
||||
OctopusRpc: octopusclient.NewOctopus(zrpc.MustNewClient(c.OctopusRpcConf)),
|
||||
OpenstackRpc: openstackclient.NewOpenstack(zrpc.MustNewClient(c.OpenstackRpcConf)),
|
||||
K8sRpc: kubernetesclient.NewKubernetes(zrpc.MustNewClient(c.K8sNativeConf)),
|
||||
MonitorClient: make(map[int64]tracker.Prometheus),
|
||||
|
|
|
@ -131,40 +131,22 @@ type TaskYaml struct {
|
|||
}
|
||||
|
||||
type CommitVmTaskReq struct {
|
||||
Name string `json:"name"`
|
||||
NsID string `json:"nsID"`
|
||||
Replicas int64 `json:"replicas,optional"`
|
||||
MatchLabels map[string]string `json:"matchLabels,optional"`
|
||||
Servers []ServerCommit `json:"servers,optional"`
|
||||
Platform string `json:"platform,optional"`
|
||||
AdapterId string `json:"adapterId,optional"`
|
||||
ClusterType string `json:"clusterType,optional"`
|
||||
Name string `json:"name"`
|
||||
NsID string `json:"nsID"`
|
||||
Replicas int64 `json:"replicas,optional"`
|
||||
MatchLabels map[string]string `json:"matchLabels,optional"`
|
||||
AdapterId string `json:"adapterId,optional"`
|
||||
ClusterType string `json:"clusterType,optional"`
|
||||
CreateMulServer []CreateMulDomainServer `json:"createMulServer,optional"`
|
||||
}
|
||||
|
||||
type ServerCommit struct {
|
||||
AllCardRunTime string `json:"allCardRunTime"`
|
||||
FlavorRef string `json:"flavorRef,optional"`
|
||||
Name string `json:"name,optional"`
|
||||
ImageRef string `json:"imageRef,optional"`
|
||||
AccessIPv4 string `json:"accessIPv4,optional"`
|
||||
AccessIPv6 string `json:"accessIPv6,optional"`
|
||||
AdminPass string `json:"adminPass,optional"`
|
||||
Availability_zone string `json:"availability_zone,optional"`
|
||||
Key_name string `json:"key_name,optional"`
|
||||
Hostname string `json:"hostname,optional"`
|
||||
Host string `json:"host,optional"`
|
||||
Networks []Networks `json:"networks,optional"`
|
||||
}
|
||||
|
||||
type Networks struct {
|
||||
Uuid string `json:"uuid,optional"`
|
||||
Port string `json:"port,optional"`
|
||||
Fixed_ip string `json:"fixed_ip,optional"`
|
||||
Tag string `json:"tag,optional"`
|
||||
}
|
||||
|
||||
type Block_device_mapping_v2Commit struct {
|
||||
Uuid string `json:"uuid,optional"`
|
||||
type CreateMulDomainServer struct {
|
||||
Platform string `json:"platform,optional"`
|
||||
Name string `json:"name,optional"`
|
||||
Min_count int64 `json:"min_count,optional"`
|
||||
ImageRef string `json:"imageRef,optional"`
|
||||
FlavorRef string `json:"flavorRef,optional"`
|
||||
Uuid string `json:"uuid,optional"`
|
||||
}
|
||||
|
||||
type CommitVmTaskResp struct {
|
||||
|
@ -5309,13 +5291,15 @@ type ScheduleResp struct {
|
|||
type ScheduleResult struct {
|
||||
ClusterId string `json:"clusterId"`
|
||||
TaskId string `json:"taskId"`
|
||||
Strategy string `json:"strategy"`
|
||||
Replica int32 `json:"replica"`
|
||||
Msg string `json:"msg"`
|
||||
}
|
||||
|
||||
type AiOption struct {
|
||||
TaskName string `json:"taskName"`
|
||||
AiClusterId string `json:"aiClusterId,optional"`
|
||||
AdapterId string `json:"adapterId"`
|
||||
AiClusterIds []string `json:"aiClusterIds"`
|
||||
ResourceType string `json:"resourceType"`
|
||||
Tops float64 `json:"Tops,optional"`
|
||||
TaskType string `json:"taskType"`
|
||||
|
@ -5336,6 +5320,10 @@ type AiTaskTypesResp struct {
|
|||
TaskTypes []string `json:"taskTypes"`
|
||||
}
|
||||
|
||||
type AiDatasetsReq struct {
|
||||
AdapterId string `path:"adapterId"`
|
||||
}
|
||||
|
||||
type AiDatasetsResp struct {
|
||||
Datasets []string `json:"datasets"`
|
||||
}
|
||||
|
@ -5345,6 +5333,7 @@ type AiStrategyResp struct {
|
|||
}
|
||||
|
||||
type AiAlgorithmsReq struct {
|
||||
AdapterId string `path:"adapterId"`
|
||||
ResourceType string `path:"resourceType"`
|
||||
TaskType string `path:"taskType"`
|
||||
Dataset string `path:"dataset"`
|
||||
|
@ -5451,7 +5440,10 @@ type VmInfo struct {
|
|||
BlockUuid string `json:"block_uuid,omitempty"`
|
||||
SourceType string `json:"source_type,omitempty"`
|
||||
DeleteOnTermination bool `json:"delete_on_termination,omitempty"`
|
||||
State string `json:"state,omitempty"`
|
||||
Status string `json:"status,omitempty"`
|
||||
MinCount string `json:"min_count,omitempty"`
|
||||
Platform string `json:"platform,omitempty"`
|
||||
Uuid string `json:"uuid,omitempty"`
|
||||
}
|
||||
|
||||
type PushTaskInfoReq struct {
|
||||
|
@ -5468,7 +5460,37 @@ type PushTaskInfoResp struct {
|
|||
}
|
||||
|
||||
type PushResourceInfoReq struct {
|
||||
AdapterId int64 `json:"adapterId"`
|
||||
AdapterId int64 `json:"adapterId"`
|
||||
ResourceStats []ResourceStats `json:"resourceStats"`
|
||||
}
|
||||
|
||||
type PushResourceInfoResp struct {
|
||||
Code int64 `json:"code"`
|
||||
Msg string `json:"msg"`
|
||||
}
|
||||
|
||||
type ResourceStats struct {
|
||||
ClusterId int64 `json:"clusterId"`
|
||||
Name string `json:"name"`
|
||||
CpuCoreAvail int64 `json:"cpuCoreAvail"`
|
||||
CpuCoreTotal int64 `json:"cpuCoreTotal"`
|
||||
MemAvail float64 `json:"memAvail"`
|
||||
MemTotal float64 `json:"memTotal"`
|
||||
DiskAvail float64 `json:"diskAvail"`
|
||||
DiskTotal float64 `json:"diskTotal"`
|
||||
GpuAvail int64 `json:"gpuAvail"`
|
||||
CardsAvail []*Card `json:"cardsAvail"`
|
||||
CpuCoreHours float64 `json:"cpuCoreHours"`
|
||||
Balance float64 `json:"balance"`
|
||||
}
|
||||
|
||||
type Card struct {
|
||||
Platform string `json:"platform"`
|
||||
Type string `json:"type"`
|
||||
Name string `json:"name"`
|
||||
TOpsAtFp16 float64 `json:"TOpsAtFp16"`
|
||||
CardHours float64 `json:"cardHours"`
|
||||
CardNum int32 `json:"cardNum"`
|
||||
}
|
||||
|
||||
type CreateAlertRuleReq struct {
|
||||
|
|
2
go.mod
2
go.mod
|
@ -2,6 +2,8 @@ module gitlink.org.cn/JointCloud/pcm-coordinator
|
|||
|
||||
go 1.21
|
||||
|
||||
retract v0.1.20-0.20240319015239-6ae13da05255
|
||||
|
||||
require (
|
||||
github.com/JCCE-nudt/zero-contrib/zrpc/registry/nacos v0.0.0-20230419021610-13bbc83fbc3c
|
||||
github.com/Masterminds/squirrel v1.5.4
|
||||
|
|
|
@ -0,0 +1,24 @@
|
|||
package models
|
||||
|
||||
import "github.com/zeromicro/go-zero/core/stores/sqlx"
|
||||
|
||||
var _ TaskVmModel = (*customTaskVmModel)(nil)
|
||||
|
||||
type (
|
||||
// TaskVmModel is an interface to be customized, add more methods here,
|
||||
// and implement the added methods in customTaskVmModel.
|
||||
TaskVmModel interface {
|
||||
taskVmModel
|
||||
}
|
||||
|
||||
customTaskVmModel struct {
|
||||
*defaultTaskVmModel
|
||||
}
|
||||
)
|
||||
|
||||
// NewTaskVmModel returns a model for the database table.
|
||||
func NewTaskVmModel(conn sqlx.SqlConn) TaskVmModel {
|
||||
return &customTaskVmModel{
|
||||
defaultTaskVmModel: newTaskVmModel(conn),
|
||||
}
|
||||
}
|
|
@ -0,0 +1,107 @@
|
|||
// Code generated by goctl. DO NOT EDIT.
|
||||
|
||||
package models
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/zeromicro/go-zero/core/stores/builder"
|
||||
"github.com/zeromicro/go-zero/core/stores/sqlc"
|
||||
"github.com/zeromicro/go-zero/core/stores/sqlx"
|
||||
"github.com/zeromicro/go-zero/core/stringx"
|
||||
)
|
||||
|
||||
var (
|
||||
taskVmFieldNames = builder.RawFieldNames(&TaskVm{})
|
||||
taskVmRows = strings.Join(taskVmFieldNames, ",")
|
||||
taskVmRowsExpectAutoSet = strings.Join(stringx.Remove(taskVmFieldNames, "`id`", "`create_at`", "`create_time`", "`created_at`", "`update_at`", "`update_time`", "`updated_at`"), ",")
|
||||
taskVmRowsWithPlaceHolder = strings.Join(stringx.Remove(taskVmFieldNames, "`id`", "`create_at`", "`create_time`", "`created_at`", "`update_at`", "`update_time`", "`updated_at`"), "=?,") + "=?"
|
||||
)
|
||||
|
||||
type (
|
||||
taskVmModel interface {
|
||||
Insert(ctx context.Context, data *TaskVm) (sql.Result, error)
|
||||
FindOne(ctx context.Context, id int64) (*TaskVm, error)
|
||||
Update(ctx context.Context, data *TaskVm) error
|
||||
Delete(ctx context.Context, id int64) error
|
||||
}
|
||||
|
||||
defaultTaskVmModel struct {
|
||||
conn sqlx.SqlConn
|
||||
table string
|
||||
}
|
||||
|
||||
TaskVm struct {
|
||||
Id int64 `db:"id"` // id
|
||||
ParticipantId int64 `db:"participant_id"` // p端id
|
||||
TaskId int64 `db:"task_id"` // 任务id
|
||||
Name string `db:"name"` // 虚拟机名称
|
||||
AdapterId int64 `db:"adapter_id"` // 执行任务的适配器id
|
||||
ClusterId int64 `db:"cluster_id"` // 执行任务的集群id
|
||||
FlavorRef string `db:"flavor_ref"` // 规格索引
|
||||
ImageRef string `db:"image_ref"` // 镜像索引
|
||||
Status string `db:"status"` // 状态
|
||||
Platform string `db:"platform"` // 平台
|
||||
Description string `db:"description"` // 描述
|
||||
AvailabilityZone string `db:"availability_zone"`
|
||||
MinCount int64 `db:"min_count"` // 数量
|
||||
Uuid string `db:"uuid"` // 网络id
|
||||
StartTime string `db:"start_time"` // 开始时间
|
||||
RunningTime string `db:"running_time"` // 运行时间
|
||||
Result string `db:"result"` // 运行结果
|
||||
DeletedAt string `db:"deleted_at"` // 删除时间
|
||||
}
|
||||
)
|
||||
|
||||
func newTaskVmModel(conn sqlx.SqlConn) *defaultTaskVmModel {
|
||||
return &defaultTaskVmModel{
|
||||
conn: conn,
|
||||
table: "`task_vm`",
|
||||
}
|
||||
}
|
||||
|
||||
func (m *defaultTaskVmModel) withSession(session sqlx.Session) *defaultTaskVmModel {
|
||||
return &defaultTaskVmModel{
|
||||
conn: sqlx.NewSqlConnFromSession(session),
|
||||
table: "`task_vm`",
|
||||
}
|
||||
}
|
||||
|
||||
func (m *defaultTaskVmModel) Delete(ctx context.Context, id int64) error {
|
||||
query := fmt.Sprintf("delete from %s where `id` = ?", m.table)
|
||||
_, err := m.conn.ExecCtx(ctx, query, id)
|
||||
return err
|
||||
}
|
||||
|
||||
func (m *defaultTaskVmModel) FindOne(ctx context.Context, id int64) (*TaskVm, error) {
|
||||
query := fmt.Sprintf("select %s from %s where `id` = ? limit 1", taskVmRows, m.table)
|
||||
var resp TaskVm
|
||||
err := m.conn.QueryRowCtx(ctx, &resp, query, id)
|
||||
switch err {
|
||||
case nil:
|
||||
return &resp, nil
|
||||
case sqlc.ErrNotFound:
|
||||
return nil, ErrNotFound
|
||||
default:
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
func (m *defaultTaskVmModel) Insert(ctx context.Context, data *TaskVm) (sql.Result, error) {
|
||||
query := fmt.Sprintf("insert into %s (%s) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", m.table, taskVmRowsExpectAutoSet)
|
||||
ret, err := m.conn.ExecCtx(ctx, query, data.ParticipantId, data.TaskId, data.Name, data.AdapterId, data.ClusterId, data.FlavorRef, data.ImageRef, data.Status, data.Platform, data.Description, data.AvailabilityZone, data.MinCount, data.Uuid, data.StartTime, data.RunningTime, data.Result, data.DeletedAt)
|
||||
return ret, err
|
||||
}
|
||||
|
||||
func (m *defaultTaskVmModel) Update(ctx context.Context, data *TaskVm) error {
|
||||
query := fmt.Sprintf("update %s set %s where `id` = ?", m.table, taskVmRowsWithPlaceHolder)
|
||||
_, err := m.conn.ExecCtx(ctx, query, data.ParticipantId, data.TaskId, data.Name, data.AdapterId, data.ClusterId, data.FlavorRef, data.ImageRef, data.Status, data.Platform, data.Description, data.AvailabilityZone, data.MinCount, data.Uuid, data.StartTime, data.RunningTime, data.Result, data.DeletedAt, data.Id)
|
||||
return err
|
||||
}
|
||||
|
||||
func (m *defaultTaskVmModel) tableName() string {
|
||||
return m.table
|
||||
}
|
Loading…
Reference in New Issue