Merge branch 'refs/heads/upstream'

Former-commit-id: aabbc1719d3a845983eefacd0614322f32bd2246
jagger 2024-04-19 15:45:01 +08:00
commit e195bb4d89
50 changed files with 2020 additions and 1190 deletions

View File

@ -6,6 +6,7 @@ type Options struct {
}
type Client interface {
Task(TaskOptions) (Task, error)
Notice(NoticeOptions) (Notice, error)
}
func NewClient(options Options) (Client, error) {

View File

@ -19,6 +19,11 @@ func (c *client) Task(options TaskOptions) (Task, error) {
return task, nil
}
func (c *client) Notice(options NoticeOptions) (Notice, error) {
notice, _ := newNotice(c, &options)
return notice, nil
}
func newClient(options Options) (Client, error) {
//init dbEngine
dbEngin, _ := gorm.Open(mysql.Open(options.DataSource), &gorm.Config{

9
api/client/notice.go Normal file
View File

@ -0,0 +1,9 @@
package client
type NoticeOptions struct {
pushNoticeReq PushNoticeReq
}
type Notice interface {
PushNotice(pushNoticeReq PushNoticeReq) (*PushNoticeResp, error)
}

46
api/client/notice_impl.go Normal file
View File

@ -0,0 +1,46 @@
package client
import (
"io/ioutil"
"k8s.io/apimachinery/pkg/util/json"
"log"
"net/http"
"strings"
"sync"
)
type notice struct {
sync.RWMutex
client *client
options *NoticeOptions
log log.Logger
}
func newNotice(client *client, options *NoticeOptions) (*notice, error) {
notice := &notice{
RWMutex: sync.RWMutex{},
client: client,
options: options,
log: log.Logger{},
}
return notice, nil
}
func (n *notice) PushNotice(pushNoticeReq PushNoticeReq) (*PushNoticeResp, error) {
url := n.client.url + "/pcm/v1/core/pushNotice"
method := "GET"
jsonStr, _ := json.Marshal(pushNoticeReq)
payload := strings.NewReader(string(jsonStr))
client := &http.Client{}
req, _ := http.NewRequest(method, url, payload)
req.Header.Add("Content-Type", "application/json")
res, _ := client.Do(req)
defer res.Body.Close()
body, _ := ioutil.ReadAll(res.Body)
var resp PushNoticeResp
json.Unmarshal(body, &resp)
return &resp, nil
}
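
For reference, a minimal usage sketch of the Notice client added above; the Options fields and all NoticeInfo values are illustrative assumptions, not part of this change.

package main

import (
	"fmt"
	"time"

	"gitlink.org.cn/JointCloud/pcm-coordinator/api/client"
)

func main() {
	// obtain a coordinator client; Options fields are omitted here (assumed to carry the core url / data source)
	c, _ := client.NewClient(client.Options{})
	// Notice sub-client introduced by this change
	n, _ := c.Notice(client.NoticeOptions{})
	// push a notice to /pcm/v1/core/pushNotice; every field value below is made up
	resp, err := n.PushNotice(client.PushNoticeReq{
		NoticeInfo: client.NoticeInfo{
			AdapterId:   1,
			AdapterName: "adapter-demo",
			ClusterId:   2,
			ClusterName: "cluster-demo",
			NoticeType:  "task",
			TaskName:    "demo-task",
			Incident:    "task finished",
			CreatedTime: time.Now(),
		},
	})
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println(resp.Code, resp.Msg)
}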

View File

@ -9,5 +9,5 @@ type TaskOptions struct {
type Task interface {
PullTaskInfo(pullTaskInfoReq PullTaskInfoReq) (*PullTaskInfoResp, error)
PushTaskInfo(pushTaskInfoReq PushTaskInfoReq) (*PushTaskInfoResp, error)
PushResourceInfo(pushResourceInfoReq PushResourceInfoReq) error
PushResourceInfo(pushResourceInfoReq PushResourceInfoReq) (*PushResourceInfoResp, error)
}

View File

@ -50,8 +50,8 @@ func (t *task) PushTaskInfo(pushTaskInfoReq PushTaskInfoReq) (*PushTaskInfoResp,
url := t.client.url + "/pcm/v1/core/pushTaskInfo"
method := "POST"
infoReq := PullTaskInfoReq{AdapterId: pushTaskInfoReq.AdapterId}
jsonStr, _ := json.Marshal(infoReq)
//infoReq := PullTaskInfoReq{AdapterId: pushTaskInfoReq.AdapterId}
jsonStr, _ := json.Marshal(pushTaskInfoReq)
payload := strings.NewReader(string(jsonStr))
client := &http.Client{}
@ -66,7 +66,22 @@ func (t *task) PushTaskInfo(pushTaskInfoReq PushTaskInfoReq) (*PushTaskInfoResp,
return &resp, nil
}
func (t *task) PushResourceInfo(pushResourceInfoReq PushResourceInfoReq) error {
//TODO implement me
panic("implement me")
func (t *task) PushResourceInfo(pushResourceInfoReq PushResourceInfoReq) (*PushResourceInfoResp, error) {
url := t.client.url + "/pcm/v1/core/pushResourceInfo"
method := "POST"
//infoReq := PushResourceInfoReq{AdapterId: pushResourceInfoReq.AdapterId}
jsonStr, _ := json.Marshal(pushResourceInfoReq)
payload := strings.NewReader(string(jsonStr))
client := &http.Client{}
req, _ := http.NewRequest(method, url, payload)
req.Header.Add("Content-Type", "application/json")
res, _ := client.Do(req)
defer res.Body.Close()
body, _ := ioutil.ReadAll(res.Body)
var resp PushResourceInfoResp
json.Unmarshal(body, &resp)
return &resp, nil
}

View File

@ -25,12 +25,46 @@ type PushTaskInfoReq struct {
}
type PushTaskInfoResp struct {
Code int64
Msg string
Code int64 `json:"code"`
Msg string `json:"msg"`
}
type PushResourceInfoReq struct {
AdapterId int64 `json:"adapterId"`
AdapterId int64 `json:"adapterId"`
ResourceStats []ResourceStats `json:"resourceStats"`
}
type PushResourceInfoResp struct {
Code int64 `json:"code"`
Msg string `json:"msg"`
}
type NoticeInfo struct {
AdapterId int64 `json:"adapterId"`
AdapterName string `json:"adapterName"`
ClusterId int64 `json:"clusterId"`
ClusterName string `json:"clusterName"`
NoticeType string `json:"noticeType"`
TaskName string `json:"taskName"`
Incident string `json:"incident"`
CreatedTime time.Time `json:"createdTime"`
}
type ListNoticeReq struct {
}
type ListNoticeResp struct {
Code int64 `json:"code"`
Msg string `json:"msg"`
Data []NoticeInfo `json:"data"`
}
type PushNoticeReq struct {
NoticeInfo NoticeInfo `json:"noticeInfo"`
}
type PushNoticeResp struct {
Code int64 `json:"code"`
Msg string `json:"msg"`
}
type HpcInfo struct {
@ -119,5 +153,30 @@ type VmInfo struct {
BlockUuid string `json:"block_uuid,omitempty"`
SourceType string `json:"source_type,omitempty"`
DeleteOnTermination bool `json:"delete_on_termination,omitempty"`
State string `json:"state,omitempty"`
Status string `json:"Status,omitempty"`
StartTime string `json:"startTime,omitempty"`
}
type ResourceStats struct {
ClusterId int64 `json:"clusterId"`
Name string `json:"name"`
CpuCoreAvail int64 `json:"cpuCoreAvail"`
CpuCoreTotal int64 `json:"cpuCoreTotal"`
MemAvail float64 `json:"memAvail"`
MemTotal float64 `json:"memTotal"`
DiskAvail float64 `json:"diskAvail"`
DiskTotal float64 `json:"diskTotal"`
GpuAvail int64 `json:"gpuAvail"`
CardsAvail []*Card `json:"cardsAvail"`
CpuCoreHours float64 `json:"cpuCoreHours"`
Balance float64 `json:"balance"`
}
type Card struct {
Platform string `json:"platform"`
Type string `json:"type"`
Name string `json:"name"`
TOpsAtFp16 float64 `json:"TOpsAtFp16"`
CardHours float64 `json:"cardHours"`
CardNum int32 `json:"cardNum"`
}
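
For reference, a rough sketch of assembling the new resource-report payload and sending it through the Task client's reworked PushResourceInfo; all ids and numbers are assumptions, not part of this change.

package main

import (
	"fmt"

	"gitlink.org.cn/JointCloud/pcm-coordinator/api/client"
)

func main() {
	c, _ := client.NewClient(client.Options{}) // Options fields omitted (assumed)
	t, _ := c.Task(client.TaskOptions{})
	resp, err := t.PushResourceInfo(client.PushResourceInfoReq{
		AdapterId: 1,
		ResourceStats: []client.ResourceStats{{
			ClusterId:    2,
			Name:         "demo-cluster",
			CpuCoreAvail: 64,
			CpuCoreTotal: 128,
			MemAvail:     256e9,
			MemTotal:     512e9,
			GpuAvail:     4,
			CardsAvail:   []*client.Card{{Platform: "shuguangAi", Type: "card", Name: "DCU", TOpsAtFp16: 24.5, CardHours: 10, CardNum: 4}},
			CpuCoreHours: 100,
			Balance:      99.9,
		}},
	})
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println(resp.Code, resp.Msg)
}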

File diff suppressed because it is too large

View File

@ -1,126 +0,0 @@
syntax = "v1"
info(
title: "type title here"
desc: "type desc here"
author: "type author here"
email: "type email here"
version: "type version here"
)
type PullTaskInfoReq {
AdapterId int64 `form:"adapterId"`
}
type PullTaskInfoResp struct {
HpcInfoList []*HpcInfo `json:"HpcInfoList,omitempty"`
CloudInfoList []*CloudInfo `json:"CloudInfoList,omitempty"`
AiInfoList []*AiInfo `json:"AiInfoList,omitempty"`
VmInfoList []*VmInfo `json:"VmInfoList,omitempty"`
}
type HpcInfo struct {
Id int64 `json:"id"` // id
TaskId int64 `json:"task_id"` // task id
JobId string `json:"job_id"` // job id (the job id in the third-party system)
AdapterId int64 `json:"adapter_id"` // id of the adapter executing the task
ClusterId int64 `json:"cluster_id"` // id of the cluster executing the task
ClusterType string `json:"cluster_type"` // type of the cluster executing the task
Name string `json:"name"` // name
Status string `json:"status"` // status
CmdScript string `json:"cmd_script"`
StartTime string `json:"start_time"` // start time
RunningTime int64 `json:"running_time"` // running time
DerivedEs string `json:"derived_es"`
Cluster string `json:"cluster"`
BlockId int64 `json:"block_id"`
AllocNodes int64 `json:"alloc_nodes"`
AllocCpu int64 `json:"alloc_cpu"`
CardCount int64 `json:"card_count"` // card count
Version string `json:"version"`
Account string `json:"account"`
WorkDir string `json:"work_dir"` // working directory
AssocId int64 `json:"assoc_id"`
ExitCode int64 `json:"exit_code"`
WallTime string `json:"wall_time"` // maximum running time
Result string `json:"result"` // run result
DeletedAt string `json:"deleted_at"` // deletion time
YamlString string `json:"yaml_string"`
AppType string `json:"app_type"` // application type
AppName string `json:"app_name"` // application name
Queue string `json:"queue"` // queue name
SubmitType string `json:"submit_type"` // cmd command-line mode
NNode string `json:"n_node"` // number of nodes; when this parameter is specified, GAP_NODE_STRING must be ""
StdOutFile string `json:"std_out_file"` // working directory/std.err.%j
StdErrFile string `json:"std_err_file"` // working directory/std.err.%j
StdInput string `json:"std_input"`
Environment string `json:"environment"`
DeletedFlag int64 `json:"deleted_flag"` // deleted flag (0 - no, 1 - yes)
CreatedBy int64 `json:"created_by"` // created by
CreatedTime string `json:"created_time"` // creation time
UpdatedBy int64 `json:"updated_by"` // updated by
UpdatedTime string `json:"updated_time"` // update time
}
type CloudInfo struct {
Participant int64 `json:"participant,omitempty"`
Id int64 `json:"id,omitempty"`
TaskId int64 `json:"taskId,omitempty"`
ApiVersion string `json:"apiVersion,omitempty"`
Kind string `json:"kind,omitempty"`
Namespace string `json:"namespace,omitempty"`
Name string `json:"name,omitempty"`
Status string `json:"status,omitempty"`
StartTime string `json:"startTime,omitempty"`
RunningTime int64 `json:"runningTime,omitempty"`
Result string `json:"result,omitempty"`
YamlString string `json:"yamlString,omitempty"`
}
type AiInfo struct {
ParticipantId int64 `json:"participantId,omitempty"`
TaskId int64 `json:"taskId,omitempty"`
ProjectId string `json:"project_id,omitempty"`
Name string `json:"name,omitempty"`
Status string `json:"status,omitempty"`
StartTime string `json:"startTime,omitempty"`
RunningTime int64 `json:"runningTime,omitempty"`
Result string `json:"result,omitempty"`
JobId string `json:"jobId,omitempty"`
CreateTime string `json:"createTime,omitempty"`
ImageUrl string `json:"imageUrl,omitempty"`
Command string `json:"command,omitempty"`
FlavorId string `json:"flavorId,omitempty"`
SubscriptionId string `json:"subscriptionId,omitempty"`
ItemVersionId string `json:"itemVersionId,omitempty"`
}
type VmInfo struct {
ParticipantId int64 `json:"participantId,omitempty"`
TaskId int64 `json:"taskId,omitempty"`
Name string `json:"name,omitempty"`
FlavorRef string `json:"flavor_ref,omitempty"`
ImageRef string `json:"image_ref,omitempty"`
NetworkUuid string `json:"network_uuid,omitempty"`
BlockUuid string `json:"block_uuid,omitempty"`
SourceType string `json:"source_type,omitempty"`
DeleteOnTermination bool `json:"delete_on_termination,omitempty"`
State string `json:"state,omitempty"`
}
type PushTaskInfoReq struct {
AdapterId int64 `json:"adapterId"`
HpcInfoList []*HpcInfo `json:"hpcInfoList"`
CloudInfoList []*CloudInfo `json:"cloudInfoList"`
AiInfoList []*AiInfo `json:"aiInfoList"`
VmInfoList []*VmInfo `json:"vmInfoList"`
}
type PushTaskInfoResp struct {
Code int64 `json:"code"`
Msg string `json:"msg"`
}
type PushResourceInfoReq struct {
AdapterId int64 `json:"adapterId"`
}

View File

@ -9,7 +9,6 @@ import (
"cloud/pcm-cloud.api"
"storelink/pcm-storelink.api"
"schedule/pcm-schedule.api"
"participant/pcm-participant.api"
"monitoring/pcm-monitoring.api"
)
@ -111,14 +110,26 @@ service pcm {
@handler metricsHandler
get /core/metrics
@doc "provided to participant to pull task info from core"
@doc "provide for adapter to pull task info from core"
@handler pullTaskInfoHandler
get /core/pullTaskInfo (PullTaskInfoReq) returns (PullTaskInfoResp)
@doc "provided to participant to push task info to core"
@doc "provide for adapter to push task info to core"
@handler pushTaskInfoHandler
post /core/pushTaskInfo (PushTaskInfoReq) returns (PushTaskInfoResp)
@doc "provide for adapter to push resource info to core"
@handler pushResourceInfoHandler
post /core/pushResourceInfo (PushResourceInfoReq) returns (PushResourceInfoResp)
@doc "provide for adapter to push notice info to core"
@handler pushNoticeHandler
post /core/pushNotice (PushNoticeReq) returns (PushNoticeResp)
@doc "list notice"
@handler listNoticeHandler
get /core/listNotice (ListNoticeReq) returns (ListNoticeResp)
@doc "paging queries the task list"
@handler pageListTaskHandler
get /core/task/list (pageTaskReq) returns(PageResult)
@ -146,6 +157,10 @@ service pcm {
@handler jobHandler
get /hpc/job (hpcJobReq) returns (hpcJobResp)
@doc "超算资源总览"
@handler resourceHandler
get /hpc/resource (hpcResourceReq) returns (hpcResourceResp)
@doc "超算查询资产列表"
@handler queueAssetsHandler
get /hpc/queueAssets returns (QueueAssetsResp)
@ -895,13 +910,13 @@ service pcm {
get /schedule/ai/getTaskTypes returns (AiTaskTypesResp)
@handler ScheduleGetDatasetsHandler
get /schedule/ai/getDatasets returns (AiDatasetsResp)
get /schedule/ai/getDatasets/:adapterId (AiDatasetsReq) returns (AiDatasetsResp)
@handler ScheduleGetStrategyHandler
get /schedule/ai/getStrategies returns (AiStrategyResp)
@handler ScheduleGetAlgorithmsHandler
get /schedule/ai/getAlgorithms/:resourceType/:taskType/:dataset (AiAlgorithmsReq) returns (AiAlgorithmsResp)
get /schedule/ai/getAlgorithms/:adapterId/:resourceType/:taskType/:dataset (AiAlgorithmsReq) returns (AiAlgorithmsResp)
@handler ScheduleSubmitHandler
post /schedule/submit (ScheduleReq) returns (ScheduleResp)

View File

@ -19,13 +19,15 @@ type (
ScheduleResult {
ClusterId string `json:"clusterId"`
TaskId string `json:"taskId"`
Strategy string `json:"strategy"`
Replica int32 `json:"replica"`
Msg string `json:"msg"`
}
AiOption {
TaskName string `json:"taskName"`
AiClusterId string `json:"aiClusterId,optional"`
AdapterId string `json:"adapterId"`
AiClusterIds []string `json:"aiClusterIds"`
ResourceType string `json:"resourceType"`
Tops float64 `json:"Tops,optional"`
TaskType string `json:"taskType"`
@ -46,6 +48,10 @@ type (
TaskTypes []string `json:"taskTypes"`
}
AiDatasetsReq {
AdapterId string `path:"adapterId"`
}
AiDatasetsResp {
Datasets []string `json:"datasets"`
}
@ -55,6 +61,7 @@ type (
}
AiAlgorithmsReq {
AdapterId string `path:"adapterId"`
ResourceType string `path:"resourceType"`
TaskType string `path:"taskType"`
Dataset string `path:"dataset"`

View File

@ -0,0 +1,28 @@
package core
import (
clientCore "gitlink.org.cn/JointCloud/pcm-coordinator/api/client"
"net/http"
"github.com/zeromicro/go-zero/rest/httpx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/core"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
)
func ListNoticeHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
var req clientCore.ListNoticeReq
if err := httpx.Parse(r, &req); err != nil {
httpx.ErrorCtx(r.Context(), w, err)
return
}
l := core.NewListNoticeLogic(r.Context(), svcCtx)
resp, err := l.ListNotice(&req)
if err != nil {
httpx.ErrorCtx(r.Context(), w, err)
} else {
httpx.OkJsonCtx(r.Context(), w, resp)
}
}
}

View File

@ -0,0 +1,28 @@
package core
import (
clientCore "gitlink.org.cn/JointCloud/pcm-coordinator/api/client"
"net/http"
"github.com/zeromicro/go-zero/rest/httpx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/core"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
)
func PushNoticeHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
var req clientCore.PushNoticeReq
if err := httpx.Parse(r, &req); err != nil {
httpx.ErrorCtx(r.Context(), w, err)
return
}
l := core.NewPushNoticeLogic(r.Context(), svcCtx)
resp, err := l.PushNotice(&req)
if err != nil {
httpx.ErrorCtx(r.Context(), w, err)
} else {
httpx.OkJsonCtx(r.Context(), w, resp)
}
}
}

View File

@ -0,0 +1,28 @@
package core
import (
clientCore "gitlink.org.cn/JointCloud/pcm-coordinator/api/client"
"net/http"
"github.com/zeromicro/go-zero/rest/httpx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/core"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
)
func PushResourceInfoHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
var req clientCore.PushResourceInfoReq
if err := httpx.Parse(r, &req); err != nil {
httpx.ErrorCtx(r.Context(), w, err)
return
}
l := core.NewPushResourceInfoLogic(r.Context(), svcCtx)
resp, err := l.PushResourceInfo(&req)
if err != nil {
httpx.ErrorCtx(r.Context(), w, err)
} else {
httpx.OkJsonCtx(r.Context(), w, resp)
}
}
}

View File

@ -0,0 +1,28 @@
package hpc
import (
"net/http"
"github.com/zeromicro/go-zero/rest/httpx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/hpc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
)
func ResourceHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
var req types.HpcResourceReq
if err := httpx.Parse(r, &req); err != nil {
httpx.ErrorCtx(r.Context(), w, err)
return
}
l := hpc.NewResourceLogic(r.Context(), svcCtx)
resp, err := l.Resource(&req)
if err != nil {
httpx.ErrorCtx(r.Context(), w, err)
} else {
httpx.OkJsonCtx(r.Context(), w, resp)
}
}
}

View File

@ -140,6 +140,21 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
Path: "/core/pushTaskInfo",
Handler: core.PushTaskInfoHandler(serverCtx),
},
{
Method: http.MethodPost,
Path: "/core/pushResourceInfo",
Handler: core.PushResourceInfoHandler(serverCtx),
},
{
Method: http.MethodPost,
Path: "/core/pushNotice",
Handler: core.PushNoticeHandler(serverCtx),
},
{
Method: http.MethodGet,
Path: "/core/listNotice",
Handler: core.ListNoticeHandler(serverCtx),
},
{
Method: http.MethodGet,
Path: "/core/task/list",
@ -171,6 +186,11 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
Path: "/hpc/job",
Handler: hpc.JobHandler(serverCtx),
},
{
Method: http.MethodGet,
Path: "/hpc/resource",
Handler: hpc.ResourceHandler(serverCtx),
},
{
Method: http.MethodGet,
Path: "/hpc/queueAssets",
@ -1107,7 +1127,7 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
},
{
Method: http.MethodGet,
Path: "/schedule/ai/getDatasets",
Path: "/schedule/ai/getDatasets/:adapterId",
Handler: schedule.ScheduleGetDatasetsHandler(serverCtx),
},
{
@ -1117,7 +1137,7 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
},
{
Method: http.MethodGet,
Path: "/schedule/ai/getAlgorithms/:resourceType/:taskType/:dataset",
Path: "/schedule/ai/getAlgorithms/:adapterId/:resourceType/:taskType/:dataset",
Handler: schedule.ScheduleGetAlgorithmsHandler(serverCtx),
},
{

View File

@ -1,16 +1,24 @@
package schedule
import (
"github.com/zeromicro/go-zero/rest/httpx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/schedule"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
"net/http"
)
func ScheduleGetDatasetsHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
var req types.AiDatasetsReq
if err := httpx.Parse(r, &req); err != nil {
result.ParamErrorResult(r, w, err)
return
}
l := schedule.NewScheduleGetDatasetsLogic(r.Context(), svcCtx)
resp, err := l.ScheduleGetDatasets()
resp, err := l.ScheduleGetDatasets(&req)
result.HttpResult(r, w, resp, err)
}
}

View File

@ -2,13 +2,12 @@ package core
import (
"context"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/mqs"
"fmt"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/pkg/response"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
tool "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
"math/rand"
"time"
"github.com/zeromicro/go-zero/core/logx"
@ -35,7 +34,6 @@ func (l *CommitVmTaskLogic) CommitVmTask(req *types.CommitVmTaskReq) (resp *type
Status: constants.Saved,
Name: req.Name,
CommitTime: time.Now(),
NsID: req.NsID,
}
// Save task data to database
tx := l.svcCtx.DbEngin.Create(&taskModel)
@ -43,28 +41,38 @@ func (l *CommitVmTaskLogic) CommitVmTask(req *types.CommitVmTaskReq) (resp *type
return nil, tx.Error
}
var clusterIds []int64
l.svcCtx.DbEngin.Raw("SELECT id FROM `t_cluster` where adapter_id = ? and label = ?", req.AdapterId, req.ClusterType).Scan(&clusterIds)
for _, CreateMulServer := range req.CreateMulServer {
fmt.Println("", req.CreateMulServer)
var clusterIds []int64
l.svcCtx.DbEngin.Raw("SELECT id FROM `t_cluster` where adapter_id = ? and label = ?", req.AdapterId, req.ClusterType).Scan(&clusterIds)
if len(clusterIds) == 0 || clusterIds == nil {
return nil, nil
if len(clusterIds) == 0 || clusterIds == nil {
return nil, nil
}
vmInfo := models.TaskVm{
TaskId: taskModel.Id,
ClusterId: clusterIds[rand.Intn(len(clusterIds))],
Name: taskModel.Name,
Status: "Saved",
StartTime: time.Now().String(),
MinCount: CreateMulServer.Min_count,
ImageRef: CreateMulServer.ImageRef,
FlavorRef: CreateMulServer.FlavorRef,
Uuid: CreateMulServer.Uuid,
Platform: CreateMulServer.Platform,
}
tx = l.svcCtx.DbEngin.Create(&vmInfo)
if tx.Error != nil {
return nil, tx.Error
}
resp = &types.CommitVmTaskResp{
Code: 200,
Msg: "success",
TaskId: taskModel.Id,
}
}
vm := models.Vm{}
tool.Convert(req, &vm)
mqInfo := response.TaskInfo{
TaskId: taskModel.Id,
TaskType: "vm",
MatchLabels: req.MatchLabels,
NsID: req.NsID,
}
//req.TaskId = taskModel.Id
mqs.InsQueue.Beta.Add(&mqInfo)
tx = l.svcCtx.DbEngin.Create(&mqInfo)
resp = &types.CommitVmTaskResp{
Code: 200,
Msg: "success",
TaskId: taskModel.Id,
}
return resp, nil
}

View File

@ -0,0 +1,36 @@
package core
import (
"context"
"github.com/zeromicro/go-zero/core/logx"
clientCore "gitlink.org.cn/JointCloud/pcm-coordinator/api/client"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
)
type ListNoticeLogic struct {
logx.Logger
ctx context.Context
svcCtx *svc.ServiceContext
}
func NewListNoticeLogic(ctx context.Context, svcCtx *svc.ServiceContext) *ListNoticeLogic {
return &ListNoticeLogic{
Logger: logx.WithContext(ctx),
ctx: ctx,
svcCtx: svcCtx,
}
}
func (l *ListNoticeLogic) ListNotice(req *clientCore.ListNoticeReq) (*clientCore.ListNoticeResp, error) {
var notices []clientCore.NoticeInfo
var resp clientCore.ListNoticeResp
l.svcCtx.DbEngin.Raw("select * from t_notice order by created_time desc").Scan(&notices)
for _, notice := range notices {
resp.Data = append(resp.Data, notice)
}
resp.Code = 200
resp.Msg = "success"
return &resp, nil
}

View File

@ -67,6 +67,13 @@ func (l *PullTaskInfoLogic) PullTaskInfo(req *clientCore.PullTaskInfoReq) (*clie
return nil, err
}
utils.Convert(aiModelList, &resp.AiInfoList)
case 3:
var vmModelList []models.TaskVm
err := findModelList(req.AdapterId, l.svcCtx.DbEngin, &vmModelList)
if err != nil {
return nil, err
}
utils.Convert(vmModelList, &resp.VmInfoList)
}
return &resp, nil
}

View File

@ -0,0 +1,31 @@
package core
import (
"context"
"github.com/zeromicro/go-zero/core/logx"
clientCore "gitlink.org.cn/JointCloud/pcm-coordinator/api/client"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
)
type PushNoticeLogic struct {
logx.Logger
ctx context.Context
svcCtx *svc.ServiceContext
}
func NewPushNoticeLogic(ctx context.Context, svcCtx *svc.ServiceContext) *PushNoticeLogic {
return &PushNoticeLogic{
Logger: logx.WithContext(ctx),
ctx: ctx,
svcCtx: svcCtx,
}
}
func (l *PushNoticeLogic) PushNotice(req *clientCore.PushNoticeReq) (resp *clientCore.PushNoticeResp, err error) {
result := l.svcCtx.DbEngin.Table("t_notice").Create(&req.NoticeInfo)
if result.Error != nil {
return nil, result.Error
}
return
}

View File

@ -0,0 +1,28 @@
package core
import (
"context"
clientCore "gitlink.org.cn/JointCloud/pcm-coordinator/api/client"
"github.com/zeromicro/go-zero/core/logx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
)
type PushResourceInfoLogic struct {
logx.Logger
ctx context.Context
svcCtx *svc.ServiceContext
}
func NewPushResourceInfoLogic(ctx context.Context, svcCtx *svc.ServiceContext) *PushResourceInfoLogic {
return &PushResourceInfoLogic{
Logger: logx.WithContext(ctx),
ctx: ctx,
svcCtx: svcCtx,
}
}
func (l *PushResourceInfoLogic) PushResourceInfo(req *clientCore.PushResourceInfoReq) (resp *clientCore.PushResourceInfoResp, err error) {
return
}

View File

@ -49,6 +49,12 @@ func (l *PushTaskInfoLogic) PushTaskInfo(req *clientCore.PushTaskInfoReq) (*clie
aiInfo.Status, aiInfo.StartTime, aiInfo.ProjectId, aiInfo.JobId, req.AdapterId, aiInfo.TaskId, aiInfo.Name)
syncTask(l.svcCtx.DbEngin, aiInfo.TaskId)
}
case 3:
for _, vmInfo := range req.VmInfoList {
l.svcCtx.DbEngin.Exec("update task_vm set status = ?,start_time = ? where participant_id = ? and task_id = ? and name = ?",
vmInfo.Status, vmInfo.StartTime, req.AdapterId, vmInfo.TaskId, vmInfo.Name)
syncTask(l.svcCtx.DbEngin, vmInfo.TaskId)
}
}
return &resp, nil

View File

@ -0,0 +1,48 @@
package hpc
import (
"context"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"github.com/zeromicro/go-zero/core/logx"
)
type ResourceLogic struct {
logx.Logger
ctx context.Context
svcCtx *svc.ServiceContext
}
func NewResourceLogic(ctx context.Context, svcCtx *svc.ServiceContext) *ResourceLogic {
return &ResourceLogic{
Logger: logx.WithContext(ctx),
ctx: ctx,
svcCtx: svcCtx,
}
}
func (l *ResourceLogic) Resource(req *types.HpcResourceReq) (resp *types.HpcResourceResp, err error) {
l.svcCtx.DbEngin.Raw("SELECT th.NAME as job_name,t.description as job_desc,t.commit_time as submit_time,th.STATUS as job_status,ta.name as adapter_name,tc.name as cluster_name,tc.label as cluster_type FROM task_hpc th LEFT JOIN task t ON t.id = th.task_id JOIN t_cluster tc on th.cluster_id = tc.id JOIN t_adapter ta on tc.adapter_id = ta.id")
hpcResource := types.HPCResource{
GPUCardsTotal: 0,
CPUCoresTotal: 0,
RAMTotal: 0,
GPUCardsUsed: 0,
CPUCoresUsed: 0,
RAMUsed: 0,
GPURate: 0,
CPURate: 0,
RAMRate: 0,
}
resp = &types.HpcResourceResp{
Code: 200,
Msg: "success",
HPCResource: hpcResource,
}
return resp, nil
}

View File

@ -26,7 +26,7 @@ func NewScheduleGetAlgorithmsLogic(ctx context.Context, svcCtx *svc.ServiceConte
func (l *ScheduleGetAlgorithmsLogic) ScheduleGetAlgorithms(req *types.AiAlgorithmsReq) (resp *types.AiAlgorithmsResp, err error) {
resp = &types.AiAlgorithmsResp{}
algorithms, err := storeLink.GetAlgorithms(l.ctx, l.svcCtx.Scheduler.ResourceCollector, req.ResourceType, req.TaskType, req.Dataset)
algorithms, err := storeLink.GetAlgorithms(l.ctx, l.svcCtx.Scheduler.AiService.AiCollectorAdapterMap[req.AdapterId], req.ResourceType, req.TaskType, req.Dataset)
if err != nil {
return nil, err
}

View File

@ -3,6 +3,7 @@ package schedule
import (
"context"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/storeLink"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
@ -23,9 +24,9 @@ func NewScheduleGetDatasetsLogic(ctx context.Context, svcCtx *svc.ServiceContext
}
}
func (l *ScheduleGetDatasetsLogic) ScheduleGetDatasets() (resp *types.AiDatasetsResp, err error) {
func (l *ScheduleGetDatasetsLogic) ScheduleGetDatasets(req *types.AiDatasetsReq) (resp *types.AiDatasetsResp, err error) {
resp = &types.AiDatasetsResp{}
names, err := storeLink.GetDatasetsNames(l.ctx, l.svcCtx.Scheduler.ResourceCollector)
names, err := storeLink.GetDatasetsNames(l.ctx, l.svcCtx.Scheduler.AiService.AiCollectorAdapterMap[req.AdapterId])
if err != nil {
return nil, err
}

View File

@ -27,6 +27,7 @@ func NewScheduleSubmitLogic(ctx context.Context, svcCtx *svc.ServiceContext) *Sc
func (l *ScheduleSubmitLogic) ScheduleSubmit(req *types.ScheduleReq) (resp *types.ScheduleResp, err error) {
resp = &types.ScheduleResp{}
opt := &option.AiOption{
AdapterId: req.AiOption.AdapterId,
ResourceType: req.AiOption.ResourceType,
Tops: req.AiOption.Tops,
TaskType: req.AiOption.TaskType,
@ -55,6 +56,7 @@ func (l *ScheduleSubmitLogic) ScheduleSubmit(req *types.ScheduleReq) (resp *type
scheResult := &types.ScheduleResult{}
scheResult.ClusterId = r.ClusterId
scheResult.TaskId = r.TaskId
scheResult.Strategy = r.Strategy
scheResult.Replica = r.Replica
scheResult.Msg = r.Msg
resp.Results = append(resp.Results, scheResult)

View File

@ -5,9 +5,8 @@ import (
)
type Weight struct {
Id int64
Id string
Weight int32
Name string
Replica int32
}

View File

@ -33,6 +33,21 @@ func (s *AiStorage) GetClustersByAdapterId(id string) (*types.ClusterListResp, e
return &resp, nil
}
func (s *AiStorage) GetAdapterIdsByType(adapterType string) ([]string, error) {
var list []types.AdapterInfo
var ids []string
db := s.DbEngin.Model(&types.AdapterInfo{}).Table("t_adapter")
db = db.Where("type = ?", adapterType)
err := db.Order("create_time desc").Find(&list).Error
if err != nil {
return nil, err
}
for _, info := range list {
ids = append(ids, info.Id)
}
return ids, nil
}
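
A brief sketch of how this new helper is consumed; the wiring of dbEngin mirrors svc.NewServiceContext further down, and the wrapping function here is hypothetical.

package example // hypothetical wrapper, not part of this change

import (
	"fmt"

	"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/database"
	"gorm.io/gorm"
)

func listAiAdapters(dbEngin *gorm.DB) error {
	storage := &database.AiStorage{DbEngin: dbEngin}
	// "1" is the AI adapter type, matching its use in service.NewAiService below
	ids, err := storage.GetAdapterIdsByType("1")
	if err != nil {
		return err
	}
	for _, id := range ids {
		clusters, err := storage.GetClustersByAdapterId(id)
		if err != nil {
			return err
		}
		fmt.Println(id, len(clusters.List))
	}
	return nil
}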
func (s *AiStorage) SaveTask(name string) error {
// build the main task struct
taskModel := models.Task{

View File

@ -20,8 +20,7 @@ import (
"github.com/zeromicro/go-zero/core/logx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/common"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/database"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/collector"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/executor"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/pkg/response"
"gitlink.org.cn/JointCloud/pcm-coordinator/rpc/client/participantservice"
@ -32,16 +31,15 @@ import (
)
type Scheduler struct {
task *response.TaskInfo
participantIds []int64
subSchedule SubSchedule
dbEngin *gorm.DB
result []string //pID: sub-task yamlString key-value pairs
participantRpc participantservice.ParticipantService
ResourceCollector *map[string]collector.AiCollector
AiStorages *database.AiStorage
AiExecutor *map[string]executor.AiExecutor
mu sync.RWMutex
task *response.TaskInfo
participantIds []int64
subSchedule SubSchedule
dbEngin *gorm.DB
result []string //pID: sub-task yamlString key-value pairs
participantRpc participantservice.ParticipantService
AiStorages *database.AiStorage
AiService *service.AiService
mu sync.RWMutex
}
type SubSchedule interface {
@ -59,8 +57,8 @@ func NewScheduler(subSchedule SubSchedule, val string, dbEngin *gorm.DB, partici
return &Scheduler{task: task, subSchedule: subSchedule, dbEngin: dbEngin, participantRpc: participantRpc}, nil
}
func NewSchdlr(resourceCollector *map[string]collector.AiCollector, storages *database.AiStorage, aiExecutor *map[string]executor.AiExecutor) *Scheduler {
return &Scheduler{ResourceCollector: resourceCollector, AiStorages: storages, AiExecutor: aiExecutor}
func NewSchdlr(aiService *service.AiService, storages *database.AiStorage) *Scheduler {
return &Scheduler{AiService: aiService, AiStorages: storages}
}
func (s *Scheduler) SpecifyClusters() {

View File

@ -18,6 +18,7 @@ import (
"context"
"encoding/json"
"errors"
"fmt"
"gitlink.org.cn/JointCloud/pcm-ac/hpcAC"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option"
@ -28,7 +29,6 @@ import (
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
"gitlink.org.cn/JointCloud/pcm-octopus/octopus"
"strconv"
"sync"
)
@ -43,6 +43,7 @@ type AiScheduler struct {
type AiResult struct {
TaskId string
ClusterId string
Strategy string
Replica int32
Msg string
}
@ -63,9 +64,8 @@ func (as *AiScheduler) GetNewStructForDb(task *response.TaskInfo, resource strin
}
func (as *AiScheduler) PickOptimalStrategy() (strategy.Strategy, error) {
if as.option.AiClusterId != "" {
// TODO database operation Find
return &strategy.SingleAssignment{Cluster: &strategy.AssignedCluster{ParticipantId: 0, Name: "", Replicas: 1}}, nil
if len(as.option.ClusterIds) == 1 {
return &strategy.SingleAssignment{Cluster: &strategy.AssignedCluster{ClusterId: as.option.ClusterIds[0], Replicas: 1}}, nil
}
resources, err := as.findClustersWithResources()
@ -79,8 +79,7 @@ func (as *AiScheduler) PickOptimalStrategy() (strategy.Strategy, error) {
if len(resources) == 1 {
var cluster strategy.AssignedCluster
cluster.ParticipantId = resources[0].ParticipantId
cluster.Name = resources[0].Name
cluster.ClusterId = resources[0].ClusterId
cluster.Replicas = 1
return &strategy.SingleAssignment{Cluster: &cluster}, nil
}
@ -89,7 +88,11 @@ func (as *AiScheduler) PickOptimalStrategy() (strategy.Strategy, error) {
switch as.option.StrategyName {
case strategy.REPLICATION:
strategy := strategy.NewReplicationStrategy(&param.ReplicationParams{Params: params, Replicas: 1})
var clusterIds []string
for _, resource := range resources {
clusterIds = append(clusterIds, resource.ClusterId)
}
strategy := strategy.NewReplicationStrategy(clusterIds, 1)
return strategy, nil
case strategy.RESOURCES_PRICING:
strategy := strategy.NewPricingStrategy(&param.ResourcePricingParams{Params: params, Replicas: 1})
@ -111,32 +114,47 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) (interfa
return nil, errors.New("clusters is nil")
}
for i := len(clusters) - 1; i >= 0; i-- {
if clusters[i].Replicas == 0 {
clusters = append(clusters[:i], clusters[i+1:]...)
}
}
if len(clusters) == 0 {
return nil, errors.New("clusters is nil")
}
var wg sync.WaitGroup
var results []*AiResult
var errs []error
var errs []interface{}
var ch = make(chan *AiResult, len(clusters))
var errCh = make(chan error, len(clusters))
var errCh = make(chan interface{}, len(clusters))
executorMap := *as.AiExecutor
executorMap := as.AiService.AiExecutorAdapterMap[as.option.AdapterId]
for _, cluster := range clusters {
c := cluster
if cluster.Replicas == 0 {
continue
}
wg.Add(1)
go func() {
opt, _ := cloneAiOption(as.option)
resp, err := executorMap[c.Name].Execute(as.ctx, opt)
resp, err := executorMap[c.ClusterId].Execute(as.ctx, opt)
if err != nil {
errCh <- err
e := struct {
err error
clusterId string
}{
err: err,
clusterId: c.ClusterId,
}
errCh <- e
wg.Done()
return
}
result, _ := convertType(resp)
result.Replica = c.Replicas
result.ClusterId = strconv.FormatInt(c.ParticipantId, 10)
result.ClusterId = c.ClusterId
result.Strategy = as.option.StrategyName
ch <- result
wg.Done()
@ -150,10 +168,29 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) (interfa
errs = append(errs, e)
}
if len(errs) != 0 {
if len(errs) == len(clusters) {
return nil, errors.New("submit task failed")
}
if len(errs) != 0 {
var msg string
for _, err := range errs {
e := (err).(struct {
err error
clusterId string
})
msg += fmt.Sprintf("clusterId: %v , error: %v \n", e.clusterId, e.err.Error())
}
for s := range ch {
if s.Msg != "" {
msg += fmt.Sprintf("clusterId: %v , error: %v \n", s.ClusterId, s.Msg)
} else {
msg += fmt.Sprintf("clusterId: %v , submitted successfully, taskId: %v \n", s.ClusterId, s.TaskId)
}
}
return nil, errors.New(msg)
}
for s := range ch {
// TODO: database operation
results = append(results, s)
@ -164,19 +201,28 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) (interfa
func (as *AiScheduler) findClustersWithResources() ([]*collector.ResourceStats, error) {
var wg sync.WaitGroup
var ch = make(chan *collector.ResourceStats, len(*as.ResourceCollector))
var errCh = make(chan error, len(*as.ResourceCollector))
var clustersNum = len(as.AiService.AiCollectorAdapterMap[as.option.AdapterId])
var ch = make(chan *collector.ResourceStats, clustersNum)
var errCh = make(chan interface{}, clustersNum)
var resourceSpecs []*collector.ResourceStats
var errs []error
var errs []interface{}
for _, resourceCollector := range *as.ResourceCollector {
for s, resourceCollector := range as.AiService.AiCollectorAdapterMap[as.option.AdapterId] {
wg.Add(1)
rc := resourceCollector
id := s
go func() {
spec, err := rc.GetResourceStats(as.ctx)
if err != nil {
errCh <- err
e := struct {
err error
clusterId string
}{
err: err,
clusterId: id,
}
errCh <- e
wg.Done()
return
}
@ -196,13 +242,22 @@ func (as *AiScheduler) findClustersWithResources() ([]*collector.ResourceStats,
errs = append(errs, e)
}
if len(errs) != 0 {
if len(errs) == clustersNum {
return nil, errors.New("get resources failed")
}
if len(resourceSpecs) == 0 {
return nil, errors.New("no resource found")
if len(errs) != 0 {
var msg string
for _, err := range errs {
e := (err).(struct {
err error
clusterId string
})
msg += fmt.Sprintf("clusterId: %v , error: %v \n", e.clusterId, e.err.Error())
}
return nil, errors.New(msg)
}
return resourceSpecs, nil
}

View File

@ -1,7 +1,8 @@
package option
type AiOption struct {
AiClusterId string // shuguangAi /octopus ClusterId
AdapterId string
ClusterIds []string
TaskName string
ResourceType string // cpu/gpu/compute card
CpuCoreNum int64

View File

@ -1,11 +1,14 @@
package service
import (
"github.com/zeromicro/go-zero/zrpc"
"gitlink.org.cn/JointCloud/pcm-ac/hpcacclient"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/config"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/database"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/collector"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/executor"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/storeLink"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"gitlink.org.cn/JointCloud/pcm-octopus/octopusclient"
"gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/client/imagesservice"
"gitlink.org.cn/jcce-pcm/pcm-participant-modelarts/client/modelartsservice"
@ -18,30 +21,60 @@ const (
SHUGUANGAI = "shuguangAi"
)
func InitAiClusterMap(octopusRpc octopusclient.Octopus, modelArtsRpc modelartsservice.ModelArtsService, modelArtsImgRpc imagesservice.ImagesService, aCRpc hpcacclient.HpcAC, storages *database.AiStorage) (*map[string]executor.AiExecutor, *map[string]collector.AiCollector) {
clusters, _ := storages.GetClustersByAdapterId("1777144940459986944")
type AiService struct {
AiExecutorAdapterMap map[string]map[string]executor.AiExecutor
AiCollectorAdapterMap map[string]map[string]collector.AiCollector
}
func NewAiService(conf *config.Config, storages *database.AiStorage) (*AiService, error) {
var aiType = "1"
adapterIds, err := storages.GetAdapterIdsByType(aiType)
if err != nil {
return nil, err
}
aiService := &AiService{
AiExecutorAdapterMap: make(map[string]map[string]executor.AiExecutor),
AiCollectorAdapterMap: make(map[string]map[string]collector.AiCollector),
}
for _, id := range adapterIds {
clusters, err := storages.GetClustersByAdapterId(id)
if err != nil {
return nil, err
}
exeClusterMap, colClusterMap := InitAiClusterMap(conf, clusters.List)
aiService.AiExecutorAdapterMap[id] = exeClusterMap
aiService.AiCollectorAdapterMap[id] = colClusterMap
}
return aiService, nil
}
func InitAiClusterMap(conf *config.Config, clusters []types.ClusterInfo) (map[string]executor.AiExecutor, map[string]collector.AiCollector) {
executorMap := make(map[string]executor.AiExecutor)
collectorMap := make(map[string]collector.AiCollector)
for _, c := range clusters.List {
for _, c := range clusters {
switch c.Name {
case OCTOPUS:
id, _ := strconv.ParseInt(c.Id, 10, 64)
octopusRpc := octopusclient.NewOctopus(zrpc.MustNewClient(conf.OctopusRpcConf))
octopus := storeLink.NewOctopusLink(octopusRpc, c.Nickname, id)
collectorMap[c.Nickname] = octopus
executorMap[c.Nickname] = octopus
collectorMap[c.Id] = octopus
executorMap[c.Id] = octopus
case MODELARTS:
id, _ := strconv.ParseInt(c.Id, 10, 64)
modelArtsRpc := modelartsservice.NewModelArtsService(zrpc.MustNewClient(conf.ModelArtsRpcConf))
modelArtsImgRpc := imagesservice.NewImagesService(zrpc.MustNewClient(conf.ModelArtsImgRpcConf))
modelarts := storeLink.NewModelArtsLink(modelArtsRpc, modelArtsImgRpc, c.Nickname, id)
collectorMap[c.Nickname] = modelarts
executorMap[c.Nickname] = modelarts
collectorMap[c.Id] = modelarts
executorMap[c.Id] = modelarts
case SHUGUANGAI:
id, _ := strconv.ParseInt(c.Id, 10, 64)
aCRpc := hpcacclient.NewHpcAC(zrpc.MustNewClient(conf.ACRpcConf))
sgai := storeLink.NewShuguangAi(aCRpc, c.Nickname, id)
collectorMap[c.Nickname] = sgai
executorMap[c.Nickname] = sgai
collectorMap[c.Id] = sgai
executorMap[c.Id] = sgai
}
}
return &executorMap, &collectorMap
return executorMap, collectorMap
}
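
The returned maps are keyed first by adapter id and then by cluster id; a rough sketch of the lookup path, mirroring how AiScheduler and the schedule logic consume them (the adapter id and wrapper function are illustrative, not part of this change).

package schedexample // hypothetical

import (
	"context"
	"fmt"

	"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/config"
	"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/database"
	"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service"
)

func collectStats(conf *config.Config, storage *database.AiStorage) {
	aiService, err := service.NewAiService(conf, storage)
	if err != nil {
		fmt.Println(err)
		return
	}
	// adapter id -> (cluster id -> collector); the id below is only an example value
	collectors := aiService.AiCollectorAdapterMap["1777144940459986944"]
	for clusterId, col := range collectors {
		stats, err := col.GetResourceStats(context.Background())
		if err != nil {
			fmt.Println(clusterId, err)
			continue
		}
		fmt.Println(clusterId, stats.CpuCoreAvail, stats.Balance)
	}
}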

View File

@ -9,18 +9,18 @@ type AiCollector interface {
}
type ResourceStats struct {
ParticipantId int64
Name string
CpuCoreAvail int64
CpuCoreTotal int64
MemAvail float64
MemTotal float64
DiskAvail float64
DiskTotal float64
GpuAvail int64
CardsAvail []*Card
CpuCoreHours float64
Balance float64
ClusterId string
Name string
CpuCoreAvail int64
CpuCoreTotal int64
MemAvail float64
MemTotal float64
DiskAvail float64
DiskTotal float64
GpuAvail int64
CardsAvail []*Card
CpuCoreHours float64
Balance float64
}
type Card struct {

View File

@ -33,15 +33,14 @@ func (ps *DynamicResourcesStrategy) Schedule() ([]*AssignedCluster, error) {
for _, res := range ps.resources {
if opt.ResourceType == "cpu" {
if res.CpuCoreHours <= 0 {
cluster := &AssignedCluster{ParticipantId: res.ParticipantId, Name: res.Name, Replicas: ps.replicas}
cluster := &AssignedCluster{ClusterId: res.ClusterId, Replicas: ps.replicas}
results = append(results, cluster)
return results, nil
}
if res.CpuCoreHours > maxCpuCoreHoursAvailable {
maxCpuCoreHoursAvailable = res.CpuCoreHours
assignedCluster.Name = res.Name
assignedCluster.ParticipantId = res.ParticipantId
assignedCluster.ClusterId = res.ClusterId
assignedCluster.Replicas = ps.replicas
}
}
@ -56,8 +55,7 @@ func (ps *DynamicResourcesStrategy) Schedule() ([]*AssignedCluster, error) {
}
if maxCurrentCardHours > maxCardHoursAvailable {
maxCardHoursAvailable = maxCurrentCardHours
assignedCluster.Name = res.Name
assignedCluster.ParticipantId = res.ParticipantId
assignedCluster.ClusterId = res.ClusterId
assignedCluster.Replicas = ps.replicas
}
}

View File

@ -1,23 +0,0 @@
package param
import "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/entity"
type ReplicationParams struct {
Replicas int32
*Params
}
func (r *ReplicationParams) GetReplicas() int32 {
return r.Replicas
}
func (r *ReplicationParams) GetParticipants() []*entity.Participant {
var participants []*entity.Participant
for _, resource := range r.Resources {
participants = append(participants, &entity.Participant{
Participant_id: resource.ParticipantId,
Name: resource.Name,
})
}
return participants
}

View File

@ -2,6 +2,7 @@ package param
import (
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/algorithm/providerPricing"
"strconv"
)
type ResourcePricingParams struct {
@ -21,8 +22,9 @@ func (r *ResourcePricingParams) GetTask() *providerPricing.Task {
func (r *ResourcePricingParams) GetProviders() []*providerPricing.Provider {
var providerList []*providerPricing.Provider
for _, resource := range r.Resources {
id, _ := strconv.ParseInt(resource.ClusterId, 10, 64)
provider := providerPricing.NewProvider(
resource.ParticipantId,
id,
float64(resource.CpuCoreAvail),
resource.MemAvail,
resource.DiskAvail, 0.0, 0.0, 0.0)

View File

@ -2,33 +2,31 @@ package strategy
import (
"errors"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/entity"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy/param"
)
type ReplicationStrategy struct {
replicas int32
participants []*entity.Participant
replicas int32
clusterIds []string
}
func NewReplicationStrategy(params *param.ReplicationParams) *ReplicationStrategy {
return &ReplicationStrategy{replicas: params.GetReplicas(),
participants: params.GetParticipants(),
func NewReplicationStrategy(clusterIds []string, replicas int32) *ReplicationStrategy {
return &ReplicationStrategy{clusterIds: clusterIds,
replicas: replicas,
}
}
func (ps *ReplicationStrategy) Schedule() ([]*AssignedCluster, error) {
if ps.replicas < 1 {
func (r *ReplicationStrategy) Schedule() ([]*AssignedCluster, error) {
if r.replicas < 1 {
return nil, errors.New("replicas must be greater than 0")
}
if ps.participants == nil {
return nil, errors.New("participantId must be set")
if len(r.clusterIds) == 0 {
return nil, errors.New("clusterIds must be set")
}
var results []*AssignedCluster
for _, p := range ps.participants {
cluster := &AssignedCluster{ParticipantId: p.Participant_id, Name: p.Name, Replicas: ps.replicas}
for _, c := range r.clusterIds {
cluster := &AssignedCluster{ClusterId: c, Replicas: r.replicas}
results = append(results, cluster)
}
return results, nil
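
A minimal sketch of the reworked strategy with the new cluster-id interface; the ids are made-up examples, not part of this change.

package main

import (
	"fmt"

	"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy"
)

func main() {
	repl := strategy.NewReplicationStrategy([]string{"101", "102"}, 1)
	clusters, err := repl.Schedule()
	if err != nil {
		fmt.Println(err)
		return
	}
	for _, c := range clusters {
		// each assigned cluster now carries a ClusterId string instead of ParticipantId/Name
		fmt.Printf("clusterId: %v, replicas: %v\n", c.ClusterId, c.Replicas)
	}
}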

View File

@ -18,6 +18,7 @@ import (
"errors"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/algorithm/providerPricing"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy/param"
"strconv"
)
type PricingStrategy struct {
@ -154,7 +155,7 @@ func (ps *PricingStrategy) Schedule() ([]*AssignedCluster, error) {
if e == 0 {
continue
}
cluster := &AssignedCluster{ParticipantId: ps.ProviderList[i].Pid, Replicas: int32(e)}
cluster := &AssignedCluster{ClusterId: strconv.FormatInt(ps.ProviderList[i].Pid, 10), Replicas: int32(e)}
results = append(results, cluster)
}

View File

@ -29,7 +29,7 @@ func (s *StaticWeightStrategy) Schedule() ([]*AssignedCluster, error) {
weights := make([]*weightDistributing.Weight, 0)
for k, v := range s.staticWeightMap {
weight := &weightDistributing.Weight{
Name: k,
Id: k,
Weight: v,
}
weights = append(weights, weight)
@ -39,7 +39,7 @@ func (s *StaticWeightStrategy) Schedule() ([]*AssignedCluster, error) {
var results []*AssignedCluster
for _, weight := range weights {
cluster := &AssignedCluster{ParticipantId: weight.Id, Name: weight.Name, Replicas: weight.Replica}
cluster := &AssignedCluster{ClusterId: weight.Id, Replicas: weight.Replica}
results = append(results, cluster)
}

View File

@ -18,9 +18,8 @@ type Strategy interface {
}
type AssignedCluster struct {
ParticipantId int64
Name string
Replicas int32
ClusterId string
Replicas int32
}
func GetStrategyNames() []string {

View File

@ -5,7 +5,6 @@ import (
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/entity"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/collector"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy/param"
"testing"
)
@ -17,15 +16,15 @@ func TestReplication(t *testing.T) {
}
rsc := []*collector.ResourceStats{
{
ParticipantId: 1,
Name: "test1",
ClusterId: "1",
Name: "test1",
},
{
ParticipantId: 1,
Name: "test2"},
ClusterId: "2",
Name: "test2"},
{
ParticipantId: 1,
Name: "test3"},
ClusterId: "3",
Name: "test3"},
}
tests := []struct {
name string
@ -47,8 +46,11 @@ func TestReplication(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
params := &param.Params{Resources: rsc}
repl := strategy.NewReplicationStrategy(&param.ReplicationParams{Params: params, Replicas: tt.replica})
var clusterIds []string
for _, stats := range rsc {
clusterIds = append(clusterIds, stats.ClusterId)
}
repl := strategy.NewReplicationStrategy(clusterIds, 0)
schedule, err := repl.Schedule()
if err != nil {
return

View File

@ -283,11 +283,11 @@ func (o *OctopusLink) GetResourceStats(ctx context.Context) (*collector.Resource
}
resourceStats := &collector.ResourceStats{
ParticipantId: o.participantId,
Name: o.platform,
Balance: balance,
CardsAvail: cards,
CpuCoreHours: cpuHours,
ClusterId: strconv.FormatInt(o.participantId, 10),
Name: o.platform,
Balance: balance,
CardsAvail: cards,
CpuCoreHours: cpuHours,
}
return resourceStats, nil

View File

@ -26,6 +26,8 @@ import (
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
"strconv"
"strings"
"sync"
"time"
)
const (
@ -266,96 +268,144 @@ func (s *ShuguangAi) QuerySpecs(ctx context.Context) (interface{}, error) {
}
func (s *ShuguangAi) GetResourceStats(ctx context.Context) (*collector.ResourceStats, error) {
//balance
userReq := &hpcAC.GetUserInfoReq{}
userinfo, err := s.aCRpc.GetUserInfo(ctx, userReq)
if err != nil {
return nil, err
}
balance, _ := strconv.ParseFloat(userinfo.Data.AccountBalance, 64)
var wg sync.WaitGroup
wg.Add(4)
var cBalance = make(chan float64)
var cMemTotal = make(chan float64)
var cTotalCpu = make(chan int64)
//resource limit
limitReq := &hpcAC.QueueReq{}
limitResp, err := s.aCRpc.QueryUserQuotasLimit(ctx, limitReq)
if err != nil {
return nil, err
resourceStats := &collector.ResourceStats{
ClusterId: strconv.FormatInt(s.participantId, 10),
Name: s.platform,
}
totalCpu := limitResp.Data.AccountMaxCpu
totalDcu := limitResp.Data.AccountMaxDcu
//disk
//diskReq := &hpcAC.ParaStorQuotaReq{}
//diskResp, err := s.aCRpc.ParaStorQuota(ctx, diskReq)
//if err != nil {
// return nil, err
//}
//
//totalDisk := common.RoundFloat(diskResp.Data[0].Threshold*KB*KB*KB, 3)
//availDisk := common.RoundFloat((diskResp.Data[0].Threshold-diskResp.Data[0].Usage)*KB*KB*KB, 3)
//memory
nodeResp, err := s.aCRpc.GetNodeResources(ctx, nil)
if err != nil {
return nil, err
}
memSize := common.RoundFloat(float64(nodeResp.Data.MemorySize)*KB*KB, 3) // MB to BYTES
//resources being occupied
memberJobResp, err := s.aCRpc.GetMemberJobs(ctx, nil)
if err != nil {
return nil, err
}
var CpuCoreAvail int64
var MemAvail float64
if len(memberJobResp.Data) != 0 {
CpuCoreAvail = totalCpu
MemAvail = memSize
} else {
var cpuCoreUsed int64
var memUsed float64
for _, datum := range memberJobResp.Data {
cpuCoreUsed += datum.CpuCore
}
memUsed = float64(cpuCoreUsed * 2 * KB * KB * KB) // 2 GB per cpu core
if cpuCoreUsed > totalCpu {
CpuCoreAvail = 0
} else {
CpuCoreAvail = totalCpu - cpuCoreUsed
}
if memUsed > memSize {
MemAvail = 0
} else {
MemAvail = memSize - memUsed
}
}
//usable hours
var cards []*collector.Card
cardHours := common.RoundFloat(balance/DCUPRICEPERHOUR, 3)
cpuHours := common.RoundFloat(balance/CPUCOREPRICEPERHOUR, 3)
dcu := &collector.Card{
Platform: SHUGUANGAI,
Type: CARD,
Name: DCU,
TOpsAtFp16: DCU_TOPS,
CardHours: cardHours,
CardNum: int32(totalDcu),
}
//balance
go func() {
userReq := &hpcAC.GetUserInfoReq{}
userinfo, err := s.aCRpc.GetUserInfo(ctx, userReq)
if err != nil {
return
}
balance, _ := strconv.ParseFloat(userinfo.Data.AccountBalance, 64)
resourceStats.Balance = balance
cBalance <- balance
}()
//resource limit
go func() {
limitReq := &hpcAC.QueueReq{}
limitResp, err := s.aCRpc.QueryUserQuotasLimit(ctx, limitReq)
if err != nil {
wg.Done()
return
}
totalCpu := limitResp.Data.AccountMaxCpu
totalDcu := limitResp.Data.AccountMaxDcu
dcu.CardNum = int32(totalDcu)
resourceStats.CpuCoreTotal = totalCpu
cTotalCpu <- totalCpu
wg.Done()
}()
//disk
go func() {
diskReq := &hpcAC.ParaStorQuotaReq{}
diskResp, err := s.aCRpc.ParaStorQuota(ctx, diskReq)
if err != nil {
wg.Done()
return
}
totalDisk := common.RoundFloat(diskResp.Data[0].Threshold*KB*KB*KB, 3)
availDisk := common.RoundFloat((diskResp.Data[0].Threshold-diskResp.Data[0].Usage)*KB*KB*KB, 3)
resourceStats.DiskTotal = totalDisk
resourceStats.DiskAvail = availDisk
wg.Done()
}()
//memory
go func() {
nodeResp, err := s.aCRpc.GetNodeResources(ctx, nil)
if err != nil {
wg.Done()
return
}
memSize := common.RoundFloat(float64(nodeResp.Data.MemorySize)*KB*KB, 3) // MB to BYTES
resourceStats.MemTotal = memSize
cMemTotal <- memSize
wg.Done()
}()
//resources being occupied
go func() {
memSize := <-cMemTotal
totalCpu := <-cTotalCpu
memberJobResp, err := s.aCRpc.GetMemberJobs(ctx, nil)
if err != nil {
wg.Done()
return
}
var cpuCoreAvail int64
var memAvail float64
if len(memberJobResp.Data) != 0 {
cpuCoreAvail = totalCpu
memAvail = memSize
} else {
var cpuCoreUsed int64
var memUsed float64
for _, datum := range memberJobResp.Data {
cpuCoreUsed += datum.CpuCore
}
memUsed = float64(cpuCoreUsed * 2 * KB * KB * KB) // 2 GB per cpu core
if cpuCoreUsed > totalCpu {
cpuCoreAvail = 0
} else {
cpuCoreAvail = totalCpu - cpuCoreUsed
}
if memUsed > memSize {
memAvail = 0
} else {
memAvail = memSize - memUsed
}
}
resourceStats.CpuCoreAvail = cpuCoreAvail
resourceStats.MemAvail = memAvail
wg.Done()
}()
//usable hours
var balance float64
select {
case v := <-cBalance:
balance = v
case <-time.After(2 * time.Second):
return nil, errors.New("get balance rpc call failed")
}
var cards []*collector.Card
cardHours := common.RoundFloat(balance/DCUPRICEPERHOUR, 3)
cpuHours := common.RoundFloat(balance/CPUCOREPRICEPERHOUR, 3)
dcu.CardHours = cardHours
resourceStats.CpuCoreHours = cpuHours
wg.Wait()
cards = append(cards, dcu)
resourceStats := &collector.ResourceStats{
ParticipantId: s.participantId,
Name: s.platform,
Balance: balance,
CpuCoreTotal: totalCpu,
CpuCoreAvail: CpuCoreAvail,
//DiskTotal: totalDisk,
//DiskAvail: availDisk,
MemTotal: memSize,
MemAvail: MemAvail,
CpuCoreHours: cpuHours,
CardsAvail: cards,
}
resourceStats.CardsAvail = cards
return resourceStats, nil
}

View File

@ -16,6 +16,7 @@ package storeLink
import (
"context"
"fmt"
"github.com/pkg/errors"
"gitlink.org.cn/JointCloud/pcm-ac/hpcAC"
"gitlink.org.cn/JointCloud/pcm-ac/hpcacclient"
@ -127,21 +128,29 @@ func GetResourceTypes() []string {
return resourceTypes
}
func GetDatasetsNames(ctx context.Context, collectorMap *map[string]collector.AiCollector) ([]string, error) {
func GetDatasetsNames(ctx context.Context, collectorMap map[string]collector.AiCollector) ([]string, error) {
var wg sync.WaitGroup
var errCh = make(chan error, len(*collectorMap))
var errs []error
var errCh = make(chan interface{}, len(collectorMap))
var errs []interface{}
var names []string
var mu sync.Mutex
colMap := *collectorMap
for _, col := range colMap {
colMap := collectorMap
for s, col := range colMap {
wg.Add(1)
c := col
id := s
go func() {
var ns []string
specs, err := c.GetDatasetsSpecs(ctx)
if err != nil {
errCh <- err
e := struct {
err error
clusterId string
}{
err: err,
clusterId: id,
}
errCh <- e
wg.Done()
return
}
@ -167,34 +176,54 @@ func GetDatasetsNames(ctx context.Context, collectorMap *map[string]collector.Ai
wg.Wait()
close(errCh)
if len(errs) == len(colMap) {
return nil, errors.New("get DatasetsNames failed")
}
for e := range errCh {
errs = append(errs, e)
}
if len(errs) != 0 {
return nil, errors.New("get DatasetsNames failed")
var msg string
for _, err := range errs {
e := (err).(struct {
err error
clusterId string
})
msg += fmt.Sprintf("clusterId: %v , error: %v \n", e.clusterId, e.err.Error())
}
return nil, errors.New(msg)
}
names = common.RemoveDuplicates(names)
return names, nil
}
func GetAlgorithms(ctx context.Context, collectorMap *map[string]collector.AiCollector, resourceType string, taskType string, dataset string) ([]string, error) {
func GetAlgorithms(ctx context.Context, collectorMap map[string]collector.AiCollector, resourceType string, taskType string, dataset string) ([]string, error) {
var names []string
var wg sync.WaitGroup
var errCh = make(chan error, len(*collectorMap))
var errs []error
var errCh = make(chan interface{}, len(collectorMap))
var errs []interface{}
var mu sync.Mutex
colMap := *collectorMap
for _, col := range colMap {
colMap := collectorMap
for s, col := range colMap {
wg.Add(1)
c := col
id := s
go func() {
var ns []string
algorithms, err := c.GetAlgorithms(ctx)
if err != nil {
errCh <- err
e := struct {
err error
clusterId string
}{
err: err,
clusterId: id,
}
errCh <- e
wg.Done()
return
}
@ -240,10 +269,22 @@ func GetAlgorithms(ctx context.Context, collectorMap *map[string]collector.AiCol
errs = append(errs, e)
}
if len(errs) != 0 {
if len(errs) == len(colMap) {
return nil, errors.New("get Algorithms failed")
}
if len(errs) != 0 {
var msg string
for _, err := range errs {
e := (err).(struct {
err error
clusterId string
})
msg += fmt.Sprintf("clusterId: %v , error: %v \n", e.clusterId, e.err.Error())
}
return nil, errors.New(msg)
}
names = common.RemoveDuplicates(names)
return names, nil
}

View File

@ -116,24 +116,28 @@ func NewServiceContext(c config.Config) *ServiceContext {
})
// scheduler
octopusRpc := octopusclient.NewOctopus(zrpc.MustNewClient(c.OctopusRpcConf))
aCRpc := hpcacclient.NewHpcAC(zrpc.MustNewClient(c.ACRpcConf))
modelArtsRpc := modelartsservice.NewModelArtsService(zrpc.MustNewClient(c.ModelArtsRpcConf))
modelArtsImgRpc := imagesservice.NewImagesService(zrpc.MustNewClient(c.ModelArtsImgRpcConf))
//octopusRpc := octopusclient.NewOctopus(zrpc.MustNewClient(c.OctopusRpcConf))
//aCRpc := hpcacclient.NewHpcAC(zrpc.MustNewClient(c.ACRpcConf))
//modelArtsRpc := modelartsservice.NewModelArtsService(zrpc.MustNewClient(c.ModelArtsRpcConf))
//modelArtsImgRpc := imagesservice.NewImagesService(zrpc.MustNewClient(c.ModelArtsImgRpcConf))
storage := &database.AiStorage{DbEngin: dbEngin}
aiExecutor, resourceCollector := service.InitAiClusterMap(octopusRpc, modelArtsRpc, modelArtsImgRpc, aCRpc, storage)
scheduler := scheduler.NewSchdlr(resourceCollector, storage, aiExecutor)
aiService, err := service.NewAiService(&c, storage)
if err != nil {
logx.Error(err.Error())
return nil
}
scheduler := scheduler.NewSchdlr(aiService, storage)
return &ServiceContext{
Cron: cron.New(cron.WithSeconds()),
DbEngin: dbEngin,
Config: c,
RedisClient: redisClient,
ModelArtsRpc: modelArtsRpc,
ModelArtsImgRpc: modelArtsImgRpc,
ModelArtsRpc: modelartsservice.NewModelArtsService(zrpc.MustNewClient(c.ModelArtsRpcConf)),
ModelArtsImgRpc: imagesservice.NewImagesService(zrpc.MustNewClient(c.ModelArtsImgRpcConf)),
CephRpc: cephclient.NewCeph(zrpc.MustNewClient(c.CephRpcConf)),
ACRpc: aCRpc,
OctopusRpc: octopusRpc,
ACRpc: hpcacclient.NewHpcAC(zrpc.MustNewClient(c.ACRpcConf)),
OctopusRpc: octopusclient.NewOctopus(zrpc.MustNewClient(c.OctopusRpcConf)),
OpenstackRpc: openstackclient.NewOpenstack(zrpc.MustNewClient(c.OpenstackRpcConf)),
K8sRpc: kubernetesclient.NewKubernetes(zrpc.MustNewClient(c.K8sNativeConf)),
MonitorClient: make(map[int64]tracker.Prometheus),

View File

@ -131,40 +131,22 @@ type TaskYaml struct {
}
type CommitVmTaskReq struct {
Name string `json:"name"`
NsID string `json:"nsID"`
Replicas int64 `json:"replicas,optional"`
MatchLabels map[string]string `json:"matchLabels,optional"`
Servers []ServerCommit `json:"servers,optional"`
Platform string `json:"platform,optional"`
AdapterId string `json:"adapterId,optional"`
ClusterType string `json:"clusterType,optional"`
Name string `json:"name"`
NsID string `json:"nsID"`
Replicas int64 `json:"replicas,optional"`
MatchLabels map[string]string `json:"matchLabels,optional"`
AdapterId string `json:"adapterId,optional"`
ClusterType string `json:"clusterType,optional"`
CreateMulServer []CreateMulDomainServer `json:"createMulServer,optional"`
}
type ServerCommit struct {
AllCardRunTime string `json:"allCardRunTime"`
FlavorRef string `json:"flavorRef,optional"`
Name string `json:"name,optional"`
ImageRef string `json:"imageRef,optional"`
AccessIPv4 string `json:"accessIPv4,optional"`
AccessIPv6 string `json:"accessIPv6,optional"`
AdminPass string `json:"adminPass,optional"`
Availability_zone string `json:"availability_zone,optional"`
Key_name string `json:"key_name,optional"`
Hostname string `json:"hostname,optional"`
Host string `json:"host,optional"`
Networks []Networks `json:"networks,optional"`
}
type Networks struct {
Uuid string `json:"uuid,optional"`
Port string `json:"port,optional"`
Fixed_ip string `json:"fixed_ip,optional"`
Tag string `json:"tag,optional"`
}
type Block_device_mapping_v2Commit struct {
Uuid string `json:"uuid,optional"`
type CreateMulDomainServer struct {
Platform string `json:"platform,optional"`
Name string `json:"name,optional"`
Min_count int64 `json:"min_count,optional"`
ImageRef string `json:"imageRef,optional"`
FlavorRef string `json:"flavorRef,optional"`
Uuid string `json:"uuid,optional"`
}
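A hedged sketch of the reshaped request follows: per-server fields now sit in CreateMulServer instead of the removed Servers/ServerCommit block. Every value below is a placeholder, not taken from the codebase:

// buildCommitVmTaskReq is a hypothetical helper illustrating the new
// multi-domain request shape.
func buildCommitVmTaskReq() CommitVmTaskReq {
	return CommitVmTaskReq{
		Name:        "vm-task-demo", // placeholder
		NsID:        "ns-demo",      // placeholder
		Replicas:    1,
		AdapterId:   "1",     // placeholder adapter id
		ClusterType: "cloud", // placeholder
		CreateMulServer: []CreateMulDomainServer{
			{
				Platform:  "openstack",    // placeholder platform name
				Name:      "demo-server",  // placeholder
				Min_count: 1,
				ImageRef:  "image-uuid",   // placeholder
				FlavorRef: "flavor-uuid",  // placeholder
				Uuid:      "network-uuid", // placeholder
			},
		},
	}
}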
type CommitVmTaskResp struct {
@ -5309,13 +5291,15 @@ type ScheduleResp struct {
type ScheduleResult struct {
ClusterId string `json:"clusterId"`
TaskId string `json:"taskId"`
Strategy string `json:"strategy"`
Replica int32 `json:"replica"`
Msg string `json:"msg"`
}
type AiOption struct {
TaskName string `json:"taskName"`
AiClusterId string `json:"aiClusterId,optional"`
AdapterId string `json:"adapterId"`
AiClusterIds []string `json:"aiClusterIds"`
ResourceType string `json:"resourceType"`
Tops float64 `json:"Tops,optional"`
TaskType string `json:"taskType"`
@ -5336,6 +5320,10 @@ type AiTaskTypesResp struct {
TaskTypes []string `json:"taskTypes"`
}
type AiDatasetsReq struct {
AdapterId string `path:"adapterId"`
}
type AiDatasetsResp struct {
Datasets []string `json:"datasets"`
}
@ -5345,6 +5333,7 @@ type AiStrategyResp struct {
}
type AiAlgorithmsReq struct {
AdapterId string `path:"adapterId"`
ResourceType string `path:"resourceType"`
TaskType string `path:"taskType"`
Dataset string `path:"dataset"`
@ -5451,7 +5440,10 @@ type VmInfo struct {
BlockUuid string `json:"block_uuid,omitempty"`
SourceType string `json:"source_type,omitempty"`
DeleteOnTermination bool `json:"delete_on_termination,omitempty"`
State string `json:"state,omitempty"`
Status string `json:"status,omitempty"`
MinCount string `json:"min_count,omitempty"`
Platform string `json:"platform,omitempty"`
Uuid string `json:"uuid,omitempty"`
}
type PushTaskInfoReq struct {
@ -5468,7 +5460,37 @@ type PushTaskInfoResp struct {
}
type PushResourceInfoReq struct {
AdapterId int64 `json:"adapterId"`
AdapterId int64 `json:"adapterId"`
ResourceStats []ResourceStats `json:"resourceStats"`
}
type PushResourceInfoResp struct {
Code int64 `json:"code"`
Msg string `json:"msg"`
}
type ResourceStats struct {
ClusterId int64 `json:"clusterId"`
Name string `json:"name"`
CpuCoreAvail int64 `json:"cpuCoreAvail"`
CpuCoreTotal int64 `json:"cpuCoreTotal"`
MemAvail float64 `json:"memAvail"`
MemTotal float64 `json:"memTotal"`
DiskAvail float64 `json:"diskAvail"`
DiskTotal float64 `json:"diskTotal"`
GpuAvail int64 `json:"gpuAvail"`
CardsAvail []*Card `json:"cardsAvail"`
CpuCoreHours float64 `json:"cpuCoreHours"`
Balance float64 `json:"balance"`
}
type Card struct {
Platform string `json:"platform"`
Type string `json:"type"`
Name string `json:"name"`
TOpsAtFp16 float64 `json:"TOpsAtFp16"`
CardHours float64 `json:"cardHours"`
CardNum int32 `json:"cardNum"`
}
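For illustration, a hedged sketch of how a participant side might assemble the new resource report. The helper name, all numbers, and the units are assumptions; only the PushResourceInfoReq, ResourceStats, and Card types come from this change:

// buildResourceReport is a hypothetical helper; every figure below is a
// placeholder and the memory/disk unit (GiB) is an assumption.
func buildResourceReport(adapterId, clusterId int64, clusterName string) PushResourceInfoReq {
	return PushResourceInfoReq{
		AdapterId: adapterId,
		ResourceStats: []ResourceStats{
			{
				ClusterId:    clusterId,
				Name:         clusterName,
				CpuCoreAvail: 64,
				CpuCoreTotal: 128,
				MemAvail:     256.0,
				MemTotal:     512.0,
				DiskAvail:    2048.0,
				DiskTotal:    4096.0,
				GpuAvail:     4,
				CardsAvail: []*Card{
					{Platform: "nvidia", Type: "GPU", Name: "A100", TOpsAtFp16: 312, CardHours: 120, CardNum: 4},
				},
				CpuCoreHours: 1024,
				Balance:      9.5,
			},
		},
	}
}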
type CreateAlertRuleReq struct {

2
go.mod
View File

@ -2,6 +2,8 @@ module gitlink.org.cn/JointCloud/pcm-coordinator
go 1.21
retract v0.1.20-0.20240319015239-6ae13da05255
require (
github.com/JCCE-nudt/zero-contrib/zrpc/registry/nacos v0.0.0-20230419021610-13bbc83fbc3c
github.com/Masterminds/squirrel v1.5.4

24
pkg/models/taskvmmodel.go Normal file
View File

@ -0,0 +1,24 @@
package models
import "github.com/zeromicro/go-zero/core/stores/sqlx"
var _ TaskVmModel = (*customTaskVmModel)(nil)
type (
// TaskVmModel is an interface to be customized, add more methods here,
// and implement the added methods in customTaskVmModel.
TaskVmModel interface {
taskVmModel
}
customTaskVmModel struct {
*defaultTaskVmModel
}
)
// NewTaskVmModel returns a model for the database table.
func NewTaskVmModel(conn sqlx.SqlConn) TaskVmModel {
return &customTaskVmModel{
defaultTaskVmModel: newTaskVmModel(conn),
}
}

View File

@ -0,0 +1,107 @@
// Code generated by goctl. DO NOT EDIT.
package models
import (
"context"
"database/sql"
"fmt"
"strings"
"github.com/zeromicro/go-zero/core/stores/builder"
"github.com/zeromicro/go-zero/core/stores/sqlc"
"github.com/zeromicro/go-zero/core/stores/sqlx"
"github.com/zeromicro/go-zero/core/stringx"
)
var (
taskVmFieldNames = builder.RawFieldNames(&TaskVm{})
taskVmRows = strings.Join(taskVmFieldNames, ",")
taskVmRowsExpectAutoSet = strings.Join(stringx.Remove(taskVmFieldNames, "`id`", "`create_at`", "`create_time`", "`created_at`", "`update_at`", "`update_time`", "`updated_at`"), ",")
taskVmRowsWithPlaceHolder = strings.Join(stringx.Remove(taskVmFieldNames, "`id`", "`create_at`", "`create_time`", "`created_at`", "`update_at`", "`update_time`", "`updated_at`"), "=?,") + "=?"
)
type (
taskVmModel interface {
Insert(ctx context.Context, data *TaskVm) (sql.Result, error)
FindOne(ctx context.Context, id int64) (*TaskVm, error)
Update(ctx context.Context, data *TaskVm) error
Delete(ctx context.Context, id int64) error
}
defaultTaskVmModel struct {
conn sqlx.SqlConn
table string
}
TaskVm struct {
Id int64 `db:"id"` // id
ParticipantId int64 `db:"participant_id"` // participant-side id
TaskId int64 `db:"task_id"` // task id
Name string `db:"name"` // virtual machine name
AdapterId int64 `db:"adapter_id"` // id of the adapter executing the task
ClusterId int64 `db:"cluster_id"` // id of the cluster executing the task
FlavorRef string `db:"flavor_ref"` // flavor reference
ImageRef string `db:"image_ref"` // image reference
Status string `db:"status"` // status
Platform string `db:"platform"` // platform
Description string `db:"description"` // description
AvailabilityZone string `db:"availability_zone"`
MinCount int64 `db:"min_count"` // instance count
Uuid string `db:"uuid"` // network id
StartTime string `db:"start_time"` // start time
RunningTime string `db:"running_time"` // running time
Result string `db:"result"` // run result
DeletedAt string `db:"deleted_at"` // deletion time
}
)
func newTaskVmModel(conn sqlx.SqlConn) *defaultTaskVmModel {
return &defaultTaskVmModel{
conn: conn,
table: "`task_vm`",
}
}
func (m *defaultTaskVmModel) withSession(session sqlx.Session) *defaultTaskVmModel {
return &defaultTaskVmModel{
conn: sqlx.NewSqlConnFromSession(session),
table: "`task_vm`",
}
}
func (m *defaultTaskVmModel) Delete(ctx context.Context, id int64) error {
query := fmt.Sprintf("delete from %s where `id` = ?", m.table)
_, err := m.conn.ExecCtx(ctx, query, id)
return err
}
func (m *defaultTaskVmModel) FindOne(ctx context.Context, id int64) (*TaskVm, error) {
query := fmt.Sprintf("select %s from %s where `id` = ? limit 1", taskVmRows, m.table)
var resp TaskVm
err := m.conn.QueryRowCtx(ctx, &resp, query, id)
switch err {
case nil:
return &resp, nil
case sqlc.ErrNotFound:
return nil, ErrNotFound
default:
return nil, err
}
}
func (m *defaultTaskVmModel) Insert(ctx context.Context, data *TaskVm) (sql.Result, error) {
query := fmt.Sprintf("insert into %s (%s) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", m.table, taskVmRowsExpectAutoSet)
ret, err := m.conn.ExecCtx(ctx, query, data.ParticipantId, data.TaskId, data.Name, data.AdapterId, data.ClusterId, data.FlavorRef, data.ImageRef, data.Status, data.Platform, data.Description, data.AvailabilityZone, data.MinCount, data.Uuid, data.StartTime, data.RunningTime, data.Result, data.DeletedAt)
return ret, err
}
func (m *defaultTaskVmModel) Update(ctx context.Context, data *TaskVm) error {
query := fmt.Sprintf("update %s set %s where `id` = ?", m.table, taskVmRowsWithPlaceHolder)
_, err := m.conn.ExecCtx(ctx, query, data.ParticipantId, data.TaskId, data.Name, data.AdapterId, data.ClusterId, data.FlavorRef, data.ImageRef, data.Status, data.Platform, data.Description, data.AvailabilityZone, data.MinCount, data.Uuid, data.StartTime, data.RunningTime, data.Result, data.DeletedAt, data.Id)
return err
}
func (m *defaultTaskVmModel) tableName() string {
return m.table
}
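A hypothetical walk-through of the generated CRUD methods, written as if it lived in the models package; the DSN, field values, and status strings are placeholders, and only Insert/FindOne/Update/Delete from the interface above are real:

// demoTaskVm sketches the typical lifecycle of a task_vm row.
func demoTaskVm(ctx context.Context) error {
	conn := sqlx.NewMysql("user:pass@tcp(127.0.0.1:3306)/pcm") // placeholder DSN
	model := NewTaskVmModel(conn)

	// Insert skips the auto-set columns (id, create/update timestamps).
	res, err := model.Insert(ctx, &TaskVm{
		TaskId:    1,
		Name:      "demo-vm",
		AdapterId: 1,
		ClusterId: 1,
		Status:    "Running", // placeholder status value
	})
	if err != nil {
		return err
	}
	id, err := res.LastInsertId()
	if err != nil {
		return err
	}

	// FindOne maps sqlc.ErrNotFound to the package-level ErrNotFound.
	vm, err := model.FindOne(ctx, id)
	if err != nil {
		return err
	}

	vm.Status = "Succeeded" // placeholder status value
	return model.Update(ctx, vm)
}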