Merge remote-tracking branch 'upstream/master' into upmaster_wq
# Conflicts: # api/desc/pcm.api # api/internal/handler/routes.go Former-commit-id: e8916b1c9f9f0f56463a8395bae80bb159c0d065
This commit is contained in:
commit
a57e0b7ba8
|
@ -111,18 +111,17 @@ type HpcInfo struct {
|
|||
}
|
||||
|
||||
type CloudInfo struct {
|
||||
Participant int64 `json:"participant,omitempty"`
|
||||
Id int64 `json:"id,omitempty"`
|
||||
TaskId int64 `json:"taskId,omitempty"`
|
||||
ApiVersion string `json:"apiVersion,omitempty"`
|
||||
Kind string `json:"kind,omitempty"`
|
||||
Namespace string `json:"namespace,omitempty"`
|
||||
Name string `json:"name,omitempty"`
|
||||
Status string `json:"status,omitempty"`
|
||||
StartTime string `json:"startTime,omitempty"`
|
||||
RunningTime int64 `json:"runningTime,omitempty"`
|
||||
Result string `json:"result,omitempty"`
|
||||
YamlString string `json:"yamlString,omitempty"`
|
||||
Id uint `json:"id,omitempty,optional"`
|
||||
TaskId int64 `json:"taskId,omitempty,optional"`
|
||||
AdapterId uint `json:"adapterId,omitempty,optional"`
|
||||
ClusterId uint `json:"clusterId,omitempty,optional"`
|
||||
ClusterName string `json:"clusterName,omitempty,optional"`
|
||||
Kind string `json:"kind,omitempty,optional"`
|
||||
Status string `json:"status,omitempty,optional"`
|
||||
StartTime *time.Time `json:"startTime,omitempty,optional,string"`
|
||||
YamlString string `json:"yamlString,omitempty,optional"`
|
||||
Result string `json:"result,omitempty,optional"`
|
||||
Namespace string `json:"namespace,omitempty,optional"`
|
||||
}
|
||||
|
||||
type AiInfo struct {
|
||||
|
|
|
@ -1697,6 +1697,44 @@ PayloadCreateTrainJob{
|
|||
jobId string `json:"jobId,optional"`
|
||||
}
|
||||
********************/
|
||||
|
||||
/******************Ai Center overview*************************/
|
||||
CenterOverviewResp {
|
||||
CenterNum int32 `json:"totalCenters,optional"`
|
||||
TaskNum int32 `json:"totalTasks,optional"`
|
||||
CardNum int32 `json:"totalCards,optional"`
|
||||
PowerInTops float64 `json:"totalPower,optional"`
|
||||
}
|
||||
|
||||
CenterQueueingResp {
|
||||
Current []*CenterQueue `json:"current,optional"`
|
||||
History []*CenterQueue `json:"history,optional"`
|
||||
}
|
||||
|
||||
CenterQueue {
|
||||
Name string `json:"name,optional"`
|
||||
QueueingNum int32 `json:"num,optional"`
|
||||
}
|
||||
|
||||
CenterListResp {
|
||||
List []*AiCenter `json:"centerList,optional"`
|
||||
}
|
||||
|
||||
AiCenter {
|
||||
Name string `json:"name,optional"`
|
||||
StackName string `json:"stack,optional"`
|
||||
Version string `json:"version,optional"`
|
||||
}
|
||||
|
||||
CenterTaskListResp {
|
||||
List []*AiTask `json:"taskList,optional"`
|
||||
}
|
||||
|
||||
// AiTask is one entry of CenterTaskListResp.List.
// The status field is spelled with an upper-case initial so that it follows
// the same naming convention as the sibling fields; the JSON key is unchanged.
AiTask {
	Name        string `json:"name,optional"`
	Status      string `json:"status,optional"`
	TimeElapsed int32  `json:"elapsed,optional"`
}
|
||||
)
|
||||
|
||||
/******************create TrainIngJob end*************************/
|
||||
|
|
|
@ -59,9 +59,9 @@ type (
|
|||
Type int64 `json:"type"` // 租户所属(0数算,1超算,2智算)
|
||||
DeletedFlag int64 `json:"deletedFlag"` // 是否删除
|
||||
CreatedBy int64 `json:"createdBy"` // 创建人
|
||||
CreatedTime string `json:"createdTime"` // 创建时间
|
||||
CreateTime string `json:"createdTime"` // 创建时间
|
||||
UpdatedBy int64 `json:"updatedBy"` // 更新人
|
||||
UpdatedTime string `json:"updated_time"` // 更新时间
|
||||
UpdateTime string `json:"updated_time"` // 更新时间
|
||||
}
|
||||
|
||||
UpdateTenantReq {
|
||||
|
@ -115,6 +115,6 @@ type Cloud {
|
|||
StartTime string `json:"startTime"` // 开始时间
|
||||
RunningTime int64 `json:"runningTime"` // 运行时长
|
||||
CreatedBy int64 `json:"createdBy"` // 创建人
|
||||
CreatedTime string `json:"createdTime"` // 创建时间
|
||||
CreateTime string `json:"createdTime"` // 创建时间
|
||||
Result string `json:"result"`
|
||||
}
|
|
@ -158,23 +158,13 @@ type (
|
|||
|
||||
type (
|
||||
GeneralTaskReq {
|
||||
Name string `json:"name"`
|
||||
ComputeType string `json:"computeType"`
|
||||
TemplateId string `json:"templateId"`
|
||||
AdapterId string `json:"adapterId"`
|
||||
ClusterIds []string `json:"clusterIds"`
|
||||
Strategy Strategy `json:"strategy"`
|
||||
Name string `json:"name"`
|
||||
AdapterIds []string `json:"adapterIds"`
|
||||
ClusterIds []string `json:"clusterIds"`
|
||||
Strategy string `json:"strategy"`
|
||||
StaticWeightMap map[string]int32 `json:"staticWeightMap,optional"`
|
||||
ReqBody []string `json:"reqBody"`
|
||||
}
|
||||
|
||||
Strategy {
|
||||
Name string `json:"name"`
|
||||
StaticWeightList []StaticWeightList `json:"staticWeightList"`
|
||||
}
|
||||
|
||||
StaticWeightList {
|
||||
ClusterName string `json:"clusterName"`
|
||||
Weight int `json:"weight"`
|
||||
Replicas int64 `json:"replicas,string"`
|
||||
}
|
||||
)
|
||||
|
||||
|
@ -210,42 +200,116 @@ type (
|
|||
}
|
||||
)
|
||||
|
||||
|
||||
type (
|
||||
commitVmTaskReq {
|
||||
Name string `json:"name"`
|
||||
NsID string `json:"nsID"`
|
||||
Replicas int64 `json:"replicas,optional"`
|
||||
MatchLabels map[string]string `json:"matchLabels,optional"`
|
||||
AdapterId string `json:"adapterId,optional"`
|
||||
ClusterType string `json:"clusterType,optional"`
|
||||
//Virtual Machine Section
|
||||
// Name string `json:"name"`
|
||||
// NsID string `json:"nsID"`
|
||||
// Replicas int64 `json:"replicas,optional"`
|
||||
// MatchLabels map[string]string `json:"matchLabels,optional"`
|
||||
// AdapterId string `json:"adapterId,optional"`
|
||||
// ClusterType string `json:"clusterType,optional"`
|
||||
// //Virtual Machine Section
|
||||
CreateMulServer []CreateMulDomainServer `json:"createMulServer,optional"`
|
||||
VmOption *VmOption `json:"vmOption,optional"`
|
||||
}
|
||||
VmOption {
|
||||
AdapterId string `json:"adapterId"`
|
||||
VmClusterIds []string `json:"vmClusterIds"`
|
||||
Replicas int64 `json:"replicas,optional"`
|
||||
Name string `json:"name"`
|
||||
//ResourceType string `json:"resourceType"`
|
||||
//TaskType string `json:"taskType"`
|
||||
Strategy string `json:"strategy"`
|
||||
ClusterToStaticWeight map[string]int32 `json:"clusterToStaticWeight"`
|
||||
MatchLabels map[string]string `json:"matchLabels,optional"`
|
||||
StaticWeightMap map[string]int32 `json:"staticWeightMap,optional"`
|
||||
CreateMulServer []CreateMulDomainServer `json:"createMulServer,optional"`
|
||||
// Id int64 `json:"id"`
|
||||
// ParticipantId int64 `json:"participantId"`
|
||||
// TaskId int64 `json:"taskId"`
|
||||
// AdapterId int64 `json:"adapterId"`
|
||||
// ClusterId int64 `json:"clusterId"`
|
||||
// FlavorRef string `json:"flavorRef"`
|
||||
// ImageRef string `json:"imageRef"`
|
||||
// Status string `json:"status"`
|
||||
// Platform string `json:"platform"`
|
||||
// Description string `json:"description"`
|
||||
// AvailabilityZone string `json:"availabilityZone"`
|
||||
// MinCount int64 `json:"minCount"`
|
||||
// Uuid string `json:"uuid"`
|
||||
// StartTime string `json:"startTime"`
|
||||
// RunningTime string `json:"runningTime"`
|
||||
// Result string `json:"result"`
|
||||
// DeletedAt string `json:"deletedAt"`
|
||||
}
|
||||
|
||||
CreateMulDomainServer {
|
||||
Platform string `json:"platform,optional"`
|
||||
Name string `json:"name,optional"`
|
||||
Min_count int64 `json:"min_count,optional"`
|
||||
ImageRef string `json:"imageRef,optional"`
|
||||
FlavorRef string `json:"flavorRef,optional"`
|
||||
Uuid string `json:"uuid,optional"`
|
||||
Platform string `json:"platform,optional"`
|
||||
name string `json:"name,optional"`
|
||||
min_count int64 `json:"min_count,optional"`
|
||||
imageRef string `json:"imageRef,optional"`
|
||||
flavorRef string `json:"flavorRef,optional"`
|
||||
uuid string `json:"uuid,optional"`
|
||||
ClusterId string `json:"clusterId,optional"`
|
||||
}
|
||||
commitVmTaskResp {
|
||||
// VmTask []VmTask `json:"vmTask" copier:"VmTask"`
|
||||
TaskId int64 `json:"taskId"`
|
||||
Code int32 `json:"code"`
|
||||
Msg string `json:"msg"`
|
||||
}
|
||||
VmTask {
|
||||
Id string `json:"id" copier:"Id"`
|
||||
Links []VmLinks `json:"links" copier:"Links"`
|
||||
OSDCFDiskConfig string `json:"OS_DCF_diskConfig" copier:"OSDCFDiskConfig"`
|
||||
SecurityGroups []VmSecurity_groups_server `json:"security_groups" copier:"SecurityGroups"`
|
||||
AdminPass string `json:"adminPass" copier:"AdminPass"`
|
||||
ScheduleVmResult struct {
|
||||
ClusterId string `json:"clusterId"`
|
||||
TaskId string `json:"taskId"`
|
||||
Strategy string `json:"strategy"`
|
||||
Replica int32 `json:"replica"`
|
||||
Msg string `json:"msg"`
|
||||
}
|
||||
VmTask{
|
||||
Id string `json:"id" copier:"Id"`
|
||||
Links []VmLinks `json:"links" copier:"Links"`
|
||||
OSDCFDiskConfig string `json:"OS_DCF_diskConfig" copier:"OSDCFDiskConfig"`
|
||||
SecurityGroups []VmSecurity_groups_server `json:"security_groups" copier:"SecurityGroups"`
|
||||
AdminPass string `json:"adminPass" copier:"AdminPass"`
|
||||
}
|
||||
VmLinks {
|
||||
Href string `json:"href " copier:"Href"`
|
||||
Rel string `json:"rel" copier:"Rel"`
|
||||
Rel string `json:"rel" copier:"Rel"`
|
||||
}
|
||||
// commitVmTaskReq {
|
||||
// Name string `json:"name"`
|
||||
// NsID string `json:"nsID"`
|
||||
// Replicas int64 `json:"replicas,optional"`
|
||||
// MatchLabels map[string]string `json:"matchLabels,optional"`
|
||||
// AdapterId string `json:"adapterId,optional"`
|
||||
// ClusterType string `json:"clusterType,optional"`
|
||||
// //Virtual Machine Section
|
||||
// CreateMulServer []CreateMulDomainServer `json:"createMulServer,optional"`
|
||||
// }
|
||||
// CreateMulDomainServer {
|
||||
// Platform string `json:"platform,optional"`
|
||||
// Name string `json:"name,optional"`
|
||||
// Min_count int64 `json:"min_count,optional"`
|
||||
// ImageRef string `json:"imageRef,optional"`
|
||||
// FlavorRef string `json:"flavorRef,optional"`
|
||||
// Uuid string `json:"uuid,optional"`
|
||||
// }
|
||||
// commitVmTaskResp {
|
||||
// // VmTask []VmTask `json:"vmTask" copier:"VmTask"`
|
||||
// TaskId int64 `json:"taskId"`
|
||||
// Code int32 `json:"code"`
|
||||
// Msg string `json:"msg"`
|
||||
// }
|
||||
// VmTask {
|
||||
// Id string `json:"id" copier:"Id"`
|
||||
// Links []VmLinks `json:"links" copier:"Links"`
|
||||
// OSDCFDiskConfig string `json:"OS_DCF_diskConfig" copier:"OSDCFDiskConfig"`
|
||||
// SecurityGroups []VmSecurity_groups_server `json:"security_groups" copier:"SecurityGroups"`
|
||||
// AdminPass string `json:"adminPass" copier:"AdminPass"`
|
||||
// }
|
||||
// VmLinks {
|
||||
// Href string `json:"href " copier:"Href"`
|
||||
// Rel string `json:"rel" copier:"Rel"`
|
||||
// }
|
||||
|
||||
VmSecurity_groups_server {
|
||||
Name string `json:"name" copier:"Name"`
|
||||
|
@ -320,7 +384,7 @@ type (
|
|||
}
|
||||
|
||||
TaskModel {
|
||||
Id int64 `json:"id,omitempty" db:"id"` // id
|
||||
Id int64 `json:"id,omitempty,string" db:"id"` // id
|
||||
Name string `json:"name,omitempty" db:"name"` // 作业名称
|
||||
Description string `json:"description,omitempty" db:"description"` // 作业描述
|
||||
Status string `json:"status,omitempty" db:"status"` // 作业状态
|
||||
|
@ -336,6 +400,7 @@ type (
|
|||
NsID string `json:"nsId,omitempty" db:"ns_id"`
|
||||
TenantId string `json:"tenantId,omitempty" db:"tenant_id"`
|
||||
CreateTime string `json:"createTime,omitempty" db:"create_time" gorm:"autoCreateTime"`
|
||||
AdapterTypeDict int `json:"adapterTypeDict" db:"create_time" gorm:"adapter_type_dict"` //任务类型(对应字典表的值
|
||||
}
|
||||
)
|
||||
|
||||
|
@ -1004,9 +1069,9 @@ type (
|
|||
Environment string `json:"environment"`
|
||||
DeletedFlag int64 `json:"deleted_flag"` // 是否删除(0-否,1-是)
|
||||
CreatedBy int64 `json:"created_by"` // 创建人
|
||||
CreatedTime string `json:"created_time"` // 创建时间
|
||||
CreateTime string `json:"created_time"` // 创建时间
|
||||
UpdatedBy int64 `json:"updated_by"` // 更新人
|
||||
UpdatedTime string `json:"updated_time"` // 更新时间
|
||||
UpdateTime string `json:"updated_time"` // 更新时间
|
||||
}
|
||||
|
||||
CloudInfo {
|
||||
|
@ -1155,5 +1220,15 @@ type TaskStatusResp {
|
|||
Succeeded int `json:"Succeeded"`
|
||||
Failed int `json:"Failed"`
|
||||
Running int `json:"Running"`
|
||||
Pause int `json:"Pause"`
|
||||
Saved int `json:"Saved"`
|
||||
}
|
||||
|
||||
// TaskDetailsResp is the response body of the "task details" endpoint
// (get /core/task/details).
// The description field is spelled with an upper-case initial so that it
// follows the same naming convention as the other fields; the JSON key is
// unchanged.
type TaskDetailsResp {
	Name          string         `json:"name"`
	Description   string         `json:"description"`
	StartTime     string         `json:"startTime"`
	EndTime       string         `json:"endTime"`
	Strategy      int64          `json:"strategy"`
	SynergyStatus int64          `json:"synergyStatus"`
	ClusterInfos  []*ClusterInfo `json:"clusterInfos"`
}
|
|
@ -81,3 +81,28 @@ type (
|
|||
version string `json:"version"`
|
||||
}
|
||||
)
|
||||
|
||||
type (
|
||||
scheduleSituationResp{
|
||||
nodes []NodeRegion `json:"nodes"`
|
||||
links []Link `json:"links"`
|
||||
categories []Category `json:"categories"`
|
||||
}
|
||||
|
||||
NodeRegion{
|
||||
id string `json:"id"`
|
||||
name string `json:"name"`
|
||||
category int `json:"category"`
|
||||
value int `json:"value"`
|
||||
}
|
||||
|
||||
Link{
|
||||
source string `json:"source"`
|
||||
target string `json:"target"`
|
||||
}
|
||||
|
||||
Category{
|
||||
name string `json:"name"`
|
||||
}
|
||||
|
||||
)
|
|
@ -142,6 +142,10 @@ service pcm {
|
|||
@handler homeOverviewHandler
|
||||
get /core/homeOverview (HomeOverviewReq) returns (HomeOverviewResp)
|
||||
|
||||
@doc "task details"
|
||||
@handler taskDetails
|
||||
get /core/task/details (FId) returns(TaskDetailsResp)
|
||||
|
||||
@doc "Get Public Image"
|
||||
@handler getPublicImageHandler
|
||||
get /core/getPublicImage (PublicImageReq) returns (PublicImageResp)
|
||||
|
@ -226,7 +230,7 @@ service pcm {
|
|||
|
||||
@doc "Create cloud computing common tasks"
|
||||
@handler commitGeneralTask
|
||||
post /cloud/task/create (GeneralTaskReq) returns()
|
||||
post /cloud/task/create (GeneralTaskReq) returns ()
|
||||
}
|
||||
|
||||
//智算二级接口
|
||||
|
@ -235,6 +239,22 @@ service pcm {
|
|||
group: ai
|
||||
)
|
||||
service pcm {
|
||||
@doc "智算中心概览"
|
||||
@handler getCenterOverviewHandler
|
||||
get /ai/getCenterOverview returns (CenterOverviewResp)
|
||||
|
||||
@doc "智算中心排队状况"
|
||||
@handler getCenterQueueingHandler
|
||||
get /ai/getCenterQueueing returns (CenterQueueingResp)
|
||||
|
||||
@doc "智算中心列表"
|
||||
@handler getCenterListHandler
|
||||
get /ai/getCenterList returns (CenterListResp)
|
||||
|
||||
@doc "智算中心任务列表"
|
||||
@handler getCenterTaskListHandler
|
||||
get /ai/getCenterTaskList returns (CenterTaskListResp)
|
||||
|
||||
@doc "查询数据集列表"
|
||||
@handler listDataSetHandler
|
||||
get /ai/listDataSet/:projectId (DataSetReq) returns (DataSetResp)
|
||||
|
@ -938,8 +958,14 @@ service pcm {
|
|||
@handler ScheduleGetAlgorithmsHandler
|
||||
get /schedule/ai/getAlgorithms/:adapterId/:resourceType/:taskType/:dataset (AiAlgorithmsReq) returns (AiAlgorithmsResp)
|
||||
|
||||
@handler ScheduleGetAiJobLogLogHandler
|
||||
get /schedule/ai/getJobLog/:adapterId/:clusterId/:taskId/:instanceNum (AiJobLogReq) returns (AiJobLogResp)
|
||||
|
||||
@handler ScheduleSubmitHandler
|
||||
post /schedule/submit (ScheduleReq) returns (ScheduleResp)
|
||||
|
||||
@handler ScheduleGetOverviewHandler
|
||||
post /schedule/getOverview returns (ScheduleOverviewResp)
|
||||
}
|
||||
|
||||
@server(
|
||||
|
@ -991,7 +1017,7 @@ service pcm {
|
|||
|
||||
@doc "alert rules"
|
||||
@handler alertRulesHandler
|
||||
get /monitoring/alert/rule (AlertRulesReq)returns (AlertRulesResp)
|
||||
get /monitoring/alert/rule (AlertRulesReq) returns (AlertRulesResp)
|
||||
|
||||
@doc "cluster resource load"
|
||||
@handler clustersLoadHandler
|
||||
|
@ -1007,5 +1033,14 @@ service pcm {
|
|||
|
||||
@doc "Synchronize Cluster alert Information"
|
||||
@handler syncClusterAlertHandler
|
||||
post /core/syncClusterAlert (SyncClusterAlertReq)
|
||||
post /monitoring/syncClusterAlert (SyncClusterAlertReq)
|
||||
|
||||
@handler taskNumHandler
|
||||
get /monitoring/task/num (taskNumReq) returns (taskNumResp)
|
||||
|
||||
@handler adapterInfoHandler
|
||||
get /monitoring/adapter/info (adapterInfoReq) returns (adapterInfoResp)
|
||||
|
||||
@handler scheduleSituationHandler
|
||||
get /monitoring/schedule/situation returns (scheduleSituationResp)
|
||||
}
|
|
@ -24,6 +24,9 @@ type (
|
|||
Msg string `json:"msg"`
|
||||
}
|
||||
|
||||
ScheduleOverviewResp {
|
||||
}
|
||||
|
||||
AiOption {
|
||||
TaskName string `json:"taskName"`
|
||||
AdapterId string `json:"adapterId"`
|
||||
|
@ -81,4 +84,20 @@ type (
|
|||
AiJobLogResp {
|
||||
Log string `json:"log"`
|
||||
}
|
||||
|
||||
AiTaskDb {
|
||||
Id string `json:"id,omitempty" db:"id"`
|
||||
TaskId string `json:"taskId,omitempty" db:"task_id"`
|
||||
AdapterId string `json:"adapterId,omitempty" db:"adapter_id"`
|
||||
ClusterId string `json:"clusterId,omitempty" db:"cluster_id"`
|
||||
Name string `json:"name,omitempty" db:"name"`
|
||||
Replica string `json:"replica,omitempty" db:"replica"`
|
||||
ClusterTaskId string `json:"clusterTaskId,omitempty" db:"c_task_id"`
|
||||
Strategy string `json:"strategy,omitempty" db:"strategy"`
|
||||
Status string `json:"status,omitempty" db:"status"`
|
||||
Msg string `json:"msg,omitempty" db:"msg"`
|
||||
CommitTime string `json:"commitTime,omitempty" db:"commit_time"`
|
||||
StartTime string `json:"startTime,omitempty" db:"start_time"`
|
||||
EndTime string `json:"endTime,omitempty" db:"end_time"`
|
||||
}
|
||||
)
|
|
@ -6,7 +6,7 @@ Timeout: 50000
|
|||
|
||||
DB:
|
||||
DataSource: root:uJpLd6u-J?HC1@(10.206.0.12:3306)/pcm?parseTime=true&loc=Local
|
||||
# DataSource: root:uJpLd6u-J?HC1@(47.92.88.143:3306)/pcm?parseTime=true&loc=Local
|
||||
# DataSource: root:uJpLd6u-J?HC1@(47.92.88.143:3306)/pcm?parseTime=true&loc=Local
|
||||
Redis:
|
||||
Host: 10.206.0.12:6379
|
||||
Pass: redisPW123
|
||||
|
|
|
@ -0,0 +1,21 @@
|
|||
package ai
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/zeromicro/go-zero/rest/httpx"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/ai"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||
)
|
||||
|
||||
func GetCenterListHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
l := ai.NewGetCenterListLogic(r.Context(), svcCtx)
|
||||
resp, err := l.GetCenterList()
|
||||
if err != nil {
|
||||
httpx.ErrorCtx(r.Context(), w, err)
|
||||
} else {
|
||||
httpx.OkJsonCtx(r.Context(), w, resp)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,21 @@
|
|||
package ai
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/zeromicro/go-zero/rest/httpx"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/ai"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||
)
|
||||
|
||||
func GetCenterOverviewHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
l := ai.NewGetCenterOverviewLogic(r.Context(), svcCtx)
|
||||
resp, err := l.GetCenterOverview()
|
||||
if err != nil {
|
||||
httpx.ErrorCtx(r.Context(), w, err)
|
||||
} else {
|
||||
httpx.OkJsonCtx(r.Context(), w, resp)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,21 @@
|
|||
package ai
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/zeromicro/go-zero/rest/httpx"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/ai"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||
)
|
||||
|
||||
func GetCenterQueueingHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
l := ai.NewGetCenterQueueingLogic(r.Context(), svcCtx)
|
||||
resp, err := l.GetCenterQueueing()
|
||||
if err != nil {
|
||||
httpx.ErrorCtx(r.Context(), w, err)
|
||||
} else {
|
||||
httpx.OkJsonCtx(r.Context(), w, resp)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,21 @@
|
|||
package ai
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/zeromicro/go-zero/rest/httpx"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/ai"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||
)
|
||||
|
||||
func GetCenterTaskListHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
l := ai.NewGetCenterTaskListLogic(r.Context(), svcCtx)
|
||||
resp, err := l.GetCenterTaskList()
|
||||
if err != nil {
|
||||
httpx.ErrorCtx(r.Context(), w, err)
|
||||
} else {
|
||||
httpx.OkJsonCtx(r.Context(), w, resp)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,24 @@
|
|||
package core
|
||||
|
||||
import (
|
||||
"github.com/zeromicro/go-zero/rest/httpx"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/core"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
|
||||
"net/http"
|
||||
)
|
||||
|
||||
func TaskDetailsHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
var req types.FId
|
||||
if err := httpx.Parse(r, &req); err != nil {
|
||||
result.ParamErrorResult(r, w, err)
|
||||
return
|
||||
}
|
||||
|
||||
l := core.NewTaskDetailsLogic(r.Context(), svcCtx)
|
||||
resp, err := l.TaskDetails(&req)
|
||||
result.HttpResult(r, w, resp, err)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,17 @@
|
|||
package monitoring
|
||||
|
||||
import (
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
|
||||
"net/http"
|
||||
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/monitoring"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||
)
|
||||
|
||||
func ScheduleSituationHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
l := monitoring.NewScheduleSituationLogic(r.Context(), svcCtx)
|
||||
resp, err := l.ScheduleSituation()
|
||||
result.HttpResult(r, w, resp, err)
|
||||
}
|
||||
}
|
|
@ -170,6 +170,11 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
|
|||
Path: "/core/homeOverview",
|
||||
Handler: core.HomeOverviewHandler(serverCtx),
|
||||
},
|
||||
{
|
||||
Method: http.MethodGet,
|
||||
Path: "/core/task/details",
|
||||
Handler: core.TaskDetailsHandler(serverCtx),
|
||||
},
|
||||
{
|
||||
Method: http.MethodGet,
|
||||
Path: "/core/getPublicImage",
|
||||
|
@ -278,6 +283,26 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
|
|||
|
||||
server.AddRoutes(
|
||||
[]rest.Route{
|
||||
{
|
||||
Method: http.MethodGet,
|
||||
Path: "/ai/getCenterOverview",
|
||||
Handler: ai.GetCenterOverviewHandler(serverCtx),
|
||||
},
|
||||
{
|
||||
Method: http.MethodGet,
|
||||
Path: "/ai/getCenterQueueing",
|
||||
Handler: ai.GetCenterQueueingHandler(serverCtx),
|
||||
},
|
||||
{
|
||||
Method: http.MethodGet,
|
||||
Path: "/ai/getCenterList",
|
||||
Handler: ai.GetCenterListHandler(serverCtx),
|
||||
},
|
||||
{
|
||||
Method: http.MethodGet,
|
||||
Path: "/ai/getCenterTaskList",
|
||||
Handler: ai.GetCenterTaskListHandler(serverCtx),
|
||||
},
|
||||
{
|
||||
Method: http.MethodGet,
|
||||
Path: "/ai/listDataSet/:projectId",
|
||||
|
@ -1170,6 +1195,11 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
|
|||
Path: "/schedule/submit",
|
||||
Handler: schedule.ScheduleSubmitHandler(serverCtx),
|
||||
},
|
||||
{
|
||||
Method: http.MethodPost,
|
||||
Path: "/schedule/getOverview",
|
||||
Handler: schedule.ScheduleGetOverviewHandler(serverCtx),
|
||||
},
|
||||
},
|
||||
rest.WithPrefix("/pcm/v1"),
|
||||
)
|
||||
|
@ -1267,6 +1297,21 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
|
|||
Path: "/core/syncClusterAlert",
|
||||
Handler: monitoring.SyncClusterAlertHandler(serverCtx),
|
||||
},
|
||||
{
|
||||
Method: http.MethodGet,
|
||||
Path: "/monitoring/task/num",
|
||||
Handler: monitoring.TaskNumHandler(serverCtx),
|
||||
},
|
||||
{
|
||||
Method: http.MethodGet,
|
||||
Path: "/monitoring/adapter/info",
|
||||
Handler: monitoring.AdapterInfoHandler(serverCtx),
|
||||
},
|
||||
{
|
||||
Method: http.MethodGet,
|
||||
Path: "/monitoring/schedule/situation",
|
||||
Handler: monitoring.ScheduleSituationHandler(serverCtx),
|
||||
},
|
||||
},
|
||||
rest.WithPrefix("/pcm/v1"),
|
||||
)
|
||||
|
|
|
@ -0,0 +1,21 @@
|
|||
package schedule
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/zeromicro/go-zero/rest/httpx"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/schedule"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||
)
|
||||
|
||||
func ScheduleGetOverviewHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
l := schedule.NewScheduleGetOverviewLogic(r.Context(), svcCtx)
|
||||
resp, err := l.ScheduleGetOverview()
|
||||
if err != nil {
|
||||
httpx.ErrorCtx(r.Context(), w, err)
|
||||
} else {
|
||||
httpx.OkJsonCtx(r.Context(), w, resp)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,43 @@
|
|||
package ai
|
||||
|
||||
import (
|
||||
"context"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
|
||||
|
||||
"github.com/zeromicro/go-zero/core/logx"
|
||||
)
|
||||
|
||||
type GetCenterListLogic struct {
|
||||
logx.Logger
|
||||
ctx context.Context
|
||||
svcCtx *svc.ServiceContext
|
||||
}
|
||||
|
||||
func NewGetCenterListLogic(ctx context.Context, svcCtx *svc.ServiceContext) *GetCenterListLogic {
|
||||
return &GetCenterListLogic{
|
||||
Logger: logx.WithContext(ctx),
|
||||
ctx: ctx,
|
||||
svcCtx: svcCtx,
|
||||
}
|
||||
}
|
||||
|
||||
func (l *GetCenterListLogic) GetCenterList() (resp *types.CenterListResp, err error) {
|
||||
resp = &types.CenterListResp{}
|
||||
|
||||
adapterList, err := l.svcCtx.Scheduler.AiStorages.GetAdaptersByType("1")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, adapter := range adapterList {
|
||||
a := &types.AiCenter{
|
||||
Name: adapter.Name,
|
||||
StackName: adapter.Nickname,
|
||||
Version: adapter.Version,
|
||||
}
|
||||
resp.List = append(resp.List, a)
|
||||
}
|
||||
|
||||
return resp, nil
|
||||
}
|
|
@ -0,0 +1,139 @@
|
|||
package ai
|
||||
|
||||
import (
|
||||
"context"
|
||||
"github.com/zeromicro/go-zero/core/logx"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
|
||||
"strconv"
|
||||
"sync"
|
||||
)
|
||||
|
||||
type GetCenterOverviewLogic struct {
|
||||
logx.Logger
|
||||
ctx context.Context
|
||||
svcCtx *svc.ServiceContext
|
||||
}
|
||||
|
||||
func NewGetCenterOverviewLogic(ctx context.Context, svcCtx *svc.ServiceContext) *GetCenterOverviewLogic {
|
||||
return &GetCenterOverviewLogic{
|
||||
Logger: logx.WithContext(ctx),
|
||||
ctx: ctx,
|
||||
svcCtx: svcCtx,
|
||||
}
|
||||
}
|
||||
|
||||
func (l *GetCenterOverviewLogic) GetCenterOverview() (resp *types.CenterOverviewResp, err error) {
|
||||
resp = &types.CenterOverviewResp{}
|
||||
var mu sync.RWMutex
|
||||
ch := make(chan struct{})
|
||||
|
||||
var centerNum int32
|
||||
var taskNum int32
|
||||
var cardNum int32
|
||||
var totalTops float64
|
||||
|
||||
adapterList, err := l.svcCtx.Scheduler.AiStorages.GetAdaptersByType("1")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
centerNum = int32(len(adapterList))
|
||||
resp.CenterNum = centerNum
|
||||
|
||||
go l.updateClusterResource(&mu, ch, adapterList)
|
||||
|
||||
for _, adapter := range adapterList {
|
||||
taskList, err := l.svcCtx.Scheduler.AiStorages.GetAiTasksByAdapterId(adapter.Id)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
taskNum += int32(len(taskList))
|
||||
}
|
||||
resp.TaskNum = taskNum
|
||||
|
||||
for _, adapter := range adapterList {
|
||||
clusters, err := l.svcCtx.Scheduler.AiStorages.GetClustersByAdapterId(adapter.Id)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
for _, cluster := range clusters.List {
|
||||
mu.RLock()
|
||||
clusterResource, err := l.svcCtx.Scheduler.AiStorages.GetClusterResourcesById(cluster.Id)
|
||||
mu.RUnlock()
|
||||
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
cardNum += int32(clusterResource.CardTotal)
|
||||
totalTops += clusterResource.CardTopsTotal
|
||||
}
|
||||
}
|
||||
resp.CardNum = cardNum
|
||||
resp.PowerInTops = totalTops
|
||||
<-ch
|
||||
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
func (l *GetCenterOverviewLogic) updateClusterResource(mu *sync.RWMutex, ch chan<- struct{}, list []*types.AdapterInfo) {
|
||||
var wg sync.WaitGroup
|
||||
for _, adapter := range list {
|
||||
clusters, err := l.svcCtx.Scheduler.AiStorages.GetClustersByAdapterId(adapter.Id)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
for _, cluster := range clusters.List {
|
||||
c := cluster
|
||||
mu.RLock()
|
||||
clusterResource, err := l.svcCtx.Scheduler.AiStorages.GetClusterResourcesById(c.Id)
|
||||
mu.RUnlock()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
stat, err := l.svcCtx.Scheduler.AiService.AiCollectorAdapterMap[adapter.Id][c.Id].GetResourceStats(l.ctx)
|
||||
if err != nil {
|
||||
wg.Done()
|
||||
return
|
||||
}
|
||||
clusterType, err := strconv.ParseInt(adapter.Type, 10, 64)
|
||||
if err != nil {
|
||||
wg.Done()
|
||||
return
|
||||
}
|
||||
var cardTotal int64
|
||||
var topsTotal float64
|
||||
for _, card := range stat.CardsAvail {
|
||||
cardTotal += int64(card.CardNum)
|
||||
topsTotal += card.TOpsAtFp16 * float64(card.CardNum)
|
||||
}
|
||||
|
||||
mu.Lock()
|
||||
if (models.TClusterResource{} == *clusterResource) {
|
||||
err = l.svcCtx.Scheduler.AiStorages.SaveClusterResources(c.Id, c.Name, clusterType, float64(stat.CpuCoreAvail), float64(stat.CpuCoreTotal),
|
||||
stat.MemAvail, stat.MemTotal, stat.DiskAvail, stat.DiskTotal, float64(stat.GpuAvail), float64(stat.GpuTotal), cardTotal, topsTotal)
|
||||
if err != nil {
|
||||
mu.Unlock()
|
||||
wg.Done()
|
||||
return
|
||||
}
|
||||
} else {
|
||||
clusterResource.CardTotal = cardTotal
|
||||
clusterResource.CardTopsTotal = topsTotal
|
||||
err := l.svcCtx.Scheduler.AiStorages.UpdateClusterResources(clusterResource)
|
||||
if err != nil {
|
||||
mu.Unlock()
|
||||
wg.Done()
|
||||
return
|
||||
}
|
||||
}
|
||||
mu.Unlock()
|
||||
wg.Done()
|
||||
}()
|
||||
}
|
||||
}
|
||||
wg.Wait()
|
||||
ch <- struct{}{}
|
||||
}
|
|
@ -0,0 +1,70 @@
|
|||
package ai
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sort"
|
||||
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
|
||||
|
||||
"github.com/zeromicro/go-zero/core/logx"
|
||||
)
|
||||
|
||||
type GetCenterQueueingLogic struct {
|
||||
logx.Logger
|
||||
ctx context.Context
|
||||
svcCtx *svc.ServiceContext
|
||||
}
|
||||
|
||||
func NewGetCenterQueueingLogic(ctx context.Context, svcCtx *svc.ServiceContext) *GetCenterQueueingLogic {
|
||||
return &GetCenterQueueingLogic{
|
||||
Logger: logx.WithContext(ctx),
|
||||
ctx: ctx,
|
||||
svcCtx: svcCtx,
|
||||
}
|
||||
}
|
||||
|
||||
func (l *GetCenterQueueingLogic) GetCenterQueueing() (resp *types.CenterQueueingResp, err error) {
|
||||
resp = &types.CenterQueueingResp{}
|
||||
|
||||
adapters, err := l.svcCtx.Scheduler.AiStorages.GetAdaptersByType("1")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, adapter := range adapters {
|
||||
clusters, err := l.svcCtx.Scheduler.AiStorages.GetClustersByAdapterId(adapter.Id)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
for _, cluster := range clusters.List {
|
||||
queues, err := l.svcCtx.Scheduler.AiStorages.GetClusterTaskQueues(adapter.Id, cluster.Id)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
//todo sync current task queues
|
||||
current := &types.CenterQueue{
|
||||
Name: cluster.Name,
|
||||
QueueingNum: int32(queues[0].QueueNum),
|
||||
}
|
||||
history := &types.CenterQueue{
|
||||
Name: cluster.Name,
|
||||
QueueingNum: int32(queues[0].QueueNum),
|
||||
}
|
||||
resp.Current = append(resp.Current, current)
|
||||
resp.History = append(resp.History, history)
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
sortQueueingNum(resp.Current)
|
||||
sortQueueingNum(resp.History)
|
||||
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
func sortQueueingNum(q []*types.CenterQueue) {
|
||||
sort.Slice(q, func(i, j int) bool {
|
||||
return q[i].QueueingNum > q[j].QueueingNum
|
||||
})
|
||||
}
|
|
@ -0,0 +1,116 @@
|
|||
package ai
|
||||
|
||||
import (
|
||||
"context"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
|
||||
"strconv"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
|
||||
|
||||
"github.com/zeromicro/go-zero/core/logx"
|
||||
)
|
||||
|
||||
// GetCenterTaskListLogic holds the logger, request context and service
// context that GetCenterTaskList and updateAiTaskStatus work with.
type GetCenterTaskListLogic struct {
|
||||
logx.Logger // embedded logger; the constructor builds it with logx.WithContext(ctx)
|
||||
ctx context.Context // context passed to the constructor; forwarded to GetTrainingTask
|
||||
svcCtx *svc.ServiceContext // service context passed to the constructor
|
||||
}
|
||||
|
||||
func NewGetCenterTaskListLogic(ctx context.Context, svcCtx *svc.ServiceContext) *GetCenterTaskListLogic {
|
||||
return &GetCenterTaskListLogic{
|
||||
Logger: logx.WithContext(ctx),
|
||||
ctx: ctx,
|
||||
svcCtx: svcCtx,
|
||||
}
|
||||
}
|
||||
|
||||
func (l *GetCenterTaskListLogic) GetCenterTaskList() (resp *types.CenterTaskListResp, err error) {
|
||||
resp = &types.CenterTaskListResp{}
|
||||
var mu sync.RWMutex
|
||||
ch := make(chan struct{})
|
||||
|
||||
adapterList, err := l.svcCtx.Scheduler.AiStorages.GetAdaptersByType("1")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
go l.updateAiTaskStatus(&mu, ch, adapterList)
|
||||
|
||||
for _, adapter := range adapterList {
|
||||
mu.RLock()
|
||||
taskList, err := l.svcCtx.Scheduler.AiStorages.GetAiTasksByAdapterId(adapter.Id)
|
||||
mu.RUnlock()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
for _, task := range taskList {
|
||||
var elapsed time.Duration
|
||||
switch task.Status {
|
||||
case constants.Completed:
|
||||
end, err := time.ParseInLocation(constants.Layout, task.EndTime, time.Local)
|
||||
if err != nil {
|
||||
elapsed = time.Duration(0)
|
||||
}
|
||||
start, err := time.ParseInLocation(constants.Layout, task.StartTime, time.Local)
|
||||
if err != nil {
|
||||
elapsed = time.Duration(0)
|
||||
}
|
||||
elapsed = end.Sub(start)
|
||||
case constants.Running:
|
||||
elapsed = time.Now().Sub(task.CommitTime)
|
||||
default:
|
||||
elapsed = 0
|
||||
}
|
||||
|
||||
t := &types.AiTask{
|
||||
Name: task.Name,
|
||||
Status: task.Status,
|
||||
TimeElapsed: int32(elapsed.Seconds()),
|
||||
}
|
||||
resp.List = append(resp.List, t)
|
||||
}
|
||||
}
|
||||
<-ch
|
||||
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
func (l *GetCenterTaskListLogic) updateAiTaskStatus(mu *sync.RWMutex, ch chan<- struct{}, list []*types.AdapterInfo) {
|
||||
var wg sync.WaitGroup
|
||||
for _, adapter := range list {
|
||||
taskList, err := l.svcCtx.Scheduler.AiStorages.GetAiTasksByAdapterId(adapter.Id)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
for _, task := range taskList {
|
||||
t := task
|
||||
if t.Status == constants.Completed {
|
||||
continue
|
||||
}
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
trainingTask, err := l.svcCtx.Scheduler.AiService.AiCollectorAdapterMap[adapter.Id][strconv.FormatInt(t.ClusterId, 10)].GetTrainingTask(l.ctx, t.JobId)
|
||||
if err != nil {
|
||||
wg.Done()
|
||||
return
|
||||
}
|
||||
t.Status = trainingTask.Status
|
||||
t.StartTime = trainingTask.Start
|
||||
t.EndTime = trainingTask.End
|
||||
mu.Lock()
|
||||
err = l.svcCtx.Scheduler.AiStorages.UpdateAiTask(t)
|
||||
mu.Unlock()
|
||||
if err != nil {
|
||||
wg.Done()
|
||||
return
|
||||
}
|
||||
wg.Done()
|
||||
}()
|
||||
}
|
||||
}
|
||||
wg.Wait()
|
||||
ch <- struct{}{}
|
||||
}
|
|
@ -4,15 +4,19 @@ import (
|
|||
"bytes"
|
||||
"context"
|
||||
"github.com/pkg/errors"
|
||||
clientCore "gitlink.org.cn/JointCloud/pcm-coordinator/api/client"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models/cloud"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
|
||||
"io"
|
||||
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
syaml "k8s.io/apimachinery/pkg/runtime/serializer/yaml"
|
||||
kyaml "k8s.io/apimachinery/pkg/util/yaml"
|
||||
"sigs.k8s.io/yaml"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
|
@ -37,62 +41,102 @@ func NewCommitGeneralTaskLogic(ctx context.Context, svcCtx *svc.ServiceContext)
|
|||
}
|
||||
|
||||
func (l *CommitGeneralTaskLogic) CommitGeneralTask(req *types.GeneralTaskReq) error {
|
||||
var yamlStr []string
|
||||
for _, s := range req.ReqBody {
|
||||
j2, err := yaml.YAMLToJSON([]byte(s))
|
||||
if err != nil {
|
||||
logx.Errorf("Failed to convert yaml to JSON, err: %v", err)
|
||||
return err
|
||||
tx := l.svcCtx.DbEngin.Begin()
|
||||
// 执行回滚或者提交操作
|
||||
defer func() {
|
||||
if p := recover(); p != nil {
|
||||
tx.Rollback()
|
||||
logx.Error(p)
|
||||
} else if tx.Error != nil {
|
||||
logx.Info("rollback, error", tx.Error)
|
||||
tx.Rollback()
|
||||
} else {
|
||||
tx = tx.Commit()
|
||||
logx.Info("commit success")
|
||||
}
|
||||
yamlStr = append(yamlStr, string(j2))
|
||||
}
|
||||
result := strings.Join(yamlStr, ",")
|
||||
//TODO The namespace is fixed to ns-admin for the time being. Later, the namespace is obtained based on the user
|
||||
taskModel := models.Task{
|
||||
Status: constants.Saved,
|
||||
Name: req.Name,
|
||||
CommitTime: time.Now(),
|
||||
NsID: "ns-admin",
|
||||
YamlString: "[" + result + "]",
|
||||
}
|
||||
// Save the task data to the database
|
||||
tx := l.svcCtx.DbEngin.Create(&taskModel)
|
||||
if tx.Error != nil {
|
||||
return tx.Error
|
||||
}
|
||||
|
||||
}()
|
||||
//TODO adapter
|
||||
adapterId, _ := strconv.ParseUint(req.AdapterIds[0], 10, 64)
|
||||
var clusters []*models.CloudModel
|
||||
err := l.svcCtx.DbEngin.Raw("SELECT * FROM `t_cluster` where adapter_id = ? and id in ?", req.AdapterId, req.ClusterIds).Scan(&clusters).Error
|
||||
err := tx.Raw("SELECT * FROM `t_cluster` where adapter_id in ? and id in ?", req.AdapterIds, req.ClusterIds).Scan(&clusters).Error
|
||||
if err != nil {
|
||||
logx.Errorf("CommitGeneralTask() => sql execution error: %v", err)
|
||||
return errors.Errorf("the cluster does not match the drive resources. Check the data")
|
||||
}
|
||||
taskCloud := cloud.TaskCloudModel{}
|
||||
//TODO 执行策略返回集群跟 Replica
|
||||
for _, c := range clusters {
|
||||
opt := &option.CloudOption{}
|
||||
utils.Convert(&req, &opt)
|
||||
sc, _ := schedulers.NewCloudScheduler(l.ctx, "", l.svcCtx.Scheduler, opt, tx, l.svcCtx.PromClient)
|
||||
|
||||
results, err := l.svcCtx.Scheduler.AssignAndSchedule(sc)
|
||||
if err != nil {
|
||||
logx.Errorf("AssignAndSchedule() => execution error: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
rs := (results).([]*schedulers.CloudResult)
|
||||
|
||||
var synergyStatus int64
|
||||
if len(rs) > 1 {
|
||||
synergyStatus = 1
|
||||
}
|
||||
var strategy int64
|
||||
sqlStr := `select t_dict_item.item_value
|
||||
from t_dict
|
||||
left join t_dict_item on t_dict.id = t_dict_item.dict_id
|
||||
where item_text = ?
|
||||
and t_dict.dict_code = 'schedule_Strategy'`
|
||||
//查询调度策略
|
||||
err = tx.Raw(sqlStr, req.Strategy).Scan(&strategy).Error
|
||||
taskModel := models.Task{
|
||||
Id: utils.GenSnowflakeID(),
|
||||
Status: constants.Saved,
|
||||
Name: req.Name,
|
||||
CommitTime: time.Now(),
|
||||
YamlString: strings.Join(req.ReqBody, "\n---\n"),
|
||||
AdapterTypeDict: 0,
|
||||
SynergyStatus: synergyStatus,
|
||||
Strategy: strategy,
|
||||
}
|
||||
var taskClouds []cloud.TaskCloudModel
|
||||
for _, r := range rs {
|
||||
for _, s := range req.ReqBody {
|
||||
sStruct := UnMarshalK8sStruct(s)
|
||||
sStruct := UnMarshalK8sStruct(s, int64(r.Replica))
|
||||
unString, _ := sStruct.MarshalJSON()
|
||||
taskCloud.Id = utils.GenSnowflakeIDUint()
|
||||
taskCloud.TaskId = uint(taskModel.Id)
|
||||
taskCloud.AdapterId = c.AdapterId
|
||||
taskCloud.ClusterId = c.Id
|
||||
taskCloud.ClusterName = c.Name
|
||||
taskCloud.Status = "Saved"
|
||||
clusterId, _ := strconv.ParseUint(r.ClusterId, 10, 64)
|
||||
taskCloud.AdapterId = uint(adapterId)
|
||||
taskCloud.ClusterId = uint(clusterId)
|
||||
taskCloud.ClusterName = r.ClusterName
|
||||
taskCloud.Status = constants.Saved
|
||||
taskCloud.YamlString = string(unString)
|
||||
taskCloud.Kind = sStruct.GetKind()
|
||||
taskCloud.Namespace = sStruct.GetNamespace()
|
||||
tx = l.svcCtx.DbEngin.Create(&taskCloud)
|
||||
if tx.Error != nil {
|
||||
logx.Errorf("CommitGeneralTask() create taskCloud => sql execution error: %v", err)
|
||||
return tx.Error
|
||||
}
|
||||
taskClouds = append(taskClouds, taskCloud)
|
||||
}
|
||||
}
|
||||
|
||||
adapterName := ""
|
||||
tx.Table("t_adapter").Select("name").Where("id=?", adapterId).Find(&adapterName)
|
||||
noticeInfo := clientCore.NoticeInfo{
|
||||
AdapterId: int64(adapterId),
|
||||
AdapterName: adapterName,
|
||||
NoticeType: "create",
|
||||
TaskName: req.Name,
|
||||
Incident: "任务创建中",
|
||||
CreatedTime: time.Now(),
|
||||
}
|
||||
db := tx.Table("task").Create(&taskModel)
|
||||
db = tx.Table("task_cloud").Create(&taskClouds)
|
||||
db = tx.Table("t_notice").Create(¬iceInfo)
|
||||
if db.Error != nil {
|
||||
logx.Errorf("Task creation failure, err: %v", db.Error)
|
||||
return errors.New("task creation failure")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func UnMarshalK8sStruct(yamlString string) *unstructured.Unstructured {
|
||||
func UnMarshalK8sStruct(yamlString string, replica int64) *unstructured.Unstructured {
|
||||
unstructuredObj := &unstructured.Unstructured{}
|
||||
d := kyaml.NewYAMLOrJSONDecoder(bytes.NewBufferString(yamlString), 4096)
|
||||
var err error
|
||||
|
@ -113,6 +157,10 @@ func UnMarshalK8sStruct(yamlString string) *unstructured.Unstructured {
|
|||
if len(unstructuredObj.GetNamespace()) == 0 {
|
||||
unstructuredObj.SetNamespace("default")
|
||||
}
|
||||
//设置副本数
|
||||
if unstructuredObj.GetKind() == "Deployment" || unstructuredObj.GetKind() == "StatefulSet" {
|
||||
unstructured.SetNestedField(unstructuredObj.Object, replica, "spec", "replicas")
|
||||
}
|
||||
}
|
||||
return unstructuredObj
|
||||
}
|
||||
|
|
|
@ -1,65 +0,0 @@
|
|||
package core
|
||||
|
||||
import (
|
||||
"context"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/pkg/response"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
|
||||
tool "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
|
||||
"k8s.io/apimachinery/pkg/util/json"
|
||||
"time"
|
||||
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
|
||||
|
||||
"github.com/zeromicro/go-zero/core/logx"
|
||||
)
|
||||
|
||||
// CommitHpcTaskLogic holds the logger, request context and service context
// that CommitHpcTask works with.
type CommitHpcTaskLogic struct {
|
||||
logx.Logger // embedded logger; the constructor builds it with logx.WithContext(ctx)
|
||||
ctx context.Context // context passed to the constructor
|
||||
svcCtx *svc.ServiceContext // service context passed to the constructor
|
||||
}
|
||||
|
||||
func NewCommitHpcTaskLogic(ctx context.Context, svcCtx *svc.ServiceContext) *CommitHpcTaskLogic {
|
||||
return &CommitHpcTaskLogic{
|
||||
Logger: logx.WithContext(ctx),
|
||||
ctx: ctx,
|
||||
svcCtx: svcCtx,
|
||||
}
|
||||
}
|
||||
|
||||
func (l *CommitHpcTaskLogic) CommitHpcTask(req *types.CommitHpcTaskReq) (resp *types.CommitHpcTaskResp, err error) {
|
||||
// 构建主任务结构体
|
||||
taskModel := models.Task{
|
||||
Status: constants.Saved,
|
||||
Description: req.Description,
|
||||
Name: req.Name,
|
||||
CommitTime: time.Now(),
|
||||
}
|
||||
// 保存任务数据到数据库
|
||||
tx := l.svcCtx.DbEngin.Create(&taskModel)
|
||||
if tx.Error != nil {
|
||||
return nil, tx.Error
|
||||
}
|
||||
hpc := models.Hpc{}
|
||||
tool.Convert(req, &hpc)
|
||||
mqInfo := response.TaskInfo{
|
||||
TaskId: taskModel.Id,
|
||||
TaskType: "hpc",
|
||||
MatchLabels: req.MatchLabels,
|
||||
//Metadata: hpc,
|
||||
}
|
||||
req.TaskId = taskModel.Id
|
||||
// 将任务数据转换成消息体
|
||||
reqMessage, err := json.Marshal(mqInfo)
|
||||
if err != nil {
|
||||
logx.Error(err)
|
||||
return nil, err
|
||||
}
|
||||
publish := l.svcCtx.RedisClient.Publish(context.Background(), mqInfo.TaskType, reqMessage)
|
||||
if publish.Err() != nil {
|
||||
return nil, publish.Err()
|
||||
}
|
||||
return
|
||||
}
|
|
@ -3,11 +3,13 @@ package core
|
|||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
|
||||
"math/rand"
|
||||
"strconv"
|
||||
"time"
|
||||
|
||||
"github.com/zeromicro/go-zero/core/logx"
|
||||
|
@ -29,11 +31,24 @@ func NewCommitVmTaskLogic(ctx context.Context, svcCtx *svc.ServiceContext) *Comm
|
|||
|
||||
func (l *CommitVmTaskLogic) CommitVmTask(req *types.CommitVmTaskReq) (resp *types.CommitVmTaskResp, err error) {
|
||||
// todo: add your logic here and delete this line
|
||||
resp = &types.CommitVmTaskResp{}
|
||||
//Building the main task structure
|
||||
opt := &option.VmOption{
|
||||
AdapterId: req.VmOption.AdapterId,
|
||||
Replicas: req.VmOption.Replicas,
|
||||
Strategy: req.VmOption.Strategy,
|
||||
ClusterToStaticWeight: req.VmOption.StaticWeightMap,
|
||||
Status: constants.Saved,
|
||||
MatchLabels: req.VmOption.MatchLabels,
|
||||
StaticWeightMap: req.VmOption.StaticWeightMap,
|
||||
Name: req.VmOption.Name,
|
||||
CommitTime: time.Now(),
|
||||
}
|
||||
taskModel := models.Task{
|
||||
Status: constants.Saved,
|
||||
Name: req.Name,
|
||||
CommitTime: time.Now(),
|
||||
Status: constants.Saved,
|
||||
Name: req.VmOption.Name,
|
||||
CommitTime: time.Now(),
|
||||
Description: "vm task",
|
||||
}
|
||||
// Save task data to database
|
||||
tx := l.svcCtx.DbEngin.Create(&taskModel)
|
||||
|
@ -41,38 +56,63 @@ func (l *CommitVmTaskLogic) CommitVmTask(req *types.CommitVmTaskReq) (resp *type
|
|||
return nil, tx.Error
|
||||
}
|
||||
|
||||
for _, CreateMulServer := range req.CreateMulServer {
|
||||
fmt.Println("", req.CreateMulServer)
|
||||
var clusterIds []int64
|
||||
l.svcCtx.DbEngin.Raw("SELECT id FROM `t_cluster` where adapter_id = ? and label = ?", req.AdapterId, req.ClusterType).Scan(&clusterIds)
|
||||
//var clusters []*models.VmModel
|
||||
//err2 := l.svcCtx.DbEngin.Raw("SELECT * FROM `t_cluster` where adapter_id in ? and id in ?", req.VmOption.AdapterId, req.VmOption.VmClusterIds).Scan(&clusters).Error
|
||||
//if err2 != nil {
|
||||
// logx.Errorf("CommitGeneralTask() => sql execution error: %v", err)
|
||||
// //return errors.Errorf("the cluster does not match the drive resources. Check the data"), nil
|
||||
//}
|
||||
|
||||
if len(clusterIds) == 0 || clusterIds == nil {
|
||||
return nil, nil
|
||||
}
|
||||
taskVm := models.TaskVm{}
|
||||
//TODO 执行策略返回集群跟 Replica
|
||||
/*opt := &option.VmOption{}
|
||||
utils.Convert(&req, &opt)*/
|
||||
// 2、Initialize scheduler
|
||||
vmSchdl, err := schedulers.NewVmScheduler(l.ctx, "", l.svcCtx.Scheduler, opt, l.svcCtx.DbEngin, l.svcCtx.PromClient)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
vmInfo := models.TaskVm{
|
||||
TaskId: taskModel.Id,
|
||||
ClusterId: clusterIds[rand.Intn(len(clusterIds))],
|
||||
Name: taskModel.Name,
|
||||
Status: "Saved",
|
||||
StartTime: time.Now().String(),
|
||||
MinCount: CreateMulServer.Min_count,
|
||||
ImageRef: CreateMulServer.ImageRef,
|
||||
FlavorRef: CreateMulServer.FlavorRef,
|
||||
Uuid: CreateMulServer.Uuid,
|
||||
Platform: CreateMulServer.Platform,
|
||||
}
|
||||
// 3、Return scheduling results
|
||||
results, err := l.svcCtx.Scheduler.AssignAndSchedule(vmSchdl)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tx = l.svcCtx.DbEngin.Create(&vmInfo)
|
||||
if tx.Error != nil {
|
||||
return nil, tx.Error
|
||||
}
|
||||
resp = &types.CommitVmTaskResp{
|
||||
Code: 200,
|
||||
Msg: "success",
|
||||
TaskId: taskModel.Id,
|
||||
rs := (results).([]*schedulers.VmResult)
|
||||
for _, r := range rs {
|
||||
for _, CreateMulServer := range req.CreateMulServer {
|
||||
if r.Replica > 0 && r.ClusterId == CreateMulServer.ClusterId {
|
||||
fmt.Println("", req.CreateMulServer)
|
||||
var clusterIds []int64
|
||||
l.svcCtx.DbEngin.Raw("SELECT id FROM `t_cluster` where adapter_id = ? ", req.VmOption.AdapterId).Scan(&clusterIds)
|
||||
if len(clusterIds) == 0 || clusterIds == nil {
|
||||
return nil, nil
|
||||
}
|
||||
adapterId, _ := strconv.ParseUint(req.VmOption.AdapterId, 10, 64)
|
||||
taskVm.AdapterId = int64(adapterId)
|
||||
clusterId, _ := strconv.ParseUint(r.ClusterId, 10, 64)
|
||||
taskVm.ClusterId = int64(clusterId)
|
||||
taskVm.Name = req.VmOption.Name
|
||||
taskVm.TaskId = taskModel.Id
|
||||
clusterId, _ = strconv.ParseUint(r.ClusterId, 10, 64)
|
||||
taskVm.ClusterId = int64(clusterId)
|
||||
taskVm.Status = "Saved"
|
||||
taskVm.StartTime = time.Now().String()
|
||||
taskVm.MinCount = CreateMulServer.Min_count
|
||||
taskVm.ImageRef = CreateMulServer.ImageRef
|
||||
taskVm.FlavorRef = CreateMulServer.FlavorRef
|
||||
taskVm.Uuid = CreateMulServer.Uuid
|
||||
taskVm.Platform = CreateMulServer.Platform
|
||||
tx = l.svcCtx.DbEngin.Create(&taskVm)
|
||||
if tx.Error != nil {
|
||||
return nil, tx.Error
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
resp.Code = 200
|
||||
resp.Msg = "Success"
|
||||
|
||||
return resp, nil
|
||||
}
|
||||
|
|
|
@ -30,7 +30,7 @@ func (l *CountTaskStatusLogic) CountTaskStatus() (resp *types.TaskStatusResp, er
|
|||
COUNT(CASE WHEN status = 'Succeeded' THEN 1 END) AS Succeeded,
|
||||
COUNT(CASE WHEN status = 'Failed' THEN 1 END) AS Failed,
|
||||
COUNT(CASE WHEN status = 'Running' THEN 1 END) AS Running,
|
||||
COUNT(CASE WHEN status = 'Pause' THEN 1 END) AS Pause
|
||||
COUNT(CASE WHEN status = 'Saved' THEN 1 END) AS Saved
|
||||
FROM task;`
|
||||
err = l.svcCtx.DbEngin.Raw(sqlStr).Scan(&resp).Error
|
||||
if err != nil {
|
||||
|
|
|
@ -3,6 +3,8 @@ package core
|
|||
import (
|
||||
"context"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils/timeutils"
|
||||
"time"
|
||||
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
|
||||
|
@ -28,7 +30,7 @@ func (l *PageListTaskLogic) PageListTask(req *types.PageTaskReq) (resp *types.Pa
|
|||
limit := req.PageSize
|
||||
offset := req.PageSize * (req.PageNum - 1)
|
||||
resp = &types.PageResult{}
|
||||
var list []types.TaskModel
|
||||
var list []*types.TaskModel
|
||||
db := l.svcCtx.DbEngin.Model(&types.TaskModel{}).Table("task")
|
||||
|
||||
db = db.Where("deleted_at is null")
|
||||
|
@ -48,8 +50,18 @@ func (l *PageListTaskLogic) PageListTask(req *types.PageTaskReq) (resp *types.Pa
|
|||
if err != nil {
|
||||
return nil, result.NewDefaultError(err.Error())
|
||||
}
|
||||
|
||||
resp.List = list
|
||||
for _, model := range list {
|
||||
if model.EndTime != "" && model.StartTime != "" {
|
||||
startTime := timeutils.TimeStringToGoTime(model.StartTime)
|
||||
endTime := timeutils.TimeStringToGoTime(model.EndTime)
|
||||
model.RunningTime = int64(endTime.Sub(startTime).Seconds())
|
||||
}
|
||||
if model.StartTime != "" {
|
||||
startTime := timeutils.TimeStringToGoTime(model.StartTime)
|
||||
model.RunningTime = int64(time.Now().Sub(startTime).Seconds())
|
||||
}
|
||||
}
|
||||
resp.List = &list
|
||||
resp.PageSize = req.PageSize
|
||||
resp.PageNum = req.PageNum
|
||||
resp.Total = total
|
||||
|
|
|
@ -5,6 +5,7 @@ import (
|
|||
"github.com/jinzhu/copier"
|
||||
clientCore "gitlink.org.cn/JointCloud/pcm-coordinator/api/client"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models/cloud"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
|
||||
"gorm.io/gorm"
|
||||
|
||||
|
@ -54,7 +55,7 @@ func (l *PullTaskInfoLogic) PullTaskInfo(req *clientCore.PullTaskInfoReq) (*clie
|
|||
}
|
||||
}
|
||||
case 0:
|
||||
var cloudModelList []models.Cloud
|
||||
var cloudModelList []cloud.TaskCloudModel
|
||||
err := findModelList(req.AdapterId, l.svcCtx.DbEngin, &cloudModelList)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
|
|
@ -2,14 +2,15 @@ package core
|
|||
|
||||
import (
|
||||
"context"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/zeromicro/go-zero/core/logx"
|
||||
clientCore "gitlink.org.cn/JointCloud/pcm-coordinator/api/client"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
|
||||
"gorm.io/gorm"
|
||||
"strings"
|
||||
|
||||
"github.com/zeromicro/go-zero/core/logx"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||
"time"
|
||||
)
|
||||
|
||||
type PushTaskInfoLogic struct {
|
||||
|
@ -33,9 +34,14 @@ func (l *PushTaskInfoLogic) PushTaskInfo(req *clientCore.PushTaskInfoReq) (*clie
|
|||
switch kind {
|
||||
case 0:
|
||||
for _, cloudInfo := range req.CloudInfoList {
|
||||
l.svcCtx.DbEngin.Exec("update cloud set status = ?,start_time = ?,result = ? where participant_id = ? and id = ?",
|
||||
cloudInfo.Status, cloudInfo.StartTime, cloudInfo.Result, req.AdapterId, cloudInfo.Id)
|
||||
syncTask(l.svcCtx.DbEngin, cloudInfo.TaskId)
|
||||
var taskId uint
|
||||
result := l.svcCtx.DbEngin.Table("task_cloud").Select("task_id").Where("task_id = ?", cloudInfo.TaskId).Find(&taskId)
|
||||
if errors.Is(result.Error, gorm.ErrRecordNotFound) {
|
||||
return nil, errors.New("Record does not exist")
|
||||
}
|
||||
l.svcCtx.DbEngin.Exec("update task_cloud set status = ?,start_time = ?,result = ? where task_id = ?",
|
||||
cloudInfo.Status, cloudInfo.StartTime, cloudInfo.Result, cloudInfo.TaskId)
|
||||
syncTask(l.svcCtx.DbEngin, int64(taskId))
|
||||
}
|
||||
case 2:
|
||||
for _, hpcInfo := range req.HpcInfoList {
|
||||
|
@ -63,7 +69,7 @@ func (l *PushTaskInfoLogic) PushTaskInfo(req *clientCore.PushTaskInfoReq) (*clie
|
|||
func syncTask(gorm *gorm.DB, taskId int64) {
|
||||
|
||||
var allStatus string
|
||||
tx := gorm.Raw("SELECT CONCAT_WS(',',GROUP_CONCAT(DISTINCT h.status) ,GROUP_CONCAT(DISTINCT a.status) ,GROUP_CONCAT(DISTINCT c.status))as status from task t left join hpc h on t.id = h.task_id left join cloud c on t.id = c.task_id left join ai a on t.id = a.task_id where t.id = ?", taskId).Scan(&allStatus)
|
||||
tx := gorm.Raw("SELECT CONCAT_WS(',',GROUP_CONCAT(DISTINCT h.status) ,GROUP_CONCAT(DISTINCT a.status) ,GROUP_CONCAT(DISTINCT c.status))as status from task t left join hpc h on t.id = h.task_id left join task_cloud c on t.id = c.task_id left join ai a on t.id = a.task_id where t.id = ?", taskId).Scan(&allStatus)
|
||||
if tx.Error != nil {
|
||||
logx.Error(tx.Error)
|
||||
}
|
||||
|
@ -71,7 +77,6 @@ func syncTask(gorm *gorm.DB, taskId int64) {
|
|||
statusArray := strings.Split(allStatus, ",")
|
||||
if len(removeRepeatedElement(statusArray)) == 1 {
|
||||
updateTask(gorm, taskId, statusArray[0])
|
||||
|
||||
}
|
||||
// 子任务包含失败状态 主任务则失败
|
||||
if strings.Contains(allStatus, constants.Failed) {
|
||||
|
@ -85,10 +90,14 @@ func syncTask(gorm *gorm.DB, taskId int64) {
|
|||
}
|
||||
|
||||
func updateTask(gorm *gorm.DB, taskId int64, status string) {
|
||||
now := time.Now()
|
||||
var task models.Task
|
||||
gorm.Where("id = ? ", taskId).Find(&task)
|
||||
if task.Status != status {
|
||||
task.Status = status
|
||||
if status == constants.Running {
|
||||
task.StartTime = &now
|
||||
}
|
||||
gorm.Updates(&task)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,54 @@
|
|||
package core
|
||||
|
||||
import (
|
||||
"context"
|
||||
"github.com/pkg/errors"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
|
||||
"gorm.io/gorm"
|
||||
|
||||
"github.com/zeromicro/go-zero/core/logx"
|
||||
)
|
||||
|
||||
// TaskDetailsLogic holds the logger, request context and service context
// that TaskDetails works with.
type TaskDetailsLogic struct {
|
||||
logx.Logger // embedded logger; the constructor builds it with logx.WithContext(ctx)
|
||||
ctx context.Context // context passed to the constructor
|
||||
svcCtx *svc.ServiceContext // service context passed to the constructor
|
||||
}
|
||||
|
||||
func NewTaskDetailsLogic(ctx context.Context, svcCtx *svc.ServiceContext) *TaskDetailsLogic {
|
||||
return &TaskDetailsLogic{
|
||||
Logger: logx.WithContext(ctx),
|
||||
ctx: ctx,
|
||||
svcCtx: svcCtx,
|
||||
}
|
||||
}
|
||||
|
||||
func (l *TaskDetailsLogic) TaskDetails(req *types.FId) (resp *types.TaskDetailsResp, err error) {
|
||||
resp = &types.TaskDetailsResp{}
|
||||
task := &models.Task{}
|
||||
if errors.Is(l.svcCtx.DbEngin.Where("id", req.Id).First(&task).Error, gorm.ErrRecordNotFound) {
|
||||
return nil, errors.New("记录不存在")
|
||||
}
|
||||
clusterIds := make([]int64, 0)
|
||||
var cList []*types.ClusterInfo
|
||||
switch task.AdapterTypeDict {
|
||||
case 0:
|
||||
l.svcCtx.DbEngin.Table("task_cloud").Select("cluster_id").Where("task_id", task.Id).Scan(&clusterIds)
|
||||
case 1:
|
||||
l.svcCtx.DbEngin.Table("task_ai").Select("cluster_id").Where("task_id", task.Id).Scan(&clusterIds)
|
||||
case 2:
|
||||
l.svcCtx.DbEngin.Table("task_hpc").Select("cluster_id").Where("task_id", task.Id).Scan(&clusterIds)
|
||||
case 3:
|
||||
l.svcCtx.DbEngin.Table("task_vm").Select("cluster_id").Where("task_id", task.Id).Find(&clusterIds)
|
||||
}
|
||||
err = l.svcCtx.DbEngin.Table("t_cluster").Where("id in ?", clusterIds).Scan(&cList).Error
|
||||
if err != nil {
|
||||
return resp, err
|
||||
}
|
||||
utils.Convert(&task, &resp)
|
||||
resp.ClusterInfos = cList
|
||||
return
|
||||
}
|
|
@ -93,13 +93,12 @@ func (l *TaskListLogic) TaskList(req *types.TaskListReq) (resp *types.TaskListRe
|
|||
pStatus = "Normal"
|
||||
}
|
||||
}
|
||||
|
||||
resp.Tasks = append(resp.Tasks, types.Task{
|
||||
Id: task.Id,
|
||||
Name: task.Name,
|
||||
Status: task.Status,
|
||||
StartTime: task.StartTime,
|
||||
EndTime: task.EndTime,
|
||||
StartTime: task.StartTime.Format("2006-01-02 15:04:05"),
|
||||
EndTime: task.EndTime.Format("2006-01-02 15:04:05"),
|
||||
ParticipantId: pInfo.Id,
|
||||
ParticipantName: pInfo.Name,
|
||||
ParticipantStatus: pStatus,
|
||||
|
|
|
@ -32,11 +32,15 @@ func (l *CommitHpcTaskLogic) CommitHpcTask(req *types.CommitHpcTaskReq) (resp *t
|
|||
|
||||
// 构建主任务结构体
|
||||
taskModel := models.Task{
|
||||
Status: constants.Saved,
|
||||
Description: req.Description,
|
||||
Name: req.Name,
|
||||
CommitTime: time.Now(),
|
||||
Name: req.Name,
|
||||
Description: req.Description,
|
||||
Status: constants.Saved,
|
||||
Strategy: 0,
|
||||
SynergyStatus: 0,
|
||||
CommitTime: time.Now(),
|
||||
AdapterTypeDict: 2,
|
||||
}
|
||||
|
||||
// 保存任务数据到数据库
|
||||
tx := l.svcCtx.DbEngin.Create(&taskModel)
|
||||
if tx.Error != nil {
|
||||
|
@ -49,7 +53,9 @@ func (l *CommitHpcTaskLogic) CommitHpcTask(req *types.CommitHpcTaskReq) (resp *t
|
|||
env, _ := json.Marshal(req.Environment)
|
||||
|
||||
if len(clusterIds) == 0 || clusterIds == nil {
|
||||
return nil, nil
|
||||
resp.Code = 400
|
||||
resp.Msg = "no cluster found"
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
hpcInfo := models.TaskHpc{
|
||||
|
|
|
@ -0,0 +1,82 @@
|
|||
package monitoring
|
||||
|
||||
import (
|
||||
"context"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
|
||||
"strings"
|
||||
|
||||
"github.com/zeromicro/go-zero/core/logx"
|
||||
)
|
||||
|
||||
// ScheduleSituationLogic holds the logger, request context and service
// context that ScheduleSituation works with.
type ScheduleSituationLogic struct {
|
||||
logx.Logger // embedded logger; the constructor builds it with logx.WithContext(ctx)
|
||||
ctx context.Context // context passed to the constructor
|
||||
svcCtx *svc.ServiceContext // service context passed to the constructor
|
||||
}
|
||||
|
||||
func NewScheduleSituationLogic(ctx context.Context, svcCtx *svc.ServiceContext) *ScheduleSituationLogic {
|
||||
return &ScheduleSituationLogic{
|
||||
Logger: logx.WithContext(ctx),
|
||||
ctx: ctx,
|
||||
svcCtx: svcCtx,
|
||||
}
|
||||
}
|
||||
|
||||
func (l *ScheduleSituationLogic) ScheduleSituation() (resp *types.ScheduleSituationResp, err error) {
|
||||
resp = &types.ScheduleSituationResp{}
|
||||
// node region
|
||||
tx := l.svcCtx.DbEngin.Raw("SELECT c.id, c.name, tdi.id AS category, count(DISTINCT ta.id)+count(DISTINCT tc.id)+COUNT(DISTINCT th.id)+COUNT(tv.id) as value FROM t_cluster c LEFT JOIN t_dict_item tdi ON c.region_dict = tdi.id left JOIN task_ai ta ON ta.cluster_id = c.id left JOIN task_cloud tc ON tc.cluster_id = c.id left JOIN task_hpc th ON th.cluster_id = c.id left JOIN task_vm tv ON tv.cluster_id = c.id WHERE tc.deleted_at IS NULL GROUP BY c.id").Scan(&resp.Nodes)
|
||||
if tx.Error != nil {
|
||||
return nil, tx.Error
|
||||
}
|
||||
|
||||
// hpc
|
||||
var hpcLinks []string
|
||||
tx = l.svcCtx.DbEngin.Raw("SELECT GROUP_CONCAT(cluster_id SEPARATOR ',') as cluster_ids FROM task_hpc WHERE deleted_at IS NULL GROUP BY task_id HAVING COUNT(*) > 1;").Scan(&hpcLinks)
|
||||
if tx.Error != nil {
|
||||
return nil, tx.Error
|
||||
}
|
||||
LinksHandler(hpcLinks, resp)
|
||||
// cloud
|
||||
var cloudLinks []string
|
||||
tx = l.svcCtx.DbEngin.Raw("SELECT GROUP_CONCAT(cluster_id SEPARATOR ',') as cluster_ids FROM task_cloud WHERE deleted_at IS NULL GROUP BY task_id HAVING COUNT(*) > 1;").Scan(&cloudLinks)
|
||||
if tx.Error != nil {
|
||||
return nil, tx.Error
|
||||
}
|
||||
LinksHandler(cloudLinks, resp)
|
||||
// ai
|
||||
var aiLinks []string
|
||||
tx = l.svcCtx.DbEngin.Raw("SELECT GROUP_CONCAT(cluster_id SEPARATOR ',') as cluster_ids FROM task_ai WHERE deleted_at IS NULL GROUP BY task_id HAVING COUNT(*) > 1;").Scan(&aiLinks)
|
||||
if tx.Error != nil {
|
||||
return nil, tx.Error
|
||||
}
|
||||
LinksHandler(aiLinks, resp)
|
||||
// vm
|
||||
var vmLinks []string
|
||||
tx = l.svcCtx.DbEngin.Raw("SELECT GROUP_CONCAT(cluster_id SEPARATOR ',') as cluster_ids FROM task_vm WHERE deleted_at IS NULL GROUP BY task_id HAVING COUNT(*) > 1;").Scan(&vmLinks)
|
||||
if tx.Error != nil {
|
||||
return nil, tx.Error
|
||||
}
|
||||
LinksHandler(vmLinks, resp)
|
||||
|
||||
// categories
|
||||
tx = l.svcCtx.DbEngin.Raw("select tdi.item_text as name from t_dict_item tdi,t_dict td where td.dict_code = 'cluster_region_dict' and tdi.dict_id = td.id").Scan(&resp.Categories)
|
||||
if tx.Error != nil {
|
||||
return nil, tx.Error
|
||||
}
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
func LinksHandler(sources []string, resp *types.ScheduleSituationResp) {
|
||||
for _, source := range sources {
|
||||
links := strings.Split(source, ",")
|
||||
|
||||
for i := 1; i < len(links); i++ {
|
||||
if links[i] != links[i-1] {
|
||||
resp.Links = append(resp.Links, types.Link{Source: links[i], Target: links[i-1]})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -26,7 +26,11 @@ func NewScheduleGetAiJobLogLogLogic(ctx context.Context, svcCtx *svc.ServiceCont
|
|||
func (l *ScheduleGetAiJobLogLogLogic) ScheduleGetAiJobLogLog(req *types.AiJobLogReq) (resp *types.AiJobLogResp, err error) {
|
||||
resp = &types.AiJobLogResp{}
|
||||
|
||||
log, err := l.svcCtx.Scheduler.AiService.AiCollectorAdapterMap[req.AdapterId][req.ClusterId].GetTrainingTaskLog(l.ctx, req.TaskId, req.InstanceNum)
|
||||
id, err := l.svcCtx.Scheduler.AiStorages.GetAiTaskIdByClusterIdAndTaskId(req.ClusterId, req.TaskId)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
log, err := l.svcCtx.Scheduler.AiService.AiCollectorAdapterMap[req.AdapterId][req.ClusterId].GetTrainingTaskLog(l.ctx, id, req.InstanceNum)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
|
|
@ -0,0 +1,30 @@
|
|||
package schedule
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
|
||||
|
||||
"github.com/zeromicro/go-zero/core/logx"
|
||||
)
|
||||
|
||||
type ScheduleGetOverviewLogic struct {
|
||||
logx.Logger
|
||||
ctx context.Context
|
||||
svcCtx *svc.ServiceContext
|
||||
}
|
||||
|
||||
func NewScheduleGetOverviewLogic(ctx context.Context, svcCtx *svc.ServiceContext) *ScheduleGetOverviewLogic {
|
||||
return &ScheduleGetOverviewLogic{
|
||||
Logger: logx.WithContext(ctx),
|
||||
ctx: ctx,
|
||||
svcCtx: svcCtx,
|
||||
}
|
||||
}
|
||||
|
||||
func (l *ScheduleGetOverviewLogic) ScheduleGetOverview() (resp *types.ScheduleOverviewResp, err error) {
|
||||
// todo: add your logic here and delete this line
|
||||
|
||||
return
|
||||
}
|
|
@ -6,6 +6,7 @@ import (
|
|||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
|
||||
|
||||
"github.com/zeromicro/go-zero/core/logx"
|
||||
)
|
||||
|
@ -28,7 +29,9 @@ func (l *ScheduleSubmitLogic) ScheduleSubmit(req *types.ScheduleReq) (resp *type
|
|||
resp = &types.ScheduleResp{}
|
||||
opt := &option.AiOption{
|
||||
AdapterId: req.AiOption.AdapterId,
|
||||
TaskName: req.AiOption.TaskName,
|
||||
ResourceType: req.AiOption.ResourceType,
|
||||
Replica: 1,
|
||||
Tops: req.AiOption.Tops,
|
||||
TaskType: req.AiOption.TaskType,
|
||||
DatasetsName: req.AiOption.Datasets,
|
||||
|
@ -52,6 +55,17 @@ func (l *ScheduleSubmitLogic) ScheduleSubmit(req *types.ScheduleReq) (resp *type
|
|||
switch opt.GetOptionType() {
|
||||
case option.AI:
|
||||
rs := (results).([]*schedulers.AiResult)
|
||||
var synergystatus int64
|
||||
if len(rs) > 1 {
|
||||
synergystatus = 1
|
||||
}
|
||||
strategyCode, err := l.svcCtx.Scheduler.AiStorages.GetStrategyCode(req.AiOption.Strategy)
|
||||
|
||||
id, err := l.svcCtx.Scheduler.AiStorages.SaveTask(req.AiOption.TaskName, strategyCode, synergystatus)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, r := range rs {
|
||||
scheResult := &types.ScheduleResult{}
|
||||
scheResult.ClusterId = r.ClusterId
|
||||
|
@ -59,12 +73,13 @@ func (l *ScheduleSubmitLogic) ScheduleSubmit(req *types.ScheduleReq) (resp *type
|
|||
scheResult.Strategy = r.Strategy
|
||||
scheResult.Replica = r.Replica
|
||||
scheResult.Msg = r.Msg
|
||||
err := l.svcCtx.Scheduler.AiStorages.SaveAiTask(id, opt, r.ClusterId, r.TaskId, constants.Saved, r.Msg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
resp.Results = append(resp.Results, scheResult)
|
||||
}
|
||||
err = l.svcCtx.Scheduler.AiStorages.SaveTask(req.AiOption.TaskName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return resp, nil
|
||||
|
|
|
@ -16,8 +16,6 @@ package mqs
|
|||
|
||||
import (
|
||||
"context"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||
)
|
||||
|
||||
|
@ -38,28 +36,28 @@ func NewCloudMq(ctx context.Context, svcCtx *svc.ServiceContext) *CloudMq {
|
|||
|
||||
func (l *CloudMq) Consume(val string) error {
|
||||
// 接受消息, 根据标签筛选过滤
|
||||
cloudScheduler := schedulers.NewCloudScheduler()
|
||||
schdl, err := scheduler.NewScheduler(cloudScheduler, val, l.svcCtx.DbEngin, l.svcCtx.ParticipantRpc)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
//检测是否指定了集群列表
|
||||
schdl.SpecifyClusters()
|
||||
|
||||
//检测是否指定了nsID
|
||||
schdl.SpecifyNsID()
|
||||
|
||||
//通过标签匹配筛选出集群范围
|
||||
schdl.MatchLabels()
|
||||
|
||||
//todo 屏蔽原调度算法,因为监控数据暂未上报,临时采用随机调度
|
||||
schdl.TempAssign()
|
||||
|
||||
// 存储数据
|
||||
err = schdl.SaveToDb()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
//cloudScheduler := schedulers.NewCloudScheduler()
|
||||
//schdl, err := scheduler.NewScheduler(cloudScheduler, val, l.svcCtx.DbEngin, l.svcCtx.ParticipantRpc)
|
||||
//if err != nil {
|
||||
// return err
|
||||
//}
|
||||
//
|
||||
////检测是否指定了集群列表
|
||||
//schdl.SpecifyClusters()
|
||||
//
|
||||
////检测是否指定了nsID
|
||||
//schdl.SpecifyNsID()
|
||||
//
|
||||
////通过标签匹配筛选出集群范围
|
||||
//schdl.MatchLabels()
|
||||
//
|
||||
////todo 屏蔽原调度算法,因为监控数据暂未上报,临时采用随机调度
|
||||
//schdl.TempAssign()
|
||||
//
|
||||
//// 存储数据
|
||||
//err = schdl.SaveToDb()
|
||||
//if err != nil {
|
||||
// return err
|
||||
//}
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -2,8 +2,6 @@ package mqs
|
|||
|
||||
import (
|
||||
"context"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||
)
|
||||
|
||||
|
@ -24,28 +22,28 @@ func NewVmMq(ctx context.Context, svcCtx *svc.ServiceContext) *VmMq {
|
|||
|
||||
func (l *VmMq) Consume(val string) error {
|
||||
// 接受消息, 根据标签筛选过滤
|
||||
vmScheduler := schedulers.NewVmScheduler()
|
||||
schdl, err := scheduler.NewScheduler(vmScheduler, val, l.svcCtx.DbEngin, l.svcCtx.ParticipantRpc)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
//检测是否指定了集群列表
|
||||
schdl.SpecifyClusters()
|
||||
|
||||
//检测是否指定了nsID
|
||||
schdl.SpecifyNsID()
|
||||
|
||||
//通过标签匹配筛选出集群范围
|
||||
schdl.MatchLabels()
|
||||
|
||||
//todo 屏蔽原调度算法,因为监控数据暂未上报,临时采用随机调度
|
||||
schdl.TempAssign()
|
||||
|
||||
// 存储数据
|
||||
err = schdl.SaveToDb()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
//vmScheduler := schedulers.NewVmScheduler()
|
||||
//schdl, err := scheduler.NewScheduler(vmScheduler, val, l.svcCtx.DbEngin, l.svcCtx.ParticipantRpc)
|
||||
//if err != nil {
|
||||
// return err
|
||||
//}
|
||||
//
|
||||
////检测是否指定了集群列表
|
||||
//schdl.SpecifyClusters()
|
||||
//
|
||||
////检测是否指定了nsID
|
||||
//schdl.SpecifyNsID()
|
||||
//
|
||||
////通过标签匹配筛选出集群范围
|
||||
//schdl.MatchLabels()
|
||||
//
|
||||
////todo 屏蔽原调度算法,因为监控数据暂未上报,临时采用随机调度
|
||||
//schdl.TempAssign()
|
||||
//
|
||||
//// 存储数据
|
||||
//err = schdl.SaveToDb()
|
||||
//if err != nil {
|
||||
// return err
|
||||
//}
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -2,10 +2,12 @@ package database
|
|||
|
||||
import (
|
||||
"github.com/zeromicro/go-zero/core/logx"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
|
||||
"gorm.io/gorm"
|
||||
"strconv"
|
||||
"time"
|
||||
)
|
||||
|
||||
|
@ -48,22 +50,183 @@ func (s *AiStorage) GetAdapterIdsByType(adapterType string) ([]string, error) {
|
|||
return ids, nil
|
||||
}
|
||||
|
||||
func (s *AiStorage) SaveTask(name string) error {
|
||||
func (s *AiStorage) GetAdaptersByType(adapterType string) ([]*types.AdapterInfo, error) {
|
||||
var list []*types.AdapterInfo
|
||||
db := s.DbEngin.Model(&types.AdapterInfo{}).Table("t_adapter")
|
||||
db = db.Where("type = ?", adapterType)
|
||||
err := db.Order("create_time desc").Find(&list).Error
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return list, nil
|
||||
}
|
||||
|
||||
func (s *AiStorage) GetAiTasksByAdapterId(adapterId string) ([]*models.TaskAi, error) {
|
||||
var resp []*models.TaskAi
|
||||
tx := s.DbEngin.Raw("select * from task_ai where `adapter_id` = ? ", adapterId).Scan(&resp)
|
||||
if tx.Error != nil {
|
||||
logx.Errorf(tx.Error.Error())
|
||||
return nil, tx.Error
|
||||
}
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
func (s *AiStorage) SaveTask(name string, strategyCode int64, synergyStatus int64) (int64, error) {
|
||||
// 构建主任务结构体
|
||||
taskModel := models.Task{
|
||||
Status: constants.Saved,
|
||||
Description: "ai task",
|
||||
Name: name,
|
||||
CommitTime: time.Now(),
|
||||
Status: constants.Saved,
|
||||
Description: "ai task",
|
||||
Name: name,
|
||||
SynergyStatus: synergyStatus,
|
||||
Strategy: strategyCode,
|
||||
AdapterTypeDict: 1,
|
||||
CommitTime: time.Now(),
|
||||
}
|
||||
// 保存任务数据到数据库
|
||||
tx := s.DbEngin.Create(&taskModel)
|
||||
if tx.Error != nil {
|
||||
return 0, tx.Error
|
||||
}
|
||||
return taskModel.Id, nil
|
||||
}
|
||||
|
||||
func (s *AiStorage) SaveAiTask(taskId int64, option *option.AiOption, clusterId string, jobId string, status string, msg string) error {
|
||||
// 构建主任务结构体
|
||||
aId, err := strconv.ParseInt(option.AdapterId, 10, 64)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
cId, err := strconv.ParseInt(clusterId, 10, 64)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
aiTaskModel := models.TaskAi{
|
||||
TaskId: taskId,
|
||||
AdapterId: aId,
|
||||
ClusterId: cId,
|
||||
Name: option.TaskName,
|
||||
Replica: option.Replica,
|
||||
JobId: jobId,
|
||||
TaskType: option.TaskType,
|
||||
Strategy: option.StrategyName,
|
||||
Status: status,
|
||||
Msg: msg,
|
||||
CommitTime: time.Now(),
|
||||
}
|
||||
// 保存任务数据到数据库
|
||||
tx := s.DbEngin.Create(&aiTaskModel)
|
||||
if tx.Error != nil {
|
||||
return tx.Error
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *AiStorage) UpdateTask() error {
|
||||
func (s *AiStorage) SaveClusterTaskQueue(adapterId string, clusterId string, queueNum int64) error {
|
||||
aId, err := strconv.ParseInt(adapterId, 10, 64)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
cId, err := strconv.ParseInt(clusterId, 10, 64)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
taskQueue := models.TClusterTaskQueue{
|
||||
AdapterId: aId,
|
||||
ClusterId: cId,
|
||||
QueueNum: queueNum,
|
||||
}
|
||||
tx := s.DbEngin.Create(&taskQueue)
|
||||
if tx.Error != nil {
|
||||
return tx.Error
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *AiStorage) GetClusterTaskQueues(adapterId string, clusterId string) ([]*models.TClusterTaskQueue, error) {
|
||||
var taskQueues []*models.TClusterTaskQueue
|
||||
tx := s.DbEngin.Raw("select * from t_cluster_task_queue where `adapter_id` = ? and `cluster_id` = ?", adapterId, clusterId).Scan(&taskQueues)
|
||||
if tx.Error != nil {
|
||||
logx.Errorf(tx.Error.Error())
|
||||
return nil, tx.Error
|
||||
}
|
||||
return taskQueues, nil
|
||||
}
|
||||
|
||||
func (s *AiStorage) GetAiTaskIdByClusterIdAndTaskId(clusterId string, taskId string) (string, error) {
|
||||
var aiTask models.TaskAi
|
||||
tx := s.DbEngin.Raw("select * from task_ai where `cluster_id` = ? and `task_id` = ?", clusterId, taskId).Scan(&aiTask)
|
||||
if tx.Error != nil {
|
||||
logx.Errorf(tx.Error.Error())
|
||||
return "", tx.Error
|
||||
}
|
||||
return aiTask.JobId, nil
|
||||
}
|
||||
|
||||
func (s *AiStorage) GetClusterResourcesById(clusterId string) (*models.TClusterResource, error) {
|
||||
var clusterResource models.TClusterResource
|
||||
tx := s.DbEngin.Raw("select * from t_cluster_resource where `cluster_id` = ?", clusterId).Scan(&clusterResource)
|
||||
if tx.Error != nil {
|
||||
logx.Errorf(tx.Error.Error())
|
||||
return nil, tx.Error
|
||||
}
|
||||
return &clusterResource, nil
|
||||
}
|
||||
|
||||
func (s *AiStorage) SaveClusterResources(clusterId string, clusterName string, clusterType int64, cpuAvail float64, cpuTotal float64,
|
||||
memAvail float64, memTotal float64, diskAvail float64, diskTotal float64, gpuAvail float64, gpuTotal float64, cardTotal int64, topsTotal float64) error {
|
||||
cId, err := strconv.ParseInt(clusterId, 10, 64)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
clusterResource := models.TClusterResource{
|
||||
ClusterId: cId,
|
||||
ClusterName: clusterName,
|
||||
ClusterType: clusterType,
|
||||
CpuAvail: cpuAvail,
|
||||
CpuTotal: cpuTotal,
|
||||
MemAvail: memAvail,
|
||||
MemTotal: memTotal,
|
||||
DiskAvail: diskAvail,
|
||||
DiskTotal: diskTotal,
|
||||
GpuAvail: gpuAvail,
|
||||
GpuTotal: gpuTotal,
|
||||
CardTotal: cardTotal,
|
||||
CardTopsTotal: topsTotal,
|
||||
}
|
||||
tx := s.DbEngin.Create(&clusterResource)
|
||||
if tx.Error != nil {
|
||||
return tx.Error
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *AiStorage) UpdateClusterResources(clusterResource *models.TClusterResource) error {
|
||||
tx := s.DbEngin.Updates(clusterResource)
|
||||
if tx.Error != nil {
|
||||
return tx.Error
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *AiStorage) UpdateAiTask(task *models.TaskAi) error {
|
||||
tx := s.DbEngin.Updates(task)
|
||||
if tx.Error != nil {
|
||||
return tx.Error
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *AiStorage) GetStrategyCode(name string) (int64, error) {
|
||||
var strategy int64
|
||||
sqlStr := `select t_dict_item.item_value
|
||||
from t_dict
|
||||
left join t_dict_item on t_dict.id = t_dict_item.dict_id
|
||||
where item_text = ?
|
||||
and t_dict.dict_code = 'schedule_Strategy'`
|
||||
//查询调度策略
|
||||
err := s.DbEngin.Raw(sqlStr, name).Scan(&strategy).Error
|
||||
if err != nil {
|
||||
return strategy, nil
|
||||
}
|
||||
return strategy, nil
|
||||
}
|
||||
|
|
|
@ -129,42 +129,19 @@ func (s *Scheduler) TempAssign() error {
|
|||
}
|
||||
|
||||
func (s *Scheduler) AssignAndSchedule(ss SubSchedule) (interface{}, error) {
|
||||
//// 已指定 ParticipantId
|
||||
//if s.task.ParticipantId != 0 {
|
||||
// return nil
|
||||
//}
|
||||
//// 标签匹配以及后,未找到ParticipantIds
|
||||
//if len(s.participantIds) == 0 {
|
||||
// return errors.New("未找到匹配的ParticipantIds")
|
||||
//}
|
||||
//
|
||||
//// 指定或者标签匹配的结果只有一个集群,给任务信息指定
|
||||
//if len(s.participantIds) == 1 {
|
||||
// s.task.ParticipantId = s.participantIds[0]
|
||||
// //replicas := s.task.Metadata.(map[string]interface{})["spec"].(map[string]interface{})["replicas"].(float64)
|
||||
// //result := make(map[int64]string)
|
||||
// //result[s.participantIds[0]] = strconv.FormatFloat(replicas, 'f', 2, 64)
|
||||
// //s.result = result
|
||||
//
|
||||
// return nil
|
||||
//}
|
||||
|
||||
//choose strategy
|
||||
strategy, err := ss.PickOptimalStrategy()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
//schedule
|
||||
clusters, err := strategy.Schedule()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
//集群数量不满足,指定到标签匹配后第一个集群
|
||||
//if len(providerList) < 2 {
|
||||
// s.task.ParticipantId = s.participantIds[0]
|
||||
// return nil
|
||||
//}
|
||||
|
||||
//assign tasks to clusters
|
||||
resp, err := ss.AssignTask(clusters)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
|
|
@ -26,6 +26,7 @@ import (
|
|||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy/param"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/pkg/response"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
|
||||
"gitlink.org.cn/JointCloud/pcm-octopus/octopus"
|
||||
|
@ -168,32 +169,52 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) (interfa
|
|||
errs = append(errs, e)
|
||||
}
|
||||
|
||||
if len(errs) == len(clusters) {
|
||||
return nil, errors.New("submit task failed")
|
||||
for s := range ch {
|
||||
results = append(results, s)
|
||||
}
|
||||
|
||||
if len(errs) != 0 {
|
||||
var msg string
|
||||
var synergystatus int64
|
||||
if len(clusters) > 1 {
|
||||
synergystatus = 1
|
||||
}
|
||||
strategyCode, err := as.AiStorages.GetStrategyCode(as.option.StrategyName)
|
||||
taskId, err := as.AiStorages.SaveTask(as.option.TaskName, strategyCode, synergystatus)
|
||||
if err != nil {
|
||||
return nil, errors.New("database add failed: " + err.Error())
|
||||
}
|
||||
|
||||
var errmsg string
|
||||
for _, err := range errs {
|
||||
e := (err).(struct {
|
||||
err error
|
||||
clusterId string
|
||||
})
|
||||
msg += fmt.Sprintf("clusterId: %v , error: %v \n", e.clusterId, e.err.Error())
|
||||
msg := fmt.Sprintf("clusterId: %v , error: %v \n", e.clusterId, e.err.Error())
|
||||
errmsg += msg
|
||||
err := as.AiStorages.SaveAiTask(taskId, as.option, e.clusterId, "", constants.Failed, msg)
|
||||
if err != nil {
|
||||
return nil, errors.New("database add failed: " + err.Error())
|
||||
}
|
||||
}
|
||||
for s := range ch {
|
||||
if s.Msg != "" {
|
||||
msg += fmt.Sprintf("clusterId: %v , error: %v \n", s.ClusterId, s.Msg)
|
||||
msg := fmt.Sprintf("clusterId: %v , error: %v \n", s.ClusterId, s.Msg)
|
||||
errmsg += msg
|
||||
err := as.AiStorages.SaveAiTask(taskId, as.option, s.ClusterId, "", constants.Failed, msg)
|
||||
if err != nil {
|
||||
return nil, errors.New("database add failed: " + err.Error())
|
||||
}
|
||||
} else {
|
||||
msg += fmt.Sprintf("clusterId: %v , submitted successfully, taskId: %v \n", s.ClusterId, s.TaskId)
|
||||
msg := fmt.Sprintf("clusterId: %v , submitted successfully, taskId: %v \n", s.ClusterId, s.TaskId)
|
||||
errmsg += msg
|
||||
err := as.AiStorages.SaveAiTask(taskId, as.option, s.ClusterId, s.TaskId, constants.Succeeded, msg)
|
||||
if err != nil {
|
||||
return nil, errors.New("database add failed: " + err.Error())
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil, errors.New(msg)
|
||||
}
|
||||
|
||||
for s := range ch {
|
||||
// TODO: database operation
|
||||
results = append(results, s)
|
||||
return nil, errors.New(errmsg)
|
||||
}
|
||||
|
||||
return results, nil
|
||||
|
|
|
@ -15,106 +15,176 @@
|
|||
package schedulers
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/algorithm/providerPricing"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/database"
|
||||
"context"
|
||||
"errors"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/collector"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy/param"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/pkg/response"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models/cloud"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/tracker"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
|
||||
"io"
|
||||
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
syaml "k8s.io/apimachinery/pkg/runtime/serializer/yaml"
|
||||
kyaml "k8s.io/apimachinery/pkg/util/yaml"
|
||||
"gorm.io/gorm"
|
||||
"math"
|
||||
"time"
|
||||
)
|
||||
|
||||
type CloudScheduler struct {
|
||||
storage database.Storage
|
||||
yamlString string
|
||||
task *response.TaskInfo
|
||||
*scheduler.Scheduler
|
||||
option *option.CloudOption
|
||||
ctx context.Context
|
||||
dbEngin *gorm.DB
|
||||
promClient tracker.Prometheus
|
||||
svcCtx *svc.ServiceContext
|
||||
}
|
||||
|
||||
func NewCloudScheduler() *CloudScheduler {
|
||||
return &CloudScheduler{}
|
||||
// CloudResult is one per-cluster entry in the result list returned by
// CloudScheduler.AssignTask.
type CloudResult struct {
	TaskId      string
	ClusterId   string // id of the assigned cluster
	ClusterName string // name looked up for ClusterId
	Strategy    string
	Replica     int32 // replicas assigned to the cluster
	Msg         string
}
|
||||
|
||||
func (cs *CloudScheduler) PickOptimalStrategy() (strategy.Strategy, error) {
|
||||
//获取所有计算中心
|
||||
//调度算法
|
||||
strategy := strategy.NewPricingStrategy(¶m.ResourcePricingParams{})
|
||||
return strategy, nil
|
||||
func NewCloudScheduler(ctx context.Context, val string, scheduler *scheduler.Scheduler, option *option.CloudOption, dbEngin *gorm.DB, promClient tracker.Prometheus) (*CloudScheduler, error) {
|
||||
return &CloudScheduler{ctx: ctx, yamlString: val, Scheduler: scheduler, option: option, dbEngin: dbEngin, promClient: promClient}, nil
|
||||
}
|
||||
|
||||
func (cs *CloudScheduler) GetNewStructForDb(task *response.TaskInfo, resource string, participantId int64) (interface{}, error) {
|
||||
cloud := cs.UnMarshalK8sStruct(resource, task.TaskId, task.NsID)
|
||||
cloud.Id = utils.GenSnowflakeID()
|
||||
cloud.NsID = task.NsID
|
||||
|
||||
cloud.ParticipantId = participantId
|
||||
return cloud, nil
|
||||
}
|
||||
|
||||
func (cs *CloudScheduler) UnMarshalK8sStruct(yamlString string, taskId int64, nsID string) models.Cloud {
|
||||
var cloud models.Cloud
|
||||
d := kyaml.NewYAMLOrJSONDecoder(bytes.NewBufferString(yamlString), 4096)
|
||||
var err error
|
||||
for {
|
||||
var rawObj runtime.RawExtension
|
||||
err = d.Decode(&rawObj)
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
}
|
||||
obj := &unstructured.Unstructured{}
|
||||
syaml.NewDecodingSerializer(unstructured.UnstructuredJSONScheme).Decode(rawObj.Raw, nil, obj)
|
||||
if err != nil {
|
||||
}
|
||||
|
||||
unstructuredMap, err := runtime.DefaultUnstructuredConverter.ToUnstructured(obj)
|
||||
if err != nil {
|
||||
}
|
||||
|
||||
unstructureObj := &unstructured.Unstructured{Object: unstructuredMap}
|
||||
if len(nsID) != 0 {
|
||||
unstructureObj.SetNamespace(nsID)
|
||||
}
|
||||
cloud = models.Cloud{
|
||||
TaskId: taskId,
|
||||
ApiVersion: unstructureObj.GetAPIVersion(),
|
||||
Name: unstructureObj.GetName(),
|
||||
Kind: unstructureObj.GetKind(),
|
||||
Namespace: unstructureObj.GetNamespace(),
|
||||
Status: "Saved",
|
||||
}
|
||||
// 命名空间为空 设置默认值
|
||||
if len(unstructureObj.GetNamespace()) == 0 {
|
||||
cloud.Namespace = "default"
|
||||
}
|
||||
//unstructureObj转成string
|
||||
unString, _ := unstructureObj.MarshalJSON()
|
||||
cloud.YamlString = string(unString)
|
||||
func (as *CloudScheduler) GetNewStructForDb(task *response.TaskInfo, resource string, participantId int64) (interface{}, error) {
|
||||
c := cloud.TaskCloudModel{
|
||||
AdapterId: uint(participantId),
|
||||
TaskId: uint(task.TaskId),
|
||||
Status: constants.Saved,
|
||||
YamlString: as.yamlString,
|
||||
}
|
||||
return cloud
|
||||
utils.Convert(task.Metadata, &c)
|
||||
return c, nil
|
||||
}
|
||||
|
||||
func (cs *CloudScheduler) genTaskAndProviders() (*providerPricing.Task, []*providerPricing.Provider, error) {
|
||||
proParams, err := cs.storage.GetProviderParams()
|
||||
func (as *CloudScheduler) PickOptimalStrategy() (strategy.Strategy, error) {
|
||||
if len(as.option.ClusterIds) == 1 {
|
||||
return &strategy.SingleAssignment{Cluster: &strategy.AssignedCluster{ClusterId: as.option.ClusterIds[0], Replicas: 1}}, nil
|
||||
}
|
||||
|
||||
resources, err := as.findClustersWithResources()
|
||||
|
||||
if err != nil {
|
||||
return nil, nil, nil
|
||||
return nil, err
|
||||
}
|
||||
var providerList []*providerPricing.Provider
|
||||
for _, p := range proParams {
|
||||
provider := providerPricing.NewProvider(p.Participant_id, p.Cpu_avail, p.Mem_avail, p.Disk_avail, 0.0, 0.0, 0.0)
|
||||
providerList = append(providerList, provider)
|
||||
if len(resources) == 0 {
|
||||
return nil, errors.New("no cluster has resources")
|
||||
}
|
||||
|
||||
//replicas := task.Metadata.(map[string]interface{})["spec"].(map[string]interface{})["replicas"].(float64)
|
||||
//t := algorithm.NewTask(0, int(replicas), 2, 75120000, 301214500, 1200, 2, 6, 2000)
|
||||
if len(resources) == 1 {
|
||||
var cluster strategy.AssignedCluster
|
||||
cluster.ClusterId = resources[0].ClusterId
|
||||
cluster.Replicas = 1
|
||||
return &strategy.SingleAssignment{Cluster: &cluster}, nil
|
||||
}
|
||||
|
||||
return nil, providerList, nil
|
||||
params := ¶m.Params{Resources: resources}
|
||||
|
||||
switch as.option.Strategy {
|
||||
case strategy.REPLICATION:
|
||||
var clusterIds []string
|
||||
for _, resource := range resources {
|
||||
clusterIds = append(clusterIds, resource.ClusterId)
|
||||
}
|
||||
strategy := strategy.NewReplicationStrategy(clusterIds, as.option.Replica)
|
||||
return strategy, nil
|
||||
case strategy.RESOURCES_PRICING:
|
||||
strategy := strategy.NewPricingStrategy(¶m.ResourcePricingParams{Params: params, Replicas: as.option.Replica})
|
||||
return strategy, nil
|
||||
case strategy.DYNAMIC_RESOURCES:
|
||||
strategy := strategy.NewDynamicResourcesStrategy(params.Resources, as.option, 1)
|
||||
return strategy, nil
|
||||
case strategy.STATIC_WEIGHT:
|
||||
//todo resources should match cluster StaticWeightMap
|
||||
strategy := strategy.NewStaticWeightStrategy(as.option.StaticWeightMap, as.option.Replica)
|
||||
return strategy, nil
|
||||
}
|
||||
|
||||
return nil, errors.New("no strategy has been chosen")
|
||||
}
|
||||
|
||||
func (cs *CloudScheduler) AssignTask(clusters []*strategy.AssignedCluster) (interface{}, error) {
|
||||
return nil, nil
|
||||
func (as *CloudScheduler) AssignTask(clusters []*strategy.AssignedCluster) (interface{}, error) {
|
||||
if clusters == nil {
|
||||
return nil, errors.New("clusters is nil")
|
||||
}
|
||||
|
||||
for i := len(clusters) - 1; i >= 0; i-- {
|
||||
if clusters[i].Replicas == 0 {
|
||||
clusters = append(clusters[:i], clusters[i+1:]...)
|
||||
}
|
||||
}
|
||||
|
||||
if len(clusters) == 0 {
|
||||
return nil, errors.New("clusters is nil")
|
||||
}
|
||||
|
||||
var results []*CloudResult
|
||||
|
||||
for _, cluster := range clusters {
|
||||
cName := ""
|
||||
as.dbEngin.Table("t_cluster").Select("name").Where("id=?", cluster.ClusterId).Find(&cName)
|
||||
cr := CloudResult{
|
||||
ClusterId: cluster.ClusterId,
|
||||
ClusterName: cName,
|
||||
Replica: cluster.Replicas,
|
||||
}
|
||||
cr.ClusterId = cluster.ClusterId
|
||||
cr.Replica = cluster.Replicas
|
||||
|
||||
cr.ClusterName = cName
|
||||
results = append(results, &cr)
|
||||
}
|
||||
|
||||
return results, nil
|
||||
}
|
||||
|
||||
func (as *CloudScheduler) findClustersWithResources() ([]*collector.ResourceStats, error) {
|
||||
resp := []*collector.ResourceStats{}
|
||||
//查询集群资源信息
|
||||
var rMetrics []tracker.Metric
|
||||
metrics := []string{"cluster_cpu_utilisation", "cluster_cpu_avail", "cluster_cpu_total", "cluster_memory_total", "cluster_memory_avail", "cluster_memory_utilisation", "cluster_disk_utilisation", "cluster_disk_avail", "cluster_disk_total", "cluster_pod_utilisation"}
|
||||
var clusterNames []string
|
||||
as.dbEngin.Table("t_cluster").Select("name").Where("id in ?", as.option.ClusterIds).Find(&clusterNames)
|
||||
for _, c := range clusterNames {
|
||||
rMetrics = as.promClient.GetNamedMetrics(metrics, time.Now(), tracker.ClusterOption{ClusterName: c})
|
||||
r := collector.ResourceStats{}
|
||||
var cid string
|
||||
as.dbEngin.Table("t_cluster").Select("id").Where("name = ?", c).Find(&cid)
|
||||
r.ClusterId = cid
|
||||
r.Name = c
|
||||
for _, metric := range rMetrics {
|
||||
if metric.MetricName == "cluster_cpu_total" {
|
||||
r.CpuCoreTotal = int64(metric.MetricData.MetricValues[0].Sample.Value())
|
||||
}
|
||||
if metric.MetricName == "cluster_cpu_avail" {
|
||||
cpuAvail := metric.MetricData.MetricValues[0].Sample.Value()
|
||||
r.CpuCoreAvail = int64(math.Round(cpuAvail))
|
||||
}
|
||||
if metric.MetricName == "cluster_memory_total" {
|
||||
r.MemTotal = metric.MetricData.MetricValues[0].Sample.Value()
|
||||
}
|
||||
if metric.MetricName == "cluster_memory_avail" {
|
||||
r.MemAvail = metric.MetricData.MetricValues[0].Sample.Value()
|
||||
}
|
||||
if metric.MetricName == "cluster_disk_total" {
|
||||
r.DiskTotal = metric.MetricData.MetricValues[0].Sample.Value()
|
||||
}
|
||||
if metric.MetricName == "cluster_disk_avail" {
|
||||
r.DiskAvail = metric.MetricData.MetricValues[0].Sample.Value()
|
||||
}
|
||||
}
|
||||
resp = append(resp, &r)
|
||||
}
|
||||
return resp, nil
|
||||
}
|
||||
|
|
|
@ -4,6 +4,7 @@ type AiOption struct {
|
|||
AdapterId string
|
||||
ClusterIds []string
|
||||
TaskName string
|
||||
Replica int64
|
||||
ResourceType string // cpu/gpu/compute card
|
||||
CpuCoreNum int64
|
||||
TaskType string // pytorch/tensorflow/mindspore
|
||||
|
|
|
@ -1,7 +1,13 @@
|
|||
package option
|
||||
|
||||
type CloudOption struct {
|
||||
task interface{}
|
||||
Name string `json:"name"`
|
||||
AdapterIds []string `json:"adapterIds"`
|
||||
ClusterIds []string `json:"clusterIds"`
|
||||
Strategy string `json:"strategy"`
|
||||
StaticWeightMap map[string]int32 `json:"staticWeightMap,optional"`
|
||||
ReqBody []string `json:"reqBody"`
|
||||
Replica int32 `json:"replicas,string"`
|
||||
}
|
||||
|
||||
func (c CloudOption) GetOptionType() string {
|
||||
|
|
|
@ -4,6 +4,7 @@ const (
|
|||
AI = "ai"
|
||||
CLOUD = "cloud"
|
||||
HPC = "hpc"
|
||||
VM = "vm"
|
||||
)
|
||||
|
||||
type Option interface {
|
||||
|
|
|
@ -0,0 +1,49 @@
|
|||
package option
|
||||
|
||||
import "time"
|
||||
|
||||
type VmOption struct {
|
||||
AdapterId string
|
||||
ClusterIds []string
|
||||
TaskName string
|
||||
ResourceType string // cpu/gpu/compute card
|
||||
TaskType string // pytorch/tensorflow/mindspore
|
||||
Strategy string
|
||||
ClusterToStaticWeight map[string]int32
|
||||
CommitTime time.Time
|
||||
NsID string
|
||||
Replicas int64
|
||||
MatchLabels map[string]string
|
||||
StaticWeightMap map[string]int32
|
||||
CreateMulServer []CreateMulDomainServer
|
||||
Id int64
|
||||
ParticipantId int64
|
||||
TaskId int64
|
||||
Name string
|
||||
ClusterId int64
|
||||
FlavorRef string
|
||||
ImageRef string
|
||||
Status string
|
||||
Platform string
|
||||
Description string
|
||||
AvailabilityZone string
|
||||
MinCount int64
|
||||
Uuid string
|
||||
StartTime string
|
||||
RunningTime string
|
||||
Result string
|
||||
DeletedAt string
|
||||
}
|
||||
|
||||
// CreateMulDomainServer is the element type of VmOption.CreateMulServer.
type CreateMulDomainServer struct {
	Platform  string
	Name      string
	Min_count int64
	ImageRef  string
	FlavorRef string
	Uuid      string
}
|
||||
|
||||
func (a VmOption) GetOptionType() string {
|
||||
return VM
|
||||
}
|
|
@ -1,29 +1,96 @@
|
|||
package schedulers
|
||||
|
||||
import (
|
||||
"context"
|
||||
"github.com/pkg/errors"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/algorithm/providerPricing"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/database"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy/param"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/pkg/response"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/tracker"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
|
||||
"gorm.io/gorm"
|
||||
)
|
||||
|
||||
type VmScheduler struct {
|
||||
storage database.Storage
|
||||
yamlString string
|
||||
storage database.Storage
|
||||
task *response.TaskInfo
|
||||
*scheduler.Scheduler
|
||||
option *option.VmOption
|
||||
ctx context.Context
|
||||
promClient tracker.Prometheus
|
||||
dbEngin *gorm.DB
|
||||
}
|
||||
|
||||
func NewVmScheduler() *VmScheduler {
|
||||
return &VmScheduler{}
|
||||
type VmResult struct {
|
||||
TaskId string
|
||||
ClusterId string
|
||||
ClusterName string
|
||||
Strategy string
|
||||
Replica int32
|
||||
Msg string
|
||||
}
|
||||
|
||||
func NewVmScheduler(ctx context.Context, val string, scheduler *scheduler.Scheduler, option *option.VmOption, dbEngin *gorm.DB, promClient tracker.Prometheus) (*VmScheduler, error) {
|
||||
return &VmScheduler{ctx: ctx, yamlString: val, Scheduler: scheduler, option: option, dbEngin: dbEngin, promClient: promClient}, nil
|
||||
}
|
||||
|
||||
/*func NewCloudScheduler(ctx context.Context, val string, scheduler *scheduler.Scheduler, option *option.CloudOption, dbEngin *gorm.DB, promClient tracker.Prometheus) (*CloudScheduler, error) {
|
||||
return &CloudScheduler{ctx: ctx, yamlString: val, Scheduler: scheduler, option: option, dbEngin: dbEngin, promClient: promClient}, nil
|
||||
}*/
|
||||
|
||||
func (vm *VmScheduler) PickOptimalStrategy() (strategy.Strategy, error) {
|
||||
//获取所有计算中心
|
||||
//调度算法
|
||||
strategy := strategy.NewPricingStrategy(¶m.ResourcePricingParams{})
|
||||
return strategy, nil
|
||||
if len(vm.option.ClusterIds) == 1 {
|
||||
// TODO database operation Find
|
||||
return &strategy.SingleAssignment{Cluster: &strategy.AssignedCluster{ClusterId: vm.option.ClusterIds[0], Replicas: 1}}, nil
|
||||
}
|
||||
//resources, err := vm.findClustersWithResources()
|
||||
|
||||
/* if err != nil {
|
||||
return nil, err
|
||||
}*/
|
||||
|
||||
/* if len(resources) == 0 {
|
||||
return nil, errors.New("no cluster has resources")
|
||||
}*/
|
||||
//
|
||||
//if len(resources) == 1 {
|
||||
// var cluster strategy.AssignedCluster
|
||||
// cluster.ClusterId = resources[0].ClusterId
|
||||
// cluster.Replicas = 1
|
||||
// return &strategy.SingleAssignment{Cluster: &cluster}, nil
|
||||
//}
|
||||
//params := ¶m.Params{Resources: resources}
|
||||
|
||||
switch vm.option.Strategy {
|
||||
/* case strategy.REPLICATION:
|
||||
var clusterIds []string
|
||||
for _, resource := range resources {
|
||||
clusterIds = append(clusterIds, resource.ClusterId)
|
||||
}
|
||||
strategy := strategy.NewReplicationStrategy(clusterIds, 1)
|
||||
return strategy, nil
|
||||
case strategy.RESOURCES_PRICING:
|
||||
strategy := strategy.NewPricingStrategy(¶m.ResourcePricingParams{Params: params, Replicas: 1})
|
||||
return strategy, nil
|
||||
case strategy.DYNAMIC_RESOURCES:
|
||||
strategy := strategy.NewDynamicResourcesStrategy(params.Resources, vm.option, 1)
|
||||
return strategy, nil*/
|
||||
case strategy.STATIC_WEIGHT:
|
||||
//todo resources should match cluster StaticWeightMap
|
||||
strategy := strategy.NewStaticWeightStrategy(vm.option.ClusterToStaticWeight, 1)
|
||||
return strategy, nil
|
||||
}
|
||||
|
||||
/*strategy := strategy.NewPricingStrategy(¶m.ResourcePricingParams{})
|
||||
return strategy, nil*/
|
||||
|
||||
return nil, errors.New("no strategy has been chosen")
|
||||
}
|
||||
|
||||
func (v *VmScheduler) GetNewStructForDb(task *response.TaskInfo, resource string, participantId int64) (interface{}, error) {
|
||||
|
@ -41,12 +108,6 @@ func (v *VmScheduler) GetNewStructForDb(task *response.TaskInfo, resource string
|
|||
vm.ParticipantId = participantId*/
|
||||
}
|
||||
|
||||
/*
|
||||
func (vm *VmScheduler) UnMarshalVmStruct(yamlString string, taskId int64, nsID string) models.vm {
|
||||
var vm models.Vm
|
||||
vm := kyaml.NewYAMLOrJSONDecoder(bytes.NewBufferString(yamlString), 4096)
|
||||
}
|
||||
*/
|
||||
func (vm *VmScheduler) genTaskAndProviders() (*providerPricing.Task, []*providerPricing.Provider, error) {
|
||||
proParams, err := vm.storage.GetProviderParams()
|
||||
if err != nil {
|
||||
|
@ -64,7 +125,38 @@ func (vm *VmScheduler) genTaskAndProviders() (*providerPricing.Task, []*provider
|
|||
return nil, providerList, nil
|
||||
}
|
||||
|
||||
func (v VmScheduler) AssignTask(clusters []*strategy.AssignedCluster) (interface{}, error) {
|
||||
func (as *VmScheduler) AssignTask(clusters []*strategy.AssignedCluster) (interface{}, error) {
|
||||
//TODO implement me
|
||||
panic("implement me")
|
||||
if clusters == nil {
|
||||
return nil, errors.New("clusters is nil")
|
||||
}
|
||||
|
||||
for i := len(clusters) - 1; i >= 0; i-- {
|
||||
if clusters[i].Replicas == 0 {
|
||||
clusters = append(clusters[:i], clusters[i+1:]...)
|
||||
}
|
||||
}
|
||||
|
||||
if len(clusters) == 0 {
|
||||
return nil, errors.New("clusters is nil")
|
||||
}
|
||||
|
||||
var results []*VmResult
|
||||
|
||||
for _, cluster := range clusters {
|
||||
cName := ""
|
||||
as.dbEngin.Table("t_cluster").Select("name").Where("id=?", cluster.ClusterId).Find(&cName)
|
||||
cr := VmResult{
|
||||
ClusterId: cluster.ClusterId,
|
||||
ClusterName: cName,
|
||||
Replica: cluster.Replicas,
|
||||
}
|
||||
cr.ClusterId = cluster.ClusterId
|
||||
cr.Replica = cluster.Replicas
|
||||
|
||||
cr.ClusterName = cName
|
||||
results = append(results, &cr)
|
||||
}
|
||||
|
||||
return results, nil
|
||||
}
|
||||
|
|
|
@ -7,6 +7,9 @@ type AiCollector interface {
|
|||
GetDatasetsSpecs(ctx context.Context) ([]*DatasetsSpecs, error)
|
||||
GetAlgorithms(ctx context.Context) ([]*Algorithm, error)
|
||||
GetTrainingTaskLog(ctx context.Context, taskId string, instanceNum string) (string, error)
|
||||
GetTrainingTask(ctx context.Context, taskId string) (*Task, error)
|
||||
DownloadAlgorithmCode(ctx context.Context, resourceType string, taskType string, dataset string, algorithm string) (string, error)
|
||||
UploadAlgorithmCode(ctx context.Context, resourceType string, taskType string, dataset string, algorithm string, code string) error
|
||||
}
|
||||
|
||||
type ResourceStats struct {
|
||||
|
@ -19,6 +22,7 @@ type ResourceStats struct {
|
|||
DiskAvail float64
|
||||
DiskTotal float64
|
||||
GpuAvail int64
|
||||
GpuTotal int64
|
||||
CardsAvail []*Card
|
||||
CpuCoreHours float64
|
||||
Balance float64
|
||||
|
@ -43,3 +47,10 @@ type Algorithm struct {
|
|||
Platform string
|
||||
TaskType string
|
||||
}
|
||||
|
||||
// Task is the platform-neutral view of a training task returned by
// AiCollector.GetTrainingTask.
type Task struct {
|
||||
// Id is the task identifier reported by the platform.
Id string
|
||||
// Start and End are timestamps rendered as strings; the format depends on
// the adapter that filled them in.
Start string
|
||||
End string
|
||||
// Status is the task state as set by the adapter.
Status string
|
||||
}
|
||||
|
|
|
@ -162,10 +162,22 @@ func (m *ModelArtsLink) GetAlgorithms(ctx context.Context) ([]*collector.Algorit
|
|||
return nil, nil
|
||||
}
|
||||
|
||||
// DownloadAlgorithmCode is a placeholder for ModelArts: it ignores its
// arguments and always returns an empty string and a nil error.
func (m *ModelArtsLink) DownloadAlgorithmCode(ctx context.Context, resourceType string, taskType string, dataset string, algorithm string) (string, error) {
|
||||
return "", nil
|
||||
}
|
||||
|
||||
// UploadAlgorithmCode is a placeholder for ModelArts: it ignores its arguments,
// uploads nothing and always returns nil.
func (m *ModelArtsLink) UploadAlgorithmCode(ctx context.Context, resourceType string, taskType string, dataset string, algorithm string, code string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetTrainingTaskLog is a placeholder for ModelArts: it ignores its arguments
// and always returns an empty log and a nil error.
func (m *ModelArtsLink) GetTrainingTaskLog(ctx context.Context, taskId string, instanceNum string) (string, error) {
|
||||
return "", nil
|
||||
}
|
||||
|
||||
// GetTrainingTask is a placeholder for ModelArts: it always returns a nil task
// together with a nil error, so callers must check the task for nil before
// using it.
func (m *ModelArtsLink) GetTrainingTask(ctx context.Context, taskId string) (*collector.Task, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (m *ModelArtsLink) Execute(ctx context.Context, option *option.AiOption) (interface{}, error) {
|
||||
err := m.GenerateSubmitParams(ctx, option)
|
||||
if err != nil {
|
||||
|
|
|
@ -19,12 +19,14 @@ import (
|
|||
"errors"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/collector"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
|
||||
"gitlink.org.cn/JointCloud/pcm-octopus/octopus"
|
||||
"gitlink.org.cn/JointCloud/pcm-octopus/octopusclient"
|
||||
"math"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
type OctopusLink struct {
|
||||
|
@ -337,6 +339,14 @@ func (o *OctopusLink) GetAlgorithms(ctx context.Context) ([]*collector.Algorithm
|
|||
return algorithms, nil
|
||||
}
|
||||
|
||||
// DownloadAlgorithmCode is a placeholder for Octopus: it ignores its arguments
// and always returns an empty string and a nil error.
func (o *OctopusLink) DownloadAlgorithmCode(ctx context.Context, resourceType string, taskType string, dataset string, algorithm string) (string, error) {
|
||||
return "", nil
|
||||
}
|
||||
|
||||
// UploadAlgorithmCode is a placeholder for Octopus: it ignores its arguments,
// uploads nothing and always returns nil.
func (o *OctopusLink) UploadAlgorithmCode(ctx context.Context, resourceType string, taskType string, dataset string, algorithm string, code string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (o *OctopusLink) GetTrainingTaskLog(ctx context.Context, taskId string, instanceNum string) (string, error) {
|
||||
instance, err := strconv.ParseInt(instanceNum, 10, 32)
|
||||
if err != nil {
|
||||
|
@ -356,6 +366,35 @@ func (o *OctopusLink) GetTrainingTaskLog(ctx context.Context, taskId string, ins
|
|||
return resp.Content, nil
|
||||
}
|
||||
|
||||
func (o *OctopusLink) GetTrainingTask(ctx context.Context, taskId string) (*collector.Task, error) {
|
||||
resp, err := o.QueryTask(ctx, taskId)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
jobresp := (resp).(*octopus.GetTrainJobResp)
|
||||
if !jobresp.Success {
|
||||
return nil, errors.New(jobresp.Error.Message)
|
||||
}
|
||||
var task collector.Task
|
||||
task.Id = jobresp.Payload.TrainJob.Id
|
||||
task.Start = time.Unix(jobresp.Payload.TrainJob.StartedAt, 0).Format(constants.Layout)
|
||||
task.End = time.Unix(jobresp.Payload.TrainJob.CompletedAt, 0).Format(constants.Layout)
|
||||
switch jobresp.Payload.TrainJob.Status {
|
||||
case "succeeded":
|
||||
task.Status = constants.Completed
|
||||
case "failed":
|
||||
task.Status = constants.Failed
|
||||
case "running":
|
||||
task.Status = constants.Running
|
||||
case "stopped":
|
||||
task.Status = constants.Stopped
|
||||
default:
|
||||
task.Status = "undefined"
|
||||
}
|
||||
|
||||
return &task, nil
|
||||
}
|
||||
|
||||
func (o *OctopusLink) Execute(ctx context.Context, option *option.AiOption) (interface{}, error) {
|
||||
err := o.GenerateSubmitParams(ctx, option)
|
||||
if err != nil {
|
||||
|
|
|
@ -447,6 +447,14 @@ func (s *ShuguangAi) GetAlgorithms(ctx context.Context) ([]*collector.Algorithm,
|
|||
return algorithms, nil
|
||||
}
|
||||
|
||||
// DownloadAlgorithmCode is a placeholder for Shuguang AI: it ignores its
// arguments and always returns an empty string and a nil error.
func (s *ShuguangAi) DownloadAlgorithmCode(ctx context.Context, resourceType string, taskType string, dataset string, algorithm string) (string, error) {
|
||||
return "", nil
|
||||
}
|
||||
|
||||
// UploadAlgorithmCode is a placeholder for Shuguang AI: it ignores its
// arguments, uploads nothing and always returns nil.
func (s *ShuguangAi) UploadAlgorithmCode(ctx context.Context, resourceType string, taskType string, dataset string, algorithm string, code string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *ShuguangAi) GetTrainingTaskLog(ctx context.Context, taskId string, instanceNum string) (string, error) {
|
||||
req := &hpcAC.GetInstanceLogReq{
|
||||
TaskId: taskId,
|
||||
|
@ -465,6 +473,24 @@ func (s *ShuguangAi) GetTrainingTaskLog(ctx context.Context, taskId string, inst
|
|||
return resp.Data.Content, nil
|
||||
}
|
||||
|
||||
func (s *ShuguangAi) GetTrainingTask(ctx context.Context, taskId string) (*collector.Task, error) {
|
||||
resp, err := s.QueryTask(ctx, taskId)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
jobresp := (resp).(*hpcAC.GetPytorchTaskResp)
|
||||
if jobresp.Code != "0" {
|
||||
return nil, errors.New(jobresp.Msg)
|
||||
}
|
||||
var task collector.Task
|
||||
task.Id = jobresp.Data.Id
|
||||
task.Start = jobresp.Data.StartTime
|
||||
task.End = jobresp.Data.EndTime
|
||||
task.Status = jobresp.Data.Status
|
||||
|
||||
return &task, nil
|
||||
}
|
||||
|
||||
func (s *ShuguangAi) Execute(ctx context.Context, option *option.AiOption) (interface{}, error) {
|
||||
err := s.GenerateSubmitParams(ctx, option)
|
||||
if err != nil {
|
||||
|
|
|
@ -141,23 +141,13 @@ type Region struct {
|
|||
}
|
||||
|
||||
type GeneralTaskReq struct {
|
||||
Name string `json:"name"`
|
||||
ComputeType string `json:"computeType"`
|
||||
TemplateId string `json:"templateId"`
|
||||
AdapterId string `json:"adapterId"`
|
||||
ClusterIds []string `json:"clusterIds"`
|
||||
Strategy Strategy `json:"strategy"`
|
||||
ReqBody []string `json:"reqBody"`
|
||||
}
|
||||
|
||||
type Strategy struct {
|
||||
Name string `json:"name"`
|
||||
StaticWeightList []StaticWeightList `json:"staticWeightList"`
|
||||
}
|
||||
|
||||
type StaticWeightList struct {
|
||||
ClusterName string `json:"clusterName"`
|
||||
Weight int `json:"weight"`
|
||||
Name string `json:"name"`
|
||||
AdapterIds []string `json:"adapterIds"`
|
||||
ClusterIds []string `json:"clusterIds"`
|
||||
Strategy string `json:"strategy"`
|
||||
StaticWeightMap map[string]int32 `json:"staticWeightMap,optional"`
|
||||
ReqBody []string `json:"reqBody"`
|
||||
Replicas int64 `json:"replicas,string"`
|
||||
}
|
||||
|
||||
type DeleteTaskReq struct {
|
||||
|
@ -192,13 +182,20 @@ type TaskYaml struct {
|
|||
}
|
||||
|
||||
type CommitVmTaskReq struct {
|
||||
Name string `json:"name"`
|
||||
NsID string `json:"nsID"`
|
||||
Replicas int64 `json:"replicas,optional"`
|
||||
MatchLabels map[string]string `json:"matchLabels,optional"`
|
||||
AdapterId string `json:"adapterId,optional"`
|
||||
ClusterType string `json:"clusterType,optional"`
|
||||
CreateMulServer []CreateMulDomainServer `json:"createMulServer,optional"`
|
||||
VmOption *VmOption `json:"vmOption,optional"`
|
||||
}
|
||||
|
||||
type VmOption struct {
|
||||
AdapterId string `json:"adapterId"`
|
||||
VmClusterIds []string `json:"vmClusterIds"`
|
||||
Replicas int64 `json:"replicas,optional"`
|
||||
Name string `json:"name"`
|
||||
Strategy string `json:"strategy"`
|
||||
ClusterToStaticWeight map[string]int32 `json:"clusterToStaticWeight"`
|
||||
MatchLabels map[string]string `json:"matchLabels,optional"`
|
||||
StaticWeightMap map[string]int32 `json:"staticWeightMap,optional"`
|
||||
CreateMulServer []CreateMulDomainServer `json:"createMulServer,optional"`
|
||||
}
|
||||
|
||||
type CreateMulDomainServer struct {
|
||||
|
@ -208,12 +205,20 @@ type CreateMulDomainServer struct {
|
|||
ImageRef string `json:"imageRef,optional"`
|
||||
FlavorRef string `json:"flavorRef,optional"`
|
||||
Uuid string `json:"uuid,optional"`
|
||||
ClusterId string `json:"clusterId,optional"`
|
||||
}
|
||||
|
||||
type CommitVmTaskResp struct {
|
||||
TaskId int64 `json:"taskId"`
|
||||
Code int32 `json:"code"`
|
||||
Msg string `json:"msg"`
|
||||
Code int32 `json:"code"`
|
||||
Msg string `json:"msg"`
|
||||
}
|
||||
|
||||
type ScheduleVmResult struct {
|
||||
ClusterId string `json:"clusterId"`
|
||||
TaskId string `json:"taskId"`
|
||||
Strategy string `json:"strategy"`
|
||||
Replica int32 `json:"replica"`
|
||||
Msg string `json:"msg"`
|
||||
}
|
||||
|
||||
type VmTask struct {
|
||||
|
@ -298,22 +303,23 @@ type PageTaskReq struct {
|
|||
}
|
||||
|
||||
// TaskModel is the API representation of a task row.
type TaskModel struct {
	Id              int64  `json:"id,omitempty,string" db:"id"`                                                   // id
	Name            string `json:"name,omitempty" db:"name"`                                                      // job name
	Description     string `json:"description,omitempty" db:"description"`                                        // job description
	Status          string `json:"status,omitempty" db:"status"`                                                  // job status
	Strategy        int64  `json:"strategy" db:"strategy"`                                                        // strategy
	SynergyStatus   int64  `json:"synergyStatus" db:"synergy_status"`                                             // synergy status (0 = not coordinated, 1 = coordinated)
	CommitTime      string `json:"commitTime,omitempty" db:"commit_time"`                                         // commit time
	StartTime       string `json:"startTime,omitempty" db:"start_time"`                                           // start time
	EndTime         string `json:"endTime,omitempty" db:"end_time"`                                               // end time
	RunningTime     int64  `json:"runningTime" db:"running_time"`                                                 // elapsed running time, in seconds
	YamlString      string `json:"yamlString,omitempty" db:"yaml_string"`                                         // task definition
	Result          string `json:"result,omitempty" db:"result"`                                                  // job result
	DeletedAt       string `json:"deletedAt,omitempty" gorm:"index" db:"deleted_at"`                              // deletion time
	NsID            string `json:"nsId,omitempty" db:"ns_id"`                                                     // namespace id
	TenantId        string `json:"tenantId,omitempty" db:"tenant_id"`                                             // tenant id
	CreateTime      string `json:"createTime,omitempty" db:"create_time" gorm:"autoCreateTime"`                   // creation time
	AdapterTypeDict int    `json:"adapterTypeDict" db:"adapter_type_dict" gorm:"column:adapter_type_dict"`        // task type (value from the dictionary table)
}
|
||||
|
||||
type TaskDetailReq struct {
|
||||
|
@ -970,9 +976,9 @@ type HpcInfo struct {
|
|||
Environment string `json:"environment"`
|
||||
DeletedFlag int64 `json:"deleted_flag"` // 是否删除(0-否,1-是)
|
||||
CreatedBy int64 `json:"created_by"` // 创建人
|
||||
CreatedTime string `json:"created_time"` // 创建时间
|
||||
CreateTime string `json:"created_time"` // 创建时间
|
||||
UpdatedBy int64 `json:"updated_by"` // 更新人
|
||||
UpdatedTime string `json:"updated_time"` // 更新时间
|
||||
UpdateTime string `json:"updated_time"` // 更新时间
|
||||
}
|
||||
|
||||
type CloudInfo struct {
|
||||
|
@ -1114,7 +1120,17 @@ type TaskStatusResp struct {
|
|||
Succeeded int `json:"Succeeded"`
|
||||
Failed int `json:"Failed"`
|
||||
Running int `json:"Running"`
|
||||
Pause int `json:"Pause"`
|
||||
Saved int `json:"Saved"`
|
||||
}
|
||||
|
||||
type TaskDetailsResp struct {
|
||||
Name string `json:"name"`
|
||||
Description string `json:"description"`
|
||||
StartTime string `json:"startTime"`
|
||||
EndTime string `json:"endTime"`
|
||||
Strategy int64 `json:"strategy"`
|
||||
SynergyStatus int64 `json:"synergyStatus"`
|
||||
ClusterInfos []*ClusterInfo `json:"clusterInfos"`
|
||||
}
|
||||
|
||||
type CommitHpcTaskReq struct {
|
||||
|
@ -2765,6 +2781,43 @@ type Nfs struct {
|
|||
ReadOnly bool `json:"readOnly,optional"`
|
||||
}
|
||||
|
||||
type CenterOverviewResp struct {
|
||||
CenterNum int32 `json:"totalCenters,optional"`
|
||||
TaskNum int32 `json:"totalTasks,optional"`
|
||||
CardNum int32 `json:"totalCards,optional"`
|
||||
PowerInTops float64 `json:"totalPower,optional"`
|
||||
}
|
||||
|
||||
type CenterQueueingResp struct {
|
||||
Current []*CenterQueue `json:"current,optional"`
|
||||
History []*CenterQueue `json:"history,optional"`
|
||||
}
|
||||
|
||||
type CenterQueue struct {
|
||||
Name string `json:"name,optional"`
|
||||
QueueingNum int32 `json:"num,optional"`
|
||||
}
|
||||
|
||||
type CenterListResp struct {
|
||||
List []*AiCenter `json:"centerList,optional"`
|
||||
}
|
||||
|
||||
type AiCenter struct {
|
||||
Name string `json:"name,optional"`
|
||||
StackName string `json:"stack,optional"`
|
||||
Version string `json:"version,optional"`
|
||||
}
|
||||
|
||||
type CenterTaskListResp struct {
|
||||
List []*AiTask `json:"taskList,optional"`
|
||||
}
|
||||
|
||||
type AiTask struct {
|
||||
Name string `json:"name,optional"`
|
||||
Status string `json:"status,optional"`
|
||||
TimeElapsed int32 `json:"elapsed,optional"`
|
||||
}
|
||||
|
||||
type StorageScreenReq struct {
|
||||
}
|
||||
|
||||
|
@ -5347,9 +5400,9 @@ type TenantInfo struct {
|
|||
Type int64 `json:"type"` // 租户所属(0数算,1超算,2智算)
|
||||
DeletedFlag int64 `json:"deletedFlag"` // 是否删除
|
||||
CreatedBy int64 `json:"createdBy"` // 创建人
|
||||
CreatedTime string `json:"createdTime"` // 创建时间
|
||||
CreateTime string `json:"createdTime"` // 创建时间
|
||||
UpdatedBy int64 `json:"updatedBy"` // 更新人
|
||||
UpdatedTime string `json:"updated_time"` // 更新时间
|
||||
UpdateTime string `json:"updated_time"` // 更新时间
|
||||
}
|
||||
|
||||
type UpdateTenantReq struct {
|
||||
|
@ -5403,7 +5456,7 @@ type Cloud struct {
|
|||
StartTime string `json:"startTime"` // 开始时间
|
||||
RunningTime int64 `json:"runningTime"` // 运行时长
|
||||
CreatedBy int64 `json:"createdBy"` // 创建人
|
||||
CreatedTime string `json:"createdTime"` // 创建时间
|
||||
CreateTime string `json:"createdTime"` // 创建时间
|
||||
Result string `json:"result"`
|
||||
}
|
||||
|
||||
|
@ -5546,6 +5599,9 @@ type ScheduleResult struct {
|
|||
Msg string `json:"msg"`
|
||||
}
|
||||
|
||||
type ScheduleOverviewResp struct {
|
||||
}
|
||||
|
||||
type AiOption struct {
|
||||
TaskName string `json:"taskName"`
|
||||
AdapterId string `json:"adapterId"`
|
||||
|
@ -5604,6 +5660,22 @@ type AiJobLogResp struct {
|
|||
Log string `json:"log"`
|
||||
}
|
||||
|
||||
type AiTaskDb struct {
|
||||
Id string `json:"id,omitempty" db:"id"`
|
||||
TaskId string `json:"taskId,omitempty" db:"task_id"`
|
||||
AdapterId string `json:"adapterId,omitempty" db:"adapter_id"`
|
||||
ClusterId string `json:"clusterId,omitempty" db:"cluster_id"`
|
||||
Name string `json:"name,omitempty" db:"name"`
|
||||
Replica string `json:"replica,omitempty" db:"replica"`
|
||||
ClusterTaskId string `json:"clusterTaskId,omitempty" db:"c_task_id"`
|
||||
Strategy string `json:"strategy,omitempty" db:"strategy"`
|
||||
Status string `json:"status,omitempty" db:"status"`
|
||||
Msg string `json:"msg,omitempty" db:"msg"`
|
||||
CommitTime string `json:"commitTime,omitempty" db:"commit_time"`
|
||||
StartTime string `json:"startTime,omitempty" db:"start_time"`
|
||||
EndTime string `json:"endTime,omitempty" db:"end_time"`
|
||||
}
|
||||
|
||||
type CreateAlertRuleReq struct {
|
||||
CLusterId string `json:"clusterId"`
|
||||
ClusterName string `json:"clusterName"`
|
||||
|
@ -5680,3 +5752,25 @@ type AdapterInfoResp struct {
|
|||
Name string `json:"name"`
|
||||
Version string `json:"version"`
|
||||
}
|
||||
|
||||
type ScheduleSituationResp struct {
|
||||
Nodes []NodeRegion `json:"nodes"`
|
||||
Links []Link `json:"links"`
|
||||
Categories []Category `json:"categories"`
|
||||
}
|
||||
|
||||
type NodeRegion struct {
|
||||
Id string `json:"id"`
|
||||
Name string `json:"name"`
|
||||
Category int `json:"category"`
|
||||
Value int `json:"value"`
|
||||
}
|
||||
|
||||
type Link struct {
|
||||
Source string `json:"source"`
|
||||
Target string `json:"target"`
|
||||
}
|
||||
|
||||
type Category struct {
|
||||
Name string `json:"name"`
|
||||
}
|
||||
|
|
File diff suppressed because one or more lines are too long
Binary file not shown.
File diff suppressed because it is too large
Load Diff
|
@ -1,41 +1,116 @@
|
|||
|
||||
|
||||
## 1 安装部署kubekey
|
||||
通过以下的命令,可以下载 KubeKey 的最新版本。您可以更改命令中的版本号来下载特定的版本。
|
||||
|
||||
```
|
||||
```shell
|
||||
export KKZONE=cn
|
||||
curl -sfL https://get-kk.kubesphere.io | VERSION=v3.0.7 sh -
|
||||
```
|
||||
## 2 mysql部署及数据导入
|
||||
#### 卸载已有的mariadb
|
||||
`yum remove -y mariadb-server mariadb mariadb-libs`
|
||||
#### 下载对应系统版本的mysql包
|
||||
`wget https://dev.mysql.com/get/Downloads/MySQL-8.0/mysql-8.0.36-1.el7.x86_64.rpm-bundle.tar`
|
||||
##### 解压
|
||||
`tar -xvf mysql-8.0.36-1.el7.x86_64.rpm-bundle.tar`
|
||||
##### 安装
|
||||
```shell
|
||||
rpm -ivh mysql-community-libs-8.0.36-1.el7.x86_64.rpm
|
||||
rpm -ivh mysql-community-libs-compat-8.0.36-1.el7.x86_64.rpm
|
||||
rpm -ivh mysql-community-client-plugins-8.0.36-1.el7.x86_64.rpm
|
||||
rpm -ivh mysql-community-client-8.0.36-1.el7.x86_64.rpm
|
||||
rpm -ivh mysql-community-common-8.0.36-1.el7.x86_64.rpm
|
||||
rpm -ivh mysql-community-debuginfo-8.0.36-1.el7.x86_64.rpm
|
||||
rpm -ivh mysql-community-devel-8.0.36-1.el7.x86_64.rpm
|
||||
rpm -ivh mysql-community-server-8.0.36-1.el7.x86_64.rpm
|
||||
```
|
||||
##### 启动服务
|
||||
`systemctl start mysqld`
|
||||
##### 查看初始密码
|
||||
`grep 'temporary password' /var/log/mysqld.log`
|
||||
使用 `mysql -u root -p` 登录(输入上一步查看到的初始密码)
|
||||
##### 修改密码
|
||||
`ALTER USER 'root'@'localhost' IDENTIFIED BY 'Nudt!123';`
|
||||
##### 配置外部访问
|
||||
```sql
|
||||
use mysql;
|
||||
update user set host = '%' where user = 'root';
|
||||
flush privileges;
|
||||
```
|
||||
##### 创建数据库
|
||||
```sql
|
||||
create database pcm;
|
||||
create database pcm_auth;
|
||||
```
|
||||
##### 关闭防火墙
|
||||
`systemctl stop firewalld`
|
||||
##### 下载脚本
|
||||
`wget -O pcm_auth.sql https://www.gitlink.org.cn/attachments/entries/get_file?download_url=https://www.gitlink.org.cn/api/JointCloud/pcm-coordinator/raw/deploy%2Fpcm-auth.sql?ref=master`
|
||||
`wget -O pcm.sql https://www.gitlink.org.cn/attachments/entries/get_file?download_url=https://www.gitlink.org.cn/api/JointCloud/pcm-coordinator/raw/deploy%2Fpcm.sql?ref=master`
|
||||
|
||||
## 2 安装部署k8s集群
|
||||
##### 执行sql脚本导入数据
|
||||
`mysql -u root -p pcm < pcm.sql`
|
||||
`mysql -u root -p pcm_auth < pcm_auth.sql`
|
||||
|
||||
## 3 安装部署k8s集群
|
||||
```
|
||||
./kk create cluster
|
||||
export KKZONE=cn
|
||||
sudo ./kk create cluster
|
||||
```
|
||||
|
||||
执行可能会提示部分软件未安装,直接yum安装即可
|
||||

|
||||
然后重新执行创建集群命令,执行成功后可以验证环境
|
||||

|
||||
eg:
|
||||
`sudo yum install -y conntrack`
|
||||
`sudo yum install -y socat`
|
||||

|
||||
|
||||
然后重新执行创建集群命令,执行成功后可以执行kubectl get pod 验证环境
|
||||

|
||||
|
||||
|
||||
## 3 部署鉴权、pcm-coordinator、前端服务
|
||||
### 3.1 yaml文件下载
|
||||
pcm所有服务的yaml文件包下载地址在[这里](https://www.gitlink.org.cn/attachments/entries/get_file?download_url=https://www.gitlink.org.cn/api/JointCloud/pcm-coordinator/raw/deploy%2Fpcm-yaml.zip?ref=master "这里")
|
||||
或者在服务器上直接执行
|
||||
```shell
|
||||
wget -O yaml.zip https://www.gitlink.org.cn/attachments/entries/get_file?download_url=https://www.gitlink.org.cn/api/JointCloud/pcm-coordinator/raw/deploy%2Fpcm-yaml.zip?ref=master
|
||||
```
|
||||
下载完成解压
|
||||
```shell
|
||||
unzip yaml.zip
|
||||
```
|
||||
### 3.2 yaml执行完成服务、负载、配置文件的部署
|
||||
#### 修改地址
|
||||
需要修改配置文件中的数据库地址为mysql服务安装的地址
|
||||
|
||||
yaml文件下载链接:https://pan.baidu.com/s/1VU1zE2xcFkrz9Hz2MkgDaQ
|
||||
#### 一次性部署所有的文件
|
||||
```shell
|
||||
kubectl apply -f .
|
||||
```
|
||||
#### 或者单模块部署
|
||||
##### 鉴权:
|
||||
`kubectl apply -f pcm-auth.yaml`
|
||||
##### C端:
|
||||
`kubectl apply -f pcm-core-api.yaml`
|
||||
`kubectl apply -f pcm-core-rpc.yaml`
|
||||
##### 前端:
|
||||
`kubectl apply -f pcm-rip.yaml`
|
||||
|
||||
鉴权:
|
||||
kubectl apply -f pcm-auth.yaml
|
||||
C端:
|
||||
kubectl apply -f pcm-core-api.yaml
|
||||
kubectl apply -f pcm-core-rpc.yaml
|
||||
前端:
|
||||
kubectl apply -f pcm-rip.yaml
|
||||
部署情况可以通过以下命令查看
|
||||
`kubectl get pod`
|
||||

|
||||
|
||||
## 4 配置驱动器、集群信息
|
||||
此时前端服务可以通过服务器ip的31149端口访问到
|
||||
默认账号密码为admin/Nudt@123
|
||||
|
||||
新建一个适配器,配置成功后可以获取到对应的adapterId
|
||||

|
||||

|
||||
将对应的id填写到对应的P端配置信息中(configmap 内容)
|
||||

|
||||

|
||||
## 5 部署P端服务
|
||||
P端:
|
||||
### HPC服务端:
|
||||
`kubectl apply -f pcm-hpc.yaml`
|
||||
### kubernetes适配器:
|
||||
`kubectl apply -f pcm-kubernetes.yaml`
|
||||
|
||||
## 7 系统使用
|
2
go.mod
2
go.mod
|
@ -24,7 +24,7 @@ require (
|
|||
github.com/robfig/cron/v3 v3.0.1
|
||||
github.com/rs/zerolog v1.28.0
|
||||
github.com/zeromicro/go-zero v1.6.3
|
||||
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240420083915-58d6e2958aeb
|
||||
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240426095603-549fefd8bece
|
||||
gitlink.org.cn/JointCloud/pcm-kubernetes v0.0.0-20240301071143-347480abff2c
|
||||
gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240424085753-6899615e9142
|
||||
gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240403033338-e7edabad4203
|
||||
|
|
4
go.sum
4
go.sum
|
@ -1078,8 +1078,8 @@ github.com/yuin/gopher-lua v1.1.0/go.mod h1:GBR0iDaNXjAgGg9zfCvksxSRnQx76gclCIb7
|
|||
github.com/zeromicro/go-zero v1.5.1/go.mod h1:bGYm4XWsGN9GhDsO2O2BngpVoWjf3Eog2a5hUOMhlXs=
|
||||
github.com/zeromicro/go-zero v1.6.3 h1:OL0NnHD5LdRNDolfcK9vUkJt7K8TcBE3RkzfM8poOVw=
|
||||
github.com/zeromicro/go-zero v1.6.3/go.mod h1:XZL435ZxVi9MSXXtw2MRQhHgx6OoX3++MRMOE9xU70c=
|
||||
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240420083915-58d6e2958aeb h1:k6mNEWKp+haQUaK2dWs/rI9OKgzJHY1/9KNKuBDN0Vw=
|
||||
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240420083915-58d6e2958aeb/go.mod h1:w3Nb5TNymCItQ7K3x4Q0JLuoq9OerwAzAWT2zsPE9Xo=
|
||||
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240426095603-549fefd8bece h1:W3yBnvAVV8dlRNQKYD6Mf8ySRrYsP0tPk7JjvqZzNHQ=
|
||||
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240426095603-549fefd8bece/go.mod h1:w3Nb5TNymCItQ7K3x4Q0JLuoq9OerwAzAWT2zsPE9Xo=
|
||||
gitlink.org.cn/JointCloud/pcm-kubernetes v0.0.0-20240301071143-347480abff2c h1:2Wl/hvaSFjh6fmCSIQhjkr9llMRREQeqcXNLZ/HPY18=
|
||||
gitlink.org.cn/JointCloud/pcm-kubernetes v0.0.0-20240301071143-347480abff2c/go.mod h1:lSRfGs+PxFvw7CcndHWRd6UlLlGrZn0b0hp5cfaMNGw=
|
||||
gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240424085753-6899615e9142 h1:+po0nesBDSWsgCySBG7eEXk7i9Ytd58wqvjL1M9y6d8=
|
||||
|
|
|
@ -26,4 +26,6 @@ const (
|
|||
WaitRestart = "WaitRestart"
|
||||
WaitPause = "WaitPause"
|
||||
WaitStart = "WaitStart"
|
||||
Pending = "Pending"
|
||||
Stopped = "Stopped"
|
||||
)
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
package constants
|
||||
|
||||
// Layout is the Go reference-time layout for "year-month-day hour:minute:second"
// timestamps, for use with time.Format and time.Parse.
const Layout = "2006-01-02 15:04:05"
|
|
@ -6,9 +6,9 @@ import (
|
|||
)
|
||||
|
||||
type BaseModel struct {
|
||||
DeletedAt gorm.DeletedAt `gorm:"index;comment:删除时间" json:"-"` // 删除时间
|
||||
CreatedBy uint `gorm:"created_by;comment:创建人" json:"createdBy"` //创建人
|
||||
CreatedTime time.Time `gorm:"comment:创建时间" json:"-"` // 创建时间
|
||||
UpdatedBy uint `gorm:"updated_by;comment:更新人" json:"UpdatedBy"` //创建人
|
||||
UpdatedTime time.Time `gorm:"comment:更新时间" json:"-"` // 更新时间
|
||||
DeletedAt gorm.DeletedAt `gorm:"index;comment:删除时间" json:"-"` // 删除时间
|
||||
CreatedBy uint `gorm:"created_by;comment:创建人" json:"createdBy"` //创建人
|
||||
CreateTime time.Time `gorm:"autoCreateTime:nano;comment:创建时间" json:"-"` // 创建时间
|
||||
UpdatedBy uint `gorm:"updated_by;comment:更新人" json:"UpdatedBy"` //创建人
|
||||
UpdateTime time.Time `gorm:"autoUpdateTime:nano;;comment:更新时间" json:"-"` // 更新时间
|
||||
}
|
||||
|
|
|
@ -6,18 +6,17 @@ import (
|
|||
)
|
||||
|
||||
type TaskCloudModel struct {
|
||||
Id uint `json:"id" gorm:"primarykey;not null;comment:id"`
|
||||
TaskId uint `json:"taskId" gorm:"not null;comment:task表id"`
|
||||
AdapterId uint `json:"adapterId" gorm:"not null;comment:适配器id"`
|
||||
ClusterId uint `json:"clusterId" gorm:"not null;comment:集群id"`
|
||||
ClusterName string `json:"clusterName" gorm:"not null;comment:集群名称"`
|
||||
Kind string `json:"kind" gorm:"comment:种类"`
|
||||
Status string `json:"status" gorm:"comment:状态"`
|
||||
StartTime time.Time `json:"startTime" gorm:"comment:开始时间"`
|
||||
YamlString string `json:"yamlString" gorm:"not null;comment:入参"`
|
||||
Result string `json:"result" gorm:"comment:运行结果"`
|
||||
Namespace string `json:"namespace" gorm:"comment:命名空间"`
|
||||
Replica int `json:"replica" gorm:"not null;comment:副本数"`
|
||||
Id uint `json:"id" gorm:"primarykey;not null;comment:id"`
|
||||
TaskId uint `json:"taskId" gorm:"not null;comment:task表id"`
|
||||
AdapterId uint `json:"adapterId" gorm:"not null;comment:适配器id"`
|
||||
ClusterId uint `json:"clusterId" gorm:"not null;comment:集群id"`
|
||||
ClusterName string `json:"clusterName" gorm:"not null;comment:集群名称"`
|
||||
Kind string `json:"kind" gorm:"comment:种类"`
|
||||
Status string `json:"status" gorm:"comment:状态"`
|
||||
StartTime *time.Time `json:"startTime,string" gorm:"comment:开始时间"`
|
||||
YamlString string `json:"yamlString" gorm:"not null;comment:入参"`
|
||||
Result string `json:"result" gorm:"comment:运行结果"`
|
||||
Namespace string `json:"namespace" gorm:"comment:命名空间"`
|
||||
base.BaseModel
|
||||
}
|
||||
|
||||
|
|
|
@ -37,9 +37,9 @@ type File struct {
|
|||
Status string `gorm:"column:status" json:"Status"` //type:string comment:hash version:2023-05-06 09:58
|
||||
DeletedFlag *int `gorm:"column:deleted_flag" json:"DeletedFlag"` //type:*int comment:是否删除 version:2023-05-06 09:58
|
||||
CreatedBy *int `gorm:"column:created_by" json:"CreatedBy"` //type:*int comment:创建人 version:2023-05-06 09:58
|
||||
CreatedTime *time.Time `gorm:"column:created_time" json:"CreatedTime"` //type:*time.Time comment:创建时间 version:2023-05-06 09:58
|
||||
CreatedTime *time.Time `gorm:"column:created_time" json:"CreateTime"` //type:*time.Time comment:创建时间 version:2023-05-06 09:58
|
||||
UpdatedBy *int `gorm:"column:updated_by" json:"UpdatedBy"` //type:*int comment:更新人 version:2023-05-06 09:58
|
||||
UpdatedTime *time.Time `gorm:"column:updated_time" json:"UpdatedTime"` //type:*time.Time comment:更新时间 version:2023-05-06 09:58
|
||||
UpdatedTime *time.Time `gorm:"column:updated_time" json:"UpdateTime"` //type:*time.Time comment:更新时间 version:2023-05-06 09:58
|
||||
}
|
||||
|
||||
// TableName 表名:data_set,。
|
||||
|
|
|
@ -0,0 +1,24 @@
|
|||
package models
|
||||
|
||||
import "github.com/zeromicro/go-zero/core/stores/sqlx"
|
||||
|
||||
var _ TaskAiModel = (*customTaskAiModel)(nil)
|
||||
|
||||
type (
|
||||
// TaskAiModel is an interface to be customized, add more methods here,
|
||||
// and implement the added methods in customTaskAiModel.
|
||||
TaskAiModel interface {
|
||||
taskAiModel
|
||||
}
|
||||
|
||||
customTaskAiModel struct {
|
||||
*defaultTaskAiModel
|
||||
}
|
||||
)
|
||||
|
||||
// NewTaskAiModel returns a model for the database table.
|
||||
func NewTaskAiModel(conn sqlx.SqlConn) TaskAiModel {
|
||||
return &customTaskAiModel{
|
||||
defaultTaskAiModel: newTaskAiModel(conn),
|
||||
}
|
||||
}
|
|
@ -0,0 +1,104 @@
|
|||
// Code generated by goctl. DO NOT EDIT.
|
||||
|
||||
package models
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/zeromicro/go-zero/core/stores/builder"
|
||||
"github.com/zeromicro/go-zero/core/stores/sqlc"
|
||||
"github.com/zeromicro/go-zero/core/stores/sqlx"
|
||||
"github.com/zeromicro/go-zero/core/stringx"
|
||||
)
|
||||
|
||||
var (
|
||||
taskAiFieldNames = builder.RawFieldNames(&TaskAi{})
|
||||
taskAiRows = strings.Join(taskAiFieldNames, ",")
|
||||
taskAiRowsExpectAutoSet = strings.Join(stringx.Remove(taskAiFieldNames, "`id`", "`create_at`", "`create_time`", "`created_at`", "`update_at`", "`update_time`", "`updated_at`"), ",")
|
||||
taskAiRowsWithPlaceHolder = strings.Join(stringx.Remove(taskAiFieldNames, "`id`", "`create_at`", "`create_time`", "`created_at`", "`update_at`", "`update_time`", "`updated_at`"), "=?,") + "=?"
|
||||
)
|
||||
|
||||
type (
|
||||
taskAiModel interface {
|
||||
Insert(ctx context.Context, data *TaskAi) (sql.Result, error)
|
||||
FindOne(ctx context.Context, id int64) (*TaskAi, error)
|
||||
Update(ctx context.Context, data *TaskAi) error
|
||||
Delete(ctx context.Context, id int64) error
|
||||
}
|
||||
|
||||
defaultTaskAiModel struct {
|
||||
conn sqlx.SqlConn
|
||||
table string
|
||||
}
|
||||
|
||||
TaskAi struct {
|
||||
Id int64 `db:"id"` // id
|
||||
TaskId int64 `db:"task_id"` // 任务id
|
||||
AdapterId int64 `db:"adapter_id"` // 设配器id
|
||||
ClusterId int64 `db:"cluster_id"` // 集群id
|
||||
Name string `db:"name"` // 任务名
|
||||
Replica int64 `db:"replica"` // 执行数
|
||||
JobId string `db:"job_id"` // 集群返回任务id
|
||||
Strategy string `db:"strategy"` // 主任务使用策略
|
||||
Status string `db:"status"` // 任务状态
|
||||
Msg string `db:"msg"` // 集群返回任务信息
|
||||
CommitTime time.Time `db:"commit_time"` // 提交时间
|
||||
StartTime string `db:"start_time"` // 开始时间
|
||||
EndTime string `db:"end_time"` // 结束时间
|
||||
TaskType string `db:"task_type"`
|
||||
}
|
||||
)
|
||||
|
||||
func newTaskAiModel(conn sqlx.SqlConn) *defaultTaskAiModel {
|
||||
return &defaultTaskAiModel{
|
||||
conn: conn,
|
||||
table: "`task_ai`",
|
||||
}
|
||||
}
|
||||
|
||||
func (m *defaultTaskAiModel) withSession(session sqlx.Session) *defaultTaskAiModel {
|
||||
return &defaultTaskAiModel{
|
||||
conn: sqlx.NewSqlConnFromSession(session),
|
||||
table: "`task_ai`",
|
||||
}
|
||||
}
|
||||
|
||||
func (m *defaultTaskAiModel) Delete(ctx context.Context, id int64) error {
|
||||
query := fmt.Sprintf("delete from %s where `id` = ?", m.table)
|
||||
_, err := m.conn.ExecCtx(ctx, query, id)
|
||||
return err
|
||||
}
|
||||
|
||||
func (m *defaultTaskAiModel) FindOne(ctx context.Context, id int64) (*TaskAi, error) {
|
||||
query := fmt.Sprintf("select %s from %s where `id` = ? limit 1", taskAiRows, m.table)
|
||||
var resp TaskAi
|
||||
err := m.conn.QueryRowCtx(ctx, &resp, query, id)
|
||||
switch err {
|
||||
case nil:
|
||||
return &resp, nil
|
||||
case sqlc.ErrNotFound:
|
||||
return nil, ErrNotFound
|
||||
default:
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
func (m *defaultTaskAiModel) Insert(ctx context.Context, data *TaskAi) (sql.Result, error) {
|
||||
query := fmt.Sprintf("insert into %s (%s) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", m.table, taskAiRowsExpectAutoSet)
|
||||
ret, err := m.conn.ExecCtx(ctx, query, data.TaskId, data.AdapterId, data.ClusterId, data.Name, data.Replica, data.JobId, data.Strategy, data.Status, data.Msg, data.CommitTime, data.StartTime, data.EndTime, data.TaskType)
|
||||
return ret, err
|
||||
}
|
||||
|
||||
func (m *defaultTaskAiModel) Update(ctx context.Context, data *TaskAi) error {
|
||||
query := fmt.Sprintf("update %s set %s where `id` = ?", m.table, taskAiRowsWithPlaceHolder)
|
||||
_, err := m.conn.ExecCtx(ctx, query, data.TaskId, data.AdapterId, data.ClusterId, data.Name, data.Replica, data.JobId, data.Strategy, data.Status, data.Msg, data.CommitTime, data.StartTime, data.EndTime, data.TaskType, data.Id)
|
||||
return err
|
||||
}
|
||||
|
||||
func (m *defaultTaskAiModel) tableName() string {
|
||||
return m.table
|
||||
}
|
|
@ -35,20 +35,21 @@ type (
|
|||
}
|
||||
|
||||
// Task mirrors one row of the task table (columns given by the db tags).
//
// The block previously carried both the superseded and the replacement field
// lists; only the replacement set is kept, in which StartTime and EndTime are
// *time.Time and AdapterTypeDict is added.
Task struct {
	Id              int64          `db:"id"`             // id
	Name            string         `db:"name"`           // job name
	Description     string         `db:"description"`    // job description
	Status          string         `db:"status"`         // job status
	Strategy        int64          `db:"strategy"`       // strategy
	SynergyStatus   int64          `db:"synergy_status"` // synergy status (0 = not coordinated, 1 = coordinated)
	CommitTime      time.Time      `db:"commit_time"`    // commit time
	StartTime       *time.Time     `db:"start_time"`     // start time
	EndTime         *time.Time     `db:"end_time"`       // end-of-run time
	RunningTime     int64          `db:"running_time"`   // elapsed running time, in seconds
	YamlString      string         `db:"yaml_string"`
	Result          string         `db:"result"` // job result
	DeletedAt       gorm.DeletedAt `gorm:"index"`
	NsID            string         `db:"ns_id"`
	AdapterTypeDict int            `db:"adapter_type_dict"` // task type (value from the dictionary table)
}
|
||||
)
|
||||
|
||||
|
|
|
@ -35,17 +35,19 @@ type (
|
|||
}
|
||||
|
||||
// TClusterResource mirrors one row of the t_cluster_resource table (columns
// given by the db tags).
//
// The block previously carried both the superseded and the replacement field
// lists; only the replacement set is kept, which adds CardTotal and
// CardTopsTotal.
TClusterResource struct {
	ClusterId     int64   `db:"cluster_id"`
	ClusterName   string  `db:"cluster_name"`
	ClusterType   int64   `db:"cluster_type"` // type: 0 = container, 1 = AI computing, 2 = HPC, 3 = virtual machine
	CpuAvail      float64 `db:"cpu_avail"`
	CpuTotal      float64 `db:"cpu_total"`
	MemAvail      float64 `db:"mem_avail"`
	MemTotal      float64 `db:"mem_total"`
	DiskAvail     float64 `db:"disk_avail"`
	DiskTotal     float64 `db:"disk_total"`
	GpuAvail      float64 `db:"gpu_avail"`
	GpuTotal      float64 `db:"gpu_total"`
	CardTotal     int64   `db:"card_total"`      // number of compute cards
	CardTopsTotal float64 `db:"card_tops_total"` // total compute capacity, in TOPS
}
|
||||
)
|
||||
|
||||
|
@ -56,6 +58,13 @@ func newTClusterResourceModel(conn sqlx.SqlConn) *defaultTClusterResourceModel {
|
|||
}
|
||||
}
|
||||
|
||||
func (m *defaultTClusterResourceModel) withSession(session sqlx.Session) *defaultTClusterResourceModel {
|
||||
return &defaultTClusterResourceModel{
|
||||
conn: sqlx.NewSqlConnFromSession(session),
|
||||
table: "`t_cluster_resource`",
|
||||
}
|
||||
}
|
||||
|
||||
func (m *defaultTClusterResourceModel) Delete(ctx context.Context, clusterId int64) error {
|
||||
query := fmt.Sprintf("delete from %s where `cluster_id` = ?", m.table)
|
||||
_, err := m.conn.ExecCtx(ctx, query, clusterId)
|
||||
|
@ -77,14 +86,14 @@ func (m *defaultTClusterResourceModel) FindOne(ctx context.Context, clusterId in
|
|||
}
|
||||
|
||||
func (m *defaultTClusterResourceModel) Insert(ctx context.Context, data *TClusterResource) (sql.Result, error) {
|
||||
query := fmt.Sprintf("insert into %s (%s) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", m.table, tClusterResourceRowsExpectAutoSet)
|
||||
ret, err := m.conn.ExecCtx(ctx, query, data.ClusterId, data.ClusterName, data.ClusterType, data.CpuAvail, data.CpuTotal, data.MemAvail, data.MemTotal, data.DiskAvail, data.DiskTotal, data.GpuAvail, data.GpuTotal)
|
||||
query := fmt.Sprintf("insert into %s (%s) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", m.table, tClusterResourceRowsExpectAutoSet)
|
||||
ret, err := m.conn.ExecCtx(ctx, query, data.ClusterId, data.ClusterName, data.ClusterType, data.CpuAvail, data.CpuTotal, data.MemAvail, data.MemTotal, data.DiskAvail, data.DiskTotal, data.GpuAvail, data.GpuTotal, data.CardTotal, data.CardTopsTotal)
|
||||
return ret, err
|
||||
}
|
||||
|
||||
func (m *defaultTClusterResourceModel) Update(ctx context.Context, data *TClusterResource) error {
|
||||
query := fmt.Sprintf("update %s set %s where `cluster_id` = ?", m.table, tClusterResourceRowsWithPlaceHolder)
|
||||
_, err := m.conn.ExecCtx(ctx, query, data.ClusterName, data.ClusterType, data.CpuAvail, data.CpuTotal, data.MemAvail, data.MemTotal, data.DiskAvail, data.DiskTotal, data.GpuAvail, data.GpuTotal, data.ClusterId)
|
||||
_, err := m.conn.ExecCtx(ctx, query, data.ClusterName, data.ClusterType, data.CpuAvail, data.CpuTotal, data.MemAvail, data.MemTotal, data.DiskAvail, data.DiskTotal, data.GpuAvail, data.GpuTotal, data.CardTotal, data.CardTopsTotal, data.ClusterId)
|
||||
return err
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,24 @@
|
|||
package models
|
||||
|
||||
import "github.com/zeromicro/go-zero/core/stores/sqlx"
|
||||
|
||||
var _ TClusterTaskQueueModel = (*customTClusterTaskQueueModel)(nil)
|
||||
|
||||
type (
|
||||
// TClusterTaskQueueModel is an interface to be customized, add more methods here,
|
||||
// and implement the added methods in customTClusterTaskQueueModel.
|
||||
TClusterTaskQueueModel interface {
|
||||
tClusterTaskQueueModel
|
||||
}
|
||||
|
||||
customTClusterTaskQueueModel struct {
|
||||
*defaultTClusterTaskQueueModel
|
||||
}
|
||||
)
|
||||
|
||||
// NewTClusterTaskQueueModel returns a model for the database table.
|
||||
func NewTClusterTaskQueueModel(conn sqlx.SqlConn) TClusterTaskQueueModel {
|
||||
return &customTClusterTaskQueueModel{
|
||||
defaultTClusterTaskQueueModel: newTClusterTaskQueueModel(conn),
|
||||
}
|
||||
}
|
|
@ -0,0 +1,95 @@
|
|||
// Code generated by goctl. DO NOT EDIT.
|
||||
|
||||
package models
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/zeromicro/go-zero/core/stores/builder"
|
||||
"github.com/zeromicro/go-zero/core/stores/sqlc"
|
||||
"github.com/zeromicro/go-zero/core/stores/sqlx"
|
||||
"github.com/zeromicro/go-zero/core/stringx"
|
||||
)
|
||||
|
||||
var (
|
||||
tClusterTaskQueueFieldNames = builder.RawFieldNames(&TClusterTaskQueue{})
|
||||
tClusterTaskQueueRows = strings.Join(tClusterTaskQueueFieldNames, ",")
|
||||
tClusterTaskQueueRowsExpectAutoSet = strings.Join(stringx.Remove(tClusterTaskQueueFieldNames, "`id`", "`create_at`", "`create_time`", "`created_at`", "`update_at`", "`update_time`", "`updated_at`"), ",")
|
||||
tClusterTaskQueueRowsWithPlaceHolder = strings.Join(stringx.Remove(tClusterTaskQueueFieldNames, "`id`", "`create_at`", "`create_time`", "`created_at`", "`update_at`", "`update_time`", "`updated_at`"), "=?,") + "=?"
|
||||
)
|
||||
|
||||
type (
|
||||
tClusterTaskQueueModel interface {
|
||||
Insert(ctx context.Context, data *TClusterTaskQueue) (sql.Result, error)
|
||||
FindOne(ctx context.Context, id int64) (*TClusterTaskQueue, error)
|
||||
Update(ctx context.Context, data *TClusterTaskQueue) error
|
||||
Delete(ctx context.Context, id int64) error
|
||||
}
|
||||
|
||||
defaultTClusterTaskQueueModel struct {
|
||||
conn sqlx.SqlConn
|
||||
table string
|
||||
}
|
||||
|
||||
TClusterTaskQueue struct {
|
||||
Id int64 `db:"id"` // id
|
||||
AdapterId int64 `db:"adapter_id"` // 适配器id
|
||||
ClusterId int64 `db:"cluster_id"` // 集群id
|
||||
QueueNum int64 `db:"queue_num"` // 任务排队数量
|
||||
Date time.Time `db:"date"`
|
||||
}
|
||||
)
|
||||
|
||||
func newTClusterTaskQueueModel(conn sqlx.SqlConn) *defaultTClusterTaskQueueModel {
|
||||
return &defaultTClusterTaskQueueModel{
|
||||
conn: conn,
|
||||
table: "`t_cluster_task_queue`",
|
||||
}
|
||||
}
|
||||
|
||||
func (m *defaultTClusterTaskQueueModel) withSession(session sqlx.Session) *defaultTClusterTaskQueueModel {
|
||||
return &defaultTClusterTaskQueueModel{
|
||||
conn: sqlx.NewSqlConnFromSession(session),
|
||||
table: "`t_cluster_task_queue`",
|
||||
}
|
||||
}
|
||||
|
||||
func (m *defaultTClusterTaskQueueModel) Delete(ctx context.Context, id int64) error {
|
||||
query := fmt.Sprintf("delete from %s where `id` = ?", m.table)
|
||||
_, err := m.conn.ExecCtx(ctx, query, id)
|
||||
return err
|
||||
}
|
||||
|
||||
func (m *defaultTClusterTaskQueueModel) FindOne(ctx context.Context, id int64) (*TClusterTaskQueue, error) {
|
||||
query := fmt.Sprintf("select %s from %s where `id` = ? limit 1", tClusterTaskQueueRows, m.table)
|
||||
var resp TClusterTaskQueue
|
||||
err := m.conn.QueryRowCtx(ctx, &resp, query, id)
|
||||
switch err {
|
||||
case nil:
|
||||
return &resp, nil
|
||||
case sqlc.ErrNotFound:
|
||||
return nil, ErrNotFound
|
||||
default:
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
func (m *defaultTClusterTaskQueueModel) Insert(ctx context.Context, data *TClusterTaskQueue) (sql.Result, error) {
|
||||
query := fmt.Sprintf("insert into %s (%s) values (?, ?, ?, ?)", m.table, tClusterTaskQueueRowsExpectAutoSet)
|
||||
ret, err := m.conn.ExecCtx(ctx, query, data.AdapterId, data.ClusterId, data.QueueNum, data.Date)
|
||||
return ret, err
|
||||
}
|
||||
|
||||
func (m *defaultTClusterTaskQueueModel) Update(ctx context.Context, data *TClusterTaskQueue) error {
|
||||
query := fmt.Sprintf("update %s set %s where `id` = ?", m.table, tClusterTaskQueueRowsWithPlaceHolder)
|
||||
_, err := m.conn.ExecCtx(ctx, query, data.AdapterId, data.ClusterId, data.QueueNum, data.Date, data.Id)
|
||||
return err
|
||||
}
|
||||
|
||||
func (m *defaultTClusterTaskQueueModel) tableName() string {
|
||||
return m.table
|
||||
}
|
|
@ -41,3 +41,8 @@ func GenSnowflakeID() int64 {
|
|||
func GenSnowflakeIDStr() string {
|
||||
return node.Generate().String()
|
||||
}
|
||||
|
||||
// GenSnowflakeIDStr 工作id
|
||||
func GenSnowflakeIDUint() uint {
|
||||
return uint(node.Generate().Int64())
|
||||
}
|
||||
|
|
|
@ -19,7 +19,9 @@ import (
|
|||
)
|
||||
|
||||
var timeTemplates = []string{
|
||||
"2006-01-02 15:04:05", //常规类型
|
||||
"2006-01-02T15:04:05Z07:00", //RFC3339
|
||||
"2006-01-02 15:04:05", //常规类型
|
||||
"2006/01/02T15:04:05Z07:00", //RFC3339
|
||||
"2006/01/02 15:04:05",
|
||||
"2006-01-02",
|
||||
"2006/01/02",
|
||||
|
|
Loading…
Reference in New Issue