Merge remote-tracking branch 'upstream/master' into upmaster_wq

# Conflicts:
#	api/desc/pcm.api
#	api/internal/handler/routes.go


Former-commit-id: e8916b1c9f9f0f56463a8395bae80bb159c0d065
This commit is contained in:
qiwang 2024-05-09 15:16:23 +08:00
commit a57e0b7ba8
69 changed files with 6457 additions and 530 deletions

View File

@ -111,18 +111,17 @@ type HpcInfo struct {
} }
type CloudInfo struct { type CloudInfo struct {
Participant int64 `json:"participant,omitempty"` Id uint `json:"id,omitempty,optional"`
Id int64 `json:"id,omitempty"` TaskId int64 `json:"taskId,omitempty,optional"`
TaskId int64 `json:"taskId,omitempty"` AdapterId uint `json:"adapterId,omitempty,optional"`
ApiVersion string `json:"apiVersion,omitempty"` ClusterId uint `json:"clusterId,omitempty,optional"`
Kind string `json:"kind,omitempty"` ClusterName string `json:"clusterName,omitempty,optional"`
Namespace string `json:"namespace,omitempty"` Kind string `json:"kind,omitempty,optional"`
Name string `json:"name,omitempty"` Status string `json:"status,omitempty,optional"`
Status string `json:"status,omitempty"` StartTime *time.Time `json:"startTime,omitempty,optional,string"`
StartTime string `json:"startTime,omitempty"` YamlString string `json:"yamlString,omitempty,optional"`
RunningTime int64 `json:"runningTime,omitempty"` Result string `json:"result,omitempty,optional"`
Result string `json:"result,omitempty"` Namespace string `json:"namespace,omitempty,optional"`
YamlString string `json:"yamlString,omitempty"`
} }
type AiInfo struct { type AiInfo struct {

View File

@ -1697,6 +1697,44 @@ PayloadCreateTrainJob{
jobId string `json:"jobId,optional"` jobId string `json:"jobId,optional"`
} }
********************/ ********************/
/******************Ai Center overview*************************/
// CenterOverviewResp is the response body of the AI center overview endpoint.
// Each field is optional and serialized under the JSON key given in its tag.
CenterOverviewResp {
// number of AI centers, serialized as "totalCenters"
CenterNum int32 `json:"totalCenters,optional"`
// number of tasks, serialized as "totalTasks"
TaskNum int32 `json:"totalTasks,optional"`
// number of accelerator cards, serialized as "totalCards"
CardNum int32 `json:"totalCards,optional"`
// aggregate compute power, serialized as "totalPower" (presumably in TOPS, per the field name)
PowerInTops float64 `json:"totalPower,optional"`
}
// CenterQueueingResp is the response body of the AI center queueing endpoint.
CenterQueueingResp {
// current queue entries, serialized as "current"
Current []*CenterQueue `json:"current,optional"`
// historical queue entries, serialized as "history"
History []*CenterQueue `json:"history,optional"`
}
// CenterQueue is one named entry of a queueing list.
CenterQueue {
// entry name, serialized as "name"
Name string `json:"name,optional"`
// number of queued items, serialized as "num"
QueueingNum int32 `json:"num,optional"`
}
// CenterListResp is the response body of the AI center list endpoint.
CenterListResp {
// listed centers, serialized as "centerList"
List []*AiCenter `json:"centerList,optional"`
}
// AiCenter describes a single AI center entry.
AiCenter {
// center name, serialized as "name"
Name string `json:"name,optional"`
// software stack name, serialized as "stack"
StackName string `json:"stack,optional"`
// version string, serialized as "version"
Version string `json:"version,optional"`
}
// CenterTaskListResp is the response body of the AI center task list endpoint.
CenterTaskListResp {
// listed tasks, serialized as "taskList"
List []*AiTask `json:"taskList,optional"`
}
// AiTask is one entry of the AI center task list.
AiTask {
	// task name, serialized as "name"
	Name string `json:"name,optional"`
	// task status, serialized as "status"; spelled with an upper-case initial
	// so it matches the sibling fields and the exported field name Go callers use
	Status string `json:"status,optional"`
	// elapsed time, serialized as "elapsed" (presumably seconds; confirm against the producer)
	TimeElapsed int32 `json:"elapsed,optional"`
}
) )
/******************create TrainIngJob end*************************/ /******************create TrainIngJob end*************************/

View File

@ -59,9 +59,9 @@ type (
Type int64 `json:"type"` // 租户所属(0数算1超算2智算 Type int64 `json:"type"` // 租户所属(0数算1超算2智算
DeletedFlag int64 `json:"deletedFlag"` // 是否删除 DeletedFlag int64 `json:"deletedFlag"` // 是否删除
CreatedBy int64 `json:"createdBy"` // 创建人 CreatedBy int64 `json:"createdBy"` // 创建人
CreatedTime string `json:"createdTime"` // 创建时间 CreateTime string `json:"createdTime"` // 创建时间
UpdatedBy int64 `json:"updatedBy"` // 更新人 UpdatedBy int64 `json:"updatedBy"` // 更新人
UpdatedTime string `json:"updated_time"` // 更新时间 UpdateTime string `json:"updated_time"` // 更新时间
} }
UpdateTenantReq { UpdateTenantReq {
@ -115,6 +115,6 @@ type Cloud {
StartTime string `json:"startTime"` // 开始时间 StartTime string `json:"startTime"` // 开始时间
RunningTime int64 `json:"runningTime"` // 运行时长 RunningTime int64 `json:"runningTime"` // 运行时长
CreatedBy int64 `json:"createdBy"` // 创建人 CreatedBy int64 `json:"createdBy"` // 创建人
CreatedTime string `json:"createdTime"` // 创建时间 CreateTime string `json:"createdTime"` // 创建时间
Result string `json:"result"` Result string `json:"result"`
} }

View File

@ -158,23 +158,13 @@ type (
type ( type (
GeneralTaskReq { GeneralTaskReq {
Name string `json:"name"` Name string `json:"name"`
ComputeType string `json:"computeType"` AdapterIds []string `json:"adapterIds"`
TemplateId string `json:"templateId"` ClusterIds []string `json:"clusterIds"`
AdapterId string `json:"adapterId"` Strategy string `json:"strategy"`
ClusterIds []string `json:"clusterIds"` StaticWeightMap map[string]int32 `json:"staticWeightMap,optional"`
Strategy Strategy `json:"strategy"`
ReqBody []string `json:"reqBody"` ReqBody []string `json:"reqBody"`
} Replicas int64 `json:"replicas,string"`
Strategy {
Name string `json:"name"`
StaticWeightList []StaticWeightList `json:"staticWeightList"`
}
StaticWeightList {
ClusterName string `json:"clusterName"`
Weight int `json:"weight"`
} }
) )
@ -210,42 +200,116 @@ type (
} }
) )
type ( type (
commitVmTaskReq { commitVmTaskReq {
Name string `json:"name"` // Name string `json:"name"`
NsID string `json:"nsID"` // NsID string `json:"nsID"`
Replicas int64 `json:"replicas,optional"` // Replicas int64 `json:"replicas,optional"`
MatchLabels map[string]string `json:"matchLabels,optional"` // MatchLabels map[string]string `json:"matchLabels,optional"`
AdapterId string `json:"adapterId,optional"` // AdapterId string `json:"adapterId,optional"`
ClusterType string `json:"clusterType,optional"` // ClusterType string `json:"clusterType,optional"`
//Virtual Machine Section // //Virtual Machine Section
CreateMulServer []CreateMulDomainServer `json:"createMulServer,optional"` CreateMulServer []CreateMulDomainServer `json:"createMulServer,optional"`
VmOption *VmOption `json:"vmOption,optional"`
} }
// VmOption carries the scheduling options of a virtual-machine task request.
// The commented-out fields at the end are retained from an earlier layout and are not part of the type.
VmOption {
AdapterId string `json:"adapterId"`
VmClusterIds []string `json:"vmClusterIds"`
Replicas int64 `json:"replicas,optional"`
Name string `json:"name"`
//ResourceType string `json:"resourceType"`
//TaskType string `json:"taskType"`
Strategy string `json:"strategy"`
// NOTE(review): ClusterToStaticWeight (required) and StaticWeightMap (optional) have the same
// type and look redundant; confirm which one consumers read.
ClusterToStaticWeight map[string]int32 `json:"clusterToStaticWeight"`
MatchLabels map[string]string `json:"matchLabels,optional"`
StaticWeightMap map[string]int32 `json:"staticWeightMap,optional"`
CreateMulServer []CreateMulDomainServer `json:"createMulServer,optional"`
// Id int64 `json:"id"`
// ParticipantId int64 `json:"participantId"`
// TaskId int64 `json:"taskId"`
// AdapterId int64 `json:"adapterId"`
// ClusterId int64 `json:"clusterId"`
// FlavorRef string `json:"flavorRef"`
// ImageRef string `json:"imageRef"`
// Status string `json:"status"`
// Platform string `json:"platform"`
// Description string `json:"description"`
// AvailabilityZone string `json:"availabilityZone"`
// MinCount int64 `json:"minCount"`
// Uuid string `json:"uuid"`
// StartTime string `json:"startTime"`
// RunningTime string `json:"runningTime"`
// Result string `json:"result"`
// DeletedAt string `json:"deletedAt"`
}
CreateMulDomainServer { CreateMulDomainServer {
Platform string `json:"platform,optional"` Platform string `json:"platform,optional"`
Name string `json:"name,optional"` name string `json:"name,optional"`
Min_count int64 `json:"min_count,optional"` min_count int64 `json:"min_count,optional"`
ImageRef string `json:"imageRef,optional"` imageRef string `json:"imageRef,optional"`
FlavorRef string `json:"flavorRef,optional"` flavorRef string `json:"flavorRef,optional"`
Uuid string `json:"uuid,optional"` uuid string `json:"uuid,optional"`
ClusterId string `json:"clusterId,optional"`
} }
commitVmTaskResp { commitVmTaskResp {
// VmTask []VmTask `json:"vmTask" copier:"VmTask"`
TaskId int64 `json:"taskId"`
Code int32 `json:"code"` Code int32 `json:"code"`
Msg string `json:"msg"` Msg string `json:"msg"`
} }
VmTask { ScheduleVmResult struct {
Id string `json:"id" copier:"Id"` ClusterId string `json:"clusterId"`
Links []VmLinks `json:"links" copier:"Links"` TaskId string `json:"taskId"`
OSDCFDiskConfig string `json:"OS_DCF_diskConfig" copier:"OSDCFDiskConfig"` Strategy string `json:"strategy"`
SecurityGroups []VmSecurity_groups_server `json:"security_groups" copier:"SecurityGroups"` Replica int32 `json:"replica"`
AdminPass string `json:"adminPass" copier:"AdminPass"` Msg string `json:"msg"`
}
VmTask{
Id string `json:"id" copier:"Id"`
Links []VmLinks `json:"links" copier:"Links"`
OSDCFDiskConfig string `json:"OS_DCF_diskConfig" copier:"OSDCFDiskConfig"`
SecurityGroups []VmSecurity_groups_server `json:"security_groups" copier:"SecurityGroups"`
AdminPass string `json:"adminPass" copier:"AdminPass"`
} }
VmLinks { VmLinks {
Href string `json:"href " copier:"Href"` Href string `json:"href " copier:"Href"`
Rel string `json:"rel" copier:"Rel"` Rel string `json:"rel" copier:"Rel"`
} }
// commitVmTaskReq {
// Name string `json:"name"`
// NsID string `json:"nsID"`
// Replicas int64 `json:"replicas,optional"`
// MatchLabels map[string]string `json:"matchLabels,optional"`
// AdapterId string `json:"adapterId,optional"`
// ClusterType string `json:"clusterType,optional"`
// //Virtual Machine Section
// CreateMulServer []CreateMulDomainServer `json:"createMulServer,optional"`
// }
// CreateMulDomainServer {
// Platform string `json:"platform,optional"`
// Name string `json:"name,optional"`
// Min_count int64 `json:"min_count,optional"`
// ImageRef string `json:"imageRef,optional"`
// FlavorRef string `json:"flavorRef,optional"`
// Uuid string `json:"uuid,optional"`
// }
// commitVmTaskResp {
// // VmTask []VmTask `json:"vmTask" copier:"VmTask"`
// TaskId int64 `json:"taskId"`
// Code int32 `json:"code"`
// Msg string `json:"msg"`
// }
// VmTask {
// Id string `json:"id" copier:"Id"`
// Links []VmLinks `json:"links" copier:"Links"`
// OSDCFDiskConfig string `json:"OS_DCF_diskConfig" copier:"OSDCFDiskConfig"`
// SecurityGroups []VmSecurity_groups_server `json:"security_groups" copier:"SecurityGroups"`
// AdminPass string `json:"adminPass" copier:"AdminPass"`
// }
// VmLinks {
// Href string `json:"href " copier:"Href"`
// Rel string `json:"rel" copier:"Rel"`
// }
VmSecurity_groups_server { VmSecurity_groups_server {
Name string `json:"name" copier:"Name"` Name string `json:"name" copier:"Name"`
@ -320,7 +384,7 @@ type (
} }
TaskModel { TaskModel {
Id int64 `json:"id,omitempty" db:"id"` // id Id int64 `json:"id,omitempty,string" db:"id"` // id
Name string `json:"name,omitempty" db:"name"` // 作业名称 Name string `json:"name,omitempty" db:"name"` // 作业名称
Description string `json:"description,omitempty" db:"description"` // 作业描述 Description string `json:"description,omitempty" db:"description"` // 作业描述
Status string `json:"status,omitempty" db:"status"` // 作业状态 Status string `json:"status,omitempty" db:"status"` // 作业状态
@ -336,6 +400,7 @@ type (
NsID string `json:"nsId,omitempty" db:"ns_id"` NsID string `json:"nsId,omitempty" db:"ns_id"`
TenantId string `json:"tenantId,omitempty" db:"tenant_id"` TenantId string `json:"tenantId,omitempty" db:"tenant_id"`
CreateTime string `json:"createTime,omitempty" db:"create_time" gorm:"autoCreateTime"` CreateTime string `json:"createTime,omitempty" db:"create_time" gorm:"autoCreateTime"`
AdapterTypeDict int `json:"adapterTypeDict" db:"create_time" gorm:"adapter_type_dict"` //任务类型(对应字典表的值
} }
) )
@ -1004,9 +1069,9 @@ type (
Environment string `json:"environment"` Environment string `json:"environment"`
DeletedFlag int64 `json:"deleted_flag"` // 是否删除0-否1-是) DeletedFlag int64 `json:"deleted_flag"` // 是否删除0-否1-是)
CreatedBy int64 `json:"created_by"` // 创建人 CreatedBy int64 `json:"created_by"` // 创建人
CreatedTime string `json:"created_time"` // 创建时间 CreateTime string `json:"created_time"` // 创建时间
UpdatedBy int64 `json:"updated_by"` // 更新人 UpdatedBy int64 `json:"updated_by"` // 更新人
UpdatedTime string `json:"updated_time"` // 更新时间 UpdateTime string `json:"updated_time"` // 更新时间
} }
CloudInfo { CloudInfo {
@ -1155,5 +1220,15 @@ type TaskStatusResp {
Succeeded int `json:"Succeeded"` Succeeded int `json:"Succeeded"`
Failed int `json:"Failed"` Failed int `json:"Failed"`
Running int `json:"Running"` Running int `json:"Running"`
Pause int `json:"Pause"` Saved int `json:"Saved"`
}
type TaskDetailsResp {
Name string `json:"name"`
description string `json:"description"`
StartTime string `json:"startTime"`
EndTime string `json:"endTime"`
Strategy int64 `json:"strategy"`
SynergyStatus int64 `json:"synergyStatus"`
ClusterInfos []*ClusterInfo `json:"clusterInfos"`
} }

View File

@ -80,4 +80,29 @@ type (
name string `json:"name"` name string `json:"name"`
version string `json:"version"` version string `json:"version"`
} }
)
type (
// scheduleSituationResp is the response body of the schedule situation endpoint:
// a graph made of nodes, links between them, and node categories.
scheduleSituationResp{
nodes []NodeRegion `json:"nodes"`
links []Link `json:"links"`
categories []Category `json:"categories"`
}
// NodeRegion is one graph node of the schedule situation response.
NodeRegion{
id string `json:"id"`
name string `json:"name"`
// presumably an index into the response's categories list; confirm against the consumer
category int `json:"category"`
value int `json:"value"`
}
// Link is one graph edge of the schedule situation response, from source to target.
Link{
source string `json:"source"`
target string `json:"target"`
}
// Category is a named node category of the schedule situation response.
Category{
name string `json:"name"`
}
) )

View File

@ -142,6 +142,10 @@ service pcm {
@handler homeOverviewHandler @handler homeOverviewHandler
get /core/homeOverview (HomeOverviewReq) returns (HomeOverviewResp) get /core/homeOverview (HomeOverviewReq) returns (HomeOverviewResp)
@doc "task details"
@handler taskDetails
get /core/task/details (FId) returns(TaskDetailsResp)
@doc "Get Public Image" @doc "Get Public Image"
@handler getPublicImageHandler @handler getPublicImageHandler
get /core/getPublicImage (PublicImageReq) returns (PublicImageResp) get /core/getPublicImage (PublicImageReq) returns (PublicImageResp)
@ -226,7 +230,7 @@ service pcm {
@doc "Create cloud computing common tasks" @doc "Create cloud computing common tasks"
@handler commitGeneralTask @handler commitGeneralTask
post /cloud/task/create (GeneralTaskReq) returns() post /cloud/task/create (GeneralTaskReq) returns ()
} }
//智算二级接口 //智算二级接口
@ -235,6 +239,22 @@ service pcm {
group: ai group: ai
) )
service pcm { service pcm {
@doc "智算中心概览"
@handler getCenterOverviewHandler
get /ai/getCenterOverview returns (CenterOverviewResp)
@doc "智算中心排队状况"
@handler getCenterQueueingHandler
get /ai/getCenterQueueing returns (CenterQueueingResp)
@doc "智算中心列表"
@handler getCenterListHandler
get /ai/getCenterList returns (CenterListResp)
@doc "智算中心任务列表"
@handler getCenterTaskListHandler
get /ai/getCenterTaskList returns (CenterTaskListResp)
@doc "查询数据集列表" @doc "查询数据集列表"
@handler listDataSetHandler @handler listDataSetHandler
get /ai/listDataSet/:projectId (DataSetReq) returns (DataSetResp) get /ai/listDataSet/:projectId (DataSetReq) returns (DataSetResp)
@ -938,8 +958,14 @@ service pcm {
@handler ScheduleGetAlgorithmsHandler @handler ScheduleGetAlgorithmsHandler
get /schedule/ai/getAlgorithms/:adapterId/:resourceType/:taskType/:dataset (AiAlgorithmsReq) returns (AiAlgorithmsResp) get /schedule/ai/getAlgorithms/:adapterId/:resourceType/:taskType/:dataset (AiAlgorithmsReq) returns (AiAlgorithmsResp)
@handler ScheduleGetAiJobLogLogHandler
get /schedule/ai/getJobLog/:adapterId/:clusterId/:taskId/:instanceNum (AiJobLogReq) returns (AiJobLogResp)
@handler ScheduleSubmitHandler @handler ScheduleSubmitHandler
post /schedule/submit (ScheduleReq) returns (ScheduleResp) post /schedule/submit (ScheduleReq) returns (ScheduleResp)
@handler ScheduleGetOverviewHandler
post /schedule/getOverview returns (ScheduleOverviewResp)
} }
@server( @server(
@ -991,7 +1017,7 @@ service pcm {
@doc "alert rules" @doc "alert rules"
@handler alertRulesHandler @handler alertRulesHandler
get /monitoring/alert/rule (AlertRulesReq)returns (AlertRulesResp) get /monitoring/alert/rule (AlertRulesReq) returns (AlertRulesResp)
@doc "cluster resource load" @doc "cluster resource load"
@handler clustersLoadHandler @handler clustersLoadHandler
@ -1007,5 +1033,14 @@ service pcm {
@doc "Synchronize Cluster alert Information" @doc "Synchronize Cluster alert Information"
@handler syncClusterAlertHandler @handler syncClusterAlertHandler
post /core/syncClusterAlert (SyncClusterAlertReq) post /monitoring/syncClusterAlert (SyncClusterAlertReq)
@handler taskNumHandler
get /monitoring/task/num (taskNumReq) returns (taskNumResp)
@handler adapterInfoHandler
get /monitoring/adapter/info (adapterInfoReq) returns (adapterInfoResp)
@handler scheduleSituationHandler
get /monitoring/schedule/situation returns (scheduleSituationResp)
} }

View File

@ -24,6 +24,9 @@ type (
Msg string `json:"msg"` Msg string `json:"msg"`
} }
// ScheduleOverviewResp is the response body of the schedule overview endpoint.
// It currently declares no fields.
ScheduleOverviewResp {
}
AiOption { AiOption {
TaskName string `json:"taskName"` TaskName string `json:"taskName"`
AdapterId string `json:"adapterId"` AdapterId string `json:"adapterId"`
@ -81,4 +84,20 @@ type (
AiJobLogResp { AiJobLogResp {
Log string `json:"log"` Log string `json:"log"`
} }
// AiTaskDb describes an AI task record; every field is a string carrying both
// a JSON key and a `db` column name in its tag.
AiTaskDb {
Id string `json:"id,omitempty" db:"id"`
TaskId string `json:"taskId,omitempty" db:"task_id"`
AdapterId string `json:"adapterId,omitempty" db:"adapter_id"`
ClusterId string `json:"clusterId,omitempty" db:"cluster_id"`
Name string `json:"name,omitempty" db:"name"`
Replica string `json:"replica,omitempty" db:"replica"`
// note the column name differs from the JSON key: "c_task_id" vs "clusterTaskId"
ClusterTaskId string `json:"clusterTaskId,omitempty" db:"c_task_id"`
Strategy string `json:"strategy,omitempty" db:"strategy"`
Status string `json:"status,omitempty" db:"status"`
Msg string `json:"msg,omitempty" db:"msg"`
CommitTime string `json:"commitTime,omitempty" db:"commit_time"`
StartTime string `json:"startTime,omitempty" db:"start_time"`
EndTime string `json:"endTime,omitempty" db:"end_time"`
}
) )

View File

@ -6,7 +6,7 @@ Timeout: 50000
DB: DB:
DataSource: root:uJpLd6u-J?HC1@(10.206.0.12:3306)/pcm?parseTime=true&loc=Local DataSource: root:uJpLd6u-J?HC1@(10.206.0.12:3306)/pcm?parseTime=true&loc=Local
# DataSource: root:uJpLd6u-J?HC1@(47.92.88.143:3306)/pcm?parseTime=true&loc=Local # DataSource: root:uJpLd6u-J?HC1@(47.92.88.143:3306)/pcm?parseTime=true&loc=Local
Redis: Redis:
Host: 10.206.0.12:6379 Host: 10.206.0.12:6379
Pass: redisPW123 Pass: redisPW123

View File

@ -0,0 +1,21 @@
package ai
import (
"net/http"
"github.com/zeromicro/go-zero/rest/httpx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/ai"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
)
// GetCenterListHandler builds the HTTP handler for the AI center list endpoint.
// It runs GetCenterListLogic with the request context and writes either the
// error (httpx.ErrorCtx) or the JSON-encoded response (httpx.OkJsonCtx).
func GetCenterListHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		ctx := r.Context()
		resp, err := ai.NewGetCenterListLogic(ctx, svcCtx).GetCenterList()
		if err != nil {
			httpx.ErrorCtx(ctx, w, err)
			return
		}
		httpx.OkJsonCtx(ctx, w, resp)
	}
}

View File

@ -0,0 +1,21 @@
package ai
import (
"net/http"
"github.com/zeromicro/go-zero/rest/httpx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/ai"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
)
// GetCenterOverviewHandler builds the HTTP handler for the AI center overview
// endpoint. It runs GetCenterOverviewLogic with the request context and writes
// either the error (httpx.ErrorCtx) or the JSON-encoded response (httpx.OkJsonCtx).
func GetCenterOverviewHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		ctx := r.Context()
		resp, err := ai.NewGetCenterOverviewLogic(ctx, svcCtx).GetCenterOverview()
		if err != nil {
			httpx.ErrorCtx(ctx, w, err)
			return
		}
		httpx.OkJsonCtx(ctx, w, resp)
	}
}

View File

@ -0,0 +1,21 @@
package ai
import (
"net/http"
"github.com/zeromicro/go-zero/rest/httpx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/ai"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
)
// GetCenterQueueingHandler builds the HTTP handler for the AI center queueing
// endpoint. It runs GetCenterQueueingLogic with the request context and writes
// either the error (httpx.ErrorCtx) or the JSON-encoded response (httpx.OkJsonCtx).
func GetCenterQueueingHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		ctx := r.Context()
		resp, err := ai.NewGetCenterQueueingLogic(ctx, svcCtx).GetCenterQueueing()
		if err != nil {
			httpx.ErrorCtx(ctx, w, err)
			return
		}
		httpx.OkJsonCtx(ctx, w, resp)
	}
}

View File

@ -0,0 +1,21 @@
package ai
import (
"net/http"
"github.com/zeromicro/go-zero/rest/httpx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/ai"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
)
// GetCenterTaskListHandler builds the HTTP handler for the AI center task list
// endpoint. It runs GetCenterTaskListLogic with the request context and writes
// either the error (httpx.ErrorCtx) or the JSON-encoded response (httpx.OkJsonCtx).
func GetCenterTaskListHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		ctx := r.Context()
		resp, err := ai.NewGetCenterTaskListLogic(ctx, svcCtx).GetCenterTaskList()
		if err != nil {
			httpx.ErrorCtx(ctx, w, err)
			return
		}
		httpx.OkJsonCtx(ctx, w, resp)
	}
}

View File

@ -0,0 +1,24 @@
package core
import (
"github.com/zeromicro/go-zero/rest/httpx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/core"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
"net/http"
)
// TaskDetailsHandler builds the HTTP handler for the task details endpoint.
// The request is parsed into a types.FId; a parse failure is reported through
// result.ParamErrorResult, otherwise the outcome of TaskDetailsLogic is written
// with result.HttpResult.
func TaskDetailsHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		req := new(types.FId)
		if parseErr := httpx.Parse(r, req); parseErr != nil {
			result.ParamErrorResult(r, w, parseErr)
			return
		}

		logic := core.NewTaskDetailsLogic(r.Context(), svcCtx)
		resp, err := logic.TaskDetails(req)
		result.HttpResult(r, w, resp, err)
	}
}

View File

@ -0,0 +1,17 @@
package monitoring
import (
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
"net/http"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/monitoring"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
)
// ScheduleSituationHandler builds the HTTP handler for the schedule situation
// endpoint. The endpoint takes no request body; the outcome of
// ScheduleSituationLogic is written with result.HttpResult.
func ScheduleSituationHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
	handle := func(w http.ResponseWriter, r *http.Request) {
		logic := monitoring.NewScheduleSituationLogic(r.Context(), svcCtx)
		resp, err := logic.ScheduleSituation()
		result.HttpResult(r, w, resp, err)
	}
	return handle
}

View File

@ -170,6 +170,11 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
Path: "/core/homeOverview", Path: "/core/homeOverview",
Handler: core.HomeOverviewHandler(serverCtx), Handler: core.HomeOverviewHandler(serverCtx),
}, },
{
Method: http.MethodGet,
Path: "/core/task/details",
Handler: core.TaskDetailsHandler(serverCtx),
},
{ {
Method: http.MethodGet, Method: http.MethodGet,
Path: "/core/getPublicImage", Path: "/core/getPublicImage",
@ -278,6 +283,26 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
server.AddRoutes( server.AddRoutes(
[]rest.Route{ []rest.Route{
{
Method: http.MethodGet,
Path: "/ai/getCenterOverview",
Handler: ai.GetCenterOverviewHandler(serverCtx),
},
{
Method: http.MethodGet,
Path: "/ai/getCenterQueueing",
Handler: ai.GetCenterQueueingHandler(serverCtx),
},
{
Method: http.MethodGet,
Path: "/ai/getCenterList",
Handler: ai.GetCenterListHandler(serverCtx),
},
{
Method: http.MethodGet,
Path: "/ai/getCenterTaskList",
Handler: ai.GetCenterTaskListHandler(serverCtx),
},
{ {
Method: http.MethodGet, Method: http.MethodGet,
Path: "/ai/listDataSet/:projectId", Path: "/ai/listDataSet/:projectId",
@ -1170,6 +1195,11 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
Path: "/schedule/submit", Path: "/schedule/submit",
Handler: schedule.ScheduleSubmitHandler(serverCtx), Handler: schedule.ScheduleSubmitHandler(serverCtx),
}, },
{
Method: http.MethodPost,
Path: "/schedule/getOverview",
Handler: schedule.ScheduleGetOverviewHandler(serverCtx),
},
}, },
rest.WithPrefix("/pcm/v1"), rest.WithPrefix("/pcm/v1"),
) )
@ -1267,6 +1297,21 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
Path: "/core/syncClusterAlert", Path: "/core/syncClusterAlert",
Handler: monitoring.SyncClusterAlertHandler(serverCtx), Handler: monitoring.SyncClusterAlertHandler(serverCtx),
}, },
{
Method: http.MethodGet,
Path: "/monitoring/task/num",
Handler: monitoring.TaskNumHandler(serverCtx),
},
{
Method: http.MethodGet,
Path: "/monitoring/adapter/info",
Handler: monitoring.AdapterInfoHandler(serverCtx),
},
{
Method: http.MethodGet,
Path: "/monitoring/schedule/situation",
Handler: monitoring.ScheduleSituationHandler(serverCtx),
},
}, },
rest.WithPrefix("/pcm/v1"), rest.WithPrefix("/pcm/v1"),
) )

View File

@ -0,0 +1,21 @@
package schedule
import (
"net/http"
"github.com/zeromicro/go-zero/rest/httpx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/schedule"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
)
// ScheduleGetOverviewHandler builds the HTTP handler for the schedule overview
// endpoint. It runs ScheduleGetOverviewLogic with the request context and writes
// either the error (httpx.ErrorCtx) or the JSON-encoded response (httpx.OkJsonCtx).
func ScheduleGetOverviewHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		ctx := r.Context()
		resp, err := schedule.NewScheduleGetOverviewLogic(ctx, svcCtx).ScheduleGetOverview()
		if err != nil {
			httpx.ErrorCtx(ctx, w, err)
			return
		}
		httpx.OkJsonCtx(ctx, w, resp)
	}
}

View File

@ -0,0 +1,43 @@
package ai
import (
"context"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"github.com/zeromicro/go-zero/core/logx"
)
// GetCenterListLogic holds the per-request state used to list AI centers:
// an embedded logger, the request context, and the shared service context.
type GetCenterListLogic struct {
logx.Logger
ctx context.Context
svcCtx *svc.ServiceContext
}
// NewGetCenterListLogic returns a GetCenterListLogic bound to ctx and svcCtx,
// with a logger derived from ctx.
func NewGetCenterListLogic(ctx context.Context, svcCtx *svc.ServiceContext) *GetCenterListLogic {
	logic := new(GetCenterListLogic)
	logic.Logger = logx.WithContext(ctx)
	logic.ctx = ctx
	logic.svcCtx = svcCtx
	return logic
}
// GetCenterList loads the adapters of type "1" from storage and maps each one
// to an AiCenter entry (Name, Nickname as StackName, Version).
// A storage error is returned as-is with a nil response.
func (l *GetCenterListLogic) GetCenterList() (resp *types.CenterListResp, err error) {
	adapters, err := l.svcCtx.Scheduler.AiStorages.GetAdaptersByType("1")
	if err != nil {
		return nil, err
	}

	resp = new(types.CenterListResp)
	for i := range adapters {
		entry := adapters[i]
		resp.List = append(resp.List, &types.AiCenter{
			Name:      entry.Name,
			StackName: entry.Nickname,
			Version:   entry.Version,
		})
	}
	return resp, nil
}

View File

@ -0,0 +1,139 @@
package ai
import (
"context"
"github.com/zeromicro/go-zero/core/logx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
"strconv"
"sync"
)
// GetCenterOverviewLogic holds the per-request state used to build the AI
// center overview: an embedded logger, the request context, and the shared
// service context.
type GetCenterOverviewLogic struct {
logx.Logger
ctx context.Context
svcCtx *svc.ServiceContext
}
// NewGetCenterOverviewLogic returns a GetCenterOverviewLogic bound to ctx and
// svcCtx, with a logger derived from ctx.
func NewGetCenterOverviewLogic(ctx context.Context, svcCtx *svc.ServiceContext) *GetCenterOverviewLogic {
	logic := new(GetCenterOverviewLogic)
	logic.Logger = logx.WithContext(ctx)
	logic.ctx = ctx
	logic.svcCtx = svcCtx
	return logic
}
// GetCenterOverview aggregates the number of type-"1" adapters, their tasks,
// and the card count / TOPS totals of their clusters as stored in AiStorages.
//
// A background refresh (updateClusterResource) is started before the totals are
// read and is awaited before returning. Reads of individual cluster resources
// take the read lock; the refresh takes the write lock while persisting.
// Per-adapter and per-cluster storage errors are skipped, so the totals are
// best-effort. Only a failure to list the adapters is returned as an error.
//
// NOTE(review): the totals are read while the refresh is still running, so they
// may mix pre- and post-refresh values. Confirm this is intended.
func (l *GetCenterOverviewLogic) GetCenterOverview() (resp *types.CenterOverviewResp, err error) {
	adapters, err := l.svcCtx.Scheduler.AiStorages.GetAdaptersByType("1")
	if err != nil {
		return nil, err
	}

	var (
		lock sync.RWMutex
		done = make(chan struct{})
	)
	resp = &types.CenterOverviewResp{CenterNum: int32(len(adapters))}

	go l.updateClusterResource(&lock, done, adapters)

	// Task total across all adapters.
	for _, adapter := range adapters {
		tasks, taskErr := l.svcCtx.Scheduler.AiStorages.GetAiTasksByAdapterId(adapter.Id)
		if taskErr != nil {
			continue
		}
		resp.TaskNum += int32(len(tasks))
	}

	// Card and TOPS totals across all clusters of all adapters.
	for _, adapter := range adapters {
		clusters, clusterErr := l.svcCtx.Scheduler.AiStorages.GetClustersByAdapterId(adapter.Id)
		if clusterErr != nil {
			continue
		}
		for _, cluster := range clusters.List {
			lock.RLock()
			resource, resErr := l.svcCtx.Scheduler.AiStorages.GetClusterResourcesById(cluster.Id)
			lock.RUnlock()
			if resErr != nil {
				continue
			}
			resp.CardNum += int32(resource.CardTotal)
			resp.PowerInTops += resource.CardTopsTotal
		}
	}

	// Wait for the background refresh to finish before answering.
	<-done
	return resp, nil
}
// updateClusterResource refreshes the stored card/TOPS totals of every cluster
// belonging to the adapters in list, then sends one value on ch.
//
// For each cluster it fetches live resource stats from the matching collector
// in a separate goroutine and either inserts a new resource row (when the
// stored row is the zero value) or updates CardTotal/CardTopsTotal on the
// existing one. Writes to storage are serialized with mu's write lock; the
// initial read of the stored row uses the read lock.
//
// The refresh is best-effort: adapters, clusters, or collectors that fail are
// skipped, and persistence failures are logged rather than returned.
// The caller must receive from ch, otherwise this function blocks forever on
// the final send.
func (l *GetCenterOverviewLogic) updateClusterResource(mu *sync.RWMutex, ch chan<- struct{}, list []*types.AdapterInfo) {
	var wg sync.WaitGroup
	for _, adapter := range list {
		// The adapter type is the same for every cluster of this adapter, so parse it once.
		clusterType, err := strconv.ParseInt(adapter.Type, 10, 64)
		if err != nil {
			continue
		}
		clusters, err := l.svcCtx.Scheduler.AiStorages.GetClustersByAdapterId(adapter.Id)
		if err != nil {
			continue
		}
		for _, cluster := range clusters.List {
			c := cluster
			mu.RLock()
			clusterResource, err := l.svcCtx.Scheduler.AiStorages.GetClusterResourcesById(c.Id)
			mu.RUnlock()
			if err != nil {
				continue
			}
			// Resolve the collector here, not inside the goroutine: this avoids capturing
			// the adapter loop variable (shared across iterations before Go 1.22) and
			// lets us skip clusters that have no registered collector.
			collector, ok := l.svcCtx.Scheduler.AiService.AiCollectorAdapterMap[adapter.Id][c.Id]
			if !ok {
				continue
			}
			wg.Add(1)
			go func() {
				defer wg.Done()

				stat, err := collector.GetResourceStats(l.ctx)
				if err != nil {
					return
				}
				var cardTotal int64
				var topsTotal float64
				for _, card := range stat.CardsAvail {
					cardTotal += int64(card.CardNum)
					topsTotal += card.TOpsAtFp16 * float64(card.CardNum)
				}

				mu.Lock()
				defer mu.Unlock()
				// A zero-valued row means nothing is stored for this cluster yet.
				if (models.TClusterResource{} == *clusterResource) {
					err = l.svcCtx.Scheduler.AiStorages.SaveClusterResources(c.Id, c.Name, clusterType, float64(stat.CpuCoreAvail), float64(stat.CpuCoreTotal),
						stat.MemAvail, stat.MemTotal, stat.DiskAvail, stat.DiskTotal, float64(stat.GpuAvail), float64(stat.GpuTotal), cardTotal, topsTotal)
					if err != nil {
						l.Errorf("save cluster resources for cluster %v: %v", c.Id, err)
					}
					return
				}
				clusterResource.CardTotal = cardTotal
				clusterResource.CardTopsTotal = topsTotal
				if err := l.svcCtx.Scheduler.AiStorages.UpdateClusterResources(clusterResource); err != nil {
					l.Errorf("update cluster resources for cluster %v: %v", c.Id, err)
				}
			}()
		}
	}
	wg.Wait()
	ch <- struct{}{}
}

View File

@ -0,0 +1,70 @@
package ai
import (
"context"
"sort"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"github.com/zeromicro/go-zero/core/logx"
)
// GetCenterQueueingLogic holds the per-request state used to report AI center
// queueing: an embedded logger, the request context, and the shared service
// context.
type GetCenterQueueingLogic struct {
logx.Logger
ctx context.Context
svcCtx *svc.ServiceContext
}
// NewGetCenterQueueingLogic returns a GetCenterQueueingLogic bound to ctx and
// svcCtx, with a logger derived from ctx.
func NewGetCenterQueueingLogic(ctx context.Context, svcCtx *svc.ServiceContext) *GetCenterQueueingLogic {
	logic := new(GetCenterQueueingLogic)
	logic.Logger = logx.WithContext(ctx)
	logic.ctx = ctx
	logic.svcCtx = svcCtx
	return logic
}
// GetCenterQueueing reports, for every cluster of every type-"1" adapter, the
// queue length taken from the first stored task-queue record of that cluster.
//
// Current and History are presently filled from the same record (see the todo
// below) and are both sorted by queue length, largest first.
// Clusters whose queue lookup fails or returns no records are skipped; only a
// failure to list the adapters is returned as an error.
func (l *GetCenterQueueingLogic) GetCenterQueueing() (resp *types.CenterQueueingResp, err error) {
	resp = &types.CenterQueueingResp{}

	adapters, err := l.svcCtx.Scheduler.AiStorages.GetAdaptersByType("1")
	if err != nil {
		return nil, err
	}

	for _, adapter := range adapters {
		clusters, err := l.svcCtx.Scheduler.AiStorages.GetClustersByAdapterId(adapter.Id)
		if err != nil {
			continue
		}
		for _, cluster := range clusters.List {
			queues, err := l.svcCtx.Scheduler.AiStorages.GetClusterTaskQueues(adapter.Id, cluster.Id)
			// An empty result would make queues[0] panic, so treat it like a failed lookup.
			if err != nil || len(queues) == 0 {
				continue
			}
			//todo sync current task queues
			queueing := int32(queues[0].QueueNum)
			resp.Current = append(resp.Current, &types.CenterQueue{
				Name:        cluster.Name,
				QueueingNum: queueing,
			})
			resp.History = append(resp.History, &types.CenterQueue{
				Name:        cluster.Name,
				QueueingNum: queueing,
			})
		}
	}

	sortQueueingNum(resp.Current)
	sortQueueingNum(resp.History)

	return resp, nil
}
// sortQueueingNum sorts q in place by QueueingNum, largest first.
// The sort is not stable, so entries with equal counts may appear in any order.
func sortQueueingNum(q []*types.CenterQueue) {
	byNumDesc := func(a, b int) bool {
		return q[b].QueueingNum < q[a].QueueingNum
	}
	sort.Slice(q, byNumDesc)
}

View File

@ -0,0 +1,116 @@
package ai
import (
"context"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
"strconv"
"sync"
"time"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"github.com/zeromicro/go-zero/core/logx"
)
// GetCenterTaskListLogic holds the per-request state used to list AI center
// tasks: an embedded logger, the request context, and the shared service
// context.
type GetCenterTaskListLogic struct {
logx.Logger
ctx context.Context
svcCtx *svc.ServiceContext
}
// NewGetCenterTaskListLogic returns a GetCenterTaskListLogic bound to ctx and
// svcCtx, with a logger derived from ctx.
func NewGetCenterTaskListLogic(ctx context.Context, svcCtx *svc.ServiceContext) *GetCenterTaskListLogic {
	logic := new(GetCenterTaskListLogic)
	logic.Logger = logx.WithContext(ctx)
	logic.ctx = ctx
	logic.svcCtx = svcCtx
	return logic
}
// GetCenterTaskList lists the stored AI tasks of every type-"1" adapter with
// their name, status, and elapsed time in whole seconds.
//
// Elapsed time is:
//   - Completed tasks: EndTime - StartTime, both parsed with constants.Layout in
//     the local time zone; 0 if either timestamp cannot be parsed.
//   - Running tasks: time since CommitTime.
//   - Any other status: 0.
//
// A background status refresh (updateAiTaskStatus) is started before the tasks
// are read and is awaited before returning. Adapters whose task lookup fails
// are skipped; only a failure to list the adapters is returned as an error.
func (l *GetCenterTaskListLogic) GetCenterTaskList() (resp *types.CenterTaskListResp, err error) {
	resp = &types.CenterTaskListResp{}
	var mu sync.RWMutex
	ch := make(chan struct{})

	adapterList, err := l.svcCtx.Scheduler.AiStorages.GetAdaptersByType("1")
	if err != nil {
		return nil, err
	}

	go l.updateAiTaskStatus(&mu, ch, adapterList)

	for _, adapter := range adapterList {
		mu.RLock()
		taskList, err := l.svcCtx.Scheduler.AiStorages.GetAiTasksByAdapterId(adapter.Id)
		mu.RUnlock()
		if err != nil {
			continue
		}
		for _, task := range taskList {
			var elapsed time.Duration
			switch task.Status {
			case constants.Completed:
				start, startErr := time.ParseInLocation(constants.Layout, task.StartTime, time.Local)
				end, endErr := time.ParseInLocation(constants.Layout, task.EndTime, time.Local)
				// Only trust the difference when both timestamps parsed; a zero-valued
				// time on either side would yield a meaningless duration.
				if startErr == nil && endErr == nil {
					elapsed = end.Sub(start)
				}
			case constants.Running:
				elapsed = time.Since(task.CommitTime)
			}

			resp.List = append(resp.List, &types.AiTask{
				Name:        task.Name,
				Status:      task.Status,
				TimeElapsed: int32(elapsed.Seconds()),
			})
		}
	}

	// Wait for the background refresh to finish before answering.
	<-ch
	return resp, nil
}
// updateAiTaskStatus refreshes the stored status and start/end times of every
// non-completed AI task belonging to the adapters in list, then sends one value
// on ch.
//
// Each task is refreshed in its own goroutine: the matching collector is asked
// for the training task by JobId and the result is persisted under mu's write
// lock. The refresh is best-effort: adapters, tasks, or collectors that fail
// are skipped, and persistence failures are logged rather than returned.
// The caller must receive from ch, otherwise this function blocks forever on
// the final send.
func (l *GetCenterTaskListLogic) updateAiTaskStatus(mu *sync.RWMutex, ch chan<- struct{}, list []*types.AdapterInfo) {
	var wg sync.WaitGroup
	for _, adapter := range list {
		taskList, err := l.svcCtx.Scheduler.AiStorages.GetAiTasksByAdapterId(adapter.Id)
		if err != nil {
			continue
		}
		for _, task := range taskList {
			t := task
			if t.Status == constants.Completed {
				continue
			}
			// Resolve the collector here, not inside the goroutine: this avoids capturing
			// the adapter loop variable (shared across iterations before Go 1.22) and
			// lets us skip tasks whose cluster has no registered collector.
			collector, ok := l.svcCtx.Scheduler.AiService.AiCollectorAdapterMap[adapter.Id][strconv.FormatInt(t.ClusterId, 10)]
			if !ok {
				continue
			}
			wg.Add(1)
			go func() {
				defer wg.Done()

				trainingTask, err := collector.GetTrainingTask(l.ctx, t.JobId)
				if err != nil {
					return
				}
				t.Status = trainingTask.Status
				t.StartTime = trainingTask.Start
				t.EndTime = trainingTask.End

				mu.Lock()
				err = l.svcCtx.Scheduler.AiStorages.UpdateAiTask(t)
				mu.Unlock()
				if err != nil {
					l.Errorf("update ai task %v: %v", t.JobId, err)
				}
			}()
		}
	}
	wg.Wait()
	ch <- struct{}{}
}

View File

@ -4,15 +4,19 @@ import (
"bytes" "bytes"
"context" "context"
"github.com/pkg/errors" "github.com/pkg/errors"
clientCore "gitlink.org.cn/JointCloud/pcm-coordinator/api/client"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models/cloud" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models/cloud"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
"io" "io"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime"
syaml "k8s.io/apimachinery/pkg/runtime/serializer/yaml" syaml "k8s.io/apimachinery/pkg/runtime/serializer/yaml"
kyaml "k8s.io/apimachinery/pkg/util/yaml" kyaml "k8s.io/apimachinery/pkg/util/yaml"
"sigs.k8s.io/yaml" "strconv"
"strings" "strings"
"time" "time"
@ -37,62 +41,102 @@ func NewCommitGeneralTaskLogic(ctx context.Context, svcCtx *svc.ServiceContext)
} }
func (l *CommitGeneralTaskLogic) CommitGeneralTask(req *types.GeneralTaskReq) error { func (l *CommitGeneralTaskLogic) CommitGeneralTask(req *types.GeneralTaskReq) error {
var yamlStr []string tx := l.svcCtx.DbEngin.Begin()
for _, s := range req.ReqBody { // 执行回滚或者提交操作
j2, err := yaml.YAMLToJSON([]byte(s)) defer func() {
if err != nil { if p := recover(); p != nil {
logx.Errorf("Failed to convert yaml to JSON, err: %v", err) tx.Rollback()
return err logx.Error(p)
} else if tx.Error != nil {
logx.Info("rollback, error", tx.Error)
tx.Rollback()
} else {
tx = tx.Commit()
logx.Info("commit success")
} }
yamlStr = append(yamlStr, string(j2)) }()
} //TODO adapter
result := strings.Join(yamlStr, ",") adapterId, _ := strconv.ParseUint(req.AdapterIds[0], 10, 64)
//TODO The namespace is fixed to ns-admin for the time being. Later, the namespace is obtained based on the user
taskModel := models.Task{
Status: constants.Saved,
Name: req.Name,
CommitTime: time.Now(),
NsID: "ns-admin",
YamlString: "[" + result + "]",
}
// Save the task data to the database
tx := l.svcCtx.DbEngin.Create(&taskModel)
if tx.Error != nil {
return tx.Error
}
var clusters []*models.CloudModel var clusters []*models.CloudModel
err := l.svcCtx.DbEngin.Raw("SELECT * FROM `t_cluster` where adapter_id = ? and id in ?", req.AdapterId, req.ClusterIds).Scan(&clusters).Error err := tx.Raw("SELECT * FROM `t_cluster` where adapter_id in ? and id in ?", req.AdapterIds, req.ClusterIds).Scan(&clusters).Error
if err != nil { if err != nil {
logx.Errorf("CommitGeneralTask() => sql execution error: %v", err) logx.Errorf("CommitGeneralTask() => sql execution error: %v", err)
return errors.Errorf("the cluster does not match the drive resources. Check the data") return errors.Errorf("the cluster does not match the drive resources. Check the data")
} }
taskCloud := cloud.TaskCloudModel{} taskCloud := cloud.TaskCloudModel{}
//TODO 执行策略返回集群跟 Replica opt := &option.CloudOption{}
for _, c := range clusters { utils.Convert(&req, &opt)
sc, _ := schedulers.NewCloudScheduler(l.ctx, "", l.svcCtx.Scheduler, opt, tx, l.svcCtx.PromClient)
results, err := l.svcCtx.Scheduler.AssignAndSchedule(sc)
if err != nil {
logx.Errorf("AssignAndSchedule() => execution error: %v", err)
return err
}
rs := (results).([]*schedulers.CloudResult)
var synergyStatus int64
if len(rs) > 1 {
synergyStatus = 1
}
var strategy int64
sqlStr := `select t_dict_item.item_value
from t_dict
left join t_dict_item on t_dict.id = t_dict_item.dict_id
where item_text = ?
and t_dict.dict_code = 'schedule_Strategy'`
//查询调度策略
err = tx.Raw(sqlStr, req.Strategy).Scan(&strategy).Error
taskModel := models.Task{
Id: utils.GenSnowflakeID(),
Status: constants.Saved,
Name: req.Name,
CommitTime: time.Now(),
YamlString: strings.Join(req.ReqBody, "\n---\n"),
AdapterTypeDict: 0,
SynergyStatus: synergyStatus,
Strategy: strategy,
}
var taskClouds []cloud.TaskCloudModel
for _, r := range rs {
for _, s := range req.ReqBody { for _, s := range req.ReqBody {
sStruct := UnMarshalK8sStruct(s) sStruct := UnMarshalK8sStruct(s, int64(r.Replica))
unString, _ := sStruct.MarshalJSON() unString, _ := sStruct.MarshalJSON()
taskCloud.Id = utils.GenSnowflakeIDUint()
taskCloud.TaskId = uint(taskModel.Id) taskCloud.TaskId = uint(taskModel.Id)
taskCloud.AdapterId = c.AdapterId clusterId, _ := strconv.ParseUint(r.ClusterId, 10, 64)
taskCloud.ClusterId = c.Id taskCloud.AdapterId = uint(adapterId)
taskCloud.ClusterName = c.Name taskCloud.ClusterId = uint(clusterId)
taskCloud.Status = "Saved" taskCloud.ClusterName = r.ClusterName
taskCloud.Status = constants.Saved
taskCloud.YamlString = string(unString) taskCloud.YamlString = string(unString)
taskCloud.Kind = sStruct.GetKind() taskCloud.Kind = sStruct.GetKind()
taskCloud.Namespace = sStruct.GetNamespace() taskCloud.Namespace = sStruct.GetNamespace()
tx = l.svcCtx.DbEngin.Create(&taskCloud) taskClouds = append(taskClouds, taskCloud)
if tx.Error != nil {
logx.Errorf("CommitGeneralTask() create taskCloud => sql execution error: %v", err)
return tx.Error
}
} }
} }
adapterName := ""
tx.Table("t_adapter").Select("name").Where("id=?", adapterId).Find(&adapterName)
noticeInfo := clientCore.NoticeInfo{
AdapterId: int64(adapterId),
AdapterName: adapterName,
NoticeType: "create",
TaskName: req.Name,
Incident: "任务创建中",
CreatedTime: time.Now(),
}
db := tx.Table("task").Create(&taskModel)
db = tx.Table("task_cloud").Create(&taskClouds)
db = tx.Table("t_notice").Create(&noticeInfo)
if db.Error != nil {
logx.Errorf("Task creation failure, err: %v", db.Error)
return errors.New("task creation failure")
}
return nil return nil
} }
func UnMarshalK8sStruct(yamlString string) *unstructured.Unstructured { func UnMarshalK8sStruct(yamlString string, replica int64) *unstructured.Unstructured {
unstructuredObj := &unstructured.Unstructured{} unstructuredObj := &unstructured.Unstructured{}
d := kyaml.NewYAMLOrJSONDecoder(bytes.NewBufferString(yamlString), 4096) d := kyaml.NewYAMLOrJSONDecoder(bytes.NewBufferString(yamlString), 4096)
var err error var err error
@ -113,6 +157,10 @@ func UnMarshalK8sStruct(yamlString string) *unstructured.Unstructured {
if len(unstructuredObj.GetNamespace()) == 0 { if len(unstructuredObj.GetNamespace()) == 0 {
unstructuredObj.SetNamespace("default") unstructuredObj.SetNamespace("default")
} }
//设置副本数
if unstructuredObj.GetKind() == "Deployment" || unstructuredObj.GetKind() == "StatefulSet" {
unstructured.SetNestedField(unstructuredObj.Object, replica, "spec", "replicas")
}
} }
return unstructuredObj return unstructuredObj
} }

View File

@ -1,65 +0,0 @@
package core
import (
"context"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/pkg/response"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
tool "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
"k8s.io/apimachinery/pkg/util/json"
"time"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"github.com/zeromicro/go-zero/core/logx"
)
// CommitHpcTaskLogic holds the state needed to handle an HPC task submission:
// an embedded logger, the request context and the shared service context.
// Instances are created by NewCommitHpcTaskLogic.
type CommitHpcTaskLogic struct {
logx.Logger
// ctx is the context of the request being served.
ctx context.Context
// svcCtx gives access to shared resources such as the database and Redis client.
svcCtx *svc.ServiceContext
}
// NewCommitHpcTaskLogic builds a CommitHpcTaskLogic whose logger is bound to
// ctx and which keeps ctx and svcCtx for later use.
func NewCommitHpcTaskLogic(ctx context.Context, svcCtx *svc.ServiceContext) *CommitHpcTaskLogic {
	logic := new(CommitHpcTaskLogic)
	logic.Logger = logx.WithContext(ctx)
	logic.ctx = ctx
	logic.svcCtx = svcCtx
	return logic
}
// CommitHpcTask stores a new task record in the "Saved" state and publishes a
// JSON-encoded TaskInfo message for it on the Redis channel named "hpc".
//
// The generated task id is written back into req.TaskId. On success the
// method returns a nil response and a nil error; a database, encoding or
// publish failure is returned as the error.
func (l *CommitHpcTaskLogic) CommitHpcTask(req *types.CommitHpcTaskReq) (resp *types.CommitHpcTaskResp, err error) {
	// Build the parent task record.
	taskModel := models.Task{
		Status:      constants.Saved,
		Description: req.Description,
		Name:        req.Name,
		CommitTime:  time.Now(),
	}

	// Persist the task so that it receives an id.
	if created := l.svcCtx.DbEngin.Create(&taskModel); created.Error != nil {
		return nil, created.Error
	}

	hpc := models.Hpc{}
	tool.Convert(req, &hpc)

	mqInfo := response.TaskInfo{
		TaskId:      taskModel.Id,
		TaskType:    "hpc",
		MatchLabels: req.MatchLabels,
		//Metadata: hpc,
	}
	req.TaskId = taskModel.Id

	// Encode the task information as the message payload.
	reqMessage, err := json.Marshal(mqInfo)
	if err != nil {
		logx.Error(err)
		return nil, err
	}

	pubErr := l.svcCtx.RedisClient.Publish(context.Background(), mqInfo.TaskType, reqMessage).Err()
	if pubErr != nil {
		return nil, pubErr
	}
	return resp, err
}

View File

@ -3,11 +3,13 @@ package core
import ( import (
"context" "context"
"fmt" "fmt"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
"math/rand" "strconv"
"time" "time"
"github.com/zeromicro/go-zero/core/logx" "github.com/zeromicro/go-zero/core/logx"
@ -29,11 +31,24 @@ func NewCommitVmTaskLogic(ctx context.Context, svcCtx *svc.ServiceContext) *Comm
func (l *CommitVmTaskLogic) CommitVmTask(req *types.CommitVmTaskReq) (resp *types.CommitVmTaskResp, err error) { func (l *CommitVmTaskLogic) CommitVmTask(req *types.CommitVmTaskReq) (resp *types.CommitVmTaskResp, err error) {
// todo: add your logic here and delete this line // todo: add your logic here and delete this line
resp = &types.CommitVmTaskResp{}
//Building the main task structure //Building the main task structure
opt := &option.VmOption{
AdapterId: req.VmOption.AdapterId,
Replicas: req.VmOption.Replicas,
Strategy: req.VmOption.Strategy,
ClusterToStaticWeight: req.VmOption.StaticWeightMap,
Status: constants.Saved,
MatchLabels: req.VmOption.MatchLabels,
StaticWeightMap: req.VmOption.StaticWeightMap,
Name: req.VmOption.Name,
CommitTime: time.Now(),
}
taskModel := models.Task{ taskModel := models.Task{
Status: constants.Saved, Status: constants.Saved,
Name: req.Name, Name: req.VmOption.Name,
CommitTime: time.Now(), CommitTime: time.Now(),
Description: "vm task",
} }
// Save task data to database // Save task data to database
tx := l.svcCtx.DbEngin.Create(&taskModel) tx := l.svcCtx.DbEngin.Create(&taskModel)
@ -41,38 +56,63 @@ func (l *CommitVmTaskLogic) CommitVmTask(req *types.CommitVmTaskReq) (resp *type
return nil, tx.Error return nil, tx.Error
} }
for _, CreateMulServer := range req.CreateMulServer { //var clusters []*models.VmModel
fmt.Println("", req.CreateMulServer) //err2 := l.svcCtx.DbEngin.Raw("SELECT * FROM `t_cluster` where adapter_id in ? and id in ?", req.VmOption.AdapterId, req.VmOption.VmClusterIds).Scan(&clusters).Error
var clusterIds []int64 //if err2 != nil {
l.svcCtx.DbEngin.Raw("SELECT id FROM `t_cluster` where adapter_id = ? and label = ?", req.AdapterId, req.ClusterType).Scan(&clusterIds) // logx.Errorf("CommitGeneralTask() => sql execution error: %v", err)
// //return errors.Errorf("the cluster does not match the drive resources. Check the data"), nil
//}
if len(clusterIds) == 0 || clusterIds == nil { taskVm := models.TaskVm{}
return nil, nil //TODO 执行策略返回集群跟 Replica
} /*opt := &option.VmOption{}
utils.Convert(&req, &opt)*/
// 2、Initialize scheduler
vmSchdl, err := schedulers.NewVmScheduler(l.ctx, "", l.svcCtx.Scheduler, opt, l.svcCtx.DbEngin, l.svcCtx.PromClient)
if err != nil {
return nil, err
}
vmInfo := models.TaskVm{ // 3、Return scheduling results
TaskId: taskModel.Id, results, err := l.svcCtx.Scheduler.AssignAndSchedule(vmSchdl)
ClusterId: clusterIds[rand.Intn(len(clusterIds))], if err != nil {
Name: taskModel.Name, return nil, err
Status: "Saved", }
StartTime: time.Now().String(),
MinCount: CreateMulServer.Min_count,
ImageRef: CreateMulServer.ImageRef,
FlavorRef: CreateMulServer.FlavorRef,
Uuid: CreateMulServer.Uuid,
Platform: CreateMulServer.Platform,
}
tx = l.svcCtx.DbEngin.Create(&vmInfo) rs := (results).([]*schedulers.VmResult)
if tx.Error != nil { for _, r := range rs {
return nil, tx.Error for _, CreateMulServer := range req.CreateMulServer {
} if r.Replica > 0 && r.ClusterId == CreateMulServer.ClusterId {
resp = &types.CommitVmTaskResp{ fmt.Println("", req.CreateMulServer)
Code: 200, var clusterIds []int64
Msg: "success", l.svcCtx.DbEngin.Raw("SELECT id FROM `t_cluster` where adapter_id = ? ", req.VmOption.AdapterId).Scan(&clusterIds)
TaskId: taskModel.Id, if len(clusterIds) == 0 || clusterIds == nil {
return nil, nil
}
adapterId, _ := strconv.ParseUint(req.VmOption.AdapterId, 10, 64)
taskVm.AdapterId = int64(adapterId)
clusterId, _ := strconv.ParseUint(r.ClusterId, 10, 64)
taskVm.ClusterId = int64(clusterId)
taskVm.Name = req.VmOption.Name
taskVm.TaskId = taskModel.Id
clusterId, _ = strconv.ParseUint(r.ClusterId, 10, 64)
taskVm.ClusterId = int64(clusterId)
taskVm.Status = "Saved"
taskVm.StartTime = time.Now().String()
taskVm.MinCount = CreateMulServer.Min_count
taskVm.ImageRef = CreateMulServer.ImageRef
taskVm.FlavorRef = CreateMulServer.FlavorRef
taskVm.Uuid = CreateMulServer.Uuid
taskVm.Platform = CreateMulServer.Platform
tx = l.svcCtx.DbEngin.Create(&taskVm)
if tx.Error != nil {
return nil, tx.Error
}
}
} }
} }
resp.Code = 200
resp.Msg = "Success"
return resp, nil return resp, nil
} }

View File

@ -30,7 +30,7 @@ func (l *CountTaskStatusLogic) CountTaskStatus() (resp *types.TaskStatusResp, er
COUNT(CASE WHEN status = 'Succeeded' THEN 1 END) AS Succeeded, COUNT(CASE WHEN status = 'Succeeded' THEN 1 END) AS Succeeded,
COUNT(CASE WHEN status = 'Failed' THEN 1 END) AS Failed, COUNT(CASE WHEN status = 'Failed' THEN 1 END) AS Failed,
COUNT(CASE WHEN status = 'Running' THEN 1 END) AS Running, COUNT(CASE WHEN status = 'Running' THEN 1 END) AS Running,
COUNT(CASE WHEN status = 'Pause' THEN 1 END) AS Pause COUNT(CASE WHEN status = 'Saved' THEN 1 END) AS Saved
FROM task;` FROM task;`
err = l.svcCtx.DbEngin.Raw(sqlStr).Scan(&resp).Error err = l.svcCtx.DbEngin.Raw(sqlStr).Scan(&resp).Error
if err != nil { if err != nil {

View File

@ -3,6 +3,8 @@ package core
import ( import (
"context" "context"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils/timeutils"
"time"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
@ -28,7 +30,7 @@ func (l *PageListTaskLogic) PageListTask(req *types.PageTaskReq) (resp *types.Pa
limit := req.PageSize limit := req.PageSize
offset := req.PageSize * (req.PageNum - 1) offset := req.PageSize * (req.PageNum - 1)
resp = &types.PageResult{} resp = &types.PageResult{}
var list []types.TaskModel var list []*types.TaskModel
db := l.svcCtx.DbEngin.Model(&types.TaskModel{}).Table("task") db := l.svcCtx.DbEngin.Model(&types.TaskModel{}).Table("task")
db = db.Where("deleted_at is null") db = db.Where("deleted_at is null")
@ -48,8 +50,18 @@ func (l *PageListTaskLogic) PageListTask(req *types.PageTaskReq) (resp *types.Pa
if err != nil { if err != nil {
return nil, result.NewDefaultError(err.Error()) return nil, result.NewDefaultError(err.Error())
} }
for _, model := range list {
resp.List = list if model.EndTime != "" && model.StartTime != "" {
startTime := timeutils.TimeStringToGoTime(model.StartTime)
endTime := timeutils.TimeStringToGoTime(model.EndTime)
model.RunningTime = int64(endTime.Sub(startTime).Seconds())
}
if model.StartTime != "" {
startTime := timeutils.TimeStringToGoTime(model.StartTime)
model.RunningTime = int64(time.Now().Sub(startTime).Seconds())
}
}
resp.List = &list
resp.PageSize = req.PageSize resp.PageSize = req.PageSize
resp.PageNum = req.PageNum resp.PageNum = req.PageNum
resp.Total = total resp.Total = total

View File

@ -5,6 +5,7 @@ import (
"github.com/jinzhu/copier" "github.com/jinzhu/copier"
clientCore "gitlink.org.cn/JointCloud/pcm-coordinator/api/client" clientCore "gitlink.org.cn/JointCloud/pcm-coordinator/api/client"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models/cloud"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
"gorm.io/gorm" "gorm.io/gorm"
@ -54,7 +55,7 @@ func (l *PullTaskInfoLogic) PullTaskInfo(req *clientCore.PullTaskInfoReq) (*clie
} }
} }
case 0: case 0:
var cloudModelList []models.Cloud var cloudModelList []cloud.TaskCloudModel
err := findModelList(req.AdapterId, l.svcCtx.DbEngin, &cloudModelList) err := findModelList(req.AdapterId, l.svcCtx.DbEngin, &cloudModelList)
if err != nil { if err != nil {
return nil, err return nil, err

View File

@ -2,14 +2,15 @@ package core
import ( import (
"context" "context"
"github.com/pkg/errors"
"github.com/zeromicro/go-zero/core/logx"
clientCore "gitlink.org.cn/JointCloud/pcm-coordinator/api/client" clientCore "gitlink.org.cn/JointCloud/pcm-coordinator/api/client"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
"gorm.io/gorm" "gorm.io/gorm"
"strings" "strings"
"time"
"github.com/zeromicro/go-zero/core/logx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
) )
type PushTaskInfoLogic struct { type PushTaskInfoLogic struct {
@ -33,9 +34,14 @@ func (l *PushTaskInfoLogic) PushTaskInfo(req *clientCore.PushTaskInfoReq) (*clie
switch kind { switch kind {
case 0: case 0:
for _, cloudInfo := range req.CloudInfoList { for _, cloudInfo := range req.CloudInfoList {
l.svcCtx.DbEngin.Exec("update cloud set status = ?,start_time = ?,result = ? where participant_id = ? and id = ?", var taskId uint
cloudInfo.Status, cloudInfo.StartTime, cloudInfo.Result, req.AdapterId, cloudInfo.Id) result := l.svcCtx.DbEngin.Table("task_cloud").Select("task_id").Where("task_id = ?", cloudInfo.TaskId).Find(&taskId)
syncTask(l.svcCtx.DbEngin, cloudInfo.TaskId) if errors.Is(result.Error, gorm.ErrRecordNotFound) {
return nil, errors.New("Record does not exist")
}
l.svcCtx.DbEngin.Exec("update task_cloud set status = ?,start_time = ?,result = ? where task_id = ?",
cloudInfo.Status, cloudInfo.StartTime, cloudInfo.Result, cloudInfo.TaskId)
syncTask(l.svcCtx.DbEngin, int64(taskId))
} }
case 2: case 2:
for _, hpcInfo := range req.HpcInfoList { for _, hpcInfo := range req.HpcInfoList {
@ -63,7 +69,7 @@ func (l *PushTaskInfoLogic) PushTaskInfo(req *clientCore.PushTaskInfoReq) (*clie
func syncTask(gorm *gorm.DB, taskId int64) { func syncTask(gorm *gorm.DB, taskId int64) {
var allStatus string var allStatus string
tx := gorm.Raw("SELECT CONCAT_WS(',',GROUP_CONCAT(DISTINCT h.status) ,GROUP_CONCAT(DISTINCT a.status) ,GROUP_CONCAT(DISTINCT c.status))as status from task t left join hpc h on t.id = h.task_id left join cloud c on t.id = c.task_id left join ai a on t.id = a.task_id where t.id = ?", taskId).Scan(&allStatus) tx := gorm.Raw("SELECT CONCAT_WS(',',GROUP_CONCAT(DISTINCT h.status) ,GROUP_CONCAT(DISTINCT a.status) ,GROUP_CONCAT(DISTINCT c.status))as status from task t left join hpc h on t.id = h.task_id left join task_cloud c on t.id = c.task_id left join ai a on t.id = a.task_id where t.id = ?", taskId).Scan(&allStatus)
if tx.Error != nil { if tx.Error != nil {
logx.Error(tx.Error) logx.Error(tx.Error)
} }
@ -71,7 +77,6 @@ func syncTask(gorm *gorm.DB, taskId int64) {
statusArray := strings.Split(allStatus, ",") statusArray := strings.Split(allStatus, ",")
if len(removeRepeatedElement(statusArray)) == 1 { if len(removeRepeatedElement(statusArray)) == 1 {
updateTask(gorm, taskId, statusArray[0]) updateTask(gorm, taskId, statusArray[0])
} }
// 子任务包含失败状态 主任务则失败 // 子任务包含失败状态 主任务则失败
if strings.Contains(allStatus, constants.Failed) { if strings.Contains(allStatus, constants.Failed) {
@ -85,10 +90,14 @@ func syncTask(gorm *gorm.DB, taskId int64) {
} }
func updateTask(gorm *gorm.DB, taskId int64, status string) { func updateTask(gorm *gorm.DB, taskId int64, status string) {
now := time.Now()
var task models.Task var task models.Task
gorm.Where("id = ? ", taskId).Find(&task) gorm.Where("id = ? ", taskId).Find(&task)
if task.Status != status { if task.Status != status {
task.Status = status task.Status = status
if status == constants.Running {
task.StartTime = &now
}
gorm.Updates(&task) gorm.Updates(&task)
} }
} }

View File

@ -0,0 +1,54 @@
package core
import (
"context"
"github.com/pkg/errors"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
"gorm.io/gorm"
"github.com/zeromicro/go-zero/core/logx"
)
// TaskDetailsLogic holds the state needed to serve a task details request: an
// embedded logger, the request context and the shared service context.
// Instances are created by NewTaskDetailsLogic.
type TaskDetailsLogic struct {
logx.Logger
// ctx is the context of the request being served.
ctx context.Context
// svcCtx gives access to shared resources such as the database handle.
svcCtx *svc.ServiceContext
}
// NewTaskDetailsLogic builds a TaskDetailsLogic whose logger is bound to ctx
// and which keeps ctx and svcCtx for later use.
func NewTaskDetailsLogic(ctx context.Context, svcCtx *svc.ServiceContext) *TaskDetailsLogic {
	logic := new(TaskDetailsLogic)
	logic.Logger = logx.WithContext(ctx)
	logic.ctx = ctx
	logic.svcCtx = svcCtx
	return logic
}
// TaskDetails loads the task identified by req.Id together with the clusters
// its sub-tasks were placed on.
//
// The sub-task table is chosen from the task's AdapterTypeDict value
// (0: task_cloud, 1: task_ai, 2: task_hpc, 3: task_vm); any other value yields
// no cluster ids. The task fields are copied into the response with
// utils.Convert and the matching t_cluster rows are attached as ClusterInfos.
//
// It returns an error when the task does not exist or when any of the
// database queries fails; the response is nil in that case.
func (l *TaskDetailsLogic) TaskDetails(req *types.FId) (resp *types.TaskDetailsResp, err error) {
	resp = &types.TaskDetailsResp{}
	task := &models.Task{}
	if err = l.svcCtx.DbEngin.Where("id", req.Id).First(&task).Error; err != nil {
		if errors.Is(err, gorm.ErrRecordNotFound) {
			return nil, errors.New("记录不存在")
		}
		// Any other database failure must not be mistaken for an empty task.
		return nil, err
	}

	// Pick the sub-task table that matches the adapter type.
	var subTaskTable string
	switch task.AdapterTypeDict {
	case 0:
		subTaskTable = "task_cloud"
	case 1:
		subTaskTable = "task_ai"
	case 2:
		subTaskTable = "task_hpc"
	case 3:
		subTaskTable = "task_vm"
	}

	clusterIds := make([]int64, 0)
	if subTaskTable != "" {
		err = l.svcCtx.DbEngin.Table(subTaskTable).Select("cluster_id").Where("task_id", task.Id).Scan(&clusterIds).Error
		if err != nil {
			return nil, err
		}
	}

	var cList []*types.ClusterInfo
	err = l.svcCtx.DbEngin.Table("t_cluster").Where("id in ?", clusterIds).Scan(&cList).Error
	if err != nil {
		return nil, err
	}

	utils.Convert(&task, &resp)
	resp.ClusterInfos = cList
	return resp, nil
}

View File

@ -93,13 +93,12 @@ func (l *TaskListLogic) TaskList(req *types.TaskListReq) (resp *types.TaskListRe
pStatus = "Normal" pStatus = "Normal"
} }
} }
resp.Tasks = append(resp.Tasks, types.Task{ resp.Tasks = append(resp.Tasks, types.Task{
Id: task.Id, Id: task.Id,
Name: task.Name, Name: task.Name,
Status: task.Status, Status: task.Status,
StartTime: task.StartTime, StartTime: task.StartTime.Format("2006-01-02 15:04:05"),
EndTime: task.EndTime, EndTime: task.EndTime.Format("2006-01-02 15:04:05"),
ParticipantId: pInfo.Id, ParticipantId: pInfo.Id,
ParticipantName: pInfo.Name, ParticipantName: pInfo.Name,
ParticipantStatus: pStatus, ParticipantStatus: pStatus,

View File

@ -32,11 +32,15 @@ func (l *CommitHpcTaskLogic) CommitHpcTask(req *types.CommitHpcTaskReq) (resp *t
// 构建主任务结构体 // 构建主任务结构体
taskModel := models.Task{ taskModel := models.Task{
Status: constants.Saved, Name: req.Name,
Description: req.Description, Description: req.Description,
Name: req.Name, Status: constants.Saved,
CommitTime: time.Now(), Strategy: 0,
SynergyStatus: 0,
CommitTime: time.Now(),
AdapterTypeDict: 2,
} }
// 保存任务数据到数据库 // 保存任务数据到数据库
tx := l.svcCtx.DbEngin.Create(&taskModel) tx := l.svcCtx.DbEngin.Create(&taskModel)
if tx.Error != nil { if tx.Error != nil {
@ -49,7 +53,9 @@ func (l *CommitHpcTaskLogic) CommitHpcTask(req *types.CommitHpcTaskReq) (resp *t
env, _ := json.Marshal(req.Environment) env, _ := json.Marshal(req.Environment)
if len(clusterIds) == 0 || clusterIds == nil { if len(clusterIds) == 0 || clusterIds == nil {
return nil, nil resp.Code = 400
resp.Msg = "no cluster found"
return resp, nil
} }
hpcInfo := models.TaskHpc{ hpcInfo := models.TaskHpc{

View File

@ -0,0 +1,82 @@
package monitoring
import (
"context"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"strings"
"github.com/zeromicro/go-zero/core/logx"
)
// ScheduleSituationLogic holds the state needed to serve the schedule
// situation request: an embedded logger, the request context and the shared
// service context. Instances are created by NewScheduleSituationLogic.
type ScheduleSituationLogic struct {
logx.Logger
// ctx is the context of the request being served.
ctx context.Context
// svcCtx gives access to shared resources such as the database handle.
svcCtx *svc.ServiceContext
}
// NewScheduleSituationLogic builds a ScheduleSituationLogic whose logger is
// bound to ctx and which keeps ctx and svcCtx for later use.
func NewScheduleSituationLogic(ctx context.Context, svcCtx *svc.ServiceContext) *ScheduleSituationLogic {
	logic := new(ScheduleSituationLogic)
	logic.Logger = logx.WithContext(ctx)
	logic.ctx = ctx
	logic.svcCtx = svcCtx
	return logic
}
// ScheduleSituation assembles the data for the scheduling overview graph.
//
//   - Nodes: one row per cluster with its region dictionary item as category
//     and the number of AI, cloud, HPC and VM sub-tasks placed on it as value.
//   - Links: for every task that has more than one sub-task row in task_hpc,
//     task_cloud, task_ai or task_vm, the links LinksHandler derives from the
//     comma-joined cluster ids of that task.
//   - Categories: the item texts of the 'cluster_region_dict' dictionary.
//
// The first failing query aborts the call and its error is returned.
func (l *ScheduleSituationLogic) ScheduleSituation() (resp *types.ScheduleSituationResp, err error) {
	resp = &types.ScheduleSituationResp{}

	// Nodes. Every task table is counted with DISTINCT: the four LEFT JOINs
	// multiply rows per cluster, so a plain COUNT would inflate the total.
	nodeSQL := "SELECT c.id, c.name, tdi.id AS category, " +
		"count(DISTINCT ta.id)+count(DISTINCT tc.id)+COUNT(DISTINCT th.id)+COUNT(DISTINCT tv.id) as value " +
		"FROM t_cluster c " +
		"LEFT JOIN t_dict_item tdi ON c.region_dict = tdi.id " +
		"left JOIN task_ai ta ON ta.cluster_id = c.id " +
		"left JOIN task_cloud tc ON tc.cluster_id = c.id " +
		"left JOIN task_hpc th ON th.cluster_id = c.id " +
		"left JOIN task_vm tv ON tv.cluster_id = c.id " +
		"WHERE tc.deleted_at IS NULL GROUP BY c.id"
	if err = l.svcCtx.DbEngin.Raw(nodeSQL).Scan(&resp.Nodes).Error; err != nil {
		return nil, err
	}

	// Links. The table names come from this fixed list, never from input.
	for _, table := range []string{"task_hpc", "task_cloud", "task_ai", "task_vm"} {
		var clusterIdGroups []string
		linkSQL := "SELECT GROUP_CONCAT(cluster_id SEPARATOR ',') as cluster_ids FROM " + table +
			" WHERE deleted_at IS NULL GROUP BY task_id HAVING COUNT(*) > 1;"
		if err = l.svcCtx.DbEngin.Raw(linkSQL).Scan(&clusterIdGroups).Error; err != nil {
			return nil, err
		}
		LinksHandler(clusterIdGroups, resp)
	}

	// Categories.
	categorySQL := "select tdi.item_text as name from t_dict_item tdi,t_dict td where td.dict_code = 'cluster_region_dict' and tdi.dict_id = td.id"
	if err = l.svcCtx.DbEngin.Raw(categorySQL).Scan(&resp.Categories).Error; err != nil {
		return nil, err
	}
	return resp, nil
}
// LinksHandler appends links to resp.Links for each entry of sources.
//
// Every entry is a comma-separated list of ids. For each pair of neighbouring
// ids that differ, a Link is added whose Source is the later id and whose
// Target is the one just before it. Entries with fewer than two ids add
// nothing.
func LinksHandler(sources []string, resp *types.ScheduleSituationResp) {
	for _, joined := range sources {
		ids := strings.Split(joined, ",")
		// strings.Split with a non-empty separator always yields at least one
		// element, so ids[0] is safe.
		previous := ids[0]
		for _, current := range ids[1:] {
			if current != previous {
				link := types.Link{Source: current, Target: previous}
				resp.Links = append(resp.Links, link)
			}
			previous = current
		}
	}
}

View File

@ -26,7 +26,11 @@ func NewScheduleGetAiJobLogLogLogic(ctx context.Context, svcCtx *svc.ServiceCont
func (l *ScheduleGetAiJobLogLogLogic) ScheduleGetAiJobLogLog(req *types.AiJobLogReq) (resp *types.AiJobLogResp, err error) { func (l *ScheduleGetAiJobLogLogLogic) ScheduleGetAiJobLogLog(req *types.AiJobLogReq) (resp *types.AiJobLogResp, err error) {
resp = &types.AiJobLogResp{} resp = &types.AiJobLogResp{}
log, err := l.svcCtx.Scheduler.AiService.AiCollectorAdapterMap[req.AdapterId][req.ClusterId].GetTrainingTaskLog(l.ctx, req.TaskId, req.InstanceNum) id, err := l.svcCtx.Scheduler.AiStorages.GetAiTaskIdByClusterIdAndTaskId(req.ClusterId, req.TaskId)
if err != nil {
return nil, err
}
log, err := l.svcCtx.Scheduler.AiService.AiCollectorAdapterMap[req.AdapterId][req.ClusterId].GetTrainingTaskLog(l.ctx, id, req.InstanceNum)
if err != nil { if err != nil {
return nil, err return nil, err
} }

View File

@ -0,0 +1,30 @@
package schedule
import (
"context"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"github.com/zeromicro/go-zero/core/logx"
)
// ScheduleGetOverviewLogic holds the state needed to serve the schedule
// overview request: an embedded logger, the request context and the shared
// service context. Instances are created by NewScheduleGetOverviewLogic.
type ScheduleGetOverviewLogic struct {
logx.Logger
// ctx is the context of the request being served.
ctx context.Context
// svcCtx is the shared service context passed in at construction.
svcCtx *svc.ServiceContext
}
// NewScheduleGetOverviewLogic builds a ScheduleGetOverviewLogic whose logger
// is bound to ctx and which keeps ctx and svcCtx for later use.
func NewScheduleGetOverviewLogic(ctx context.Context, svcCtx *svc.ServiceContext) *ScheduleGetOverviewLogic {
	logic := new(ScheduleGetOverviewLogic)
	logic.Logger = logx.WithContext(ctx)
	logic.ctx = ctx
	logic.svcCtx = svcCtx
	return logic
}
// ScheduleGetOverview is a placeholder that has not been implemented yet: it
// performs no work and returns a nil response together with a nil error.
func (l *ScheduleGetOverviewLogic) ScheduleGetOverview() (resp *types.ScheduleOverviewResp, err error) {
	// todo: add your logic here and delete this line

	return resp, err
}

View File

@ -6,6 +6,7 @@ import (
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
"github.com/zeromicro/go-zero/core/logx" "github.com/zeromicro/go-zero/core/logx"
) )
@ -28,7 +29,9 @@ func (l *ScheduleSubmitLogic) ScheduleSubmit(req *types.ScheduleReq) (resp *type
resp = &types.ScheduleResp{} resp = &types.ScheduleResp{}
opt := &option.AiOption{ opt := &option.AiOption{
AdapterId: req.AiOption.AdapterId, AdapterId: req.AiOption.AdapterId,
TaskName: req.AiOption.TaskName,
ResourceType: req.AiOption.ResourceType, ResourceType: req.AiOption.ResourceType,
Replica: 1,
Tops: req.AiOption.Tops, Tops: req.AiOption.Tops,
TaskType: req.AiOption.TaskType, TaskType: req.AiOption.TaskType,
DatasetsName: req.AiOption.Datasets, DatasetsName: req.AiOption.Datasets,
@ -52,6 +55,17 @@ func (l *ScheduleSubmitLogic) ScheduleSubmit(req *types.ScheduleReq) (resp *type
switch opt.GetOptionType() { switch opt.GetOptionType() {
case option.AI: case option.AI:
rs := (results).([]*schedulers.AiResult) rs := (results).([]*schedulers.AiResult)
var synergystatus int64
if len(rs) > 1 {
synergystatus = 1
}
strategyCode, err := l.svcCtx.Scheduler.AiStorages.GetStrategyCode(req.AiOption.Strategy)
id, err := l.svcCtx.Scheduler.AiStorages.SaveTask(req.AiOption.TaskName, strategyCode, synergystatus)
if err != nil {
return nil, err
}
for _, r := range rs { for _, r := range rs {
scheResult := &types.ScheduleResult{} scheResult := &types.ScheduleResult{}
scheResult.ClusterId = r.ClusterId scheResult.ClusterId = r.ClusterId
@ -59,12 +73,13 @@ func (l *ScheduleSubmitLogic) ScheduleSubmit(req *types.ScheduleReq) (resp *type
scheResult.Strategy = r.Strategy scheResult.Strategy = r.Strategy
scheResult.Replica = r.Replica scheResult.Replica = r.Replica
scheResult.Msg = r.Msg scheResult.Msg = r.Msg
err := l.svcCtx.Scheduler.AiStorages.SaveAiTask(id, opt, r.ClusterId, r.TaskId, constants.Saved, r.Msg)
if err != nil {
return nil, err
}
resp.Results = append(resp.Results, scheResult) resp.Results = append(resp.Results, scheResult)
} }
err = l.svcCtx.Scheduler.AiStorages.SaveTask(req.AiOption.TaskName)
if err != nil {
return nil, err
}
} }
return resp, nil return resp, nil

View File

@ -16,8 +16,6 @@ package mqs
import ( import (
"context" "context"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
) )
@ -38,28 +36,28 @@ func NewCloudMq(ctx context.Context, svcCtx *svc.ServiceContext) *CloudMq {
func (l *CloudMq) Consume(val string) error { func (l *CloudMq) Consume(val string) error {
// 接受消息, 根据标签筛选过滤 // 接受消息, 根据标签筛选过滤
cloudScheduler := schedulers.NewCloudScheduler() //cloudScheduler := schedulers.NewCloudScheduler()
schdl, err := scheduler.NewScheduler(cloudScheduler, val, l.svcCtx.DbEngin, l.svcCtx.ParticipantRpc) //schdl, err := scheduler.NewScheduler(cloudScheduler, val, l.svcCtx.DbEngin, l.svcCtx.ParticipantRpc)
if err != nil { //if err != nil {
return err // return err
} //}
//
//检测是否指定了集群列表 ////检测是否指定了集群列表
schdl.SpecifyClusters() //schdl.SpecifyClusters()
//
//检测是否指定了nsID ////检测是否指定了nsID
schdl.SpecifyNsID() //schdl.SpecifyNsID()
//
//通过标签匹配筛选出集群范围 ////通过标签匹配筛选出集群范围
schdl.MatchLabels() //schdl.MatchLabels()
//
//todo 屏蔽原调度算法,因为监控数据暂未上报,临时采用随机调度 ////todo 屏蔽原调度算法,因为监控数据暂未上报,临时采用随机调度
schdl.TempAssign() //schdl.TempAssign()
//
// 存储数据 //// 存储数据
err = schdl.SaveToDb() //err = schdl.SaveToDb()
if err != nil { //if err != nil {
return err // return err
} //}
return nil return nil
} }

View File

@ -2,8 +2,6 @@ package mqs
import ( import (
"context" "context"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
) )
@ -24,28 +22,28 @@ func NewVmMq(ctx context.Context, svcCtx *svc.ServiceContext) *VmMq {
func (l *VmMq) Consume(val string) error { func (l *VmMq) Consume(val string) error {
// 接受消息, 根据标签筛选过滤 // 接受消息, 根据标签筛选过滤
vmScheduler := schedulers.NewVmScheduler() //vmScheduler := schedulers.NewVmScheduler()
schdl, err := scheduler.NewScheduler(vmScheduler, val, l.svcCtx.DbEngin, l.svcCtx.ParticipantRpc) //schdl, err := scheduler.NewScheduler(vmScheduler, val, l.svcCtx.DbEngin, l.svcCtx.ParticipantRpc)
if err != nil { //if err != nil {
return err // return err
} //}
//
//检测是否指定了集群列表 ////检测是否指定了集群列表
schdl.SpecifyClusters() //schdl.SpecifyClusters()
//
//检测是否指定了nsID ////检测是否指定了nsID
schdl.SpecifyNsID() //schdl.SpecifyNsID()
//
//通过标签匹配筛选出集群范围 ////通过标签匹配筛选出集群范围
schdl.MatchLabels() //schdl.MatchLabels()
//
//todo 屏蔽原调度算法,因为监控数据暂未上报,临时采用随机调度 ////todo 屏蔽原调度算法,因为监控数据暂未上报,临时采用随机调度
schdl.TempAssign() //schdl.TempAssign()
//
// 存储数据 //// 存储数据
err = schdl.SaveToDb() //err = schdl.SaveToDb()
if err != nil { //if err != nil {
return err // return err
} //}
return nil return nil
} }

View File

@ -2,10 +2,12 @@ package database
import ( import (
"github.com/zeromicro/go-zero/core/logx" "github.com/zeromicro/go-zero/core/logx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
"gorm.io/gorm" "gorm.io/gorm"
"strconv"
"time" "time"
) )
@ -48,22 +50,183 @@ func (s *AiStorage) GetAdapterIdsByType(adapterType string) ([]string, error) {
return ids, nil return ids, nil
} }
func (s *AiStorage) SaveTask(name string) error { func (s *AiStorage) GetAdaptersByType(adapterType string) ([]*types.AdapterInfo, error) {
var list []*types.AdapterInfo
db := s.DbEngin.Model(&types.AdapterInfo{}).Table("t_adapter")
db = db.Where("type = ?", adapterType)
err := db.Order("create_time desc").Find(&list).Error
if err != nil {
return nil, err
}
return list, nil
}
// GetAiTasksByAdapterId returns the rows of the task_ai table whose
// adapter_id column equals adapterId.
//
// A query error is logged and returned; the slice is nil in that case.
func (s *AiStorage) GetAiTasksByAdapterId(adapterId string) ([]*models.TaskAi, error) {
	var resp []*models.TaskAi
	tx := s.DbEngin.Raw("select * from task_ai where `adapter_id` = ? ", adapterId).Scan(&resp)
	if tx.Error != nil {
		// Pass the message as an argument rather than as the format string so
		// that a '%' inside the database error text is not parsed as a verb.
		logx.Errorf("%s", tx.Error.Error())
		return nil, tx.Error
	}
	return resp, nil
}
func (s *AiStorage) SaveTask(name string, strategyCode int64, synergyStatus int64) (int64, error) {
// 构建主任务结构体 // 构建主任务结构体
taskModel := models.Task{ taskModel := models.Task{
Status: constants.Saved, Status: constants.Saved,
Description: "ai task", Description: "ai task",
Name: name, Name: name,
CommitTime: time.Now(), SynergyStatus: synergyStatus,
Strategy: strategyCode,
AdapterTypeDict: 1,
CommitTime: time.Now(),
} }
// 保存任务数据到数据库 // 保存任务数据到数据库
tx := s.DbEngin.Create(&taskModel) tx := s.DbEngin.Create(&taskModel)
if tx.Error != nil {
return 0, tx.Error
}
return taskModel.Id, nil
}
func (s *AiStorage) SaveAiTask(taskId int64, option *option.AiOption, clusterId string, jobId string, status string, msg string) error {
// 构建主任务结构体
aId, err := strconv.ParseInt(option.AdapterId, 10, 64)
if err != nil {
return err
}
cId, err := strconv.ParseInt(clusterId, 10, 64)
if err != nil {
return err
}
aiTaskModel := models.TaskAi{
TaskId: taskId,
AdapterId: aId,
ClusterId: cId,
Name: option.TaskName,
Replica: option.Replica,
JobId: jobId,
TaskType: option.TaskType,
Strategy: option.StrategyName,
Status: status,
Msg: msg,
CommitTime: time.Now(),
}
// 保存任务数据到数据库
tx := s.DbEngin.Create(&aiTaskModel)
if tx.Error != nil { if tx.Error != nil {
return tx.Error return tx.Error
} }
return nil return nil
} }
func (s *AiStorage) UpdateTask() error { func (s *AiStorage) SaveClusterTaskQueue(adapterId string, clusterId string, queueNum int64) error {
aId, err := strconv.ParseInt(adapterId, 10, 64)
if err != nil {
return err
}
cId, err := strconv.ParseInt(clusterId, 10, 64)
if err != nil {
return err
}
taskQueue := models.TClusterTaskQueue{
AdapterId: aId,
ClusterId: cId,
QueueNum: queueNum,
}
tx := s.DbEngin.Create(&taskQueue)
if tx.Error != nil {
return tx.Error
}
return nil return nil
} }
// GetClusterTaskQueues returns the rows of t_cluster_task_queue that match
// both adapterId (adapter_id column) and clusterId (cluster_id column).
//
// A query error is logged and returned; the slice is nil in that case.
func (s *AiStorage) GetClusterTaskQueues(adapterId string, clusterId string) ([]*models.TClusterTaskQueue, error) {
	var taskQueues []*models.TClusterTaskQueue
	tx := s.DbEngin.Raw("select * from t_cluster_task_queue where `adapter_id` = ? and `cluster_id` = ?", adapterId, clusterId).Scan(&taskQueues)
	if tx.Error != nil {
		// Pass the message as an argument rather than as the format string so
		// that a '%' inside the database error text is not parsed as a verb.
		logx.Errorf("%s", tx.Error.Error())
		return nil, tx.Error
	}
	return taskQueues, nil
}
// GetAiTaskIdByClusterIdAndTaskId scans the task_ai row selected by clusterId
// (cluster_id column) and taskId (task_id column) and returns its JobId.
//
// A query error is logged and returned together with an empty string.
// NOTE(review): when no row matches, the scanned struct presumably stays at
// its zero value and "" is returned with a nil error — confirm against the
// GORM Scan behaviour callers rely on.
func (s *AiStorage) GetAiTaskIdByClusterIdAndTaskId(clusterId string, taskId string) (string, error) {
	var aiTask models.TaskAi
	tx := s.DbEngin.Raw("select * from task_ai where `cluster_id` = ? and `task_id` = ?", clusterId, taskId).Scan(&aiTask)
	if tx.Error != nil {
		// Pass the message as an argument rather than as the format string so
		// that a '%' inside the database error text is not parsed as a verb.
		logx.Errorf("%s", tx.Error.Error())
		return "", tx.Error
	}
	return aiTask.JobId, nil
}
// GetClusterResourcesById scans the t_cluster_resource row whose cluster_id
// column equals clusterId and returns a pointer to the scanned record.
//
// A query error is logged and returned together with a nil pointer.
// NOTE(review): when no row matches, the result is presumably a pointer to a
// zero-valued record rather than nil — confirm before relying on it.
func (s *AiStorage) GetClusterResourcesById(clusterId string) (*models.TClusterResource, error) {
	var clusterResource models.TClusterResource
	tx := s.DbEngin.Raw("select * from t_cluster_resource where `cluster_id` = ?", clusterId).Scan(&clusterResource)
	if tx.Error != nil {
		// Pass the message as an argument rather than as the format string so
		// that a '%' inside the database error text is not parsed as a verb.
		logx.Errorf("%s", tx.Error.Error())
		return nil, tx.Error
	}
	return &clusterResource, nil
}
// SaveClusterResources inserts one TClusterResource row built from the given
// values. clusterId must be a base-10 integer string; the parse error is
// returned unchanged when it is not. Otherwise the result is the error (if
// any) reported by the insert.
func (s *AiStorage) SaveClusterResources(clusterId string, clusterName string, clusterType int64, cpuAvail float64, cpuTotal float64,
	memAvail float64, memTotal float64, diskAvail float64, diskTotal float64, gpuAvail float64, gpuTotal float64, cardTotal int64, topsTotal float64) error {
	numericId, parseErr := strconv.ParseInt(clusterId, 10, 64)
	if parseErr != nil {
		return parseErr
	}

	var record models.TClusterResource
	// Identity of the cluster.
	record.ClusterId = numericId
	record.ClusterName = clusterName
	record.ClusterType = clusterType
	// Available / total figures per resource kind.
	record.CpuAvail, record.CpuTotal = cpuAvail, cpuTotal
	record.MemAvail, record.MemTotal = memAvail, memTotal
	record.DiskAvail, record.DiskTotal = diskAvail, diskTotal
	record.GpuAvail, record.GpuTotal = gpuAvail, gpuTotal
	// Accelerator card totals.
	record.CardTotal = cardTotal
	record.CardTopsTotal = topsTotal

	return s.DbEngin.Create(&record).Error
}
// UpdateClusterResources passes clusterResource to DbEngin.Updates and
// returns the error recorded on the resulting statement, or nil.
func (s *AiStorage) UpdateClusterResources(clusterResource *models.TClusterResource) error {
	if err := s.DbEngin.Updates(clusterResource).Error; err != nil {
		return err
	}
	return nil
}
// UpdateAiTask passes task to DbEngin.Updates and returns the error recorded
// on the resulting statement, or nil.
func (s *AiStorage) UpdateAiTask(task *models.TaskAi) error {
	result := s.DbEngin.Updates(task)
	return result.Error
}
// GetStrategyCode looks up the item_value of the dictionary item whose
// item_text equals name inside the dictionary with dict_code
// 'schedule_Strategy', and returns it as an int64.
//
// If the query fails, 0 and the query error are returned. Previously the
// error branch returned a nil error, so a failed lookup could not be told
// apart from a successful one.
func (s *AiStorage) GetStrategyCode(name string) (int64, error) {
	var strategy int64
	sqlStr := `select t_dict_item.item_value
from t_dict
left join t_dict_item on t_dict.id = t_dict_item.dict_id
where item_text = ?
and t_dict.dict_code = 'schedule_Strategy'`
	// Query the scheduling strategy code.
	if err := s.DbEngin.Raw(sqlStr, name).Scan(&strategy).Error; err != nil {
		return 0, err
	}
	return strategy, nil
}

View File

@ -129,42 +129,19 @@ func (s *Scheduler) TempAssign() error {
} }
func (s *Scheduler) AssignAndSchedule(ss SubSchedule) (interface{}, error) { func (s *Scheduler) AssignAndSchedule(ss SubSchedule) (interface{}, error) {
//// 已指定 ParticipantId //choose strategy
//if s.task.ParticipantId != 0 {
// return nil
//}
//// 标签匹配以及后未找到ParticipantIds
//if len(s.participantIds) == 0 {
// return errors.New("未找到匹配的ParticipantIds")
//}
//
//// 指定或者标签匹配的结果只有一个集群,给任务信息指定
//if len(s.participantIds) == 1 {
// s.task.ParticipantId = s.participantIds[0]
// //replicas := s.task.Metadata.(map[string]interface{})["spec"].(map[string]interface{})["replicas"].(float64)
// //result := make(map[int64]string)
// //result[s.participantIds[0]] = strconv.FormatFloat(replicas, 'f', 2, 64)
// //s.result = result
//
// return nil
//}
strategy, err := ss.PickOptimalStrategy() strategy, err := ss.PickOptimalStrategy()
if err != nil { if err != nil {
return nil, err return nil, err
} }
//schedule
clusters, err := strategy.Schedule() clusters, err := strategy.Schedule()
if err != nil { if err != nil {
return nil, err return nil, err
} }
//集群数量不满足,指定到标签匹配后第一个集群 //assign tasks to clusters
//if len(providerList) < 2 {
// s.task.ParticipantId = s.participantIds[0]
// return nil
//}
resp, err := ss.AssignTask(clusters) resp, err := ss.AssignTask(clusters)
if err != nil { if err != nil {
return nil, err return nil, err

View File

@ -26,6 +26,7 @@ import (
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy/param" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy/param"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/pkg/response" "gitlink.org.cn/JointCloud/pcm-coordinator/api/pkg/response"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
"gitlink.org.cn/JointCloud/pcm-octopus/octopus" "gitlink.org.cn/JointCloud/pcm-octopus/octopus"
@ -168,32 +169,52 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) (interfa
errs = append(errs, e) errs = append(errs, e)
} }
if len(errs) == len(clusters) { for s := range ch {
return nil, errors.New("submit task failed") results = append(results, s)
} }
if len(errs) != 0 { if len(errs) != 0 {
var msg string var synergystatus int64
if len(clusters) > 1 {
synergystatus = 1
}
strategyCode, err := as.AiStorages.GetStrategyCode(as.option.StrategyName)
taskId, err := as.AiStorages.SaveTask(as.option.TaskName, strategyCode, synergystatus)
if err != nil {
return nil, errors.New("database add failed: " + err.Error())
}
var errmsg string
for _, err := range errs { for _, err := range errs {
e := (err).(struct { e := (err).(struct {
err error err error
clusterId string clusterId string
}) })
msg += fmt.Sprintf("clusterId: %v , error: %v \n", e.clusterId, e.err.Error()) msg := fmt.Sprintf("clusterId: %v , error: %v \n", e.clusterId, e.err.Error())
errmsg += msg
err := as.AiStorages.SaveAiTask(taskId, as.option, e.clusterId, "", constants.Failed, msg)
if err != nil {
return nil, errors.New("database add failed: " + err.Error())
}
} }
for s := range ch { for s := range ch {
if s.Msg != "" { if s.Msg != "" {
msg += fmt.Sprintf("clusterId: %v , error: %v \n", s.ClusterId, s.Msg) msg := fmt.Sprintf("clusterId: %v , error: %v \n", s.ClusterId, s.Msg)
errmsg += msg
err := as.AiStorages.SaveAiTask(taskId, as.option, s.ClusterId, "", constants.Failed, msg)
if err != nil {
return nil, errors.New("database add failed: " + err.Error())
}
} else { } else {
msg += fmt.Sprintf("clusterId: %v , submitted successfully, taskId: %v \n", s.ClusterId, s.TaskId) msg := fmt.Sprintf("clusterId: %v , submitted successfully, taskId: %v \n", s.ClusterId, s.TaskId)
errmsg += msg
err := as.AiStorages.SaveAiTask(taskId, as.option, s.ClusterId, s.TaskId, constants.Succeeded, msg)
if err != nil {
return nil, errors.New("database add failed: " + err.Error())
}
} }
} }
return nil, errors.New(msg) return nil, errors.New(errmsg)
}
for s := range ch {
// TODO: database operation
results = append(results, s)
} }
return results, nil return results, nil

View File

@ -15,106 +15,176 @@
package schedulers package schedulers
import ( import (
"bytes" "context"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/algorithm/providerPricing" "errors"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/database" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/collector"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy/param" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy/param"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/pkg/response" "gitlink.org.cn/JointCloud/pcm-coordinator/api/pkg/response"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models/cloud"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/tracker"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
"io" "gorm.io/gorm"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "math"
"k8s.io/apimachinery/pkg/runtime" "time"
syaml "k8s.io/apimachinery/pkg/runtime/serializer/yaml"
kyaml "k8s.io/apimachinery/pkg/util/yaml"
) )
type CloudScheduler struct { type CloudScheduler struct {
storage database.Storage yamlString string
task *response.TaskInfo
*scheduler.Scheduler
option *option.CloudOption
ctx context.Context
dbEngin *gorm.DB
promClient tracker.Prometheus
svcCtx *svc.ServiceContext
} }
func NewCloudScheduler() *CloudScheduler { type CloudResult struct {
return &CloudScheduler{} TaskId string
ClusterId string
ClusterName string
Strategy string
Replica int32
Msg string
} }
func (cs *CloudScheduler) PickOptimalStrategy() (strategy.Strategy, error) { func NewCloudScheduler(ctx context.Context, val string, scheduler *scheduler.Scheduler, option *option.CloudOption, dbEngin *gorm.DB, promClient tracker.Prometheus) (*CloudScheduler, error) {
//获取所有计算中心 return &CloudScheduler{ctx: ctx, yamlString: val, Scheduler: scheduler, option: option, dbEngin: dbEngin, promClient: promClient}, nil
//调度算法
strategy := strategy.NewPricingStrategy(&param.ResourcePricingParams{})
return strategy, nil
} }
func (cs *CloudScheduler) GetNewStructForDb(task *response.TaskInfo, resource string, participantId int64) (interface{}, error) { func (as *CloudScheduler) GetNewStructForDb(task *response.TaskInfo, resource string, participantId int64) (interface{}, error) {
cloud := cs.UnMarshalK8sStruct(resource, task.TaskId, task.NsID) c := cloud.TaskCloudModel{
cloud.Id = utils.GenSnowflakeID() AdapterId: uint(participantId),
cloud.NsID = task.NsID TaskId: uint(task.TaskId),
Status: constants.Saved,
cloud.ParticipantId = participantId YamlString: as.yamlString,
return cloud, nil
}
func (cs *CloudScheduler) UnMarshalK8sStruct(yamlString string, taskId int64, nsID string) models.Cloud {
var cloud models.Cloud
d := kyaml.NewYAMLOrJSONDecoder(bytes.NewBufferString(yamlString), 4096)
var err error
for {
var rawObj runtime.RawExtension
err = d.Decode(&rawObj)
if err == io.EOF {
break
}
if err != nil {
}
obj := &unstructured.Unstructured{}
syaml.NewDecodingSerializer(unstructured.UnstructuredJSONScheme).Decode(rawObj.Raw, nil, obj)
if err != nil {
}
unstructuredMap, err := runtime.DefaultUnstructuredConverter.ToUnstructured(obj)
if err != nil {
}
unstructureObj := &unstructured.Unstructured{Object: unstructuredMap}
if len(nsID) != 0 {
unstructureObj.SetNamespace(nsID)
}
cloud = models.Cloud{
TaskId: taskId,
ApiVersion: unstructureObj.GetAPIVersion(),
Name: unstructureObj.GetName(),
Kind: unstructureObj.GetKind(),
Namespace: unstructureObj.GetNamespace(),
Status: "Saved",
}
// 命名空间为空 设置默认值
if len(unstructureObj.GetNamespace()) == 0 {
cloud.Namespace = "default"
}
//unstructureObj转成string
unString, _ := unstructureObj.MarshalJSON()
cloud.YamlString = string(unString)
} }
return cloud utils.Convert(task.Metadata, &c)
return c, nil
} }
func (cs *CloudScheduler) genTaskAndProviders() (*providerPricing.Task, []*providerPricing.Provider, error) { func (as *CloudScheduler) PickOptimalStrategy() (strategy.Strategy, error) {
proParams, err := cs.storage.GetProviderParams() if len(as.option.ClusterIds) == 1 {
return &strategy.SingleAssignment{Cluster: &strategy.AssignedCluster{ClusterId: as.option.ClusterIds[0], Replicas: 1}}, nil
}
resources, err := as.findClustersWithResources()
if err != nil { if err != nil {
return nil, nil, nil return nil, err
} }
var providerList []*providerPricing.Provider if len(resources) == 0 {
for _, p := range proParams { return nil, errors.New("no cluster has resources")
provider := providerPricing.NewProvider(p.Participant_id, p.Cpu_avail, p.Mem_avail, p.Disk_avail, 0.0, 0.0, 0.0)
providerList = append(providerList, provider)
} }
//replicas := task.Metadata.(map[string]interface{})["spec"].(map[string]interface{})["replicas"].(float64) if len(resources) == 1 {
//t := algorithm.NewTask(0, int(replicas), 2, 75120000, 301214500, 1200, 2, 6, 2000) var cluster strategy.AssignedCluster
cluster.ClusterId = resources[0].ClusterId
cluster.Replicas = 1
return &strategy.SingleAssignment{Cluster: &cluster}, nil
}
return nil, providerList, nil params := &param.Params{Resources: resources}
switch as.option.Strategy {
case strategy.REPLICATION:
var clusterIds []string
for _, resource := range resources {
clusterIds = append(clusterIds, resource.ClusterId)
}
strategy := strategy.NewReplicationStrategy(clusterIds, as.option.Replica)
return strategy, nil
case strategy.RESOURCES_PRICING:
strategy := strategy.NewPricingStrategy(&param.ResourcePricingParams{Params: params, Replicas: as.option.Replica})
return strategy, nil
case strategy.DYNAMIC_RESOURCES:
strategy := strategy.NewDynamicResourcesStrategy(params.Resources, as.option, 1)
return strategy, nil
case strategy.STATIC_WEIGHT:
//todo resources should match cluster StaticWeightMap
strategy := strategy.NewStaticWeightStrategy(as.option.StaticWeightMap, as.option.Replica)
return strategy, nil
}
return nil, errors.New("no strategy has been chosen")
} }
func (cs *CloudScheduler) AssignTask(clusters []*strategy.AssignedCluster) (interface{}, error) { func (as *CloudScheduler) AssignTask(clusters []*strategy.AssignedCluster) (interface{}, error) {
return nil, nil if clusters == nil {
return nil, errors.New("clusters is nil")
}
for i := len(clusters) - 1; i >= 0; i-- {
if clusters[i].Replicas == 0 {
clusters = append(clusters[:i], clusters[i+1:]...)
}
}
if len(clusters) == 0 {
return nil, errors.New("clusters is nil")
}
var results []*CloudResult
for _, cluster := range clusters {
cName := ""
as.dbEngin.Table("t_cluster").Select("name").Where("id=?", cluster.ClusterId).Find(&cName)
cr := CloudResult{
ClusterId: cluster.ClusterId,
ClusterName: cName,
Replica: cluster.Replicas,
}
cr.ClusterId = cluster.ClusterId
cr.Replica = cluster.Replicas
cr.ClusterName = cName
results = append(results, &cr)
}
return results, nil
}
func (as *CloudScheduler) findClustersWithResources() ([]*collector.ResourceStats, error) {
resp := []*collector.ResourceStats{}
//查询集群资源信息
var rMetrics []tracker.Metric
metrics := []string{"cluster_cpu_utilisation", "cluster_cpu_avail", "cluster_cpu_total", "cluster_memory_total", "cluster_memory_avail", "cluster_memory_utilisation", "cluster_disk_utilisation", "cluster_disk_avail", "cluster_disk_total", "cluster_pod_utilisation"}
var clusterNames []string
as.dbEngin.Table("t_cluster").Select("name").Where("id in ?", as.option.ClusterIds).Find(&clusterNames)
for _, c := range clusterNames {
rMetrics = as.promClient.GetNamedMetrics(metrics, time.Now(), tracker.ClusterOption{ClusterName: c})
r := collector.ResourceStats{}
var cid string
as.dbEngin.Table("t_cluster").Select("id").Where("name = ?", c).Find(&cid)
r.ClusterId = cid
r.Name = c
for _, metric := range rMetrics {
if metric.MetricName == "cluster_cpu_total" {
r.CpuCoreTotal = int64(metric.MetricData.MetricValues[0].Sample.Value())
}
if metric.MetricName == "cluster_cpu_avail" {
cpuAvail := metric.MetricData.MetricValues[0].Sample.Value()
r.CpuCoreAvail = int64(math.Round(cpuAvail))
}
if metric.MetricName == "cluster_memory_total" {
r.MemTotal = metric.MetricData.MetricValues[0].Sample.Value()
}
if metric.MetricName == "cluster_memory_avail" {
r.MemAvail = metric.MetricData.MetricValues[0].Sample.Value()
}
if metric.MetricName == "cluster_disk_total" {
r.DiskTotal = metric.MetricData.MetricValues[0].Sample.Value()
}
if metric.MetricName == "cluster_disk_avail" {
r.DiskAvail = metric.MetricData.MetricValues[0].Sample.Value()
}
}
resp = append(resp, &r)
}
return resp, nil
} }

View File

@ -4,6 +4,7 @@ type AiOption struct {
AdapterId string AdapterId string
ClusterIds []string ClusterIds []string
TaskName string TaskName string
Replica int64
ResourceType string // cpu/gpu/compute card ResourceType string // cpu/gpu/compute card
CpuCoreNum int64 CpuCoreNum int64
TaskType string // pytorch/tensorflow/mindspore TaskType string // pytorch/tensorflow/mindspore

View File

@ -1,7 +1,13 @@
package option package option
type CloudOption struct { type CloudOption struct {
task interface{} Name string `json:"name"`
AdapterIds []string `json:"adapterIds"`
ClusterIds []string `json:"clusterIds"`
Strategy string `json:"strategy"`
StaticWeightMap map[string]int32 `json:"staticWeightMap,optional"`
ReqBody []string `json:"reqBody"`
Replica int32 `json:"replicas,string"`
} }
func (c CloudOption) GetOptionType() string { func (c CloudOption) GetOptionType() string {

View File

@ -4,6 +4,7 @@ const (
AI = "ai" AI = "ai"
CLOUD = "cloud" CLOUD = "cloud"
HPC = "hpc" HPC = "hpc"
VM = "vm"
) )
type Option interface { type Option interface {

View File

@ -0,0 +1,49 @@
package option
import "time"
// VmOption is the option set for virtual-machine scheduling; its
// GetOptionType method reports the VM option type.
//
// NOTE(review): most fields carry no documentation in the source, so the
// field comments below that go beyond name and type are assumptions to be
// confirmed with the callers.
type VmOption struct {
AdapterId string
// ClusterIds lists candidate clusters; VmScheduler.PickOptimalStrategy
// assigns directly to ClusterIds[0] when exactly one entry is present.
ClusterIds []string
TaskName string
ResourceType string // cpu/gpu/compute card
TaskType string // pytorch/tensorflow/mindspore
// Strategy is switched on by VmScheduler.PickOptimalStrategy to choose the
// scheduling strategy.
Strategy string
// ClusterToStaticWeight is handed to the static-weight strategy by
// VmScheduler.PickOptimalStrategy.
ClusterToStaticWeight map[string]int32
CommitTime time.Time
NsID string
Replicas int64
MatchLabels map[string]string
StaticWeightMap map[string]int32
CreateMulServer []CreateMulDomainServer
// The remaining fields presumably mirror a persisted VM record — TODO confirm.
Id int64
ParticipantId int64
TaskId int64
Name string
ClusterId int64
FlavorRef string
ImageRef string
Status string
Platform string
Description string
AvailabilityZone string
MinCount int64
Uuid string
StartTime string
RunningTime string
Result string
DeletedAt string
}
// CreateMulDomainServer is the element type of VmOption.CreateMulServer.
//
// NOTE(review): presumably one entry describes one server-creation request
// for a given platform — confirm with the code that fills this slice.
type CreateMulDomainServer struct {
Platform string
Name string
// Min_count keeps its underscore spelling; renaming it would change the
// exported field name seen by callers.
Min_count int64
ImageRef string
FlavorRef string
Uuid string
}
// GetOptionType returns the VM option-type constant for VmOption.
func (a VmOption) GetOptionType() string {
return VM
}

View File

@ -1,29 +1,96 @@
package schedulers package schedulers
import ( import (
"context"
"github.com/pkg/errors"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/algorithm/providerPricing" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/algorithm/providerPricing"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/database" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/database"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/strategy/param"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/pkg/response" "gitlink.org.cn/JointCloud/pcm-coordinator/api/pkg/response"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/tracker"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
"gorm.io/gorm"
) )
type VmScheduler struct { type VmScheduler struct {
storage database.Storage yamlString string
storage database.Storage
task *response.TaskInfo
*scheduler.Scheduler
option *option.VmOption
ctx context.Context
promClient tracker.Prometheus
dbEngin *gorm.DB
} }
func NewVmScheduler() *VmScheduler { type VmResult struct {
return &VmScheduler{} TaskId string
ClusterId string
ClusterName string
Strategy string
Replica int32
Msg string
} }
// NewVmScheduler builds a VmScheduler from its collaborators. val is stored
// as the scheduler's yamlString, and scheduler becomes the embedded
// *scheduler.Scheduler. The storage and task fields are left at their zero
// values. The returned error is always nil.
func NewVmScheduler(ctx context.Context, val string, scheduler *scheduler.Scheduler, option *option.VmOption, dbEngin *gorm.DB, promClient tracker.Prometheus) (*VmScheduler, error) {
	vs := new(VmScheduler)
	vs.ctx = ctx
	vs.yamlString = val
	vs.Scheduler = scheduler
	vs.option = option
	vs.dbEngin = dbEngin
	vs.promClient = promClient
	return vs, nil
}
/*func NewCloudScheduler(ctx context.Context, val string, scheduler *scheduler.Scheduler, option *option.CloudOption, dbEngin *gorm.DB, promClient tracker.Prometheus) (*CloudScheduler, error) {
return &CloudScheduler{ctx: ctx, yamlString: val, Scheduler: scheduler, option: option, dbEngin: dbEngin, promClient: promClient}, nil
}*/
func (vm *VmScheduler) PickOptimalStrategy() (strategy.Strategy, error) { func (vm *VmScheduler) PickOptimalStrategy() (strategy.Strategy, error) {
//获取所有计算中心 if len(vm.option.ClusterIds) == 1 {
//调度算法 // TODO database operation Find
strategy := strategy.NewPricingStrategy(&param.ResourcePricingParams{}) return &strategy.SingleAssignment{Cluster: &strategy.AssignedCluster{ClusterId: vm.option.ClusterIds[0], Replicas: 1}}, nil
return strategy, nil }
//resources, err := vm.findClustersWithResources()
/* if err != nil {
return nil, err
}*/
/* if len(resources) == 0 {
return nil, errors.New("no cluster has resources")
}*/
//
//if len(resources) == 1 {
// var cluster strategy.AssignedCluster
// cluster.ClusterId = resources[0].ClusterId
// cluster.Replicas = 1
// return &strategy.SingleAssignment{Cluster: &cluster}, nil
//}
//params := &param.Params{Resources: resources}
switch vm.option.Strategy {
/* case strategy.REPLICATION:
var clusterIds []string
for _, resource := range resources {
clusterIds = append(clusterIds, resource.ClusterId)
}
strategy := strategy.NewReplicationStrategy(clusterIds, 1)
return strategy, nil
case strategy.RESOURCES_PRICING:
strategy := strategy.NewPricingStrategy(&param.ResourcePricingParams{Params: params, Replicas: 1})
return strategy, nil
case strategy.DYNAMIC_RESOURCES:
strategy := strategy.NewDynamicResourcesStrategy(params.Resources, vm.option, 1)
return strategy, nil*/
case strategy.STATIC_WEIGHT:
//todo resources should match cluster StaticWeightMap
strategy := strategy.NewStaticWeightStrategy(vm.option.ClusterToStaticWeight, 1)
return strategy, nil
}
/*strategy := strategy.NewPricingStrategy(&param.ResourcePricingParams{})
return strategy, nil*/
return nil, errors.New("no strategy has been chosen")
} }
func (v *VmScheduler) GetNewStructForDb(task *response.TaskInfo, resource string, participantId int64) (interface{}, error) { func (v *VmScheduler) GetNewStructForDb(task *response.TaskInfo, resource string, participantId int64) (interface{}, error) {
@ -41,12 +108,6 @@ func (v *VmScheduler) GetNewStructForDb(task *response.TaskInfo, resource string
vm.ParticipantId = participantId*/ vm.ParticipantId = participantId*/
} }
/*
func (vm *VmScheduler) UnMarshalVmStruct(yamlString string, taskId int64, nsID string) models.vm {
var vm models.Vm
vm := kyaml.NewYAMLOrJSONDecoder(bytes.NewBufferString(yamlString), 4096)
}
*/
func (vm *VmScheduler) genTaskAndProviders() (*providerPricing.Task, []*providerPricing.Provider, error) { func (vm *VmScheduler) genTaskAndProviders() (*providerPricing.Task, []*providerPricing.Provider, error) {
proParams, err := vm.storage.GetProviderParams() proParams, err := vm.storage.GetProviderParams()
if err != nil { if err != nil {
@ -64,7 +125,38 @@ func (vm *VmScheduler) genTaskAndProviders() (*providerPricing.Task, []*provider
return nil, providerList, nil return nil, providerList, nil
} }
func (v VmScheduler) AssignTask(clusters []*strategy.AssignedCluster) (interface{}, error) { func (as *VmScheduler) AssignTask(clusters []*strategy.AssignedCluster) (interface{}, error) {
//TODO implement me //TODO implement me
panic("implement me") if clusters == nil {
return nil, errors.New("clusters is nil")
}
for i := len(clusters) - 1; i >= 0; i-- {
if clusters[i].Replicas == 0 {
clusters = append(clusters[:i], clusters[i+1:]...)
}
}
if len(clusters) == 0 {
return nil, errors.New("clusters is nil")
}
var results []*VmResult
for _, cluster := range clusters {
cName := ""
as.dbEngin.Table("t_cluster").Select("name").Where("id=?", cluster.ClusterId).Find(&cName)
cr := VmResult{
ClusterId: cluster.ClusterId,
ClusterName: cName,
Replica: cluster.Replicas,
}
cr.ClusterId = cluster.ClusterId
cr.Replica = cluster.Replicas
cr.ClusterName = cName
results = append(results, &cr)
}
return results, nil
} }

View File

@ -7,6 +7,9 @@ type AiCollector interface {
GetDatasetsSpecs(ctx context.Context) ([]*DatasetsSpecs, error) GetDatasetsSpecs(ctx context.Context) ([]*DatasetsSpecs, error)
GetAlgorithms(ctx context.Context) ([]*Algorithm, error) GetAlgorithms(ctx context.Context) ([]*Algorithm, error)
GetTrainingTaskLog(ctx context.Context, taskId string, instanceNum string) (string, error) GetTrainingTaskLog(ctx context.Context, taskId string, instanceNum string) (string, error)
GetTrainingTask(ctx context.Context, taskId string) (*Task, error)
DownloadAlgorithmCode(ctx context.Context, resourceType string, taskType string, dataset string, algorithm string) (string, error)
UploadAlgorithmCode(ctx context.Context, resourceType string, taskType string, dataset string, algorithm string, code string) error
} }
type ResourceStats struct { type ResourceStats struct {
@ -19,6 +22,7 @@ type ResourceStats struct {
DiskAvail float64 DiskAvail float64
DiskTotal float64 DiskTotal float64
GpuAvail int64 GpuAvail int64
GpuTotal int64
CardsAvail []*Card CardsAvail []*Card
CpuCoreHours float64 CpuCoreHours float64
Balance float64 Balance float64
@ -43,3 +47,10 @@ type Algorithm struct {
Platform string Platform string
TaskType string TaskType string
} }
// Task is the result type of AiCollector.GetTrainingTask.
//
// All fields are plain strings.
// NOTE(review): the format of Start/End and the set of Status values are not
// visible here — confirm with the collector implementations.
type Task struct {
Id string
Start string
End string
Status string
}

View File

@ -162,10 +162,22 @@ func (m *ModelArtsLink) GetAlgorithms(ctx context.Context) ([]*collector.Algorit
return nil, nil return nil, nil
} }
// DownloadAlgorithmCode is a stub for ModelArtsLink: it ignores all of its
// arguments and returns an empty string with a nil error.
func (m *ModelArtsLink) DownloadAlgorithmCode(ctx context.Context, resourceType string, taskType string, dataset string, algorithm string) (string, error) {
return "", nil
}
// UploadAlgorithmCode is a placeholder on ModelArtsLink: it ignores every
// argument and always returns nil.
func (m *ModelArtsLink) UploadAlgorithmCode(ctx context.Context, resourceType string, taskType string, dataset string, algorithm string, code string) error {
return nil
}
func (m *ModelArtsLink) GetTrainingTaskLog(ctx context.Context, taskId string, instanceNum string) (string, error) { func (m *ModelArtsLink) GetTrainingTaskLog(ctx context.Context, taskId string, instanceNum string) (string, error) {
return "", nil return "", nil
} }
// GetTrainingTask is a placeholder on ModelArtsLink: it ignores taskId and
// always returns a nil task together with a nil error, so callers must check
// the returned pointer before dereferencing it.
func (m *ModelArtsLink) GetTrainingTask(ctx context.Context, taskId string) (*collector.Task, error) {
return nil, nil
}
func (m *ModelArtsLink) Execute(ctx context.Context, option *option.AiOption) (interface{}, error) { func (m *ModelArtsLink) Execute(ctx context.Context, option *option.AiOption) (interface{}, error) {
err := m.GenerateSubmitParams(ctx, option) err := m.GenerateSubmitParams(ctx, option)
if err != nil { if err != nil {

View File

@ -19,12 +19,14 @@ import (
"errors" "errors"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/collector" "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/collector"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils" "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
"gitlink.org.cn/JointCloud/pcm-octopus/octopus" "gitlink.org.cn/JointCloud/pcm-octopus/octopus"
"gitlink.org.cn/JointCloud/pcm-octopus/octopusclient" "gitlink.org.cn/JointCloud/pcm-octopus/octopusclient"
"math" "math"
"strconv" "strconv"
"strings" "strings"
"time"
) )
type OctopusLink struct { type OctopusLink struct {
@ -337,6 +339,14 @@ func (o *OctopusLink) GetAlgorithms(ctx context.Context) ([]*collector.Algorithm
return algorithms, nil return algorithms, nil
} }
// DownloadAlgorithmCode is a placeholder on OctopusLink: it ignores every
// argument and always returns an empty string with a nil error.
func (o *OctopusLink) DownloadAlgorithmCode(ctx context.Context, resourceType string, taskType string, dataset string, algorithm string) (string, error) {
return "", nil
}
// UploadAlgorithmCode is a placeholder on OctopusLink: it ignores every
// argument and always returns nil.
func (o *OctopusLink) UploadAlgorithmCode(ctx context.Context, resourceType string, taskType string, dataset string, algorithm string, code string) error {
return nil
}
func (o *OctopusLink) GetTrainingTaskLog(ctx context.Context, taskId string, instanceNum string) (string, error) { func (o *OctopusLink) GetTrainingTaskLog(ctx context.Context, taskId string, instanceNum string) (string, error) {
instance, err := strconv.ParseInt(instanceNum, 10, 32) instance, err := strconv.ParseInt(instanceNum, 10, 32)
if err != nil { if err != nil {
@ -356,6 +366,35 @@ func (o *OctopusLink) GetTrainingTaskLog(ctx context.Context, taskId string, ins
return resp.Content, nil return resp.Content, nil
} }
// GetTrainingTask queries the training job identified by taskId through
// QueryTask and converts the response into a collector.Task.
//
// Start and End are the job's StartedAt and CompletedAt Unix-second timestamps
// formatted with constants.Layout. The job status is translated to the
// matching constants value; any status not listed below is reported as
// "undefined".
//
// An error is returned when QueryTask fails, when the response is not a
// non-nil *octopus.GetTrainJobResp, or when the response has Success == false.
func (o *OctopusLink) GetTrainingTask(ctx context.Context, taskId string) (*collector.Task, error) {
	resp, err := o.QueryTask(ctx, taskId)
	if err != nil {
		return nil, err
	}

	// Comma-ok assertion: an unexpected (or nil) response becomes an error
	// instead of a runtime panic.
	jobresp, ok := resp.(*octopus.GetTrainJobResp)
	if !ok || jobresp == nil {
		return nil, errors.New("octopus: unexpected response type for training task query")
	}
	if !jobresp.Success {
		return nil, errors.New(jobresp.Error.Message)
	}

	job := jobresp.Payload.TrainJob

	var task collector.Task
	task.Id = job.Id
	// NOTE(review): a zero timestamp is formatted as the Unix epoch; presumably
	// a job that has not finished reports CompletedAt == 0 — confirm callers
	// expect that value in End.
	task.Start = time.Unix(job.StartedAt, 0).Format(constants.Layout)
	task.End = time.Unix(job.CompletedAt, 0).Format(constants.Layout)

	switch job.Status {
	case "succeeded":
		task.Status = constants.Completed
	case "failed":
		task.Status = constants.Failed
	case "running":
		task.Status = constants.Running
	case "stopped":
		task.Status = constants.Stopped
	default:
		task.Status = "undefined"
	}

	return &task, nil
}
func (o *OctopusLink) Execute(ctx context.Context, option *option.AiOption) (interface{}, error) { func (o *OctopusLink) Execute(ctx context.Context, option *option.AiOption) (interface{}, error) {
err := o.GenerateSubmitParams(ctx, option) err := o.GenerateSubmitParams(ctx, option)
if err != nil { if err != nil {

View File

@ -447,6 +447,14 @@ func (s *ShuguangAi) GetAlgorithms(ctx context.Context) ([]*collector.Algorithm,
return algorithms, nil return algorithms, nil
} }
// DownloadAlgorithmCode is a placeholder on ShuguangAi: it ignores every
// argument and always returns an empty string with a nil error.
func (s *ShuguangAi) DownloadAlgorithmCode(ctx context.Context, resourceType string, taskType string, dataset string, algorithm string) (string, error) {
return "", nil
}
// UploadAlgorithmCode is a placeholder on ShuguangAi: it ignores every
// argument and always returns nil.
func (s *ShuguangAi) UploadAlgorithmCode(ctx context.Context, resourceType string, taskType string, dataset string, algorithm string, code string) error {
return nil
}
func (s *ShuguangAi) GetTrainingTaskLog(ctx context.Context, taskId string, instanceNum string) (string, error) { func (s *ShuguangAi) GetTrainingTaskLog(ctx context.Context, taskId string, instanceNum string) (string, error) {
req := &hpcAC.GetInstanceLogReq{ req := &hpcAC.GetInstanceLogReq{
TaskId: taskId, TaskId: taskId,
@ -465,6 +473,24 @@ func (s *ShuguangAi) GetTrainingTaskLog(ctx context.Context, taskId string, inst
return resp.Data.Content, nil return resp.Data.Content, nil
} }
// GetTrainingTask queries the task identified by taskId through QueryTask and
// copies its id, start time, end time and status into a collector.Task.
//
// An error is returned when QueryTask fails, when the response is not a
// non-nil *hpcAC.GetPytorchTaskResp, or when the response Code is not "0"
// (in which case the response Msg becomes the error text).
func (s *ShuguangAi) GetTrainingTask(ctx context.Context, taskId string) (*collector.Task, error) {
	resp, err := s.QueryTask(ctx, taskId)
	if err != nil {
		return nil, err
	}

	// Comma-ok assertion: an unexpected (or nil) response becomes an error
	// instead of a runtime panic.
	jobresp, ok := resp.(*hpcAC.GetPytorchTaskResp)
	if !ok || jobresp == nil {
		return nil, errors.New("shuguangAi: unexpected response type for training task query")
	}
	if jobresp.Code != "0" {
		return nil, errors.New(jobresp.Msg)
	}

	data := jobresp.Data

	// NOTE(review): Start, End and Status are passed through exactly as the
	// platform returns them; no status translation is applied here — confirm
	// that callers can handle the platform's raw values.
	var task collector.Task
	task.Id = data.Id
	task.Start = data.StartTime
	task.End = data.EndTime
	task.Status = data.Status

	return &task, nil
}
func (s *ShuguangAi) Execute(ctx context.Context, option *option.AiOption) (interface{}, error) { func (s *ShuguangAi) Execute(ctx context.Context, option *option.AiOption) (interface{}, error) {
err := s.GenerateSubmitParams(ctx, option) err := s.GenerateSubmitParams(ctx, option)
if err != nil { if err != nil {

View File

@ -141,23 +141,13 @@ type Region struct {
} }
type GeneralTaskReq struct { type GeneralTaskReq struct {
Name string `json:"name"` Name string `json:"name"`
ComputeType string `json:"computeType"` AdapterIds []string `json:"adapterIds"`
TemplateId string `json:"templateId"` ClusterIds []string `json:"clusterIds"`
AdapterId string `json:"adapterId"` Strategy string `json:"strategy"`
ClusterIds []string `json:"clusterIds"` StaticWeightMap map[string]int32 `json:"staticWeightMap,optional"`
Strategy Strategy `json:"strategy"` ReqBody []string `json:"reqBody"`
ReqBody []string `json:"reqBody"` Replicas int64 `json:"replicas,string"`
}
type Strategy struct {
Name string `json:"name"`
StaticWeightList []StaticWeightList `json:"staticWeightList"`
}
type StaticWeightList struct {
ClusterName string `json:"clusterName"`
Weight int `json:"weight"`
} }
type DeleteTaskReq struct { type DeleteTaskReq struct {
@ -192,13 +182,20 @@ type TaskYaml struct {
} }
type CommitVmTaskReq struct { type CommitVmTaskReq struct {
Name string `json:"name"`
NsID string `json:"nsID"`
Replicas int64 `json:"replicas,optional"`
MatchLabels map[string]string `json:"matchLabels,optional"`
AdapterId string `json:"adapterId,optional"`
ClusterType string `json:"clusterType,optional"`
CreateMulServer []CreateMulDomainServer `json:"createMulServer,optional"` CreateMulServer []CreateMulDomainServer `json:"createMulServer,optional"`
VmOption *VmOption `json:"vmOption,optional"`
}
type VmOption struct {
AdapterId string `json:"adapterId"`
VmClusterIds []string `json:"vmClusterIds"`
Replicas int64 `json:"replicas,optional"`
Name string `json:"name"`
Strategy string `json:"strategy"`
ClusterToStaticWeight map[string]int32 `json:"clusterToStaticWeight"`
MatchLabels map[string]string `json:"matchLabels,optional"`
StaticWeightMap map[string]int32 `json:"staticWeightMap,optional"`
CreateMulServer []CreateMulDomainServer `json:"createMulServer,optional"`
} }
type CreateMulDomainServer struct { type CreateMulDomainServer struct {
@ -208,12 +205,20 @@ type CreateMulDomainServer struct {
ImageRef string `json:"imageRef,optional"` ImageRef string `json:"imageRef,optional"`
FlavorRef string `json:"flavorRef,optional"` FlavorRef string `json:"flavorRef,optional"`
Uuid string `json:"uuid,optional"` Uuid string `json:"uuid,optional"`
ClusterId string `json:"clusterId,optional"`
} }
type CommitVmTaskResp struct { type CommitVmTaskResp struct {
TaskId int64 `json:"taskId"` Code int32 `json:"code"`
Code int32 `json:"code"` Msg string `json:"msg"`
Msg string `json:"msg"` }
type ScheduleVmResult struct {
ClusterId string `json:"clusterId"`
TaskId string `json:"taskId"`
Strategy string `json:"strategy"`
Replica int32 `json:"replica"`
Msg string `json:"msg"`
} }
type VmTask struct { type VmTask struct {
@ -298,22 +303,23 @@ type PageTaskReq struct {
} }
type TaskModel struct { type TaskModel struct {
Id int64 `json:"id,omitempty" db:"id"` // id Id int64 `json:"id,omitempty,string" db:"id"` // id
Name string `json:"name,omitempty" db:"name"` // 作业名称 Name string `json:"name,omitempty" db:"name"` // 作业名称
Description string `json:"description,omitempty" db:"description"` // 作业描述 Description string `json:"description,omitempty" db:"description"` // 作业描述
Status string `json:"status,omitempty" db:"status"` // 作业状态 Status string `json:"status,omitempty" db:"status"` // 作业状态
Strategy int64 `json:"strategy" db:"strategy"` // 策略 Strategy int64 `json:"strategy" db:"strategy"` // 策略
SynergyStatus int64 `json:"synergyStatus" db:"synergy_status"` // 协同状态0-未协同、1-已协同) SynergyStatus int64 `json:"synergyStatus" db:"synergy_status"` // 协同状态0-未协同、1-已协同)
CommitTime string `json:"commitTime,omitempty" db:"commit_time"` // 提交时间 CommitTime string `json:"commitTime,omitempty" db:"commit_time"` // 提交时间
StartTime string `json:"startTime,omitempty" db:"start_time"` // 开始时间 StartTime string `json:"startTime,omitempty" db:"start_time"` // 开始时间
EndTime string `json:"endTime,omitempty" db:"end_time"` // 结束运行时间 EndTime string `json:"endTime,omitempty" db:"end_time"` // 结束运行时间
RunningTime int64 `json:"runningTime" db:"running_time"` // 已运行时间(单位秒) RunningTime int64 `json:"runningTime" db:"running_time"` // 已运行时间(单位秒)
YamlString string `json:"yamlString,omitempty" db:"yaml_string"` YamlString string `json:"yamlString,omitempty" db:"yaml_string"`
Result string `json:"result,omitempty" db:"result"` // 作业结果 Result string `json:"result,omitempty" db:"result"` // 作业结果
DeletedAt string `json:"deletedAt,omitempty" gorm:"index" db:"deleted_at"` DeletedAt string `json:"deletedAt,omitempty" gorm:"index" db:"deleted_at"`
NsID string `json:"nsId,omitempty" db:"ns_id"` NsID string `json:"nsId,omitempty" db:"ns_id"`
TenantId string `json:"tenantId,omitempty" db:"tenant_id"` TenantId string `json:"tenantId,omitempty" db:"tenant_id"`
CreateTime string `json:"createTime,omitempty" db:"create_time" gorm:"autoCreateTime"` CreateTime string `json:"createTime,omitempty" db:"create_time" gorm:"autoCreateTime"`
AdapterTypeDict int `json:"adapterTypeDict" db:"create_time" gorm:"adapter_type_dict"` //任务类型(对应字典表的值
} }
type TaskDetailReq struct { type TaskDetailReq struct {
@ -970,9 +976,9 @@ type HpcInfo struct {
Environment string `json:"environment"` Environment string `json:"environment"`
DeletedFlag int64 `json:"deleted_flag"` // 是否删除0-否1-是) DeletedFlag int64 `json:"deleted_flag"` // 是否删除0-否1-是)
CreatedBy int64 `json:"created_by"` // 创建人 CreatedBy int64 `json:"created_by"` // 创建人
CreatedTime string `json:"created_time"` // 创建时间 CreateTime string `json:"created_time"` // 创建时间
UpdatedBy int64 `json:"updated_by"` // 更新人 UpdatedBy int64 `json:"updated_by"` // 更新人
UpdatedTime string `json:"updated_time"` // 更新时间 UpdateTime string `json:"updated_time"` // 更新时间
} }
type CloudInfo struct { type CloudInfo struct {
@ -1114,7 +1120,17 @@ type TaskStatusResp struct {
Succeeded int `json:"Succeeded"` Succeeded int `json:"Succeeded"`
Failed int `json:"Failed"` Failed int `json:"Failed"`
Running int `json:"Running"` Running int `json:"Running"`
Pause int `json:"Pause"` Saved int `json:"Saved"`
}
type TaskDetailsResp struct {
Name string `json:"name"`
Description string `json:"description"`
StartTime string `json:"startTime"`
EndTime string `json:"endTime"`
Strategy int64 `json:"strategy"`
SynergyStatus int64 `json:"synergyStatus"`
ClusterInfos []*ClusterInfo `json:"clusterInfos"`
} }
type CommitHpcTaskReq struct { type CommitHpcTaskReq struct {
@ -2765,6 +2781,43 @@ type Nfs struct {
ReadOnly bool `json:"readOnly,optional"` ReadOnly bool `json:"readOnly,optional"`
} }
// CenterOverviewResp carries aggregate figures for the AI center overview.
// Every field is optional and is serialized under the JSON key in its tag.
type CenterOverviewResp struct {
// CenterNum is serialized as "totalCenters".
CenterNum int32 `json:"totalCenters,optional"`
// TaskNum is serialized as "totalTasks".
TaskNum int32 `json:"totalTasks,optional"`
// CardNum is serialized as "totalCards".
CardNum int32 `json:"totalCards,optional"`
// PowerInTops is serialized as "totalPower"; presumably computing power in TOPS — confirm.
PowerInTops float64 `json:"totalPower,optional"`
}
// CenterQueueingResp holds two lists of per-center queue entries, serialized
// as "current" and "history".
type CenterQueueingResp struct {
Current []*CenterQueue `json:"current,optional"`
History []*CenterQueue `json:"history,optional"`
}
// CenterQueue pairs a name with a queueing count.
type CenterQueue struct {
Name string `json:"name,optional"`
// QueueingNum is serialized as "num".
QueueingNum int32 `json:"num,optional"`
}
// CenterListResp wraps a list of AiCenter entries, serialized as "centerList".
type CenterListResp struct {
List []*AiCenter `json:"centerList,optional"`
}
// AiCenter describes one AI center by name, stack name and version.
type AiCenter struct {
Name string `json:"name,optional"`
// StackName is serialized as "stack".
StackName string `json:"stack,optional"`
Version string `json:"version,optional"`
}
// CenterTaskListResp wraps a list of AiTask entries, serialized as "taskList".
type CenterTaskListResp struct {
List []*AiTask `json:"taskList,optional"`
}
// AiTask is a task list entry with a name, a status and an elapsed-time value.
type AiTask struct {
Name string `json:"name,optional"`
Status string `json:"status,optional"`
// TimeElapsed is serialized as "elapsed"; the unit is not shown here — TODO confirm.
TimeElapsed int32 `json:"elapsed,optional"`
}
type StorageScreenReq struct { type StorageScreenReq struct {
} }
@ -5347,9 +5400,9 @@ type TenantInfo struct {
Type int64 `json:"type"` // 租户所属(0数算1超算2智算 Type int64 `json:"type"` // 租户所属(0数算1超算2智算
DeletedFlag int64 `json:"deletedFlag"` // 是否删除 DeletedFlag int64 `json:"deletedFlag"` // 是否删除
CreatedBy int64 `json:"createdBy"` // 创建人 CreatedBy int64 `json:"createdBy"` // 创建人
CreatedTime string `json:"createdTime"` // 创建时间 CreateTime string `json:"createdTime"` // 创建时间
UpdatedBy int64 `json:"updatedBy"` // 更新人 UpdatedBy int64 `json:"updatedBy"` // 更新人
UpdatedTime string `json:"updated_time"` // 更新时间 UpdateTime string `json:"updated_time"` // 更新时间
} }
type UpdateTenantReq struct { type UpdateTenantReq struct {
@ -5403,7 +5456,7 @@ type Cloud struct {
StartTime string `json:"startTime"` // 开始时间 StartTime string `json:"startTime"` // 开始时间
RunningTime int64 `json:"runningTime"` // 运行时长 RunningTime int64 `json:"runningTime"` // 运行时长
CreatedBy int64 `json:"createdBy"` // 创建人 CreatedBy int64 `json:"createdBy"` // 创建人
CreatedTime string `json:"createdTime"` // 创建时间 CreateTime string `json:"createdTime"` // 创建时间
Result string `json:"result"` Result string `json:"result"`
} }
@ -5546,6 +5599,9 @@ type ScheduleResult struct {
Msg string `json:"msg"` Msg string `json:"msg"`
} }
// ScheduleOverviewResp is currently an empty response type with no fields.
type ScheduleOverviewResp struct {
}
type AiOption struct { type AiOption struct {
TaskName string `json:"taskName"` TaskName string `json:"taskName"`
AdapterId string `json:"adapterId"` AdapterId string `json:"adapterId"`
@ -5604,6 +5660,22 @@ type AiJobLogResp struct {
Log string `json:"log"` Log string `json:"log"`
} }
// AiTaskDb is an AI task record in which every column is represented as a
// string. Each field carries both a JSON key and a db column name in its tags.
type AiTaskDb struct {
Id string `json:"id,omitempty" db:"id"`
TaskId string `json:"taskId,omitempty" db:"task_id"`
AdapterId string `json:"adapterId,omitempty" db:"adapter_id"`
ClusterId string `json:"clusterId,omitempty" db:"cluster_id"`
Name string `json:"name,omitempty" db:"name"`
Replica string `json:"replica,omitempty" db:"replica"`
// ClusterTaskId maps to the db column "c_task_id".
ClusterTaskId string `json:"clusterTaskId,omitempty" db:"c_task_id"`
Strategy string `json:"strategy,omitempty" db:"strategy"`
Status string `json:"status,omitempty" db:"status"`
Msg string `json:"msg,omitempty" db:"msg"`
CommitTime string `json:"commitTime,omitempty" db:"commit_time"`
StartTime string `json:"startTime,omitempty" db:"start_time"`
EndTime string `json:"endTime,omitempty" db:"end_time"`
}
type CreateAlertRuleReq struct { type CreateAlertRuleReq struct {
CLusterId string `json:"clusterId"` CLusterId string `json:"clusterId"`
ClusterName string `json:"clusterName"` ClusterName string `json:"clusterName"`
@ -5680,3 +5752,25 @@ type AdapterInfoResp struct {
Name string `json:"name"` Name string `json:"name"`
Version string `json:"version"` Version string `json:"version"`
} }
// ScheduleSituationResp is a graph-shaped response made of nodes, links
// between nodes, and the categories that nodes refer to.
type ScheduleSituationResp struct {
Nodes []NodeRegion `json:"nodes"`
Links []Link `json:"links"`
Categories []Category `json:"categories"`
}
// NodeRegion is one node of a ScheduleSituationResp.
type NodeRegion struct {
Id string `json:"id"`
Name string `json:"name"`
// Category is an integer; presumably an index into the Categories list — confirm.
Category int `json:"category"`
Value int `json:"value"`
}
// Link connects a source to a target, both given as strings
// (presumably NodeRegion ids — confirm).
type Link struct {
Source string `json:"source"`
Target string `json:"target"`
}
// Category is a named category entry of a ScheduleSituationResp.
type Category struct {
Name string `json:"name"`
}

1159
deploy/pcm-auth.sql Normal file

File diff suppressed because one or more lines are too long

Binary file not shown.

2882
deploy/pcm.sql Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,41 +1,116 @@
## 1 安装部署kubekey ## 1 安装部署kubekey
通过以下的命令,可以下载 KubeKey 的最新版本。您可以更改命令中的版本号来下载特定的版本。 通过以下的命令,可以下载 KubeKey 的最新版本。您可以更改命令中的版本号来下载特定的版本。
```shell
```
export KKZONE=cn export KKZONE=cn
curl -sfL https://get-kk.kubesphere.io | VERSION=v3.0.7 sh - curl -sfL https://get-kk.kubesphere.io | VERSION=v3.0.7 sh -
``` ```
## 2 mysql部署及数据导入
#### 卸载已有的mariadb
`yum remove -y mariadb-server mariadb mariadb-libs`
#### 下载对应系统版本的mysql包
`wget https://dev.mysql.com/get/Downloads/MySQL-8.0/mysql-8.0.36-1.el7.x86_64.rpm-bundle.tar`
##### 解压
`tar -xvf mysql-8.0.36-1.el7.x86_64.rpm-bundle.tar`
##### 安装
```shell
rpm -ivh mysql-community-libs-8.0.36-1.el7.x86_64.rpm
rpm -ivh mysql-community-libs-compat-8.0.36-1.el7.x86_64.rpm
rpm -ivh mysql-community-client-plugins-8.0.36-1.el7.x86_64.rpm
rpm -ivh mysql-community-client-8.0.36-1.el7.x86_64.rpm
rpm -ivh mysql-community-common-8.0.36-1.el7.x86_64.rpm
rpm -ivh mysql-community-debuginfo-8.0.36-1.el7.x86_64.rpm
rpm -ivh mysql-community-devel-8.0.36-1.el7.x86_64.rpm
rpm -ivh mysql-community-server-8.0.36-1.el7.x86_64.rpm
```
##### 启动服务
`systemctl start mysqld`
##### 查看初始密码
`grep 'temporary password' /var/log/mysqld.log`
使用mysql -u root -p 登录
##### 修改密码
`ALTER USER 'root'@'localhost' IDENTIFIED BY 'Nudt!123';`
##### 配置外部访问
```sql
use mysql;
update user set host = '%' where user = 'root';
flush privileges;
```
##### 创建数据库
```sql
create database pcm;
create database pcm_auth;
```
##### 关闭防火墙
`systemctl stop firewalld`
##### 下载脚本
`wget -O pcm_auth.sql https://www.gitlink.org.cn/attachments/entries/get_file?download_url=https://www.gitlink.org.cn/api/JointCloud/pcm-coordinator/raw/deploy%2Fpcm-auth.sql?ref=master`
`wget -O pcm.sql https://www.gitlink.org.cn/attachments/entries/get_file?download_url=https://www.gitlink.org.cn/api/JointCloud/pcm-coordinator/raw/deploy%2Fpcm.sql?ref=master`
## 2 安装部署k8s集群 ##### 执行sql脚本导入数据
`mysql -u root -p pcm < pcm.sql`
`mysql -u root -p pcm_auth < pcm_auth.sql`
## 3 安装部署k8s集群
``` ```
./kk create cluster export KKZONE=cn
sudo ./kk create cluster
``` ```
执行可能会提示部分软件未安装直接yum安装即可 执行可能会提示部分软件未安装直接yum安装即可
![输入图片说明](/imgs/2024-04-28/qF082JVaumRARK1J.png) eg:
然后重新执行创建集群命令,执行成功后可以验证环境 `sudo yum install -y conntrack`
![输入图片说明](/imgs/2024-04-28/FoVNPbwm1pnt839Z.png) `sudo yum install -y socat`
![](/api/attachments/3f8b9884-03b3-4e84-b408-d2ec451a533b)
然后重新执行创建集群命令，执行成功后可以执行 `kubectl get pod` 验证环境
![](/api/attachments/2e282429-d3ae-4019-8280-d6409da50b80)
## 3 部署鉴权、pcm-coordinator、前端服务 ## 3 部署鉴权、pcm-coordinator、前端服务
### 3.1 yaml文件下载
pcm所有服务的yaml文件包下载地址在[这里](https://www.gitlink.org.cn/attachments/entries/get_file?download_url=https://www.gitlink.org.cn/api/JointCloud/pcm-coordinator/raw/deploy%2Fpcm-yaml.zip?ref=master "这里")
或者在服务器上直接执行
```shell
wget -O yaml.zip https://www.gitlink.org.cn/attachments/entries/get_file?download_url=https://www.gitlink.org.cn/api/JointCloud/pcm-coordinator/raw/deploy%2Fpcm-yaml.zip?ref=master
```
下载完成解压
```shell
unzip yaml.zip
```
### 3.2 yaml执行完成服务、负载、配置文件的部署
#### 修改地址
需要修改配置文件中的数据库地址为mysql服务安装的地址
yaml文件下载链接https://pan.baidu.com/s/1VU1zE2xcFkrz9Hz2MkgDaQ #### 一次性部署所有的文件
```shell
kubectl apply -f .
```
#### 或者单模块部署
##### 鉴权:
`kubectl apply -f pcm-auth.yaml`
##### C端
`kubectl apply -f pcm-core-api.yaml`
`kubectl apply -f pcm-core-rpc.yaml`
##### 前端:
`kubectl apply -f pcm-rip.yaml`
鉴权: 部署情况可以通过以下命令查看
kubectl apply -f pcm-auth.yaml `kubectl get pod`
C端 ![](/api/attachments/644de412-1155-4e07-a90d-367f63260a81)
kubectl apply -f pcm-core-api.yaml
kubectl apply -f pcm-core-rpc.yaml
前端:
kubectl apply -f pcm-rip.yaml
## 4 配置驱动器、集群信息 ## 4 配置驱动器、集群信息
此时前端服务可以通过服务器ip的31149端口访问到
默认账号密码为admin/Nudt@123
新建一个适配器配置成功后可以获取到对应的adapterId 新建一个适配器配置成功后可以获取到对应的adapterId
![输入图片说明](/imgs/2024-04-28/Dtu4KC835jSfcf5R.png) ![](/api/attachments/ad8e33d9-7155-4030-a813-227bb019c6e0)
将对应的id填写到对应的P端配置信息中(configmap 内容) 将对应的id填写到对应的P端配置信息中(configmap 内容)
![输入图片说明](/imgs/2024-04-28/zuFWMVKAycNlPXOF.png) ![](/api/attachments/f0d8ee8d-f94f-40c7-8785-58ce09c89ba0)
## 5 部署P端服务 ## 5 部署P端服务
P端 ### HPC服务端:
kubectl apply -f pcm-hpc.yaml kubectl apply -f pcm-hpc.yaml
### kubernetes适配器:
kubectl apply -f pcm-kubernetes.yaml kubectl apply -f pcm-kubernetes.yaml
## 7.系统使用 ## 7.系统使用

2
go.mod
View File

@ -24,7 +24,7 @@ require (
github.com/robfig/cron/v3 v3.0.1 github.com/robfig/cron/v3 v3.0.1
github.com/rs/zerolog v1.28.0 github.com/rs/zerolog v1.28.0
github.com/zeromicro/go-zero v1.6.3 github.com/zeromicro/go-zero v1.6.3
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240420083915-58d6e2958aeb gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240426095603-549fefd8bece
gitlink.org.cn/JointCloud/pcm-kubernetes v0.0.0-20240301071143-347480abff2c gitlink.org.cn/JointCloud/pcm-kubernetes v0.0.0-20240301071143-347480abff2c
gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240424085753-6899615e9142 gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240424085753-6899615e9142
gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240403033338-e7edabad4203 gitlink.org.cn/JointCloud/pcm-openstack v0.0.0-20240403033338-e7edabad4203

4
go.sum
View File

@ -1078,8 +1078,8 @@ github.com/yuin/gopher-lua v1.1.0/go.mod h1:GBR0iDaNXjAgGg9zfCvksxSRnQx76gclCIb7
github.com/zeromicro/go-zero v1.5.1/go.mod h1:bGYm4XWsGN9GhDsO2O2BngpVoWjf3Eog2a5hUOMhlXs= github.com/zeromicro/go-zero v1.5.1/go.mod h1:bGYm4XWsGN9GhDsO2O2BngpVoWjf3Eog2a5hUOMhlXs=
github.com/zeromicro/go-zero v1.6.3 h1:OL0NnHD5LdRNDolfcK9vUkJt7K8TcBE3RkzfM8poOVw= github.com/zeromicro/go-zero v1.6.3 h1:OL0NnHD5LdRNDolfcK9vUkJt7K8TcBE3RkzfM8poOVw=
github.com/zeromicro/go-zero v1.6.3/go.mod h1:XZL435ZxVi9MSXXtw2MRQhHgx6OoX3++MRMOE9xU70c= github.com/zeromicro/go-zero v1.6.3/go.mod h1:XZL435ZxVi9MSXXtw2MRQhHgx6OoX3++MRMOE9xU70c=
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240420083915-58d6e2958aeb h1:k6mNEWKp+haQUaK2dWs/rI9OKgzJHY1/9KNKuBDN0Vw= gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240426095603-549fefd8bece h1:W3yBnvAVV8dlRNQKYD6Mf8ySRrYsP0tPk7JjvqZzNHQ=
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240420083915-58d6e2958aeb/go.mod h1:w3Nb5TNymCItQ7K3x4Q0JLuoq9OerwAzAWT2zsPE9Xo= gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240426095603-549fefd8bece/go.mod h1:w3Nb5TNymCItQ7K3x4Q0JLuoq9OerwAzAWT2zsPE9Xo=
gitlink.org.cn/JointCloud/pcm-kubernetes v0.0.0-20240301071143-347480abff2c h1:2Wl/hvaSFjh6fmCSIQhjkr9llMRREQeqcXNLZ/HPY18= gitlink.org.cn/JointCloud/pcm-kubernetes v0.0.0-20240301071143-347480abff2c h1:2Wl/hvaSFjh6fmCSIQhjkr9llMRREQeqcXNLZ/HPY18=
gitlink.org.cn/JointCloud/pcm-kubernetes v0.0.0-20240301071143-347480abff2c/go.mod h1:lSRfGs+PxFvw7CcndHWRd6UlLlGrZn0b0hp5cfaMNGw= gitlink.org.cn/JointCloud/pcm-kubernetes v0.0.0-20240301071143-347480abff2c/go.mod h1:lSRfGs+PxFvw7CcndHWRd6UlLlGrZn0b0hp5cfaMNGw=
gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240424085753-6899615e9142 h1:+po0nesBDSWsgCySBG7eEXk7i9Ytd58wqvjL1M9y6d8= gitlink.org.cn/JointCloud/pcm-octopus v0.0.0-20240424085753-6899615e9142 h1:+po0nesBDSWsgCySBG7eEXk7i9Ytd58wqvjL1M9y6d8=

View File

@ -26,4 +26,6 @@ const (
WaitRestart = "WaitRestart" WaitRestart = "WaitRestart"
WaitPause = "WaitPause" WaitPause = "WaitPause"
WaitStart = "WaitStart" WaitStart = "WaitStart"
Pending = "Pending"
Stopped = "Stopped"
) )

3
pkg/constants/time.go Normal file
View File

@ -0,0 +1,3 @@
package constants
// Layout is the time layout string "2006-01-02 15:04:05" (date and time to
// the second, no time zone) for use with time.Format and time.Parse.
const Layout = "2006-01-02 15:04:05"

View File

@ -6,9 +6,9 @@ import (
) )
type BaseModel struct { type BaseModel struct {
DeletedAt gorm.DeletedAt `gorm:"index;comment:删除时间" json:"-"` // 删除时间 DeletedAt gorm.DeletedAt `gorm:"index;comment:删除时间" json:"-"` // 删除时间
CreatedBy uint `gorm:"created_by;comment:创建人" json:"createdBy"` //创建人 CreatedBy uint `gorm:"created_by;comment:创建人" json:"createdBy"` //创建人
CreatedTime time.Time `gorm:"comment:创建时间" json:"-"` // 创建时间 CreateTime time.Time `gorm:"autoCreateTime:nano;comment:创建时间" json:"-"` // 创建时间
UpdatedBy uint `gorm:"updated_by;comment:更新人" json:"UpdatedBy"` //创建人 UpdatedBy uint `gorm:"updated_by;comment:更新人" json:"UpdatedBy"` //创建人
UpdatedTime time.Time `gorm:"comment:更新时间" json:"-"` // 更新时间 UpdateTime time.Time `gorm:"autoUpdateTime:nano;;comment:更新时间" json:"-"` // 更新时间
} }

View File

@ -6,18 +6,17 @@ import (
) )
type TaskCloudModel struct { type TaskCloudModel struct {
Id uint `json:"id" gorm:"primarykey;not null;comment:id"` Id uint `json:"id" gorm:"primarykey;not null;comment:id"`
TaskId uint `json:"taskId" gorm:"not null;comment:task表id"` TaskId uint `json:"taskId" gorm:"not null;comment:task表id"`
AdapterId uint `json:"adapterId" gorm:"not null;comment:适配器id"` AdapterId uint `json:"adapterId" gorm:"not null;comment:适配器id"`
ClusterId uint `json:"clusterId" gorm:"not null;comment:集群id"` ClusterId uint `json:"clusterId" gorm:"not null;comment:集群id"`
ClusterName string `json:"clusterName" gorm:"not null;comment:集群名称"` ClusterName string `json:"clusterName" gorm:"not null;comment:集群名称"`
Kind string `json:"kind" gorm:"comment:种类"` Kind string `json:"kind" gorm:"comment:种类"`
Status string `json:"status" gorm:"comment:状态"` Status string `json:"status" gorm:"comment:状态"`
StartTime time.Time `json:"startTime" gorm:"comment:开始时间"` StartTime *time.Time `json:"startTime,string" gorm:"comment:开始时间"`
YamlString string `json:"yamlString" gorm:"not null;comment:入参"` YamlString string `json:"yamlString" gorm:"not null;comment:入参"`
Result string `json:"result" gorm:"comment:运行结果"` Result string `json:"result" gorm:"comment:运行结果"`
Namespace string `json:"namespace" gorm:"comment:命名空间"` Namespace string `json:"namespace" gorm:"comment:命名空间"`
Replica int `json:"replica" gorm:"not null;comment:副本数"`
base.BaseModel base.BaseModel
} }

View File

@ -37,9 +37,9 @@ type File struct {
Status string `gorm:"column:status" json:"Status"` //type:string comment:hash version:2023-05-06 09:58 Status string `gorm:"column:status" json:"Status"` //type:string comment:hash version:2023-05-06 09:58
DeletedFlag *int `gorm:"column:deleted_flag" json:"DeletedFlag"` //type:*int comment:是否删除 version:2023-05-06 09:58 DeletedFlag *int `gorm:"column:deleted_flag" json:"DeletedFlag"` //type:*int comment:是否删除 version:2023-05-06 09:58
CreatedBy *int `gorm:"column:created_by" json:"CreatedBy"` //type:*int comment:创建人 version:2023-05-06 09:58 CreatedBy *int `gorm:"column:created_by" json:"CreatedBy"` //type:*int comment:创建人 version:2023-05-06 09:58
CreatedTime *time.Time `gorm:"column:created_time" json:"CreatedTime"` //type:*time.Time comment:创建时间 version:2023-05-06 09:58 CreatedTime *time.Time `gorm:"column:created_time" json:"CreateTime"` //type:*time.Time comment:创建时间 version:2023-05-06 09:58
UpdatedBy *int `gorm:"column:updated_by" json:"UpdatedBy"` //type:*int comment:更新人 version:2023-05-06 09:58 UpdatedBy *int `gorm:"column:updated_by" json:"UpdatedBy"` //type:*int comment:更新人 version:2023-05-06 09:58
UpdatedTime *time.Time `gorm:"column:updated_time" json:"UpdatedTime"` //type:*time.Time comment:更新时间 version:2023-05-06 09:58 UpdatedTime *time.Time `gorm:"column:updated_time" json:"UpdateTime"` //type:*time.Time comment:更新时间 version:2023-05-06 09:58
} }
// TableName 表名:data_set // TableName 表名:data_set

24
pkg/models/taskaimodel.go Normal file
View File

@ -0,0 +1,24 @@
package models
import "github.com/zeromicro/go-zero/core/stores/sqlx"
var _ TaskAiModel = (*customTaskAiModel)(nil)
type (
// TaskAiModel is an interface to be customized, add more methods here,
// and implement the added methods in customTaskAiModel.
TaskAiModel interface {
taskAiModel
}
// customTaskAiModel embeds *defaultTaskAiModel and is the concrete type
// returned by NewTaskAiModel.
customTaskAiModel struct {
*defaultTaskAiModel
}
)
// NewTaskAiModel wraps a default task_ai model built on conn and returns it
// as a TaskAiModel.
func NewTaskAiModel(conn sqlx.SqlConn) TaskAiModel {
	model := new(customTaskAiModel)
	model.defaultTaskAiModel = newTaskAiModel(conn)
	return model
}

View File

@ -0,0 +1,104 @@
// Code generated by goctl. DO NOT EDIT.
package models
import (
"context"
"database/sql"
"fmt"
"strings"
"time"
"github.com/zeromicro/go-zero/core/stores/builder"
"github.com/zeromicro/go-zero/core/stores/sqlc"
"github.com/zeromicro/go-zero/core/stores/sqlx"
"github.com/zeromicro/go-zero/core/stringx"
)
var (
// taskAiFieldNames is the result of builder.RawFieldNames for TaskAi
// (presumably the backquoted db column names — confirm).
taskAiFieldNames = builder.RawFieldNames(&TaskAi{})
// taskAiRows is every field name joined with commas; used by FindOne.
taskAiRows = strings.Join(taskAiFieldNames, ",")
// taskAiRowsExpectAutoSet is the field list without `id` and the listed
// create/update timestamp columns, joined with commas; used by Insert.
taskAiRowsExpectAutoSet = strings.Join(stringx.Remove(taskAiFieldNames, "`id`", "`create_at`", "`create_time`", "`created_at`", "`update_at`", "`update_time`", "`updated_at`"), ",")
// taskAiRowsWithPlaceHolder is the same reduced field list joined with
// "=?," and terminated with "=?"; used as the SET clause by Update.
taskAiRowsWithPlaceHolder = strings.Join(stringx.Remove(taskAiFieldNames, "`id`", "`create_at`", "`create_time`", "`created_at`", "`update_at`", "`update_time`", "`updated_at`"), "=?,") + "=?"
)
type (
// taskAiModel is the basic insert/find/update/delete contract for TaskAi rows.
taskAiModel interface {
Insert(ctx context.Context, data *TaskAi) (sql.Result, error)
FindOne(ctx context.Context, id int64) (*TaskAi, error)
Update(ctx context.Context, data *TaskAi) error
Delete(ctx context.Context, id int64) error
}
// defaultTaskAiModel implements taskAiModel on top of a sqlx connection
// and a table name.
defaultTaskAiModel struct {
conn sqlx.SqlConn
table string
}
// TaskAi is one row of the task_ai table.
TaskAi struct {
Id int64 `db:"id"` // id
TaskId int64 `db:"task_id"` // task id
AdapterId int64 `db:"adapter_id"` // adapter id
ClusterId int64 `db:"cluster_id"` // cluster id
Name string `db:"name"` // task name
Replica int64 `db:"replica"` // number of executions
JobId string `db:"job_id"` // task id returned by the cluster
Strategy string `db:"strategy"` // strategy used by the main task
Status string `db:"status"` // task status
Msg string `db:"msg"` // task message returned by the cluster
CommitTime time.Time `db:"commit_time"` // commit time
StartTime string `db:"start_time"` // start time
EndTime string `db:"end_time"` // end time
TaskType string `db:"task_type"`
}
)
// newTaskAiModel builds a defaultTaskAiModel that uses conn and targets the
// `task_ai` table.
func newTaskAiModel(conn sqlx.SqlConn) *defaultTaskAiModel {
	model := new(defaultTaskAiModel)
	model.conn = conn
	model.table = "`task_ai`"
	return model
}
// withSession returns a new defaultTaskAiModel for the `task_ai` table whose
// connection is derived from session.
func (m *defaultTaskAiModel) withSession(session sqlx.Session) *defaultTaskAiModel {
	sessionConn := sqlx.NewSqlConnFromSession(session)
	return &defaultTaskAiModel{conn: sessionConn, table: "`task_ai`"}
}
// Delete removes the row whose `id` equals id and returns any execution error.
func (m *defaultTaskAiModel) Delete(ctx context.Context, id int64) error {
	stmt := fmt.Sprintf("delete from %s where `id` = ?", m.table)
	if _, err := m.conn.ExecCtx(ctx, stmt, id); err != nil {
		return err
	}
	return nil
}
// FindOne loads the row whose `id` equals id. It returns ErrNotFound when the
// query reports sqlc.ErrNotFound, and passes any other error through unchanged.
func (m *defaultTaskAiModel) FindOne(ctx context.Context, id int64) (*TaskAi, error) {
	var row TaskAi
	stmt := fmt.Sprintf("select %s from %s where `id` = ? limit 1", taskAiRows, m.table)

	err := m.conn.QueryRowCtx(ctx, &row, stmt, id)
	if err == nil {
		return &row, nil
	}
	if err == sqlc.ErrNotFound {
		return nil, ErrNotFound
	}
	return nil, err
}
// Insert writes data as a new row, supplying the thirteen non-auto-set column
// values in field order, and returns the driver result.
func (m *defaultTaskAiModel) Insert(ctx context.Context, data *TaskAi) (sql.Result, error) {
	stmt := fmt.Sprintf("insert into %s (%s) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", m.table, taskAiRowsExpectAutoSet)
	return m.conn.ExecCtx(ctx, stmt,
		data.TaskId,
		data.AdapterId,
		data.ClusterId,
		data.Name,
		data.Replica,
		data.JobId,
		data.Strategy,
		data.Status,
		data.Msg,
		data.CommitTime,
		data.StartTime,
		data.EndTime,
		data.TaskType,
	)
}
// Update overwrites the non-auto-set columns of the row whose `id` equals
// data.Id with the values in data.
func (m *defaultTaskAiModel) Update(ctx context.Context, data *TaskAi) error {
	stmt := fmt.Sprintf("update %s set %s where `id` = ?", m.table, taskAiRowsWithPlaceHolder)
	// Column values in field order, followed by the id for the WHERE clause.
	args := []interface{}{
		data.TaskId,
		data.AdapterId,
		data.ClusterId,
		data.Name,
		data.Replica,
		data.JobId,
		data.Strategy,
		data.Status,
		data.Msg,
		data.CommitTime,
		data.StartTime,
		data.EndTime,
		data.TaskType,
		data.Id,
	}
	_, err := m.conn.ExecCtx(ctx, stmt, args...)
	return err
}
// tableName returns the table name stored in the model.
func (m *defaultTaskAiModel) tableName() string {
return m.table
}

View File

@ -35,20 +35,21 @@ type (
} }
Task struct { Task struct {
Id int64 `db:"id"` // id Id int64 `db:"id"` // id
Name string `db:"name"` // 作业名称 Name string `db:"name"` // 作业名称
Description string `db:"description"` // 作业描述 Description string `db:"description"` // 作业描述
Status string `db:"status"` // 作业状态 Status string `db:"status"` // 作业状态
Strategy int64 `db:"strategy"` // 策略 Strategy int64 `db:"strategy"` // 策略
SynergyStatus int64 `db:"synergy_status"` // 协同状态0-未协同、1-已协同) SynergyStatus int64 `db:"synergy_status"` // 协同状态0-未协同、1-已协同)
CommitTime time.Time `db:"commit_time"` // 提交时间 CommitTime time.Time `db:"commit_time"` // 提交时间
StartTime string `db:"start_time"` // 开始时间 StartTime *time.Time `db:"start_time"` // 开始时间
EndTime string `db:"end_time"` // 结束运行时间 EndTime *time.Time `db:"end_time"` // 结束运行时间
RunningTime int64 `db:"running_time"` // 已运行时间(单位秒) RunningTime int64 `db:"running_time"` // 已运行时间(单位秒)
YamlString string `db:"yaml_string"` YamlString string `db:"yaml_string"`
Result string `db:"result"` // 作业结果 Result string `db:"result"` // 作业结果
DeletedAt gorm.DeletedAt `gorm:"index"` DeletedAt gorm.DeletedAt `gorm:"index"`
NsID string `db:"ns_id"` NsID string `db:"ns_id"`
AdapterTypeDict int `db:"adapter_type_dict"` //任务类型(对应字典表的值)
} }
) )

View File

@ -35,17 +35,19 @@ type (
} }
TClusterResource struct { TClusterResource struct {
ClusterId int64 `db:"cluster_id"` ClusterId int64 `db:"cluster_id"`
ClusterName string `db:"cluster_name"` ClusterName string `db:"cluster_name"`
ClusterType int64 `db:"cluster_type"` // 类型0->容器1->智算2->超算3-虚拟机 ClusterType int64 `db:"cluster_type"` // 类型0->容器1->智算2->超算3-虚拟机
CpuAvail float64 `db:"cpu_avail"` CpuAvail float64 `db:"cpu_avail"`
CpuTotal float64 `db:"cpu_total"` CpuTotal float64 `db:"cpu_total"`
MemAvail float64 `db:"mem_avail"` MemAvail float64 `db:"mem_avail"`
MemTotal float64 `db:"mem_total"` MemTotal float64 `db:"mem_total"`
DiskAvail float64 `db:"disk_avail"` DiskAvail float64 `db:"disk_avail"`
DiskTotal float64 `db:"disk_total"` DiskTotal float64 `db:"disk_total"`
GpuAvail float64 `db:"gpu_avail"` GpuAvail float64 `db:"gpu_avail"`
GpuTotal float64 `db:"gpu_total"` GpuTotal float64 `db:"gpu_total"`
CardTotal int64 `db:"card_total"` // 算力卡数量
CardTopsTotal float64 `db:"card_tops_total"` // 算力总量tops
} }
) )
@ -56,6 +58,13 @@ func newTClusterResourceModel(conn sqlx.SqlConn) *defaultTClusterResourceModel {
} }
} }
// withSession returns a fresh `t_cluster_resource` model whose connection is
// derived from the given session via sqlx.NewSqlConnFromSession. The receiver
// is left untouched.
func (m *defaultTClusterResourceModel) withSession(session sqlx.Session) *defaultTClusterResourceModel {
	sessionConn := sqlx.NewSqlConnFromSession(session)
	return &defaultTClusterResourceModel{conn: sessionConn, table: "`t_cluster_resource`"}
}
func (m *defaultTClusterResourceModel) Delete(ctx context.Context, clusterId int64) error { func (m *defaultTClusterResourceModel) Delete(ctx context.Context, clusterId int64) error {
query := fmt.Sprintf("delete from %s where `cluster_id` = ?", m.table) query := fmt.Sprintf("delete from %s where `cluster_id` = ?", m.table)
_, err := m.conn.ExecCtx(ctx, query, clusterId) _, err := m.conn.ExecCtx(ctx, query, clusterId)
@ -77,14 +86,14 @@ func (m *defaultTClusterResourceModel) FindOne(ctx context.Context, clusterId in
} }
func (m *defaultTClusterResourceModel) Insert(ctx context.Context, data *TClusterResource) (sql.Result, error) { func (m *defaultTClusterResourceModel) Insert(ctx context.Context, data *TClusterResource) (sql.Result, error) {
query := fmt.Sprintf("insert into %s (%s) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", m.table, tClusterResourceRowsExpectAutoSet) query := fmt.Sprintf("insert into %s (%s) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", m.table, tClusterResourceRowsExpectAutoSet)
ret, err := m.conn.ExecCtx(ctx, query, data.ClusterId, data.ClusterName, data.ClusterType, data.CpuAvail, data.CpuTotal, data.MemAvail, data.MemTotal, data.DiskAvail, data.DiskTotal, data.GpuAvail, data.GpuTotal) ret, err := m.conn.ExecCtx(ctx, query, data.ClusterId, data.ClusterName, data.ClusterType, data.CpuAvail, data.CpuTotal, data.MemAvail, data.MemTotal, data.DiskAvail, data.DiskTotal, data.GpuAvail, data.GpuTotal, data.CardTotal, data.CardTopsTotal)
return ret, err return ret, err
} }
func (m *defaultTClusterResourceModel) Update(ctx context.Context, data *TClusterResource) error { func (m *defaultTClusterResourceModel) Update(ctx context.Context, data *TClusterResource) error {
query := fmt.Sprintf("update %s set %s where `cluster_id` = ?", m.table, tClusterResourceRowsWithPlaceHolder) query := fmt.Sprintf("update %s set %s where `cluster_id` = ?", m.table, tClusterResourceRowsWithPlaceHolder)
_, err := m.conn.ExecCtx(ctx, query, data.ClusterName, data.ClusterType, data.CpuAvail, data.CpuTotal, data.MemAvail, data.MemTotal, data.DiskAvail, data.DiskTotal, data.GpuAvail, data.GpuTotal, data.ClusterId) _, err := m.conn.ExecCtx(ctx, query, data.ClusterName, data.ClusterType, data.CpuAvail, data.CpuTotal, data.MemAvail, data.MemTotal, data.DiskAvail, data.DiskTotal, data.GpuAvail, data.GpuTotal, data.CardTotal, data.CardTopsTotal, data.ClusterId)
return err return err
} }

View File

@ -0,0 +1,24 @@
package models
import "github.com/zeromicro/go-zero/core/stores/sqlx"
var _ TClusterTaskQueueModel = (*customTClusterTaskQueueModel)(nil)
// TClusterTaskQueueModel is an interface to be customized, add more methods here,
// and implement the added methods in customTClusterTaskQueueModel.
type TClusterTaskQueueModel interface {
	tClusterTaskQueueModel
}

// customTClusterTaskQueueModel embeds the default model so that it provides
// the default model's methods and can be extended with custom ones.
type customTClusterTaskQueueModel struct {
	*defaultTClusterTaskQueueModel
}
// NewTClusterTaskQueueModel returns a model for the database table, wrapping
// the default model built on the supplied connection.
func NewTClusterTaskQueueModel(conn sqlx.SqlConn) TClusterTaskQueueModel {
	base := newTClusterTaskQueueModel(conn)
	return &customTClusterTaskQueueModel{defaultTClusterTaskQueueModel: base}
}

View File

@ -0,0 +1,95 @@
// Code generated by goctl. DO NOT EDIT.
package models
import (
"context"
"database/sql"
"fmt"
"strings"
"time"
"github.com/zeromicro/go-zero/core/stores/builder"
"github.com/zeromicro/go-zero/core/stores/sqlc"
"github.com/zeromicro/go-zero/core/stores/sqlx"
"github.com/zeromicro/go-zero/core/stringx"
)
// Column-list strings for the t_cluster_task_queue model, computed once at
// package initialisation and interpolated into the SQL statements below.
var (
// Field names reported by builder.RawFieldNames for TClusterTaskQueue
// (presumably the backtick-quoted `db` tag values — confirm against the builder package).
tClusterTaskQueueFieldNames = builder.RawFieldNames(&TClusterTaskQueue{})
// Every field name joined with commas; used as the select column list in FindOne.
tClusterTaskQueueRows = strings.Join(tClusterTaskQueueFieldNames, ",")
// Field names with `id` and the create/update timestamp names removed, joined
// with commas; used as the insert column list in Insert.
tClusterTaskQueueRowsExpectAutoSet = strings.Join(stringx.Remove(tClusterTaskQueueFieldNames, "`id`", "`create_at`", "`create_time`", "`created_at`", "`update_at`", "`update_time`", "`updated_at`"), ",")
// The same filtered names joined as "a=?,b=?,...=?"; used as the set clause in Update.
tClusterTaskQueueRowsWithPlaceHolder = strings.Join(stringx.Remove(tClusterTaskQueueFieldNames, "`id`", "`create_at`", "`create_time`", "`created_at`", "`update_at`", "`update_time`", "`updated_at`"), "=?,") + "=?"
)
type (
// tClusterTaskQueueModel lists the CRUD operations implemented by
// defaultTClusterTaskQueueModel.
tClusterTaskQueueModel interface {
Insert(ctx context.Context, data *TClusterTaskQueue) (sql.Result, error)
FindOne(ctx context.Context, id int64) (*TClusterTaskQueue, error)
Update(ctx context.Context, data *TClusterTaskQueue) error
Delete(ctx context.Context, id int64) error
}
// defaultTClusterTaskQueueModel holds the connection and the table name
// used to build each statement.
defaultTClusterTaskQueueModel struct {
conn sqlx.SqlConn
table string
}
// TClusterTaskQueue maps one row of the table; each `db` tag names the
// column the field is bound to.
TClusterTaskQueue struct {
Id int64 `db:"id"` // id
AdapterId int64 `db:"adapter_id"` // adapter id
ClusterId int64 `db:"cluster_id"` // cluster id
QueueNum int64 `db:"queue_num"` // number of queued tasks
Date time.Time `db:"date"`
}
)
// newTClusterTaskQueueModel builds the default model for the
// `t_cluster_task_queue` table on top of the supplied connection.
func newTClusterTaskQueueModel(conn sqlx.SqlConn) *defaultTClusterTaskQueueModel {
	model := new(defaultTClusterTaskQueueModel)
	model.conn = conn
	model.table = "`t_cluster_task_queue`"
	return model
}
// withSession returns a fresh `t_cluster_task_queue` model whose connection is
// derived from the given session via sqlx.NewSqlConnFromSession. The receiver
// is left untouched.
func (m *defaultTClusterTaskQueueModel) withSession(session sqlx.Session) *defaultTClusterTaskQueueModel {
	sessionConn := sqlx.NewSqlConnFromSession(session)
	return &defaultTClusterTaskQueueModel{conn: sessionConn, table: "`t_cluster_task_queue`"}
}
// Delete removes the row whose `id` column equals id from the model's table
// and returns whatever error the exec call reports.
func (m *defaultTClusterTaskQueueModel) Delete(ctx context.Context, id int64) error {
	stmt := fmt.Sprintf("delete from %s where `id` = ?", m.table)
	if _, execErr := m.conn.ExecCtx(ctx, stmt, id); execErr != nil {
		return execErr
	}
	return nil
}
// FindOne loads the single row whose `id` column equals id.
//
// It returns the populated TClusterTaskQueue on success, ErrNotFound when the
// query reports sqlc.ErrNotFound, and the underlying error otherwise.
func (m *defaultTClusterTaskQueueModel) FindOne(ctx context.Context, id int64) (*TClusterTaskQueue, error) {
	var row TClusterTaskQueue
	stmt := fmt.Sprintf("select %s from %s where `id` = ? limit 1", tClusterTaskQueueRows, m.table)
	err := m.conn.QueryRowCtx(ctx, &row, stmt, id)
	if err == nil {
		return &row, nil
	}
	if err == sqlc.ErrNotFound {
		// Translate the storage-layer sentinel into the package-level one.
		return nil, ErrNotFound
	}
	return nil, err
}
// Insert writes data as a new row, supplying values for the four columns in
// tClusterTaskQueueRowsExpectAutoSet (the `id` column is not supplied), and
// returns the exec result together with any error.
func (m *defaultTClusterTaskQueueModel) Insert(ctx context.Context, data *TClusterTaskQueue) (sql.Result, error) {
	stmt := fmt.Sprintf("insert into %s (%s) values (?, ?, ?, ?)", m.table, tClusterTaskQueueRowsExpectAutoSet)
	return m.conn.ExecCtx(ctx, stmt,
		data.AdapterId,
		data.ClusterId,
		data.QueueNum,
		data.Date,
	)
}
// Update overwrites the columns named in tClusterTaskQueueRowsWithPlaceHolder
// for the row whose `id` column equals data.Id and returns any error from the
// exec call.
func (m *defaultTClusterTaskQueueModel) Update(ctx context.Context, data *TClusterTaskQueue) error {
	stmt := fmt.Sprintf("update %s set %s where `id` = ?", m.table, tClusterTaskQueueRowsWithPlaceHolder)
	_, execErr := m.conn.ExecCtx(ctx, stmt,
		data.AdapterId,
		data.ClusterId,
		data.QueueNum,
		data.Date,
		// The trailing argument binds the `id` placeholder in the where clause.
		data.Id,
	)
	return execErr
}
// tableName reports the table name stored on the model.
func (m *defaultTClusterTaskQueueModel) tableName() string {
	name := m.table
	return name
}

View File

@ -41,3 +41,8 @@ func GenSnowflakeID() int64 {
func GenSnowflakeIDStr() string { func GenSnowflakeIDStr() string {
return node.Generate().String() return node.Generate().String()
} }
// GenSnowflakeIDUint generates a new ID from the package-level node and
// returns its Int64 value converted to uint.
//
// NOTE(review): uint is platform-sized, so on 32-bit builds this conversion
// would presumably truncate the generated value — confirm the supported targets.
func GenSnowflakeIDUint() uint {
	id := node.Generate().Int64()
	return uint(id)
}

View File

@ -19,7 +19,9 @@ import (
) )
var timeTemplates = []string{ var timeTemplates = []string{
"2006-01-02 15:04:05", //常规类型 "2006-01-02T15:04:05Z07:00", //RFC3339
"2006-01-02 15:04:05", //常规类型
"2006/01/02T15:04:05Z07:00", //RFC3339
"2006/01/02 15:04:05", "2006/01/02 15:04:05",
"2006-01-02", "2006-01-02",
"2006/01/02", "2006/01/02",