Merge remote-tracking branch 'origin/master' into master-wq

Former-commit-id: a81094f3b6baefef77f8a9cc45f4254e563c7abe
qiwang 2024-06-14 17:26:48 +08:00
commit d09b872f42
70 changed files with 748 additions and 2991 deletions

View File

@ -1234,12 +1234,24 @@ type TaskStatusResp {
Saved int `json:"Saved"`
}
type TaskDetailsResp {
Name string `json:"name"`
description string `json:"description"`
StartTime string `json:"startTime"`
EndTime string `json:"endTime"`
Strategy int64 `json:"strategy"`
SynergyStatus int64 `json:"synergyStatus"`
ClusterInfos []*ClusterInfo `json:"clusterInfos"`
}
type (
TaskDetailsResp {
Name string `json:"name"`
description string `json:"description"`
StartTime string `json:"startTime"`
EndTime string `json:"endTime"`
Strategy int64 `json:"strategy"`
SynergyStatus int64 `json:"synergyStatus"`
ClusterInfos []*ClusterInfo `json:"clusterInfos"`
SubTaskInfos []*SubTaskInfo `json:"subTaskInfos"`
}
SubTaskInfo{
Id string `json:"id" db:"id"`
Name string `json:"name" db:"name"`
ClusterId string `json:"clusterId" db:"cluster_id"`
ClusterName string `json:"clusterName" db:"cluster_name"`
Status string `json:"status" db:"status"`
Remark string `json:"remark" db:"remark"`
}
)
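One detail worth flagging in both the old and the new TaskDetailsResp: the description member is lowercase. If goctl emits the field as written, it is unexported, and encoding/json skips unexported fields regardless of the tag, so the value would never reach the client. A minimal standalone demo of that behavior (the type and field names below are illustrative, not generated code); capitalizing the member to Description in the .api definition avoids it:

package main

import (
	"encoding/json"
	"fmt"
)

type taskDetailsResp struct {
	Name        string `json:"name"`
	description string `json:"description"` // unexported: encoding/json ignores it, tag or not
}

func main() {
	b, _ := json.Marshal(taskDetailsResp{Name: "demo", description: "never serialized"})
	fmt.Println(string(b)) // prints {"name":"demo"} — description is dropped
}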

View File

@ -50,10 +50,6 @@ service pcm {
@handler TaskListHandler
get /core/taskList (taskListReq) returns (taskListResp)
@doc "Query task details"
@handler TaskDetailHandler
get /core/taskDetail/:taskId (taskDetailReq) returns (taskDetailResp)
@doc "Task overview"
@handler JobTotalHandler
get /core/jobTotal returns (jobTotalResp)
@ -216,14 +212,6 @@ service pcm {
@handler deleteClusterHandler
post /cloud/deleteCluster (deleteClusterReq) returns (CloudResp)
@doc "Trigger tenant update"
@handler noticeTenantHandler
get /cloud/noticeTenant returns (CloudResp)
@doc "Tenant update"
@handler updateTenantHandler
post /cloud/updateTenant (UpdateTenantReq) returns (CloudResp)
@doc "Obtain cluster list information according to adapterId"
@handler getClusterListHandler
get /core/clusterList (getClusterListReq) returns (getClusterListResp)
@ -398,33 +386,6 @@ service pcm {
get /storage/perCenterComputerPowers (PerCenterComputerPowersReq) returns (PerCenterComputerPowersResp)
}
// Image APIs
@server(
prefix: pcm/v1
group: image
)
service pcm {
@doc "Upload image"
@handler uploadHandler
post /upload
@doc "Upload image chunk"
@handler chunkHandler
post /chunk
@doc "Query image list"
@handler imageListHandler
get /image/list returns (imageListResp)
@doc "Check dataset"
@handler dataSetCheckHandler
get /dataSet/check/:fileMd5 (checkReq) returns (checkResp)
@doc "Upload dataset"
@handler uploadDataSetHandler
post /dataSet/upload
}
// OpenStack APIs
@server(
prefix: pcm/v1
@ -855,49 +816,6 @@ service pcm {
get /storelink/getResourceSpecs (GetResourceSpecsReq) returns (GetResourceSpecsResp)
}
// Application APIs
@server(
prefix: pcm/v1
group: apps
)
service pcm {
@doc "Application list"
@handler AppListHandler
get /apps/list (AppListReq) returns (AppListResp)
@doc "Get application distribution details"
@handler AppDetailHandler
get /apps/distribute/:appName (AppDetailReq) returns (AppDetailResp)
@doc "Application pods list"
@handler AppPodsHandler
get /apps/pods/:appName (AppDetailReq) returns (AppDetailResp)
@doc "Get application details"
@handler GetAppByAppName
get /apps/getAppByAppName/:appName (AppDetailReq) returns (AppTaskResp)
@doc "Delete application"
@handler DeleteAppByAppName
delete /apps/deleteApp (DeleteAppReq) returns (DeleteAppResp)
@doc "Update application"
@handler UpdateAppByAppName
put /apps/updateApp (DeleteAppReq) returns (AppTaskResp)
@doc "Restart application"
@handler RestartAppByAppName
put /apps/restartApp (DeleteAppReq) returns (AppResp)
@doc "Pause application"
@handler PauseAppByAppName
put /apps/pauseApp (DeleteAppReq) returns (AppResp)
@doc "Start application"
@handler StartAppByAppName
put /apps/startApp (DeleteAppReq) returns (AppResp)
}
// APIs
@server(
prefix: pcm/v1

View File

@ -15,14 +15,6 @@ Monitoring:
PromUrl: http://47.92.39.128:30877
AlertUrl: 47.92.39.128:32243
# k8s rpc
K8sNativeConf:
# target: nacos://10.206.0.12:8848/pcm.kubenative.rpc?timeout=30s&namespaceid=test&groupname=DEFAULT_GROUP&appName=pcm.core.api
Endpoints:
- 127.0.0.1:2003
NonBlock: true
#rpc
THRpcConf:
target: nacos://10.206.0.12:8848/pcm.th.rpc?timeout=30s&namespaceid=test&groupname=DEFAULT_GROUP&appName=pcm.core.api
@ -51,7 +43,7 @@ ACRpcConf:
# Endpoints:
# - 127.0.0.1:8888
NonBlock: true
Timeout: 20000
Timeout: 50000
#rpc
CephRpcConf:
@ -83,19 +75,10 @@ PcmCoreRpcConf:
NonBlock: true
Timeout: 20000
NexusUrl: http://10.101.15.175:8081
JccScheduleUrl: http://jcce-schedule-service:8082
MinioConf:
Secret: minio_xnu122@_
AccessKey: minioadmin
Endpoint: http://121.89.220.60:9000
RegistryConf:
Username: jointcloudNudt
Password: Nudt@123
SnowflakeConf:
MachineId: 1

View File

@ -37,17 +37,13 @@ type Config struct {
OpenstackRpcConf zrpc.RpcClientConf
OctopusRpcConf zrpc.RpcClientConf
PcmCoreRpcConf zrpc.RpcClientConf
JccScheduleUrl string
MinioConf struct {
MinioConf struct {
Secret string
AccessKey string
Endpoint string
}
RegistryConf struct {
Username string
Password string
}
SnowflakeConf SnowflakeConf
Monitoring Monitoring
}

View File

@ -0,0 +1,479 @@
package cron
import (
"errors"
"fmt"
"github.com/zeromicro/go-zero/core/logx"
"github.com/zeromicro/go-zero/zrpc"
"gitlink.org.cn/JointCloud/pcm-ac/hpcacclient"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/config"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/collector"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/service/executor"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/storeLink"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
"gitlink.org.cn/JointCloud/pcm-modelarts/client/imagesservice"
"gitlink.org.cn/JointCloud/pcm-modelarts/client/modelartsservice"
"gitlink.org.cn/JointCloud/pcm-octopus/octopusclient"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
"net/http"
"strconv"
"sync"
"time"
)
const (
OCTOPUS = "octopus"
MODELARTS = "modelarts"
SHUGUANGAI = "shuguangAi"
)
func GetTaskList(svc *svc.ServiceContext) ([]*types.TaskModel, error) {
limit := 10
offset := 0
var list []*types.TaskModel
db := svc.DbEngin.Model(&types.TaskModel{}).Table("task")
db = db.Where("deleted_at is null")
//count total
var total int64
err := db.Count(&total).Error
db.Limit(limit).Offset(offset)
if err != nil {
return nil, err
}
err = db.Order("created_time desc").Find(&list).Error
if err != nil {
return nil, err
}
return list, nil
}
func UpdateAiTaskStatus(svc *svc.ServiceContext, tasklist []*types.TaskModel) {
list := make([]*types.TaskModel, len(tasklist))
copy(list, tasklist)
for i := len(list) - 1; i >= 0; i-- {
if list[i].AdapterTypeDict != 1 || list[i].Status == constants.Succeeded || list[i].Status == constants.Failed {
list = append(list[:i], list[i+1:]...)
}
}
if len(list) == 0 {
return
}
task := list[0]
for i := range list {
earliest, _ := time.Parse(constants.Layout, task.UpdatedTime)
latest, _ := time.Parse(constants.Layout, list[i].UpdatedTime)
if latest.Before(earliest) {
task = list[i]
}
}
var aiTaskList []*models.TaskAi
tx := svc.DbEngin.Raw("select * from task_ai where `task_id` = ? ", task.Id).Scan(&aiTaskList)
if tx.Error != nil {
logx.Errorf(tx.Error.Error())
return
}
if len(aiTaskList) == 0 {
return
}
var wg sync.WaitGroup
for _, aitask := range aiTaskList {
t := aitask
if t.Status == constants.Completed || t.Status == constants.Failed {
continue
}
wg.Add(1)
go func() {
h := http.Request{}
trainingTask, err := svc.Scheduler.AiService.AiCollectorAdapterMap[strconv.FormatInt(t.AdapterId, 10)][strconv.FormatInt(t.ClusterId, 10)].GetTrainingTask(h.Context(), t.JobId)
if err != nil {
if status.Code(err) == codes.DeadlineExceeded {
msg := fmt.Sprintf("###UpdateAiTaskStatus###, AiTaskId: %v, clusterId: %v , JobId: %v, error: %v \n", t.Id, t.ClusterId, t.JobId, err.Error())
logx.Errorf(errors.New(msg).Error())
wg.Done()
return
}
msg := fmt.Sprintf("###UpdateAiTaskStatus###, AiTaskId: %v, clusterId: %v , JobId: %v, error: %v \n", t.Id, t.ClusterId, t.JobId, err.Error())
logx.Errorf(errors.New(msg).Error())
wg.Done()
return
}
if trainingTask == nil {
wg.Done()
return
}
switch trainingTask.Status {
case constants.Running:
if t.Status != trainingTask.Status {
svc.Scheduler.AiStorages.AddNoticeInfo(strconv.FormatInt(t.AdapterId, 10), t.AdapterName, strconv.FormatInt(t.ClusterId, 10), t.ClusterName, t.Name, "running", "task is running")
t.Status = trainingTask.Status
}
case constants.Failed:
if t.Status != trainingTask.Status {
svc.Scheduler.AiStorages.AddNoticeInfo(strconv.FormatInt(t.AdapterId, 10), t.AdapterName, strconv.FormatInt(t.ClusterId, 10), t.ClusterName, t.Name, "failed", "task failed")
t.Status = trainingTask.Status
}
case constants.Completed:
if t.Status != trainingTask.Status {
svc.Scheduler.AiStorages.AddNoticeInfo(strconv.FormatInt(t.AdapterId, 10), t.AdapterName, strconv.FormatInt(t.ClusterId, 10), t.ClusterName, t.Name, "completed", "task completed")
t.Status = trainingTask.Status
}
default:
if t.Status != trainingTask.Status {
svc.Scheduler.AiStorages.AddNoticeInfo(strconv.FormatInt(t.AdapterId, 10), t.AdapterName, strconv.FormatInt(t.ClusterId, 10), t.ClusterName, t.Name, "pending", "task pending")
t.Status = trainingTask.Status
}
}
t.StartTime = trainingTask.Start
t.EndTime = trainingTask.End
err = svc.Scheduler.AiStorages.UpdateAiTask(t)
if err != nil {
msg := fmt.Sprintf("###UpdateAiTaskStatus###, AiTaskId: %v, clusterId: %v , JobId: %v, error: %v \n", t.Id, t.ClusterId, t.JobId, err.Error())
logx.Errorf(errors.New(msg).Error())
wg.Done()
return
}
wg.Done()
}()
}
wg.Wait()
}
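A robustness note on the goroutine above: it repeats wg.Done() on every early-return path and indexes AiCollectorAdapterMap without an ok-check, so a missing adapter or cluster entry would panic. A refactoring sketch, not part of this commit, that keeps the same behavior while deferring Done and guarding the lookup (context.Background() stands in for the empty http.Request used above, which yields the same context):

wg.Add(1)
go func(t *models.TaskAi) {
	defer wg.Done() // covers every return path below
	clusterMap, ok := svc.Scheduler.AiService.AiCollectorAdapterMap[strconv.FormatInt(t.AdapterId, 10)]
	if !ok {
		return
	}
	col, ok := clusterMap[strconv.FormatInt(t.ClusterId, 10)]
	if !ok {
		return
	}
	trainingTask, err := col.GetTrainingTask(context.Background(), t.JobId)
	if err != nil || trainingTask == nil {
		logx.Errorf("###UpdateAiTaskStatus###, AiTaskId: %v, clusterId: %v, JobId: %v, error: %v", t.Id, t.ClusterId, t.JobId, err)
		return
	}
	// ... status switch and UpdateAiTask exactly as above ...
}(aitask)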
func UpdateTaskStatus(svc *svc.ServiceContext, tasklist []*types.TaskModel) {
list := make([]*types.TaskModel, len(tasklist))
copy(list, tasklist)
for i := len(list) - 1; i >= 0; i-- {
if list[i].AdapterTypeDict != 1 || list[i].Status == constants.Succeeded || list[i].Status == constants.Failed {
list = append(list[:i], list[i+1:]...)
}
}
if len(list) == 0 {
return
}
task := list[0]
for i := range list {
earliest, _ := time.Parse(time.RFC3339, task.UpdatedTime)
latest, _ := time.Parse(time.RFC3339, list[i].UpdatedTime)
if latest.Before(earliest) {
task = list[i]
}
}
var aiTask []*models.TaskAi
tx := svc.DbEngin.Raw("select * from task_ai where `task_id` = ? ", task.Id).Scan(&aiTask)
if tx.Error != nil {
logx.Errorf(tx.Error.Error())
return
}
if len(aiTask) == 0 {
tx = svc.DbEngin.Model(task).Table("task").Where("deleted_at is null").Updates(task)
if tx.Error != nil {
logx.Errorf(tx.Error.Error())
return
}
return
}
if len(aiTask) == 1 {
if aiTask[0].Status == constants.Completed {
task.Status = constants.Succeeded
} else {
task.Status = aiTask[0].Status
}
task.StartTime = aiTask[0].StartTime
task.EndTime = aiTask[0].EndTime
task.UpdatedTime = time.Now().Format(constants.Layout)
tx = svc.DbEngin.Model(task).Table("task").Where("deleted_at is null").Updates(task)
if tx.Error != nil {
logx.Errorf(tx.Error.Error())
return
}
return
}
for i := len(aiTask) - 1; i >= 0; i-- {
if aiTask[i].StartTime == "" {
task.Status = aiTask[i].Status
aiTask = append(aiTask[:i], aiTask[i+1:]...)
}
}
if len(aiTask) == 0 {
task.UpdatedTime = time.Now().Format(constants.Layout)
tx = svc.DbEngin.Table("task").Model(task).Updates(task)
if tx.Error != nil {
logx.Errorf(tx.Error.Error())
return
}
return
}
start, _ := time.ParseInLocation(constants.Layout, aiTask[0].StartTime, time.Local)
end, _ := time.ParseInLocation(constants.Layout, aiTask[0].EndTime, time.Local)
var status string
var count int
for _, a := range aiTask {
s, _ := time.ParseInLocation(constants.Layout, a.StartTime, time.Local)
e, _ := time.ParseInLocation(constants.Layout, a.EndTime, time.Local)
if s.Before(start) {
start = s
}
if e.After(end) {
end = e
}
if a.Status == constants.Failed {
status = a.Status
break
}
if a.Status == constants.Pending {
status = a.Status
continue
}
if a.Status == constants.Running {
status = a.Status
continue
}
if a.Status == constants.Completed {
count++
continue
}
}
if count == len(aiTask) {
status = constants.Succeeded
}
if status != "" {
task.Status = status
task.StartTime = start.Format(constants.Layout)
task.EndTime = end.Format(constants.Layout)
}
task.UpdatedTime = time.Now().Format(constants.Layout)
tx = svc.DbEngin.Table("task").Model(task).Updates(task)
if tx.Error != nil {
logx.Errorf(tx.Error.Error())
return
}
}
func UpdateAiAdapterMaps(svc *svc.ServiceContext) {
var aiType = "1"
adapterIds, err := svc.Scheduler.AiStorages.GetAdapterIdsByType(aiType)
if err != nil {
msg := fmt.Sprintf("###UpdateAiAdapterMaps###, error: %v \n", err.Error())
logx.Errorf(errors.New(msg).Error())
return
}
if len(adapterIds) == 0 {
return
}
for _, id := range adapterIds {
clusters, err := svc.Scheduler.AiStorages.GetClustersByAdapterId(id)
if err != nil {
msg := fmt.Sprintf("###UpdateAiAdapterMaps###, error: %v \n", err.Error())
logx.Errorf(errors.New(msg).Error())
return
}
if len(clusters.List) == 0 {
continue
}
if isAdapterExist(svc, id, len(clusters.List)) {
continue
} else {
if isAdapterEmpty(svc, id) {
exeClusterMap, colClusterMap := InitAiClusterMap(&svc.Config, clusters.List)
svc.Scheduler.AiService.AiExecutorAdapterMap[id] = exeClusterMap
svc.Scheduler.AiService.AiCollectorAdapterMap[id] = colClusterMap
} else {
UpdateClusterMaps(svc, id, clusters.List)
}
}
}
}
func UpdateClusterMaps(svc *svc.ServiceContext, adapterId string, clusters []types.ClusterInfo) {
for _, c := range clusters {
_, ok := svc.Scheduler.AiService.AiExecutorAdapterMap[adapterId][c.Id]
_, ok2 := svc.Scheduler.AiService.AiCollectorAdapterMap[adapterId][c.Id]
if !ok && !ok2 {
switch c.Name {
case OCTOPUS:
id, _ := strconv.ParseInt(c.Id, 10, 64)
octopusRpc := octopusclient.NewOctopus(zrpc.MustNewClient(svc.Config.OctopusRpcConf))
octopus := storeLink.NewOctopusLink(octopusRpc, c.Nickname, id)
svc.Scheduler.AiService.AiExecutorAdapterMap[adapterId][c.Id] = octopus
svc.Scheduler.AiService.AiCollectorAdapterMap[adapterId][c.Id] = octopus
case MODELARTS:
id, _ := strconv.ParseInt(c.Id, 10, 64)
modelArtsRpc := modelartsservice.NewModelArtsService(zrpc.MustNewClient(svc.Config.ModelArtsRpcConf))
modelArtsImgRpc := imagesservice.NewImagesService(zrpc.MustNewClient(svc.Config.ModelArtsImgRpcConf))
modelarts := storeLink.NewModelArtsLink(modelArtsRpc, modelArtsImgRpc, c.Name, id, c.Nickname)
svc.Scheduler.AiService.AiExecutorAdapterMap[adapterId][c.Id] = modelarts
svc.Scheduler.AiService.AiCollectorAdapterMap[adapterId][c.Id] = modelarts
case SHUGUANGAI:
id, _ := strconv.ParseInt(c.Id, 10, 64)
aCRpc := hpcacclient.NewHpcAC(zrpc.MustNewClient(svc.Config.ACRpcConf))
sgai := storeLink.NewShuguangAi(aCRpc, c.Nickname, id)
svc.Scheduler.AiService.AiExecutorAdapterMap[adapterId][c.Id] = sgai
svc.Scheduler.AiService.AiCollectorAdapterMap[adapterId][c.Id] = sgai
}
} else {
continue
}
}
}
func isAdapterExist(svc *svc.ServiceContext, id string, clusterNum int) bool {
emap, ok := svc.Scheduler.AiService.AiExecutorAdapterMap[id]
cmap, ok2 := svc.Scheduler.AiService.AiCollectorAdapterMap[id]
if ok && ok2 {
if len(emap) == clusterNum && len(cmap) == clusterNum {
return true
}
}
return false
}
func isAdapterEmpty(svc *svc.ServiceContext, id string) bool {
_, ok := svc.Scheduler.AiService.AiExecutorAdapterMap[id]
_, ok2 := svc.Scheduler.AiService.AiCollectorAdapterMap[id]
if !ok && !ok2 {
return true
}
return false
}
func InitAiClusterMap(conf *config.Config, clusters []types.ClusterInfo) (map[string]executor.AiExecutor, map[string]collector.AiCollector) {
executorMap := make(map[string]executor.AiExecutor)
collectorMap := make(map[string]collector.AiCollector)
for _, c := range clusters {
switch c.Name {
case OCTOPUS:
id, _ := strconv.ParseInt(c.Id, 10, 64)
octopusRpc := octopusclient.NewOctopus(zrpc.MustNewClient(conf.OctopusRpcConf))
octopus := storeLink.NewOctopusLink(octopusRpc, c.Nickname, id)
collectorMap[c.Id] = octopus
executorMap[c.Id] = octopus
case MODELARTS:
id, _ := strconv.ParseInt(c.Id, 10, 64)
modelArtsRpc := modelartsservice.NewModelArtsService(zrpc.MustNewClient(conf.ModelArtsRpcConf))
modelArtsImgRpc := imagesservice.NewImagesService(zrpc.MustNewClient(conf.ModelArtsImgRpcConf))
modelarts := storeLink.NewModelArtsLink(modelArtsRpc, modelArtsImgRpc, c.Name, id, c.Nickname)
collectorMap[c.Id] = modelarts
executorMap[c.Id] = modelarts
case SHUGUANGAI:
id, _ := strconv.ParseInt(c.Id, 10, 64)
aCRpc := hpcacclient.NewHpcAC(zrpc.MustNewClient(conf.ACRpcConf))
sgai := storeLink.NewShuguangAi(aCRpc, c.Nickname, id)
collectorMap[c.Id] = sgai
executorMap[c.Id] = sgai
}
}
return executorMap, collectorMap
}
func UpdateClusterResource(svc *svc.ServiceContext) {
list, err := svc.Scheduler.AiStorages.GetAdaptersByType("1")
if err != nil {
return
}
var wg sync.WaitGroup
for _, adapter := range list {
clusters, err := svc.Scheduler.AiStorages.GetClustersByAdapterId(adapter.Id)
if err != nil {
continue
}
for _, cluster := range clusters.List {
c := cluster
clusterResource, err := svc.Scheduler.AiStorages.GetClusterResourcesById(c.Id)
if err != nil {
continue
}
wg.Add(1)
go func() {
_, ok := svc.Scheduler.AiService.AiCollectorAdapterMap[adapter.Id][c.Id]
if !ok {
wg.Done()
return
}
h := http.Request{}
stat, err := svc.Scheduler.AiService.AiCollectorAdapterMap[adapter.Id][c.Id].GetResourceStats(h.Context())
if err != nil {
wg.Done()
return
}
if stat == nil {
wg.Done()
return
}
clusterType, err := strconv.ParseInt(adapter.Type, 10, 64)
if err != nil {
wg.Done()
return
}
var cardTotal int64
var topsTotal float64
for _, card := range stat.CardsAvail {
cardTotal += int64(card.CardNum)
topsTotal += card.TOpsAtFp16 * float64(card.CardNum)
}
if (models.TClusterResource{} == *clusterResource) {
err = svc.Scheduler.AiStorages.SaveClusterResources(adapter.Id, c.Id, c.Name, clusterType, float64(stat.CpuCoreAvail), float64(stat.CpuCoreTotal),
stat.MemAvail, stat.MemTotal, stat.DiskAvail, stat.DiskTotal, float64(stat.GpuAvail), float64(stat.GpuTotal), cardTotal, topsTotal)
if err != nil {
wg.Done()
return
}
} else {
if stat.CpuCoreTotal == 0 || stat.MemTotal == 0 || stat.DiskTotal == 0 {
wg.Done()
return
}
clusterResource.CardTotal = cardTotal
clusterResource.CardTopsTotal = topsTotal
clusterResource.CpuAvail = float64(stat.CpuCoreAvail)
clusterResource.CpuTotal = float64(stat.CpuCoreTotal)
clusterResource.MemAvail = stat.MemAvail
clusterResource.MemTotal = stat.MemTotal
clusterResource.DiskAvail = stat.DiskAvail
clusterResource.DiskTotal = stat.DiskTotal
err := svc.Scheduler.AiStorages.UpdateClusterResources(clusterResource)
if err != nil {
wg.Done()
return
}
}
wg.Done()
}()
}
}
wg.Wait()
}
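Both this function and GetCenterOverviewLogic.updateClusterResource decide between insert and update by comparing the fetched record with the struct zero value. A small helper would keep that intent readable and give a single place to revisit if models.TClusterResource ever gains a non-comparable field; a sketch:

// isZeroClusterResource reports whether no resource record has been persisted for the cluster yet.
// Sketch only: it relies on models.TClusterResource staying a comparable struct, exactly like the
// inline `models.TClusterResource{} == *clusterResource` check it would replace.
func isZeroClusterResource(r *models.TClusterResource) bool {
	return r == nil || *r == (models.TClusterResource{})
}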

View File

@ -15,18 +15,27 @@
package cron
import (
"github.com/zeromicro/go-zero/core/logx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
)
func AddCronGroup(svc *svc.ServiceContext) {
// Delete monitoring data older than three days
svc.Cron.AddFunc("0 0 0 ? * ? ", func() {
ClearMetricsData(svc)
})
// Sync task information to the core side
svc.Cron.AddFunc("*/5 * * * * ?", func() {
SyncParticipantRpc(svc)
list, err := GetTaskList(svc)
if err != nil {
logx.Errorf(err.Error())
return
}
UpdateTaskStatus(svc, list)
UpdateAiTaskStatus(svc, list)
})
svc.Cron.AddFunc("*/5 * * * * ?", func() {
UpdateAiAdapterMaps(svc)
})
svc.Cron.AddFunc("*/59 * * * * ?", func() {
UpdateClusterResource(svc)
})
}
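The registered specs are six-field, seconds-first cron expressions, so svc.Cron is presumably backed by a robfig/cron-style scheduler created with seconds support (go-zero itself does not ship one). A standalone sketch under that assumption; one gotcha worth noting is that */59 in the seconds field matches seconds 0 and 59, i.e. twice per minute back to back, not "every 59 seconds":

package main

import (
	"fmt"
	"time"

	"github.com/robfig/cron/v3" // assumption: svc.Cron behaves like this scheduler
)

func main() {
	c := cron.New(cron.WithSeconds()) // enables the leading seconds field

	c.AddFunc("0 0 0 ? * ?", func() { fmt.Println("midnight: clear metrics older than three days") })
	c.AddFunc("*/5 * * * * ?", func() { fmt.Println("every 5s: sync task status") })
	c.AddFunc("*/59 * * * * ?", func() { fmt.Println("seconds 0 and 59: refresh cluster resources") })

	c.Start()
	defer c.Stop()
	time.Sleep(2 * time.Minute) // let a few ticks fire for demonstration
}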

View File

@ -1,28 +0,0 @@
/*
Copyright (c) [2023] [pcm]
[pcm-coordinator] is licensed under Mulan PSL v2.
You can use this software according to the terms and conditions of the Mulan PSL v2.
You may obtain a copy of Mulan PSL v2 at:
http://license.coscl.org.cn/MulanPSL2
THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
See the Mulan PSL v2 for more details.
*/
package cron
import (
"github.com/rs/zerolog/log"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
)
func ClearMetricsData(svc *svc.ServiceContext) {
tx := svc.DbEngin.Where("DATE(created_time) <= DATE(DATE_SUB(NOW(),INTERVAL 3 DAY))").Delete(&models.ScNodeAvailInfo{})
if tx.Error != nil {
log.Err(tx.Error)
}
}

View File

@ -1,41 +0,0 @@
/*
Copyright (c) [2023] [pcm]
[pcm-coordinator] is licensed under Mulan PSL v2.
You can use this software according to the terms and conditions of the Mulan PSL v2.
You may obtain a copy of Mulan PSL v2 at:
http://license.coscl.org.cn/MulanPSL2
THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
See the Mulan PSL v2 for more details.
*/
package cron
import (
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/tracker"
)
func SyncParticipantRpc(svc *svc.ServiceContext) {
// Query all participant (P-side) records
var participants []*models.ScParticipantPhyInfo
svc.DbEngin.Where("type in (?)", []string{constants.CLOUD, constants.SEALOS}).Find(&participants)
if len(participants) != 0 {
for _, participant := range participants {
if len(participant.MetricsUrl) != 0 {
// Initialize the participant's Prometheus client
promClient, err := tracker.NewPrometheus(participant.MetricsUrl)
if err != nil {
return
}
svc.MonitorClient[participant.Id] = promClient
}
}
}
}

View File

@ -1,24 +0,0 @@
package apps
import (
"github.com/zeromicro/go-zero/rest/httpx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/apps"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
"net/http"
)
func AppDetailHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
var req types.AppDetailReq
if err := httpx.Parse(r, &req); err != nil {
result.ParamErrorResult(r, w, err)
return
}
l := apps.NewAppDetailLogic(r.Context(), svcCtx)
resp, err := l.AppDetail(&req)
result.HttpResult(r, w, resp, err)
}
}

View File

@ -1,24 +0,0 @@
package apps
import (
"github.com/zeromicro/go-zero/rest/httpx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/apps"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
"net/http"
)
func AppListHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
var req types.AppListReq
if err := httpx.Parse(r, &req); err != nil {
result.ParamErrorResult(r, w, err)
return
}
l := apps.NewAppListLogic(r.Context(), svcCtx)
resp, err := l.AppList(&req)
result.HttpResult(r, w, resp, err)
}
}

View File

@ -1,24 +0,0 @@
package apps
import (
"github.com/zeromicro/go-zero/rest/httpx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/apps"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
"net/http"
)
func AppPodsHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
var req types.AppDetailReq
if err := httpx.Parse(r, &req); err != nil {
result.ParamErrorResult(r, w, err)
return
}
l := apps.NewAppPodsLogic(r.Context(), svcCtx)
resp, err := l.AppPods(&req)
result.HttpResult(r, w, resp, err)
}
}

View File

@ -1,24 +0,0 @@
package apps
import (
"github.com/zeromicro/go-zero/rest/httpx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/apps"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
"net/http"
)
func DeleteAppByAppNameHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
var req types.DeleteAppReq
if err := httpx.Parse(r, &req); err != nil {
result.ParamErrorResult(r, w, err)
return
}
l := apps.NewDeleteAppByAppNameLogic(r.Context(), svcCtx)
resp, err := l.DeleteAppByAppName(&req)
result.HttpResult(r, w, resp, err)
}
}

View File

@ -1,24 +0,0 @@
package apps
import (
"github.com/zeromicro/go-zero/rest/httpx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/apps"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
"net/http"
)
func GetAppByAppNameHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
var req types.AppDetailReq
if err := httpx.Parse(r, &req); err != nil {
result.ParamErrorResult(r, w, err)
return
}
l := apps.NewGetAppByAppNameLogic(r.Context(), svcCtx)
resp, err := l.GetAppByAppName(&req)
result.HttpResult(r, w, resp, err)
}
}

View File

@ -1,24 +0,0 @@
package apps
import (
"github.com/zeromicro/go-zero/rest/httpx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/apps"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
"net/http"
)
func PauseAppByAppNameHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
var req types.DeleteAppReq
if err := httpx.Parse(r, &req); err != nil {
result.ParamErrorResult(r, w, err)
return
}
l := apps.NewPauseAppByAppNameLogic(r.Context(), svcCtx)
resp, err := l.PauseAppByAppName(&req)
result.HttpResult(r, w, resp, err)
}
}

View File

@ -1,24 +0,0 @@
package apps
import (
"github.com/zeromicro/go-zero/rest/httpx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/apps"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
"net/http"
)
func RestartAppByAppNameHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
var req types.DeleteAppReq
if err := httpx.Parse(r, &req); err != nil {
result.ParamErrorResult(r, w, err)
return
}
l := apps.NewRestartAppByAppNameLogic(r.Context(), svcCtx)
resp, err := l.RestartAppByAppName(&req)
result.HttpResult(r, w, resp, err)
}
}

View File

@ -1,24 +0,0 @@
package apps
import (
"github.com/zeromicro/go-zero/rest/httpx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/apps"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
"net/http"
)
func StartAppByAppNameHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
var req types.DeleteAppReq
if err := httpx.Parse(r, &req); err != nil {
result.ParamErrorResult(r, w, err)
return
}
l := apps.NewStartAppByAppNameLogic(r.Context(), svcCtx)
resp, err := l.StartAppByAppName(&req)
result.HttpResult(r, w, resp, err)
}
}

View File

@ -1,24 +0,0 @@
package apps
import (
"github.com/zeromicro/go-zero/rest/httpx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/apps"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
"net/http"
)
func UpdateAppByAppNameHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
var req types.DeleteAppReq
if err := httpx.Parse(r, &req); err != nil {
result.ParamErrorResult(r, w, err)
return
}
l := apps.NewUpdateAppByAppNameLogic(r.Context(), svcCtx)
resp, err := l.UpdateAppByAppName(&req)
result.HttpResult(r, w, resp, err)
}
}

View File

@ -1,16 +0,0 @@
package cloud
import (
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/cloud"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
"net/http"
)
func NoticeTenantHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
l := cloud.NewNoticeTenantLogic(r.Context(), svcCtx)
resp, err := l.NoticeTenant()
result.HttpResult(r, w, resp, err)
}
}

View File

@ -1,24 +0,0 @@
package cloud
import (
"github.com/zeromicro/go-zero/rest/httpx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/cloud"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
"net/http"
)
func UpdateTenantHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
var req types.UpdateTenantReq
if err := httpx.Parse(r, &req); err != nil {
result.ParamErrorResult(r, w, err)
return
}
l := cloud.NewUpdateTenantLogic(r.Context(), svcCtx)
resp, err := l.UpdateTenant(&req)
result.HttpResult(r, w, resp, err)
}
}

View File

@ -1,24 +0,0 @@
package core
import (
"github.com/zeromicro/go-zero/rest/httpx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/core"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
"net/http"
)
func TaskDetailHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
var req types.TaskDetailReq
if err := httpx.Parse(r, &req); err != nil {
result.ParamErrorResult(r, w, err)
return
}
l := core.NewTaskDetailLogic(r.Context(), svcCtx)
resp, err := l.TaskDetail(&req)
result.HttpResult(r, w, resp, err)
}
}

View File

@ -1,16 +0,0 @@
package image
import (
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/image"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
"net/http"
)
func ChunkHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
l := image.NewChunkLogic(r.Context(), svcCtx)
err := l.Chunk()
result.HttpResult(r, w, nil, err)
}
}

View File

@ -1,24 +0,0 @@
package image
import (
"github.com/zeromicro/go-zero/rest/httpx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/image"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
"net/http"
)
func DataSetCheckHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
var req types.CheckReq
if err := httpx.Parse(r, &req); err != nil {
result.ParamErrorResult(r, w, err)
return
}
l := image.NewDataSetCheckLogic(r.Context(), svcCtx)
resp, err := l.DataSetCheck(&req)
result.HttpResult(r, w, resp, err)
}
}

View File

@ -1,16 +0,0 @@
package image
import (
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/image"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
"net/http"
)
func ImageListHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
l := image.NewImageListLogic(r.Context(), svcCtx)
resp, err := l.ImageList()
result.HttpResult(r, w, resp, err)
}
}

View File

@ -1,16 +0,0 @@
package image
import (
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/image"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
"net/http"
)
func UploadDataSetHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
l := image.NewUploadDataSetLogic(r.Context(), svcCtx)
err := l.UploadDataSet()
result.HttpResult(r, w, nil, err)
}
}

View File

@ -1,16 +0,0 @@
package image
import (
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/image"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
"net/http"
)
func UploadHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
l := image.NewUploadLogic(r.Context(), svcCtx)
err := l.Upload()
result.HttpResult(r, w, nil, err)
}
}

View File

@ -6,12 +6,10 @@ import (
adapters "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/handler/adapters"
ai "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/handler/ai"
apps "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/handler/apps"
cloud "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/handler/cloud"
core "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/handler/core"
dictionary "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/handler/dictionary"
hpc "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/handler/hpc"
image "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/handler/image"
monitoring "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/handler/monitoring"
schedule "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/handler/schedule"
storage "gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/handler/storage"
@ -55,11 +53,6 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
Path: "/core/taskList",
Handler: core.TaskListHandler(serverCtx),
},
{
Method: http.MethodGet,
Path: "/core/taskDetail/:taskId",
Handler: core.TaskDetailHandler(serverCtx),
},
{
Method: http.MethodGet,
Path: "/core/jobTotal",
@ -257,16 +250,6 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
Path: "/cloud/deleteCluster",
Handler: cloud.DeleteClusterHandler(serverCtx),
},
{
Method: http.MethodGet,
Path: "/cloud/noticeTenant",
Handler: cloud.NoticeTenantHandler(serverCtx),
},
{
Method: http.MethodPost,
Path: "/cloud/updateTenant",
Handler: cloud.UpdateTenantHandler(serverCtx),
},
{
Method: http.MethodGet,
Path: "/core/clusterList",
@ -478,37 +461,6 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
rest.WithPrefix("/pcm/v1"),
)
server.AddRoutes(
[]rest.Route{
{
Method: http.MethodPost,
Path: "/upload",
Handler: image.UploadHandler(serverCtx),
},
{
Method: http.MethodPost,
Path: "/chunk",
Handler: image.ChunkHandler(serverCtx),
},
{
Method: http.MethodGet,
Path: "/image/list",
Handler: image.ImageListHandler(serverCtx),
},
{
Method: http.MethodGet,
Path: "/dataSet/check/:fileMd5",
Handler: image.DataSetCheckHandler(serverCtx),
},
{
Method: http.MethodPost,
Path: "/dataSet/upload",
Handler: image.UploadDataSetHandler(serverCtx),
},
},
rest.WithPrefix("/pcm/v1"),
)
server.AddRoutes(
[]rest.Route{
{
@ -1051,57 +1003,6 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
rest.WithPrefix("/pcm/v1"),
)
server.AddRoutes(
[]rest.Route{
{
Method: http.MethodGet,
Path: "/apps/list",
Handler: apps.AppListHandler(serverCtx),
},
{
Method: http.MethodGet,
Path: "/apps/distribute/:appName",
Handler: apps.AppDetailHandler(serverCtx),
},
{
Method: http.MethodGet,
Path: "/apps/pods/:appName",
Handler: apps.AppPodsHandler(serverCtx),
},
{
Method: http.MethodGet,
Path: "/apps/getAppByAppName/:appName",
Handler: apps.GetAppByAppNameHandler(serverCtx),
},
{
Method: http.MethodDelete,
Path: "/apps/deleteApp",
Handler: apps.DeleteAppByAppNameHandler(serverCtx),
},
{
Method: http.MethodPut,
Path: "/apps/updateApp",
Handler: apps.UpdateAppByAppNameHandler(serverCtx),
},
{
Method: http.MethodPut,
Path: "/apps/restartApp",
Handler: apps.RestartAppByAppNameHandler(serverCtx),
},
{
Method: http.MethodPut,
Path: "/apps/pauseApp",
Handler: apps.PauseAppByAppNameHandler(serverCtx),
},
{
Method: http.MethodPut,
Path: "/apps/startApp",
Handler: apps.StartAppByAppNameHandler(serverCtx),
},
},
rest.WithPrefix("/pcm/v1"),
)
server.AddRoutes(
[]rest.Route{
{

View File

@ -43,5 +43,23 @@ func (l *CreateClusterLogic) CreateCluster(req *types.ClusterCreateReq) (resp *t
logx.Errorf(tx.Error.Error())
return nil, errors.New("cluster create failed")
}
// push cluster info to adapter
var adapterServer string
l.svcCtx.DbEngin.Raw("select server from t_adapter where id = ?", req.AdapterId).Scan(&adapterServer)
response, err := l.svcCtx.HttpClient.R().
SetBody(&types.ClusterInfo{
Name: req.Name,
Server: req.Server,
Token: req.Token,
MonitorServer: req.MonitorServer,
}).
ForceContentType("application/json").
Post(adapterServer + "/api/v1/cluster/info")
if err != nil {
}
if response.IsError() {
}
return
}
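The two checks added after the push are left empty, so a failed notification to the adapter is swallowed silently. One option is to log and return an explicit error, since the cluster row is already saved at this point; a sketch along those lines, reusing the resty-style response the surrounding code already works with (whether to fail the request or merely log it is a product decision):

if err != nil {
	logx.Errorf("push cluster info to adapter %s failed: %v", adapterServer, err)
	return nil, errors.New("cluster created, but pushing cluster info to the adapter failed")
}
if response.IsError() {
	logx.Errorf("adapter %s rejected cluster info: %s", adapterServer, response.Status())
	return nil, errors.New("cluster created, but the adapter rejected the cluster info")
}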

View File

@ -79,7 +79,6 @@ func (l *GetCenterOverviewLogic) GetCenterOverview() (resp *types.CenterOverview
case <-time.After(1 * time.Second):
return resp, nil
}
}
func (l *GetCenterOverviewLogic) updateClusterResource(mu *sync.RWMutex, ch chan<- struct{}, list []*types.AdapterInfo) {
@ -127,7 +126,7 @@ func (l *GetCenterOverviewLogic) updateClusterResource(mu *sync.RWMutex, ch chan
mu.Lock()
if (models.TClusterResource{} == *clusterResource) {
err = l.svcCtx.Scheduler.AiStorages.SaveClusterResources(c.Id, c.Name, clusterType, float64(stat.CpuCoreAvail), float64(stat.CpuCoreTotal),
err = l.svcCtx.Scheduler.AiStorages.SaveClusterResources(adapter.Id, c.Id, c.Name, clusterType, float64(stat.CpuCoreAvail), float64(stat.CpuCoreTotal),
stat.MemAvail, stat.MemTotal, stat.DiskAvail, stat.DiskTotal, float64(stat.GpuAvail), float64(stat.GpuTotal), cardTotal, topsTotal)
if err != nil {
mu.Unlock()
@ -135,8 +134,19 @@ func (l *GetCenterOverviewLogic) updateClusterResource(mu *sync.RWMutex, ch chan
return
}
} else {
if stat.CpuCoreTotal == 0 || stat.MemTotal == 0 || stat.DiskTotal == 0 {
wg.Done()
return
}
clusterResource.CardTotal = cardTotal
clusterResource.CardTopsTotal = topsTotal
clusterResource.CpuAvail = float64(stat.CpuCoreAvail)
clusterResource.CpuTotal = float64(stat.CpuCoreTotal)
clusterResource.MemAvail = stat.MemAvail
clusterResource.MemTotal = stat.MemTotal
clusterResource.DiskAvail = stat.DiskAvail
clusterResource.DiskTotal = stat.DiskTotal
err := l.svcCtx.Scheduler.AiStorages.UpdateClusterResources(clusterResource)
if err != nil {
mu.Unlock()

View File

@ -1,42 +0,0 @@
package apps
import (
"context"
"gitlink.org.cn/JointCloud/pcm-kubernetes/kubernetes"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"github.com/zeromicro/go-zero/core/logx"
)
type AppDetailLogic struct {
logx.Logger
ctx context.Context
svcCtx *svc.ServiceContext
}
func NewAppDetailLogic(ctx context.Context, svcCtx *svc.ServiceContext) *AppDetailLogic {
return &AppDetailLogic{
Logger: logx.WithContext(ctx),
ctx: ctx,
svcCtx: svcCtx,
}
}
func (l *AppDetailLogic) AppDetail(req *types.AppDetailReq) (resp *kubernetes.AppDetailResp, err error) {
resp = &kubernetes.AppDetailResp{}
// Call the participant (P-side) API to query application details
appDetail, err := l.svcCtx.K8sRpc.GetAppDetail(context.Background(), &kubernetes.AppDetailReq{
Namespace: req.NsID,
Name: req.Name,
})
if err != nil {
logx.Errorf("failed to query application details from the participant, err: %v", err)
resp.Code = "500"
resp.Msg = err.Error()
return resp, err
}
resp.Code = "200"
return appDetail, err
}

View File

@ -1,114 +0,0 @@
package apps
import (
"context"
"gitlink.org.cn/JointCloud/pcm-kubernetes/kubernetes"
"gorm.io/datatypes"
"gorm.io/gorm"
"time"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"github.com/zeromicro/go-zero/core/logx"
)
type AppListLogic struct {
logx.Logger
ctx context.Context
svcCtx *svc.ServiceContext
}
func NewAppListLogic(ctx context.Context, svcCtx *svc.ServiceContext) *AppListLogic {
return &AppListLogic{
Logger: logx.WithContext(ctx),
ctx: ctx,
svcCtx: svcCtx,
}
}
type Task struct {
Id int64 `db:"id"` // id
Name string `db:"name"` // job name
Description string `db:"description"` // job description
Status string `db:"status"` // job status
Strategy int64 `db:"strategy"` // scheduling strategy
SynergyStatus int64 `db:"synergy_status"` // synergy status (0 - not synergized, 1 - synergized)
CommitTime time.Time `db:"commit_time"` // commit time
StartTime string `db:"start_time"` // start time
EndTime string `db:"end_time"` // end time
RunningTime int64 `db:"running_time"` // elapsed running time (seconds)
YamlString datatypes.JSON `db:"yaml_string"`
Result string `db:"result"` // job result
DeletedAt gorm.DeletedAt `gorm:"index"`
NsID string `db:"ns_id"`
PName string `db:"p_name"` // participant (P-side) name
PId int64 `db:"p_id"` // participant (P-side) id
}
func (l *AppListLogic) AppList(req *types.AppListReq) (resp *types.AppListResp, err error) {
var tasks []Task
resp = &types.AppListResp{}
l.svcCtx.DbEngin.Raw("select * from task t where t.`ns_id` = ? AND t.`deleted_at` IS NULL ORDER BY t.created_time Desc", req.NsID).Scan(&tasks)
for _, task := range tasks {
// Call the participant (P-side) API to query application status: running, creating, waiting, error, pause
data, err := l.svcCtx.K8sRpc.GetAppByAppName(context.Background(), &kubernetes.DeploymentDetailReq{
Namespace: req.NsID,
Name: task.Name,
})
if err != nil {
logx.Errorf("failed to query application from the participant, err: %v", err)
return resp, err
}
minReplicas := ""
maxReplicas := ""
status := "creating"
if data.Data.Deployment != nil {
app := data.Data.Deployment
maxReplicas = app.Metadata.Annotations["deploy.cloud.sealos.io/maxReplicas"]
minReplicas = app.Metadata.Annotations["deploy.cloud.sealos.io/minReplicas"]
if app.Status != nil {
if app.Status.Replicas == nil && app.Status.AvailableReplicas == nil {
status = "pause"
} else if app.Status.Replicas != nil && app.Status.AvailableReplicas == nil {
status = "creating"
} else if *app.Status.Replicas == *app.Status.AvailableReplicas {
status = "running"
}
}
} else if data.Data.StatefulSet != nil {
app := data.Data.StatefulSet
maxReplicas = app.Metadata.Annotations["deploy.cloud.sealos.io/maxReplicas"]
minReplicas = app.Metadata.Annotations["deploy.cloud.sealos.io/minReplicas"]
if app.Status != nil {
replicas := app.Status.Replicas
availableReplicas := app.Status.AvailableReplicas
if *replicas == 0 && *availableReplicas == 0 {
status = "pause"
} else if *replicas == *availableReplicas {
status = "running"
} else if *replicas > *availableReplicas {
status = "creating"
}
}
}
var details []types.AppLocation
sql :=
`select phy.id as participant_id, phy.name as participant_name, c.kind
from cloud c
join sc_participant_phy_info phy on c.participant_id = phy.id
WHERE c.kind in ('Deployment', 'StatefulSet')
and task_id = ?`
l.svcCtx.DbEngin.Raw(sql, task.Id).Scan(&details)
resp.Apps = append(resp.Apps, types.App{
Id: task.Id,
Name: task.Name,
Status: status,
CreateTime: task.CommitTime.Format("2006-01-02 15:04:05"),
MinReplicas: minReplicas,
MaxReplicas: maxReplicas,
AppLocations: details,
})
}
return
}

View File

@ -1,42 +0,0 @@
package apps
import (
"context"
"gitlink.org.cn/JointCloud/pcm-kubernetes/kubernetes"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"github.com/zeromicro/go-zero/core/logx"
)
type AppPodsLogic struct {
logx.Logger
ctx context.Context
svcCtx *svc.ServiceContext
}
func NewAppPodsLogic(ctx context.Context, svcCtx *svc.ServiceContext) *AppPodsLogic {
return &AppPodsLogic{
Logger: logx.WithContext(ctx),
ctx: ctx,
svcCtx: svcCtx,
}
}
func (l *AppPodsLogic) AppPods(req *types.AppDetailReq) (resp *kubernetes.PodDetailResp, err error) {
resp = &kubernetes.PodDetailResp{}
// Call the participant (P-side) API to query the application's pod list
podList, err := l.svcCtx.K8sRpc.GetAppPodsByAppName(context.Background(), &kubernetes.AppDetailReq{
Namespace: req.NsID,
Name: req.Name,
})
if err != nil {
logx.Errorf("failed to query application pods from the participant, err: %v", err)
resp.Code = "500"
resp.Msg = err.Error()
return resp, err
}
resp.Code = "200"
return podList, err
}

View File

@ -1,49 +0,0 @@
package apps
import (
"context"
"github.com/zeromicro/go-zero/core/logx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
)
type DeleteAppByAppNameLogic struct {
logx.Logger
ctx context.Context
svcCtx *svc.ServiceContext
}
func NewDeleteAppByAppNameLogic(ctx context.Context, svcCtx *svc.ServiceContext) *DeleteAppByAppNameLogic {
return &DeleteAppByAppNameLogic{
Logger: logx.WithContext(ctx),
ctx: ctx,
svcCtx: svcCtx,
}
}
func (l *DeleteAppByAppNameLogic) DeleteAppByAppName(req *types.DeleteAppReq) (resp *types.DeleteAppResp, err error) {
resp = &types.DeleteAppResp{}
var task = &Task{}
// Look up the application's yamlString record
l.svcCtx.DbEngin.Raw(`select * from task where ns_id= ? and name= ? AND deleted_at IS NULL`, req.NsID, req.Name).Scan(&task)
if task.Id == 0 {
resp.Code = 500
resp.Msg = "App not found"
return resp, err
}
// Delete the main task record
l.svcCtx.DbEngin.Model(&models.Task{}).Where("id", task.Id).Update("status", constants.Deleted)
tx := l.svcCtx.DbEngin.Delete(&models.Task{}, task.Id)
if tx.Error != nil {
return nil, tx.Error
}
// Mark the subtasks as pending deletion
tx = l.svcCtx.DbEngin.Model(&models.Cloud{}).Where("task_id", task.Id).Update("status", constants.WaitDelete)
l.svcCtx.DbEngin.Where("task_id = ?", task.Id).Delete(&models.Cloud{})
if tx.Error != nil {
return nil, tx.Error
}
return
}

View File

@ -1,33 +0,0 @@
package apps
import (
"context"
"github.com/zeromicro/go-zero/core/logx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
)
type GetAppByAppNameLogic struct {
logx.Logger
ctx context.Context
svcCtx *svc.ServiceContext
}
func NewGetAppByAppNameLogic(ctx context.Context, svcCtx *svc.ServiceContext) *GetAppByAppNameLogic {
return &GetAppByAppNameLogic{
Logger: logx.WithContext(ctx),
ctx: ctx,
svcCtx: svcCtx,
}
}
func (l *GetAppByAppNameLogic) GetAppByAppName(req *types.AppDetailReq) (resp interface{}, err error) {
var task Task
db := l.svcCtx.DbEngin.Raw("select * from task where ns_id = ? and name = ? AND deleted_at IS NULL", req.NsID, req.Name).Scan(&task)
if db.Error != nil {
logx.Errorf("db error: %v", db.Error)
return
}
resp = task.YamlString
return
}

View File

@ -1,41 +0,0 @@
package apps
import (
"context"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"github.com/zeromicro/go-zero/core/logx"
)
type PauseAppByAppNameLogic struct {
logx.Logger
ctx context.Context
svcCtx *svc.ServiceContext
}
func NewPauseAppByAppNameLogic(ctx context.Context, svcCtx *svc.ServiceContext) *PauseAppByAppNameLogic {
return &PauseAppByAppNameLogic{
Logger: logx.WithContext(ctx),
ctx: ctx,
svcCtx: svcCtx,
}
}
func (l *PauseAppByAppNameLogic) PauseAppByAppName(req *types.DeleteAppReq) (resp *types.AppResp, err error) {
resp = &types.AppResp{}
var task = &Task{}
// Look up the application's yamlString record
l.svcCtx.DbEngin.Raw(`select * from task where ns_id= ? and name= ? AND deleted_at IS NULL`, req.NsID, req.Name).Scan(&task)
if task.Id == 0 {
resp.Code = 500
resp.Msg = "App not found"
return resp, err
}
// Mark the subtasks as pending pause
l.svcCtx.DbEngin.Model(&models.Cloud{}).Where("task_id", task.Id).Update("status", constants.WaitPause)
return
}

View File

@ -1,41 +0,0 @@
package apps
import (
"context"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"github.com/zeromicro/go-zero/core/logx"
)
type RestartAppByAppNameLogic struct {
logx.Logger
ctx context.Context
svcCtx *svc.ServiceContext
}
func NewRestartAppByAppNameLogic(ctx context.Context, svcCtx *svc.ServiceContext) *RestartAppByAppNameLogic {
return &RestartAppByAppNameLogic{
Logger: logx.WithContext(ctx),
ctx: ctx,
svcCtx: svcCtx,
}
}
func (l *RestartAppByAppNameLogic) RestartAppByAppName(req *types.DeleteAppReq) (resp *types.AppResp, err error) {
resp = &types.AppResp{}
var task = &Task{}
// Look up the application's yamlString record
l.svcCtx.DbEngin.Raw(`select * from task where ns_id= ? and name= ? AND deleted_at IS NULL`, req.NsID, req.Name).Scan(&task)
if task.Id == 0 {
resp.Code = 500
resp.Msg = "App not found"
return resp, err
}
// Mark the subtasks as pending restart
l.svcCtx.DbEngin.Model(&models.Cloud{}).Where("task_id", task.Id).Update("status", constants.WaitRestart)
return
}

View File

@ -1,41 +0,0 @@
package apps
import (
"context"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"github.com/zeromicro/go-zero/core/logx"
)
type StartAppByAppNameLogic struct {
logx.Logger
ctx context.Context
svcCtx *svc.ServiceContext
}
func NewStartAppByAppNameLogic(ctx context.Context, svcCtx *svc.ServiceContext) *StartAppByAppNameLogic {
return &StartAppByAppNameLogic{
Logger: logx.WithContext(ctx),
ctx: ctx,
svcCtx: svcCtx,
}
}
func (l *StartAppByAppNameLogic) StartAppByAppName(req *types.DeleteAppReq) (resp *types.AppResp, err error) {
resp = &types.AppResp{}
var task = &Task{}
// Look up the application's yamlString record
l.svcCtx.DbEngin.Raw(`select * from task where ns_id= ? and name= ? AND deleted_at IS NULL`, req.NsID, req.Name).Scan(&task)
if task.Id == 0 {
resp.Code = 500
resp.Msg = "App not found"
return resp, err
}
// Mark the subtasks as pending start
l.svcCtx.DbEngin.Model(&models.Cloud{}).Where("task_id", task.Id).Update("status", constants.WaitStart)
return
}

View File

@ -1,30 +0,0 @@
package apps
import (
"context"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"github.com/zeromicro/go-zero/core/logx"
)
type UpdateAppByAppNameLogic struct {
logx.Logger
ctx context.Context
svcCtx *svc.ServiceContext
}
func NewUpdateAppByAppNameLogic(ctx context.Context, svcCtx *svc.ServiceContext) *UpdateAppByAppNameLogic {
return &UpdateAppByAppNameLogic{
Logger: logx.WithContext(ctx),
ctx: ctx,
svcCtx: svcCtx,
}
}
func (l *UpdateAppByAppNameLogic) UpdateAppByAppName(req *types.DeleteAppReq) (resp *types.AppTaskResp, err error) {
// todo: add your logic here and delete this line
return
}

View File

@ -106,6 +106,7 @@ func (l *CommitGeneralTaskLogic) CommitGeneralTask(req *types.GeneralTaskReq) er
sStruct := UnMarshalK8sStruct(s, int64(r.Replica))
unString, _ := sStruct.MarshalJSON()
taskCloud.Id = utils.GenSnowflakeIDUint()
taskCloud.Name = sStruct.GetName() + "-" + sStruct.GetKind()
taskCloud.TaskId = uint(taskModel.Id)
clusterId, _ := strconv.ParseUint(r.ClusterId, 10, 64)
taskCloud.AdapterId = uint(adapterId)

View File

@ -1,56 +0,0 @@
package cloud
import (
"context"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
"gitlink.org.cn/JointCloud/pcm-kubernetes/kubernetes"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"github.com/zeromicro/go-zero/core/logx"
)
type NoticeTenantLogic struct {
logx.Logger
ctx context.Context
svcCtx *svc.ServiceContext
}
func NewNoticeTenantLogic(ctx context.Context, svcCtx *svc.ServiceContext) *NoticeTenantLogic {
return &NoticeTenantLogic{
Logger: logx.WithContext(ctx),
ctx: ctx,
svcCtx: svcCtx,
}
}
// NoticeTenant triggers an update of the tenant data
func (l *NoticeTenantLogic) NoticeTenant() (resp *types.CloudResp, err error) {
var tenants []*models.ScTenantInfo
// Fetch tenant info from the participant's Kubernetes
namespace, err := l.svcCtx.K8sRpc.ListNamespace(context.Background(), &kubernetes.NamespaceListReq{})
if err != nil {
logx.Errorf("failed to fetch tenant info: %v", err)
return nil, err
}
// Delete all existing tenant data first
l.svcCtx.DbEngin.Where("type = 0").Delete(models.ScTenantInfo{})
// Iterate over the tenant info
for k, v := range namespace.Data {
tenants = append(tenants, &models.ScTenantInfo{
Id: utils.GenSnowflakeID(),
TenantName: k,
Clusters: v,
Type: 0,
})
}
tx := l.svcCtx.DbEngin.Save(&tenants)
if tx.Error != nil {
logx.Errorf("failed to save tenant info: %v", tx.Error)
return nil, err
}
logx.Info("tenant data updated successfully")
return
}

View File

@ -1,49 +0,0 @@
package cloud
import (
"context"
"github.com/zeromicro/go-zero/core/logx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
"gitlink.org.cn/JointCloud/pcm-kubernetes/kubernetes"
)
type UpdateTenantLogic struct {
logx.Logger
ctx context.Context
svcCtx *svc.ServiceContext
}
func NewUpdateTenantLogic(ctx context.Context, svcCtx *svc.ServiceContext) *UpdateTenantLogic {
return &UpdateTenantLogic{
Logger: logx.WithContext(ctx),
ctx: ctx,
svcCtx: svcCtx,
}
}
// UpdateTenant updates the tenant data
func (l *UpdateTenantLogic) UpdateTenant(req *types.UpdateTenantReq) (resp *types.CloudResp, err error) {
var tenants []*models.ScTenantInfo
// Fetch tenant info from the participant's Kubernetes
namespace, err := l.svcCtx.K8sRpc.ListNamespace(context.Background(), &kubernetes.NamespaceListReq{})
if err != nil {
return nil, err
}
// Delete all existing tenant data first
l.svcCtx.DbEngin.Where("type = 0").Delete(models.ScTenantInfo{})
// Iterate over the tenant info
for k, v := range namespace.Data {
tenants = append(tenants, &models.ScTenantInfo{
Id: utils.GenSnowflakeID(),
TenantName: k,
Clusters: v,
Type: 0,
})
}
l.svcCtx.DbEngin.Save(&tenants)
return
}

View File

@ -46,20 +46,22 @@ func (l *CenterResourcesLogic) CenterResources() (resp *types.CenterResourcesRes
for _, centerIndex := range centersIndex {
// Query the types of resource centers
l.svcCtx.DbEngin.Raw("select name,type as CenterType from t_adapter where id = ?", centerIndex.Id).Scan(&centerIndex)
cpuRawData, err := l.svcCtx.PromClient.GetRawData("center_cpu_utilisation", tracker.AdapterOption{AdapterId: centerIndex.Id})
var clustersName string
l.svcCtx.DbEngin.Raw("SELECT GROUP_CONCAT(name SEPARATOR '|' ) as clustersName from t_cluster where adapter_id = ?", centerIndex.Id).Scan(&clustersName)
cpuRawData, err := l.svcCtx.PromClient.GetRawData("center_cpu_utilisation", tracker.AdapterOption{AdapterId: centerIndex.Id, ClustersName: clustersName})
cpuData := cpuRawData.(model.Vector)
if err != nil {
return nil, err
}
centerIndex.Cpu = cpuData[0].Value.String()
memoryRawData, err := l.svcCtx.PromClient.GetRawData("center_memory_utilisation", tracker.AdapterOption{AdapterId: centerIndex.Id})
memoryRawData, err := l.svcCtx.PromClient.GetRawData("center_memory_utilisation", tracker.AdapterOption{AdapterId: centerIndex.Id, ClustersName: clustersName})
if err != nil {
return nil, err
}
memoryData := memoryRawData.(model.Vector)
centerIndex.Memory = memoryData[0].Value.String()
diskRawData, err := l.svcCtx.PromClient.GetRawData("center_disk_utilisation", tracker.AdapterOption{AdapterId: centerIndex.Id})
diskRawData, err := l.svcCtx.PromClient.GetRawData("center_disk_utilisation", tracker.AdapterOption{AdapterId: centerIndex.Id, ClustersName: clustersName})
if err != nil {
return nil, err
}
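clustersName arrives as a '|'-separated list (GROUP_CONCAT(name SEPARATOR '|')), which suggests it feeds a regex label matcher in the PromQL that tracker.GetRawData builds. Purely as a hypothetical illustration of that shape; the label name and query construction below are assumptions, not code from this repository:

package main

import "fmt"

func main() {
	// Hypothetical illustration only: label name and query shape are assumptions,
	// not code from tracker.GetRawData.
	adapterId := "3"
	clustersName := "clusterA|clusterB" // what GROUP_CONCAT(name SEPARATOR '|') produces
	query := fmt.Sprintf(`center_cpu_utilisation{adapter_id=%q, cluster_name=~%q}`, adapterId, clustersName)
	fmt.Println(query) // center_cpu_utilisation{adapter_id="3", cluster_name=~"clusterA|clusterB"}
}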

View File

@ -80,6 +80,7 @@ func (l *PageListTaskLogic) PageListTask(req *types.PageTaskReq) (resp *types.Pa
for _, ch := range chs {
select {
case <-ch:
case <-time.After(1 * time.Second):
}
}
return
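The added time.After case caps each channel wait at one second, so the worst case for the whole loop is roughly len(chs) seconds. If a single overall deadline is preferred, one shared timer does it; a sketch (a bare time.After channel cannot be reused across iterations, since it only ever delivers one value):

timer := time.NewTimer(1 * time.Second) // one budget for all result channels
defer timer.Stop()
for _, ch := range chs {
	select {
	case <-ch:
	case <-timer.C:
		return // budget exhausted; stop waiting on the remaining channels
	}
}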

View File

@ -2,12 +2,11 @@ package core
import (
"context"
"github.com/zeromicro/go-zero/core/logx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/tracker"
"strconv"
"github.com/zeromicro/go-zero/core/logx"
tool "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
)
type SyncClusterLoadLogic struct {
@ -25,23 +24,11 @@ func NewSyncClusterLoadLogic(ctx context.Context, svcCtx *svc.ServiceContext) *S
}
func (l *SyncClusterLoadLogic) SyncClusterLoad(req *types.SyncClusterLoadReq) error {
if len(req.ClusterLoadRecords) != 0 {
if nil != req.ClusterLoadRecords {
for _, record := range req.ClusterLoadRecords {
tracker.ClusterCpuUtilisationGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.CpuUtilisation)
tracker.ClusterCpuAvailGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.CpuAvail)
tracker.ClusterCpuTotalGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.CpuTotal)
tracker.ClusterMemoryUtilisationGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.MemoryUtilisation)
tracker.ClusterMemoryAvailGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.MemoryAvail)
tracker.ClusterMemoryTotalGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.MemoryTotal)
tracker.ClusterDiskUtilisationGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.DiskUtilisation)
tracker.ClusterDiskAvailGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.DiskAvail)
tracker.ClusterDiskTotalGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.DiskTotal)
tracker.ClusterPodUtilisationGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.PodsUtilisation)
tracker.ClusterPodCountGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(float64(record.PodsCount))
tracker.ClusterPodTotalGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(float64(record.PodsTotal))
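// convert the incoming record into the tracker's ClusterLoadRecord and let SyncClusterLoad update all gauges in one place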
var param tracker.ClusterLoadRecord
tool.Convert(record, &param)
tracker.SyncClusterLoad(param)
}
}
return nil

View File

@ -1,147 +0,0 @@
/*
Copyright (c) [2023] [pcm]
[pcm-coordinator] is licensed under Mulan PSL v2.
You can use this software according to the terms and conditions of the Mulan PSL v2.
You may obtain a copy of Mulan PSL v2 at:
http://license.coscl.org.cn/MulanPSL2
THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
See the Mulan PSL v2 for more details.
*/
package core
import (
"context"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/tracker"
"gitlink.org.cn/JointCloud/pcm-kubernetes/kubernetesclient"
"time"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"github.com/zeromicro/go-zero/core/logx"
)
type TaskDetailLogic struct {
logx.Logger
ctx context.Context
svcCtx *svc.ServiceContext
}
func NewTaskDetailLogic(ctx context.Context, svcCtx *svc.ServiceContext) *TaskDetailLogic {
return &TaskDetailLogic{
Logger: logx.WithContext(ctx),
ctx: ctx,
svcCtx: svcCtx,
}
}
func (l *TaskDetailLogic) TaskDetail(req *types.TaskDetailReq) (resp *types.TaskDetailResp, err error) {
resp = &types.TaskDetailResp{}
var clouds []models.Cloud
l.svcCtx.DbEngin.Where("task_id = ?", req.TaskId).Find(&clouds)
for _, cloud := range clouds {
if l.svcCtx.K8sRpc == nil {
continue
}
// look up the metrics url of this participant
var metricsUrl string
l.svcCtx.DbEngin.Raw(" SELECT metrics_url FROM `sc_participant_phy_info` WHERE id = ? ", cloud.ParticipantId).Scan(&metricsUrl)
//
var pods []*kubernetesclient.Pod
switch cloud.Kind {
case "Job":
jobResult, err := l.svcCtx.K8sRpc.JobDetail(context.Background(), &kubernetesclient.JobDetailReq{
Namespace: cloud.Namespace,
Name: cloud.Name,
})
if err != nil {
return nil, err
}
// list the pods associated with this job
uid := jobResult.Job.Metadata.Labels["controller-uid"]
LabelSelector := "controller-uid=" + uid
podResp, err := l.svcCtx.K8sRpc.PodList(context.Background(), &kubernetesclient.PodListReq{
ListOptions: &kubernetesclient.ListOptions{
LabelSelector: &LabelSelector,
},
})
if err != nil {
return nil, err
}
pods = podResp.PodList.Items
case "Deployment":
deploymentResult, err := l.svcCtx.K8sRpc.DeploymentDetail(context.Background(), &kubernetesclient.DeploymentDetailReq{
Namespace: cloud.Namespace,
Name: cloud.Name,
})
if err != nil {
return nil, err
}
// list the pods associated with this deployment
uid := deploymentResult.Deployment.Spec.Selector.MatchLabels["app"]
LabelSelector := "app=" + uid
podResp, err := l.svcCtx.K8sRpc.PodList(context.Background(), &kubernetesclient.PodListReq{
ListOptions: &kubernetesclient.ListOptions{
LabelSelector: &LabelSelector,
},
})
if err != nil {
return nil, err
}
pods = podResp.PodList.Items
}
podsMetrics(metricsUrl, pods, resp)
}
return resp, nil
}
func podsMetrics(metricsUrl string, pods []*kubernetesclient.Pod, resp *types.TaskDetailResp) {
// query resource usage for each pod
for _, pod := range pods {
prometheusClient, _ := tracker.NewPrometheus(metricsUrl)
// cpu limit
podCpuLimit := prometheusClient.GetNamedMetrics([]string{"pod_cpu_resource_limits"}, time.Now(), tracker.PodOption{
PodName: *pod.Metadata.Name,
NamespaceName: *pod.Metadata.Namespace,
})
resp.CpuLimit = metricAdd(resp.CpuLimit, podCpuLimit)
// cpu usage
podCpuUsage := prometheusClient.GetNamedMetrics([]string{"pod_cpu_usage"}, time.Now(), tracker.PodOption{
PodName: *pod.Metadata.Name,
NamespaceName: *pod.Metadata.Namespace,
})
resp.CpuCores = metricAdd(resp.CpuCores, podCpuUsage)
// memory usage
podMemoryUsage := prometheusClient.GetNamedMetrics([]string{"pod_memory_usage"}, time.Now(), tracker.PodOption{
PodName: *pod.Metadata.Name,
NamespaceName: *pod.Metadata.Namespace,
})
resp.MemoryTotal = metricAdd(resp.MemoryTotal, podMemoryUsage)
// memory limit
podMemoryLimit := prometheusClient.GetNamedMetrics([]string{"pod_memory_resource_limits"}, time.Now(), tracker.PodOption{
PodName: *pod.Metadata.Name,
NamespaceName: *pod.Metadata.Namespace,
})
resp.MemoryLimit = metricAdd(resp.MemoryLimit, podMemoryLimit)
}
resp.MemoryTotal = resp.MemoryTotal / float64(1073741824)
resp.MemoryLimit = resp.MemoryLimit / float64(1073741824)
}
func metricAdd(z float64, metric []tracker.Metric) float64 {
if metric[0].MetricValues != nil {
z = z + metric[0].MetricValues[0].Sample.Value()
}
return z
}

View File

@ -32,18 +32,22 @@ func (l *TaskDetailsLogic) TaskDetails(req *types.FId) (resp *types.TaskDetailsR
if errors.Is(l.svcCtx.DbEngin.Where("id", req.Id).First(&task).Error, gorm.ErrRecordNotFound) {
return nil, errors.New("记录不存在")
}
clusterIds := make([]int64, 0)
clusterIds := make([]string, 0)
var cList []*types.ClusterInfo
var subList []*types.SubTaskInfo
switch task.AdapterTypeDict {
case 0:
l.svcCtx.DbEngin.Table("task_cloud").Select("cluster_id").Where("task_id", task.Id).Scan(&clusterIds)
if len(clusterIds) <= 0 {
l.svcCtx.DbEngin.Table("task_vm").Select("cluster_id").Where("task_id", task.Id).Find(&clusterIds)
l.svcCtx.DbEngin.Table("task_cloud").Where("task_id", task.Id).Scan(&subList)
if len(subList) <= 0 {
l.svcCtx.DbEngin.Table("task_vm").Where("task_id", task.Id).Find(&subList)
}
case 1:
l.svcCtx.DbEngin.Table("task_ai").Select("cluster_id").Where("task_id", task.Id).Scan(&clusterIds)
l.svcCtx.DbEngin.Table("task_ai").Where("task_id", task.Id).Scan(&subList)
case 2:
l.svcCtx.DbEngin.Table("task_hpc").Select("cluster_id").Where("task_id", task.Id).Scan(&clusterIds)
l.svcCtx.DbEngin.Table("task_hpc").Where("task_id", task.Id).Scan(&subList)
}
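// collect the cluster ids referenced by the sub tasks so their cluster details can be fetched in a single query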
for _, sub := range subList {
clusterIds = append(clusterIds, sub.ClusterId)
}
err = l.svcCtx.DbEngin.Table("t_cluster").Where("id in ?", clusterIds).Scan(&cList).Error
if err != nil {
@ -51,5 +55,6 @@ func (l *TaskDetailsLogic) TaskDetails(req *types.FId) (resp *types.TaskDetailsR
}
utils.Convert(&task, &resp)
resp.ClusterInfos = cList
resp.SubTaskInfos = subList
return
}

View File

@ -1,42 +0,0 @@
/*
Copyright (c) [2023] [pcm]
[pcm-coordinator] is licensed under Mulan PSL v2.
You can use this software according to the terms and conditions of the Mulan PSL v2.
You may obtain a copy of Mulan PSL v2 at:
http://license.coscl.org.cn/MulanPSL2
THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
See the Mulan PSL v2 for more details.
*/
package image
import (
"context"
"github.com/zeromicro/go-zero/core/logx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
)
type ChunkLogic struct {
logx.Logger
ctx context.Context
svcCtx *svc.ServiceContext
}
func NewChunkLogic(ctx context.Context, svcCtx *svc.ServiceContext) *ChunkLogic {
return &ChunkLogic{
Logger: logx.WithContext(ctx),
ctx: ctx,
svcCtx: svcCtx,
}
}
func (l *ChunkLogic) Chunk() error {
// todo: add your logic here and delete this line
return nil
}

View File

@ -1,51 +0,0 @@
/*
Copyright (c) [2023] [pcm]
[pcm-coordinator] is licensed under Mulan PSL v2.
You can use this software according to the terms and conditions of the Mulan PSL v2.
You may obtain a copy of Mulan PSL v2 at:
http://license.coscl.org.cn/MulanPSL2
THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
See the Mulan PSL v2 for more details.
*/
package image
import (
"context"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"github.com/zeromicro/go-zero/core/logx"
)
type DataSetCheckLogic struct {
logx.Logger
ctx context.Context
svcCtx *svc.ServiceContext
}
func NewDataSetCheckLogic(ctx context.Context, svcCtx *svc.ServiceContext) *DataSetCheckLogic {
return &DataSetCheckLogic{
Logger: logx.WithContext(ctx),
ctx: ctx,
svcCtx: svcCtx,
}
}
func (l *DataSetCheckLogic) DataSetCheck(req *types.CheckReq) (resp *types.CheckResp, err error) {
resp = &types.CheckResp{}
var dataSets []models.File
l.svcCtx.DbEngin.Find(&dataSets).Where("md5", req.FileMd5)
if len(dataSets) != 0 {
resp.Exist = true
} else {
resp.Exist = false
}
return resp, nil
}

View File

@ -1,52 +0,0 @@
/*
Copyright (c) [2023] [pcm]
[pcm-coordinator] is licensed under Mulan PSL v2.
You can use this software according to the terms and conditions of the Mulan PSL v2.
You may obtain a copy of Mulan PSL v2 at:
http://license.coscl.org.cn/MulanPSL2
THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
See the Mulan PSL v2 for more details.
*/
package image
import (
"context"
"github.com/zeromicro/go-zero/core/logx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
)
type ImageListLogic struct {
logx.Logger
ctx context.Context
svcCtx *svc.ServiceContext
}
type ImageTags struct {
Name string `json:"name"`
Tags []string `json:"tags" copier:"tags"`
}
func NewImageListLogic(ctx context.Context, svcCtx *svc.ServiceContext) *ImageListLogic {
return &ImageListLogic{
Logger: logx.WithContext(ctx),
ctx: ctx,
svcCtx: svcCtx,
}
}
func (l *ImageListLogic) ImageList() (resp *types.ImageListResp, err error) {
var images []string
l.svcCtx.DbEngin.Raw("select distinct name from file where kind = 'image'").Scan(&images)
result := types.ImageListResp{
Repositories: images,
}
return &result, nil
}

View File

@ -1,40 +0,0 @@
/*
Copyright (c) [2023] [pcm]
[pcm-coordinator] is licensed under Mulan PSL v2.
You can use this software according to the terms and conditions of the Mulan PSL v2.
You may obtain a copy of Mulan PSL v2 at:
http://license.coscl.org.cn/MulanPSL2
THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
See the Mulan PSL v2 for more details.
*/
package image
import (
"context"
"github.com/zeromicro/go-zero/core/logx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
)
type UploadDataSetLogic struct {
logx.Logger
ctx context.Context
svcCtx *svc.ServiceContext
}
func NewUploadDataSetLogic(ctx context.Context, svcCtx *svc.ServiceContext) *UploadDataSetLogic {
return &UploadDataSetLogic{
Logger: logx.WithContext(ctx),
ctx: ctx,
svcCtx: svcCtx,
}
}
func (l *UploadDataSetLogic) UploadDataSet() error {
return nil
}

View File

@ -1,42 +0,0 @@
/*
Copyright (c) [2023] [pcm]
[pcm-coordinator] is licensed under Mulan PSL v2.
You can use this software according to the terms and conditions of the Mulan PSL v2.
You may obtain a copy of Mulan PSL v2 at:
http://license.coscl.org.cn/MulanPSL2
THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
See the Mulan PSL v2 for more details.
*/
package image
import (
"context"
"github.com/zeromicro/go-zero/core/logx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
)
type UploadLogic struct {
logx.Logger
ctx context.Context
svcCtx *svc.ServiceContext
}
func NewUploadLogic(ctx context.Context, svcCtx *svc.ServiceContext) *UploadLogic {
return &UploadLogic{
Logger: logx.WithContext(ctx),
ctx: ctx,
svcCtx: svcCtx,
}
}
func (l *UploadLogic) Upload() error {
// todo: add your logic here and delete this line
return nil
}

View File

@ -64,7 +64,13 @@ func (l *ScheduleSubmitLogic) ScheduleSubmit(req *types.ScheduleReq) (resp *type
synergystatus = 1
}
strategyCode, err := l.svcCtx.Scheduler.AiStorages.GetStrategyCode(req.AiOption.Strategy)
if err != nil {
return nil, err
}
adapterName, err := l.svcCtx.Scheduler.AiStorages.GetAdapterNameById(rs[0].AdapterId)
if err != nil {
return nil, err
}
id, err := l.svcCtx.Scheduler.AiStorages.SaveTask(req.AiOption.TaskName, strategyCode, synergystatus)
if err != nil {
return nil, err
@ -84,11 +90,13 @@ func (l *ScheduleSubmitLogic) ScheduleSubmit(req *types.ScheduleReq) (resp *type
clusterName, _ := l.svcCtx.Scheduler.AiStorages.GetClusterNameById(r.ClusterId)
err := l.svcCtx.Scheduler.AiStorages.SaveAiTask(id, opt, r.ClusterId, clusterName, r.JobId, constants.Saved, r.Msg)
err := l.svcCtx.Scheduler.AiStorages.SaveAiTask(id, opt, adapterName, r.ClusterId, clusterName, r.JobId, constants.Saved, r.Msg)
if err != nil {
return nil, err
}
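// record a notice entry in t_notice marking this sub task as being created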
l.svcCtx.Scheduler.AiStorages.AddNoticeInfo(r.AdapterId, adapterName, r.ClusterId, clusterName, r.TaskName, "create", "任务创建中")
resp.Results = append(resp.Results, scheResult)
}

View File

@ -2,10 +2,12 @@ package database
import (
"github.com/zeromicro/go-zero/core/logx"
clientCore "gitlink.org.cn/JointCloud/pcm-coordinator/api/client"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/scheduler/schedulers/option"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/tracker"
"gorm.io/gorm"
"strconv"
"time"
@ -45,6 +47,16 @@ func (s *AiStorage) GetClusterNameById(id string) (string, error) {
return name, nil
}
func (s *AiStorage) GetAdapterNameById(id string) (string, error) {
var name string
tx := s.DbEngin.Raw("select `name` from t_adapter where `id` = ?", id).Scan(&name)
if tx.Error != nil {
logx.Errorf(tx.Error.Error())
return "", tx.Error
}
return name, nil
}
func (s *AiStorage) GetAdapterIdsByType(adapterType string) ([]string, error) {
var list []types.AdapterInfo
var ids []string
@ -101,7 +113,7 @@ func (s *AiStorage) SaveTask(name string, strategyCode int64, synergyStatus int6
return taskModel.Id, nil
}
func (s *AiStorage) SaveAiTask(taskId int64, option *option.AiOption, clusterId string, clusterName string, jobId string, status string, msg string) error {
func (s *AiStorage) SaveAiTask(taskId int64, option *option.AiOption, adapterName string, clusterId string, clusterName string, jobId string, status string, msg string) error {
// build the ai task record
aId, err := strconv.ParseInt(option.AdapterId, 10, 64)
if err != nil {
@ -115,6 +127,7 @@ func (s *AiStorage) SaveAiTask(taskId int64, option *option.AiOption, clusterId
aiTaskModel := models.TaskAi{
TaskId: taskId,
AdapterId: aId,
AdapterName: adapterName,
ClusterId: cId,
ClusterName: clusterName,
Name: option.TaskName,
@ -186,13 +199,18 @@ func (s *AiStorage) GetClusterResourcesById(clusterId string) (*models.TClusterR
return &clusterResource, nil
}
func (s *AiStorage) SaveClusterResources(clusterId string, clusterName string, clusterType int64, cpuAvail float64, cpuTotal float64,
func (s *AiStorage) SaveClusterResources(adapterId string, clusterId string, clusterName string, clusterType int64, cpuAvail float64, cpuTotal float64,
memAvail float64, memTotal float64, diskAvail float64, diskTotal float64, gpuAvail float64, gpuTotal float64, cardTotal int64, topsTotal float64) error {
cId, err := strconv.ParseInt(clusterId, 10, 64)
if err != nil {
return err
}
aId, err := strconv.ParseInt(adapterId, 10, 64)
if err != nil {
return err
}
clusterResource := models.TClusterResource{
AdapterId: aId,
ClusterId: cId,
ClusterName: clusterName,
ClusterType: clusterType,
@ -211,14 +229,45 @@ func (s *AiStorage) SaveClusterResources(clusterId string, clusterName string, c
if tx.Error != nil {
return tx.Error
}
// push the saved resource figures to the Prometheus gauges
param := tracker.ClusterLoadRecord{
AdapterId: aId,
ClusterName: clusterName,
CpuAvail: cpuAvail,
CpuTotal: cpuTotal,
CpuUtilisation: clusterResource.CpuAvail / clusterResource.CpuTotal,
MemoryAvail: memAvail,
MemoryTotal: memTotal,
MemoryUtilisation: clusterResource.MemAvail / clusterResource.MemTotal,
DiskAvail: diskAvail,
DiskTotal: diskTotal,
DiskUtilisation: clusterResource.DiskAvail / clusterResource.DiskTotal,
}
tracker.SyncClusterLoad(param)
return nil
}
func (s *AiStorage) UpdateClusterResources(clusterResource *models.TClusterResource) error {
tx := s.DbEngin.Model(clusterResource).Updates(clusterResource)
tx := s.DbEngin.Where("cluster_id = ?", clusterResource.ClusterId).Updates(clusterResource)
if tx.Error != nil {
return tx.Error
}
// push the updated resource figures to the Prometheus gauges
param := tracker.ClusterLoadRecord{
AdapterId: clusterResource.AdapterId,
ClusterName: clusterResource.ClusterName,
CpuAvail: clusterResource.CpuAvail,
CpuTotal: clusterResource.CpuTotal,
CpuUtilisation: clusterResource.CpuAvail / clusterResource.CpuTotal,
MemoryAvail: clusterResource.MemAvail,
MemoryTotal: clusterResource.MemTotal,
MemoryUtilisation: clusterResource.MemAvail / clusterResource.MemTotal,
DiskAvail: clusterResource.DiskAvail,
DiskTotal: clusterResource.DiskTotal,
DiskUtilisation: clusterResource.DiskAvail / clusterResource.DiskTotal,
}
tracker.SyncClusterLoad(param)
return nil
}
@ -244,3 +293,28 @@ func (s *AiStorage) GetStrategyCode(name string) (int64, error) {
}
return strategy, nil
}
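// AddNoticeInfo writes a notice row to t_notice; id parse failures return silently so notices never block the caller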
func (s *AiStorage) AddNoticeInfo(adapterId string, adapterName string, clusterId string, clusterName string, taskName string, noticeType string, incident string) {
aId, err := strconv.ParseInt(adapterId, 10, 64)
if err != nil {
return
}
cId, err := strconv.ParseInt(clusterId, 10, 64)
if err != nil {
return
}
noticeInfo := clientCore.NoticeInfo{
AdapterId: aId,
AdapterName: adapterName,
ClusterId: cId,
ClusterName: clusterName,
NoticeType: noticeType,
TaskName: taskName,
Incident: incident,
CreatedTime: time.Now(),
}
result := s.DbEngin.Table("t_notice").Create(&noticeInfo)
if result.Error != nil {
logx.Errorf("Task creation failure, err: %v", result.Error)
}
}

View File

@ -45,6 +45,8 @@ type AiScheduler struct {
}
type AiResult struct {
AdapterId string
TaskName string
JobId string
ClusterId string
Strategy string
@ -190,6 +192,8 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) (interfa
result, _ = convertType(resp)
mu.Unlock()
result.AdapterId = opt.AdapterId
result.TaskName = opt.TaskName
result.Replica = c.Replicas
result.ClusterId = c.ClusterId
result.Strategy = as.option.StrategyName
@ -222,6 +226,10 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) (interfa
if err != nil {
return nil, errors.New("database add failed: " + err.Error())
}
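// resolve the adapter's display name once; it is attached to every sub task saved below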
adapterName, err := as.AiStorages.GetAdapterNameById(as.option.AdapterId)
if err != nil {
return nil, err
}
var errmsg string
for _, err := range errs {
@ -234,7 +242,7 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) (interfa
clusterName, _ := as.AiStorages.GetClusterNameById(e.clusterId)
err := as.AiStorages.SaveAiTask(taskId, as.option, e.clusterId, clusterName, "", constants.Failed, msg)
err := as.AiStorages.SaveAiTask(taskId, as.option, adapterName, e.clusterId, clusterName, "", constants.Failed, msg)
if err != nil {
return nil, errors.New("database add failed: " + err.Error())
}
@ -246,14 +254,14 @@ func (as *AiScheduler) AssignTask(clusters []*strategy.AssignedCluster) (interfa
if s.Msg != "" {
msg := fmt.Sprintf("clusterId: %v , error: %v \n", s.ClusterId, s.Msg)
errmsg += msg
err := as.AiStorages.SaveAiTask(taskId, as.option, s.ClusterId, clusterName, "", constants.Failed, msg)
err := as.AiStorages.SaveAiTask(taskId, as.option, adapterName, s.ClusterId, clusterName, "", constants.Failed, msg)
if err != nil {
return nil, errors.New("database add failed: " + err.Error())
}
} else {
msg := fmt.Sprintf("clusterId: %v , submitted successfully, jobId: %v \n", s.ClusterId, s.JobId)
errmsg += msg
err := as.AiStorages.SaveAiTask(taskId, as.option, s.ClusterId, clusterName, s.JobId, constants.Saved, msg)
err := as.AiStorages.SaveAiTask(taskId, as.option, adapterName, s.ClusterId, clusterName, s.JobId, constants.Saved, msg)
if err != nil {
return nil, errors.New("database add failed: " + err.Error())
}

View File

@ -13,6 +13,7 @@ import (
"gitlink.org.cn/JointCloud/pcm-modelarts/client/modelartsservice"
"gitlink.org.cn/JointCloud/pcm-octopus/octopusclient"
"strconv"
"sync"
)
const (
@ -24,6 +25,8 @@ const (
type AiService struct {
AiExecutorAdapterMap map[string]map[string]executor.AiExecutor
AiCollectorAdapterMap map[string]map[string]collector.AiCollector
Storage *database.AiStorage
mu sync.Mutex
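// mu: assumed to serialize concurrent updates to the executor/collector adapter maps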
}
func NewAiService(conf *config.Config, storages *database.AiStorage) (*AiService, error) {
@ -35,12 +38,16 @@ func NewAiService(conf *config.Config, storages *database.AiStorage) (*AiService
aiService := &AiService{
AiExecutorAdapterMap: make(map[string]map[string]executor.AiExecutor),
AiCollectorAdapterMap: make(map[string]map[string]collector.AiCollector),
Storage: storages,
}
for _, id := range adapterIds {
clusters, err := storages.GetClustersByAdapterId(id)
if err != nil {
return nil, err
}
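// adapters without any registered clusters are skipped rather than stored as empty maps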
if len(clusters.List) == 0 {
continue
}
exeClusterMap, colClusterMap := InitAiClusterMap(conf, clusters.List)
aiService.AiExecutorAdapterMap[id] = exeClusterMap
aiService.AiCollectorAdapterMap[id] = colClusterMap
@ -78,3 +85,11 @@ func InitAiClusterMap(conf *config.Config, clusters []types.ClusterInfo) (map[st
return executorMap, collectorMap
}
//func (a *AiService) AddCluster() error {
//
//}
//
//func (a *AiService) AddAdapter() error {
//
//}

View File

@ -29,7 +29,6 @@ import (
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/tracker"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
"gitlink.org.cn/JointCloud/pcm-coordinator/rpc/client/participantservice"
"gitlink.org.cn/JointCloud/pcm-kubernetes/kubernetesclient"
"gitlink.org.cn/JointCloud/pcm-modelarts/client/imagesservice"
"gitlink.org.cn/JointCloud/pcm-modelarts/client/modelartsservice"
"gitlink.org.cn/JointCloud/pcm-octopus/octopusclient"
@ -55,7 +54,6 @@ type ServiceContext struct {
OctopusRpc octopusclient.Octopus
CephRpc cephclient.Ceph
OpenstackRpc openstackclient.Openstack
K8sRpc kubernetesclient.Kubernetes
MonitorClient map[int64]tracker.Prometheus
ParticipantRpc participantservice.ParticipantService
PromClient tracker.Prometheus
@ -91,7 +89,7 @@ func NewServiceContext(c config.Config) *ServiceContext {
NamingStrategy: schema.NamingStrategy{
SingularTable: true, // use singular table names; with this option enabled, the table for `User` is `t_user`
},
Logger: logger.Default.LogMode(logger.Error),
Logger: logger.Default.LogMode(logger.Info),
})
if err != nil {
logx.Errorf("数据库连接失败, err%v", err)
@ -126,8 +124,8 @@ func NewServiceContext(c config.Config) *ServiceContext {
scheduler := scheduler.NewSchdlr(aiService, storage)
return &ServiceContext{
Cron: cron.New(cron.WithSeconds()),
DbEngin: dbEngin,
Cron: cron.New(cron.WithSeconds()),
Config: c,
RedisClient: redisClient,
ModelArtsRpc: modelartsservice.NewModelArtsService(zrpc.MustNewClient(c.ModelArtsRpcConf)),
@ -136,7 +134,6 @@ func NewServiceContext(c config.Config) *ServiceContext {
ACRpc: hpcacclient.NewHpcAC(zrpc.MustNewClient(c.ACRpcConf)),
OctopusRpc: octopusclient.NewOctopus(zrpc.MustNewClient(c.OctopusRpcConf)),
OpenstackRpc: openstackclient.NewOpenstack(zrpc.MustNewClient(c.OpenstackRpcConf)),
K8sRpc: kubernetesclient.NewKubernetes(zrpc.MustNewClient(c.K8sNativeConf)),
MonitorClient: make(map[int64]tracker.Prometheus),
ParticipantRpc: participantservice.NewParticipantService(zrpc.MustNewClient(c.PcmCoreRpcConf)),
PromClient: promClient,

View File

@ -1166,6 +1166,16 @@ type TaskDetailsResp struct {
Strategy int64 `json:"strategy"`
SynergyStatus int64 `json:"synergyStatus"`
ClusterInfos []*ClusterInfo `json:"clusterInfos"`
SubTaskInfos []*SubTaskInfo `json:"subTaskInfos"`
}
type SubTaskInfo struct {
Id string `json:"id" db:"id"`
Name string `json:"name" db:"name"`
ClusterId string `json:"clusterId" db:"cluster_id"`
ClusterName string `json:"clusterName" db:"cluster_name"`
Status string `json:"status" db:"status"`
Remark string `json:"remark" db:"remark"`
}
type CommitHpcTaskReq struct {

View File

@ -68,7 +68,6 @@ podTemplate(label: label, containers: [
cd ${code_path}
sed -i 's#image_name#${image}#' ${project_name}.yaml
sed -i 's#secret_name#${secret_name}#' ${project_name}.yaml
sed -i 's#nacos_host#${nacos_host}#' ${project_name}.yaml
cat ${project_name}.yaml
kubectl apply -f ${project_name}.yaml
"""

View File

@ -67,7 +67,6 @@ podTemplate(label: label, containers: [
cd ${code_path}
sed -i 's#image_name#${image}#' ${project_name}.yaml
sed -i 's#secret_name#${secret_name}#' ${project_name}.yaml
sed -i 's#nacos_host#${nacos_host}#' ${project_name}.yaml
cat ${project_name}.yaml
kubectl apply -f ${project_name}.yaml
"""

go.mod
View File

@ -5,9 +5,7 @@ go 1.21
retract v0.1.20-0.20240319015239-6ae13da05255
require (
github.com/JCCE-nudt/zero-contrib/zrpc/registry/nacos v0.0.0-20230419021610-13bbc83fbc3c
github.com/Masterminds/squirrel v1.5.4
github.com/aliyun/alibaba-cloud-sdk-go v1.61.1800
github.com/bwmarrin/snowflake v0.3.0
github.com/ghodss/yaml v1.0.0
github.com/go-redis/redis/v8 v8.11.5
@ -15,14 +13,12 @@ require (
github.com/go-sql-driver/mysql v1.7.1
github.com/jinzhu/copier v0.4.0
github.com/json-iterator/go v1.1.12
github.com/nacos-group/nacos-sdk-go/v2 v2.2.5
github.com/pkg/errors v0.9.1
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.71.2
github.com/prometheus/alertmanager v0.27.0
github.com/prometheus/client_golang v1.19.0
github.com/prometheus/common v0.52.2
github.com/robfig/cron/v3 v3.0.1
github.com/rs/zerolog v1.28.0
github.com/zeromicro/go-zero v1.6.3
gitlink.org.cn/JointCloud/pcm-ac v0.0.0-20240426095603-549fefd8bece
gitlink.org.cn/JointCloud/pcm-kubernetes v0.0.0-20240301071143-347480abff2c
@ -46,17 +42,11 @@ require (
require (
github.com/alecthomas/kingpin/v2 v2.4.0 // indirect
github.com/alecthomas/units v0.0.0-20231202071711-9a357b53e9c9 // indirect
github.com/alibabacloud-go/debug v0.0.0-20190504072949-9472017b5c68 // indirect
github.com/alibabacloud-go/tea v1.1.17 // indirect
github.com/alibabacloud-go/tea-utils v1.4.4 // indirect
github.com/aliyun/alibabacloud-dkms-gcs-go-sdk v0.2.2 // indirect
github.com/aliyun/alibabacloud-dkms-transfer-go-sdk v0.1.7 // indirect
github.com/armon/go-metrics v0.4.1 // indirect
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect
github.com/aws/aws-sdk-go v1.50.8 // indirect
github.com/benbjohnson/clock v1.3.5 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/buger/jsonparser v1.1.1 // indirect
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/coreos/go-semver v0.3.1 // indirect
@ -166,8 +156,6 @@ require (
google.golang.org/genproto/googleapis/api v0.0.0-20240401170217-c3f982113cda // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240401170217-c3f982113cda // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/ini.v1 v1.67.0 // indirect
gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/api v0.29.3 // indirect

go.sum

File diff suppressed because it is too large

View File

@ -1,241 +0,0 @@
/*
Copyright (c) [2023] [pcm]
[pcm-coordinator] is licensed under Mulan PSL v2.
You can use this software according to the terms and conditions of the Mulan PSL v2.
You may obtain a copy of Mulan PSL v2 at:
http://license.coscl.org.cn/MulanPSL2
THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
See the Mulan PSL v2 for more details.
*/
package nacos
import (
"context"
"encoding/json"
"fmt"
"github.com/JCCE-nudt/zero-contrib/zrpc/registry/nacos"
"github.com/nacos-group/nacos-sdk-go/v2/clients"
"github.com/nacos-group/nacos-sdk-go/v2/clients/nacos_client"
"github.com/nacos-group/nacos-sdk-go/v2/clients/naming_client/naming_cache"
"github.com/nacos-group/nacos-sdk-go/v2/clients/naming_client/naming_proxy"
"github.com/nacos-group/nacos-sdk-go/v2/common/constant"
"github.com/nacos-group/nacos-sdk-go/v2/common/http_agent"
"github.com/nacos-group/nacos-sdk-go/v2/common/nacos_server"
"github.com/nacos-group/nacos-sdk-go/v2/common/security"
"github.com/nacos-group/nacos-sdk-go/v2/vo"
"github.com/zeromicro/go-zero/core/logx"
"github.com/zeromicro/go-zero/rest"
"github.com/zeromicro/go-zero/zrpc"
nacosVo "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/helper/nacos/vo"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
"net/http"
"sync"
)
type (
BootstrapConfig struct {
NacosConfig NacosConfig
}
ListenConfig func(data string)
NacosServerConfig struct {
IpAddr string
Port uint64
}
NacosClientConfig struct {
NamespaceId string
TimeoutMs uint64
NotLoadCacheAtStart bool
LogDir string
CacheDir string
LogLevel string
}
NacosConfig struct {
ServerConfigs []NacosServerConfig
ClientConfig NacosClientConfig
DataId string
Group string
}
)
// NamingClient ...
type NamingClient struct {
nacos_client.INacosClient
ctx context.Context
cancel context.CancelFunc
serviceProxy naming_proxy.INamingProxy
serviceInfoHolder *naming_cache.ServiceInfoHolder
clientConfig constant.ClientConfig
}
func (n *NacosConfig) Discovery(c *zrpc.RpcServerConf) {
sc, cc := n.buildConfig()
opts := nacos.NewNacosConfig(c.Name, c.ListenOn, sc, &cc)
opts.Group = n.Group
err := nacos.RegisterService(opts)
if err != nil {
panic(err)
}
}
func (n *NacosConfig) DiscoveryRest(c *rest.RestConf) {
sc, cc := n.buildConfig()
opts := nacos.NewNacosConfig(c.Name, fmt.Sprintf("%s:%d", c.Host, c.Port), sc, &cc)
err := nacos.RegisterService(opts)
if err != nil {
panic(err)
}
}
func (n *NacosConfig) InitConfig(listenConfigCallback ListenConfig) string {
//nacos server
sc, cc := n.buildConfig()
pa := vo.NacosClientParam{
ClientConfig: &cc,
ServerConfigs: sc,
}
configClient, err := clients.NewConfigClient(pa)
if err != nil {
panic(err)
}
// fetch the configuration content from the config center
content, err := configClient.GetConfig(vo.ConfigParam{
DataId: n.DataId,
Group: n.Group,
})
if err != nil {
panic(err)
}
// register a listener for configuration changes
if err = configClient.ListenConfig(vo.ConfigParam{
DataId: n.DataId,
Group: n.Group,
OnChange: func(namespace, group, dataId, data string) {
// triggered whenever the configuration file changes
if len(data) == 0 {
logx.Errorf("listen nacos data nil error , namespace : %sgroup : %s , dataId : %s , data : %s", namespace, group, dataId, data)
return
}
listenConfigCallback(data)
},
}); err != nil {
panic(err)
}
if len(content) == 0 {
panic("read nacos nacos content err , content is nil")
}
return content
}
func (n *NacosConfig) buildConfig() ([]constant.ServerConfig, constant.ClientConfig) {
var sc []constant.ServerConfig
if len(n.ServerConfigs) == 0 {
panic("nacos server no set")
}
for _, serveConfig := range n.ServerConfigs {
sc = append(sc, constant.ServerConfig{
Port: serveConfig.Port,
IpAddr: serveConfig.IpAddr,
},
)
}
//nacos client
cc := constant.ClientConfig{
NamespaceId: n.ClientConfig.NamespaceId,
TimeoutMs: n.ClientConfig.TimeoutMs,
NotLoadCacheAtStart: n.ClientConfig.NotLoadCacheAtStart,
LogDir: n.ClientConfig.LogDir,
CacheDir: n.ClientConfig.CacheDir,
LogLevel: n.ClientConfig.LogLevel,
}
return sc, cc
}
type NacosServer struct {
sync.RWMutex
securityLogin security.AuthClient
serverList []constant.ServerConfig
httpAgent http_agent.IHttpAgent
timeoutMs uint64
endpoint string
lastSrvRefTime int64
vipSrvRefInterMills int64
contextPath string
currentIndex int32
ServerSrcChangeSignal chan struct{}
}
// GetAllServicesInfo Get all Services
func (n *NacosConfig) GetAllServicesInfo() (nacosVo.NacosServiceList, error) {
nacosServiceList := nacosVo.NacosServiceList{}
api := constant.SERVICE_BASE_PATH + "/catalog/services"
nacosServer, err := nacos_server.NewNacosServer(context.Background(),
[]constant.ServerConfig{*constant.NewServerConfig(n.ServerConfigs[0].IpAddr, n.ServerConfigs[0].Port)},
constant.ClientConfig{},
&http_agent.HttpAgent{},
1000,
"")
if err != nil {
return nacosServiceList, err
}
params := map[string]string{}
params["namespaceId"] = n.ClientConfig.NamespaceId
params["groupName"] = ""
params["pageNo"] = "1"
params["pageSize"] = "10000"
result, err := nacosServer.ReqApi(api, params, http.MethodGet, constant.ClientConfig{})
if err != nil {
logx.Errorf("Failed to get all services ,error: <%+v>, namespace : <%s> ", err, n.ClientConfig.NamespaceId)
return nacosServiceList, err
}
err1 := json.Unmarshal([]byte(result), &nacosServiceList)
if err1 != nil {
logx.Errorf("Conversion failed ,error: %+v, str: %s", err1, result)
return nacosServiceList, err
}
return nacosServiceList, err
}
// GetAllGroupName Get all GroupName
func (n *NacosConfig) GetAllGroupName() (nacosGroupList nacosVo.NacosGroupList, err error) {
nacosServiceList := nacosVo.NacosServiceList{}
api := constant.SERVICE_BASE_PATH + "/catalog/services"
nacosServer, err := nacos_server.NewNacosServer(context.Background(),
[]constant.ServerConfig{*constant.NewServerConfig(n.ServerConfigs[0].IpAddr, n.ServerConfigs[0].Port)},
constant.ClientConfig{},
&http_agent.HttpAgent{},
1000,
"")
if err != nil {
return nacosGroupList, err
}
params := map[string]string{}
params["namespaceId"] = "test"
params["groupName"] = ""
params["pageNo"] = "1"
params["pageSize"] = "10000"
result, err := nacosServer.ReqApi(api, params, http.MethodGet, constant.ClientConfig{})
err1 := json.Unmarshal([]byte(result), &nacosServiceList)
if err1 != nil {
logx.Errorf("Conversion failed ,error: %+v, str: %s", err1, result)
return nacosGroupList, err1
}
for _, v := range nacosServiceList.ServiceList {
nacosGroupList.GroupName = append(nacosGroupList.GroupName, v.GroupName)
}
nacosGroupList.GroupName = utils.RemoveDuplication_map(nacosGroupList.GroupName)
return nacosGroupList, err
}

View File

@ -1,19 +0,0 @@
/*
Copyright (c) [2023] [pcm]
[pcm-coordinator] is licensed under Mulan PSL v2.
You can use this software according to the terms and conditions of the Mulan PSL v2.
You may obtain a copy of Mulan PSL v2 at:
http://license.coscl.org.cn/MulanPSL2
THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
See the Mulan PSL v2 for more details.
*/
package vo
type NacosGroupList struct {
GroupName []string `json:"groupName"`
}

View File

@ -1,22 +0,0 @@
/*
Copyright (c) [2023] [pcm]
[pcm-coordinator] is licensed under Mulan PSL v2.
You can use this software according to the terms and conditions of the Mulan PSL v2.
You may obtain a copy of Mulan PSL v2 at:
http://license.coscl.org.cn/MulanPSL2
THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
EITHER EXPaRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
See the Mulan PSL v2 for more details.
*/
package vo
import "github.com/aliyun/alibaba-cloud-sdk-go/services/mse"
type NacosServiceList struct {
Count int `json:"count"`
ServiceList []mse.SimpleNacosAnsService `json:"serviceList"`
}

View File

@ -7,6 +7,7 @@ import (
type TaskCloudModel struct {
Id uint `json:"id" gorm:"primarykey;not null;comment:id"`
Name string `json:"name" gorm:"null;comment:名称"`
TaskId uint `json:"taskId" gorm:"not null;comment:task表id"`
AdapterId uint `json:"adapterId" gorm:"not null;comment:适配器id"`
AdapterName string `json:"adapterName" gorm:"not null;comment:适配器名称"`

View File

@ -48,6 +48,7 @@ type (
GpuTotal float64 `db:"gpu_total"`
CardTotal int64 `db:"card_total"` // number of compute cards
CardTopsTotal float64 `db:"card_tops_total"` // total compute power (tops)
AdapterId int64 `db:"adapter_id"`
}
)
@ -86,14 +87,14 @@ func (m *defaultTClusterResourceModel) FindOne(ctx context.Context, clusterId in
}
func (m *defaultTClusterResourceModel) Insert(ctx context.Context, data *TClusterResource) (sql.Result, error) {
query := fmt.Sprintf("insert into %s (%s) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", m.table, tClusterResourceRowsExpectAutoSet)
ret, err := m.conn.ExecCtx(ctx, query, data.ClusterId, data.ClusterName, data.ClusterType, data.CpuAvail, data.CpuTotal, data.MemAvail, data.MemTotal, data.DiskAvail, data.DiskTotal, data.GpuAvail, data.GpuTotal, data.CardTotal, data.CardTopsTotal)
query := fmt.Sprintf("insert into %s (%s) values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", m.table, tClusterResourceRowsExpectAutoSet)
ret, err := m.conn.ExecCtx(ctx, query, data.ClusterId, data.ClusterName, data.ClusterType, data.CpuAvail, data.CpuTotal, data.MemAvail, data.MemTotal, data.DiskAvail, data.DiskTotal, data.GpuAvail, data.GpuTotal, data.CardTotal, data.CardTopsTotal, data.AdapterId)
return ret, err
}
func (m *defaultTClusterResourceModel) Update(ctx context.Context, data *TClusterResource) error {
query := fmt.Sprintf("update %s set %s where `cluster_id` = ?", m.table, tClusterResourceRowsWithPlaceHolder)
_, err := m.conn.ExecCtx(ctx, query, data.ClusterName, data.ClusterType, data.CpuAvail, data.CpuTotal, data.MemAvail, data.MemTotal, data.DiskAvail, data.DiskTotal, data.GpuAvail, data.GpuTotal, data.CardTotal, data.CardTopsTotal, data.ClusterId)
_, err := m.conn.ExecCtx(ctx, query, data.ClusterName, data.ClusterType, data.CpuAvail, data.CpuTotal, data.MemAvail, data.MemTotal, data.DiskAvail, data.DiskTotal, data.GpuAvail, data.GpuTotal, data.CardTotal, data.CardTopsTotal, data.AdapterId, data.ClusterId)
return err
}

View File

@ -20,15 +20,7 @@ import (
)
type Interface interface {
//GetMetric(expr string, time time.Time) Metric
//GetMetricOverTime(expr string, start, end time.Time, step time.Duration) Metric
GetNamedMetrics(metrics []string, time time.Time, opt QueryOption) []Metric
GetNamedMetricsByTime(metrics []string, start, end string, step time.Duration, opt QueryOption) []Metric
//GetMetadata(namespace string) []Metadata
//GetMetricLabelSet(expr string, start, end time.Time) []map[string]string
//
//// meter
//GetNamedMeters(meters []string, time time.Time, opts []QueryOption) []Metric
//GetNamedMetersOverTime(metrics []string, start, end time.Time, step time.Duration, opts []QueryOption) []Metric
GetRawData(expr string, o QueryOption) (model.Value, error)
}

View File

@ -85,8 +85,6 @@ func makeExpr(metric string, opts QueryOptions) string {
return makeContainerMetricExpr(tmpl, opts)
case LevelPVC:
return makePVCMetricExpr(tmpl, opts)
case LevelIngress:
return makeIngressMetricExpr(tmpl, opts)
case LevelComponent:
return tmpl
default:
@ -108,6 +106,9 @@ func makeAdapterMetricExpr(tmpl string, o QueryOptions) string {
if o.AdapterId != 0 {
adapterSelector = fmt.Sprintf(`adapter_id="%d"`, o.AdapterId)
}
if len(o.ClustersName) != 0 {
// assumption: cluster names are exported under a cluster_name label; the '|' separated list forms a regex alternation
adapterSelector = fmt.Sprintf(`adapter_id="%d",cluster_name=~"%s"`, o.AdapterId, o.ClustersName)
}
return strings.Replace(tmpl, "$1", adapterSelector, -1)
}

View File

@ -29,12 +29,10 @@ const (
LevelNamespace
LevelApplication
LevelController
LevelService
LevelPod
LevelContainer
LevelPVC
LevelComponent
LevelIngress
LevelAdapter
)
@ -46,7 +44,6 @@ var MeteringLevelMap = map[string]int{
"LevelNamespace": LevelNamespace,
"LevelApplication": LevelApplication,
"LevelController": LevelController,
"LevelService": LevelService,
"LevelPod": LevelPod,
"LevelContainer": LevelContainer,
"LevelPVC": LevelPVC,
@ -57,15 +54,15 @@ type QueryOption interface {
Apply(*QueryOptions)
}
type Meteroptions struct {
type MeterOptions struct {
Start time.Time
End time.Time
Step time.Duration
}
type QueryOptions struct {
Level Level
Level Level
ClustersName string
NamespacedResourcesFilter string
QueryType string
ResourceFilter string
@ -81,10 +78,9 @@ type QueryOptions struct {
ContainerName string
AdapterId int64
ServiceName string
Ingress string
Job string
Duration *time.Duration
MeterOptions *Meteroptions
MeterOptions *MeterOptions
}
func NewQueryOptions() *QueryOptions {
@ -92,12 +88,14 @@ func NewQueryOptions() *QueryOptions {
}
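// AdapterOption scopes a query to one adapter and, optionally, to a '|' separated list of its cluster names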
type AdapterOption struct {
AdapterId int64
AdapterId int64
ClustersName string
}
func (a AdapterOption) Apply(o *QueryOptions) {
o.Level = LevelAdapter
o.AdapterId = a.AdapterId
o.ClustersName = a.ClustersName
}
type ClusterOption struct {
@ -154,31 +152,6 @@ func (no NamespaceOption) Apply(o *QueryOptions) {
o.Namespace = no.NamespaceName
}
type ApplicationsOption struct {
NamespaceName string
Applications []string
StorageClassName string
}
func (aso ApplicationsOption) Apply(o *QueryOptions) {
// nothing should be done
//nolint:gosimple
return
}
type OpenpitrixsOption struct {
Cluster string
NamespaceName string
Openpitrixs []string
StorageClassName string
}
func (oso OpenpitrixsOption) Apply(o *QueryOptions) {
// nothing should be done
//nolint:gosimple
return
}
// ApplicationsOption & OpenpitrixsOption share the same ApplicationOption struct
type ApplicationOption struct {
NamespaceName string
@ -213,17 +186,6 @@ func (wo WorkloadOption) Apply(o *QueryOptions) {
o.WorkloadKind = wo.WorkloadKind
}
type ServicesOption struct {
NamespaceName string
Services []string
}
func (sso ServicesOption) Apply(o *QueryOptions) {
// nothing should be done
//nolint:gosimple
return
}
type ServiceOption struct {
ResourceFilter string
NamespaceName string
@ -231,20 +193,6 @@ type ServiceOption struct {
PodNames []string
}
func (so ServiceOption) Apply(o *QueryOptions) {
o.Level = LevelService
o.Namespace = so.NamespaceName
o.ServiceName = so.ServiceName
pod_names := strings.Join(so.PodNames, "|")
if len(pod_names) > 0 {
o.ResourceFilter = fmt.Sprintf(`pod=~"%s", namespace="%s"`, pod_names, o.Namespace)
} else {
o.ResourceFilter = fmt.Sprintf(`pod=~"%s", namespace="%s"`, ".*", o.Namespace)
}
}
type PodOption struct {
NamespacedResourcesFilter string
ResourceFilter string
@ -310,25 +258,6 @@ func (po PVCOption) Apply(o *QueryOptions) {
o.Namespace = po.NamespaceName
}
type IngressOption struct {
ResourceFilter string
NamespaceName string
Ingress string
Job string
Pod string
Duration *time.Duration
}
func (no IngressOption) Apply(o *QueryOptions) {
o.Level = LevelIngress
o.ResourceFilter = no.ResourceFilter
o.Namespace = no.NamespaceName
o.Ingress = no.Ingress
o.Job = no.Job
o.PodName = no.Pod
o.Duration = no.Duration
}
type ComponentOption struct{}
func (_ ComponentOption) Apply(o *QueryOptions) {
@ -342,7 +271,7 @@ type MeterOption struct {
}
func (mo MeterOption) Apply(o *QueryOptions) {
o.MeterOptions = &Meteroptions{
o.MeterOptions = &MeterOptions{
Start: mo.Start,
End: mo.End,
Step: mo.Step,

View File

@ -107,6 +107,23 @@ var (
}
)
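// ClusterLoadRecord carries the per-cluster load figures that SyncClusterLoad pushes to the Prometheus gauges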
type ClusterLoadRecord struct {
AdapterId int64 `json:"adapterId,optional"`
ClusterName string `json:"clusterName,optional"`
CpuAvail float64 `json:"cpuAvail,optional"`
CpuTotal float64 `json:"cpuTotal,optional"`
CpuUtilisation float64 `json:"cpuUtilisation,optional"`
MemoryAvail float64 `json:"memoryAvail,optional"`
MemoryUtilisation float64 `json:"memoryUtilisation,optional"`
MemoryTotal float64 `json:"memoryTotal,optional"`
DiskAvail float64 `json:"diskAvail,optional"`
DiskTotal float64 `json:"diskTotal,optional"`
DiskUtilisation float64 `json:"diskUtilisation,optional"`
PodsUtilisation float64 `json:"podsUtilisation,optional"`
PodsCount int64 `json:"podsCount,optional"`
PodsTotal int64 `json:"podsTotal,optional"`
}
func init() {
prometheus.MustRegister(metrics...)
}
@ -302,3 +319,21 @@ func (p Prometheus) GetRawData(expr string, o QueryOption) (model.Value, error)
}
return value, nil
}
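// SyncClusterLoad publishes one cluster's load snapshot to the registered gauges, labelled by cluster name and adapter id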
func SyncClusterLoad(record ClusterLoadRecord) {
ClusterCpuUtilisationGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.CpuUtilisation)
ClusterCpuAvailGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.CpuAvail)
ClusterCpuTotalGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.CpuTotal)
ClusterMemoryUtilisationGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.MemoryUtilisation)
ClusterMemoryAvailGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.MemoryAvail)
ClusterMemoryTotalGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.MemoryTotal)
ClusterDiskUtilisationGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.DiskUtilisation)
ClusterDiskAvailGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.DiskAvail)
ClusterDiskTotalGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.DiskTotal)
ClusterPodUtilisationGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.PodsUtilisation)
ClusterPodCountGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(float64(record.PodsCount))
ClusterPodTotalGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(float64(record.PodsTotal))
}

View File

@ -17,7 +17,6 @@ package svc
import (
"github.com/go-redis/redis/v8"
_ "github.com/go-sql-driver/mysql"
"github.com/robfig/cron/v3"
"github.com/zeromicro/go-zero/core/logx"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
"gitlink.org.cn/JointCloud/pcm-coordinator/rpc/internal/config"
@ -31,7 +30,6 @@ import (
type ServiceContext struct {
Config config.Config
DbEngin *gorm.DB
Cron *cron.Cron
RedisClient *redis.Client
}
@ -59,7 +57,6 @@ func NewServiceContext(c config.Config) *ServiceContext {
panic("InitSnowflake err")
}
return &ServiceContext{
Cron: cron.New(cron.WithSeconds()),
Config: c,
DbEngin: dbEngin,
RedisClient: redis.NewClient(&redis.Options{