added cron updateClusterResources
Former-commit-id: f38bd68df8012cb81ba9866a0c8b0310511c2427
This commit is contained in:
parent
04f0eae608
commit
b61e434f75
|
@ -327,3 +327,79 @@ func InitAiClusterMap(conf *config.Config, clusters []types.ClusterInfo) (map[st
|
||||||
|
|
||||||
return executorMap, collectorMap
|
return executorMap, collectorMap
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func UpdateClusterResource(svc *svc.ServiceContext) {
|
||||||
|
list, err := svc.Scheduler.AiStorages.GetAdaptersByType("1")
|
||||||
|
if err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
for _, adapter := range list {
|
||||||
|
clusters, err := svc.Scheduler.AiStorages.GetClustersByAdapterId(adapter.Id)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
for _, cluster := range clusters.List {
|
||||||
|
c := cluster
|
||||||
|
clusterResource, err := svc.Scheduler.AiStorages.GetClusterResourcesById(c.Id)
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
_, ok := svc.Scheduler.AiService.AiCollectorAdapterMap[adapter.Id][c.Id]
|
||||||
|
if !ok {
|
||||||
|
wg.Done()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
h := http.Request{}
|
||||||
|
stat, err := svc.Scheduler.AiService.AiCollectorAdapterMap[adapter.Id][c.Id].GetResourceStats(h.Context())
|
||||||
|
if err != nil {
|
||||||
|
wg.Done()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if stat == nil {
|
||||||
|
wg.Done()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
clusterType, err := strconv.ParseInt(adapter.Type, 10, 64)
|
||||||
|
if err != nil {
|
||||||
|
wg.Done()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
var cardTotal int64
|
||||||
|
var topsTotal float64
|
||||||
|
for _, card := range stat.CardsAvail {
|
||||||
|
cardTotal += int64(card.CardNum)
|
||||||
|
topsTotal += card.TOpsAtFp16 * float64(card.CardNum)
|
||||||
|
}
|
||||||
|
|
||||||
|
if (models.TClusterResource{} == *clusterResource) {
|
||||||
|
err = svc.Scheduler.AiStorages.SaveClusterResources(c.Id, c.Name, clusterType, float64(stat.CpuCoreAvail), float64(stat.CpuCoreTotal),
|
||||||
|
stat.MemAvail, stat.MemTotal, stat.DiskAvail, stat.DiskTotal, float64(stat.GpuAvail), float64(stat.GpuTotal), cardTotal, topsTotal)
|
||||||
|
if err != nil {
|
||||||
|
wg.Done()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
clusterResource.CardTotal = cardTotal
|
||||||
|
clusterResource.CardTopsTotal = topsTotal
|
||||||
|
clusterResource.CpuAvail = float64(stat.CpuCoreAvail)
|
||||||
|
clusterResource.CpuTotal = float64(stat.CpuCoreTotal)
|
||||||
|
clusterResource.MemAvail = stat.MemAvail
|
||||||
|
clusterResource.MemTotal = stat.MemTotal
|
||||||
|
clusterResource.DiskAvail = stat.DiskAvail
|
||||||
|
clusterResource.DiskTotal = stat.DiskTotal
|
||||||
|
|
||||||
|
err := svc.Scheduler.AiStorages.UpdateClusterResources(clusterResource)
|
||||||
|
if err != nil {
|
||||||
|
wg.Done()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
wg.Done()
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
wg.Wait()
|
||||||
|
}
|
|
@ -44,4 +44,7 @@ func AddCronGroup(svc *svc.ServiceContext) {
|
||||||
UpdateAiAdapterMaps(svc)
|
UpdateAiAdapterMaps(svc)
|
||||||
})
|
})
|
||||||
|
|
||||||
|
svc.Cron.AddFunc("30 21 * * *", func() {
|
||||||
|
UpdateClusterResource(svc)
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
|
@ -137,6 +137,13 @@ func (l *GetCenterOverviewLogic) updateClusterResource(mu *sync.RWMutex, ch chan
|
||||||
} else {
|
} else {
|
||||||
clusterResource.CardTotal = cardTotal
|
clusterResource.CardTotal = cardTotal
|
||||||
clusterResource.CardTopsTotal = topsTotal
|
clusterResource.CardTopsTotal = topsTotal
|
||||||
|
clusterResource.CpuAvail = float64(stat.CpuCoreAvail)
|
||||||
|
clusterResource.CpuTotal = float64(stat.CpuCoreTotal)
|
||||||
|
clusterResource.MemAvail = stat.MemAvail
|
||||||
|
clusterResource.MemTotal = stat.MemTotal
|
||||||
|
clusterResource.DiskAvail = stat.DiskAvail
|
||||||
|
clusterResource.DiskTotal = stat.DiskTotal
|
||||||
|
|
||||||
err := l.svcCtx.Scheduler.AiStorages.UpdateClusterResources(clusterResource)
|
err := l.svcCtx.Scheduler.AiStorages.UpdateClusterResources(clusterResource)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
mu.Unlock()
|
mu.Unlock()
|
||||||
|
|
|
@ -2,12 +2,11 @@ package core
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"github.com/zeromicro/go-zero/core/logx"
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/tracker"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/tracker"
|
||||||
"strconv"
|
tool "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
|
||||||
|
|
||||||
"github.com/zeromicro/go-zero/core/logx"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type SyncClusterLoadLogic struct {
|
type SyncClusterLoadLogic struct {
|
||||||
|
@ -25,24 +24,10 @@ func NewSyncClusterLoadLogic(ctx context.Context, svcCtx *svc.ServiceContext) *S
|
||||||
}
|
}
|
||||||
|
|
||||||
func (l *SyncClusterLoadLogic) SyncClusterLoad(req *types.SyncClusterLoadReq) error {
|
func (l *SyncClusterLoadLogic) SyncClusterLoad(req *types.SyncClusterLoadReq) error {
|
||||||
if len(req.ClusterLoadRecords) != 0 {
|
if nil != req.ClusterLoadRecords {
|
||||||
for _, record := range req.ClusterLoadRecords {
|
var param tracker.ClusterLoadRecord
|
||||||
tracker.ClusterCpuUtilisationGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.CpuUtilisation)
|
tool.Convert(req, ¶m)
|
||||||
tracker.ClusterCpuAvailGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.CpuAvail)
|
tracker.SyncClusterLoad(param)
|
||||||
tracker.ClusterCpuTotalGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.CpuTotal)
|
|
||||||
|
|
||||||
tracker.ClusterMemoryUtilisationGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.MemoryUtilisation)
|
|
||||||
tracker.ClusterMemoryAvailGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.MemoryAvail)
|
|
||||||
tracker.ClusterMemoryTotalGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.MemoryTotal)
|
|
||||||
|
|
||||||
tracker.ClusterDiskUtilisationGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.DiskUtilisation)
|
|
||||||
tracker.ClusterDiskAvailGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.DiskAvail)
|
|
||||||
tracker.ClusterDiskTotalGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.DiskTotal)
|
|
||||||
|
|
||||||
tracker.ClusterPodUtilisationGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.PodsUtilisation)
|
|
||||||
tracker.ClusterPodCountGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(float64(record.PodsCount))
|
|
||||||
tracker.ClusterPodTotalGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(float64(record.PodsTotal))
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,6 +6,7 @@ import (
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/constants"
|
||||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
|
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
|
||||||
|
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/tracker"
|
||||||
"gorm.io/gorm"
|
"gorm.io/gorm"
|
||||||
"strconv"
|
"strconv"
|
||||||
"time"
|
"time"
|
||||||
|
@ -211,6 +212,17 @@ func (s *AiStorage) SaveClusterResources(clusterId string, clusterName string, c
|
||||||
if tx.Error != nil {
|
if tx.Error != nil {
|
||||||
return tx.Error
|
return tx.Error
|
||||||
}
|
}
|
||||||
|
// prometheus
|
||||||
|
param := tracker.ClusterLoadRecord{
|
||||||
|
ClusterName: clusterName,
|
||||||
|
CpuAvail: cpuAvail,
|
||||||
|
CpuTotal: cpuTotal,
|
||||||
|
MemoryAvail: memAvail,
|
||||||
|
MemoryTotal: memTotal,
|
||||||
|
DiskAvail: diskAvail,
|
||||||
|
DiskTotal: diskTotal,
|
||||||
|
}
|
||||||
|
tracker.SyncClusterLoad(param)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -219,6 +231,17 @@ func (s *AiStorage) UpdateClusterResources(clusterResource *models.TClusterResou
|
||||||
if tx.Error != nil {
|
if tx.Error != nil {
|
||||||
return tx.Error
|
return tx.Error
|
||||||
}
|
}
|
||||||
|
// prometheus
|
||||||
|
param := tracker.ClusterLoadRecord{
|
||||||
|
ClusterName: clusterResource.ClusterName,
|
||||||
|
CpuAvail: clusterResource.CpuAvail,
|
||||||
|
CpuTotal: clusterResource.CpuTotal,
|
||||||
|
MemoryAvail: clusterResource.MemAvail,
|
||||||
|
MemoryTotal: clusterResource.MemTotal,
|
||||||
|
DiskAvail: clusterResource.DiskAvail,
|
||||||
|
DiskTotal: clusterResource.DiskTotal,
|
||||||
|
}
|
||||||
|
tracker.SyncClusterLoad(param)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -107,6 +107,23 @@ var (
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// ClusterLoadRecord is one load sample for a single cluster. SyncClusterLoad
// copies its numeric fields into the cluster gauges, using ClusterName and
// AdapterId as the label values.
type ClusterLoadRecord struct {
	// Identity of the sample; both values are used as gauge labels.
	AdapterId   int64  `json:"adapterId,optional"`
	ClusterName string `json:"clusterName,optional"`

	// CPU figures.
	CpuAvail       float64 `json:"cpuAvail,optional"`
	CpuTotal       float64 `json:"cpuTotal,optional"`
	CpuUtilisation float64 `json:"cpuUtilisation,optional"`

	// Memory figures.
	MemoryAvail       float64 `json:"memoryAvail,optional"`
	MemoryUtilisation float64 `json:"memoryUtilisation,optional"`
	MemoryTotal       float64 `json:"memoryTotal,optional"`

	// Disk figures.
	DiskAvail       float64 `json:"diskAvail,optional"`
	DiskTotal       float64 `json:"diskTotal,optional"`
	DiskUtilisation float64 `json:"diskUtilisation,optional"`

	// Pod figures.
	PodsUtilisation float64 `json:"podsUtilisation,optional"`
	PodsCount       int64   `json:"podsCount,optional"`
	PodsTotal       int64   `json:"podsTotal,optional"`
}
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
prometheus.MustRegister(metrics...)
|
prometheus.MustRegister(metrics...)
|
||||||
}
|
}
|
||||||
|
@ -302,3 +319,21 @@ func (p Prometheus) GetRawData(expr string, o QueryOption) (model.Value, error)
|
||||||
}
|
}
|
||||||
return value, nil
|
return value, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func SyncClusterLoad(record ClusterLoadRecord) {
|
||||||
|
ClusterCpuUtilisationGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.CpuUtilisation)
|
||||||
|
ClusterCpuAvailGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.CpuAvail)
|
||||||
|
ClusterCpuTotalGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.CpuTotal)
|
||||||
|
|
||||||
|
ClusterMemoryUtilisationGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.MemoryUtilisation)
|
||||||
|
ClusterMemoryAvailGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.MemoryAvail)
|
||||||
|
ClusterMemoryTotalGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.MemoryTotal)
|
||||||
|
|
||||||
|
ClusterDiskUtilisationGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.DiskUtilisation)
|
||||||
|
ClusterDiskAvailGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.DiskAvail)
|
||||||
|
ClusterDiskTotalGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.DiskTotal)
|
||||||
|
|
||||||
|
ClusterPodUtilisationGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(record.PodsUtilisation)
|
||||||
|
ClusterPodCountGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(float64(record.PodsCount))
|
||||||
|
ClusterPodTotalGauge.WithLabelValues(record.ClusterName, strconv.FormatInt(record.AdapterId, 10)).Set(float64(record.PodsTotal))
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue