monitoring

Former-commit-id: bc8021349d340169ad469dde4587591143700370
This commit is contained in:
zhangwei 2024-03-26 17:01:29 +08:00
parent 2d59b6ef3a
commit 2b27073ac4
11 changed files with 5586 additions and 5498 deletions

View File

@ -31,6 +31,23 @@ type (
centerType string `json:"centerType"`
}
)
// remoteResp is a response envelope made of a numeric code, a message and an
// untyped data payload.
// NOTE(review): presumably the shape returned by remote adapter services — confirm against callers.
type remoteResp {
code int `json:"code"`
message string `json:"message"`
data interface{} `json:"data"`
}
type (
// clustersLoadReq is the request of GET /core/cluster/load; both fields are
// bound from form/query parameters.
clustersLoadReq {
adapterId int64 `form:"adapterId"`
clusterName string `form:"clusterName"`
}
// clustersLoadResp is the response of GET /core/cluster/load; data is an
// untyped payload serialized under the "data" key.
clustersLoadResp {
data interface{} `json:"data"`
}
)
type (
syncClusterLoadReq {
clusterLoadRecords []ClusterLoadRecord `json:"clusterLoadRecords"`

View File

@ -11,18 +11,18 @@ import (
"schedule/pcm-schedule.api"
)
info (
title: "pcm api service"
desc: "type desc here"
author: "type author here"
email: "type email here"
info(
title: "pcm api service"
desc: "type desc here"
author: "type author here"
email: "type email here"
version: "type version here"
)
//core端接口
@server (
@server(
prefix: pcm/v1
group: core
group: core
)
service pcm {
@doc "查询P端服务列表"
@ -112,12 +112,16 @@ service pcm {
@doc "alert rules"
@handler alertRulesHandler
get /core/alert/rules
@doc "cluster resource load"
@handler clustersLoadHandler
get /core/cluster/load (clustersLoadReq) returns (clustersLoadResp)
}
//hpc二级接口
@server (
@server(
prefix: pcm/v1
group: hpc
group: hpc
)
service pcm {
@doc "提交超算任务"
@ -138,9 +142,9 @@ service pcm {
}
//cloud二级接口
@server (
@server(
prefix: pcm/v1
group: cloud
group: cloud
)
service pcm {
@doc "云算任务列表"
@ -177,9 +181,9 @@ service pcm {
}
//智算二级接口
@server (
@server(
prefix: pcm/v1
group: ai
group: ai
)
service pcm {
@doc "查询数据集列表"
@ -300,13 +304,13 @@ service pcm {
@doc "创建虚拟化任务"
@handler createVisualizationJobHandler
post /ai/CreateVisualizationJob (CreateVisualizationJobReq) returns (CreateVisualizationJobResp)
/******************Visualization Job Method start*************************/
/******************Visualization Job Method start*************************/
}
//screen接口
@server (
@server(
prefix: pcm/v1
group: storage
group: storage
)
service pcm {
@doc "存储概览"
@ -323,9 +327,9 @@ service pcm {
}
//镜像接口
@server (
@server(
prefix: pcm/v1
group: image
group: image
)
service pcm {
@doc "镜像上传"
@ -350,9 +354,9 @@ service pcm {
}
//openstack 接口
@server (
@server(
prefix: pcm/v1
group: vm
group: vm
)
service pcm {
@doc "openstack计算中心概览"
@ -741,9 +745,9 @@ service pcm {
}
//存算联动 接口
@server (
@server(
prefix: pcm/v1
group: storelink
group: storelink
)
service pcm {
@handler UploadLinkImageHandler
@ -772,9 +776,9 @@ service pcm {
}
// 接口
@server (
@server(
prefix: pcm/v1
group: apps
group: apps
)
service pcm {
@doc "应用列表"
@ -815,9 +819,9 @@ service pcm {
}
// 接口
@server (
@server(
prefix: pcm/v1
group: adapters
group: adapters
)
service pcm {
@handler AdaptersListHandler
@ -857,9 +861,9 @@ service pcm {
get /adapter/clusterSum (clusterSumReq) returns (clusterSumReqResp)
}
@server (
@server(
prefix: pcm/v1
group: schedule
group: schedule
)
service pcm {
@handler ScheduleGetAiResourceTypesHandler
@ -878,9 +882,9 @@ service pcm {
post /schedule/submit (ScheduleReq) returns (ScheduleResp)
}
@server (
@server(
prefix: pcm/v1
group: dictionary
group: dictionary
)
service pcm {
@handler GetDict
@ -915,5 +919,4 @@ service pcm {
@handler ListDictItemByCode
get /dictItem/code/:dictCode (DictCodeReq) returns (PageResult)
}
}

View File

@ -11,11 +11,10 @@ Redis:
Host: 10.206.0.12:6379
Pass: redisPW123
Cache:
- Host: 10.206.0.12:6379
Pass: redisPW123
Monitoring:
PromUrl: http://47.92.39.128:30877
AlertUrl: 47.92.39.128:32243
PromUrl: http://47.92.39.128:30877
# k8s rpc
K8sNativeConf:

View File

@ -0,0 +1,25 @@
package core
import (
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
"net/http"
"github.com/zeromicro/go-zero/rest/httpx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/core"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
)
// ClustersLoadHandler builds the HTTP handler for the cluster load endpoint.
// It parses the incoming request into a types.ClustersLoadReq, delegates to
// the ClustersLoad logic and writes the outcome through result.HttpResult.
func ClustersLoadHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
	handle := func(w http.ResponseWriter, r *http.Request) {
		var req types.ClustersLoadReq

		// A request that cannot be parsed is answered immediately with the parse error.
		parseErr := httpx.Parse(r, &req)
		if parseErr != nil {
			httpx.ErrorCtx(r.Context(), w, parseErr)
			return
		}

		resp, err := core.NewClustersLoadLogic(r.Context(), svcCtx).ClustersLoad(&req)
		result.HttpResult(r, w, resp, err)
	}
	return handle
}

View File

@ -1,32 +0,0 @@
/*
Copyright (c) [2023] [pcm]
[pcm-coordinator] is licensed under Mulan PSL v2.
You can use this software according to the terms and conditions of the Mulan PSL v2.
You may obtain a copy of Mulan PSL v2 at:
http://license.coscl.org.cn/MulanPSL2
THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
See the Mulan PSL v2 for more details.
*/
package core
import (
"net/http"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/core"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
)
// ListDomainResourceHandler builds the HTTP handler that runs the
// ListDomainResource logic (which takes no request payload) and writes the
// outcome through result.HttpResult.
func ListDomainResourceHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
	handle := func(w http.ResponseWriter, r *http.Request) {
		resp, err := core.NewListDomainResourceLogic(r.Context(), svcCtx).ListDomainResource()
		result.HttpResult(r, w, resp, err)
	}
	return handle
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,50 @@
package core
import (
"context"
"github.com/zeromicro/go-zero/core/logx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/tracker"
"time"
)
// ClustersLoadLogic holds the per-request state used to serve the cluster
// load query. It embeds a logx.Logger so log methods can be called directly
// on the logic value.
type ClustersLoadLogic struct {
logx.Logger
// ctx is the context handed to NewClustersLoadLogic.
ctx context.Context
// svcCtx is the service context handed to NewClustersLoadLogic.
svcCtx *svc.ServiceContext
}
// NewClustersLoadLogic creates a ClustersLoadLogic bound to ctx and svcCtx,
// with its embedded logger derived from ctx.
func NewClustersLoadLogic(ctx context.Context, svcCtx *svc.ServiceContext) *ClustersLoadLogic {
	logic := new(ClustersLoadLogic)
	logic.Logger = logx.WithContext(ctx)
	logic.ctx = ctx
	logic.svcCtx = svcCtx
	return logic
}
// ClustersLoad queries the Prometheus client for the current CPU and memory
// load metrics of a cluster and returns them in the response's Data field.
//
// req supplies the adapter ID and cluster name, which are forwarded to the
// query as a tracker.ClusterOption. The returned error is always nil; any
// per-metric failure is whatever GetNamedMetrics reports inside its result.
func (l *ClustersLoadLogic) ClustersLoad(req *types.ClustersLoadReq) (resp *types.ClustersLoadResp, err error) {
	// Each metric is requested once, in the order the original list first
	// named them.
	// NOTE(review): the original list repeated the three memory metrics;
	// disk metrics were presumably intended — confirm the metric names
	// supported by the tracker package before adding them.
	metrics := []string{
		"cluster_cpu_utilisation",
		"cluster_cpu_avail",
		"cluster_cpu_total",
		"cluster_memory_total",
		"cluster_memory_avail",
		"cluster_memory_utilisation",
	}

	// Scope the query with the caller's parameters instead of an empty option.
	opt := tracker.ClusterOption{
		AdapterId:   int(req.AdapterId),
		ClusterName: req.ClusterName,
	}
	metricValues := l.svcCtx.PromClient.GetNamedMetrics(metrics, time.Now(), opt)

	// Hand the metrics back to the caller rather than discarding them.
	return &types.ClustersLoadResp{Data: metricValues}, nil
}

View File

@ -21,6 +21,7 @@ import (
"github.com/aws/aws-sdk-go/service/s3/s3manager"
"github.com/docker/docker/client"
"github.com/go-redis/redis/v8"
"github.com/go-resty/resty/v2"
alert "github.com/prometheus/alertmanager/api/v2/client"
"github.com/robfig/cron/v3"
"github.com/zeromicro/go-zero/core/logx"
@ -64,6 +65,7 @@ type ServiceContext struct {
ParticipantRpc participantservice.ParticipantService
PromClient tracker.Prometheus
AlertClient *alert.AlertmanagerAPI
HttpClient *resty.Client
}
func NewServiceContext(c config.Config) *ServiceContext {
@ -76,11 +78,12 @@ func NewServiceContext(c config.Config) *ServiceContext {
S3ForcePathStyle: aws.Bool(true), //使用路径样式而非虚拟主机样式,区别请参考:https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html
})
promClient, err := tracker.NewPrometheus(c.Monitoring.PromUrl)
if err != nil {
logx.Errorf("InitPrometheus err: %v", err)
panic("InitSnowflake err")
}
httpClient := resty.New()
alertClient := tracker.NewAlertClient(c.Monitoring.AlertUrl)
if err != nil {
logx.Errorf("InitPrometheus err: %v", err)
@ -144,5 +147,6 @@ func NewServiceContext(c config.Config) *ServiceContext {
Uploader: uploader,
PromClient: promClient,
AlertClient: alertClient,
HttpClient: httpClient,
}
}

File diff suppressed because it is too large Load Diff

View File

@ -101,6 +101,7 @@ func (a AdapterOption) Apply(o *QueryOptions) {
}
// ClusterOption carries an adapter ID and a cluster name; it is passed as the
// query option argument of Prometheus.GetNamedMetrics.
// NOTE(review): how each field influences the generated query depends on its
// Apply method, which is not visible here — confirm before relying on either.
type ClusterOption struct {
// AdapterId identifies the adapter.
AdapterId int
// ClusterName names the cluster.
ClusterName string
}

View File

@ -138,6 +138,7 @@ func (p Prometheus) GetNamedMetricsByTime(metrics []string, start, end string, s
End: endTimestamp,
Step: step,
}
p.client.Rules(context.Background())
value, _, err := p.client.QueryRange(context.Background(), makeExpr(metric, *opts), timeRange)
if err != nil {
parsedResp.Error = err.Error()