monitoring

Former-commit-id: bc8021349d340169ad469dde4587591143700370
This commit is contained in:
zhangwei 2024-03-26 17:01:29 +08:00
parent 2d59b6ef3a
commit 2b27073ac4
11 changed files with 5586 additions and 5498 deletions

View File

@ -31,6 +31,23 @@ type (
centerType string `json:"centerType"` centerType string `json:"centerType"`
} }
) )
// remoteResp is a response envelope carrying a numeric code, a message
// string, and an untyped data payload.
// NOTE(review): presumably the decode target for responses fetched from a
// remote adapter service — confirm against callers.
type remoteResp {
code int `json:"code"`
message string `json:"message"`
data interface{} `json:"data"`
}
// Request and response types for the cluster load query.
type (
// clustersLoadReq carries the form-bound query parameters: the adapter id
// and the cluster name.
clustersLoadReq {
adapterId int64 `form:"adapterId"`
clusterName string `form:"clusterName"`
}
// clustersLoadResp wraps the untyped load payload returned to the caller.
clustersLoadResp {
data interface{} `json:"data"`
}
)
type ( type (
syncClusterLoadReq { syncClusterLoadReq {
clusterLoadRecords []ClusterLoadRecord `json:"clusterLoadRecords"` clusterLoadRecords []ClusterLoadRecord `json:"clusterLoadRecords"`

View File

@ -11,18 +11,18 @@ import (
"schedule/pcm-schedule.api" "schedule/pcm-schedule.api"
) )
info ( info(
title: "pcm api service" title: "pcm api service"
desc: "type desc here" desc: "type desc here"
author: "type author here" author: "type author here"
email: "type email here" email: "type email here"
version: "type version here" version: "type version here"
) )
//core端接口 //core端接口
@server ( @server(
prefix: pcm/v1 prefix: pcm/v1
group: core group: core
) )
service pcm { service pcm {
@doc "查询P端服务列表" @doc "查询P端服务列表"
@ -112,12 +112,16 @@ service pcm {
@doc "alert rules" @doc "alert rules"
@handler alertRulesHandler @handler alertRulesHandler
get /core/alert/rules get /core/alert/rules
@doc "cluster resource load"
@handler clustersLoadHandler
get /core/cluster/load (clustersLoadReq) returns (clustersLoadResp)
} }
//hpc二级接口 //hpc二级接口
@server ( @server(
prefix: pcm/v1 prefix: pcm/v1
group: hpc group: hpc
) )
service pcm { service pcm {
@doc "提交超算任务" @doc "提交超算任务"
@ -138,9 +142,9 @@ service pcm {
} }
//cloud二级接口 //cloud二级接口
@server ( @server(
prefix: pcm/v1 prefix: pcm/v1
group: cloud group: cloud
) )
service pcm { service pcm {
@doc "云算任务列表" @doc "云算任务列表"
@ -177,9 +181,9 @@ service pcm {
} }
//智算二级接口 //智算二级接口
@server ( @server(
prefix: pcm/v1 prefix: pcm/v1
group: ai group: ai
) )
service pcm { service pcm {
@doc "查询数据集列表" @doc "查询数据集列表"
@ -300,13 +304,13 @@ service pcm {
@doc "创建虚拟化任务" @doc "创建虚拟化任务"
@handler createVisualizationJobHandler @handler createVisualizationJobHandler
post /ai/CreateVisualizationJob (CreateVisualizationJobReq) returns (CreateVisualizationJobResp) post /ai/CreateVisualizationJob (CreateVisualizationJobReq) returns (CreateVisualizationJobResp)
/******************Visualization Job Method start*************************/ /******************Visualization Job Method start*************************/
} }
//screen接口 //screen接口
@server ( @server(
prefix: pcm/v1 prefix: pcm/v1
group: storage group: storage
) )
service pcm { service pcm {
@doc "存储概览" @doc "存储概览"
@ -323,9 +327,9 @@ service pcm {
} }
//镜像接口 //镜像接口
@server ( @server(
prefix: pcm/v1 prefix: pcm/v1
group: image group: image
) )
service pcm { service pcm {
@doc "镜像上传" @doc "镜像上传"
@ -350,9 +354,9 @@ service pcm {
} }
//openstack 接口 //openstack 接口
@server ( @server(
prefix: pcm/v1 prefix: pcm/v1
group: vm group: vm
) )
service pcm { service pcm {
@doc "openstack计算中心概览" @doc "openstack计算中心概览"
@ -741,9 +745,9 @@ service pcm {
} }
//存算联动 接口 //存算联动 接口
@server ( @server(
prefix: pcm/v1 prefix: pcm/v1
group: storelink group: storelink
) )
service pcm { service pcm {
@handler UploadLinkImageHandler @handler UploadLinkImageHandler
@ -772,9 +776,9 @@ service pcm {
} }
// 接口 // 接口
@server ( @server(
prefix: pcm/v1 prefix: pcm/v1
group: apps group: apps
) )
service pcm { service pcm {
@doc "应用列表" @doc "应用列表"
@ -815,9 +819,9 @@ service pcm {
} }
// 接口 // 接口
@server ( @server(
prefix: pcm/v1 prefix: pcm/v1
group: adapters group: adapters
) )
service pcm { service pcm {
@handler AdaptersListHandler @handler AdaptersListHandler
@ -857,9 +861,9 @@ service pcm {
get /adapter/clusterSum (clusterSumReq) returns (clusterSumReqResp) get /adapter/clusterSum (clusterSumReq) returns (clusterSumReqResp)
} }
@server ( @server(
prefix: pcm/v1 prefix: pcm/v1
group: schedule group: schedule
) )
service pcm { service pcm {
@handler ScheduleGetAiResourceTypesHandler @handler ScheduleGetAiResourceTypesHandler
@ -878,9 +882,9 @@ service pcm {
post /schedule/submit (ScheduleReq) returns (ScheduleResp) post /schedule/submit (ScheduleReq) returns (ScheduleResp)
} }
@server ( @server(
prefix: pcm/v1 prefix: pcm/v1
group: dictionary group: dictionary
) )
service pcm { service pcm {
@handler GetDict @handler GetDict
@ -915,5 +919,4 @@ service pcm {
@handler ListDictItemByCode @handler ListDictItemByCode
get /dictItem/code/:dictCode (DictCodeReq) returns (PageResult) get /dictItem/code/:dictCode (DictCodeReq) returns (PageResult)
} }

View File

@ -11,11 +11,10 @@ Redis:
Host: 10.206.0.12:6379 Host: 10.206.0.12:6379
Pass: redisPW123 Pass: redisPW123
Cache: Monitoring:
- Host: 10.206.0.12:6379 PromUrl: http://47.92.39.128:30877
Pass: redisPW123 AlertUrl: 47.92.39.128:32243
PromUrl: http://47.92.39.128:30877
# k8s rpc # k8s rpc
K8sNativeConf: K8sNativeConf:

View File

@ -0,0 +1,25 @@
package core
import (
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
"net/http"
"github.com/zeromicro/go-zero/rest/httpx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/core"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
)
// ClustersLoadHandler builds the HTTP handler for the cluster load query.
//
// The handler parses the incoming request into a types.ClustersLoadReq. A
// parse failure is reported through httpx.ErrorCtx and ends the request.
// Otherwise the request is handed to ClustersLoadLogic.ClustersLoad and the
// outcome (response or error) is written with result.HttpResult.
func ClustersLoadHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
	handle := func(w http.ResponseWriter, r *http.Request) {
		var req types.ClustersLoadReq
		parseErr := httpx.Parse(r, &req)
		if parseErr != nil {
			httpx.ErrorCtx(r.Context(), w, parseErr)
			return
		}

		logic := core.NewClustersLoadLogic(r.Context(), svcCtx)
		resp, err := logic.ClustersLoad(&req)
		result.HttpResult(r, w, resp, err)
	}
	return handle
}

View File

@ -1,32 +0,0 @@
/*
Copyright (c) [2023] [pcm]
[pcm-coordinator] is licensed under Mulan PSL v2.
You can use this software according to the terms and conditions of the Mulan PSL v2.
You may obtain a copy of Mulan PSL v2 at:
http://license.coscl.org.cn/MulanPSL2
THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
See the Mulan PSL v2 for more details.
*/
package core
import (
"net/http"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/core"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
)
// ListDomainResourceHandler builds the HTTP handler that lists domain
// resources.
//
// The handler takes no request parameters: it runs
// ListDomainResourceLogic.ListDomainResource with the request context and
// writes the response or error with result.HttpResult.
func ListDomainResourceHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
	handle := func(w http.ResponseWriter, r *http.Request) {
		logic := core.NewListDomainResourceLogic(r.Context(), svcCtx)
		resp, err := logic.ListDomainResource()
		result.HttpResult(r, w, resp, err)
	}
	return handle
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,50 @@
package core
import (
"context"
"github.com/zeromicro/go-zero/core/logx"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/tracker"
"time"
)
// ClustersLoadLogic holds the per-request state for the cluster load query:
// an embedded logger, the request context, and the shared service context.
type ClustersLoadLogic struct {
logx.Logger
ctx context.Context
svcCtx *svc.ServiceContext
}
// NewClustersLoadLogic returns a ClustersLoadLogic bound to ctx and svcCtx,
// with its embedded logger created from ctx via logx.WithContext.
func NewClustersLoadLogic(ctx context.Context, svcCtx *svc.ServiceContext) *ClustersLoadLogic {
	logic := new(ClustersLoadLogic)
	logic.Logger = logx.WithContext(ctx)
	logic.ctx = ctx
	logic.svcCtx = svcCtx
	return logic
}
// ClustersLoad queries the Prometheus client for the current CPU and memory
// load metrics of the cluster identified by the request and returns them in
// the response's Data field.
//
// req supplies the adapter id and cluster name used to scope the query. The
// returned error is always nil: GetNamedMetrics returns no error value here.
// NOTE(review): presumably per-metric failures are carried inside the result
// itself — confirm against the tracker package.
func (l *ClustersLoadLogic) ClustersLoad(req *types.ClustersLoadReq) (resp *types.ClustersLoadResp, err error) {
	// Each metric is listed exactly once. The previous list repeated the three
	// memory metrics, which only caused redundant Prometheus queries.
	// NOTE(review): the repeats may have been intended as disk metrics
	// (cluster_disk_avail / cluster_disk_total) — confirm before adding them.
	metrics := []string{
		"cluster_cpu_utilisation",
		"cluster_cpu_avail",
		"cluster_cpu_total",
		"cluster_memory_total",
		"cluster_memory_avail",
		"cluster_memory_utilisation",
	}

	// Scope the query with the caller's parameters instead of an empty option,
	// so the request fields are actually honoured.
	opt := tracker.ClusterOption{
		AdapterId:   int(req.AdapterId),
		ClusterName: req.ClusterName,
	}

	// Return the query result to the caller; previously it was only passed to
	// a debug println and then discarded, leaving Data unset.
	result := l.svcCtx.PromClient.GetNamedMetrics(metrics, time.Now(), opt)
	return &types.ClustersLoadResp{Data: result}, nil
}

View File

@ -21,6 +21,7 @@ import (
"github.com/aws/aws-sdk-go/service/s3/s3manager" "github.com/aws/aws-sdk-go/service/s3/s3manager"
"github.com/docker/docker/client" "github.com/docker/docker/client"
"github.com/go-redis/redis/v8" "github.com/go-redis/redis/v8"
"github.com/go-resty/resty/v2"
alert "github.com/prometheus/alertmanager/api/v2/client" alert "github.com/prometheus/alertmanager/api/v2/client"
"github.com/robfig/cron/v3" "github.com/robfig/cron/v3"
"github.com/zeromicro/go-zero/core/logx" "github.com/zeromicro/go-zero/core/logx"
@ -64,6 +65,7 @@ type ServiceContext struct {
ParticipantRpc participantservice.ParticipantService ParticipantRpc participantservice.ParticipantService
PromClient tracker.Prometheus PromClient tracker.Prometheus
AlertClient *alert.AlertmanagerAPI AlertClient *alert.AlertmanagerAPI
HttpClient *resty.Client
} }
func NewServiceContext(c config.Config) *ServiceContext { func NewServiceContext(c config.Config) *ServiceContext {
@ -76,11 +78,12 @@ func NewServiceContext(c config.Config) *ServiceContext {
S3ForcePathStyle: aws.Bool(true), //使用路径样式而非虚拟主机样式,区别请参考:https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html S3ForcePathStyle: aws.Bool(true), //使用路径样式而非虚拟主机样式,区别请参考:https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html
}) })
promClient, err := tracker.NewPrometheus(c.Monitoring.PromUrl) promClient, err := tracker.NewPrometheus(c.Monitoring.PromUrl)
if err != nil { if err != nil {
logx.Errorf("InitPrometheus err: %v", err) logx.Errorf("InitPrometheus err: %v", err)
panic("InitSnowflake err") panic("InitSnowflake err")
} }
httpClient := resty.New()
alertClient := tracker.NewAlertClient(c.Monitoring.AlertUrl) alertClient := tracker.NewAlertClient(c.Monitoring.AlertUrl)
if err != nil { if err != nil {
logx.Errorf("InitPrometheus err: %v", err) logx.Errorf("InitPrometheus err: %v", err)
@ -144,5 +147,6 @@ func NewServiceContext(c config.Config) *ServiceContext {
Uploader: uploader, Uploader: uploader,
PromClient: promClient, PromClient: promClient,
AlertClient: alertClient, AlertClient: alertClient,
HttpClient: httpClient,
} }
} }

File diff suppressed because it is too large Load Diff

View File

@ -101,6 +101,7 @@ func (a AdapterOption) Apply(o *QueryOptions) {
} }
type ClusterOption struct { type ClusterOption struct {
AdapterId int
ClusterName string ClusterName string
} }

View File

@ -138,6 +138,7 @@ func (p Prometheus) GetNamedMetricsByTime(metrics []string, start, end string, s
End: endTimestamp, End: endTimestamp,
Step: step, Step: step,
} }
p.client.Rules(context.Background())
value, _, err := p.client.QueryRange(context.Background(), makeExpr(metric, *opts), timeRange) value, _, err := p.client.QueryRange(context.Background(), makeExpr(metric, *opts), timeRange)
if err != nil { if err != nil {
parsedResp.Error = err.Error() parsedResp.Error = err.Error()