alert
Former-commit-id: 545aa908151fb83153dbe89334f2a6cf1d3baf53
This commit is contained in:
parent
cecdf2be3a
commit
a8660d5d5b
|
@ -40,7 +40,6 @@ type remoteResp {
|
|||
|
||||
type (
|
||||
clustersLoadReq {
|
||||
adapterId int64 `form:"adapterId"`
|
||||
clusterName string `form:"clusterName"`
|
||||
}
|
||||
clustersLoadResp {
|
||||
|
|
|
@ -1,12 +1,34 @@
|
|||
syntax = "v1"
|
||||
|
||||
type CreateAlertRuleReq {
|
||||
CLusterId int64 `json:"clusterId"`
|
||||
ClusterName string `json:"clusterName"`
|
||||
Namespace string `json:"namespace"`
|
||||
Name string `json:"name"`
|
||||
PromQL string `json:"PromQL"`
|
||||
PromQL string `json:"promQL"`
|
||||
Duration string `json:"duration"`
|
||||
Labels map[string]string `json:"labels"`
|
||||
Annotations map[string]string `json:"annotations"`
|
||||
Annotations string `json:"annotations,optional"`
|
||||
AlertLevel string `json:"alertLevel"`
|
||||
AlertType string `json:"alertType"`
|
||||
}
|
||||
|
||||
type (
|
||||
alertRulesResp {
|
||||
Id int64 `json:"id"`
|
||||
ClusterName string `json:"clusterName"`
|
||||
Name string `json:"name"`
|
||||
PromQL string `json:"promQL"`
|
||||
Duration string `json:"duration"`
|
||||
Annotations string `json:"annotations"`
|
||||
AlertLevel string `json:"alertLevel"`
|
||||
}
|
||||
)
|
||||
|
||||
type (
|
||||
nodesLoadTopReq {
|
||||
ClusterName string `form:"clusterName"`
|
||||
Metrics string `form:"metrics"`
|
||||
}
|
||||
nodesLoadTopResp {
|
||||
data interface{} `json:"data"`
|
||||
}
|
||||
)
|
|
@ -948,9 +948,13 @@ service pcm {
|
|||
|
||||
@doc "alert rules"
|
||||
@handler alertRulesHandler
|
||||
get /monitoring/alert/rules
|
||||
get /monitoring/alert/rule returns (alertRulesResp)
|
||||
|
||||
@doc "cluster resource load"
|
||||
@handler clustersLoadHandler
|
||||
get /monitoring/cluster/load (clustersLoadReq) returns (clustersLoadResp)
|
||||
|
||||
@doc "node resource load"
|
||||
@handler nodesLoadTopHandler
|
||||
get /monitoring/node/top (nodesLoadTopReq) returns (nodesLoadTopResp)
|
||||
}
|
|
@ -1,6 +1,7 @@
|
|||
package cloud
|
||||
|
||||
import (
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
|
||||
"net/http"
|
||||
|
||||
"github.com/zeromicro/go-zero/rest/httpx"
|
||||
|
@ -19,10 +20,6 @@ func ClusterInfoHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
|
|||
|
||||
l := cloud.NewClusterInfoLogic(r.Context(), svcCtx)
|
||||
resp, err := l.ClusterInfo(&req)
|
||||
if err != nil {
|
||||
httpx.ErrorCtx(r.Context(), w, err)
|
||||
} else {
|
||||
httpx.OkJsonCtx(r.Context(), w, resp)
|
||||
}
|
||||
result.HttpResult(r, w, resp, err)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
package monitoring
|
||||
|
||||
import (
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
|
||||
"net/http"
|
||||
|
||||
"github.com/zeromicro/go-zero/rest/httpx"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/monitoring"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||
)
|
||||
|
@ -11,11 +11,7 @@ import (
|
|||
func AlertRulesHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
l := monitoring.NewAlertRulesLogic(r.Context(), svcCtx)
|
||||
err := l.AlertRules()
|
||||
if err != nil {
|
||||
httpx.ErrorCtx(r.Context(), w, err)
|
||||
} else {
|
||||
httpx.Ok(w)
|
||||
}
|
||||
resp, err := l.AlertRules()
|
||||
result.HttpResult(r, w, resp, err)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
package monitoring
|
||||
|
||||
import (
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/repository/result"
|
||||
"net/http"
|
||||
|
||||
"github.com/zeromicro/go-zero/rest/httpx"
|
||||
|
@ -19,10 +20,6 @@ func CreateAlertRuleHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
|
|||
|
||||
l := monitoring.NewCreateAlertRuleLogic(r.Context(), svcCtx)
|
||||
err := l.CreateAlertRule(&req)
|
||||
if err != nil {
|
||||
httpx.ErrorCtx(r.Context(), w, err)
|
||||
} else {
|
||||
httpx.Ok(w)
|
||||
}
|
||||
result.HttpResult(r, w, nil, err)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
package monitoring
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/zeromicro/go-zero/rest/httpx"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/logic/monitoring"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
|
||||
)
|
||||
|
||||
func NodesLoadTopHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
var req types.NodesLoadTopReq
|
||||
if err := httpx.Parse(r, &req); err != nil {
|
||||
httpx.ErrorCtx(r.Context(), w, err)
|
||||
return
|
||||
}
|
||||
|
||||
l := monitoring.NewNodesLoadTopLogic(r.Context(), svcCtx)
|
||||
resp, err := l.NodesLoadTop(&req)
|
||||
if err != nil {
|
||||
httpx.ErrorCtx(r.Context(), w, err)
|
||||
} else {
|
||||
httpx.OkJsonCtx(r.Context(), w, resp)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1189,7 +1189,7 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
|
|||
},
|
||||
{
|
||||
Method: http.MethodGet,
|
||||
Path: "/monitoring/alert/rules",
|
||||
Path: "/monitoring/alert/rule",
|
||||
Handler: monitoring.AlertRulesHandler(serverCtx),
|
||||
},
|
||||
{
|
||||
|
@ -1197,6 +1197,11 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
|
|||
Path: "/monitoring/cluster/load",
|
||||
Handler: monitoring.ClustersLoadHandler(serverCtx),
|
||||
},
|
||||
{
|
||||
Method: http.MethodGet,
|
||||
Path: "/monitoring/node/top",
|
||||
Handler: monitoring.NodesLoadTopHandler(serverCtx),
|
||||
},
|
||||
},
|
||||
rest.WithPrefix("/pcm/v1"),
|
||||
)
|
||||
|
|
|
@ -2,10 +2,11 @@ package monitoring
|
|||
|
||||
import (
|
||||
"context"
|
||||
"github.com/prometheus/alertmanager/api/v2/client/alert"
|
||||
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
|
||||
|
||||
"github.com/zeromicro/go-zero/core/logx"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||
)
|
||||
|
||||
type AlertRulesLogic struct {
|
||||
|
@ -22,13 +23,8 @@ func NewAlertRulesLogic(ctx context.Context, svcCtx *svc.ServiceContext) *AlertR
|
|||
}
|
||||
}
|
||||
|
||||
func (l *AlertRulesLogic) AlertRules() error {
|
||||
// todo: add your logic here and delete this line
|
||||
alerts, err := l.svcCtx.AlertClient.Alert.GetAlerts(&alert.GetAlertsParams{})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
println(alerts.Error())
|
||||
return nil
|
||||
return nil
|
||||
func (l *AlertRulesLogic) AlertRules() (resp *types.AlertRulesResp, err error) {
|
||||
resp = &types.AlertRulesResp{}
|
||||
l.svcCtx.DbEngin.Raw("SELECT ar.id,ar.*,GROUP_CONCAT(tc.`name` ORDER BY tc.`name` ASC SEPARATOR ',') as cluster_name FROM alert_rule ar JOIN t_cluster tc ON ar.cluster_id = tc.id WHERE ar.deleted_at IS NULL AND tc.deleted_at IS NULL GROUP BY ar.id;").Scan(&resp)
|
||||
return resp, nil
|
||||
}
|
||||
|
|
|
@ -28,8 +28,7 @@ func NewClustersLoadLogic(ctx context.Context, svcCtx *svc.ServiceContext) *Clus
|
|||
func (l *ClustersLoadLogic) ClustersLoad(req *types.ClustersLoadReq) (resp *types.ClustersLoadResp, err error) {
|
||||
resp = &types.ClustersLoadResp{}
|
||||
metrics := []string{"cluster_cpu_utilisation", "cluster_cpu_avail", "cluster_cpu_total", "cluster_memory_total", "cluster_memory_avail", "cluster_memory_utilisation", "cluster_disk_utilisation", "cluster_disk_avail", "cluster_disk_total"}
|
||||
result := l.svcCtx.PromClient.GetNamedMetrics(metrics, time.Now(), tracker.ClusterOption{AdapterId: req.AdapterId, ClusterName: req.ClusterName})
|
||||
result := l.svcCtx.PromClient.GetNamedMetrics(metrics, time.Now(), tracker.ClusterOption{ClusterName: req.ClusterName})
|
||||
resp.Data = result
|
||||
return resp, nil
|
||||
return
|
||||
}
|
||||
|
|
|
@ -3,12 +3,14 @@ package monitoring
|
|||
import (
|
||||
"context"
|
||||
v1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
|
||||
v12 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/util/intstr"
|
||||
|
||||
"github.com/zeromicro/go-zero/core/logx"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"
|
||||
"gitlink.org.cn/JointCloud/pcm-coordinator/pkg/models"
|
||||
tool "gitlink.org.cn/JointCloud/pcm-coordinator/pkg/utils"
|
||||
v12 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/util/intstr"
|
||||
"k8s.io/apimachinery/pkg/util/json"
|
||||
)
|
||||
|
||||
type CreateAlertRuleLogic struct {
|
||||
|
@ -25,16 +27,49 @@ func NewCreateAlertRuleLogic(ctx context.Context, svcCtx *svc.ServiceContext) *C
|
|||
}
|
||||
}
|
||||
|
||||
// RuleSelectorResp is the decoded body of the adapter's
// "/api/v1/monitoring/rule/selector" endpoint. CreateAlertRule reads the
// namespace and the rule selector match labels of the returned Prometheus
// object to fill in the PrometheusRule it creates.
type RuleSelectorResp struct {
|
||||
Code int `json:"code"`
|
||||
Msg string `json:"msg"`
|
||||
Prometheus v1.Prometheus `json:"data"` // carried in the "data" field of the JSON body
|
||||
}
|
||||
|
||||
func (l *CreateAlertRuleLogic) CreateAlertRule(req *types.CreateAlertRuleReq) error {
|
||||
// todo: add your logic here and delete this line
|
||||
|
||||
// save to db
|
||||
var alertRule models.AlertRule
|
||||
tool.Convert(req, &alertRule)
|
||||
alertRule.Id = tool.GenSnowflakeID()
|
||||
tx := l.svcCtx.DbEngin.Save(&alertRule)
|
||||
if tx.Error != nil {
|
||||
return tx.Error
|
||||
}
|
||||
|
||||
// query server http url.
|
||||
var server string
|
||||
l.svcCtx.DbEngin.Raw("select ta.server from t_adapter ta,t_cluster tc where ta.id = tc.adapter_id and tc.name = ?", &req.ClusterName).Scan(&server)
|
||||
|
||||
// rule selector
|
||||
var ruleSelectorResp RuleSelectorResp
|
||||
|
||||
response, err := l.svcCtx.HttpClient.R().
|
||||
SetQueryParams(map[string]string{
|
||||
"clusterName": req.ClusterName,
|
||||
}).
|
||||
SetResult(&ruleSelectorResp).
|
||||
ForceContentType("application/json").
|
||||
Get(server + "/api/v1/monitoring/rule/selector")
|
||||
if err != nil || response.IsError() {
|
||||
return err
|
||||
}
|
||||
// Data Filling
|
||||
ruleDuration := v1.Duration(req.Duration)
|
||||
rule := &v1.PrometheusRule{
|
||||
TypeMeta: v12.TypeMeta{Kind: "PrometheusRule",
|
||||
APIVersion: "monitoring.coreos.com/v1"},
|
||||
ObjectMeta: v12.ObjectMeta{
|
||||
Name: req.Name,
|
||||
Namespace: req.Namespace,
|
||||
Labels: map[string]string{
|
||||
"release": "prometheus",
|
||||
},
|
||||
Namespace: ruleSelectorResp.Prometheus.ObjectMeta.Namespace,
|
||||
Labels: ruleSelectorResp.Prometheus.Spec.RuleSelector.MatchLabels,
|
||||
},
|
||||
Spec: v1.PrometheusRuleSpec{
|
||||
Groups: []v1.RuleGroup{
|
||||
|
@ -48,13 +83,35 @@ func (l *CreateAlertRuleLogic) CreateAlertRule(req *types.CreateAlertRuleReq) er
|
|||
Labels: map[string]string{
|
||||
"severity": req.AlertLevel,
|
||||
},
|
||||
Annotations: req.Annotations,
|
||||
Annotations: map[string]string{"description": req.Annotations},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
println(rule.Kind)
|
||||
|
||||
ruleBytes, err := json.Marshal(rule)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// create prometheus rule
|
||||
response, err = l.svcCtx.HttpClient.R().
|
||||
SetBody(&OperateStruct{
|
||||
ClusterName: req.ClusterName,
|
||||
YamlString: string(ruleBytes),
|
||||
}).
|
||||
ForceContentType("application/json").
|
||||
Post(server + "/api/v1/operate/apply")
|
||||
if err != nil || response.IsError() {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// OperateStruct is the request body posted to the adapter's
// "/api/v1/operate/apply" endpoint.
type OperateStruct struct {
|
||||
ClusterName string `json:"clusterName"`
|
||||
YamlString string `json:"yamlString"` // CreateAlertRule fills this with the JSON-marshalled PrometheusRule, despite the field name
|
||||
}
|
||||
|
|
|
@ -0,0 +1,44 @@
|
|||
package monitoring
|
||||
|
||||
import (
	"context"
	"fmt"

	"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/svc"
	"gitlink.org.cn/JointCloud/pcm-coordinator/api/internal/types"

	"github.com/zeromicro/go-zero/core/logx"
)
|
||||
|
||||
// NodesLoadTopLogic holds the per-request state used to answer the node load
// query: a logger, the request context and the shared service context.
type NodesLoadTopLogic struct {
|
||||
logx.Logger
|
||||
ctx context.Context
|
||||
svcCtx *svc.ServiceContext
|
||||
}
|
||||
|
||||
func NewNodesLoadTopLogic(ctx context.Context, svcCtx *svc.ServiceContext) *NodesLoadTopLogic {
|
||||
return &NodesLoadTopLogic{
|
||||
Logger: logx.WithContext(ctx),
|
||||
ctx: ctx,
|
||||
svcCtx: svcCtx,
|
||||
}
|
||||
}
|
||||
|
||||
func (l *NodesLoadTopLogic) NodesLoadTop(req *types.NodesLoadTopReq) (resp *types.NodesLoadTopResp, err error) {
|
||||
|
||||
resp = &types.NodesLoadTopResp{}
|
||||
|
||||
var server string
|
||||
l.svcCtx.DbEngin.Raw("select ta.server from t_adapter ta,t_cluster tc where ta.id = tc.adapter_id and tc.name = ?", &req.ClusterName).Scan(&server)
|
||||
response, err := l.svcCtx.HttpClient.R().
|
||||
SetQueryParams(map[string]string{
|
||||
"clusterName": req.ClusterName,
|
||||
"metrics": req.Metrics,
|
||||
}).
|
||||
SetResult(&resp).
|
||||
ForceContentType("application/json").
|
||||
Get(server + "/api/v1/monitoring/node")
|
||||
if err != nil || response.IsError() {
|
||||
|
||||
}
|
||||
return resp, nil
|
||||
}
|
|
@ -32,7 +32,6 @@ type RemoteResp struct {
|
|||
}
|
||||
|
||||
type ClustersLoadReq struct {
|
||||
AdapterId int64 `form:"adapterId"`
|
||||
ClusterName string `form:"clusterName"`
|
||||
}
|
||||
|
||||
|
@ -5376,12 +5375,31 @@ type PushResourceInfoReq struct {
|
|||
}
|
||||
|
||||
type CreateAlertRuleReq struct {
|
||||
ClusterName string `json:"clusterName"`
|
||||
Namespace string `json:"namespace"`
|
||||
Name string `json:"name"`
|
||||
PromQL string `json:"PromQL"`
|
||||
Duration string `json:"duration"`
|
||||
Labels map[string]string `json:"labels"`
|
||||
Annotations map[string]string `json:"annotations"`
|
||||
AlertLevel string `json:"alertLevel"`
|
||||
CLusterId int64 `json:"clusterId"`
|
||||
ClusterName string `json:"clusterName"`
|
||||
Name string `json:"name"`
|
||||
PromQL string `json:"promQL"`
|
||||
Duration string `json:"duration"`
|
||||
Annotations string `json:"annotations,optional"`
|
||||
AlertLevel string `json:"alertLevel"`
|
||||
AlertType string `json:"alertType"`
|
||||
}
|
||||
|
||||
type AlertRulesResp struct {
|
||||
Id int64 `json:"id"`
|
||||
ClusterName string `json:"clusterName"`
|
||||
Name string `json:"name"`
|
||||
PromQL string `json:"promQL"`
|
||||
Duration string `json:"duration"`
|
||||
Annotations string `json:"annotations"`
|
||||
AlertLevel string `json:"alertLevel"`
|
||||
}
|
||||
|
||||
type NodesLoadTopReq struct {
|
||||
ClusterName string `form:"clusterName"`
|
||||
Metrics string `form:"metrics"`
|
||||
}
|
||||
|
||||
type NodesLoadTopResp struct {
|
||||
Data interface{} `json:"data"`
|
||||
}
|
||||
|
|
|
@ -0,0 +1,18 @@
|
|||
package models
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
)
|
||||
|
||||
// AlertRule is the database model of an alert rule; CreateAlertRule converts
// the incoming request into this struct and saves it.
type AlertRule struct {
|
||||
Id int64 `db:"id"` // id
|
||||
ClusterId int64 `db:"cluster_id"` // presumably the id of the cluster the rule belongs to — confirm
|
||||
Name string `db:"name"` // NOTE(review): original comment said "node name", apparently copied from a node model; presumably the rule name — confirm
|
||||
AlertType string `db:"alert_type"` // NOTE(review): original comment said "node type" followed by residue of a cpu_total field; presumably the alert type — confirm
|
||||
PromQL string `db:"prom_ql"`
|
||||
Duration string `db:"duration"`
|
||||
AlertLevel string `db:"alert_level"`
|
||||
Annotations string `db:"annotations"`
|
||||
CreatedBy sql.NullInt64 `db:"created_by"` // creator
|
||||
UpdatedBy sql.NullInt64 `db:"updated_by"` // last updater
|
||||
}
|
|
@ -19,32 +19,31 @@ import (
|
|||
"strings"
|
||||
)
|
||||
|
||||
const (
|
||||
StatefulSet = "StatefulSet"
|
||||
DaemonSet = "DaemonSet"
|
||||
Deployment = "Deployment"
|
||||
)
|
||||
|
||||
var promQLTemplates = map[string]string{
|
||||
|
||||
"cluster_cpu_utilisation": "cluster_cpu_utilisation{$1}",
|
||||
"cluster_memory_utilisation": "cluster_memory_utilisation{$1}",
|
||||
"cluster_disk_utilisation": "cluster_disk_utilisation{$1}",
|
||||
"cluster_cpu_total": "cluster_cpu_total{$1}",
|
||||
"cluster_memory_total": "cluster_memory_total{$1}",
|
||||
"cluster_disk_total": "cluster_disk_total{$1}",
|
||||
"cluster_cpu_avail": "cluster_cpu_total{$1}",
|
||||
"cluster_memory_avail": "cluster_memory_total{$1}",
|
||||
"cluster_disk_avail": "cluster_disk_total{$1}",
|
||||
"center_cpu_utilisation": "(sum by (adapter_id)(cluster_cpu_total{$1})-sum by (adapter_id)(cluster_cpu_avail{$1}))/sum by (adapter_id)(cluster_cpu_total{$1})",
|
||||
"center_memory_utilisation": "(sum by (adapter_id)(cluster_memory_total{$1})-sum by (adapter_id)(cluster_memory_avail{$1}))/sum by (adapter_id)(cluster_memory_total{$1})",
|
||||
"center_disk_utilisation": "(sum by (adapter_id)(cluster_disk_total{$1})-sum by (adapter_id)(cluster_disk_avail{$1}))/sum by (adapter_id)(cluster_disk_total{$1})",
|
||||
"center_top3": "topk(3,((sum by (adapter_id)(cluster_cpu_total)-sum by (adapter_id)(cluster_cpu_avail))/sum by (adapter_id)(cluster_cpu_total) + (sum by (adapter_id)(cluster_memory_total) - sum by (adapter_id)(cluster_memory_avail))/sum by (adapter_id)(cluster_memory_total) + (sum by (adapter_id)(cluster_disk_total)-sum by (adapter_id)(cluster_disk_avail))/sum by (adapter_id)(cluster_disk_total))/3)",
|
||||
"cluster_cpu_utilisation": "cluster_cpu_utilisation{$1}",
|
||||
"cluster_memory_utilisation": "cluster_memory_utilisation{$1}",
|
||||
"cluster_disk_utilisation": "cluster_disk_utilisation{$1}",
|
||||
"cluster_cpu_total": "cluster_cpu_total{$1}",
|
||||
"cluster_memory_total": "cluster_memory_total{$1}",
|
||||
"cluster_disk_total": "cluster_disk_total{$1}",
|
||||
"cluster_cpu_avail": "cluster_cpu_total{$1}",
|
||||
"cluster_memory_avail": "cluster_memory_total{$1}",
|
||||
"cluster_disk_avail": "cluster_disk_total{$1}",
|
||||
|
||||
// center
|
||||
"center_cpu_utilisation": "(sum by (adapter_id)(cluster_cpu_total{$1})-sum by (adapter_id)(cluster_cpu_avail{$1}))/sum by (adapter_id)(cluster_cpu_total{$1})",
|
||||
"center_memory_utilisation": "(sum by (adapter_id)(cluster_memory_total{$1})-sum by (adapter_id)(cluster_memory_avail{$1}))/sum by (adapter_id)(cluster_memory_total{$1})",
|
||||
"center_disk_utilisation": "(sum by (adapter_id)(cluster_disk_total{$1})-sum by (adapter_id)(cluster_disk_avail{$1}))/sum by (adapter_id)(cluster_disk_total{$1})",
|
||||
"center_top3": "topk(3,((sum by (adapter_id)(cluster_cpu_total)-sum by (adapter_id)(cluster_cpu_avail))/sum by (adapter_id)(cluster_cpu_total) + (sum by (adapter_id)(cluster_memory_total) - sum by (adapter_id)(cluster_memory_avail))/sum by (adapter_id)(cluster_memory_total) + (sum by (adapter_id)(cluster_disk_total)-sum by (adapter_id)(cluster_disk_avail))/sum by (adapter_id)(cluster_disk_total))/3)",
|
||||
|
||||
// namespace
|
||||
"namespace_cpu_usage": `round(namespace:container_cpu_usage_seconds_total:sum_rate{namespace!="", $1}, 0.001)`,
|
||||
"namespace_memory_usage": `namespace:container_memory_usage_bytes:sum{namespace!="", $1}`,
|
||||
"namespace_memory_usage_wo_cache": `namespace:container_memory_usage_bytes_wo_cache:sum{namespace!="", $1}`,
|
||||
"controller_cpu_usage_rate": `sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{}* on(namespace,pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel{$1}) by (workload)/sum( kube_pod_container_resource_limits{job="kube-state-metrics", resource="cpu"}* on(namespace,pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel{ }) by (workload)`,
|
||||
"controller_memory_usage_rate": `sum( container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", container!="", image!=""} * on(namespace,pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel{$1}) by (workload)/sum( kube_pod_container_resource_limits{job="kube-state-metrics", resource="memory"}* on(namespace,pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel{ }) by (workload)`,
|
||||
// controller
|
||||
"controller_cpu_usage_rate": `sum( node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{}* on(namespace,pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel{$1}) by (workload)/sum( kube_pod_container_resource_limits{job="kube-state-metrics", resource="cpu"}* on(namespace,pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel{ }) by (workload)`,
|
||||
"controller_memory_usage_rate": `sum( container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", container!="", image!=""} * on(namespace,pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel{$1}) by (workload)/sum( kube_pod_container_resource_limits{job="kube-state-metrics", resource="memory"}* on(namespace,pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel{ }) by (workload)`,
|
||||
// pod
|
||||
"pod_cpu_usage": `round(sum by (namespace, pod) (irate(container_cpu_usage_seconds_total{job="kubelet", pod!="", image!=""}[5m])) * on (namespace, pod) group_left(owner_kind,owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}, 0.001)`,
|
||||
"pod_cpu_usage_rate": `sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{ $1}) by (pod) / sum(kube_pod_container_resource_limits{ $1,unit="core"}) by (pod)`,
|
||||
|
|
Loading…
Reference in New Issue