Task detail monitoring (任务详情监控)
Former-commit-id: f584527e2f8b081846c6aa88798e50b26bb10328
parent ecd23ba389
commit f52314ea1f
@@ -176,11 +176,8 @@ type deleteTaskReq {
 type (
 	scheduleTaskByYamlReq {
-
 		Name        string     `yaml:"name"`
-		synergy     string     `yaml:"synergy"`
 		Description string     `yaml:"description"`
-		strategy    string     `yaml:"strategy"`
 		tenantId    int64      `yaml:"tenantId"`
 		tasks       []TaskYaml `yaml:"tasks"`
 	}
@@ -260,7 +257,12 @@ type (
 		TaskId int64 `path:"taskId"`
 	}
 	taskDetailResp {
-
+		CpuCores    float64 `json:"cpuCores"`
+		CpuRate     float64 `json:"cpuRate"`
+		CpuLimit    float64 `json:"cpuLimit"`
+		MemoryTotal float64 `json:"memoryTotal"`
+		MemoryRate  float64 `json:"memoryRate"`
+		MemoryLimit float64 `json:"memoryLimit"`
 	}
 )
 type (
@@ -42,7 +42,7 @@ service pcm {
 	// task detail endpoint
 	@handler TaskDetailHandler
-	get /core/taskDetail (taskDetailReq) returns (taskDetailResp)
+	get /core/taskDetail/:taskId (taskDetailReq) returns (taskDetailResp)
 
 	@handler JobTotalHandler
 	get /core/jobTotal returns (jobTotalResp)
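With the rebound route, go-zero fills taskDetailReq.TaskId from the URL path segment (the `path:"taskId"` tag) instead of a query string. A minimal client call, as a sketch (host, port, and the task id 42 are placeholders):

package main

import (
	"fmt"
	"io"
	"net/http"
)

func main() {
	// taskId now travels in the path, matching the route above.
	resp, err := http.Get("http://localhost:8888/core/taskDetail/42")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	body, _ := io.ReadAll(resp.Body)
	fmt.Println(string(body))
}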
@@ -1,7 +1,6 @@
 package config
 
 import (
-	"github.com/zeromicro/go-queue/kq"
 	"github.com/zeromicro/go-zero/core/logx"
 	"github.com/zeromicro/go-zero/core/stores/cache"
 	"github.com/zeromicro/go-zero/core/stores/redis"
@@ -11,21 +10,12 @@ import (
 
 type Config struct {
 	rest.RestConf
-	KqProducerConf struct {
-		Brokers    []string
-		HpcTopic   string
-		CloudTopic string
-		AiTopic    string
-	}
 	DB struct {
 		DataSource string
 	}
 	Redis   redis.RedisConf
 	Cache   cache.CacheConf
 	LogConf logx.LogConf
-	HpcConsumerConf   kq.KqConf
-	CloudConsumerConf kq.KqConf
-	AiConsumerConf    kq.KqConf
 	K8sNativeConf zrpc.RpcClientConf
 	ACRpcConf     zrpc.RpcClientConf
 	THRpcConf     zrpc.RpcClientConf
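With the Kafka producer and consumer blocks removed, the remaining Config is loaded as usual with go-zero. A minimal sketch (the YAML path and the config package import path are assumptions):

package main

import (
	"fmt"

	"github.com/zeromicro/go-zero/core/conf"
	"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/config" // assumed import path
)

func main() {
	var c config.Config
	// MustLoad panics if the file is missing or a field fails to parse.
	conf.MustLoad("etc/pcm.yaml", &c)
	fmt.Println(c.DB.DataSource)
}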
@@ -1,6 +1,8 @@
 package core
 
 import (
+	"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/utils"
+	"gitlink.org.cn/jcce-pcm/utils/result"
 	"net/http"
 
 	"github.com/zeromicro/go-zero/rest/httpx"
@@ -13,16 +15,22 @@ func ScheduleTaskByYamlHandler(svcCtx *svc.ServiceContext) http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		var req types.ScheduleTaskByYamlReq
 		if err := httpx.Parse(r, &req); err != nil {
-			httpx.ErrorCtx(r.Context(), w, err)
+			result.HttpResult(r, w, nil, err)
+			return
+		}
+		// parse the uploaded yaml file
+		_, fileHeader, err := r.FormFile("file")
+		if err != nil {
+			result.HttpResult(r, w, nil, err)
+			return
+		}
+		err = utils.Yaml2struct(fileHeader, &req)
+		if err != nil {
+			result.HttpResult(r, w, nil, err)
 			return
 		}
 
 		l := core.NewScheduleTaskByYamlLogic(r.Context(), svcCtx)
 		resp, err := l.ScheduleTaskByYaml(&req)
-		if err != nil {
-			httpx.ErrorCtx(r.Context(), w, err)
-		} else {
-			httpx.OkJsonCtx(r.Context(), w, resp)
-		}
+		result.HttpResult(r, w, resp, err)
 	}
 }
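utils.Yaml2struct itself is not shown in this commit; a plausible implementation, purely as a sketch, opens the multipart file header and unmarshals its YAML into the request struct (gopkg.in/yaml.v3 is an assumed dependency):

package utils

import (
	"io"
	"mime/multipart"

	"gopkg.in/yaml.v3"
)

// Yaml2struct decodes an uploaded YAML file into out.
// Hypothetical sketch; the real helper may differ.
func Yaml2struct(fileHeader *multipart.FileHeader, out interface{}) error {
	file, err := fileHeader.Open()
	if err != nil {
		return err
	}
	defer file.Close()
	data, err := io.ReadAll(file)
	if err != nil {
		return err
	}
	return yaml.Unmarshal(data, out)
}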
@@ -47,7 +47,7 @@ func RegisterHandlers(server *rest.Server, serverCtx *svc.ServiceContext) {
 		},
 		{
 			Method:  http.MethodGet,
-			Path:    "/core/taskDetail",
+			Path:    "/core/taskDetail/:taskId",
 			Handler: core.TaskDetailHandler(serverCtx),
 		},
 		{
@@ -2,9 +2,9 @@ package core
 
 import (
 	"context"
-	"encoding/json"
 	"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/constants"
 	"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/models"
+	"k8s.io/apimachinery/pkg/util/json"
 	"time"
 
 	"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
@@ -28,6 +28,7 @@ func NewScheduleTaskByYamlLogic(ctx context.Context, svcCtx *svc.ServiceContext) *ScheduleTaskByYamlLogic {
 }
 
 func (l *ScheduleTaskByYamlLogic) ScheduleTaskByYaml(req *types.ScheduleTaskByYamlReq) (resp *types.ScheduleTaskByYamlResp, err error) {
+	resp = &types.ScheduleTaskByYamlResp{}
 	bytes, err := json.Marshal(req)
 	if err != nil {
 		return nil, err
@@ -55,7 +56,10 @@ func (l *ScheduleTaskByYamlLogic) ScheduleTaskByYaml(req *types.ScheduleTaskByYa
 			logx.Error(err)
 			return nil, err
 		}
-		l.svcCtx.RedisClient.Publish(context.Background(), task.TaskType, reqMessage)
+		publish := l.svcCtx.RedisClient.Publish(context.Background(), task.TaskType, reqMessage)
+		if publish.Err() != nil {
+			return nil, publish.Err()
+		}
 	}
 	resp.TaskId = taskModel.Id
 	return resp, nil
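The scheduling message is now published per task type over Redis pub/sub, and the publish result is checked. On the receiving side, a participant would consume it with a subscription; a minimal sketch with go-redis v9 (the address and the "CLOUD" channel name are assumptions):

package main

import (
	"context"
	"fmt"

	"github.com/redis/go-redis/v9"
)

func main() {
	rdb := redis.NewClient(&redis.Options{Addr: "localhost:6379"})
	// Subscribe to the channel the coordinator publishes to (task.TaskType).
	pubsub := rdb.Subscribe(context.Background(), "CLOUD")
	defer pubsub.Close()
	for msg := range pubsub.Channel() {
		fmt.Println("received task message:", msg.Payload)
	}
}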
@@ -2,6 +2,10 @@ package core
 
 import (
 	"context"
+	"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/models"
+	"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/tracker"
+	"gitlink.org.cn/jcce-pcm/pcm-participant-kubernetes/kubernetesclient"
+	"time"
 
 	"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/svc"
 	"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/types"
@@ -24,7 +28,100 @@ func NewTaskDetailLogic(ctx context.Context, svcCtx *svc.ServiceContext) *TaskDetailLogic {
 }
 
 func (l *TaskDetailLogic) TaskDetail(req *types.TaskDetailReq) (resp *types.TaskDetailResp, err error) {
-	// todo: add your logic here and delete this line
-
-	return
+	resp = &types.TaskDetailResp{}
+	var clouds []models.Cloud
+	l.svcCtx.DbEngin.Where("task_id = ?", req.TaskId).Find(&clouds)
+	for _, cloud := range clouds {
+		// look up the metrics url for this participant
+		var metricsUrl string
+		l.svcCtx.DbEngin.Raw(" SELECT metrics_url FROM `sc_participant_phy_info` WHERE id = ? ", cloud.ParticipantId).Scan(&metricsUrl)
+
+		var pods []*kubernetesclient.Pod
+		switch cloud.Kind {
+		case "Job":
+			jobResult, err := l.svcCtx.K8sRpc[cloud.ParticipantId].JobDetail(context.Background(), &kubernetesclient.JobDetailReq{
+				Namespace: cloud.Namespace,
+				Name:      cloud.Name,
+			})
+			if err != nil {
+				return nil, err
+			}
+			// list the pods associated with the job
+			uid := jobResult.Job.Metadata.Labels["controller-uid"]
+			LabelSelector := "controller-uid=" + uid
+			podResp, err := l.svcCtx.K8sRpc[cloud.ParticipantId].PodList(context.Background(), &kubernetesclient.PodListReq{
+				ListOptions: &kubernetesclient.ListOptions{
+					LabelSelector: &LabelSelector,
+				},
+			})
+			if err != nil {
+				return nil, err
+			}
+			pods = podResp.PodList.Items
+		case "Deployment":
+			deploymentResult, err := l.svcCtx.K8sRpc[cloud.ParticipantId].DeploymentDetail(context.Background(), &kubernetesclient.DeploymentDetailReq{
+				Namespace: cloud.Namespace,
+				Name:      cloud.Name,
+			})
+			if err != nil {
+				return nil, err
+			}
+			// list the pods associated with the deployment
+			uid := deploymentResult.Deployment.Spec.Selector.MatchLabels["app"]
+			LabelSelector := "app=" + uid
+			podResp, err := l.svcCtx.K8sRpc[cloud.ParticipantId].PodList(context.Background(), &kubernetesclient.PodListReq{
+				ListOptions: &kubernetesclient.ListOptions{
+					LabelSelector: &LabelSelector,
+				},
+			})
+			if err != nil {
+				return nil, err
+			}
+			pods = podResp.PodList.Items
+		}
+		podsMetrics(metricsUrl, pods, resp)
+	}
+	return resp, nil
+}
+
+func podsMetrics(metricsUrl string, pods []*kubernetesclient.Pod, resp *types.TaskDetailResp) {
+	// query resource usage for each pod
+	for _, pod := range pods {
+		prometheusClient, _ := tracker.NewPrometheus(metricsUrl)
+		// cpu limit
+		podCpuLimit := prometheusClient.GetNamedMetrics([]string{"pod_cpu_resource_limits"}, time.Now(), tracker.PodOption{
+			PodName:       *pod.Metadata.Name,
+			NamespaceName: *pod.Metadata.Namespace,
+		})
+		resp.CpuLimit = metricAdd(resp.CpuLimit, podCpuLimit)
+		// cpu usage
+		podCpuUsage := prometheusClient.GetNamedMetrics([]string{"pod_cpu_usage"}, time.Now(), tracker.PodOption{
+			PodName:       *pod.Metadata.Name,
+			NamespaceName: *pod.Metadata.Namespace,
+		})
+		resp.CpuCores = metricAdd(resp.CpuCores, podCpuUsage)
+		// memory usage
+		podMemoryUsage := prometheusClient.GetNamedMetrics([]string{"pod_memory_usage"}, time.Now(), tracker.PodOption{
+			PodName:       *pod.Metadata.Name,
+			NamespaceName: *pod.Metadata.Namespace,
+		})
+		resp.MemoryTotal = metricAdd(resp.MemoryTotal, podMemoryUsage)
+		// memory limit
+		podMemoryLimit := prometheusClient.GetNamedMetrics([]string{"pod_memory_resource_limits"}, time.Now(), tracker.PodOption{
+			PodName:       *pod.Metadata.Name,
+			NamespaceName: *pod.Metadata.Namespace,
+		})
+		resp.MemoryLimit = metricAdd(resp.MemoryLimit, podMemoryLimit)
+	}
+}
+
+func metricAdd(z float64, metric []tracker.Metric) float64 {
+	if metric[0].MetricValues != nil {
+		z = z + metric[0].MetricValues[0].Sample.Value()
+	}
+	return z
 }
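Two caveats in the code above: metricAdd indexes metric[0] without checking the slice length, so an empty result from GetNamedMetrics would panic, and podsMetrics rebuilds the Prometheus client on every pod iteration (ignoring its error) even though metricsUrl does not change inside the loop. A defensive variant of the helper, as a sketch that is not part of the commit:

// metricAddSafe is a hypothetical hardened metricAdd: it tolerates an
// empty result set instead of panicking on metric[0].
func metricAddSafe(z float64, metric []tracker.Metric) float64 {
	if len(metric) == 0 || len(metric[0].MetricValues) == 0 {
		return z
	}
	return z + metric[0].MetricValues[0].Sample.Value()
}

Hoisting tracker.NewPrometheus(metricsUrl) out of the pod loop, and checking its error, would address the second point.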
@@ -158,9 +158,7 @@ type DeleteTaskReq struct {
 
 type ScheduleTaskByYamlReq struct {
 	Name        string     `yaml:"name"`
-	Synergy     string     `yaml:"synergy"`
 	Description string     `yaml:"description"`
-	Strategy    string     `yaml:"strategy"`
 	TenantId    int64      `yaml:"tenantId"`
 	Tasks       []TaskYaml `yaml:"tasks"`
 }
@@ -236,6 +234,12 @@ type TaskDetailReq struct {
 }
 
 type TaskDetailResp struct {
+	CpuCores    float64 `json:"cpuCores"`
+	CpuRate     float64 `json:"cpuRate"`
+	CpuLimit    float64 `json:"cpuLimit"`
+	MemoryTotal float64 `json:"memoryTotal"`
+	MemoryRate  float64 `json:"memoryRate"`
+	MemoryLimit float64 `json:"memoryLimit"`
 }
 
 type ListCenterResp struct {
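Given the json tags above, a populated response serializes as below; a quick check (field values are illustrative):

package main

import (
	"encoding/json"
	"fmt"

	"gitlink.org.cn/jcce-pcm/pcm-coordinator/api/internal/types"
)

func main() {
	resp := types.TaskDetailResp{CpuCores: 1.5, CpuLimit: 2, MemoryTotal: 5.2e8, MemoryLimit: 1e9}
	b, _ := json.Marshal(resp)
	fmt.Println(string(b))
	// {"cpuCores":1.5,"cpuRate":0,"cpuLimit":2,"memoryTotal":5.2e+08,"memoryRate":0,"memoryLimit":1e+09}
}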
go.mod
@@ -13,9 +13,11 @@ require (
 	github.com/go-resty/resty/v2 v2.7.0
 	github.com/go-sql-driver/mysql v1.7.1
 	github.com/jinzhu/copier v0.3.5
+	github.com/json-iterator/go v1.1.12
 	github.com/nacos-group/nacos-sdk-go/v2 v2.2.3
 	github.com/pkg/errors v0.9.1
 	github.com/prometheus/client_golang v1.16.0
+	github.com/prometheus/common v0.44.0
 	github.com/redis/go-redis/v9 v9.2.1
 	github.com/robfig/cron/v3 v3.0.1
 	github.com/shopspring/decimal v1.3.1
@@ -23,7 +25,7 @@ require (
 	github.com/zeromicro/go-zero v1.5.5
 	gitlink.org.cn/jcce-pcm/pcm-participant-ac v0.0.0-20231027084000-16876da5aa31
 	gitlink.org.cn/jcce-pcm/pcm-participant-ceph v0.0.0-20230817103341-2459e5bfc835
-	gitlink.org.cn/jcce-pcm/pcm-participant-kubernetes v0.0.0-20230830120334-bf6d99c715ef
+	gitlink.org.cn/jcce-pcm/pcm-participant-kubernetes v0.0.0-20231027083610-c8a292768f4a
 	gitlink.org.cn/jcce-pcm/pcm-participant-modelarts v0.0.0-20231024115530-f6fd0505d2a1
 	gitlink.org.cn/jcce-pcm/pcm-participant-octopus v0.0.0-20231011071802-c6a7637b74e4
 	gitlink.org.cn/jcce-pcm/pcm-participant-openstack v0.0.0-20231024105731-cbdceff549c9
@@ -79,7 +81,6 @@ require (
 	github.com/jinzhu/now v1.1.5 // indirect
 	github.com/jmespath/go-jmespath v0.4.0 // indirect
 	github.com/josharian/intern v1.0.0 // indirect
-	github.com/json-iterator/go v1.1.12 // indirect
 	github.com/klauspost/compress v1.15.15 // indirect
 	github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect
 	github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect
@@ -96,7 +97,6 @@ require (
 	github.com/pelletier/go-toml/v2 v2.1.0 // indirect
 	github.com/pierrec/lz4/v4 v4.1.17 // indirect
 	github.com/prometheus/client_model v0.4.0 // indirect
-	github.com/prometheus/common v0.44.0 // indirect
 	github.com/prometheus/procfs v0.10.1 // indirect
 	github.com/segmentio/kafka-go v0.4.38 // indirect
 	github.com/spaolacci/murmur3 v1.1.0 // indirect
go.sum
@@ -1037,8 +1037,8 @@ gitlink.org.cn/jcce-pcm/pcm-participant-ac v0.0.0-20231027084000-16876da5aa31 h1
 gitlink.org.cn/jcce-pcm/pcm-participant-ac v0.0.0-20231027084000-16876da5aa31/go.mod h1:DY45tXlPBWBptj9YjCHWnAK5LshvJ33PjFkE5/vtd4o=
 gitlink.org.cn/jcce-pcm/pcm-participant-ceph v0.0.0-20230817103341-2459e5bfc835 h1:WDCPqD8IrepGJXankkpG14Ny6inh9AldB0RX9WWa+ck=
 gitlink.org.cn/jcce-pcm/pcm-participant-ceph v0.0.0-20230817103341-2459e5bfc835/go.mod h1:r/KLzUpupCV5jdxSfgDhc2pVjP0fBi3VhAWRttsBn30=
-gitlink.org.cn/jcce-pcm/pcm-participant-kubernetes v0.0.0-20230830120334-bf6d99c715ef h1:s7JfXjka2MhGaDjKMJ57fj0k3XuDB6w+UlYHFLyJlUY=
-gitlink.org.cn/jcce-pcm/pcm-participant-kubernetes v0.0.0-20230830120334-bf6d99c715ef/go.mod h1:SFpXY1gy1ELYTo4P6EU68nTL2vKu1ZuH7nKecL16hJk=
+gitlink.org.cn/jcce-pcm/pcm-participant-kubernetes v0.0.0-20231027083610-c8a292768f4a h1:12PKPeDecCY7IfPp9JZmdMEIoJn6fF1oT4WW4ZKEUFM=
+gitlink.org.cn/jcce-pcm/pcm-participant-kubernetes v0.0.0-20231027083610-c8a292768f4a/go.mod h1:xtSfvDUd+jJhqHBBibZ1d9/ud3oN9nxaNYYHwz+CDgY=
 gitlink.org.cn/jcce-pcm/pcm-participant-modelarts v0.0.0-20231024115530-f6fd0505d2a1 h1:4Ibzcl4waYiHO3tdbqvcLUWEoV51ZaJhZBi7T518AA8=
 gitlink.org.cn/jcce-pcm/pcm-participant-modelarts v0.0.0-20231024115530-f6fd0505d2a1/go.mod h1:pisJKAI8FRFFUcBaH3Gob+ENXWRM97rpuYmv9s1raag=
 gitlink.org.cn/jcce-pcm/pcm-participant-octopus v0.0.0-20231011071802-c6a7637b74e4 h1:iv78VZ5+j6/VNkEyD/GSmTJ96rpxzpKDUNknAoXsAmg=
@@ -1,25 +1,11 @@
 package tracker
 
-/*
-Copyright 2020 KubeSphere Authors
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
+import "time"
 
 type Interface interface {
 	//GetMetric(expr string, time time.Time) Metric
 	//GetMetricOverTime(expr string, start, end time.Time, step time.Duration) Metric
-	//GetNamedMetrics(metrics []string, time time.Time, opt QueryOption) []Metric
+	GetNamedMetrics(metrics []string, time time.Time, opt QueryOption) []Metric
 	//GetNamedMetricsOverTime(metrics []string, start, end time.Time, step time.Duration, opt QueryOption) []Metric
 	//GetMetadata(namespace string) []Metadata
 	//GetMetricLabelSet(expr string, start, end time.Time) []map[string]string
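Uncommenting GetNamedMetrics promotes it into the tracker contract, which the new Prometheus implementation below satisfies and which podsMetrics relies on. A minimal caller, as a sketch (the endpoint URL and pod identifiers are placeholders):

package main

import (
	"fmt"
	"time"

	"gitlink.org.cn/jcce-pcm/pcm-coordinator/pkg/tracker"
)

func main() {
	client, err := tracker.NewPrometheus("http://prometheus.example:9090")
	if err != nil {
		panic(err)
	}
	metrics := client.GetNamedMetrics([]string{"pod_cpu_usage"}, time.Now(), tracker.PodOption{
		PodName:       "demo-pod",
		NamespaceName: "default",
	})
	fmt.Println(len(metrics))
}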
@@ -0,0 +1,486 @@
+package tracker
+
+import (
+	"fmt"
+	"strings"
+)
+
+const (
+	StatefulSet = "StatefulSet"
+	DaemonSet   = "DaemonSet"
+	Deployment  = "Deployment"
+)
+
+var promQLTemplates = map[string]string{
+	// cluster
+	"cluster_cpu_utilisation": ":node_cpu_utilisation:avg1m",
+	"cluster_cpu_usage": `round(:node_cpu_utilisation:avg1m * sum(node:node_num_cpu:sum), 0.001)`,
+	"cluster_cpu_total": "sum(node:node_num_cpu:sum)",
+	"cluster_memory_utilisation": ":node_memory_utilisation:",
+	"cluster_memory_available": "sum(node:node_memory_bytes_available:sum)",
+	"cluster_memory_total": "sum(node:node_memory_bytes_total:sum)",
+	"cluster_memory_usage_wo_cache": "sum(node:node_memory_bytes_total:sum) - sum(node:node_memory_bytes_available:sum)",
+	"cluster_net_utilisation": ":node_net_utilisation:sum_irate",
+	"cluster_net_bytes_transmitted": "sum(node:node_net_bytes_transmitted:sum_irate)",
+	"cluster_net_bytes_received": "sum(node:node_net_bytes_received:sum_irate)",
+	"cluster_disk_read_iops": "sum(node:data_volume_iops_reads:sum)",
+	"cluster_disk_write_iops": "sum(node:data_volume_iops_writes:sum)",
+	"cluster_disk_read_throughput": "sum(node:data_volume_throughput_bytes_read:sum)",
+	"cluster_disk_write_throughput": "sum(node:data_volume_throughput_bytes_written:sum)",
+	"cluster_disk_size_usage": `sum(max(node_filesystem_size_bytes{device=~"/dev/.*", device!~"/dev/loop\\d+", job="node-exporter"} - node_filesystem_avail_bytes{device=~"/dev/.*", device!~"/dev/loop\\d+", job="node-exporter"}) by (device, instance))`,
+	"cluster_disk_size_utilisation": `cluster:disk_utilization:ratio`,
+	"cluster_disk_size_capacity": `sum(max(node_filesystem_size_bytes{device=~"/dev/.*", device!~"/dev/loop\\d+", job="node-exporter"}) by (device, instance))`,
+	"cluster_disk_size_available": `sum(max(node_filesystem_avail_bytes{device=~"/dev/.*", device!~"/dev/loop\\d+", job="node-exporter"}) by (device, instance))`,
+	"cluster_disk_inode_total": `sum(node:node_inodes_total:)`,
+	"cluster_disk_inode_usage": `sum(node:node_inodes_total:) - sum(node:node_inodes_free:)`,
+	"cluster_disk_inode_utilisation": `cluster:disk_inode_utilization:ratio`,
+	"cluster_namespace_count": `count(kube_namespace_labels)`,
+	"cluster_pod_count": `cluster:pod:sum`,
+	"cluster_pod_quota": `sum(max(kube_node_status_capacity{resource="pods"}) by (node) unless on (node) (kube_node_status_condition{condition="Ready",status=~"unknown|false"} > 0))`,
+	"cluster_pod_utilisation": `cluster:pod_utilization:ratio`,
+	"cluster_pod_running_count": `cluster:pod_running:count`,
+	"cluster_pod_succeeded_count": `count(kube_pod_info unless on (pod) (kube_pod_status_phase{phase=~"Failed|Pending|Unknown|Running"} > 0) unless on (node) (kube_node_status_condition{condition="Ready",status=~"unknown|false"} > 0))`,
+	"cluster_pod_abnormal_count": `cluster:pod_abnormal:sum`,
+	"cluster_node_online": `sum(kube_node_status_condition{condition="Ready",status="true"})`,
+	"cluster_node_offline": `cluster:node_offline:sum`,
+	"cluster_node_total": `sum(kube_node_status_condition{condition="Ready"})`,
+	"cluster_cronjob_count": `sum(kube_cronjob_labels)`,
+	"cluster_pvc_count": `sum(kube_persistentvolumeclaim_info)`,
+	"cluster_daemonset_count": `sum(kube_daemonset_labels)`,
+	"cluster_deployment_count": `sum(kube_deployment_labels)`,
+	"cluster_endpoint_count": `sum(kube_endpoint_labels)`,
+	"cluster_hpa_count": `sum(kube_horizontalpodautoscaler_labels)`,
+	"cluster_job_count": `sum(kube_job_labels)`,
+	"cluster_statefulset_count": `sum(kube_statefulset_labels)`,
+	"cluster_replicaset_count": `count(kube_replicaset_labels)`,
+	"cluster_service_count": `sum(kube_service_info)`,
+	"cluster_secret_count": `sum(kube_secret_info)`,
+	"cluster_pv_count": `sum(kube_persistentvolume_labels)`,
+	"cluster_ingresses_extensions_count": `sum(kube_ingress_labels)`,
+	"cluster_load1": `sum(node_load1{job="node-exporter"}) / sum(node:node_num_cpu:sum)`,
+	"cluster_load5": `sum(node_load5{job="node-exporter"}) / sum(node:node_num_cpu:sum)`,
+	"cluster_load15": `sum(node_load15{job="node-exporter"}) / sum(node:node_num_cpu:sum)`,
+	"cluster_pod_abnormal_ratio": `cluster:pod_abnormal:ratio`,
+	"cluster_node_offline_ratio": `cluster:node_offline:ratio`,
+
+	// node
+	"node_cpu_utilisation": "node:node_cpu_utilisation:avg1m{$1}",
+	"node_cpu_total": "node:node_num_cpu:sum{$1}",
+	"node_memory_utilisation": "node:node_memory_utilisation:{$1}",
+	"node_memory_available": "node:node_memory_bytes_available:sum{$1}",
+	"node_memory_total": "node:node_memory_bytes_total:sum{$1}",
+	"node_memory_usage_wo_cache": "node:node_memory_bytes_total:sum{$1} - node:node_memory_bytes_available:sum{$1}",
+	"node_net_utilisation": "node:node_net_utilisation:sum_irate{$1}",
+	"node_net_bytes_transmitted": "node:node_net_bytes_transmitted:sum_irate{$1}",
+	"node_net_bytes_received": "node:node_net_bytes_received:sum_irate{$1}",
+	"node_disk_read_iops": "node:data_volume_iops_reads:sum{$1}",
+	"node_disk_write_iops": "node:data_volume_iops_writes:sum{$1}",
+	"node_disk_read_throughput": "node:data_volume_throughput_bytes_read:sum{$1}",
+	"node_disk_write_throughput": "node:data_volume_throughput_bytes_written:sum{$1}",
+	"node_disk_size_capacity": `sum(max(node_filesystem_size_bytes{device=~"/dev/.*", device!~"/dev/loop\\d+", job="node-exporter"} * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{$1}) by (device, node)) by (node)`,
+	"node_disk_size_available": `node:disk_space_available:{$1}`,
+	"node_disk_size_usage": `sum(max((node_filesystem_size_bytes{device=~"/dev/.*", device!~"/dev/loop\\d+", job="node-exporter"} - node_filesystem_avail_bytes{device=~"/dev/.*", device!~"/dev/loop\\d+", job="node-exporter"}) * on (namespace, pod) group_left(node) node_namespace_pod:kube_pod_info:{$1}) by (device, node)) by (node)`,
+	"node_disk_size_utilisation": `node:disk_space_utilization:ratio{$1}`,
+	"node_disk_inode_total": `node:node_inodes_total:{$1}`,
+	"node_disk_inode_usage": `node:node_inodes_total:{$1} - node:node_inodes_free:{$1}`,
+	"node_disk_inode_utilisation": `node:disk_inode_utilization:ratio{$1}`,
+	"node_pod_count": `node:pod_count:sum{$1}`,
+	"node_pod_quota": `max(kube_node_status_capacity{resource="pods",$1}) by (node) unless on (node) (kube_node_status_condition{condition="Ready",status=~"unknown|false"} > 0)`,
+	"node_pod_utilisation": `node:pod_utilization:ratio{$1}`,
+	"node_pod_running_count": `node:pod_running:count{$1}`,
+	"node_pod_succeeded_count": `node:pod_succeeded:count{$1}`,
+	"node_pod_abnormal_count": `node:pod_abnormal:count{$1}`,
+	"node_cpu_usage": `round(node:node_cpu_utilisation:avg1m{$1} * node:node_num_cpu:sum{$1}, 0.001)`,
+	"node_load1": `node:load1:ratio{$1}`,
+	"node_load5": `node:load5:ratio{$1}`,
+	"node_load15": `node:load15:ratio{$1}`,
+	"node_pod_abnormal_ratio": `node:pod_abnormal:ratio{$1}`,
+	"node_pleg_quantile": `node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile{$1}`,
+
+	"node_device_size_usage": `sum by(device, node, host_ip, role) (node_filesystem_size_bytes{device!~"/dev/loop\\d+",device=~"/dev/.*",job="node-exporter"} * on(namespace, pod) group_left(node, host_ip, role) node_namespace_pod:kube_pod_info:{$1}) - sum by(device, node, host_ip, role) (node_filesystem_avail_bytes{device!~"/dev/loop\\d+",device=~"/dev/.*",job="node-exporter"} * on(namespace, pod) group_left(node, host_ip, role) node_namespace_pod:kube_pod_info:{$1})`,
+	"node_device_size_utilisation": `1 - sum by(device, node, host_ip, role) (node_filesystem_avail_bytes{device!~"/dev/loop\\d+",device=~"/dev/.*",job="node-exporter"} * on(namespace, pod) group_left(node, host_ip, role) node_namespace_pod:kube_pod_info:{$1}) / sum by(device, node, host_ip, role) (node_filesystem_size_bytes{device!~"/dev/loop\\d+",device=~"/dev/.*",job="node-exporter"} * on(namespace, pod) group_left(node, host_ip, role) node_namespace_pod:kube_pod_info:{$1})`,
+
+	// workspace
+	"workspace_cpu_usage": `round(sum by (workspace) (namespace:container_cpu_usage_seconds_total:sum_rate{namespace!="", $1}), 0.001)`,
+	"workspace_memory_usage": `sum by (workspace) (namespace:container_memory_usage_bytes:sum{namespace!="", $1})`,
+	"workspace_memory_usage_wo_cache": `sum by (workspace) (namespace:container_memory_usage_bytes_wo_cache:sum{namespace!="", $1})`,
+	"workspace_net_bytes_transmitted": `sum by (workspace) (sum by (namespace) (irate(container_network_transmit_bytes_total{namespace!="", pod!="", interface!~"^(cali.+|tunl.+|dummy.+|kube.+|flannel.+|cni.+|docker.+|veth.+|lo.*)", job="kubelet"}[5m])) * on (namespace) group_left(workspace) kube_namespace_labels{$1}) or on(workspace) max by(workspace) (kube_namespace_labels{$1} * 0)`,
+	"workspace_net_bytes_received": `sum by (workspace) (sum by (namespace) (irate(container_network_receive_bytes_total{namespace!="", pod!="", interface!~"^(cali.+|tunl.+|dummy.+|kube.+|flannel.+|cni.+|docker.+|veth.+|lo.*)", job="kubelet"}[5m])) * on (namespace) group_left(workspace) kube_namespace_labels{$1}) or on(workspace) max by(workspace) (kube_namespace_labels{$1} * 0)`,
+	"workspace_pod_count": `sum by (workspace) (kube_pod_status_phase{phase!~"Failed|Succeeded", namespace!=""} * on (namespace) group_left(workspace)(kube_namespace_labels{$1})) or on(workspace) max by(workspace) (kube_namespace_labels{$1} * 0)`,
+	"workspace_pod_running_count": `sum by (workspace) (kube_pod_status_phase{phase="Running", namespace!=""} * on (namespace) group_left(workspace)(kube_namespace_labels{$1})) or on(workspace) max by(workspace) (kube_namespace_labels{$1} * 0)`,
+	"workspace_pod_succeeded_count": `sum by (workspace) (kube_pod_status_phase{phase="Succeeded", namespace!=""} * on (namespace) group_left(workspace)(kube_namespace_labels{$1})) or on(workspace) max by(workspace) (kube_namespace_labels{$1} * 0)`,
+	"workspace_pod_abnormal_count": `count by (workspace) ((kube_pod_info{node!=""} unless on (pod, namespace) (kube_pod_status_phase{job="kube-state-metrics", phase="Succeeded"}>0) unless on (pod, namespace) ((kube_pod_status_ready{job="kube-state-metrics", condition="true"}>0) and on (pod, namespace) (kube_pod_status_phase{job="kube-state-metrics", phase="Running"}>0)) unless on (pod, namespace) (kube_pod_container_status_waiting_reason{job="kube-state-metrics", reason="ContainerCreating"}>0)) * on (namespace) group_left(workspace)(kube_namespace_labels{$1}))`,
+	"workspace_ingresses_extensions_count": `sum by (workspace) (kube_ingress_labels{namespace!=""} * on (namespace) group_left(workspace)(kube_namespace_labels{$1}))`,
+	"workspace_cronjob_count": `sum by (workspace) (kube_cronjob_labels{namespace!=""} * on (namespace) group_left(workspace)(kube_namespace_labels{$1}))`,
+	"workspace_pvc_count": `sum by (workspace) (kube_persistentvolumeclaim_info{namespace!=""} * on (namespace) group_left(workspace)(kube_namespace_labels{$1}))`,
+	"workspace_daemonset_count": `sum by (workspace) (kube_daemonset_labels{namespace!=""} * on (namespace) group_left(workspace)(kube_namespace_labels{$1}))`,
+	"workspace_deployment_count": `sum by (workspace) (kube_deployment_labels{namespace!=""} * on (namespace) group_left(workspace)(kube_namespace_labels{$1}))`,
+	"workspace_endpoint_count": `sum by (workspace) (kube_endpoint_labels{namespace!=""} * on (namespace) group_left(workspace)(kube_namespace_labels{$1}))`,
+	"workspace_hpa_count": `sum by (workspace) (kube_horizontalpodautoscaler_labels{namespace!=""} * on (namespace) group_left(workspace)(kube_namespace_labels{$1}))`,
+	"workspace_job_count": `sum by (workspace) (kube_job_labels{namespace!=""} * on (namespace) group_left(workspace)(kube_namespace_labels{$1}))`,
+	"workspace_statefulset_count": `sum by (workspace) (kube_statefulset_labels{namespace!=""} * on (namespace) group_left(workspace)(kube_namespace_labels{$1}))`,
+	"workspace_replicaset_count": `count by (workspace) (kube_replicaset_labels{namespace!=""} * on (namespace) group_left(workspace)(kube_namespace_labels{$1}))`,
+	"workspace_service_count": `sum by (workspace) (kube_service_info{namespace!=""} * on (namespace) group_left(workspace)(kube_namespace_labels{$1}))`,
+	"workspace_secret_count": `sum by (workspace) (kube_secret_info{namespace!=""} * on (namespace) group_left(workspace)(kube_namespace_labels{$1}))`,
+	"workspace_pod_abnormal_ratio": `count by (workspace) ((kube_pod_info{node!=""} unless on (pod, namespace) (kube_pod_status_phase{job="kube-state-metrics", phase="Succeeded"}>0) unless on (pod, namespace) ((kube_pod_status_ready{job="kube-state-metrics", condition="true"}>0) and on (pod, namespace) (kube_pod_status_phase{job="kube-state-metrics", phase="Running"}>0)) unless on (pod, namespace) (kube_pod_container_status_waiting_reason{job="kube-state-metrics", reason="ContainerCreating"}>0)) * on (namespace) group_left(workspace) kube_namespace_labels{$1}) / sum by (workspace) (kube_pod_status_phase{phase!="Succeeded", namespace!=""} * on (namespace) group_left(workspace)(kube_namespace_labels{$1}))`,
+
+	// namespace
+	"namespace_cpu_usage": `round(namespace:container_cpu_usage_seconds_total:sum_rate{namespace!="", $1}, 0.001)`,
+	"namespace_memory_usage": `namespace:container_memory_usage_bytes:sum{namespace!="", $1}`,
+	"namespace_memory_usage_wo_cache": `namespace:container_memory_usage_bytes_wo_cache:sum{namespace!="", $1}`,
+	"namespace_net_bytes_transmitted": `sum by (namespace) (irate(container_network_transmit_bytes_total{namespace!="", pod!="", interface!~"^(cali.+|tunl.+|dummy.+|kube.+|flannel.+|cni.+|docker.+|veth.+|lo.*)", job="kubelet"}[5m]) * on (namespace) group_left(workspace) kube_namespace_labels{$1}) or on(namespace) max by(namespace) (kube_namespace_labels{$1} * 0)`,
+	"namespace_net_bytes_received": `sum by (namespace) (irate(container_network_receive_bytes_total{namespace!="", pod!="", interface!~"^(cali.+|tunl.+|dummy.+|kube.+|flannel.+|cni.+|docker.+|veth.+|lo.*)", job="kubelet"}[5m]) * on (namespace) group_left(workspace) kube_namespace_labels{$1}) or on(namespace) max by(namespace) (kube_namespace_labels{$1} * 0)`,
+	"namespace_pod_count": `sum by (namespace) (kube_pod_status_phase{phase!~"Failed|Succeeded", namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1}) or on(namespace) max by(namespace) (kube_namespace_labels{$1} * 0)`,
+	"namespace_pod_running_count": `sum by (namespace) (kube_pod_status_phase{phase="Running", namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1}) or on(namespace) max by(namespace) (kube_namespace_labels{$1} * 0)`,
+	"namespace_pod_succeeded_count": `sum by (namespace) (kube_pod_status_phase{phase="Succeeded", namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1}) or on(namespace) max by(namespace) (kube_namespace_labels{$1} * 0)`,
+	"namespace_pod_abnormal_count": `namespace:pod_abnormal:count{namespace!="", $1}`,
+	"namespace_pod_abnormal_ratio": `namespace:pod_abnormal:ratio{namespace!="", $1}`,
+	"namespace_memory_limit_hard": `min by (namespace) (kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", resource="limits.memory"} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
+	"namespace_cpu_limit_hard": `min by (namespace) (kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", resource="limits.cpu"} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
+	"namespace_pod_count_hard": `min by (namespace) (kube_resourcequota{resourcequota!="quota", type="hard", namespace!="", resource="count/pods"} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
+	"namespace_cronjob_count": `sum by (namespace) (kube_cronjob_labels{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
+	"namespace_pvc_count": `sum by (namespace) (kube_persistentvolumeclaim_info{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
+	"namespace_daemonset_count": `sum by (namespace) (kube_daemonset_labels{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
+	"namespace_deployment_count": `sum by (namespace) (kube_deployment_labels{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
+	"namespace_endpoint_count": `sum by (namespace) (kube_endpoint_labels{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
+	"namespace_hpa_count": `sum by (namespace) (kube_horizontalpodautoscaler_labels{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
+	"namespace_job_count": `sum by (namespace) (kube_job_labels{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
+	"namespace_statefulset_count": `sum by (namespace) (kube_statefulset_labels{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
+	"namespace_replicaset_count": `count by (namespace) (kube_replicaset_labels{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
+	"namespace_service_count": `sum by (namespace) (kube_service_info{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
+	"namespace_secret_count": `sum by (namespace) (kube_secret_info{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
+	"namespace_configmap_count": `sum by (namespace) (kube_configmap_info{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
+	"namespace_ingresses_extensions_count": `sum by (namespace) (kube_ingress_labels{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
+	"namespace_s2ibuilder_count": `sum by (namespace) (s2i_s2ibuilder_created{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
+
+	// ingress
+	"ingress_request_count": `round(sum(increase(nginx_ingress_controller_requests{$1,$2}[$3])))`,
+	"ingress_request_4xx_count": `round(sum(increase(nginx_ingress_controller_requests{$1,$2,status=~"[4].*"}[$3])))`,
+	"ingress_request_5xx_count": `round(sum(increase(nginx_ingress_controller_requests{$1,$2,status=~"[5].*"}[$3])))`,
+	"ingress_active_connections": `sum(avg_over_time(nginx_ingress_controller_nginx_process_connections{$2,state="active"}[$3]))`,
+	"ingress_success_rate": `sum(rate(nginx_ingress_controller_requests{$1,$2,status!~"[4-5].*"}[$3])) / sum(rate(nginx_ingress_controller_requests{$1,$2}[$3]))`,
+	"ingress_request_duration_average": `sum_over_time(nginx_ingress_controller_request_duration_seconds_sum{$1,$2}[$3])/sum_over_time(nginx_ingress_controller_request_duration_seconds_count{$1,$2}[$3])`,
+	"ingress_request_duration_50percentage": `histogram_quantile(0.50, sum by (le) (rate(nginx_ingress_controller_request_duration_seconds_bucket{$1,$2}[$3])))`,
+	"ingress_request_duration_95percentage": `histogram_quantile(0.95, sum by (le) (rate(nginx_ingress_controller_request_duration_seconds_bucket{$1,$2}[$3])))`,
+	"ingress_request_duration_99percentage": `histogram_quantile(0.99, sum by (le) (rate(nginx_ingress_controller_request_duration_seconds_bucket{$1,$2}[$3])))`,
+	"ingress_request_volume": `round(sum(irate(nginx_ingress_controller_requests{$1,$2}[$3])), 0.001)`,
+	"ingress_request_volume_by_ingress": `round(sum(irate(nginx_ingress_controller_requests{$1,$2}[$3])) by (ingress), 0.001)`,
+	"ingress_request_network_sent": `sum(irate(nginx_ingress_controller_response_size_sum{$1,$2}[$3]))`,
+	"ingress_request_network_received": `sum(irate(nginx_ingress_controller_request_size_sum{$1,$2}[$3]))`,
+	"ingress_request_memory_bytes": `avg(nginx_ingress_controller_nginx_process_resident_memory_bytes{$2})`,
+	"ingress_request_cpu_usage": `avg(rate(nginx_ingress_controller_nginx_process_cpu_seconds_total{$2}[5m]))`,
+
+	// workload
+	"workload_cpu_usage": `round(namespace:workload_cpu_usage:sum{$1}, 0.001)`,
+	"workload_memory_usage": `namespace:workload_memory_usage:sum{$1}`,
+	"workload_memory_usage_wo_cache": `namespace:workload_memory_usage_wo_cache:sum{$1}`,
+	"workload_net_bytes_transmitted": `namespace:workload_net_bytes_transmitted:sum_irate{$1}`,
+	"workload_net_bytes_received": `namespace:workload_net_bytes_received:sum_irate{$1}`,
+
+	"workload_deployment_replica": `label_join(sum (label_join(label_replace(kube_deployment_spec_replicas{$2}, "owner_kind", "Deployment", "", ""), "workload", "", "deployment")) by (namespace, owner_kind, workload), "workload", ":", "owner_kind", "workload")`,
+	"workload_deployment_replica_available": `label_join(sum (label_join(label_replace(kube_deployment_status_replicas_available{$2}, "owner_kind", "Deployment", "", ""), "workload", "", "deployment")) by (namespace, owner_kind, workload), "workload", ":", "owner_kind", "workload")`,
+	"workload_statefulset_replica": `label_join(sum (label_join(label_replace(kube_statefulset_replicas{$2}, "owner_kind", "StatefulSet", "", ""), "workload", "", "statefulset")) by (namespace, owner_kind, workload), "workload", ":", "owner_kind", "workload")`,
+	"workload_statefulset_replica_available": `label_join(sum (label_join(label_replace(kube_statefulset_status_replicas_current{$2}, "owner_kind", "StatefulSet", "", ""), "workload", "", "statefulset")) by (namespace, owner_kind, workload), "workload", ":", "owner_kind", "workload")`,
+	"workload_daemonset_replica": `label_join(sum (label_join(label_replace(kube_daemonset_status_desired_number_scheduled{$2}, "owner_kind", "DaemonSet", "", ""), "workload", "", "daemonset")) by (namespace, owner_kind, workload), "workload", ":", "owner_kind", "workload")`,
+	"workload_daemonset_replica_available": `label_join(sum (label_join(label_replace(kube_daemonset_status_number_available{$2}, "owner_kind", "DaemonSet", "", ""), "workload", "", "daemonset")) by (namespace, owner_kind, workload), "workload", ":", "owner_kind", "workload")`,
+	"workload_deployment_unavailable_replicas_ratio": `namespace:deployment_unavailable_replicas:ratio{$1}`,
+	"workload_daemonset_unavailable_replicas_ratio": `namespace:daemonset_unavailable_replicas:ratio{$1}`,
+	"workload_statefulset_unavailable_replicas_ratio": `namespace:statefulset_unavailable_replicas:ratio{$1}`,
+
+	// pod
+	"pod_cpu_usage": `round(sum by (namespace, pod) (irate(container_cpu_usage_seconds_total{job="kubelet", pod!="", image!=""}[5m])) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}, 0.001)`,
+	"pod_memory_usage": `sum by (namespace, pod) (container_memory_usage_bytes{job="kubelet", pod!="", image!=""}) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`,
+	"pod_memory_usage_wo_cache": `sum by (namespace, pod) (container_memory_working_set_bytes{job="kubelet", pod!="", image!=""}) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`,
+	"pod_net_bytes_transmitted": `sum by (namespace, pod) (irate(container_network_transmit_bytes_total{pod!="", interface!~"^(cali.+|tunl.+|dummy.+|kube.+|flannel.+|cni.+|docker.+|veth.+|lo.*)", job="kubelet"}[5m])) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`,
+	"pod_net_bytes_received": `sum by (namespace, pod) (irate(container_network_receive_bytes_total{pod!="", interface!~"^(cali.+|tunl.+|dummy.+|kube.+|flannel.+|cni.+|docker.+|veth.+|lo.*)", job="kubelet"}[5m])) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`,
+	"pod_cpu_resource_limits": `sum by (namespace, pod) (kube_pod_container_resource_limits{origin_prometheus=~"",resource="cpu",unit="core"}) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`,
+	"pod_memory_resource_limits": `sum by (namespace, pod) (kube_pod_container_resource_limits{origin_prometheus=~"",resource="memory",unit="byte"}) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`,
+
+	// container
+	"container_cpu_usage": `round(sum by (namespace, pod, container) (irate(container_cpu_usage_seconds_total{job="kubelet", container!="POD", container!="", image!="", $1}[5m])), 0.001)`,
+	"container_memory_usage": `sum by (namespace, pod, container) (container_memory_usage_bytes{job="kubelet", container!="POD", container!="", image!="", $1})`,
+	"container_memory_usage_wo_cache": `sum by (namespace, pod, container) (container_memory_working_set_bytes{job="kubelet", container!="POD", container!="", image!="", $1})`,
+	"container_processes_usage": `sum by (namespace, pod, container) (container_processes{job="kubelet", container!="POD", container!="", image!="", $1})`,
+	"container_threads_usage": `sum by (namespace, pod, container) (container_threads {job="kubelet", container!="POD", container!="", image!="", $1})`,
+
+	// pvc
+	"pvc_inodes_available": `max by (namespace, persistentvolumeclaim) (kubelet_volume_stats_inodes_free) * on (namespace, persistentvolumeclaim) group_left (storageclass) kube_persistentvolumeclaim_info{$1}`,
+	"pvc_inodes_used": `max by (namespace, persistentvolumeclaim) (kubelet_volume_stats_inodes_used) * on (namespace, persistentvolumeclaim) group_left (storageclass) kube_persistentvolumeclaim_info{$1}`,
+	"pvc_inodes_total": `max by (namespace, persistentvolumeclaim) (kubelet_volume_stats_inodes) * on (namespace, persistentvolumeclaim) group_left (storageclass) kube_persistentvolumeclaim_info{$1}`,
+	"pvc_inodes_utilisation": `max by (namespace, persistentvolumeclaim) (kubelet_volume_stats_inodes_used / kubelet_volume_stats_inodes) * on (namespace, persistentvolumeclaim) group_left (storageclass) kube_persistentvolumeclaim_info{$1}`,
+	"pvc_bytes_available": `max by (namespace, persistentvolumeclaim) (kubelet_volume_stats_available_bytes) * on (namespace, persistentvolumeclaim) group_left (storageclass) kube_persistentvolumeclaim_info{$1}`,
+	"pvc_bytes_used": `max by (namespace, persistentvolumeclaim) (kubelet_volume_stats_used_bytes) * on (namespace, persistentvolumeclaim) group_left (storageclass) kube_persistentvolumeclaim_info{$1}`,
+	"pvc_bytes_total": `max by (namespace, persistentvolumeclaim) (kubelet_volume_stats_capacity_bytes) * on (namespace, persistentvolumeclaim) group_left (storageclass) kube_persistentvolumeclaim_info{$1}`,
+	"pvc_bytes_utilisation": `max by (namespace, persistentvolumeclaim) (kubelet_volume_stats_used_bytes / kubelet_volume_stats_capacity_bytes) * on (namespace, persistentvolumeclaim) group_left (storageclass) kube_persistentvolumeclaim_info{$1}`,
+
+	// component
+	"etcd_server_list": `label_replace(up{job="etcd"}, "node_ip", "$1", "instance", "(.*):.*")`,
+	"etcd_server_total": `count(up{job="etcd"})`,
+	"etcd_server_up_total": `etcd:up:sum`,
+	"etcd_server_has_leader": `label_replace(etcd_server_has_leader, "node_ip", "$1", "instance", "(.*):.*")`,
+	"etcd_server_is_leader": `label_replace(etcd_server_is_leader, "node_ip", "$1", "instance", "(.*):.*")`,
+	"etcd_server_leader_changes": `label_replace(etcd:etcd_server_leader_changes_seen:sum_changes, "node_ip", "$1", "node", "(.*)")`,
+	"etcd_server_proposals_failed_rate": `avg(etcd:etcd_server_proposals_failed:sum_irate)`,
+	"etcd_server_proposals_applied_rate": `avg(etcd:etcd_server_proposals_applied:sum_irate)`,
+	"etcd_server_proposals_committed_rate": `avg(etcd:etcd_server_proposals_committed:sum_irate)`,
+	"etcd_server_proposals_pending_count": `avg(etcd:etcd_server_proposals_pending:sum)`,
+	"etcd_mvcc_db_size": `avg(etcd:etcd_mvcc_db_total_size:sum)`,
+	"etcd_network_client_grpc_received_bytes": `sum(etcd:etcd_network_client_grpc_received_bytes:sum_irate)`,
+	"etcd_network_client_grpc_sent_bytes": `sum(etcd:etcd_network_client_grpc_sent_bytes:sum_irate)`,
+	"etcd_grpc_call_rate": `sum(etcd:grpc_server_started:sum_irate)`,
+	"etcd_grpc_call_failed_rate": `sum(etcd:grpc_server_handled:sum_irate)`,
+	"etcd_grpc_server_msg_received_rate": `sum(etcd:grpc_server_msg_received:sum_irate)`,
+	"etcd_grpc_server_msg_sent_rate": `sum(etcd:grpc_server_msg_sent:sum_irate)`,
+	"etcd_disk_wal_fsync_duration": `avg(etcd:etcd_disk_wal_fsync_duration:avg)`,
+	"etcd_disk_wal_fsync_duration_quantile": `avg(etcd:etcd_disk_wal_fsync_duration:histogram_quantile) by (quantile)`,
+	"etcd_disk_backend_commit_duration": `avg(etcd:etcd_disk_backend_commit_duration:avg)`,
+	"etcd_disk_backend_commit_duration_quantile": `avg(etcd:etcd_disk_backend_commit_duration:histogram_quantile) by (quantile)`,
+
+	"apiserver_up_sum": `apiserver:up:sum`,
+	"apiserver_request_rate": `apiserver:apiserver_request_total:sum_irate`,
+	"apiserver_request_by_verb_rate": `apiserver:apiserver_request_total:sum_verb_irate`,
+	"apiserver_request_latencies": `apiserver:apiserver_request_duration:avg`,
+	"apiserver_request_by_verb_latencies": `apiserver:apiserver_request_duration:avg_by_verb`,
+
+	"scheduler_up_sum": `scheduler:up:sum`,
+	"scheduler_schedule_attempts": `scheduler:scheduler_schedule_attempts:sum`,
+	"scheduler_schedule_attempt_rate": `scheduler:scheduler_schedule_attempts:sum_rate`,
+	"scheduler_e2e_scheduling_latency": `scheduler:scheduler_e2e_scheduling_duration:avg`,
+	"scheduler_e2e_scheduling_latency_quantile": `scheduler:scheduler_e2e_scheduling_duration:histogram_quantile`,
+}
+
+func makeExpr(metric string, opts QueryOptions) string {
+	tmpl := promQLTemplates[metric]
+	switch opts.Level {
+	case LevelCluster:
+		return tmpl
+	case LevelNode:
+		return makeNodeMetricExpr(tmpl, opts)
+	case LevelWorkspace:
+		return makeWorkspaceMetricExpr(tmpl, opts)
+	case LevelNamespace:
+		return makeNamespaceMetricExpr(tmpl, opts)
+	case LevelWorkload:
+		return makeWorkloadMetricExpr(metric, tmpl, opts)
+	case LevelPod:
+		return makePodMetricExpr(tmpl, opts)
+	case LevelContainer:
+		return makeContainerMetricExpr(tmpl, opts)
+	case LevelPVC:
+		return makePVCMetricExpr(tmpl, opts)
+	case LevelIngress:
+		return makeIngressMetricExpr(tmpl, opts)
+	case LevelComponent:
+		return tmpl
+	default:
+		return tmpl
+	}
+}
+
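To make the placeholder substitution concrete, a node-level lookup expands like this (values are illustrative):

// makeExpr("node_cpu_utilisation", QueryOptions{Level: LevelNode, NodeName: "node1"})
// template: node:node_cpu_utilisation:avg1m{$1}
// result:   node:node_cpu_utilisation:avg1m{node="node1"}   (via makeNodeMetricExpr below)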
+func makeNodeMetricExpr(tmpl string, o QueryOptions) string {
+	var nodeSelector string
+	if o.NodeName != "" {
+		nodeSelector = fmt.Sprintf(`node="%s"`, o.NodeName)
+	} else {
+		nodeSelector = fmt.Sprintf(`node=~"%s"`, o.ResourceFilter)
+	}
+	return strings.Replace(tmpl, "$1", nodeSelector, -1)
+}
+
+func makeWorkspaceMetricExpr(tmpl string, o QueryOptions) string {
+	var workspaceSelector string
+	if o.WorkspaceName != "" {
+		workspaceSelector = fmt.Sprintf(`workspace="%s"`, o.WorkspaceName)
+	} else {
+		workspaceSelector = fmt.Sprintf(`workspace=~"%s", workspace!=""`, o.ResourceFilter)
+	}
+	return strings.Replace(tmpl, "$1", workspaceSelector, -1)
+}
+
+func makeNamespaceMetricExpr(tmpl string, o QueryOptions) string {
+	var namespaceSelector string
+
+	// For monitoring namespaces in the specific workspace
+	// GET /workspaces/{workspace}/namespaces
+	if o.WorkspaceName != "" {
+		namespaceSelector = fmt.Sprintf(`workspace="%s", namespace=~"%s"`, o.WorkspaceName, o.ResourceFilter)
+		return strings.Replace(tmpl, "$1", namespaceSelector, -1)
+	}
+
+	// For monitoring the specific namespaces
+	// GET /namespaces/{namespace} or
+	// GET /namespaces
+	if o.NamespaceName != "" {
+		namespaceSelector = fmt.Sprintf(`namespace="%s"`, o.NamespaceName)
+	} else {
+		namespaceSelector = fmt.Sprintf(`namespace=~"%s"`, o.ResourceFilter)
+	}
+	return strings.Replace(tmpl, "$1", namespaceSelector, -1)
+}
+
+func makeWorkloadMetricExpr(metric, tmpl string, o QueryOptions) string {
+	var kindSelector, workloadSelector string
+
+	switch o.WorkloadKind {
+	case "deployment":
+		o.WorkloadKind = Deployment
+	case "statefulset":
+		o.WorkloadKind = StatefulSet
+	case "daemonset":
+		o.WorkloadKind = DaemonSet
+	default:
+		o.WorkloadKind = ".*"
+	}
+	workloadSelector = fmt.Sprintf(`namespace="%s", workload=~"%s:(%s)"`, o.NamespaceName, o.WorkloadKind, o.ResourceFilter)
+
+	if strings.Contains(metric, "deployment") {
+		kindSelector = fmt.Sprintf(`namespace="%s", deployment!="", deployment=~"%s"`, o.NamespaceName, o.ResourceFilter)
+	}
+	if strings.Contains(metric, "statefulset") {
+		kindSelector = fmt.Sprintf(`namespace="%s", statefulset!="", statefulset=~"%s"`, o.NamespaceName, o.ResourceFilter)
+	}
+	if strings.Contains(metric, "daemonset") {
+		kindSelector = fmt.Sprintf(`namespace="%s", daemonset!="", daemonset=~"%s"`, o.NamespaceName, o.ResourceFilter)
+	}
+
+	return strings.NewReplacer("$1", workloadSelector, "$2", kindSelector).Replace(tmpl)
+}
+
|
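// Illustrative note (not part of the original commit): "$1" receives the
// workload selector and "$2" the kind-specific selector; with an unrecognized
// kind the workload pattern falls back to ".*". With a hypothetical template:
//
//	makeWorkloadMetricExpr("workload_deployment_unavailable_replicas_ratio",
//		`deployment_status{$2}`, QueryOptions{NamespaceName: "default", ResourceFilter: "web"})
//	// "$2" => namespace="default", deployment!="", deployment=~"web"
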
func makePodMetricExpr(tmpl string, o QueryOptions) string {
	var podSelector, workloadSelector string

	// For monitoring pods of the specific workload
	// GET /namespaces/{namespace}/workloads/{kind}/{workload}/pods
	if o.WorkloadName != "" {
		switch o.WorkloadKind {
		case "deployment":
			workloadSelector = fmt.Sprintf(`owner_kind="ReplicaSet", owner_name=~"^%s-[^-]{1,10}$"`, o.WorkloadName)
		case "statefulset":
			workloadSelector = fmt.Sprintf(`owner_kind="StatefulSet", owner_name="%s"`, o.WorkloadName)
		case "daemonset":
			workloadSelector = fmt.Sprintf(`owner_kind="DaemonSet", owner_name="%s"`, o.WorkloadName)
		}
	}

	// For monitoring pods in the specific namespace
	// GET /namespaces/{namespace}/workloads/{kind}/{workload}/pods or
	// GET /namespaces/{namespace}/pods/{pod} or
	// GET /namespaces/{namespace}/pods
	if o.NamespaceName != "" {
		if o.PodName != "" {
			podSelector = fmt.Sprintf(`pod="%s", namespace="%s"`, o.PodName, o.NamespaceName)
		} else {
			podSelector = fmt.Sprintf(`pod=~"%s", namespace="%s"`, o.ResourceFilter, o.NamespaceName)
		}
	} else {
		var namespaces, pods []string
		if o.NamespacedResourcesFilter != "" {
			for _, np := range strings.Split(o.NamespacedResourcesFilter, "|") {
				if nparr := strings.SplitN(np, "/", 2); len(nparr) > 1 {
					namespaces = append(namespaces, nparr[0])
					pods = append(pods, nparr[1])
				} else {
					pods = append(pods, np)
				}
			}
		}
		// For monitoring pods on the specific node
		// GET /nodes/{node}/pods/{pod}
		// GET /nodes/{node}/pods
		if o.NodeName != "" {
			if o.PodName != "" {
				if nparr := strings.SplitN(o.PodName, "/", 2); len(nparr) > 1 {
					podSelector = fmt.Sprintf(`namespace="%s",pod="%s", node="%s"`, nparr[0], nparr[1], o.NodeName)
				} else {
					podSelector = fmt.Sprintf(`pod="%s", node="%s"`, o.PodName, o.NodeName)
				}
			} else {
				var ps []string
				ps = append(ps, fmt.Sprintf(`node="%s"`, o.NodeName))
				if o.ResourceFilter != "" {
					ps = append(ps, fmt.Sprintf(`pod=~"%s"`, o.ResourceFilter))
				}
				if len(namespaces) > 0 {
					ps = append(ps, fmt.Sprintf(`namespace=~"%s"`, strings.Join(namespaces, "|")))
				}
				if len(pods) > 0 {
					ps = append(ps, fmt.Sprintf(`pod=~"%s"`, strings.Join(pods, "|")))
				}
				podSelector = strings.Join(ps, ",")
			}
		} else {
			// For monitoring pods in the whole cluster
			// GET /pods
			var ps []string
			if len(namespaces) > 0 {
				ps = append(ps, fmt.Sprintf(`namespace=~"%s"`, strings.Join(namespaces, "|")))
			}
			if len(pods) > 0 {
				ps = append(ps, fmt.Sprintf(`pod=~"%s"`, strings.Join(pods, "|")))
			}
			if len(ps) > 0 {
				podSelector = strings.Join(ps, ",")
			}
		}
	}

	return strings.NewReplacer("$1", workloadSelector, "$2", podSelector).Replace(tmpl)
}

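// Illustrative note (not part of the original commit): NamespacedResourcesFilter
// is a "|"-separated list of namespace/pod pairs. With a hypothetical template:
//
//	makePodMetricExpr(`pod_cpu_usage{$2}`, QueryOptions{
//		NamespacedResourcesFilter: "ns1/pod-a|ns2/pod-b",
//	})
//	// => pod_cpu_usage{namespace=~"ns1|ns2",pod=~"pod-a|pod-b"}
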
func makeContainerMetricExpr(tmpl string, o QueryOptions) string {
	var containerSelector string
	if o.ContainerName != "" {
		containerSelector = fmt.Sprintf(`pod="%s", namespace="%s", container="%s"`, o.PodName, o.NamespaceName, o.ContainerName)
	} else {
		containerSelector = fmt.Sprintf(`pod="%s", namespace="%s", container=~"%s"`, o.PodName, o.NamespaceName, o.ResourceFilter)
	}
	return strings.Replace(tmpl, "$1", containerSelector, -1)
}

func makePVCMetricExpr(tmpl string, o QueryOptions) string {
	var pvcSelector string

	// For monitoring persistentvolumeclaims in the specific namespace
	// GET /namespaces/{namespace}/persistentvolumeclaims/{persistentvolumeclaim} or
	// GET /namespaces/{namespace}/persistentvolumeclaims
	if o.NamespaceName != "" {
		if o.PersistentVolumeClaimName != "" {
			pvcSelector = fmt.Sprintf(`namespace="%s", persistentvolumeclaim="%s"`, o.NamespaceName, o.PersistentVolumeClaimName)
		} else {
			pvcSelector = fmt.Sprintf(`namespace="%s", persistentvolumeclaim=~"%s"`, o.NamespaceName, o.ResourceFilter)
		}
		return strings.Replace(tmpl, "$1", pvcSelector, -1)
	}

	// For monitoring persistentvolumeclaims of the specific storageclass
	// GET /storageclasses/{storageclass}/persistentvolumeclaims
	if o.StorageClassName != "" {
		pvcSelector = fmt.Sprintf(`storageclass="%s", persistentvolumeclaim=~"%s"`, o.StorageClassName, o.ResourceFilter)
	}
	return strings.Replace(tmpl, "$1", pvcSelector, -1)
}

func makeIngressMetricExpr(tmpl string, o QueryOptions) string {
	var ingressSelector string
	var jobSelector string
	duration := "5m"

	// parse Range Vector Selectors metric{key=value}[duration]
	if o.Duration != nil {
		duration = o.Duration.String()
	}

	// job is a required filter
	// GET /namespaces/{namespace}/ingress?job=xxx&pod=xxx
	if o.Job != "" {
		jobSelector = fmt.Sprintf(`job="%s"`, o.Job)
		if o.PodName != "" {
			jobSelector = fmt.Sprintf(`%s,controller_pod="%s"`, jobSelector, o.PodName)
		}
	}

	tmpl = strings.Replace(tmpl, "$1", ingressSelector, -1)
	tmpl = strings.Replace(tmpl, "$2", jobSelector, -1)
	return strings.Replace(tmpl, "$3", duration, -1)
}

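// Illustrative note (not part of the original commit): "$3" carries the range
// vector duration, so a hypothetical template expands as:
//
//	d := 2 * time.Minute
//	makeIngressMetricExpr(`rate(nginx_ingress_requests{$2}[$3])`,
//		QueryOptions{Job: "ingress-nginx", Duration: &d})
//	// => rate(nginx_ingress_requests{job="ingress-nginx"}[2m0s])
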
@@ -0,0 +1,318 @@
package tracker

import (
	"fmt"
	"strings"
	"time"
)

type Level int

const (
	LevelCluster = 1 << iota
	LevelNode
	LevelWorkspace
	LevelNamespace
	LevelApplication
	LevelOpenpitrix
	LevelWorkload
	LevelService
	LevelPod
	LevelContainer
	LevelPVC
	LevelComponent
	LevelIngress
)

var MeteringLevelMap = map[string]int{
	"LevelCluster":     LevelCluster,
	"LevelNode":        LevelNode,
	"LevelWorkspace":   LevelWorkspace,
	"LevelNamespace":   LevelNamespace,
	"LevelApplication": LevelApplication,
	"LevelWorkload":    LevelWorkload,
	"LevelService":     LevelService,
	"LevelPod":         LevelPod,
	"LevelContainer":   LevelContainer,
	"LevelPVC":         LevelPVC,
	"LevelComponent":   LevelComponent,
}

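// Illustrative note (not part of the original commit): because the levels are
// declared with 1 << iota, they compose as bit flags, e.g.:
//
//	levels := MeteringLevelMap["LevelNode"] | MeteringLevelMap["LevelPod"]
//	levels&LevelPod != 0 // true: pod-level metering is selected
//	levels&LevelPVC != 0 // false
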
type QueryOption interface {
	Apply(*QueryOptions)
}

type Meteroptions struct {
	Start time.Time
	End   time.Time
	Step  time.Duration
}

type QueryOptions struct {
	Level Level

	NamespacedResourcesFilter string
	QueryType                 string
	ResourceFilter            string
	NodeName                  string
	WorkspaceName             string
	NamespaceName             string
	WorkloadKind              string
	WorkloadName              string
	PodName                   string
	ContainerName             string
	StorageClassName          string
	PersistentVolumeClaimName string
	PVCFilter                 string
	ApplicationName           string
	ServiceName               string
	Ingress                   string
	Job                       string
	Duration                  *time.Duration
	MeterOptions              *Meteroptions
}

func NewQueryOptions() *QueryOptions {
	return &QueryOptions{}
}

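// Illustrative note (not part of the original commit): callers build a
// QueryOptions by applying one of the typed options below, e.g.:
//
//	opts := NewQueryOptions()
//	NodeOption{NodeName: "node1"}.Apply(opts)
//	// opts.Level == LevelNode, opts.NodeName == "node1"
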
type ClusterOption struct{}

func (_ ClusterOption) Apply(o *QueryOptions) {
	o.Level = LevelCluster
}

type NodeOption struct {
	ResourceFilter   string
	NodeName         string
	PVCFilter        string
	StorageClassName string
	QueryType        string
}

func (no NodeOption) Apply(o *QueryOptions) {
	o.Level = LevelNode
	o.ResourceFilter = no.ResourceFilter
	o.NodeName = no.NodeName
	o.PVCFilter = no.PVCFilter
	o.StorageClassName = no.StorageClassName
	o.QueryType = no.QueryType
}

type WorkspaceOption struct {
	ResourceFilter   string
	WorkspaceName    string
	PVCFilter        string
	StorageClassName string
}

func (wo WorkspaceOption) Apply(o *QueryOptions) {
	o.Level = LevelWorkspace
	o.ResourceFilter = wo.ResourceFilter
	o.WorkspaceName = wo.WorkspaceName
	o.PVCFilter = wo.PVCFilter
	o.StorageClassName = wo.StorageClassName
}

type NamespaceOption struct {
	ResourceFilter   string
	WorkspaceName    string
	NamespaceName    string
	PVCFilter        string
	StorageClassName string
}

func (no NamespaceOption) Apply(o *QueryOptions) {
	o.Level = LevelNamespace
	o.ResourceFilter = no.ResourceFilter
	o.WorkspaceName = no.WorkspaceName
	o.NamespaceName = no.NamespaceName
	o.PVCFilter = no.PVCFilter
	o.StorageClassName = no.StorageClassName
}

type ApplicationsOption struct {
	NamespaceName    string
	Applications     []string
	StorageClassName string
}

func (aso ApplicationsOption) Apply(o *QueryOptions) {
	// nothing should be done
	//nolint:gosimple
	return
}

type OpenpitrixsOption struct {
	Cluster          string
	NamespaceName    string
	Openpitrixs      []string
	StorageClassName string
}

func (oso OpenpitrixsOption) Apply(o *QueryOptions) {
	// nothing should be done
	//nolint:gosimple
	return
}

// ApplicationsOption & OpenpitrixsOption share the same ApplicationOption struct
type ApplicationOption struct {
	NamespaceName         string
	Application           string
	ApplicationComponents []string
	StorageClassName      string
}

func (ao ApplicationOption) Apply(o *QueryOptions) {
	o.Level = LevelApplication
	o.NamespaceName = ao.NamespaceName
	o.ApplicationName = ao.Application
	o.StorageClassName = ao.StorageClassName

	app_components := strings.Join(ao.ApplicationComponents[:], "|")

	if len(app_components) > 0 {
		o.ResourceFilter = fmt.Sprintf(`namespace="%s", workload=~"%s"`, o.NamespaceName, app_components)
	} else {
		o.ResourceFilter = fmt.Sprintf(`namespace="%s", workload=~"%s"`, o.NamespaceName, ".*")
	}
}

type WorkloadOption struct {
	ResourceFilter string
	NamespaceName  string
	WorkloadKind   string
}

func (wo WorkloadOption) Apply(o *QueryOptions) {
	o.Level = LevelWorkload
	o.ResourceFilter = wo.ResourceFilter
	o.NamespaceName = wo.NamespaceName
	o.WorkloadKind = wo.WorkloadKind
}

type ServicesOption struct {
	NamespaceName string
	Services      []string
}

func (sso ServicesOption) Apply(o *QueryOptions) {
	// nothing should be done
	//nolint:gosimple
	return
}

type ServiceOption struct {
	ResourceFilter string
	NamespaceName  string
	ServiceName    string
	PodNames       []string
}

func (so ServiceOption) Apply(o *QueryOptions) {
	o.Level = LevelService
	o.NamespaceName = so.NamespaceName
	o.ServiceName = so.ServiceName

	pod_names := strings.Join(so.PodNames, "|")

	if len(pod_names) > 0 {
		o.ResourceFilter = fmt.Sprintf(`pod=~"%s", namespace="%s"`, pod_names, o.NamespaceName)
	} else {
		o.ResourceFilter = fmt.Sprintf(`pod=~"%s", namespace="%s"`, ".*", o.NamespaceName)
	}
}

type PodOption struct {
	NamespacedResourcesFilter string
	ResourceFilter            string
	NodeName                  string
	NamespaceName             string
	WorkloadKind              string
	WorkloadName              string
	PodName                   string
}

func (po PodOption) Apply(o *QueryOptions) {
	o.Level = LevelPod
	o.NamespacedResourcesFilter = po.NamespacedResourcesFilter
	o.ResourceFilter = po.ResourceFilter
	o.NodeName = po.NodeName
	o.NamespaceName = po.NamespaceName
	o.WorkloadKind = po.WorkloadKind
	o.WorkloadName = po.WorkloadName
	o.PodName = po.PodName
}

type ContainerOption struct {
	ResourceFilter string
	NamespaceName  string
	PodName        string
	ContainerName  string
}

func (co ContainerOption) Apply(o *QueryOptions) {
	o.Level = LevelContainer
	o.ResourceFilter = co.ResourceFilter
	o.NamespaceName = co.NamespaceName
	o.PodName = co.PodName
	o.ContainerName = co.ContainerName
}

type PVCOption struct {
	ResourceFilter            string
	NamespaceName             string
	StorageClassName          string
	PersistentVolumeClaimName string
}

func (po PVCOption) Apply(o *QueryOptions) {
	o.Level = LevelPVC
	o.ResourceFilter = po.ResourceFilter
	o.NamespaceName = po.NamespaceName
	o.StorageClassName = po.StorageClassName
	o.PersistentVolumeClaimName = po.PersistentVolumeClaimName

	// for meter
	o.PVCFilter = po.PersistentVolumeClaimName
}

type IngressOption struct {
	ResourceFilter string
	NamespaceName  string
	Ingress        string
	Job            string
	Pod            string
	Duration       *time.Duration
}

func (no IngressOption) Apply(o *QueryOptions) {
	o.Level = LevelIngress
	o.ResourceFilter = no.ResourceFilter
	o.NamespaceName = no.NamespaceName
	o.Ingress = no.Ingress
	o.Job = no.Job
	o.PodName = no.Pod
	o.Duration = no.Duration
}

type ComponentOption struct{}

func (_ ComponentOption) Apply(o *QueryOptions) {
	o.Level = LevelComponent
}

type MeterOption struct {
	Start time.Time
	End   time.Time
	Step  time.Duration
}

func (mo MeterOption) Apply(o *QueryOptions) {
	o.MeterOptions = &Meteroptions{
		Start: mo.Start,
		End:   mo.End,
		Step:  mo.Step,
	}
}
@@ -1,14 +1,20 @@
package tracker

import (
	"context"
	"github.com/prometheus/client_golang/api"
	v1 "github.com/prometheus/client_golang/api/prometheus/v1"
	"github.com/prometheus/common/model"
	"strings"
	"sync"
	"time"
)

type prometheus struct {
	client v1.API
}

// NewPrometheus initializes the Prometheus client
func NewPrometheus(address string) (Interface, error) {
	cfg := api.Config{
		Address: address,
@@ -17,3 +23,83 @@ func NewPrometheus(address string) (Interface, error) {
	client, err := api.NewClient(cfg)
	return prometheus{client: v1.NewAPI(client)}, err
}

func (p prometheus) GetNamedMetrics(metrics []string, ts time.Time, o QueryOption) []Metric {
	var res []Metric
	var mtx sync.Mutex
	var wg sync.WaitGroup

	opts := NewQueryOptions()
	o.Apply(opts)

	for _, metric := range metrics {
		wg.Add(1)
		go func(metric string) {
			parsedResp := Metric{MetricName: metric}

			value, _, err := p.client.Query(context.Background(), makeExpr(metric, *opts), ts)
			if err != nil {
				parsedResp.Error = err.Error()
			} else {
				parsedResp.MetricData = parseQueryResp(value, genMetricFilter(o))
			}

			mtx.Lock()
			res = append(res, parsedResp)
			mtx.Unlock()

			wg.Done()
		}(metric)
	}

	wg.Wait()

	return res
}

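// Illustrative note (not part of the original commit): a minimal caller
// sketch, assuming a reachable Prometheus endpoint. Because each query runs in
// its own goroutine, the order of the returned slice is not guaranteed to
// match the input order.
//
//	p, err := NewPrometheus("http://prometheus:9090")
//	if err != nil {
//		// handle err
//	}
//	metrics := p.GetNamedMetrics([]string{"pod_cpu_usage"}, time.Now(), ClusterOption{})
//	// metrics[i].MetricName is the query name; metrics[i].Error is set on failure.
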
func parseQueryResp(value model.Value, metricFilter func(metric model.Metric) bool) MetricData {
	res := MetricData{MetricType: MetricTypeVector}

	data, _ := value.(model.Vector)

	for _, v := range data {
		if metricFilter != nil && !metricFilter(v.Metric) {
			continue
		}
		mv := MetricValue{
			Metadata: make(map[string]string),
		}

		for k, v := range v.Metric {
			mv.Metadata[string(k)] = string(v)
		}

		mv.Sample = &Point{float64(v.Timestamp) / 1000, float64(v.Value)}

		res.MetricValues = append(res.MetricValues, mv)
	}

	return res
}

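// Illustrative note (not part of the original commit): Prometheus model
// timestamps are in milliseconds, hence the division by 1000 above. A vector
// sample {pod="p1"} => 0.5 at 1585658599195ms becomes:
//
//	MetricValue{Metadata: map[string]string{"pod": "p1"}, Sample: &Point{1585658599.195, 0.5}}
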
func genMetricFilter(o QueryOption) func(metric model.Metric) bool {
	if o != nil {
		if po, ok := o.(PodOption); ok {
			if po.NamespacedResourcesFilter != "" {
				namespacedPodsMap := make(map[string]struct{})
				for _, s := range strings.Split(po.NamespacedResourcesFilter, "|") {
					namespacedPodsMap[s] = struct{}{}
				}
				return func(metric model.Metric) bool {
					if len(metric) == 0 {
						return false
					}
					_, ok := namespacedPodsMap[string(metric["namespace"])+"/"+string(metric["pod"])]
					return ok
				}
			}
		}
	}
	return func(metric model.Metric) bool {
		return true
	}
}

@@ -0,0 +1,17 @@
package tracker

import (
	"testing"
	"time"
)

func TestGetNamedMetrics(t *testing.T) {
	client, err := NewPrometheus("http://10.101.15.3:32585")
	if err != nil {
		t.Fatalf("failed to create prometheus client: %v", err)
	}
	result := client.GetNamedMetrics([]string{"pod_cpu_resource_limits"}, time.Now(), PodOption{
		PodName:       "prometheus-k8s-0",
		NamespaceName: "monitoring-system",
	})
	if len(result) == 0 || len(result[0].MetricValues) == 0 {
		t.Fatalf("no metric values returned: %+v", result)
	}
	t.Log(result[0].MetricValues[0].Sample.Value())
}

@@ -0,0 +1,174 @@
package tracker

import (
	"errors"
	"fmt"
	"strconv"
	"time"

	jsoniter "github.com/json-iterator/go"
)

const (
	MetricTypeMatrix = "matrix"
	MetricTypeVector = "vector"
)

type Metadata struct {
	Metric string `json:"metric,omitempty" description:"metric name"`
	Type   string `json:"type,omitempty" description:"metric type"`
	Help   string `json:"help,omitempty" description:"metric description"`
}

type Metric struct {
	MetricName string `json:"metric_name,omitempty" description:"metric name, eg. scheduler_up_sum" csv:"metric_name"`
	MetricData `json:"data,omitempty" description:"actual metric result"`
	Error      string `json:"error,omitempty" csv:"-"`
}

type MetricValues []MetricValue

type MetricData struct {
	MetricType   string `json:"resultType,omitempty" description:"result type, one of matrix, vector" csv:"metric_type"`
	MetricValues `json:"result,omitempty" description:"metric data including labels, time series and values" csv:"metric_values"`
}

type DashboardEntity struct {
	GrafanaDashboardUrl     string `json:"grafanaDashboardUrl,omitempty"`
	GrafanaDashboardContent string `json:"grafanaDashboardContent,omitempty"`
	Description             string `json:"description,omitempty"`
	Namespace               string `json:"namespace,omitempty"`
}

// The first element is the timestamp, the second is the metric value.
// eg, [1585658599.195, 0.528]
type Point [2]float64

type MetricValue struct {
	Metadata map[string]string `json:"metric,omitempty" description:"time series labels"`
	// The type of Point is a float64 array with fixed length of 2.
	// So Point will always be initialized as [0, 0], rather than nil.
	// To allow empty Sample, we should declare Sample to type *Point
	Sample         *Point        `json:"value,omitempty" description:"time series, values of vector type"`
	Series         []Point       `json:"values,omitempty" description:"time series, values of matrix type"`
	ExportSample   *ExportPoint  `json:"exported_value,omitempty" description:"exported time series, values of vector type"`
	ExportedSeries []ExportPoint `json:"exported_values,omitempty" description:"exported time series, values of matrix type"`

	MinValue     string `json:"min_value" description:"minimum value from monitor points"`
	MaxValue     string `json:"max_value" description:"maximum value from monitor points"`
	AvgValue     string `json:"avg_value" description:"average value from monitor points"`
	SumValue     string `json:"sum_value" description:"sum value from monitor points"`
	Fee          string `json:"fee" description:"resource fee"`
	ResourceUnit string `json:"resource_unit"`
	CurrencyUnit string `json:"currency_unit"`
}

func (mv *MetricValue) TransferToExportedMetricValue() {
	if mv.Sample != nil {
		sample := mv.Sample.transferToExported()
		mv.ExportSample = &sample
		mv.Sample = nil
	}

	for _, item := range mv.Series {
		mv.ExportedSeries = append(mv.ExportedSeries, item.transferToExported())
	}
	mv.Series = nil
}

func (p Point) Timestamp() float64 {
	return p[0]
}

func (p Point) Value() float64 {
	return p[1]
}

func (p Point) transferToExported() ExportPoint {
	return ExportPoint{p[0], p[1]}
}

func (p Point) Add(other Point) Point {
	return Point{p[0], p[1] + other[1]}
}

// MarshalJSON implements json.Marshaler. It will be called when writing JSON to HTTP response
// Inspired by prometheus/client_golang
func (p Point) MarshalJSON() ([]byte, error) {
	t, err := jsoniter.Marshal(p.Timestamp())
	if err != nil {
		return nil, err
	}
	v, err := jsoniter.Marshal(strconv.FormatFloat(p.Value(), 'f', -1, 64))
	if err != nil {
		return nil, err
	}
	return []byte(fmt.Sprintf("[%s,%s]", t, v)), nil
}

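// Illustrative note (not part of the original commit): MarshalJSON emits the
// value as a string, mirroring the Prometheus HTTP API encoding, so marshaling
// a point should produce:
//
//	b, _ := jsoniter.Marshal(Point{1585658599.195, 0.528})
//	// string(b) == `[1585658599.195,"0.528"]`
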
// UnmarshalJSON implements json.Unmarshaler. This is for unmarshaling test data.
func (p *Point) UnmarshalJSON(b []byte) error {
	var v []interface{}
	if err := jsoniter.Unmarshal(b, &v); err != nil {
		return err
	}

	if v == nil {
		return nil
	}

	if len(v) != 2 {
		return errors.New("unsupported array length")
	}

	ts, ok := v[0].(float64)
	if !ok {
		return errors.New("failed to unmarshal [timestamp]")
	}
	valstr, ok := v[1].(string)
	if !ok {
		return errors.New("failed to unmarshal [value]")
	}
	valf, err := strconv.ParseFloat(valstr, 64)
	if err != nil {
		return err
	}

	p[0] = ts
	p[1] = valf
	return nil
}

type CSVPoint struct {
	MetricName   string `csv:"metric_name"`
	Selector     string `csv:"selector"`
	Time         string `csv:"time"`
	Value        string `csv:"value"`
	ResourceUnit string `csv:"unit"`
}

type ExportPoint [2]float64

func (p ExportPoint) Timestamp() string {
	return time.Unix(int64(p[0]), 0).Format("2006-01-02 03:04:05 PM")
}

func (p ExportPoint) Value() float64 {
	return p[1]
}

func (p ExportPoint) Format() string {
	return p.Timestamp() + " " + strconv.FormatFloat(p.Value(), 'f', -1, 64)
}

func (p ExportPoint) TransformToCSVPoint(metricName string, selector string, resourceUnit string) CSVPoint {
	return CSVPoint{
		MetricName:   metricName,
		Selector:     selector,
		Time:         p.Timestamp(),
		Value:        strconv.FormatFloat(p.Value(), 'f', -1, 64),
		ResourceUnit: resourceUnit,
	}
}
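
// Illustrative note (not part of the original commit): a sketch of the export
// path from a raw point to a CSV row:
//
//	p := ExportPoint{1585658599, 0.528}
//	row := p.TransformToCSVPoint("pod_cpu_usage", `pod="p1"`, "core")
//	// row.Time uses layout "2006-01-02 03:04:05 PM" (server-local time)
//	// row.Value == "0.528"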