Controller chart API

Former-commit-id: 035c357c629d9d4ff90c8ae999cb4b05d35fc8f9
zhangwei 2023-11-23 15:20:15 +08:00
parent feb444a53b
commit 6270da26bd
10 changed files with 26 additions and 126 deletions

View File

@@ -179,10 +179,12 @@ type (
Name string `yaml:"name"`
Description string `yaml:"description"`
tenantId int64 `yaml:"tenantId"`
+ nsID string `yaml:"nsID"`
tasks []TaskYaml `yaml:"tasks"`
}
TaskYaml {
TaskId int64 `yaml:"taskId"`
+ nsID string `yaml:"nsID"`
taskType string `yaml:"taskType"`
participantId int64 `yaml:"participantId"`
matchLabels map[string]string `yaml:"matchLabels"`

View File

@@ -26,9 +26,8 @@ func NewControllerMetricsLogic(ctx context.Context, svcCtx *svc.ServiceContext)
func (l *ControllerMetricsLogic) ControllerMetrics(req *types.ControllerMetricsReq) (resp *types.ControllerMetricsResp, err error) {
resp = &types.ControllerMetricsResp{}
- metrics := l.svcCtx.PromClient[req.ParticipantId].GetNamedMetricsByTime([]string{"pod_cpu_usage", "pod_memory_usage_wo_cache"}, req.Start, req.End, 10*time.Minute, tracker.ControllerOption{
- PodsName: req.Pods,
- Namespace: req.Namespace,
+ metrics := l.svcCtx.PromClient[req.ParticipantId].GetNamedMetricsByTime(req.Metrics, req.Start, req.End, 10*time.Minute, tracker.ControllerOption{
+ PodsName: req.Pods,
})
resp.Data = metrics
return resp, nil
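
Note: the hardcoded metric pair is replaced by the caller-supplied req.Metrics. A minimal sketch of a request that reproduces the old metric selection through the new field (the ID, pod pattern, and timestamps are illustrative, not taken from this commit):

req := &types.ControllerMetricsReq{
	ParticipantId: 1,
	Pods:          "nginx-.*",
	Start:         "1700700000",
	End:           "1700703600",
	Metrics:       []string{"pod_cpu_usage", "pod_memory_usage_wo_cache"},
}
resp, err := NewControllerMetricsLogic(ctx, svcCtx).ControllerMetrics(req)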

View File

@@ -54,6 +54,7 @@ func (l *ScheduleTaskByYamlLogic) ScheduleTaskByYaml(req *types.ScheduleTaskByYa
Name: req.Name,
YamlString: string(bytes),
CommitTime: time.Now(),
+ NsID: req.NsID,
}
// Save the task record to the database
tx := l.svcCtx.DbEngin.Create(&taskModel)
@@ -63,6 +64,7 @@ func (l *ScheduleTaskByYamlLogic) ScheduleTaskByYaml(req *types.ScheduleTaskByYa
// Iterate over the subtasks and push them onto the task queue
for _, task := range req.Tasks {
+ task.NsID = req.NsID
task.TaskId = taskModel.Id
// Marshal the task data into a message body
reqMessage, err := json.Marshal(task)
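
Note: with the two additions above, the nsID submitted at the top level of the request is stamped onto the parent task record and onto every subtask before it is queued, so clients need not repeat it per subtask. A hypothetical YAML payload showing where nsID now lives (all values invented for illustration):

name: demo-schedule
description: smoke test
tenantId: 1
nsID: ns-demo
tasks:
  - taskType: cloud
    participantId: 1
    matchLabels:
      app: demo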

View File

@@ -160,11 +160,13 @@ type ScheduleTaskByYamlReq struct {
Name string `yaml:"name"`
Description string `yaml:"description"`
TenantId int64 `yaml:"tenantId"`
+ NsID string `yaml:"nsID"`
Tasks []TaskYaml `yaml:"tasks"`
}
type TaskYaml struct {
TaskId int64 `yaml:"taskId"`
+ NsID string `yaml:"nsID"`
TaskType string `yaml:"taskType"`
ParticipantId int64 `yaml:"participantId"`
MatchLabels map[string]string `yaml:"matchLabels"`
@@ -3326,12 +3328,13 @@ type ShowNodeDetailsResp struct {
}
type ControllerMetricsReq struct {
- ParticipantId int64 `form:"participantId"`
- Namespace string `form:"namespace"`
- Pods string `form:"pods"`
- Steps string `form:"steps"`
- Start string `form:"start"`
- End string `form:"end"`
+ Metrics []string `form:"metrics"`
+ ParticipantId int64 `form:"participantId"`
+ Namespace string `form:"namespace"`
+ Pods string `form:"pods"`
+ Steps string `form:"steps"`
+ Start string `form:"start"`
+ End string `form:"end"`
}
type ControllerMetricsResp struct {

View File

@@ -18,6 +18,7 @@ import "fmt"
type TaskInfo struct {
TaskId int64 `json:"taskId,optional"`
+ NsID string `json:"nsID"`
TaskType string `json:"taskType,optional"`
MatchLabels map[string]string `json:"matchLabels"`
ParticipantId int64 `json:"participantId"`

View File

@@ -48,6 +48,7 @@ type (
DeletedAt gorm.DeletedAt `gorm:"index"`
YamlString string `db:"yaml_string"`
Result string `db:"result"` // run result
+ NsID string `db:"ns_id"`
}
)

View File

@@ -48,7 +48,7 @@ type (
YamlString string `db:"yaml_string"`
Result string `db:"result"` // job result
DeletedAt gorm.DeletedAt `gorm:"index"`
- TenantId int64 `db:"tenant_id"`
+ NsID string `db:"ns_id"`
}
)

View File

@@ -64,6 +64,7 @@ func (cs *cloudScheduler) getNewStructForDb(task *response.TaskInfo, participant
cloud := cs.UnMarshalK8sStruct(string(bytes), task.TaskId)
cloud.Id = utils.GenSnowflakeID()
cloud.YamlString = string(bytes)
+ cloud.NsID = task.NsID
cloud.ParticipantId = participantId
return cloud, nil
}
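
Note: persisting ns_id on the cloud record makes namespace-scoped lookups possible on the read path. A hypothetical gorm query sketch (the model name and call site are assumptions, not part of this commit):

var clouds []models.Cloud
if err := l.svcCtx.DbEngin.Where("ns_id = ?", req.NsID).Find(&clouds).Error; err != nil {
	return nil, err
}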

View File

@@ -26,57 +26,6 @@ const (
)
var promQLTemplates = map[string]string{
//cluster
"cluster_cpu_utilisation": ":node_cpu_utilisation:avg1m",
"cluster_cpu_usage": `round(:node_cpu_utilisation:avg1m * sum(node:node_num_cpu:sum), 0.001)`,
"cluster_cpu_total": "sum(node:node_num_cpu:sum)",
"cluster_memory_utilisation": ":node_memory_utilisation:",
"cluster_memory_available": "sum(node:node_memory_bytes_available:sum)",
"cluster_memory_total": "sum(node:node_memory_bytes_total:sum)",
"cluster_memory_usage_wo_cache": "sum(node:node_memory_bytes_total:sum) - sum(node:node_memory_bytes_available:sum)",
"cluster_net_utilisation": ":node_net_utilisation:sum_irate",
"cluster_net_bytes_transmitted": "sum(node:node_net_bytes_transmitted:sum_irate)",
"cluster_net_bytes_received": "sum(node:node_net_bytes_received:sum_irate)",
"cluster_disk_read_iops": "sum(node:data_volume_iops_reads:sum)",
"cluster_disk_write_iops": "sum(node:data_volume_iops_writes:sum)",
"cluster_disk_read_throughput": "sum(node:data_volume_throughput_bytes_read:sum)",
"cluster_disk_write_throughput": "sum(node:data_volume_throughput_bytes_written:sum)",
"cluster_disk_size_usage": `sum(max(node_filesystem_size_bytes{device=~"/dev/.*", device!~"/dev/loop\\d+", job="node-exporter"} - node_filesystem_avail_bytes{device=~"/dev/.*", device!~"/dev/loop\\d+", job="node-exporter"}) by (device, instance))`,
"cluster_disk_size_utilisation": `cluster:disk_utilization:ratio`,
"cluster_disk_size_capacity": `sum(max(node_filesystem_size_bytes{device=~"/dev/.*", device!~"/dev/loop\\d+", job="node-exporter"}) by (device, instance))`,
"cluster_disk_size_available": `sum(max(node_filesystem_avail_bytes{device=~"/dev/.*", device!~"/dev/loop\\d+", job="node-exporter"}) by (device, instance))`,
"cluster_disk_inode_total": `sum(node:node_inodes_total:)`,
"cluster_disk_inode_usage": `sum(node:node_inodes_total:) - sum(node:node_inodes_free:)`,
"cluster_disk_inode_utilisation": `cluster:disk_inode_utilization:ratio`,
"cluster_namespace_count": `count(kube_namespace_labels)`,
"cluster_pod_count": `cluster:pod:sum`,
"cluster_pod_quota": `sum(max(kube_node_status_capacity{resource="pods"}) by (node) unless on (node) (kube_node_status_condition{condition="Ready",status=~"unknown|false"} > 0))`,
"cluster_pod_utilisation": `cluster:pod_utilization:ratio`,
"cluster_pod_running_count": `cluster:pod_running:count`,
"cluster_pod_succeeded_count": `count(kube_pod_info unless on (pod) (kube_pod_status_phase{phase=~"Failed|Pending|Unknown|Running"} > 0) unless on (node) (kube_node_status_condition{condition="Ready",status=~"unknown|false"} > 0))`,
"cluster_pod_abnormal_count": `cluster:pod_abnormal:sum`,
"cluster_node_online": `sum(kube_node_status_condition{condition="Ready",status="true"})`,
"cluster_node_offline": `cluster:node_offline:sum`,
"cluster_node_total": `sum(kube_node_status_condition{condition="Ready"})`,
"cluster_cronjob_count": `sum(kube_cronjob_labels)`,
"cluster_pvc_count": `sum(kube_persistentvolumeclaim_info)`,
"cluster_daemonset_count": `sum(kube_daemonset_labels)`,
"cluster_deployment_count": `sum(kube_deployment_labels)`,
"cluster_endpoint_count": `sum(kube_endpoint_labels)`,
"cluster_hpa_count": `sum(kube_horizontalpodautoscaler_labels)`,
"cluster_job_count": `sum(kube_job_labels)`,
"cluster_statefulset_count": `sum(kube_statefulset_labels)`,
"cluster_replicaset_count": `count(kube_replicaset_labels)`,
"cluster_service_count": `sum(kube_service_info)`,
"cluster_secret_count": `sum(kube_secret_info)`,
"cluster_pv_count": `sum(kube_persistentvolume_labels)`,
"cluster_ingresses_extensions_count": `sum(kube_ingress_labels)`,
"cluster_load1": `sum(node_load1{job="node-exporter"}) / sum(node:node_num_cpu:sum)`,
"cluster_load5": `sum(node_load5{job="node-exporter"}) / sum(node:node_num_cpu:sum)`,
"cluster_load15": `sum(node_load15{job="node-exporter"}) / sum(node:node_num_cpu:sum)`,
"cluster_pod_abnormal_ratio": `cluster:pod_abnormal:ratio`,
"cluster_node_offline_ratio": `cluster:node_offline:ratio`,
//node
"node_cpu_utilisation": "node:node_cpu_utilisation:avg1m{$1}",
"node_cpu_total": "node:node_num_cpu:sum{$1}",
@@ -167,23 +116,6 @@ var promQLTemplates = map[string]string{
"namespace_ingresses_extensions_count": `sum by (namespace) (kube_ingress_labels{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
"namespace_s2ibuilder_count": `sum by (namespace) (s2i_s2ibuilder_created{namespace!=""} * on (namespace) group_left(workspace) kube_namespace_labels{$1})`,
// ingress
"ingress_request_count": `round(sum(increase(nginx_ingress_controller_requests{$1,$2}[$3])))`,
"ingress_request_4xx_count": `round(sum(increase(nginx_ingress_controller_requests{$1,$2,status=~"[4].*"}[$3])))`,
"ingress_request_5xx_count": `round(sum(increase(nginx_ingress_controller_requests{$1,$2,status=~"[5].*"}[$3])))`,
"ingress_active_connections": `sum(avg_over_time(nginx_ingress_controller_nginx_process_connections{$2,state="active"}[$3]))`,
"ingress_success_rate": `sum(rate(nginx_ingress_controller_requests{$1,$2,status!~"[4-5].*"}[$3])) / sum(rate(nginx_ingress_controller_requests{$1,$2}[$3]))`,
"ingress_request_duration_average": `sum_over_time(nginx_ingress_controller_request_duration_seconds_sum{$1,$2}[$3])/sum_over_time(nginx_ingress_controller_request_duration_seconds_count{$1,$2}[$3])`,
"ingress_request_duration_50percentage": `histogram_quantile(0.50, sum by (le) (rate(nginx_ingress_controller_request_duration_seconds_bucket{$1,$2}[$3])))`,
"ingress_request_duration_95percentage": `histogram_quantile(0.95, sum by (le) (rate(nginx_ingress_controller_request_duration_seconds_bucket{$1,$2}[$3])))`,
"ingress_request_duration_99percentage": `histogram_quantile(0.99, sum by (le) (rate(nginx_ingress_controller_request_duration_seconds_bucket{$1,$2}[$3])))`,
"ingress_request_volume": `round(sum(irate(nginx_ingress_controller_requests{$1,$2}[$3])), 0.001)`,
"ingress_request_volume_by_ingress": `round(sum(irate(nginx_ingress_controller_requests{$1,$2}[$3])) by (ingress), 0.001)`,
"ingress_request_network_sent": `sum(irate(nginx_ingress_controller_response_size_sum{$1,$2}[$3]))`,
"ingress_request_network_received": `sum(irate(nginx_ingress_controller_request_size_sum{$1,$2}[$3]))`,
"ingress_request_memory_bytes": `avg(nginx_ingress_controller_nginx_process_resident_memory_bytes{$2})`,
"ingress_request_cpu_usage": `avg(rate(nginx_ingress_controller_nginx_process_cpu_seconds_total{$2}[5m]))`,
// workload
"workload_cpu_usage": `round(namespace:workload_cpu_usage:sum{$1}, 0.001)`,
"workload_memory_usage": `namespace:workload_memory_usage:sum{$1}`,
@@ -201,8 +133,12 @@ var promQLTemplates = map[string]string{
"workload_daemonset_unavailable_replicas_ratio": `namespace:daemonset_unavailable_replicas:ratio{$1}`,
"workload_statefulset_unavailable_replicas_ratio": `namespace:statefulset_unavailable_replicas:ratio{$1}`,
"controller_cpu_usage_rate": `round(sum by (owner_name) (sum by (owner_name, pod) (irate(container_cpu_usage_seconds_total{job="kubelet", $1, image!=""}[5m]))/ sum by (owner_name,pod) (kube_pod_container_resource_limits{resource="cpu"}))/count(kube_pod_info{$2}) by (owner_name),0.0001)`,
"controller_memory_usage_rate": `round(sum by (owner_name) (sum by (owner_name, pod) (irate(container_memory_usage_bytes{job="kubelet", $1, image!=""}[5m]))/ sum by (owner_name,pod) (kube_pod_container_resource_limits{resource="memory"}))/count(kube_pod_info{$2}) by (owner_name),0.0001)`,
// pod
"pod_cpu_usage": `round(sum by (namespace, pod) (irate(container_cpu_usage_seconds_total{job="kubelet", pod!="", image!=""}[5m])) * on (namespace, pod) group_left(owner_kind,owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}, 0.001)`,
"pod_cpu_usage_rate": `round(sum by (namespace, pod) (irate(container_cpu_usage_seconds_total{job="kubelet", pod!="", image!=""}[5m]))/sum by (namespace,pod) (kube_pod_container_resource_limits{resource="cpu"}) * on (namespace, pod) group_left(owner_kind,owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}, 0.0001)`,
"pod_memory_usage_rate": `round(sum by (namespace, pod) (irate(container_memory_usage_bytes{job="kubelet", pod!="", image!=""}[5m]))/sum by (namespace,pod) (kube_pod_container_resource_limits{resource="memory"}) * on (namespace, pod) group_left(owner_kind,owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}, 0.0001)`,
"pod_memory_usage": `sum by (namespace, pod) (container_memory_usage_bytes{job="kubelet", pod!="", image!=""}) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`,
"pod_memory_usage_wo_cache": `sum by (namespace, pod) (container_memory_working_set_bytes{job="kubelet", pod!="", image!=""}) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`,
"pod_net_bytes_transmitted": `sum by (namespace, pod) (irate(container_network_transmit_bytes_total{pod!="", interface!~"^(cali.+|tunl.+|dummy.+|kube.+|flannel.+|cni.+|docker.+|veth.+|lo.*)", job="kubelet"}[5m])) * on (namespace, pod) group_left(owner_kind, owner_name) kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`,
@@ -216,51 +152,6 @@ var promQLTemplates = map[string]string{
"container_memory_usage_wo_cache": `sum by (namespace, pod, container) (container_memory_working_set_bytes{job="kubelet", container!="POD", container!="", image!="", $1})`,
"container_processes_usage": `sum by (namespace, pod, container) (container_processes{job="kubelet", container!="POD", container!="", image!="", $1})`,
"container_threads_usage": `sum by (namespace, pod, container) (container_threads {job="kubelet", container!="POD", container!="", image!="", $1})`,
- // pvc
- "pvc_inodes_available": `max by (namespace, persistentvolumeclaim) (kubelet_volume_stats_inodes_free) * on (namespace, persistentvolumeclaim) group_left (storageclass) kube_persistentvolumeclaim_info{$1}`,
- "pvc_inodes_used": `max by (namespace, persistentvolumeclaim) (kubelet_volume_stats_inodes_used) * on (namespace, persistentvolumeclaim) group_left (storageclass) kube_persistentvolumeclaim_info{$1}`,
- "pvc_inodes_total": `max by (namespace, persistentvolumeclaim) (kubelet_volume_stats_inodes) * on (namespace, persistentvolumeclaim) group_left (storageclass) kube_persistentvolumeclaim_info{$1}`,
- "pvc_inodes_utilisation": `max by (namespace, persistentvolumeclaim) (kubelet_volume_stats_inodes_used / kubelet_volume_stats_inodes) * on (namespace, persistentvolumeclaim) group_left (storageclass) kube_persistentvolumeclaim_info{$1}`,
- "pvc_bytes_available": `max by (namespace, persistentvolumeclaim) (kubelet_volume_stats_available_bytes) * on (namespace, persistentvolumeclaim) group_left (storageclass) kube_persistentvolumeclaim_info{$1}`,
- "pvc_bytes_used": `max by (namespace, persistentvolumeclaim) (kubelet_volume_stats_used_bytes) * on (namespace, persistentvolumeclaim) group_left (storageclass) kube_persistentvolumeclaim_info{$1}`,
- "pvc_bytes_total": `max by (namespace, persistentvolumeclaim) (kubelet_volume_stats_capacity_bytes) * on (namespace, persistentvolumeclaim) group_left (storageclass) kube_persistentvolumeclaim_info{$1}`,
- "pvc_bytes_utilisation": `max by (namespace, persistentvolumeclaim) (kubelet_volume_stats_used_bytes / kubelet_volume_stats_capacity_bytes) * on (namespace, persistentvolumeclaim) group_left (storageclass) kube_persistentvolumeclaim_info{$1}`,
- // component
- "etcd_server_list": `label_replace(up{job="etcd"}, "node_ip", "$1", "instance", "(.*):.*")`,
- "etcd_server_total": `count(up{job="etcd"})`,
- "etcd_server_up_total": `etcd:up:sum`,
- "etcd_server_has_leader": `label_replace(etcd_server_has_leader, "node_ip", "$1", "instance", "(.*):.*")`,
- "etcd_server_is_leader": `label_replace(etcd_server_is_leader, "node_ip", "$1", "instance", "(.*):.*")`,
- "etcd_server_leader_changes": `label_replace(etcd:etcd_server_leader_changes_seen:sum_changes, "node_ip", "$1", "node", "(.*)")`,
- "etcd_server_proposals_failed_rate": `avg(etcd:etcd_server_proposals_failed:sum_irate)`,
- "etcd_server_proposals_applied_rate": `avg(etcd:etcd_server_proposals_applied:sum_irate)`,
- "etcd_server_proposals_committed_rate": `avg(etcd:etcd_server_proposals_committed:sum_irate)`,
- "etcd_server_proposals_pending_count": `avg(etcd:etcd_server_proposals_pending:sum)`,
- "etcd_mvcc_db_size": `avg(etcd:etcd_mvcc_db_total_size:sum)`,
- "etcd_network_client_grpc_received_bytes": `sum(etcd:etcd_network_client_grpc_received_bytes:sum_irate)`,
- "etcd_network_client_grpc_sent_bytes": `sum(etcd:etcd_network_client_grpc_sent_bytes:sum_irate)`,
- "etcd_grpc_call_rate": `sum(etcd:grpc_server_started:sum_irate)`,
- "etcd_grpc_call_failed_rate": `sum(etcd:grpc_server_handled:sum_irate)`,
- "etcd_grpc_server_msg_received_rate": `sum(etcd:grpc_server_msg_received:sum_irate)`,
- "etcd_grpc_server_msg_sent_rate": `sum(etcd:grpc_server_msg_sent:sum_irate)`,
- "etcd_disk_wal_fsync_duration": `avg(etcd:etcd_disk_wal_fsync_duration:avg)`,
- "etcd_disk_wal_fsync_duration_quantile": `avg(etcd:etcd_disk_wal_fsync_duration:histogram_quantile) by (quantile)`,
- "etcd_disk_backend_commit_duration": `avg(etcd:etcd_disk_backend_commit_duration:avg)`,
- "etcd_disk_backend_commit_duration_quantile": `avg(etcd:etcd_disk_backend_commit_duration:histogram_quantile) by (quantile)`,
- "apiserver_up_sum": `apiserver:up:sum`,
- "apiserver_request_rate": `apiserver:apiserver_request_total:sum_irate`,
- "apiserver_request_by_verb_rate": `apiserver:apiserver_request_total:sum_verb_irate`,
- "apiserver_request_latencies": `apiserver:apiserver_request_duration:avg`,
- "apiserver_request_by_verb_latencies": `apiserver:apiserver_request_duration:avg_by_verb`,
- "scheduler_up_sum": `scheduler:up:sum`,
- "scheduler_schedule_attempts": `scheduler:scheduler_schedule_attempts:sum`,
- "scheduler_schedule_attempt_rate": `scheduler:scheduler_schedule_attempts:sum_rate`,
- "scheduler_e2e_scheduling_latency": `scheduler:scheduler_e2e_scheduling_duration:avg`,
- "scheduler_e2e_scheduling_latency_quantile": `scheduler:scheduler_e2e_scheduling_duration:histogram_quantile`,
}
func makeExpr(metric string, opts QueryOptions) string {
@@ -333,11 +224,10 @@ func makeNamespaceMetricExpr(tmpl string, o QueryOptions) string {
}
func makeControllerMetricExpr(tmpl string, o QueryOptions) string {
- var namespace, podName string
+ var podName string
- namespace = fmt.Sprintf(`namespace="%s"`, o.Namespace)
podName = fmt.Sprintf(`pod=~"%s"`, o.PodName)
- return strings.NewReplacer("$1", namespace, "$2", podName).Replace(tmpl)
+ return strings.NewReplacer("$1", podName, "$2", podName).Replace(tmpl)
}
func makePodMetricExpr(tmpl string, o QueryOptions) string {
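
Note: makeControllerMetricExpr now substitutes the pod matcher for both placeholders, so templates such as pod_cpu_usage_rate receive the pod regex where the namespace filter used to go. A self-contained sketch of the substitution (the template is shortened for illustration):

package main

import (
	"fmt"
	"strings"
)

func main() {
	// Shortened fragment of the pod_cpu_usage_rate template above.
	tmpl := `kube_pod_owner{$1} * on (namespace, pod) group_left(node) kube_pod_info{$2}`
	podName := fmt.Sprintf(`pod=~"%s"`, "nginx-.*")
	expr := strings.NewReplacer("$1", podName, "$2", podName).Replace(tmpl)
	fmt.Println(expr)
	// Output: kube_pod_owner{pod=~"nginx-.*"} * on (namespace, pod) group_left(node) kube_pod_info{pod=~"nginx-.*"}
}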

View File

@@ -254,6 +254,7 @@ type ControllerOption struct {
Namespace string
Kind string
OwnerName string
+ Level string
}
func (po PodOption) Apply(o *QueryOptions) {
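
Note: how the new Level field is consumed is not shown in this diff; the following call-site sketch is purely hypothetical:

opt := tracker.ControllerOption{
	PodsName:  "nginx-.*",
	Namespace: "default",
	Level:     "controller", // assumed value; semantics are not defined in this commit
}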