From 082b172738090c0abd09bcaf2f91352ce7ce9dd6 Mon Sep 17 00:00:00 2001 From: Yibo Liu Date: Thu, 5 Dec 2024 10:17:28 +0800 Subject: [PATCH] Update index.mdx --- docs/zh/14-reference/01-components/12-tdinsight/index.mdx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/zh/14-reference/01-components/12-tdinsight/index.mdx b/docs/zh/14-reference/01-components/12-tdinsight/index.mdx index 649396e499..db8cdd73c1 100644 --- a/docs/zh/14-reference/01-components/12-tdinsight/index.mdx +++ b/docs/zh/14-reference/01-components/12-tdinsight/index.mdx @@ -149,7 +149,7 @@ TDinsight 仪表盘旨在提供 TDengine 相关资源的使用情况和状态, 涛思总结用户使用经验,整理出14个常用的告警规则(alert rule),能够对集群关键指标进行监测并及时上报指标异常、超限等告警信息。 从TDengine-server 3.3.4.3版本(tdengine-datasource 3.6.3)开始,TDengine Datasource 支持预配置告警规则自动导入功能,用户可将14个告警规则一键导入Grafana(11.x版本),直接使用。 -预配置告警规则导入方法如下图所示,在tdengine-datasource setting界面,打开 “Load Tengine Alert” 开关,点击 “Save & test” 按钮后,插件会自动加载上述告警规则, 规则会放入以数据源名称 + “-alert” 的 grafana 告警目录中。如不需要,关闭Load TDengine Alert开关。点击 “Clear TDengine Alert” 旁边的按钮则会清除此数据源导入的所有告警。 +预配置告警规则导入方法如下图所示,在tdengine-datasource setting界面,打开 “Load TDengine Alert” 开关,点击 “Save & test” 按钮后,插件会自动加载上述告警规则, 规则会放入以数据源名称 + “-alert” 的 grafana 告警目录中。如不需要,关闭Load TDengine Alert开关。点击 “Clear TDengine Alert” 旁边的按钮则会清除此数据源已导入的所有告警规则。 ![TDengine Alert](./assets/TDengine-Alert.webp) @@ -171,9 +171,9 @@ TDinsight 仪表盘旨在提供 TDengine 相关资源的使用情况和状态, |慢查询执行最长时间 (无时间窗口) |> 300秒|不触发报警|1分钟|0秒|`select now() as ts, count(*) as slow_count from performance_schema.perf_queries where exec_usec>300000000`| |dnode下线 |total != alive|触发告警|30秒|0秒|`select now(), cluster_id, last(dnodes_total) - last(dnodes_alive) as dnode_offline from log.taosd_cluster_info where _ts >= (now -30s) and _ts < now partition by cluster_id having first(_ts) > 0`| |vnode下线 |total != alive|触发告警|30秒|0秒|`select now(), cluster_id, last(vnodes_total) - last(vnodes_alive) as vnode_offline from log.taosd_cluster_info where _ts >= (now - 30s) and _ts < now partition by cluster_id 
having first(_ts) > 0 `| -|数据删除请求数 |> 0|不触发报警|30秒|0秒|`select now(), count(`count`) as `delete_count` from log.taos_sql_req where sql_type = 'delete' and _ts >= (now -30s) and _ts < now`| -|Adapter RESTful 请求失败 |> 5|不触发报警|30秒|0秒|`select now(), sum(`fail`) as `Failed` from log.adapter_requests where req_type=0 and ts >= (now -30s) and ts < now;`| -|Adapter WebSocket 请求失败 |> 5|不触发报警|30秒|0秒|`select now(), sum(`fail`) as `Failed` from log.adapter_requests where req_type=1 and ts >= (now -30s) and ts < now`| +|数据删除请求数 |> 0|不触发报警|30秒|0秒|``select now(), count(`count`) as `delete_count` from log.taos_sql_req where sql_type = 'delete' and _ts >= (now -30s) and _ts < now``| +|Adapter RESTful 请求失败 |> 5|不触发报警|30秒|0秒|``select now(), sum(`fail`) as `Failed` from log.adapter_requests where req_type=0 and ts >= (now -30s) and ts < now``| +|Adapter WebSocket 请求失败 |> 5|不触发报警|30秒|0秒|``select now(), sum(`fail`) as `Failed` from log.adapter_requests where req_type=1 and ts >= (now -30s) and ts < now``| |dnode 数据上报缺少 |< 3|触发告警|180秒|0秒|`select now(), cluster_id, count(*) as dnode_report from log.taosd_cluster_info where _ts >= (now -180s) and _ts < now partition by cluster_id having timetruncate(first(_ts), 1h) > 0`| |dnode 重启 |max(update_time) > last(update_time)|触发告警|90秒|0秒|`select now(), dnode_id, max(uptime) - last(uptime) as dnode_restart from log.taosd_dnodes_info where _ts >= (now - 90s) and _ts < now partition by dnode_id`|