Merge pull request #29617 from taosdata/enh/telemetry

enh: refactor telemetry scripts
This commit is contained in:
WANG Xu 2025-01-23 15:50:48 +08:00 committed by GitHub
commit b77754a63e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 734 additions and 0 deletions

View File

@ -0,0 +1,6 @@
EXCLUDE_IP="192.168.1.10"
SERVER_IP="192.168.1.11"
HTTP_SERV_IP="192.168.1.12"
HTTP_SERV_PORT=8080
FEISHU_MSG_URL="https://open.feishu.cn/open-apis/bot/v2/hook/*******"
OWNER="Jayden Jia"

View File

@ -0,0 +1,308 @@
from datetime import date
from datetime import timedelta
import os
import json
import re
import requests
import subprocess
from dotenv import load_dotenv
# Read settings from a .env file (create one with: cp .env.example .env).
load_dotenv()

# Version wildcard to report on; every "*" stands for one or more digits.
version = "3.3.2.*"
version_pattern_str = r'\d+'.join(
    piece.replace('.', r'\.') for piece in version.split('*')
)
version_pattern = re.compile('^' + version_pattern_str + '$')
version_stack_list = []

# Values taken from the environment (None when a variable is not set).
ip = os.environ.get("EXCLUDE_IP")
server_ip = os.environ.get("SERVER_IP")
http_serv_ip = os.environ.get("HTTP_SERV_IP")
http_serv_port = os.environ.get("HTTP_SERV_PORT")
owner = os.environ.get("OWNER")
# Feishu webhook the report is posted to.
feishu_msg_url = os.environ.get("FEISHU_MSG_URL")

# Reference day; the report covers the 7 days before it.
today = date.today()

# Directory with the daily crash files; the trailing slash is required
# because file names are appended to it by plain concatenation.
path = "/data/telemetry/crash-report/"
trace_report_path = path + "trace_report"
for required_dir in (path, trace_report_path):
    os.makedirs(required_dir, exist_ok=True)

# Filter scripts executed by get_res().
assert_script_path = path + "filter_assert.sh"
nassert_script_path = path + "filter_nassert.sh"
# get files for the past 7 days
def get_files():
    """Return the paths of the daily crash files for the past 7 days.

    Index 0 is yesterday and index 6 is 7 days ago; every entry is
    ``path`` followed by ``YYYYMMDD.txt``.
    """
    return [
        path + (today - timedelta(days=days_back)).strftime("%Y%m%d") + ".txt"
        for days_back in range(1, 8)
    ]
# Run a filter script over the last 7 days of files and return its output
def get_res(file_path):
    """Run a filter script with bash and return its text output.

    The script receives ``version``, ``ip`` and the paths from
    ``get_files()`` as arguments.

    Returns:
        The script's stdout with trailing whitespace removed when it exits
        with status 0, otherwise whatever it wrote to stderr.
    """
    argv = ['bash', file_path, version, ip]
    argv.extend(get_files())
    finished = subprocess.run(
        argv,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )
    if finished.returncode == 0:
        return finished.stdout.rstrip()
    return finished.stderr
def get_sum(output):
    """Add up the leading integer of every non-blank line of ``output``.

    Each line is expected to look like ``"<count> <anything>"``; blank
    lines contribute nothing.
    """
    running_total = 0
    for row in output.strip().split('\n'):
        tokens = row.split()
        if not tokens:
            # Blank line (also the only "line" of an empty input).
            continue
        running_total += int(tokens[0])
    return running_total
def convert_html(data):
    """Write the crash entries to a standalone HTML report.

    Args:
        data: list of dicts with the keys ``key_stack_info``,
            ``version_list``, ``count`` and ``full_stack_info`` (the shape
            produced by ``get_version_stack_list``).

    Returns:
        The report's file name, ``<start>_<end>.html``; the file itself is
        written into ``trace_report_path``.
    """
    report_files = get_files()
    start_time = report_files[6].split("/")[-1].split(".")[0]
    end_time = report_files[0].split("/")[-1].split(".")[0]
    html_report_file = f'{start_time}_{end_time}.html'
    # The JSON is inlined into a <script> element. Escaping "<" keeps crash
    # text such as "</script>" or "<!--" from ending or corrupting that
    # element; "\u003c" decodes back to "<" when the script runs.
    json_data = json.dumps(data).replace("<", "\\u003c")
    # Create HTML content
    html_content = f'''
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Stack Trace Report</title>
    <style>
        body {{
            font-family: Arial, sans-serif;
            margin: 20px;
            background-color: #f0f0f5;
        }}
        h1 {{
            color: #2c3e50;
            text-align: center;
        }}
        table {{
            width: 100%;
            border-collapse: collapse;
            margin-bottom: 20px;
            box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);
        }}
        th, td {{
            border: 1px solid #ddd;
            padding: 10px;
            text-align: left;
        }}
        th {{
            background-color: #3498db;
            color: white;
        }}
        tr:nth-child(even) {{
            background-color: #ecf0f1;
        }}
        tr:hover {{
            background-color: #d1e7fd;
        }}
        pre {{
            background-color: #f7f7f7;
            padding: 10px;
            border: 1px solid #ddd;
            overflow-x: auto;
            white-space: pre-wrap;
            border-radius: 5px;
        }}
    </style>
</head>
<body>
    <h1>Stack Trace Report From {start_time} To {end_time} </h1>
    <table>
        <thead>
            <tr>
                <th>Key Stack Info</th>
                <th>Versions</th>
                <th>Num Of Crashes</th>
                <th>Full Stack Info</th>
            </tr>
        </thead>
        <tbody id="report">
        </tbody>
    </table>
    <script>
        const data = {json_data};
        const reportBody = document.getElementById('report');
        // Crash data comes from outside: fill cells through textContent so
        // it is shown as text and never parsed as HTML.
        const addCell = (row, text) => {{
            const cell = document.createElement('td');
            cell.textContent = text;
            row.appendChild(cell);
            return cell;
        }};
        data.forEach(entry => {{
            const row = document.createElement('tr');
            addCell(row, entry.key_stack_info);
            const versionCell = addCell(row, '');
            entry.version_list.forEach((versionText, index) => {{
                if (index > 0) {{
                    versionCell.appendChild(document.createElement('br'));
                }}
                versionCell.appendChild(document.createTextNode(versionText));
            }});
            addCell(row, entry.count);
            const stackCell = addCell(row, '');
            const stackBlock = document.createElement('pre');
            stackBlock.textContent = entry.full_stack_info;
            stackCell.appendChild(stackBlock);
            reportBody.appendChild(row);
        }});
    </script>
</body>
</html>
'''
    # Write the HTML content to a file; the page declares UTF-8, so write
    # it as UTF-8 regardless of the platform default encoding.
    with open(f'{trace_report_path}/{html_report_file}', 'w', encoding='utf-8') as f:
        f.write(html_content)
    return html_report_file
def _load_matching_crash_records():
    """Read the daily files once; return (raw_line, record) pairs that pass the IP and version filters."""
    records = []
    for file in get_files():
        try:
            infile = open(file, 'r')
        except FileNotFoundError:
            # A day without a crash file must not abort the whole report.
            continue
        with infile:
            for raw in infile:
                raw = raw.strip()
                # Skip blank lines and records from the excluded IP.
                if not raw or ip in raw:
                    continue
                try:
                    data = json.loads(raw)
                except json.JSONDecodeError:
                    # One malformed record should not abort the report.
                    continue
                if not isinstance(data, dict):
                    continue
                record_version = data.get("version")
                if isinstance(record_version, str) and version_pattern.search(record_version):
                    records.append((raw, data))
    return records


def get_version_stack_list(res):
    """Build one report entry per ``"<count> <key_stack_info>"`` line of ``res``.

    For every line, the crash records of the last 7 days (already filtered
    by excluded IP and version pattern) whose raw text contains the key
    stack info are collected.

    Args:
        res: newline-separated ``"<count> <key_stack_info>"`` lines.

    Returns:
        The module-level ``version_stack_list``, to which one dict per line
        has been appended. Each dict has the keys ``key_stack_info``,
        ``full_stack_info`` (the ``stackInfo`` of the last matching record,
        ``""`` when nothing matched), ``version_list`` (distinct versions in
        first-seen order) and ``count`` (the count as a string).
    """
    # Parse the files once instead of once per result line.
    records = _load_matching_crash_records()
    for result_line in res.strip().split('\n'):
        fields = result_line.split()
        if len(fields) < 2:
            # Not a "<count> <function>" line; nothing to look up.
            continue
        count, key_stack_info = fields[0], fields[1]
        version_list = []
        # Start empty so an entry without any matching record neither fails
        # nor inherits the stack of the previous entry.
        full_stack_info = ""
        for raw, data in records:
            if key_stack_info not in raw:
                continue
            if data["version"] not in version_list:
                version_list.append(data["version"])
            full_stack_info = data.get("stackInfo", "")
        version_stack_list.append({
            "key_stack_info": key_stack_info,
            "full_stack_info": full_stack_info,
            "version_list": version_list,
            "count": count,
        })
    return version_stack_list
# get msg info
def get_msg(text):
    """Wrap ``text`` in the "post" message payload sent to the Feishu webhook.

    The payload holds a single text element under the ``zh_cn`` post, with
    the fixed title "Telemetry Statistics".
    """
    text_element = {"tag": "text", "text": text}
    localized_post = {
        "title": "Telemetry Statistics",
        # One paragraph containing one text element.
        "content": [[text_element]],
    }
    return {
        "msg_type": "post",
        "content": {"post": {"zh_cn": localized_post}},
    }
# post msg
def send_msg(json):
    """POST a message payload to the Feishu webhook.

    The decoded reply is printed unless it contains ``"StatusCode": 0``.

    Args:
        json: JSON-serialisable payload, e.g. the result of ``get_msg``.
            (The parameter name hides the ``json`` module inside this
            function; it is kept so keyword callers keep working.)

    Raises:
        requests.exceptions.RequestException: if the request fails or does
            not complete within the timeout.
    """
    headers = {
        'Content-Type': 'application/json'
    }
    # Without a timeout, requests waits indefinitely on a stalled
    # connection and the script would never finish.
    req = requests.post(url=feishu_msg_url, headers=headers, json=json, timeout=30)
    inf = req.json()
    if not ("StatusCode" in inf and inf["StatusCode"] == 0):
        print(inf)
def format_results(results):
    """Sort ``"<count> <text>"`` lines by count (descending) and align the counts.

    Lines that do not split into a count and a remainder are dropped.
    Every output line is a space, the count right-aligned to the widest
    count, a space, and the remainder. Returns ``""`` when nothing is left.
    """
    entries = []
    for raw_line in results.strip().split('\n'):
        fields = raw_line.split(maxsplit=1)
        if len(fields) != 2:
            continue
        entries.append((int(fields[0]), fields[1]))

    # Stable sort: lines with equal counts keep their input order.
    ranked = sorted(entries, key=lambda entry: entry[0], reverse=True)
    width = max((len(str(count)) for count, _ in ranked), default=0)
    return '\n'.join(
        f" {str(count).rjust(width)} {label}" for count, label in ranked
    )
# send report to feishu
def send_report(res, sum, html_report_file):
    """Compose the weekly crash summary, print its payload and post it to Feishu.

    Args:
        res: raw ``"<count> <function>"`` lines; formatted with
            ``format_results`` for the message body.
        sum: total number of crashes shown in the message.
        html_report_file: file name of the HTML report, appended to the
            HTTP server address to form the details link.
    """
    oldest_day = get_files()[6].split("/")[-1].split(".")[0]
    newest_day = get_files()[0].split("/")[-1].split(".")[0]
    formatted = format_results(res)
    content = f'''
version: v{version}
from: {oldest_day}
to: {newest_day}
ip: {server_ip}
owner: {owner}
result: \n{formatted}\n
total crashes: {sum}\n
details: http://{http_serv_ip}:{http_serv_port}/{html_report_file}
'''
    payload = get_msg(content)
    print(payload)
    send_msg(payload)
# Crashes whose stack does not contain taosAssertDebug.
nassert_res = get_res(nassert_script_path)
# Crashes whose stack contains taosAssertDebug.
assert_res = get_res(assert_script_path)
# get_res() strips the trailing newline, so plain concatenation would fuse
# the last line of the first result with the first line of the second
# (e.g. "3 funcA" + "2 funcB" -> "3 funcA2 funcB"). Join the non-empty
# results with an explicit newline instead.
res = '\n'.join(part for part in (nassert_res, assert_res) if part)
# Look up versions and full stacks for every reported function.
version_stack_list = get_version_stack_list(res) if len(res) > 0 else list()
# Write the HTML report.
html_report_file = convert_html(version_stack_list)
# Total number of crashes (named so that it does not hide the builtin sum).
total_crashes = get_sum(res)
# Post the summary to Feishu.
send_report(res, total_crashes, html_report_file)

View File

@ -0,0 +1,128 @@
from datetime import date
from datetime import timedelta
import os
import re
import requests
from dotenv import load_dotenv
# Read settings from the .env file.
load_dotenv()

# Version pattern inserted into the grep expression of the filter commands.
version = "3.3.*"

# Values taken from the environment (None when a variable is not set).
ip = os.environ.get("EXCLUDE_IP")
server_ip = os.environ.get("SERVER_IP")
owner = os.environ.get("OWNER")
# Feishu webhook URL.
feishu_msg_url = os.environ.get("FEISHU_MSG_URL")

# Reference day; the report covers the 7 days before it.
today = date.today()

# Directory with the daily crash files; the trailing slash is required
# because file names are appended to it by plain concatenation.
path = "/data/telemetry/crash-report/"
# get files for the past 7 days
def get_files():
    """Return the last 7 daily crash-file paths as one space-separated string.

    The paths run from yesterday back to 7 days ago, and each one
    (including the last) is followed by a single space.
    """
    return "".join(
        path + (today - timedelta(days=days_back)).strftime("%Y%m%d") + ".txt "
        for days_back in range(1, 8)
    )
# Shell pipeline for records WITHOUT taosAssertDebug in their stack info.
# Steps: keep lines of the wanted version that contain "taosd(", keep the
# text after "stackInfo", drop lines containing taosAssertDebug or the
# excluded IP, take the text after the second "taosd", cut out what lies
# between "(" and ")", then count identical values with sort | uniq -c.
# NOTE: inside this non-raw string literal a backslash at the end of a line
# is a Python line continuation, so the command reaches the shell as a
# single line.
filter1_cmd = '''grep '"version":"%s"' %s \
| grep "taosd(" \
| awk -F "stackInfo" '{print $2}' \
| grep -v "taosAssertDebug" \
| grep -v %s \
| awk -F "taosd" '{print $3}' \
| cut -d")" -f 1 \
| cut -d"(" -f 2 \
| sort | uniq -c ''' % (version, get_files(), ip)
# Same pipeline for records WITH taosAssertDebug in their stack info: the
# only difference is that taosAssertDebug lines are kept instead of dropped.
filter2_cmd = '''grep '"version":"%s"' %s \
| grep "taosd(" \
| awk -F "stackInfo" '{print $2}' \
| grep "taosAssertDebug" \
| grep -v %s \
| awk -F "taosd" '{print $3}' \
| cut -d")" -f 1 \
| cut -d"(" -f 2 \
| sort | uniq -c ''' % (version, get_files(), ip)
# get msg info
def get_msg(text):
    """Build the Feishu "post" payload that carries ``text`` as its only element."""
    paragraph = [{"tag": "text", "text": text}]
    post_body = {"title": "Telemetry Statistics", "content": [paragraph]}
    message = {"msg_type": "post"}
    message["content"] = {"post": {"zh_cn": post_body}}
    return message
# post msg
def send_msg(json):
    """POST a message payload to the Feishu webhook.

    The decoded reply is printed unless it contains ``"StatusCode": 0``.

    Args:
        json: JSON-serialisable payload, e.g. the result of ``get_msg``.

    Raises:
        requests.exceptions.RequestException: if the request fails or does
            not complete within the timeout.
    """
    headers = {
        'Content-Type': 'application/json'
    }
    # The webhook URL is stored in feishu_msg_url; the previously used name
    # group_url is not defined anywhere in this script (NameError).
    # The timeout keeps a stalled connection from blocking forever.
    req = requests.post(url=feishu_msg_url, headers=headers, json=json, timeout=30)
    inf = req.json()
    if not ("StatusCode" in inf and inf["StatusCode"] == 0):
        print(inf)
# exec cmd and return res
def get_output(cmd):
    """Run ``cmd`` through the shell and return everything it wrote to stdout."""
    with os.popen(cmd) as pipe:
        return pipe.read()
# get sum
def get_count(output):
    """Add up every integer in ``output`` that has a space on both sides.

    This matches the count column of ``uniq -c`` output, where each count
    is padded with spaces on the left and followed by one space.

    Args:
        output: text to scan.

    Returns:
        The sum of all matched integers, 0 when there are none.
    """
    # Raw string: "\d" in a normal string literal is an invalid escape
    # sequence, which newer Python versions warn about.
    return sum(int(number) for number in re.findall(r" (\d+) ", output))
# print total crash count
def print_result():
    """Run both filter pipelines and return their combined crash count.

    Despite its name, the function prints nothing.
    """
    counts = [get_count(get_output(command)) for command in (filter1_cmd, filter2_cmd)]
    return counts[0] + counts[1]
# send report to feishu
def send_report():
    """Compose the crash summary and print it.

    The summary lists the owner, the server IP, the covered date range,
    the raw output of both filter pipelines and the total crash count.
    Posting to Feishu is currently disabled (see the commented call).
    """
    # The date is the file name (5th "/"-separated component) without ".txt".
    first_day = get_files().split(" ")[6].split("/")[4].split(".")[0]
    last_day = get_files().split(" ")[0].split("/")[4].split(".")[0]
    nassert_output = get_output(filter1_cmd)
    assert_output = get_output(filter2_cmd)
    total = print_result()
    content = f'''
test scope: Telemetry Statistics
owner: {owner}
ip: {server_ip}
from: {first_day}
to: {last_day}
filter1 result: {nassert_output}
filter2 result: {assert_output}
total crashes: {total}
'''
    #send_msg(get_msg(content))
    print(content)
# send_report() computes the total itself; a separate print_result() call
# here only discarded its return value after running both shell pipelines.
send_report()

View File

@ -0,0 +1,61 @@
# 目录
1. [介绍](#1-介绍)
1. [前置条件](#2-前置条件)
1. [运行](#3-运行)
# 1. 介绍
本手册旨在为开发人员提供全面的指导以收集过去7天的崩溃信息并将其报告到飞书通知群。
> [!NOTE]
> - 下面的命令和脚本已在 Linux(CentOS 7.9.2009)上验证。
# 2. 前置条件
- 安装 Python3
```bash
yum install python3
yum install python3-pip
```
- 安装 Python 依赖
```bash
pip3 install requests python-dotenv
```
- 调整 .env 文件
```bash
cd $DIR/telemetry/crash-report
cp .env.example .env
vim .env
...
```
- .env 样例
```bash
# 过滤器排除 IP(公司网络出口 IP)
EXCLUDE_IP="192.168.1.10"
# 英文官网服务器 IP
SERVER_IP="192.168.1.11"
# 内网提供 HTTP 服务的 IP 及端口,用于提供 HTML 报告浏览
HTTP_SERV_IP="192.168.1.12"
HTTP_SERV_PORT=8080
# 飞书群机器人 webhook 地址
FEISHU_MSG_URL="https://open.feishu.cn/open-apis/bot/v2/hook/*******"
# 负责人
OWNER="Jayden Jia"
```
# 3. 运行
在 $DIR/telemetry/crash-report 目录中,有一些文件名类似 202501**.txt 的文件。Python 脚本会从这些文本文件中收集崩溃信息,并将报告发送到您的飞书机器人群组中。
```bash
cd $DIR/telemetry/crash-report
python3 CrashCounter.py
```

View File

@ -0,0 +1,61 @@
# Table of Contents
1. [Introduction](#1-introduction)
1. [Prerequisites](#2-prerequisites)
1. [Running](#3-running)
# 1. Introduction
This manual gives developers step-by-step guidance for collecting crash information from the past 7 days and reporting it to the Feishu notification group.
> [!NOTE]
> - The commands and scripts below have been verified on Linux (CentOS 7.9.2009).
# 2. Prerequisites
- Install Python3
```bash
yum install python3
yum install python3-pip
```
- Install Python dependencies
```bash
pip3 install requests python-dotenv
```
- Adjust .env file
```bash
cd $DIR/telemetry/crash-report
cp .env.example .env
vim .env
...
```
- Example for .env
```bash
# IP address to exclude from the statistics (the company network's egress IP)
EXCLUDE_IP="192.168.1.10"
# Official website server IP
SERVER_IP="192.168.1.11"
# IP and port of the internal HTTP server that serves the HTML reports
HTTP_SERV_IP="192.168.1.12"
HTTP_SERV_PORT=8080
# Webhook address of the Feishu group bot
FEISHU_MSG_URL="https://open.feishu.cn/open-apis/bot/v2/hook/*******"
# Owner
OWNER="Jayden Jia"
```
# 3. Running
The `$DIR/telemetry/crash-report` directory contains several files with names like 202501**.txt. The Python script collects crash information from these text files and sends the report to your Feishu bot group.
```bash
cd $DIR/telemetry/crash-report
python3 CrashCounter.py
```

View File

@ -0,0 +1,15 @@
#!/bin/bash
# Count crash records per function name for one fixed version, using only
# records whose stack info does NOT contain taosAssertDebug.
# Run it from the directory that holds .env: the file is sourced by a
# relative path.
source .env
filesPath="/data/telemetry/crash-report"
version="3.0.4.1"
# Stop early when EXCLUDE_IP is missing or empty: without a pattern the
# "grep -v" stage below cannot work and the pipeline would print nothing.
taosdataIp="${EXCLUDE_IP:?EXCLUDE_IP must be set in .env}"
# -F: the version and the IP are literal strings, so their dots must not
# act as regex wildcards.
grep -F "\"version\":\"${version}\"" "${filesPath}"/*.txt \
    | grep "taosd(" \
    | awk -F "stackInfo" '{print $2}' \
    | grep -v "taosAssertDebug" \
    | grep -vF -- "${taosdataIp}" \
    | awk -F "taosd" '{print $2}' \
    | cut -d")" -f 1 \
    | cut -d"(" -f 2 \
    | sort | uniq -c

View File

@ -0,0 +1,14 @@
#!/bin/bash
# Count crash records per function name for one fixed version, using only
# records whose stack info contains taosAssertDebug.
# Run it from the directory that holds .env: the file is sourced by a
# relative path.
source .env
filesPath="/data/telemetry/crash-report"
version="3.0.4.1"
# Stop early when EXCLUDE_IP is missing or empty: without a pattern the
# "grep -v" stage below cannot work and the pipeline would print nothing.
taosdataIp="${EXCLUDE_IP:?EXCLUDE_IP must be set in .env}"
# -F: the version and the IP are literal strings, so their dots must not
# act as regex wildcards.
grep -F "\"version\":\"${version}\"" "${filesPath}"/*.txt \
    | grep "taosd(" \
    | awk -F "stackInfo" '{print $2}' \
    | grep "taosAssertDebug" \
    | grep -vF -- "${taosdataIp}" \
    | awk -F "taosd" '{print $3}' \
    | cut -d")" -f 1 \
    | cut -d"(" -f 2 \
    | sort | uniq -c

View File

@ -0,0 +1,67 @@
#!/bin/bash
# Count, per function name, the crash records whose fields contain
# taosAssertDebug: the counted function is the first taosd(...) entry found
# in a field after the one holding taosAssertDebug.
# Usage: <script> VERSION_REGEX IP_REGEX FILE...
# Prints one "<count> <function>" line per function.
# The awk program uses the three-argument match(), a gawk extension.

# Extract version and IP from the first two arguments
version="$1"
ip="$2"
shift 2 # Remove the first two arguments, leaving only file paths
# Keep the remaining arguments as an array so every path stays one word
# (a plain string would be re-split on spaces and glob-expanded).
file_paths=("$@")
# Execute the awk script and capture the output
readarray -t output < <(awk -v version="$version" -v ip="$ip" '
BEGIN {
    RS = "\\n"; # Set the record separator to newline
    FS = ","; # Set the field separator to comma
    total = 0; # Initialize total count
    version_regex = version; # Use the passed version pattern
    ip_regex = ip; # Use the passed IP pattern
}
{
    start_collecting = 0;
    version_matched = 0;
    ip_excluded = 0;
    # Check each field within a record
    for (i = 1; i <= NF; i++) {
        if ($i ~ /"ip":"[^"]*"/ && $i ~ ip_regex) {
            ip_excluded = 1;
        }
        if ($i ~ /"version":"[^"]*"/ && $i ~ version_regex) {
            version_matched = 1;
        }
    }
    if (!ip_excluded && version_matched) {
        for (i = 1; i <= NF; i++) {
            if ($i ~ /taosAssertDebug/ && start_collecting == 0) {
                start_collecting = 1;
                continue;
            }
            if (start_collecting == 1 && $i ~ /taosd\(([^)]+)\)/) {
                match($i, /taosd\(([^)]+)\)/, arr);
                if (arr[1] != "") {
                    count[arr[1]]++;
                    total++;
                    break;
                }
            }
        }
    }
}
END {
    for (c in count) {
        printf "%d %s\n", count[c], c;
    }
    print "Total count:", total;
}' "${file_paths[@]}")
# Capture the function details and total count into separate variables.
# The last output line is the total; everything before it is the detail.
# Guard against an empty array (awk printed nothing), which would make the
# negative length and the [-1] subscript below fail.
if (( ${#output[@]} > 0 )); then
    function_details=$(printf "%s\n" "${output[@]::${#output[@]}-1}")
    total_count="${output[-1]}"
else
    function_details=""
    total_count=""
fi
# Output or use the variables as needed
#echo "Function Details:"
echo "$function_details"
#echo "Total Count:"
#echo "$total_count"

View File

@ -0,0 +1,74 @@
#!/bin/bash
# Count, per function name, the crash records that do NOT contain
# taosAssertDebug: the counted function is the first taosd(...) entry of a
# record that matches /frame:.*taosd\([^)]+\)/.
# Usage: <script> VERSION IP FILE...
# Prints one "<count> <function>" line per function.
# The awk program uses the three-argument match(), a gawk extension.

# Pass version, ip, and file paths as arguments
version="$1"
ip="$2"
shift 2 # Shift the first two arguments to get file paths
# Keep the remaining arguments as an array so every path stays one word
# (a plain string would be re-split on spaces and glob-expanded).
file_paths=("$@")
# Execute awk and capture the output
readarray -t output < <(awk -v version="$version" -v ip="$ip" '
BEGIN {
    RS = "\\n"; # Set the record separator to newline
    total = 0; # Initialize total count
    version_regex = "\"version\":\"" version; # Construct the regex for version
    ip_regex = "\"ip\":\"" ip "\""; # Construct the regex for IP
}
{
    found = 0; # Initialize the found flag to false
    start_collecting = 1; # Start collecting by default, unless taosAssertDebug is encountered
    split($0, parts, "\\n"); # Split each record by newline
    # Check for version and IP in each part
    version_matched = 0;
    ip_excluded = 0;
    for (i in parts) {
        if (parts[i] ~ version_regex) {
            version_matched = 1; # Set flag if version is matched
        }
        if (parts[i] ~ ip_regex) {
            ip_excluded = 1; # Set flag if IP is matched
            break; # No need to continue if IP is excluded
        }
    }
    # Process only if version is matched and IP is not excluded
    if (version_matched && !ip_excluded) {
        for (i in parts) {
            if (parts[i] ~ /taosAssertDebug/) {
                start_collecting = 0; # Skip this record if taosAssertDebug is encountered
                break; # Exit the loop
            }
        }
        if (start_collecting == 1) { # Continue processing if taosAssertDebug is not found
            for (i in parts) {
                if (found == 0 && parts[i] ~ /frame:.*taosd\([^)]+\)/) {
                    # Match the first frame that meets the condition
                    match(parts[i], /taosd\(([^)]+)\)/, a); # Extract the function name
                    if (a[1] != "") {
                        count[a[1]]++; # Increment the count for this function name
                        total++; # Increment the total count
                        found = 1; # Set found flag to true
                        break; # Exit the loop once the function is found
                    }
                }
            }
        }
    }
}
END {
    for (c in count) {
        printf "%d %s\n", count[c], c; # Print the count and function name formatted
    }
    print total; # Print the total count alone
}' "${file_paths[@]}")
# Capture the function details and total count into separate variables.
# The last output line is the total; everything before it is the detail.
# Guard against an empty array (awk printed nothing), which would make the
# negative length and the [-1] subscript below fail.
if (( ${#output[@]} > 0 )); then
    function_details=$(printf "%s\n" "${output[@]::${#output[@]}-1}") # Join array elements with newlines
    total_count="${output[-1]}" # The last element
else
    function_details=""
    total_count=""
fi
# Output or use the variables as needed
#echo "Function Details:"
echo "$function_details"
#echo "Total Count:"
#echo "$total_count"