Merge pull request #29617 from taosdata/enh/telemetry

enh: refactor telemetry scripts
2025-01-23 15:50:48 +08:00 · 2025-01-23 15:50:48 +08:00 · b77754a63e
parent 32860403dc fb682e441c
commit b77754a63e
9 changed files with 734 additions and 0 deletions
--- a/tests/script/telemetry/crash-report/.env.example
+++ b/tests/script/telemetry/crash-report/.env.example
@ -0,0 +1,6 @@
+EXCLUDE_IP="192.168.1.10"
+SERVER_IP="192.168.1.11"
+HTTP_SERV_IP="192.168.1.12"
+HTTP_SERV_PORT=8080
+FEISHU_MSG_URL="https://open.feishu.cn/open-apis/bot/v2/hook/*******"
+OWNER="Jayden Jia"
--- a/tests/script/telemetry/crash-report/CrashCounter.py
+++ b/tests/script/telemetry/crash-report/CrashCounter.py
@ -0,0 +1,308 @@
+from datetime import date
+from datetime import timedelta
+import os
+import json
+import re
+import requests
+import subprocess
+from dotenv import load_dotenv
+
+# load .env
+# You should have a .env file in the same directory as this script
+# You can exec: cp .env.example .env
+load_dotenv()
+
+# define version
+# `version` is a glob-like pattern. It is passed verbatim to the filter
+# scripts, and also converted into an anchored regex for the Python side:
+# every "." becomes a literal dot and every "*" becomes one or more digits.
+version = "3.3.2.*"
+version_pattern_str = version.replace('.', r'\.').replace('*', r'\d+')
+version_pattern = re.compile(rf'^{version_pattern_str}$')
+# Filled by get_version_stack_list(); one dict per distinct crash site.
+version_stack_list = list()
+
+# define ip
+# NOTE(review): os.getenv() returns None for an unset variable; the code
+# below uses these values without checking, so .env is assumed complete.
+
+# `ip` is the address whose crash reports are excluded from the statistics.
+ip = os.getenv("EXCLUDE_IP")
+server_ip = os.getenv("SERVER_IP")
+http_serv_ip = os.getenv("HTTP_SERV_IP")
+http_serv_port = os.getenv("HTTP_SERV_PORT")
+owner = os.getenv("OWNER")
+
+# feishu-msg url
+feishu_msg_url = os.getenv("FEISHU_MSG_URL")
+
+# get today
+# Evaluated once at start-up; the 7-day window is derived from this value.
+today = date.today()
+
+# Define the file and parameters
+# `path` holds the daily <YYYYMMDD>.txt files and the filter scripts;
+# generated HTML reports are written to `trace_report_path`.
+path="/data/telemetry/crash-report/"
+trace_report_path = path + "trace_report"
+os.makedirs(path, exist_ok=True)
+os.makedirs(trace_report_path, exist_ok=True)
+
+assert_script_path = path + "filter_assert.sh"
+nassert_script_path = path + "filter_nassert.sh"
+
+# get files for the past 7 days
+def get_files():
+    """Return the paths of the seven most recent daily report files.
+
+    The list is ordered newest first: index 0 is yesterday's file and
+    index 6 is the file from seven days before ``today``. Each path has the
+    form ``<path><YYYYMMDD>.txt``. The files are not checked for existence.
+    """
+    daily_paths = []
+    for days_back in range(1, 8):
+        stamp = (today - timedelta(days=days_back)).strftime("%Y%m%d")
+        daily_paths.append(f"{path}{stamp}.txt")
+    return daily_paths
+
+# Run a filter script over the files of the past 7 days
+def get_res(file_path):
+    """Run the bash script at *file_path* and return its text output.
+
+    The script is invoked as ``bash <file_path> <version> <ip> <file>...``
+    with the seven daily files from get_files().
+
+    Returns:
+        The script's stdout with trailing whitespace removed when it exits
+        with status 0; otherwise the script's stderr, unmodified.
+    """
+    completed = subprocess.run(
+        ['bash', file_path, version, ip] + get_files(),
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        universal_newlines=True,
+    )
+
+    if completed.returncode == 0:
+        return completed.stdout.rstrip()
+    # A failing script hands its error text back to the caller as the result.
+    return completed.stderr
+
+def get_sum(output):
+    """Add up the leading integer of every non-blank line of *output*.
+
+    Each line is expected to look like ``<count> <function name>``; only the
+    first whitespace-separated token is used. Blank lines contribute nothing.
+
+    Raises:
+        ValueError: if the first token of a non-blank line is not an integer.
+    """
+    total_sum = 0
+    for row in output.strip().split('\n'):
+        tokens = row.split()
+        if not tokens:
+            # Nothing on this line (also covers completely empty output).
+            continue
+        total_sum += int(tokens[0])
+    return total_sum
+
+def convert_html(data):
+    """Render *data* as a self-contained HTML report and write it to disk.
+
+    Args:
+        data: list of dicts as produced by get_version_stack_list(), each
+            with the keys ``key_stack_info``, ``version_list``, ``count``
+            and ``full_stack_info``.
+
+    Returns:
+        The report's file name, ``<oldest day>_<newest day>.html``. The file
+        itself is written into ``trace_report_path``.
+    """
+    # The report is named after the oldest and newest day of the window.
+    files = get_files()
+    start_time = files[6].split("/")[-1].split(".")[0]
+    end_time = files[0].split("/")[-1].split(".")[0]
+    html_report_file = f'{start_time}_{end_time}.html'
+    # The stack traces originate from crash reports uploaded by clients, so
+    # they are untrusted text. Escaping "<" keeps a literal "</script>" in a
+    # trace from terminating the inline script; the escape decodes back to
+    # "<" inside the JavaScript string.
+    json_data = json.dumps(data).replace('<', '\\u003c')
+
+    # Create HTML content
+    html_content = f'''
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Stack Trace Report</title>
+    <style>
+        body {{
+            font-family: Arial, sans-serif;
+            margin: 20px;
+            background-color: #f0f0f5;
+        }}
+        h1 {{
+            color: #2c3e50;
+            text-align: center;
+        }}
+        table {{
+            width: 100%;
+            border-collapse: collapse;
+            margin-bottom: 20px;
+            box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);
+        }}
+        th, td {{
+            border: 1px solid #ddd;
+            padding: 10px;
+            text-align: left;
+        }}
+        th {{
+            background-color: #3498db;
+            color: white;
+        }}
+        tr:nth-child(even) {{
+            background-color: #ecf0f1;
+        }}
+        tr:hover {{
+            background-color: #d1e7fd;
+        }}
+        pre {{
+            background-color: #f7f7f7;
+            padding: 10px;
+            border: 1px solid #ddd;
+            overflow-x: auto;
+            white-space: pre-wrap;
+            border-radius: 5px;
+        }}
+    </style>
+</head>
+<body>
+    <h1>Stack Trace Report From {start_time} To {end_time} </h1>
+
+    <table>
+        <thead>
+            <tr>
+                <th>Key Stack Info</th>
+                <th>Versions</th>
+                <th>Num Of Crashes</th>
+                <th>Full Stack Info</th>
+            </tr>
+        </thead>
+        <tbody id="report">
+        </tbody>
+    </table>
+
+    <script>
+        const data = {json_data};
+
+        const reportBody = document.getElementById('report');
+
+        // Cells are filled through textContent / text nodes so that "<" or
+        // "&" inside a stack trace is shown literally, never parsed as markup.
+        function addCell(row, fill) {{
+            const cell = document.createElement('td');
+            fill(cell);
+            row.appendChild(cell);
+        }}
+
+        data.forEach(entry => {{
+            const row = document.createElement('tr');
+            addCell(row, cell => {{
+                cell.textContent = entry.key_stack_info;
+            }});
+            addCell(row, cell => {{
+                // One version per line, separated by <br> elements.
+                entry.version_list.forEach((ver, idx) => {{
+                    if (idx > 0) {{
+                        cell.appendChild(document.createElement('br'));
+                    }}
+                    cell.appendChild(document.createTextNode(ver));
+                }});
+            }});
+            addCell(row, cell => {{
+                cell.textContent = entry.count;
+            }});
+            addCell(row, cell => {{
+                const pre = document.createElement('pre');
+                pre.textContent = entry.full_stack_info;
+                cell.appendChild(pre);
+            }});
+            reportBody.appendChild(row);
+        }});
+    </script>
+</body>
+</html>
+'''
+    # Write the HTML content to a file; the page declares UTF-8, so the file
+    # is written as UTF-8 regardless of the system locale.
+    with open(f'{trace_report_path}/{html_report_file}', 'w', encoding='utf-8') as f:
+        f.write(html_content)
+    return html_report_file
+
+def get_version_stack_list(res):
+    """Collect versions and a sample stack trace for every crash site in *res*.
+
+    Args:
+        res: filter-script output, one ``<count> <key stack info>`` entry per
+            line. Lines with fewer than two tokens are skipped.
+
+    For each entry, every daily file from get_files() is scanned line by line
+    (one JSON document per line). A record counts as a match when it does not
+    contain the excluded ``ip``, its ``version`` matches ``version_pattern``
+    and the record text contains the key stack info.
+
+    Returns:
+        The module-level ``version_stack_list``, to which one dict per entry
+        has been appended (keys: ``key_stack_info``, ``full_stack_info``,
+        ``version_list``, ``count``). ``full_stack_info`` is the ``stackInfo``
+        of the most recent record that contributed a new version, or ``""``
+        when no record matched.
+    """
+    for res_line in res.strip().split('\n'):
+        fields = res_line.split()
+        if len(fields) < 2:
+            # Blank or malformed summary line: nothing to look up.
+            continue
+        count, key_stack_info = fields[0], fields[1]
+
+        version_list = list()
+        # Reset for every entry so an unmatched key neither raises
+        # UnboundLocalError nor inherits the previous entry's stack trace.
+        full_stack_info = ""
+        for file in get_files():
+            with open(file, 'r') as infile:
+                for record in infile:
+                    record = record.strip()
+                    if not record:
+                        # json.loads() rejects empty input; skip blank lines.
+                        continue
+                    data = json.loads(record)
+                    if ip not in record and version_pattern.search(data["version"]) and key_stack_info in record:
+                        if data["version"] not in version_list:
+                            version_list.append(data["version"])
+                            full_stack_info = data["stackInfo"]
+
+        version_stack_list.append({
+            "key_stack_info": key_stack_info,
+            "full_stack_info": full_stack_info,
+            "version_list": version_list,
+            "count": count,
+        })
+    return version_stack_list
+
+# get msg info
+def get_msg(text):
+    """Wrap *text* in the "post" message payload sent to the Feishu webhook.
+
+    The payload carries the fixed title "Telemetry Statistics" and a single
+    text element holding *text*.
+    """
+    text_element = {"tag": "text", "text": text}
+    post_body = {
+        "title": "Telemetry Statistics",
+        "content": [[text_element]],
+    }
+    return {
+        "msg_type": "post",
+        "content": {"post": {"zh_cn": post_body}},
+    }
+
+# post msg
+def send_msg(json):
+    """POST the payload *json* to the webhook at ``feishu_msg_url``.
+
+    Nothing is printed when the reply contains ``"StatusCode": 0``; any other
+    reply is printed for diagnosis.
+
+    Raises:
+        requests.exceptions.RequestException: if the request fails or does
+            not complete within the timeout.
+    """
+    headers = {
+        'Content-Type': 'application/json'
+    }
+
+    # Without a timeout an unreachable webhook would block the job forever.
+    req = requests.post(url=feishu_msg_url, headers=headers, json=json, timeout=30)
+    try:
+        inf = req.json()
+    except ValueError:
+        # The body was not JSON (e.g. a proxy error page); report it instead
+        # of crashing after the report has already been generated.
+        print(f"unexpected webhook response ({req.status_code}): {req.text}")
+        return
+    if "StatusCode" not in inf or inf["StatusCode"] != 0:
+        print(inf)
+
+
+def format_results(results):
+    """Sort ``<count> <text>`` lines by count (descending) and align them.
+
+    Lines that do not split into exactly a count and a remainder are dropped.
+    Counts are right-justified to the width of the widest count and every
+    line is indented by seven spaces. Returns an empty string when no line
+    qualifies.
+
+    Raises:
+        ValueError: if the first token of a two-part line is not an integer.
+    """
+    rows = []
+    for raw in results.strip().split('\n'):
+        pieces = raw.split(maxsplit=1)
+        if len(pieces) != 2:
+            continue
+        rows.append((int(pieces[0]), pieces[1]))
+
+    # Highest count first; entries with equal counts keep their input order.
+    ordered = sorted(rows, key=lambda row: row[0], reverse=True)
+
+    # Width of the widest count, or 0 when there is nothing to print.
+    width = max((len(str(number)) for number, _ in ordered), default=0)
+
+    return '\n'.join(
+        f"       {str(number).rjust(width)} {text}" for number, text in ordered
+    )
+
+# send report to feishu
+def send_report(res, sum, html_report_file):
+    """Build the weekly summary text, print its payload and send it.
+
+    Args:
+        res: combined filter-script output (``<count> <function>`` lines).
+        sum: total number of crashes to report.
+        html_report_file: file name of the HTML report; it is appended to
+            ``http://<http_serv_ip>:<http_serv_port>/`` as the details link.
+    """
+    window = get_files()
+    first_day = window[6].split("/")[-1].split(".")[0]
+    last_day = window[0].split("/")[-1].split(".")[0]
+    ranked = format_results(res)
+    content = f'''
+    version: v{version}
+    from: {first_day}
+    to: {last_day}
+    ip: {server_ip}
+    owner: {owner}
+    result: \n{ranked}\n
+    total crashes: {sum}\n
+    details: http://{http_serv_ip}:{http_serv_port}/{html_report_file}
+    '''
+    # The same payload is echoed locally and then posted to the webhook.
+    payload = get_msg(content)
+    print(payload)
+    send_msg(payload)
+
+# for none-taosAssertDebug
+nassert_res = get_res(nassert_script_path)
+
+# for taosAssertDebug
+assert_res = get_res(assert_script_path)
+
+# combine the results
+# get_res() strips the trailing newline, so plain concatenation would glue
+# the last non-assert line onto the first assert line and lose that entry.
+# Join the non-empty parts with an explicit newline instead.
+res = '\n'.join(part for part in (nassert_res, assert_res) if part)
+
+# get version stack list
+version_stack_list = get_version_stack_list(res) if len(res) > 0 else list()
+
+# convert to html
+html_report_file = convert_html(version_stack_list)
+
+# get sum (named so that the builtin sum() is not shadowed)
+total_crashes = get_sum(res)
+
+# send report
+send_report(res, total_crashes, html_report_file)
+
--- a/tests/script/telemetry/crash-report/CrashCounter.py.old
+++ b/tests/script/telemetry/crash-report/CrashCounter.py.old
@ -0,0 +1,128 @@
+from datetime import date
+from datetime import timedelta
+import os
+import re
+import requests
+from dotenv import load_dotenv
+
+# load .env
+load_dotenv()
+
+# define version
+# Interpolated as-is into the grep pattern of filter1_cmd / filter2_cmd.
+version = "3.3.*"
+
+# Address whose crash reports are excluded (passed to `grep -v`).
+# NOTE(review): os.getenv() returns None when a variable is unset.
+ip = os.getenv("EXCLUDE_IP")
+server_ip = os.getenv("SERVER_IP")
+owner = os.getenv("OWNER")
+
+# feishu-msg url
+feishu_msg_url = os.getenv("FEISHU_MSG_URL")
+
+# Evaluated once; the 7-day window is derived from this value.
+today = date.today()
+# Fixed date that can be enabled instead of the real one for debugging:
+#today = date(2023,8,7)
+# Directory holding the daily <YYYYMMDD>.txt files.
+path="/data/telemetry/crash-report/"
+
+# get files for the past 7 days
+def get_files():
+    """Return the seven most recent daily file paths as one string.
+
+    Paths are ordered newest (yesterday) first and each one, including the
+    last, is followed by a single space, so the result can be pasted into a
+    shell command line.
+    """
+    stamps = [
+        (today - timedelta(days=days_back)).strftime("%Y%m%d")
+        for days_back in range(1, 8)
+    ]
+    return "".join(f"{path}{stamp}.txt " for stamp in stamps)
+
+# for none-taosAssertDebug
+# Shell pipeline, built once at import time: keep the records of the wanted
+# version that mention "taosd(", take the text after "stackInfo", drop
+# records containing taosAssertDebug or the excluded ip, extract the text
+# between "(" and ")" after the second "taosd", then count identical names.
+filter1_cmd = '''grep '"version":"%s"'  %s \
+| grep  "taosd(" \
+| awk -F "stackInfo" '{print $2}' \
+| grep -v "taosAssertDebug" \
+| grep -v %s \
+| awk -F "taosd" '{print $3}' \
+| cut -d")" -f 1 \
+| cut -d"(" -f 2 \
+| sort | uniq -c ''' % (version, get_files(), ip)
+
+# for taosAssertDebug
+# Same pipeline as above, except that only records containing
+# taosAssertDebug are kept.
+filter2_cmd = '''grep '"version":"%s"'  %s \
+| grep  "taosd(" \
+| awk -F "stackInfo" '{print $2}' \
+| grep "taosAssertDebug" \
+| grep -v %s \
+| awk -F "taosd" '{print $3}' \
+| cut -d")" -f 1 \
+| cut -d"(" -f 2 \
+| sort | uniq -c ''' % (version, get_files(), ip)
+
+# get msg info
+def get_msg(text):
+    """Wrap *text* in the "post" message payload for the Feishu webhook.
+
+    The payload carries the fixed title "Telemetry Statistics" and a single
+    text element holding *text*.
+    """
+    text_element = {"tag": "text", "text": text}
+    post_body = {
+        "title": "Telemetry Statistics",
+        "content": [[text_element]],
+    }
+    return {
+        "msg_type": "post",
+        "content": {"post": {"zh_cn": post_body}},
+    }
+
+# post msg
+def send_msg(json):
+    """POST the payload *json* to the webhook at ``feishu_msg_url``.
+
+    Nothing is printed when the reply contains ``"StatusCode": 0``; any other
+    reply is printed for diagnosis.
+
+    Raises:
+        requests.exceptions.RequestException: if the request fails or does
+            not complete within the timeout.
+    """
+    headers = {
+        'Content-Type': 'application/json'
+    }
+
+    # The webhook URL is stored in `feishu_msg_url`; the previously used
+    # name `group_url` is not defined anywhere in this script.
+    # The timeout keeps an unreachable webhook from blocking forever.
+    req = requests.post(url=feishu_msg_url, headers=headers, json=json, timeout=30)
+    inf = req.json()
+    if "StatusCode" not in inf or inf["StatusCode"] != 0:
+        print(inf)
+
+# exec cmd and return res
+def get_output(cmd):
+    """Run the shell command *cmd* and return everything it wrote to stdout."""
+    # The context manager closes the pipe even if read() raises.
+    with os.popen(cmd) as text:
+        return text.read()
+
+# get sum
+def get_count(output):
+    """Sum every integer in *output* that has a space on both sides.
+
+    This matches the count column of ``uniq -c`` output, where each count is
+    preceded by padding and followed by a space. Returns 0 when nothing
+    matches.
+    """
+    # Raw string: "\d" in a normal string literal is an invalid escape
+    # sequence and triggers a warning on current Python versions.
+    return sum(int(match) for match in re.findall(r" \d+ ", output))
+
+# compute total crash count
+def print_result():
+    """Run both filter pipelines and return the combined crash count.
+
+    Despite its name the function prints nothing; it only returns the total.
+    """
+    non_assert_crashes = get_count(get_output(filter1_cmd))
+    assert_crashes = get_count(get_output(filter2_cmd))
+    return non_assert_crashes + assert_crashes
+
+# send report to feishu
+def send_report():
+    """Assemble the weekly summary and print it.
+
+    Both filter pipelines are executed for their raw output, and once more
+    through print_result() for the total. The text is only printed; the call
+    that would post it to the webhook is disabled.
+    """
+    daily_files = get_files().split(" ")
+    first_day = daily_files[6].split("/")[4].split(".")[0]
+    last_day = daily_files[0].split("/")[4].split(".")[0]
+    filter1_out = get_output(filter1_cmd)
+    filter2_out = get_output(filter2_cmd)
+    total = print_result()
+    content = f'''
+    test scope: Telemetry Statistics
+    owner: {owner}
+    ip: {server_ip}
+    from: {first_day}
+    to: {last_day}
+    filter1 result: {filter1_out}
+    filter2 result: {filter2_out}
+    total crashes: {total}
+    '''
+    # Posting is switched off: send_msg(get_msg(content))
+    print(content)
+
+# send_report() computes the total itself; a separate print_result() call
+# here would only re-run both shell pipelines and discard the result.
+send_report()
--- a/tests/script/telemetry/crash-report/README-CN.md
+++ b/tests/script/telemetry/crash-report/README-CN.md
@ -0,0 +1,61 @@
+# 目录
+
+1. [介绍](#1-介绍)
+1. [前置条件](#2-前置条件)
+1. [运行](#3-运行)
+
+# 1. 介绍
+
+本手册旨在为开发人员提供全面的指导，以收集过去7天的崩溃信息并将其报告到飞书通知群。
+
+> [!NOTE]
+> - 下面的命令和脚本已在 Linux（CentOS 7.9.2009）上验证。
+
+# 2. 前置条件
+
+- 安装 Python3
+
+```bash
+yum install python3
+yum install python3-pip
+```
+
+- 安装 Python 依赖
+
+```bash
+pip3 install requests python-dotenv
+```
+
+- 调整 .env 文件
+
+```bash
+cd $DIR/telemetry/crash-report
+cp .env.example .env
+vim .env
+...
+```
+
+- .env 样例
+
+```bash
+# 过滤器排除 IP（公司网络出口 IP）
+EXCLUDE_IP="192.168.1.10"
+# 英文官网服务器 IP
+SERVER_IP="192.168.1.11"
+# 内网提供 HTTP 服务的 IP 及端口，用于提供 HTML 报告浏览
+HTTP_SERV_IP="192.168.1.12"
+HTTP_SERV_PORT=8080
+# 飞书群机器人 webhook 地址
+FEISHU_MSG_URL="https://open.feishu.cn/open-apis/bot/v2/hook/*******"
+# 负责人
+OWNER="Jayden Jia"
+```
+
+# 3. 运行
+
+在 $DIR/telemetry/crash-report 目录中，有一些文件名类似 202501**.txt 的文件。Python 脚本会从这些文本文件中收集崩溃信息，并将报告发送到您的飞书机器人群组中。
+
+```bash
+cd $DIR/telemetry/crash-report
+python3 CrashCounter.py
+```
--- a/tests/script/telemetry/crash-report/README.md
+++ b/tests/script/telemetry/crash-report/README.md
@ -0,0 +1,61 @@
+# Table of Contents
+
+1. [Introduction](#1-introduction)
+1. [Prerequisites](#2-prerequisites)
+1. [Running](#3-running)
+
+# 1. Introduction
+
+This manual gives developers step-by-step guidance for collecting crash information from the past 7 days and reporting it to the Feishu notification group.
+
+> [!NOTE]
+> - The commands and scripts below are verified on Linux (CentOS 7.9.2009).
+
+# 2. Prerequisites
+
+- Install Python3
+
+```bash
+yum install python3
+yum install python3-pip
+```
+
+- Install Python dependencies
+
+```bash
+pip3 install requests python-dotenv
+```
+
+- Adjust .env file
+
+```bash
+cd $DIR/telemetry/crash-report
+cp .env.example .env
+vim .env
+...
+```
+
+- Example for .env
+
+```bash
+# IP address excluded from the statistics (the company's network egress IP)
+EXCLUDE_IP="192.168.1.10"
+# Official website server IP
+SERVER_IP="192.168.1.11"
+# IP and port of the internal HTTP server used for browsing the HTML reports
+HTTP_SERV_IP="192.168.1.12"
+HTTP_SERV_PORT=8080
+# Webhook address of the Feishu group bot
+FEISHU_MSG_URL="https://open.feishu.cn/open-apis/bot/v2/hook/*******"
+# Owner
+OWNER="Jayden Jia"
+```
+
+# 3. Running
+
+The `$DIR/telemetry/crash-report` directory contains several files with names like `202501**.txt`. The Python script collects crash information from these text files and sends a report to your Feishu bot group.
+
+```bash
+cd $DIR/telemetry/crash-report
+python3 CrashCounter.py
+```
--- a/tests/script/telemetry/crash-report/filter1.sh
+++ b/tests/script/telemetry/crash-report/filter1.sh
@ -0,0 +1,15 @@
+#!/bin/bash
+# Count crash sites that do NOT involve taosAssertDebug, for one version.
+# Reads EXCLUDE_IP from ./.env, so run it from the directory holding .env.
+source .env
+filesPath="/data/telemetry/crash-report"
+version="3.0.4.1"
+# Fail with a clear message when EXCLUDE_IP is missing: an empty pattern
+# would otherwise make `grep -v` misbehave and the result silently empty.
+taosdataIp="${EXCLUDE_IP:?EXCLUDE_IP must be set in .env}"
+# Pipeline: records of the wanted version that mention "taosd(" -> text
+# after "stackInfo" -> drop taosAssertDebug records -> drop the excluded IP
+# -> text between "(" and ")" following the first "taosd" -> count names.
+grep "\"version\":\"${version}\"" "${filesPath}"/*.txt \
+| grep  "taosd(" \
+| awk -F "stackInfo" '{print $2}' \
+| grep -v "taosAssertDebug" \
+| grep -v -- "${taosdataIp}" \
+| awk -F "taosd" '{print $2}' \
+| cut -d")" -f 1 \
+| cut -d"(" -f 2 \
+| sort | uniq -c
+
--- a/tests/script/telemetry/crash-report/filter2.sh
+++ b/tests/script/telemetry/crash-report/filter2.sh
@ -0,0 +1,14 @@
+#!/bin/bash
+# Count crash sites that DO involve taosAssertDebug, for one version.
+# Reads EXCLUDE_IP from ./.env, so run it from the directory holding .env.
+source .env
+filesPath="/data/telemetry/crash-report"
+version="3.0.4.1"
+# Fail with a clear message when EXCLUDE_IP is missing: an empty pattern
+# would otherwise make `grep -v` misbehave and the result silently empty.
+taosdataIp="${EXCLUDE_IP:?EXCLUDE_IP must be set in .env}"
+# Pipeline: records of the wanted version that mention "taosd(" -> text
+# after "stackInfo" -> keep only taosAssertDebug records -> drop the
+# excluded IP -> text between "(" and ")" following the second "taosd"
+# -> count names.
+grep "\"version\":\"${version}\"" "${filesPath}"/*.txt \
+| grep  "taosd(" \
+| awk -F "stackInfo" '{print $2}' \
+| grep "taosAssertDebug" \
+| grep -v -- "${taosdataIp}" \
+| awk -F "taosd" '{print $3}' \
+| cut -d")" -f 1 \
+| cut -d"(" -f 2 \
+| sort | uniq -c
--- a/tests/script/telemetry/crash-report/filter_assert.sh
+++ b/tests/script/telemetry/crash-report/filter_assert.sh
@ -0,0 +1,67 @@
+#!/bin/bash
+# Count taosAssertDebug crashes per function.
+# Usage: filter_assert.sh <version-regex> <excluded-ip-regex> <file>...
+# Prints one "<count> <function>" line per function on stdout.
+# The awk program uses the three-argument match(), a gawk extension.
+
+# Extract version and IP from the first two arguments
+version="$1"
+ip="$2"
+shift 2  # Remove the first two arguments, leaving only file paths
+
+# All remaining arguments are file paths. Keeping them in an array preserves
+# each path as one word, even if it contains spaces or glob characters.
+file_paths=("$@")
+
+# Execute the awk script and capture the output
+readarray -t output < <(awk -v version="$version" -v ip="$ip" '
+BEGIN {
+    RS = "\\n";  # Set the record separator to newline
+    FS = ",";    # Set the field separator to comma
+    total = 0;   # Initialize total count
+    version_regex = version;  # Use the passed version pattern
+    ip_regex = ip;  # Use the passed IP pattern
+}
+{
+    start_collecting = 0;
+    version_matched = 0;
+    ip_excluded = 0;
+
+    # Check each field within a record
+    for (i = 1; i <= NF; i++) {
+        if ($i ~ /"ip":"[^"]*"/ && $i ~ ip_regex) {
+            ip_excluded = 1;
+        }
+        if ($i ~ /"version":"[^"]*"/ && $i ~ version_regex) {
+            version_matched = 1;
+        }
+    }
+
+    if (!ip_excluded && version_matched) {
+        for (i = 1; i <= NF; i++) {
+            if ($i ~ /taosAssertDebug/ && start_collecting == 0) {
+                start_collecting = 1;
+                continue;
+            }
+            if (start_collecting == 1 && $i ~ /taosd\(([^)]+)\)/) {
+                match($i, /taosd\(([^)]+)\)/, arr);
+                if (arr[1] != "") {
+                    count[arr[1]]++;
+                    total++;
+                    break;
+                }
+            }
+        }
+    }
+}
+END {
+    for (c in count) {
+        printf "%d %s\n", count[c], c;
+    }
+    print "Total count:", total;
+}' "${file_paths[@]}")
+
+# The last output line is the total; everything before it is the per-function
+# detail. Capture them into separate variables.
+function_details=$(printf "%s\n" "${output[@]::${#output[@]}-1}")
+total_count="${output[-1]}"
+
+# Output or use the variables as needed
+#echo "Function Details:"
+echo "$function_details"
+#echo "Total Count:"
+#echo "$total_count"
--- a/tests/script/telemetry/crash-report/filter_nassert.sh
+++ b/tests/script/telemetry/crash-report/filter_nassert.sh
@ -0,0 +1,74 @@
+#!/bin/bash
+# Count crashes that do NOT involve taosAssertDebug, per function.
+# Usage: filter_nassert.sh <version-regex> <excluded-ip> <file>...
+# Prints one "<count> <function>" line per function on stdout.
+# The awk program uses the three-argument match(), a gawk extension.
+
+# Pass version, ip, and file paths as arguments
+version="$1"
+ip="$2"
+shift 2  # Shift the first two arguments to get file paths
+# Keep the paths in an array so each one stays a single word, even if it
+# contains spaces or glob characters.
+file_paths=("$@")
+
+# Execute awk and capture the output
+readarray -t output < <(awk -v version="$version" -v ip="$ip" '
+BEGIN {
+    RS = "\\n";  # Set the record separator to newline
+    total = 0;   # Initialize total count
+    version_regex = "\"version\":\"" version;  # Construct the regex for version
+    ip_regex = "\"ip\":\"" ip "\"";  # Construct the regex for IP
+}
+{
+    found = 0;  # Initialize the found flag to false
+    start_collecting = 1;  # Start collecting by default, unless taosAssertDebug is encountered
+    split($0, parts, "\\n");  # Split each record by newline
+
+    # Check for version and IP in each part
+    version_matched = 0;
+    ip_excluded = 0;
+    for (i in parts) {
+        if (parts[i] ~ version_regex) {
+            version_matched = 1;  # Set flag if version is matched
+        }
+        if (parts[i] ~ ip_regex) {
+            ip_excluded = 1;  # Set flag if IP is matched
+            break;  # No need to continue if IP is excluded
+        }
+    }
+
+    # Process only if version is matched and IP is not excluded
+    if (version_matched && !ip_excluded) {
+        for (i in parts) {
+            if (parts[i] ~ /taosAssertDebug/) {
+                start_collecting = 0;  # Skip this record if taosAssertDebug is encountered
+                break;  # Exit the loop
+            }
+        }
+        if (start_collecting == 1) {  # Continue processing if taosAssertDebug is not found
+            for (i in parts) {
+                if (found == 0 && parts[i] ~ /frame:.*taosd\([^)]+\)/) {
+                    # Match the first frame that meets the condition
+                    match(parts[i], /taosd\(([^)]+)\)/, a);  # Extract the function name
+                    if (a[1] != "") {
+                        count[a[1]]++;  # Increment the count for this function name
+                        total++;  # Increment the total count
+                        found = 1;  # Set found flag to true
+                        break;  # Exit the loop once the function is found
+                    }
+                }
+            }
+        }
+    }
+}
+END {
+    for (c in count) {
+        printf "%d %s\n", count[c], c;  # Print the count and function name formatted
+    }
+    print total;  # Print the total count alone
+}' "${file_paths[@]}")  # Each array element is passed to awk as one file argument
+
+# Capture the function details and total count into separate variables
+function_details=$(printf "%s\n" "${output[@]::${#output[@]}-1}")  # Join array elements with newlines
+total_count="${output[-1]}"  # The last element
+
+# Output or use the variables as needed
+#echo "Function Details:"
+echo "$function_details"
+#echo "Total Count:"
+#echo "$total_count"