diff --git a/tests/script/telemetry/crash-report/.env.example b/tests/script/telemetry/crash-report/.env.example new file mode 100644 index 0000000000..f7d50f40c9 --- /dev/null +++ b/tests/script/telemetry/crash-report/.env.example @@ -0,0 +1,6 @@ +EXCLUDE_IP="192.168.1.10" +SERVER_IP="192.168.1.11" +HTTP_SERV_IP="192.168.1.12" +HTTP_SERV_PORT=8080 +FEISHU_MSG_URL="https://open.feishu.cn/open-apis/bot/v2/hook/*******" +OWNER="Jayden Jia" diff --git a/tests/script/telemetry/crash-report/CrashCounter.py b/tests/script/telemetry/crash-report/CrashCounter.py new file mode 100644 index 0000000000..a89567da3d --- /dev/null +++ b/tests/script/telemetry/crash-report/CrashCounter.py @@ -0,0 +1,308 @@ +from datetime import date +from datetime import timedelta +import os +import json +import re +import requests +import subprocess +from dotenv import load_dotenv + +# load .env +# You should have a .env file in the same directory as this script +# You can exec: cp .env.example .env +load_dotenv() + +# define version +version = "3.3.2.*" +version_pattern_str = version.replace('.', r'\.').replace('*', r'\d+') +version_pattern = re.compile(rf'^{version_pattern_str}$') +version_stack_list = list() + +# define ip + +ip = os.getenv("EXCLUDE_IP") +server_ip = os.getenv("SERVER_IP") +http_serv_ip = os.getenv("HTTP_SERV_IP") +http_serv_port = os.getenv("HTTP_SERV_PORT") +owner = os.getenv("OWNER") + +# feishu-msg url +feishu_msg_url = os.getenv("FEISHU_MSG_URL") + +# get today +today = date.today() + +# Define the file and parameters +path="/data/telemetry/crash-report/" +trace_report_path = path + "trace_report" +os.makedirs(path, exist_ok=True) +os.makedirs(trace_report_path, exist_ok=True) + +assert_script_path = path + "filter_assert.sh" +nassert_script_path = path + "filter_nassert.sh" + +# get files for the past 7 days +def get_files(): + files = "" + for i in range(1,8): + #print ((today - timedelta(days=i)).strftime("%Y%m%d")) + files = files + path + (today - timedelta(days=i)).strftime("%Y%m%d") + ".txt " + return files.strip().split(" ") + +# Define the AWK script as a string with proper escaping +def get_res(file_path): + # Execute the script + command = ['bash', file_path, version, ip] + get_files() + process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True) + + # Capture the output and errors + output, errors = process.communicate() + + # Check for errors + if process.returncode != 0: + return errors + else: + return output.rstrip() + +def get_sum(output): + # Split the output into lines + lines = output.strip().split('\n') + + # Initialize the sum + total_sum = 0 + + # Iterate over each line + for line in lines: + # Split each line by space to separate the columns + parts = line.split() + + # The first part of the line is the number, convert it to integer + if parts: # Check if there are any elements in the parts list + number = int(parts[0]) + total_sum += number + + return total_sum + +def convert_html(data): + # convert data to json + start_time = get_files()[6].split("/")[-1].split(".")[0] + end_time = get_files()[0].split("/")[-1].split(".")[0] + html_report_file = f'{start_time}_{end_time}.html' + json_data = json.dumps(data) + + # Create HTML content + html_content = f''' + + + + + + Stack Trace Report + + + +

+        <h1>Stack Trace Report From {start_time} To {end_time}</h1>
+        <table>
+            <thead>
+                <tr>
+                    <th>Key Stack Info</th>
+                    <th>Versions</th>
+                    <th>Num Of Crashes</th>
+                    <th>Full Stack Info</th>
+                </tr>
+            </thead>
+ + + + +''' + # Write the HTML content to a file + + with open(f'{trace_report_path}/{html_report_file}', 'w') as f: + f.write(html_content) + return html_report_file + +def get_version_stack_list(res): + for line in res.strip().split('\n'): + version_list = list() + version_stack_dict = dict() + count = line.split()[0] + key_stack_info = line.split()[1] + for file in get_files(): + with open(file, 'r') as infile: + for line in infile: + line = line.strip() + data = json.loads(line) + # print(line) + if ip not in line and version_pattern.search(data["version"]) and key_stack_info in line: + if data["version"] not in version_list: + version_list.append(data["version"]) + full_stack_info = data["stackInfo"] + version_stack_dict["key_stack_info"] = key_stack_info + version_stack_dict["full_stack_info"] = full_stack_info + version_stack_dict["version_list"] = version_list + version_stack_dict["count"] = count + # print(version_stack_dict) + version_stack_list.append(version_stack_dict) + return version_stack_list + +# get msg info +def get_msg(text): + return { + "msg_type": "post", + "content": { + "post": { + "zh_cn": { + "title": "Telemetry Statistics", + "content": [ + [{ + "tag": "text", + "text": text + } + ]] + } + } + } + } + +# post msg +def send_msg(json): + headers = { + 'Content-Type': 'application/json' + } + + req = requests.post(url=feishu_msg_url, headers=headers, json=json) + inf = req.json() + if "StatusCode" in inf and inf["StatusCode"] == 0: + pass + else: + print(inf) + + +def format_results(results): + # Split the results into lines + lines = results.strip().split('\n') + + # Parse lines into a list of tuples (number, rest_of_line) + parsed_lines = [] + for line in lines: + parts = line.split(maxsplit=1) + if len(parts) == 2: + number = int(parts[0]) # Convert the number part to an integer + parsed_lines.append((number, parts[1])) + + # Sort the parsed lines by the first element (number) in descending order + parsed_lines.sort(reverse=True, key=lambda x: x[0]) + + # Determine the maximum width of the first column for alignment + # max_width = max(len(str(item[0])) for item in parsed_lines) + if parsed_lines: + max_width = max(len(str(item[0])) for item in parsed_lines) + else: + max_width = 0 + + # Format each line to align the numbers and function names with indentation + formatted_lines = [] + for number, text in parsed_lines: + formatted_line = f" {str(number).rjust(max_width)} {text}" + formatted_lines.append(formatted_line) + + # Join the formatted lines into a single string + return '\n'.join(formatted_lines) + +# # send report to feishu +def send_report(res, sum, html_report_file): + content = f''' + version: v{version} + from: {get_files()[6].split("/")[-1].split(".")[0]} + to: {get_files()[0].split("/")[-1].split(".")[0]} + ip: {server_ip} + owner: {owner} + result: \n{format_results(res)}\n + total crashes: {sum}\n + details: http://{http_serv_ip}:{http_serv_port}/{html_report_file} + ''' + print(get_msg(content)) + send_msg(get_msg(content)) + # print(content) + +# for none-taosAssertDebug +nassert_res = get_res(nassert_script_path) +# print(nassert_res) + +# for taosAssertDebug +assert_res = get_res(assert_script_path) +# print(assert_res) + +# combine the results +res = nassert_res + assert_res + +# get version stack list +version_stack_list = get_version_stack_list(res) if len(res) > 0 else list() + +# convert to html +html_report_file = convert_html(version_stack_list) + +# get sum +sum = get_sum(res) + +# send report +send_report(res, sum, 
html_report_file) + diff --git a/tests/script/telemetry/crash-report/CrashCounter.py.old b/tests/script/telemetry/crash-report/CrashCounter.py.old new file mode 100644 index 0000000000..66edc8d63e --- /dev/null +++ b/tests/script/telemetry/crash-report/CrashCounter.py.old @@ -0,0 +1,128 @@ +from datetime import date +from datetime import timedelta +import os +import re +import requests +from dotenv import load_dotenv + +# load .env +load_dotenv() + +# define version +version = "3.3.*" + +ip = os.getenv("EXCLUDE_IP") +server_ip = os.getenv("SERVER_IP") +owner = os.getenv("OWNER") + +# feishu-msg url +feishu_msg_url = os.getenv("FEISHU_MSG_URL") + +today = date.today() +#today = date(2023,8,7) +path="/data/telemetry/crash-report/" + +# get files for the past 7 days +def get_files(): + files = "" + for i in range(1,8): + #print ((today - timedelta(days=i)).strftime("%Y%m%d")) + files = files + path + (today - timedelta(days=i)).strftime("%Y%m%d") + ".txt " + + return files + +# for none-taosAssertDebug +filter1_cmd = '''grep '"version":"%s"' %s \ +| grep "taosd(" \ +| awk -F "stackInfo" '{print $2}' \ +| grep -v "taosAssertDebug" \ +| grep -v %s \ +| awk -F "taosd" '{print $3}' \ +| cut -d")" -f 1 \ +| cut -d"(" -f 2 \ +| sort | uniq -c ''' % (version, get_files(), ip) + +# for taosAssertDebug +filter2_cmd = '''grep '"version":"%s"' %s \ +| grep "taosd(" \ +| awk -F "stackInfo" '{print $2}' \ +| grep "taosAssertDebug" \ +| grep -v %s \ +| awk -F "taosd" '{print $3}' \ +| cut -d")" -f 1 \ +| cut -d"(" -f 2 \ +| sort | uniq -c ''' % (version, get_files(), ip) + +# get msg info +def get_msg(text): + return { + "msg_type": "post", + "content": { + "post": { + "zh_cn": { + "title": "Telemetry Statistics", + "content": [ + [{ + "tag": "text", + "text": text + } + ]] + } + } + } + } + +# post msg +def send_msg(json): + headers = { + 'Content-Type': 'application/json' + } + + req = requests.post(url=group_url, headers=headers, json=json) + inf = req.json() + if "StatusCode" in inf and inf["StatusCode"] == 0: + pass + else: + print(inf) + +# exec cmd and return res +def get_output(cmd): + text = os.popen(cmd) + lines = text.read() + text.close() + return lines + +# get sum +def get_count(output): + res = re.findall(" \d+ ", output) + sum1 = 0 + for r in res: + sum1 = sum1 + int(r.strip()) + return sum1 + +# print total crash count +def print_result(): + #print(f"Files for statistics: {get_files()}\n") + sum1 = get_count(get_output(filter1_cmd)) + sum2 = get_count(get_output(filter2_cmd)) + total = sum1 + sum2 + #print(f"total crashes: {total}") + return total + +# send report to feishu +def send_report(): + content = f''' + test scope: Telemetry Statistics + owner: {owner} + ip: {server_ip} + from: {get_files().split(" ")[6].split("/")[4].split(".")[0]} + to: {get_files().split(" ")[0].split("/")[4].split(".")[0]} + filter1 result: {get_output(filter1_cmd)} + filter2 result: {get_output(filter2_cmd)} + total crashes: {print_result()} + ''' + #send_msg(get_msg(content)) + print(content) + +print_result() +send_report() diff --git a/tests/script/telemetry/crash-report/README-CN.md b/tests/script/telemetry/crash-report/README-CN.md new file mode 100644 index 0000000000..e0deab9f5b --- /dev/null +++ b/tests/script/telemetry/crash-report/README-CN.md @@ -0,0 +1,61 @@ +# 目录 + +1. [介绍](#1-介绍) +1. [前置条件](#2-前置条件) +1. [运行](#3-运行) + +# 1. 介绍 + +本手册旨在为开发人员提供全面的指导,以收集过去7天的崩溃信息并将其报告到飞书通知群。 + +> [!NOTE] +> - 下面的命令和脚本已在 Linux(CentOS 7.9.2009)上验证. + +# 2. 
前置条件 + +- 安装 Python3 + +```bash +yum install python3 +yum install python3-pip +``` + +- 安装 Python 依赖 + +```bash +pip3 install requests python-dotenv +``` + +- 调整 .env 文件 + +```bash +cd $DIR/telemetry/crash-report +cp .env.example .env +vim .env +... +``` + +- .env 样例 + +```bash +# 过滤器排除 IP(公司网络出口 IP) +EXCLUDE_IP="192.168.1.10" +# 英文官网服务器 IP +SERVER_IP="192.168.1.11" +# 内网提供 HTTP 服务的 IP 及端口,用于提供 HTML 报告浏览 +HTTP_SERV_IP="192.168.1.12" +HTTP_SERV_PORT=8080 +# 飞书群机器人 webhook 地址 +FEISHU_MSG_URL="https://open.feishu.cn/open-apis/bot/v2/hook/*******" +# 负责人 +OWNER="Jayden Jia" +``` + +# 3. 运行 + +在 $DIR/telemetry/crash-report 目录中,有类似文件名为 202501**.txt 的一些文件。Python 脚本会将从这些文本文件中收集崩溃信息,并将报告发送到您的飞书机器人群组中。 + +```bash +cd $DIR/telemetry/crash-report +python3 CrashCounter.py +``` diff --git a/tests/script/telemetry/crash-report/README.md b/tests/script/telemetry/crash-report/README.md new file mode 100644 index 0000000000..a47c9bc8bb --- /dev/null +++ b/tests/script/telemetry/crash-report/README.md @@ -0,0 +1,61 @@ +# Table of Contents + +1. [Introduction](#1-introduction) +1. [Prerequisites](#2-prerequisites) +1. [Running](#3-running) + +# 1. Introduction + +This manual is intended to give developers comprehensive guidance to collect crash information from the past 7 days and report it to the FeiShu notification group. + +> [!NOTE] +> - The commands and scripts below are verified on Linux (CentOs 7.9.2009). + +# 2. Prerequisites + +- Install Python3 + +```bash +yum install python3 +yum install python3-pip +``` + +- Install Python dependencies + +```bash +pip3 install requests python-dotenv +``` + +- Adjust .env file + +```bash +cd $DIR/telemetry/crash-report +cp .env.example .env +vim .env +... +``` + +- Example for .env + +```bash +# Filter to exclude IP (Company network export IP) +EXCLUDE_IP="192.168.1.10" +# Official website server IP +SERVER_IP="192.168.1.11" +# Internal network providing HTTP service IP and port, used for HTML report browsing +HTTP_SERV_IP="192.168.1.12" +HTTP_SERV_PORT=8080 +# Webhook address for feiShu group bot +FEISHU_MSG_URL="https://open.feishu.cn/open-apis/bot/v2/hook/*******" +# Owner +OWNER="Jayden Jia" +``` + +# 3. Running + +In `$DIR/telemetry/crash-report` directory, there are several files with names like 202501**.txt. The python script will collect crash information from these text files and send report to your Feishu bot group. 
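+Each daily `.txt` file is expected to hold one JSON crash record per line; the fields the counter and the filter scripts rely on are `ip`, `version`, and `stackInfo`. A minimal sketch of such a record (every value below is a synthetic placeholder, not real telemetry data) looks like this:
+
+```bash
+# Peek at one (synthetic) record; only the ip, version and stackInfo fields are used.
+head -n 1 20250101.txt
+# {"ip":"203.0.113.5","version":"3.3.2.1","stackInfo":"... frame: taosd(someCrashFunc+0x1a) ..."}
+```
+
+To collect the statistics and push the report, run: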
+ +```bash +cd $DIR/telemetry/crash-report +python3 CrashCounter.py +``` diff --git a/tests/script/telemetry/crash-report/filter1.sh b/tests/script/telemetry/crash-report/filter1.sh new file mode 100755 index 0000000000..3cb36a18ad --- /dev/null +++ b/tests/script/telemetry/crash-report/filter1.sh @@ -0,0 +1,15 @@ +#!/bin/bash +source .env +filesPath="/data/telemetry/crash-report" +version="3.0.4.1" +taosdataIp=$EXCLUDE_IP +grep "\"version\":\"${version}\"" ${filesPath}/*.txt \ +| grep "taosd(" \ +| awk -F "stackInfo" '{print $2}' \ +| grep -v "taosAssertDebug" \ +| grep -v ${taosdataIp} \ +| awk -F "taosd" '{print $2}' \ +| cut -d")" -f 1 \ +| cut -d"(" -f 2 \ +| sort | uniq -c + diff --git a/tests/script/telemetry/crash-report/filter2.sh b/tests/script/telemetry/crash-report/filter2.sh new file mode 100755 index 0000000000..4ad545345e --- /dev/null +++ b/tests/script/telemetry/crash-report/filter2.sh @@ -0,0 +1,14 @@ +#!/bin/bash +source .env +filesPath="/data/telemetry/crash-report" +version="3.0.4.1" +taosdataIp=$EXCLUDE_IP +grep "\"version\":\"${version}\"" ${filesPath}/*.txt \ +| grep "taosd(" \ +| awk -F "stackInfo" '{print $2}' \ +| grep "taosAssertDebug" \ +| grep -v ${taosdataIp} \ +| awk -F "taosd" '{print $3}' \ +| cut -d")" -f 1 \ +| cut -d"(" -f 2 \ +| sort | uniq -c diff --git a/tests/script/telemetry/crash-report/filter_assert.sh b/tests/script/telemetry/crash-report/filter_assert.sh new file mode 100755 index 0000000000..2d56287fc9 --- /dev/null +++ b/tests/script/telemetry/crash-report/filter_assert.sh @@ -0,0 +1,67 @@ +#!/bin/bash + +# Extract version and IP from the first two arguments +version="$1" +ip="$2" +shift 2 # Remove the first two arguments, leaving only file paths + +# All remaining arguments are considered as file paths +file_paths="$@" + +# Execute the awk script and capture the output +readarray -t output < <(awk -v version="$version" -v ip="$ip" ' +BEGIN { + RS = "\\n"; # Set the record separator to newline + FS = ","; # Set the field separator to comma + total = 0; # Initialize total count + version_regex = version; # Use the passed version pattern + ip_regex = ip; # Use the passed IP pattern +} +{ + start_collecting = 0; + version_matched = 0; + ip_excluded = 0; + + # Check each field within a record + for (i = 1; i <= NF; i++) { + if ($i ~ /"ip":"[^"]*"/ && $i ~ ip_regex) { + ip_excluded = 1; + } + if ($i ~ /"version":"[^"]*"/ && $i ~ version_regex) { + version_matched = 1; + } + } + + if (!ip_excluded && version_matched) { + for (i = 1; i <= NF; i++) { + if ($i ~ /taosAssertDebug/ && start_collecting == 0) { + start_collecting = 1; + continue; + } + if (start_collecting == 1 && $i ~ /taosd\(([^)]+)\)/) { + match($i, /taosd\(([^)]+)\)/, arr); + if (arr[1] != "") { + count[arr[1]]++; + total++; + break; + } + } + } + } +} +END { + for (c in count) { + printf "%d %s\n", count[c], c; + } + print "Total count:", total; +}' $file_paths) + +# Capture the function details and total count into separate variables +function_details=$(printf "%s\n" "${output[@]::${#output[@]}-1}") +total_count="${output[-1]}" + +# Output or use the variables as needed +#echo "Function Details:" +echo "$function_details" +#echo "Total Count:" +#echo "$total_count" diff --git a/tests/script/telemetry/crash-report/filter_nassert.sh b/tests/script/telemetry/crash-report/filter_nassert.sh new file mode 100755 index 0000000000..2a5acdfbf1 --- /dev/null +++ b/tests/script/telemetry/crash-report/filter_nassert.sh @@ -0,0 +1,74 @@ +#!/bin/bash + +# Pass version, ip, and file paths as 
arguments +version="$1" +ip="$2" +shift 2 # Shift the first two arguments to get file paths +file_paths="$@" + +# Execute awk and capture the output +readarray -t output < <(awk -v version="$version" -v ip="$ip" ' +BEGIN { + RS = "\\n"; # Set the record separator to newline + total = 0; # Initialize total count + version_regex = "\"version\":\"" version; # Construct the regex for version + ip_regex = "\"ip\":\"" ip "\""; # Construct the regex for IP +} +{ + found = 0; # Initialize the found flag to false + start_collecting = 1; # Start collecting by default, unless taosAssertDebug is encountered + split($0, parts, "\\n"); # Split each record by newline + + # Check for version and IP in each part + version_matched = 0; + ip_excluded = 0; + for (i in parts) { + if (parts[i] ~ version_regex) { + version_matched = 1; # Set flag if version is matched + } + if (parts[i] ~ ip_regex) { + ip_excluded = 1; # Set flag if IP is matched + break; # No need to continue if IP is excluded + } + } + + # Process only if version is matched and IP is not excluded + if (version_matched && !ip_excluded) { + for (i in parts) { + if (parts[i] ~ /taosAssertDebug/) { + start_collecting = 0; # Skip this record if taosAssertDebug is encountered + break; # Exit the loop + } + } + if (start_collecting == 1) { # Continue processing if taosAssertDebug is not found + for (i in parts) { + if (found == 0 && parts[i] ~ /frame:.*taosd\([^)]+\)/) { + # Match the first frame that meets the condition + match(parts[i], /taosd\(([^)]+)\)/, a); # Extract the function name + if (a[1] != "") { + count[a[1]]++; # Increment the count for this function name + total++; # Increment the total count + found = 1; # Set found flag to true + break; # Exit the loop once the function is found + } + } + } + } + } +} +END { + for (c in count) { + printf "%d %s\n", count[c], c; # Print the count and function name formatted + } + print total; # Print the total count alone +}' $file_paths) # Note the removal of quotes around "$file_paths" to handle multiple paths + +# Capture the function details and total count into separate variables +function_details=$(printf "%s\n" "${output[@]::${#output[@]}-1}") # Join array elements with newlines +total_count="${output[-1]}" # The last element + +# Output or use the variables as needed +#echo "Function Details:" +echo "$function_details" +#echo "Total Count:" +#echo "$total_count"
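+
+# Example invocation (a sketch only; the date-stamped file names are assumptions).
+# CrashCounter.py calls this script, and filter_assert.sh, with the version pattern,
+# the excluded IP from .env, and the seven most recent daily files, in that order:
+#   bash filter_nassert.sh "3.3.2.*" "192.168.1.10" \
+#       /data/telemetry/crash-report/20250101.txt /data/telemetry/crash-report/20250102.txt ...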