From 98144cee081bde5a9c04786fc794eacb4c5d692a Mon Sep 17 00:00:00 2001
From: jiajingbin <jiajaybin@126.com>
Date: Tue, 21 Jan 2025 10:23:15 +0800
Subject: [PATCH] enh: add new scripts

---
 tests/script/telemetry/CrashCounter_new.py | 301 +++++++++++++++++++++
 tests/script/telemetry/filter_assert.sh    |  67 +++++
 tests/script/telemetry/filter_nassert.sh   |  74 +++++
 3 files changed, 442 insertions(+)
 create mode 100644 tests/script/telemetry/CrashCounter_new.py
 create mode 100755 tests/script/telemetry/filter_assert.sh
 create mode 100755 tests/script/telemetry/filter_nassert.sh
diff --git a/tests/script/telemetry/CrashCounter_new.py b/tests/script/telemetry/CrashCounter_new.py
new file mode 100644
index 0000000000..af88558c6a
--- /dev/null
+++ b/tests/script/telemetry/CrashCounter_new.py
@@ -0,0 +1,301 @@
+from datetime import date
+from datetime import timedelta
+import os
+import json
+import re
+import requests
+import subprocess
+
+# Version filter; "*" becomes "one or more digits" in the compiled pattern.
+version = "3.3.2.*"
+version_pattern_str = version.replace('.', r'\.').replace('*', r'\d+')
+version_pattern = re.compile('^' + version_pattern_str + '$')
+version_stack_list = []
+
+# ip is excluded from the statistics; the other values are printed in the report.
+ip = "103.229.218.146"
+server_ip = "20.124.239.6"
+http_ip = "192.168.2.92"
+owner = "Jayden Jia"
+
+# Feishu bot webhook that receives the report message.
+group_url = 'https://open.feishu.cn/open-apis/bot/v2/hook/56c333b5-eae9-4c18-b0b6-7e4b7174f5c9'
+# group_url = 'https://open.feishu.cn/open-apis/bot/v2/hook/11e9e452-34a0-4c88-b014-10e21cb521dd'
+
+# Reference date for the reporting window.
+today = date.today()
+
+# Crash-report directory layout; both directories are created when missing.
+path = "/data/telemetry/crash-report/"
+trace_report_path = f"{path}trace_report"
+for directory in (path, trace_report_path):
+    os.makedirs(directory, exist_ok=True)
+
+assert_script_path = f"{path}filter_assert.sh"
+nassert_script_path = f"{path}filter_nassert.sh"
+
+# Build the report-file paths for the seven days before today, newest first.
+def get_files():
+    """Return "<path><YYYYMMDD>.txt" strings for yesterday back to seven days ago."""
+    stamps = [(today - timedelta(days=back)).strftime("%Y%m%d") for back in range(1, 8)]
+    joined = " ".join(f"{path}{stamp}.txt" for stamp in stamps)
+    # The list is produced by splitting the space-joined string on single spaces.
+    return joined.strip().split(" ")
+
+# Run one of the filter scripts over the weekly files and return its text output.
+def get_res(file_path):
+    """Run `bash file_path version ip <files...>`.
+
+    Returns stdout with trailing whitespace removed when the script exits
+    with status 0, otherwise whatever the script wrote to stderr.
+    """
+    argv = ['bash', file_path, version, ip, *get_files()]
+    # check is left at its default: a failing script is reported, not raised.
+    finished = subprocess.run(argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+                              universal_newlines=True)
+    if finished.returncode == 0:
+        return finished.stdout.rstrip()
+    return finished.stderr
+
+def get_sum(output):
+    """Add up the leading integer of every non-blank line in output.
+
+    Each line is expected to look like "<count> <text>"; only the first
+    whitespace-separated token is read, and lines holding nothing but
+    whitespace contribute nothing.
+
+    Raises:
+        ValueError: if a line starts with a token that is not an integer.
+    """
+    running_total = 0
+    for row in output.strip().split('\n'):
+        tokens = row.split()
+        if not tokens:
+            continue  # blank line
+        # Only the count column matters here; the rest of the row is ignored.
+        running_total += int(tokens[0])
+    return running_total
+
+def convert_html(data):
+    """Write data as an HTML table under trace_report_path and return the file name."""
+    import html  # function-scope import keeps this block self-contained
+    days = [name.split("/")[-1].split(".")[0] for name in get_files()]
+    start_time, end_time = days[6], days[0]
+    html_report_file = f'{start_time}_{end_time}.html'
+    # Crash-report text is untrusted; escape it so it cannot inject markup or end the script tag.
+    def escape(value):
+        return [escape(v) for v in value] if isinstance(value, list) else html.escape(str(value))
+    json_data = json.dumps([{key: escape(val) for key, val in entry.items()} for entry in data])
+    html_content = f'''
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Stack Trace Report</title>
+    <style>
+        body {{
+            font-family: Arial, sans-serif;
+            margin: 20px;
+            background-color: #f0f0f5;
+        }}
+        h1 {{
+            color: #2c3e50;
+            text-align: center;
+        }}
+        table {{
+            width: 100%;
+            border-collapse: collapse;
+            margin-bottom: 20px;
+            box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);
+        }}
+        th, td {{
+            border: 1px solid #ddd;
+            padding: 10px;
+            text-align: left;
+        }}
+        th {{
+            background-color: #3498db;
+            color: white;
+        }}
+        tr:nth-child(even) {{
+            background-color: #ecf0f1;
+        }}
+        tr:hover {{
+            background-color: #d1e7fd;
+        }}
+        pre {{
+            background-color: #f7f7f7;
+            padding: 10px;
+            border: 1px solid #ddd;
+            overflow-x: auto;
+            white-space: pre-wrap;
+            border-radius: 5px;
+        }}
+    </style>
+</head>
+<body>
+    <h1>Stack Trace Report From {start_time} To {end_time} </h1>
+
+    <table>
+        <thead>
+            <tr>
+                <th>Key Stack Info</th>
+                <th>Versions</th>
+                <th>Num Of Crashes</th>
+                <th>Full Stack Info</th>
+            </tr>
+        </thead>
+        <tbody id="report">
+        </tbody>
+    </table>
+
+    <script>
+        const data = {json_data};
+
+        const reportBody = document.getElementById('report');
+        data.forEach(entry => {{
+            const row = document.createElement('tr');
+            row.innerHTML = `
+                <td>${{entry.key_stack_info}}</td>
+                <td>${{entry.version_list.join('<br>')}}</td>
+                <td>${{entry.count}}</td>
+                <td><pre>${{entry.full_stack_info}}</pre></td>
+            `;
+            reportBody.appendChild(row);
+        }});
+    </script>
+</body>
+</html>
+'''
+    with open(f'{trace_report_path}/{html_report_file}', 'w') as f:
+        f.write(html_content)
+    return html_report_file
+
+def get_version_stack_list(res):
+    """Append one summary dict per "<count> <key>" line of res to version_stack_list and return it."""
+    for res_line in res.strip().split('\n'):
+        fields = res_line.split()
+        if len(fields) < 2:  # blank or malformed result line
+            continue
+        count, key_stack_info = fields[0], fields[1]
+        version_list, full_stack_info = [], ""  # full_stack_info was unbound/stale without a match
+        for file in get_files():
+            if not os.path.isfile(file):  # a day without a report file is skipped
+                continue
+            with open(file, 'r') as infile:
+                for record in infile:
+                    record = record.strip()
+                    if not record or ip in record or key_stack_info not in record:
+                        continue  # cheap text checks first; blank lines never reach json.loads
+                    data = json.loads(record)
+                    if version_pattern.search(data["version"]) and data["version"] not in version_list:
+                        version_list.append(data["version"])
+                        full_stack_info = data["stackInfo"]
+        version_stack_list.append({"key_stack_info": key_stack_info, "full_stack_info": full_stack_info,
+                                   "version_list": version_list, "count": count})
+    return version_stack_list
+
+# Wrap plain text into the "post" message payload sent to the webhook.
+def get_msg(text):
+    """Return the message dict: title "Telemetry Statistics" plus one text element.
+
+    The structure is msg_type "post" with content.post.zh_cn holding the
+    title and a content grid of a single row with a single text tag.
+    """
+    # A single row containing a single text run.
+    text_element = {"tag": "text", "text": text}
+    localized_post = {
+        "title": "Telemetry Statistics",
+        "content": [[text_element]],
+    }
+    payload = {
+        "msg_type": "post",
+        "content": {"post": {"zh_cn": localized_post}},
+    }
+    return payload
+
+# post msg
+def send_msg(json):
+    """POST the payload to group_url; print the reply unless its StatusCode is 0.
+
+    json: JSON-serializable message body, e.g. the dict built by get_msg().
+    """
+    headers = {'Content-Type': 'application/json'}
+    # Without a timeout an unresponsive endpoint would block the job forever.
+    req = requests.post(url=group_url, headers=headers, json=json, timeout=30)
+    inf = req.json()
+    if not ("StatusCode" in inf and inf["StatusCode"] == 0):
+        print(inf)
+
+
+def format_results(results):
+    """Sort "<count> <text>" lines by count, descending, and align the counts.
+
+    Lines that do not split into exactly a count and a remainder are
+    dropped. Counts are right-justified to the width of the widest one and
+    every output line is prefixed with seven spaces. Lines with equal
+    counts keep their input order.
+
+    Returns:
+        The formatted lines joined by newlines ("" when nothing parses).
+
+    Raises:
+        ValueError: if the first token of a two-part line is not an integer.
+    """
+    rows = []
+    for raw in results.strip().split('\n'):
+        pieces = raw.split(maxsplit=1)
+        if len(pieces) != 2:
+            continue
+        amount, label = pieces
+        rows.append((int(amount), label))
+
+    # Stable descending sort on the count column.
+    ordered = sorted(rows, key=lambda row: row[0], reverse=True)
+
+    # Width of the widest count; 0 when there are no rows at all.
+    width = max((len(str(amount)) for amount, _ in ordered), default=0)
+
+    indent = "       "
+    return '\n'.join(f"{indent}{str(amount).rjust(width)} {label}" for amount, label in ordered)
+
+def send_report(res, sum, html_report_file):
+    """Print and send the weekly summary (window, counts, link to the HTML report)."""
+    first_day, last_day = (get_files()[i].split("/")[-1].split(".")[0] for i in (6, 0))
+    content = f'''
+    version: v{version}
+    from: {first_day}
+    to: {last_day}
+    ip: {server_ip}
+    owner: {owner}
+    result: \n{format_results(res)}\n
+    total crashes: {sum}\n
+    details: http://{http_ip}:8000/{html_report_file}
+    '''
+    print(get_msg(content))
+    send_msg(get_msg(content))
+
+# Crashes that did not go through taosAssertDebug (filter_nassert.sh).
+nassert_res = get_res(nassert_script_path)
+
+# Crashes reported through taosAssertDebug (filter_assert.sh).
+assert_res = get_res(assert_script_path)
+
+# Combine the results. get_res() strips the trailing newline, so plain
+# concatenation glued the last line of one result onto the first line of the
+# other; join the non-empty parts with a newline instead.
+res = '\n'.join(part for part in (nassert_res, assert_res) if part)
+
+# get version stack list
+version_stack_list = get_version_stack_list(res) if len(res) > 0 else list()
+
+# convert to html
+html_report_file = convert_html(version_stack_list)
+
+# get sum
+sum = get_sum(res)
+
+# send report
+send_report(res, sum, html_report_file)
+
diff --git a/tests/script/telemetry/filter_assert.sh b/tests/script/telemetry/filter_assert.sh
new file mode 100755
index 0000000000..2d56287fc9
--- /dev/null
+++ b/tests/script/telemetry/filter_assert.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+# Usage: filter_assert.sh <version-regex> <excluded-ip> <report-file>...
+version="$1"
+ip="$2"
+shift 2  # Remove the first two arguments, leaving only file paths
+
+# Keep the paths in an array so that each one stays a single word for awk
+file_paths=("$@")
+
+# Execute the awk script (gawk: three-argument match) and capture the output
+readarray -t output < <(awk -v version="$version" -v ip="$ip" '
+BEGIN {
+    RS = "\\n";  # Set the record separator to newline
+    FS = ",";    # Set the field separator to comma
+    total = 0;   # Initialize total count
+    version_regex = version;  # Use the passed version pattern
+    ip_regex = ip;  # Use the passed IP pattern
+}
+{
+    start_collecting = 0;
+    version_matched = 0;
+    ip_excluded = 0;
+
+    # Check each field within a record
+    for (i = 1; i <= NF; i++) {
+        if ($i ~ /"ip":"[^"]*"/ && $i ~ ip_regex) {
+            ip_excluded = 1;
+        }
+        if ($i ~ /"version":"[^"]*"/ && $i ~ version_regex) {
+            version_matched = 1;
+        }
+    }
+
+    if (!ip_excluded && version_matched) {
+        for (i = 1; i <= NF; i++) {
+            if ($i ~ /taosAssertDebug/ && start_collecting == 0) {
+                start_collecting = 1;
+                continue;
+            }
+            if (start_collecting == 1 && $i ~ /taosd\(([^)]+)\)/) {
+                match($i, /taosd\(([^)]+)\)/, arr);
+                if (arr[1] != "") {
+                    count[arr[1]]++;
+                    total++;
+                    break;
+                }
+            }
+        }
+    }
+}
+END {
+    for (c in count) {
+        printf "%d %s\n", count[c], c;
+    }
+    print "Total count:", total;
+}' "${file_paths[@]}")
+
+# Split the output: every line but the last is a "<count> <function>" row
+function_details=$(printf "%s\n" "${output[@]::${#output[@]}-1}")
+total_count="${output[-1]}"
+
+# Only the per-function rows are printed; the total line is kept for debugging
+#echo "Function Details:"
+echo "$function_details"
+#echo "Total Count:"
+#echo "$total_count"
diff --git a/tests/script/telemetry/filter_nassert.sh b/tests/script/telemetry/filter_nassert.sh
new file mode 100755
index 0000000000..2a5acdfbf1
--- /dev/null
+++ b/tests/script/telemetry/filter_nassert.sh
@@ -0,0 +1,74 @@
+#!/bin/bash
+
+# Usage: filter_nassert.sh <version-regex> <excluded-ip> <report-file>...
+version="$1"
+ip="$2"
+shift 2  # Shift the first two arguments to get file paths
+file_paths=("$@")  # Array: each path stays a single word
+
+# Execute awk (gawk: three-argument match) and capture the output
+readarray -t output < <(awk -v version="$version" -v ip="$ip" '
+BEGIN {
+    RS = "\\n";  # Set the record separator to newline
+    total = 0;   # Initialize total count
+    version_regex = "\"version\":\"" version;  # Construct the regex for version
+    ip_regex = "\"ip\":\"" ip "\"";  # Construct the regex for IP
+}
+{
+    found = 0;  # Initialize the found flag to false
+    start_collecting = 1;  # Start collecting by default, unless taosAssertDebug is encountered
+    split($0, parts, "\\n");  # Split each record by newline
+
+    # Check for version and IP in each part
+    version_matched = 0;
+    ip_excluded = 0;
+    for (i in parts) {
+        if (parts[i] ~ version_regex) {
+            version_matched = 1;  # Set flag if version is matched
+        }
+        if (parts[i] ~ ip_regex) {
+            ip_excluded = 1;  # Set flag if IP is matched
+            break;  # No need to continue if IP is excluded
+        }
+    }
+
+    # Process only if version is matched and IP is not excluded
+    if (version_matched && !ip_excluded) {
+        for (i in parts) {
+            if (parts[i] ~ /taosAssertDebug/) {
+                start_collecting = 0;  # Skip this record if taosAssertDebug is encountered
+                break;  # Exit the loop
+            }
+        }
+        if (start_collecting == 1) {  # Continue processing if taosAssertDebug is not found
+            for (i in parts) {
+                if (found == 0 && parts[i] ~ /frame:.*taosd\([^)]+\)/) {
+                    # Match the first frame that meets the condition
+                    match(parts[i], /taosd\(([^)]+)\)/, a);  # Extract the function name
+                    if (a[1] != "") {
+                        count[a[1]]++;  # Increment the count for this function name
+                        total++;  # Increment the total count
+                        found = 1;  # Set found flag to true
+                        break;  # Exit the loop once the function is found
+                    }
+                }
+            }
+        }
+    }
+}
+END {
+    for (c in count) {
+        printf "%d %s\n", count[c], c;  # Print the count and function name formatted
+    }
+    print total;  # Print the total count alone
+}' "${file_paths[@]}")  # Quoted array expansion: one word per path, spaces included
+
+# Split the output: every line but the last is a "<count> <function>" row
+function_details=$(printf "%s\n" "${output[@]::${#output[@]}-1}")  # Join array elements with newlines
+total_count="${output[-1]}"  # The last element
+
+# Only the per-function rows are printed; the total line is kept for debugging
+#echo "Function Details:"
+echo "$function_details"
+#echo "Total Count:"
+#echo "$total_count"