From cdf8ea13893cd43deb096b6b13fbc8bc138836b6 Mon Sep 17 00:00:00 2001 From: wenzhouwww Date: Sun, 30 Oct 2022 18:55:22 +0800 Subject: [PATCH 01/20] test: enh crash_gen function and let it run --- tests/pytest/auto_crash_gen.py | 365 ++++++++++++++++ tests/pytest/auto_crash_gen_valgrind.py | 399 ++++++++++++++++++ .../pytest/auto_crash_gen_valgrind_cluster.py | 399 ++++++++++++++++++ tests/pytest/auto_run_regular.sh | 11 + tests/pytest/auto_run_valgrind.sh | 11 + tests/pytest/auto_run_valgrind_cluster.sh | 11 + 6 files changed, 1196 insertions(+) create mode 100755 tests/pytest/auto_crash_gen.py create mode 100755 tests/pytest/auto_crash_gen_valgrind.py create mode 100755 tests/pytest/auto_crash_gen_valgrind_cluster.py create mode 100755 tests/pytest/auto_run_regular.sh create mode 100755 tests/pytest/auto_run_valgrind.sh create mode 100755 tests/pytest/auto_run_valgrind_cluster.sh diff --git a/tests/pytest/auto_crash_gen.py b/tests/pytest/auto_crash_gen.py new file mode 100755 index 0000000000..02cca810a7 --- /dev/null +++ b/tests/pytest/auto_crash_gen.py @@ -0,0 +1,365 @@ +import os +import socket +import requests + +# -*- coding: utf-8 -*- +import os ,sys +import random +import argparse +import subprocess +import time +import platform + +# valgrind mode ? +valgrind_mode = False + +msg_dict = {0:"success" , 1:"failed" , 2:"other errors" , 3:"crash occured" , 4:"Invalid read/write" , 5:"memory leak" } + +# formal +hostname = socket.gethostname() + +group_url = 'https://open.feishu.cn/open-apis/bot/v2/hook/56c333b5-eae9-4c18-b0b6-7e4b7174f5c9' + +def get_msg(text): + return { + "msg_type": "post", + "content": { + "post": { + "zh_cn": { + "title": "Crash_gen Monitor", + "content": [ + [{ + "tag": "text", + "text": text + } + ]] + } + } + } + } + + +def send_msg(json): + headers = { + 'Content-Type': 'application/json' + } + + req = requests.post(url=group_url, headers=headers, json=json) + inf = req.json() + if "StatusCode" in inf and inf["StatusCode"] == 0: + pass + else: + print(inf) + + +# set path about run instance + +core_path = subprocess.Popen("cat /proc/sys/kernel/core_pattern", shell=True, stdout=subprocess.PIPE,stderr=subprocess.STDOUT).stdout.read().decode("utf-8") +core_path = "/".join(core_path.split("/")[:-1]) +print(" ======= core path is %s ======== " %core_path) +if not os.path.exists(core_path): + os.mkdir(core_path) + +base_dir = os.path.dirname(os.path.realpath(__file__)) +if base_dir.find("community")>0: + repo = "community" +elif base_dir.find("TDengine")>0: + repo = "TDengine" +else: + repo ="TDengine" +print("base_dir:",base_dir) +home_dir = base_dir[:base_dir.find(repo)] +print("home_dir:",home_dir) +run_dir = os.path.join(home_dir,'run_dir') +run_dir = os.path.abspath(run_dir) +print("run dir is *** :",run_dir) +if not os.path.exists(run_dir): + os.mkdir(run_dir) +run_log_file = run_dir+'/crash_gen_run.log' +crash_gen_cmds_file = os.path.join(run_dir, 'crash_gen_cmds.sh') +exit_status_logs = os.path.join(run_dir, 'crash_exit.log') + +def get_path(): + buildPath='' + selfPath = os.path.dirname(os.path.realpath(__file__)) + if ("community" in selfPath): + projPath = selfPath[:selfPath.find("community")] + else: + projPath = selfPath[:selfPath.find("tests")] + + for root, dirs, files in os.walk(projPath): + if ("taosd" in files): + rootRealPath = os.path.dirname(os.path.realpath(root)) + if ("packaging" not in rootRealPath): + buildPath = root[:len(root) - len("/build/bin")] + break + return buildPath + +# generate crash_gen start script randomly + +def 
random_args(args_list): + nums_args_list = ["--max-dbs","--num-replicas","--num-dnodes","--max-steps","--num-threads",] # record int type arguments + bools_args_list = ["--auto-start-service" , "--debug","--run-tdengine","--ignore-errors","--track-memory-leaks","--larger-data","--mix-oos-data","--dynamic-db-table-names", + "--per-thread-db-connection","--record-ops","--verify-data","--use-shadow-db","--continue-on-exception" + ] # record bool type arguments + strs_args_list = ["--connector-type"] # record str type arguments + + args_list["--auto-start-service"]= False + args_list["--continue-on-exception"]=True + # connect_types=['native','rest','mixed'] # restful interface has change ,we should trans dbnames to connection or change sql such as "db.test" + connect_types=['native'] + # args_list["--connector-type"]=connect_types[random.randint(0,2)] + args_list["--connector-type"]= connect_types[0] + args_list["--max-dbs"]= random.randint(1,10) + + # dnodes = [1,3] # set single dnodes; + + # args_list["--num-dnodes"]= random.sample(dnodes,1)[0] + # args_list["--num-replicas"]= random.randint(1,args_list["--num-dnodes"]) + args_list["--debug"]=False + args_list["--per-thread-db-connection"]=True + args_list["--track-memory-leaks"]=False + + args_list["--max-steps"]=random.randint(500,2000) + + # args_list["--ignore-errors"]=[] ## can add error codes for detail + + + args_list["--run-tdengine"]= False + args_list["--use-shadow-db"]= False + args_list["--dynamic-db-table-names"]= True + args_list["--verify-data"]= False + args_list["--record-ops"] = False + + for key in bools_args_list: + set_bool_value = [True,False] + if key == "--auto-start-service" : + continue + elif key =="--run-tdengine": + continue + elif key == "--ignore-errors": + continue + elif key == "--debug": + continue + elif key == "--per-thread-db-connection": + continue + elif key == "--continue-on-exception": + continue + elif key == "--use-shadow-db": + continue + elif key =="--track-memory-leaks": + continue + elif key == "--dynamic-db-table-names": + continue + elif key == "--verify-data": + continue + elif key == "--record-ops": + continue + else: + args_list[key]=set_bool_value[random.randint(0,1)] + + if args_list["--larger-data"]: + threads = [16,32] + else: + threads = [32,64,128,256] + args_list["--num-threads"]=random.sample(threads,1)[0] #$ debug + + return args_list + +def limits(args_list): + if args_list["--use-shadow-db"]==True: + if args_list["--max-dbs"] > 1: + print("Cannot combine use-shadow-db with max-dbs of more than 1 ,set max-dbs=1") + args_list["--max-dbs"]=1 + else: + pass + + # env is start by test frame , not crash_gen instance + + # elif args_list["--num-replicas"]==0: + # print(" make sure num-replicas is at least 1 ") + # args_list["--num-replicas"]=1 + # elif args_list["--num-replicas"]==1: + # pass + + # elif args_list["--num-replicas"]>1: + # if not args_list["--auto-start-service"]: + # print("it should be deployed by crash_gen auto-start-service for multi replicas") + + # else: + # pass + + return args_list + +def get_auto_mix_cmds(args_list ,valgrind=valgrind_mode): + build_path = get_path() + if repo == "community": + crash_gen_path = build_path[:-5]+"community/tests/pytest/" + elif repo == "TDengine": + crash_gen_path = build_path[:-5]+"/tests/pytest/" + else: + pass + + bools_args_list = ["--auto-start-service" , "--debug","--run-tdengine","--ignore-errors","--track-memory-leaks","--larger-data","--mix-oos-data","--dynamic-db-table-names", + 
"--per-thread-db-connection","--record-ops","--verify-data","--use-shadow-db","--continue-on-exception"] + arguments = "" + for k ,v in args_list.items(): + if k == "--ignore-errors": + if v: + arguments+=(k+"="+str(v)+" ") + else: + arguments+="" + elif k in bools_args_list and v==True: + arguments+=(k+" ") + elif k in bools_args_list and v==False: + arguments+="" + else: + arguments+=(k+"="+str(v)+" ") + + if valgrind : + + crash_gen_cmd = 'cd %s && ./crash_gen.sh --valgrind %s -g 0x32c,0x32d,0x3d3,0x18,0x2501,0x369,0x388,0x061a,0x2550 '%(crash_gen_path ,arguments) + + else: + + crash_gen_cmd = 'cd %s && ./crash_gen.sh %s -g 0x32c,0x32d,0x3d3,0x18,0x2501,0x369,0x388,0x061a,0x2550'%(crash_gen_path ,arguments) + + return crash_gen_cmd + +def start_taosd(): + build_path = get_path() + if repo == "community": + start_path = build_path[:-5]+"community/tests/system-test/" + elif repo == "TDengine": + start_path = build_path[:-5]+"/tests/system-test/" + else: + pass + + start_cmd = 'cd %s && python3 test.py >>/dev/null '%(start_path) + os.system(start_cmd) + +def get_cmds(args_list): + # build_path = get_path() + # if repo == "community": + # crash_gen_path = build_path[:-5]+"community/tests/pytest/" + # elif repo == "TDengine": + # crash_gen_path = build_path[:-5]+"/tests/pytest/" + # else: + # pass + + # crash_gen_cmd = 'cd %s && ./crash_gen.sh --valgrind -p -t 10 -s 1000 -g 0x32c,0x32d,0x3d3,0x18,0x2501,0x369,0x388,0x061a,0x2550 '%(crash_gen_path) + + crash_gen_cmd = get_auto_mix_cmds(args_list,valgrind=valgrind_mode) + return crash_gen_cmd + +def run_crash_gen(crash_cmds): + + # prepare env of taosd + start_taosd() + + build_path = get_path() + if repo == "community": + crash_gen_path = build_path[:-5]+"community/tests/pytest/" + elif repo == "TDengine": + crash_gen_path = build_path[:-5]+"/tests/pytest/" + else: + pass + result_file = os.path.join(crash_gen_path, 'valgrind.out') + + + # run crash_gen and back logs + os.system('echo "%s">>%s'%(crash_cmds,crash_gen_cmds_file)) + os.system("%s >>%s "%(crash_cmds,result_file)) + + +def check_status(): + build_path = get_path() + if repo == "community": + crash_gen_path = build_path[:-5]+"community/tests/pytest/" + elif repo == "TDengine": + crash_gen_path = build_path[:-5]+"/tests/pytest/" + else: + pass + result_file = os.path.join(crash_gen_path, 'valgrind.out') + run_code = subprocess.Popen("tail -n 50 %s"%result_file, shell=True, stdout=subprocess.PIPE,stderr=subprocess.STDOUT).stdout.read().decode("utf-8") + os.system("tail -n 50 %s>>%s"%(result_file,exit_status_logs)) + + core_check = subprocess.Popen('ls -l %s | grep "^-" | wc -l'%core_path, shell=True, stdout=subprocess.PIPE,stderr=subprocess.STDOUT).stdout.read().decode("utf-8") + + if int(core_check.strip().rstrip()) > 0: + # it means core files has occured + return 3 + + if "Crash_Gen is now exiting with status code: 1" in run_code: + return 1 + elif "Crash_Gen is now exiting with status code: 0" in run_code: + return 0 + else: + return 2 + + +def main(): + + args_list = {"--auto-start-service":False ,"--max-dbs":0,"--connector-type":"native","--debug":False,"--run-tdengine":False,"--ignore-errors":[], + "--track-memory-leaks":False , "--larger-data":False, "--mix-oos-data":False, "--dynamic-db-table-names":False, + "--per-thread-db-connection":False , "--record-ops":False , "--max-steps":100, "--num-threads":10, "--verify-data":False,"--use-shadow-db":False , + "--continue-on-exception":False } + + args = random_args(args_list) + args = limits(args) + + + build_path = get_path() + 
os.system("pip install git+https://github.com/taosdata/taos-connector-python.git") + if repo =="community": + crash_gen_path = build_path[:-5]+"community/tests/pytest/" + elif repo =="TDengine": + crash_gen_path = build_path[:-5]+"/tests/pytest/" + else: + pass + + if os.path.exists(crash_gen_path+"crash_gen.sh"): + print(" make sure crash_gen.sh is ready") + else: + print( " crash_gen.sh is not exists ") + sys.exit(1) + + git_commit = subprocess.Popen("cd %s && git log | head -n1"%crash_gen_path, shell=True, stdout=subprocess.PIPE,stderr=subprocess.STDOUT).stdout.read().decode("utf-8")[8:16] + + # crash_cmds = get_cmds() + + crash_cmds = get_cmds(args) + # clean run_dir + os.system('rm -rf %s'%run_dir ) + if not os.path.exists(run_dir): + os.mkdir(run_dir) + print(crash_cmds) + run_crash_gen(crash_cmds) + status = check_status() + + print("exit status : ", status) + + if status ==4: + print('======== crash_gen found memory bugs ========') + if status ==5: + print('======== crash_gen found memory errors ========') + if status >0: + print('======== crash_gen run failed and not exit as expected ========') + else: + print('======== crash_gen run sucess and exit as expected ========') + + + if status!=0 : + + try: + text = f"crash_gen instance exit status of docker [ {hostname} ] is : {msg_dict[status]}\n " + f" and git commit : {git_commit}" + send_msg(get_msg(text)) + except Exception as e: + print("exception:", e) + exit(status) + + +if __name__ == '__main__': + main() + + diff --git a/tests/pytest/auto_crash_gen_valgrind.py b/tests/pytest/auto_crash_gen_valgrind.py new file mode 100755 index 0000000000..1443dcd543 --- /dev/null +++ b/tests/pytest/auto_crash_gen_valgrind.py @@ -0,0 +1,399 @@ +#!/usr/bin/python3 + + +import os +import socket +import requests + +# -*- coding: utf-8 -*- +import os ,sys +import random +import argparse +import subprocess +import time +import platform + +# valgrind mode ? 
+valgrind_mode = True + +msg_dict = {0:"success" , 1:"failed" , 2:"other errors" , 3:"crash occured" , 4:"Invalid read/write" , 5:"memory leak" } + +# formal +hostname = socket.gethostname() + +group_url = 'https://open.feishu.cn/open-apis/bot/v2/hook/56c333b5-eae9-4c18-b0b6-7e4b7174f5c9' + +def get_msg(text): + return { + "msg_type": "post", + "content": { + "post": { + "zh_cn": { + "title": "Crash_gen Monitor", + "content": [ + [{ + "tag": "text", + "text": text + } + ]] + } + } + } + } + + +def send_msg(json): + headers = { + 'Content-Type': 'application/json' + } + + req = requests.post(url=group_url, headers=headers, json=json) + inf = req.json() + if "StatusCode" in inf and inf["StatusCode"] == 0: + pass + else: + print(inf) + + +# set path about run instance + +core_path = subprocess.Popen("cat /proc/sys/kernel/core_pattern", shell=True, stdout=subprocess.PIPE,stderr=subprocess.STDOUT).stdout.read().decode("utf-8") +core_path = "/".join(core_path.split("/")[:-1]) +print(" ======= core path is %s ======== " %core_path) +if not os.path.exists(core_path): + os.mkdir(core_path) + +base_dir = os.path.dirname(os.path.realpath(__file__)) +if base_dir.find("community")>0: + repo = "community" +elif base_dir.find("TDengine")>0: + repo = "TDengine" +else: + repo ="TDengine" +print("base_dir:",base_dir) +home_dir = base_dir[:base_dir.find(repo)] +print("home_dir:",home_dir) +run_dir = os.path.join(home_dir,'run_dir') +run_dir = os.path.abspath(run_dir) +print("run dir is *** :",run_dir) +if not os.path.exists(run_dir): + os.mkdir(run_dir) +run_log_file = run_dir+'/crash_gen_run.log' +crash_gen_cmds_file = os.path.join(run_dir, 'crash_gen_cmds.sh') +exit_status_logs = os.path.join(run_dir, 'crash_exit.log') + +def get_path(): + buildPath='' + selfPath = os.path.dirname(os.path.realpath(__file__)) + if ("community" in selfPath): + projPath = selfPath[:selfPath.find("community")] + else: + projPath = selfPath[:selfPath.find("tests")] + + for root, dirs, files in os.walk(projPath): + if ("taosd" in files): + rootRealPath = os.path.dirname(os.path.realpath(root)) + if ("packaging" not in rootRealPath): + buildPath = root[:len(root) - len("/build/bin")] + break + return buildPath + +# generate crash_gen start script randomly + +def random_args(args_list): + nums_args_list = ["--max-dbs","--num-replicas","--num-dnodes","--max-steps","--num-threads",] # record int type arguments + bools_args_list = ["--auto-start-service" , "--debug","--run-tdengine","--ignore-errors","--track-memory-leaks","--larger-data","--mix-oos-data","--dynamic-db-table-names", + "--per-thread-db-connection","--record-ops","--verify-data","--use-shadow-db","--continue-on-exception" + ] # record bool type arguments + strs_args_list = ["--connector-type"] # record str type arguments + + args_list["--auto-start-service"]= False + args_list["--continue-on-exception"]=True + # connect_types=['native','rest','mixed'] # restful interface has change ,we should trans dbnames to connection or change sql such as "db.test" + connect_types=['native'] + # args_list["--connector-type"]=connect_types[random.randint(0,2)] + args_list["--connector-type"]= connect_types[0] + args_list["--max-dbs"]= random.randint(1,10) + + # dnodes = [1,3] # set single dnodes; + + # args_list["--num-dnodes"]= random.sample(dnodes,1)[0] + # args_list["--num-replicas"]= random.randint(1,args_list["--num-dnodes"]) + args_list["--debug"]=False + args_list["--per-thread-db-connection"]=True + args_list["--track-memory-leaks"]=False + + 
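+    # valgrind slows execution down considerably, so this variant keeps
+    # --max-steps in the 200-500 range and --num-threads at 16 or 32, while
+    # the regular auto_crash_gen.py goes up to 2000 steps and 256 threads.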
args_list["--max-steps"]=random.randint(200,500) + + threads = [16,32] + + args_list["--num-threads"]=random.sample(threads,1)[0] #$ debug + # args_list["--ignore-errors"]=[] ## can add error codes for detail + + + args_list["--run-tdengine"]= False + args_list["--use-shadow-db"]= False + args_list["--dynamic-db-table-names"]= True + args_list["--verify-data"]= False + args_list["--record-ops"] = False + + for key in bools_args_list: + set_bool_value = [True,False] + if key == "--auto-start-service" : + continue + elif key =="--run-tdengine": + continue + elif key == "--ignore-errors": + continue + elif key == "--debug": + continue + elif key == "--per-thread-db-connection": + continue + elif key == "--continue-on-exception": + continue + elif key == "--use-shadow-db": + continue + elif key =="--track-memory-leaks": + continue + elif key == "--dynamic-db-table-names": + continue + elif key == "--verify-data": + continue + elif key == "--record-ops": + continue + elif key == "--larger-data": + continue + else: + args_list[key]=set_bool_value[random.randint(0,1)] + return args_list + +def limits(args_list): + if args_list["--use-shadow-db"]==True: + if args_list["--max-dbs"] > 1: + print("Cannot combine use-shadow-db with max-dbs of more than 1 ,set max-dbs=1") + args_list["--max-dbs"]=1 + else: + pass + + # env is start by test frame , not crash_gen instance + + # elif args_list["--num-replicas"]==0: + # print(" make sure num-replicas is at least 1 ") + # args_list["--num-replicas"]=1 + # elif args_list["--num-replicas"]==1: + # pass + + # elif args_list["--num-replicas"]>1: + # if not args_list["--auto-start-service"]: + # print("it should be deployed by crash_gen auto-start-service for multi replicas") + + # else: + # pass + + return args_list + +def get_auto_mix_cmds(args_list ,valgrind=valgrind_mode): + build_path = get_path() + if repo == "community": + crash_gen_path = build_path[:-5]+"community/tests/pytest/" + elif repo == "TDengine": + crash_gen_path = build_path[:-5]+"/tests/pytest/" + else: + pass + + bools_args_list = ["--auto-start-service" , "--debug","--run-tdengine","--ignore-errors","--track-memory-leaks","--larger-data","--mix-oos-data","--dynamic-db-table-names", + "--per-thread-db-connection","--record-ops","--verify-data","--use-shadow-db","--continue-on-exception"] + arguments = "" + for k ,v in args_list.items(): + if k == "--ignore-errors": + if v: + arguments+=(k+"="+str(v)+" ") + else: + arguments+="" + elif k in bools_args_list and v==True: + arguments+=(k+" ") + elif k in bools_args_list and v==False: + arguments+="" + else: + arguments+=(k+"="+str(v)+" ") + + if valgrind : + + crash_gen_cmd = 'cd %s && ./crash_gen.sh --valgrind %s -g 0x32c,0x32d,0x3d3,0x18,0x2501,0x369,0x388,0x061a,0x2550 '%(crash_gen_path ,arguments) + + else: + + crash_gen_cmd = 'cd %s && ./crash_gen.sh %s -g 0x32c,0x32d,0x3d3,0x18,0x2501,0x369,0x388,0x061a,0x2550'%(crash_gen_path ,arguments) + + return crash_gen_cmd + + +def start_taosd(): + build_path = get_path() + if repo == "community": + start_path = build_path[:-5]+"community/tests/system-test/" + elif repo == "TDengine": + start_path = build_path[:-5]+"/tests/system-test/" + else: + pass + + start_cmd = 'cd %s && python3 test.py '%(start_path) + os.system(start_cmd +">>/dev/null") + +def get_cmds(args_list): + # build_path = get_path() + # if repo == "community": + # crash_gen_path = build_path[:-5]+"community/tests/pytest/" + # elif repo == "TDengine": + # crash_gen_path = build_path[:-5]+"/tests/pytest/" + # else: + # pass + + # 
crash_gen_cmd = 'cd %s && ./crash_gen.sh --valgrind -p -t 10 -s 1000 -g 0x32c,0x32d,0x3d3,0x18,0x2501,0x369,0x388,0x061a,0x2550 '%(crash_gen_path) + + crash_gen_cmd = get_auto_mix_cmds(args_list,valgrind=valgrind_mode) + return crash_gen_cmd + +def run_crash_gen(crash_cmds): + + # prepare env of taosd + start_taosd() + # run crash_gen and back logs + os.system('echo "%s">>%s'%(crash_cmds,crash_gen_cmds_file)) + # os.system("cp %s %s"%(crash_gen_cmds_file, core_path)) + os.system("%s"%(crash_cmds)) + +def check_status(): + build_path = get_path() + if repo == "community": + crash_gen_path = build_path[:-5]+"community/tests/pytest/" + elif repo == "TDengine": + crash_gen_path = build_path[:-5]+"/tests/pytest/" + else: + pass + result_file = os.path.join(crash_gen_path, 'valgrind.out') + run_code = subprocess.Popen("tail -n 50 %s"%result_file, shell=True, stdout=subprocess.PIPE,stderr=subprocess.STDOUT).stdout.read().decode("utf-8") + os.system("tail -n 50 %s>>%s"%(result_file,exit_status_logs)) + + core_check = subprocess.Popen('ls -l %s | grep "^-" | wc -l'%core_path, shell=True, stdout=subprocess.PIPE,stderr=subprocess.STDOUT).stdout.read().decode("utf-8") + + if int(core_check.strip().rstrip()) > 0: + # it means core files has occured + return 3 + + mem_status = check_memory() + if mem_status >0: + return mem_status + if "Crash_Gen is now exiting with status code: 1" in run_code: + return 1 + elif "Crash_Gen is now exiting with status code: 0" in run_code: + return 0 + else: + return 2 + + +def check_memory(): + + build_path = get_path() + if repo == "community": + crash_gen_path = build_path[:-5]+"community/tests/pytest/" + elif repo == "TDengine": + crash_gen_path = build_path[:-5]+"/tests/pytest/" + else: + pass + ''' + invalid read, invalid write + ''' + back_path = os.path.join(core_path,"valgrind_report") + if not os.path.exists(back_path): + os.mkdir(back_path) + + stderr_file = os.path.join(crash_gen_path , "valgrind.err") + + status = 0 + + grep_res = subprocess.Popen("grep -i 'Invalid read' %s "%stderr_file , shell=True, stdout=subprocess.PIPE,stderr=subprocess.STDOUT).stdout.read().decode("utf-8") + + if grep_res: + # os.system("cp %s %s"%(stderr_file , back_path)) + status = 4 + + grep_res = subprocess.Popen("grep -i 'Invalid write' %s "%stderr_file , shell=True, stdout=subprocess.PIPE,stderr=subprocess.STDOUT).stdout.read().decode("utf-8") + + if grep_res: + # os.system("cp %s %s"%(stderr_file , back_path)) + status = 4 + + grep_res = subprocess.Popen("grep -i 'taosMemoryMalloc' %s "%stderr_file , shell=True, stdout=subprocess.PIPE,stderr=subprocess.STDOUT).stdout.read().decode("utf-8") + + if grep_res: + # os.system("cp %s %s"%(stderr_file , back_path)) + status = 5 + + return status + +def main(): + + args_list = {"--auto-start-service":False ,"--max-dbs":0,"--connector-type":"native","--debug":False,"--run-tdengine":False,"--ignore-errors":[], + "--track-memory-leaks":False , "--larger-data":False, "--mix-oos-data":False, "--dynamic-db-table-names":False, + "--per-thread-db-connection":False , "--record-ops":False , "--max-steps":100, "--num-threads":10, "--verify-data":False,"--use-shadow-db":False , + "--continue-on-exception":False } + + args = random_args(args_list) + args = limits(args) + + build_path = get_path() + os.system("pip install git+https://github.com/taosdata/taos-connector-python.git >>/dev/null") + if repo =="community": + crash_gen_path = build_path[:-5]+"community/tests/pytest/" + elif repo =="TDengine": + crash_gen_path = 
build_path[:-5]+"/tests/pytest/" + else: + pass + + if os.path.exists(crash_gen_path+"crash_gen.sh"): + print(" make sure crash_gen.sh is ready") + else: + print( " crash_gen.sh is not exists ") + sys.exit(1) + + git_commit = subprocess.Popen("cd %s && git log | head -n1"%crash_gen_path, shell=True, stdout=subprocess.PIPE,stderr=subprocess.STDOUT).stdout.read().decode("utf-8")[8:16] + + # crash_cmds = get_cmds() + + crash_cmds = get_cmds(args) + + # clean run_dir + os.system('rm -rf %s'%run_dir ) + if not os.path.exists(run_dir): + os.mkdir(run_dir) + print(crash_cmds) + run_crash_gen(crash_cmds) + status = check_status() + # back_path = os.path.join(core_path,"valgrind_report") + + print("exit status : ", status) + + if status ==4: + print('======== crash_gen found memory bugs ========') + if status ==5: + print('======== crash_gen found memory errors ========') + if status >0: + print('======== crash_gen run failed and not exit as expected ========') + else: + print('======== crash_gen run sucess and exit as expected ========') + + if status!=0 : + + try: + text = f"crash_gen instance exit status of docker [ {hostname} ] is : {msg_dict[status]}\n " + f" and git commit : {git_commit}" + send_msg(get_msg(text)) + except Exception as e: + print("exception:", e) + exit(status) + + +if __name__ == '__main__': + main() + + diff --git a/tests/pytest/auto_crash_gen_valgrind_cluster.py b/tests/pytest/auto_crash_gen_valgrind_cluster.py new file mode 100755 index 0000000000..05cdaa6cc5 --- /dev/null +++ b/tests/pytest/auto_crash_gen_valgrind_cluster.py @@ -0,0 +1,399 @@ +#!/usr/bin/python3 + + +import os +import socket +import requests + +# -*- coding: utf-8 -*- +import os ,sys +import random +import argparse +import subprocess +import time +import platform + +# valgrind mode ? 
+valgrind_mode = True + +msg_dict = {0:"success" , 1:"failed" , 2:"other errors" , 3:"crash occured" , 4:"Invalid read/write" , 5:"memory leak" } + +# formal +hostname = socket.gethostname() + +group_url = 'https://open.feishu.cn/open-apis/bot/v2/hook/56c333b5-eae9-4c18-b0b6-7e4b7174f5c9' + +def get_msg(text): + return { + "msg_type": "post", + "content": { + "post": { + "zh_cn": { + "title": "Crash_gen Monitor", + "content": [ + [{ + "tag": "text", + "text": text + } + ]] + } + } + } + } + + +def send_msg(json): + headers = { + 'Content-Type': 'application/json' + } + + req = requests.post(url=group_url, headers=headers, json=json) + inf = req.json() + if "StatusCode" in inf and inf["StatusCode"] == 0: + pass + else: + print(inf) + + +# set path about run instance + +core_path = subprocess.Popen("cat /proc/sys/kernel/core_pattern", shell=True, stdout=subprocess.PIPE,stderr=subprocess.STDOUT).stdout.read().decode("utf-8") +core_path = "/".join(core_path.split("/")[:-1]) +print(" ======= core path is %s ======== " %core_path) +if not os.path.exists(core_path): + os.mkdir(core_path) + +base_dir = os.path.dirname(os.path.realpath(__file__)) +if base_dir.find("community")>0: + repo = "community" +elif base_dir.find("TDengine")>0: + repo = "TDengine" +else: + repo ="TDengine" +print("base_dir:",base_dir) +home_dir = base_dir[:base_dir.find(repo)] +print("home_dir:",home_dir) +run_dir = os.path.join(home_dir,'run_dir') +run_dir = os.path.abspath(run_dir) +print("run dir is *** :",run_dir) +if not os.path.exists(run_dir): + os.mkdir(run_dir) +run_log_file = run_dir+'/crash_gen_run.log' +crash_gen_cmds_file = os.path.join(run_dir, 'crash_gen_cmds.sh') +exit_status_logs = os.path.join(run_dir, 'crash_exit.log') + +def get_path(): + buildPath='' + selfPath = os.path.dirname(os.path.realpath(__file__)) + if ("community" in selfPath): + projPath = selfPath[:selfPath.find("community")] + else: + projPath = selfPath[:selfPath.find("tests")] + + for root, dirs, files in os.walk(projPath): + if ("taosd" in files): + rootRealPath = os.path.dirname(os.path.realpath(root)) + if ("packaging" not in rootRealPath): + buildPath = root[:len(root) - len("/build/bin")] + break + return buildPath + +# generate crash_gen start script randomly + +def random_args(args_list): + nums_args_list = ["--max-dbs","--num-replicas","--num-dnodes","--max-steps","--num-threads",] # record int type arguments + bools_args_list = ["--auto-start-service" , "--debug","--run-tdengine","--ignore-errors","--track-memory-leaks","--larger-data","--mix-oos-data","--dynamic-db-table-names", + "--per-thread-db-connection","--record-ops","--verify-data","--use-shadow-db","--continue-on-exception" + ] # record bool type arguments + strs_args_list = ["--connector-type"] # record str type arguments + + args_list["--auto-start-service"]= False + args_list["--continue-on-exception"]=True + # connect_types=['native','rest','mixed'] # restful interface has change ,we should trans dbnames to connection or change sql such as "db.test" + connect_types=['native'] + # args_list["--connector-type"]=connect_types[random.randint(0,2)] + args_list["--connector-type"]= connect_types[0] + args_list["--max-dbs"]= random.randint(1,10) + + # dnodes = [1,3] # set single dnodes; + + # args_list["--num-dnodes"]= random.sample(dnodes,1)[0] + # args_list["--num-replicas"]= random.randint(1,args_list["--num-dnodes"]) + args_list["--debug"]=False + args_list["--per-thread-db-connection"]=True + args_list["--track-memory-leaks"]=False + + 
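+    # Same reduced limits as auto_crash_gen_valgrind.py: valgrind runs are
+    # much slower, so steps and thread counts stay small here too.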
args_list["--max-steps"]=random.randint(200,500) + + threads = [16,32] + + args_list["--num-threads"]=random.sample(threads,1)[0] #$ debug + # args_list["--ignore-errors"]=[] ## can add error codes for detail + + + args_list["--run-tdengine"]= False + args_list["--use-shadow-db"]= False + args_list["--dynamic-db-table-names"]= True + args_list["--verify-data"]= False + args_list["--record-ops"] = False + + for key in bools_args_list: + set_bool_value = [True,False] + if key == "--auto-start-service" : + continue + elif key =="--run-tdengine": + continue + elif key == "--ignore-errors": + continue + elif key == "--debug": + continue + elif key == "--per-thread-db-connection": + continue + elif key == "--continue-on-exception": + continue + elif key == "--use-shadow-db": + continue + elif key =="--track-memory-leaks": + continue + elif key == "--dynamic-db-table-names": + continue + elif key == "--verify-data": + continue + elif key == "--record-ops": + continue + elif key == "--larger-data": + continue + else: + args_list[key]=set_bool_value[random.randint(0,1)] + return args_list + +def limits(args_list): + if args_list["--use-shadow-db"]==True: + if args_list["--max-dbs"] > 1: + print("Cannot combine use-shadow-db with max-dbs of more than 1 ,set max-dbs=1") + args_list["--max-dbs"]=1 + else: + pass + + # env is start by test frame , not crash_gen instance + + # elif args_list["--num-replicas"]==0: + # print(" make sure num-replicas is at least 1 ") + # args_list["--num-replicas"]=1 + # elif args_list["--num-replicas"]==1: + # pass + + # elif args_list["--num-replicas"]>1: + # if not args_list["--auto-start-service"]: + # print("it should be deployed by crash_gen auto-start-service for multi replicas") + + # else: + # pass + + return args_list + +def get_auto_mix_cmds(args_list ,valgrind=valgrind_mode): + build_path = get_path() + if repo == "community": + crash_gen_path = build_path[:-5]+"community/tests/pytest/" + elif repo == "TDengine": + crash_gen_path = build_path[:-5]+"/tests/pytest/" + else: + pass + + bools_args_list = ["--auto-start-service" , "--debug","--run-tdengine","--ignore-errors","--track-memory-leaks","--larger-data","--mix-oos-data","--dynamic-db-table-names", + "--per-thread-db-connection","--record-ops","--verify-data","--use-shadow-db","--continue-on-exception"] + arguments = "" + for k ,v in args_list.items(): + if k == "--ignore-errors": + if v: + arguments+=(k+"="+str(v)+" ") + else: + arguments+="" + elif k in bools_args_list and v==True: + arguments+=(k+" ") + elif k in bools_args_list and v==False: + arguments+="" + else: + arguments+=(k+"="+str(v)+" ") + + if valgrind : + + crash_gen_cmd = 'cd %s && ./crash_gen.sh --valgrind -i 3 %s -g 0x32c,0x32d,0x3d3,0x18,0x2501,0x369,0x388,0x061a,0x2550,0x0707 '%(crash_gen_path ,arguments) + + else: + + crash_gen_cmd = 'cd %s && ./crash_gen.sh -i 3 %s -g 0x32c,0x32d,0x3d3,0x18,0x2501,0x369,0x388,0x061a,0x2550,0x0014,0x0707'%(crash_gen_path ,arguments) + + return crash_gen_cmd + + +def start_taosd(): + build_path = get_path() + if repo == "community": + start_path = build_path[:-5]+"community/tests/system-test/" + elif repo == "TDengine": + start_path = build_path[:-5]+"/tests/system-test/" + else: + pass + + start_cmd = 'cd %s && python3 test.py -N 4 -M 1 '%(start_path) + os.system(start_cmd +">>/dev/null") + +def get_cmds(args_list): + # build_path = get_path() + # if repo == "community": + # crash_gen_path = build_path[:-5]+"community/tests/pytest/" + # elif repo == "TDengine": + # crash_gen_path = 
build_path[:-5]+"/tests/pytest/" + # else: + # pass + + # crash_gen_cmd = 'cd %s && ./crash_gen.sh --valgrind -p -t 10 -s 1000 -g 0x32c,0x32d,0x3d3,0x18,0x2501,0x369,0x388,0x061a,0x2550 '%(crash_gen_path) + + crash_gen_cmd = get_auto_mix_cmds(args_list,valgrind=valgrind_mode) + return crash_gen_cmd + +def run_crash_gen(crash_cmds): + + # prepare env of taosd + start_taosd() + # run crash_gen and back logs + os.system('echo "%s">>%s'%(crash_cmds,crash_gen_cmds_file)) + # os.system("cp %s %s"%(crash_gen_cmds_file, core_path)) + os.system("%s"%(crash_cmds)) + +def check_status(): + build_path = get_path() + if repo == "community": + crash_gen_path = build_path[:-5]+"community/tests/pytest/" + elif repo == "TDengine": + crash_gen_path = build_path[:-5]+"/tests/pytest/" + else: + pass + result_file = os.path.join(crash_gen_path, 'valgrind.out') + run_code = subprocess.Popen("tail -n 50 %s"%result_file, shell=True, stdout=subprocess.PIPE,stderr=subprocess.STDOUT).stdout.read().decode("utf-8") + os.system("tail -n 50 %s>>%s"%(result_file,exit_status_logs)) + + core_check = subprocess.Popen('ls -l %s | grep "^-" | wc -l'%core_path, shell=True, stdout=subprocess.PIPE,stderr=subprocess.STDOUT).stdout.read().decode("utf-8") + + if int(core_check.strip().rstrip()) > 0: + # it means core files has occured + return 3 + + mem_status = check_memory() + if mem_status >0: + return mem_status + if "Crash_Gen is now exiting with status code: 1" in run_code: + return 1 + elif "Crash_Gen is now exiting with status code: 0" in run_code: + return 0 + else: + return 2 + + +def check_memory(): + + build_path = get_path() + if repo == "community": + crash_gen_path = build_path[:-5]+"community/tests/pytest/" + elif repo == "TDengine": + crash_gen_path = build_path[:-5]+"/tests/pytest/" + else: + pass + ''' + invalid read, invalid write + ''' + back_path = os.path.join(core_path,"valgrind_report") + if not os.path.exists(back_path): + os.mkdir(back_path) + + stderr_file = os.path.join(crash_gen_path , "valgrind.err") + + status = 0 + + grep_res = subprocess.Popen("grep -i 'Invalid read' %s "%stderr_file , shell=True, stdout=subprocess.PIPE,stderr=subprocess.STDOUT).stdout.read().decode("utf-8") + + if grep_res: + # os.system("cp %s %s"%(stderr_file , back_path)) + status = 4 + + grep_res = subprocess.Popen("grep -i 'Invalid write' %s "%stderr_file , shell=True, stdout=subprocess.PIPE,stderr=subprocess.STDOUT).stdout.read().decode("utf-8") + + if grep_res: + # os.system("cp %s %s"%(stderr_file , back_path)) + status = 4 + + grep_res = subprocess.Popen("grep -i 'taosMemoryMalloc' %s "%stderr_file , shell=True, stdout=subprocess.PIPE,stderr=subprocess.STDOUT).stdout.read().decode("utf-8") + + if grep_res: + # os.system("cp %s %s"%(stderr_file , back_path)) + status = 5 + + return status + +def main(): + + args_list = {"--auto-start-service":False ,"--max-dbs":0,"--connector-type":"native","--debug":False,"--run-tdengine":False,"--ignore-errors":[], + "--track-memory-leaks":False , "--larger-data":False, "--mix-oos-data":False, "--dynamic-db-table-names":False, + "--per-thread-db-connection":False , "--record-ops":False , "--max-steps":100, "--num-threads":10, "--verify-data":False,"--use-shadow-db":False , + "--continue-on-exception":False } + + args = random_args(args_list) + args = limits(args) + + build_path = get_path() + os.system("pip install git+https://github.com/taosdata/taos-connector-python.git >>/dev/null") + if repo =="community": + crash_gen_path = build_path[:-5]+"community/tests/pytest/" + elif repo 
=="TDengine": + crash_gen_path = build_path[:-5]+"/tests/pytest/" + else: + pass + + if os.path.exists(crash_gen_path+"crash_gen.sh"): + print(" make sure crash_gen.sh is ready") + else: + print( " crash_gen.sh is not exists ") + sys.exit(1) + + git_commit = subprocess.Popen("cd %s && git log | head -n1"%crash_gen_path, shell=True, stdout=subprocess.PIPE,stderr=subprocess.STDOUT).stdout.read().decode("utf-8")[8:16] + + # crash_cmds = get_cmds() + + crash_cmds = get_cmds(args) + + # clean run_dir + os.system('rm -rf %s'%run_dir ) + if not os.path.exists(run_dir): + os.mkdir(run_dir) + print(crash_cmds) + run_crash_gen(crash_cmds) + status = check_status() + # back_path = os.path.join(core_path,"valgrind_report") + + print("exit status : ", status) + + if status ==4: + print('======== crash_gen found memory bugs ========') + if status ==5: + print('======== crash_gen found memory errors ========') + if status >0: + print('======== crash_gen run failed and not exit as expected ========') + else: + print('======== crash_gen run sucess and exit as expected ========') + + if status!=0 : + + try: + text = f"crash_gen instance exit status of docker [ {hostname} ] is : {msg_dict[status]}\n " + f" and git commit : {git_commit}" + send_msg(get_msg(text)) + except Exception as e: + print("exception:", e) + exit(status) + + +if __name__ == '__main__': + main() + + diff --git a/tests/pytest/auto_run_regular.sh b/tests/pytest/auto_run_regular.sh new file mode 100755 index 0000000000..27e8013269 --- /dev/null +++ b/tests/pytest/auto_run_regular.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +# set LD_LIBRARY_PATH +export PATH=$PATH:/home/TDengine/debug/build/bin +export LD_LIBRARY_PATH=/home/TDengine/debug/build/lib +ln -s /home/TDengine/debug/build/lib/libtaos.so /usr/lib/libtaos.so 2>/dev/null +ln -s /home/TDengine/debug/build/lib/libtaos.so /usr/lib/libtaos.so.1 2>/dev/null +ln -s /home/TDengine/include/client/taos.h /usr/include/taos.h 2>/dev/null + +# run crash_gen auto script +python3 /home/TDengine/tests/pytest/auto_crash_gen.py \ No newline at end of file diff --git a/tests/pytest/auto_run_valgrind.sh b/tests/pytest/auto_run_valgrind.sh new file mode 100755 index 0000000000..c7154e867c --- /dev/null +++ b/tests/pytest/auto_run_valgrind.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +# set LD_LIBRARY_PATH +export PATH=$PATH:/home/TDengine/debug/build/bin +export LD_LIBRARY_PATH=/home/TDengine/debug/build/lib +ln -s /home/TDengine/debug/build/lib/libtaos.so /usr/lib/libtaos.so 2>/dev/null +ln -s /home/TDengine/debug/build/lib/libtaos.so /usr/lib/libtaos.so.1 2>/dev/null +ln -s /home/TDengine/include/client/taos.h /usr/include/taos.h 2>/dev/null + +# run crash_gen auto script +python3 /home/TDengine/tests/pytest/auto_crash_gen_valgrind.py \ No newline at end of file diff --git a/tests/pytest/auto_run_valgrind_cluster.sh b/tests/pytest/auto_run_valgrind_cluster.sh new file mode 100755 index 0000000000..62bc22e923 --- /dev/null +++ b/tests/pytest/auto_run_valgrind_cluster.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +# set LD_LIBRARY_PATH +export PATH=$PATH:/home/TDengine/debug/build/bin +export LD_LIBRARY_PATH=/home/TDengine/debug/build/lib +ln -s /home/TDengine/debug/build/lib/libtaos.so /usr/lib/libtaos.so 2>/dev/null +ln -s /home/TDengine/debug/build/lib/libtaos.so /usr/lib/libtaos.so.1 2>/dev/null +ln -s /home/TDengine/include/client/taos.h /usr/include/taos.h 2>/dev/null + +# run crash_gen auto script +python3 /home/TDengine/tests/pytest/auto_crash_gen_valgrind_cluster.py \ No newline at end of file From 
From 17a502f569353e8a7c2f66632f59502f50beb1bd Mon Sep 17 00:00:00 2001
From: "wenzhouwww@live.cn"
Date: Mon, 7 Nov 2022 13:44:12 +0800
Subject: [PATCH 02/20] update

---
 tests/pytest/crash_gen.sh                |   2 +-
 tests/pytest/crash_gen/crash_gen_main.py | 478 +++++++++++++++++++++--
 2 files changed, 456 insertions(+), 24 deletions(-)

diff --git a/tests/pytest/crash_gen.sh b/tests/pytest/crash_gen.sh
index 539314dea4..cc2941a52a 100755
--- a/tests/pytest/crash_gen.sh
+++ b/tests/pytest/crash_gen.sh
@@ -45,7 +45,7 @@ fi
 # Now getting ready to execute Python
 # The following is the default of our standard dev env (Ubuntu 20.04), modify/adjust at your own risk
-PYTHON_EXEC=python3.8
+PYTHON_EXEC=python3
 
 # First we need to set up a path for Python to find our own TAOS modules, so that "import" can work.
 # export PYTHONPATH=$(pwd)/../../src/connector/python:$(pwd)
diff --git a/tests/pytest/crash_gen/crash_gen_main.py b/tests/pytest/crash_gen/crash_gen_main.py
index 600c64b8e6..d8946780a2 100755
--- a/tests/pytest/crash_gen/crash_gen_main.py
+++ b/tests/pytest/crash_gen/crash_gen_main.py
@@ -254,7 +254,7 @@ class WorkerThread:
 
 class ThreadCoordinator:
-    WORKER_THREAD_TIMEOUT = 120  # Normal: 120
+    WORKER_THREAD_TIMEOUT = 1200 # Normal: 120, raised for slow valgrind runs
 
     def __init__(self, pool: ThreadPool, dbManager: DbManager):
         self._curStep = -1 # first step is 0
@@ -674,9 +674,12 @@ class AnyState:
     # only "under normal circumstances", as we may override it with the -b option
     CAN_DROP_DB = 2
     CAN_CREATE_FIXED_SUPER_TABLE = 3
+    CAN_CREATE_STREAM = 3 # super table must exist first
     CAN_DROP_FIXED_SUPER_TABLE = 4
     CAN_ADD_DATA = 5
+    CAN_DROP_STREAM = 5
     CAN_READ_DATA = 6
+    CAN_DELETE_DATA = 6
 
     def __init__(self):
         self._info = self.getInfo()
@@ -727,12 +730,18 @@ class AnyState:
             return False
         return self._info[self.CAN_DROP_FIXED_SUPER_TABLE]
 
+    def canCreateStream(self):
+        return self._info[self.CAN_CREATE_STREAM]
+
     def canAddData(self):
         return self._info[self.CAN_ADD_DATA]
 
     def canReadData(self):
         return self._info[self.CAN_READ_DATA]
 
+    def canDeleteData(self):
+        return self._info[self.CAN_DELETE_DATA]
+
     def assertAtMostOneSuccess(self, tasks, cls):
         sCnt = 0
         for task in tasks:
@@ -921,7 +930,7 @@ class StateMechine:
         except taos.error.ProgrammingError as err:
             Logging.error("Failed to initialize state machine, cannot find current state: {}".format(err))
             traceback.print_stack()
-            raise # re-throw
+            pass # swallow the error instead of re-throwing, so the run can continue
 
     # TODO: seems no longer used, remove?
     def getCurrentState(self):
@@ -974,14 +983,21 @@ class StateMechine:
             # did not do this when opening connection, and this is NOT the worker
             # thread, which does this on their own
             dbc.use(dbName)
+
         if not dbc.hasTables(): # no tables
+
             Logging.debug("[STT] DB_ONLY found, between {} and {}".format(ts, time.time()))
             return StateDbOnly()
 
         # For sure we have tables, which means we must have the super table.
         # TODO: are we sure?
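+        # Detection ladder: no tables at all maps to StateDbOnly above; a super
+        # table without regular child tables maps to StateSuperTableOnly below,
+        # and existing regular tables map to StateHasData.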
+ sTable = self._db.getFixedSuperTable() - if sTable.hasRegTables(dbc): # no regular tables + + + if sTable.hasRegTables(dbc): # no regular tables + # print("debug=====*\n"*100) Logging.debug("[STT] SUPER_TABLE_ONLY found, between {} and {}".format(ts, time.time())) + return StateSuperTableOnly() else: # has actual tables Logging.debug("[STT] HAS_DATA found, between {} and {}".format(ts, time.time())) @@ -1109,6 +1125,7 @@ class Database: return "fs_table" def getFixedSuperTable(self) -> TdSuperTable: + return TdSuperTable(self.getFixedSuperTableName(), self.getName()) # We aim to create a starting time tick, such that, whenever we run our test here once @@ -1342,6 +1359,11 @@ class Task(): 0x2603, # Table does not exist, replaced by 2662 below 0x260d, # Tags number not matched 0x2662, # Table does not exist #TODO: what about 2603 above? + 0x032C, # Object is creating + 0x032D, # Object is dropping + 0x03D3, # Conflict transaction not completed + 0x0707, # Query not ready , it always occur at replica 3 + 0x707, # Query not ready @@ -1638,9 +1660,12 @@ class TaskCreateDb(StateTransitionTask): # numReplica = Dice.throw(Settings.getConfig().max_replicas) + 1 # 1,2 ... N numReplica = Config.getConfig().num_replicas # fixed, always repStr = "replica {}".format(numReplica) - updatePostfix = "update 1" if Config.getConfig().verify_data else "" # allow update only when "verify data" is active + updatePostfix = "" if Config.getConfig().verify_data else "" # allow update only when "verify data" is active , 3.0 version default is update 1 + vg_nums = random.randint(1,8) + cache_model = Dice.choice(['none' , 'last_row' , 'last_value' , 'both']) + buffer = random.randint(3,128) dbName = self._db.getName() - self.execWtSql(wt, "create database {} {} {} ".format(dbName, repStr, updatePostfix ) ) + self.execWtSql(wt, "create database {} {} {} vgroups {} cachemodel '{}' buffer {} ".format(dbName, repStr, updatePostfix, vg_nums, cache_model,buffer ) ) if dbName == "db_0" and Config.getConfig().use_shadow_db: self.execWtSql(wt, "create database {} {} {} ".format("db_s", repStr, updatePostfix ) ) @@ -1657,6 +1682,69 @@ class TaskDropDb(StateTransitionTask): self.execWtSql(wt, "drop database {}".format(self._db.getName())) Logging.debug("[OPS] database dropped at {}".format(time.time())) +class TaskCreateStream(StateTransitionTask): + + @classmethod + def getEndState(cls): + return StateHasData() + + @classmethod + def canBeginFrom(cls, state: AnyState): + return state.canCreateStream() + + def _executeInternal(self, te: TaskExecutor, wt: WorkerThread): + dbname = self._db.getName() + + sub_stream_name = dbname+ '_sub_stream' + sub_stream_tb_name = 'avg_sub' + super_stream_name = dbname+ '_super_stream' + super_stream_tb_name = 'avg_super' + if not self._db.exists(wt.getDbConn()): + Logging.debug("Skipping task, no DB yet") + return + + sTable = self._db.getFixedSuperTable() # type: TdSuperTable + # wt.execSql("use db") # should always be in place + + # create stream + ''' + CREATE STREAM avg_vol_s INTO avg_vol AS SELECT _wstartts, count(*), avg(voltage) FROM meters PARTITION BY tbname INTERVAL(1m) SLIDING(30s); + ''' + stbname =sTable.getName() + sub_tables = sTable.getRegTables(wt.getDbConn()) + if sub_tables: + + if sub_tables: # if not empty + sub_tbname = sub_tables[0] + + # create stream with query above sub_table + stream_sql = 'create stream {} into {}.{} as select count(*), avg(speed) FROM {}.{} PARTITION BY tbname INTERVAL(5s) SLIDING(3s) '.format(sub_stream_name,dbname,sub_stream_tb_name 
,dbname,sub_tbname) + try: + self.execWtSql(wt, stream_sql) + except taos.error.ProgrammingError as err: + errno = Helper.convertErrno(err.errno) + if errno in [0x03f0]: # stream already exists + # stream need drop before drop table + pass + + sTable.setStreamName(sub_stream_name) + else: + pass + + else: + stream_sql = 'create stream {} into {}.{} as select count(*), avg(speed) FROM {}.{} PARTITION BY tbname INTERVAL(5s) SLIDING(3s) '.format(super_stream_name,dbname,super_stream_tb_name,dbname,stbname) + + try: + self.execWtSql(wt, stream_sql) + except taos.error.ProgrammingError as err: + errno = Helper.convertErrno(err.errno) + if errno in [0x03f0]: # stream already exists + # stream need drop before drop table + pass + + + + class TaskCreateSuperTable(StateTransitionTask): @classmethod def getEndState(cls): @@ -1688,15 +1776,40 @@ class TdSuperTable: def __init__(self, stName, dbName): self._stName = stName self._dbName = dbName + self._streamName = [] def getName(self): return self._stName + def setStreamName(self,name): + self._streamName.append(name) + + def getStreamName(self): + return self._streamName + def drop(self, dbc, skipCheck = False): dbName = self._dbName if self.exists(dbc) : # if myself exists - fullTableName = dbName + '.' + self._stName - dbc.execute("DROP TABLE {}".format(fullTableName)) + fullTableName = dbName + '.' + self._stName + try: + dbc.execute("DROP TABLE {}".format(fullTableName)) + except taos.error.ProgrammingError as err: + errno = Helper.convertErrno(err.errno) + if errno in [1011,0x3F3,0x03f3,0x2662]: # table doesn't exist + pass + # # stream need drop before drop table + # for stream in self.getStreamName(): + # drop_stream_sql = 'drop stream {}'.format(stream) + # try: + # dbc.execute(drop_stream_sql) + # except taos.error.ProgrammingError as err: + # # correcting for strange error number scheme + # errno3 = Helper.convertErrno(err.errno) + # if errno3 in [1011,0x3F3,0x03f3,0x2662,0x03f1]: # stream not exists + # pass + # dbc.execute("DROP TABLE {}".format(fullTableName)) + # pass + else: if not skipCheck: raise CrashGenError("Cannot drop non-existant super table: {}".format(self._stName)) @@ -1711,10 +1824,17 @@ class TdSuperTable: dbName = self._dbName dbc.execute("USE " + dbName) - fullTableName = dbName + '.' + self._stName + fullTableName = dbName + '.' 
+ self._stName + if dbc.existsSuperTable(self._stName): if dropIfExists: + dbc.execute("DROP TABLE {}".format(fullTableName)) + + pass + + + # dbc.execute("DROP TABLE {}".format(fullTableName)) else: # error raise CrashGenError("Cannot create super table, already exists: {}".format(self._stName)) @@ -1733,7 +1853,7 @@ class TdSuperTable: def getRegTables(self, dbc: DbConn): dbName = self._dbName try: - dbc.query("select TBNAME from {}.{}".format(dbName, self._stName)) # TODO: analyze result set later + dbc.query("select distinct TBNAME from {}.{}".format(dbName, self._stName)) # TODO: analyze result set later except taos.error.ProgrammingError as err: errno2 = Helper.convertErrno(err.errno) Logging.debug("[=] Failed to get tables from super table: errno=0x{:X}, msg: {}".format(errno2, err)) @@ -1743,7 +1863,44 @@ class TdSuperTable: return [v[0] for v in qr] # list transformation, ref: https://stackoverflow.com/questions/643823/python-list-transformation def hasRegTables(self, dbc: DbConn): - return dbc.query("SELECT * FROM {}.{}".format(self._dbName, self._stName)) > 0 + # print(self._stName) + # dbc.query("SELECT * FROM {}.{}".format(self._dbName, self._stName)) + # print(dbc.getQueryResult()) + if self.hasStreamTables(dbc) or self.hasStreams(dbc): + if self.dropStreams(dbc): + self.dropStreamTables(dbc) + if dbc.existsSuperTable(self._stName): + + return dbc.query("SELECT * FROM {}.{}".format(self._dbName, self._stName)) > 0 + else: + return False + + def hasStreamTables(self,dbc: DbConn): + + return dbc.query("show {}.stables like 'avg%'".format(self._dbName)) > 0 + + def hasStreams(self,dbc: DbConn): + return dbc.query("show streams") > 0 + + def dropStreams(self,dbc:DbConn): + dbc.query("show streams ") + Streams = dbc.getQueryResult() + for Stream in Streams: + if Stream[0].startswith(self._dbName): + dbc.execute('drop stream {}'.format(Stream[0])) + + return not dbc.query("show streams ") > 0 + + def dropStreamTables(self, dbc: DbConn): + dbc.query("show {}.stables like 'avg%'".format(self._dbName)) + + StreamTables = dbc.getQueryResult() + + for StreamTable in StreamTables: + if self.dropStreams: + dbc.execute('drop table {}.{}'.format(self._dbName,StreamTable[0])) + + return not dbc.query("show {}.stables like 'avg%'".format(self._dbName)) def ensureRegTable(self, task: Optional[Task], dbc: DbConn, regTableName: str): ''' @@ -1838,10 +1995,46 @@ class TdSuperTable: # Run the query against the regular table first doAggr = (Dice.throw(2) == 0) # 1 in 2 chance if not doAggr: # don't do aggregate query, just simple one + commonExpr = Dice.choice([ + '*', + # 'abs(speed)', + # 'acos(speed)', + # 'asin(speed)', + # 'atan(speed)', + # 'ceil(speed)', + # 'cos(speed)', + # 'cos(speed)', + # 'floor(speed)', + # 'log(speed,2)', + # 'pow(speed,2)', + # 'round(speed)', + # 'sin(speed)', + # 'sqrt(speed)', + # 'char_length(color)', + # 'concat(color,color)', + # 'concat_ws(" ", color,color," ")', + # 'length(color)', + # 'lower(color)', + # 'ltrim(color)', + # 'substr(color , 2)', + # 'upper(color)', + # 'cast(speed as double)', + # 'cast(ts as bigint)', + # # 'TO_ISO8601(color)', + # # 'TO_UNIXTIMESTAMP(ts)', + # 'now()', + # 'timediff(ts,now)', + # 'timezone()', + # 'TIMETRUNCATE(ts,1s)', + # 'TIMEZONE()', + # 'TODAY()', + # 'distinct(color)' + ] + ) ret.append(SqlQuery( # reg table - "select {} from {}.{}".format('*', self._dbName, rTbName))) + "select {} from {}.{}".format(commonExpr, self._dbName, rTbName))) ret.append(SqlQuery( # super table - "select {} from {}.{}".format('*', 
self._dbName, self.getName()))) + "select {} from {}.{}".format(commonExpr, self._dbName, self.getName()))) else: # Aggregate query aggExpr = Dice.choice([ 'count(*)', @@ -1857,17 +2050,38 @@ class TdSuperTable: 'top(speed, 50)', # TODO: not supported? 'bottom(speed, 50)', # TODO: not supported? 'apercentile(speed, 10)', # TODO: TD-1316 - # 'last_row(speed)', # TODO: commented out per TD-3231, we should re-create + 'last_row(*)', # TODO: commented out per TD-3231, we should re-create # Transformation Functions # 'diff(speed)', # TODO: no supported?! - 'spread(speed)' + 'spread(speed)', + # 'elapsed(ts)', + # 'mode(speed)', + # 'bottom(speed,1)', + # 'top(speed,1)', + # 'tail(speed,1)', + # 'unique(color)', + # 'csum(speed)', + # 'DERIVATIVE(speed,1s,1)', + # 'diff(speed,1)', + # 'irate(speed)', + # 'mavg(speed,3)', + # 'sample(speed,5)', + # 'STATECOUNT(speed,"LT",1)', + # 'STATEDURATION(speed,"LT",1)', + # 'twa(speed)' + + + + + ]) # TODO: add more from 'top' # if aggExpr not in ['stddev(speed)']: # STDDEV not valid for super tables?! (Done in TD-1049) sql = "select {} from {}.{}".format(aggExpr, self._dbName, self.getName()) if Dice.throw(3) == 0: # 1 in X chance - sql = sql + ' GROUP BY color' + partion_expr = Dice.choice(['color','tbname']) + sql = sql + ' partition BY ' + partion_expr + ' order by ' + partion_expr Progress.emit(Progress.QUERY_GROUP_BY) # Logging.info("Executing GROUP-BY query: " + sql) ret.append(SqlQuery(sql)) @@ -1964,16 +2178,17 @@ class TaskDropSuperTable(StateTransitionTask): isSuccess = True for i in tblSeq: regTableName = self.getRegTableName(i) # "db.reg_table_{}".format(i) + streams_prix = self._db.getName() try: self.execWtSql(wt, "drop table {}.{}". format(self._db.getName(), regTableName)) # nRows always 0, like MySQL except taos.error.ProgrammingError as err: - # correcting for strange error number scheme - errno2 = Helper.convertErrno(err.errno) - if (errno2 in [0x362]): # mnode invalid table name - isSuccess = False - Logging.debug("[DB] Acceptable error when dropping a table") - continue # try to delete next regular table + pass + + + + + if (not tickOutput): tickOutput = True # Print only one time @@ -1984,7 +2199,17 @@ class TaskDropSuperTable(StateTransitionTask): # Drop the super table itself tblName = self._db.getFixedSuperTableName() - self.execWtSql(wt, "drop table {}.{}".format(self._db.getName(), tblName)) + try: + self.execWtSql(wt, "drop table {}.{}".format(self._db.getName(), tblName)) + except taos.error.ProgrammingError as err: + # correcting for strange error number scheme + errno2 = Helper.convertErrno(err.errno) + if (errno2 in [0x362]): # mnode invalid table name + isSuccess = False + Logging.debug("[DB] Acceptable error when dropping a table") + elif errno2 in [1011,0x3F3,0x03f3]: # table doesn't exist + + pass class TaskAlterTags(StateTransitionTask): @@ -2234,6 +2459,212 @@ class TaskAddData(StateTransitionTask): self.activeTable.discard(i) # not raising an error, unlike remove +class TaskDeleteData(StateTransitionTask): + # Track which table is being actively worked on + activeTable: Set[int] = set() + + # We use these two files to record operations to DB, useful for power-off tests + fAddLogReady = None # type: Optional[io.TextIOWrapper] + fAddLogDone = None # type: Optional[io.TextIOWrapper] + + @classmethod + def prepToRecordOps(cls): + if Config.getConfig().record_ops: + if (cls.fAddLogReady is None): + Logging.info( + "Recording in a file operations to be performed...") + cls.fAddLogReady = open("add_log_ready.txt", "w") + if 
(cls.fAddLogDone is None): + Logging.info("Recording in a file operations completed...") + cls.fAddLogDone = open("add_log_done.txt", "w") + + @classmethod + def getEndState(cls): + return StateHasData() + + @classmethod + def canBeginFrom(cls, state: AnyState): + return state.canDeleteData() + + def _lockTableIfNeeded(self, fullTableName, extraMsg = ''): + if Config.getConfig().verify_data: + # Logging.info("Locking table: {}".format(fullTableName)) + self.lockTable(fullTableName) + # Logging.info("Table locked {}: {}".format(extraMsg, fullTableName)) + # print("_w" + str(nextInt % 100), end="", flush=True) # Trace what was written + else: + # Logging.info("Skipping locking table") + pass + + def _unlockTableIfNeeded(self, fullTableName): + if Config.getConfig().verify_data: + # Logging.info("Unlocking table: {}".format(fullTableName)) + self.unlockTable(fullTableName) + # Logging.info("Table unlocked: {}".format(fullTableName)) + else: + pass + # Logging.info("Skipping unlocking table") + + def _deleteData(self, db: Database, dbc, regTableName, te: TaskExecutor): # implied: NOT in batches + numRecords = self.LARGE_NUMBER_OF_RECORDS if Config.getConfig().larger_data else self.SMALL_NUMBER_OF_RECORDS + del_Records = int(numRecords/5) + if Dice.throw(2) == 0: + for j in range(del_Records): # number of records per table + intToWrite = db.getNextInt() + nextTick = db.getNextTick() + # nextColor = db.getNextColor() + if Config.getConfig().record_ops: + self.prepToRecordOps() + if self.fAddLogReady is None: + raise CrashGenError("Unexpected empty fAddLogReady") + self.fAddLogReady.write("Ready to delete {} to {}\n".format(intToWrite, regTableName)) + self.fAddLogReady.flush() + os.fsync(self.fAddLogReady.fileno()) + + # TODO: too ugly trying to lock the table reliably, refactor... + fullTableName = db.getName() + '.' + regTableName + self._lockTableIfNeeded(fullTableName) # so that we are verify read-back. TODO: deal with exceptions before unlock + + try: + sql = "delete from {} where ts = '{}' ;".format( # removed: tags ('{}', {}) + fullTableName, + # ds.getFixedSuperTableName(), + # ds.getNextBinary(), ds.getNextFloat(), + nextTick) + + # print(sql) + # Logging.info("Adding data: {}".format(sql)) + dbc.execute(sql) + # Logging.info("Data added: {}".format(sql)) + intWrote = intToWrite + + # Quick hack, attach an update statement here. TODO: create an "update" task + if (not Config.getConfig().use_shadow_db) and Dice.throw(5) == 0: # 1 in N chance, plus not using shaddow DB + intToUpdate = db.getNextInt() # Updated, but should not succeed + # nextColor = db.getNextColor() + sql = "delete from {} where ts = '{}' ;".format( # "INSERt" means "update" here + fullTableName, + nextTick) + # sql = "UPDATE {} set speed={}, color='{}' WHERE ts='{}'".format( + # fullTableName, db.getNextInt(), db.getNextColor(), nextTick) + dbc.execute(sql) + intWrote = intToUpdate # We updated, seems TDengine non-cluster accepts this. + + except: # Any exception at all + self._unlockTableIfNeeded(fullTableName) + raise + + # Now read it back and verify, we might encounter an error if table is dropped + if Config.getConfig().verify_data: # only if command line asks for it + try: + readBack = dbc.queryScalar("SELECT * from {}.{} WHERE ts='{}'". 
+ format(db.getName(), regTableName, nextTick)) + if readBack == None : + pass + except taos.error.ProgrammingError as err: + errno = Helper.convertErrno(err.errno) + if errno == CrashGenError.INVALID_EMPTY_RESULT: # empty result + print("D1",end="") # D1 means delete data success and only 1 record + + elif errno in [0x218, 0x362]: # table doesn't exist + # do nothing + pass + else: + # Re-throw otherwise + raise + finally: + self._unlockTableIfNeeded(fullTableName) # Quite ugly, refactor lock/unlock + # Done with read-back verification, unlock the table now + # Successfully wrote the data into the DB, let's record it somehow + te.recordDataMark(intWrote) + else: + + # delete all datas and verify datas ,expected table is empty + if Config.getConfig().record_ops: + self.prepToRecordOps() + if self.fAddLogReady is None: + raise CrashGenError("Unexpected empty fAddLogReady") + self.fAddLogReady.write("Ready to delete {} to {}\n".format(intToWrite, regTableName)) + self.fAddLogReady.flush() + os.fsync(self.fAddLogReady.fileno()) + + # TODO: too ugly trying to lock the table reliably, refactor... + fullTableName = db.getName() + '.' + regTableName + self._lockTableIfNeeded(fullTableName) # so that we are verify read-back. TODO: deal with exceptions before unlock + + try: + sql = "delete from {} ;".format( # removed: tags ('{}', {}) + fullTableName) + # Logging.info("Adding data: {}".format(sql)) + dbc.execute(sql) + # Logging.info("Data added: {}".format(sql)) + + # Quick hack, attach an update statement here. TODO: create an "update" task + if (not Config.getConfig().use_shadow_db) and Dice.throw(5) == 0: # 1 in N chance, plus not using shaddow DB + sql = "delete from {} ;".format( # "INSERt" means "update" here + fullTableName) + dbc.execute(sql) + + except: # Any exception at all + self._unlockTableIfNeeded(fullTableName) + raise + + # Now read it back and verify, we might encounter an error if table is dropped + if Config.getConfig().verify_data: # only if command line asks for it + try: + readBack = dbc.queryScalar("SELECT * from {}.{} ". 
+ format(db.getName(), regTableName)) + if readBack == None : + pass + except taos.error.ProgrammingError as err: + errno = Helper.convertErrno(err.errno) + if errno == CrashGenError.INVALID_EMPTY_RESULT: # empty result + print("Da",end="") # Da means delete data success and for all datas + + elif errno in [0x218, 0x362]: # table doesn't exist + # do nothing + pass + else: + # Re-throw otherwise + raise + finally: + self._unlockTableIfNeeded(fullTableName) # Quite ugly, refactor lock/unlock + # Done with read-back verification, unlock the table now + + if Config.getConfig().record_ops: + if self.fAddLogDone is None: + raise CrashGenError("Unexpected empty fAddLogDone") + self.fAddLogDone.write("Wrote {} to {}\n".format(intWrote, regTableName)) + self.fAddLogDone.flush() + os.fsync(self.fAddLogDone.fileno()) + + def _executeInternal(self, te: TaskExecutor, wt: WorkerThread): + # ds = self._dbManager # Quite DANGEROUS here, may result in multi-thread client access + db = self._db + dbc = wt.getDbConn() + numTables = self.LARGE_NUMBER_OF_TABLES if Config.getConfig().larger_data else self.SMALL_NUMBER_OF_TABLES + numRecords = self.LARGE_NUMBER_OF_RECORDS if Config.getConfig().larger_data else self.SMALL_NUMBER_OF_RECORDS + tblSeq = list(range(numTables )) + random.shuffle(tblSeq) # now we have random sequence + for i in tblSeq: + if (i in self.activeTable): # wow already active + # print("x", end="", flush=True) # concurrent insertion + Progress.emit(Progress.CONCURRENT_INSERTION) + else: + self.activeTable.add(i) # marking it active + + dbName = db.getName() + sTable = db.getFixedSuperTable() + regTableName = self.getRegTableName(i) # "db.reg_table_{}".format(i) + fullTableName = dbName + '.' + regTableName + # self._lockTable(fullTableName) # "create table" below. 
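# ----------------------------------------------------------------------
# A minimal standalone sketch of the activeTable guard used by TaskAddData
# and TaskDeleteData above. claim_table/release_table are illustrative names,
# not harness APIs; the real tasks share a class-level set and just emit a
# progress tick when they detect a collision.
import threading

ACTIVE: set = set()            # table ids currently being worked on
_GUARD = threading.Lock()      # the harness itself relies on GIL-atomic set ops

def claim_table(table_id: int) -> bool:
    '''Return True if we claimed the table, False if another worker holds it.'''
    with _GUARD:
        if table_id in ACTIVE:
            return False       # concurrent insertion/deletion detected
        ACTIVE.add(table_id)
        return True

def release_table(table_id: int) -> None:
    with _GUARD:
        ACTIVE.discard(table_id)   # discard() never raises, unlike remove()
# ----------------------------------------------------------------------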
Stop it if the table is "locked" + sTable.ensureRegTable(self, wt.getDbConn(), regTableName) # Ensure the table exists + # self._unlockTable(fullTableName) + + self._deleteData(db, dbc, regTableName, te) + + self.activeTable.discard(i) # not raising an error, unlike remove + class ThreadStacks: # stack info for all threads def __init__(self): @@ -2259,7 +2690,8 @@ class ThreadStacks: # stack info for all threads # Now print print("\n<----- Thread Info for LWP/ID: {} (most recent call last) <-----".format(shortTid)) lastSqlForThread = DbConn.fetchSqlForThread(shortTid) - print("Last SQL statement attempted from thread {} is: {}".format(shortTid, lastSqlForThread)) + time_cost = DbConn.get_time_cost() + print("Last SQL statement attempted from thread {} ({:.4f} sec ago) is: {}".format(shortTid, time_cost ,lastSqlForThread)) stackFrame = 0 for frame in stack: # was using: reversed(stack) # print(frame) From 8a5427c95bd27eb55397d8a17ff76892a1b47a5e Mon Sep 17 00:00:00 2001 From: "wenzhouwww@live.cn" Date: Tue, 8 Nov 2022 09:41:42 +0800 Subject: [PATCH 03/20] update --- tests/pytest/crash_gen/crash_gen_main.py | 39 ++++++++++------ tests/pytest/crash_gen/shared/db.py | 57 +++++++++++++++++++++--- 2 files changed, 77 insertions(+), 19 deletions(-) diff --git a/tests/pytest/crash_gen/crash_gen_main.py b/tests/pytest/crash_gen/crash_gen_main.py index d8946780a2..1ef89dec4e 100755 --- a/tests/pytest/crash_gen/crash_gen_main.py +++ b/tests/pytest/crash_gen/crash_gen_main.py @@ -1761,7 +1761,10 @@ class TaskCreateSuperTable(StateTransitionTask): sTable = self._db.getFixedSuperTable() # type: TdSuperTable # wt.execSql("use db") # should always be in place - + + if sTable.hasStreams(wt.getDbConn()) or sTable.hasStreamTables(wt.getDbConn()): + sTable.dropStreams(wt.getDbConn()) + sTable.dropStreamTables(wt.getDbConn()) sTable.create(wt.getDbConn(), {'ts': TdDataType.TIMESTAMP, 'speed': TdDataType.INT, 'color': TdDataType.BINARY16}, { 'b': TdDataType.BINARY200, 'f': TdDataType.FLOAT}, @@ -2557,16 +2560,20 @@ class TaskDeleteData(StateTransitionTask): # Now read it back and verify, we might encounter an error if table is dropped if Config.getConfig().verify_data: # only if command line asks for it try: - readBack = dbc.queryScalar("SELECT * from {}.{} WHERE ts='{}'". + dbc.query("SELECT * from {}.{} WHERE ts='{}'". format(db.getName(), regTableName, nextTick)) - if readBack == None : - pass + result = dbc.getQueryResult() + if len(result)==0: + # means data has been delete + print("D1",end="") # DF means delete failed + else: + print("DF",end="") # DF means delete failed except taos.error.ProgrammingError as err: errno = Helper.convertErrno(err.errno) - if errno == CrashGenError.INVALID_EMPTY_RESULT: # empty result - print("D1",end="") # D1 means delete data success and only 1 record + # if errno == CrashGenError.INVALID_EMPTY_RESULT: # empty result + # print("D1",end="") # D1 means delete data success and only 1 record - elif errno in [0x218, 0x362]: # table doesn't exist + if errno in [0x218, 0x362,0x2662]: # table doesn't exist # do nothing pass else: @@ -2612,16 +2619,20 @@ class TaskDeleteData(StateTransitionTask): # Now read it back and verify, we might encounter an error if table is dropped if Config.getConfig().verify_data: # only if command line asks for it try: - readBack = dbc.queryScalar("SELECT * from {}.{} ". - format(db.getName(), regTableName)) - if readBack == None : - pass + dbc.query("SELECT * from {}.{} WHERE ts='{}'". 
+ format(db.getName(), regTableName, nextTick)) + result = dbc.getQueryResult() + if len(result)==0: + # means data has been delete + print("DA",end="") + else: + print("DF",end="") # DF means delete failed except taos.error.ProgrammingError as err: errno = Helper.convertErrno(err.errno) - if errno == CrashGenError.INVALID_EMPTY_RESULT: # empty result - print("Da",end="") # Da means delete data success and for all datas + # if errno == CrashGenError.INVALID_EMPTY_RESULT: # empty result + # print("Da",end="") # Da means delete data success and for all datas - elif errno in [0x218, 0x362]: # table doesn't exist + if errno in [0x218, 0x362,0x2662]: # table doesn't exist # do nothing pass else: diff --git a/tests/pytest/crash_gen/shared/db.py b/tests/pytest/crash_gen/shared/db.py index 60c830f4f7..f97e4025e5 100644 --- a/tests/pytest/crash_gen/shared/db.py +++ b/tests/pytest/crash_gen/shared/db.py @@ -26,9 +26,12 @@ class DbConn: TYPE_NATIVE = "native-c" TYPE_REST = "rest-api" TYPE_INVALID = "invalid" + + # class variables lastSqlFromThreads : dict[int, str] = {} # stored by thread id, obtained from threading.current_thread().ident%10000 + spendThreads : dict[int, float] = {} # stored by thread id, obtained from threading.current_thread().ident%10000 @classmethod def saveSqlForCurrentThread(cls, sql: str): @@ -37,15 +40,36 @@ class DbConn: run into a dead-lock situation, we can pick out the deadlocked thread, and use that information to find what what SQL statement is stuck. ''' + th = threading.current_thread() shortTid = th.native_id % 10000 #type: ignore cls.lastSqlFromThreads[shortTid] = sql # Save this for later @classmethod - def fetchSqlForThread(cls, shortTid : int) -> str : + def fetchSqlForThread(cls, shortTid : int) -> str : + + print("=======================") if shortTid not in cls.lastSqlFromThreads: raise CrashGenError("No last-attempted-SQL found for thread id: {}".format(shortTid)) - return cls.lastSqlFromThreads[shortTid] + return cls.lastSqlFromThreads[shortTid] + + + @classmethod + def sql_exec_spend(cls, cost: float): + ''' + Let us save the last SQL statement on a per-thread basis, so that when later we + run into a dead-lock situation, we can pick out the deadlocked thread, and use + that information to find what what SQL statement is stuck. + ''' + th = threading.current_thread() + shortTid = th.native_id % 10000 #type: ignore + cls.spendThreads[shortTid] = cost # Save this for later + + @classmethod + def get_time_cost(cls) ->float: + th = threading.current_thread() + shortTid = th.native_id % 10000 #type: ignore + return cls.spendThreads.get(shortTid) @classmethod def create(cls, connType, dbTarget): @@ -61,6 +85,7 @@ class DbConn: def createNative(cls, dbTarget) -> DbConn: return cls.create(cls.TYPE_NATIVE, dbTarget) + @classmethod def createRest(cls, dbTarget) -> DbConn: return cls.create(cls.TYPE_REST, dbTarget) @@ -75,6 +100,7 @@ class DbConn: return "[DbConn: type={}, target={}]".format(self._type, self._dbTarget) def getLastSql(self): + return self._lastSql def open(self): @@ -184,13 +210,19 @@ class DbConnRest(DbConn): def _doSql(self, sql): self._lastSql = sql # remember this, last SQL attempted self.saveSqlForCurrentThread(sql) # Save in global structure too. 
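# ----------------------------------------------------------------------
# The per-thread bookkeeping pattern added to DbConn here, shown as a
# standalone sketch: key a dict by a shortened native thread id, record the
# last SQL attempted and how long it took. run_timed/last_sql/cost are
# illustrative names. Note the sketch drops the -2 failure sentinel used
# above, since the finally block overwrites it anyway.
import threading
import time

last_sql: dict = {}    # short thread id -> last SQL attempted
cost: dict = {}        # short thread id -> seconds spent on it

def _tid() -> int:
    return threading.current_thread().native_id % 10000

def run_timed(execute, sql: str):
    '''Run execute(sql), recording the statement and its wall-clock cost.'''
    last_sql[_tid()] = sql
    start = time.time()
    try:
        return execute(sql)
    finally:
        cost[_tid()] = time.time() - start   # recorded on success and failure
# ----------------------------------------------------------------------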
#TODO: combine with above - try: + time_cost = -1 + time_start = time.time() + try: r = requests.post(self._url, data = sql, - auth = HTTPBasicAuth('root', 'taosdata')) + auth = HTTPBasicAuth('root', 'taosdata')) except: print("REST API Failure (TODO: more info here)") + self.sql_exec_spend(-2) raise + finally: + time_cost = time.time()- time_start + self.sql_exec_spend(time_cost) rj = r.json() # Sanity check for the "Json Result" if ('status' not in rj): @@ -223,6 +255,8 @@ class DbConnRest(DbConn): "[SQL-REST] Execution Result, nRows = {}, SQL = {}".format(nRows, sql)) return nRows + + def query(self, sql): # return rows affected return self.execute(sql) @@ -336,6 +370,7 @@ class MyTDSql: raise return self.affectedRows + class DbTarget: def __init__(self, cfgPath, hostAddr, port): self.cfgPath = cfgPath @@ -355,6 +390,7 @@ class DbConnNative(DbConn): # _connInfoDisplayed = False # TODO: find another way to display this totalConnections = 0 # Not private totalRequests = 0 + time_cost = -1 def __init__(self, dbTarget): super().__init__(dbTarget) @@ -413,8 +449,19 @@ class DbConnNative(DbConn): "Cannot exec SQL unless db connection is open", CrashGenError.DB_CONNECTION_NOT_OPEN) Logging.debug("[SQL] Executing SQL: {}".format(sql)) self._lastSql = sql + time_cost = -1 + nRows = 0 + time_start = time.time() self.saveSqlForCurrentThread(sql) # Save in global structure too. #TODO: combine with above - nRows = self._tdSql.execute(sql) + try: + nRows= self._tdSql.execute(sql) + except Exception as e: + self.sql_exec_spend(-2) + finally: + time_cost = time.time() - time_start + self.sql_exec_spend(time_cost) + + cls = self.__class__ cls.totalRequests += 1 Logging.debug( From 7aac1066507d69f55ed3317cfcd6228d2673ea3f Mon Sep 17 00:00:00 2001 From: "wenzhouwww@live.cn" Date: Tue, 8 Nov 2022 10:54:57 +0800 Subject: [PATCH 04/20] update --- tests/pytest/crash_gen/crash_gen_main.py | 149 +++++++++++++---------- 1 file changed, 87 insertions(+), 62 deletions(-) diff --git a/tests/pytest/crash_gen/crash_gen_main.py b/tests/pytest/crash_gen/crash_gen_main.py index 1ef89dec4e..8d2fb65ca8 100755 --- a/tests/pytest/crash_gen/crash_gen_main.py +++ b/tests/pytest/crash_gen/crash_gen_main.py @@ -1712,30 +1712,66 @@ class TaskCreateStream(StateTransitionTask): ''' stbname =sTable.getName() sub_tables = sTable.getRegTables(wt.getDbConn()) + aggExpr = Dice.choice([ + 'count(*)', + 'avg(speed)', + # 'twa(speed)', # TODO: this one REQUIRES a where statement, not reasonable + 'sum(speed)', + 'stddev(speed)', + # SELECTOR functions + 'min(speed)', + 'max(speed)', + 'first(speed)', + 'last(speed)', + 'top(speed, 50)', # TODO: not supported? + 'bottom(speed, 50)', # TODO: not supported? + 'apercentile(speed, 10)', # TODO: TD-1316 + 'last_row(*)', # TODO: commented out per TD-3231, we should re-create + # Transformation Functions + # 'diff(speed)', # TODO: no supported?! 
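# ----------------------------------------------------------------------
# (The aggregate list being built here continues below.) A sketch of how the
# chosen aggregate lands in the stream DDL: the identifiers (db_0,
# stream_tb_sub, reg_table_0) are made up, and random.choice stands in for
# Dice.choice; the harness substitutes its own names via str.format().
import random

AGGS = ['count(*)', 'avg(speed)', 'sum(speed)', 'min(speed)', 'max(speed)']

def build_stream_sql(stream: str, db: str, dst: str, src: str) -> str:
    agg = random.choice(AGGS)
    return ('create stream {} into {}.{} as '
            'select {}, avg(speed) from {}.{} '
            'partition by tbname interval(5s) sliding(3s)'
            ).format(stream, db, dst, agg, db, src)

# e.g. build_stream_sql('db_0_sub_stream', 'db_0', 'stream_tb_sub', 'reg_table_0')
# ----------------------------------------------------------------------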
+ 'spread(speed)', + 'elapsed(ts)', + 'mode(speed)', + 'bottom(speed,1)', + 'top(speed,1)', + 'tail(speed,1)', + 'unique(color)', + 'csum(speed)', + 'DERIVATIVE(speed,1s,1)', + 'diff(speed,1)', + 'irate(speed)', + 'mavg(speed,3)', + 'sample(speed,5)', + 'STATECOUNT(speed,"LT",1)', + 'STATEDURATION(speed,"LT",1)', + 'twa(speed)' + + ]) # TODO: add more from 'top' + if sub_tables: if sub_tables: # if not empty sub_tbname = sub_tables[0] - # create stream with query above sub_table - stream_sql = 'create stream {} into {}.{} as select count(*), avg(speed) FROM {}.{} PARTITION BY tbname INTERVAL(5s) SLIDING(3s) '.format(sub_stream_name,dbname,sub_stream_tb_name ,dbname,sub_tbname) + stream_sql = 'create stream {} into {}.{} as select {}, avg(speed) FROM {}.{} PARTITION BY tbname INTERVAL(5s) SLIDING(3s) '.format(sub_stream_name,dbname,sub_stream_tb_name ,aggExpr,dbname,sub_tbname) try: self.execWtSql(wt, stream_sql) + Logging.debug("[OPS] stream is creating at {}".format(time.time())) except taos.error.ProgrammingError as err: errno = Helper.convertErrno(err.errno) if errno in [0x03f0]: # stream already exists # stream need drop before drop table pass - sTable.setStreamName(sub_stream_name) else: pass else: - stream_sql = 'create stream {} into {}.{} as select count(*), avg(speed) FROM {}.{} PARTITION BY tbname INTERVAL(5s) SLIDING(3s) '.format(super_stream_name,dbname,super_stream_tb_name,dbname,stbname) + stream_sql = 'create stream {} into {}.{} as select {}, avg(speed) FROM {}.{} PARTITION BY tbname INTERVAL(5s) SLIDING(3s) '.format(super_stream_name,dbname,super_stream_tb_name,aggExpr, dbname,stbname) try: self.execWtSql(wt, stream_sql) + Logging.debug("[OPS] stream is creating at {}".format(time.time())) except taos.error.ProgrammingError as err: errno = Helper.convertErrno(err.errno) if errno in [0x03f0]: # stream already exists @@ -1779,16 +1815,10 @@ class TdSuperTable: def __init__(self, stName, dbName): self._stName = stName self._dbName = dbName - self._streamName = [] def getName(self): return self._stName - def setStreamName(self,name): - self._streamName.append(name) - - def getStreamName(self): - return self._streamName def drop(self, dbc, skipCheck = False): dbName = self._dbName @@ -2000,38 +2030,38 @@ class TdSuperTable: if not doAggr: # don't do aggregate query, just simple one commonExpr = Dice.choice([ '*', - # 'abs(speed)', - # 'acos(speed)', - # 'asin(speed)', - # 'atan(speed)', - # 'ceil(speed)', - # 'cos(speed)', - # 'cos(speed)', - # 'floor(speed)', - # 'log(speed,2)', - # 'pow(speed,2)', - # 'round(speed)', - # 'sin(speed)', - # 'sqrt(speed)', - # 'char_length(color)', - # 'concat(color,color)', - # 'concat_ws(" ", color,color," ")', - # 'length(color)', - # 'lower(color)', - # 'ltrim(color)', - # 'substr(color , 2)', - # 'upper(color)', - # 'cast(speed as double)', - # 'cast(ts as bigint)', - # # 'TO_ISO8601(color)', - # # 'TO_UNIXTIMESTAMP(ts)', - # 'now()', - # 'timediff(ts,now)', - # 'timezone()', - # 'TIMETRUNCATE(ts,1s)', - # 'TIMEZONE()', - # 'TODAY()', - # 'distinct(color)' + 'abs(speed)', + 'acos(speed)', + 'asin(speed)', + 'atan(speed)', + 'ceil(speed)', + 'cos(speed)', + 'cos(speed)', + 'floor(speed)', + 'log(speed,2)', + 'pow(speed,2)', + 'round(speed)', + 'sin(speed)', + 'sqrt(speed)', + 'char_length(color)', + 'concat(color,color)', + 'concat_ws(" ", color,color," ")', + 'length(color)', + 'lower(color)', + 'ltrim(color)', + 'substr(color , 2)', + 'upper(color)', + 'cast(speed as double)', + 'cast(ts as bigint)', + # 'TO_ISO8601(color)', + # 
'TO_UNIXTIMESTAMP(ts)', + 'now()', + 'timediff(ts,now)', + 'timezone()', + 'TIMETRUNCATE(ts,1s)', + 'TIMEZONE()', + 'TODAY()', + 'distinct(color)' ] ) ret.append(SqlQuery( # reg table @@ -2057,21 +2087,21 @@ class TdSuperTable: # Transformation Functions # 'diff(speed)', # TODO: no supported?! 'spread(speed)', - # 'elapsed(ts)', - # 'mode(speed)', - # 'bottom(speed,1)', - # 'top(speed,1)', - # 'tail(speed,1)', - # 'unique(color)', - # 'csum(speed)', - # 'DERIVATIVE(speed,1s,1)', - # 'diff(speed,1)', - # 'irate(speed)', - # 'mavg(speed,3)', - # 'sample(speed,5)', - # 'STATECOUNT(speed,"LT",1)', - # 'STATEDURATION(speed,"LT",1)', - # 'twa(speed)' + 'elapsed(ts)', + 'mode(speed)', + 'bottom(speed,1)', + 'top(speed,1)', + 'tail(speed,1)', + 'unique(color)', + 'csum(speed)', + 'DERIVATIVE(speed,1s,1)', + 'diff(speed,1)', + 'irate(speed)', + 'mavg(speed,3)', + 'sample(speed,5)', + 'STATECOUNT(speed,"LT",1)', + 'STATEDURATION(speed,"LT",1)', + 'twa(speed)' @@ -2181,7 +2211,6 @@ class TaskDropSuperTable(StateTransitionTask): isSuccess = True for i in tblSeq: regTableName = self.getRegTableName(i) # "db.reg_table_{}".format(i) - streams_prix = self._db.getName() try: self.execWtSql(wt, "drop table {}.{}". format(self._db.getName(), regTableName)) # nRows always 0, like MySQL @@ -2189,10 +2218,6 @@ class TaskDropSuperTable(StateTransitionTask): pass - - - - if (not tickOutput): tickOutput = True # Print only one time if isSuccess: From 937631085f2956529e501dcf09eb83d47f984378 Mon Sep 17 00:00:00 2001 From: "wenzhouwww@live.cn" Date: Tue, 8 Nov 2022 11:44:42 +0800 Subject: [PATCH 05/20] update --- tests/pytest/crash_gen/crash_gen_main.py | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/tests/pytest/crash_gen/crash_gen_main.py b/tests/pytest/crash_gen/crash_gen_main.py index 8d2fb65ca8..1701c293a6 100755 --- a/tests/pytest/crash_gen/crash_gen_main.py +++ b/tests/pytest/crash_gen/crash_gen_main.py @@ -1712,7 +1712,7 @@ class TaskCreateStream(StateTransitionTask): ''' stbname =sTable.getName() sub_tables = sTable.getRegTables(wt.getDbConn()) - aggExpr = Dice.choice([ + aggExpr = Dice.choice([ CREATE STREAM avg_vol_s1 INTO avg_vol1 AS SELECT csum(current), avg(voltage) FROM meters PARTITION BY tbname INTERVAL(1m) SLIDING(30s); 'count(*)', 'avg(speed)', # 'twa(speed)', # TODO: this one REQUIRES a where statement, not reasonable @@ -1723,27 +1723,9 @@ class TaskCreateStream(StateTransitionTask): 'max(speed)', 'first(speed)', 'last(speed)', - 'top(speed, 50)', # TODO: not supported? - 'bottom(speed, 50)', # TODO: not supported? 'apercentile(speed, 10)', # TODO: TD-1316 'last_row(*)', # TODO: commented out per TD-3231, we should re-create # Transformation Functions - # 'diff(speed)', # TODO: no supported?! 
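# ----------------------------------------------------------------------
# This revert trims the candidate list back to aggregates that stream queries
# accept. An equivalent way to express the pruning is an explicit allowlist;
# STREAM_SAFE below is an assumption for illustration, not the server's
# authoritative capability list.
CANDIDATES = ['count(*)', 'avg(speed)', 'twa(speed)', 'top(speed, 50)',
              'csum(speed)', 'irate(speed)', 'sample(speed,5)']
STREAM_SAFE = {'count(*)', 'avg(speed)', 'twa(speed)'}

stream_aggs = [f for f in CANDIDATES if f in STREAM_SAFE]
assert 'csum(speed)' not in stream_aggs   # pruned, matching the revert above
# ----------------------------------------------------------------------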
- 'spread(speed)', - 'elapsed(ts)', - 'mode(speed)', - 'bottom(speed,1)', - 'top(speed,1)', - 'tail(speed,1)', - 'unique(color)', - 'csum(speed)', - 'DERIVATIVE(speed,1s,1)', - 'diff(speed,1)', - 'irate(speed)', - 'mavg(speed,3)', - 'sample(speed,5)', - 'STATECOUNT(speed,"LT",1)', - 'STATEDURATION(speed,"LT",1)', 'twa(speed)' ]) # TODO: add more from 'top' From 908f0d719f191feecf55a303e09c584689072a55 Mon Sep 17 00:00:00 2001 From: "wenzhouwww@live.cn" Date: Tue, 8 Nov 2022 11:47:00 +0800 Subject: [PATCH 06/20] update --- tests/pytest/crash_gen/crash_gen_main.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/pytest/crash_gen/crash_gen_main.py b/tests/pytest/crash_gen/crash_gen_main.py index 1701c293a6..6c23158535 100755 --- a/tests/pytest/crash_gen/crash_gen_main.py +++ b/tests/pytest/crash_gen/crash_gen_main.py @@ -1696,9 +1696,9 @@ class TaskCreateStream(StateTransitionTask): dbname = self._db.getName() sub_stream_name = dbname+ '_sub_stream' - sub_stream_tb_name = 'avg_sub' + sub_stream_tb_name = 'stream_tb_sub' super_stream_name = dbname+ '_super_stream' - super_stream_tb_name = 'avg_super' + super_stream_tb_name = 'stream_tb_super' if not self._db.exists(wt.getDbConn()): Logging.debug("Skipping task, no DB yet") return @@ -1892,7 +1892,7 @@ class TdSuperTable: def hasStreamTables(self,dbc: DbConn): - return dbc.query("show {}.stables like 'avg%'".format(self._dbName)) > 0 + return dbc.query("show {}.stables like 'stream_tb%'".format(self._dbName)) > 0 def hasStreams(self,dbc: DbConn): return dbc.query("show streams") > 0 @@ -1907,7 +1907,7 @@ class TdSuperTable: return not dbc.query("show streams ") > 0 def dropStreamTables(self, dbc: DbConn): - dbc.query("show {}.stables like 'avg%'".format(self._dbName)) + dbc.query("show {}.stables like 'stream_tb%'".format(self._dbName)) StreamTables = dbc.getQueryResult() @@ -1915,7 +1915,7 @@ class TdSuperTable: if self.dropStreams: dbc.execute('drop table {}.{}'.format(self._dbName,StreamTable[0])) - return not dbc.query("show {}.stables like 'avg%'".format(self._dbName)) + return not dbc.query("show {}.stables like 'stream_tb%'".format(self._dbName)) def ensureRegTable(self, task: Optional[Task], dbc: DbConn, regTableName: str): ''' From ca00459abb72a56a2b833ed225daf06532145510 Mon Sep 17 00:00:00 2001 From: "wenzhouwww@live.cn" Date: Tue, 8 Nov 2022 11:56:18 +0800 Subject: [PATCH 07/20] update --- tests/pytest/crash_gen/crash_gen_main.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/pytest/crash_gen/crash_gen_main.py b/tests/pytest/crash_gen/crash_gen_main.py index 6c23158535..933d620824 100755 --- a/tests/pytest/crash_gen/crash_gen_main.py +++ b/tests/pytest/crash_gen/crash_gen_main.py @@ -1712,7 +1712,7 @@ class TaskCreateStream(StateTransitionTask): ''' stbname =sTable.getName() sub_tables = sTable.getRegTables(wt.getDbConn()) - aggExpr = Dice.choice([ CREATE STREAM avg_vol_s1 INTO avg_vol1 AS SELECT csum(current), avg(voltage) FROM meters PARTITION BY tbname INTERVAL(1m) SLIDING(30s); + aggExpr = Dice.choice([ 'count(*)', 'avg(speed)', # 'twa(speed)', # TODO: this one REQUIRES a where statement, not reasonable @@ -1735,7 +1735,7 @@ class TaskCreateStream(StateTransitionTask): if sub_tables: # if not empty sub_tbname = sub_tables[0] # create stream with query above sub_table - stream_sql = 'create stream {} into {}.{} as select {}, avg(speed) FROM {}.{} PARTITION BY tbname INTERVAL(5s) SLIDING(3s) '.format(sub_stream_name,dbname,sub_stream_tb_name 
,aggExpr,dbname,sub_tbname) + stream_sql = 'create stream {} into {}.{} as select {}, avg(speed) FROM {}.{} where ts now -1h PARTITION BY tbname INTERVAL(5s) SLIDING(3s) FILL (prev) '.format(sub_stream_name,dbname,sub_stream_tb_name ,aggExpr,dbname,sub_tbname) try: self.execWtSql(wt, stream_sql) Logging.debug("[OPS] stream is creating at {}".format(time.time())) @@ -1749,7 +1749,7 @@ class TaskCreateStream(StateTransitionTask): pass else: - stream_sql = 'create stream {} into {}.{} as select {}, avg(speed) FROM {}.{} PARTITION BY tbname INTERVAL(5s) SLIDING(3s) '.format(super_stream_name,dbname,super_stream_tb_name,aggExpr, dbname,stbname) + stream_sql = 'create stream {} into {}.{} as select {}, avg(speed) FROM {}.{} where ts now -1h PARTITION BY tbname INTERVAL(5s) SLIDING(3s) FILL (prev) '.format(super_stream_name,dbname,super_stream_tb_name,aggExpr, dbname,stbname) try: self.execWtSql(wt, stream_sql) From 59f50ae8c5ad38fc8e36eba5128ebdcda4f13f59 Mon Sep 17 00:00:00 2001 From: "wenzhouwww@live.cn" Date: Wed, 9 Nov 2022 18:08:02 +0800 Subject: [PATCH 08/20] update --- tests/pytest/crash_gen/crash_gen_main.py | 298 +++++++++++++++++++++-- 1 file changed, 271 insertions(+), 27 deletions(-) diff --git a/tests/pytest/crash_gen/crash_gen_main.py b/tests/pytest/crash_gen/crash_gen_main.py index 933d620824..6c52b459ca 100755 --- a/tests/pytest/crash_gen/crash_gen_main.py +++ b/tests/pytest/crash_gen/crash_gen_main.py @@ -37,6 +37,7 @@ import requests # from guppy import hpy import gc import taos +from taos.tmq import * from .shared.types import TdColumns, TdTags @@ -675,9 +676,10 @@ class AnyState: CAN_DROP_DB = 2 CAN_CREATE_FIXED_SUPER_TABLE = 3 CAN_CREATE_STREAM = 3 # super table must exists + CAN_CREATE_TOPIC = 3 # super table must exists + CAN_CREATE_CONSUMERS = 3 CAN_DROP_FIXED_SUPER_TABLE = 4 CAN_ADD_DATA = 5 - CAN_DROP_STREAM = 5 CAN_READ_DATA = 6 CAN_DELETE_DATA = 6 @@ -730,6 +732,12 @@ class AnyState: return False return self._info[self.CAN_DROP_FIXED_SUPER_TABLE] + def canCreateTopic(self): + return self._info[self.CAN_CREATE_TOPIC] + + def canCreateConsumers(self): + return self._info[self.CAN_CREATE_CONSUMERS] + def canCreateStream(self): return self._info[self.CAN_CREATE_STREAM] @@ -1679,9 +1687,22 @@ class TaskDropDb(StateTransitionTask): return state.canDropDb() def _executeInternal(self, te: TaskExecutor, wt: WorkerThread): + + # drop topics before drop db + + if self._db.getFixedSuperTable().hasTopics(wt.getDbConn()): + + self._db.getFixedSuperTable().dropTopics(wt.getDbConn(),self._db.getName(),None) + self._db.getFixedSuperTable().dropTopics(wt.getDbConn(),self._db.getName(),self._db.getFixedSuperTableName) + self.execWtSql(wt, "drop database {}".format(self._db.getName())) + Logging.debug("[OPS] database dropped at {}".format(time.time())) + +''' +# Streams will generator TD-20237 (it will crash taosd , start this task when this issue fixed ) + class TaskCreateStream(StateTransitionTask): @classmethod @@ -1707,9 +1728,9 @@ class TaskCreateStream(StateTransitionTask): # wt.execSql("use db") # should always be in place # create stream - ''' - CREATE STREAM avg_vol_s INTO avg_vol AS SELECT _wstartts, count(*), avg(voltage) FROM meters PARTITION BY tbname INTERVAL(1m) SLIDING(30s); - ''' + + # CREATE STREAM avg_vol_s INTO avg_vol AS SELECT _wstartts, count(*), avg(voltage) FROM meters PARTITION BY tbname INTERVAL(1m) SLIDING(30s); + stbname =sTable.getName() sub_tables = sTable.getRegTables(wt.getDbConn()) aggExpr = Dice.choice([ @@ -1735,7 +1756,7 @@ class 
TaskCreateStream(StateTransitionTask): if sub_tables: # if not empty sub_tbname = sub_tables[0] # create stream with query above sub_table - stream_sql = 'create stream {} into {}.{} as select {}, avg(speed) FROM {}.{} where ts now -1h PARTITION BY tbname INTERVAL(5s) SLIDING(3s) FILL (prev) '.format(sub_stream_name,dbname,sub_stream_tb_name ,aggExpr,dbname,sub_tbname) + stream_sql = 'create stream {} into {}.{} as select {}, avg(speed) FROM {}.{} PARTITION BY tbname INTERVAL(5s) SLIDING(3s) '.format(sub_stream_name,dbname,sub_stream_tb_name ,aggExpr,dbname,sub_tbname) try: self.execWtSql(wt, stream_sql) Logging.debug("[OPS] stream is creating at {}".format(time.time())) @@ -1749,7 +1770,7 @@ class TaskCreateStream(StateTransitionTask): pass else: - stream_sql = 'create stream {} into {}.{} as select {}, avg(speed) FROM {}.{} where ts now -1h PARTITION BY tbname INTERVAL(5s) SLIDING(3s) FILL (prev) '.format(super_stream_name,dbname,super_stream_tb_name,aggExpr, dbname,stbname) + stream_sql = 'create stream {} into {}.{} as select {}, avg(speed) FROM {}.{} PARTITION BY tbname INTERVAL(5s) SLIDING(3s) '.format(super_stream_name,dbname,super_stream_tb_name,aggExpr, dbname,stbname) try: self.execWtSql(wt, stream_sql) @@ -1759,8 +1780,142 @@ class TaskCreateStream(StateTransitionTask): if errno in [0x03f0]: # stream already exists # stream need drop before drop table pass +''' - +class TaskCreateTopic(StateTransitionTask): + + @classmethod + def getEndState(cls): + return StateHasData() + + @classmethod + def canBeginFrom(cls, state: AnyState): + return state.canCreateTopic() + + def _executeInternal(self, te: TaskExecutor, wt: WorkerThread): + dbname = self._db.getName() + + sub_topic_name = dbname+ '_sub_topic' + super_topic_name = dbname+ '_super_topic' + stable_topic = dbname+ '_stable_topic' + db_topic = 'database_' + dbname+ '_topics' + if not self._db.exists(wt.getDbConn()): + Logging.debug("Skipping task, no DB yet") + return + + sTable = self._db.getFixedSuperTable() # type: TdSuperTable + # wt.execSql("use db") # should always be in place + + # create topic + + # create topic if not exists topic_ctb_column as select ts, c1, c2, c3 from stb1; + + stbname =sTable.getName() + sub_tables = sTable.getRegTables(wt.getDbConn()) + scalarExpr = Dice.choice([ '*','speed','color', + 'abs(speed)', + 'acos(speed)', + 'asin(speed)', + 'atan(speed)', + 'ceil(speed)', + 'cos(speed)', + 'cos(speed)', + 'floor(speed)', + 'log(speed,2)', + 'pow(speed,2)', + 'round(speed)', + 'sin(speed)', + 'sqrt(speed)', + 'char_length(color)', + 'concat(color,color)', + 'concat_ws(" ", color,color," ")', + 'length(color)', + 'lower(color)', + 'ltrim(color)', + 'substr(color , 2)', + 'upper(color)', + 'cast(speed as double)', + 'cast(ts as bigint)', + + ]) # TODO: add more from 'top' + if Dice.throw(3)==0: + + if sub_tables: + + if sub_tables: # if not empty + sub_tbname = sub_tables[0] + # create stream with query above sub_table + topic_sql = 'create topic {} as select {} FROM {}.{} ; '.format(sub_topic_name,scalarExpr,dbname,sub_tbname) + try: + self.execWtSql(wt, "use {}".format(dbname)) + self.execWtSql(wt, topic_sql) + Logging.debug("[OPS] topic is creating at {}".format(time.time())) + except taos.error.ProgrammingError as err: + errno = Helper.convertErrno(err.errno) + if errno in [0x03f0]: # topic already exists + # topic need drop before drop table + pass + + else: + pass + + else: + topic_sql = 'create topic {} as select {} FROM {}.{} '.format(super_topic_name,scalarExpr, dbname,stbname) + try: + 
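# ----------------------------------------------------------------------
# The three topic flavours TaskCreateTopic creates, reduced to their DDL
# shapes (names are illustrative). Only the first form carries an explicit
# SELECT; the STABLE and DATABASE forms subscribe to the schema object
# wholesale, which is why the task only randomises scalarExpr for the
# sub-table branch.
def topic_ddl(kind: str, topic: str, db: str, tb: str = '') -> str:
    if kind == 'select':    # column subset / expression over one table
        return 'create topic {} as select ts, speed from {}.{}'.format(topic, db, tb)
    if kind == 'stable':    # every row of a super table
        return 'create topic {} as stable {}.{}'.format(topic, db, tb)
    return 'create topic {} as database {}'.format(topic, db)
# ----------------------------------------------------------------------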
self.execWtSql(wt, "use {}".format(dbname)) + self.execWtSql(wt, topic_sql) + Logging.debug("[OPS] subquery topic is creating at {}".format(time.time())) + except taos.error.ProgrammingError as err: + errno = Helper.convertErrno(err.errno) + if errno in [0x03f0]: # topic already exists + # topic need drop before drop table + pass + elif Dice.throw(3)==1: + topic_sql = 'create topic {} AS STABLE {}.{} '.format(stable_topic,dbname,stbname) + try: + self.execWtSql(wt, "use {}".format(dbname)) + self.execWtSql(wt, topic_sql) + Logging.debug("[OPS] stable topic is creating at {}".format(time.time())) + except taos.error.ProgrammingError as err: + errno = Helper.convertErrno(err.errno) + if errno in [0x03f0]: # topic already exists + # topic need drop before drop table + pass + elif Dice.throw(3)==2: + topic_sql = 'create topic {} AS DATABASE {} '.format(db_topic,dbname) + try: + self.execWtSql(wt, "use {}".format(dbname)) + self.execWtSql(wt, topic_sql) + Logging.debug("[OPS] db topic is creating at {}".format(time.time())) + except taos.error.ProgrammingError as err: + errno = Helper.convertErrno(err.errno) + if errno in [0x03f0]: # topic already exists + # topic need drop before drop table + pass + else: + pass + + +class TaskCreateConsumers(StateTransitionTask): + + @classmethod + def getEndState(cls): + return StateHasData() + + @classmethod + def canBeginFrom(cls, state: AnyState): + return state.canCreateConsumers() + + def _executeInternal(self, te: TaskExecutor, wt: WorkerThread): + dbname = self._db.getName() + + sTable = self._db.getFixedSuperTable() # type: TdSuperTable + # wt.execSql("use db") # should always be in place + + # create Consumers + if Dice.throw(50)==0: # because subscribe is cost so much time , Reduce frequency of this task + if sTable.hasTopics(wt.getDbConn()): + sTable.createConsumer(wt.getDbConn(),random.randint(1,10)) class TaskCreateSuperTable(StateTransitionTask): @@ -1780,9 +1935,6 @@ class TaskCreateSuperTable(StateTransitionTask): sTable = self._db.getFixedSuperTable() # type: TdSuperTable # wt.execSql("use db") # should always be in place - if sTable.hasStreams(wt.getDbConn()) or sTable.hasStreamTables(wt.getDbConn()): - sTable.dropStreams(wt.getDbConn()) - sTable.dropStreamTables(wt.getDbConn()) sTable.create(wt.getDbConn(), {'ts': TdDataType.TIMESTAMP, 'speed': TdDataType.INT, 'color': TdDataType.BINARY16}, { 'b': TdDataType.BINARY200, 'f': TdDataType.FLOAT}, @@ -1797,6 +1949,8 @@ class TdSuperTable: def __init__(self, stName, dbName): self._stName = stName self._dbName = dbName + self._consumerLists = {} + self._ConsumerInsts = [] def getName(self): return self._stName @@ -1806,6 +1960,12 @@ class TdSuperTable: dbName = self._dbName if self.exists(dbc) : # if myself exists fullTableName = dbName + '.' 
+ self._stName + if self.hasStreams(dbc): + self.dropStreams(dbc) + self.dropStreamTables(dbc) + if self.hasTopics(dbc): + self.dropTopics(dbName,None) + self.dropTopics(dbName,self._stName) try: dbc.execute("DROP TABLE {}".format(fullTableName)) except taos.error.ProgrammingError as err: @@ -1843,12 +2003,19 @@ class TdSuperTable: if dbc.existsSuperTable(self._stName): if dropIfExists: + if self.hasStreams(dbc): + self.dropStreams(dbc) + self.dropStreamTables(dbc) + + # drop topics before drop stables + if self.hasTopics(dbc): + self.dropTopics(dbc,self._dbName,None) + self.dropTopics(dbc,self._dbName,self._stName ) + dbc.execute("DROP TABLE {}".format(fullTableName)) - pass - - + pass # dbc.execute("DROP TABLE {}".format(fullTableName)) else: # error raise CrashGenError("Cannot create super table, already exists: {}".format(self._stName)) @@ -1863,7 +2030,47 @@ class TdSuperTable: ) else: sql += " TAGS (dummy int) " - dbc.execute(sql) + dbc.execute(sql) + + def createConsumer(self, dbc: DbConn , Consumer_nums): + + def generateConsumer(current_topic_list): + conf = TaosTmqConf() + conf.set("group.id", "tg2") + conf.set("td.connect.user", "root") + conf.set("td.connect.pass", "taosdata") + conf.set("enable.auto.commit", "true") + def tmq_commit_cb_print(tmq, resp, offset, param=None): + print(f"commit: {resp}, tmq: {tmq}, offset: {offset}, param: {param}") + conf.set_auto_commit_cb(tmq_commit_cb_print, None) + consumer = conf.new_consumer() + topic_list = TaosTmqList() + for topic in current_topic_list: + topic_list.append(topic) + consumer.subscribe(topic_list) + time.sleep(5) # consumer work only 5 sec ,and then it will exit + try: + consumer.unsubscribe() + except Exception as e : + pass + return + + # mulit Consumer + current_topic_list = self.getTopicLists(dbc) + for i in range(Consumer_nums): + consumer_inst = threading.Thread(target=generateConsumer, args=(current_topic_list,)) + self._ConsumerInsts.append(consumer_inst) + + for ConsumerInst in self._ConsumerInsts: + ConsumerInst.start() + for ConsumerInst in self._ConsumerInsts: + ConsumerInst.join() + + def getTopicLists(self, dbc: DbConn): + dbc.query("show topics ") + topics = dbc.getQueryResult() + topicLists = [v[0] for v in topics] + return topicLists def getRegTables(self, dbc: DbConn): dbName = self._dbName @@ -1878,12 +2085,7 @@ class TdSuperTable: return [v[0] for v in qr] # list transformation, ref: https://stackoverflow.com/questions/643823/python-list-transformation def hasRegTables(self, dbc: DbConn): - # print(self._stName) - # dbc.query("SELECT * FROM {}.{}".format(self._dbName, self._stName)) - # print(dbc.getQueryResult()) - if self.hasStreamTables(dbc) or self.hasStreams(dbc): - if self.dropStreams(dbc): - self.dropStreamTables(dbc) + if dbc.existsSuperTable(self._stName): return dbc.query("SELECT * FROM {}.{}".format(self._dbName, self._stName)) > 0 @@ -1896,7 +2098,41 @@ class TdSuperTable: def hasStreams(self,dbc: DbConn): return dbc.query("show streams") > 0 - + + def hasTopics(self,dbc: DbConn): + + return dbc.query("show topics") > 0 + + def dropTopics(self,dbc: DbConn , dbname=None,stb_name=None): + dbc.query("show topics ") + topics = dbc.getQueryResult() + + if dbname !=None and stb_name == None : + + for topic in topics: + if dbname in topic[0] and topic[0].startswith("database"): + try: + dbc.execute('drop topic {}'.format(topic[0])) + except taos.error.ProgrammingError as err: + errno = Helper.convertErrno(err.errno) + if errno in [0x03EB]: # Topic subscribed cannot be dropped + pass + # for subsript 
in subscriptions: + + else: + pass + + pass + return True + elif dbname !=None and stb_name!= None: + for topic in topics: + if topic[0].startswith(self._dbName) and topic[0].endswith('topic'): + dbc.execute('drop topic {}'.format(topic[0])) + return True + else: + return True + pass + def dropStreams(self,dbc:DbConn): dbc.query("show streams ") Streams = dbc.getQueryResult() @@ -1912,7 +2148,7 @@ class TdSuperTable: StreamTables = dbc.getQueryResult() for StreamTable in StreamTables: - if self.dropStreams: + if self.dropStreams(dbc): dbc.execute('drop table {}.{}'.format(self._dbName,StreamTable[0])) return not dbc.query("show {}.stables like 'stream_tb%'".format(self._dbName)) @@ -2083,11 +2319,7 @@ class TdSuperTable: 'sample(speed,5)', 'STATECOUNT(speed,"LT",1)', 'STATEDURATION(speed,"LT",1)', - 'twa(speed)' - - - - + 'twa(speed)' ]) # TODO: add more from 'top' @@ -2194,6 +2426,7 @@ class TaskDropSuperTable(StateTransitionTask): for i in tblSeq: regTableName = self.getRegTableName(i) # "db.reg_table_{}".format(i) try: + self.execWtSql(wt, "drop table {}.{}". format(self._db.getName(), regTableName)) # nRows always 0, like MySQL except taos.error.ProgrammingError as err: @@ -2209,6 +2442,17 @@ class TaskDropSuperTable(StateTransitionTask): # Drop the super table itself tblName = self._db.getFixedSuperTableName() + + # drop streams before drop stables + if self._db.getFixedSuperTable().hasStreams(wt.getDbConn()): + self._db.getFixedSuperTable().dropStreams(wt.getDbConn()) + self._db.getFixedSuperTable().dropStreamTables(wt.getDbConn()) + + # drop topics before drop stables + if self._db.getFixedSuperTable().hasTopics(wt.getDbConn()): + self._db.getFixedSuperTable().dropTopics(wt.getDbConn(),self._db.getName(),None) + self._db.getFixedSuperTable().dropTopics(wt.getDbConn(),self._db.getName(),tblName) + try: self.execWtSql(wt, "drop table {}.{}".format(self._db.getName(), tblName)) except taos.error.ProgrammingError as err: From dde87a374601d6511c4ca808dd5696dc8c976ba5 Mon Sep 17 00:00:00 2001 From: "wenzhouwww@live.cn" Date: Wed, 9 Nov 2022 18:21:43 +0800 Subject: [PATCH 09/20] update --- tests/pytest/crash_gen/crash_gen_main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/pytest/crash_gen/crash_gen_main.py b/tests/pytest/crash_gen/crash_gen_main.py index 6c52b459ca..dbaa8b8ef2 100755 --- a/tests/pytest/crash_gen/crash_gen_main.py +++ b/tests/pytest/crash_gen/crash_gen_main.py @@ -1372,6 +1372,7 @@ class Task(): 0x03D3, # Conflict transaction not completed 0x0707, # Query not ready , it always occur at replica 3 0x707, # Query not ready + 0x396, # Database in creating status From 05f0ac62e7c5d6a824c24a258841b1c9fadf7254 Mon Sep 17 00:00:00 2001 From: "wenzhouwww@live.cn" Date: Wed, 9 Nov 2022 18:32:31 +0800 Subject: [PATCH 10/20] update --- tests/pytest/crash_gen/crash_gen_main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/pytest/crash_gen/crash_gen_main.py b/tests/pytest/crash_gen/crash_gen_main.py index dbaa8b8ef2..eeffe15a88 100755 --- a/tests/pytest/crash_gen/crash_gen_main.py +++ b/tests/pytest/crash_gen/crash_gen_main.py @@ -1373,7 +1373,7 @@ class Task(): 0x0707, # Query not ready , it always occur at replica 3 0x707, # Query not ready 0x396, # Database in creating status - + 0x386, # Database in droping status 1000 # REST catch-all error @@ -2052,7 +2052,7 @@ class TdSuperTable: time.sleep(5) # consumer work only 5 sec ,and then it will exit try: consumer.unsubscribe() - except Exception as e : + except TmqError as e : pass return From 
3e133a8661ab4b15fc09482b054787f3079279ab Mon Sep 17 00:00:00 2001
From: "wenzhouwww@live.cn"
Date: Thu, 10 Nov 2022 09:35:22 +0800
Subject: [PATCH 11/20] update

---
 tests/pytest/crash_gen/crash_gen_main.py | 32 +++++++++++++++---------
 1 file changed, 20 insertions(+), 12 deletions(-)

diff --git a/tests/pytest/crash_gen/crash_gen_main.py b/tests/pytest/crash_gen/crash_gen_main.py
index eeffe15a88..18c3957630 100755
--- a/tests/pytest/crash_gen/crash_gen_main.py
+++ b/tests/pytest/crash_gen/crash_gen_main.py
@@ -255,7 +255,7 @@ class WorkerThread:

 class ThreadCoordinator:
-    WORKER_THREAD_TIMEOUT = 1200 # Normal: 120
+    WORKER_THREAD_TIMEOUT = 120 # Normal: 120

     def __init__(self, pool: ThreadPool, dbManager: DbManager):
         self._curStep = -1 # first step is 0
@@ -1374,6 +1374,7 @@
                 0x707, # Query not ready
                 0x396, # Database in creating status
                 0x386, # Database in droping status
+                0x03E1, # failed on tmq_subscribe ,topic not exist

                 1000 # REST catch-all error
@@ -1905,18 +1906,23 @@ class TaskCreateConsumers(StateTransitionTask):

     @classmethod
     def canBeginFrom(cls, state: AnyState):
-        return state.canCreateConsumers()
+        return state.canCreateConsumers()

     def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
-        dbname = self._db.getName()
-
-        sTable = self._db.getFixedSuperTable() # type: TdSuperTable
-        # wt.execSql("use db") # should always be in place
-        # create Consumers
-        if Dice.throw(50)==0: # because subscribe is cost so much time , Reduce frequency of this task
-            if sTable.hasTopics(wt.getDbConn()):
-                sTable.createConsumer(wt.getDbConn(),random.randint(1,10))
+        if Config.getConfig().connector_type == 'native':
+            dbname = self._db.getName()
+
+            sTable = self._db.getFixedSuperTable() # type: TdSuperTable
+            # wt.execSql("use db") # should always be in place
+
+            # create Consumers
+            if Dice.throw(50)==0: # because subscribe is cost so much time , Reduce frequency of this task
+                if sTable.hasTopics(wt.getDbConn()):
+                    sTable.createConsumer(wt.getDbConn(),random.randint(1,10))
+        else:
+            print(" restful not support tmq consumers")
+            return

 class TaskCreateSuperTable(StateTransitionTask):
@@ -2048,7 +2054,10 @@ class TdSuperTable:
             topic_list = TaosTmqList()
             for topic in current_topic_list:
                 topic_list.append(topic)
-            consumer.subscribe(topic_list)
+            try:
+                consumer.subscribe(topic_list)
+            except TmqError as e :
+                pass
             time.sleep(5) # consumer work only 5 sec ,and then it will exit
             try:
                 consumer.unsubscribe()
@@ -3326,4 +3335,3 @@ class Container():
             return self._verifyValidProperty(name)
         self._cargo[name] = value
-

From 86b5b89531772a795f179c851f16b7ff5f282366 Mon Sep 17 00:00:00 2001
From: "wenzhouwww@live.cn"
Date: Thu, 10 Nov 2022 10:37:16 +0800
Subject: [PATCH 12/20] update

---
 tests/pytest/crash_gen/crash_gen_main.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/pytest/crash_gen/crash_gen_main.py b/tests/pytest/crash_gen/crash_gen_main.py
index 18c3957630..7d5d1c0c8a 100755
--- a/tests/pytest/crash_gen/crash_gen_main.py
+++ b/tests/pytest/crash_gen/crash_gen_main.py
@@ -1367,6 +1367,7 @@ class Task():
                 0x2603, # Table does not exist, replaced by 2662 below
                 0x260d, # Tags number not matched
                 0x2662, # Table does not exist #TODO: what about 2603 above?
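# ----------------------------------------------------------------------
# (The acceptable-error list continues on the next line.) A sketch of the
# errno normalisation these lists depend on: the Python connector surfaces
# negative numbers, and a convertErrno-style helper maps them back onto the
# 0x... codes listed here. This reconstruction of Helper.convertErrno is an
# assumption, not a quote of the harness.
ACCEPTABLE = {0x396, 0x386, 0x03E1, 0x0707, 0x2662}

def convert_errno(errno: int) -> int:
    return errno if errno > 0 else 0x80000000 + errno

def is_acceptable(errno: int) -> bool:
    return convert_errno(errno) in ACCEPTABLE
# ----------------------------------------------------------------------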
+ 0x2600, # database not specified, SQL: show stables , database droped , and show tables 0x032C, # Object is creating 0x032D, # Object is dropping 0x03D3, # Conflict transaction not completed From 20071f09a729da0280c0ca82f76b410463f5e30a Mon Sep 17 00:00:00 2001 From: "wenzhouwww@live.cn" Date: Thu, 10 Nov 2022 20:23:00 +0800 Subject: [PATCH 13/20] update --- tests/pytest/crash_gen/crash_gen_main.py | 218 ++++++++++++++++++----- tests/pytest/crash_gen/shared/db.py | 24 ++- 2 files changed, 196 insertions(+), 46 deletions(-) diff --git a/tests/pytest/crash_gen/crash_gen_main.py b/tests/pytest/crash_gen/crash_gen_main.py index 7d5d1c0c8a..ba501c3e78 100755 --- a/tests/pytest/crash_gen/crash_gen_main.py +++ b/tests/pytest/crash_gen/crash_gen_main.py @@ -420,10 +420,12 @@ class ThreadCoordinator: except threading.BrokenBarrierError as err: self._execStats.registerFailure("Aborted due to worker thread timeout") Logging.error("\n") + Logging.error("Main loop aborted, caused by worker thread(s) time-out of {} seconds".format( ThreadCoordinator.WORKER_THREAD_TIMEOUT)) Logging.error("TAOS related threads blocked at (stack frames top-to-bottom):") ts = ThreadStacks() + ts.record_current_time(time.time()) # record thread exit time at current moment ts.print(filterInternal=True) workerTimeout = True @@ -547,7 +549,12 @@ class ThreadCoordinator: # pick a task type for current state db = self.pickDatabase() - taskType = db.getStateMachine().pickTaskType() # dynamic name of class + if Dice.throw(2)==1: + taskType = db.getStateMachine().pickTaskType() # dynamic name of class + else: + taskType = db.getStateMachine().balance_pickTaskType() # and an method can get balance task types + pass + return taskType(self._execStats, db) # create a task from it def resetExecutedTasks(self): @@ -679,6 +686,8 @@ class AnyState: CAN_CREATE_TOPIC = 3 # super table must exists CAN_CREATE_CONSUMERS = 3 CAN_DROP_FIXED_SUPER_TABLE = 4 + CAN_DROP_TOPIC = 4 + CAN_DROP_STREAM = 4 CAN_ADD_DATA = 5 CAN_READ_DATA = 6 CAN_DELETE_DATA = 6 @@ -734,13 +743,19 @@ class AnyState: def canCreateTopic(self): return self._info[self.CAN_CREATE_TOPIC] + + def canDropTopic(self): + return self._info[self.CAN_DROP_TOPIC] def canCreateConsumers(self): return self._info[self.CAN_CREATE_CONSUMERS] - def canCreateStream(self): + def canCreateStreams(self): return self._info[self.CAN_CREATE_STREAM] + def canDropStream(self): + return self._info[self.CAN_DROP_STREAM] + def canAddData(self): return self._info[self.CAN_ADD_DATA] @@ -919,7 +934,7 @@ class StateHasData(AnyState): ): # only if we didn't create one # we shouldn't have dropped it self.assertNoTask(tasks, TaskDropDb) - if (not self.hasTask(tasks, TaskCreateSuperTable) + if not( self.hasTask(tasks, TaskCreateSuperTable) ): # if we didn't create the table # we should not have a task that drops it self.assertNoTask(tasks, TaskDropSuperTable) @@ -1075,6 +1090,28 @@ class StateMechine: # Logging.debug(" (weighted random:{}/{}) ".format(i, len(taskTypes))) return taskTypes[i] + def balance_pickTaskType(self): + # all the task types we can choose from at curent state + BasicTypes = self.getTaskTypes() + weightsTypes = BasicTypes.copy() + + # this matrixs can balance the Frequency of different types of tasks + weight_matrixs = {'TaskDropDb': 5 , 'TaskDropTopics': 20 , 'TaskDropStreams':10 , 'TaskDropStreamTables':10 , + 'TaskReadData':50 , 'TaskDropSuperTable':5 , 'TaskAlterTags':3 , 'TaskAddData':10, + 'TaskDeleteData':10 , 'TaskCreateDb':10 , 'TaskCreateStream': 3, 'TaskCreateTopic' :3, + 
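# ----------------------------------------------------------------------
# (The weight table continues on the next line.) The expansion loop below it
# repeats each task type "weight" times and samples uniformly;
# random.choices() expresses the same distribution directly. pick_weighted
# and weight_by_name are illustrative names, not harness APIs.
import random

def pick_weighted(task_types, weight_by_name):
    # each type keeps its one base copy, plus "weight" extra copies
    weights = [1 + weight_by_name.get(t.__name__, 0) for t in task_types]
    return random.choices(task_types, weights=weights, k=1)[0]
# ----------------------------------------------------------------------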
'TaskCreateConsumers':10, 'TaskCreateSuperTable': 10 } # task type : weghts matrixs + + for task , weights in weight_matrixs.items(): + + for basicType in BasicTypes: + if basicType.__name__ == task: + for _ in range(weights): + weightsTypes.append(basicType) + + task = random.sample(weightsTypes,1) + return task[0] + + # ref: # https://eli.thegreenplace.net/2010/01/22/weighted-random-generation-in-python/ def _weighted_choice_sub(self, weights) -> int: @@ -1376,6 +1413,10 @@ class Task(): 0x396, # Database in creating status 0x386, # Database in droping status 0x03E1, # failed on tmq_subscribe ,topic not exist + 0x03ed , # Topic must be dropped first, SQL: drop database db_0 + 0x0203 , # Invalid value + + 1000 # REST catch-all error @@ -1693,19 +1734,24 @@ class TaskDropDb(StateTransitionTask): # drop topics before drop db - if self._db.getFixedSuperTable().hasTopics(wt.getDbConn()): + # if self._db.getFixedSuperTable().hasTopics(wt.getDbConn()): - self._db.getFixedSuperTable().dropTopics(wt.getDbConn(),self._db.getName(),None) - self._db.getFixedSuperTable().dropTopics(wt.getDbConn(),self._db.getName(),self._db.getFixedSuperTableName) - - self.execWtSql(wt, "drop database {}".format(self._db.getName())) + # self._db.getFixedSuperTable().dropTopics(wt.getDbConn(),self._db.getName(),None) + # self._db.getFixedSuperTable().dropTopics(wt.getDbConn(),self._db.getName(),self._db.getFixedSuperTableName) + try: + self.queryWtSql(wt, "drop database {}".format(self._db.getName())) # drop database maybe failed ,because topic exists + except taos.error.ProgrammingError as err: + errno = Helper.convertErrno(err.errno) + if errno in [0x0203]: # drop maybe failed + pass Logging.debug("[OPS] database dropped at {}".format(time.time())) -''' + # Streams will generator TD-20237 (it will crash taosd , start this task when this issue fixed ) + class TaskCreateStream(StateTransitionTask): @classmethod @@ -1714,7 +1760,7 @@ class TaskCreateStream(StateTransitionTask): @classmethod def canBeginFrom(cls, state: AnyState): - return state.canCreateStream() + return state.canCreateStreams() def _executeInternal(self, te: TaskExecutor, wt: WorkerThread): dbname = self._db.getName() @@ -1783,7 +1829,6 @@ class TaskCreateStream(StateTransitionTask): if errno in [0x03f0]: # stream already exists # stream need drop before drop table pass -''' class TaskCreateTopic(StateTransitionTask): @@ -1855,7 +1900,7 @@ class TaskCreateTopic(StateTransitionTask): Logging.debug("[OPS] topic is creating at {}".format(time.time())) except taos.error.ProgrammingError as err: errno = Helper.convertErrno(err.errno) - if errno in [0x03f0]: # topic already exists + if errno in [0x03f0 ]: # topic already exists # topic need drop before drop table pass @@ -1877,7 +1922,7 @@ class TaskCreateTopic(StateTransitionTask): topic_sql = 'create topic {} AS STABLE {}.{} '.format(stable_topic,dbname,stbname) try: self.execWtSql(wt, "use {}".format(dbname)) - self.execWtSql(wt, topic_sql) + self.queryWtSql(wt, topic_sql) Logging.debug("[OPS] stable topic is creating at {}".format(time.time())) except taos.error.ProgrammingError as err: errno = Helper.convertErrno(err.errno) @@ -1897,7 +1942,81 @@ class TaskCreateTopic(StateTransitionTask): pass else: pass - + +class TaskDropTopics(StateTransitionTask): + + @classmethod + def getEndState(cls): + return StateHasData() + + @classmethod + def canBeginFrom(cls, state: AnyState): + return state.canDropTopic() + + def _executeInternal(self, te: TaskExecutor, wt: WorkerThread): + dbname = self._db.getName() + 
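# ----------------------------------------------------------------------
# The ordering these drop tasks enforce, gathered in one place: topics must
# go before the database (otherwise 0x03ed, "Topic must be dropped first"),
# and streams before the tables they write into. dbc stands for the
# harness's DbConn; teardown() itself is hypothetical glue for illustration.
def teardown(dbc, db: str, stb: str) -> None:
    dbc.query('show topics')
    for row in dbc.getQueryResult():
        dbc.execute('drop topic {}'.format(row[0]))     # else DROP DATABASE fails
    dbc.query('show streams')
    for row in dbc.getQueryResult():
        dbc.execute('drop stream {}'.format(row[0]))    # else DROP TABLE can fail
    dbc.execute('drop table {}.{}'.format(db, stb))
    dbc.execute('drop database {}'.format(db))
# ----------------------------------------------------------------------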
+ + if not self._db.exists(wt.getDbConn()): + Logging.debug("Skipping task, no DB yet") + return + + sTable = self._db.getFixedSuperTable() # type: TdSuperTable + # wt.execSql("use db") # should always be in place + tblName = sTable.getName() + if sTable.hasTopics(wt.getDbConn()): + sTable.dropTopics(wt.getDbConn(),dbname,None) # drop topics of database + sTable.dropTopics(wt.getDbConn(),dbname,tblName) # drop topics of stable + +class TaskDropStreams(StateTransitionTask): + + @classmethod + def getEndState(cls): + return StateHasData() + + @classmethod + def canBeginFrom(cls, state: AnyState): + return state.canDropStream() + + def _executeInternal(self, te: TaskExecutor, wt: WorkerThread): + # dbname = self._db.getName() + + + if not self._db.exists(wt.getDbConn()): + Logging.debug("Skipping task, no DB yet") + return + + sTable = self._db.getFixedSuperTable() # type: TdSuperTable + # wt.execSql("use db") # should always be in place + # tblName = sTable.getName() + if sTable.hasStreams(wt.getDbConn()): + sTable.dropStreams(wt.getDbConn()) # drop stream of database + # sTable.dropStreamTables(wt.getDbConn()) # drop streamtables of stable + +class TaskDropStreamTables(StateTransitionTask): + + @classmethod + def getEndState(cls): + return StateHasData() + + @classmethod + def canBeginFrom(cls, state: AnyState): + return state.canDropStream() + + def _executeInternal(self, te: TaskExecutor, wt: WorkerThread): + # dbname = self._db.getName() + + + if not self._db.exists(wt.getDbConn()): + Logging.debug("Skipping task, no DB yet") + return + + sTable = self._db.getFixedSuperTable() # type: TdSuperTable + # wt.execSql("use db") # should always be in place + # tblName = sTable.getName() + if sTable.hasStreamTables(wt.getDbConn()): + # sTable.dropStreams(wt.getDbConn()) + sTable.dropStreamTables(wt.getDbConn()) # drop stream tables class TaskCreateConsumers(StateTransitionTask): @@ -1918,9 +2037,10 @@ class TaskCreateConsumers(StateTransitionTask): # wt.execSql("use db") # should always be in place # create Consumers - if Dice.throw(50)==0: # because subscribe is cost so much time , Reduce frequency of this task - if sTable.hasTopics(wt.getDbConn()): - sTable.createConsumer(wt.getDbConn(),random.randint(1,10)) + # if Dice.throw(50)==0: # because subscribe is cost so much time , Reduce frequency of this task + if sTable.hasTopics(wt.getDbConn()): + sTable.createConsumer(wt.getDbConn(),random.randint(1,10)) + pass else: print(" restful not support tmq consumers") return @@ -1968,17 +2088,17 @@ class TdSuperTable: dbName = self._dbName if self.exists(dbc) : # if myself exists fullTableName = dbName + '.' 
+ self._stName - if self.hasStreams(dbc): - self.dropStreams(dbc) - self.dropStreamTables(dbc) - if self.hasTopics(dbc): - self.dropTopics(dbName,None) - self.dropTopics(dbName,self._stName) + # if self.hasStreams(dbc): + # self.dropStreams(dbc) + # self.dropStreamTables(dbc) + # if self.hasTopics(dbc): + # self.dropTopics(dbName,None) + # self.dropTopics(dbName,self._stName) try: dbc.execute("DROP TABLE {}".format(fullTableName)) except taos.error.ProgrammingError as err: errno = Helper.convertErrno(err.errno) - if errno in [1011,0x3F3,0x03f3,0x2662]: # table doesn't exist + if errno in [1011,0x3F3,0x03f3,0x2662]: # table doesn't exist # Stream must be dropped first, SQL: DROP TABLE db_0.fs_table pass # # stream need drop before drop table # for stream in self.getStreamName(): @@ -2011,17 +2131,21 @@ class TdSuperTable: if dbc.existsSuperTable(self._stName): if dropIfExists: - if self.hasStreams(dbc): - self.dropStreams(dbc) - self.dropStreamTables(dbc) + # if self.hasStreams(dbc): + # self.dropStreams(dbc) + # self.dropStreamTables(dbc) - # drop topics before drop stables - if self.hasTopics(dbc): - self.dropTopics(dbc,self._dbName,None) - self.dropTopics(dbc,self._dbName,self._stName ) + # # drop topics before drop stables + # if self.hasTopics(dbc): + # self.dropTopics(dbc,self._dbName,None) + # self.dropTopics(dbc,self._dbName,self._stName ) - - dbc.execute("DROP TABLE {}".format(fullTableName)) + try: + dbc.execute("DROP TABLE {}".format(fullTableName)) + except taos.error.ProgrammingError as err: + errno = Helper.convertErrno(err.errno) + if errno in [1011,0x3F3,0x03f3,0x2662]: # table doesn't exist # Stream must be dropped first, SQL: DROP TABLE db_0.fs_table + pass pass # dbc.execute("DROP TABLE {}".format(fullTableName)) @@ -2124,6 +2248,7 @@ class TdSuperTable: if dbname in topic[0] and topic[0].startswith("database"): try: dbc.execute('drop topic {}'.format(topic[0])) + Logging.debug("[OPS] topic {} is droping at {}".format(topic,time.time())) except taos.error.ProgrammingError as err: errno = Helper.convertErrno(err.errno) if errno in [0x03EB]: # Topic subscribed cannot be dropped @@ -2139,6 +2264,7 @@ class TdSuperTable: for topic in topics: if topic[0].startswith(self._dbName) and topic[0].endswith('topic'): dbc.execute('drop topic {}'.format(topic[0])) + Logging.debug("[OPS] topic {} is droping at {}".format(topic,time.time())) return True else: return True @@ -2454,15 +2580,16 @@ class TaskDropSuperTable(StateTransitionTask): # Drop the super table itself tblName = self._db.getFixedSuperTableName() - # drop streams before drop stables - if self._db.getFixedSuperTable().hasStreams(wt.getDbConn()): - self._db.getFixedSuperTable().dropStreams(wt.getDbConn()) - self._db.getFixedSuperTable().dropStreamTables(wt.getDbConn()) + # # drop streams before drop stables + # if self._db.getFixedSuperTable().hasStreams(wt.getDbConn()): + # self._db.getFixedSuperTable().dropStreams(wt.getDbConn()) + # self._db.getFixedSuperTable().dropStreamTables(wt.getDbConn()) + + # # drop topics before drop stables + # if self._db.getFixedSuperTable().hasTopics(wt.getDbConn()): + # self._db.getFixedSuperTable().dropTopics(wt.getDbConn(),self._db.getName(),None) + # self._db.getFixedSuperTable().dropTopics(wt.getDbConn(),self._db.getName(),tblName) - # drop topics before drop stables - if self._db.getFixedSuperTable().hasTopics(wt.getDbConn()): - self._db.getFixedSuperTable().dropTopics(wt.getDbConn(),self._db.getName(),None) - 
self._db.getFixedSuperTable().dropTopics(wt.getDbConn(),self._db.getName(),tblName)
+
         try:
             self.execWtSql(wt, "drop table {}.{}".format(self._db.getName(), tblName))
         except taos.error.ProgrammingError as err:
@@ -2948,6 +3075,9 @@ class ThreadStacks: # stack info for all threads
             shortTid = th.native_id % 10000 #type: ignore
             self._allStacks[shortTid] = stack # Was using th.native_id

+    def record_current_time(self,current_time):
+        self.current_time = current_time
+
     def print(self, filteredEndName = None, filterInternal = False):
         for shortTid, stack in self._allStacks.items(): # for each thread, stack frames top to bottom
             lastFrame = stack[-1]
@@ -2962,9 +3092,11 @@ class ThreadStacks: # stack info for all threads
                 continue # ignore
             # Now print
             print("\n<----- Thread Info for LWP/ID: {} (most recent call last) <-----".format(shortTid))
+
             lastSqlForThread = DbConn.fetchSqlForThread(shortTid)
-            time_cost = DbConn.get_time_cost()
-            print("Last SQL statement attempted from thread {} ({:.4f} sec ago) is: {}".format(shortTid, time_cost ,lastSqlForThread))
+            last_sql_commit_time = DbConn.get_save_sql_time(shortTid)
+            # time_cost = DbConn.get_time_cost()
+            print("Last SQL statement attempted from thread {} ({:.4f} sec ago) is: {}".format(shortTid, self.current_time-last_sql_commit_time ,lastSqlForThread))
             stackFrame = 0
             for frame in stack: # was using: reversed(stack)
                 # print(frame)
diff --git a/tests/pytest/crash_gen/shared/db.py b/tests/pytest/crash_gen/shared/db.py
index f97e4025e5..05711efbc6 100644
--- a/tests/pytest/crash_gen/shared/db.py
+++ b/tests/pytest/crash_gen/shared/db.py
@@ -32,7 +32,7 @@ class DbConn:

     # class variables
     lastSqlFromThreads : dict[int, str] = {} # stored by thread id, obtained from threading.current_thread().ident%10000
     spendThreads : dict[int, float] = {} # stored by thread id, obtained from threading.current_thread().ident%10000
-
+    current_time : dict[int, float] = {} # save current time

     @classmethod
     def saveSqlForCurrentThread(cls, sql: str):
         '''
@@ -44,6 +44,7 @@ class DbConn:
         th = threading.current_thread()
         shortTid = th.native_id % 10000 #type: ignore
         cls.lastSqlFromThreads[shortTid] = sql # Save this for later
+        cls.record_save_sql_time()

     @classmethod
     def fetchSqlForThread(cls, shortTid : int) -> str :
@@ -53,6 +54,25 @@ class DbConn:
             raise CrashGenError("No last-attempted-SQL found for thread id: {}".format(shortTid))
         return cls.lastSqlFromThreads[shortTid]

+    @classmethod
+    def get_save_sql_time(cls, shortTid : int):
+        '''
+        Return the timestamp recorded when the given thread last attempted an
+        SQL statement, so that a dead-lock report can show how long ago that
+        statement was issued.
+        '''
+        return cls.current_time[shortTid]
+
+    @classmethod
+    def record_save_sql_time(cls):
+        '''
+        Record, per thread, the time at which the current SQL statement was
+        attempted; get_save_sql_time() reads this back when diagnosing a
+        dead-locked thread.
 
     @classmethod
     def sql_exec_spend(cls, cost: float):
@@ -460,7 +480,6 @@ class DbConnNative(DbConn):
         finally:
             time_cost = time.time() - time_start
             self.sql_exec_spend(time_cost)
-
         cls = self.__class__
         cls.totalRequests += 1
@@ -541,4 +560,3 @@ class DbManager():
             self._dbConn.close()
             self._dbConn = None
             Logging.debug("DbManager closed DB connection...")
-
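In short, the db.py change above keeps two per-thread maps: the last SQL each thread attempted, and when it attempted it. A self-contained sketch of that bookkeeping, using only the standard library (the class and method names here are illustrative, not the patch's API):

    import threading
    import time

    class SqlRegistry:
        last_sql = {}   # shortTid -> last attempted SQL statement
        last_time = {}  # shortTid -> time of that attempt

        @classmethod
        def save(cls, sql):
            # mirror DbConn.saveSqlForCurrentThread(): key by a shortened thread id
            short_tid = threading.current_thread().native_id % 10000
            cls.last_sql[short_tid] = sql
            cls.last_time[short_tid] = time.time()

        @classmethod
        def age(cls, short_tid):
            # seconds since this thread last attempted a statement
            return time.time() - cls.last_time[short_tid]

A watchdog can then call age(tid) while printing thread stacks, which is exactly the quantity the print() change above reports.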
From 4774696aaeddc6e7eda26a30729f3916922c84c8 Mon Sep 17 00:00:00 2001
From: "wenzhouwww@live.cn"
Date: Thu, 10 Nov 2022 20:32:08 +0800
Subject: [PATCH 14/20] update errno lists

---
 tests/pytest/auto_crash_gen.py                  | 4 ++--
 tests/pytest/auto_crash_gen_valgrind.py         | 4 ++--
 tests/pytest/auto_crash_gen_valgrind_cluster.py | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/tests/pytest/auto_crash_gen.py b/tests/pytest/auto_crash_gen.py
index 02cca810a7..9c134e6d64 100755
--- a/tests/pytest/auto_crash_gen.py
+++ b/tests/pytest/auto_crash_gen.py
@@ -218,11 +218,11 @@ def get_auto_mix_cmds(args_list ,valgrind=valgrind_mode):
 
     if valgrind :
 
-        crash_gen_cmd = 'cd %s && ./crash_gen.sh --valgrind %s -g 0x32c,0x32d,0x3d3,0x18,0x2501,0x369,0x388,0x061a,0x2550 '%(crash_gen_path ,arguments)
+        crash_gen_cmd = 'cd %s && ./crash_gen.sh --valgrind %s -g 0x32c,0x32d,0x3d3,0x18,0x2501,0x369,0x388,0x061a,0x2550,0x0203 '%(crash_gen_path ,arguments)
 
     else:
 
-        crash_gen_cmd = 'cd %s && ./crash_gen.sh %s -g 0x32c,0x32d,0x3d3,0x18,0x2501,0x369,0x388,0x061a,0x2550'%(crash_gen_path ,arguments)
+        crash_gen_cmd = 'cd %s && ./crash_gen.sh %s -g 0x32c,0x32d,0x3d3,0x18,0x2501,0x369,0x388,0x061a,0x2550,0x0203'%(crash_gen_path ,arguments)
 
     return crash_gen_cmd
 
diff --git a/tests/pytest/auto_crash_gen_valgrind.py b/tests/pytest/auto_crash_gen_valgrind.py
index 1443dcd543..ce87fec684 100755
--- a/tests/pytest/auto_crash_gen_valgrind.py
+++ b/tests/pytest/auto_crash_gen_valgrind.py
@@ -219,11 +219,11 @@ def get_auto_mix_cmds(args_list ,valgrind=valgrind_mode):
 
     if valgrind :
 
-        crash_gen_cmd = 'cd %s && ./crash_gen.sh --valgrind %s -g 0x32c,0x32d,0x3d3,0x18,0x2501,0x369,0x388,0x061a,0x2550 '%(crash_gen_path ,arguments)
+        crash_gen_cmd = 'cd %s && ./crash_gen.sh --valgrind %s -g 0x32c,0x32d,0x3d3,0x18,0x2501,0x369,0x388,0x061a,0x2550,0x0203 '%(crash_gen_path ,arguments)
 
     else:
 
-        crash_gen_cmd = 'cd %s && ./crash_gen.sh %s -g 0x32c,0x32d,0x3d3,0x18,0x2501,0x369,0x388,0x061a,0x2550'%(crash_gen_path ,arguments)
+        crash_gen_cmd = 'cd %s && ./crash_gen.sh %s -g 0x32c,0x32d,0x3d3,0x18,0x2501,0x369,0x388,0x061a,0x2550,0x0203'%(crash_gen_path ,arguments)
 
     return crash_gen_cmd
 
diff --git a/tests/pytest/auto_crash_gen_valgrind_cluster.py b/tests/pytest/auto_crash_gen_valgrind_cluster.py
index 05cdaa6cc5..f4afa80afe 100755
--- a/tests/pytest/auto_crash_gen_valgrind_cluster.py
+++ b/tests/pytest/auto_crash_gen_valgrind_cluster.py
@@ -219,11 +219,11 @@ def get_auto_mix_cmds(args_list ,valgrind=valgrind_mode):
 
     if valgrind :
 
-        crash_gen_cmd = 'cd %s && ./crash_gen.sh --valgrind -i 3 %s -g 0x32c,0x32d,0x3d3,0x18,0x2501,0x369,0x388,0x061a,0x2550,0x0707 '%(crash_gen_path ,arguments)
+        crash_gen_cmd = 'cd %s && ./crash_gen.sh --valgrind -i 3 %s -g 0x32c,0x32d,0x3d3,0x18,0x2501,0x369,0x388,0x061a,0x2550,0x0707,0x0203 '%(crash_gen_path ,arguments)
 
     else:
 
-        crash_gen_cmd = 'cd %s && ./crash_gen.sh -i 3 %s -g 0x32c,0x32d,0x3d3,0x18,0x2501,0x369,0x388,0x061a,0x2550,0x0014,0x0707'%(crash_gen_path ,arguments)
+        crash_gen_cmd = 'cd %s && ./crash_gen.sh -i 3 %s -g 0x32c,0x32d,0x3d3,0x18,0x2501,0x369,0x388,0x061a,0x2550,0x0014,0x0707,0x0203'%(crash_gen_path ,arguments)
 
     return crash_gen_cmd
 
From 4eb5c2cae7f146e190276472ec9b173e0b9996c8 Mon Sep 17 00:00:00 2001
From: "wenzhouwww@live.cn"
Date: Fri, 11 Nov 2022 13:49:01 +0800
Subject: [PATCH 15/20] update task weighting and state-machine error handling

---
 tests/pytest/crash_gen/crash_gen_main.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/pytest/crash_gen/crash_gen_main.py b/tests/pytest/crash_gen/crash_gen_main.py
index ba501c3e78..4fa19f543d 100755
--- a/tests/pytest/crash_gen/crash_gen_main.py
+++ b/tests/pytest/crash_gen/crash_gen_main.py
@@ -953,7 +953,7 @@ class StateMechine:
         except taos.error.ProgrammingError as err:
             Logging.error("Failed to initialize state machine, cannot find current state: {}".format(err))
             traceback.print_stack()
-            pass # re-throw
+            raise # re-throw
 
     # TODO: seems no longer used, remove?
     def getCurrentState(self):
@@ -1095,13 +1095,13 @@ class StateMechine:
         BasicTypes = self.getTaskTypes()
         weightsTypes = BasicTypes.copy()
-        # this matrix can balance the frequency of the different types of tasks
-        weight_matrixs = {'TaskDropDb': 5 , 'TaskDropTopics': 20 , 'TaskDropStreams':10 , 'TaskDropStreamTables':10 ,
+        # this matrix balances the frequency of the different TaskTypes
+        balance_TaskType_matrixs = {'TaskDropDb': 5 , 'TaskDropTopics': 20 , 'TaskDropStreams':10 , 'TaskDropStreamTables':10 ,
                         'TaskReadData':50 , 'TaskDropSuperTable':5 , 'TaskAlterTags':3 , 'TaskAddData':10,
                         'TaskDeleteData':10 , 'TaskCreateDb':10 , 'TaskCreateStream': 3, 'TaskCreateTopic' :3,
-                        'TaskCreateConsumers':10, 'TaskCreateSuperTable': 10 }  # TaskType : weight matrix
+                        'TaskCreateConsumers':10, 'TaskCreateSuperTable': 10 }  # TaskType : weight used to balance the task mix
 
-        for task , weights in weight_matrixs.items():
+        for task , weights in balance_TaskType_matrixs.items():
 
             for basicType in BasicTypes:
                 if basicType.__name__ == task:
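The loop added above expands the task-type list so that each TaskType appears as many times as its weight, which makes a uniform draw over the expanded list equivalent to a weighted draw. A small self-contained sketch of the same idea (the weight values are illustrative):

    import random

    weights = {'TaskReadData': 50, 'TaskAddData': 10, 'TaskDropDb': 5}

    def pick_task(task_names, weight_table):
        # expand each name by its weight, then draw uniformly --
        # a simple weighted random choice
        expanded = []
        for name in task_names:
            expanded.extend([name] * weight_table.get(name, 1))
        return random.choice(expanded)

    print(pick_task(list(weights), weights))  # 'TaskReadData' roughly 77% of the time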
From 9c7913b00b1100cb48845af2cb640fd594a65f71 Mon Sep 17 00:00:00 2001
From: wenzhouwww
Date: Wed, 16 Nov 2022 10:39:37 +0800
Subject: [PATCH 16/20] Update crash_gen_main.py

---
 tests/pytest/crash_gen/crash_gen_main.py | 244 +++++------------------
 1 file changed, 48 insertions(+), 196 deletions(-)

diff --git a/tests/pytest/crash_gen/crash_gen_main.py b/tests/pytest/crash_gen/crash_gen_main.py
index 4fa19f543d..90d27c3b8d 100755
--- a/tests/pytest/crash_gen/crash_gen_main.py
+++ b/tests/pytest/crash_gen/crash_gen_main.py
@@ -1415,6 +1415,7 @@ class Task():
                 0x03E1, # failed on tmq_subscribe ,topic not exist
                 0x03ed , # Topic must be dropped first, SQL: drop database db_0
                 0x0203 , # Invalid value
+                0x03f0 , # stream already exists / topic already exists
 
@@ -1732,12 +1733,6 @@ class TaskDropDb(StateTransitionTask):
 
     def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
-        # drop topics before drop db
-        # if self._db.getFixedSuperTable().hasTopics(wt.getDbConn()):
-            # self._db.getFixedSuperTable().dropTopics(wt.getDbConn(),self._db.getName(),None)
-            # self._db.getFixedSuperTable().dropTopics(wt.getDbConn(),self._db.getName(),self._db.getFixedSuperTableName)
         try:
             self.queryWtSql(wt, "drop database {}".format(self._db.getName())) # drop database may fail because a topic still exists
         except taos.error.ProgrammingError as err:
@@ -1748,10 +1743,6 @@ class TaskDropDb(StateTransitionTask):
             Logging.debug("[OPS] database dropped at {}".format(time.time()))
 
-
-# Streams will generator TD-20237 (it will crash taosd , start this task when this issue fixed )
-
-
 class TaskCreateStream(StateTransitionTask):
 
     @classmethod
@@ -1775,60 +1766,25 @@ class TaskCreateStream(StateTransitionTask):
         sTable = self._db.getFixedSuperTable() # type: TdSuperTable
         # wt.execSql("use db") # should always be in place
-
-        # create stream
-        # CREATE STREAM avg_vol_s INTO avg_vol AS SELECT _wstartts, count(*), avg(voltage) FROM meters PARTITION BY tbname INTERVAL(1m) SLIDING(30s);
-
         stbname =sTable.getName()
         sub_tables = sTable.getRegTables(wt.getDbConn())
         aggExpr = Dice.choice([
-            'count(*)',
-            'avg(speed)',
-            # 'twa(speed)', # TODO: this one REQUIRES a where statement, not reasonable
-            'sum(speed)',
-            'stddev(speed)',
-            # SELECTOR functions
-            'min(speed)',
-            'max(speed)',
-            'first(speed)',
-            'last(speed)',
-            'apercentile(speed, 10)', # TODO: TD-1316
-            'last_row(*)', # TODO: commented out per TD-3231, we should re-create
-            # Transformation Functions
-            'twa(speed)'
-            ]) # TODO: add more from 'top'
+            'count(*)', 'avg(speed)', 'sum(speed)', 'stddev(speed)','min(speed)', 'max(speed)', 'first(speed)', 'last(speed)',
+            'apercentile(speed, 10)', 'last_row(*)', 'twa(speed)'])
+
+        stream_sql = ''  # set default value
         if sub_tables:
-
-            if sub_tables: # if not empty
-                sub_tbname = sub_tables[0]
-                # create stream with query above sub_table
-                stream_sql = 'create stream {} into {}.{} as select {}, avg(speed) FROM {}.{} PARTITION BY tbname INTERVAL(5s) SLIDING(3s) '.format(sub_stream_name,dbname,sub_stream_tb_name ,aggExpr,dbname,sub_tbname)
-                try:
-                    self.execWtSql(wt, stream_sql)
-                    Logging.debug("[OPS] stream is creating at {}".format(time.time()))
-                except taos.error.ProgrammingError as err:
-                    errno = Helper.convertErrno(err.errno)
-                    if errno in [0x03f0]: # stream already exists
-                        # stream need drop before drop table
-                        pass
-            else:
-                pass
+            sub_tbname = sub_tables[0]
+            # create stream with a query over the sub table
+            stream_sql = 'create stream {} into {}.{} as select {}, avg(speed) FROM {}.{} PARTITION BY tbname INTERVAL(5s) SLIDING(3s) '.\
+                format(sub_stream_name,dbname,sub_stream_tb_name ,aggExpr,dbname,sub_tbname)
         else:
-            stream_sql = 'create stream {} into {}.{} as select {}, avg(speed) FROM {}.{} PARTITION BY tbname INTERVAL(5s) SLIDING(3s) '.format(super_stream_name,dbname,super_stream_tb_name,aggExpr, dbname,stbname)
+            stream_sql = 'create stream {} into {}.{} as select {}, avg(speed) FROM {}.{} PARTITION BY tbname INTERVAL(5s) SLIDING(3s) '.\
+                format(super_stream_name,dbname,super_stream_tb_name,aggExpr, dbname,stbname)
+        self.execWtSql(wt, stream_sql)
+        Logging.debug("[OPS] stream is being created at {}".format(time.time()))
 
-        try:
-            self.execWtSql(wt, stream_sql)
-            Logging.debug("[OPS] stream is creating at {}".format(time.time()))
-        except taos.error.ProgrammingError as err:
-            errno = Helper.convertErrno(err.errno)
-            if errno in [0x03f0]: # stream already exists
-                # stream need drop before drop table
-                pass
 
 class TaskCreateTopic(StateTransitionTask):
 
     @classmethod
@@ -1853,96 +1809,37 @@ class TaskCreateTopic(StateTransitionTask):
         sTable = self._db.getFixedSuperTable() # type: TdSuperTable
         # wt.execSql("use db") # should always be in place
-
-        # create topic
-        # create topic if not exists topic_ctb_column as select ts, c1, c2, c3 from stb1;
-
         stbname =sTable.getName()
         sub_tables = sTable.getRegTables(wt.getDbConn())
-        scalarExpr = Dice.choice([ '*','speed','color',
-            'abs(speed)',
-            'acos(speed)',
-            'asin(speed)',
-            'atan(speed)',
-            'ceil(speed)',
-            'cos(speed)',
-            'cos(speed)',
-            'floor(speed)',
-            'log(speed,2)',
-            'pow(speed,2)',
-            'round(speed)',
-            'sin(speed)',
-            'sqrt(speed)',
-            'char_length(color)',
-            'concat(color,color)',
-            'concat_ws(" ", color,color," ")',
-            'length(color)',
-            'lower(color)',
-            'ltrim(color)',
-            'substr(color , 2)',
-            'upper(color)',
-            'cast(speed as double)',
-            'cast(ts as bigint)',
-            ]) # TODO: add more from 'top'
-        if Dice.throw(3)==0:
-            if sub_tables:
-
-                if sub_tables: # if not empty
-                    sub_tbname = sub_tables[0]
-                    # create stream with query above sub_table
-                    topic_sql = 'create topic {} as select {} FROM {}.{} ; '.format(sub_topic_name,scalarExpr,dbname,sub_tbname)
-                    try:
-                        self.execWtSql(wt, "use {}".format(dbname))
-                        self.execWtSql(wt, topic_sql)
-                        Logging.debug("[OPS] topic is creating at {}".format(time.time()))
-                    except taos.error.ProgrammingError as err:
-                        errno = Helper.convertErrno(err.errno)
-                        if errno in [0x03f0 ]: # topic already exists
-                            # topic need drop before drop table
-                            pass
-                else:
-                    pass
-            else:
+        scalarExpr = Dice.choice([ '*','speed','color','abs(speed)','acos(speed)','asin(speed)','atan(speed)','ceil(speed)','cos(speed)','cos(speed)',
+            'floor(speed)','log(speed,2)','pow(speed,2)','round(speed)','sin(speed)','sqrt(speed)','char_length(color)','concat(color,color)',
+            'concat_ws(" ", color,color," ")','length(color)', 'lower(color)', 'ltrim(color)','substr(color , 2)','upper(color)','cast(speed as double)',
+            'cast(ts as bigint)'])
+        topic_sql = ''  # set default value
+        if Dice.throw(3)==0:  # create topic : source data from a sub query
+            if sub_tables: # if not empty
+                sub_tbname = sub_tables[0]
+                # create topic : source data from a sub query over the sub table
+                topic_sql = 'create topic {} as select {} FROM {}.{} ; '.format(sub_topic_name,scalarExpr,dbname,sub_tbname)
+
+            else:  # create topic : source data from a sub query over the super table
                 topic_sql = 'create topic {} as select {} FROM {}.{} '.format(super_topic_name,scalarExpr, dbname,stbname)
-                try:
-                    self.execWtSql(wt, "use {}".format(dbname))
-                    self.execWtSql(wt, topic_sql)
-                    Logging.debug("[OPS] subquery topic is creating at {}".format(time.time()))
-                except taos.error.ProgrammingError as err:
-                    errno = Helper.convertErrno(err.errno)
-                    if errno in [0x03f0]: # topic already exists
-                        # topic need drop before drop table
-                        pass
-        elif Dice.throw(3)==1:
+        elif Dice.throw(3)==1:  # create topic : source data from the super table
             topic_sql = 'create topic {} AS STABLE {}.{} '.format(stable_topic,dbname,stbname)
-            try:
-                self.execWtSql(wt, "use {}".format(dbname))
-                self.queryWtSql(wt, topic_sql)
-                Logging.debug("[OPS] stable topic is creating at {}".format(time.time()))
-            except taos.error.ProgrammingError as err:
-                errno = Helper.convertErrno(err.errno)
-                if errno in [0x03f0]: # topic already exists
-                    # topic need drop before drop table
-                    pass
-        elif Dice.throw(3)==2:
-            topic_sql = 'create topic {} AS DATABASE {} '.format(db_topic,dbname)
-            try:
-                self.execWtSql(wt, "use {}".format(dbname))
-                self.execWtSql(wt, topic_sql)
-                Logging.debug("[OPS] db topic is creating at {}".format(time.time()))
-            except taos.error.ProgrammingError as err:
-                errno = Helper.convertErrno(err.errno)
-                if errno in [0x03f0]: # topic already exists
-                    # topic need drop before drop table
-                    pass
+
+        elif Dice.throw(3)==2:  # create topic : source data from the whole database
+            topic_sql = 'create topic {} AS DATABASE {} '.format(db_topic,dbname)
         else:
             pass
+
+        # execute the create topic statement
+        self.execWtSql(wt, "use {}".format(dbname))
+        self.execWtSql(wt, topic_sql)
+        Logging.debug("[OPS] topic is being created at {}".format(time.time()))
 
 class TaskDropTopics(StateTransitionTask):
 
     @classmethod
@@ -1991,7 +1888,6 @@ class TaskDropStreams(StateTransitionTask):
         # tblName = sTable.getName()
         if sTable.hasStreams(wt.getDbConn()):
             sTable.dropStreams(wt.getDbConn()) # drop the streams of the database
-            # sTable.dropStreamTables(wt.getDbConn()) # drop streamtables of stable
 
 class TaskDropStreamTables(StateTransitionTask):
@@ -2012,10 +1908,9 @@ class TaskDropStreamTables(StateTransitionTask):
             return
         sTable = self._db.getFixedSuperTable() # type: TdSuperTable
-        # wt.execSql("use db") # should always be in place
+        wt.execSql("use db") # should always be in place
         # tblName = sTable.getName()
         if sTable.hasStreamTables(wt.getDbConn()):
-            # sTable.dropStreams(wt.getDbConn())
             sTable.dropStreamTables(wt.getDbConn()) # drop stream tables
 
 class TaskCreateConsumers(StateTransitionTask):
@@ -2031,13 +1926,9 @@ class TaskCreateConsumers(StateTransitionTask):
 
     def _executeInternal(self, te: TaskExecutor, wt: WorkerThread):
         if Config.getConfig().connector_type == 'native':
-            dbname = self._db.getName()
-
+
             sTable = self._db.getFixedSuperTable() # type: TdSuperTable
             # wt.execSql("use db") # should always be in place
-
-            # create Consumers
-            # if Dice.throw(50)==0: # because subscribe is cost so much time , Reduce frequency of this task
             if sTable.hasTopics(wt.getDbConn()):
                 sTable.createConsumer(wt.getDbConn(),random.randint(1,10))
                 pass
@@ -2088,30 +1979,13 @@ class TdSuperTable:
         dbName = self._dbName
         if self.exists(dbc) : # if myself exists
             fullTableName = dbName + '.' + self._stName
-            # if self.hasStreams(dbc):
-            #     self.dropStreams(dbc)
-            #     self.dropStreamTables(dbc)
-            # if self.hasTopics(dbc):
-            #     self.dropTopics(dbName,None)
-            #     self.dropTopics(dbName,self._stName)
+
             try:
                 dbc.execute("DROP TABLE {}".format(fullTableName))
             except taos.error.ProgrammingError as err:
                 errno = Helper.convertErrno(err.errno)
                 if errno in [1011,0x3F3,0x03f3,0x2662]: # table doesn't exist, or stream must be dropped first (SQL: DROP TABLE db_0.fs_table)
                     pass
-            # # stream need drop before drop table
-            # for stream in self.getStreamName():
-            #     drop_stream_sql = 'drop stream {}'.format(stream)
-            #     try:
-            #         dbc.execute(drop_stream_sql)
-            #     except taos.error.ProgrammingError as err:
-            #         # correcting for strange error number scheme
-            #         errno3 = Helper.convertErrno(err.errno)
-            #         if errno3 in [1011,0x3F3,0x03f3,0x2662,0x03f1]: # stream not exists
-            #             pass
-            # dbc.execute("DROP TABLE {}".format(fullTableName))
-            # pass
         else:
             if not skipCheck:
@@ -2131,15 +2005,6 @@ class TdSuperTable:
 
         if dbc.existsSuperTable(self._stName):
             if dropIfExists:
-                # if self.hasStreams(dbc):
-                #     self.dropStreams(dbc)
-                #     self.dropStreamTables(dbc)
-
-                # # drop topics before drop stables
-                # if self.hasTopics(dbc):
-                #     self.dropTopics(dbc,self._dbName,None)
-                #     self.dropTopics(dbc,self._dbName,self._stName )
-
                 try:
                     dbc.execute("DROP TABLE {}".format(fullTableName))
                 except taos.error.ProgrammingError as err:
@@ -2164,7 +2029,7 @@ class TdSuperTable:
             sql += " TAGS (dummy int) "
         dbc.execute(sql)
 
-    def createConsumer(self, dbc: DbConn , Consumer_nums):
+    def createConsumer(self, dbc, Consumer_nums):
 
         def generateConsumer(current_topic_list):
             conf = TaosTmqConf()
@@ -2183,7 +2048,14 @@ class TdSuperTable:
             try:
                 consumer.subscribe(topic_list)
             except TmqError as e :
                 pass
-            time.sleep(5) # consumer work only 5 sec ,and then it will exit
+
+            # consumer works for only about 5 sec, then exits
+            time_start = time.time()
+            while 1:
+                res = consumer.poll(1000)
+                if time.time() - time_start >5 :
+                    break
+            # time.sleep(10)
             try:
                 consumer.unsubscribe()
             except TmqError as e :
                 pass
@@ -2579,29 +2451,9 @@ class TaskDropSuperTable(StateTransitionTask):
             # Drop the super table itself
             tblName = self._db.getFixedSuperTableName()
-
-            # # drop streams before drop stables
-            # if self._db.getFixedSuperTable().hasStreams(wt.getDbConn()):
-            #     self._db.getFixedSuperTable().dropStreams(wt.getDbConn())
-            #     self._db.getFixedSuperTable().dropStreamTables(wt.getDbConn())
+            self.execWtSql(wt, "drop table {}.{}".format(self._db.getName(), tblName))
+
-            # # drop topics before drop stables
-            # if self._db.getFixedSuperTable().hasTopics(wt.getDbConn()):
-            #     self._db.getFixedSuperTable().dropTopics(wt.getDbConn(),self._db.getName(),None)
-            #     self._db.getFixedSuperTable().dropTopics(wt.getDbConn(),self._db.getName(),tblName)
-
-            try:
-                self.execWtSql(wt, "drop table {}.{}".format(self._db.getName(), tblName))
-            except taos.error.ProgrammingError as err:
-                # correcting for strange error number scheme
-                errno2 = Helper.convertErrno(err.errno)
-                if (errno2 in [0x362]): # mnode invalid table name
-                    isSuccess = False
-                    Logging.debug("[DB] Acceptable error when dropping a table")
-                elif errno2 in [1011,0x3F3,0x03f3]: # table doesn't exist
-                    pass
 
 class TaskAlterTags(StateTransitionTask):
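One behavioral change in this patch worth calling out: createConsumer() no longer just sleeps -- it polls the subscription in a bounded loop, so the consumer makes progress before unsubscribing. A standalone sketch of that loop (the consumer object is assumed to expose poll() and unsubscribe() as the TMQ consumer in the hunk does; the function itself is illustrative, not the connector's API):

    import time

    def drain_for(consumer, lifetime_sec):
        # poll with a 1s timeout until the lifetime expires; results are
        # deliberately discarded -- crash_gen only exercises the code path
        deadline = time.time() + lifetime_sec
        while time.time() < deadline:
            consumer.poll(1000)
        try:
            consumer.unsubscribe()
        except Exception:
            pass  # best effort, mirroring the TmqError handling above

Sampling the lifetime once, before the loop, keeps the bound stable; re-rolling it on every iteration (as the next patch does with random.randint inside the loop condition) tends to bias the exit toward the low end of the range rather than giving a single uniform draw.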
From ea591977bcd22ed2163bf53bb22e348cbbb3d5ca Mon Sep 17 00:00:00 2001
From: wenzhouwww
Date: Tue, 22 Nov 2022 15:48:47 +0800
Subject: [PATCH 17/20] Update crash_gen_main.py

---
 tests/pytest/crash_gen/crash_gen_main.py | 39 +++++++++---------------
 1 file changed, 15 insertions(+), 24 deletions(-)

diff --git a/tests/pytest/crash_gen/crash_gen_main.py b/tests/pytest/crash_gen/crash_gen_main.py
index 90d27c3b8d..f8c5f970c5 100755
--- a/tests/pytest/crash_gen/crash_gen_main.py
+++ b/tests/pytest/crash_gen/crash_gen_main.py
@@ -1978,15 +1978,8 @@ class TdSuperTable:
     def drop(self, dbc, skipCheck = False):
         dbName = self._dbName
         if self.exists(dbc) : # if myself exists
-            fullTableName = dbName + '.' + self._stName
-
-            try:
-                dbc.execute("DROP TABLE {}".format(fullTableName))
-            except taos.error.ProgrammingError as err:
-                errno = Helper.convertErrno(err.errno)
-                if errno in [1011,0x3F3,0x03f3,0x2662]: # table doesn't exist, or stream must be dropped first (SQL: DROP TABLE db_0.fs_table)
-                    pass
-
+            fullTableName = dbName + '.' + self._stName
+            dbc.execute("DROP TABLE {}".format(fullTableName))
         else:
             if not skipCheck:
                 raise CrashGenError("Cannot drop non-existent super table: {}".format(self._stName))
@@ -2004,16 +1997,9 @@ class TdSuperTable:
         fullTableName = dbName + '.' + self._stName
 
         if dbc.existsSuperTable(self._stName):
-            if dropIfExists:
-                try:
-                    dbc.execute("DROP TABLE {}".format(fullTableName))
-                except taos.error.ProgrammingError as err:
-                    errno = Helper.convertErrno(err.errno)
-                    if errno in [1011,0x3F3,0x03f3,0x2662]: # table doesn't exist, or stream must be dropped first (SQL: DROP TABLE db_0.fs_table)
-                        pass
-
-                pass
-                # dbc.execute("DROP TABLE {}".format(fullTableName))
+            if dropIfExists:
+                dbc.execute("DROP TABLE {}".format(fullTableName))
 
             else: # error
                 raise CrashGenError("Cannot create super table, already exists: {}".format(self._stName))
@@ -2049,13 +2035,12 @@ class TdSuperTable:
             except TmqError as e :
                 pass
 
-            # consumer works for only about 5 sec, then exits
+            # consumer with a random working lifetime
             time_start = time.time()
             while 1:
                 res = consumer.poll(1000)
-                if time.time() - time_start >5 :
+                if time.time() - time_start >random.randint(5,50) :
                     break
             try:
                 consumer.unsubscribe()
             except TmqError as e :
@@ -2435,11 +2420,15 @@ class TaskDropSuperTable(StateTransitionTask):
             for i in tblSeq:
                 regTableName = self.getRegTableName(i) # "db.reg_table_{}".format(i)
                 try:
-
                     self.execWtSql(wt, "drop table {}.{}". format(self._db.getName(), regTableName)) # nRows always 0, like MySQL
                 except taos.error.ProgrammingError as err:
-                    pass
+                    # correcting for strange error number scheme
+                    errno2 = Helper.convertErrno(err.errno)
+                    if (errno2 in [0x362]): # mnode invalid table name
+                        isSuccess = False
+                        Logging.debug("[DB] Acceptable error when dropping a table")
+                    continue # try to delete the next regular table
 
                 if (not tickOutput):
@@ -2957,6 +2946,8 @@ class ThreadStacks: # stack info for all threads
                 print("        {}".format(frame.line))
                 stackFrame += 1
             print("-----> End of Thread Info ----->\n")
+            if self.current_time-last_sql_commit_time >100: # dead-lock likely occurred
+                print("thread {} may be deadlocked".format(shortTid))
 
 class ClientManager:
     def __init__(self):
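The ThreadStacks change above flags a thread as possibly deadlocked when its last SQL attempt is older than a fixed threshold. The check in isolation (the threshold mirrors the hunk; the function name and signature are illustrative):

    import time

    DEADLOCK_THRESHOLD_SEC = 100  # same cutoff as the hunk above

    def report_if_stuck(short_tid, last_sql_time, now=None):
        # age of the thread's last SQL attempt; beyond the threshold we
        # assume the thread is stuck and flag it for investigation
        now = time.time() if now is None else now
        age = now - last_sql_time
        if age > DEADLOCK_THRESHOLD_SEC:
            print("thread {} may be deadlocked ({:.1f}s since last SQL)".format(short_tid, age))
        return age

    report_if_stuck(1234, time.time() - 150)  # flags: ~150s since last SQL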
From 7a81c2e2d0de73a45eff5e0a79204d6ae51c7227 Mon Sep 17 00:00:00 2001
From: wenzhouwww
Date: Tue, 22 Nov 2022 15:49:18 +0800
Subject: [PATCH 18/20] Update db.py

From a6ac48700941b1084f386ac034b9f47ec8f1ccf5 Mon Sep 17 00:00:00 2001
From: wenzhouwww
Date: Tue, 22 Nov 2022 15:50:32 +0800
Subject: [PATCH 19/20] Update crash_gen_main.py

From ca3c1f714749851f99f26e8d4f6483c57f30a041 Mon Sep 17 00:00:00 2001
From: wenzhouwww
Date: Tue, 22 Nov 2022 16:08:14 +0800
Subject: [PATCH 20/20] add docker run exec script

---
 tests/pytest/docker_exec_service.sh | 31 +++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100644 tests/pytest/docker_exec_service.sh

diff --git a/tests/pytest/docker_exec_service.sh b/tests/pytest/docker_exec_service.sh
new file mode 100644
index 0000000000..4156a0bae5
--- /dev/null
+++ b/tests/pytest/docker_exec_service.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+mkdir -p /data/wz/crash_gen_logs/
+logdir='/data/wz/crash_gen_logs/'
+date_tag=`date +%Y%m%d-%H%M%S`
+hostname='vm_valgrind_'
+
+for i in {1..50}
+do
+    echo $i
+    # create a container and start crash_gen inside it
+    log_dir=${logdir}${hostname}${date_tag}_${i}
+    docker run -d --hostname=${hostname}${date_tag}_${i} --name ${hostname}${date_tag}_${i} --privileged -v ${log_dir}:/corefile/ -- crash_gen:v1.0 sleep 99999999999999
+    echo created docker container ${hostname}${date_tag}_${i}
+    docker exec -d ${hostname}${date_tag}_${i} sh -c 'rm -rf /home/taos-connector-python'
+    docker cp /data/wz/TDengine ${hostname}${date_tag}_${i}:/home/TDengine
+    docker cp /data/wz/taos-connector-python ${hostname}${date_tag}_${i}:/home/taos-connector-python
+    echo copied TDengine into container
+    docker exec ${hostname}${date_tag}_${i} sh -c 'sh /home/TDengine/tests/pytest/auto_run_valgrind.sh '
+    if [ $? -eq 0 ]
+    then
+        echo crash_gen exited as expected , run succeeded
+
+        # clean up the container when the run succeeded
+        docker stop ${hostname}${date_tag}_${i}
+        docker rm -f ${hostname}${date_tag}_${i}
+    else
+        docker stop ${hostname}${date_tag}_${i}
+        echo crash_gen exited with an error , run failed
+    fi
+done
\ No newline at end of file
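Since each container bind-mounts its per-run log directory onto /corefile/, a crashed run leaves its core dump under /data/wz/crash_gen_logs/<container>/. A small follow-up sketch for spotting those runs after the loop finishes (the path comes from the script above; the non-empty-directory heuristic is an assumption):

    import os

    LOG_ROOT = '/data/wz/crash_gen_logs/'

    def runs_with_artifacts(log_root=LOG_ROOT):
        # any non-empty per-container directory is a run that wrote core
        # files (or other artifacts) and deserves a closer look
        hits = []
        for name in sorted(os.listdir(log_root)):
            path = os.path.join(log_root, name)
            if os.path.isdir(path) and os.listdir(path):
                hits.append(name)
        return hits

    print(runs_with_artifacts())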