From bd643aaefd63e1073525ea3f4dbc5e7d283b9dff Mon Sep 17 00:00:00 2001 From: "wenzhouwww@live.cn" Date: Wed, 20 Jul 2022 16:43:46 +0800 Subject: [PATCH 1/9] test:add vnode test case --- .../4dnode1mnode_basic_createDb_replica1.py | 138 ++++++++++++++ ...4dnode1mnode_basic_replica1_insertdatas.py | 179 ++++++++++++++++++ 2 files changed, 317 insertions(+) create mode 100644 tests/system-test/6-cluster/vnode/4dnode1mnode_basic_createDb_replica1.py create mode 100644 tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica1_insertdatas.py diff --git a/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_createDb_replica1.py b/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_createDb_replica1.py new file mode 100644 index 0000000000..28d5b47b4f --- /dev/null +++ b/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_createDb_replica1.py @@ -0,0 +1,138 @@ +# author : wenzhouwww +from ssl import ALERT_DESCRIPTION_CERTIFICATE_UNOBTAINABLE +import taos +import sys +import time +import os + +from util.log import * +from util.sql import * +from util.cases import * +from util.dnodes import TDDnodes +from util.dnodes import TDDnode +from util.cluster import * + +import time +import socket +import subprocess +sys.path.append(os.path.dirname(__file__)) + +class TDTestCase: + def init(self,conn ,logSql): + tdLog.debug(f"start to excute {__file__}") + tdSql.init(conn.cursor()) + self.host = socket.gethostname() + self.mnode_list = {} + self.dnode_list = {} + + def getBuildPath(self): + selfPath = os.path.dirname(os.path.realpath(__file__)) + if ("community" in selfPath): + projPath = selfPath[:selfPath.find("community")] + else: + projPath = selfPath[:selfPath.find("tests")] + + for root, dirs, files in os.walk(projPath): + if ("taosd" in files): + rootRealPath = os.path.dirname(os.path.realpath(root)) + if ("packaging" not in rootRealPath): + buildPath = root[:len(root) - len("/build/bin")] + break + return buildPath + + def check_setup_cluster_status(self): + tdSql.query("show mnodes") + for mnode in tdSql.queryResult: + name = mnode[1] + info = mnode + self.mnode_list[name] = info + + tdSql.query("show dnodes") + for dnode in tdSql.queryResult: + name = dnode[1] + info = dnode + self.dnode_list[name] = info + + count = 0 + is_leader = False + mnode_name = '' + for k,v in self.mnode_list.items(): + count +=1 + # only for 1 mnode + mnode_name = k + + if v[2] =='leader': + is_leader=True + + if count==1 and is_leader: + tdLog.info("===== depoly cluster success with 1 mnode as leader =====") + else: + tdLog.exit("===== depoly cluster fail with 1 mnode as leader =====") + + for k ,v in self.dnode_list.items(): + if k == mnode_name: + if v[3]==0: + tdLog.info("===== depoly cluster mnode only success at {} , support_vnodes is {} ".format(mnode_name,v[3])) + else: + tdLog.exit("===== depoly cluster mnode only fail at {} , support_vnodes is {} ".format(mnode_name,v[3])) + else: + continue + + def create_db_check_vgroups(self): + + tdSql.execute("drop database if exists test") + tdSql.execute("create database if not exists test replica 1 duration 300") + tdSql.execute("use test") + tdSql.execute( + '''create table stb1 + (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(16),c9 nchar(32), c10 timestamp) + tags (t1 int) + ''' + ) + tdSql.execute( + ''' + create table t1 + (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(16),c9 nchar(32), c10 timestamp) + ''' + ) + + for i in range(5): + tdSql.execute("create table sub_tb_{} using stb1 tags({})".format(i,i)) + tdSql.query("show stables") + tdSql.checkRows(1) + tdSql.query("show tables") + tdSql.checkRows(6) + + tdSql.query("show test.vgroups;") + vgroups_infos = {} # key is id: value is info list + for vgroup_info in tdSql.queryResult: + vgroup_id = vgroup_info[0] + tmp_list = [] + for role in vgroup_info[3:-3]: + if role in ['leader','follower']: + tmp_list.append(role) + vgroups_infos[vgroup_id]=tmp_list + + for k , v in vgroups_infos.items(): + if len(v) ==1 and v[0]=="leader": + tdLog.info(" === create database replica only 1 role leader check success of vgroup_id {} ======".format(k)) + else: + tdLog.exit(" === create database replica only 1 role leader check fail of vgroup_id {} ======".format(k)) + + def getConnection(self, dnode): + host = dnode.cfgDict["fqdn"] + port = dnode.cfgDict["serverPort"] + config_dir = dnode.cfgDir + return taos.connect(host=host, port=int(port), config=config_dir) + + + def run(self): + self.check_setup_cluster_status() + self.create_db_check_vgroups() + + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) \ No newline at end of file diff --git a/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica1_insertdatas.py b/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica1_insertdatas.py new file mode 100644 index 0000000000..e113e2a6e5 --- /dev/null +++ b/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica1_insertdatas.py @@ -0,0 +1,179 @@ +# author : wenzhouwww +from ssl import ALERT_DESCRIPTION_CERTIFICATE_UNOBTAINABLE +import taos +import sys +import time +import os + +from util.log import * +from util.sql import * +from util.cases import * +from util.dnodes import TDDnodes +from util.dnodes import TDDnode +from util.cluster import * + +import time +import socket +import subprocess +sys.path.append(os.path.dirname(__file__)) + +class TDTestCase: + def init(self,conn ,logSql): + tdLog.debug(f"start to excute {__file__}") + tdSql.init(conn.cursor()) + self.host = socket.gethostname() + self.mnode_list = {} + self.dnode_list = {} + self.ts = 1483200000000 + self.db_name ='testdb' + self.replica = 1 + self.vgroups = 2 + self.tb_nums = 10 + self.row_nums = 100 + + def getBuildPath(self): + selfPath = os.path.dirname(os.path.realpath(__file__)) + if ("community" in selfPath): + projPath = selfPath[:selfPath.find("community")] + else: + projPath = selfPath[:selfPath.find("tests")] + + for root, dirs, files in os.walk(projPath): + if ("taosd" in files): + rootRealPath = os.path.dirname(os.path.realpath(root)) + if ("packaging" not in rootRealPath): + buildPath = root[:len(root) - len("/build/bin")] + break + return buildPath + + def check_setup_cluster_status(self): + tdSql.query("show mnodes") + for mnode in tdSql.queryResult: + name = mnode[1] + info = mnode + self.mnode_list[name] = info + + tdSql.query("show dnodes") + for dnode in tdSql.queryResult: + name = dnode[1] + info = dnode + self.dnode_list[name] = info + + count = 0 + is_leader = False + mnode_name = '' + for k,v in self.mnode_list.items(): + count +=1 + # only for 1 mnode + mnode_name = k + + if v[2] =='leader': + is_leader=True + + if count==1 and is_leader: + tdLog.info("===== depoly cluster success with 1 mnode as leader =====") + else: + tdLog.exit("===== depoly cluster fail with 1 mnode as leader =====") + + for k ,v in self.dnode_list.items(): + if k == mnode_name: + if v[3]==0: + tdLog.info("===== depoly cluster mnode only success at {} , support_vnodes is {} ".format(mnode_name,v[3])) + else: + tdLog.exit("===== depoly cluster mnode only fail at {} , support_vnodes is {} ".format(mnode_name,v[3])) + else: + continue + + def create_db_check_vgroups(self): + + tdSql.execute("drop database if exists test") + tdSql.execute("create database if not exists test replica 1 duration 300") + tdSql.execute("use test") + tdSql.execute( + '''create table stb1 + (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(16),c9 nchar(32), c10 timestamp) + tags (t1 int) + ''' + ) + tdSql.execute( + ''' + create table t1 + (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(16),c9 nchar(32), c10 timestamp) + ''' + ) + + for i in range(5): + tdSql.execute("create table sub_tb_{} using stb1 tags({})".format(i,i)) + tdSql.query("show stables") + tdSql.checkRows(1) + tdSql.query("show tables") + tdSql.checkRows(6) + + tdSql.query("show test.vgroups;") + vgroups_infos = {} # key is id: value is info list + for vgroup_info in tdSql.queryResult: + vgroup_id = vgroup_info[0] + tmp_list = [] + for role in vgroup_info[3:-3]: + if role in ['leader','follower']: + tmp_list.append(role) + vgroups_infos[vgroup_id]=tmp_list + + for k , v in vgroups_infos.items(): + if len(v) ==1 and v[0]=="leader": + tdLog.info(" === create database replica only 1 role leader check success of vgroup_id {} ======".format(k)) + else: + tdLog.exit(" === create database replica only 1 role leader check fail of vgroup_id {} ======".format(k)) + + def create_db_replica_1_insertdatas(self, dbname, replica_num ,vgroup_nums ,tb_nums , row_nums ): + drop_db_sql = "drop database if exists {}".format(dbname) + create_db_sql = "create database {} replica {} vgroups {}".format(dbname,replica_num,vgroup_nums) + + tdLog.info(" ==== create database {} and insert rows begin =====".format(dbname)) + tdSql.execute(drop_db_sql) + tdSql.execute(create_db_sql) + tdSql.execute("use {}".format(dbname)) + tdSql.execute( + '''create table stb1 + (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(32),c9 nchar(32), c10 timestamp) + tags (t1 int) + ''' + ) + tdSql.execute( + ''' + create table t1 + (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(32),c9 nchar(32), c10 timestamp) + ''' + ) + + for i in range(tb_nums): + sub_tbname = "sub_tb_{}".format(i) + tdSql.execute("create table {} using stb1 tags({})".format(sub_tbname,i)) + # insert datas about new database + + for row_num in range(row_nums): + ts = self.ts + 1000*row_num + tdSql.execute(f"insert into {sub_tbname} values ({ts}, {row_num} ,{row_num}, 10 ,1 ,{row_num} ,{row_num},true,'bin_{row_num}','nchar_{row_num}',now) ") + + tdLog.info(" ==== create database {} and insert rows execute end =====".format(dbname)) + + def check_insert_status(self, dbname, tb_nums , row_nums): + tdSql.execute("use {}".format(dbname)) + tdSql.query("select count(*) from {}.{}".format(dbname,'stb1')) + tdSql.checkData(0 , 0 , tb_nums*row_nums) + tdSql.query("select distinct tbname from {}.{}".format(dbname,'stb1')) + tdSql.checkRows(tb_nums) + + def run(self): + self.check_setup_cluster_status() + self.create_db_check_vgroups() + self.create_db_replica_1_insertdatas(self.db_name , self.replica , self.vgroups , self.tb_nums , self.row_nums) + self.check_insert_status(self.db_name , self.tb_nums , self.row_nums) + + + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) \ No newline at end of file From 14dfe117910668ed344a3dd42dbc2a00852bb02b Mon Sep 17 00:00:00 2001 From: "wenzhouwww@live.cn" Date: Wed, 20 Jul 2022 20:00:54 +0800 Subject: [PATCH 2/9] add case about vote leader about replica 3 --- .../4dnode1mnode_basic_createDb_replica1.py | 2 +- ...4dnode1mnode_basic_replica1_insertdatas.py | 2 +- .../4dnode1mnode_basic_replica3_vgroups.py | 206 ++++++++++++++++++ 3 files changed, 208 insertions(+), 2 deletions(-) create mode 100644 tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_vgroups.py diff --git a/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_createDb_replica1.py b/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_createDb_replica1.py index 28d5b47b4f..e6192ba313 100644 --- a/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_createDb_replica1.py +++ b/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_createDb_replica1.py @@ -108,7 +108,7 @@ class TDTestCase: for vgroup_info in tdSql.queryResult: vgroup_id = vgroup_info[0] tmp_list = [] - for role in vgroup_info[3:-3]: + for role in vgroup_info[3:-4]: if role in ['leader','follower']: tmp_list.append(role) vgroups_infos[vgroup_id]=tmp_list diff --git a/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica1_insertdatas.py b/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica1_insertdatas.py index e113e2a6e5..d5fef08945 100644 --- a/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica1_insertdatas.py +++ b/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica1_insertdatas.py @@ -114,7 +114,7 @@ class TDTestCase: for vgroup_info in tdSql.queryResult: vgroup_id = vgroup_info[0] tmp_list = [] - for role in vgroup_info[3:-3]: + for role in vgroup_info[3:-4]: if role in ['leader','follower']: tmp_list.append(role) vgroups_infos[vgroup_id]=tmp_list diff --git a/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_vgroups.py b/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_vgroups.py new file mode 100644 index 0000000000..5529a5e256 --- /dev/null +++ b/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_vgroups.py @@ -0,0 +1,206 @@ +# author : wenzhouwww +from ssl import ALERT_DESCRIPTION_CERTIFICATE_UNOBTAINABLE +import taos +import sys +import time +import os + +from util.log import * +from util.sql import * +from util.cases import * +from util.dnodes import TDDnodes +from util.dnodes import TDDnode +from util.cluster import * + +import time +import socket +import subprocess + +class TDTestCase: + def init(self,conn ,logSql): + tdLog.debug(f"start to excute {__file__}") + tdSql.init(conn.cursor()) + self.host = socket.gethostname() + self.mnode_list = {} + self.dnode_list = {} + self.ts = 1483200000000 + self.db_name ='testdb' + self.replica = 1 + self.vgroups = 2 + self.tb_nums = 10 + self.row_nums = 100 + self.max_vote_time_cost = 10 # seconds + + def getBuildPath(self): + selfPath = os.path.dirname(os.path.realpath(__file__)) + if ("community" in selfPath): + projPath = selfPath[:selfPath.find("community")] + else: + projPath = selfPath[:selfPath.find("tests")] + + for root, dirs, files in os.walk(projPath): + if ("taosd" in files): + rootRealPath = os.path.dirname(os.path.realpath(root)) + if ("packaging" not in rootRealPath): + buildPath = root[:len(root) - len("/build/bin")] + break + return buildPath + + def check_setup_cluster_status(self): + tdSql.query("show mnodes") + for mnode in tdSql.queryResult: + name = mnode[1] + info = mnode + self.mnode_list[name] = info + + tdSql.query("show dnodes") + for dnode in tdSql.queryResult: + name = dnode[1] + info = dnode + self.dnode_list[name] = info + + count = 0 + is_leader = False + mnode_name = '' + for k,v in self.mnode_list.items(): + count +=1 + # only for 1 mnode + mnode_name = k + + if v[2] =='leader': + is_leader=True + + if count==1 and is_leader: + tdLog.info("===== depoly cluster success with 1 mnode as leader =====") + else: + tdLog.exit("===== depoly cluster fail with 1 mnode as leader =====") + + for k ,v in self.dnode_list.items(): + if k == mnode_name: + if v[3]==0: + tdLog.info("===== depoly cluster mnode only success at {} , support_vnodes is {} ".format(mnode_name,v[3])) + else: + tdLog.exit("===== depoly cluster mnode only fail at {} , support_vnodes is {} ".format(mnode_name,v[3])) + else: + continue + + def create_db_check_vgroups(self): + + tdSql.execute("drop database if exists test") + tdSql.execute("create database if not exists test replica 1 duration 300") + tdSql.execute("use test") + tdSql.execute( + '''create table stb1 + (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(16),c9 nchar(32), c10 timestamp) + tags (t1 int) + ''' + ) + tdSql.execute( + ''' + create table t1 + (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(16),c9 nchar(32), c10 timestamp) + ''' + ) + + for i in range(5): + tdSql.execute("create table sub_tb_{} using stb1 tags({})".format(i,i)) + tdSql.query("show stables") + tdSql.checkRows(1) + tdSql.query("show tables") + tdSql.checkRows(6) + + tdSql.query("show test.vgroups;") + vgroups_infos = {} # key is id: value is info list + for vgroup_info in tdSql.queryResult: + vgroup_id = vgroup_info[0] + tmp_list = [] + for role in vgroup_info[3:-4]: + if role in ['leader','follower']: + tmp_list.append(role) + vgroups_infos[vgroup_id]=tmp_list + + for k , v in vgroups_infos.items(): + if len(v) ==1 and v[0]=="leader": + tdLog.info(" === create database replica only 1 role leader check success of vgroup_id {} ======".format(k)) + else: + tdLog.exit(" === create database replica only 1 role leader check fail of vgroup_id {} ======".format(k)) + + def check_vgroups_init_done(self,dbname): + + status = True + + tdSql.query("show {}.vgroups".format(dbname)) + for vgroup_info in tdSql.queryResult: + vgroup_id = vgroup_info[0] + vgroup_status = [] + for ind , role in enumerate(vgroup_info[3:-4]): + + if ind%2==0: + continue + else: + vgroup_status.append(role) + if vgroup_status.count("leader")!=1 or vgroup_status.count("follower")!=2: + status = False + return status + return status + + + def vote_leader_time_costs(self,dbname): + start = time.time() + status = self.check_vgroups_init_done(dbname) + while not status: + time.sleep(0.1) + status = self.check_vgroups_init_done(dbname) + + # tdLog.info("=== database {} show vgroups vote the leader is in progress ===".format(dbname)) + end = time.time() + cost_time = end - start + tdLog.info(" ==== database %s vote the leaders success , cost time is %.3f second ====="%(dbname,cost_time) ) + # os.system("taos -s 'show {}.vgroups;'".format(dbname)) + if cost_time >= self.max_vote_time_cost: + tdLog.exit(" ==== database %s vote the leaders cost too large time , cost time is %.3f second ===="%(dbname,cost_time) ) + + + return cost_time + + def test_init_vgroups_time_costs(self): + + tdLog.info(" ====start check time cost about vgroups vote leaders ==== ") + tdLog.info(" ==== current max time cost is set value : {} =======".format(self.max_vote_time_cost)) + + # create database replica 3 vgroups 1 + + db1 = 'db_1' + create_db_replica_3_vgroups_1 = "create database {} replica 3 vgroups 1".format(db1) + tdLog.info('=======database {} replica 3 vgroups 1 ======'.format(db1)) + tdSql.execute(create_db_replica_3_vgroups_1) + self.vote_leader_time_costs(db1) + + # create database replica 3 vgroups 10 + db2 = 'db_2' + create_db_replica_3_vgroups_10 = "create database {} replica 3 vgroups 10".format(db2) + tdLog.info('=======database {} replica 3 vgroups 10 ======'.format(db2)) + tdSql.execute(create_db_replica_3_vgroups_10) + self.vote_leader_time_costs(db2) + + # create database replica 3 vgroups 100 + db3 = 'db_3' + create_db_replica_3_vgroups_100 = "create database {} replica 3 vgroups 100".format(db3) + tdLog.info('=======database {} replica 3 vgroups 100 ======'.format(db3)) + tdSql.execute(create_db_replica_3_vgroups_100) + self.vote_leader_time_costs(db3) + + + + def run(self): + self.check_setup_cluster_status() + self.test_init_vgroups_time_costs() + + + + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) \ No newline at end of file From ff06586b2c2c6406519c0049bd1c13408f1cfd60 Mon Sep 17 00:00:00 2001 From: "wenzhouwww@live.cn" Date: Thu, 21 Jul 2022 16:04:13 +0800 Subject: [PATCH 3/9] revote leader when stop one dnode --- ...de1mnode_basic_replica3_vgroups_stopOne.py | 364 ++++++++++++++++++ 1 file changed, 364 insertions(+) create mode 100644 tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_vgroups_stopOne.py diff --git a/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_vgroups_stopOne.py b/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_vgroups_stopOne.py new file mode 100644 index 0000000000..3244b6bd7b --- /dev/null +++ b/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_vgroups_stopOne.py @@ -0,0 +1,364 @@ +# author : wenzhouwww +from errno import ESOCKTNOSUPPORT +from ssl import ALERT_DESCRIPTION_CERTIFICATE_UNOBTAINABLE +import taos +import sys +import time +import os + +from util.log import * +from util.sql import * +from util.cases import * +from util.dnodes import TDDnodes +from util.dnodes import TDDnode +from util.cluster import * + +import time +import random +import socket +import subprocess + +class TDTestCase: + def init(self,conn ,logSql): + tdLog.debug(f"start to excute {__file__}") + tdSql.init(conn.cursor()) + self.host = socket.gethostname() + self.mnode_list = {} + self.dnode_list = {} + self.ts = 1483200000000 + self.db_name ='testdb' + self.replica = 1 + self.vgroups = 2 + self.tb_nums = 10 + self.row_nums = 100 + self.max_vote_time_cost = 10 # seconds + self.stop_dnode = None + + def getBuildPath(self): + selfPath = os.path.dirname(os.path.realpath(__file__)) + if ("community" in selfPath): + projPath = selfPath[:selfPath.find("community")] + else: + projPath = selfPath[:selfPath.find("tests")] + + for root, dirs, files in os.walk(projPath): + if ("taosd" in files): + rootRealPath = os.path.dirname(os.path.realpath(root)) + if ("packaging" not in rootRealPath): + buildPath = root[:len(root) - len("/build/bin")] + break + return buildPath + + def check_setup_cluster_status(self): + tdSql.query("show mnodes") + for mnode in tdSql.queryResult: + name = mnode[1] + info = mnode + self.mnode_list[name] = info + + tdSql.query("show dnodes") + for dnode in tdSql.queryResult: + name = dnode[1] + info = dnode + self.dnode_list[name] = info + + count = 0 + is_leader = False + mnode_name = '' + for k,v in self.mnode_list.items(): + count +=1 + # only for 1 mnode + mnode_name = k + + if v[2] =='leader': + is_leader=True + + if count==1 and is_leader: + tdLog.info("===== depoly cluster success with 1 mnode as leader =====") + else: + tdLog.exit("===== depoly cluster fail with 1 mnode as leader =====") + + for k ,v in self.dnode_list.items(): + if k == mnode_name: + if v[3]==0: + tdLog.info("===== depoly cluster mnode only success at {} , support_vnodes is {} ".format(mnode_name,v[3])) + else: + tdLog.exit("===== depoly cluster mnode only fail at {} , support_vnodes is {} ".format(mnode_name,v[3])) + else: + continue + + def create_db_check_vgroups(self): + + tdSql.execute("drop database if exists test") + tdSql.execute("create database if not exists test replica 1 duration 300") + tdSql.execute("use test") + tdSql.execute( + '''create table stb1 + (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(16),c9 nchar(32), c10 timestamp) + tags (t1 int) + ''' + ) + tdSql.execute( + ''' + create table t1 + (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(16),c9 nchar(32), c10 timestamp) + ''' + ) + + for i in range(5): + tdSql.execute("create table sub_tb_{} using stb1 tags({})".format(i,i)) + tdSql.query("show stables") + tdSql.checkRows(1) + tdSql.query("show tables") + tdSql.checkRows(6) + + tdSql.query("show test.vgroups;") + vgroups_infos = {} # key is id: value is info list + for vgroup_info in tdSql.queryResult: + vgroup_id = vgroup_info[0] + tmp_list = [] + for role in vgroup_info[3:-4]: + if role in ['leader','follower']: + tmp_list.append(role) + vgroups_infos[vgroup_id]=tmp_list + + for k , v in vgroups_infos.items(): + if len(v) ==1 and v[0]=="leader": + tdLog.info(" === create database replica only 1 role leader check success of vgroup_id {} ======".format(k)) + else: + tdLog.exit(" === create database replica only 1 role leader check fail of vgroup_id {} ======".format(k)) + + def _get_stop_dnode(self): + only_dnode_list = self.dnode_list.keys() - self.mnode_list.keys() + self.stop_dnode = random.sample(only_dnode_list , 1 )[0] + return self.stop_dnode + + + def check_vgroups_revote_leader(self,dbname): + + status = True + stop_dnode_id = self.dnode_list[self.stop_dnode][0] + + tdSql.query("show {}.vgroups".format(dbname)) + for vgroup_info in tdSql.queryResult: + vgroup_id = vgroup_info[0] + vgroup_status = [] + vgroups_leader_follower = vgroup_info[3:-4] + for ind , role in enumerate(vgroups_leader_follower): + + if ind%2==0: + if role == stop_dnode_id and vgroups_leader_follower[ind+1]=="offline": + tdLog.info("====== dnode {} has offline , endpoint is {}".format(stop_dnode_id , self.stop_dnode)) + elif role == stop_dnode_id : + tdLog.exit("====== dnode {} has not offline , endpoint is {}".format(stop_dnode_id , self.stop_dnode)) + else: + continue + else: + vgroup_status.append(role) + if vgroup_status.count("leader")!=1 or vgroup_status.count("follower")!=1 or vgroup_status.count("offline")!=1: + status = False + return status + return status + + + def wait_stop_dnode_OK(self): + + def _get_status(): + + status = "" + tdSql.query("show dnodes") + dnode_infos = tdSql.queryResult + for dnode_info in dnode_infos: + endpoint = dnode_info[1] + dnode_status = dnode_info[4] + if endpoint == self.stop_dnode: + status = dnode_status + break + return status + + status = _get_status() + while status !="offline": + time.sleep(0.1) + status = _get_status() + # tdLog.info("==== stop dnode has not been stopped , endpoint is {}".format(self.stop_dnode)) + tdLog.info("==== stop_dnode has stopped , endpoint is {}".format(self.stop_dnode)) + + def wait_start_dnode_OK(self): + + def _get_status(): + + status = "" + tdSql.query("show dnodes") + dnode_infos = tdSql.queryResult + for dnode_info in dnode_infos: + endpoint = dnode_info[1] + dnode_status = dnode_info[4] + if endpoint == self.stop_dnode: + status = dnode_status + break + return status + + status = _get_status() + while status !="ready": + time.sleep(0.1) + status = _get_status() + # tdLog.info("==== stop dnode has not been stopped , endpoint is {}".format(self.stop_dnode)) + tdLog.info("==== stop_dnode has restart , endpoint is {}".format(self.stop_dnode)) + + + + def random_stop_One_dnode(self): + self.stop_dnode = self._get_stop_dnode() + stop_dnode_id = self.dnode_list[self.stop_dnode][0] + tdLog.info(" ==== dnode {} will offline ,endpoints is {} ====".format(stop_dnode_id , self.stop_dnode)) + tdDnodes=cluster.dnodes + tdDnodes[stop_dnode_id-1].stoptaosd() + self.wait_stop_dnode_OK() + # os.system("taos -s 'show dnodes;'") + + def Restart_stop_dnode(self): + + tdDnodes=cluster.dnodes + stop_dnode_id = self.dnode_list[self.stop_dnode][0] + tdDnodes[stop_dnode_id-1].starttaosd() + self.wait_start_dnode_OK() + # os.system("taos -s 'show dnodes;'") + + def check_vgroups_init_done(self,dbname): + + status = True + + tdSql.query("show {}.vgroups".format(dbname)) + for vgroup_info in tdSql.queryResult: + vgroup_id = vgroup_info[0] + vgroup_status = [] + for ind , role in enumerate(vgroup_info[3:-4]): + + if ind%2==0: + continue + else: + vgroup_status.append(role) + if vgroup_status.count("leader")!=1 or vgroup_status.count("follower")!=2: + status = False + return status + return status + + def vote_leader_time_costs(self,dbname): + start = time.time() + status = self.check_vgroups_init_done(dbname) + while not status: + time.sleep(0.1) + status = self.check_vgroups_init_done(dbname) + + # tdLog.info("=== database {} show vgroups vote the leader is in progress ===".format(dbname)) + end = time.time() + cost_time = end - start + tdLog.info(" ==== database %s vote the leaders success , cost time is %.3f second ====="%(dbname,cost_time) ) + # os.system("taos -s 'show {}.vgroups;'".format(dbname)) + if cost_time >= self.max_vote_time_cost: + tdLog.exit(" ==== database %s vote the leaders cost too large time , cost time is %.3f second ===="%(dbname,cost_time) ) + + return cost_time + + + def revote_leader_time_costs(self,dbname): + start = time.time() + + status = self.check_vgroups_revote_leader(dbname) + while not status: + time.sleep(0.1) + status = self.check_vgroups_revote_leader(dbname) + + # tdLog.info("=== database {} show vgroups vote the leader is in progress ===".format(dbname)) + end = time.time() + cost_time = end - start + tdLog.info(" ==== database %s revote the leaders success , cost time is %.3f second ====="%(dbname,cost_time) ) + # os.system("taos -s 'show {}.vgroups;'".format(dbname)) + if cost_time >= self.max_vote_time_cost: + tdLog.exit(" ==== database %s revote the leaders cost too large time , cost time is %.3f second ===="%(dbname,cost_time) ) + + + return cost_time + + def exec_revote_action(self,dbname): + + tdSql.query("show {}.vgroups".format(dbname)) + before_revote = tdSql.queryResult + + before_vgroups = set() + for vgroup_info in before_revote: + before_vgroups.add(vgroup_info[3:-4]) + + self.random_stop_One_dnode() + tdSql.query("show {}.vgroups".format(dbname)) + after_revote = tdSql.queryResult + + after_vgroups = set() + for vgroup_info in after_revote: + after_vgroups.add(vgroup_info[3:-4]) + + vote_act = set(set(after_vgroups)-set(before_vgroups)) + if not vote_act: + tdLog.exit(" ===maybe revote not occured , there is no dnode offline ====") + else: + for vgroup_info in vote_act: + for ind , role in enumerate(vgroup_info): + if role==self.dnode_list[self.stop_dnode][0]: + + if vgroup_info[ind+1] =="offline" and "leader" in vgroup_info: + tdLog.info(" === revote leader ok , leader is {} now ====".format(list(vgroup_info).index("leader")-1)) + elif vgroup_info[ind+1] !="offline": + tdLog.exit(" === dnode {} should be offline ".format(self.stop_dnode)) + else: + continue + break + + + + self.revote_leader_time_costs(dbname) + self.Restart_stop_dnode() + def test_init_vgroups_time_costs(self): + + tdLog.info(" ====start check time cost about vgroups vote leaders ==== ") + tdLog.info(" ==== current max time cost is set value : {} =======".format(self.max_vote_time_cost)) + + # create database replica 3 vgroups 1 + + db1 = 'db_1' + create_db_replica_3_vgroups_1 = "create database {} replica 3 vgroups 1".format(db1) + tdLog.info('=======database {} replica 3 vgroups 1 ======'.format(db1)) + tdSql.execute(create_db_replica_3_vgroups_1) + self.vote_leader_time_costs(db1) + self.exec_revote_action(db1) + + # create database replica 3 vgroups 10 + db2 = 'db_2' + create_db_replica_3_vgroups_10 = "create database {} replica 3 vgroups 10".format(db2) + tdLog.info('=======database {} replica 3 vgroups 10 ======'.format(db2)) + tdSql.execute(create_db_replica_3_vgroups_10) + self.vote_leader_time_costs(db2) + self.exec_revote_action(db2) + + # # create database replica 3 vgroups 100 + # db3 = 'db_3' + # create_db_replica_3_vgroups_100 = "create database {} replica 3 vgroups 100".format(db3) + # tdLog.info('=======database {} replica 3 vgroups 100 ======'.format(db3)) + # tdSql.execute(create_db_replica_3_vgroups_100) + # self.vote_leader_time_costs(db3) + # self.exec_revote_action(db3) + + + + def run(self): + self.check_setup_cluster_status() + self.test_init_vgroups_time_costs() + + + + + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) \ No newline at end of file From 2fa173c0471fc6aaab22516678b6120d80aad9eb Mon Sep 17 00:00:00 2001 From: "wenzhouwww@live.cn" Date: Thu, 21 Jul 2022 16:06:07 +0800 Subject: [PATCH 4/9] revote leader when stop one dnode --- .../4dnode1mnode_basic_replica3_vgroups_stopOne.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_vgroups_stopOne.py b/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_vgroups_stopOne.py index 3244b6bd7b..3be36c067e 100644 --- a/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_vgroups_stopOne.py +++ b/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_vgroups_stopOne.py @@ -339,13 +339,13 @@ class TDTestCase: self.vote_leader_time_costs(db2) self.exec_revote_action(db2) - # # create database replica 3 vgroups 100 - # db3 = 'db_3' - # create_db_replica_3_vgroups_100 = "create database {} replica 3 vgroups 100".format(db3) - # tdLog.info('=======database {} replica 3 vgroups 100 ======'.format(db3)) - # tdSql.execute(create_db_replica_3_vgroups_100) - # self.vote_leader_time_costs(db3) - # self.exec_revote_action(db3) + # create database replica 3 vgroups 100 + db3 = 'db_3' + create_db_replica_3_vgroups_100 = "create database {} replica 3 vgroups 100".format(db3) + tdLog.info('=======database {} replica 3 vgroups 100 ======'.format(db3)) + tdSql.execute(create_db_replica_3_vgroups_100) + self.vote_leader_time_costs(db3) + self.exec_revote_action(db3) From ce900e4835cf63e36f314080f3981222943270b1 Mon Sep 17 00:00:00 2001 From: "wenzhouwww@live.cn" Date: Thu, 21 Jul 2022 19:58:28 +0800 Subject: [PATCH 5/9] add case of stop follow of vnode --- tests/pytest/util/common.py | 4 +- ...asic_replica3_insertdatas_stop_follower.py | 368 ++++++++++++++++++ 2 files changed, 370 insertions(+), 2 deletions(-) create mode 100644 tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_follower.py diff --git a/tests/pytest/util/common.py b/tests/pytest/util/common.py index 94043ed01a..e7655881b8 100644 --- a/tests/pytest/util/common.py +++ b/tests/pytest/util/common.py @@ -382,14 +382,14 @@ class TDCom: def newcon(self,host='localhost',port=6030,user='root',password='taosdata'): con=taos.connect(host=host, user=user, password=password, port=port) - print(con) + # print(con) return con def newcur(self,host='localhost',port=6030,user='root',password='taosdata'): cfgPath = self.getClientCfgPath() con=taos.connect(host=host, user=user, password=password, config=cfgPath, port=port) cur=con.cursor() - print(cur) + # print(cur) return cur def newTdSql(self, host='localhost',port=6030,user='root',password='taosdata'): diff --git a/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_follower.py b/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_follower.py new file mode 100644 index 0000000000..63d560587d --- /dev/null +++ b/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_follower.py @@ -0,0 +1,368 @@ +# author : wenzhouwww +from ssl import ALERT_DESCRIPTION_CERTIFICATE_UNOBTAINABLE +import taos +import sys +import time +import os + +from util.log import * +from util.sql import * +from util.cases import * +from util.dnodes import TDDnodes +from util.dnodes import TDDnode +from util.cluster import * + +import time +import socket +import subprocess +import threading +sys.path.append(os.path.dirname(__file__)) + +class TDTestCase: + def init(self,conn ,logSql): + tdLog.debug(f"start to excute {__file__}") + tdSql.init(conn.cursor()) + self.host = socket.gethostname() + self.mnode_list = {} + self.dnode_list = {} + self.ts = 1483200000000 + self.ts_step =1000 + self.db_name ='testdb' + self.replica = 3 + self.vgroups = 1 + self.tb_nums = 10 + self.row_nums = 100 + self.stop_dnode_id = None + self.loop_restart_times = 5 + self.current_thread = None + self.max_restart_time = 20 + + def getBuildPath(self): + selfPath = os.path.dirname(os.path.realpath(__file__)) + if ("community" in selfPath): + projPath = selfPath[:selfPath.find("community")] + else: + projPath = selfPath[:selfPath.find("tests")] + + for root, dirs, files in os.walk(projPath): + if ("taosd" in files): + rootRealPath = os.path.dirname(os.path.realpath(root)) + if ("packaging" not in rootRealPath): + buildPath = root[:len(root) - len("/build/bin")] + break + return buildPath + + def check_setup_cluster_status(self): + tdSql.query("show mnodes") + for mnode in tdSql.queryResult: + name = mnode[1] + info = mnode + self.mnode_list[name] = info + + tdSql.query("show dnodes") + for dnode in tdSql.queryResult: + name = dnode[1] + info = dnode + self.dnode_list[name] = info + + count = 0 + is_leader = False + mnode_name = '' + for k,v in self.mnode_list.items(): + count +=1 + # only for 1 mnode + mnode_name = k + + if v[2] =='leader': + is_leader=True + + if count==1 and is_leader: + tdLog.info("===== depoly cluster success with 1 mnode as leader =====") + else: + tdLog.exit("===== depoly cluster fail with 1 mnode as leader =====") + + for k ,v in self.dnode_list.items(): + if k == mnode_name: + if v[3]==0: + tdLog.info("===== depoly cluster mnode only success at {} , support_vnodes is {} ".format(mnode_name,v[3])) + else: + tdLog.exit("===== depoly cluster mnode only fail at {} , support_vnodes is {} ".format(mnode_name,v[3])) + else: + continue + + def create_db_check_vgroups(self): + + tdSql.execute("drop database if exists test") + tdSql.execute("create database if not exists test replica 1 duration 300") + tdSql.execute("use test") + tdSql.execute( + '''create table stb1 + (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(16),c9 nchar(32), c10 timestamp) + tags (t1 int) + ''' + ) + tdSql.execute( + ''' + create table t1 + (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(16),c9 nchar(32), c10 timestamp) + ''' + ) + + for i in range(5): + tdSql.execute("create table sub_tb_{} using stb1 tags({})".format(i,i)) + tdSql.query("show stables") + tdSql.checkRows(1) + tdSql.query("show tables") + tdSql.checkRows(6) + + tdSql.query("show test.vgroups;") + vgroups_infos = {} # key is id: value is info list + for vgroup_info in tdSql.queryResult: + vgroup_id = vgroup_info[0] + tmp_list = [] + for role in vgroup_info[3:-4]: + if role in ['leader','follower']: + tmp_list.append(role) + vgroups_infos[vgroup_id]=tmp_list + + for k , v in vgroups_infos.items(): + if len(v) ==1 and v[0]=="leader": + tdLog.info(" === create database replica only 1 role leader check success of vgroup_id {} ======".format(k)) + else: + tdLog.exit(" === create database replica only 1 role leader check fail of vgroup_id {} ======".format(k)) + + def create_database(self, dbname, replica_num ,vgroup_nums ): + drop_db_sql = "drop database if exists {}".format(dbname) + create_db_sql = "create database {} replica {} vgroups {}".format(dbname,replica_num,vgroup_nums) + + tdLog.info(" ==== create database {} and insert rows begin =====".format(dbname)) + tdSql.execute(drop_db_sql) + tdSql.execute(create_db_sql) + tdSql.execute("use {}".format(dbname)) + + def create_stable_insert_datas(self,dbname ,stablename , tb_nums , row_nums): + tdSql.execute("use {}".format(dbname)) + tdSql.execute( + '''create table {} + (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(32),c9 nchar(32), c10 timestamp) + tags (t1 int) + '''.format(stablename) + ) + + for i in range(tb_nums): + sub_tbname = "sub_{}_{}".format(stablename,i) + tdSql.execute("create table {} using {} tags({})".format(sub_tbname, stablename ,i)) + # insert datas about new database + + for row_num in range(row_nums): + ts = self.ts + self.ts_step*row_num + tdSql.execute(f"insert into {sub_tbname} values ({ts}, {row_num} ,{row_num}, 10 ,1 ,{row_num} ,{row_num},true,'bin_{row_num}','nchar_{row_num}',now) ") + + tdLog.info(" ==== stable {} insert rows execute end =====".format(stablename)) + + def append_rows_of_exists_tables(self,dbname ,stablename , tbname , append_nums ): + + tdSql.execute("use {}".format(dbname)) + + for row_num in range(append_nums): + tdSql.execute(f"insert into {tbname} values (now, {row_num} ,{row_num}, 10 ,1 ,{row_num} ,{row_num},true,'bin_{row_num}','nchar_{row_num}',now) ") + # print(f"insert into {tbname} values (now, {row_num} ,{row_num}, 10 ,1 ,{row_num} ,{row_num},true,'bin_{row_num}','nchar_{row_num}',now) ") + tdLog.info(" ==== append new rows of table {} belongs to stable {} execute end =====".format(tbname,stablename)) + os.system("taos -s 'select count(*) from {}.{}';".format(dbname,stablename)) + + def check_insert_rows(self, dbname, stablename , tb_nums , row_nums, append_rows): + + tdSql.execute("use {}".format(dbname)) + tdSql.query("select count(*) from {}.{}".format(dbname,stablename)) + tdSql.checkData(0 , 0 , tb_nums*row_nums+append_rows) + tdSql.query("select distinct tbname from {}.{}".format(dbname,stablename)) + tdSql.checkRows(tb_nums) + + def _get_stop_dnode_id(self,dbname): + tdSql.query("show {}.vgroups".format(dbname)) + vgroup_infos = tdSql.queryResult + for vgroup_info in vgroup_infos: + leader_infos = vgroup_info[3:-4] + # print(vgroup_info) + for ind ,role in enumerate(leader_infos): + if role =='follower': + # print(ind,leader_infos) + self.stop_dnode_id = leader_infos[ind-1] + break + + + return self.stop_dnode_id + + def wait_stop_dnode_OK(self): + + def _get_status(): + newTdSql=tdCom.newTdSql() + + status = "" + newTdSql.query("show dnodes") + dnode_infos = newTdSql.queryResult + for dnode_info in dnode_infos: + id = dnode_info[0] + dnode_status = dnode_info[4] + if id == self.stop_dnode_id: + status = dnode_status + break + return status + + status = _get_status() + while status !="offline": + time.sleep(0.1) + status = _get_status() + # tdLog.info("==== stop dnode has not been stopped , endpoint is {}".format(self.stop_dnode)) + tdLog.info("==== stop_dnode has stopped , id is {}".format(self.stop_dnode_id)) + + def wait_start_dnode_OK(self): + + def _get_status(): + newTdSql=tdCom.newTdSql() + status = "" + newTdSql.query("show dnodes") + dnode_infos = newTdSql.queryResult + for dnode_info in dnode_infos: + id = dnode_info[0] + dnode_status = dnode_info[4] + if id == self.stop_dnode_id: + status = dnode_status + break + return status + + status = _get_status() + while status !="ready": + time.sleep(0.1) + status = _get_status() + # tdLog.info("==== stop dnode has not been stopped , endpoint is {}".format(self.stop_dnode)) + tdLog.info("==== stop_dnode has restart , id is {}".format(self.stop_dnode_id)) + + def sync_run_case(self): + # stop follower and insert datas , update tables and create new stables + tdDnodes=cluster.dnodes + for loop in range(self.loop_restart_times): + db_name = "sync_db_{}".format(loop) + stablename = 'stable_{}'.format(loop) + self.create_database(dbname = db_name ,replica_num= self.replica , vgroup_nums= 1) + self.create_stable_insert_datas(dbname = db_name , stablename = stablename , tb_nums= 10 ,row_nums= 10 ) + self.stop_dnode_id = self._get_stop_dnode_id(db_name) + + # check rows of datas + + self.check_insert_rows(db_name ,stablename ,tb_nums=10 , row_nums= 10 ,append_rows=0) + + # begin stop dnode + start = time.time() + tdDnodes[self.stop_dnode_id-1].stoptaosd() + + self.wait_stop_dnode_OK() + + # append rows of stablename when dnode stop + + tbname = "sub_{}_{}".format(stablename , 0) + tdLog.info(" ==== begin append rows of exists table {} when dnode {} offline ====".format(tbname , self.stop_dnode_id)) + self.append_rows_of_exists_tables(db_name ,stablename , tbname , 100 ) + tdLog.info(" ==== check append rows of exists table {} when dnode {} offline ====".format(tbname , self.stop_dnode_id)) + self.check_insert_rows(db_name ,stablename ,tb_nums=10 , row_nums= 10 ,append_rows=100) + + # create new stables + tdLog.info(" ==== create new stable {} when dnode {} offline ====".format('new_stb1' , self.stop_dnode_id)) + self.create_stable_insert_datas(dbname = db_name , stablename = 'new_stb1' , tb_nums= 10 ,row_nums= 10 ) + tdLog.info(" ==== check new stable {} when dnode {} offline ====".format('new_stb1' , self.stop_dnode_id)) + self.check_insert_rows(db_name ,'new_stb1' ,tb_nums=10 , row_nums= 10 ,append_rows=0) + + # begin start dnode + tdDnodes[self.stop_dnode_id-1].starttaosd() + self.wait_start_dnode_OK() + end = time.time() + time_cost = int(end -start) + if time_cost > self.max_restart_time: + tdLog.exit(" ==== restart dnode {} cost too much time , please check ====".format(self.stop_dnode_id)) + + # create new stables again + tdLog.info(" ==== create new stable {} when dnode {} restart ====".format('new_stb2' , self.stop_dnode_id)) + self.create_stable_insert_datas(dbname = db_name , stablename = 'new_stb2' , tb_nums= 10 ,row_nums= 10 ) + tdLog.info(" ==== check new stable {} when dnode {} restart ====".format('new_stb2' , self.stop_dnode_id)) + self.check_insert_rows(db_name ,'new_stb2' ,tb_nums=10 , row_nums= 10 ,append_rows=0) + + def unsync_run_case(self): + + def _restart_dnode_of_db_unsync(dbname): + start = time.time() + tdDnodes=cluster.dnodes + self.stop_dnode_id = self._get_stop_dnode_id(dbname) + # begin restart dnode + tdDnodes[self.stop_dnode_id-1].stoptaosd() + self.wait_stop_dnode_OK() + tdDnodes[self.stop_dnode_id-1].starttaosd() + self.wait_start_dnode_OK() + end = time.time() + time_cost = int(end-start) + + if time_cost > self.max_restart_time: + tdLog.exit(" ==== restart dnode {} cost too much time , please check ====".format(self.stop_dnode_id)) + + + def _create_threading(dbname): + self.current_thread = threading.Thread(target=_restart_dnode_of_db_unsync, args=(dbname,)) + return self.current_thread + + + ''' + in this mode , it will be extra threading control start or stop dnode , insert will always going with not care follower online or alive + ''' + tdDnodes=cluster.dnodes + for loop in range(self.loop_restart_times): + db_name = "unsync_db_{}".format(loop) + stablename = 'stable_{}'.format(loop) + self.create_database(dbname = db_name ,replica_num= self.replica , vgroup_nums= 1) + self.create_stable_insert_datas(dbname = db_name , stablename = stablename , tb_nums= 10 ,row_nums= 10 ) + + tdLog.info(" ===== restart dnode of database {} in an unsync threading ===== ".format(db_name)) + + # create sync threading and start it + self.current_thread = _create_threading(db_name) + self.current_thread.start() + + # check rows of datas + self.check_insert_rows(db_name ,stablename ,tb_nums=10 , row_nums= 10 ,append_rows=0) + + tbname = "sub_{}_{}".format(stablename , 0) + tdLog.info(" ==== begin append rows of exists table {} when dnode {} offline ====".format(tbname , self.stop_dnode_id)) + self.append_rows_of_exists_tables(db_name ,stablename , tbname , 100 ) + tdLog.info(" ==== check append rows of exists table {} when dnode {} offline ====".format(tbname , self.stop_dnode_id)) + self.check_insert_rows(db_name ,stablename ,tb_nums=10 , row_nums= 10 ,append_rows=100) + + # create new stables + tdLog.info(" ==== create new stable {} when dnode {} offline ====".format('new_stb1' , self.stop_dnode_id)) + self.create_stable_insert_datas(dbname = db_name , stablename = 'new_stb1' , tb_nums= 10 ,row_nums= 10 ) + tdLog.info(" ==== check new stable {} when dnode {} offline ====".format('new_stb1' , self.stop_dnode_id)) + self.check_insert_rows(db_name ,'new_stb1' ,tb_nums=10 , row_nums= 10 ,append_rows=0) + + # create new stables again + tdLog.info(" ==== create new stable {} when dnode {} restart ====".format('new_stb2' , self.stop_dnode_id)) + self.create_stable_insert_datas(dbname = db_name , stablename = 'new_stb2' , tb_nums= 10 ,row_nums= 10 ) + tdLog.info(" ==== check new stable {} when dnode {} restart ====".format('new_stb2' , self.stop_dnode_id)) + self.check_insert_rows(db_name ,'new_stb2' ,tb_nums=10 , row_nums= 10 ,append_rows=0) + + self.current_thread.join() + + + def run(self): + + # basic insert and check of cluster + self.check_setup_cluster_status() + self.create_db_check_vgroups() + self.sync_run_case() + self.unsync_run_case() + + + + + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) \ No newline at end of file From 7cce7da526801eaee99a8b39837ac6058dfc9b6e Mon Sep 17 00:00:00 2001 From: "wenzhouwww@live.cn" Date: Fri, 22 Jul 2022 16:29:22 +0800 Subject: [PATCH 6/9] add test case stop follower and leader --- ...4dnode1mnode_basic_replica3_insertdatas.py | 179 ++++++ ...replica3_insertdatas_stop_follower_sync.py | 484 ++++++++++++++++ ...lica3_insertdatas_stop_follower_unsync.py} | 126 +++- ..._basic_replica3_insertdatas_stop_leader.py | 548 ++++++++++++++++++ 4 files changed, 1332 insertions(+), 5 deletions(-) create mode 100644 tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas.py create mode 100644 tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_follower_sync.py rename tests/system-test/6-cluster/vnode/{4dnode1mnode_basic_replica3_insertdatas_stop_follower.py => 4dnode1mnode_basic_replica3_insertdatas_stop_follower_unsync.py} (71%) create mode 100644 tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_leader.py diff --git a/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas.py b/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas.py new file mode 100644 index 0000000000..00bd8a48d9 --- /dev/null +++ b/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas.py @@ -0,0 +1,179 @@ +# author : wenzhouwww +from ssl import ALERT_DESCRIPTION_CERTIFICATE_UNOBTAINABLE +import taos +import sys +import time +import os + +from util.log import * +from util.sql import * +from util.cases import * +from util.dnodes import TDDnodes +from util.dnodes import TDDnode +from util.cluster import * + +import time +import socket +import subprocess +sys.path.append(os.path.dirname(__file__)) + +class TDTestCase: + def init(self,conn ,logSql): + tdLog.debug(f"start to excute {__file__}") + tdSql.init(conn.cursor()) + self.host = socket.gethostname() + self.mnode_list = {} + self.dnode_list = {} + self.ts = 1483200000000 + self.db_name ='testdb' + self.replica = 3 + self.vgroups = 2 + self.tb_nums = 10 + self.row_nums = 100 + + def getBuildPath(self): + selfPath = os.path.dirname(os.path.realpath(__file__)) + if ("community" in selfPath): + projPath = selfPath[:selfPath.find("community")] + else: + projPath = selfPath[:selfPath.find("tests")] + + for root, dirs, files in os.walk(projPath): + if ("taosd" in files): + rootRealPath = os.path.dirname(os.path.realpath(root)) + if ("packaging" not in rootRealPath): + buildPath = root[:len(root) - len("/build/bin")] + break + return buildPath + + def check_setup_cluster_status(self): + tdSql.query("show mnodes") + for mnode in tdSql.queryResult: + name = mnode[1] + info = mnode + self.mnode_list[name] = info + + tdSql.query("show dnodes") + for dnode in tdSql.queryResult: + name = dnode[1] + info = dnode + self.dnode_list[name] = info + + count = 0 + is_leader = False + mnode_name = '' + for k,v in self.mnode_list.items(): + count +=1 + # only for 1 mnode + mnode_name = k + + if v[2] =='leader': + is_leader=True + + if count==1 and is_leader: + tdLog.info("===== depoly cluster success with 1 mnode as leader =====") + else: + tdLog.exit("===== depoly cluster fail with 1 mnode as leader =====") + + for k ,v in self.dnode_list.items(): + if k == mnode_name: + if v[3]==0: + tdLog.info("===== depoly cluster mnode only success at {} , support_vnodes is {} ".format(mnode_name,v[3])) + else: + tdLog.exit("===== depoly cluster mnode only fail at {} , support_vnodes is {} ".format(mnode_name,v[3])) + else: + continue + + def create_db_check_vgroups(self): + + tdSql.execute("drop database if exists test") + tdSql.execute("create database if not exists test replica 1 duration 300") + tdSql.execute("use test") + tdSql.execute( + '''create table stb1 + (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(16),c9 nchar(32), c10 timestamp) + tags (t1 int) + ''' + ) + tdSql.execute( + ''' + create table t1 + (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(16),c9 nchar(32), c10 timestamp) + ''' + ) + + for i in range(5): + tdSql.execute("create table sub_tb_{} using stb1 tags({})".format(i,i)) + tdSql.query("show stables") + tdSql.checkRows(1) + tdSql.query("show tables") + tdSql.checkRows(6) + + tdSql.query("show test.vgroups;") + vgroups_infos = {} # key is id: value is info list + for vgroup_info in tdSql.queryResult: + vgroup_id = vgroup_info[0] + tmp_list = [] + for role in vgroup_info[3:-4]: + if role in ['leader','follower']: + tmp_list.append(role) + vgroups_infos[vgroup_id]=tmp_list + + for k , v in vgroups_infos.items(): + if len(v) ==1 and v[0]=="leader": + tdLog.info(" === create database replica only 1 role leader check success of vgroup_id {} ======".format(k)) + else: + tdLog.exit(" === create database replica only 1 role leader check fail of vgroup_id {} ======".format(k)) + + def create_db_replica_3_insertdatas(self, dbname, replica_num ,vgroup_nums ,tb_nums , row_nums ): + drop_db_sql = "drop database if exists {}".format(dbname) + create_db_sql = "create database {} replica {} vgroups {}".format(dbname,replica_num,vgroup_nums) + + tdLog.info(" ==== create database {} and insert rows begin =====".format(dbname)) + tdSql.execute(drop_db_sql) + tdSql.execute(create_db_sql) + tdSql.execute("use {}".format(dbname)) + tdSql.execute( + '''create table stb1 + (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(32),c9 nchar(32), c10 timestamp) + tags (t1 int) + ''' + ) + tdSql.execute( + ''' + create table t1 + (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(32),c9 nchar(32), c10 timestamp) + ''' + ) + + for i in range(tb_nums): + sub_tbname = "sub_tb_{}".format(i) + tdSql.execute("create table {} using stb1 tags({})".format(sub_tbname,i)) + # insert datas about new database + + for row_num in range(row_nums): + ts = self.ts + 1000*row_num + tdSql.execute(f"insert into {sub_tbname} values ({ts}, {row_num} ,{row_num}, 10 ,1 ,{row_num} ,{row_num},true,'bin_{row_num}','nchar_{row_num}',now) ") + + tdLog.info(" ==== create database {} and insert rows execute end =====".format(dbname)) + + def check_insert_status(self, dbname, tb_nums , row_nums): + tdSql.execute("use {}".format(dbname)) + tdSql.query("select count(*) from {}.{}".format(dbname,'stb1')) + tdSql.checkData(0 , 0 , tb_nums*row_nums) + tdSql.query("select distinct tbname from {}.{}".format(dbname,'stb1')) + tdSql.checkRows(tb_nums) + + def run(self): + self.check_setup_cluster_status() + self.create_db_check_vgroups() + self.create_db_replica_3_insertdatas(self.db_name , self.replica , self.vgroups , self.tb_nums , self.row_nums) + self.check_insert_status(self.db_name , self.tb_nums , self.row_nums) + + + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) \ No newline at end of file diff --git a/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_follower_sync.py b/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_follower_sync.py new file mode 100644 index 0000000000..5db1ced199 --- /dev/null +++ b/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_follower_sync.py @@ -0,0 +1,484 @@ +# author : wenzhouwww +from ssl import ALERT_DESCRIPTION_CERTIFICATE_UNOBTAINABLE +import taos +import sys +import time +import os + +from util.log import * +from util.sql import * +from util.cases import * +from util.dnodes import TDDnodes +from util.dnodes import TDDnode +from util.cluster import * + +import datetime +import inspect +import time +import socket +import subprocess +import threading +sys.path.append(os.path.dirname(__file__)) + +class TDTestCase: + def init(self,conn ,logSql): + tdLog.debug(f"start to excute {__file__}") + tdSql.init(conn.cursor()) + self.host = socket.gethostname() + self.mnode_list = {} + self.dnode_list = {} + self.ts = 1483200000000 + self.ts_step =1000 + self.db_name ='testdb' + self.replica = 3 + self.vgroups = 1 + self.tb_nums = 10 + self.row_nums = 100 + self.stop_dnode_id = None + self.loop_restart_times = 5 + self.current_thread = None + self.max_restart_time = 10 + self.try_check_times = 10 + + def getBuildPath(self): + selfPath = os.path.dirname(os.path.realpath(__file__)) + if ("community" in selfPath): + projPath = selfPath[:selfPath.find("community")] + else: + projPath = selfPath[:selfPath.find("tests")] + + for root, dirs, files in os.walk(projPath): + if ("taosd" in files): + rootRealPath = os.path.dirname(os.path.realpath(root)) + if ("packaging" not in rootRealPath): + buildPath = root[:len(root) - len("/build/bin")] + break + return buildPath + + def check_setup_cluster_status(self): + tdSql.query("show mnodes") + for mnode in tdSql.queryResult: + name = mnode[1] + info = mnode + self.mnode_list[name] = info + + tdSql.query("show dnodes") + for dnode in tdSql.queryResult: + name = dnode[1] + info = dnode + self.dnode_list[name] = info + + count = 0 + is_leader = False + mnode_name = '' + for k,v in self.mnode_list.items(): + count +=1 + # only for 1 mnode + mnode_name = k + + if v[2] =='leader': + is_leader=True + + if count==1 and is_leader: + tdLog.info("===== depoly cluster success with 1 mnode as leader =====") + else: + tdLog.exit("===== depoly cluster fail with 1 mnode as leader =====") + + for k ,v in self.dnode_list.items(): + if k == mnode_name: + if v[3]==0: + tdLog.info("===== depoly cluster mnode only success at {} , support_vnodes is {} ".format(mnode_name,v[3])) + else: + tdLog.exit("===== depoly cluster mnode only fail at {} , support_vnodes is {} ".format(mnode_name,v[3])) + else: + continue + + def create_db_check_vgroups(self): + + tdSql.execute("drop database if exists test") + tdSql.execute("create database if not exists test replica 1 duration 300") + tdSql.execute("use test") + tdSql.execute( + '''create table stb1 + (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(16),c9 nchar(32), c10 timestamp) + tags (t1 int) + ''' + ) + tdSql.execute( + ''' + create table t1 + (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(16),c9 nchar(32), c10 timestamp) + ''' + ) + + for i in range(5): + tdSql.execute("create table sub_tb_{} using stb1 tags({})".format(i,i)) + tdSql.query("show stables") + tdSql.checkRows(1) + tdSql.query("show tables") + tdSql.checkRows(6) + + tdSql.query("show test.vgroups;") + vgroups_infos = {} # key is id: value is info list + for vgroup_info in tdSql.queryResult: + vgroup_id = vgroup_info[0] + tmp_list = [] + for role in vgroup_info[3:-4]: + if role in ['leader','follower']: + tmp_list.append(role) + vgroups_infos[vgroup_id]=tmp_list + + for k , v in vgroups_infos.items(): + if len(v) ==1 and v[0]=="leader": + tdLog.info(" === create database replica only 1 role leader check success of vgroup_id {} ======".format(k)) + else: + tdLog.exit(" === create database replica only 1 role leader check fail of vgroup_id {} ======".format(k)) + + def create_database(self, dbname, replica_num ,vgroup_nums ): + drop_db_sql = "drop database if exists {}".format(dbname) + create_db_sql = "create database {} replica {} vgroups {}".format(dbname,replica_num,vgroup_nums) + + tdLog.info(" ==== create database {} and insert rows begin =====".format(dbname)) + tdSql.execute(drop_db_sql) + tdSql.execute(create_db_sql) + tdSql.execute("use {}".format(dbname)) + + def create_stable_insert_datas(self,dbname ,stablename , tb_nums , row_nums): + tdSql.execute("use {}".format(dbname)) + tdSql.execute( + '''create table {} + (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(32),c9 nchar(32), c10 timestamp) + tags (t1 int) + '''.format(stablename) + ) + + for i in range(tb_nums): + sub_tbname = "sub_{}_{}".format(stablename,i) + tdSql.execute("create table {} using {} tags({})".format(sub_tbname, stablename ,i)) + # insert datas about new database + + for row_num in range(row_nums): + ts = self.ts + self.ts_step*row_num + tdSql.execute(f"insert into {sub_tbname} values ({ts}, {row_num} ,{row_num}, 10 ,1 ,{row_num} ,{row_num},true,'bin_{row_num}','nchar_{row_num}',now) ") + + tdLog.info(" ==== stable {} insert rows execute end =====".format(stablename)) + + def append_rows_of_exists_tables(self,dbname ,stablename , tbname , append_nums ): + + tdSql.execute("use {}".format(dbname)) + + for row_num in range(append_nums): + tdSql.execute(f"insert into {tbname} values (now, {row_num} ,{row_num}, 10 ,1 ,{row_num} ,{row_num},true,'bin_{row_num}','nchar_{row_num}',now) ") + # print(f"insert into {tbname} values (now, {row_num} ,{row_num}, 10 ,1 ,{row_num} ,{row_num},true,'bin_{row_num}','nchar_{row_num}',now) ") + tdLog.info(" ==== append new rows of table {} belongs to stable {} execute end =====".format(tbname,stablename)) + os.system("taos -s 'select count(*) from {}.{}';".format(dbname,stablename)) + + def check_insert_rows(self, dbname, stablename , tb_nums , row_nums, append_rows): + + tdSql.execute("use {}".format(dbname)) + + tdSql.query("select count(*) from {}.{}".format(dbname,stablename)) + + status_OK = self.mycheckData("select count(*) from {}.{}".format(dbname,stablename) ,0 , 0 , tb_nums*row_nums+append_rows) + + count = 0 + while not status_OK : + if count > self.try_check_times: + os.system("taos -s ' show {}.vgroups;'".format(dbname)) + tdLog.exit(" ==== check insert rows failed after {} try check {} times of database {}".format(count , self.try_check_times ,dbname)) + break + time.sleep(0.1) + tdSql.query("select count(*) from {}.{}".format(dbname,stablename)) + status_OK = self.mycheckData("select count(*) from {}.{}".format(dbname,stablename) ,0 , 0 , tb_nums*row_nums+append_rows) + tdLog.info(" ==== check insert rows first failed , this is {}_th retry check rows of database {}".format(count , dbname)) + count += 1 + + + tdSql.query("select distinct tbname from {}.{}".format(dbname,stablename)) + status_OK = self.mycheckRows("select distinct tbname from {}.{}".format(dbname,stablename) ,tb_nums) + count = 0 + while not status_OK : + if count > self.try_check_times: + os.system("taos -s ' show {}.vgroups;'".format(dbname)) + tdLog.exit(" ==== check insert rows failed after {} try check {} times of database {}".format(count , self.try_check_times ,dbname)) + break + time.sleep(0.1) + tdSql.query("select distinct tbname from {}.{}".format(dbname,stablename)) + status_OK = self.mycheckRows("select distinct tbname from {}.{}".format(dbname,stablename) ,tb_nums) + tdLog.info(" ==== check insert tbnames first failed , this is {}_th retry check tbnames of database {}".format(count , dbname)) + count += 1 + + def _get_stop_dnode_id(self,dbname): + tdSql.query("show {}.vgroups".format(dbname)) + vgroup_infos = tdSql.queryResult + for vgroup_info in vgroup_infos: + leader_infos = vgroup_info[3:-4] + # print(vgroup_info) + for ind ,role in enumerate(leader_infos): + if role =='follower': + # print(ind,leader_infos) + self.stop_dnode_id = leader_infos[ind-1] + break + + + return self.stop_dnode_id + + def wait_stop_dnode_OK(self): + + def _get_status(): + newTdSql=tdCom.newTdSql() + + status = "" + newTdSql.query("show dnodes") + dnode_infos = newTdSql.queryResult + for dnode_info in dnode_infos: + id = dnode_info[0] + dnode_status = dnode_info[4] + if id == self.stop_dnode_id: + status = dnode_status + break + return status + + status = _get_status() + while status !="offline": + time.sleep(0.1) + status = _get_status() + # tdLog.info("==== stop dnode has not been stopped , endpoint is {}".format(self.stop_dnode)) + tdLog.info("==== stop_dnode has stopped , id is {}".format(self.stop_dnode_id)) + + def wait_start_dnode_OK(self): + + def _get_status(): + newTdSql=tdCom.newTdSql() + status = "" + newTdSql.query("show dnodes") + dnode_infos = newTdSql.queryResult + for dnode_info in dnode_infos: + id = dnode_info[0] + dnode_status = dnode_info[4] + if id == self.stop_dnode_id: + status = dnode_status + break + return status + + status = _get_status() + while status !="ready": + time.sleep(0.1) + status = _get_status() + # tdLog.info("==== stop dnode has not been stopped , endpoint is {}".format(self.stop_dnode)) + tdLog.info("==== stop_dnode has restart , id is {}".format(self.stop_dnode_id)) + + def _parse_datetime(self,timestr): + try: + return datetime.datetime.strptime(timestr, '%Y-%m-%d %H:%M:%S.%f') + except ValueError: + pass + try: + return datetime.datetime.strptime(timestr, '%Y-%m-%d %H:%M:%S') + except ValueError: + pass + + def mycheckRowCol(self, sql, row, col): + caller = inspect.getframeinfo(inspect.stack()[2][0]) + if row < 0: + args = (caller.filename, caller.lineno, sql, row) + tdLog.exit("%s(%d) failed: sql:%s, row:%d is smaller than zero" % args) + if col < 0: + args = (caller.filename, caller.lineno, sql, row) + tdLog.exit("%s(%d) failed: sql:%s, col:%d is smaller than zero" % args) + if row > tdSql.queryRows: + args = (caller.filename, caller.lineno, sql, row, tdSql.queryRows) + tdLog.exit("%s(%d) failed: sql:%s, row:%d is larger than queryRows:%d" % args) + if col > tdSql.queryCols: + args = (caller.filename, caller.lineno, sql, col, tdSql.queryCols) + tdLog.exit("%s(%d) failed: sql:%s, col:%d is larger than queryCols:%d" % args) + + def mycheckData(self, sql ,row, col, data): + check_status = True + self.mycheckRowCol(sql ,row, col) + if tdSql.queryResult[row][col] != data: + if tdSql.cursor.istype(col, "TIMESTAMP"): + # suppose user want to check nanosecond timestamp if a longer data passed + if (len(data) >= 28): + if pd.to_datetime(tdSql.queryResult[row][col]) == pd.to_datetime(data): + tdLog.info("sql:%s, row:%d col:%d data:%d == expect:%s" % + (sql, row, col, tdSql.queryResult[row][col], data)) + else: + if tdSql.queryResult[row][col] == self._parse_datetime(data): + tdLog.info("sql:%s, row:%d col:%d data:%s == expect:%s" % + (sql, row, col, tdSql.queryResult[row][col], data)) + return + + if str(tdSql.queryResult[row][col]) == str(data): + tdLog.info("sql:%s, row:%d col:%d data:%s == expect:%s" % + (sql, row, col, tdSql.queryResult[row][col], data)) + return + elif isinstance(data, float) and abs(tdSql.queryResult[row][col] - data) <= 0.000001: + tdLog.info("sql:%s, row:%d col:%d data:%f == expect:%f" % + (sql, row, col, tdSql.queryResult[row][col], data)) + return + else: + caller = inspect.getframeinfo(inspect.stack()[1][0]) + args = (caller.filename, caller.lineno, sql, row, col, tdSql.queryResult[row][col], data) + tdLog.info("%s(%d) failed: sql:%s row:%d col:%d data:%s != expect:%s" % args) + + check_status = False + + if data is None: + tdLog.info("sql:%s, row:%d col:%d data:%s == expect:%s" % + (sql, row, col, tdSql.queryResult[row][col], data)) + elif isinstance(data, str): + tdLog.info("sql:%s, row:%d col:%d data:%s == expect:%s" % + (sql, row, col, tdSql.queryResult[row][col], data)) + elif isinstance(data, datetime.date): + tdLog.info("sql:%s, row:%d col:%d data:%s == expect:%s" % + (sql, row, col, tdSql.queryResult[row][col], data)) + elif isinstance(data, float): + tdLog.info("sql:%s, row:%d col:%d data:%s == expect:%s" % + (sql, row, col, tdSql.queryResult[row][col], data)) + else: + tdLog.info("sql:%s, row:%d col:%d data:%s == expect:%d" % + (sql, row, col, tdSql.queryResult[row][col], data)) + + return check_status + + def mycheckRows(self, sql, expectRows): + check_status = True + if len(tdSql.queryResult) == expectRows: + tdLog.info("sql:%s, queryRows:%d == expect:%d" % (sql, len(tdSql.queryResult), expectRows)) + return True + else: + caller = inspect.getframeinfo(inspect.stack()[1][0]) + args = (caller.filename, caller.lineno, sql, len(tdSql.queryResult), expectRows) + tdLog.info("%s(%d) failed: sql:%s, queryRows:%d != expect:%d" % args) + check_status = False + return check_status + + def sync_run_case(self): + # stop follower and insert datas , update tables and create new stables + tdDnodes=cluster.dnodes + for loop in range(self.loop_restart_times): + db_name = "sync_db_{}".format(loop) + stablename = 'stable_{}'.format(loop) + self.create_database(dbname = db_name ,replica_num= self.replica , vgroup_nums= 1) + self.create_stable_insert_datas(dbname = db_name , stablename = stablename , tb_nums= 10 ,row_nums= 10 ) + self.stop_dnode_id = self._get_stop_dnode_id(db_name) + + # check rows of datas + + self.check_insert_rows(db_name ,stablename ,tb_nums=10 , row_nums= 10 ,append_rows=0) + + # begin stop dnode + start = time.time() + tdDnodes[self.stop_dnode_id-1].stoptaosd() + + self.wait_stop_dnode_OK() + + # append rows of stablename when dnode stop + + tbname = "sub_{}_{}".format(stablename , 0) + tdLog.info(" ==== begin append rows of exists table {} when dnode {} offline ====".format(tbname , self.stop_dnode_id)) + self.append_rows_of_exists_tables(db_name ,stablename , tbname , 100 ) + tdLog.info(" ==== check append rows of exists table {} when dnode {} offline ====".format(tbname , self.stop_dnode_id)) + self.check_insert_rows(db_name ,stablename ,tb_nums=10 , row_nums= 10 ,append_rows=100) + + # create new stables + tdLog.info(" ==== create new stable {} when dnode {} offline ====".format('new_stb1' , self.stop_dnode_id)) + self.create_stable_insert_datas(dbname = db_name , stablename = 'new_stb1' , tb_nums= 10 ,row_nums= 10 ) + tdLog.info(" ==== check new stable {} when dnode {} offline ====".format('new_stb1' , self.stop_dnode_id)) + self.check_insert_rows(db_name ,'new_stb1' ,tb_nums=10 , row_nums= 10 ,append_rows=0) + + # begin start dnode + tdDnodes[self.stop_dnode_id-1].starttaosd() + self.wait_start_dnode_OK() + end = time.time() + time_cost = int(end -start) + if time_cost > self.max_restart_time: + tdLog.exit(" ==== restart dnode {} cost too much time , please check ====".format(self.stop_dnode_id)) + + # create new stables again + tdLog.info(" ==== create new stable {} when dnode {} restart ====".format('new_stb2' , self.stop_dnode_id)) + self.create_stable_insert_datas(dbname = db_name , stablename = 'new_stb2' , tb_nums= 10 ,row_nums= 10 ) + tdLog.info(" ==== check new stable {} when dnode {} restart ====".format('new_stb2' , self.stop_dnode_id)) + self.check_insert_rows(db_name ,'new_stb2' ,tb_nums=10 , row_nums= 10 ,append_rows=0) + + def unsync_run_case(self): + + def _restart_dnode_of_db_unsync(dbname): + start = time.time() + tdDnodes=cluster.dnodes + self.stop_dnode_id = self._get_stop_dnode_id(dbname) + # begin restart dnode + tdDnodes[self.stop_dnode_id-1].stoptaosd() + self.wait_stop_dnode_OK() + tdDnodes[self.stop_dnode_id-1].starttaosd() + self.wait_start_dnode_OK() + end = time.time() + time_cost = int(end-start) + + if time_cost > self.max_restart_time: + tdLog.exit(" ==== restart dnode {} cost too much time , please check ====".format(self.stop_dnode_id)) + + + def _create_threading(dbname): + self.current_thread = threading.Thread(target=_restart_dnode_of_db_unsync, args=(dbname,)) + return self.current_thread + + + ''' + in this mode , it will be extra threading control start or stop dnode , insert will always going with not care follower online or alive + ''' + tdDnodes=cluster.dnodes + for loop in range(self.loop_restart_times): + db_name = "unsync_db_{}".format(loop) + stablename = 'stable_{}'.format(loop) + self.create_database(dbname = db_name ,replica_num= self.replica , vgroup_nums= 1) + self.create_stable_insert_datas(dbname = db_name , stablename = stablename , tb_nums= 10 ,row_nums= 10 ) + + tdLog.info(" ===== restart dnode of database {} in an unsync threading ===== ".format(db_name)) + + # create sync threading and start it + self.current_thread = _create_threading(db_name) + self.current_thread.start() + + # check rows of datas + self.check_insert_rows(db_name ,stablename ,tb_nums=10 , row_nums= 10 ,append_rows=0) + + tbname = "sub_{}_{}".format(stablename , 0) + tdLog.info(" ==== begin append rows of exists table {} when dnode {} offline ====".format(tbname , self.stop_dnode_id)) + self.append_rows_of_exists_tables(db_name ,stablename , tbname , 100 ) + tdLog.info(" ==== check append rows of exists table {} when dnode {} offline ====".format(tbname , self.stop_dnode_id)) + self.check_insert_rows(db_name ,stablename ,tb_nums=10 , row_nums= 10 ,append_rows=100) + + # create new stables + tdLog.info(" ==== create new stable {} when dnode {} offline ====".format('new_stb1' , self.stop_dnode_id)) + self.create_stable_insert_datas(dbname = db_name , stablename = 'new_stb1' , tb_nums= 10 ,row_nums= 10 ) + tdLog.info(" ==== check new stable {} when dnode {} offline ====".format('new_stb1' , self.stop_dnode_id)) + self.check_insert_rows(db_name ,'new_stb1' ,tb_nums=10 , row_nums= 10 ,append_rows=0) + + # create new stables again + tdLog.info(" ==== create new stable {} when dnode {} restart ====".format('new_stb2' , self.stop_dnode_id)) + self.create_stable_insert_datas(dbname = db_name , stablename = 'new_stb2' , tb_nums= 10 ,row_nums= 10 ) + tdLog.info(" ==== check new stable {} when dnode {} restart ====".format('new_stb2' , self.stop_dnode_id)) + self.check_insert_rows(db_name ,'new_stb2' ,tb_nums=10 , row_nums= 10 ,append_rows=0) + + self.current_thread.join() + + + def run(self): + + # basic insert and check of cluster + self.check_setup_cluster_status() + self.create_db_check_vgroups() + self.sync_run_case() + # self.unsync_run_case() + + + + + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) \ No newline at end of file diff --git a/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_follower.py b/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_follower_unsync.py similarity index 71% rename from tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_follower.py rename to tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_follower_unsync.py index 63d560587d..854a28b48a 100644 --- a/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_follower.py +++ b/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_follower_unsync.py @@ -12,6 +12,8 @@ from util.dnodes import TDDnodes from util.dnodes import TDDnode from util.cluster import * +import datetime +import inspect import time import socket import subprocess @@ -35,7 +37,8 @@ class TDTestCase: self.stop_dnode_id = None self.loop_restart_times = 5 self.current_thread = None - self.max_restart_time = 20 + self.max_restart_time = 10 + self.try_check_times = 10 def getBuildPath(self): selfPath = os.path.dirname(os.path.realpath(__file__)) @@ -173,10 +176,37 @@ class TDTestCase: def check_insert_rows(self, dbname, stablename , tb_nums , row_nums, append_rows): tdSql.execute("use {}".format(dbname)) + tdSql.query("select count(*) from {}.{}".format(dbname,stablename)) - tdSql.checkData(0 , 0 , tb_nums*row_nums+append_rows) + + status_OK = self.mycheckData("select count(*) from {}.{}".format(dbname,stablename) ,0 , 0 , tb_nums*row_nums+append_rows) + + count = 0 + while not status_OK : + if count > self.try_check_times: + os.system("taos -s ' show {}.vgroups; '".format(dbname)) + tdLog.exit(" ==== check insert rows failed after {} try check {} times of database {}".format(count , self.try_check_times ,dbname)) + break + time.sleep(0.1) + tdSql.query("select count(*) from {}.{}".format(dbname,stablename)) + status_OK = self.mycheckData("select count(*) from {}.{}".format(dbname,stablename) ,0 , 0 , tb_nums*row_nums+append_rows) + tdLog.info(" ==== check insert rows first failed , this is {}_th retry check rows of database {}".format(count , dbname)) + count += 1 + + tdSql.query("select distinct tbname from {}.{}".format(dbname,stablename)) - tdSql.checkRows(tb_nums) + status_OK = self.mycheckRows("select distinct tbname from {}.{}".format(dbname,stablename) ,tb_nums) + count = 0 + while not status_OK : + if count > self.try_check_times: + os.system("taos -s ' show {}.vgroups;'".format(dbname)) + tdLog.exit(" ==== check insert rows failed after {} try check {} times of database {}".format(count , self.try_check_times ,dbname)) + break + time.sleep(0.1) + tdSql.query("select distinct tbname from {}.{}".format(dbname,stablename)) + status_OK = self.mycheckRows("select distinct tbname from {}.{}".format(dbname,stablename) ,tb_nums) + tdLog.info(" ==== check insert tbnames first failed , this is {}_th retry check tbnames of database {}".format(count , dbname)) + count += 1 def _get_stop_dnode_id(self,dbname): tdSql.query("show {}.vgroups".format(dbname)) @@ -238,6 +268,92 @@ class TDTestCase: # tdLog.info("==== stop dnode has not been stopped , endpoint is {}".format(self.stop_dnode)) tdLog.info("==== stop_dnode has restart , id is {}".format(self.stop_dnode_id)) + def _parse_datetime(self,timestr): + try: + return datetime.datetime.strptime(timestr, '%Y-%m-%d %H:%M:%S.%f') + except ValueError: + pass + try: + return datetime.datetime.strptime(timestr, '%Y-%m-%d %H:%M:%S') + except ValueError: + pass + + def mycheckRowCol(self, sql, row, col): + caller = inspect.getframeinfo(inspect.stack()[2][0]) + if row < 0: + args = (caller.filename, caller.lineno, sql, row) + tdLog.exit("%s(%d) failed: sql:%s, row:%d is smaller than zero" % args) + if col < 0: + args = (caller.filename, caller.lineno, sql, row) + tdLog.exit("%s(%d) failed: sql:%s, col:%d is smaller than zero" % args) + if row > tdSql.queryRows: + args = (caller.filename, caller.lineno, sql, row, tdSql.queryRows) + tdLog.exit("%s(%d) failed: sql:%s, row:%d is larger than queryRows:%d" % args) + if col > tdSql.queryCols: + args = (caller.filename, caller.lineno, sql, col, tdSql.queryCols) + tdLog.exit("%s(%d) failed: sql:%s, col:%d is larger than queryCols:%d" % args) + + def mycheckData(self, sql ,row, col, data): + check_status = True + self.mycheckRowCol(sql ,row, col) + if tdSql.queryResult[row][col] != data: + if tdSql.cursor.istype(col, "TIMESTAMP"): + # suppose user want to check nanosecond timestamp if a longer data passed + if (len(data) >= 28): + if pd.to_datetime(tdSql.queryResult[row][col]) == pd.to_datetime(data): + tdLog.info("sql:%s, row:%d col:%d data:%d == expect:%s" % + (sql, row, col, tdSql.queryResult[row][col], data)) + else: + if tdSql.queryResult[row][col] == self._parse_datetime(data): + tdLog.info("sql:%s, row:%d col:%d data:%s == expect:%s" % + (sql, row, col, tdSql.queryResult[row][col], data)) + return + + if str(tdSql.queryResult[row][col]) == str(data): + tdLog.info("sql:%s, row:%d col:%d data:%s == expect:%s" % + (sql, row, col, tdSql.queryResult[row][col], data)) + return + elif isinstance(data, float) and abs(tdSql.queryResult[row][col] - data) <= 0.000001: + tdLog.info("sql:%s, row:%d col:%d data:%f == expect:%f" % + (sql, row, col, tdSql.queryResult[row][col], data)) + return + else: + caller = inspect.getframeinfo(inspect.stack()[1][0]) + args = (caller.filename, caller.lineno, sql, row, col, tdSql.queryResult[row][col], data) + tdLog.info("%s(%d) failed: sql:%s row:%d col:%d data:%s != expect:%s" % args) + + check_status = False + + if data is None: + tdLog.info("sql:%s, row:%d col:%d data:%s == expect:%s" % + (sql, row, col, tdSql.queryResult[row][col], data)) + elif isinstance(data, str): + tdLog.info("sql:%s, row:%d col:%d data:%s == expect:%s" % + (sql, row, col, tdSql.queryResult[row][col], data)) + elif isinstance(data, datetime.date): + tdLog.info("sql:%s, row:%d col:%d data:%s == expect:%s" % + (sql, row, col, tdSql.queryResult[row][col], data)) + elif isinstance(data, float): + tdLog.info("sql:%s, row:%d col:%d data:%s == expect:%s" % + (sql, row, col, tdSql.queryResult[row][col], data)) + else: + tdLog.info("sql:%s, row:%d col:%d data:%s == expect:%d" % + (sql, row, col, tdSql.queryResult[row][col], data)) + + return check_status + + def mycheckRows(self, sql, expectRows): + check_status = True + if len(tdSql.queryResult) == expectRows: + tdLog.info("sql:%s, queryRows:%d == expect:%d" % (sql, len(tdSql.queryResult), expectRows)) + return True + else: + caller = inspect.getframeinfo(inspect.stack()[1][0]) + args = (caller.filename, caller.lineno, sql, len(tdSql.queryResult), expectRows) + tdLog.info("%s(%d) failed: sql:%s, queryRows:%d != expect:%d" % args) + check_status = False + return check_status + def sync_run_case(self): # stop follower and insert datas , update tables and create new stables tdDnodes=cluster.dnodes @@ -251,7 +367,7 @@ class TDTestCase: # check rows of datas self.check_insert_rows(db_name ,stablename ,tb_nums=10 , row_nums= 10 ,append_rows=0) - + # begin stop dnode start = time.time() tdDnodes[self.stop_dnode_id-1].stoptaosd() @@ -354,7 +470,7 @@ class TDTestCase: # basic insert and check of cluster self.check_setup_cluster_status() self.create_db_check_vgroups() - self.sync_run_case() + # self.sync_run_case() self.unsync_run_case() diff --git a/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_leader.py b/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_leader.py new file mode 100644 index 0000000000..9dde4a6382 --- /dev/null +++ b/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_leader.py @@ -0,0 +1,548 @@ +# author : wenzhouwww +from ssl import ALERT_DESCRIPTION_CERTIFICATE_UNOBTAINABLE +import taos +import sys +import time +import os + +from util.log import * +from util.sql import * +from util.cases import * +from util.dnodes import TDDnodes +from util.dnodes import TDDnode +from util.cluster import * + +import time +import socket +import subprocess +import threading +sys.path.append(os.path.dirname(__file__)) + +class TDTestCase: + def init(self,conn ,logSql): + tdLog.debug(f"start to excute {__file__}") + tdSql.init(conn.cursor()) + self.host = socket.gethostname() + self.mnode_list = {} + self.dnode_list = {} + self.ts = 1483200000000 + self.ts_step =1000 + self.db_name ='testdb' + self.replica = 3 + self.vgroups = 1 + self.tb_nums = 10 + self.row_nums = 100 + self.stop_dnode_id = None + self.loop_restart_times = 50 + self.current_thread = None + self.max_restart_time = 5 + + def getBuildPath(self): + selfPath = os.path.dirname(os.path.realpath(__file__)) + if ("community" in selfPath): + projPath = selfPath[:selfPath.find("community")] + else: + projPath = selfPath[:selfPath.find("tests")] + + for root, dirs, files in os.walk(projPath): + if ("taosd" in files): + rootRealPath = os.path.dirname(os.path.realpath(root)) + if ("packaging" not in rootRealPath): + buildPath = root[:len(root) - len("/build/bin")] + break + return buildPath + + def _parse_datetime(self,timestr): + try: + return datetime.datetime.strptime(timestr, '%Y-%m-%d %H:%M:%S.%f') + except ValueError: + pass + try: + return datetime.datetime.strptime(timestr, '%Y-%m-%d %H:%M:%S') + except ValueError: + pass + + def mycheckRowCol(self, sql, row, col): + caller = inspect.getframeinfo(inspect.stack()[2][0]) + if row < 0: + args = (caller.filename, caller.lineno, sql, row) + tdLog.exit("%s(%d) failed: sql:%s, row:%d is smaller than zero" % args) + if col < 0: + args = (caller.filename, caller.lineno, sql, row) + tdLog.exit("%s(%d) failed: sql:%s, col:%d is smaller than zero" % args) + if row > tdSql.queryRows: + args = (caller.filename, caller.lineno, sql, row, tdSql.queryRows) + tdLog.exit("%s(%d) failed: sql:%s, row:%d is larger than queryRows:%d" % args) + if col > tdSql.queryCols: + args = (caller.filename, caller.lineno, sql, col, tdSql.queryCols) + tdLog.exit("%s(%d) failed: sql:%s, col:%d is larger than queryCols:%d" % args) + + def mycheckData(self, sql ,row, col, data): + check_status = True + self.mycheckRowCol(sql ,row, col) + if tdSql.queryResult[row][col] != data: + if tdSql.cursor.istype(col, "TIMESTAMP"): + # suppose user want to check nanosecond timestamp if a longer data passed + if (len(data) >= 28): + if pd.to_datetime(tdSql.queryResult[row][col]) == pd.to_datetime(data): + tdLog.info("sql:%s, row:%d col:%d data:%d == expect:%s" % + (sql, row, col, tdSql.queryResult[row][col], data)) + else: + if tdSql.queryResult[row][col] == self._parse_datetime(data): + tdLog.info("sql:%s, row:%d col:%d data:%s == expect:%s" % + (sql, row, col, tdSql.queryResult[row][col], data)) + return + + if str(tdSql.queryResult[row][col]) == str(data): + tdLog.info("sql:%s, row:%d col:%d data:%s == expect:%s" % + (sql, row, col, tdSql.queryResult[row][col], data)) + return + elif isinstance(data, float) and abs(tdSql.queryResult[row][col] - data) <= 0.000001: + tdLog.info("sql:%s, row:%d col:%d data:%f == expect:%f" % + (sql, row, col, tdSql.queryResult[row][col], data)) + return + else: + caller = inspect.getframeinfo(inspect.stack()[1][0]) + args = (caller.filename, caller.lineno, sql, row, col, tdSql.queryResult[row][col], data) + tdLog.info("%s(%d) failed: sql:%s row:%d col:%d data:%s != expect:%s" % args) + + check_status = False + + if data is None: + tdLog.info("sql:%s, row:%d col:%d data:%s == expect:%s" % + (sql, row, col, tdSql.queryResult[row][col], data)) + elif isinstance(data, str): + tdLog.info("sql:%s, row:%d col:%d data:%s == expect:%s" % + (sql, row, col, tdSql.queryResult[row][col], data)) + elif isinstance(data, datetime.date): + tdLog.info("sql:%s, row:%d col:%d data:%s == expect:%s" % + (sql, row, col, tdSql.queryResult[row][col], data)) + elif isinstance(data, float): + tdLog.info("sql:%s, row:%d col:%d data:%s == expect:%s" % + (sql, row, col, tdSql.queryResult[row][col], data)) + else: + tdLog.info("sql:%s, row:%d col:%d data:%s == expect:%d" % + (sql, row, col, tdSql.queryResult[row][col], data)) + + return check_status + + def mycheckRows(self, sql, expectRows): + check_status = True + if len(tdSql.queryResult) == expectRows: + tdLog.info("sql:%s, queryRows:%d == expect:%d" % (sql, len(tdSql.queryResult), expectRows)) + return True + else: + caller = inspect.getframeinfo(inspect.stack()[1][0]) + args = (caller.filename, caller.lineno, sql, len(tdSql.queryResult), expectRows) + tdLog.info("%s(%d) failed: sql:%s, queryRows:%d != expect:%d" % args) + check_status = False + return check_status + + def check_setup_cluster_status(self): + tdSql.query("show mnodes") + for mnode in tdSql.queryResult: + name = mnode[1] + info = mnode + self.mnode_list[name] = info + + tdSql.query("show dnodes") + for dnode in tdSql.queryResult: + name = dnode[1] + info = dnode + self.dnode_list[name] = info + + count = 0 + is_leader = False + mnode_name = '' + for k,v in self.mnode_list.items(): + count +=1 + # only for 1 mnode + mnode_name = k + + if v[2] =='leader': + is_leader=True + + if count==1 and is_leader: + tdLog.info("===== depoly cluster success with 1 mnode as leader =====") + else: + tdLog.exit("===== depoly cluster fail with 1 mnode as leader =====") + + for k ,v in self.dnode_list.items(): + if k == mnode_name: + if v[3]==0: + tdLog.info("===== depoly cluster mnode only success at {} , support_vnodes is {} ".format(mnode_name,v[3])) + else: + tdLog.exit("===== depoly cluster mnode only fail at {} , support_vnodes is {} ".format(mnode_name,v[3])) + else: + continue + + def create_db_check_vgroups(self): + + tdSql.execute("drop database if exists test") + tdSql.execute("create database if not exists test replica 1 duration 300") + tdSql.execute("use test") + tdSql.execute( + '''create table stb1 + (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(16),c9 nchar(32), c10 timestamp) + tags (t1 int) + ''' + ) + tdSql.execute( + ''' + create table t1 + (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(16),c9 nchar(32), c10 timestamp) + ''' + ) + + for i in range(5): + tdSql.execute("create table sub_tb_{} using stb1 tags({})".format(i,i)) + tdSql.query("show stables") + tdSql.checkRows(1) + tdSql.query("show tables") + tdSql.checkRows(6) + + tdSql.query("show test.vgroups;") + vgroups_infos = {} # key is id: value is info list + for vgroup_info in tdSql.queryResult: + vgroup_id = vgroup_info[0] + tmp_list = [] + for role in vgroup_info[3:-4]: + if role in ['leader','follower']: + tmp_list.append(role) + vgroups_infos[vgroup_id]=tmp_list + + for k , v in vgroups_infos.items(): + if len(v) ==1 and v[0]=="leader": + tdLog.info(" === create database replica only 1 role leader check success of vgroup_id {} ======".format(k)) + else: + tdLog.exit(" === create database replica only 1 role leader check fail of vgroup_id {} ======".format(k)) + + def create_database(self, dbname, replica_num ,vgroup_nums ): + drop_db_sql = "drop database if exists {}".format(dbname) + create_db_sql = "create database {} replica {} vgroups {}".format(dbname,replica_num,vgroup_nums) + + tdLog.info(" ==== create database {} and insert rows begin =====".format(dbname)) + tdSql.execute(drop_db_sql) + tdSql.execute(create_db_sql) + tdSql.execute("use {}".format(dbname)) + + def create_stable_insert_datas(self,dbname ,stablename , tb_nums , row_nums): + tdSql.execute("use {}".format(dbname)) + tdSql.execute( + '''create table {} + (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(32),c9 nchar(32), c10 timestamp) + tags (t1 int) + '''.format(stablename) + ) + + for i in range(tb_nums): + sub_tbname = "sub_{}_{}".format(stablename,i) + tdSql.execute("create table {} using {} tags({})".format(sub_tbname, stablename ,i)) + # insert datas about new database + + for row_num in range(row_nums): + ts = self.ts + self.ts_step*row_num + tdSql.execute(f"insert into {sub_tbname} values ({ts}, {row_num} ,{row_num}, 10 ,1 ,{row_num} ,{row_num},true,'bin_{row_num}','nchar_{row_num}',now) ") + + tdLog.info(" ==== stable {} insert rows execute end =====".format(stablename)) + + def append_rows_of_exists_tables(self,dbname ,stablename , tbname , append_nums ): + + tdSql.execute("use {}".format(dbname)) + + for row_num in range(append_nums): + tdSql.execute(f"insert into {tbname} values (now, {row_num} ,{row_num}, 10 ,1 ,{row_num} ,{row_num},true,'bin_{row_num}','nchar_{row_num}',now) ") + # print(f"insert into {tbname} values (now, {row_num} ,{row_num}, 10 ,1 ,{row_num} ,{row_num},true,'bin_{row_num}','nchar_{row_num}',now) ") + tdLog.info(" ==== append new rows of table {} belongs to stable {} execute end =====".format(tbname,stablename)) + os.system("taos -s 'select count(*) from {}.{}';".format(dbname,stablename)) + + def check_insert_rows(self, dbname, stablename , tb_nums , row_nums, append_rows): + + tdSql.execute("use {}".format(dbname)) + + tdSql.query("select count(*) from {}.{}".format(dbname,stablename)) + + status_OK = self.mycheckData("select count(*) from {}.{}".format(dbname,stablename) ,0 , 0 , tb_nums*row_nums+append_rows) + + count = 0 + while not status_OK : + if count > self.try_check_times: + os.system("taos -s ' show {}.vgroups;'".format(dbname)) + tdLog.exit(" ==== check insert rows failed after {} try check {} times of database {}".format(count , self.try_check_times ,dbname)) + break + time.sleep(0.1) + tdSql.query("select count(*) from {}.{}".format(dbname,stablename)) + status_OK = self.mycheckData("select count(*) from {}.{}".format(dbname,stablename) ,0 , 0 , tb_nums*row_nums+append_rows) + tdLog.info(" ==== check insert rows first failed , this is {}_th retry check rows of database {}".format(count , dbname)) + count += 1 + + tdSql.query("select distinct tbname from {}.{}".format(dbname,stablename)) + status_OK = self.mycheckRows("select distinct tbname from {}.{}".format(dbname,stablename) ,tb_nums) + count = 0 + while not status_OK : + if count > self.try_check_times: + os.system("taos -s ' show {}.vgroups;'".format(dbname)) + tdLog.exit(" ==== check insert rows failed after {} try check {} times of database {}".format(count , self.try_check_times ,dbname)) + break + time.sleep(0.1) + tdSql.query("select distinct tbname from {}.{}".format(dbname,stablename)) + status_OK = self.mycheckRows("select distinct tbname from {}.{}".format(dbname,stablename) ,tb_nums) + tdLog.info(" ==== check insert tbnames first failed , this is {}_th retry check tbnames of database {}".format(count , dbname)) + count += 1 + + def _get_stop_dnode_id(self,dbname): + newTdSql=tdCom.newTdSql() + newTdSql.query("show {}.vgroups".format(dbname)) + vgroup_infos = newTdSql.queryResult + for vgroup_info in vgroup_infos: + leader_infos = vgroup_info[3:-4] + # print(vgroup_info) + for ind ,role in enumerate(leader_infos): + if role =='leader': + # print(ind,leader_infos) + self.stop_dnode_id = leader_infos[ind-1] + break + + + return self.stop_dnode_id + + def wait_stop_dnode_OK(self): + + def _get_status(): + newTdSql=tdCom.newTdSql() + + status = "" + newTdSql.query("show dnodes") + dnode_infos = newTdSql.queryResult + for dnode_info in dnode_infos: + id = dnode_info[0] + dnode_status = dnode_info[4] + if id == self.stop_dnode_id: + status = dnode_status + break + return status + + status = _get_status() + while status !="offline": + time.sleep(0.1) + status = _get_status() + # tdLog.info("==== stop dnode has not been stopped , endpoint is {}".format(self.stop_dnode)) + tdLog.info("==== stop_dnode has stopped , id is {}".format(self.stop_dnode_id)) + + def wait_start_dnode_OK(self): + + def _get_status(): + newTdSql=tdCom.newTdSql() + status = "" + newTdSql.query("show dnodes") + dnode_infos = newTdSql.queryResult + for dnode_info in dnode_infos: + id = dnode_info[0] + dnode_status = dnode_info[4] + if id == self.stop_dnode_id: + status = dnode_status + break + return status + + status = _get_status() + while status !="ready": + time.sleep(0.1) + status = _get_status() + # tdLog.info("==== stop dnode has not been stopped , endpoint is {}".format(self.stop_dnode)) + tdLog.info("==== stop_dnode has restart , id is {}".format(self.stop_dnode_id)) + + def get_leader_infos(self ,dbname): + + newTdSql=tdCom.newTdSql() + newTdSql.query("show {}.vgroups".format(dbname)) + vgroup_infos = newTdSql.queryResult + + leader_infos = set() + for vgroup_info in vgroup_infos: + leader_infos.add(vgroup_info[3:-4]) + + return leader_infos + + def check_revote_leader_success(self, dbname, before_leader_infos , after_leader_infos): + check_status = False + vote_act = set(set(after_leader_infos)-set(before_leader_infos)) + if not vote_act: + print("=======before_revote_leader_infos ======\n" , before_leader_infos) + print("=======after_revote_leader_infos ======\n" , after_leader_infos) + tdLog.exit(" ===maybe revote not occured , there is no dnode offline ====") + else: + for vgroup_info in vote_act: + for ind , role in enumerate(vgroup_info): + if role==self.stop_dnode_id: + + if vgroup_info[ind+1] =="offline" and "leader" in vgroup_info: + tdLog.info(" === revote leader ok , leader is {} now ====".format(list(vgroup_info).index("leader")-1)) + check_status = True + elif vgroup_info[ind+1] !="offline": + tdLog.info(" === dnode {} should be offline ".format(self.stop_dnode_id)) + else: + continue + break + return check_status + + def sync_run_case(self): + # stop follower and insert datas , update tables and create new stables + tdDnodes=cluster.dnodes + for loop in range(self.loop_restart_times): + db_name = "sync_db_{}".format(loop) + stablename = 'stable_{}'.format(loop) + self.create_database(dbname = db_name ,replica_num= self.replica , vgroup_nums= 1) + self.create_stable_insert_datas(dbname = db_name , stablename = stablename , tb_nums= 10 ,row_nums= 10 ) + self.stop_dnode_id = self._get_stop_dnode_id(db_name) + + # check rows of datas + + self.check_insert_rows(db_name ,stablename ,tb_nums=10 , row_nums= 10 ,append_rows=0) + + # get leader info before stop + before_leader_infos = self.get_leader_infos(db_name) + + # begin stop dnode + + tdDnodes[self.stop_dnode_id-1].stoptaosd() + + self.wait_stop_dnode_OK() + + # vote leaders check + + # get leader info after stop + after_leader_infos = self.get_leader_infos(db_name) + + revote_status = self.check_revote_leader_success(db_name ,before_leader_infos , after_leader_infos) + + # append rows of stablename when dnode stop make sure revote leaders + + while not revote_status: + after_leader_infos = self.get_leader_infos(db_name) + revote_status = self.check_revote_leader_success(db_name ,before_leader_infos , after_leader_infos) + + + if revote_status: + tbname = "sub_{}_{}".format(stablename , 0) + tdLog.info(" ==== begin append rows of exists table {} when dnode {} offline ====".format(tbname , self.stop_dnode_id)) + self.append_rows_of_exists_tables(db_name ,stablename , tbname , 100 ) + tdLog.info(" ==== check append rows of exists table {} when dnode {} offline ====".format(tbname , self.stop_dnode_id)) + self.check_insert_rows(db_name ,stablename ,tb_nums=10 , row_nums= 10 ,append_rows=100) + + # create new stables + tdLog.info(" ==== create new stable {} when dnode {} offline ====".format('new_stb1' , self.stop_dnode_id)) + self.create_stable_insert_datas(dbname = db_name , stablename = 'new_stb1' , tb_nums= 10 ,row_nums= 10 ) + tdLog.info(" ==== check new stable {} when dnode {} offline ====".format('new_stb1' , self.stop_dnode_id)) + self.check_insert_rows(db_name ,'new_stb1' ,tb_nums=10 , row_nums= 10 ,append_rows=0) + else: + tdLog.info("===== leader of database {} is not ok , append rows fail =====".format(db_name)) + + # begin start dnode + start = time.time() + tdDnodes[self.stop_dnode_id-1].starttaosd() + self.wait_start_dnode_OK() + end = time.time() + time_cost = int(end -start) + if time_cost > self.max_restart_time: + tdLog.exit(" ==== restart dnode {} cost too much time , please check ====".format(self.stop_dnode_id)) + + # create new stables again + tdLog.info(" ==== create new stable {} when dnode {} restart ====".format('new_stb2' , self.stop_dnode_id)) + self.create_stable_insert_datas(dbname = db_name , stablename = 'new_stb2' , tb_nums= 10 ,row_nums= 10 ) + tdLog.info(" ==== check new stable {} when dnode {} restart ====".format('new_stb2' , self.stop_dnode_id)) + self.check_insert_rows(db_name ,'new_stb2' ,tb_nums=10 , row_nums= 10 ,append_rows=0) + + def unsync_run_case(self): + + def _restart_dnode_of_db_unsync(dbname): + + tdDnodes=cluster.dnodes + self.stop_dnode_id = self._get_stop_dnode_id(dbname) + # begin restart dnode + + # get leader info before stop + before_leader_infos = self.get_leader_infos(db_name) + + tdDnodes[self.stop_dnode_id-1].stoptaosd() + + # self.wait_stop_dnode_OK() + + # check revote leader when restart servers + # # get leader info after stop + # after_leader_infos = self.get_leader_infos(db_name) + # revote_status = self.check_revote_leader_success(db_name ,before_leader_infos , after_leader_infos) + # # append rows of stablename when dnode stop make sure revote leaders + # while not revote_status: + # after_leader_infos = self.get_leader_infos(db_name) + # revote_status = self.check_revote_leader_success(db_name ,before_leader_infos , after_leader_infos) + tdDnodes[self.stop_dnode_id-1].starttaosd() + start = time.time() + self.wait_start_dnode_OK() + end = time.time() + time_cost = int(end-start) + + if time_cost > self.max_restart_time: + tdLog.exit(" ==== restart dnode {} cost too much time , please check ====".format(self.stop_dnode_id)) + + + def _create_threading(dbname): + self.current_thread = threading.Thread(target=_restart_dnode_of_db_unsync, args=(dbname,)) + return self.current_thread + + + ''' + in this mode , it will be extra threading control start or stop dnode , insert will always going with not care follower online or alive + ''' + for loop in range(self.loop_restart_times): + db_name = "unsync_db_{}".format(loop) + stablename = 'stable_{}'.format(loop) + self.create_database(dbname = db_name ,replica_num= self.replica , vgroup_nums= 1) + self.create_stable_insert_datas(dbname = db_name , stablename = stablename , tb_nums= 10 ,row_nums= 10 ) + + tdLog.info(" ===== restart dnode of database {} in an unsync threading ===== ".format(db_name)) + + # create sync threading and start it + self.current_thread = _create_threading(db_name) + self.current_thread.start() + + # check rows of datas + self.check_insert_rows(db_name ,stablename ,tb_nums=10 , row_nums= 10 ,append_rows=0) + + tbname = "sub_{}_{}".format(stablename , 0) + tdLog.info(" ==== begin append rows of exists table {} when dnode {} offline ====".format(tbname , self.stop_dnode_id)) + self.append_rows_of_exists_tables(db_name ,stablename , tbname , 100 ) + tdLog.info(" ==== check append rows of exists table {} when dnode {} offline ====".format(tbname , self.stop_dnode_id)) + self.check_insert_rows(db_name ,stablename ,tb_nums=10 , row_nums= 10 ,append_rows=100) + + # create new stables + tdLog.info(" ==== create new stable {} when dnode {} offline ====".format('new_stb1' , self.stop_dnode_id)) + self.create_stable_insert_datas(dbname = db_name , stablename = 'new_stb1' , tb_nums= 10 ,row_nums= 10 ) + tdLog.info(" ==== check new stable {} when dnode {} offline ====".format('new_stb1' , self.stop_dnode_id)) + self.check_insert_rows(db_name ,'new_stb1' ,tb_nums=10 , row_nums= 10 ,append_rows=0) + + # create new stables again + tdLog.info(" ==== create new stable {} when dnode {} restart ====".format('new_stb2' , self.stop_dnode_id)) + self.create_stable_insert_datas(dbname = db_name , stablename = 'new_stb2' , tb_nums= 10 ,row_nums= 10 ) + tdLog.info(" ==== check new stable {} when dnode {} restart ====".format('new_stb2' , self.stop_dnode_id)) + self.check_insert_rows(db_name ,'new_stb2' ,tb_nums=10 , row_nums= 10 ,append_rows=0) + + self.current_thread.join() + + + def run(self): + + # basic insert and check of cluster + self.check_setup_cluster_status() + self.create_db_check_vgroups() + # self.sync_run_case() + self.unsync_run_case() + + + + + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) \ No newline at end of file From cdf1a72f7ca6c75cd0656b7159249bc6e3073320 Mon Sep 17 00:00:00 2001 From: "wenzhouwww@live.cn" Date: Fri, 22 Jul 2022 16:51:29 +0800 Subject: [PATCH 7/9] add test case of stop leader of an database --- ..._basic_replica3_insertdatas_stop_leader.py | 49 +++++++++---------- 1 file changed, 23 insertions(+), 26 deletions(-) diff --git a/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_leader.py b/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_leader.py index 9dde4a6382..d326a23137 100644 --- a/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_leader.py +++ b/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_leader.py @@ -33,7 +33,7 @@ class TDTestCase: self.tb_nums = 10 self.row_nums = 100 self.stop_dnode_id = None - self.loop_restart_times = 50 + self.loop_restart_times = 10 self.current_thread = None self.max_restart_time = 5 @@ -459,12 +459,29 @@ class TDTestCase: tdDnodes=cluster.dnodes self.stop_dnode_id = self._get_stop_dnode_id(dbname) # begin restart dnode - - # get leader info before stop - before_leader_infos = self.get_leader_infos(db_name) tdDnodes[self.stop_dnode_id-1].stoptaosd() + tbname = "sub_{}_{}".format(stablename , 0) + tdLog.info(" ==== begin append rows of exists table {} when dnode {} offline ====".format(tbname , self.stop_dnode_id)) + self.append_rows_of_exists_tables(db_name ,stablename , tbname , 100 ) + tdLog.info(" ==== check append rows of exists table {} when dnode {} offline ====".format(tbname , self.stop_dnode_id)) + self.check_insert_rows(db_name ,stablename ,tb_nums=10 , row_nums= 10 ,append_rows=100) + + # create new stables + tdLog.info(" ==== create new stable {} when dnode {} offline ====".format('new_stb1' , self.stop_dnode_id)) + self.create_stable_insert_datas(dbname = db_name , stablename = 'new_stb1' , tb_nums= 10 ,row_nums= 10 ) + tdLog.info(" ==== check new stable {} when dnode {} offline ====".format('new_stb1' , self.stop_dnode_id)) + self.check_insert_rows(db_name ,'new_stb1' ,tb_nums=10 , row_nums= 10 ,append_rows=0) + + # create new stables again + tdLog.info(" ==== create new stable {} when dnode {} restart ====".format('new_stb2' , self.stop_dnode_id)) + self.create_stable_insert_datas(dbname = db_name , stablename = 'new_stb2' , tb_nums= 10 ,row_nums= 10 ) + tdLog.info(" ==== check new stable {} when dnode {} restart ====".format('new_stb2' , self.stop_dnode_id)) + self.check_insert_rows(db_name ,'new_stb2' ,tb_nums=10 , row_nums= 10 ,append_rows=0) + + # # get leader info before stop + # before_leader_infos = self.get_leader_infos(db_name) # self.wait_stop_dnode_OK() # check revote leader when restart servers @@ -508,23 +525,6 @@ class TDTestCase: # check rows of datas self.check_insert_rows(db_name ,stablename ,tb_nums=10 , row_nums= 10 ,append_rows=0) - tbname = "sub_{}_{}".format(stablename , 0) - tdLog.info(" ==== begin append rows of exists table {} when dnode {} offline ====".format(tbname , self.stop_dnode_id)) - self.append_rows_of_exists_tables(db_name ,stablename , tbname , 100 ) - tdLog.info(" ==== check append rows of exists table {} when dnode {} offline ====".format(tbname , self.stop_dnode_id)) - self.check_insert_rows(db_name ,stablename ,tb_nums=10 , row_nums= 10 ,append_rows=100) - - # create new stables - tdLog.info(" ==== create new stable {} when dnode {} offline ====".format('new_stb1' , self.stop_dnode_id)) - self.create_stable_insert_datas(dbname = db_name , stablename = 'new_stb1' , tb_nums= 10 ,row_nums= 10 ) - tdLog.info(" ==== check new stable {} when dnode {} offline ====".format('new_stb1' , self.stop_dnode_id)) - self.check_insert_rows(db_name ,'new_stb1' ,tb_nums=10 , row_nums= 10 ,append_rows=0) - - # create new stables again - tdLog.info(" ==== create new stable {} when dnode {} restart ====".format('new_stb2' , self.stop_dnode_id)) - self.create_stable_insert_datas(dbname = db_name , stablename = 'new_stb2' , tb_nums= 10 ,row_nums= 10 ) - tdLog.info(" ==== check new stable {} when dnode {} restart ====".format('new_stb2' , self.stop_dnode_id)) - self.check_insert_rows(db_name ,'new_stb2' ,tb_nums=10 , row_nums= 10 ,append_rows=0) self.current_thread.join() @@ -534,11 +534,8 @@ class TDTestCase: # basic insert and check of cluster self.check_setup_cluster_status() self.create_db_check_vgroups() - # self.sync_run_case() - self.unsync_run_case() - - - + self.sync_run_case() + # self.unsync_run_case() def stop(self): tdSql.close() From f9c907bd59e72ae1cfa69effe3a3acd5ff371a80 Mon Sep 17 00:00:00 2001 From: "wenzhouwww@live.cn" Date: Fri, 22 Jul 2022 18:47:23 +0800 Subject: [PATCH 8/9] add test case for stop follower by kill -9 --- ...replica3_insertdatas_stop_follower_sync.py | 16 +- ...plica3_insertdatas_stop_follower_unsync.py | 17 +- ...rtdatas_stop_follower_unsync_force_stop.py | 521 ++++++++++++++++++ ..._basic_replica3_insertdatas_stop_leader.py | 17 +- 4 files changed, 565 insertions(+), 6 deletions(-) create mode 100644 tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_follower_unsync_force_stop.py diff --git a/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_follower_sync.py b/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_follower_sync.py index 5db1ced199..22d3ba6dbc 100644 --- a/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_follower_sync.py +++ b/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_follower_sync.py @@ -179,22 +179,32 @@ class TDTestCase: tdSql.query("select count(*) from {}.{}".format(dbname,stablename)) + while not tdSql.queryResult: + time.sleep(0.1) + tdSql.query("select count(*) from {}.{}".format(dbname,stablename)) + status_OK = self.mycheckData("select count(*) from {}.{}".format(dbname,stablename) ,0 , 0 , tb_nums*row_nums+append_rows) count = 0 while not status_OK : if count > self.try_check_times: - os.system("taos -s ' show {}.vgroups;'".format(dbname)) + os.system("taos -s ' show {}.vgroups; '".format(dbname)) tdLog.exit(" ==== check insert rows failed after {} try check {} times of database {}".format(count , self.try_check_times ,dbname)) break time.sleep(0.1) tdSql.query("select count(*) from {}.{}".format(dbname,stablename)) + while not tdSql.queryResult: + time.sleep(0.1) + tdSql.query("select count(*) from {}.{}".format(dbname,stablename)) status_OK = self.mycheckData("select count(*) from {}.{}".format(dbname,stablename) ,0 , 0 , tb_nums*row_nums+append_rows) tdLog.info(" ==== check insert rows first failed , this is {}_th retry check rows of database {}".format(count , dbname)) count += 1 tdSql.query("select distinct tbname from {}.{}".format(dbname,stablename)) + while not tdSql.queryResult: + time.sleep(0.1) + tdSql.query("select distinct tbname from {}.{}".format(dbname,stablename)) status_OK = self.mycheckRows("select distinct tbname from {}.{}".format(dbname,stablename) ,tb_nums) count = 0 while not status_OK : @@ -204,10 +214,12 @@ class TDTestCase: break time.sleep(0.1) tdSql.query("select distinct tbname from {}.{}".format(dbname,stablename)) + while not tdSql.queryResult: + time.sleep(0.1) + tdSql.query("select distinct tbname from {}.{}".format(dbname,stablename)) status_OK = self.mycheckRows("select distinct tbname from {}.{}".format(dbname,stablename) ,tb_nums) tdLog.info(" ==== check insert tbnames first failed , this is {}_th retry check tbnames of database {}".format(count , dbname)) count += 1 - def _get_stop_dnode_id(self,dbname): tdSql.query("show {}.vgroups".format(dbname)) vgroup_infos = tdSql.queryResult diff --git a/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_follower_unsync.py b/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_follower_unsync.py index 854a28b48a..28198b9529 100644 --- a/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_follower_unsync.py +++ b/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_follower_unsync.py @@ -174,11 +174,15 @@ class TDTestCase: os.system("taos -s 'select count(*) from {}.{}';".format(dbname,stablename)) def check_insert_rows(self, dbname, stablename , tb_nums , row_nums, append_rows): - + tdSql.execute("use {}".format(dbname)) tdSql.query("select count(*) from {}.{}".format(dbname,stablename)) + while not tdSql.queryResult: + time.sleep(0.1) + tdSql.query("select count(*) from {}.{}".format(dbname,stablename)) + status_OK = self.mycheckData("select count(*) from {}.{}".format(dbname,stablename) ,0 , 0 , tb_nums*row_nums+append_rows) count = 0 @@ -189,12 +193,18 @@ class TDTestCase: break time.sleep(0.1) tdSql.query("select count(*) from {}.{}".format(dbname,stablename)) + while not tdSql.queryResult: + time.sleep(0.1) + tdSql.query("select count(*) from {}.{}".format(dbname,stablename)) status_OK = self.mycheckData("select count(*) from {}.{}".format(dbname,stablename) ,0 , 0 , tb_nums*row_nums+append_rows) tdLog.info(" ==== check insert rows first failed , this is {}_th retry check rows of database {}".format(count , dbname)) count += 1 tdSql.query("select distinct tbname from {}.{}".format(dbname,stablename)) + while not tdSql.queryResult: + time.sleep(0.1) + tdSql.query("select distinct tbname from {}.{}".format(dbname,stablename)) status_OK = self.mycheckRows("select distinct tbname from {}.{}".format(dbname,stablename) ,tb_nums) count = 0 while not status_OK : @@ -204,10 +214,13 @@ class TDTestCase: break time.sleep(0.1) tdSql.query("select distinct tbname from {}.{}".format(dbname,stablename)) + while not tdSql.queryResult: + time.sleep(0.1) + tdSql.query("select distinct tbname from {}.{}".format(dbname,stablename)) status_OK = self.mycheckRows("select distinct tbname from {}.{}".format(dbname,stablename) ,tb_nums) tdLog.info(" ==== check insert tbnames first failed , this is {}_th retry check tbnames of database {}".format(count , dbname)) count += 1 - + def _get_stop_dnode_id(self,dbname): tdSql.query("show {}.vgroups".format(dbname)) vgroup_infos = tdSql.queryResult diff --git a/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_follower_unsync_force_stop.py b/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_follower_unsync_force_stop.py new file mode 100644 index 0000000000..83faba4578 --- /dev/null +++ b/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_follower_unsync_force_stop.py @@ -0,0 +1,521 @@ +# author : wenzhouwww +from ssl import ALERT_DESCRIPTION_CERTIFICATE_UNOBTAINABLE +import taos +import sys +import time +import os + +from util.log import * +from util.sql import * +from util.cases import * +from util.dnodes import TDDnodes +from util.dnodes import TDDnode +from util.cluster import * + +import datetime +import inspect +import time +import socket +import subprocess +import threading +sys.path.append(os.path.dirname(__file__)) + +class TDTestCase: + def init(self,conn ,logSql): + tdLog.debug(f"start to excute {__file__}") + tdSql.init(conn.cursor()) + self.host = socket.gethostname() + self.mnode_list = {} + self.dnode_list = {} + self.ts = 1483200000000 + self.ts_step =1000 + self.db_name ='testdb' + self.replica = 3 + self.vgroups = 1 + self.tb_nums = 10 + self.row_nums = 100 + self.stop_dnode_id = None + self.loop_restart_times = 5 + self.current_thread = None + self.max_restart_time = 10 + self.try_check_times = 10 + + def getBuildPath(self): + selfPath = os.path.dirname(os.path.realpath(__file__)) + if ("community" in selfPath): + projPath = selfPath[:selfPath.find("community")] + else: + projPath = selfPath[:selfPath.find("tests")] + + for root, dirs, files in os.walk(projPath): + if ("taosd" in files): + rootRealPath = os.path.dirname(os.path.realpath(root)) + if ("packaging" not in rootRealPath): + buildPath = root[:len(root) - len("/build/bin")] + break + return buildPath + + def check_setup_cluster_status(self): + tdSql.query("show mnodes") + for mnode in tdSql.queryResult: + name = mnode[1] + info = mnode + self.mnode_list[name] = info + + tdSql.query("show dnodes") + for dnode in tdSql.queryResult: + name = dnode[1] + info = dnode + self.dnode_list[name] = info + + count = 0 + is_leader = False + mnode_name = '' + for k,v in self.mnode_list.items(): + count +=1 + # only for 1 mnode + mnode_name = k + + if v[2] =='leader': + is_leader=True + + if count==1 and is_leader: + tdLog.info("===== depoly cluster success with 1 mnode as leader =====") + else: + tdLog.exit("===== depoly cluster fail with 1 mnode as leader =====") + + for k ,v in self.dnode_list.items(): + if k == mnode_name: + if v[3]==0: + tdLog.info("===== depoly cluster mnode only success at {} , support_vnodes is {} ".format(mnode_name,v[3])) + else: + tdLog.exit("===== depoly cluster mnode only fail at {} , support_vnodes is {} ".format(mnode_name,v[3])) + else: + continue + + def create_db_check_vgroups(self): + + tdSql.execute("drop database if exists test") + tdSql.execute("create database if not exists test replica 1 duration 300") + tdSql.execute("use test") + tdSql.execute( + '''create table stb1 + (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(16),c9 nchar(32), c10 timestamp) + tags (t1 int) + ''' + ) + tdSql.execute( + ''' + create table t1 + (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(16),c9 nchar(32), c10 timestamp) + ''' + ) + + for i in range(5): + tdSql.execute("create table sub_tb_{} using stb1 tags({})".format(i,i)) + tdSql.query("show stables") + tdSql.checkRows(1) + tdSql.query("show tables") + tdSql.checkRows(6) + + tdSql.query("show test.vgroups;") + vgroups_infos = {} # key is id: value is info list + for vgroup_info in tdSql.queryResult: + vgroup_id = vgroup_info[0] + tmp_list = [] + for role in vgroup_info[3:-4]: + if role in ['leader','follower']: + tmp_list.append(role) + vgroups_infos[vgroup_id]=tmp_list + + for k , v in vgroups_infos.items(): + if len(v) ==1 and v[0]=="leader": + tdLog.info(" === create database replica only 1 role leader check success of vgroup_id {} ======".format(k)) + else: + tdLog.exit(" === create database replica only 1 role leader check fail of vgroup_id {} ======".format(k)) + + def create_database(self, dbname, replica_num ,vgroup_nums ): + drop_db_sql = "drop database if exists {}".format(dbname) + create_db_sql = "create database {} replica {} vgroups {}".format(dbname,replica_num,vgroup_nums) + + tdLog.info(" ==== create database {} and insert rows begin =====".format(dbname)) + tdSql.execute(drop_db_sql) + tdSql.execute(create_db_sql) + tdSql.execute("use {}".format(dbname)) + + def create_stable_insert_datas(self,dbname ,stablename , tb_nums , row_nums): + tdSql.execute("use {}".format(dbname)) + tdSql.execute( + '''create table {} + (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(32),c9 nchar(32), c10 timestamp) + tags (t1 int) + '''.format(stablename) + ) + + for i in range(tb_nums): + sub_tbname = "sub_{}_{}".format(stablename,i) + tdSql.execute("create table {} using {} tags({})".format(sub_tbname, stablename ,i)) + # insert datas about new database + + for row_num in range(row_nums): + ts = self.ts + self.ts_step*row_num + tdSql.execute(f"insert into {sub_tbname} values ({ts}, {row_num} ,{row_num}, 10 ,1 ,{row_num} ,{row_num},true,'bin_{row_num}','nchar_{row_num}',now) ") + + tdLog.info(" ==== stable {} insert rows execute end =====".format(stablename)) + + def append_rows_of_exists_tables(self,dbname ,stablename , tbname , append_nums ): + + tdSql.execute("use {}".format(dbname)) + + for row_num in range(append_nums): + tdSql.execute(f"insert into {tbname} values (now, {row_num} ,{row_num}, 10 ,1 ,{row_num} ,{row_num},true,'bin_{row_num}','nchar_{row_num}',now) ") + # print(f"insert into {tbname} values (now, {row_num} ,{row_num}, 10 ,1 ,{row_num} ,{row_num},true,'bin_{row_num}','nchar_{row_num}',now) ") + tdLog.info(" ==== append new rows of table {} belongs to stable {} execute end =====".format(tbname,stablename)) + os.system("taos -s 'select count(*) from {}.{}';".format(dbname,stablename)) + + def check_insert_rows(self, dbname, stablename , tb_nums , row_nums, append_rows): + + tdSql.execute("use {}".format(dbname)) + + tdSql.query("select count(*) from {}.{}".format(dbname,stablename)) + + while not tdSql.queryResult: + time.sleep(0.1) + tdSql.query("select count(*) from {}.{}".format(dbname,stablename)) + + status_OK = self.mycheckData("select count(*) from {}.{}".format(dbname,stablename) ,0 , 0 , tb_nums*row_nums+append_rows) + + count = 0 + while not status_OK : + if count > self.try_check_times: + os.system("taos -s ' show {}.vgroups; '".format(dbname)) + tdLog.exit(" ==== check insert rows failed after {} try check {} times of database {}".format(count , self.try_check_times ,dbname)) + break + time.sleep(0.1) + tdSql.query("select count(*) from {}.{}".format(dbname,stablename)) + while not tdSql.queryResult: + time.sleep(0.1) + tdSql.query("select count(*) from {}.{}".format(dbname,stablename)) + status_OK = self.mycheckData("select count(*) from {}.{}".format(dbname,stablename) ,0 , 0 , tb_nums*row_nums+append_rows) + tdLog.info(" ==== check insert rows first failed , this is {}_th retry check rows of database {}".format(count , dbname)) + count += 1 + + + tdSql.query("select distinct tbname from {}.{}".format(dbname,stablename)) + while not tdSql.queryResult: + time.sleep(0.1) + tdSql.query("select distinct tbname from {}.{}".format(dbname,stablename)) + status_OK = self.mycheckRows("select distinct tbname from {}.{}".format(dbname,stablename) ,tb_nums) + count = 0 + while not status_OK : + if count > self.try_check_times: + os.system("taos -s ' show {}.vgroups;'".format(dbname)) + tdLog.exit(" ==== check insert rows failed after {} try check {} times of database {}".format(count , self.try_check_times ,dbname)) + break + time.sleep(0.1) + tdSql.query("select distinct tbname from {}.{}".format(dbname,stablename)) + while not tdSql.queryResult: + time.sleep(0.1) + tdSql.query("select distinct tbname from {}.{}".format(dbname,stablename)) + status_OK = self.mycheckRows("select distinct tbname from {}.{}".format(dbname,stablename) ,tb_nums) + tdLog.info(" ==== check insert tbnames first failed , this is {}_th retry check tbnames of database {}".format(count , dbname)) + count += 1 + + def _get_stop_dnode_id(self,dbname): + tdSql.query("show {}.vgroups".format(dbname)) + vgroup_infos = tdSql.queryResult + for vgroup_info in vgroup_infos: + leader_infos = vgroup_info[3:-4] + # print(vgroup_info) + for ind ,role in enumerate(leader_infos): + if role =='follower': + # print(ind,leader_infos) + self.stop_dnode_id = leader_infos[ind-1] + break + + + return self.stop_dnode_id + + def wait_stop_dnode_OK(self): + + def _get_status(): + newTdSql=tdCom.newTdSql() + + status = "" + newTdSql.query("show dnodes") + dnode_infos = newTdSql.queryResult + for dnode_info in dnode_infos: + id = dnode_info[0] + dnode_status = dnode_info[4] + if id == self.stop_dnode_id: + status = dnode_status + break + return status + + status = _get_status() + while status !="offline": + time.sleep(0.1) + status = _get_status() + # tdLog.info("==== stop dnode has not been stopped , endpoint is {}".format(self.stop_dnode)) + tdLog.info("==== stop_dnode has stopped , id is {}".format(self.stop_dnode_id)) + + def wait_start_dnode_OK(self): + + def _get_status(): + newTdSql=tdCom.newTdSql() + status = "" + newTdSql.query("show dnodes") + dnode_infos = newTdSql.queryResult + for dnode_info in dnode_infos: + id = dnode_info[0] + dnode_status = dnode_info[4] + if id == self.stop_dnode_id: + status = dnode_status + break + return status + + status = _get_status() + while status !="ready": + time.sleep(0.1) + status = _get_status() + # tdLog.info("==== stop dnode has not been stopped , endpoint is {}".format(self.stop_dnode)) + tdLog.info("==== stop_dnode has restart , id is {}".format(self.stop_dnode_id)) + + def _parse_datetime(self,timestr): + try: + return datetime.datetime.strptime(timestr, '%Y-%m-%d %H:%M:%S.%f') + except ValueError: + pass + try: + return datetime.datetime.strptime(timestr, '%Y-%m-%d %H:%M:%S') + except ValueError: + pass + + def mycheckRowCol(self, sql, row, col): + caller = inspect.getframeinfo(inspect.stack()[2][0]) + if row < 0: + args = (caller.filename, caller.lineno, sql, row) + tdLog.exit("%s(%d) failed: sql:%s, row:%d is smaller than zero" % args) + if col < 0: + args = (caller.filename, caller.lineno, sql, row) + tdLog.exit("%s(%d) failed: sql:%s, col:%d is smaller than zero" % args) + if row > tdSql.queryRows: + args = (caller.filename, caller.lineno, sql, row, tdSql.queryRows) + tdLog.exit("%s(%d) failed: sql:%s, row:%d is larger than queryRows:%d" % args) + if col > tdSql.queryCols: + args = (caller.filename, caller.lineno, sql, col, tdSql.queryCols) + tdLog.exit("%s(%d) failed: sql:%s, col:%d is larger than queryCols:%d" % args) + + def mycheckData(self, sql ,row, col, data): + check_status = True + self.mycheckRowCol(sql ,row, col) + if tdSql.queryResult[row][col] != data: + if tdSql.cursor.istype(col, "TIMESTAMP"): + # suppose user want to check nanosecond timestamp if a longer data passed + if (len(data) >= 28): + if pd.to_datetime(tdSql.queryResult[row][col]) == pd.to_datetime(data): + tdLog.info("sql:%s, row:%d col:%d data:%d == expect:%s" % + (sql, row, col, tdSql.queryResult[row][col], data)) + else: + if tdSql.queryResult[row][col] == self._parse_datetime(data): + tdLog.info("sql:%s, row:%d col:%d data:%s == expect:%s" % + (sql, row, col, tdSql.queryResult[row][col], data)) + return + + if str(tdSql.queryResult[row][col]) == str(data): + tdLog.info("sql:%s, row:%d col:%d data:%s == expect:%s" % + (sql, row, col, tdSql.queryResult[row][col], data)) + return + elif isinstance(data, float) and abs(tdSql.queryResult[row][col] - data) <= 0.000001: + tdLog.info("sql:%s, row:%d col:%d data:%f == expect:%f" % + (sql, row, col, tdSql.queryResult[row][col], data)) + return + else: + caller = inspect.getframeinfo(inspect.stack()[1][0]) + args = (caller.filename, caller.lineno, sql, row, col, tdSql.queryResult[row][col], data) + tdLog.info("%s(%d) failed: sql:%s row:%d col:%d data:%s != expect:%s" % args) + + check_status = False + + if data is None: + tdLog.info("sql:%s, row:%d col:%d data:%s == expect:%s" % + (sql, row, col, tdSql.queryResult[row][col], data)) + elif isinstance(data, str): + tdLog.info("sql:%s, row:%d col:%d data:%s == expect:%s" % + (sql, row, col, tdSql.queryResult[row][col], data)) + elif isinstance(data, datetime.date): + tdLog.info("sql:%s, row:%d col:%d data:%s == expect:%s" % + (sql, row, col, tdSql.queryResult[row][col], data)) + elif isinstance(data, float): + tdLog.info("sql:%s, row:%d col:%d data:%s == expect:%s" % + (sql, row, col, tdSql.queryResult[row][col], data)) + else: + tdLog.info("sql:%s, row:%d col:%d data:%s == expect:%d" % + (sql, row, col, tdSql.queryResult[row][col], data)) + + return check_status + + def mycheckRows(self, sql, expectRows): + check_status = True + if len(tdSql.queryResult) == expectRows: + tdLog.info("sql:%s, queryRows:%d == expect:%d" % (sql, len(tdSql.queryResult), expectRows)) + return True + else: + caller = inspect.getframeinfo(inspect.stack()[1][0]) + args = (caller.filename, caller.lineno, sql, len(tdSql.queryResult), expectRows) + tdLog.info("%s(%d) failed: sql:%s, queryRows:%d != expect:%d" % args) + check_status = False + return check_status + + def sync_run_case(self): + # stop follower and insert datas , update tables and create new stables + tdDnodes=cluster.dnodes + for loop in range(self.loop_restart_times): + db_name = "sync_db_{}".format(loop) + stablename = 'stable_{}'.format(loop) + self.create_database(dbname = db_name ,replica_num= self.replica , vgroup_nums= 1) + self.create_stable_insert_datas(dbname = db_name , stablename = stablename , tb_nums= 10 ,row_nums= 10 ) + self.stop_dnode_id = self._get_stop_dnode_id(db_name) + + # check rows of datas + + self.check_insert_rows(db_name ,stablename ,tb_nums=10 , row_nums= 10 ,append_rows=0) + + # begin stop dnode + start = time.time() + tdDnodes[self.stop_dnode_id-1].forcestop() + + self.wait_stop_dnode_OK() + + # append rows of stablename when dnode stop + + tbname = "sub_{}_{}".format(stablename , 0) + tdLog.info(" ==== begin append rows of exists table {} when dnode {} offline ====".format(tbname , self.stop_dnode_id)) + self.append_rows_of_exists_tables(db_name ,stablename , tbname , 100 ) + tdLog.info(" ==== check append rows of exists table {} when dnode {} offline ====".format(tbname , self.stop_dnode_id)) + self.check_insert_rows(db_name ,stablename ,tb_nums=10 , row_nums= 10 ,append_rows=100) + + # create new stables + tdLog.info(" ==== create new stable {} when dnode {} offline ====".format('new_stb1' , self.stop_dnode_id)) + self.create_stable_insert_datas(dbname = db_name , stablename = 'new_stb1' , tb_nums= 10 ,row_nums= 10 ) + tdLog.info(" ==== check new stable {} when dnode {} offline ====".format('new_stb1' , self.stop_dnode_id)) + self.check_insert_rows(db_name ,'new_stb1' ,tb_nums=10 , row_nums= 10 ,append_rows=0) + + # begin start dnode + tdDnodes[self.stop_dnode_id-1].starttaosd() + self.wait_start_dnode_OK() + end = time.time() + time_cost = int(end -start) + if time_cost > self.max_restart_time: + tdLog.exit(" ==== restart dnode {} cost too much time , please check ====".format(self.stop_dnode_id)) + + # create new stables again + tdLog.info(" ==== create new stable {} when dnode {} restart ====".format('new_stb2' , self.stop_dnode_id)) + self.create_stable_insert_datas(dbname = db_name , stablename = 'new_stb2' , tb_nums= 10 ,row_nums= 10 ) + tdLog.info(" ==== check new stable {} when dnode {} restart ====".format('new_stb2' , self.stop_dnode_id)) + self.check_insert_rows(db_name ,'new_stb2' ,tb_nums=10 , row_nums= 10 ,append_rows=0) + + def unsync_run_case(self): + + def _restart_dnode_of_db_unsync(dbname): + start = time.time() + tdDnodes=cluster.dnodes + self.stop_dnode_id = self._get_stop_dnode_id(dbname) + while not self.stop_dnode_id: + time.sleep(0.5) + self.stop_dnode_id = self._get_stop_dnode_id(dbname) + # begin restart dnode + + # force stop taosd by kill -9 + self.force_stop_dnode(self.stop_dnode_id) + self.wait_stop_dnode_OK() + tdDnodes[self.stop_dnode_id-1].starttaosd() + self.wait_start_dnode_OK() + end = time.time() + time_cost = int(end-start) + + if time_cost > self.max_restart_time: + tdLog.exit(" ==== restart dnode {} cost too much time , please check ====".format(self.stop_dnode_id)) + + + def _create_threading(dbname): + self.current_thread = threading.Thread(target=_restart_dnode_of_db_unsync, args=(dbname,)) + return self.current_thread + + + ''' + in this mode , it will be extra threading control start or stop dnode , insert will always going with not care follower online or alive + ''' + tdDnodes=cluster.dnodes + for loop in range(self.loop_restart_times): + db_name = "unsync_db_{}".format(loop) + stablename = 'stable_{}'.format(loop) + self.create_database(dbname = db_name ,replica_num= self.replica , vgroup_nums= 1) + self.create_stable_insert_datas(dbname = db_name , stablename = stablename , tb_nums= 10 ,row_nums= 10 ) + + tdLog.info(" ===== restart dnode of database {} in an unsync threading ===== ".format(db_name)) + + # create sync threading and start it + self.current_thread = _create_threading(db_name) + self.current_thread.start() + + # check rows of datas + self.check_insert_rows(db_name ,stablename ,tb_nums=10 , row_nums= 10 ,append_rows=0) + + tbname = "sub_{}_{}".format(stablename , 0) + tdLog.info(" ==== begin append rows of exists table {} when dnode {} offline ====".format(tbname , self.stop_dnode_id)) + self.append_rows_of_exists_tables(db_name ,stablename , tbname , 100 ) + tdLog.info(" ==== check append rows of exists table {} when dnode {} offline ====".format(tbname , self.stop_dnode_id)) + self.check_insert_rows(db_name ,stablename ,tb_nums=10 , row_nums= 10 ,append_rows=100) + + # create new stables + tdLog.info(" ==== create new stable {} when dnode {} offline ====".format('new_stb1' , self.stop_dnode_id)) + self.create_stable_insert_datas(dbname = db_name , stablename = 'new_stb1' , tb_nums= 10 ,row_nums= 10 ) + tdLog.info(" ==== check new stable {} when dnode {} offline ====".format('new_stb1' , self.stop_dnode_id)) + self.check_insert_rows(db_name ,'new_stb1' ,tb_nums=10 , row_nums= 10 ,append_rows=0) + + # create new stables again + tdLog.info(" ==== create new stable {} when dnode {} restart ====".format('new_stb2' , self.stop_dnode_id)) + self.create_stable_insert_datas(dbname = db_name , stablename = 'new_stb2' , tb_nums= 10 ,row_nums= 10 ) + tdLog.info(" ==== check new stable {} when dnode {} restart ====".format('new_stb2' , self.stop_dnode_id)) + self.check_insert_rows(db_name ,'new_stb2' ,tb_nums=10 , row_nums= 10 ,append_rows=0) + + self.current_thread.join() + + def force_stop_dnode(self, dnode_id ): + + tdSql.query("show dnodes") + port = None + for dnode_info in tdSql.queryResult: + if dnode_id == dnode_info[0]: + port = dnode_info[1].split(":")[-1] + break + else: + continue + if port: + tdLog.info(" ==== dnode {} will be force stop by kill -9 ====".format(dnode_id)) + psCmd = '''netstat -anp|grep -w LISTEN|grep -w %s |grep -o "LISTEN.*"|awk '{print $2}'|cut -d/ -f1|head -n1''' %(port) + processID = subprocess.check_output( + psCmd, shell=True).decode("utf-8") + ps_kill_taosd = ''' kill -9 {} '''.format(processID) + # print(ps_kill_taosd) + os.system(ps_kill_taosd) + + + def run(self): + + # basic insert and check of cluster + self.check_setup_cluster_status() + self.create_db_check_vgroups() + # self.sync_run_case() + self.unsync_run_case() + + + + + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) \ No newline at end of file diff --git a/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_leader.py b/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_leader.py index d326a23137..47f0ffb61f 100644 --- a/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_leader.py +++ b/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_leader.py @@ -262,21 +262,32 @@ class TDTestCase: tdSql.query("select count(*) from {}.{}".format(dbname,stablename)) + while not tdSql.queryResult: + time.sleep(0.1) + tdSql.query("select count(*) from {}.{}".format(dbname,stablename)) + status_OK = self.mycheckData("select count(*) from {}.{}".format(dbname,stablename) ,0 , 0 , tb_nums*row_nums+append_rows) count = 0 while not status_OK : if count > self.try_check_times: - os.system("taos -s ' show {}.vgroups;'".format(dbname)) + os.system("taos -s ' show {}.vgroups; '".format(dbname)) tdLog.exit(" ==== check insert rows failed after {} try check {} times of database {}".format(count , self.try_check_times ,dbname)) break time.sleep(0.1) tdSql.query("select count(*) from {}.{}".format(dbname,stablename)) + while not tdSql.queryResult: + time.sleep(0.1) + tdSql.query("select count(*) from {}.{}".format(dbname,stablename)) status_OK = self.mycheckData("select count(*) from {}.{}".format(dbname,stablename) ,0 , 0 , tb_nums*row_nums+append_rows) tdLog.info(" ==== check insert rows first failed , this is {}_th retry check rows of database {}".format(count , dbname)) count += 1 + tdSql.query("select distinct tbname from {}.{}".format(dbname,stablename)) + while not tdSql.queryResult: + time.sleep(0.1) + tdSql.query("select distinct tbname from {}.{}".format(dbname,stablename)) status_OK = self.mycheckRows("select distinct tbname from {}.{}".format(dbname,stablename) ,tb_nums) count = 0 while not status_OK : @@ -286,10 +297,12 @@ class TDTestCase: break time.sleep(0.1) tdSql.query("select distinct tbname from {}.{}".format(dbname,stablename)) + while not tdSql.queryResult: + time.sleep(0.1) + tdSql.query("select distinct tbname from {}.{}".format(dbname,stablename)) status_OK = self.mycheckRows("select distinct tbname from {}.{}".format(dbname,stablename) ,tb_nums) tdLog.info(" ==== check insert tbnames first failed , this is {}_th retry check tbnames of database {}".format(count , dbname)) count += 1 - def _get_stop_dnode_id(self,dbname): newTdSql=tdCom.newTdSql() newTdSql.query("show {}.vgroups".format(dbname)) From 950cf5dc35f013a8ed50664d97abc0e5ad5d9c2e Mon Sep 17 00:00:00 2001 From: "wenzhouwww@live.cn" Date: Fri, 22 Jul 2022 19:09:54 +0800 Subject: [PATCH 9/9] add case about force stop leader of cluster --- ..._basic_replica3_insertdatas_stop_leader.py | 2 +- ...ca3_insertdatas_stop_leader_forece_stop.py | 580 ++++++++++++++++++ 2 files changed, 581 insertions(+), 1 deletion(-) create mode 100644 tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_leader_forece_stop.py diff --git a/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_leader.py b/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_leader.py index 47f0ffb61f..c0aafa7978 100644 --- a/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_leader.py +++ b/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_leader.py @@ -389,7 +389,7 @@ class TDTestCase: if role==self.stop_dnode_id: if vgroup_info[ind+1] =="offline" and "leader" in vgroup_info: - tdLog.info(" === revote leader ok , leader is {} now ====".format(list(vgroup_info).index("leader")-1)) + tdLog.info(" === revote leader ok , leader is {} now ====".format(vgroup_info[list(vgroup_info).index("leader")-1])) check_status = True elif vgroup_info[ind+1] !="offline": tdLog.info(" === dnode {} should be offline ".format(self.stop_dnode_id)) diff --git a/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_leader_forece_stop.py b/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_leader_forece_stop.py new file mode 100644 index 0000000000..d6edfda770 --- /dev/null +++ b/tests/system-test/6-cluster/vnode/4dnode1mnode_basic_replica3_insertdatas_stop_leader_forece_stop.py @@ -0,0 +1,580 @@ +# author : wenzhouwww +from ssl import ALERT_DESCRIPTION_CERTIFICATE_UNOBTAINABLE +import taos +import sys +import time +import os + +from util.log import * +from util.sql import * +from util.cases import * +from util.dnodes import TDDnodes +from util.dnodes import TDDnode +from util.cluster import * + +import time +import socket +import subprocess +import threading +sys.path.append(os.path.dirname(__file__)) + +class TDTestCase: + def init(self,conn ,logSql): + tdLog.debug(f"start to excute {__file__}") + tdSql.init(conn.cursor()) + self.host = socket.gethostname() + self.mnode_list = {} + self.dnode_list = {} + self.ts = 1483200000000 + self.ts_step =1000 + self.db_name ='testdb' + self.replica = 3 + self.vgroups = 1 + self.tb_nums = 10 + self.row_nums = 100 + self.stop_dnode_id = None + self.loop_restart_times = 10 + self.current_thread = None + self.max_restart_time = 5 + + def getBuildPath(self): + selfPath = os.path.dirname(os.path.realpath(__file__)) + if ("community" in selfPath): + projPath = selfPath[:selfPath.find("community")] + else: + projPath = selfPath[:selfPath.find("tests")] + + for root, dirs, files in os.walk(projPath): + if ("taosd" in files): + rootRealPath = os.path.dirname(os.path.realpath(root)) + if ("packaging" not in rootRealPath): + buildPath = root[:len(root) - len("/build/bin")] + break + return buildPath + + def _parse_datetime(self,timestr): + try: + return datetime.datetime.strptime(timestr, '%Y-%m-%d %H:%M:%S.%f') + except ValueError: + pass + try: + return datetime.datetime.strptime(timestr, '%Y-%m-%d %H:%M:%S') + except ValueError: + pass + + def mycheckRowCol(self, sql, row, col): + caller = inspect.getframeinfo(inspect.stack()[2][0]) + if row < 0: + args = (caller.filename, caller.lineno, sql, row) + tdLog.exit("%s(%d) failed: sql:%s, row:%d is smaller than zero" % args) + if col < 0: + args = (caller.filename, caller.lineno, sql, row) + tdLog.exit("%s(%d) failed: sql:%s, col:%d is smaller than zero" % args) + if row > tdSql.queryRows: + args = (caller.filename, caller.lineno, sql, row, tdSql.queryRows) + tdLog.exit("%s(%d) failed: sql:%s, row:%d is larger than queryRows:%d" % args) + if col > tdSql.queryCols: + args = (caller.filename, caller.lineno, sql, col, tdSql.queryCols) + tdLog.exit("%s(%d) failed: sql:%s, col:%d is larger than queryCols:%d" % args) + + def mycheckData(self, sql ,row, col, data): + check_status = True + self.mycheckRowCol(sql ,row, col) + if tdSql.queryResult[row][col] != data: + if tdSql.cursor.istype(col, "TIMESTAMP"): + # suppose user want to check nanosecond timestamp if a longer data passed + if (len(data) >= 28): + if pd.to_datetime(tdSql.queryResult[row][col]) == pd.to_datetime(data): + tdLog.info("sql:%s, row:%d col:%d data:%d == expect:%s" % + (sql, row, col, tdSql.queryResult[row][col], data)) + else: + if tdSql.queryResult[row][col] == self._parse_datetime(data): + tdLog.info("sql:%s, row:%d col:%d data:%s == expect:%s" % + (sql, row, col, tdSql.queryResult[row][col], data)) + return + + if str(tdSql.queryResult[row][col]) == str(data): + tdLog.info("sql:%s, row:%d col:%d data:%s == expect:%s" % + (sql, row, col, tdSql.queryResult[row][col], data)) + return + elif isinstance(data, float) and abs(tdSql.queryResult[row][col] - data) <= 0.000001: + tdLog.info("sql:%s, row:%d col:%d data:%f == expect:%f" % + (sql, row, col, tdSql.queryResult[row][col], data)) + return + else: + caller = inspect.getframeinfo(inspect.stack()[1][0]) + args = (caller.filename, caller.lineno, sql, row, col, tdSql.queryResult[row][col], data) + tdLog.info("%s(%d) failed: sql:%s row:%d col:%d data:%s != expect:%s" % args) + + check_status = False + + if data is None: + tdLog.info("sql:%s, row:%d col:%d data:%s == expect:%s" % + (sql, row, col, tdSql.queryResult[row][col], data)) + elif isinstance(data, str): + tdLog.info("sql:%s, row:%d col:%d data:%s == expect:%s" % + (sql, row, col, tdSql.queryResult[row][col], data)) + elif isinstance(data, datetime.date): + tdLog.info("sql:%s, row:%d col:%d data:%s == expect:%s" % + (sql, row, col, tdSql.queryResult[row][col], data)) + elif isinstance(data, float): + tdLog.info("sql:%s, row:%d col:%d data:%s == expect:%s" % + (sql, row, col, tdSql.queryResult[row][col], data)) + else: + tdLog.info("sql:%s, row:%d col:%d data:%s == expect:%d" % + (sql, row, col, tdSql.queryResult[row][col], data)) + + return check_status + + def mycheckRows(self, sql, expectRows): + check_status = True + if len(tdSql.queryResult) == expectRows: + tdLog.info("sql:%s, queryRows:%d == expect:%d" % (sql, len(tdSql.queryResult), expectRows)) + return True + else: + caller = inspect.getframeinfo(inspect.stack()[1][0]) + args = (caller.filename, caller.lineno, sql, len(tdSql.queryResult), expectRows) + tdLog.info("%s(%d) failed: sql:%s, queryRows:%d != expect:%d" % args) + check_status = False + return check_status + + def check_setup_cluster_status(self): + tdSql.query("show mnodes") + for mnode in tdSql.queryResult: + name = mnode[1] + info = mnode + self.mnode_list[name] = info + + tdSql.query("show dnodes") + for dnode in tdSql.queryResult: + name = dnode[1] + info = dnode + self.dnode_list[name] = info + + count = 0 + is_leader = False + mnode_name = '' + for k,v in self.mnode_list.items(): + count +=1 + # only for 1 mnode + mnode_name = k + + if v[2] =='leader': + is_leader=True + + if count==1 and is_leader: + tdLog.info("===== depoly cluster success with 1 mnode as leader =====") + else: + tdLog.exit("===== depoly cluster fail with 1 mnode as leader =====") + + for k ,v in self.dnode_list.items(): + if k == mnode_name: + if v[3]==0: + tdLog.info("===== depoly cluster mnode only success at {} , support_vnodes is {} ".format(mnode_name,v[3])) + else: + tdLog.exit("===== depoly cluster mnode only fail at {} , support_vnodes is {} ".format(mnode_name,v[3])) + else: + continue + + def create_db_check_vgroups(self): + + tdSql.execute("drop database if exists test") + tdSql.execute("create database if not exists test replica 1 duration 300") + tdSql.execute("use test") + tdSql.execute( + '''create table stb1 + (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(16),c9 nchar(32), c10 timestamp) + tags (t1 int) + ''' + ) + tdSql.execute( + ''' + create table t1 + (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(16),c9 nchar(32), c10 timestamp) + ''' + ) + + for i in range(5): + tdSql.execute("create table sub_tb_{} using stb1 tags({})".format(i,i)) + tdSql.query("show stables") + tdSql.checkRows(1) + tdSql.query("show tables") + tdSql.checkRows(6) + + tdSql.query("show test.vgroups;") + vgroups_infos = {} # key is id: value is info list + for vgroup_info in tdSql.queryResult: + vgroup_id = vgroup_info[0] + tmp_list = [] + for role in vgroup_info[3:-4]: + if role in ['leader','follower']: + tmp_list.append(role) + vgroups_infos[vgroup_id]=tmp_list + + for k , v in vgroups_infos.items(): + if len(v) ==1 and v[0]=="leader": + tdLog.info(" === create database replica only 1 role leader check success of vgroup_id {} ======".format(k)) + else: + tdLog.exit(" === create database replica only 1 role leader check fail of vgroup_id {} ======".format(k)) + + def create_database(self, dbname, replica_num ,vgroup_nums ): + drop_db_sql = "drop database if exists {}".format(dbname) + create_db_sql = "create database {} replica {} vgroups {}".format(dbname,replica_num,vgroup_nums) + + tdLog.info(" ==== create database {} and insert rows begin =====".format(dbname)) + tdSql.execute(drop_db_sql) + tdSql.execute(create_db_sql) + tdSql.execute("use {}".format(dbname)) + + def create_stable_insert_datas(self,dbname ,stablename , tb_nums , row_nums): + tdSql.execute("use {}".format(dbname)) + tdSql.execute( + '''create table {} + (ts timestamp, c1 int, c2 bigint, c3 smallint, c4 tinyint, c5 float, c6 double, c7 bool, c8 binary(32),c9 nchar(32), c10 timestamp) + tags (t1 int) + '''.format(stablename) + ) + + for i in range(tb_nums): + sub_tbname = "sub_{}_{}".format(stablename,i) + tdSql.execute("create table {} using {} tags({})".format(sub_tbname, stablename ,i)) + # insert datas about new database + + for row_num in range(row_nums): + ts = self.ts + self.ts_step*row_num + tdSql.execute(f"insert into {sub_tbname} values ({ts}, {row_num} ,{row_num}, 10 ,1 ,{row_num} ,{row_num},true,'bin_{row_num}','nchar_{row_num}',now) ") + + tdLog.info(" ==== stable {} insert rows execute end =====".format(stablename)) + + def append_rows_of_exists_tables(self,dbname ,stablename , tbname , append_nums ): + + tdSql.execute("use {}".format(dbname)) + + for row_num in range(append_nums): + tdSql.execute(f"insert into {tbname} values (now, {row_num} ,{row_num}, 10 ,1 ,{row_num} ,{row_num},true,'bin_{row_num}','nchar_{row_num}',now) ") + # print(f"insert into {tbname} values (now, {row_num} ,{row_num}, 10 ,1 ,{row_num} ,{row_num},true,'bin_{row_num}','nchar_{row_num}',now) ") + tdLog.info(" ==== append new rows of table {} belongs to stable {} execute end =====".format(tbname,stablename)) + os.system("taos -s 'select count(*) from {}.{}';".format(dbname,stablename)) + + def check_insert_rows(self, dbname, stablename , tb_nums , row_nums, append_rows): + + tdSql.execute("use {}".format(dbname)) + + tdSql.query("select count(*) from {}.{}".format(dbname,stablename)) + + while not tdSql.queryResult: + time.sleep(0.1) + tdSql.query("select count(*) from {}.{}".format(dbname,stablename)) + + status_OK = self.mycheckData("select count(*) from {}.{}".format(dbname,stablename) ,0 , 0 , tb_nums*row_nums+append_rows) + + count = 0 + while not status_OK : + if count > self.try_check_times: + os.system("taos -s ' show {}.vgroups; '".format(dbname)) + tdLog.exit(" ==== check insert rows failed after {} try check {} times of database {}".format(count , self.try_check_times ,dbname)) + break + time.sleep(0.1) + tdSql.query("select count(*) from {}.{}".format(dbname,stablename)) + while not tdSql.queryResult: + time.sleep(0.1) + tdSql.query("select count(*) from {}.{}".format(dbname,stablename)) + status_OK = self.mycheckData("select count(*) from {}.{}".format(dbname,stablename) ,0 , 0 , tb_nums*row_nums+append_rows) + tdLog.info(" ==== check insert rows first failed , this is {}_th retry check rows of database {}".format(count , dbname)) + count += 1 + + + tdSql.query("select distinct tbname from {}.{}".format(dbname,stablename)) + while not tdSql.queryResult: + time.sleep(0.1) + tdSql.query("select distinct tbname from {}.{}".format(dbname,stablename)) + status_OK = self.mycheckRows("select distinct tbname from {}.{}".format(dbname,stablename) ,tb_nums) + count = 0 + while not status_OK : + if count > self.try_check_times: + os.system("taos -s ' show {}.vgroups;'".format(dbname)) + tdLog.exit(" ==== check insert rows failed after {} try check {} times of database {}".format(count , self.try_check_times ,dbname)) + break + time.sleep(0.1) + tdSql.query("select distinct tbname from {}.{}".format(dbname,stablename)) + while not tdSql.queryResult: + time.sleep(0.1) + tdSql.query("select distinct tbname from {}.{}".format(dbname,stablename)) + status_OK = self.mycheckRows("select distinct tbname from {}.{}".format(dbname,stablename) ,tb_nums) + tdLog.info(" ==== check insert tbnames first failed , this is {}_th retry check tbnames of database {}".format(count , dbname)) + count += 1 + + def _get_stop_dnode_id(self,dbname): + newTdSql=tdCom.newTdSql() + newTdSql.query("show {}.vgroups".format(dbname)) + vgroup_infos = newTdSql.queryResult + for vgroup_info in vgroup_infos: + leader_infos = vgroup_info[3:-4] + # print(vgroup_info) + for ind ,role in enumerate(leader_infos): + if role =='leader': + # print(ind,leader_infos) + self.stop_dnode_id = leader_infos[ind-1] + break + + + return self.stop_dnode_id + + def wait_stop_dnode_OK(self): + + def _get_status(): + newTdSql=tdCom.newTdSql() + + status = "" + newTdSql.query("show dnodes") + dnode_infos = newTdSql.queryResult + for dnode_info in dnode_infos: + id = dnode_info[0] + dnode_status = dnode_info[4] + if id == self.stop_dnode_id: + status = dnode_status + break + return status + + status = _get_status() + while status !="offline": + time.sleep(0.1) + status = _get_status() + # tdLog.info("==== stop dnode has not been stopped , endpoint is {}".format(self.stop_dnode)) + tdLog.info("==== stop_dnode has stopped , id is {}".format(self.stop_dnode_id)) + + def wait_start_dnode_OK(self): + + def _get_status(): + newTdSql=tdCom.newTdSql() + status = "" + newTdSql.query("show dnodes") + dnode_infos = newTdSql.queryResult + for dnode_info in dnode_infos: + id = dnode_info[0] + dnode_status = dnode_info[4] + if id == self.stop_dnode_id: + status = dnode_status + break + return status + + status = _get_status() + while status !="ready": + time.sleep(0.1) + status = _get_status() + # tdLog.info("==== stop dnode has not been stopped , endpoint is {}".format(self.stop_dnode)) + tdLog.info("==== stop_dnode has restart , id is {}".format(self.stop_dnode_id)) + + def get_leader_infos(self ,dbname): + + newTdSql=tdCom.newTdSql() + newTdSql.query("show {}.vgroups".format(dbname)) + vgroup_infos = newTdSql.queryResult + + leader_infos = set() + for vgroup_info in vgroup_infos: + leader_infos.add(vgroup_info[3:-4]) + + return leader_infos + + def check_revote_leader_success(self, dbname, before_leader_infos , after_leader_infos): + check_status = False + vote_act = set(set(after_leader_infos)-set(before_leader_infos)) + if not vote_act: + print("=======before_revote_leader_infos ======\n" , before_leader_infos) + print("=======after_revote_leader_infos ======\n" , after_leader_infos) + tdLog.exit(" ===maybe revote not occured , there is no dnode offline ====") + else: + for vgroup_info in vote_act: + for ind , role in enumerate(vgroup_info): + if role==self.stop_dnode_id: + + if vgroup_info[ind+1] =="offline" and "leader" in vgroup_info: + tdLog.info(" === revote leader ok , leader is {} now ====".format(vgroup_info[list(vgroup_info).index("leader")-1])) + check_status = True + elif vgroup_info[ind+1] !="offline": + tdLog.info(" === dnode {} should be offline ".format(self.stop_dnode_id)) + else: + continue + break + return check_status + + def force_stop_dnode(self, dnode_id ): + + tdSql.query("show dnodes") + port = None + for dnode_info in tdSql.queryResult: + if dnode_id == dnode_info[0]: + port = dnode_info[1].split(":")[-1] + break + else: + continue + if port: + tdLog.info(" ==== dnode {} will be force stop by kill -9 ====".format(dnode_id)) + psCmd = '''netstat -anp|grep -w LISTEN|grep -w %s |grep -o "LISTEN.*"|awk '{print $2}'|cut -d/ -f1|head -n1''' %(port) + processID = subprocess.check_output( + psCmd, shell=True).decode("utf-8") + ps_kill_taosd = ''' kill -9 {} '''.format(processID) + # print(ps_kill_taosd) + os.system(ps_kill_taosd) + + def sync_run_case(self): + # stop follower and insert datas , update tables and create new stables + tdDnodes=cluster.dnodes + for loop in range(self.loop_restart_times): + db_name = "sync_db_{}".format(loop) + stablename = 'stable_{}'.format(loop) + self.create_database(dbname = db_name ,replica_num= self.replica , vgroup_nums= 1) + self.create_stable_insert_datas(dbname = db_name , stablename = stablename , tb_nums= 10 ,row_nums= 10 ) + self.stop_dnode_id = self._get_stop_dnode_id(db_name) + + # check rows of datas + + self.check_insert_rows(db_name ,stablename ,tb_nums=10 , row_nums= 10 ,append_rows=0) + + # get leader info before stop + before_leader_infos = self.get_leader_infos(db_name) + + # begin stop dnode + # force stop taosd by kill -9 + self.force_stop_dnode(self.stop_dnode_id) + + self.wait_stop_dnode_OK() + + # vote leaders check + + # get leader info after stop + after_leader_infos = self.get_leader_infos(db_name) + + revote_status = self.check_revote_leader_success(db_name ,before_leader_infos , after_leader_infos) + + # append rows of stablename when dnode stop make sure revote leaders + + while not revote_status: + after_leader_infos = self.get_leader_infos(db_name) + revote_status = self.check_revote_leader_success(db_name ,before_leader_infos , after_leader_infos) + + + if revote_status: + tbname = "sub_{}_{}".format(stablename , 0) + tdLog.info(" ==== begin append rows of exists table {} when dnode {} offline ====".format(tbname , self.stop_dnode_id)) + self.append_rows_of_exists_tables(db_name ,stablename , tbname , 100 ) + tdLog.info(" ==== check append rows of exists table {} when dnode {} offline ====".format(tbname , self.stop_dnode_id)) + self.check_insert_rows(db_name ,stablename ,tb_nums=10 , row_nums= 10 ,append_rows=100) + + # create new stables + tdLog.info(" ==== create new stable {} when dnode {} offline ====".format('new_stb1' , self.stop_dnode_id)) + self.create_stable_insert_datas(dbname = db_name , stablename = 'new_stb1' , tb_nums= 10 ,row_nums= 10 ) + tdLog.info(" ==== check new stable {} when dnode {} offline ====".format('new_stb1' , self.stop_dnode_id)) + self.check_insert_rows(db_name ,'new_stb1' ,tb_nums=10 , row_nums= 10 ,append_rows=0) + else: + tdLog.info("===== leader of database {} is not ok , append rows fail =====".format(db_name)) + + # begin start dnode + start = time.time() + tdDnodes[self.stop_dnode_id-1].starttaosd() + self.wait_start_dnode_OK() + end = time.time() + time_cost = int(end -start) + if time_cost > self.max_restart_time: + tdLog.exit(" ==== restart dnode {} cost too much time , please check ====".format(self.stop_dnode_id)) + + # create new stables again + tdLog.info(" ==== create new stable {} when dnode {} restart ====".format('new_stb2' , self.stop_dnode_id)) + self.create_stable_insert_datas(dbname = db_name , stablename = 'new_stb2' , tb_nums= 10 ,row_nums= 10 ) + tdLog.info(" ==== check new stable {} when dnode {} restart ====".format('new_stb2' , self.stop_dnode_id)) + self.check_insert_rows(db_name ,'new_stb2' ,tb_nums=10 , row_nums= 10 ,append_rows=0) + + def unsync_run_case(self): + + def _restart_dnode_of_db_unsync(dbname): + + tdDnodes=cluster.dnodes + self.stop_dnode_id = self._get_stop_dnode_id(dbname) + # begin restart dnode + # force stop taosd by kill -9 + # get leader info before stop + before_leader_infos = self.get_leader_infos(db_name) + self.force_stop_dnode(self.stop_dnode_id) + + self.wait_stop_dnode_OK() + + # check revote leader when restart servers + # get leader info after stop + after_leader_infos = self.get_leader_infos(db_name) + revote_status = self.check_revote_leader_success(db_name ,before_leader_infos , after_leader_infos) + # append rows of stablename when dnode stop make sure revote leaders + while not revote_status: + after_leader_infos = self.get_leader_infos(db_name) + revote_status = self.check_revote_leader_success(db_name ,before_leader_infos , after_leader_infos) + + tbname = "sub_{}_{}".format(stablename , 0) + tdLog.info(" ==== begin append rows of exists table {} when dnode {} offline ====".format(tbname , self.stop_dnode_id)) + self.append_rows_of_exists_tables(db_name ,stablename , tbname , 100 ) + tdLog.info(" ==== check append rows of exists table {} when dnode {} offline ====".format(tbname , self.stop_dnode_id)) + self.check_insert_rows(db_name ,stablename ,tb_nums=10 , row_nums= 10 ,append_rows=100) + + # create new stables + tdLog.info(" ==== create new stable {} when dnode {} offline ====".format('new_stb1' , self.stop_dnode_id)) + self.create_stable_insert_datas(dbname = db_name , stablename = 'new_stb1' , tb_nums= 10 ,row_nums= 10 ) + tdLog.info(" ==== check new stable {} when dnode {} offline ====".format('new_stb1' , self.stop_dnode_id)) + self.check_insert_rows(db_name ,'new_stb1' ,tb_nums=10 , row_nums= 10 ,append_rows=0) + + # create new stables again + tdLog.info(" ==== create new stable {} when dnode {} restart ====".format('new_stb2' , self.stop_dnode_id)) + self.create_stable_insert_datas(dbname = db_name , stablename = 'new_stb2' , tb_nums= 10 ,row_nums= 10 ) + tdLog.info(" ==== check new stable {} when dnode {} restart ====".format('new_stb2' , self.stop_dnode_id)) + self.check_insert_rows(db_name ,'new_stb2' ,tb_nums=10 , row_nums= 10 ,append_rows=0) + + + tdDnodes[self.stop_dnode_id-1].starttaosd() + start = time.time() + self.wait_start_dnode_OK() + end = time.time() + time_cost = int(end-start) + + if time_cost > self.max_restart_time: + tdLog.exit(" ==== restart dnode {} cost too much time , please check ====".format(self.stop_dnode_id)) + + + def _create_threading(dbname): + self.current_thread = threading.Thread(target=_restart_dnode_of_db_unsync, args=(dbname,)) + return self.current_thread + + + ''' + in this mode , it will be extra threading control start or stop dnode , insert will always going with not care follower online or alive + ''' + for loop in range(self.loop_restart_times): + db_name = "unsync_db_{}".format(loop) + stablename = 'stable_{}'.format(loop) + self.create_database(dbname = db_name ,replica_num= self.replica , vgroup_nums= 1) + self.create_stable_insert_datas(dbname = db_name , stablename = stablename , tb_nums= 10 ,row_nums= 10 ) + + tdLog.info(" ===== restart dnode of database {} in an unsync threading ===== ".format(db_name)) + + # create sync threading and start it + self.current_thread = _create_threading(db_name) + self.current_thread.start() + + # check rows of datas + self.check_insert_rows(db_name ,stablename ,tb_nums=10 , row_nums= 10 ,append_rows=0) + + + self.current_thread.join() + + + def run(self): + + # basic insert and check of cluster + self.check_setup_cluster_status() + self.create_db_check_vgroups() + # self.sync_run_case() + self.unsync_run_case() + + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) \ No newline at end of file