diff --git a/tests/army/cluster/clusterBasic.py b/tests/army/cluster/clusterBasic.py index 8dceecab38..1ec23e56ea 100644 --- a/tests/army/cluster/clusterBasic.py +++ b/tests/army/cluster/clusterBasic.py @@ -31,40 +31,61 @@ from frame.clusterCommonCheck import clusterComCheck class TDTestCase(TBase): def init(self, conn, logSql, replicaVar=3): super(TDTestCase, self).init(conn, logSql, replicaVar=3, db="db") - self.dnodeNum = 3 - self.mnodeNum = 3 + self.vgroupNum = 3 tdSql.init(conn.cursor(), logSql) def checkClusterEmptyDB(self): while 1: - if clusterComCheck.checkDnodes(3): break + if clusterComCheck.checkDnodes(5): break tdSql.query("show cluster alive;") tdSql.checkData(0, 0, 1) sc.dnodeStop(3) while 1: - if clusterComCheck.checkDnodes(2): break + if clusterComCheck.checkDnodes(4): break tdSql.query("show cluster alive;") tdSql.checkData(0, 0, 1) + sc.dnodeStop(2) + while 1: + if clusterComCheck.checkDnodes(3): break + tdSql.query("show cluster alive;") + tdSql.checkData(0, 0, 1) def checkClusterWithDB(self): sc.dnodeStart(3) + sc.dnodeStart(2) while 1: - if clusterComCheck.checkDnodes(3): break + if clusterComCheck.checkDnodes(5): break + tdSql.execute(f'drop database if exists {self.db}') - tdSql.execute(f'create database {self.db} replica 3') + tdSql.execute(f'create database {self.db} replica {self.replicaVar} vgroups {self.vgroupNum}') + while 1: + leader_status = clusterComCheck.check_vgroups_status_with_offline(vgroup_numbers=self.vgroupNum, db_replica=self.replicaVar) + if leader_status >= 0: break + leader_status = clusterComCheck.check_vgroups_status_with_offline(vgroup_numbers=self.vgroupNum, db_replica=self.replicaVar) tdSql.query("show cluster alive;") - print(tdSql.getResult("show cluster alive;")) - tdSql.checkData(0, 0, 1) + tdSql.checkData(0, 0, leader_status) sc.dnodeStop(3) while 1: - if clusterComCheck.checkDnodes(2): break + if clusterComCheck.checkDnodes(4): break + while 1: + leader_status = 
clusterComCheck.check_vgroups_status_with_offline(vgroup_numbers=self.vgroupNum, db_replica=self.replicaVar) + if leader_status >= 0: break + leader_status = clusterComCheck.check_vgroups_status_with_offline(vgroup_numbers=self.vgroupNum, db_replica=self.replicaVar) tdSql.query("show cluster alive;") - print(tdSql.getResult("show cluster alive;")) - tdSql.checkData(0, 0, 1) - + tdSql.checkData(0, 0, leader_status) + + sc.dnodeStop(2) + while 1: + if clusterComCheck.checkDnodes(3): break + while 1: + leader_status = clusterComCheck.check_vgroups_status_with_offline(vgroup_numbers=self.vgroupNum, db_replica=self.replicaVar) + if leader_status >= 0: break + leader_status = clusterComCheck.check_vgroups_status_with_offline(vgroup_numbers=self.vgroupNum, db_replica=self.replicaVar) + tdSql.query("show cluster alive;") + tdSql.checkData(0, 0, leader_status) # run def run(self): @@ -73,8 +94,9 @@ class TDTestCase(TBase): self.checkClusterWithDB() def stop(self): - sc.dnodeStop(2) sc.dnodeStop(1) + sc.dnodeStop(4) + sc.dnodeStop(5) tdSql.close() tdLog.success(f"{__file__} successfully executed") diff --git a/tests/army/frame/clusterCommonCheck.py b/tests/army/frame/clusterCommonCheck.py index cca70a88b7..9cbac776b9 100644 --- a/tests/army/frame/clusterCommonCheck.py +++ b/tests/army/frame/clusterCommonCheck.py @@ -44,7 +44,7 @@ class ClusterComCheck: tdSql.query("select * from information_schema.ins_dnodes") # tdLog.debug(tdSql.res) status=0 - for i in range(dnodeNumbers): + for i in range(len(tdSql.res)): if tdSql.res[i][4] == "ready": status+=1 # tdLog.info(status) @@ -228,6 +228,46 @@ class ClusterComCheck: tdLog.debug(tdSql.res) tdLog.exit("stop mnodes on dnode 2 or 3 failed in 10s") + def check_vgroups_status_with_offline(self,vgroup_numbers=2,db_replica=3,count_number=10,db_name="db"): + """ + n nodes cluster, 3 replica database + return 1, n leaders, stable status + return 2, 0 < num of leader < n, stable status + return 0, no leader, stable status + return -1, 
Elections not yet completed, unstable status + """ + vgroup_numbers = int(vgroup_numbers) + self.db_replica = int(db_replica) + tdLog.debug("start to check status of vgroups") + count=0 + leader_number = 0 + while count < count_number: + time.sleep(1) + count+=1 + tdSql.query(f"show {db_name}.vgroups;") + if tdSql.getRows() != vgroup_numbers : + continue + for i in range(vgroup_numbers): + print(tdSql.res[i]) + if 'leader' in tdSql.res[i]:leader_number += 1 + elif tdSql.res[i].count('follower') + tdSql.res[i].count('candidate') >= 2: + tdLog.debug("Elections not yet completed") + return -1 + else: # only one 'follower' or 'offline' + tdLog.debug("Not in compliance with Raft protocol, unable to complete election") + if leader_number == vgroup_numbers: + tdLog.debug("Leader election for all vgroups completed") + return 1 + elif leader_number == 0: + tdLog.debug("all vnodes are followers") + return 0 + else: + tdLog.debug(f"there are {vgroup_numbers} vgroups, and leader elections for {leader_number} vgroups completed") + return 2 + else: + tdLog.debug(tdSql.res) + tdLog.notice(f"elections of {db_name} all vgroups with replica {self.db_replica} are failed in {count} s ") + def check_vgroups_status(self,vgroup_numbers=2,db_replica=3,count_number=10,db_name="db"): """ check vgroups status in 10s after db vgroups status is changed """ vgroup_numbers = int(vgroup_numbers) diff --git a/tests/parallel_test/cases.task b/tests/parallel_test/cases.task index 3c2adfb46f..c8e0a624d5 100644 --- a/tests/parallel_test/cases.task +++ b/tests/parallel_test/cases.task @@ -24,7 +24,7 @@ ,,y,army,./pytest.sh python3 ./test.py -f query/fill/fill_desc.py -N 3 -L 3 -D 2 ,,y,army,./pytest.sh python3 ./test.py -f query/fill/fill_null.py ,,y,army,./pytest.sh python3 ./test.py -f cluster/incSnapshot.py -N 3 -#,,y,army,./pytest.sh python3 ./test.py -f cluster/clusterBasic.py -N 3 +,,y,army,./pytest.sh python3 ./test.py -f cluster/clusterBasic.py -N 5 ,,y,army,./pytest.sh python3 ./test.py -f 
query/query_basic.py -N 3 ,,y,army,./pytest.sh python3 ./test.py -f query/accuracy/test_query_accuracy.py ,,y,army,./pytest.sh python3 ./test.py -f insert/insert_basic.py -N 3