From 9b02d103800de03c776703ca05a08c8d1aea5408 Mon Sep 17 00:00:00 2001 From: Alex Duan <417921451@qq.com> Date: Mon, 5 Aug 2024 18:13:43 +0800 Subject: [PATCH 1/2] fix: replace sleep with ClusterComCheck --- tests/army/cluster/incSnapshot.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/army/cluster/incSnapshot.py b/tests/army/cluster/incSnapshot.py index dfd8d95c9c..a4ddddcee5 100644 --- a/tests/army/cluster/incSnapshot.py +++ b/tests/army/cluster/incSnapshot.py @@ -15,6 +15,10 @@ from frame import * from frame.autogen import * # from frame.server.dnodes import * # from frame.server.cluster import * +from util.cluster import * +sys.path.append("./6-cluster") +from clusterCommonCreate import * +from clusterCommonCheck import clusterComCheck class TDTestCase(TBase): @@ -61,7 +65,7 @@ class TDTestCase(TBase): # if bFinish: # break self.snapshotAgg() - time.sleep(10) + clusterComCheck.check_vgroups_status(vgroup_numbers=2,db_replica=3,db_name=f"{self.db}",count_number=60) sc.dnodeStopAll() for i in range(1, 4): path = clusterDnodes.getDnodeDir(i) @@ -75,7 +79,7 @@ class TDTestCase(TBase): sc.dnodeStart(2) sc.dnodeStart(3) sql = "show vnodes;" - time.sleep(10) + clusterComCheck.check_vgroups_status(vgroup_numbers=2,db_replica=3,db_name=f"{self.db}",count_number=60) while True: bFinish = True param_list = tdSql.query(sql, row_tag=True) From 6557b6c7aedae8f40ca639eb52036c76189acafd Mon Sep 17 00:00:00 2001 From: Alex Duan <417921451@qq.com> Date: Mon, 5 Aug 2024 19:53:21 +0800 Subject: [PATCH 2/2] fix: add clusterCommonCheck.py to army --- tests/army/cluster/incSnapshot.py | 37 +--- tests/army/frame/clusterCommonCheck.py | 277 +++++++++++++++++++++++++ 2 files changed, 279 insertions(+), 35 deletions(-) create mode 100644 tests/army/frame/clusterCommonCheck.py diff --git a/tests/army/cluster/incSnapshot.py b/tests/army/cluster/incSnapshot.py index a4ddddcee5..1028781fcb 100644 --- a/tests/army/cluster/incSnapshot.py +++ 
b/tests/army/cluster/incSnapshot.py @@ -15,11 +15,7 @@ from frame import * from frame.autogen import * # from frame.server.dnodes import * # from frame.server.cluster import * -from util.cluster import * -sys.path.append("./6-cluster") -from clusterCommonCreate import * -from clusterCommonCheck import clusterComCheck - +from frame.clusterCommonCheck import * class TDTestCase(TBase): updatecfgDict = { @@ -43,29 +39,12 @@ class TDTestCase(TBase): autoGen.insert_data(1000) tdSql.execute(f"flush database {self.db}") sc.dnodeStop(3) - # clusterDnodes.stoptaosd(1) - # clusterDnodes.starttaosd(3) - # time.sleep(5) - # clusterDnodes.stoptaosd(2) - # clusterDnodes.starttaosd(1) - # time.sleep(5) autoGen.insert_data(5000, True) self.flushDb(True) # wait flush operation over time.sleep(5) - # sql = 'show vnodes;' - # while True: - # bFinish = True - # param_list = tdSql.query(sql, row_tag=True) - # for param in param_list: - # if param[3] == 'leading' or param[3] == 'following': - # bFinish = False - # break - # if bFinish: - # break - self.snapshotAgg() - clusterComCheck.check_vgroups_status(vgroup_numbers=2,db_replica=3,db_name=f"{self.db}",count_number=60) + self.snapshotAgg() sc.dnodeStopAll() for i in range(1, 4): path = clusterDnodes.getDnodeDir(i) @@ -80,18 +59,6 @@ class TDTestCase(TBase): sc.dnodeStart(3) sql = "show vnodes;" clusterComCheck.check_vgroups_status(vgroup_numbers=2,db_replica=3,db_name=f"{self.db}",count_number=60) - while True: - bFinish = True - param_list = tdSql.query(sql, row_tag=True) - for param in param_list: - if param[3] == 'offline': - tdLog.exit( - "dnode synchronous fail dnode id: %d, vgroup id:%d status offline" % (param[0], param[1])) - if param[3] == 'leading' or param[3] == 'following': - bFinish = False - break - if bFinish: - break self.timestamp_step = 1000 self.insert_rows = 6000 diff --git a/tests/army/frame/clusterCommonCheck.py b/tests/army/frame/clusterCommonCheck.py new file mode 100644 index 0000000000..cca70a88b7 --- 
class ClusterComCheck:
    """Polling helpers that wait for a cluster to reach an expected state.

    Every check runs SQL through the module-level ``tdSql`` helper and reports
    through ``tdLog``.  A check returns ``True`` as soon as the expected state
    is observed and calls ``tdLog.exit`` when its retry budget is exhausted.
    """

    def init(self, conn, logSql=False):
        """Bind the shared ``tdSql`` helper to a cursor created from *conn*.

        Args:
            conn: connection object exposing ``cursor()``.
            logSql: accepted for interface compatibility; currently unused.
        """
        # Keep a reference to the cursor so that close() has something to close.
        self.cursor = conn.cursor()
        tdSql.init(self.cursor)

    # ------------------------------------------------------------------
    # Pure predicates (no SQL, no logging) used by the polling methods.
    # ------------------------------------------------------------------
    @staticmethod
    def _single_leader_ready(rows):
        """Return True when *rows* hold exactly one leader, the rest followers, all ready.

        Each row is indexed like an ``ins_mnodes`` result as used in this
        class: role at index 2 and status at index 3.
        """
        if not rows:
            return False
        roles = [row[2] for row in rows]
        return (
            all(row[3] == "ready" for row in rows)
            and roles.count("leader") == 1
            and roles.count("follower") == len(rows) - 1
        )

    @staticmethod
    def _offline_with_leader(roles, offline_index):
        """Return True when, among the first three *roles*, the one at
        *offline_index* is ``offline`` and the other two are one leader and
        one follower (in either order)."""
        if len(roles) < 3 or not 0 <= offline_index < 3:
            return False
        others = [role for i, role in enumerate(roles[:3]) if i != offline_index]
        return roles[offline_index] == "offline" and sorted(others) == ["follower", "leader"]

    @staticmethod
    def _vgroup_elected(row, replica):
        """Return True when one vgroup *row* shows a finished election.

        Replica statuses are read from indexes 4, 6 and 8 of the row.  Only
        replica counts 1 and 3 are supported; any other value yields False.
        """
        if replica == 1:
            return row[4] == "leader"
        if replica == 3:
            roles = [row[4], row[6], row[8]]
            return roles.count("leader") == 1 and roles.count("follower") == 2
        return False

    # ------------------------------------------------------------------
    # Polling checks.
    # ------------------------------------------------------------------
    def checkDnodes(self, dnodeNumbers, timeout=100):
        """Wait until the first *dnodeNumbers* dnodes all report ``ready``.

        Polls once per second for at most *timeout* attempts.  Returns True on
        success, otherwise dumps the last result and calls ``tdLog.exit``.
        """
        for elapsed in range(timeout):
            tdSql.query("select * from information_schema.ins_dnodes")
            # Slice instead of indexing so that a short result (dnodes not yet
            # registered) counts as "not ready" rather than raising IndexError.
            ready = sum(1 for row in tdSql.res[:dnodeNumbers] if row[4] == "ready")
            if ready == dnodeNumbers:
                tdLog.success("it find cluster with %d dnodes and check that all cluster dnodes are ready within %ds! " % (dnodeNumbers, elapsed + 1))
                return True
            time.sleep(1)

        tdSql.query("select * from information_schema.ins_dnodes")
        tdLog.debug(tdSql.res)
        tdLog.exit("it find cluster with %d dnodes but check that there dnodes are not ready within %ds ! " % (dnodeNumbers, timeout))

    def checkDbRows(self, dbNumbers):
        """Check that the cluster lists ``dbNumbers + 2`` databases (5 attempts).

        NOTE(review): the extra 2 presumably accounts for the built-in system
        databases -- confirm against the server version under test.
        """
        dbNumbers = int(dbNumbers)
        expected = dbNumbers + 2
        for _ in range(5):
            tdSql.query("select * from information_schema.ins_databases where name!='collectd' ;")
            if tdSql.checkRows(expected):
                tdLog.success("we find %d databases and expect %d in clusters! " % (tdSql.queryRows, expected))
                return True

        tdLog.debug(tdSql.res)
        # Report the value that was actually checked (dbNumbers + 2).
        tdLog.exit("we find %d databases but expect %d in clusters! " % (tdSql.queryRows, expected))

    def checkDb(self, dbNumbers, restartNumber, dbNameIndex, timeout=100):
        """Wait until databases ``<dbNameIndex>_0 .. <dbNameIndex>_<dbNumbers-1>`` are ready.

        Args:
            dbNumbers: number of databases to look for.
            restartNumber: kept for interface compatibility; the scan now
                covers every returned row, so it is no longer needed to bound it.
            dbNameIndex: database name prefix.
            timeout: maximum number of polling attempts.
        """
        query_status = 0  # defined up front so the failure path works when timeout == 0
        for attempt in range(timeout):
            query_status = 0
            waited = False
            for j in range(dbNumbers):
                wanted = "%s_%d" % (dbNameIndex, j)
                tdSql.query("select * from information_schema.ins_databases;")
                for row in tdSql.res:
                    if row[0] != wanted:
                        continue
                    if row[15] == "ready":
                        query_status += 1
                        tdLog.debug("check %s_%d that status is ready " % (dbNameIndex, j))
                    else:
                        # Give the database a moment before the next look.
                        time.sleep(1)
                        waited = True
                    break  # database names are unique; stop scanning
            if query_status == dbNumbers:
                tdLog.success(" check %d database and all databases are ready within %ds! " % (dbNumbers, attempt + 1))
                return True
            if not waited:
                # A database is missing altogether: avoid a busy loop.
                time.sleep(1)

        tdLog.debug(tdSql.res)
        tdLog.debug("query status is %d" % query_status)
        tdLog.exit("database is not ready within %ds" % (timeout + 1))

    def checkData(self, dbname, stbname, stableCount, CtableCount, rowsPerSTable,):
        """Verify table counts in *dbname* and the row count of each super table.

        Super tables are expected to be named ``<stbname><i>`` for
        ``i`` in ``range(stableCount)``.
        """
        tdSql.execute("use %s" % dbname)
        tdSql.query("show %s.stables" % dbname)
        tdSql.checkRows(stableCount)
        tdSql.query("show %s.tables" % dbname)
        tdSql.checkRows(CtableCount)
        for i in range(stableCount):
            tdSql.query("select count(*) from %s%d" % (stbname, i))
            tdSql.checkData(0, 0, rowsPerSTable)

    def checkMnodeStatus(self, mnodeNums):
        """Wait (10 attempts, 1 s apart) for exactly one ready leader and ready followers.

        Works for any mnode count; the attempt counter always advances, so an
        unexpected count can no longer spin forever.
        """
        self.mnodeNums = int(mnodeNums)
        tdLog.debug("start to check status of mnodes")

        for _ in range(10):
            time.sleep(1)
            tdSql.query("select * from information_schema.ins_mnodes;")
            if not tdSql.checkRows(self.mnodeNums):
                continue
            tdLog.success("cluster has %d mnodes" % self.mnodeNums)
            if self._single_leader_ready(tdSql.res[:self.mnodeNums]):
                tdLog.success("%d mnodes is ready in 10s" % self.mnodeNums)
                return True

        tdLog.debug(tdSql.res)
        tdLog.exit("cluster of %d mnodes is not ready in 10s " % self.mnodeNums)

    def check3mnodeoff(self, offlineDnodeNo, mnodeNums=3):
        """Wait until the mnode on dnode *offlineDnodeNo* (1-3) is offline while
        the other two form a leader/follower pair."""
        for _ in range(10):
            time.sleep(1)
            tdSql.query("select * from information_schema.ins_mnodes;")
            if tdSql.checkRows(mnodeNums):
                # Use the parameter: self.mnodeNums only exists after
                # checkMnodeStatus() has been called.
                tdLog.success("cluster has %d mnodes" % mnodeNums)
            else:
                tdLog.exit("mnode number is incorrect")
            roles = [row[2] for row in tdSql.res[:3]]
            if self._offline_with_leader(roles, offlineDnodeNo - 1):
                tdLog.success("stop mnodes on dnode %d successfully in 10s" % offlineDnodeNo)
                return True

        tdLog.debug(tdSql.res)
        tdLog.exit(f"stop mnodes on dnode {offlineDnodeNo} failed in 10s ")

    def check3mnode2off(self, mnodeNums=3):
        """Wait until the first mnode is leader and the second and third are offline."""
        for _ in range(10):
            time.sleep(1)
            tdSql.query("select * from information_schema.ins_mnodes;")
            if tdSql.checkRows(mnodeNums):
                tdLog.success("cluster has %d mnodes" % mnodeNums)
            else:
                tdLog.exit("mnode number is incorrect")
            roles = [row[2] for row in tdSql.res[:3]]
            if roles == ["leader", "offline", "offline"]:
                tdLog.success("stop mnodes of follower on dnode successfully in 10s")
                return True

        tdLog.debug(tdSql.res)
        tdLog.exit("stop mnodes on dnode 2 or 3 failed in 10s")

    def check_vgroups_status(self, vgroup_numbers=2, db_replica=3, count_number=10, db_name="db"):
        """Wait until every vgroup of *db_name* has finished its election.

        Polls once per second for at most *count_number* attempts.  For
        replica 1 each vgroup must be ``leader``; for replica 3 each vgroup
        must have one leader and two followers.  The database's configured
        replica is then cross-checked against *db_replica*.

        Returns True on success; otherwise logs the caller's file and line
        and calls ``tdLog.exit``.
        """
        # Local import: this module does not import inspect at file level.
        import inspect

        vgroup_numbers = int(vgroup_numbers)
        self.db_replica = int(db_replica)
        tdLog.debug("start to check status of vgroups")

        count = 0
        while count < count_number:
            time.sleep(1)
            count += 1
            print("check vgroup count :", count)
            tdSql.query(f"show {db_name}.vgroups;")
            if tdSql.getRows() != vgroup_numbers:
                continue

            # Check every vgroup, not only the first and the last one.
            rows = tdSql.res[:vgroup_numbers]
            if not rows or not all(self._vgroup_elected(row, self.db_replica) for row in rows):
                continue

            tdSql.query(f"select `replica` from information_schema.ins_databases where `name`='{db_name}';")
            print("db replica :", tdSql.res[0][0])
            # Compare against the normalised int, not the raw argument.
            if tdSql.res[0][0] == self.db_replica:
                if self.db_replica == 1:
                    tdLog.success(f"all vgroups with replica {self.db_replica} of {db_name} are leaders in {count} s")
                else:
                    tdLog.success(f"elections of {db_name}.vgroups with replica {self.db_replica} are ready in {count} s")
                return True

        tdLog.debug(tdSql.res)
        tdLog.notice(f"elections of {db_name} all vgroups with replica {self.db_replica} are failed in {count} s ")
        caller = inspect.getframeinfo(inspect.stack()[1][0])
        args = (caller.filename, caller.lineno)
        tdLog.exit("%s(%d) failed " % args)

    def close(self):
        """Close the cursor created by ``init``; safe to call repeatedly or before ``init``."""
        cursor = getattr(self, "cursor", None)
        if cursor is not None:
            cursor.close()
            self.cursor = None


clusterComCheck = ClusterComCheck()