From 59d23065fd469447717b2bc7da46ce2ec012c01e Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Tue, 11 Jan 2022 02:54:11 -0800 Subject: [PATCH 1/4] fix invalid write in mnode --- source/dnode/mnode/impl/src/mndDb.c | 2 +- source/dnode/mnode/impl/src/mndDnode.c | 2 +- source/dnode/mnode/impl/src/mndVgroup.c | 8 ++++ tests/script/sh/exec.sh | 4 +- tests/script/sim/db/basic6.sim | 56 ++++++++++--------------- 5 files changed, 33 insertions(+), 39 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndDb.c b/source/dnode/mnode/impl/src/mndDb.c index 85b7fbbb42..bc4d890257 100644 --- a/source/dnode/mnode/impl/src/mndDb.c +++ b/source/dnode/mnode/impl/src/mndDb.c @@ -740,7 +740,7 @@ static int32_t mndBuildDropVgroupAction(SMnode *pMnode, STrans *pTrans, SDbObj * if (pReq == NULL) return -1; action.pCont = pReq; - action.contLen = sizeof(SCreateVnodeReq); + action.contLen = sizeof(SDropVnodeReq); action.msgType = TDMT_DND_DROP_VNODE; action.acceptableCode = TSDB_CODE_DND_VNODE_NOT_DEPLOYED; if (mndTransAppendRedoAction(pTrans, &action) != 0) { diff --git a/source/dnode/mnode/impl/src/mndDnode.c b/source/dnode/mnode/impl/src/mndDnode.c index 4bc570c11d..2a67e6a0f9 100644 --- a/source/dnode/mnode/impl/src/mndDnode.c +++ b/source/dnode/mnode/impl/src/mndDnode.c @@ -235,7 +235,7 @@ int32_t mndGetDnodeSize(SMnode *pMnode) { bool mndIsDnodeOnline(SMnode *pMnode, SDnodeObj *pDnode, int64_t curMs) { int64_t interval = ABS(pDnode->lastAccessTime - curMs); - if (interval > 3500 * pMnode->cfg.statusInterval) { + if (interval > 10000 * pMnode->cfg.statusInterval) { if (pDnode->rebootTime > 0) { pDnode->offlineReason = DND_REASON_STATUS_MSG_TIMEOUT; } diff --git a/source/dnode/mnode/impl/src/mndVgroup.c b/source/dnode/mnode/impl/src/mndVgroup.c index e0d6d3dd42..93d6d104ff 100644 --- a/source/dnode/mnode/impl/src/mndVgroup.c +++ b/source/dnode/mnode/impl/src/mndVgroup.c @@ -319,6 +319,14 @@ static int32_t mndGetAvailableDnode(SMnode *pMnode, SVgObj *pVgroup, SArray *pAr taosArraySort(pArray, (__compar_fn_t)mndCompareDnodeVnodes); + int32_t size = taosArrayGetSize(pArray); + if (size < pVgroup->replica) { + mError("db:%s, vgId:%d, no enough online dnodes:%d to alloc %d replica", pVgroup->dbName, pVgroup->vgId, size, + pVgroup->replica); + terrno = TSDB_CODE_MND_NO_ENOUGH_DNODES; + return -1; + } + for (int32_t v = 0; v < pVgroup->replica; ++v) { SVnodeGid *pVgid = &pVgroup->vnodeGid[v]; SDnodeObj *pDnode = taosArrayGet(pArray, v); diff --git a/tests/script/sh/exec.sh b/tests/script/sh/exec.sh index 2e95a740d0..05f756ebb6 100755 --- a/tests/script/sh/exec.sh +++ b/tests/script/sh/exec.sh @@ -30,7 +30,7 @@ do CLEAR_OPTION="clear" ;; v) - SHELL_OPTION="true" + VALGRIND_OPTION="true" ;; u) USERS=$OPTARG @@ -99,7 +99,7 @@ fi if [ "$EXEC_OPTON" = "start" ]; then echo "ExcuteCmd:" $EXE_DIR/taosd -c $CFG_DIR - if [ "$SHELL_OPTION" = "true" ]; then + if [ "$VALGRIND_OPTION" = "true" ]; then TT=`date +%s` mkdir ${LOG_DIR}/${TT} nohup valgrind --log-file=${LOG_DIR}/${TT}/valgrind.log --tool=memcheck --leak-check=full --show-reachable=no --track-origins=yes --show-leak-kinds=all -v --workaround-gcc296-bugs=yes $EXE_DIR/taosd -c $CFG_DIR > /dev/null 2>&1 & diff --git a/tests/script/sim/db/basic6.sim b/tests/script/sim/db/basic6.sim index 684ce825fe..8e7d4f5ec1 100644 --- a/tests/script/sim/db/basic6.sim +++ b/tests/script/sim/db/basic6.sim @@ -1,29 +1,24 @@ system sh/stop_dnodes.sh system sh/deploy.sh -n dnode1 -i 1 -system sh/cfg.sh -n dnode1 -c wallevel -v 0 -system sh/cfg.sh -n dnode1 -c maxVgroupsPerDb -v 4 -system sh/cfg.sh -n dnode1 -c maxTablesPerVnode -v 1000 - system sh/exec.sh -n dnode1 -s start - -sleep 2000 sql connect + print ============================ dnode1 start $i = 0 -$dbPrefix = ob_db_db -$tbPrefix = ob_db_tb +$dbPrefix = db +$tbPrefix = tb $db = $dbPrefix . $i $tb = $tbPrefix . $i print =============== step1 -sql create database $db replica 1 days 20 keep 2000 cache 16 +sql create database $db replica 1 days 20 keep 2000 cache 16 vgroups 4 sql show databases print $data00 $data01 $data02 $data03 $data04 $data05 $data06 $data07 if $data00 != $db then return -1 endi -if $data02 != 0 then +if $data02 != 4 then return -1 endi if $data03 != 0 then @@ -63,9 +58,6 @@ print $data00 $data01 $data02 $data03 $data04 $data05 $data06 $data07 if $data00 != $db then return -1 endi -if $data02 != 0 then - return -1 -endi if $data03 != 0 then return -1 endi @@ -77,30 +69,14 @@ if $data06 != 15 then endi print =============== step6 -sql use $db -sql create table $tb (ts timestamp, speed int) -$i = 1 -while $i < 4 +$i = $i + 1 +while $i < 5 $db = $dbPrefix . $i - $tb = $tbPrefix . $i sql create database $db sql use $db - sql create table $tb (ts timestamp, speed int) $i = $i + 1 endw -sql show databases -if $rows != 4 then - return -1 -endi - -$i = 4 -$db = $dbPrefix . $i -$tb = $tbPrefix . $i -sql create database $db -sql use $db -sql create table $tb (ts timestamp, speed int) - print =============== step7 $i = 0 while $i < 5 @@ -115,7 +91,8 @@ $db = $dbPrefix . $i $tb = $tbPrefix . $i sql create database $db sql use $db -sql create table $tb (ts timestamp, speed int) +sql create table st (ts timestamp, i int) tags (j int) +sql create table $tb using st tags(1) sql show tables if $rows != 1 then return -1 @@ -133,7 +110,8 @@ if $rows != 0 then endi print =============== step11 -sql create table $tb (ts timestamp, speed int) +sql create table st (ts timestamp, i int) tags (j int) +sql create table $tb using st tags(1) sql show tables if $rows != 1 then return -1 @@ -149,16 +127,23 @@ sql show tables if $rows != 0 then return -1 endi -sql create table $tb (ts timestamp, speed int) + +sql create table st (ts timestamp, i int) tags (j int) +sql create table $tb using st tags(1) sql show tables if $rows != 1 then return -1 endi + sql insert into $tb values (now+1a, 0) sql insert into $tb values (now+2a, 1) sql insert into $tb values (now+3a, 2) sql insert into $tb values (now+4a, 3) sql insert into $tb values (now+5a, 4) + + +return + sql select * from $tb if $rows != 5 then return -1 @@ -176,7 +161,8 @@ if $rows != 0 then endi print =============== step16 -sql create table $tb (ts timestamp, speed int) +sql create table st (ts timestamp, i int) tags (j int) +sql create table $tb using st tags(1) sql show tables if $rows != 1 then return -1 From f94f8aadc0b00e15a0869eb7291ae53b8bec8d49 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Tue, 11 Jan 2022 03:13:26 -0800 Subject: [PATCH 2/4] dnode may offline while create vnode --- source/dnode/mgmt/impl/inc/dndInt.h | 1 + source/dnode/mgmt/impl/src/dndMgmt.c | 13 ++++++++++++- source/dnode/mnode/impl/src/mndDnode.c | 2 +- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/source/dnode/mgmt/impl/inc/dndInt.h b/source/dnode/mgmt/impl/inc/dndInt.h index afdd678213..e637b38815 100644 --- a/source/dnode/mgmt/impl/inc/dndInt.h +++ b/source/dnode/mgmt/impl/inc/dndInt.h @@ -94,6 +94,7 @@ typedef struct { pthread_t *threadId; SRWLatch latch; SDnodeWorker mgmtWorker; + SDnodeWorker statusWorker; } SDnodeMgmt; typedef struct { diff --git a/source/dnode/mgmt/impl/src/dndMgmt.c b/source/dnode/mgmt/impl/src/dndMgmt.c index f252bffbbf..d9edf39b73 100644 --- a/source/dnode/mgmt/impl/src/dndMgmt.c +++ b/source/dnode/mgmt/impl/src/dndMgmt.c @@ -536,6 +536,11 @@ int32_t dndInitMgmt(SDnode *pDnode) { return -1; } + if (dndInitWorker(pDnode, &pMgmt->statusWorker, DND_WORKER_SINGLE, "dnode-status", 1, 1, dndProcessMgmtQueue) != 0) { + dError("failed to start dnode mgmt worker since %s", terrstr()); + return -1; + } + pMgmt->threadId = taosCreateThread(dnodeThreadRoutine, pDnode); if (pMgmt->threadId == NULL) { dError("failed to init dnode thread"); @@ -550,6 +555,7 @@ int32_t dndInitMgmt(SDnode *pDnode) { void dndStopMgmt(SDnode *pDnode) { SDnodeMgmt *pMgmt = &pDnode->dmgmt; dndCleanupWorker(&pMgmt->mgmtWorker); + dndCleanupWorker(&pMgmt->statusWorker); if (pMgmt->threadId != NULL) { taosDestoryThread(pMgmt->threadId); @@ -587,7 +593,12 @@ void dndProcessMgmtMsg(SDnode *pDnode, SRpcMsg *pMsg, SEpSet *pEpSet) { dndUpdateMnodeEpSet(pDnode, pEpSet); } - if (dndWriteMsgToWorker(&pMgmt->mgmtWorker, pMsg, sizeof(SRpcMsg)) != 0) { + SDnodeWorker *pWorker = &pMgmt->mgmtWorker; + if (pMsg->msgType == TDMT_MND_STATUS_RSP) { + pWorker = &pMgmt->statusWorker; + } + + if (dndWriteMsgToWorker(pWorker, pMsg, sizeof(SRpcMsg)) != 0) { if (pMsg->msgType & 1u) { SRpcMsg rsp = {.handle = pMsg->handle, .code = TSDB_CODE_OUT_OF_MEMORY}; rpcSendResponse(&rsp); diff --git a/source/dnode/mnode/impl/src/mndDnode.c b/source/dnode/mnode/impl/src/mndDnode.c index 2a67e6a0f9..4bc570c11d 100644 --- a/source/dnode/mnode/impl/src/mndDnode.c +++ b/source/dnode/mnode/impl/src/mndDnode.c @@ -235,7 +235,7 @@ int32_t mndGetDnodeSize(SMnode *pMnode) { bool mndIsDnodeOnline(SMnode *pMnode, SDnodeObj *pDnode, int64_t curMs) { int64_t interval = ABS(pDnode->lastAccessTime - curMs); - if (interval > 10000 * pMnode->cfg.statusInterval) { + if (interval > 3500 * pMnode->cfg.statusInterval) { if (pDnode->rebootTime > 0) { pDnode->offlineReason = DND_REASON_STATUS_MSG_TIMEOUT; } From 07a1b4acf21d25321819781d1ec9a2214d98e175 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Tue, 11 Jan 2022 03:41:38 -0800 Subject: [PATCH 3/4] minor changes --- source/dnode/mnode/impl/test/profile/profile.cpp | 10 +++++----- tests/script/jenkins/basic.txt | 5 +++-- tests/script/sim/db/basic6.sim | 4 ++++ 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/source/dnode/mnode/impl/test/profile/profile.cpp b/source/dnode/mnode/impl/test/profile/profile.cpp index bdffb6c72a..bf047517d3 100644 --- a/source/dnode/mnode/impl/test/profile/profile.cpp +++ b/source/dnode/mnode/impl/test/profile/profile.cpp @@ -13,7 +13,7 @@ class MndTestProfile : public ::testing::Test { protected: - static void SetUpTestSuite() { test.Init("/tmp/mnode_test_profile", 9022); } + static void SetUpTestSuite() { test.Init("/tmp/mnode_test_profile", 9031); } static void TearDownTestSuite() { test.Cleanup(); } static Testbase test; @@ -53,7 +53,7 @@ TEST_F(MndTestProfile, 01_ConnectMsg) { EXPECT_EQ(pRsp->epSet.inUse, 0); EXPECT_EQ(pRsp->epSet.numOfEps, 1); - EXPECT_EQ(pRsp->epSet.port[0], 9022); + EXPECT_EQ(pRsp->epSet.port[0], 9031); EXPECT_STREQ(pRsp->epSet.fqdn[0], "localhost"); connId = pRsp->connId; @@ -127,7 +127,7 @@ TEST_F(MndTestProfile, 04_HeartBeatMsg) { EXPECT_EQ(pRsp->epSet.inUse, 0); EXPECT_EQ(pRsp->epSet.numOfEps, 1); - EXPECT_EQ(pRsp->epSet.port[0], 9022); + EXPECT_EQ(pRsp->epSet.port[0], 9031); EXPECT_STREQ(pRsp->epSet.fqdn[0], "localhost"); } @@ -185,7 +185,7 @@ TEST_F(MndTestProfile, 05_KillConnMsg) { EXPECT_EQ(pRsp->epSet.inUse, 0); EXPECT_EQ(pRsp->epSet.numOfEps, 1); - EXPECT_EQ(pRsp->epSet.port[0], 9022); + EXPECT_EQ(pRsp->epSet.port[0], 9031); EXPECT_STREQ(pRsp->epSet.fqdn[0], "localhost"); connId = pRsp->connId; @@ -249,7 +249,7 @@ TEST_F(MndTestProfile, 07_KillQueryMsg) { EXPECT_EQ(pRsp->epSet.inUse, 0); EXPECT_EQ(pRsp->epSet.numOfEps, 1); - EXPECT_EQ(pRsp->epSet.port[0], 9022); + EXPECT_EQ(pRsp->epSet.port[0], 9031); EXPECT_STREQ(pRsp->epSet.fqdn[0], "localhost"); } } diff --git a/tests/script/jenkins/basic.txt b/tests/script/jenkins/basic.txt index bc0c3a4f64..a84e473d9f 100644 --- a/tests/script/jenkins/basic.txt +++ b/tests/script/jenkins/basic.txt @@ -5,8 +5,9 @@ ./test.sh -f sim/user/basic1.sim # ---- db -./test.sh -f sim/db/basic1.sim -./test.sh -f sim/db/error1.sim +./test.sh -f sim/db/basic1.sim +./test.sh -f sim/db/error6.sim +./test.sh -f sim/db/error1.sim # ---- table ./test.sh -f sim/table/basic1.sim diff --git a/tests/script/sim/db/basic6.sim b/tests/script/sim/db/basic6.sim index 8e7d4f5ec1..a688b4c2f3 100644 --- a/tests/script/sim/db/basic6.sim +++ b/tests/script/sim/db/basic6.sim @@ -93,6 +93,10 @@ sql create database $db sql use $db sql create table st (ts timestamp, i int) tags (j int) sql create table $tb using st tags(1) + +return +system sh/exec.sh -n dnode1 -s stop -x SIGINT + sql show tables if $rows != 1 then return -1 From 530ffb6cbc6fc4b14a07c96388bc6e7fcf22c1fc Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Tue, 11 Jan 2022 03:46:05 -0800 Subject: [PATCH 4/4] minor changes --- tests/script/jenkins/basic.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/script/jenkins/basic.txt b/tests/script/jenkins/basic.txt index a84e473d9f..bb6569deb0 100644 --- a/tests/script/jenkins/basic.txt +++ b/tests/script/jenkins/basic.txt @@ -6,7 +6,7 @@ # ---- db ./test.sh -f sim/db/basic1.sim -./test.sh -f sim/db/error6.sim +./test.sh -f sim/db/basic6.sim ./test.sh -f sim/db/error1.sim # ---- table