From 3d56707049707a4d67880b09de98c11f09216a31 Mon Sep 17 00:00:00 2001 From: Bomin Zhang Date: Tue, 16 Jun 2020 18:19:03 +0800 Subject: [PATCH 01/10] fix td-642 --- src/client/src/tscSQLParser.c | 9 ++++-- src/tsdb/src/tsdbMeta.c | 4 +++ src/vnode/src/vnodeMain.c | 10 +++--- tests/script/general/parser/stream.sim | 44 ++++++++++---------------- 4 files changed, 31 insertions(+), 36 deletions(-) diff --git a/src/client/src/tscSQLParser.c b/src/client/src/tscSQLParser.c index 64c38f011c..db12693c29 100644 --- a/src/client/src/tscSQLParser.c +++ b/src/client/src/tscSQLParser.c @@ -6082,11 +6082,14 @@ int32_t exprTreeFromSqlExpr(tExprNode **pExpr, const tSQLExpr* pSqlExpr, SArray* } } + if ((*pExpr)->_node.optr != TSDB_RELATION_EQUAL && (*pExpr)->_node.optr != TSDB_RELATION_NOT_EQUAL) { if (pRight->nodeType == TSQL_NODE_VALUE) { - if ( pRight->pVal->nType == TSDB_DATA_TYPE_BOOL - || pRight->pVal->nType == TSDB_DATA_TYPE_BINARY - || pRight->pVal->nType == TSDB_DATA_TYPE_NCHAR) { + if (pRight->pVal->nType == TSDB_DATA_TYPE_BOOL) { + return TSDB_CODE_TSC_INVALID_SQL; + } + if ((pRight->pVal->nType == TSDB_DATA_TYPE_BINARY || pRight->pVal->nType == TSDB_DATA_TYPE_NCHAR) + && (*pExpr)->_node.optr != TSDB_RELATION_LIKE) { return TSDB_CODE_TSC_INVALID_SQL; } } diff --git a/src/tsdb/src/tsdbMeta.c b/src/tsdb/src/tsdbMeta.c index 7795f25b71..1f31f18ce1 100644 --- a/src/tsdb/src/tsdbMeta.c +++ b/src/tsdb/src/tsdbMeta.c @@ -600,6 +600,10 @@ int tsdbDropTable(TsdbRepoT *repo, STableId tableId) { return -1; } + if (pTable->cqhandle != NULL) { + pRepo->appH.cqDropFunc(pTable->cqhandle); + } + tsdbTrace("vgId:%d, table %s is dropped! tid:%d, uid:%" PRId64, pRepo->config.tsdbId, varDataVal(pTable->name), tableId.tid, tableId.uid); if (tsdbRemoveTableFromMeta(pMeta, pTable, true) < 0) return -1; diff --git a/src/vnode/src/vnodeMain.c b/src/vnode/src/vnodeMain.c index 0882ee983d..fef0b071a6 100644 --- a/src/vnode/src/vnodeMain.c +++ b/src/vnode/src/vnodeMain.c @@ -325,6 +325,11 @@ void vnodeRelease(void *pVnodeRaw) { tsdbCloseRepo(pVnode->tsdb, 1); pVnode->tsdb = NULL; + // stop continuous query + if (pVnode->cq) + cqClose(pVnode->cq); + pVnode->cq = NULL; + if (pVnode->wal) walClose(pVnode->wal); pVnode->wal = NULL; @@ -436,11 +441,6 @@ static void vnodeCleanUp(SVnodeObj *pVnode) { pVnode->sync = NULL; } - // stop continuous query - if (pVnode->cq) - cqClose(pVnode->cq); - pVnode->cq = NULL; - // release local resources only after cutting off outside connections vnodeRelease(pVnode); } diff --git a/tests/script/general/parser/stream.sim b/tests/script/general/parser/stream.sim index 6c8d8f93ea..fca5f37d74 100644 --- a/tests/script/general/parser/stream.sim +++ b/tests/script/general/parser/stream.sim @@ -51,19 +51,20 @@ sql drop table strm ## [TBASE304] print ====== TBASE-304 sleep 10000 -print create mt -sql create table mt (ts timestamp, c1 int) tags(t1 int, t2 int) +# we cannot reset query cache in server side, as a workaround, +# set super table name to mt304, need to change back to mt later +print create mt304 +sql create table mt304 (ts timestamp, c1 int) tags(t1 int, t2 int) print create tb1 -sql create table tb1 using mt tags(1, 1) +sql create table tb1 using mt304 tags(1, 1) print create tb2 -sql create table tb2 using mt tags(1, -1) +sql create table tb2 using mt304 tags(1, -1) print create strm -sql create table strm as select count(*), avg(c1) from mt where t2 >= 0 interval(4s) sliding(2s) +sql create table strm as select count(*), avg(c1) from mt304 where t2 >= 0 interval(4s) sliding(2s) sql insert into tb1 values (now,1) sql insert into tb2 values (now,2) -sleep 20000 +sleep 100000 sql select * from strm; -sleep 1000 if $rows != 2 then print ==== expect rows = 2, actually returned rows = $rows return -1 @@ -75,11 +76,11 @@ print data02 = $data02 if $data02 != 1.000000000 then return -1 endi -sql alter table mt drop tag t2; +sql alter table mt304 drop tag t2; sql insert into tb2 values (now,2); sql insert into tb1 values (now,1); sql select * from strm; -sql alter table mt add tag t2 int; +sql alter table mt304 add tag t2 int; sleep 10000 sql select * from strm @@ -98,7 +99,7 @@ sleep 4000 sql insert into tb2 values (now, 2, 'tb2') sleep 4000 sql insert into tb3 values (now, 0, 'tb3') -sleep 6000 +sleep 60000 sql describe strm if $rows == 0 then @@ -134,11 +135,9 @@ endi ## The vnode client needs to refresh metadata cache to allow strm calculate tb4's data. But the current refreshing frequency is every 10 min ## commented out the case below to save running time sql create table tb4 using stb tags('a4') -sleep 6000 sql insert into tb4 values(now, 4, 'tb4') -sleep 10000 +sleep 60000 sql select * from strm order by ts desc -sleep 1000 print ======== data0: $data00 $data01 $data02 $data03 #print ======== data1: $data10 $data11 $data12 $data13 #print ======== data2: $data20 $data21 $data22 $data23 @@ -160,7 +159,7 @@ sleep 3000 # waiting for new tag valid sql insert into tb1 values (now, 1, 'tb1_a1') sleep 4000 sql insert into tb4 values (now, -4, 'tb4_b4') -sleep 10000 +sleep 100000 sql select * from strm order by ts desc sleep 1000 print ======== data0: $data00 $data01 $data02 $data03 @@ -191,9 +190,9 @@ sql create table tb3 using stb tags(3, 'tb3') sql create table tb4 using stb tags(4, 'tb4') sql create table strm0 as select count(ts), count(c1), max(c2), min(c4), first(c5), last(c6) from stb where ts < now + 30s interval(4s) sliding(2s) -sleep 10000 +sleep 1000 sql insert into tb0 values (now, 0, 0, 0, 0, 'binary0', '涛思0', true) tb1 values (now, 1, 1, 1, 1, 'binary1', '涛思1', false) tb2 values (now, 2, 2, 2, 2, 'binary2', '涛思2', true) tb3 values (now, 3, 3, 3, 3, 'binary3', '涛思3', false) tb4 values (now, 4, 4, 4, 4, 'binary4', '涛思4', true) -sleep 5000 +sleep 20000 sql select * from strm0 order by ts desc sleep 1000 if $rows != 2 then @@ -202,7 +201,7 @@ if $rows != 2 then endi sql insert into tb0 values (now, 10, 10, 10, 10, 'binary0', '涛思0', true) tb1 values (now, 11, 11, 11, 11, 'binary1', '涛思1', false) tb2 values (now, 12, 12, 12, 12, 'binary2', '涛思2', true) tb3 values (now, 13, 13, 13, 13, 'binary3', '涛思3', false) tb4 values (now, 14, 14, 14, 14, 'binary4', '涛思4', true) -sleep 5000 +sleep 30000 sql select * from strm0 order by ts desc sleep 1000 if $rows != 4 then @@ -210,15 +209,4 @@ if $rows != 4 then return -1 endi -sql drop database if exists strm_db_0 -sql show databases -if $rows != 0 then - return -1 -endi - -sql create database $db -sql use $db -sql create table stb (ts timestamp, c1 int) tags(t1 int) -sql create table tb1 using stb tags(1) - system sh/exec.sh -n dnode1 -s stop -x SIGINT From f75af3364e8a70e6230cc6def247a2e7980bcb1a Mon Sep 17 00:00:00 2001 From: Hui Li Date: Tue, 16 Jun 2020 18:27:49 +0800 Subject: [PATCH 02/10] [TD-421] --- src/mnode/src/mnodeProfile.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/src/mnode/src/mnodeProfile.c b/src/mnode/src/mnodeProfile.c index 7552ea5c25..31ca47929c 100644 --- a/src/mnode/src/mnodeProfile.c +++ b/src/mnode/src/mnodeProfile.c @@ -235,14 +235,12 @@ static int32_t mnodeRetrieveConns(SShowObj *pShow, char *data, int32_t rows, voi cols++; pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows; - size_t size = sizeof(pConnObj->user); - STR_WITH_MAXSIZE_TO_VARSTR(pWrite, pConnObj->user, size); + STR_WITH_MAXSIZE_TO_VARSTR(pWrite, pConnObj->user, pShow->bytes[cols]); cols++; pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows; snprintf(ipStr, sizeof(ipStr), "%s:%u", taosIpStr(pConnObj->ip), pConnObj->port); - size = sizeof(ipStr); - STR_WITH_MAXSIZE_TO_VARSTR(pWrite, ipStr, size); + STR_WITH_MAXSIZE_TO_VARSTR(pWrite, ipStr, pShow->bytes[cols]); cols++; pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows; @@ -356,18 +354,16 @@ static int32_t mnodeRetrieveQueries(SShowObj *pShow, char *data, int32_t rows, v snprintf(ipStr, QUERY_ID_SIZE + 1, "%u:%u", pConnObj->connId, htonl(pDesc->queryId)); pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows; - STR_WITH_MAXSIZE_TO_VARSTR(pWrite, ipStr, QUERY_ID_SIZE); + STR_WITH_MAXSIZE_TO_VARSTR(pWrite, ipStr, pShow->bytes[cols]); cols++; pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows; - size_t size = sizeof(pConnObj->user); - STR_WITH_MAXSIZE_TO_VARSTR(pWrite, pConnObj->user, size); + STR_WITH_MAXSIZE_TO_VARSTR(pWrite, pConnObj->user, pShow->bytes[cols]); cols++; pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows; snprintf(ipStr, sizeof(ipStr), "%s:%u", taosIpStr(pConnObj->ip), pConnObj->port); - size = sizeof(ipStr); - STR_WITH_MAXSIZE_TO_VARSTR(pWrite, ipStr, size); + STR_WITH_MAXSIZE_TO_VARSTR(pWrite, ipStr, pShow->bytes[cols]); cols++; pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows; @@ -379,7 +375,7 @@ static int32_t mnodeRetrieveQueries(SShowObj *pShow, char *data, int32_t rows, v cols++; pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows; - STR_WITH_MAXSIZE_TO_VARSTR(pWrite, pDesc->sql, TSDB_SHOW_SQL_LEN); + STR_WITH_MAXSIZE_TO_VARSTR(pWrite, pDesc->sql, pShow->bytes[cols]); cols++; numOfRows++; @@ -479,18 +475,16 @@ static int32_t mnodeRetrieveStreams(SShowObj *pShow, char *data, int32_t rows, v snprintf(ipStr, QUERY_ID_SIZE + 1, "%u:%u", pConnObj->connId, htonl(pDesc->streamId)); pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows; - STR_WITH_MAXSIZE_TO_VARSTR(pWrite, ipStr, QUERY_ID_SIZE); + STR_WITH_MAXSIZE_TO_VARSTR(pWrite, ipStr, pShow->bytes[cols]); cols++; pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows; - size_t size = sizeof(pConnObj->user); - STR_WITH_MAXSIZE_TO_VARSTR(pWrite, pConnObj->user, size); + STR_WITH_MAXSIZE_TO_VARSTR(pWrite, pConnObj->user, pShow->bytes[cols]); cols++; pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows; snprintf(ipStr, sizeof(ipStr), "%s:%u", taosIpStr(pConnObj->ip), pConnObj->port); - size = sizeof(ipStr); - STR_WITH_MAXSIZE_TO_VARSTR(pWrite, ipStr, size); + STR_WITH_MAXSIZE_TO_VARSTR(pWrite, ipStr, pShow->bytes[cols]); cols++; pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows; @@ -506,7 +500,7 @@ static int32_t mnodeRetrieveStreams(SShowObj *pShow, char *data, int32_t rows, v cols++; pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows; - STR_WITH_MAXSIZE_TO_VARSTR(pWrite, pDesc->sql, TSDB_SHOW_SQL_LEN); + STR_WITH_MAXSIZE_TO_VARSTR(pWrite, pDesc->sql, pShow->bytes[cols]); cols++; pWrite = data + pShow->offset[cols] * rows + pShow->bytes[cols] * numOfRows; From 646b5e53a7aba1d1e861c614b67040ab7939d5a1 Mon Sep 17 00:00:00 2001 From: Jeff Tao Date: Tue, 16 Jun 2020 11:03:23 +0000 Subject: [PATCH 03/10] remove race condition in retry timer --- src/rpc/src/rpcMain.c | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/src/rpc/src/rpcMain.c b/src/rpc/src/rpcMain.c index 9bb63b751a..f812d95188 100644 --- a/src/rpc/src/rpcMain.c +++ b/src/rpc/src/rpcMain.c @@ -555,18 +555,10 @@ static SRpcConn *rpcOpenConn(SRpcInfo *pRpc, char *peerFqdn, uint16_t peerPort, return pConn; } -static void rpcCloseConn(void *thandle) { - SRpcConn *pConn = (SRpcConn *)thandle; +static void rpcReleaseConn(SRpcConn *pConn) { SRpcInfo *pRpc = pConn->pRpc; if (pConn->user[0] == 0) return; - rpcLockConn(pConn); - - if (pConn->user[0] == 0) { - rpcUnlockConn(pConn); - return; - } - pConn->user[0] = 0; if (taosCloseConn[pConn->connType]) (*taosCloseConn[pConn->connType])(pConn->chandle); @@ -591,7 +583,21 @@ static void rpcCloseConn(void *thandle) { taosFreeId(pRpc->idPool, pConn->sid); pConn->pContext = NULL; - tTrace("%s, rpc connection is closed", pConn->info); + tTrace("%s, rpc connection is released", pConn->info); +} + +static void rpcCloseConn(void *thandle) { + SRpcConn *pConn = (SRpcConn *)thandle; + if (pConn->user[0] == 0) return; + + rpcLockConn(pConn); + + if (pConn->user[0] == 0) { + rpcUnlockConn(pConn); + return; + } + + rpcReleaseConn(pConn); rpcUnlockConn(pConn); } @@ -911,8 +917,8 @@ static void rpcProcessBrokenLink(SRpcConn *pConn) { if (pConn->inType) rpcReportBrokenLinkToServer(pConn); + rpcReleaseConn(pConn); rpcUnlockConn(pConn); - rpcCloseConn(pConn); } static void *rpcProcessMsgFromPeer(SRecvInfo *pRecv) { @@ -1217,7 +1223,6 @@ static void rpcProcessConnError(void *param, void *id) { static void rpcProcessRetryTimer(void *param, void *tmrId) { SRpcConn *pConn = (SRpcConn *)param; SRpcInfo *pRpc = pConn->pRpc; - int reportDisc = 0; rpcLockConn(pConn); @@ -1233,19 +1238,17 @@ static void rpcProcessRetryTimer(void *param, void *tmrId) { } else { // close the connection tTrace("%s, failed to send msg:%s to %s:%hu", pConn->info, taosMsg[pConn->outType], pConn->peerFqdn, pConn->peerPort); - reportDisc = 1; + if (pConn->pContext) { + pConn->pContext->code = TSDB_CODE_RPC_NETWORK_UNAVAIL; + rpcProcessConnError(pConn->pContext, NULL); + rpcReleaseConn(pConn); + } } } else { tTrace("%s, retry timer not processed", pConn->info); } rpcUnlockConn(pConn); - - if (reportDisc && pConn->pContext) { - pConn->pContext->code = TSDB_CODE_RPC_NETWORK_UNAVAIL; - rpcProcessConnError(pConn->pContext, NULL); - rpcCloseConn(pConn); - } } static void rpcProcessIdleTimer(void *param, void *tmrId) { From c96e3b7ccc5b125716aea72b898edaee600c367d Mon Sep 17 00:00:00 2001 From: Hui Li Date: Tue, 16 Jun 2020 19:20:06 +0800 Subject: [PATCH 04/10] [modify] --- .../dn3_mn1_vnode_corruptFile_offline.sim | 157 +++++++++++++++++- 1 file changed, 152 insertions(+), 5 deletions(-) diff --git a/tests/script/unique/arbitrator/dn3_mn1_vnode_corruptFile_offline.sim b/tests/script/unique/arbitrator/dn3_mn1_vnode_corruptFile_offline.sim index 6062b07510..a7529d5bbc 100644 --- a/tests/script/unique/arbitrator/dn3_mn1_vnode_corruptFile_offline.sim +++ b/tests/script/unique/arbitrator/dn3_mn1_vnode_corruptFile_offline.sim @@ -93,7 +93,7 @@ if $data00 != $totalRows then return -1 endi -print ============== step3: stop dnode3, then corrupt vnode data file in dnode3 +print ============== step3: stop dnode3 for falling disc, then corrupt vnode data file in dnode3 system sh/exec.sh -n dnode3 -s stop -x SIGINT sleep $sleepTimer wait_dnode3_offline_0: @@ -141,13 +141,26 @@ if $dnode2Vtatus != master then goto wait_dnode3_vgroup_offline endi -# del the second row -system sed '2d' ../../../sim/dnode3/data/vnode/vnode2/tsdb/data/v1849.data -sleep 1000 +#system_content ls ../../../sim/dnode3/data/vnode/vnode2/tsdb/data/ -l | grep "^-" | wc -l | sed 's/^[ \t]*//g' +#system_content ls ../../../sim/dnode3/data/vnode/vnode2/tsdb/data/ -l | grep "^-" | wc -l | sed 's/[ \t]*$//g' +#print --2-->dnode3 data files: [ $system_content ] + +system_content ls ../../../sim/dnode2/data/vnode/vnode2/tsdb/data/ -l | grep "^-" | wc -l +print ---->dnode2 data files: [ $system_content ], expect is 0 + +system_content ls ../../../sim/dnode3/data/vnode/vnode2/tsdb/data/ -l | grep "^-" | wc -l +print ---->dnode3 data files: [ $system_content ], expect is 3 +#if $system_content != 3 then +# return -1 +#endi + +#system echo "haha, nothing......" > ../../../sim/dnode3/data/vnode/vnode2/tsdb/data/f1643.data +#sleep 1000 print ============== step4: restart dnode3, and run query system sh/exec.sh -n dnode3 -s start sleep $sleepTimer + wait_dnode3_reready: sql show dnodes if $rows != 3 then @@ -171,12 +184,49 @@ if $dnode3Status != ready then goto wait_dnode3_reready endi +wait_dnode3_vgroup_slave: +sql show vgroups +if $rows != 1 then + sleep 2000 + goto wait_dnode3_vgroup_slave +endi +print show vgroups: +print $data0_1 $data1_1 $data2_1 $data3_1 $data4_1 $data5_1 $data6_1 $data7_1 $data8_1 $data9_1 +print $data0_2 $data1_2 $data2_2 $data3_2 $data4_2 $data5_2 $data6_2 $data7_2 $data8_2 $data9_2 +print $data0_3 $data1_3 $data2_3 $data3_3 $data4_3 $data5_3 $data6_3 $data7_3 $data8_3 $data9_3 +$dnode2Vtatus = $data7_2 +$dnode3Vtatus = $data4_2 + +if $dnode2Vtatus != master then + sleep 2000 + goto wait_dnode3_vgroup_slave +endi +if $dnode3Vtatus != slave then + sleep 2000 + goto wait_dnode3_vgroup_slave +endi + sql select count(*) from $stb print data00 $data00 if $data00 != $totalRows then return -1 endi +system_content ls ../../../sim/dnode2/data/vnode/vnode2/tsdb/data/ -l |grep "^-"|wc -l +print ----> dnode2 data files: [ $system_content ], expect is 0 +#if $system_content != 0 then +# return -1 +#endi + +system_content ls ../../../sim/dnode3/data/vnode/vnode2/tsdb/data/ -l |grep "^-"|wc -l +print ----> dnode3 data files: [ $system_content ], expect is 0 +#if $system_content != 0 then +# print there should be no data file in dnode3 after sync +# return -1 +#endi + +return -1 + print ============== step5: stop dnode2, and check if dnode3 sync ok system sh/exec.sh -n dnode2 -s stop -x SIGINT sleep $sleepTimer @@ -229,4 +279,101 @@ sql select count(*) from $stb print data00 $data00 if $data00 != $totalRows then return -1 -endi \ No newline at end of file +endi + +print ============== step6: stop dnode3 for falling disck +system sh/exec.sh -n dnode3 -s stop -x SIGINT +sleep $sleepTimer +sql show dnodes +print $data0_1 $data1_1 $data2_1 $data3_1 $data4_1 +print $data0_2 $data1_2 $data2_2 $data3_2 $data4_2 +print $data0_3 $data1_3 $data2_3 $data3_3 $data4_3 + +sql show vgroups +print show vgroups: +print $data0_1 $data1_1 $data2_1 $data3_1 $data4_1 $data5_1 $data6_1 $data7_1 $data8_1 $data9_1 +print $data0_2 $data1_2 $data2_2 $data3_2 $data4_2 $data5_2 $data6_2 $data7_2 $data8_2 $data9_2 +print $data0_3 $data1_3 $data2_3 $data3_3 $data4_3 $data5_3 $data6_3 $data7_3 $data8_3 $data9_3 + + +print ============== step7: restart dnode3, and run query +system sh/exec.sh -n dnode3 -s start +sleep $sleepTimer +$loopCnt = 0 +wait_dnode3_reready_2: +$loopCnt = $loopCnt + 1 +if $loopCnt == 10 then + return -1 +endi +sql show dnodes +if $rows != 3 then + sleep 2000 + goto wait_dnode3_reready_2 +endi +print $data0_1 $data1_1 $data2_1 $data3_1 $data4_1 +print $data0_2 $data1_2 $data2_2 $data3_2 $data4_2 +print $data0_3 $data1_3 $data2_3 $data3_3 $data4_3 +$dnode1Status = $data4_1 +$dnode2Status = $data4_2 +$dnode3Status = $data4_3 + +if $dnode3Status != ready then + sleep 2000 + goto wait_dnode3_reready_2 +endi + +$loopCnt = 0 +wait_dnode3_vgroup_master_2: +$loopCnt = $loopCnt + 1 +if $loopCnt == 10 then + return -1 +endi +sql show vgroups +if $rows != 1 then + sleep 2000 + goto wait_dnode3_vgroup_master_2 +endi +print show vgroups: +print $data0_1 $data1_1 $data2_1 $data3_1 $data4_1 $data5_1 $data6_1 $data7_1 $data8_1 $data9_1 +print $data0_2 $data1_2 $data2_2 $data3_2 $data4_2 $data5_2 $data6_2 $data7_2 $data8_2 $data9_2 +print $data0_3 $data1_3 $data2_3 $data3_3 $data4_3 $data5_3 $data6_3 $data7_3 $data8_3 $data9_3 +$dnode2Vtatus = $data7_2 +$dnode3Vtatus = $data4_2 + +if $dnode2Vtatus != offline then + sleep 2000 + goto wait_dnode3_vgroup_master_2 +endi +if $dnode3Vtatus != master then + sleep 2000 + goto wait_dnode3_vgroup_master_2 +endi + +sql select count(*) from $stb +print data00 $data00 +if $data00 != $totalRows then + return -1 +endi + + + + + + + + + + + + + + + + + + + + + + + From 2ab6298003e6d0904126584bf19f75eec53baf4b Mon Sep 17 00:00:00 2001 From: Hui Li Date: Tue, 16 Jun 2020 19:34:19 +0800 Subject: [PATCH 05/10] [modify] --- .../arbitrator/dn2_mn1_cache_file_sync.sim | 77 +++++++++++++++---- 1 file changed, 63 insertions(+), 14 deletions(-) diff --git a/tests/script/unique/arbitrator/dn2_mn1_cache_file_sync.sim b/tests/script/unique/arbitrator/dn2_mn1_cache_file_sync.sim index cfd992db3a..2620746d38 100644 --- a/tests/script/unique/arbitrator/dn2_mn1_cache_file_sync.sim +++ b/tests/script/unique/arbitrator/dn2_mn1_cache_file_sync.sim @@ -1,11 +1,11 @@ # Test case describe: dnode1 is only mnode, dnode2/dnode3 are only vnode # step 1: start dnode1 # step 2: start dnode2 and dnode3, and all added into cluster (Suppose dnode2 is master-vnode) -# step 3: create db, table, insert data, and Falling disc into file (control only one file, e.g. 1841) -# step 4: insert old data(now-20d) and new data(now-40d), control data rows in order to save in cache, not falling disc -# step 5: stop dnode2, so date rows falling disc, generate two new files 1840, 1842 in dnode2 -# step 6: insert two data rows: now-21d, now-41d -# step 7: restart dnode2, waiting sync end +# step 2: create db, table, insert data, and Falling disc into file (control only one file, e.g. 1841) +# step 3: insert old data(now-20d) and new data(now-40d), control data rows in order to save in cache, not falling disc +# step 4: stop dnode2, so date rows falling disc, generate two new files 1840, 1842 in dnode2 +# step 5: insert two data rows: now-21d, now-41d +# step 6: restart dnode2, waiting sync end # expect: in dnode2, the files 1837 and 1839 will be removed system sh/stop_dnodes.sh @@ -113,15 +113,15 @@ sql insert into $tb values ( now - 20d , -20 ) sql insert into $tb values ( now - 40d , -40 ) $totalRows = $totalRows + 2 -print ============== step4: stop dnode2, so date rows falling disc, generate two new files in dnode2 -system sh/exec.sh -n dnode2 -s stop -x SIGINT +print ============== step4: stop dnode3, so date rows falling disc, generate two new files in dnode3 +system sh/exec.sh -n dnode3 -s stop -x SIGINT sleep $sleepTimer -wait_dnode2_offline: +wait_dnode3_offline: sql show dnodes if $rows != 3 then sleep 2000 - goto wait_dnode2_offline + goto wait_dnode3_offline endi print $data0_1 $data1_1 $data2_1 $data3_1 $data4_1 print $data0_2 $data1_2 $data2_2 $data3_2 $data4_2 @@ -135,13 +135,13 @@ $dnode3Status = $data4_3 #$dnode4Status = $data4_4 #$dnode5Status = $data4_5 -if $dnode2Status != offline then +if $dnode3Status != offline then sleep 2000 - goto wait_dnode2_offline + goto wait_dnode3_offline endi -if $dnode3Status != ready then +if $dnode2Status != ready then sleep 2000 - goto wait_dnode2_offline + goto wait_dnode3_offline endi sleep $sleepTimer # waitting for move master vnode of dnode2 to dnode3 @@ -152,6 +152,12 @@ if $data00 != $totalRows then return -1 endi +system_content ls ../../../sim/dnode3/data/vnode/vnode2/tsdb/data/ -l |grep "^-"|wc -l +print ---->dnode3 data files: $system_content , expect is 9 +#if $system_content != @9@ then +# return -1 +#endi + print ============== step5: insert two data rows: now-16d, now+16d, sql insert into $tb values ( now - 21d , -21 ) sql insert into $tb values ( now - 41d , -41 ) @@ -163,4 +169,47 @@ if $data00 != $totalRows then return -1 endi -print ============== step6: please check there should be 3 file in sim/dnode2/data/vnode/vnode2/tsdb/data/, and 1 file sim/dnode3/data/vnode/vnode2/tsdb/data/ +system_content ls ../../../sim/dnode2/data/vnode/vnode2/tsdb/data/ -l |grep "^-"|wc -l +print ---->dnode2 data files: $system_content , expect is 3 +#if $system_content != @3@ then +# return -1 +#endi + +print ============== step7: restart dnode3, waiting sync end +system sh/exec.sh -n dnode3 -s start +sleep 3000 + +wait_dnode3_ready: +sql show dnodes +if $rows != 3 then + sleep 2000 + goto wait_dnode3_ready +endi +print $data0_1 $data1_1 $data2_1 $data3_1 $data4_1 +print $data0_2 $data1_2 $data2_2 $data3_2 $data4_2 +print $data0_3 $data1_3 $data2_3 $data3_3 $data4_3 +$dnode1Status = $data4_1 +$dnode2Status = $data4_2 +$dnode3Status = $data4_3 + +if $dnode3Status != ready then + sleep 2000 + goto wait_dnode3_ready +endi + +sql select count(*) from $stb +print data00 $data00 +if $data00 != $totalRows then + return -1 +endi + +system_content ls ../../../sim/dnode2/data/vnode/vnode2/tsdb/data/ -l |grep "^-"|wc -l +print ---->dnode2 data files: $system_content , expect is 3 +#if $system_content != @3@ then +# return -1 +#endi +system_content ls ../../../sim/dnode3/data/vnode/vnode2/tsdb/data/ -l |grep "^-"|wc -l +print ---->dnode3 data files: $system_content , expect is 3 +#if $system_content != @3@ then +# return -1 +#endi \ No newline at end of file From 4e87a28e8d0c980f2ea1c5d00cf530145d88a955 Mon Sep 17 00:00:00 2001 From: Jeff Tao Date: Tue, 16 Jun 2020 12:03:36 +0000 Subject: [PATCH 06/10] fix deadlock --- src/rpc/src/rpcMain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rpc/src/rpcMain.c b/src/rpc/src/rpcMain.c index f812d95188..d455cd645f 100644 --- a/src/rpc/src/rpcMain.c +++ b/src/rpc/src/rpcMain.c @@ -1240,7 +1240,7 @@ static void rpcProcessRetryTimer(void *param, void *tmrId) { tTrace("%s, failed to send msg:%s to %s:%hu", pConn->info, taosMsg[pConn->outType], pConn->peerFqdn, pConn->peerPort); if (pConn->pContext) { pConn->pContext->code = TSDB_CODE_RPC_NETWORK_UNAVAIL; - rpcProcessConnError(pConn->pContext, NULL); + taosTmrStart(rpcProcessConnError, 0, pContext, pRpc->tmrCtrl); rpcReleaseConn(pConn); } } From 58922d86002a7d3772faba8229ef48e0951e693a Mon Sep 17 00:00:00 2001 From: Jeff Tao Date: Tue, 16 Jun 2020 12:05:25 +0000 Subject: [PATCH 07/10] compiling error --- src/rpc/src/rpcMain.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rpc/src/rpcMain.c b/src/rpc/src/rpcMain.c index d455cd645f..2e2202a31b 100644 --- a/src/rpc/src/rpcMain.c +++ b/src/rpc/src/rpcMain.c @@ -1240,7 +1240,7 @@ static void rpcProcessRetryTimer(void *param, void *tmrId) { tTrace("%s, failed to send msg:%s to %s:%hu", pConn->info, taosMsg[pConn->outType], pConn->peerFqdn, pConn->peerPort); if (pConn->pContext) { pConn->pContext->code = TSDB_CODE_RPC_NETWORK_UNAVAIL; - taosTmrStart(rpcProcessConnError, 0, pContext, pRpc->tmrCtrl); + taosTmrStart(rpcProcessConnError, 0, pConn->pContext, pRpc->tmrCtrl); rpcReleaseConn(pConn); } } From 387f162ddf2f0e562858eafd37827e8cf8df602f Mon Sep 17 00:00:00 2001 From: Jeff Tao Date: Tue, 16 Jun 2020 12:36:58 +0000 Subject: [PATCH 08/10] add lock in all timer processing --- src/rpc/src/rpcMain.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/rpc/src/rpcMain.c b/src/rpc/src/rpcMain.c index 2e2202a31b..90f11bb3de 100644 --- a/src/rpc/src/rpcMain.c +++ b/src/rpc/src/rpcMain.c @@ -490,6 +490,7 @@ void rpcSendRecv(void *shandle, SRpcIpSet *pIpSet, const SRpcMsg *pMsg, SRpcMsg int rpcReportProgress(void *handle, char *pCont, int contLen) { SRpcConn *pConn = (SRpcConn *)handle; + rpcLockConn(pConn); if (pConn->user[0]) { // pReqMsg and reqMsgLen is re-used to store the context from app server pConn->pReqMsg = pCont; @@ -499,6 +500,8 @@ int rpcReportProgress(void *handle, char *pCont, int contLen) { tTrace("%s, rpc connection is already released", pConn->info); rpcFreeCont(pCont); + rpcUnlockConn(pConn); + return -1; } @@ -1254,13 +1257,17 @@ static void rpcProcessRetryTimer(void *param, void *tmrId) { static void rpcProcessIdleTimer(void *param, void *tmrId) { SRpcConn *pConn = (SRpcConn *)param; + rpcLockConn(pConn); + if (pConn->user[0]) { tTrace("%s, close the connection since no activity", pConn->info); if (pConn->inType) rpcReportBrokenLinkToServer(pConn); - rpcCloseConn(pConn); + rpcReleaseConn(pConn); } else { tTrace("%s, idle timer:%p not processed", pConn->info, tmrId); } + + rpcUnlockConn(pConn); } static void rpcProcessProgressTimer(void *param, void *tmrId) { From f3f0e39ab16f4bf6fb4675c8b62e28b6a8480a47 Mon Sep 17 00:00:00 2001 From: Jeff Tao Date: Tue, 16 Jun 2020 13:12:27 +0000 Subject: [PATCH 09/10] deadlock --- src/rpc/src/rpcMain.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/rpc/src/rpcMain.c b/src/rpc/src/rpcMain.c index 90f11bb3de..1558de37ea 100644 --- a/src/rpc/src/rpcMain.c +++ b/src/rpc/src/rpcMain.c @@ -489,20 +489,22 @@ void rpcSendRecv(void *shandle, SRpcIpSet *pIpSet, const SRpcMsg *pMsg, SRpcMsg // this API is used by server app to keep an APP context in case connection is broken int rpcReportProgress(void *handle, char *pCont, int contLen) { SRpcConn *pConn = (SRpcConn *)handle; + int code = 0; rpcLockConn(pConn); + if (pConn->user[0]) { // pReqMsg and reqMsgLen is re-used to store the context from app server pConn->pReqMsg = pCont; pConn->reqMsgLen = contLen; - return 0; - } + } else { + tTrace("%s, rpc connection is already released", pConn->info); + rpcFreeCont(pCont); + code = -1; + } - tTrace("%s, rpc connection is already released", pConn->info); - rpcFreeCont(pCont); rpcUnlockConn(pConn); - - return -1; + return code; } /* todo: cancel process may have race condition, pContext may have been released From a703510f1bd0598634dfa04b5f54fa533e6c4e03 Mon Sep 17 00:00:00 2001 From: Jeff Tao Date: Tue, 16 Jun 2020 13:40:18 +0000 Subject: [PATCH 10/10] tune up code --- src/rpc/src/rpcMain.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/rpc/src/rpcMain.c b/src/rpc/src/rpcMain.c index 1558de37ea..989021eb52 100644 --- a/src/rpc/src/rpcMain.c +++ b/src/rpc/src/rpcMain.c @@ -593,16 +593,11 @@ static void rpcReleaseConn(SRpcConn *pConn) { static void rpcCloseConn(void *thandle) { SRpcConn *pConn = (SRpcConn *)thandle; - if (pConn->user[0] == 0) return; rpcLockConn(pConn); - if (pConn->user[0] == 0) { - rpcUnlockConn(pConn); - return; - } - - rpcReleaseConn(pConn); + if (pConn->user[0]) + rpcReleaseConn(pConn); rpcUnlockConn(pConn); }