From a2150031461af90ba441db5f62a44e66cea28f35 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Sat, 18 Jun 2022 15:20:30 +0800 Subject: [PATCH 01/40] feature: eliminate projection optimization --- source/libs/function/src/tudf.c | 5 ++ source/libs/planner/src/planOptimizer.c | 59 ++++++++++++++++++- source/libs/planner/test/planOptimizeTest.cpp | 6 ++ 3 files changed, 69 insertions(+), 1 deletion(-) diff --git a/source/libs/function/src/tudf.c b/source/libs/function/src/tudf.c index 472d672607..f6ae027e48 100644 --- a/source/libs/function/src/tudf.c +++ b/source/libs/function/src/tudf.c @@ -972,6 +972,11 @@ void releaseUdfFuncHandle(char* udfName) { } int32_t cleanUpUdfs() { + int8_t initialized = atomic_load_8(&gUdfdProxy.initialized); + if (!initialized) { + return TSDB_CODE_SUCCESS; + } + uv_mutex_lock(&gUdfdProxy.udfStubsMutex); int32_t i = 0; SArray* udfStubs = taosArrayInit(16, sizeof(SUdfcFuncStub)); diff --git a/source/libs/planner/src/planOptimizer.c b/source/libs/planner/src/planOptimizer.c index a7c25162b7..0f2abc4568 100644 --- a/source/libs/planner/src/planOptimizer.c +++ b/source/libs/planner/src/planOptimizer.c @@ -1082,13 +1082,70 @@ static int32_t partTagsOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogicSub return code; } +static bool eliminateProjOptMayBeOptimized(SLogicNode* pNode) { + if (QUERY_NODE_LOGIC_PLAN_PROJECT != nodeType(pNode) || 1 != LIST_LENGTH(pNode->pChildren)) { + return false; + } + + SProjectLogicNode* pProjectNode = (SProjectLogicNode*)pNode; + SNode* pProjection; + FOREACH(pProjection, pProjectNode->pProjections) { + SExprNode* pExprNode = (SExprNode*)pProjection; + if (QUERY_NODE_COLUMN != nodeType(pExprNode)) { + return false; + } + } + + return true; +} + +static int32_t eliminateProjOptimizeImpl(SOptimizeContext* pCxt, SLogicSubplan* pLogicSubplan, SProjectLogicNode* pProjectNode) { + SLogicNode* pChild = (SLogicNode*)nodesListGetNode(pProjectNode->node.pChildren, 0); + SNodeList* pNewChildTargets = 
nodesMakeList(); + + SNode* pProjection = NULL; + FOREACH(pProjection, pProjectNode->pProjections) { + SColumnNode* projColumn = (SColumnNode*)pProjection; + SNode* pChildTarget = NULL; + FOREACH(pChildTarget, pChild->pTargets) { + SExprNode* childExpr = (SExprNode*)pChildTarget; + char* projColumnName = projColumn->colName; + if (QUERY_NODE_COLUMN == nodeType(childExpr) && strcmp(projColumnName, ((SColumnNode*)childExpr)->colName) == 0 || + strcmp(projColumnName, childExpr->aliasName) == 0) { + nodesListAppend(pNewChildTargets, pChildTarget); + } + } + } + + TSWAP(pChild->pTargets, pNewChildTargets); + int32_t code = replaceLogicNode(pLogicSubplan, (SLogicNode*)pProjectNode, pChild); + if (TSDB_CODE_SUCCESS == code) { + NODES_CLEAR_LIST(pProjectNode->node.pChildren); + nodesDestroyNode((SNode*)pProjectNode); + } + NODES_CLEAR_LIST(pNewChildTargets); + return code; +} + +static int32_t eliminateProjOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogicSubplan) { + SProjectLogicNode* pProjectNode = + (SProjectLogicNode*)optFindPossibleNode(pLogicSubplan->pNode, eliminateProjOptMayBeOptimized); + + if (NULL == pProjectNode) { + return TSDB_CODE_SUCCESS; + } + + return eliminateProjOptimizeImpl(pCxt, pLogicSubplan, pProjectNode); +} + // clang-format off static const SOptimizeRule optimizeRuleSet[] = { {.pName = "OptimizeScanData", .optimizeFunc = osdOptimize}, {.pName = "ConditionPushDown", .optimizeFunc = cpdOptimize}, {.pName = "OrderByPrimaryKey", .optimizeFunc = opkOptimize}, {.pName = "SmaIndex", .optimizeFunc = smaOptimize}, - {.pName = "PartitionByTags", .optimizeFunc = partTagsOptimize} + {.pName = "PartitionByTags", .optimizeFunc = partTagsOptimize}, + {.pName = "EliminateProject", .optimizeFunc = eliminateProjOptimize} }; // clang-format on diff --git a/source/libs/planner/test/planOptimizeTest.cpp b/source/libs/planner/test/planOptimizeTest.cpp index 84ccea668d..b9e2be4e16 100644 --- a/source/libs/planner/test/planOptimizeTest.cpp +++ 
b/source/libs/planner/test/planOptimizeTest.cpp @@ -52,3 +52,9 @@ TEST_F(PlanOptimizeTest, orderByPrimaryKey) { run("SELECT COUNT(*) FROM t1 INTERVAL(10S) ORDER BY _WSTARTTS DESC"); } + +TEST_F(PlanOptimizeTest, eliminateProjection) { + useDb("root", "test"); + + run("SELECT c1, sum(c3) FROM t1 GROUP BY c1"); +} From 683cf876760c78a866c21ed0653f60ff9e7967e6 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Sat, 18 Jun 2022 16:24:29 +0800 Subject: [PATCH 02/40] fix: set parent of new logic node to null --- source/libs/planner/src/planUtil.c | 1 + source/libs/planner/test/planOptimizeTest.cpp | 2 ++ 2 files changed, 3 insertions(+) diff --git a/source/libs/planner/src/planUtil.c b/source/libs/planner/src/planUtil.c index 7f650c7c9a..77e4e05530 100644 --- a/source/libs/planner/src/planUtil.c +++ b/source/libs/planner/src/planUtil.c @@ -107,6 +107,7 @@ int32_t createColumnByRewriteExpr(SNode* pExpr, SNodeList** pList) { int32_t replaceLogicNode(SLogicSubplan* pSubplan, SLogicNode* pOld, SLogicNode* pNew) { if (NULL == pOld->pParent) { pSubplan->pNode = (SLogicNode*)pNew; + pNew->pParent = NULL; return TSDB_CODE_SUCCESS; } diff --git a/source/libs/planner/test/planOptimizeTest.cpp b/source/libs/planner/test/planOptimizeTest.cpp index b9e2be4e16..10bdd5b21d 100644 --- a/source/libs/planner/test/planOptimizeTest.cpp +++ b/source/libs/planner/test/planOptimizeTest.cpp @@ -57,4 +57,6 @@ TEST_F(PlanOptimizeTest, eliminateProjection) { useDb("root", "test"); run("SELECT c1, sum(c3) FROM t1 GROUP BY c1"); + run("SELECT c1 FROM t1"); + run("SELECT * FROM st1"); } From 53a07f0c3700cd64bdf6b6ba3d920ee76c604d38 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Sat, 18 Jun 2022 17:58:15 +0800 Subject: [PATCH 03/40] feat: add test case sql --- source/libs/planner/test/planOptimizeTest.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/source/libs/planner/test/planOptimizeTest.cpp b/source/libs/planner/test/planOptimizeTest.cpp index 10bdd5b21d..07b3adbc1f 100644 --- 
a/source/libs/planner/test/planOptimizeTest.cpp +++ b/source/libs/planner/test/planOptimizeTest.cpp @@ -59,4 +59,5 @@ TEST_F(PlanOptimizeTest, eliminateProjection) { run("SELECT c1, sum(c3) FROM t1 GROUP BY c1"); run("SELECT c1 FROM t1"); run("SELECT * FROM st1"); + run("SELECT c1 FROM st1s3"); } From 8c402d2aa3b9aa1b586b88309abccd4f60f223c3 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Sun, 19 Jun 2022 21:39:35 +0800 Subject: [PATCH 04/40] fix: column target/output desc match error --- source/libs/executor/src/dataDispatcher.c | 10 ++++++++-- source/libs/executor/src/executil.c | 17 +++++++++++++---- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/source/libs/executor/src/dataDispatcher.c b/source/libs/executor/src/dataDispatcher.c index b7c7102143..808dd78ac3 100644 --- a/source/libs/executor/src/dataDispatcher.c +++ b/source/libs/executor/src/dataDispatcher.c @@ -75,8 +75,14 @@ static bool needCompress(const SSDataBlock* pData, int32_t numOfCols) { // The length of bitmap is decided by number of rows of this data block, and the length of each column data is // recorded in the first segment, next to the struct header static void toDataCacheEntry(SDataDispatchHandle* pHandle, const SInputData* pInput, SDataDispatchBuf* pBuf) { - int32_t numOfCols = LIST_LENGTH(pHandle->pSchema->pSlots); - + int32_t numOfCols = 0; + SNode* pNode; + FOREACH(pNode, pHandle->pSchema->pSlots) { + SSlotDescNode* pSlotDesc = (SSlotDescNode*)pNode; + if (pSlotDesc->output) { + ++numOfCols; + } + } SDataCacheEntry* pEntry = (SDataCacheEntry*)pBuf->pData; pEntry->compressed = (int8_t)needCompress(pInput->pData, numOfCols); pEntry->numOfRows = pInput->pData->info.rows; diff --git a/source/libs/executor/src/executil.c b/source/libs/executor/src/executil.c index 99139be409..b319d77c17 100644 --- a/source/libs/executor/src/executil.c +++ b/source/libs/executor/src/executil.c @@ -193,9 +193,9 @@ SSDataBlock* createResDataBlock(SDataBlockDescNode* pNode) { for (int32_t i = 0; 
i < numOfCols; ++i) { SColumnInfoData idata = {{0}}; SSlotDescNode* pDescNode = (SSlotDescNode*)nodesListGetNode(pNode->pSlots, i); - // if (!pDescNode->output) { // todo disable it temporarily - // continue; - // } + if (!pDescNode->output) { // todo disable it temporarily + continue; + } idata.info.type = pDescNode->dataType.type; idata.info.bytes = pDescNode->dataType.bytes; @@ -319,7 +319,16 @@ SArray* extractColMatchInfo(SNodeList* pNodeList, SDataBlockDescNode* pOutputNod continue; } - SColMatchInfo* info = taosArrayGet(pList, pNode->slotId); + bool foundSource = false; + SColMatchInfo* info = NULL; + for (int32_t j = 0; j < taosArrayGetSize(pList); ++j) { + info = taosArrayGet(pList, j); + if (info->targetSlotId == pNode->slotId) { + foundSource = true; + break; + } + } + ASSERT(foundSource); if (pNode->output) { (*numOfOutputCols) += 1; } else { From 413d26ee30b8093c34d3bd93e78acf8c91f71471 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Sun, 19 Jun 2022 22:00:57 +0800 Subject: [PATCH 05/40] fix: add column to datablock even no output --- source/libs/executor/src/executil.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/source/libs/executor/src/executil.c b/source/libs/executor/src/executil.c index b319d77c17..18fc1ff477 100644 --- a/source/libs/executor/src/executil.c +++ b/source/libs/executor/src/executil.c @@ -193,9 +193,9 @@ SSDataBlock* createResDataBlock(SDataBlockDescNode* pNode) { for (int32_t i = 0; i < numOfCols; ++i) { SColumnInfoData idata = {{0}}; SSlotDescNode* pDescNode = (SSlotDescNode*)nodesListGetNode(pNode->pSlots, i); - if (!pDescNode->output) { // todo disable it temporarily - continue; - } +// if (!pDescNode->output) { // todo disable it temporarily +// continue; +// } idata.info.type = pDescNode->dataType.type; idata.info.bytes = pDescNode->dataType.bytes; @@ -594,10 +594,10 @@ void relocateColumnData(SSDataBlock* pBlock, const SArray* pColMatchInfo, SArray while (i < numOfSrcCols && j < 
taosArrayGetSize(pColMatchInfo)) { SColumnInfoData* p = taosArrayGet(pCols, i); SColMatchInfo* pmInfo = taosArrayGet(pColMatchInfo, j); - if (!pmInfo->output) { - j++; - continue; - } +// if (!pmInfo->output) { +// j++; +// continue; +// } if (p->info.colId == pmInfo->colId) { SColumnInfoData* pDst = taosArrayGet(pBlock->pDataBlock, pmInfo->targetSlotId); From 218d455dccb9b03e7319bc1161ea42dd9302dbcc Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Sun, 19 Jun 2022 22:32:26 +0800 Subject: [PATCH 06/40] fix: elimate projection error --- source/libs/executor/src/dataDispatcher.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/executor/src/dataDispatcher.c b/source/libs/executor/src/dataDispatcher.c index 808dd78ac3..802f9ea5b5 100644 --- a/source/libs/executor/src/dataDispatcher.c +++ b/source/libs/executor/src/dataDispatcher.c @@ -86,7 +86,7 @@ static void toDataCacheEntry(SDataDispatchHandle* pHandle, const SInputData* pIn SDataCacheEntry* pEntry = (SDataCacheEntry*)pBuf->pData; pEntry->compressed = (int8_t)needCompress(pInput->pData, numOfCols); pEntry->numOfRows = pInput->pData->info.rows; - pEntry->numOfCols = pInput->pData->info.numOfCols; + pEntry->numOfCols = numOfCols; pEntry->dataLen = 0; pBuf->useSize = sizeof(SDataCacheEntry); From 8c955ef017276cb46cd419b602df02f83d5eef14 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Mon, 20 Jun 2022 07:52:29 +0800 Subject: [PATCH 07/40] fix: limit/slimit would prevent optimization projection elimination --- source/libs/executor/src/executil.c | 3 ++- source/libs/planner/src/planOptimizer.c | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/source/libs/executor/src/executil.c b/source/libs/executor/src/executil.c index 18fc1ff477..77a18028b4 100644 --- a/source/libs/executor/src/executil.c +++ b/source/libs/executor/src/executil.c @@ -328,10 +328,11 @@ SArray* extractColMatchInfo(SNodeList* pNodeList, SDataBlockDescNode* pOutputNod break; } } - ASSERT(foundSource); 
+ if (pNode->output) { (*numOfOutputCols) += 1; } else { + ASSERT(foundSource); info->output = false; } } diff --git a/source/libs/planner/src/planOptimizer.c b/source/libs/planner/src/planOptimizer.c index 0f2abc4568..b8c6df4b54 100644 --- a/source/libs/planner/src/planOptimizer.c +++ b/source/libs/planner/src/planOptimizer.c @@ -1088,6 +1088,10 @@ static bool eliminateProjOptMayBeOptimized(SLogicNode* pNode) { } SProjectLogicNode* pProjectNode = (SProjectLogicNode*)pNode; + if (-1 != pProjectNode->limit || -1 != pProjectNode->slimit || -1 != pProjectNode->offset || -1 != pProjectNode->soffset) { + return false; + } + SNode* pProjection; FOREACH(pProjection, pProjectNode->pProjections) { SExprNode* pExprNode = (SExprNode*)pProjection; From 9bb21ebbc2e89afef67cf87c6cd7fc05113e1211 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Mon, 20 Jun 2022 08:45:57 +0800 Subject: [PATCH 08/40] fix: double free error during plan optimize --- source/libs/planner/src/planOptimizer.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/source/libs/planner/src/planOptimizer.c b/source/libs/planner/src/planOptimizer.c index b8c6df4b54..be6186f891 100644 --- a/source/libs/planner/src/planOptimizer.c +++ b/source/libs/planner/src/planOptimizer.c @@ -1116,18 +1116,18 @@ static int32_t eliminateProjOptimizeImpl(SOptimizeContext* pCxt, SLogicSubplan* char* projColumnName = projColumn->colName; if (QUERY_NODE_COLUMN == nodeType(childExpr) && strcmp(projColumnName, ((SColumnNode*)childExpr)->colName) == 0 || strcmp(projColumnName, childExpr->aliasName) == 0) { - nodesListAppend(pNewChildTargets, pChildTarget); + nodesListAppend(pNewChildTargets, nodesCloneNode(pChildTarget)); } } } - - TSWAP(pChild->pTargets, pNewChildTargets); + nodesDestroyList(pChild->pTargets); + pChild->pTargets = pNewChildTargets; + int32_t code = replaceLogicNode(pLogicSubplan, (SLogicNode*)pProjectNode, pChild); if (TSDB_CODE_SUCCESS == code) { 
NODES_CLEAR_LIST(pProjectNode->node.pChildren); nodesDestroyNode((SNode*)pProjectNode); } - NODES_CLEAR_LIST(pNewChildTargets); return code; } From 003be444e5cdb37acca5fb286b86f07f15754ef7 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Mon, 20 Jun 2022 11:43:36 +0800 Subject: [PATCH 09/40] test: adjust log output --- .../6-cluster/5dnode3mnodeStopInsert.py | 54 +++++++++---------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/tests/system-test/6-cluster/5dnode3mnodeStopInsert.py b/tests/system-test/6-cluster/5dnode3mnodeStopInsert.py index ce3f2c4093..a53930faac 100644 --- a/tests/system-test/6-cluster/5dnode3mnodeStopInsert.py +++ b/tests/system-test/6-cluster/5dnode3mnodeStopInsert.py @@ -77,7 +77,7 @@ class TDTestCase: for couti in range(countstart,countstop): tdLog.debug("drop database if exists db%d" %couti) tdSql.execute("drop database if exists db%d" %couti) - print("create database if not exists db%d replica 1 duration 300" %couti) + tdLog.debug("create database if not exists db%d replica 1 duration 300" %couti) tdSql.execute("create database if not exists db%d replica 1 duration 300" %couti) tdSql.execute("use db%d" %couti) tdSql.execute( @@ -126,12 +126,12 @@ class TDTestCase: # create cluster for dnode in self.TDDnodes.dnodes[1:]: - # print(dnode.cfgDict) + # tdLog.debug(dnode.cfgDict) dnode_id = dnode.cfgDict["fqdn"] + ":" +dnode.cfgDict["serverPort"] dnode_first_host = dnode.cfgDict["firstEp"].split(":")[0] dnode_first_port = dnode.cfgDict["firstEp"].split(":")[-1] cmd = f" taos -h {dnode_first_host} -P {dnode_first_port} -s ' create dnode \"{dnode_id} \" ' ;" - print(cmd) + tdLog.debug(cmd) os.system(cmd) time.sleep(2) @@ -144,22 +144,22 @@ class TDTestCase: statusReadyBumber=0 tdSql.query("show dnodes;") if tdSql.checkRows(dnodenumber) : - print("dnode is %d nodes"%dnodenumber) + tdLog.debug("dnode is %d nodes"%dnodenumber) for i in range(dnodenumber): if tdSql.queryResult[i][4] !='ready' : status=tdSql.queryResult[i][4] - 
print("dnode:%d status is %s "%(i,status)) + tdLog.debug("dnode:%d status is %s "%(i,status)) break else: statusReadyBumber+=1 - print(statusReadyBumber) + tdLog.debug(statusReadyBumber) if statusReadyBumber == dnodenumber : - print("all of %d mnodes is ready in 10s "%dnodenumber) + tdLog.debug("all of %d mnodes is ready in 10s "%dnodenumber) return True break count+=1 else: - print("%d mnodes is not ready in 10s "%dnodenumber) + tdLog.debug("%d mnodes is not ready in 10s "%dnodenumber) return False @@ -169,25 +169,25 @@ class TDTestCase: time.sleep(1) tdSql.query("show mnodes;") if tdSql.checkRows(3) : - print("mnode is three nodes") + tdLog.debug("mnode is three nodes") if tdSql.queryResult[0][2]=='leader' : if tdSql.queryResult[1][2]=='follower': if tdSql.queryResult[2][2]=='follower': - print("three mnodes is ready in 10s") + tdLog.debug("three mnodes is ready in 10s") break elif tdSql.queryResult[0][2]=='follower' : if tdSql.queryResult[1][2]=='leader': if tdSql.queryResult[2][2]=='follower': - print("three mnodes is ready in 10s") + tdLog.debug("three mnodes is ready in 10s") break elif tdSql.queryResult[0][2]=='follower' : if tdSql.queryResult[1][2]=='follower': if tdSql.queryResult[2][2]=='leader': - print("three mnodes is ready in 10s") + tdLog.debug("three mnodes is ready in 10s") break count+=1 else: - print("three mnodes is not ready in 10s ") + tdLog.debug("three mnodes is not ready in 10s ") return -1 tdSql.query("show mnodes;") @@ -205,19 +205,19 @@ class TDTestCase: time.sleep(1) tdSql.query("show mnodes;") if tdSql.checkRows(3) : - print("mnode is three nodes") + tdLog.debug("mnode is three nodes") if tdSql.queryResult[0][2]=='offline' : if tdSql.queryResult[1][2]=='leader': if tdSql.queryResult[2][2]=='follower': - print("stop mnodes on dnode 2 successfully in 10s") + tdLog.debug("stop mnodes on dnode 2 successfully in 10s") break elif tdSql.queryResult[1][2]=='follower': if tdSql.queryResult[2][2]=='leader': - print("stop mnodes on dnode 2 
successfully in 10s") + tdLog.debug("stop mnodes on dnode 2 successfully in 10s") break count+=1 else: - print("stop mnodes on dnode 2 failed in 10s ") + tdLog.debug("stop mnodes on dnode 2 failed in 10s ") return -1 tdSql.error("drop mnode on dnode 1;") @@ -237,15 +237,15 @@ class TDTestCase: time.sleep(1) tdSql.query("show mnodes;") if tdSql.checkRows(3) : - print("mnode is three nodes") + tdLog.debug("mnode is three nodes") if tdSql.queryResult[0][2]=='leader' : if tdSql.queryResult[1][2]=='offline': if tdSql.queryResult[2][2]=='follower': - print("stop mnodes on dnode 2 successfully in 10s") + tdLog.debug("stop mnodes on dnode 2 successfully in 10s") break count+=1 else: - print("stop mnodes on dnode 2 failed in 10s ") + tdLog.debug("stop mnodes on dnode 2 failed in 10s ") return -1 tdSql.error("drop mnode on dnode 2;") @@ -267,15 +267,15 @@ class TDTestCase: time.sleep(1) tdSql.query("show mnodes;") if tdSql.checkRows(3) : - print("mnode is three nodes") + tdLog.debug("mnode is three nodes") if tdSql.queryResult[0][2]=='leader' : if tdSql.queryResult[2][2]=='offline': if tdSql.queryResult[1][2]=='follower': - print("stop mnodes on dnode 3 successfully in 10s") + tdLog.debug("stop mnodes on dnode 3 successfully in 10s") break count+=1 else: - print("stop mnodes on dnode 3 failed in 10s") + tdLog.debug("stop mnodes on dnode 3 failed in 10s") return -1 tdSql.error("drop mnode on dnode 3;") tdSql.query("show mnodes;") @@ -311,7 +311,7 @@ class TDTestCase: tdSql.error("create mnode on dnode 2") tdSql.query("show dnodes;") - print(tdSql.queryResult) + tdLog.debug(tdSql.queryResult) tdLog.debug("stop all of mnode ") stopcount =0 @@ -325,10 +325,10 @@ class TDTestCase: self.TDDnodes.starttaosd(i+1) if self.checkdnodes(5): - print("123") + tdLog.debug("123") threads.join() else: - print("456") + tdLog.debug("456") self.stop_thread(threads) assert 1 == 2 ,"some dnode started failed" return False @@ -345,7 +345,7 @@ class TDTestCase: def run(self): - # 
print(self.master_dnode.cfgDict) + # tdLog.debug(self.master_dnode.cfgDict) self.buildcluster(5) self.five_dnode_three_mnode(5) From 02cb6dc41229a0eb2a765ee3f3e6705fb5f74153 Mon Sep 17 00:00:00 2001 From: Ganlin Zhao Date: Mon, 20 Jun 2022 13:22:00 +0800 Subject: [PATCH 10/40] refactor(query): top/bot function refactor --- source/libs/function/src/builtins.c | 120 ++---------------------- source/libs/function/src/builtinsimpl.c | 38 +------- 2 files changed, 13 insertions(+), 145 deletions(-) diff --git a/source/libs/function/src/builtins.c b/source/libs/function/src/builtins.c index 522ee09b3c..ee3eaadf2c 100644 --- a/source/libs/function/src/builtins.c +++ b/source/libs/function/src/builtins.c @@ -419,7 +419,7 @@ static int32_t translateTopBot(SFunctionNode* pFunc, char* pErrBuf, int32_t len) return TSDB_CODE_SUCCESS; } -int32_t topCreateMergePara(SNodeList* pRawParameters, SNode* pPartialRes, SNodeList** pParameters) { +int32_t topBotCreateMergePara(SNodeList* pRawParameters, SNode* pPartialRes, SNodeList** pParameters) { int32_t code = nodesListMakeAppend(pParameters, pPartialRes); if (TSDB_CODE_SUCCESS == code) { code = nodesListStrictAppend(*pParameters, nodesCloneNode(nodesListGetNode(pRawParameters, 1))); @@ -427,65 +427,6 @@ int32_t topCreateMergePara(SNodeList* pRawParameters, SNode* pPartialRes, SNodeL return TSDB_CODE_SUCCESS; } -static int32_t translateTopBotImpl(SFunctionNode* pFunc, char* pErrBuf, int32_t len, bool isPartial) { - int32_t numOfParams = LIST_LENGTH(pFunc->pParameterList); - - if (isPartial) { - if (2 != numOfParams) { - return invaildFuncParaNumErrMsg(pErrBuf, len, pFunc->functionName); - } - - uint8_t para1Type = ((SExprNode*)nodesListGetNode(pFunc->pParameterList, 0))->resType.type; - uint8_t para2Type = ((SExprNode*)nodesListGetNode(pFunc->pParameterList, 1))->resType.type; - if (!IS_NUMERIC_TYPE(para1Type) || !IS_INTEGER_TYPE(para2Type)) { - return invaildFuncParaTypeErrMsg(pErrBuf, len, pFunc->functionName); - } - - // param1 - 
SNode* pParamNode1 = nodesListGetNode(pFunc->pParameterList, 1); - if (nodeType(pParamNode1) != QUERY_NODE_VALUE) { - return invaildFuncParaTypeErrMsg(pErrBuf, len, pFunc->functionName); - } - - SValueNode* pValue = (SValueNode*)pParamNode1; - if (pValue->node.resType.type != TSDB_DATA_TYPE_BIGINT) { - return invaildFuncParaTypeErrMsg(pErrBuf, len, pFunc->functionName); - } - - if (pValue->datum.i < 1 || pValue->datum.i > 100) { - return invaildFuncParaValueErrMsg(pErrBuf, len, pFunc->functionName); - } - - pValue->notReserved = true; - - // set result type - pFunc->node.resType = - (SDataType){.bytes = getTopBotInfoSize(pValue->datum.i) + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_BINARY}; - } else { - if (1 != numOfParams) { - return invaildFuncParaNumErrMsg(pErrBuf, len, pFunc->functionName); - } - - uint8_t para1Type = ((SExprNode*)nodesListGetNode(pFunc->pParameterList, 0))->resType.type; - if (TSDB_DATA_TYPE_BINARY != para1Type) { - return invaildFuncParaTypeErrMsg(pErrBuf, len, pFunc->functionName); - } - - // Do nothing. We can only access output of partial functions as input, - // so original input type cannot be obtained, resType will be set same - // as original function input type after merge function created. 
- } - return TSDB_CODE_SUCCESS; -} - -static int32_t translateTopBotPartial(SFunctionNode* pFunc, char* pErrBuf, int32_t len) { - return translateTopBotImpl(pFunc, pErrBuf, len, true); -} - -static int32_t translateTopBotMerge(SFunctionNode* pFunc, char* pErrBuf, int32_t len) { - return translateTopBotImpl(pFunc, pErrBuf, len, false); -} - static int32_t translateSpread(SFunctionNode* pFunc, char* pErrBuf, int32_t len) { if (1 != LIST_LENGTH(pFunc->pParameterList)) { return invaildFuncParaNumErrMsg(pErrBuf, len, pFunc->functionName); @@ -1735,31 +1676,9 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .processFunc = topFunction, .finalizeFunc = topBotFinalize, .combineFunc = topCombine, - .pPartialFunc = "_top_partial", - .pMergeFunc = "_top_merge", - // .createMergeParaFuc = topCreateMergePara - }, - { - .name = "_top_partial", - .type = FUNCTION_TYPE_TOP_PARTIAL, - .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SELECT_FUNC | FUNC_MGT_INDEFINITE_ROWS_FUNC, - .translateFunc = translateTopBotPartial, - .getEnvFunc = getTopBotFuncEnv, - .initFunc = topBotFunctionSetup, - .processFunc = topFunction, - .finalizeFunc = topBotPartialFinalize, - .combineFunc = topCombine, - }, - { - .name = "_top_merge", - .type = FUNCTION_TYPE_TOP_MERGE, - .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SELECT_FUNC | FUNC_MGT_INDEFINITE_ROWS_FUNC, - .translateFunc = translateTopBotMerge, - .getEnvFunc = getTopBotMergeFuncEnv, - .initFunc = functionSetup, - .processFunc = topFunctionMerge, - .finalizeFunc = topBotMergeFinalize, - .combineFunc = topCombine, + .pPartialFunc = "top", + .pMergeFunc = "top", + .createMergeParaFuc = topBotCreateMergePara }, { .name = "bottom", @@ -1771,30 +1690,9 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .processFunc = bottomFunction, .finalizeFunc = topBotFinalize, .combineFunc = bottomCombine, - .pPartialFunc = "_bottom_partial", - .pMergeFunc = "_bottom_merge" - }, - { - .name = "_bottom_partial", - .type = FUNCTION_TYPE_BOTTOM_PARTIAL, - 
.classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SELECT_FUNC | FUNC_MGT_INDEFINITE_ROWS_FUNC, - .translateFunc = translateTopBotPartial, - .getEnvFunc = getTopBotFuncEnv, - .initFunc = topBotFunctionSetup, - .processFunc = bottomFunction, - .finalizeFunc = topBotPartialFinalize, - .combineFunc = bottomCombine, - }, - { - .name = "_bottom_merge", - .type = FUNCTION_TYPE_BOTTOM_MERGE, - .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SELECT_FUNC | FUNC_MGT_INDEFINITE_ROWS_FUNC, - .translateFunc = translateTopBotMerge, - .getEnvFunc = getTopBotMergeFuncEnv, - .initFunc = functionSetup, - .processFunc = bottomFunctionMerge, - .finalizeFunc = topBotMergeFinalize, - .combineFunc = bottomCombine, + .pPartialFunc = "bottom", + .pMergeFunc = "bottom", + .createMergeParaFuc = topBotCreateMergePara }, { .name = "spread", @@ -2524,7 +2422,9 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .getEnvFunc = getSelectivityFuncEnv, // todo remove this function later. .initFunc = functionSetup, .processFunc = NULL, - .finalizeFunc = NULL + .finalizeFunc = NULL, + .pPartialFunc = "_select_value", + .pMergeFunc = "_select_value" }, { .name = "_block_dist", diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c index 583e8bd300..6e25655d3f 100644 --- a/source/libs/function/src/builtinsimpl.c +++ b/source/libs/function/src/builtinsimpl.c @@ -2870,12 +2870,6 @@ bool getTopBotFuncEnv(SFunctionNode* pFunc, SFuncExecEnv* pEnv) { return true; } -bool getTopBotMergeFuncEnv(SFunctionNode* pFunc, SFuncExecEnv* pEnv) { - // intermediate result is binary and length contains VAR header size - pEnv->calcMemSize = pFunc->node.resType.bytes; - return true; -} - bool topBotFunctionSetup(SqlFunctionCtx* pCtx, SResultRowEntryInfo* pResInfo) { if (!functionSetup(pCtx, pResInfo)) { return false; @@ -3142,7 +3136,7 @@ void copyTupleData(SqlFunctionCtx* pCtx, int32_t rowIndex, const SSDataBlock* pS releaseBufPage(pCtx->pBuf, pPage); } -int32_t 
topBotFinalizeImpl(SqlFunctionCtx* pCtx, SSDataBlock* pBlock, bool isMerge) { +int32_t topBotFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) { SResultRowEntryInfo* pEntryInfo = GET_RES_INFO(pCtx); STopBotRes* pRes = GET_ROWCELL_INTERBUF(pEntryInfo); @@ -3162,39 +3156,13 @@ int32_t topBotFinalizeImpl(SqlFunctionCtx* pCtx, SSDataBlock* pBlock, bool isMer colDataAppend(pCol, currentRow, (const char*)&pItem->v.i, false); } - if (!isMerge) { - setSelectivityValue(pCtx, pBlock, &pRes->pItems[i].tuplePos, currentRow); - } + setSelectivityValue(pCtx, pBlock, &pRes->pItems[i].tuplePos, currentRow); currentRow += 1; } return pEntryInfo->numOfRes; } -int32_t topBotFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) { return topBotFinalizeImpl(pCtx, pBlock, false); } - -int32_t topBotMergeFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) { - return topBotFinalizeImpl(pCtx, pBlock, true); -} - -int32_t topBotPartialFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) { - SResultRowEntryInfo* pEntryInfo = GET_RES_INFO(pCtx); - STopBotRes* pRes = GET_ROWCELL_INTERBUF(GET_RES_INFO(pCtx)); - int32_t resultBytes = getTopBotInfoSize(pRes->maxSize); - char* res = taosMemoryCalloc(resultBytes + VARSTR_HEADER_SIZE, sizeof(char)); - - memcpy(varDataVal(res), pRes, resultBytes); - varDataSetLen(res, resultBytes); - - int32_t slotId = pCtx->pExpr->base.resSchema.slotId; - SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, slotId); - - colDataAppend(pCol, pBlock->info.rows, res, false); - - taosMemoryFree(res); - return 1; -} - void addResult(SqlFunctionCtx* pCtx, STopBotResItem* pSourceItem, int16_t type, bool isTopQuery) { SResultRowEntryInfo* pEntryInfo = GET_RES_INFO(pCtx); STopBotRes* pRes = getTopBotOutputInfo(pCtx); @@ -5362,4 +5330,4 @@ int32_t interpFunction(SqlFunctionCtx* pCtx) { #endif return TSDB_CODE_SUCCESS; -} \ No newline at end of file +} From 84dde937a1da9abf1d35821815e42149e989b921 Mon Sep 17 00:00:00 2001 From: jiacy-jcy Date: Mon, 20 Jun 2022 13:32:29 +0800 
Subject: [PATCH 11/40] add test case --- .../1-insert/create_table_comment.py | 113 --------------- tests/system-test/1-insert/table_comment.py | 135 ++++++++++++++++++ 2 files changed, 135 insertions(+), 113 deletions(-) delete mode 100644 tests/system-test/1-insert/create_table_comment.py create mode 100644 tests/system-test/1-insert/table_comment.py diff --git a/tests/system-test/1-insert/create_table_comment.py b/tests/system-test/1-insert/create_table_comment.py deleted file mode 100644 index 92ea083c5a..0000000000 --- a/tests/system-test/1-insert/create_table_comment.py +++ /dev/null @@ -1,113 +0,0 @@ -################################################################### -# Copyright (c) 2016 by TAOS Technologies, Inc. -# All rights reserved. -# -# This file is proprietary and confidential to TAOS Technologies. -# No part of this file may be reproduced, stored, transmitted, -# disclosed or used in any form or by any means other than as -# expressly provided by the written permission from Jianhui Tao -# -################################################################### - -# -*- coding: utf-8 -*- - -import random -import string -from util.log import * -from util.cases import * -from util.sql import * - -class TDTestCase: - def init(self, conn, logSql): - tdLog.debug("start to execute %s" % __file__) - tdSql.init(conn.cursor()) - - def get_long_name(self, length, mode="mixed"): - """ - generate long name - mode could be numbers/letters/letters_mixed/mixed - """ - if mode == "numbers": - population = string.digits - elif mode == "letters": - population = string.ascii_letters.lower() - elif mode == "letters_mixed": - population = string.ascii_letters.upper() + string.ascii_letters.lower() - else: - population = string.ascii_letters.lower() + string.digits - return "".join(random.choices(population, k=length)) - - def __create_tb(self,dbname,stbname,tbname,comment): - tdSql.execute(f'create database if not exists {dbname}') - tdSql.execute(f'use {dbname}') - 
tdSql.execute( - f'create table {stbname} (ts timestamp,c0 int) tags(t0 int) ') - tdSql.execute( - f'create table {tbname} using {stbname} tags(1) comment "{comment}"') - def __create_normaltb(self,dbname,tbname,comment): - tdSql.execute(f'create database if not exists {dbname}') - tdSql.execute(f'use {dbname}') - tdSql.execute( - f'create table {tbname} (ts timestamp,c0 int) comment "{comment}"') - - def check_comment(self): - dbname = self.get_long_name(length=10, mode="letters") - ntbname = self.get_long_name(length=5, mode="letters") - - # create normal table with comment - comment = self.get_long_name(length=10, mode="letters") - self.__create_normaltb(dbname,ntbname,comment) - ntb_kv_list = tdSql.getResult("show tables") - print(ntb_kv_list) - tdSql.checkEqual(ntb_kv_list[0][8], comment) - tdSql.error('alter table {ntbname} comment "test1"') - tdSql.execute(f'drop database {dbname}') - - # max length(1024) - comment = self.get_long_name(length=1024, mode="letters") - self.__create_normaltb(dbname,ntbname,comment) - ntb_kv_list = tdSql.getResult("show tables") - tdSql.checkEqual(ntb_kv_list[0][8], comment) - tdSql.execute(f'drop database {dbname}') - - # error overlength - comment = self.get_long_name(length=1025, mode="letters") - tdSql.execute(f'create database if not exists {dbname}') - tdSql.execute(f'use {dbname}') - tdSql.error(f"create table ntb (ts timestamp,c0 int) comment '{comment}'") - tdSql.execute(f'drop database {dbname}') - - # create child table with comment - comment = self.get_long_name(length=10, mode="letters") - stbname = self.get_long_name(length=5, mode="letters") - tbname = self.get_long_name(length=3, mode="letters") - self.__create_tb(dbname,stbname,tbname,comment) - ntb_kv_list = tdSql.getResult("show tables") - tdSql.checkEqual(ntb_kv_list[0][8], comment) - tdSql.error(f'alter table {tbname} comment "test1"') - tdSql.execute(f'drop database {dbname}') - - # max length 1024 - comment = self.get_long_name(length=1024, mode="letters") 
- self.__create_tb(dbname,ntbname,comment) - ntb_kv_list = tdSql.getResult("show tables") - tdSql.checkEqual(ntb_kv_list[0][8], comment) - tdSql.execute(f'drop database {dbname}') - - # error overlength - comment = self.get_long_name(length=1025, mode="letters") - tdSql.execute(f'create database if not exists {dbname}') - tdSql.execute(f'use {dbname}') - tdSql.execute(f"create table stb (ts timestamp,c0 int) tags(t0 int)") - tdSql.error(f'create table stb_1 us stb tags(1) comment "{comment}"') - tdSql.execute(f'drop database {dbname}') - - def run(self): - self.check_comment() - - def stop(self): - tdSql.close() - tdLog.success("%s successfully executed" % __file__) - -tdCases.addWindows(__file__, TDTestCase()) -tdCases.addLinux(__file__, TDTestCase()) \ No newline at end of file diff --git a/tests/system-test/1-insert/table_comment.py b/tests/system-test/1-insert/table_comment.py new file mode 100644 index 0000000000..1e999c5f00 --- /dev/null +++ b/tests/system-test/1-insert/table_comment.py @@ -0,0 +1,135 @@ +################################################################### +# Copyright (c) 2016 by TAOS Technologies, Inc. +# All rights reserved. +# +# This file is proprietary and confidential to TAOS Technologies. 
+# No part of this file may be reproduced, stored, transmitted, +# disclosed or used in any form or by any means other than as +# expressly provided by the written permission from Jianhui Tao +# +################################################################### + +# -*- coding: utf-8 -*- + +import random +import string + +from tomlkit import comment +from util.log import * +from util.cases import * +from util.sql import * +from util.common import * + +class TDTestCase: + def init(self, conn, logSql): + tdLog.debug("start to execute %s" % __file__) + tdSql.init(conn.cursor()) + # prepare data + self.ntbname = 'ntb' + self.stbname = 'stb' + self.column_dict = { + 'ts':'timestamp', + 'c1':'int', + 'c2':'float', + 'c3':'double', + 'c4':'timestamp' + } + self.tag_dict = { + 't0':'int' + } + self.comment_length = [0,1024] + self.error_comment_length = [1025] + self.table_type_list = ['normal_table','stable','child_table'] + self.comment_flag_list = [True,False] + + def __set_and_alter_comment(self,tb_type='',comment_flag= False): + + column_sql = '' + tag_sql = '' + for k,v in self.column_dict.items(): + column_sql += f"{k} {v}," + for k,v in self.tag_dict.items(): + tag_sql += f"{k} {v}," + if tb_type == 'normal_table' or tb_type == '': + if comment_flag == False: + tdSql.execute(f'create table {self.ntbname} ({column_sql[:-1]})') + self.check_comment_info() + self.alter_comment(self.ntbname) + tdSql.execute(f'drop table {self.ntbname}') + elif comment_flag == True: + for i in self.comment_length: + comment_info = tdCom.getLongName(i) + tdSql.execute(f'create table {self.ntbname} ({column_sql[:-1]}) comment "{comment_info}"') + self.check_comment_info(comment_info) + self.alter_comment(self.ntbname) + tdSql.execute(f'drop table {self.ntbname}') + for i in self.error_comment_length: + comment_info = tdCom.getLongName(i) + tdSql.error(f'create table {self.ntbname} ({column_sql[:-1]}) comment "{comment_info}"') + elif tb_type == 'stable': + for operation in 
['table','stable']: + if comment_flag == False: + tdSql.execute(f'create {operation} {self.stbname} ({column_sql[:-1]}) tags({tag_sql[:-1]})') + self.check_comment_info(None,'stable') + self.alter_comment(self.stbname,'stable') + tdSql.execute(f'drop table {self.stbname}') + elif comment_flag == True: + for i in self.comment_length: + comment_info = tdCom.getLongName(i) + tdSql.execute(f'create {operation} {self.stbname} ({column_sql[:-1]}) tags({tag_sql[:-1]}) comment "{comment_info}"') + self.check_comment_info(comment_info,'stable') + self.alter_comment(self.stbname,'stable') + tdSql.execute(f'drop table {self.stbname}') + elif tb_type == 'child_table': + tdSql.execute(f'create table if not exists {self.stbname} ({column_sql[:-1]}) tags({tag_sql[:-1]})') + if comment_flag == False: + tdSql.execute(f'create table if not exists {self.stbname}_ctb using {self.stbname} tags(1)') + self.check_comment_info() + self.alter_comment(f'{self.stbname}_ctb') + tdSql.execute(f'drop table {self.stbname}_ctb') + elif comment_flag == True: + for j in self.comment_length: + comment_info = tdCom.getLongName(j) + tdSql.execute(f'create table if not exists {self.stbname}_ctb using {self.stbname} tags(1) comment "{comment_info}"') + self.check_comment_info(comment_info) + self.alter_comment(f'{self.stbname}_ctb') + tdSql.execute(f'drop table {self.stbname}_ctb') + tdSql.execute(f'drop table {self.stbname}') + def alter_comment(self,tbname,tb_type=''): + for i in self.comment_length: + comment_info = tdCom.getLongName(i) + print(comment_info) + tdSql.execute(f'alter table {tbname} comment "{comment_info}"') + self.check_comment_info(comment_info,tb_type) + for i in self.error_comment_length: + comment_info = tdCom.getLongName(i) + tdSql.error(f'alter table {tbname} comment "{comment_info}"') + def check_comment_info(self,comment_info=None,tb_type=''): + if tb_type == '' or tb_type == 'normal_table' or tb_type == 'child_table': + tdSql.query('show tables') + if comment_info == None: + 
tdSql.checkEqual(tdSql.queryResult[0][8],None) + else : + tdSql.checkEqual(tdSql.queryResult[0][8],comment_info) + elif tb_type == 'stable': + tdSql.query('show stables') + if comment_info == None: + tdSql.checkEqual(tdSql.queryResult[0][6],None) + else : + tdSql.checkEqual(tdSql.queryResult[0][6],comment_info) + def comment_check_case(self,table_type,comment_flag): + tdSql.prepare() + for tb in table_type: + for flag in comment_flag: + self.__set_and_alter_comment(tb,flag) + tdSql.execute('drop database db') + + def run(self): + self.comment_check_case(self.table_type_list,self.comment_flag_list) + + def stop(self): + tdSql.close() + tdLog.success("%s successfully executed" % __file__) + +tdCases.addWindows(__file__, TDTestCase()) +tdCases.addLinux(__file__, TDTestCase()) \ No newline at end of file From 0ad4c6ca6d4ec87baea7ded72c471deb3bf7162c Mon Sep 17 00:00:00 2001 From: Ganlin Zhao Date: Mon, 20 Jun 2022 13:33:44 +0800 Subject: [PATCH 12/40] remove unused code --- source/libs/function/src/builtinsimpl.c | 51 +------------------------ 1 file changed, 1 insertion(+), 50 deletions(-) diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c index 6e25655d3f..806b2a049d 100644 --- a/source/libs/function/src/builtinsimpl.c +++ b/source/libs/function/src/builtinsimpl.c @@ -67,8 +67,7 @@ typedef struct STopBotResItem { typedef struct STopBotRes { int32_t maxSize; - int16_t type; // store the original input type, used in merge function - int32_t numOfItems; + int16_t type; STopBotResItem* pItems; } STopBotRes; @@ -2944,50 +2943,6 @@ int32_t bottomFunction(SqlFunctionCtx* pCtx) { return TSDB_CODE_SUCCESS; } -static void topBotTransferInfo(SqlFunctionCtx* pCtx, STopBotRes* pInput, bool isTopQuery) { - for (int32_t i = 0; i < pInput->numOfItems; i++) { - addResult(pCtx, &pInput->pItems[i], pInput->type, isTopQuery); - } -} - -int32_t topFunctionMerge(SqlFunctionCtx* pCtx) { - SResultRowEntryInfo* pEntryInfo = GET_RES_INFO(pCtx); - 
SInputColumnInfoData* pInput = &pCtx->input; - SColumnInfoData* pCol = pInput->pData[0]; - ASSERT(pCol->info.type == TSDB_DATA_TYPE_BINARY); - - int32_t start = pInput->startRowIndex; - char* data = colDataGetData(pCol, start); - STopBotRes* pInputInfo = (STopBotRes*)varDataVal(data); - STopBotRes* pInfo = GET_ROWCELL_INTERBUF(GET_RES_INFO(pCtx)); - - pInfo->maxSize = pInputInfo->maxSize; - pInfo->type = pInputInfo->type; - topBotTransferInfo(pCtx, pInputInfo, true); - SET_VAL(GET_RES_INFO(pCtx), pEntryInfo->numOfRes, pEntryInfo->numOfRes); - - return TSDB_CODE_SUCCESS; -} - -int32_t bottomFunctionMerge(SqlFunctionCtx* pCtx) { - SResultRowEntryInfo* pEntryInfo = GET_RES_INFO(pCtx); - SInputColumnInfoData* pInput = &pCtx->input; - SColumnInfoData* pCol = pInput->pData[0]; - ASSERT(pCol->info.type == TSDB_DATA_TYPE_BINARY); - - int32_t start = pInput->startRowIndex; - char* data = colDataGetData(pCol, start); - STopBotRes* pInputInfo = (STopBotRes*)varDataVal(data); - STopBotRes* pInfo = GET_ROWCELL_INTERBUF(GET_RES_INFO(pCtx)); - - pInfo->maxSize = pInputInfo->maxSize; - pInfo->type = pInputInfo->type; - topBotTransferInfo(pCtx, pInputInfo, false); - SET_VAL(GET_RES_INFO(pCtx), pEntryInfo->numOfRes, pEntryInfo->numOfRes); - - return TSDB_CODE_SUCCESS; -} - static int32_t topBotResComparFn(const void* p1, const void* p2, const void* param) { uint16_t type = *(uint16_t*)param; @@ -3036,8 +2991,6 @@ void doAddIntoResult(SqlFunctionCtx* pCtx, void* pData, int32_t rowIndex, SSData // allocate the buffer and keep the data of this row into the new allocated buffer pEntryInfo->numOfRes++; - // accumulate number of items for each vgroup, this info is needed for merge - pRes->numOfItems++; taosheapsort((void*)pItems, sizeof(STopBotResItem), pEntryInfo->numOfRes, (const void*)&type, topBotResComparFn, !isTopQuery); } else { // replace the minimum value in the result @@ -3177,8 +3130,6 @@ void addResult(SqlFunctionCtx* pCtx, STopBotResItem* pSourceItem, int16_t type, 
pItem->tuplePos.pageId = -1; replaceTupleData(&pItem->tuplePos, &pSourceItem->tuplePos); pEntryInfo->numOfRes++; - // accumulate number of items for each vgroup, this info is needed for merge - pRes->numOfItems++; taosheapsort((void*)pItems, sizeof(STopBotResItem), pEntryInfo->numOfRes, (const void*)&type, topBotResComparFn, !isTopQuery); } else { // replace the minimum value in the result From b086526d91a98310ad7237bb06459d5114b8b016 Mon Sep 17 00:00:00 2001 From: jiacy-jcy Date: Mon, 20 Jun 2022 13:35:20 +0800 Subject: [PATCH 13/40] add test case into ci --- tests/system-test/fulltest.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/system-test/fulltest.sh b/tests/system-test/fulltest.sh index 41004cc5a2..4d670fad9b 100755 --- a/tests/system-test/fulltest.sh +++ b/tests/system-test/fulltest.sh @@ -22,7 +22,7 @@ python3 ./test.py -f 1-insert/opentsdb_json_taosc_insert.py python3 ./test.py -f 1-insert/alter_stable.py python3 ./test.py -f 1-insert/alter_table.py python3 ./test.py -f 1-insert/insertWithMoreVgroup.py -# python3 ./test.py -f 1-inerst/create_table_comment.py +python3 ./test.py -f 1-inerst/table_comment.py python3 ./test.py -f 2-query/between.py python3 ./test.py -f 2-query/distinct.py python3 ./test.py -f 2-query/varchar.py From 1e65fb0a404e2a40a8c36943129226e6b1cf3dec Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Mon, 20 Jun 2022 13:37:49 +0800 Subject: [PATCH 14/40] test: adjust case --- .../system-test/6-cluster/5dnode3mnodeStop.py | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/tests/system-test/6-cluster/5dnode3mnodeStop.py b/tests/system-test/6-cluster/5dnode3mnodeStop.py index e11f819263..654b27bfc0 100644 --- a/tests/system-test/6-cluster/5dnode3mnodeStop.py +++ b/tests/system-test/6-cluster/5dnode3mnodeStop.py @@ -99,12 +99,12 @@ class TDTestCase: # create cluster for dnode in self.TDDnodes.dnodes[1:]: - # print(dnode.cfgDict) + # tdLog.debug(dnode.cfgDict) dnode_id = 
dnode.cfgDict["fqdn"] + ":" +dnode.cfgDict["serverPort"] dnode_first_host = dnode.cfgDict["firstEp"].split(":")[0] dnode_first_port = dnode.cfgDict["firstEp"].split(":")[-1] cmd = f" taos -h {dnode_first_host} -P {dnode_first_port} -s ' create dnode \"{dnode_id} \" ' ;" - print(cmd) + tdLog.debug(cmd) os.system(cmd) time.sleep(2) @@ -116,25 +116,25 @@ class TDTestCase: time.sleep(1) tdSql.query("show mnodes;") if tdSql.checkRows(3) : - print("mnode is three nodes") + tdLog.debug("mnode is three nodes") if tdSql.queryResult[0][2]=='leader' : if tdSql.queryResult[1][2]=='follower': if tdSql.queryResult[2][2]=='follower': - print("three mnodes is ready in 10s") + tdLog.debug("three mnodes is ready in 10s") break elif tdSql.queryResult[0][2]=='follower' : if tdSql.queryResult[1][2]=='leader': if tdSql.queryResult[2][2]=='follower': - print("three mnodes is ready in 10s") + tdLog.debug("three mnodes is ready in 10s") break elif tdSql.queryResult[0][2]=='follower' : if tdSql.queryResult[1][2]=='follower': if tdSql.queryResult[2][2]=='leader': - print("three mnodes is ready in 10s") + tdLog.debug("three mnodes is ready in 10s") break count+=1 else: - print("three mnodes is not ready in 10s ") + tdLog.debug("three mnodes is not ready in 10s ") return -1 tdSql.query("show mnodes;") @@ -152,19 +152,19 @@ class TDTestCase: time.sleep(1) tdSql.query("show mnodes;") if tdSql.checkRows(3) : - print("mnode is three nodes") + tdLog.debug("mnode is three nodes") if tdSql.queryResult[0][2]=='offline' : if tdSql.queryResult[1][2]=='leader': if tdSql.queryResult[2][2]=='follower': - print("stop mnodes on dnode 2 successfully in 10s") + tdLog.debug("stop mnodes on dnode 2 successfully in 10s") break elif tdSql.queryResult[1][2]=='follower': if tdSql.queryResult[2][2]=='leader': - print("stop mnodes on dnode 2 successfully in 10s") + tdLog.debug("stop mnodes on dnode 2 successfully in 10s") break count+=1 else: - print("stop mnodes on dnode 2 failed in 10s ") + tdLog.debug("stop mnodes 
on dnode 2 failed in 10s ") return -1 tdSql.error("drop mnode on dnode 1;") @@ -184,15 +184,15 @@ class TDTestCase: time.sleep(1) tdSql.query("show mnodes;") if tdSql.checkRows(3) : - print("mnode is three nodes") + tdLog.debug("mnode is three nodes") if tdSql.queryResult[0][2]=='leader' : if tdSql.queryResult[1][2]=='offline': if tdSql.queryResult[2][2]=='follower': - print("stop mnodes on dnode 2 successfully in 10s") + tdLog.debug("stop mnodes on dnode 2 successfully in 10s") break count+=1 else: - print("stop mnodes on dnode 2 failed in 10s ") + tdLog.debug("stop mnodes on dnode 2 failed in 10s ") return -1 tdSql.error("drop mnode on dnode 2;") @@ -214,15 +214,15 @@ class TDTestCase: time.sleep(1) tdSql.query("show mnodes;") if tdSql.checkRows(3) : - print("mnode is three nodes") + tdLog.debug("mnode is three nodes") if tdSql.queryResult[0][2]=='leader' : if tdSql.queryResult[2][2]=='offline': if tdSql.queryResult[1][2]=='follower': - print("stop mnodes on dnode 3 successfully in 10s") + tdLog.debug("stop mnodes on dnode 3 successfully in 10s") break count+=1 else: - print("stop mnodes on dnode 3 failed in 10s") + tdLog.debug("stop mnodes on dnode 3 failed in 10s") return -1 tdSql.error("drop mnode on dnode 3;") tdSql.query("show mnodes;") @@ -262,7 +262,7 @@ class TDTestCase: tdSql.error("create mnode on dnode 2") tdSql.query("show dnodes;") - print(tdSql.queryResult) + tdLog.debug(tdSql.queryResult) tdLog.debug("stop and follower of mnode") self.TDDnodes.stoptaosd(2) @@ -303,7 +303,7 @@ class TDTestCase: def run(self): - # print(self.master_dnode.cfgDict) + # tdLog.debug(self.master_dnode.cfgDict) self.buildcluster(5) self.five_dnode_three_mnode(5) From 987a194aa50e7fc5dab325dab99311fd096df93b Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Mon, 20 Jun 2022 13:47:32 +0800 Subject: [PATCH 15/40] test: enable 5dnode3mnodeStop.py --- tests/system-test/fulltest.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/system-test/fulltest.sh 
b/tests/system-test/fulltest.sh index 41004cc5a2..a7e04ad37c 100755 --- a/tests/system-test/fulltest.sh +++ b/tests/system-test/fulltest.sh @@ -108,7 +108,7 @@ python3 ./test.py -f 2-query/distribute_agg_apercentile.py python3 ./test.py -f 6-cluster/5dnode1mnode.py python3 ./test.py -f 6-cluster/5dnode2mnode.py -#python3 ./test.py -f 6-cluster/5dnode3mnodeStop.py +python3 ./test.py -f 6-cluster/5dnode3mnodeStop.py #python3 ./test.py -f 6-cluster/5dnode3mnodeDrop.py # BUG python3 ./test.py -f 6-cluster/5dnode3mnodeStopInsert.py From 8777afacb1eeaa2735bc4194037fa3414d0463fc Mon Sep 17 00:00:00 2001 From: jiacy-jcy Date: Mon, 20 Jun 2022 14:00:15 +0800 Subject: [PATCH 16/40] modify fulltest.sh --- tests/system-test/fulltest.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/system-test/fulltest.sh b/tests/system-test/fulltest.sh index 4d670fad9b..db3f7216ab 100755 --- a/tests/system-test/fulltest.sh +++ b/tests/system-test/fulltest.sh @@ -22,7 +22,7 @@ python3 ./test.py -f 1-insert/opentsdb_json_taosc_insert.py python3 ./test.py -f 1-insert/alter_stable.py python3 ./test.py -f 1-insert/alter_table.py python3 ./test.py -f 1-insert/insertWithMoreVgroup.py -python3 ./test.py -f 1-inerst/table_comment.py +python3 ./test.py -f 1-insert/table_comment.py python3 ./test.py -f 2-query/between.py python3 ./test.py -f 2-query/distinct.py python3 ./test.py -f 2-query/varchar.py From 580fb571e1afbfa5662b4bd34daed7b1b6171a33 Mon Sep 17 00:00:00 2001 From: jiacy-jcy Date: Mon, 20 Jun 2022 14:11:29 +0800 Subject: [PATCH 17/40] modify import packages --- tests/system-test/1-insert/table_comment.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/system-test/1-insert/table_comment.py b/tests/system-test/1-insert/table_comment.py index 1e999c5f00..5b85a3964f 100644 --- a/tests/system-test/1-insert/table_comment.py +++ b/tests/system-test/1-insert/table_comment.py @@ -14,7 +14,6 @@ import random import string -from tomlkit import comment from util.log import 
* from util.cases import * from util.sql import * From 056301fb7bf38c7a248b553a4d0087d16b3c6cfd Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Mon, 20 Jun 2022 14:22:50 +0800 Subject: [PATCH 18/40] fix: disable eliminate projection when repeat proj column name --- source/libs/executor/src/executil.c | 13 +++++-------- source/libs/planner/src/planOptimizer.c | 16 +++++++++++++++- source/libs/qworker/src/qworker.c | 2 +- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/source/libs/executor/src/executil.c b/source/libs/executor/src/executil.c index 77a18028b4..b493f3222e 100644 --- a/source/libs/executor/src/executil.c +++ b/source/libs/executor/src/executil.c @@ -235,7 +235,7 @@ int32_t getTableList(void* metaHandle, SScanPhysiNode* pScanNode, STableListInfo terrno = code; return code; } else { - qDebug("sucess to get tableIds, size: %d, suid: %" PRIu64 "", (int)taosArrayGetSize(res), tableUid); + qDebug("success to get tableIds, size: %d, suid: %" PRIu64 "", (int)taosArrayGetSize(res), tableUid); } for (int i = 0; i < taosArrayGetSize(res); i++) { @@ -319,12 +319,10 @@ SArray* extractColMatchInfo(SNodeList* pNodeList, SDataBlockDescNode* pOutputNod continue; } - bool foundSource = false; SColMatchInfo* info = NULL; for (int32_t j = 0; j < taosArrayGetSize(pList); ++j) { info = taosArrayGet(pList, j); if (info->targetSlotId == pNode->slotId) { - foundSource = true; break; } } @@ -332,7 +330,6 @@ SArray* extractColMatchInfo(SNodeList* pNodeList, SDataBlockDescNode* pOutputNod if (pNode->output) { (*numOfOutputCols) += 1; } else { - ASSERT(foundSource); info->output = false; } } @@ -595,10 +592,10 @@ void relocateColumnData(SSDataBlock* pBlock, const SArray* pColMatchInfo, SArray while (i < numOfSrcCols && j < taosArrayGetSize(pColMatchInfo)) { SColumnInfoData* p = taosArrayGet(pCols, i); SColMatchInfo* pmInfo = taosArrayGet(pColMatchInfo, j); -// if (!pmInfo->output) { -// j++; -// continue; -// } + if (!pmInfo->output) { + j++; + continue; + } if 
(p->info.colId == pmInfo->colId) { SColumnInfoData* pDst = taosArrayGet(pBlock->pDataBlock, pmInfo->targetSlotId); diff --git a/source/libs/planner/src/planOptimizer.c b/source/libs/planner/src/planOptimizer.c index be6186f891..02c1779224 100644 --- a/source/libs/planner/src/planOptimizer.c +++ b/source/libs/planner/src/planOptimizer.c @@ -1092,14 +1092,27 @@ static bool eliminateProjOptMayBeOptimized(SLogicNode* pNode) { return false; } + SHashObj* pProjColNameHash = taosHashInit(16, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_NO_LOCK); SNode* pProjection; FOREACH(pProjection, pProjectNode->pProjections) { SExprNode* pExprNode = (SExprNode*)pProjection; if (QUERY_NODE_COLUMN != nodeType(pExprNode)) { + taosHashCleanup(pProjColNameHash); return false; } + + char* projColumnName = ((SColumnNode*)pProjection)->colName; + int32_t* pExist = taosHashGet(pProjColNameHash, projColumnName, strlen(projColumnName)); + if (NULL != pExist) { + taosHashCleanup(pProjColNameHash); + return false; + } else { + int32_t exist = 1; + taosHashPut(pProjColNameHash, projColumnName, strlen(projColumnName), &exist, sizeof(exist)); + } } + taosHashCleanup(pProjColNameHash); return true; } @@ -1110,13 +1123,14 @@ static int32_t eliminateProjOptimizeImpl(SOptimizeContext* pCxt, SLogicSubplan* SNode* pProjection = NULL; FOREACH(pProjection, pProjectNode->pProjections) { SColumnNode* projColumn = (SColumnNode*)pProjection; + char* projColumnName = projColumn->colName; SNode* pChildTarget = NULL; FOREACH(pChildTarget, pChild->pTargets) { SExprNode* childExpr = (SExprNode*)pChildTarget; - char* projColumnName = projColumn->colName; if (QUERY_NODE_COLUMN == nodeType(childExpr) && strcmp(projColumnName, ((SColumnNode*)childExpr)->colName) == 0 || strcmp(projColumnName, childExpr->aliasName) == 0) { nodesListAppend(pNewChildTargets, nodesCloneNode(pChildTarget)); + break; } } } diff --git a/source/libs/qworker/src/qworker.c b/source/libs/qworker/src/qworker.c index 
800cc4c6e5..9161f4456a 100644 --- a/source/libs/qworker/src/qworker.c +++ b/source/libs/qworker/src/qworker.c @@ -526,7 +526,7 @@ int32_t qwProcessQuery(QW_FPARAMS_DEF, SQWMsg *qwMsg, int8_t taskType, int8_t ex atomic_store_8(&ctx->taskType, taskType); atomic_store_8(&ctx->explain, explain); - /*QW_TASK_DLOGL("subplan json string, len:%d, %s", qwMsg->msgLen, qwMsg->msg);*/ + QW_TASK_DLOGL("subplan json string, len:%d, %s", qwMsg->msgLen, qwMsg->msg); code = qStringToSubplan(qwMsg->msg, &plan); if (TSDB_CODE_SUCCESS != code) { From 53ef66961a40d06dbbfb9916d0463891916d80c6 Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Mon, 20 Jun 2022 14:29:18 +0800 Subject: [PATCH 19/40] feat(stream): support snode --- examples/c/stream_demo.c | 13 +- include/common/tmsgcb.h | 1 - include/dnode/snode/snode.h | 6 +- include/libs/planner/planner.h | 1 - include/libs/stream/tstream.h | 5 +- source/dnode/mgmt/mgmt_snode/src/smHandle.c | 3 + source/dnode/mgmt/mgmt_snode/src/smWorker.c | 8 +- source/dnode/mgmt/mgmt_vnode/src/vmWorker.c | 9 - source/dnode/mnode/impl/src/mndScheduler.c | 9 +- source/dnode/snode/inc/sndInt.h | 1 - source/dnode/snode/src/snode.c | 175 ++++++++++++++++---- source/dnode/vnode/src/tq/tq.c | 52 +++--- source/libs/executor/src/executor.c | 2 +- source/libs/executor/src/executorMain.c | 84 +++++----- source/libs/executor/src/executorimpl.c | 10 +- source/libs/executor/src/scanoperator.c | 120 +++++++------- source/libs/stream/src/streamTask.c | 4 +- 17 files changed, 306 insertions(+), 197 deletions(-) diff --git a/examples/c/stream_demo.c b/examples/c/stream_demo.c index 5a141867e7..6d341c61c7 100644 --- a/examples/c/stream_demo.c +++ b/examples/c/stream_demo.c @@ -88,9 +88,9 @@ int32_t create_stream() { /*const char* sql = "select min(k), max(k), sum(k) as sum_of_k from st1";*/ /*const char* sql = "select sum(k) from tu1 interval(10m)";*/ /*pRes = tmq_create_stream(pConn, "stream1", "out1", sql);*/ - pRes = taos_query(pConn, - "create stream stream1 trigger 
at_once into abc2.outstb as select _wstartts, sum(k) from st1 " - "partition by tbname interval(10m) "); + pRes = taos_query( + pConn, + "create stream stream1 trigger at_once into abc1.outstb as select _wstartts, sum(k) from st1 interval(10m) "); if (taos_errno(pRes) != 0) { printf("failed to create stream stream1, reason:%s\n", taos_errstr(pRes)); return -1; @@ -107,11 +107,4 @@ int main(int argc, char* argv[]) { code = init_env(); } create_stream(); -#if 0 - tmq_t* tmq = build_consumer(); - tmq_list_t* topic_list = build_topic_list(); - /*perf_loop(tmq, topic_list);*/ - /*basic_consume_loop(tmq, topic_list);*/ - sync_consume_loop(tmq, topic_list); -#endif } diff --git a/include/common/tmsgcb.h b/include/common/tmsgcb.h index e99377f9b4..b56f755266 100644 --- a/include/common/tmsgcb.h +++ b/include/common/tmsgcb.h @@ -34,7 +34,6 @@ typedef enum { WRITE_QUEUE, APPLY_QUEUE, SYNC_QUEUE, - MERGE_QUEUE, QUEUE_MAX, } EQueueType; diff --git a/include/dnode/snode/snode.h b/include/dnode/snode/snode.h index 611bff49f1..3d0ef2e052 100644 --- a/include/dnode/snode/snode.h +++ b/include/dnode/snode/snode.h @@ -16,8 +16,8 @@ #ifndef _TD_SNODE_H_ #define _TD_SNODE_H_ -#include "tmsgcb.h" #include "tmsg.h" +#include "tmsgcb.h" #include "trpc.h" #ifdef __cplusplus @@ -68,8 +68,8 @@ int32_t sndGetLoad(SSnode *pSnode, SSnodeLoad *pLoad); * @param pMsg The request message * @param pRsp The response message */ -void sndProcessUMsg(SSnode *pSnode, SRpcMsg *pMsg); -void sndProcessSMsg(SSnode *pSnode, SRpcMsg *pMsg); +int32_t sndProcessUMsg(SSnode *pSnode, SRpcMsg *pMsg); +int32_t sndProcessSMsg(SSnode *pSnode, SRpcMsg *pMsg); #ifdef __cplusplus } diff --git a/include/libs/planner/planner.h b/include/libs/planner/planner.h index 8ed95b6010..c4f71e57a8 100644 --- a/include/libs/planner/planner.h +++ b/include/libs/planner/planner.h @@ -36,7 +36,6 @@ typedef struct SPlanContext { int64_t watermark; char* pMsg; int32_t msgLen; - // double filesFactor; } SPlanContext; // Create the 
physical plan for the query, according to the AST. diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 2c9d66a828..2b3a1f2650 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -152,7 +152,7 @@ void* streamDataBlockDecode(const void* buf, SStreamDataBlock* pInput); typedef struct { char* qmsg; // followings are not applicable to encoder and decoder - void* inputHandle; + // void* inputHandle; void* executor; } STaskExec; @@ -240,12 +240,13 @@ struct SStreamTask { int8_t inputType; int8_t status; - int8_t sourceType; int8_t execType; int8_t sinkType; int8_t dispatchType; int16_t dispatchMsgType; + int8_t dataScan; + // node info int32_t childId; int32_t nodeId; diff --git a/source/dnode/mgmt/mgmt_snode/src/smHandle.c b/source/dnode/mgmt/mgmt_snode/src/smHandle.c index 66ab627e32..52a69f95b4 100644 --- a/source/dnode/mgmt/mgmt_snode/src/smHandle.c +++ b/source/dnode/mgmt/mgmt_snode/src/smHandle.c @@ -95,9 +95,12 @@ SArray *smGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_MON_SM_INFO, smPutNodeMsgToMonitorQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DEPLOY, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_DROP, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RUN, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DISPATCH, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DISPATCH_RSP, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RECOVER, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RECOVER_RSP, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; code = 0; _OVER: diff --git a/source/dnode/mgmt/mgmt_snode/src/smWorker.c b/source/dnode/mgmt/mgmt_snode/src/smWorker.c index 34a205232e..8d93ddd66c 
100644 --- a/source/dnode/mgmt/mgmt_snode/src/smWorker.c +++ b/source/dnode/mgmt/mgmt_snode/src/smWorker.c @@ -55,7 +55,9 @@ static void smProcessUniqueQueue(SQueueInfo *pInfo, STaosQall *qall, int32_t num taosGetQitem(qall, (void **)&pMsg); dTrace("msg:%p, get from snode-unique queue", pMsg); - sndProcessUMsg(pMgmt->pSnode, pMsg); + if (sndProcessUMsg(pMgmt->pSnode, pMsg) < 0) { + ASSERT(0); + } dTrace("msg:%p, is freed", pMsg); rpcFreeCont(pMsg->pCont); @@ -67,7 +69,9 @@ static void smProcessSharedQueue(SQueueInfo *pInfo, SRpcMsg *pMsg) { SSnodeMgmt *pMgmt = pInfo->ahandle; dTrace("msg:%p, get from snode-shared queue", pMsg); - sndProcessSMsg(pMgmt->pSnode, pMsg); + if (sndProcessSMsg(pMgmt->pSnode, pMsg) < 0) { + ASSERT(0); + } dTrace("msg:%p, is freed", pMsg); rpcFreeCont(pMsg->pCont); diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c b/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c index 88831384d4..69a2d491ab 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c @@ -169,10 +169,6 @@ static int32_t vmPutMsgToQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg, EQueueType qtyp dTrace("vgId:%d, msg:%p put into vnode-sync queue", pVnode->vgId, pMsg); taosWriteQitem(pVnode->pSyncQ, pMsg); break; - case MERGE_QUEUE: - dTrace("vgId:%d, msg:%p put into vnode-merge queue", pVnode->vgId, pMsg); - taosWriteQitem(pVnode->pMergeQ, pMsg); - break; case APPLY_QUEUE: dTrace("vgId:%d, msg:%p put into vnode-apply queue", pVnode->vgId, pMsg); taosWriteQitem(pVnode->pApplyQ, pMsg); @@ -195,8 +191,6 @@ int32_t vmPutMsgToQueryQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { return vmPutMsg int32_t vmPutMsgToFetchQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { return vmPutMsgToQueue(pMgmt, pMsg, FETCH_QUEUE); } -int32_t vmPutMsgToMergeQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { return vmPutMsgToQueue(pMgmt, pMsg, MERGE_QUEUE); } - int32_t vmPutMsgToMgmtQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { dTrace("msg:%p, put into vnode-mgmt queue", pMsg); 
taosWriteQitem(pMgmt->mgmtWorker.queue, pMsg); @@ -242,9 +236,6 @@ int32_t vmGetQueueSize(SVnodeMgmt *pMgmt, int32_t vgId, EQueueType qtype) { case FETCH_QUEUE: size = taosQueueItemSize(pVnode->pFetchQ); break; - case MERGE_QUEUE: - size = taosQueueItemSize(pVnode->pMergeQ); - break; default: break; } diff --git a/source/dnode/mnode/impl/src/mndScheduler.c b/source/dnode/mnode/impl/src/mndScheduler.c index 39bb6798aa..3ff0c39bc3 100644 --- a/source/dnode/mnode/impl/src/mndScheduler.c +++ b/source/dnode/mnode/impl/src/mndScheduler.c @@ -63,9 +63,8 @@ int32_t mndConvertRsmaTask(char** pDst, int32_t* pDstLen, const char* ast, int64 .topicQuery = false, .streamQuery = true, .rSmaQuery = true, - .triggerType = STREAM_TRIGGER_AT_ONCE, + .triggerType = triggerType, .watermark = watermark, - /*.filesFactor = filesFactor,*/ }; if (qCreateQueryPlan(&cxt, &pPlan, NULL) < 0) { @@ -270,7 +269,6 @@ int32_t mndAddShuffleSinkTasksToStream(SMnode* pMnode, STrans* pTrans, SStreamOb pTask->epSet = mndGetVgroupEpset(pMnode, pVgroup); // source - pTask->sourceType = TASK_SOURCE__MERGE; pTask->inputType = TASK_INPUT_TYPE__DATA_BLOCK; // exec @@ -316,7 +314,6 @@ int32_t mndAddFixedSinkTaskToStream(SMnode* pMnode, STrans* pTrans, SStreamObj* #endif pTask->epSet = mndGetVgroupEpset(pMnode, &pStream->fixedSinkVg); // source - pTask->sourceType = TASK_SOURCE__MERGE; pTask->inputType = TASK_INPUT_TYPE__DATA_BLOCK; // exec @@ -427,6 +424,8 @@ int32_t mndScheduleStream(SMnode* pMnode, STrans* pTrans, SStreamObj* pStream) { SStreamTask* pTask = tNewSStreamTask(pStream->uid); mndAddTaskToTaskSet(taskSourceLevel, pTask); + pTask->dataScan = 1; + // input pTask->inputType = TASK_INPUT_TYPE__SUMBIT_BLOCK; @@ -470,6 +469,8 @@ int32_t mndScheduleStream(SMnode* pMnode, STrans* pTrans, SStreamObj* pStream) { SStreamTask* pTask = tNewSStreamTask(pStream->uid); mndAddTaskToTaskSet(taskOneLevel, pTask); + pTask->dataScan = 1; + // input pTask->inputType = TASK_INPUT_TYPE__SUMBIT_BLOCK; diff --git 
a/source/dnode/snode/inc/sndInt.h b/source/dnode/snode/inc/sndInt.h index 2802537dcd..8916e2a31c 100644 --- a/source/dnode/snode/inc/sndInt.h +++ b/source/dnode/snode/inc/sndInt.h @@ -56,7 +56,6 @@ SStreamTask* sndMetaGetTask(SStreamMeta* pMeta, int32_t taskId); int32_t sndMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId); int32_t sndDropTaskOfStream(SStreamMeta* pMeta, int64_t streamId); - int32_t sndStopTaskOfStream(SStreamMeta* pMeta, int64_t streamId); int32_t sndResumeTaskOfStream(SStreamMeta* pMeta, int64_t streamId); diff --git a/source/dnode/snode/src/snode.c b/source/dnode/snode/src/snode.c index cbbe071c5f..8ef48ccbf9 100644 --- a/source/dnode/snode/src/snode.c +++ b/source/dnode/snode/src/snode.c @@ -76,45 +76,158 @@ int32_t sndMetaRemoveTask(SStreamMeta *pMeta, int32_t taskId) { return taosHashRemove(pMeta->pHash, &taskId, sizeof(int32_t)); } -static int32_t sndProcessTaskExecReq(SSnode *pSnode, SRpcMsg *pMsg) { - /*SStreamExecMsgHead *pHead = pMsg->pCont;*/ - /*int32_t taskId = pHead->streamTaskId;*/ - /*SStreamTask *pTask = sndMetaGetTask(pSnode->pMeta, taskId);*/ - /*if (pTask == NULL) {*/ - /*return -1;*/ - /*}*/ +static int32_t sndProcessTaskDeployReq(SSnode *pNode, SRpcMsg *pMsg) { + SStreamMeta *pMeta = pNode->pMeta; + char *msg = pMsg->pCont; + int32_t msgLen = pMsg->contLen; + + SStreamTask *pTask = taosMemoryCalloc(1, sizeof(SStreamTask)); + if (pTask == NULL) { + return -1; + } + SDecoder decoder; + tDecoderInit(&decoder, (uint8_t *)msg, msgLen); + if (tDecodeSStreamTask(&decoder, pTask) < 0) { + ASSERT(0); + } + tDecoderClear(&decoder); + + pTask->status = TASK_STATUS__IDLE; + + pTask->inputQueue = streamQueueOpen(); + pTask->outputQueue = streamQueueOpen(); + pTask->inputStatus = TASK_INPUT_STATUS__NORMAL; + pTask->outputStatus = TASK_INPUT_STATUS__NORMAL; + + if (pTask->inputQueue == NULL || pTask->outputQueue == NULL) goto FAIL; + + pTask->pMsgCb = &pNode->msgCb; + + ASSERT(pTask->execType != TASK_EXEC__NONE); + + SReadHandle handle = { 
+ .pMsgCb = &pNode->msgCb, + }; + + /*pTask->exec.inputHandle = NULL;*/ + pTask->exec.executor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &handle); + ASSERT(pTask->exec.executor); + + streamSetupTrigger(pTask); + + qInfo("deploy stream: stream id %ld task id %d child id %d on snode", pTask->streamId, pTask->taskId, pTask->childId); + + return 0; + +FAIL: + if (pTask->inputQueue) streamQueueClose(pTask->inputQueue); + if (pTask->outputQueue) streamQueueClose(pTask->outputQueue); + return -1; +} + +static int32_t sndProcessTaskRunReq(SSnode *pNode, SRpcMsg *pMsg) { + SStreamMeta *pMeta = pNode->pMeta; + SStreamTaskRunReq *pReq = pMsg->pCont; + int32_t taskId = pReq->taskId; + SStreamTask *pTask = *(SStreamTask **)taosHashGet(pMeta->pHash, &taskId, sizeof(int32_t)); + streamTaskProcessRunReq(pTask, &pNode->msgCb); return 0; } -void sndProcessUMsg(SSnode *pSnode, SRpcMsg *pMsg) { +static int32_t sndProcessTaskDispatchReq(SSnode *pNode, SRpcMsg *pMsg) { + SStreamMeta *pMeta = pNode->pMeta; + + char *msgStr = pMsg->pCont; + char *msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead)); + int32_t msgLen = pMsg->contLen - sizeof(SMsgHead); + + SStreamDispatchReq req; + SDecoder decoder; + tDecoderInit(&decoder, msgBody, msgLen); + tDecodeStreamDispatchReq(&decoder, &req); + int32_t taskId = req.taskId; + SStreamTask *pTask = *(SStreamTask **)taosHashGet(pMeta->pHash, &taskId, sizeof(int32_t)); + SRpcMsg rsp = { + .info = pMsg->info, + .code = 0, + }; + streamProcessDispatchReq(pTask, &pNode->msgCb, &req, &rsp); + return 0; +} + +static int32_t sndProcessTaskRecoverReq(SSnode *pNode, SRpcMsg *pMsg) { + SStreamMeta *pMeta = pNode->pMeta; + + SStreamTaskRecoverReq *pReq = pMsg->pCont; + int32_t taskId = pReq->taskId; + SStreamTask *pTask = *(SStreamTask **)taosHashGet(pMeta->pHash, &taskId, sizeof(int32_t)); + streamProcessRecoverReq(pTask, &pNode->msgCb, pReq, pMsg); + return 0; +} + +static int32_t sndProcessTaskDispatchRsp(SSnode *pNode, SRpcMsg *pMsg) { + SStreamMeta *pMeta 
= pNode->pMeta; + + SStreamDispatchRsp *pRsp = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); + int32_t taskId = pRsp->taskId; + SStreamTask *pTask = *(SStreamTask **)taosHashGet(pMeta->pHash, &taskId, sizeof(int32_t)); + streamProcessDispatchRsp(pTask, &pNode->msgCb, pRsp); + return 0; +} + +static int32_t sndProcessTaskRecoverRsp(SSnode *pNode, SRpcMsg *pMsg) { + SStreamMeta *pMeta = pNode->pMeta; + + SStreamTaskRecoverRsp *pRsp = pMsg->pCont; + int32_t taskId = pRsp->taskId; + SStreamTask *pTask = *(SStreamTask **)taosHashGet(pMeta->pHash, &taskId, sizeof(int32_t)); + streamProcessRecoverRsp(pTask, pRsp); + return 0; +} + +static int32_t sndProcessTaskDropReq(SSnode *pNode, SRpcMsg *pMsg) { + SStreamMeta *pMeta = pNode->pMeta; + + char *msg = pMsg->pCont; + int32_t msgLen = pMsg->contLen; + SVDropStreamTaskReq *pReq = (SVDropStreamTaskReq *)msg; + int32_t code = taosHashRemove(pMeta->pHash, &pReq->taskId, sizeof(int32_t)); + ASSERT(code == 0); + if (code == 0) { + // sendrsp + } + return code; +} + +int32_t sndProcessUMsg(SSnode *pSnode, SRpcMsg *pMsg) { // stream deploy // stream stop/resume // operator exec - if (pMsg->msgType == TDMT_STREAM_TASK_DEPLOY) { - void *msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); - SStreamTask *pTask = taosMemoryMalloc(sizeof(SStreamTask)); - if (pTask == NULL) { + switch (pMsg->msgType) { + case TDMT_STREAM_TASK_DEPLOY: + return sndProcessTaskDeployReq(pSnode, pMsg); + case TDMT_VND_STREAM_TASK_DROP: + return sndProcessTaskDropReq(pSnode, pMsg); + default: ASSERT(0); - return; - } - SDecoder decoder; - tDecoderInit(&decoder, msg, pMsg->contLen - sizeof(SMsgHead)); - tDecodeSStreamTask(&decoder, pTask); - tDecoderClear(&decoder); - - sndMetaDeployTask(pSnode->pMeta, pTask); - /*} else if (pMsg->msgType == TDMT_SND_TASK_EXEC) {*/ - /*sndProcessTaskExecReq(pSnode, pMsg);*/ - } else { - ASSERT(0); } + return 0; } -void sndProcessSMsg(SSnode *pSnode, SRpcMsg *pMsg) { - // operator exec - /*if (pMsg->msgType == TDMT_SND_TASK_EXEC) 
{*/ - /*sndProcessTaskExecReq(pSnode, pMsg);*/ - /*} else {*/ - ASSERT(0); - /*}*/ +int32_t sndProcessSMsg(SSnode *pSnode, SRpcMsg *pMsg) { + switch (pMsg->msgType) { + case TDMT_STREAM_TASK_RUN: + return sndProcessTaskRunReq(pSnode, pMsg); + case TDMT_STREAM_TASK_DISPATCH: + return sndProcessTaskDispatchReq(pSnode, pMsg); + case TDMT_STREAM_TASK_RECOVER: + return sndProcessTaskRecoverReq(pSnode, pMsg); + case TDMT_STREAM_TASK_DISPATCH_RSP: + return sndProcessTaskDispatchRsp(pSnode, pMsg); + case TDMT_STREAM_TASK_RECOVER_RSP: + return sndProcessTaskRecoverRsp(pSnode, pMsg); + default: + ASSERT(0); + } + return 0; } diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 70b09ec701..06a119b076 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -125,10 +125,10 @@ int32_t tqProcessOffsetCommitReq(STQ* pTq, char* msg, int32_t msgLen) { if (offset.type == TMQ_OFFSET__SNAPSHOT) { tqDebug("receive offset commit msg to %s on vg %d, offset(type:snapshot) uid: %ld, ts: %ld", offset.subKey, - pTq->pVnode->config.vgId, offset.uid, offset.ts); + TD_VID(pTq->pVnode), offset.uid, offset.ts); } else if (offset.type == TMQ_OFFSET__LOG) { tqDebug("receive offset commit msg to %s on vg %d, offset(type:log) version: %ld", offset.subKey, - pTq->pVnode->config.vgId, offset.version); + TD_VID(pTq->pVnode), offset.version); } else { ASSERT(0); } @@ -159,7 +159,7 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg, int32_t workerId) { if (pOffset != NULL) { ASSERT(pOffset->type == TMQ_OFFSET__LOG); tqDebug("consumer %ld, restore offset of %s on vg %d, offset(type:log) version: %ld", consumerId, pReq->subKey, - pTq->pVnode->config.vgId, pOffset->version); + TD_VID(pTq->pVnode), pOffset->version); fetchOffset = pOffset->version + 1; } else { if (pReq->currentOffset == TMQ_CONF__RESET_OFFSET__EARLIEAST) { @@ -167,13 +167,13 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg, int32_t workerId) { } else if (pReq->currentOffset == 
TMQ_CONF__RESET_OFFSET__LATEST) { fetchOffset = walGetCommittedVer(pTq->pWal); } else if (pReq->currentOffset == TMQ_CONF__RESET_OFFSET__NONE) { - tqError("tmq poll: no offset committed for consumer %ld in vg %d, subkey %s", consumerId, - pTq->pVnode->config.vgId, pReq->subKey); + tqError("tmq poll: no offset committed for consumer %ld in vg %d, subkey %s", consumerId, TD_VID(pTq->pVnode), + pReq->subKey); terrno = TSDB_CODE_TQ_NO_COMMITTED_OFFSET; return -1; } tqDebug("consumer %ld, restore offset of %s on vg %d failed, config is %ld, set to %ld", consumerId, pReq->subKey, - pTq->pVnode->config.vgId, pReq->currentOffset, fetchOffset); + TD_VID(pTq->pVnode), pReq->currentOffset, fetchOffset); } } @@ -183,14 +183,14 @@ int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg, int32_t workerId) { STqHandle* pHandle = taosHashGet(pTq->handles, pReq->subKey, strlen(pReq->subKey)); /*ASSERT(pHandle);*/ if (pHandle == NULL) { - tqError("tmq poll: no consumer handle for consumer %ld in vg %d, subkey %s", consumerId, pTq->pVnode->config.vgId, + tqError("tmq poll: no consumer handle for consumer %ld in vg %d, subkey %s", consumerId, TD_VID(pTq->pVnode), pReq->subKey); return -1; } if (pHandle->consumerId != consumerId) { tqError("tmq poll: consumer handle mismatch for consumer %ld in vg %d, subkey %s, handle consumer id %ld", - consumerId, pTq->pVnode->config.vgId, pReq->subKey, pHandle->consumerId); + consumerId, TD_VID(pTq->pVnode), pReq->subKey, pHandle->consumerId); return -1; } @@ -304,7 +304,6 @@ int32_t tqProcessVgDeleteReq(STQ* pTq, char* msg, int32_t msgLen) { return 0; } -// TODO: persist meta into tdb int32_t tqProcessVgChangeReq(STQ* pTq, char* msg, int32_t msgLen) { SMqRebVgReq req = {0}; tDecodeSMqRebVgReq(msg, &req); @@ -346,10 +345,10 @@ int32_t tqProcessVgChangeReq(STQ* pTq, char* msg, int32_t msgLen) { pHandle->execHandle.execTb.suid = req.suid; SArray* tbUidList = taosArrayInit(0, sizeof(int64_t)); tsdbGetCtbIdList(pTq->pVnode->pMeta, req.suid, tbUidList); - 
tqDebug("vg %d, tq try get suid: %ld", pTq->pVnode->config.vgId, req.suid); + tqDebug("vg %d, tq try get suid: %ld", TD_VID(pTq->pVnode), req.suid); for (int32_t i = 0; i < taosArrayGetSize(tbUidList); i++) { int64_t tbUid = *(int64_t*)taosArrayGet(tbUidList, i); - tqDebug("vg %d, idx %d, uid: %ld", pTq->pVnode->config.vgId, i, tbUid); + tqDebug("vg %d, idx %d, uid: %ld", TD_VID(pTq->pVnode), i, tbUid); } for (int32_t i = 0; i < 5; i++) { tqReadHandleSetTbUidList(pHandle->execHandle.pExecReader[i], tbUidList); @@ -400,16 +399,21 @@ int32_t tqProcessTaskDeploy(STQ* pTq, char* msg, int32_t msgLen) { // exec if (pTask->execType != TASK_EXEC__NONE) { // expand runners - STqReadHandle* pStreamReader = tqInitSubmitMsgScanner(pTq->pVnode->pMeta); - SReadHandle handle = { - .reader = pStreamReader, - .meta = pTq->pVnode->pMeta, - .pMsgCb = &pTq->pVnode->msgCb, - .vnode = pTq->pVnode, - }; - pTask->exec.inputHandle = pStreamReader; - pTask->exec.executor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &handle); - ASSERT(pTask->exec.executor); + if (pTask->dataScan) { + STqReadHandle* pStreamReader = tqInitSubmitMsgScanner(pTq->pVnode->pMeta); + SReadHandle handle = { + .reader = pStreamReader, + .meta = pTq->pVnode->pMeta, + .pMsgCb = &pTq->pVnode->msgCb, + .vnode = pTq->pVnode, + }; + /*pTask->exec.inputHandle = pStreamReader;*/ + pTask->exec.executor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &handle); + ASSERT(pTask->exec.executor); + } else { + pTask->exec.executor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, NULL); + ASSERT(pTask->exec.executor); + } } // sink @@ -431,7 +435,7 @@ int32_t tqProcessTaskDeploy(STQ* pTq, char* msg, int32_t msgLen) { streamSetupTrigger(pTask); - tqInfo("deploy stream task id %d child id %d on vg %d", pTask->taskId, pTask->childId, pTq->pVnode->config.vgId); + tqInfo("deploy stream task id %d child id %d on vg %d", pTask->taskId, pTask->childId, TD_VID(pTq->pVnode)); taosHashPut(pTq->pStreamTasks, &pTask->taskId, sizeof(int32_t), &pTask, 
sizeof(void*)); @@ -464,7 +468,7 @@ int32_t tqProcessStreamTrigger(STQ* pTq, SSubmitReq* pReq) { continue; } - if (streamLaunchByWrite(pTask, pTq->pVnode->config.vgId, &pTq->pVnode->msgCb) < 0) { + if (streamLaunchByWrite(pTask, TD_VID(pTq->pVnode), &pTq->pVnode->msgCb) < 0) { continue; } } else { @@ -534,9 +538,9 @@ int32_t tqProcessTaskRecoverRsp(STQ* pTq, SRpcMsg* pMsg) { int32_t tqProcessTaskDropReq(STQ* pTq, char* msg, int32_t msgLen) { SVDropStreamTaskReq* pReq = (SVDropStreamTaskReq*)msg; int32_t code = taosHashRemove(pTq->pStreamTasks, &pReq->taskId, sizeof(int32_t)); + ASSERT(code == 0); if (code == 0) { // sendrsp } - ASSERT(code == 0); return code; } diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index b1d076e8f5..c99b3c1058 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -99,7 +99,7 @@ int32_t qSetMultiStreamInput(qTaskInfo_t tinfo, const void* pBlocks, size_t numO } qTaskInfo_t qCreateStreamExecTaskInfo(void* msg, void* streamReadHandle) { - if (msg == NULL || streamReadHandle == NULL) { + if (msg == NULL) { return NULL; } diff --git a/source/libs/executor/src/executorMain.c b/source/libs/executor/src/executorMain.c index 00158d7024..663f50e0fc 100644 --- a/source/libs/executor/src/executorMain.c +++ b/source/libs/executor/src/executorMain.c @@ -13,10 +13,10 @@ * along with this program. If not, see . 
*/ -#include "os.h" -#include "tref.h" #include "dataSinkMgt.h" +#include "os.h" #include "tmsg.h" +#include "tref.h" #include "tudf.h" #include "executor.h" @@ -24,15 +24,13 @@ #include "query.h" static TdThreadOnce initPoolOnce = PTHREAD_ONCE_INIT; -int32_t exchangeObjRefPool = -1; +int32_t exchangeObjRefPool = -1; -static void initRefPool() { - exchangeObjRefPool = taosOpenRef(1024, doDestroyExchangeOperatorInfo); -} +static void initRefPool() { exchangeObjRefPool = taosOpenRef(1024, doDestroyExchangeOperatorInfo); } int32_t qCreateExecTask(SReadHandle* readHandle, int32_t vgId, uint64_t taskId, SSubplan* pSubplan, qTaskInfo_t* pTaskInfo, DataSinkHandle* handle, const char* sql, EOPTR_EXEC_MODEL model) { - assert(readHandle != NULL && pSubplan != NULL); + assert(pSubplan != NULL); SExecTaskInfo** pTask = (SExecTaskInfo**)pTaskInfo; taosThreadOnce(&initPoolOnce, initRefPool); @@ -47,57 +45,57 @@ int32_t qCreateExecTask(SReadHandle* readHandle, int32_t vgId, uint64_t taskId, if (code != TSDB_CODE_SUCCESS) { goto _error; } - + if (handle) { void* pSinkParam = NULL; code = createDataSinkParam(pSubplan->pDataSink, &pSinkParam, pTaskInfo); if (code != TSDB_CODE_SUCCESS) { goto _error; } - + code = dsCreateDataSinker(pSubplan->pDataSink, handle, pSinkParam); } - _error: +_error: // if failed to add ref for all tables in this query, abort current query return code; } #ifdef TEST_IMPL // wait moment -int waitMoment(SQInfo* pQInfo){ - if(pQInfo->sql) { - int ms = 0; +int waitMoment(SQInfo* pQInfo) { + if (pQInfo->sql) { + int ms = 0; char* pcnt = strstr(pQInfo->sql, " count(*)"); - if(pcnt) return 0; - + if (pcnt) return 0; + char* pos = strstr(pQInfo->sql, " t_"); - if(pos){ + if (pos) { pos += 3; ms = atoi(pos); - while(*pos >= '0' && *pos <= '9'){ - pos ++; + while (*pos >= '0' && *pos <= '9') { + pos++; } char unit_char = *pos; - if(unit_char == 'h'){ - ms *= 3600*1000; - } else if(unit_char == 'm'){ - ms *= 60*1000; - } else if(unit_char == 's'){ + if (unit_char == 
'h') { + ms *= 3600 * 1000; + } else if (unit_char == 'm') { + ms *= 60 * 1000; + } else if (unit_char == 's') { ms *= 1000; } } - if(ms == 0) return 0; + if (ms == 0) return 0; printf("test wait sleep %dms. sql=%s ...\n", ms, pQInfo->sql); - - if(ms < 1000) { + + if (ms < 1000) { taosMsleep(ms); } else { int used_ms = 0; - while(used_ms < ms) { + while (used_ms < ms) { taosMsleep(1000); used_ms += 1000; - if(isTaskKilled(pQInfo)){ + if (isTaskKilled(pQInfo)) { printf("test check query is canceled, sleep break.%s\n", pQInfo->sql); break; } @@ -108,15 +106,14 @@ int waitMoment(SQInfo* pQInfo){ } #endif -int32_t qExecTask(qTaskInfo_t tinfo, SSDataBlock** pRes, uint64_t *useconds) { +int32_t qExecTask(qTaskInfo_t tinfo, SSDataBlock** pRes, uint64_t* useconds) { SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo; int64_t threadId = taosGetSelfPthreadId(); *pRes = NULL; int64_t curOwner = 0; if ((curOwner = atomic_val_compare_exchange_64(&pTaskInfo->owner, 0, threadId)) != 0) { - qError("%s-%p execTask is now executed by thread:%p", GET_TASKID(pTaskInfo), pTaskInfo, - (void*)curOwner); + qError("%s-%p execTask is now executed by thread:%p", GET_TASKID(pTaskInfo), pTaskInfo, (void*)curOwner); pTaskInfo->code = TSDB_CODE_QRY_IN_EXEC; return pTaskInfo->code; } @@ -152,18 +149,18 @@ int32_t qExecTask(qTaskInfo_t tinfo, SSDataBlock** pRes, uint64_t *useconds) { cleanUpUdfs(); - int32_t current = (*pRes != NULL)? (*pRes)->info.rows:0; + int32_t current = (*pRes != NULL) ? 
(*pRes)->info.rows : 0; uint64_t total = pTaskInfo->pRoot->resultInfo.totalRows; qDebug("%s task suspended, %d rows returned, total:%" PRId64 " rows, in sinkNode:%d, elapsed:%.2f ms", - GET_TASKID(pTaskInfo), current, total, 0, el/1000.0); + GET_TASKID(pTaskInfo), current, total, 0, el / 1000.0); atomic_store_64(&pTaskInfo->owner, 0); return pTaskInfo->code; } int32_t qKillTask(qTaskInfo_t qinfo) { - SExecTaskInfo *pTaskInfo = (SExecTaskInfo *)qinfo; + SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)qinfo; if (pTaskInfo == NULL) { return TSDB_CODE_QRY_INVALID_QHANDLE; @@ -182,7 +179,7 @@ int32_t qKillTask(qTaskInfo_t qinfo) { } int32_t qAsyncKillTask(qTaskInfo_t qinfo) { - SExecTaskInfo *pTaskInfo = (SExecTaskInfo *)qinfo; + SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)qinfo; if (pTaskInfo == NULL) { return TSDB_CODE_QRY_INVALID_QHANDLE; @@ -195,7 +192,7 @@ int32_t qAsyncKillTask(qTaskInfo_t qinfo) { } int32_t qIsTaskCompleted(qTaskInfo_t qinfo) { - SExecTaskInfo *pTaskInfo = (SExecTaskInfo *)qinfo; + SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)qinfo; if (pTaskInfo == NULL) { return TSDB_CODE_QRY_INVALID_QHANDLE; @@ -205,18 +202,17 @@ int32_t qIsTaskCompleted(qTaskInfo_t qinfo) { } void qDestroyTask(qTaskInfo_t qTaskHandle) { - SExecTaskInfo* pTaskInfo = (SExecTaskInfo*) qTaskHandle; - qDebug("%s execTask completed, numOfRows:%"PRId64, GET_TASKID(pTaskInfo), pTaskInfo->pRoot->resultInfo.totalRows); + SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)qTaskHandle; + qDebug("%s execTask completed, numOfRows:%" PRId64, GET_TASKID(pTaskInfo), pTaskInfo->pRoot->resultInfo.totalRows); - queryCostStatis(pTaskInfo); // print the query cost summary + queryCostStatis(pTaskInfo); // print the query cost summary doDestroyTask(pTaskInfo); } -int32_t qGetExplainExecInfo(qTaskInfo_t tinfo, int32_t *resNum, SExplainExecInfo **pRes) { - SExecTaskInfo *pTaskInfo = (SExecTaskInfo *)tinfo; - int32_t capacity = 0; +int32_t qGetExplainExecInfo(qTaskInfo_t tinfo, int32_t* resNum, SExplainExecInfo** 
pRes) { + SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo; + int32_t capacity = 0; - return getOperatorExplainExecInfo(pTaskInfo->pRoot, pRes, &capacity, resNum); + return getOperatorExplainExecInfo(pTaskInfo->pRoot, pRes, &capacity, resNum); } - diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index b8516a3056..36291545db 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -4557,11 +4557,17 @@ SOperatorInfo* createOperatorTree(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo STimeWindowAggSupp twSup = { .waterMark = pTableScanNode->watermark, .calTrigger = pTableScanNode->triggerType, .maxTs = INT64_MIN}; tsdbReaderT pDataReader = NULL; + + if (pHandle) { + pDataReader = doCreateDataReader(pTableScanNode, pHandle, pTableListInfo, (uint64_t)queryId, taskId, pTagCond); + } +#if 0 if (pHandle->vnode) { pDataReader = doCreateDataReader(pTableScanNode, pHandle, pTableListInfo, (uint64_t)queryId, taskId, pTagCond); } else { getTableList(pHandle->meta, pScanPhyNode->tableType, pScanPhyNode->uid, pTableListInfo, pTagCond); } +#endif if (pDataReader == NULL && terrno != 0) { qDebug("%s pDataReader is NULL", GET_TASKID(pTaskInfo)); @@ -4894,8 +4900,8 @@ SArray* extractColumnInfo(SNodeList* pNodeList) { } SArray* extractPartitionColInfo(SNodeList* pNodeList) { - if(!pNodeList) { - return NULL; + if (!pNodeList) { + return NULL; } size_t numOfCols = LIST_LENGTH(pNodeList); diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 2871145dcc..dbac4f0483 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -537,7 +537,7 @@ SOperatorInfo* createTableScanOperatorInfo(STableScanPhysiNode* pTableScanNode, goto _error; } - //taosSsleep(20); + // taosSsleep(20); SDataBlockDescNode* pDescNode = pTableScanNode->scan.node.pOutputDataBlockDesc; @@ -800,23 +800,23 @@ static SSDataBlock* 
doDataScan(SStreamBlockScanInfo* pInfo) { if (!pResult) { return NULL; } - + if (pResult->info.groupId == pInfo->groupId) { return pResult; } } -/* Todo(liuyao) for partition by column - SSDataBlock* pBlock = createOneDataBlock(pResult, true); - blockDataCleanup(pResult); - for (int32_t i = 0; i < pBlock->info.rows; i++) { - uint64_t id = getGroupId(pInfo->pOperatorDumy, pBlock, i); - if (id == pInfo->groupId) { - copyOneRow(pResult, pBlock, i); + /* Todo(liuyao) for partition by column + SSDataBlock* pBlock = createOneDataBlock(pResult, true); + blockDataCleanup(pResult); + for (int32_t i = 0; i < pBlock->info.rows; i++) { + uint64_t id = getGroupId(pInfo->pOperatorDumy, pBlock, i); + if (id == pInfo->groupId) { + copyOneRow(pResult, pBlock, i); + } } - } - return pResult; -*/ + return pResult; + */ } static void setUpdateData(SStreamBlockScanInfo* pInfo, SSDataBlock* pBlock, SSDataBlock* pUpdateBlock) { @@ -831,7 +831,7 @@ static void setUpdateData(SStreamBlockScanInfo* pInfo, SSDataBlock* pBlock, SSDa int32_t rowId = *(int32_t*)taosArrayGet(pInfo->tsArray, pInfo->tsArrayIndex); pInfo->groupId = getGroupId(pInfo->pOperatorDumy, pBlock, rowId); int32_t i = 0; - for ( ; i < size; i++) { + for (; i < size; i++) { rowId = *(int32_t*)taosArrayGet(pInfo->tsArray, i + pInfo->tsArrayIndex); uint64_t id = getGroupId(pInfo->pOperatorDumy, pBlock, rowId); if (pInfo->groupId != id) { @@ -1061,9 +1061,6 @@ SOperatorInfo* createStreamScanOperatorInfo(void* pDataReader, SReadHandle* pHan SScanPhysiNode* pScanPhyNode = &pTableScanNode->scan; SDataBlockDescNode* pDescNode = pScanPhyNode->node.pOutputDataBlockDesc; - SOperatorInfo* pTableScanDummy = createTableScanOperatorInfo(pTableScanNode, pDataReader, pHandle, pTaskInfo); - - STableScanInfo* pSTInfo = (STableScanInfo*)pTableScanDummy->info; int32_t numOfCols = 0; pInfo->pColMatchInfo = @@ -1081,16 +1078,6 @@ SOperatorInfo* createStreamScanOperatorInfo(void* pDataReader, SReadHandle* pHan } } - // set the extract column id to 
streamHandle - tqReadHandleSetColIdList((STqReadHandle*)pHandle->reader, pColIds); - SArray* tableIdList = extractTableIdList(&pTaskInfo->tableqinfoList); - int32_t code = tqReadHandleSetTbUidList(pHandle->reader, tableIdList); - if (code != 0) { - taosArrayDestroy(tableIdList); - goto _error; - } - taosArrayDestroy(tableIdList); - pInfo->pBlockLists = taosArrayInit(4, POINTER_BYTES); if (pInfo->pBlockLists == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -1102,30 +1089,44 @@ SOperatorInfo* createStreamScanOperatorInfo(void* pDataReader, SReadHandle* pHan goto _error; } - if (pSTInfo->interval.interval > 0 && pDataReader) { - pInfo->pUpdateInfo = updateInfoInitP(&pSTInfo->interval, pTwSup->waterMark); - } else { - pInfo->pUpdateInfo = NULL; + if (pDataReader) { + SOperatorInfo* pTableScanDummy = createTableScanOperatorInfo(pTableScanNode, pDataReader, pHandle, pTaskInfo); + STableScanInfo* pSTInfo = (STableScanInfo*)pTableScanDummy->info; + if (pSTInfo->interval.interval > 0) { + pInfo->pUpdateInfo = updateInfoInitP(&pSTInfo->interval, pTwSup->waterMark); + } else { + pInfo->pUpdateInfo = NULL; + } + pInfo->pOperatorDumy = pTableScanDummy; + pInfo->interval = pSTInfo->interval; + + // set the extract column id to streamHandle + tqReadHandleSetColIdList((STqReadHandle*)pHandle->reader, pColIds); + SArray* tableIdList = extractTableIdList(&pTaskInfo->tableqinfoList); + int32_t code = tqReadHandleSetTbUidList(pHandle->reader, tableIdList); + if (code != 0) { + taosArrayDestroy(tableIdList); + goto _error; + } + taosArrayDestroy(tableIdList); + pInfo->readHandle = *pHandle; + pInfo->streamBlockReader = pHandle->reader; } // create the pseduo columns info if (pTableScanNode->scan.pScanPseudoCols != NULL) { pInfo->pPseudoExpr = createExprInfo(pTableScanNode->scan.pScanPseudoCols, NULL, &pInfo->numOfPseudoExpr); + pInfo->tableUid = pScanPhyNode->uid; } - pInfo->readHandle = *pHandle; - pInfo->tableUid = pScanPhyNode->uid; - pInfo->streamBlockReader = pHandle->reader; 
pInfo->pRes = createResDataBlock(pDescNode); pInfo->pUpdateRes = createResDataBlock(pDescNode); pInfo->pCondition = pScanPhyNode->node.pConditions; pInfo->pDataReader = pDataReader; pInfo->scanMode = STREAM_SCAN_FROM_READERHANDLE; - pInfo->pOperatorDumy = pTableScanDummy; - pInfo->interval = pSTInfo->interval; pInfo->sessionSup = (SessionWindowSupporter){.pStreamAggSup = NULL, .gap = -1}; pInfo->groupId = 0; - + pOperator->name = "StreamBlockScanOperator"; pOperator->operatorType = QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN; pOperator->blocking = false; @@ -1947,7 +1948,7 @@ _error: static int32_t loadDataBlockFromOneTable(SOperatorInfo* pOperator, STableMergeScanInfo* pTableScanInfo, int32_t readerIdx, SSDataBlock* pBlock, uint32_t* status) { - SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; STableMergeScanInfo* pInfo = pOperator->info; SFileBlockLoadRecorder* pCost = &pTableScanInfo->readRecorder; @@ -2200,8 +2201,7 @@ SSDataBlock* doTableMergeScan(SOperatorInfo* pOperator) { longjmp(pTaskInfo->env, code); } - SSDataBlock* pBlock = - getSortedTableMergeScanBlockData(pInfo->pSortHandle, pOperator->resultInfo.capacity, pOperator); + SSDataBlock* pBlock = getSortedTableMergeScanBlockData(pInfo->pSortHandle, pOperator->resultInfo.capacity, pOperator); if (pBlock != NULL) { pOperator->resultInfo.totalRows += pBlock->info.rows; @@ -2234,20 +2234,20 @@ void destroyTableMergeScanOperatorInfo(void* param, int32_t numOfOutput) { typedef struct STableMergeScanExecInfo { SFileBlockLoadRecorder blockRecorder; - SSortExecInfo sortExecInfo; + SSortExecInfo sortExecInfo; } STableMergeScanExecInfo; int32_t getTableMergeScanExplainExecInfo(SOperatorInfo* pOptr, void** pOptrExplain, uint32_t* len) { ASSERT(pOptr != NULL); // TODO: merge these two info into one struct STableMergeScanExecInfo* execInfo = taosMemoryCalloc(1, sizeof(STableMergeScanExecInfo)); - STableMergeScanInfo* pInfo = pOptr->info; + STableMergeScanInfo* pInfo = 
pOptr->info; execInfo->blockRecorder = pInfo->readRecorder; execInfo->sortExecInfo = tsortGetSortExecInfo(pInfo->pSortHandle); *pOptrExplain = execInfo; *len = sizeof(STableMergeScanExecInfo); - + return TSDB_CODE_SUCCESS; } @@ -2277,16 +2277,16 @@ SOperatorInfo* createTableMergeScanOperatorInfo(STableScanPhysiNode* pTableScanN pInfo->scanInfo = (SScanInfo){.numOfAsc = pTableScanNode->scanSeq[0], .numOfDesc = pTableScanNode->scanSeq[1]}; - pInfo->readHandle = *readHandle; - pInfo->interval = extractIntervalInfo(pTableScanNode); + pInfo->readHandle = *readHandle; + pInfo->interval = extractIntervalInfo(pTableScanNode); pInfo->sample.sampleRatio = pTableScanNode->ratio; - pInfo->sample.seed = taosGetTimestampSec(); - pInfo->dataBlockLoadFlag = pTableScanNode->dataRequired; - pInfo->pFilterNode = pTableScanNode->scan.node.pConditions; - pInfo->dataReaders = dataReaders; - pInfo->scanFlag = MAIN_SCAN; - pInfo->pColMatchInfo = pColList; - pInfo->curTWinIdx = 0; + pInfo->sample.seed = taosGetTimestampSec(); + pInfo->dataBlockLoadFlag = pTableScanNode->dataRequired; + pInfo->pFilterNode = pTableScanNode->scan.node.pConditions; + pInfo->dataReaders = dataReaders; + pInfo->scanFlag = MAIN_SCAN; + pInfo->pColMatchInfo = pColList; + pInfo->curTWinIdx = 0; pInfo->pResBlock = createResDataBlock(pDescNode); @@ -2304,22 +2304,22 @@ SOperatorInfo* createTableMergeScanOperatorInfo(STableScanPhysiNode* pTableScanN pInfo->pSortInputBlock = createOneDataBlock(pInfo->pResBlock, false); int32_t rowSize = pInfo->pResBlock->info.rowSize; - pInfo->bufPageSize = getProperSortPageSize(rowSize); + pInfo->bufPageSize = getProperSortPageSize(rowSize); // todo the total available buffer should be determined by total capacity of buffer of this task. 
// the additional one is reserved for merge result - pInfo->sortBufSize = pInfo->bufPageSize * (taosArrayGetSize(dataReaders) + 1); - pInfo->hasGroupId = false; + pInfo->sortBufSize = pInfo->bufPageSize * (taosArrayGetSize(dataReaders) + 1); + pInfo->hasGroupId = false; pInfo->prefetchedTuple = NULL; - pOperator->name = "TableMergeScanOperator"; + pOperator->name = "TableMergeScanOperator"; // TODO : change it pOperator->operatorType = QUERY_NODE_PHYSICAL_PLAN_TABLE_MERGE_SCAN; - pOperator->blocking = false; - pOperator->status = OP_NOT_OPENED; - pOperator->info = pInfo; - pOperator->numOfExprs = numOfCols; - pOperator->pTaskInfo = pTaskInfo; + pOperator->blocking = false; + pOperator->status = OP_NOT_OPENED; + pOperator->info = pInfo; + pOperator->numOfExprs = numOfCols; + pOperator->pTaskInfo = pTaskInfo; initResultSizeInfo(pOperator, 1024); pOperator->fpSet = diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index a35e7679a1..b5a63d937a 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -36,11 +36,11 @@ int32_t tEncodeSStreamTask(SEncoder* pEncoder, const SStreamTask* pTask) { if (tEncodeI32(pEncoder, pTask->taskId) < 0) return -1; if (tEncodeI8(pEncoder, pTask->inputType) < 0) return -1; if (tEncodeI8(pEncoder, pTask->status) < 0) return -1; - if (tEncodeI8(pEncoder, pTask->sourceType) < 0) return -1; if (tEncodeI8(pEncoder, pTask->execType) < 0) return -1; if (tEncodeI8(pEncoder, pTask->sinkType) < 0) return -1; if (tEncodeI8(pEncoder, pTask->dispatchType) < 0) return -1; if (tEncodeI16(pEncoder, pTask->dispatchMsgType) < 0) return -1; + if (tEncodeI8(pEncoder, pTask->dataScan) < 0) return -1; if (tEncodeI32(pEncoder, pTask->childId) < 0) return -1; if (tEncodeI32(pEncoder, pTask->nodeId) < 0) return -1; @@ -84,11 +84,11 @@ int32_t tDecodeSStreamTask(SDecoder* pDecoder, SStreamTask* pTask) { if (tDecodeI32(pDecoder, &pTask->taskId) < 0) return -1; if (tDecodeI8(pDecoder, 
&pTask->inputType) < 0) return -1; if (tDecodeI8(pDecoder, &pTask->status) < 0) return -1; - if (tDecodeI8(pDecoder, &pTask->sourceType) < 0) return -1; if (tDecodeI8(pDecoder, &pTask->execType) < 0) return -1; if (tDecodeI8(pDecoder, &pTask->sinkType) < 0) return -1; if (tDecodeI8(pDecoder, &pTask->dispatchType) < 0) return -1; if (tDecodeI16(pDecoder, &pTask->dispatchMsgType) < 0) return -1; + if (tDecodeI8(pDecoder, &pTask->dataScan) < 0) return -1; if (tDecodeI32(pDecoder, &pTask->childId) < 0) return -1; if (tDecodeI32(pDecoder, &pTask->nodeId) < 0) return -1; From 85766d1a6b8aed1ce1d67bbefd4670446b21e6d6 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Mon, 20 Jun 2022 14:39:56 +0800 Subject: [PATCH 20/40] test: adjust log --- .../system-test/6-cluster/5dnode3mnodeDrop.py | 46 +++++++++---------- tests/system-test/fulltest.sh | 2 +- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/tests/system-test/6-cluster/5dnode3mnodeDrop.py b/tests/system-test/6-cluster/5dnode3mnodeDrop.py index b98134f5e0..e81d5295f2 100644 --- a/tests/system-test/6-cluster/5dnode3mnodeDrop.py +++ b/tests/system-test/6-cluster/5dnode3mnodeDrop.py @@ -75,7 +75,7 @@ class TDTestCase: testCluster = False valgrind = 0 hostname = socket.gethostname() - print(hostname) + tdLog.debug(hostname) dnodes = [] start_port = 6030 start_port_sec = 6130 @@ -102,12 +102,12 @@ class TDTestCase: # create cluster for dnode in self.TDDnodes.dnodes[1:]: - # print(dnode.cfgDict) + # tdLog.debug(dnode.cfgDict) dnode_id = dnode.cfgDict["fqdn"] + ":" +dnode.cfgDict["serverPort"] dnode_first_host = dnode.cfgDict["firstEp"].split(":")[0] dnode_first_port = dnode.cfgDict["firstEp"].split(":")[-1] cmd = f" taos -h {dnode_first_host} -P {dnode_first_port} -s ' create dnode \"{dnode_id} \" ' ;" - print(cmd) + tdLog.debug(cmd) os.system(cmd) time.sleep(2) @@ -119,26 +119,26 @@ class TDTestCase: time.sleep(1) tdSql.query("show mnodes;") if tdSql.checkRows(3) : - print("mnode is three nodes") + 
tdLog.debug("mnode is three nodes") if tdSql.queryResult[0][2]=='leader' : if tdSql.queryResult[1][2]=='follower': if tdSql.queryResult[2][2]=='follower': - print("three mnodes is ready in 10s") + tdLog.debug("three mnodes is ready in 10s") break elif tdSql.queryResult[0][2]=='follower' : if tdSql.queryResult[1][2]=='leader': if tdSql.queryResult[2][2]=='follower': - print("three mnodes is ready in 10s") + tdLog.debug("three mnodes is ready in 10s") break elif tdSql.queryResult[0][2]=='follower' : if tdSql.queryResult[1][2]=='follower': if tdSql.queryResult[2][2]=='leader': - print("three mnodes is ready in 10s") + tdLog.debug("three mnodes is ready in 10s") break count+=1 else: - print(tdSql.queryResult) - print("three mnodes is not ready in 10s ") + tdLog.debug(tdSql.queryResult) + tdLog.debug("three mnodes is not ready in 10s ") return -1 tdSql.query("show mnodes;") @@ -156,19 +156,19 @@ class TDTestCase: time.sleep(1) tdSql.query("show mnodes;") if tdSql.checkRows(3) : - print("mnode is three nodes") + tdLog.debug("mnode is three nodes") if tdSql.queryResult[0][2]=='offline' : if tdSql.queryResult[1][2]=='leader': if tdSql.queryResult[2][2]=='follower': - print("stop mnodes on dnode 2 successfully in 10s") + tdLog.debug("stop mnodes on dnode 2 successfully in 10s") break elif tdSql.queryResult[1][2]=='follower': if tdSql.queryResult[2][2]=='leader': - print("stop mnodes on dnode 2 successfully in 10s") + tdLog.debug("stop mnodes on dnode 2 successfully in 10s") break count+=1 else: - print("stop mnodes on dnode 2 failed in 10s ") + tdLog.debug("stop mnodes on dnode 2 failed in 10s ") return -1 tdSql.error("drop mnode on dnode 1;") @@ -188,15 +188,15 @@ class TDTestCase: time.sleep(1) tdSql.query("show mnodes;") if tdSql.checkRows(3) : - print("mnode is three nodes") + tdLog.debug("mnode is three nodes") if tdSql.queryResult[0][2]=='leader' : if tdSql.queryResult[1][2]=='offline': if tdSql.queryResult[2][2]=='follower': - print("stop mnodes on dnode 2 
successfully in 10s") + tdLog.debug("stop mnodes on dnode 2 successfully in 10s") break count+=1 else: - print("stop mnodes on dnode 2 failed in 10s ") + tdLog.debug("stop mnodes on dnode 2 failed in 10s ") return -1 tdSql.error("drop mnode on dnode 2;") @@ -218,15 +218,15 @@ class TDTestCase: time.sleep(1) tdSql.query("show mnodes;") if tdSql.checkRows(3) : - print("mnode is three nodes") + tdLog.debug("mnode is three nodes") if tdSql.queryResult[0][2]=='leader' : if tdSql.queryResult[2][2]=='offline': if tdSql.queryResult[1][2]=='follower': - print("stop mnodes on dnode 3 successfully in 10s") + tdLog.debug("stop mnodes on dnode 3 successfully in 10s") break count+=1 else: - print("stop mnodes on dnode 3 failed in 10s") + tdLog.debug("stop mnodes on dnode 3 failed in 10s") return -1 tdSql.error("drop mnode on dnode 3;") tdSql.query("show mnodes;") @@ -268,7 +268,7 @@ class TDTestCase: tdSql.error("drop mnode on dnode 1") tdSql.query("show dnodes;") - print(tdSql.queryResult) + tdLog.debug(tdSql.queryResult) # drop follower of mnode dropcount =0 @@ -282,7 +282,7 @@ class TDTestCase: time.sleep(1) tdSql.query("show mnodes;") if tdSql.checkRows(2): - print("drop mnode %d successfully"%(i+1)) + tdLog.debug("drop mnode %d successfully"%(i+1)) break count+=1 tdLog.debug("create mnode on dnode %d"%(i+1)) @@ -292,7 +292,7 @@ class TDTestCase: time.sleep(1) tdSql.query("show mnodes;") if tdSql.checkRows(3): - print("drop mnode %d successfully"%(i+1)) + tdLog.debug("drop mnode %d successfully"%(i+1)) break count+=1 dropcount+=1 @@ -307,7 +307,7 @@ class TDTestCase: def run(self): - # print(self.master_dnode.cfgDict) + # tdLog.debug(self.master_dnode.cfgDict) self.buildcluster(5) self.five_dnode_three_mnode() diff --git a/tests/system-test/fulltest.sh b/tests/system-test/fulltest.sh index a7e04ad37c..41004cc5a2 100755 --- a/tests/system-test/fulltest.sh +++ b/tests/system-test/fulltest.sh @@ -108,7 +108,7 @@ python3 ./test.py -f 2-query/distribute_agg_apercentile.py python3 
./test.py -f 6-cluster/5dnode1mnode.py python3 ./test.py -f 6-cluster/5dnode2mnode.py -python3 ./test.py -f 6-cluster/5dnode3mnodeStop.py +#python3 ./test.py -f 6-cluster/5dnode3mnodeStop.py #python3 ./test.py -f 6-cluster/5dnode3mnodeDrop.py # BUG python3 ./test.py -f 6-cluster/5dnode3mnodeStopInsert.py From fcfd5c250d3beccfa6237b0eb1f68a4cb5ff4214 Mon Sep 17 00:00:00 2001 From: Cary Xu Date: Mon, 20 Jun 2022 15:21:30 +0800 Subject: [PATCH 21/40] feat: fetch rsma result by timer supported --- source/dnode/mnode/impl/src/mndScheduler.c | 2 +- source/dnode/vnode/src/inc/sma.h | 11 +- source/dnode/vnode/src/sma/sma.c | 5 + source/dnode/vnode/src/sma/smaEnv.c | 32 ++- source/dnode/vnode/src/sma/smaRollup.c | 214 +++++++++++++++------ source/dnode/vnode/src/vnd/vnodeSvr.c | 5 +- source/libs/executor/src/executor.c | 6 + 7 files changed, 209 insertions(+), 66 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndScheduler.c b/source/dnode/mnode/impl/src/mndScheduler.c index 39bb6798aa..c53ed6fd0b 100644 --- a/source/dnode/mnode/impl/src/mndScheduler.c +++ b/source/dnode/mnode/impl/src/mndScheduler.c @@ -63,7 +63,7 @@ int32_t mndConvertRsmaTask(char** pDst, int32_t* pDstLen, const char* ast, int64 .topicQuery = false, .streamQuery = true, .rSmaQuery = true, - .triggerType = STREAM_TRIGGER_AT_ONCE, + .triggerType = STREAM_TRIGGER_WINDOW_CLOSE, .watermark = watermark, /*.filesFactor = filesFactor,*/ }; diff --git a/source/dnode/vnode/src/inc/sma.h b/source/dnode/vnode/src/inc/sma.h index 1e77022d04..902e5e1bcc 100644 --- a/source/dnode/vnode/src/inc/sma.h +++ b/source/dnode/vnode/src/inc/sma.h @@ -32,11 +32,12 @@ extern "C" { #define smaTrace(...) 
do { if (smaDebugFlag & DEBUG_TRACE) { taosPrintLog("SMA ", DEBUG_TRACE, tsdbDebugFlag, __VA_ARGS__); }} while(0) // clang-format on -typedef struct SSmaEnv SSmaEnv; -typedef struct SSmaStat SSmaStat; -typedef struct SSmaStatItem SSmaStatItem; -typedef struct SSmaKey SSmaKey; -typedef struct SRSmaInfo SRSmaInfo; +typedef struct SSmaEnv SSmaEnv; +typedef struct SSmaStat SSmaStat; +typedef struct SSmaStatItem SSmaStatItem; +typedef struct SSmaKey SSmaKey; +typedef struct SRSmaInfo SRSmaInfo; +typedef struct SRSmaInfoItem SRSmaInfoItem; struct SSmaEnv { TdThreadRwlock lock; diff --git a/source/dnode/vnode/src/sma/sma.c b/source/dnode/vnode/src/sma/sma.c index b5c55a2f83..12f93f9400 100644 --- a/source/dnode/vnode/src/sma/sma.c +++ b/source/dnode/vnode/src/sma/sma.c @@ -15,6 +15,8 @@ #include "sma.h" +// functions for external invocation + // TODO: Who is responsible for resource allocate and release? int32_t tdProcessTSmaInsert(SSma* pSma, int64_t indexUid, const char* msg) { int32_t code = TSDB_CODE_SUCCESS; @@ -45,6 +47,9 @@ int32_t smaGetTSmaDays(SVnodeCfg* pCfg, void* pCont, uint32_t contLen, int32_t* return code; } + +// functions for internal invocation + #if 0 /** diff --git a/source/dnode/vnode/src/sma/smaEnv.c b/source/dnode/vnode/src/sma/smaEnv.c index f71c222772..a80af2b202 100644 --- a/source/dnode/vnode/src/sma/smaEnv.c +++ b/source/dnode/vnode/src/sma/smaEnv.c @@ -208,7 +208,6 @@ int32_t tdUnLockSma(SSma *pSma) { int32_t tdCheckAndInitSmaEnv(SSma *pSma, int8_t smaType) { SSmaEnv *pEnv = NULL; - // return if already init switch (smaType) { case TSDB_SMA_TYPE_TIME_RANGE: if ((pEnv = (SSmaEnv *)atomic_load_ptr(&SMA_TSMA_ENV(pSma)))) { @@ -244,3 +243,34 @@ int32_t tdCheckAndInitSmaEnv(SSma *pSma, int8_t smaType) { return TSDB_CODE_SUCCESS; }; + +int32_t smaTimerInit(void **timer, int8_t *initFlag, const char *label) { + int8_t old; + while (1) { + old = atomic_val_compare_exchange_8(initFlag, 0, 2); + if (old != 2) break; + } + + if (old == 0) { + *timer = 
taosTmrInit(10000, 100, 10000, label); + if (!(*timer)) { + atomic_store_8(initFlag, 0); + return -1; + } + atomic_store_8(initFlag, 1); + } + return 0; +} + +void smaTimerCleanUp(void *timer, int8_t *initFlag) { + int8_t old; + while (1) { + old = atomic_val_compare_exchange_8(initFlag, 1, 2); + if (old != 2) break; + } + + if (old == 1) { + taosTmrCleanUp(timer); + atomic_store_8(initFlag, 0); + } +} diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 0c372dfa70..458170b9aa 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -14,14 +14,36 @@ */ #include "sma.h" +#include "tstream.h" static FORCE_INLINE int32_t tdUidStorePut(STbUidStore *pStore, tb_uid_t suid, tb_uid_t *uid); static FORCE_INLINE int32_t tdUpdateTbUidListImpl(SSma *pSma, tb_uid_t *suid, SArray *tbUids); -static FORCE_INLINE int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t inputType, qTaskInfo_t *taskInfo, - STSchema *pTSchema, tb_uid_t suid, int8_t level); +static FORCE_INLINE int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t inputType, SRSmaInfoItem *rsmaItem, + tb_uid_t suid, int8_t level); + +struct SRSmaInfoItem { + SRSmaInfo *pRsmaInfo; + void *taskInfo; // qTaskInfo_t + void *tmrHandle; + tmr_h tmrId; + int8_t level; + int8_t tmrInitFlag; + int8_t triggerStatus; // TASK_TRIGGER_STATUS__IN_ACTIVE/TASK_TRIGGER_STATUS__ACTIVE + int32_t maxDelay; +}; + +typedef struct { + int64_t suid; + SRSmaInfoItem *pItem; + SSma *pSma; + STSchema *pTSchema; +} SRSmaTriggerParam; struct SRSmaInfo { - void *taskInfo[TSDB_RETENTION_L2]; // qTaskInfo_t + STSchema *pTSchema; + SSma *pSma; + int64_t suid; + SRSmaInfoItem items[TSDB_RETENTION_L2]; }; static FORCE_INLINE void tdFreeTaskHandle(qTaskInfo_t *taskHandle) { @@ -33,11 +55,20 @@ static FORCE_INLINE void tdFreeTaskHandle(qTaskInfo_t *taskHandle) { } void *tdFreeRSmaInfo(SRSmaInfo *pInfo) { - for (int32_t i = 0; i < TSDB_RETENTION_MAX; 
++i) { - if (pInfo->taskInfo[i]) { - tdFreeTaskHandle(pInfo->taskInfo[i]); + if (pInfo) { + for (int32_t i = 0; i < TSDB_RETENTION_MAX; ++i) { + SRSmaInfoItem *pItem = &pInfo->items[i]; + if (pItem->taskInfo) { + tdFreeTaskHandle(pItem->taskInfo); + } + if (pItem->tmrHandle) { + taosTmrCleanUp(pItem->tmrHandle); + } } + taosMemoryFree(pInfo->pTSchema); + taosMemoryFree(pInfo); } + return NULL; } @@ -69,20 +100,20 @@ static FORCE_INLINE int32_t tdUpdateTbUidListImpl(SSma *pSma, tb_uid_t *suid, SA return TSDB_CODE_FAILED; } - if (pRSmaInfo->taskInfo[0] && (qUpdateQualifiedTableId(pRSmaInfo->taskInfo[0], tbUids, true) != 0)) { + if (pRSmaInfo->items[0].taskInfo && (qUpdateQualifiedTableId(pRSmaInfo->items[0].taskInfo, tbUids, true) < 0)) { smaError("vgId:%d, update tbUidList failed for uid:%" PRIi64 " since %s", SMA_VID(pSma), *suid, terrstr(terrno)); return TSDB_CODE_FAILED; } else { smaDebug("vgId:%d, update tbUidList succeed for qTaskInfo:%p with suid:%" PRIi64 ", uid:%" PRIi64, SMA_VID(pSma), - pRSmaInfo->taskInfo[0], *suid, *(int64_t *)taosArrayGet(tbUids, 0)); + pRSmaInfo->items[0].taskInfo, *suid, *(int64_t *)taosArrayGet(tbUids, 0)); } - if (pRSmaInfo->taskInfo[1] && (qUpdateQualifiedTableId(pRSmaInfo->taskInfo[1], tbUids, true) != 0)) { + if (pRSmaInfo->items[1].taskInfo && (qUpdateQualifiedTableId(pRSmaInfo->items[1].taskInfo, tbUids, true) < 0)) { smaError("vgId:%d, update tbUidList failed for uid:%" PRIi64 " since %s", SMA_VID(pSma), *suid, terrstr(terrno)); return TSDB_CODE_FAILED; } else { smaDebug("vgId:%d, update tbUidList succeed for qTaskInfo:%p with suid:%" PRIi64 ", uid:%" PRIi64, SMA_VID(pSma), - pRSmaInfo->taskInfo[1], *suid, *(int64_t *)taosArrayGet(tbUids, 0)); + pRSmaInfo->items[1].taskInfo, *suid, *(int64_t *)taosArrayGet(tbUids, 0)); } return TSDB_CODE_SUCCESS; @@ -144,12 +175,12 @@ int32_t tdFetchTbUidList(SSma *pSma, STbUidStore **ppStore, tb_uid_t suid, tb_ui ASSERT(ppStore != NULL); if (!(*ppStore)) { - if (tdUidStoreInit(ppStore) != 0) 
{ + if (tdUidStoreInit(ppStore) < 0) { return TSDB_CODE_FAILED; } } - if (tdUidStorePut(*ppStore, suid, &uid) != 0) { + if (tdUidStorePut(*ppStore, suid, &uid) < 0) { *ppStore = tdUidStoreFree(*ppStore); return TSDB_CODE_FAILED; } @@ -172,8 +203,8 @@ int32_t tdProcessRSmaCreate(SVnode *pVnode, SVCreateStbReq *pReq) { return TSDB_CODE_SUCCESS; } - SMeta *pMeta = pVnode->pMeta; - SMsgCb *pMsgCb = &pVnode->msgCb; + SMeta *pMeta = pVnode->pMeta; + SMsgCb *pMsgCb = &pVnode->msgCb; SRSmaParam *param = &pReq->pRSmaParam; if ((param->qmsg1Len == 0) && (param->qmsg2Len == 0)) { @@ -192,10 +223,12 @@ int32_t tdProcessRSmaCreate(SVnode *pVnode, SVCreateStbReq *pReq) { pRSmaInfo = taosHashGet(SMA_STAT_INFO_HASH(pStat), &pReq->suid, sizeof(tb_uid_t)); if (pRSmaInfo) { + ASSERT(0); // TODO: free original pRSmaInfo is exists abnormally smaWarn("vgId:%d, rsma info already exists for stb: %s, %" PRIi64, SMA_VID(pSma), pReq->name, pReq->suid); return TSDB_CODE_SUCCESS; } + // from write queue: single thead pRSmaInfo = (SRSmaInfo *)taosMemoryCalloc(1, sizeof(SRSmaInfo)); if (!pRSmaInfo) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -204,9 +237,8 @@ int32_t tdProcessRSmaCreate(SVnode *pVnode, SVCreateStbReq *pReq) { STqReadHandle *pReadHandle = tqInitSubmitMsgScanner(pMeta); if (!pReadHandle) { - taosMemoryFree(pRSmaInfo); terrno = TSDB_CODE_OUT_OF_MEMORY; - return TSDB_CODE_FAILED; + goto _err; } SReadHandle handle = { @@ -216,32 +248,58 @@ int32_t tdProcessRSmaCreate(SVnode *pVnode, SVCreateStbReq *pReq) { .vnode = pVnode, }; + STSchema *pTSchema = metaGetTbTSchema(SMA_META(pSma), pReq->suid, -1); + if (!pTSchema) { + terrno = TSDB_CODE_TDB_IVD_TB_SCHEMA_VERSION; + goto _err; + } + pRSmaInfo->pTSchema = pTSchema; + pRSmaInfo->pSma = pSma; + pRSmaInfo->suid = pReq->suid; + if (param->qmsg1) { - pRSmaInfo->taskInfo[0] = qCreateStreamExecTaskInfo(param->qmsg1, &handle); - if (!pRSmaInfo->taskInfo[0]) { - taosMemoryFree(pRSmaInfo); - taosMemoryFree(pReadHandle); - return TSDB_CODE_FAILED; + 
pRSmaInfo->items[0].pRsmaInfo = pRSmaInfo; + pRSmaInfo->items[0].taskInfo = qCreateStreamExecTaskInfo(param->qmsg1, &handle); + if (!pRSmaInfo->items[0].taskInfo) { + goto _err; + } + pRSmaInfo->items[0].triggerStatus = TASK_TRIGGER_STATUS__IN_ACTIVE; + pRSmaInfo->items[0].maxDelay = 5000; + pRSmaInfo->items[0].level = TSDB_RETENTION_L1; + pRSmaInfo->items[0].tmrHandle = taosTmrInit(10000, 100, 10000, "RSMA_L1"); + + if (!pRSmaInfo->items[0].tmrHandle) { + goto _err; } } if (param->qmsg2) { - pRSmaInfo->taskInfo[1] = qCreateStreamExecTaskInfo(param->qmsg2, &handle); - if (!pRSmaInfo->taskInfo[1]) { - taosMemoryFree(pRSmaInfo); - taosMemoryFree(pReadHandle); - return TSDB_CODE_FAILED; + pRSmaInfo->items[1].pRsmaInfo = pRSmaInfo; + pRSmaInfo->items[1].taskInfo = qCreateStreamExecTaskInfo(param->qmsg2, &handle); + if (!pRSmaInfo->items[1].taskInfo) { + goto _err; + } + pRSmaInfo->items[1].triggerStatus = TASK_TRIGGER_STATUS__IN_ACTIVE; + pRSmaInfo->items[1].maxDelay = 5000; + pRSmaInfo->items[0].level = TSDB_RETENTION_L2; + pRSmaInfo->items[1].tmrHandle = taosTmrInit(10000, 100, 10000, "RSMA_L2"); + if (!pRSmaInfo->items[1].tmrHandle) { + goto _err; } } if (taosHashPut(SMA_STAT_INFO_HASH(pStat), &pReq->suid, sizeof(tb_uid_t), &pRSmaInfo, sizeof(pRSmaInfo)) != TSDB_CODE_SUCCESS) { - return TSDB_CODE_FAILED; + goto _err; } else { smaDebug("vgId:%d, register rsma info succeed for suid:%" PRIi64, SMA_VID(pSma), pReq->suid); } return TSDB_CODE_SUCCESS; +_err: + tdFreeRSmaInfo(pRSmaInfo); + taosMemoryFree(pReadHandle); + return TSDB_CODE_FAILED; } /** @@ -291,12 +349,12 @@ static int32_t tdUidStorePut(STbUidStore *pStore, tb_uid_t suid, tb_uid_t *uid) terrno = TSDB_CODE_OUT_OF_MEMORY; return TSDB_CODE_FAILED; } - if (taosHashPut(pStore->uidHash, &suid, sizeof(suid), &pUidArray, sizeof(pUidArray)) != 0) { + if (taosHashPut(pStore->uidHash, &suid, sizeof(suid), &pUidArray, sizeof(pUidArray)) < 0) { return TSDB_CODE_FAILED; } } } else { - if (taosHashPut(pStore->uidHash, 
&suid, sizeof(suid), NULL, 0) != 0) { + if (taosHashPut(pStore->uidHash, &suid, sizeof(suid), NULL, 0) < 0) { return TSDB_CODE_FAILED; } } @@ -367,22 +425,15 @@ static int32_t tdFetchSubmitReqSuids(SSubmitReq *pMsg, STbUidStore *pStore) { return 0; } -static FORCE_INLINE int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t inputType, qTaskInfo_t *taskInfo, - STSchema *pTSchema, tb_uid_t suid, int8_t level) { - SArray *pResult = NULL; +static int32_t tdFetchAndSubmitRSmaResult(SRSmaInfoItem *pItem, int8_t blkType) { + SArray *pResult = NULL; + SRSmaInfo *pRSmaInfo = pItem->pRsmaInfo; + SSma *pSma = pRSmaInfo->pSma; - if (!taskInfo) { - smaDebug("vgId:%d, no qTaskInfo to execute rsma %" PRIi8 " task for suid:%" PRIu64, SMA_VID(pSma), level, suid); - return TSDB_CODE_SUCCESS; - } - - smaDebug("vgId:%d, execute rsma %" PRIi8 " task for qTaskInfo:%p suid:%" PRIu64, SMA_VID(pSma), level, taskInfo, suid); - - qSetStreamInput(taskInfo, pMsg, inputType, true); while (1) { SSDataBlock *output = NULL; uint64_t ts; - if (qExecTask(taskInfo, &output, &ts) < 0) { + if (qExecTask(pItem->taskInfo, &output, &ts) < 0) { ASSERT(false); } if (!output) { @@ -402,16 +453,16 @@ static FORCE_INLINE int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int3 if (taosArrayGetSize(pResult) > 0) { #if 0 char flag[10] = {0}; - snprintf(flag, 10, "level %" PRIi8, level); + snprintf(flag, 10, "level %" PRIi8, pItem->level); blockDebugShowData(pResult, flag); #endif - STsdb *sinkTsdb = (level == TSDB_RETENTION_L1 ? pSma->pRSmaTsdb1 : pSma->pRSmaTsdb2); + STsdb *sinkTsdb = (pItem->level == TSDB_RETENTION_L1 ? 
pSma->pRSmaTsdb1 : pSma->pRSmaTsdb2); SSubmitReq *pReq = NULL; - if (buildSubmitReqFromDataBlock(&pReq, pResult, pTSchema, SMA_VID(pSma), suid) < 0) { + if (buildSubmitReqFromDataBlock(&pReq, pResult, pRSmaInfo->pTSchema, SMA_VID(pSma), pRSmaInfo->suid) < 0) { taosArrayDestroy(pResult); return TSDB_CODE_FAILED; } - + if (pReq && tdProcessSubmitReq(sinkTsdb, INT64_MAX, pReq) < 0) { taosArrayDestroy(pResult); taosMemoryFreeClear(pReq); @@ -420,10 +471,63 @@ static FORCE_INLINE int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int3 taosMemoryFreeClear(pReq); } else { - smaDebug("vgId:%d, no rsma % " PRIi8 " data generated since %s", SMA_VID(pSma), level, tstrerror(terrno)); + smaDebug("vgId:%d, no rsma % " PRIi8 " data generated since %s", SMA_VID(pSma), pItem->level, tstrerror(terrno)); + } + + if (blkType == STREAM_DATA_TYPE_SUBMIT_BLOCK) { + atomic_store_8(&pItem->triggerStatus, TASK_TRIGGER_STATUS__ACTIVE); } taosArrayDestroy(pResult); + return 0; +} + +/** + * @brief trigger to get rsma result + * + * @param param + * @param tmrId + */ +static void rsmaTriggerByTimer(void *param, void *tmrId) { + // SRSmaTriggerParam *pParam = (SRSmaTriggerParam *)param; + // SRSmaInfoItem *pItem = pParam->pItem; + SRSmaInfoItem *pItem = param; + + if (atomic_load_8(&pItem->triggerStatus) == TASK_TRIGGER_STATUS__ACTIVE) { + printf("%s:%d THREAD:%" PRIi64 " status = active\n", __func__, __LINE__, taosGetSelfPthreadId()); + SSDataBlock dataBlock = {.info.type = STREAM_GET_ALL}; + + atomic_store_8(&pItem->triggerStatus, TASK_TRIGGER_STATUS__IN_ACTIVE); + qSetStreamInput(pItem->taskInfo, &dataBlock, STREAM_DATA_TYPE_SSDATA_BLOCK, false); + + tdFetchAndSubmitRSmaResult(pItem, STREAM_DATA_TYPE_SSDATA_BLOCK); + } else { + printf("%s:%d THREAD:%" PRIi64 " status = in active\n", __func__, __LINE__, taosGetSelfPthreadId()); + } + + // taosTmrReset(rsmaTriggerByTimer, pItem->maxDelay, pItem, pItem->tmrHandle, &pItem->tmrId); +} + +static FORCE_INLINE int32_t tdExecuteRSmaImpl(SSma 
*pSma, const void *pMsg, int32_t inputType, SRSmaInfoItem *pItem, + tb_uid_t suid, int8_t level) { + if (!pItem || !pItem->taskInfo) { + smaDebug("vgId:%d, no qTaskInfo to execute rsma %" PRIi8 " task for suid:%" PRIu64, SMA_VID(pSma), level, suid); + return TSDB_CODE_SUCCESS; + } + + smaDebug("vgId:%d, execute rsma %" PRIi8 " task for qTaskInfo:%p suid:%" PRIu64, SMA_VID(pSma), level, + pItem->taskInfo, suid); + + // inputType = STREAM_DATA_TYPE_SUBMIT_BLOCK(1) + if (qSetStreamInput(pItem->taskInfo, pMsg, inputType, true) < 0) { + smaError("vgId:%d, rsma % " PRIi8 " qSetStreamInput failed since %s", SMA_VID(pSma), level, tstrerror(terrno)); + return TSDB_CODE_FAILED; + } + + // SRSmaTriggerParam triggerParam = {.suid = suid, .pItem = pItem, .pSma = pSma, .pTSchema = pTSchema}; + tdFetchAndSubmitRSmaResult(pItem, STREAM_DATA_TYPE_SUBMIT_BLOCK); + atomic_store_8(&pItem->triggerStatus, TASK_TRIGGER_STATUS__ACTIVE); + taosTmrReset(rsmaTriggerByTimer, pItem->maxDelay, pItem, pItem->tmrHandle, &pItem->tmrId); return TSDB_CODE_SUCCESS; } @@ -441,24 +545,18 @@ static int32_t tdExecuteRSma(SSma *pSma, const void *pMsg, int32_t inputType, tb pRSmaInfo = taosHashGet(SMA_STAT_INFO_HASH(pStat), &suid, sizeof(tb_uid_t)); if (!pRSmaInfo || !(pRSmaInfo = *(SRSmaInfo **)pRSmaInfo)) { - smaDebug("vgId:%d, no rsma info for suid:%" PRIu64, SMA_VID(pSma), suid); + smaDebug("vgId:%d, return as no rsma info for suid:%" PRIu64, SMA_VID(pSma), suid); return TSDB_CODE_SUCCESS; } - if (!pRSmaInfo->taskInfo[0]) { - smaDebug("vgId:%d, no rsma qTaskInfo for suid:%" PRIu64, SMA_VID(pSma), suid); + + if (!pRSmaInfo->items[0].taskInfo) { + smaDebug("vgId:%d, return as no rsma qTaskInfo for suid:%" PRIu64, SMA_VID(pSma), suid); return TSDB_CODE_SUCCESS; } if (inputType == STREAM_DATA_TYPE_SUBMIT_BLOCK) { - // TODO: cache STSchema - STSchema *pTSchema = metaGetTbTSchema(SMA_META(pSma), suid, -1); - if (!pTSchema) { - terrno = TSDB_CODE_TDB_IVD_TB_SCHEMA_VERSION; - return TSDB_CODE_FAILED; - } - 
tdExecuteRSmaImpl(pSma, pMsg, inputType, pRSmaInfo->taskInfo[0], pTSchema, suid, TSDB_RETENTION_L1); - tdExecuteRSmaImpl(pSma, pMsg, inputType, pRSmaInfo->taskInfo[1], pTSchema, suid, TSDB_RETENTION_L2); - taosMemoryFree(pTSchema); + tdExecuteRSmaImpl(pSma, pMsg, inputType, &pRSmaInfo->items[0], suid, TSDB_RETENTION_L1); + tdExecuteRSmaImpl(pSma, pMsg, inputType, &pRSmaInfo->items[1], suid, TSDB_RETENTION_L2); } return TSDB_CODE_SUCCESS; diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index fb22b7c5bf..98fcee97c5 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -346,7 +346,10 @@ static int32_t vnodeProcessCreateStbReq(SVnode *pVnode, int64_t version, void *p goto _err; } - tdProcessRSmaCreate(pVnode, &req); + if (tdProcessRSmaCreate(pVnode, &req) < 0) { + pRsp->code = terrno; + goto _err; + } tDecoderClear(&coder); return 0; diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index b1d076e8f5..1526cf66f1 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -41,12 +41,18 @@ static int32_t doSetStreamBlock(SOperatorInfo* pOperator, void* input, size_t nu pInfo->assignBlockUid = assignUid; // the block type can not be changed in the streamscan operators +#if 0 if (pInfo->blockType == 0) { pInfo->blockType = type; } else if (pInfo->blockType != type) { ASSERT(0); return TSDB_CODE_QRY_APP_ERROR; } +#endif + // rollup sma, the same qTaskInfo is used to insert data by SubmitReq and fetch result by SSDataBlock + if (pInfo->blockType != type) { + pInfo->blockType = type; + } if (type == STREAM_DATA_TYPE_SUBMIT_BLOCK) { if (tqReadHandleSetMsg(pInfo->streamBlockReader, input, 0) < 0) { From 24af96f833241bdca2ae0c95574f2fde135eb9a5 Mon Sep 17 00:00:00 2001 From: Cary Xu Date: Mon, 20 Jun 2022 15:39:20 +0800 Subject: [PATCH 22/40] other: solve conflict --- source/dnode/mnode/impl/src/mndScheduler.c | 2 +- 
source/dnode/mnode/impl/src/mndStb.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndScheduler.c b/source/dnode/mnode/impl/src/mndScheduler.c index c53ed6fd0b..37aa2d33d0 100644 --- a/source/dnode/mnode/impl/src/mndScheduler.c +++ b/source/dnode/mnode/impl/src/mndScheduler.c @@ -63,7 +63,7 @@ int32_t mndConvertRsmaTask(char** pDst, int32_t* pDstLen, const char* ast, int64 .topicQuery = false, .streamQuery = true, .rSmaQuery = true, - .triggerType = STREAM_TRIGGER_WINDOW_CLOSE, + .triggerType = triggerType, .watermark = watermark, /*.filesFactor = filesFactor,*/ }; diff --git a/source/dnode/mnode/impl/src/mndStb.c b/source/dnode/mnode/impl/src/mndStb.c index 96dc79adbc..6c8021e3b3 100644 --- a/source/dnode/mnode/impl/src/mndStb.c +++ b/source/dnode/mnode/impl/src/mndStb.c @@ -403,13 +403,13 @@ static void *mndBuildVCreateStbReq(SMnode *pMnode, SVgObj *pVgroup, SStbObj *pSt req.pRSmaParam.delay = pStb->delay; if (pStb->ast1Len > 0) { if (mndConvertRsmaTask(&req.pRSmaParam.qmsg1, &req.pRSmaParam.qmsg1Len, pStb->pAst1, pStb->uid, - STREAM_TRIGGER_AT_ONCE, 0, req.pRSmaParam.xFilesFactor) != TSDB_CODE_SUCCESS) { + STREAM_TRIGGER_WINDOW_CLOSE, 0, req.pRSmaParam.xFilesFactor) != TSDB_CODE_SUCCESS) { return NULL; } } if (pStb->ast2Len > 0) { if (mndConvertRsmaTask(&req.pRSmaParam.qmsg2, &req.pRSmaParam.qmsg2Len, pStb->pAst2, pStb->uid, - STREAM_TRIGGER_AT_ONCE, 0, req.pRSmaParam.xFilesFactor) != TSDB_CODE_SUCCESS) { + STREAM_TRIGGER_WINDOW_CLOSE, 0, req.pRSmaParam.xFilesFactor) != TSDB_CODE_SUCCESS) { return NULL; } } From 6a86b51cf092600502380f983a47a67d81bb391a Mon Sep 17 00:00:00 2001 From: 54liuyao <54liuyao@163.com> Date: Mon, 20 Jun 2022 15:37:35 +0800 Subject: [PATCH 23/40] ci(stream): stream interval test --- tests/script/jenkins/basic.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/script/jenkins/basic.txt b/tests/script/jenkins/basic.txt index 720625a570..49f79f9488 100644 --- 
a/tests/script/jenkins/basic.txt +++ b/tests/script/jenkins/basic.txt @@ -72,14 +72,14 @@ ./test.sh -f tsim/stream/basic0.sim ./test.sh -f tsim/stream/basic1.sim ./test.sh -f tsim/stream/basic2.sim -# ./test.sh -f tsim/stream/distributeInterval0.sim +./test.sh -f tsim/stream/distributeInterval0.sim # ./test.sh -f tsim/stream/distributesession0.sim # ./test.sh -f tsim/stream/session0.sim # ./test.sh -f tsim/stream/session1.sim # ./test.sh -f tsim/stream/state0.sim -# ./test.sh -f tsim/stream/triggerInterval0.sim +./test.sh -f tsim/stream/triggerInterval0.sim # ./test.sh -f tsim/stream/triggerSession0.sim -# ./test.sh -f tsim/stream/partitionby.sim +./test.sh -f tsim/stream/partitionby.sim # ---- transaction From 63a917581d8a10dcfdcbfd1c0dd2ed74e34d0c6e Mon Sep 17 00:00:00 2001 From: Ganlin Zhao Date: Mon, 20 Jun 2022 16:11:42 +0800 Subject: [PATCH 24/40] fix(query): select last(*) from super table returns 0 if all columns are NULL. TD-16561 --- source/libs/function/src/builtinsimpl.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c index c72d7b5106..7440df062f 100644 --- a/source/libs/function/src/builtinsimpl.c +++ b/source/libs/function/src/builtinsimpl.c @@ -2563,9 +2563,6 @@ int32_t lastFunction(SqlFunctionCtx* pCtx) { } static void firstLastTransferInfo(SFirstLastRes* pInput, SFirstLastRes* pOutput, bool isFirst) { - if (!pInput->hasResult) { - return; - } pOutput->bytes = pInput->bytes; TSKEY* tsIn = (TSKEY*)(pInput->buf + pInput->bytes); TSKEY* tsOut = (TSKEY*)(pOutput->buf + pInput->bytes); @@ -2599,7 +2596,9 @@ static int32_t firstLastFunctionMergeImpl(SqlFunctionCtx* pCtx, bool isFirstQuer firstLastTransferInfo(pInputInfo, pInfo, isFirstQuery); - SET_VAL(GET_RES_INFO(pCtx), 1, 1); + int32_t numOfElems = pInputInfo->hasResult ? 
1 : 0; + + SET_VAL(GET_RES_INFO(pCtx), numOfElems, 1); return TSDB_CODE_SUCCESS; } @@ -2624,6 +2623,7 @@ int32_t firstLastFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) { int32_t firstLastPartialFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) { SResultRowEntryInfo* pEntryInfo = GET_RES_INFO(pCtx); SFirstLastRes* pRes = GET_ROWCELL_INTERBUF(GET_RES_INFO(pCtx)); + int32_t resultBytes = getFirstLastInfoSize(pRes->bytes); char* res = taosMemoryCalloc(resultBytes + VARSTR_HEADER_SIZE, sizeof(char)); From 96434756de7365c93a5aadd88bc46a9cfb5c7e47 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Mon, 20 Jun 2022 16:23:04 +0800 Subject: [PATCH 25/40] refactor: save sdb file on needed --- source/common/src/tglobal.c | 2 +- source/dnode/mnode/impl/src/mndSync.c | 3 ++- source/dnode/mnode/impl/src/mndTrans.c | 6 +++--- source/dnode/mnode/sdb/inc/sdb.h | 2 +- source/dnode/mnode/sdb/src/sdb.c | 6 +++++- ...redistribute_vgroup_replica3_v1_leader.sim | 21 ------------------- 6 files changed, 12 insertions(+), 28 deletions(-) diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index 7457fe7eb6..54b478f33f 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -185,7 +185,7 @@ char tsCompressor[32] = "ZSTD_COMPRESSOR"; // ZSTD_COMPRESSOR or GZIP_COMPR bool tsStartUdfd = true; // internal -int32_t tsTransPullupInterval = 6; +int32_t tsTransPullupInterval = 2; int32_t tsMqRebalanceInterval = 2; void taosAddDataDir(int32_t index, char *v1, int32_t level, int32_t primary) { diff --git a/source/dnode/mnode/impl/src/mndSync.c b/source/dnode/mnode/impl/src/mndSync.c index 3a023bcece..07f65b2a90 100644 --- a/source/dnode/mnode/impl/src/mndSync.c +++ b/source/dnode/mnode/impl/src/mndSync.c @@ -68,8 +68,9 @@ void mndSyncCommitMsg(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbM mndTransExecute(pMnode, pTrans); mndReleaseTrans(pMnode, pTrans); } - +#if 0 sdbWriteFile(pMnode->pSdb, SDB_WRITE_DELTA); +#endif } } diff --git 
a/source/dnode/mnode/impl/src/mndTrans.c b/source/dnode/mnode/impl/src/mndTrans.c index 31a955b030..30e46af03c 100644 --- a/source/dnode/mnode/impl/src/mndTrans.c +++ b/source/dnode/mnode/impl/src/mndTrans.c @@ -804,7 +804,7 @@ static void mndTransSendRpcRsp(SMnode *pMnode, STrans *pTrans) { sendRsp = true; } } else { - if (pTrans->stage == TRN_STAGE_REDO_ACTION && pTrans->failedTimes > 2) { + if (pTrans->stage == TRN_STAGE_REDO_ACTION && pTrans->failedTimes > 3) { if (code == 0) code = TSDB_CODE_MND_TRANS_UNKNOW_ERROR; sendRsp = true; } @@ -1127,6 +1127,7 @@ static int32_t mndTransExecuteRedoActionsSerial(SMnode *pMnode, STrans *pTrans) } if (code == 0) { + pTrans->failedTimes = 0; pTrans->lastAction = action; pTrans->lastMsgType = 0; pTrans->lastErrorNo = 0; @@ -1430,8 +1431,7 @@ void mndTransPullup(SMnode *pMnode) { mndReleaseTrans(pMnode, pTrans); } - // todo, set to SDB_WRITE_DELTA - sdbWriteFile(pMnode->pSdb, 0); + sdbWriteFile(pMnode->pSdb, SDB_WRITE_DELTA); taosArrayDestroy(pArray); } diff --git a/source/dnode/mnode/sdb/inc/sdb.h b/source/dnode/mnode/sdb/inc/sdb.h index 1bd09aef63..3b1c4000a8 100644 --- a/source/dnode/mnode/sdb/inc/sdb.h +++ b/source/dnode/mnode/sdb/inc/sdb.h @@ -37,7 +37,7 @@ extern "C" { #define mTrace(...) 
{ if (mDebugFlag & DEBUG_TRACE) { taosPrintLog("MND ", DEBUG_TRACE, mDebugFlag, __VA_ARGS__); }} // clang-format on -#define SDB_WRITE_DELTA 100 +#define SDB_WRITE_DELTA 20 #define SDB_GET_VAL(pData, dataPos, val, pos, func, type) \ { \ diff --git a/source/dnode/mnode/sdb/src/sdb.c b/source/dnode/mnode/sdb/src/sdb.c index c44f1670c3..fbf66da632 100644 --- a/source/dnode/mnode/sdb/src/sdb.c +++ b/source/dnode/mnode/sdb/src/sdb.c @@ -161,9 +161,11 @@ static int32_t sdbCreateDir(SSdb *pSdb) { } void sdbSetApplyInfo(SSdb *pSdb, int64_t index, int64_t term, int64_t config) { - mTrace("mnode apply info changed, from index:%" PRId64 " term:%" PRId64 " config:%" PRId64 ", to index:%" PRId64 +#if 1 + mTrace("mnode apply info changed from index:%" PRId64 " term:%" PRId64 " config:%" PRId64 " to index:%" PRId64 " term:%" PRId64 " config:%" PRId64, pSdb->applyIndex, pSdb->applyTerm, pSdb->applyConfig, index, term, config); +#endif pSdb->applyIndex = index; pSdb->applyTerm = term; pSdb->applyConfig = config; @@ -173,7 +175,9 @@ void sdbGetCommitInfo(SSdb *pSdb, int64_t *index, int64_t *term, int64_t *config *index = pSdb->commitIndex; *term = pSdb->commitTerm; *config = pSdb->commitConfig; +#if 0 mTrace("mnode current info, apply index:%" PRId64 " term:%" PRId64 " config:%" PRId64 ", commit index:%" PRId64 " term:%" PRId64 " config:%" PRId64, pSdb->applyIndex, pSdb->applyTerm, pSdb->applyConfig, *index, *term, *config); +#endif } diff --git a/tests/script/tsim/dnode/redistribute_vgroup_replica3_v1_leader.sim b/tests/script/tsim/dnode/redistribute_vgroup_replica3_v1_leader.sim index 7b52b51306..e9dd82cad9 100644 --- a/tests/script/tsim/dnode/redistribute_vgroup_replica3_v1_leader.sim +++ b/tests/script/tsim/dnode/redistribute_vgroup_replica3_v1_leader.sim @@ -53,31 +53,10 @@ endi if $data(4)[4] != ready then goto step1 endi -#if $data(5)[4] != ready then -# goto step1 -#endi print =============== step2: create db sql create database d1 vgroups 1 replica 3 -# dnode not exist 
-sql_error redistribute vgroup 3 dnode 6 dnode 3 dnode 4 -# vgroup not exist -sql_error redistribute vgroup 3 dnode 5 dnode 3 dnode 4 -# un changed -sql_error redistribute vgroup 2 dnode 2 dnode 3 dnode 4 -# no enought vnodes -sql_error redistribute vgroup 2 dnode 1 dnode 3 dnode 4 -# offline vnodes -sql_error redistribute vgroup 2 dnode 5 dnode 3 dnode 4 -# Invalid replica -sql_error redistribute vgroup 2 dnode 5 -sql_error redistribute vgroup 2 dnode 5 dnode 3 -sql_error redistribute vgroup 2 dnode 2 dnode 3 -sql_error redistribute vgroup 2 dnode 2 dnode 2 -sql_error redistribute vgroup 3 dnode 2 dnode 2 -sql_error redistribute vgroup 2 dnode 2 dnode 2 dnode 3 - system sh/exec.sh -n dnode5 -s start $x = 0 step2: From a28a275dffce45b62eb3d057796ac403f174df85 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Mon, 20 Jun 2022 16:39:19 +0800 Subject: [PATCH 26/40] fix: fix eliminate projection bugs --- source/libs/executor/inc/executil.h | 2 +- source/libs/executor/src/executil.c | 5 +++-- source/libs/executor/src/executorimpl.c | 2 +- source/libs/executor/src/scanoperator.c | 8 ++++---- source/libs/planner/test/planOptimizeTest.cpp | 1 + 5 files changed, 10 insertions(+), 8 deletions(-) diff --git a/source/libs/executor/inc/executil.h b/source/libs/executor/inc/executil.h index 07686893db..1117be5db0 100644 --- a/source/libs/executor/inc/executil.h +++ b/source/libs/executor/inc/executil.h @@ -114,7 +114,7 @@ SArray* extractColMatchInfo(SNodeList* pNodeList, SDataBlockDescNode* pOutputNod SExprInfo* createExprInfo(SNodeList* pNodeList, SNodeList* pGroupKeys, int32_t* numOfExprs); SqlFunctionCtx* createSqlFunctionCtx(SExprInfo* pExprInfo, int32_t numOfOutput, int32_t** rowEntryInfoOffset); -void relocateColumnData(SSDataBlock* pBlock, const SArray* pColMatchInfo, SArray* pCols); +void relocateColumnData(SSDataBlock* pBlock, const SArray* pColMatchInfo, SArray* pCols, bool outputEveryColumn); void initExecTimeWindowInfo(SColumnInfoData* pColData, STimeWindow* 
pQueryWindow); SInterval extractIntervalInfo(const STableScanPhysiNode* pTableScanNode); diff --git a/source/libs/executor/src/executil.c b/source/libs/executor/src/executil.c index b493f3222e..aadab4f22a 100644 --- a/source/libs/executor/src/executil.c +++ b/source/libs/executor/src/executil.c @@ -585,14 +585,15 @@ SqlFunctionCtx* createSqlFunctionCtx(SExprInfo* pExprInfo, int32_t numOfOutput, } // NOTE: sources columns are more than the destination SSDatablock columns. -void relocateColumnData(SSDataBlock* pBlock, const SArray* pColMatchInfo, SArray* pCols) { +// doFilter in table scan needs every column even its output is false +void relocateColumnData(SSDataBlock* pBlock, const SArray* pColMatchInfo, SArray* pCols, bool outputEveryColumn) { size_t numOfSrcCols = taosArrayGetSize(pCols); int32_t i = 0, j = 0; while (i < numOfSrcCols && j < taosArrayGetSize(pColMatchInfo)) { SColumnInfoData* p = taosArrayGet(pCols, i); SColMatchInfo* pmInfo = taosArrayGet(pColMatchInfo, j); - if (!pmInfo->output) { + if (!outputEveryColumn && !pmInfo->output) { j++; continue; } diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index 58918667f3..902dae754c 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -2084,7 +2084,7 @@ int32_t extractDataBlockFromFetchRsp(SSDataBlock* pRes, SLoadRemoteDataInfo* pLo // data from mnode pRes->info.rows = numOfRows; - relocateColumnData(pRes, pColList, pBlock->pDataBlock); + relocateColumnData(pRes, pColList, pBlock->pDataBlock, false); taosArrayDestroy(pBlock->pDataBlock); taosMemoryFree(pBlock); // blockDataDestroy(pBlock); diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 6f0187fa53..b9d90c4cf4 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -258,7 +258,7 @@ static int32_t loadDataBlock(SOperatorInfo* pOperator, STableScanInfo* pTableSca return 
terrno; } - relocateColumnData(pBlock, pTableScanInfo->pColMatchInfo, pCols); + relocateColumnData(pBlock, pTableScanInfo->pColMatchInfo, pCols, true); // currently only the tbname pseudo column if (pTableScanInfo->pseudoSup.numOfExprs > 0) { @@ -1469,7 +1469,7 @@ static SSDataBlock* doSysTableScan(SOperatorInfo* pOperator) { p->info.rows = numOfRows; pInfo->pRes->info.rows = numOfRows; - relocateColumnData(pInfo->pRes, pInfo->scanCols, p->pDataBlock); + relocateColumnData(pInfo->pRes, pInfo->scanCols, p->pDataBlock, false); doFilterResult(pInfo); blockDataDestroy(p); @@ -1561,7 +1561,7 @@ int32_t buildSysDbTableInfo(const SSysTableScanInfo* pInfo, int32_t capacity) { getPerfDbMeta(&pSysDbTableMeta, &size); p->info.rows = buildDbTableInfoBlock(p, pSysDbTableMeta, size, TSDB_PERFORMANCE_SCHEMA_DB); - relocateColumnData(pInfo->pRes, pInfo->scanCols, p->pDataBlock); + relocateColumnData(pInfo->pRes, pInfo->scanCols, p->pDataBlock, false); pInfo->pRes->info.rows = p->info.rows; blockDataDestroy(p); @@ -2042,7 +2042,7 @@ static int32_t loadDataBlockFromOneTable(SOperatorInfo* pOperator, STableMergeSc return terrno; } - relocateColumnData(pBlock, pTableScanInfo->pColMatchInfo, pCols); + relocateColumnData(pBlock, pTableScanInfo->pColMatchInfo, pCols, true); // currently only the tbname pseudo column if (pTableScanInfo->numOfPseudoExpr > 0) { diff --git a/source/libs/planner/test/planOptimizeTest.cpp b/source/libs/planner/test/planOptimizeTest.cpp index 07b3adbc1f..6b514ef2c3 100644 --- a/source/libs/planner/test/planOptimizeTest.cpp +++ b/source/libs/planner/test/planOptimizeTest.cpp @@ -60,4 +60,5 @@ TEST_F(PlanOptimizeTest, eliminateProjection) { run("SELECT c1 FROM t1"); run("SELECT * FROM st1"); run("SELECT c1 FROM st1s3"); + //run("select 1-abs(c1) from (select unique(c1) c1 from st1s3) order by 1 nulls first"); } From 4ef0a664533c41af3e70386438d5e7ceb4840ded Mon Sep 17 00:00:00 2001 From: Cary Xu Date: Mon, 20 Jun 2022 16:41:36 +0800 Subject: [PATCH 27/40] test: 
adjust rsma test case --- source/dnode/vnode/src/sma/smaRollup.c | 4 ++-- tests/script/tsim/sma/rsmaCreateInsertQuery.sim | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 458170b9aa..ecd47c2303 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -281,7 +281,7 @@ int32_t tdProcessRSmaCreate(SVnode *pVnode, SVCreateStbReq *pReq) { } pRSmaInfo->items[1].triggerStatus = TASK_TRIGGER_STATUS__IN_ACTIVE; pRSmaInfo->items[1].maxDelay = 5000; - pRSmaInfo->items[0].level = TSDB_RETENTION_L2; + pRSmaInfo->items[1].level = TSDB_RETENTION_L2; pRSmaInfo->items[1].tmrHandle = taosTmrInit(10000, 100, 10000, "RSMA_L2"); if (!pRSmaInfo->items[1].tmrHandle) { goto _err; @@ -451,7 +451,7 @@ static int32_t tdFetchAndSubmitRSmaResult(SRSmaInfoItem *pItem, int8_t blkType) } if (taosArrayGetSize(pResult) > 0) { -#if 0 +#if 1 char flag[10] = {0}; snprintf(flag, 10, "level %" PRIi8, pItem->level); blockDebugShowData(pResult, flag); diff --git a/tests/script/tsim/sma/rsmaCreateInsertQuery.sim b/tests/script/tsim/sma/rsmaCreateInsertQuery.sim index 645b28f771..fb3503c841 100644 --- a/tests/script/tsim/sma/rsmaCreateInsertQuery.sim +++ b/tests/script/tsim/sma/rsmaCreateInsertQuery.sim @@ -5,7 +5,7 @@ sleep 50 sql connect print =============== create database with retentions -sql create database d0 retentions 15s:7d,1m:21d,15m:365d; +sql create database d0 retentions 5s:7d,10s:21d,15s:365d; sql use d0 print =============== create super table and register rsma @@ -29,6 +29,8 @@ sql insert into ct1 values(now, 10); sql insert into ct1 values(now+1s, 1); sql insert into ct1 values(now+2s, 100); +print =============== wait maxdelay 15+1 seconds for results +sleep 16000 print =============== select * from retention level 2 from memory sql select * from ct1; From c31ec1988531825630955f4d39579cee08558ec8 Mon Sep 17 00:00:00 2001 From: shenglian 
zhou Date: Mon, 20 Jun 2022 17:01:00 +0800 Subject: [PATCH 28/40] fix: fix eliminate project error --- source/libs/planner/src/planOptimizer.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/source/libs/planner/src/planOptimizer.c b/source/libs/planner/src/planOptimizer.c index 02c1779224..cb4a967761 100644 --- a/source/libs/planner/src/planOptimizer.c +++ b/source/libs/planner/src/planOptimizer.c @@ -1111,8 +1111,8 @@ static bool eliminateProjOptMayBeOptimized(SLogicNode* pNode) { taosHashPut(pProjColNameHash, projColumnName, strlen(projColumnName), &exist, sizeof(exist)); } } - taosHashCleanup(pProjColNameHash); + return true; } @@ -1123,12 +1123,9 @@ static int32_t eliminateProjOptimizeImpl(SOptimizeContext* pCxt, SLogicSubplan* SNode* pProjection = NULL; FOREACH(pProjection, pProjectNode->pProjections) { SColumnNode* projColumn = (SColumnNode*)pProjection; - char* projColumnName = projColumn->colName; SNode* pChildTarget = NULL; FOREACH(pChildTarget, pChild->pTargets) { - SExprNode* childExpr = (SExprNode*)pChildTarget; - if (QUERY_NODE_COLUMN == nodeType(childExpr) && strcmp(projColumnName, ((SColumnNode*)childExpr)->colName) == 0 || - strcmp(projColumnName, childExpr->aliasName) == 0) { + if (strcmp(projColumn->colName, ((SColumnNode*)pChildTarget)->colName) == 0) { nodesListAppend(pNewChildTargets, nodesCloneNode(pChildTarget)); break; } From 2872a50edc32135a08416c2fe70cf1fc985d4755 Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Mon, 20 Jun 2022 17:16:29 +0800 Subject: [PATCH 29/40] tdb/ofp: support overflow pages for big data --- source/libs/tdb/src/db/tdbBtree.c | 524 ++++++++++++++++++++++--- source/libs/tdb/src/db/tdbPage.c | 16 +- source/libs/tdb/src/inc/tdbInt.h | 30 +- source/libs/tdb/test/CMakeLists.txt | 7 +- source/libs/tdb/test/tdbExOVFLTest.cpp | 469 ++++++++++++++++++++++ 5 files changed, 972 insertions(+), 74 deletions(-) create mode 100644 source/libs/tdb/test/tdbExOVFLTest.cpp diff --git 
a/source/libs/tdb/src/db/tdbBtree.c b/source/libs/tdb/src/db/tdbBtree.c index fffda68731..13aaa770da 100644 --- a/source/libs/tdb/src/db/tdbBtree.c +++ b/source/libs/tdb/src/db/tdbBtree.c @@ -17,6 +17,7 @@ #define TDB_BTREE_ROOT 0x1 #define TDB_BTREE_LEAF 0x2 +#define TDB_BTREE_OVFL 0x4 struct SBTree { SPgno root; @@ -38,9 +39,11 @@ struct SBTree { #define TDB_BTREE_PAGE_SET_FLAGS(PAGE, flags) ((PAGE)->pData[0] = (flags)) #define TDB_BTREE_PAGE_IS_ROOT(PAGE) (TDB_BTREE_PAGE_GET_FLAGS(PAGE) & TDB_BTREE_ROOT) #define TDB_BTREE_PAGE_IS_LEAF(PAGE) (TDB_BTREE_PAGE_GET_FLAGS(PAGE) & TDB_BTREE_LEAF) +#define TDB_BTREE_PAGE_IS_OVFL(PAGE) (TDB_BTREE_PAGE_GET_FLAGS(PAGE) & TDB_BTREE_OVFL) #define TDB_BTREE_ASSERT_FLAG(flags) \ ASSERT(TDB_FLAG_IS(flags, TDB_BTREE_ROOT) || TDB_FLAG_IS(flags, TDB_BTREE_LEAF) || \ - TDB_FLAG_IS(flags, TDB_BTREE_ROOT | TDB_BTREE_LEAF) || TDB_FLAG_IS(flags, 0)) + TDB_FLAG_IS(flags, TDB_BTREE_ROOT | TDB_BTREE_LEAF) || TDB_FLAG_IS(flags, 0) || \ + TDB_FLAG_IS(flags, TDB_BTREE_OVFL)) #pragma pack(push, 1) typedef struct { @@ -62,10 +65,10 @@ static int tdbDefaultKeyCmprFn(const void *pKey1, int keyLen1, const void *pKey2 static int tdbBtreeOpenImpl(SBTree *pBt); static int tdbBtreeInitPage(SPage *pPage, void *arg, int init); static int tdbBtreeEncodeCell(SPage *pPage, const void *pKey, int kLen, const void *pVal, int vLen, SCell *pCell, - int *szCell); -static int tdbBtreeDecodeCell(SPage *pPage, const SCell *pCell, SCellDecoder *pDecoder); + int *szCell, TXN *pTxn, SBTree *pBt); +static int tdbBtreeDecodeCell(SPage *pPage, const SCell *pCell, SCellDecoder *pDecoder, TXN *pTxn, SBTree *pBt); static int tdbBtreeBalance(SBTC *pBtc); -static int tdbBtreeCellSize(const SPage *pPage, SCell *pCell); +static int tdbBtreeCellSize(const SPage *pPage, SCell *pCell, int dropOfp, TXN *pTxn, SBTree *pBt); static int tdbBtcMoveDownward(SBTC *pBtc); static int tdbBtcMoveUpward(SBTC *pBtc); @@ -255,7 +258,7 @@ int tdbBtreePGet(SBTree *pBt, const void *pKey, int 
kLen, void **ppKey, int *pkL } pCell = tdbPageGetCell(btc.pPage, btc.idx); - tdbBtreeDecodeCell(btc.pPage, pCell, &cd); + tdbBtreeDecodeCell(btc.pPage, pCell, &cd, btc.pTxn, pBt); if (ppKey) { pTKey = tdbRealloc(*ppKey, cd.kLen); @@ -281,6 +284,14 @@ int tdbBtreePGet(SBTree *pBt, const void *pKey, int kLen, void **ppKey, int *pkL memcpy(*ppVal, cd.pVal, cd.vLen); } + if (TDB_CELLDECODER_FREE_KEY(&cd)) { + tdbFree(cd.pKey); + } + + if (TDB_CELLDECODER_FREE_VAL(&cd)) { + tdbFree(cd.pVal); + } + tdbBtcClose(&btc); return 0; @@ -375,6 +386,11 @@ static int tdbBtreeInitPage(SPage *pPage, void *arg, int init) { pPage->vLen = pBt->valLen; pPage->maxLocal = pBt->maxLeaf; pPage->minLocal = pBt->minLeaf; + } else if (TDB_BTREE_PAGE_IS_OVFL(pPage)) { + pPage->kLen = pBt->keyLen; + pPage->vLen = pBt->valLen; + pPage->maxLocal = tdbPageCapacity(pBt->pageSize, sizeof(SIntHdr)); + pPage->minLocal = pBt->minLocal; } else { pPage->kLen = pBt->keyLen; pPage->vLen = sizeof(SPgno); @@ -499,7 +515,7 @@ static int tdbBtreeBalanceNonRoot(SBTree *pBt, SPage *pParent, int idx, TXN *pTx for (int i = 0; i < nOlds; i++) { if (sIdx + i < TDB_PAGE_TOTAL_CELLS(pParent)) { pCell = tdbPageGetCell(pParent, sIdx + i); - szDivCell[i] = tdbBtreeCellSize(pParent, pCell); + szDivCell[i] = tdbBtreeCellSize(pParent, pCell, 0, NULL, NULL); pDivCell[i] = tdbOsMalloc(szDivCell[i]); memcpy(pDivCell[i], pCell, szDivCell[i]); } @@ -524,7 +540,7 @@ static int tdbBtreeBalanceNonRoot(SBTree *pBt, SPage *pParent, int idx, TXN *pTx for (int i = 0; i < nOlds; i++) { nCells = TDB_PAGE_TOTAL_CELLS(pParent); if (sIdx < nCells) { - tdbPageDropCell(pParent, sIdx); + tdbPageDropCell(pParent, sIdx, pTxn, pBt); } else { ((SIntHdr *)pParent->pData)->pgno = 0; } @@ -582,7 +598,7 @@ static int tdbBtreeBalanceNonRoot(SBTree *pBt, SPage *pParent, int idx, TXN *pTx for (;;) { pCell = tdbPageGetCell(pOlds[infoNews[iNew - 1].iPage], infoNews[iNew - 1].oIdx); - szLCell = tdbBtreeCellSize(pOlds[infoNews[iNew - 1].iPage], pCell); + 
szLCell = tdbBtreeCellSize(pOlds[infoNews[iNew - 1].iPage], pCell, 0, NULL, NULL); if (!childNotLeaf) { szRCell = szLCell; } else { @@ -600,7 +616,7 @@ static int tdbBtreeBalanceNonRoot(SBTree *pBt, SPage *pParent, int idx, TXN *pTx } pCell = tdbPageGetCell(pPage, oIdx); - szRCell = tdbBtreeCellSize(pPage, pCell); + szRCell = tdbBtreeCellSize(pPage, pCell, 0, NULL, NULL); } ASSERT(infoNews[iNew - 1].cnt > 0); @@ -687,7 +703,7 @@ static int tdbBtreeBalanceNonRoot(SBTree *pBt, SPage *pParent, int idx, TXN *pTx for (int oIdx = 0; oIdx < TDB_PAGE_TOTAL_CELLS(pPage); oIdx++) { pCell = tdbPageGetCell(pPage, oIdx); - szCell = tdbBtreeCellSize(pPage, pCell); + szCell = tdbBtreeCellSize(pPage, pCell, 0, NULL, NULL); ASSERT(nNewCells <= infoNews[iNew].cnt); ASSERT(iNew < nNews); @@ -703,14 +719,14 @@ static int tdbBtreeBalanceNonRoot(SBTree *pBt, SPage *pParent, int idx, TXN *pTx if (iNew == nNews - 1 && pIntHdr->pgno == 0) { pIntHdr->pgno = TDB_PAGE_PGNO(pNews[iNew]); } else { - tdbBtreeDecodeCell(pPage, pCell, &cd); + tdbBtreeDecodeCell(pPage, pCell, &cd, pTxn, pBt); // TODO: pCell here may be inserted as an overflow cell, handle it SCell *pNewCell = tdbOsMalloc(cd.kLen + 9); int szNewCell; SPgno pgno; pgno = TDB_PAGE_PGNO(pNews[iNew]); - tdbBtreeEncodeCell(pParent, cd.pKey, cd.kLen, (void *)&pgno, sizeof(SPgno), pNewCell, &szNewCell); + tdbBtreeEncodeCell(pParent, cd.pKey, cd.kLen, (void *)&pgno, sizeof(SPgno), pNewCell, &szNewCell, pTxn, pBt); tdbPageInsertCell(pParent, sIdx++, pNewCell, szNewCell, 0); tdbOsFree(pNewCell); } @@ -846,13 +862,50 @@ static int tdbBtreeBalance(SBTC *pBtc) { } // TDB_BTREE_BALANCE +static int tdbFetchOvflPage(SPager *pPager, SPgno *pPgno, SPage **ppOfp, TXN *pTxn, SBTree *pBt) { + int ret = 0; + + *pPgno = 0; + SBtreeInitPageArg iArg; + iArg.pBt = pBt; + iArg.flags = TDB_FLAG_ADD(0, TDB_BTREE_OVFL); + ret = tdbPagerFetchPage(pPager, pPgno, ppOfp, tdbBtreeInitPage, &iArg, pTxn); + if (ret < 0) { + return -1; + } + + // mark dirty + ret = 
tdbPagerWrite(pPager, *ppOfp); + if (ret < 0) { + ASSERT(0); + return -1; + } + + return ret; +} + +static int tdbLoadOvflPage(SPager *pPager, SPgno *pPgno, SPage **ppOfp, TXN *pTxn, SBTree *pBt) { + int ret = 0; + + SBtreeInitPageArg iArg; + iArg.pBt = pBt; + iArg.flags = TDB_FLAG_ADD(0, TDB_BTREE_OVFL); + ret = tdbPagerFetchPage(pPager, pPgno, ppOfp, tdbBtreeInitPage, &iArg, pTxn); + if (ret < 0) { + return -1; + } + + return ret; +} + // TDB_BTREE_CELL ===================== static int tdbBtreeEncodePayload(SPage *pPage, SCell *pCell, int nHeader, const void *pKey, int kLen, const void *pVal, - int vLen, int *szPayload) { - int nPayload; + int vLen, int *szPayload, TXN *pTxn, SBTree *pBt) { + int ret = 0; + int nPayload = kLen + vLen; + int maxLocal = pPage->maxLocal; - nPayload = kLen + vLen; - if (nPayload + nHeader <= pPage->maxLocal) { + if (nPayload + nHeader <= maxLocal) { // no overflow page is needed memcpy(pCell + nHeader, pKey, kLen); if (pVal) { @@ -861,18 +914,190 @@ static int tdbBtreeEncodePayload(SPage *pPage, SCell *pCell, int nHeader, const *szPayload = nPayload; return 0; - } + } else { + // handle overflow case + // calc local storage size + int minLocal = pPage->minLocal; + int surplus = minLocal + (nPayload + nHeader - minLocal) % (maxLocal - sizeof(SPgno)); + int nLocal = surplus <= maxLocal ? 
surplus : minLocal; - { - // TODO: handle overflow case - ASSERT(0); + //int ofpCap = tdbPageCapacity(pBt->pageSize, sizeof(SIntHdr)); + + // fetch a new ofp and make it dirty + SPgno pgno = 0; + SPage *ofp, *nextOfp; + + ret = tdbFetchOvflPage(pPage->pPager, &pgno, &ofp, pTxn, pBt); + if (ret < 0) { + return -1; + } + + // local buffer for cell + void *pBuf = tdbRealloc(NULL, pBt->pageSize); + if (pBuf == NULL) { + return -1; + } + + int nLeft = nPayload; + int bytes; + int lastPage = 0; + if (nLocal >= kLen + 4) { + // pack key to local + memcpy(pCell + nHeader, pKey, kLen); + nLeft -= kLen; + // pack partial val to local if any space left + if (nLocal > kLen + 4) { + memcpy(pCell + nHeader + kLen, pVal, nLocal - kLen - sizeof(SPgno)); + nLeft -= nLocal - kLen - sizeof(SPgno); + } + + // pack nextPgno + memcpy(pCell + nHeader + nPayload - nLeft, &pgno, sizeof(pgno)); + + // pack left val data to ovpages + do { + lastPage = 0; + if (nLeft <= ofp->maxLocal - sizeof(SPgno)) { + bytes = nLeft; + lastPage = 1; + } else { + bytes = ofp->maxLocal - sizeof(SPgno); + } + + // fetch next ofp if not last page + if (!lastPage) { + // fetch a new ofp and make it dirty + ret = tdbFetchOvflPage(pPage->pPager, &pgno, &nextOfp, pTxn, pBt); + if (ret < 0) { + tdbFree(pBuf); + return -1; + } + } else { + pgno = 0; + } + + memcpy(pBuf, pVal + vLen - nLeft, bytes); + memcpy(pBuf + bytes, &pgno, sizeof(pgno)); + + ret = tdbPageInsertCell(ofp, 0, pBuf, bytes + sizeof(pgno), 0); + if (ret < 0) { + tdbFree(pBuf); + return -1; + } + + ofp = nextOfp; + nLeft -= bytes; + } while (nLeft > 0); + } else { + int nLeftKey = kLen; + // pack partial key and nextPgno + memcpy(pCell + nHeader, pKey, nLocal - 4); + nLeft -= nLocal - 4; + nLeftKey -= nLocal -4; + + memcpy(pCell + nHeader + nLocal - 4, &pgno, sizeof(pgno)); + + int lastKeyPageSpace = 0; + // pack left key & val to ovpages + do { + // cal key to cpy + int lastKeyPage = 0; + if (nLeftKey <= ofp->maxLocal - sizeof(SPgno)) { + bytes = 
nLeftKey; + lastKeyPage = 1; + lastKeyPageSpace = ofp->maxLocal - sizeof(SPgno) - nLeftKey; + } else { + bytes = ofp->maxLocal - sizeof(SPgno); + } + + // cpy key + memcpy(pBuf, pKey + kLen - nLeftKey, bytes); + + if (lastKeyPage) { + if (lastKeyPageSpace >= vLen) { + memcpy(pBuf + kLen -nLeftKey, pVal, vLen); + + nLeft -= vLen; + pgno = 0; + } else { + memcpy(pBuf + kLen -nLeftKey, pVal, lastKeyPageSpace); + nLeft -= lastKeyPageSpace; + + // fetch next ofp, a new ofp and make it dirty + ret = tdbFetchOvflPage(pPage->pPager, &pgno, &nextOfp, pTxn, pBt); + if (ret < 0) { + return -1; + } + } + } else { + // fetch next ofp, a new ofp and make it dirty + ret = tdbFetchOvflPage(pPage->pPager, &pgno, &nextOfp, pTxn, pBt); + if (ret < 0) { + return -1; + } + } + + memcpy(pBuf + kLen - nLeft, &pgno, sizeof(pgno)); + + ret = tdbPageInsertCell(ofp, 0, pBuf, bytes + sizeof(pgno), 0); + if (ret < 0) { + return -1; + } + + ofp = nextOfp; + nLeftKey -= bytes; + nLeft -= bytes; + } while (nLeftKey > 0); + + while (nLeft > 0) { + // pack left val data to ovpages + lastPage = 0; + if (nLeft <= maxLocal - sizeof(SPgno)) { + bytes = nLeft; + lastPage = 1; + } else { + bytes = maxLocal - sizeof(SPgno); + } + + // fetch next ofp if not last page + if (!lastPage) { + // fetch a new ofp and make it dirty + ret = tdbFetchOvflPage(pPage->pPager, &pgno, &nextOfp, pTxn, pBt); + if (ret < 0) { + tdbFree(pBuf); + return -1; + } + } else { + pgno = 0; + } + + memcpy(pBuf, pVal + vLen - nLeft, bytes); + memcpy(pBuf + bytes, &pgno, sizeof(pgno)); + + ret = tdbPageInsertCell(ofp, 0, pBuf, bytes + sizeof(pgno), 0); + if (ret < 0) { + tdbFree(pBuf); + return -1; + } + + ofp = nextOfp; + nLeft -= bytes; + } + } + + // free local buffer + tdbFree(pBuf); + + *szPayload = nLocal; + + // ASSERT(0); } return 0; } static int tdbBtreeEncodeCell(SPage *pPage, const void *pKey, int kLen, const void *pVal, int vLen, SCell *pCell, - int *szCell) { + int *szCell, TXN *pTxn, SBTree *pBt) { u8 leaf; int nHeader; 
int nPayload; @@ -911,7 +1136,7 @@ static int tdbBtreeEncodeCell(SPage *pPage, const void *pKey, int kLen, const vo vLen = 0; } - ret = tdbBtreeEncodePayload(pPage, pCell, nHeader, pKey, kLen, pVal, vLen, &nPayload); + ret = tdbBtreeEncodePayload(pPage, pCell, nHeader, pKey, kLen, pVal, vLen, &nPayload, pTxn, pBt); if (ret < 0) { // TODO ASSERT(0); @@ -922,8 +1147,13 @@ static int tdbBtreeEncodeCell(SPage *pPage, const void *pKey, int kLen, const vo return 0; } -static int tdbBtreeDecodePayload(SPage *pPage, const SCell *pCell, int nHeader, SCellDecoder *pDecoder) { +static int tdbBtreeDecodePayload(SPage *pPage, const SCell *pCell, int nHeader, SCellDecoder *pDecoder, TXN *pTxn, SBTree *pBt) { + int ret = 0; int nPayload; + int maxLocal = pPage->maxLocal; + + int kLen = pDecoder->kLen; + int vLen = pDecoder->vLen; if (pDecoder->pVal) { ASSERT(!TDB_BTREE_PAGE_IS_LEAF(pPage)); @@ -932,24 +1162,171 @@ static int tdbBtreeDecodePayload(SPage *pPage, const SCell *pCell, int nHeader, nPayload = pDecoder->kLen + pDecoder->vLen; } - if (nHeader + nPayload <= pPage->maxLocal) { + if (nHeader + nPayload <= maxLocal) { // no over flow case - pDecoder->pKey = pCell + nHeader; + pDecoder->pKey = (SCell *)pCell + nHeader; if (pDecoder->pVal == NULL && pDecoder->vLen > 0) { - pDecoder->pVal = pCell + nHeader + pDecoder->kLen; + pDecoder->pVal = (SCell *)pCell + nHeader + pDecoder->kLen; } return 0; - } + } else { + // handle overflow case + // calc local storage size + int minLocal = pPage->minLocal; + int surplus = minLocal + (nPayload + nHeader - minLocal) % (maxLocal - sizeof(SPgno)); + int nLocal = surplus <= maxLocal ? 
surplus : minLocal; - { - // TODO: handle overflow case - ASSERT(0); + int nLeft = nPayload; + SPgno pgno = 0; + SPage *ofp; + SCell *ofpCell; + int bytes; + int lastPage = 0; + + if (nLocal >= pDecoder->kLen + 4) { + pDecoder->pKey = (SCell *)pCell + nHeader; + nLeft -= kLen; + if (nLocal > kLen + 4) { + // read partial val to local + pDecoder->pVal = tdbRealloc(pDecoder->pVal, vLen); + if (pDecoder->pVal == NULL) { + return -1; + } + TDB_CELLDECODER_SET_FREE_VAL(pDecoder); + + memcpy(pDecoder->pVal, pCell + nHeader + kLen, nLocal - kLen - sizeof(SPgno)); + + nLeft -= nLocal - kLen - sizeof(SPgno); + } + + memcpy(&pgno, pCell + nHeader + nPayload - nLeft, sizeof(pgno)); + + // unpack left val data from ovpages + while (pgno != 0) { + ret = tdbLoadOvflPage(pPage->pPager, &pgno, &ofp, pTxn, pBt); + if (ret < 0) { + return -1; + } + + ofpCell = tdbPageGetCell(ofp, 0); + + if (nLeft <= ofp->maxLocal - sizeof(SPgno)) { + bytes = nLeft; + lastPage = 1; + } else { + bytes = ofp->maxLocal - sizeof(SPgno); + } + + memcpy(pDecoder->pVal + vLen - nLeft, ofpCell, bytes); + nLeft -= bytes; + + memcpy(&pgno, ofpCell + bytes, sizeof(pgno)); + } + } else { + int nLeftKey = kLen; + // load partial key and nextPgno + pDecoder->pKey = tdbRealloc(pDecoder->pKey, kLen); + if (pDecoder->pKey == NULL) { + return -1; + } + TDB_CELLDECODER_SET_FREE_KEY(pDecoder); + + memcpy(pDecoder->pKey, pCell + nHeader, nLocal - 4); + nLeft -= nLocal - 4; + nLeftKey -= nLocal -4; + + memcpy(&pgno, pCell + nHeader + nLocal - 4, sizeof(pgno)); + + int lastKeyPageSpace = 0; + // load left key & val to ovpages + while (pgno != 0) { + ret = tdbLoadOvflPage(pPage->pPager, &pgno, &ofp, pTxn, pBt); + if (ret < 0) { + return -1; + } + + ofpCell = tdbPageGetCell(ofp, 0); + + int lastKeyPage = 0; + if (nLeftKey <= maxLocal - sizeof(SPgno)) { + bytes = nLeftKey; + lastKeyPage = 1; + lastKeyPageSpace = ofp->maxLocal - sizeof(SPgno) - nLeftKey; + } else { + bytes = ofp->maxLocal - sizeof(SPgno); + } + + // cpy key + 
memcpy(pDecoder->pKey + kLen - nLeftKey, ofpCell, bytes); + + if (lastKeyPage) { + if (lastKeyPageSpace >= vLen) { + pDecoder->pVal = ofpCell + kLen -nLeftKey; + + nLeft -= vLen; + pgno = 0; + } else { + // read partial val to local + pDecoder->pVal = tdbRealloc(pDecoder->pVal, vLen); + if (pDecoder->pVal == NULL) { + return -1; + } + TDB_CELLDECODER_SET_FREE_VAL(pDecoder); + + memcpy(pDecoder->pVal, ofpCell + kLen -nLeftKey, lastKeyPageSpace); + nLeft -= lastKeyPageSpace; + } + } + + memcpy(&pgno, ofpCell + bytes, sizeof(pgno)); + + nLeftKey -= bytes; + nLeft -= bytes; + } + + while (nLeft > 0) { + ret = tdbLoadOvflPage(pPage->pPager, &pgno, &ofp, pTxn, pBt); + if (ret < 0) { + return -1; + } + + ofpCell = tdbPageGetCell(ofp, 0); + + // load left val data to ovpages + lastPage = 0; + if (nLeft <= ofp->maxLocal - sizeof(SPgno)) { + bytes = nLeft; + lastPage = 1; + } else { + bytes = ofp->maxLocal - sizeof(SPgno); + } + + if (lastPage) { + pgno = 0; + } + + if (!pDecoder->pVal) { + pDecoder->pVal = tdbRealloc(pDecoder->pVal, vLen); + if (pDecoder->pVal == NULL) { + return -1; + } + TDB_CELLDECODER_SET_FREE_VAL(pDecoder); + } + + memcpy(pDecoder->pVal, ofpCell + vLen - nLeft, bytes); + nLeft -= bytes; + + memcpy(&pgno, ofpCell + vLen - nLeft + bytes, sizeof(pgno)); + + nLeft -= bytes; + } + } } return 0; } -static int tdbBtreeDecodeCell(SPage *pPage, const SCell *pCell, SCellDecoder *pDecoder) { +static int tdbBtreeDecodeCell(SPage *pPage, const SCell *pCell, SCellDecoder *pDecoder, TXN *pTxn, SBTree *pBt) { u8 leaf; int nHeader; int ret; @@ -963,6 +1340,7 @@ static int tdbBtreeDecodeCell(SPage *pPage, const SCell *pCell, SCellDecoder *pD pDecoder->vLen = -1; pDecoder->pVal = NULL; pDecoder->pgno = 0; + TDB_CELLDECODER_SET_FREE_NIL(pDecoder); // 1. Decode header part if (!leaf) { @@ -987,7 +1365,7 @@ static int tdbBtreeDecodeCell(SPage *pPage, const SCell *pCell, SCellDecoder *pD } // 2. 
Decode payload part - ret = tdbBtreeDecodePayload(pPage, pCell, nHeader, pDecoder); + ret = tdbBtreeDecodePayload(pPage, pCell, nHeader, pDecoder, pTxn, pBt); if (ret < 0) { return -1; } @@ -995,41 +1373,71 @@ static int tdbBtreeDecodeCell(SPage *pPage, const SCell *pCell, SCellDecoder *pD return 0; } -static int tdbBtreeCellSize(const SPage *pPage, SCell *pCell) { +static int tdbBtreeCellSize(const SPage *pPage, SCell *pCell, int dropOfp, TXN *pTxn, SBTree *pBt) { u8 leaf; - int szCell; - int kLen = 0, vLen = 0; + int kLen = 0, vLen = 0, nHeader = 0; leaf = TDB_BTREE_PAGE_IS_LEAF(pPage); - szCell = 0; if (!leaf) { - szCell += sizeof(SPgno); + nHeader += sizeof(SPgno); } if (pPage->kLen == TDB_VARIANT_LEN) { - szCell += tdbGetVarInt(pCell + szCell, &kLen); + nHeader += tdbGetVarInt(pCell + nHeader, &kLen); } else { kLen = pPage->kLen; } if (pPage->vLen == TDB_VARIANT_LEN) { ASSERT(leaf); - szCell += tdbGetVarInt(pCell + szCell, &vLen); + nHeader += tdbGetVarInt(pCell + nHeader, &vLen); } else if (leaf) { vLen = pPage->vLen; } - szCell = szCell + kLen + vLen; + int nPayload = kLen + vLen; + if (nHeader + nPayload <= pPage->maxLocal) { + return nHeader + kLen + vLen; + } else { + int maxLocal = pPage->maxLocal; - if (szCell <= pPage->maxLocal) { - return szCell; - } + // calc local storage size + int minLocal = pPage->minLocal; + int surplus = minLocal + (nPayload + nHeader - minLocal) % (maxLocal - sizeof(SPgno)); + int nLocal = surplus <= maxLocal ? 
surplus : minLocal; - { - // TODO - ASSERT(0); - return 0; + // free ofp pages' cells + if (dropOfp) { + int ret = 0; + SPgno pgno = *(SPgno *) (pCell + nHeader + nLocal - sizeof(SPgno)); + int nLeft = nPayload - nLocal + sizeof(SPgno); + SPage *ofp; + int bytes; + + while (pgno != 0) { + ret = tdbLoadOvflPage(pPage->pPager, &pgno, &ofp, pTxn, pBt); + if (ret < 0) { + return -1; + } + + SCell *ofpCell = tdbPageGetCell(ofp, 0); + + if (nLeft <= ofp->maxLocal - sizeof(SPgno)) { + bytes = nLeft; + } else { + bytes = ofp->maxLocal - sizeof(SPgno); + } + + memcpy(&pgno, ofpCell + bytes, sizeof(pgno)); + + tdbPagerReturnPage(pPage->pPager, ofp, pTxn); + + nLeft -= bytes; + } + } + + return nHeader + nLocal; } } // TDB_BTREE_CELL @@ -1212,7 +1620,7 @@ int tdbBtreeNext(SBTC *pBtc, void **ppKey, int *kLen, void **ppVal, int *vLen) { pCell = tdbPageGetCell(pBtc->pPage, pBtc->idx); - tdbBtreeDecodeCell(pBtc->pPage, pCell, &cd); + tdbBtreeDecodeCell(pBtc->pPage, pCell, &cd, pBtc->pTxn, pBtc->pBt); pKey = tdbRealloc(*ppKey, cd.kLen); if (pKey == NULL) { @@ -1381,7 +1789,7 @@ int tdbBtcGet(SBTC *pBtc, const void **ppKey, int *kLen, const void **ppVal, int } pCell = tdbPageGetCell(pBtc->pPage, pBtc->idx); - tdbBtreeDecodeCell(pBtc->pPage, pCell, &pBtc->coder); + tdbBtreeDecodeCell(pBtc->pPage, pCell, &pBtc->coder, pBtc->pTxn, pBtc->pBt); if (ppKey) { *ppKey = (void *)pBtc->coder.pKey; @@ -1418,7 +1826,7 @@ int tdbBtcDelete(SBTC *pBtc) { return -1; } - tdbPageDropCell(pBtc->pPage, idx); + tdbPageDropCell(pBtc->pPage, idx, pBtc->pTxn, pBtc->pBt); // update interior page or do balance if (idx == nCells - 1) { @@ -1442,9 +1850,9 @@ int tdbBtcDelete(SBTC *pBtc) { // update the cell with new key pCell = tdbOsMalloc(nKey + 9); - tdbBtreeEncodeCell(pPage, pKey, nKey, &pgno, sizeof(pgno), pCell, &szCell); + tdbBtreeEncodeCell(pPage, pKey, nKey, &pgno, sizeof(pgno), pCell, &szCell, pBtc->pTxn, pBtc->pBt); - ret = tdbPageUpdateCell(pPage, idx, pCell, szCell); + ret = 
tdbPageUpdateCell(pPage, idx, pCell, szCell, pBtc->pTxn, pBtc->pBt); if (ret < 0) { tdbOsFree(pCell); ASSERT(0); @@ -1483,7 +1891,7 @@ int tdbBtcUpsert(SBTC *pBtc, const void *pKey, int kLen, const void *pData, int // alloc space szBuf = kLen + nData + 14; - pBuf = tdbRealloc(pBtc->pBt->pBuf, pBtc->pBt->pageSize > szBuf ? szBuf : pBtc->pBt->pageSize); + pBuf = tdbRealloc(pBtc->pBt->pBuf, pBtc->pBt->pageSize > szBuf ? szBuf : pBtc->pBt->pageSize); if (pBuf == NULL) { ASSERT(0); return -1; @@ -1492,7 +1900,7 @@ int tdbBtcUpsert(SBTC *pBtc, const void *pKey, int kLen, const void *pData, int pCell = (SCell *)pBtc->pBt->pBuf; // encode cell - ret = tdbBtreeEncodeCell(pBtc->pPage, pKey, kLen, pData, nData, pCell, &szCell); + ret = tdbBtreeEncodeCell(pBtc->pPage, pKey, kLen, pData, nData, pCell, &szCell, pBtc->pTxn, pBtc->pBt); if (ret < 0) { ASSERT(0); return -1; @@ -1513,7 +1921,7 @@ int tdbBtcUpsert(SBTC *pBtc, const void *pKey, int kLen, const void *pData, int } else { ASSERT(pBtc->idx < nCells); - ret = tdbPageUpdateCell(pBtc->pPage, pBtc->idx, pCell, szCell); + ret = tdbPageUpdateCell(pBtc->pPage, pBtc->idx, pCell, szCell, pBtc->pTxn, pBtc->pBt); } if (ret < 0) { ASSERT(0); @@ -1574,7 +1982,7 @@ int tdbBtcMoveTo(SBTC *pBtc, const void *pKey, int kLen, int *pCRst) { // check if key <= current position if (idx < nCells) { pCell = tdbPageGetCell(pPage, idx); - tdbBtreeDecodeCell(pPage, pCell, &cd); + tdbBtreeDecodeCell(pPage, pCell, &cd, pBtc->pTxn, pBtc->pBt); c = pBt->kcmpr(pKey, kLen, cd.pKey, cd.kLen); if (c > 0) break; } @@ -1583,7 +1991,7 @@ int tdbBtcMoveTo(SBTC *pBtc, const void *pKey, int kLen, int *pCRst) { if (idx > 0) { pCell = tdbPageGetCell(pPage, idx - 1); - tdbBtreeDecodeCell(pPage, pCell, &cd); + tdbBtreeDecodeCell(pPage, pCell, &cd, pBtc->pTxn, pBtc->pBt); c = pBt->kcmpr(pKey, kLen, cd.pKey, cd.kLen); if (c <= 0) break; } } @@ -1723,4 +2131,4 @@ void tdbBtPageInfo(SPage *pPage, int idx) { pBtPageInfo->nOvfl = pPage->nOverflow; } #endif -// 
TDB_BTREE_DEBUG \ No newline at end of file +// TDB_BTREE_DEBUG diff --git a/source/libs/tdb/src/db/tdbPage.c b/source/libs/tdb/src/db/tdbPage.c index 78470b6256..7a70b621c6 100644 --- a/source/libs/tdb/src/db/tdbPage.c +++ b/source/libs/tdb/src/db/tdbPage.c @@ -82,7 +82,8 @@ int tdbPageDestroy(SPage *pPage, void (*xFree)(void *arg, void *ptr), void *arg) return 0; } -void tdbPageZero(SPage *pPage, u8 szAmHdr, int (*xCellSize)(const SPage *, SCell *)) { +void tdbPageZero(SPage *pPage, u8 szAmHdr, int (*xCellSize)(const SPage *, SCell *, int, + TXN *, SBTree *pBt)) { pPage->pPageHdr = pPage->pData + szAmHdr; TDB_PAGE_NCELLS_SET(pPage, 0); TDB_PAGE_CCELLS_SET(pPage, pPage->pageSize - sizeof(SPageFtr)); @@ -98,7 +99,8 @@ void tdbPageZero(SPage *pPage, u8 szAmHdr, int (*xCellSize)(const SPage *, SCell ASSERT((u8 *)pPage->pPageFtr == pPage->pFreeEnd); } -void tdbPageInit(SPage *pPage, u8 szAmHdr, int (*xCellSize)(const SPage *, SCell *)) { +void tdbPageInit(SPage *pPage, u8 szAmHdr, int (*xCellSize)(const SPage *, SCell *, int, + TXN *, SBTree *pBt)) { pPage->pPageHdr = pPage->pData + szAmHdr; pPage->pCellIdx = pPage->pPageHdr + TDB_PAGE_HDR_SIZE(pPage); pPage->pFreeStart = pPage->pCellIdx + TDB_PAGE_OFFSET_SIZE(pPage) * TDB_PAGE_NCELLS(pPage); @@ -171,12 +173,12 @@ int tdbPageInsertCell(SPage *pPage, int idx, SCell *pCell, int szCell, u8 asOvfl return 0; } -int tdbPageUpdateCell(SPage *pPage, int idx, SCell *pCell, int szCell) { - tdbPageDropCell(pPage, idx); +int tdbPageUpdateCell(SPage *pPage, int idx, SCell *pCell, int szCell, TXN *pTxn, SBTree *pBt) { + tdbPageDropCell(pPage, idx, pTxn, pBt); return tdbPageInsertCell(pPage, idx, pCell, szCell, 0); } -int tdbPageDropCell(SPage *pPage, int idx) { +int tdbPageDropCell(SPage *pPage, int idx, TXN *pTxn, SBTree *pBt) { int lidx; SCell *pCell; int szCell; @@ -205,7 +207,7 @@ int tdbPageDropCell(SPage *pPage, int idx) { lidx = idx - iOvfl; pCell = TDB_PAGE_CELL_AT(pPage, lidx); - szCell = (*pPage->xCellSize)(pPage, pCell); 
+ szCell = (*pPage->xCellSize)(pPage, pCell, 1, pTxn, pBt); tdbPageFree(pPage, lidx, pCell, szCell); TDB_PAGE_NCELLS_SET(pPage, nCells - 1); @@ -420,7 +422,7 @@ static int tdbPageDefragment(SPage *pPage) { ASSERT(pCell != NULL); - szCell = (*pPage->xCellSize)(pPage, pCell); + szCell = (*pPage->xCellSize)(pPage, pCell, 0, NULL, NULL); ASSERT(pCell + szCell <= pNextCell); if (pCell + szCell < pNextCell) { diff --git a/source/libs/tdb/src/inc/tdbInt.h b/source/libs/tdb/src/inc/tdbInt.h index 6524e3c9bc..2884123a93 100644 --- a/source/libs/tdb/src/inc/tdbInt.h +++ b/source/libs/tdb/src/inc/tdbInt.h @@ -116,13 +116,25 @@ typedef struct SBtInfo { int nData; } SBtInfo; +#define TDB_CELLD_F_NIL 0x0 +#define TDB_CELLD_F_KEY 0x1 +#define TDB_CELLD_F_VAL 0x2 + +#define TDB_CELLDECODER_SET_FREE_NIL(pCellDecoder) ((pCellDecoder)->freeKV = TDB_CELLD_F_NIL) +#define TDB_CELLDECODER_SET_FREE_KEY(pCellDecoder) ((pCellDecoder)->freeKV |= TDB_CELLD_F_KEY) +#define TDB_CELLDECODER_SET_FREE_VAL(pCellDecoder) ((pCellDecoder)->freeKV |= TDB_CELLD_F_VAL) + +#define TDB_CELLDECODER_FREE_KEY(pCellDecoder) ((pCellDecoder)->freeKV & TDB_CELLD_F_KEY) +#define TDB_CELLDECODER_FREE_VAL(pCellDecoder) ((pCellDecoder)->freeKV & TDB_CELLD_F_VAL) + typedef struct { int kLen; - const u8 *pKey; + u8 *pKey; int vLen; - const u8 *pVal; + u8 *pVal; SPgno pgno; u8 *pBuf; + u8 freeKV; } SCellDecoder; struct SBTC { @@ -250,7 +262,7 @@ struct SPage { int vLen; // value length of the page, -1 for unknown int maxLocal; int minLocal; - int (*xCellSize)(const SPage *, SCell *); + int (*xCellSize)(const SPage *, SCell *, int, TXN *pTxn, SBTree *pBt); // Fields used by SPCache TDB_PCACHE_PAGE }; @@ -297,16 +309,18 @@ static inline int tdbTryLockPage(tdb_spinlock_t *pLock) { #define TDB_PAGE_USABLE_SIZE(pPage) ((u8 *)(pPage)->pPageFtr - (pPage)->pCellIdx) #define TDB_PAGE_FREE_SIZE(pPage) (*(pPage)->pPageMethods->getFreeBytes)(pPage) #define TDB_PAGE_PGNO(pPage) ((pPage)->pgid.pgno) -#define 
TDB_BYTES_CELL_TAKEN(pPage, pCell) ((*(pPage)->xCellSize)(pPage, pCell) + (pPage)->pPageMethods->szOffset) +#define TDB_BYTES_CELL_TAKEN(pPage, pCell) ((*(pPage)->xCellSize)(pPage, pCell, 0, NULL, NULL) + (pPage)->pPageMethods->szOffset) #define TDB_PAGE_OFFSET_SIZE(pPage) ((pPage)->pPageMethods->szOffset) int tdbPageCreate(int pageSize, SPage **ppPage, void *(*xMalloc)(void *, size_t), void *arg); int tdbPageDestroy(SPage *pPage, void (*xFree)(void *arg, void *ptr), void *arg); -void tdbPageZero(SPage *pPage, u8 szAmHdr, int (*xCellSize)(const SPage *, SCell *)); -void tdbPageInit(SPage *pPage, u8 szAmHdr, int (*xCellSize)(const SPage *, SCell *)); +void tdbPageZero(SPage *pPage, u8 szAmHdr, int (*xCellSize)(const SPage *, SCell *, int, + TXN *, SBTree *pBt)); +void tdbPageInit(SPage *pPage, u8 szAmHdr, int (*xCellSize)(const SPage *, SCell *, int, + TXN *, SBTree *pBt)); int tdbPageInsertCell(SPage *pPage, int idx, SCell *pCell, int szCell, u8 asOvfl); -int tdbPageDropCell(SPage *pPage, int idx); -int tdbPageUpdateCell(SPage *pPage, int idx, SCell *pCell, int szCell); +int tdbPageDropCell(SPage *pPage, int idx, TXN *pTxn, SBTree *pBt); +int tdbPageUpdateCell(SPage *pPage, int idx, SCell *pCell, int szCell, TXN *pTxn, SBTree *pBt); void tdbPageCopy(SPage *pFromPage, SPage *pToPage); int tdbPageCapacity(int pageSize, int amHdrSize); diff --git a/source/libs/tdb/test/CMakeLists.txt b/source/libs/tdb/test/CMakeLists.txt index b2c8aaf9bc..2621e02b02 100644 --- a/source/libs/tdb/test/CMakeLists.txt +++ b/source/libs/tdb/test/CMakeLists.txt @@ -4,4 +4,9 @@ target_link_libraries(tdbTest tdb gtest gtest_main) # tdbUtilTest add_executable(tdbUtilTest "tdbUtilTest.cpp") -target_link_libraries(tdbUtilTest tdb gtest gtest_main) \ No newline at end of file +target_link_libraries(tdbUtilTest tdb gtest gtest_main) + +# tdbUtilTest +add_executable(tdbExOVFLTest "tdbExOVFLTest.cpp") +target_link_libraries(tdbExOVFLTest tdb gtest gtest_main) + diff --git 
a/source/libs/tdb/test/tdbExOVFLTest.cpp b/source/libs/tdb/test/tdbExOVFLTest.cpp new file mode 100644 index 0000000000..2d8d012a6a --- /dev/null +++ b/source/libs/tdb/test/tdbExOVFLTest.cpp @@ -0,0 +1,469 @@ +#include + +#define ALLOW_FORBID_FUNC +#include "os.h" +#include "tdb.h" + +#include +#include +#include +#include + +typedef struct SPoolMem { + int64_t size; + struct SPoolMem *prev; + struct SPoolMem *next; +} SPoolMem; + +static SPoolMem *openPool() { + SPoolMem *pPool = (SPoolMem *)taosMemoryMalloc(sizeof(*pPool)); + + pPool->prev = pPool->next = pPool; + pPool->size = 0; + + return pPool; +} + +static void clearPool(SPoolMem *pPool) { + SPoolMem *pMem; + + do { + pMem = pPool->next; + + if (pMem == pPool) break; + + pMem->next->prev = pMem->prev; + pMem->prev->next = pMem->next; + pPool->size -= pMem->size; + + taosMemoryFree(pMem); + } while (1); + + assert(pPool->size == 0); +} + +static void closePool(SPoolMem *pPool) { + clearPool(pPool); + taosMemoryFree(pPool); +} + +static void *poolMalloc(void *arg, size_t size) { + void *ptr = NULL; + SPoolMem *pPool = (SPoolMem *)arg; + SPoolMem *pMem; + + pMem = (SPoolMem *)taosMemoryMalloc(sizeof(*pMem) + size); + if (pMem == NULL) { + assert(0); + } + + pMem->size = sizeof(*pMem) + size; + pMem->next = pPool->next; + pMem->prev = pPool; + + pPool->next->prev = pMem; + pPool->next = pMem; + pPool->size += pMem->size; + + ptr = (void *)(&pMem[1]); + return ptr; +} + +static void poolFree(void *arg, void *ptr) { + SPoolMem *pPool = (SPoolMem *)arg; + SPoolMem *pMem; + + pMem = &(((SPoolMem *)ptr)[-1]); + + pMem->next->prev = pMem->prev; + pMem->prev->next = pMem->next; + pPool->size -= pMem->size; + + taosMemoryFree(pMem); +} + +static int tKeyCmpr(const void *pKey1, int kLen1, const void *pKey2, int kLen2) { + int k1, k2; + + std::string s1((char *)pKey1 + 3, kLen1 - 3); + std::string s2((char *)pKey2 + 3, kLen2 - 3); + k1 = stoi(s1); + k2 = stoi(s2); + + if (k1 < k2) { + return -1; + } else if (k1 > k2) { + 
return 1; + } else { + return 0; + } +} + +static int tDefaultKeyCmpr(const void *pKey1, int keyLen1, const void *pKey2, int keyLen2) { + int mlen; + int cret; + + ASSERT(keyLen1 > 0 && keyLen2 > 0 && pKey1 != NULL && pKey2 != NULL); + + mlen = keyLen1 < keyLen2 ? keyLen1 : keyLen2; + cret = memcmp(pKey1, pKey2, mlen); + if (cret == 0) { + if (keyLen1 < keyLen2) { + cret = -1; + } else if (keyLen1 > keyLen2) { + cret = 1; + } else { + cret = 0; + } + } + return cret; +} + +TEST(TdbOVFLPagesTest, TbUpsertTest) { + +} + +TEST(TdbOVFLPagesTest, TbPGetTest) { + +} + +static void generateBigVal(char *val, int valLen) { + for (int i = 0; i < valLen; ++i) { + char c = char(i & 0xff); + if (c == 0) { + c = 1; + } + val[i] = c; + } +} + +static TDB *openEnv(char const *envName, int const pageSize, int const pageNum) { + TDB *pEnv = NULL; + + int ret = tdbOpen(envName, pageSize, pageNum, &pEnv); + if (ret) { + pEnv = NULL; + } + + return pEnv; +} + +static void insertOfp(void) { + int ret = 0; + + taosRemoveDir("tdb"); + + // open Env + int const pageSize = 4096; + int const pageNum = 64; + TDB *pEnv = openEnv("tdb", pageSize, pageNum); + GTEST_ASSERT_NE(pEnv, nullptr); + + // open db + TTB *pDb = NULL; + tdb_cmpr_fn_t compFunc = tKeyCmpr; + ret = tdbTbOpen("ofp_insert.db", -1, -1, compFunc, pEnv, &pDb); + GTEST_ASSERT_EQ(ret, 0); + + // open the pool + SPoolMem *pPool = openPool(); + + // start a transaction + TXN txn; + int64_t txnid = 0; + ++txnid; + tdbTxnOpen(&txn, txnid, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED); + tdbBegin(pEnv, &txn); + + // generate value payload + char val[((4083 - 4 - 3 - 2)+1)*100]; // pSize(4096) - amSize(1) - pageHdr(8) - footerSize(4) + int valLen = sizeof(val) / sizeof(val[0]); + generateBigVal(val, valLen); + + // insert the generated big data + ret = tdbTbInsert(pDb, "key1", strlen("key1"), val, valLen, &txn); + GTEST_ASSERT_EQ(ret, 0); + + // commit current transaction + tdbCommit(pEnv, &txn); + 
tdbTxnClose(&txn); +} + +//TEST(TdbOVFLPagesTest, DISABLED_TbInsertTest) { +TEST(TdbOVFLPagesTest, TbInsertTest) { + insertOfp(); +} + +//TEST(TdbOVFLPagesTest, DISABLED_TbGetTest) { +TEST(TdbOVFLPagesTest, TbGetTest) { + insertOfp(); + + // open Env + int const pageSize = 4096; + int const pageNum = 64; + TDB *pEnv = openEnv("tdb", pageSize, pageNum); + GTEST_ASSERT_NE(pEnv, nullptr); + + // open db + TTB *pDb = NULL; + tdb_cmpr_fn_t compFunc = tKeyCmpr; + int ret = tdbTbOpen("ofp_insert.db", -1, -1, compFunc, pEnv, &pDb); + GTEST_ASSERT_EQ(ret, 0); + + // generate value payload + char val[((4083 - 4 - 3 - 2)+1)*100]; // pSize(4096) - amSize(1) - pageHdr(8) - footerSize(4) + int valLen = sizeof(val) / sizeof(val[0]); + generateBigVal(val, valLen); + + { // Query the data + void *pVal = NULL; + int vLen; + + ret = tdbTbGet(pDb, "key1", strlen("key1"), &pVal, &vLen); + ASSERT(ret == 0); + GTEST_ASSERT_EQ(ret, 0); + + GTEST_ASSERT_EQ(vLen, valLen); + GTEST_ASSERT_EQ(memcmp(val, pVal, vLen), 0); + + tdbFree(pVal); + } +} + +TEST(TdbOVFLPagesTest, TbDeleteTest) { + int ret = 0; + + taosRemoveDir("tdb"); + + // open Env + int const pageSize = 4096; + int const pageNum = 64; + TDB *pEnv = openEnv("tdb", pageSize, pageNum); + GTEST_ASSERT_NE(pEnv, nullptr); + + // open db + TTB *pDb = NULL; + tdb_cmpr_fn_t compFunc = tKeyCmpr; + ret = tdbTbOpen("ofp_insert.db", -1, -1, compFunc, pEnv, &pDb); + GTEST_ASSERT_EQ(ret, 0); + + // open the pool + SPoolMem *pPool = openPool(); + + // start a transaction + TXN txn; + int64_t txnid = 0; + ++txnid; + tdbTxnOpen(&txn, txnid, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED); + tdbBegin(pEnv, &txn); + + // generate value payload + char val[((4083 - 4 - 3 - 2)+1)*100]; // pSize(4096) - amSize(1) - pageHdr(8) - footerSize(4) + int valLen = sizeof(val) / sizeof(val[0]); + generateBigVal(val, valLen); + + { // insert the generated big data + ret = tdbTbInsert(pDb, "key1", strlen("key1"), val, valLen, &txn); + 
GTEST_ASSERT_EQ(ret, 0); + } + + { // query the data + void *pVal = NULL; + int vLen; + + ret = tdbTbGet(pDb, "key1", strlen("key1"), &pVal, &vLen); + ASSERT(ret == 0); + GTEST_ASSERT_EQ(ret, 0); + + GTEST_ASSERT_EQ(vLen, valLen); + GTEST_ASSERT_EQ(memcmp(val, pVal, vLen), 0); + + tdbFree(pVal); + } + /* open to debug committed file + tdbCommit(pEnv, &txn); + tdbTxnClose(&txn); + + ++txnid; + tdbTxnOpen(&txn, txnid, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED); + tdbBegin(pEnv, &txn); + */ + { // upsert the data + ret = tdbTbUpsert(pDb, "key1", strlen("key1"), "value1", strlen("value1"), &txn); + GTEST_ASSERT_EQ(ret, 0); + } + + { // query the upserted data + void *pVal = NULL; + int vLen; + + ret = tdbTbGet(pDb, "key1", strlen("key1"), &pVal, &vLen); + ASSERT(ret == 0); + GTEST_ASSERT_EQ(ret, 0); + + GTEST_ASSERT_EQ(vLen, strlen("value1")); + GTEST_ASSERT_EQ(memcmp("value1", pVal, vLen), 0); + + tdbFree(pVal); + } + + { // delete the data + ret = tdbTbDelete(pDb, "key1", strlen("key1"), &txn); + GTEST_ASSERT_EQ(ret, 0); + } + + { // query the deleted data + void *pVal = NULL; + int vLen = -1; + + ret = tdbTbGet(pDb, "key1", strlen("key1"), &pVal, &vLen); + ASSERT(ret == -1); + GTEST_ASSERT_EQ(ret, -1); + + GTEST_ASSERT_EQ(vLen, -1); + GTEST_ASSERT_EQ(pVal, nullptr); + + tdbFree(pVal); + } + + // commit current transaction + tdbCommit(pEnv, &txn); + tdbTxnClose(&txn); +} + +TEST(tdb_test, DISABLED_simple_insert1) { +//TEST(tdb_test, simple_insert1) { + int ret; + TDB *pEnv; + TTB *pDb; + tdb_cmpr_fn_t compFunc; + int nData = 1; + TXN txn; + int const pageSize = 4096; + + taosRemoveDir("tdb"); + + // Open Env + ret = tdbOpen("tdb", pageSize, 64, &pEnv); + GTEST_ASSERT_EQ(ret, 0); + + // Create a database + compFunc = tKeyCmpr; + ret = tdbTbOpen("db.db", -1, -1, compFunc, pEnv, &pDb); + GTEST_ASSERT_EQ(ret, 0); + + { + char key[64]; + //char val[(4083 - 4 - 3 - 2)]; // pSize(4096) - amSize(1) - pageHdr(8) - footerSize(4) + char val[(4083 - 4 
- 3 - 2)+1]; // pSize(4096) - amSize(1) - pageHdr(8) - footerSize(4) + int64_t poolLimit = 4096; // 1M pool limit + int64_t txnid = 0; + SPoolMem *pPool; + + // open the pool + pPool = openPool(); + + // start a transaction + txnid++; + tdbTxnOpen(&txn, txnid, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED); + tdbBegin(pEnv, &txn); + + for (int iData = 1; iData <= nData; iData++) { + sprintf(key, "key0"); + sprintf(val, "value%d", iData); + + //ret = tdbTbInsert(pDb, key, strlen(key), val, strlen(val), &txn); + //GTEST_ASSERT_EQ(ret, 0); + + // generate value payload + int valLen = sizeof(val) / sizeof(val[0]); + for (int i = 6; i < valLen; ++i) { + char c = char(i & 0xff); + if (c == 0) { + c = 1; + } + val[i] = c; + } + + ret = tdbTbInsert(pDb, "key1", strlen("key1"), val, valLen, &txn); + GTEST_ASSERT_EQ(ret, 0); + + // if pool is full, commit the transaction and start a new one + if (pPool->size >= poolLimit) { + // commit current transaction + tdbCommit(pEnv, &txn); + tdbTxnClose(&txn); + + // start a new transaction + clearPool(pPool); + txnid++; + tdbTxnOpen(&txn, txnid, poolMalloc, poolFree, pPool, TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED); + tdbBegin(pEnv, &txn); + } + } + + // commit the transaction + tdbCommit(pEnv, &txn); + tdbTxnClose(&txn); + + { // Query the data + void *pVal = NULL; + int vLen; + + for (int i = 1; i <= nData; i++) { + sprintf(key, "key%d", i); + sprintf(val, "value%d", i); + + ret = tdbTbGet(pDb, key, strlen(key), &pVal, &vLen); + ASSERT(ret == 0); + GTEST_ASSERT_EQ(ret, 0); + + GTEST_ASSERT_EQ(vLen, strlen(val)); + GTEST_ASSERT_EQ(memcmp(val, pVal, vLen), 0); + } + + tdbFree(pVal); + } + + { // Iterate to query the DB data + TBC *pDBC; + void *pKey = NULL; + void *pVal = NULL; + int vLen, kLen; + int count = 0; + + ret = tdbTbcOpen(pDb, &pDBC, NULL); + GTEST_ASSERT_EQ(ret, 0); + + tdbTbcMoveToFirst(pDBC); + + for (;;) { + ret = tdbTbcNext(pDBC, &pKey, &kLen, &pVal, &vLen); + if (ret < 0) break; + + // 
std::cout.write((char *)pKey, kLen) /* << " " << kLen */ << " "; + // std::cout.write((char *)pVal, vLen) /* << " " << vLen */; + // std::cout << std::endl; + + count++; + } + + GTEST_ASSERT_EQ(count, nData); + + tdbTbcClose(pDBC); + + tdbFree(pKey); + tdbFree(pVal); + } + } + + ret = tdbTbDrop(pDb); + GTEST_ASSERT_EQ(ret, 0); + + // Close a database + tdbTbClose(pDb); + + // Close Env + ret = tdbClose(pEnv); + GTEST_ASSERT_EQ(ret, 0); +} From 56b5ff03e64a912c0e444bab87dee115c1c4e9d6 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Mon, 20 Jun 2022 17:21:12 +0800 Subject: [PATCH 30/40] fix: disable project elimination when the project node has parent --- source/libs/planner/src/planOptimizer.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/source/libs/planner/src/planOptimizer.c b/source/libs/planner/src/planOptimizer.c index cb4a967761..5ed49f6aae 100644 --- a/source/libs/planner/src/planOptimizer.c +++ b/source/libs/planner/src/planOptimizer.c @@ -1083,6 +1083,11 @@ static int32_t partTagsOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogicSub } static bool eliminateProjOptMayBeOptimized(SLogicNode* pNode) { + //TODO: enable this optimization after new mechanising that map projection and targets of project node + if (NULL != pNode->pParent) { + return false; + } + if (QUERY_NODE_LOGIC_PLAN_PROJECT != nodeType(pNode) || 1 != LIST_LENGTH(pNode->pChildren)) { return false; } @@ -1122,10 +1127,9 @@ static int32_t eliminateProjOptimizeImpl(SOptimizeContext* pCxt, SLogicSubplan* SNode* pProjection = NULL; FOREACH(pProjection, pProjectNode->pProjections) { - SColumnNode* projColumn = (SColumnNode*)pProjection; SNode* pChildTarget = NULL; FOREACH(pChildTarget, pChild->pTargets) { - if (strcmp(projColumn->colName, ((SColumnNode*)pChildTarget)->colName) == 0) { + if (strcmp(((SColumnNode*)pProjection)->colName, ((SColumnNode*)pChildTarget)->colName) == 0) { nodesListAppend(pNewChildTargets, nodesCloneNode(pChildTarget)); break; } From 
e441bd266ba1c69a4e1e4810f7c9b85adbf311f4 Mon Sep 17 00:00:00 2001 From: Minghao Li Date: Mon, 20 Jun 2022 17:48:56 +0800 Subject: [PATCH 31/40] refactor(sync): add changing state --- include/common/tmsgdef.h | 1 + include/libs/sync/sync.h | 41 +-- include/libs/sync/syncTools.h | 34 +++ include/util/taoserror.h | 3 + source/dnode/mnode/impl/src/mndMain.c | 48 ++-- source/dnode/mnode/impl/src/mndTrans.c | 6 +- source/dnode/vnode/src/vnd/vnodeSync.c | 34 +-- source/libs/sync/inc/syncInt.h | 12 +- source/libs/sync/inc/syncRaftCfg.h | 10 +- source/libs/sync/inc/syncSnapshot.h | 21 +- source/libs/sync/src/syncMain.c | 266 +++++++++++++----- source/libs/sync/src/syncMessage.c | 129 +++++++++ source/libs/sync/src/syncRaftCfg.c | 8 +- source/libs/sync/src/syncSnapshot.c | 40 ++- source/libs/sync/src/syncUtil.c | 12 +- source/libs/sync/test/CMakeLists.txt | 14 + .../test/syncConfigChangeSnapshotTest.cpp | 4 +- .../libs/sync/test/syncConfigChangeTest.cpp | 4 +- source/libs/sync/test/syncRaftCfgTest.cpp | 2 +- .../libs/sync/test/syncReconfigFinishTest.cpp | 135 +++++++++ source/libs/sync/test/syncTestTool.cpp | 4 +- source/util/src/terror.c | 3 + 22 files changed, 658 insertions(+), 173 deletions(-) create mode 100644 source/libs/sync/test/syncReconfigFinishTest.cpp diff --git a/include/common/tmsgdef.h b/include/common/tmsgdef.h index 1b640642d7..eeac619105 100644 --- a/include/common/tmsgdef.h +++ b/include/common/tmsgdef.h @@ -236,6 +236,7 @@ enum { TD_DEF_MSG_TYPE(TDMT_SYNC_COMMON_RESPONSE, "sync-common-response", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_SYNC_APPLY_MSG, "sync-apply-msg", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_SYNC_CONFIG_CHANGE, "sync-config-change", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_SYNC_CONFIG_CHANGE_FINISH, "sync-config-change-finish", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_SYNC_SNAPSHOT_SEND, "sync-snapshot-send", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_SYNC_SNAPSHOT_RSP, "sync-snapshot-rsp", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_SYNC_LEADER_TRANSFER, "sync-leader-transfer", NULL, 
NULL) diff --git a/include/libs/sync/sync.h b/include/libs/sync/sync.h index dd9fd384ce..e963f25616 100644 --- a/include/libs/sync/sync.h +++ b/include/libs/sync/sync.h @@ -24,6 +24,8 @@ extern "C" { #include "tdef.h" #include "tmsgcb.h" +extern bool gRaftDetailLog; + #define SYNC_INDEX_BEGIN 0 #define SYNC_INDEX_INVALID -1 @@ -61,28 +63,35 @@ typedef struct SSyncCfg { } SSyncCfg; typedef struct SFsmCbMeta { - SyncIndex index; - SyncIndex lastConfigIndex; - bool isWeak; int32_t code; - ESyncState state; - uint64_t seqNum; + SyncIndex index; SyncTerm term; + uint64_t seqNum; + SyncIndex lastConfigIndex; + ESyncState state; SyncTerm currentTerm; + bool isWeak; uint64_t flag; } SFsmCbMeta; typedef struct SReConfigCbMeta { - int32_t code; - SyncIndex index; - SyncTerm term; - SyncIndex lastConfigIndex; - SyncTerm currentTerm; + int32_t code; + SyncIndex index; + SyncTerm term; + uint64_t seqNum; + SyncIndex lastConfigIndex; + ESyncState state; + SyncTerm currentTerm; + bool isWeak; + uint64_t flag; + + // config info SSyncCfg oldCfg; SSyncCfg newCfg; - bool isDrop; - uint64_t flag; - uint64_t seqNum; + SyncIndex newCfgIndex; + SyncTerm newCfgTerm; + uint64_t newCfgSeqNum; + } SReConfigCbMeta; typedef struct SSnapshot { @@ -107,8 +116,7 @@ typedef struct SSyncFSM { void (*FpReConfigCb)(struct SSyncFSM* pFsm, const SRpcMsg* pMsg, SReConfigCbMeta cbMeta); void (*FpLeaderTransferCb)(struct SSyncFSM* pFsm, const SRpcMsg* pMsg, SFsmCbMeta cbMeta); - - int32_t (*FpGetSnapshot)(struct SSyncFSM* pFsm, SSnapshot* pSnapshot, void *pReaderParam, void** ppReader); + int32_t (*FpGetSnapshot)(struct SSyncFSM* pFsm, SSnapshot* pSnapshot, void* pReaderParam, void** ppReader); int32_t (*FpGetSnapshotInfo)(struct SSyncFSM* pFsm, SSnapshot* pSnapshot); int32_t (*FpSnapshotStartRead)(struct SSyncFSM* pFsm, void** ppReader); @@ -189,14 +197,13 @@ ESyncState syncGetMyRole(int64_t rid); bool syncIsReady(int64_t rid); const char* syncGetMyRoleStr(int64_t rid); SyncTerm syncGetMyTerm(int64_t 
rid); +SyncGroupId syncGetVgId(int64_t rid); void syncGetEpSet(int64_t rid, SEpSet* pEpSet); -int32_t syncGetVgId(int64_t rid); int32_t syncPropose(int64_t rid, const SRpcMsg* pMsg, bool isWeak); bool syncEnvIsStart(); const char* syncStr(ESyncState state); bool syncIsRestoreFinish(int64_t rid); - int32_t syncReconfig(int64_t rid, const SSyncCfg* pNewCfg); // build SRpcMsg, need to call syncPropose with SRpcMsg diff --git a/include/libs/sync/syncTools.h b/include/libs/sync/syncTools.h index fbf124bf47..37b465e56e 100644 --- a/include/libs/sync/syncTools.h +++ b/include/libs/sync/syncTools.h @@ -489,6 +489,40 @@ void syncLeaderTransferPrint2(char* s, const SyncLeaderTransfer* pMsg); void syncLeaderTransferLog(const SyncLeaderTransfer* pMsg); void syncLeaderTransferLog2(char* s, const SyncLeaderTransfer* pMsg); + +// --------------------------------------------- +typedef struct SyncReconfigFinish { + uint32_t bytes; + int32_t vgId; + uint32_t msgType; + SSyncCfg oldCfg; + SSyncCfg newCfg; + SyncIndex newCfgIndex; + SyncTerm newCfgTerm; + uint64_t newCfgSeqNum; + +} SyncReconfigFinish; + +SyncReconfigFinish* syncReconfigFinishBuild(int32_t vgId); +void syncReconfigFinishDestroy(SyncReconfigFinish* pMsg); +void syncReconfigFinishSerialize(const SyncReconfigFinish* pMsg, char* buf, uint32_t bufLen); +void syncReconfigFinishDeserialize(const char* buf, uint32_t len, SyncReconfigFinish* pMsg); +char* syncReconfigFinishSerialize2(const SyncReconfigFinish* pMsg, uint32_t* len); +SyncReconfigFinish* syncReconfigFinishDeserialize2(const char* buf, uint32_t len); +void syncReconfigFinish2RpcMsg(const SyncReconfigFinish* pMsg, SRpcMsg* pRpcMsg); +void syncReconfigFinishFromRpcMsg(const SRpcMsg* pRpcMsg, SyncReconfigFinish* pMsg); +SyncReconfigFinish* syncReconfigFinishFromRpcMsg2(const SRpcMsg* pRpcMsg); +cJSON* syncReconfigFinish2Json(const SyncReconfigFinish* pMsg); +char* syncReconfigFinish2Str(const SyncReconfigFinish* pMsg); + +// for debug ---------------------- +void 
syncReconfigFinishPrint(const SyncReconfigFinish* pMsg); +void syncReconfigFinishPrint2(char* s, const SyncReconfigFinish* pMsg); +void syncReconfigFinishLog(const SyncReconfigFinish* pMsg); +void syncReconfigFinishLog2(char* s, const SyncReconfigFinish* pMsg); + + + // on message ---------------------- int32_t syncNodeOnPingCb(SSyncNode* ths, SyncPing* pMsg); int32_t syncNodeOnPingReplyCb(SSyncNode* ths, SyncPingReply* pMsg); diff --git a/include/util/taoserror.h b/include/util/taoserror.h index fde016375b..053d295e9e 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -416,6 +416,9 @@ int32_t* taosGetErrno(); #define TSDB_CODE_SYN_NOT_LEADER TAOS_DEF_ERROR_CODE(0, 0x090C) #define TSDB_CODE_SYN_ONE_REPLICA TAOS_DEF_ERROR_CODE(0, 0x090D) #define TSDB_CODE_SYN_NOT_IN_NEW_CONFIG TAOS_DEF_ERROR_CODE(0, 0x090E) +#define TSDB_CODE_SYN_NEW_CONFIG_ERROR TAOS_DEF_ERROR_CODE(0, 0x090F) +#define TSDB_CODE_SYN_RECONFIG_NOT_READY TAOS_DEF_ERROR_CODE(0, 0x0910) +#define TSDB_CODE_SYN_PROPOSE_NOT_READY TAOS_DEF_ERROR_CODE(0, 0x0911) #define TSDB_CODE_SYN_INTERNAL_ERROR TAOS_DEF_ERROR_CODE(0, 0x09FF) // tq diff --git a/source/dnode/mnode/impl/src/mndMain.c b/source/dnode/mnode/impl/src/mndMain.c index 5ea55d558f..f950ab3b4e 100644 --- a/source/dnode/mnode/impl/src/mndMain.c +++ b/source/dnode/mnode/impl/src/mndMain.c @@ -58,21 +58,21 @@ static void *mndBuildTimerMsg(int32_t *pContLen) { static void mndPullupTrans(SMnode *pMnode) { int32_t contLen = 0; - void * pReq = mndBuildTimerMsg(&contLen); + void *pReq = mndBuildTimerMsg(&contLen); SRpcMsg rpcMsg = {.msgType = TDMT_MND_TRANS_TIMER, .pCont = pReq, .contLen = contLen}; tmsgPutToQueue(&pMnode->msgCb, WRITE_QUEUE, &rpcMsg); } static void mndCalMqRebalance(SMnode *pMnode) { int32_t contLen = 0; - void * pReq = mndBuildTimerMsg(&contLen); + void *pReq = mndBuildTimerMsg(&contLen); SRpcMsg rpcMsg = {.msgType = TDMT_MND_MQ_TIMER, .pCont = pReq, .contLen = contLen}; tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, 
&rpcMsg); } static void mndPullupTelem(SMnode *pMnode) { int32_t contLen = 0; - void * pReq = mndBuildTimerMsg(&contLen); + void *pReq = mndBuildTimerMsg(&contLen); SRpcMsg rpcMsg = {.msgType = TDMT_MND_TELEM_TIMER, .pCont = pReq, .contLen = contLen}; tmsgPutToQueue(&pMnode->msgCb, READ_QUEUE, &rpcMsg); } @@ -86,8 +86,8 @@ static void mndPushTtlTime(SMnode *pMnode) { pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void **)&pVgroup); if (pIter == NULL) break; - int32_t contLen = sizeof(SMsgHead) + sizeof(int32_t); - SMsgHead *pHead = rpcMallocCont(contLen); + int32_t contLen = sizeof(SMsgHead) + sizeof(int32_t); + SMsgHead *pHead = rpcMallocCont(contLen); if (pHead == NULL) { mError("ttl time malloc err. contLen:%d", contLen); sdbRelease(pSdb, pVgroup); @@ -97,13 +97,13 @@ static void mndPushTtlTime(SMnode *pMnode) { pHead->vgId = htonl(pVgroup->vgId); int32_t t = taosGetTimestampSec(); - *(int32_t*)(POINTER_SHIFT(pHead, sizeof(SMsgHead))) = htonl(t); + *(int32_t *)(POINTER_SHIFT(pHead, sizeof(SMsgHead))) = htonl(t); SRpcMsg rpcMsg = {.msgType = TDMT_VND_DROP_TTL_TABLE, .pCont = pHead, .contLen = contLen}; - SEpSet epSet = mndGetVgroupEpset(pMnode, pVgroup); + SEpSet epSet = mndGetVgroupEpset(pMnode, pVgroup); int32_t code = tmsgSendReq(&epSet, &rpcMsg); - if(code != 0){ + if (code != 0) { mError("ttl time seed err. code:%d", code); } mError("ttl time seed succ. 
time:%d", t); @@ -117,7 +117,7 @@ static void *mndThreadFp(void *param) { setThreadName("mnode-timer"); while (1) { - if (lastTime % (864000) == 0) { // sleep 1 day for ttl + if (lastTime % (864000) == 0) { // sleep 1 day for ttl mndPushTtlTime(pMnode); } @@ -416,7 +416,7 @@ void mndStop(SMnode *pMnode) { } int32_t mndProcessSyncMsg(SRpcMsg *pMsg) { - SMnode * pMnode = pMsg->info.node; + SMnode *pMnode = pMsg->info.node; SSyncMgmt *pMgmt = &pMnode->syncMgmt; int32_t code = 0; @@ -433,15 +433,19 @@ int32_t mndProcessSyncMsg(SRpcMsg *pMsg) { return -1; } - char logBuf[512] = {0}; - char *syncNodeStr = sync2SimpleStr(pMgmt->sync); - snprintf(logBuf, sizeof(logBuf), "==mndProcessSyncMsg== msgType:%d, syncNode: %s", pMsg->msgType, syncNodeStr); - static int64_t mndTick = 0; - if (++mndTick % 10 == 1) { - mTrace("sync trace msg:%s, %s", TMSG_INFO(pMsg->msgType), syncNodeStr); - } - syncRpcMsgLog2(logBuf, pMsg); - taosMemoryFree(syncNodeStr); + do { + char *syncNodeStr = sync2SimpleStr(pMgmt->sync); + static int64_t mndTick = 0; + if (++mndTick % 10 == 1) { + mTrace("vgId:%d, sync heartbeat msg:%s, %s", syncGetVgId(pMgmt->sync), TMSG_INFO(pMsg->msgType), syncNodeStr); + } + if (gRaftDetailLog) { + char logBuf[512] = {0}; + snprintf(logBuf, sizeof(logBuf), "==mndProcessSyncMsg== msgType:%d, syncNode: %s", pMsg->msgType, syncNodeStr); + syncRpcMsgLog2(logBuf, pMsg); + } + taosMemoryFree(syncNodeStr); + } while (0); // ToDo: ugly! 
use function pointer if (syncNodeSnapshotEnable(pSyncNode)) { @@ -578,7 +582,7 @@ static int32_t mndCheckMsgContent(SRpcMsg *pMsg) { } int32_t mndProcessRpcMsg(SRpcMsg *pMsg) { - SMnode * pMnode = pMsg->info.node; + SMnode *pMnode = pMsg->info.node; MndMsgFp fp = pMnode->msgFp[TMSG_INDEX(pMsg->msgType)]; if (fp == NULL) { mError("msg:%p, failed to get msg handle, app:%p type:%s", pMsg, pMsg->info.ahandle, TMSG_INFO(pMsg->msgType)); @@ -631,7 +635,7 @@ int32_t mndGetMonitorInfo(SMnode *pMnode, SMonClusterInfo *pClusterInfo, SMonVgr SMonGrantInfo *pGrantInfo) { if (mndAcquireRpcRef(pMnode) != 0) return -1; - SSdb * pSdb = pMnode->pSdb; + SSdb *pSdb = pMnode->pSdb; int64_t ms = taosGetTimestampMs(); pClusterInfo->dnodes = taosArrayInit(sdbGetSize(pSdb, SDB_DNODE), sizeof(SMonDnodeDesc)); @@ -707,7 +711,7 @@ int32_t mndGetMonitorInfo(SMnode *pMnode, SMonClusterInfo *pClusterInfo, SMonVgr pGrantInfo->timeseries_used += pVgroup->numOfTimeSeries; tstrncpy(desc.status, "unsynced", sizeof(desc.status)); for (int32_t i = 0; i < pVgroup->replica; ++i) { - SVnodeGid * pVgid = &pVgroup->vnodeGid[i]; + SVnodeGid *pVgid = &pVgroup->vnodeGid[i]; SMonVnodeDesc *pVnDesc = &desc.vnodes[i]; pVnDesc->dnode_id = pVgid->dnodeId; tstrncpy(pVnDesc->vnode_role, syncStr(pVgid->role), sizeof(pVnDesc->vnode_role)); diff --git a/source/dnode/mnode/impl/src/mndTrans.c b/source/dnode/mnode/impl/src/mndTrans.c index 31a955b030..9e059cf9c5 100644 --- a/source/dnode/mnode/impl/src/mndTrans.c +++ b/source/dnode/mnode/impl/src/mndTrans.c @@ -22,8 +22,8 @@ #include "mndSync.h" #include "mndUser.h" -#define TRANS_VER_NUMBER 1 -#define TRANS_ARRAY_SIZE 8 +#define TRANS_VER_NUMBER 1 +#define TRANS_ARRAY_SIZE 8 #define TRANS_RESERVE_SIZE 64 static SSdbRaw *mndTransActionEncode(STrans *pTrans); @@ -896,7 +896,7 @@ static void mndTransResetActions(SMnode *pMnode, STrans *pTrans, SArray *pArray) pAction->rawWritten = 0; pAction->msgSent = 0; pAction->msgReceived = 0; - if (pAction->errCode == 
TSDB_CODE_RPC_REDIRECT || pAction->errCode == TSDB_CODE_SYN_NOT_IN_NEW_CONFIG || + if (pAction->errCode == TSDB_CODE_RPC_REDIRECT || pAction->errCode == TSDB_CODE_SYN_NEW_CONFIG_ERROR || pAction->errCode == TSDB_CODE_SYN_INTERNAL_ERROR || pAction->errCode == TSDB_CODE_SYN_NOT_LEADER) { pAction->epSet.inUse = (pAction->epSet.inUse + 1) % pAction->epSet.numOfEps; mDebug("trans:%d, %s:%d execute status is reset and set epset inuse:%d", pTrans->id, mndTransStr(pAction->stage), diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index 297a486ee9..f1c43512ce 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -119,7 +119,7 @@ static int32_t vnodeProcessAlterReplicaReq(SVnode *pVnode, SRpcMsg *pMsg) { } void vnodeProposeMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) { - SVnode * pVnode = pInfo->ahandle; + SVnode *pVnode = pInfo->ahandle; int32_t vgId = pVnode->config.vgId; int32_t code = 0; SRpcMsg *pMsg = NULL; @@ -174,7 +174,7 @@ void vnodeProposeMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) { } void vnodeApplyMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) { - SVnode * pVnode = pInfo->ahandle; + SVnode *pVnode = pInfo->ahandle; int32_t vgId = pVnode->config.vgId; int32_t code = 0; SRpcMsg *pMsg = NULL; @@ -211,21 +211,23 @@ int32_t vnodeProcessSyncReq(SVnode *pVnode, SRpcMsg *pMsg, SRpcMsg **pRsp) { SSyncNode *pSyncNode = syncNodeAcquire(pVnode->sync); assert(pSyncNode != NULL); - ESyncState state = syncGetMyRole(pVnode->sync); - SyncTerm currentTerm = syncGetMyTerm(pVnode->sync); - SMsgHead *pHead = pMsg->pCont; + STraceId *trace = &pMsg->info.traceId; - char logBuf[512] = {0}; - char *syncNodeStr = sync2SimpleStr(pVnode->sync); - snprintf(logBuf, sizeof(logBuf), "==vnodeProcessSyncReq== msgType:%d, syncNode: %s", pMsg->msgType, syncNodeStr); - static int64_t vndTick = 0; - STraceId * trace = &pMsg->info.traceId; - if (++vndTick % 10 == 1) { - 
vGTrace("sync trace msg:%s, %s", TMSG_INFO(pMsg->msgType), syncNodeStr); - } - syncRpcMsgLog2(logBuf, pMsg); - taosMemoryFree(syncNodeStr); + do { + char *syncNodeStr = sync2SimpleStr(pVnode->sync); + static int64_t vndTick = 0; + if (++vndTick % 10 == 1) { + vGTrace("vgId:%d, sync heartbeat msg:%s, %s", syncGetVgId(pVnode->sync), TMSG_INFO(pMsg->msgType), syncNodeStr); + } + if (gRaftDetailLog) { + char logBuf[512] = {0}; + snprintf(logBuf, sizeof(logBuf), "==vnodeProcessSyncReq== msgType:%d, syncNode: %s", pMsg->msgType, + syncNodeStr); + syncRpcMsgLog2(logBuf, pMsg); + } + taosMemoryFree(syncNodeStr); + } while (0); SRpcMsg *pRpcMsg = pMsg; @@ -348,7 +350,7 @@ static void vnodeSyncReconfig(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SReCon } static void vnodeSyncCommitMsg(SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbMeta) { - SVnode * pVnode = pFsm->data; + SVnode *pVnode = pFsm->data; SSnapshot snapshot = {0}; SyncIndex beginIndex = SYNC_INDEX_INVALID; char logBuf[256] = {0}; diff --git a/source/libs/sync/inc/syncInt.h b/source/libs/sync/inc/syncInt.h index dcfea84f36..02a9e189cf 100644 --- a/source/libs/sync/inc/syncInt.h +++ b/source/libs/sync/inc/syncInt.h @@ -159,7 +159,8 @@ typedef struct SSyncNode { SSyncSnapshotSender* senders[TSDB_MAX_REPLICA]; SSyncSnapshotReceiver* pNewNodeReceiver; - // SSnapshotMeta sMeta; + // is config changing + bool changing; } SSyncNode; @@ -198,7 +199,7 @@ char* syncNode2Str(const SSyncNode* pSyncNode); void syncNodeEventLog(const SSyncNode* pSyncNode, char* str); char* syncNode2SimpleStr(const SSyncNode* pSyncNode); bool syncNodeInConfig(SSyncNode* pSyncNode, const SSyncCfg* config); -void syncNodeUpdateConfig(SSyncNode* pSyncNode, SSyncCfg* newConfig, SyncIndex lastConfigChangeIndex, bool* isDrop); +void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* newConfig, SyncIndex lastConfigChangeIndex, bool* isDrop); SSyncNode* syncNodeAcquire(int64_t rid); void syncNodeRelease(SSyncNode* pNode); @@ -238,12 +239,15 @@ 
int32_t syncNodeUpdateNewConfigIndex(SSyncNode* ths, SSyncCfg* pNewCfg); bool syncNodeInRaftGroup(SSyncNode* ths, SRaftId* pRaftId); SSyncSnapshotSender* syncNodeGetSnapshotSender(SSyncNode* ths, SRaftId* pDestId); -int32_t syncGetSnapshotMeta(int64_t rid, struct SSnapshotMeta* sMeta); -int32_t syncGetSnapshotMetaByIndex(int64_t rid, SyncIndex snapshotIndex, struct SSnapshotMeta* sMeta); +int32_t syncGetSnapshotMeta(int64_t rid, struct SSnapshotMeta* sMeta); +int32_t syncGetSnapshotMetaByIndex(int64_t rid, SyncIndex snapshotIndex, struct SSnapshotMeta* sMeta); void syncStartNormal(int64_t rid); void syncStartStandBy(int64_t rid); +bool syncNodeCanChange(SSyncNode* pSyncNode); +bool syncNodeCheckNewConfig(SSyncNode* pSyncNode, const SSyncCfg* pNewCfg); + // for debug -------------- void syncNodePrint(SSyncNode* pObj); void syncNodePrint2(char* s, SSyncNode* pObj); diff --git a/source/libs/sync/inc/syncRaftCfg.h b/source/libs/sync/inc/syncRaftCfg.h index 435ad98fb3..7f45276e9f 100644 --- a/source/libs/sync/inc/syncRaftCfg.h +++ b/source/libs/sync/inc/syncRaftCfg.h @@ -49,14 +49,14 @@ int32_t raftCfgClose(SRaftCfg *pRaftCfg); int32_t raftCfgPersist(SRaftCfg *pRaftCfg); int32_t raftCfgAddConfigIndex(SRaftCfg *pRaftCfg, SyncIndex configIndex); -cJSON *syncCfg2Json(SSyncCfg *pSyncCfg); -char *syncCfg2Str(SSyncCfg *pSyncCfg); -char *syncCfg2SimpleStr(SSyncCfg *pSyncCfg); +cJSON * syncCfg2Json(SSyncCfg *pSyncCfg); +char * syncCfg2Str(SSyncCfg *pSyncCfg); +char * syncCfg2SimpleStr(SSyncCfg *pSyncCfg); int32_t syncCfgFromJson(const cJSON *pRoot, SSyncCfg *pSyncCfg); int32_t syncCfgFromStr(const char *s, SSyncCfg *pSyncCfg); -cJSON *raftCfg2Json(SRaftCfg *pRaftCfg); -char *raftCfg2Str(SRaftCfg *pRaftCfg); +cJSON * raftCfg2Json(SRaftCfg *pRaftCfg); +char * raftCfg2Str(SRaftCfg *pRaftCfg); int32_t raftCfgFromJson(const cJSON *pRoot, SRaftCfg *pRaftCfg); int32_t raftCfgFromStr(const char *s, SRaftCfg *pRaftCfg); diff --git a/source/libs/sync/inc/syncSnapshot.h 
b/source/libs/sync/inc/syncSnapshot.h index f6544dd79d..069154fb93 100644 --- a/source/libs/sync/inc/syncSnapshot.h +++ b/source/libs/sync/inc/syncSnapshot.h @@ -66,10 +66,11 @@ char *snapshotSender2SimpleStr(SSyncSnapshotSender *pSender, char typedef struct SSyncSnapshotReceiver { bool start; - int32_t ack; - void *pWriter; - SyncTerm term; - SyncTerm privateTerm; + int32_t ack; + void *pWriter; + SyncTerm term; + SyncTerm privateTerm; + SSnapshot snapshot; SSyncNode *pSyncNode; SRaftId fromId; @@ -78,12 +79,12 @@ typedef struct SSyncSnapshotReceiver { SSyncSnapshotReceiver *snapshotReceiverCreate(SSyncNode *pSyncNode, SRaftId fromId); void snapshotReceiverDestroy(SSyncSnapshotReceiver *pReceiver); -void snapshotReceiverStart(SSyncSnapshotReceiver *pReceiver, SyncTerm privateTerm, SRaftId fromId); -bool snapshotReceiverIsStart(SSyncSnapshotReceiver *pReceiver); -void snapshotReceiverStop(SSyncSnapshotReceiver *pReceiver, bool apply); -cJSON *snapshotReceiver2Json(SSyncSnapshotReceiver *pReceiver); -char *snapshotReceiver2Str(SSyncSnapshotReceiver *pReceiver); -char *snapshotReceiver2SimpleStr(SSyncSnapshotReceiver *pReceiver, char *event); +void snapshotReceiverStart(SSyncSnapshotReceiver *pReceiver, SyncTerm privateTerm, SyncSnapshotSend *pBeginMsg); +bool snapshotReceiverIsStart(SSyncSnapshotReceiver *pReceiver); +void snapshotReceiverStop(SSyncSnapshotReceiver *pReceiver, bool apply); +cJSON *snapshotReceiver2Json(SSyncSnapshotReceiver *pReceiver); +char *snapshotReceiver2Str(SSyncSnapshotReceiver *pReceiver); +char *snapshotReceiver2SimpleStr(SSyncSnapshotReceiver *pReceiver, char *event); int32_t syncNodeOnSnapshotSendCb(SSyncNode *ths, SyncSnapshotSend *pMsg); int32_t syncNodeOnSnapshotRspCb(SSyncNode *ths, SyncSnapshotRsp *pMsg); diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index e13271341d..8da24d7c05 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -178,6 +178,23 @@ int32_t 
syncSetStandby(int64_t rid) { return 0; } +bool syncNodeCheckNewConfig(SSyncNode* pSyncNode, const SSyncCfg* pNewCfg) { + bool IamInNew = syncNodeInConfig(pSyncNode, pNewCfg); + if (!IamInNew) { + return false; + } + + if (pNewCfg->replicaNum > pSyncNode->replicaNum + 1) { + return false; + } + + if (pNewCfg->replicaNum < pSyncNode->replicaNum - 1) { + return false; + } + + return true; +} + int32_t syncReconfigBuild(int64_t rid, const SSyncCfg* pNewCfg, SRpcMsg* pRpcMsg) { SSyncNode* pSyncNode = (SSyncNode*)taosAcquireRef(tsNodeRefId, rid); if (pSyncNode == NULL) { @@ -185,13 +202,12 @@ int32_t syncReconfigBuild(int64_t rid, const SSyncCfg* pNewCfg, SRpcMsg* pRpcMsg return -1; } ASSERT(rid == pSyncNode->rid); - int32_t ret = 0; - bool IamInNew = syncNodeInConfig(pSyncNode, pNewCfg); - if (!IamInNew) { + if (!syncNodeCheckNewConfig(pSyncNode, pNewCfg)) { taosReleaseRef(tsNodeRefId, pSyncNode->rid); - terrno = TSDB_CODE_SYN_NOT_IN_NEW_CONFIG; + terrno = TSDB_CODE_SYN_NEW_CONFIG_ERROR; + sError("syncNodeCheckNewConfig error"); return -1; } @@ -215,12 +231,10 @@ int32_t syncReconfig(int64_t rid, const SSyncCfg* pNewCfg) { } ASSERT(rid == pSyncNode->rid); - bool IamInNew = syncNodeInConfig(pSyncNode, pNewCfg); - - if (!IamInNew) { - sError("sync reconfig error, not in new config"); + if (!syncNodeCheckNewConfig(pSyncNode, pNewCfg)) { taosReleaseRef(tsNodeRefId, pSyncNode->rid); - terrno = TSDB_CODE_SYN_NOT_IN_NEW_CONFIG; + terrno = TSDB_CODE_SYN_NEW_CONFIG_ERROR; + sError("syncNodeCheckNewConfig error"); return -1; } @@ -425,18 +439,6 @@ const char* syncGetMyRoleStr(int64_t rid) { return s; } -int32_t syncGetVgId(int64_t rid) { - SSyncNode* pSyncNode = (SSyncNode*)taosAcquireRef(tsNodeRefId, rid); - if (pSyncNode == NULL) { - return TAOS_SYNC_STATE_ERROR; - } - assert(rid == pSyncNode->rid); - int32_t vgId = pSyncNode->vgId; - - taosReleaseRef(tsNodeRefId, pSyncNode->rid); - return vgId; -} - SyncTerm syncGetMyTerm(int64_t rid) { SSyncNode* pSyncNode = 
(SSyncNode*)taosAcquireRef(tsNodeRefId, rid); if (pSyncNode == NULL) { @@ -449,6 +451,18 @@ SyncTerm syncGetMyTerm(int64_t rid) { return term; } +SyncGroupId syncGetVgId(int64_t rid) { + SSyncNode* pSyncNode = (SSyncNode*)taosAcquireRef(tsNodeRefId, rid); + if (pSyncNode == NULL) { + return TAOS_SYNC_STATE_ERROR; + } + assert(rid == pSyncNode->rid); + SyncGroupId vgId = pSyncNode->vgId; + + taosReleaseRef(tsNodeRefId, pSyncNode->rid); + return vgId; +} + void syncGetEpSet(int64_t rid, SEpSet* pEpSet) { SSyncNode* pSyncNode = (SSyncNode*)taosAcquireRef(tsNodeRefId, rid); if (pSyncNode == NULL) { @@ -589,6 +603,26 @@ int32_t syncNodePropose(SSyncNode* pSyncNode, const SRpcMsg* pMsg, bool isWeak) syncNodeEventLog(pSyncNode, eventLog); if (pSyncNode->state == TAOS_SYNC_STATE_LEADER) { + if (pSyncNode->changing && pMsg->msgType != TDMT_SYNC_CONFIG_CHANGE_FINISH) { + ret = -1; + terrno = TSDB_CODE_SYN_PROPOSE_NOT_READY; + sError("sync propose not ready, type:%s,%d", TMSG_INFO(pMsg->msgType), pMsg->msgType); + goto _END; + } + + // config change + if (pMsg->msgType == TDMT_SYNC_CONFIG_CHANGE) { + if (!syncNodeCanChange(pSyncNode)) { + ret = -1; + terrno = TSDB_CODE_SYN_RECONFIG_NOT_READY; + sError("sync reconfig not ready, type:%s,%d", TMSG_INFO(pMsg->msgType), pMsg->msgType); + goto _END; + } + + ASSERT(!pSyncNode->changing); + pSyncNode->changing = true; + } + SRespStub stub; stub.createTime = taosGetTimestampMs(); stub.rpcMsg = *pMsg; @@ -606,12 +640,16 @@ int32_t syncNodePropose(SSyncNode* pSyncNode, const SRpcMsg* pMsg, bool isWeak) sError("syncPropose pSyncNode->FpEqMsg is NULL"); } syncClientRequestDestroy(pSyncMsg); + goto _END; + } else { ret = -1; terrno = TSDB_CODE_SYN_NOT_LEADER; sError("syncPropose not leader, %s", syncUtilState2String(pSyncNode->state)); + goto _END; } +_END: return ret; } @@ -825,6 +863,9 @@ SSyncNode* syncNodeOpen(const SSyncInfo* pOldSyncInfo) { // snapshot receivers pSyncNode->pNewNodeReceiver = snapshotReceiverCreate(pSyncNode, 
EMPTY_RAFT_ID); + // is config changing + pSyncNode->changing = false; + // start in syncNodeStart // start raft // syncNodeBecomeFollower(pSyncNode); @@ -1253,20 +1294,32 @@ char* syncNode2Str(const SSyncNode* pSyncNode) { void syncNodeEventLog(const SSyncNode* pSyncNode, char* str) { int32_t userStrLen = strlen(str); + + SSnapshot snapshot = {.data = NULL, .lastApplyIndex = -1, .lastApplyTerm = 0}; + if (pSyncNode->pFsm->FpGetSnapshotInfo != NULL) { + pSyncNode->pFsm->FpGetSnapshotInfo(pSyncNode->pFsm, &snapshot); + } + SyncIndex logLastIndex = pSyncNode->pLogStore->syncLogLastIndex(pSyncNode->pLogStore); + if (userStrLen < 256) { char logBuf[128 + 256]; snprintf(logBuf, sizeof(logBuf), - "vgId:%d %s term:%lu commit:%ld standby:%d replica-num:%d lconfig:%ld sync event %s", pSyncNode->vgId, - syncUtilState2String(pSyncNode->state), pSyncNode->pRaftStore->currentTerm, pSyncNode->commitIndex, - pSyncNode->pRaftCfg->isStandBy, pSyncNode->replicaNum, pSyncNode->pRaftCfg->lastConfigIndex, str); + "vgId:%d, sync %s %s, term:%lu, commit:%ld, lastlog:%ld, lastsnapshot:%ld, standby:%d, replica-num:%d, " + "lconfig:%ld, changing:%d", + pSyncNode->vgId, syncUtilState2String(pSyncNode->state), str, pSyncNode->pRaftStore->currentTerm, + pSyncNode->commitIndex, logLastIndex, snapshot.lastApplyIndex, pSyncNode->pRaftCfg->isStandBy, + pSyncNode->replicaNum, pSyncNode->pRaftCfg->lastConfigIndex, pSyncNode->changing); sDebug("%s", logBuf); + } else { int len = 128 + userStrLen; char* s = (char*)taosMemoryMalloc(len); - snprintf(s, len, "vgId:%d %s term:%lu commit:%ld standby:%d replica-num:%d lconfig:%ld sync event %s", - pSyncNode->vgId, syncUtilState2String(pSyncNode->state), pSyncNode->pRaftStore->currentTerm, - pSyncNode->commitIndex, pSyncNode->pRaftCfg->isStandBy, pSyncNode->replicaNum, - pSyncNode->pRaftCfg->lastConfigIndex, str); + snprintf(s, len, + "vgId:%d, sync %s %s, term:%lu, commit:%ld, lastlog:%ld, lastsnapshot:%ld, standby:%d, replica-num:%d, " + "lconfig:%ld, 
changing:%d", + pSyncNode->vgId, syncUtilState2String(pSyncNode->state), str, pSyncNode->pRaftStore->currentTerm, + pSyncNode->commitIndex, logLastIndex, snapshot.lastApplyIndex, pSyncNode->pRaftCfg->isStandBy, + pSyncNode->replicaNum, pSyncNode->pRaftCfg->lastConfigIndex, pSyncNode->changing); sDebug("%s", s); taosMemoryFree(s); } @@ -1313,7 +1366,7 @@ bool syncNodeInConfig(SSyncNode* pSyncNode, const SSyncCfg* config) { return b1; } -void syncNodeUpdateConfig(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncIndex lastConfigChangeIndex, bool* isDrop) { +void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncIndex lastConfigChangeIndex, bool* isDrop) { SSyncCfg oldConfig = pSyncNode->pRaftCfg->cfg; pSyncNode->pRaftCfg->cfg = *pNewConfig; pSyncNode->pRaftCfg->lastConfigIndex = lastConfigChangeIndex; @@ -1451,7 +1504,7 @@ void syncNodeUpdateConfig(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncIndex raftCfgPersist(pSyncNode->pRaftCfg); if (gRaftDetailLog) { - syncNodeLog2("==syncNodeUpdateConfig==", pSyncNode); + syncNodeLog2("==syncNodeDoConfigChange==", pSyncNode); } } @@ -2170,33 +2223,70 @@ int32_t syncNodeUpdateNewConfigIndex(SSyncNode* ths, SSyncCfg* pNewCfg) { return -1; } -static int32_t syncNodeConfigChange(SSyncNode* ths, SRpcMsg* pRpcMsg, SSyncRaftEntry* pEntry) { +static int32_t syncNodeConfigChangeFinish(SSyncNode* ths, SRpcMsg* pRpcMsg, SSyncRaftEntry* pEntry) { + SyncReconfigFinish* pFinish = syncReconfigFinishFromRpcMsg2(pRpcMsg); + ASSERT(pFinish); + + if (ths->pFsm->FpReConfigCb != NULL) { + SReConfigCbMeta cbMeta = {0}; + cbMeta.code = 0; + cbMeta.index = pEntry->index; + cbMeta.term = pEntry->term; + cbMeta.seqNum = pEntry->seqNum; + cbMeta.lastConfigIndex = syncNodeGetSnapshotConfigIndex(ths, pEntry->index); + cbMeta.state = ths->state; + cbMeta.currentTerm = ths->pRaftStore->currentTerm; + cbMeta.isWeak = pEntry->isWeak; + cbMeta.flag = 0; + + cbMeta.oldCfg = pFinish->oldCfg; + cbMeta.newCfg = pFinish->newCfg; + 
cbMeta.newCfgIndex = pFinish->newCfgIndex; + cbMeta.newCfgTerm = pFinish->newCfgTerm; + cbMeta.newCfgSeqNum = pFinish->newCfgSeqNum; + + ths->pFsm->FpReConfigCb(ths->pFsm, pRpcMsg, cbMeta); + } + + // update changing + ths->changing = false; + + char tmpbuf[512]; + char* oldStr = syncCfg2SimpleStr(&(pFinish->oldCfg)); + char* newStr = syncCfg2SimpleStr(&(pFinish->newCfg)); + snprintf(tmpbuf, sizeof(tmpbuf), "config change finish from %d to %d, index:%ld, %s --> %s", + pFinish->oldCfg.replicaNum, pFinish->newCfg.replicaNum, pFinish->newCfgIndex, oldStr, newStr); + taosMemoryFree(oldStr); + taosMemoryFree(newStr); + syncNodeEventLog(ths, tmpbuf); + + syncReconfigFinishDestroy(pFinish); + + return 0; +} + +static int32_t syncNodeConfigChange(SSyncNode* ths, SRpcMsg* pRpcMsg, SSyncRaftEntry* pEntry, + SyncReconfigFinish* pFinish) { + // old config SSyncCfg oldSyncCfg = ths->pRaftCfg->cfg; + // new config SSyncCfg newSyncCfg; int32_t ret = syncCfgFromStr(pRpcMsg->pCont, &newSyncCfg); ASSERT(ret == 0); + // persist last config index + raftCfgAddConfigIndex(ths->pRaftCfg, pEntry->index); + raftCfgPersist(ths->pRaftCfg); + // update new config myIndex syncNodeUpdateNewConfigIndex(ths, &newSyncCfg); + bool isDrop = false; bool IamInNew = syncNodeInConfig(ths, &newSyncCfg); - - /* - for (int i = 0; i < newSyncCfg.replicaNum; ++i) { - if (strcmp(ths->myNodeInfo.nodeFqdn, (newSyncCfg.nodeInfo)[i].nodeFqdn) == 0 && - ths->myNodeInfo.nodePort == (newSyncCfg.nodeInfo)[i].nodePort) { - newSyncCfg.myIndex = i; - IamInNew = true; - break; - } - } - */ - - bool isDrop; - if (IamInNew) { - syncNodeUpdateConfig(ths, &newSyncCfg, pEntry->index, &isDrop); + // do config change + syncNodeDoConfigChange(ths, &newSyncCfg, pEntry->index, &isDrop); // change isStandBy to normal if (!isDrop) { @@ -2214,6 +2304,7 @@ static int32_t syncNodeConfigChange(SSyncNode* ths, SRpcMsg* pRpcMsg, SSyncRaftE syncNodeBecomeFollower(ths, tmpbuf); } } + } else { char tmpbuf[512]; char* oldStr = 
syncCfg2SimpleStr(&oldSyncCfg); @@ -2226,31 +2317,25 @@ static int32_t syncNodeConfigChange(SSyncNode* ths, SRpcMsg* pRpcMsg, SSyncRaftE syncNodeBecomeFollower(ths, tmpbuf); } - if (gRaftDetailLog) { - char* sOld = syncCfg2Str(&oldSyncCfg); - char* sNew = syncCfg2Str(&newSyncCfg); - sInfo("==config change== 0x11 old:%s new:%s isDrop:%d index:%ld IamInNew:%d \n", sOld, sNew, isDrop, pEntry->index, - IamInNew); - taosMemoryFree(sOld); - taosMemoryFree(sNew); - } + // set pFinish + pFinish->oldCfg = oldSyncCfg; + pFinish->newCfg = newSyncCfg; + pFinish->newCfgIndex = pEntry->index; + pFinish->newCfgTerm = pEntry->term; + pFinish->newCfgSeqNum = pEntry->seqNum; - // always call FpReConfigCb - if (ths->pFsm->FpReConfigCb != NULL) { - SReConfigCbMeta cbMeta = {0}; - cbMeta.code = 0; - cbMeta.currentTerm = ths->pRaftStore->currentTerm; - cbMeta.index = pEntry->index; - cbMeta.lastConfigIndex = syncNodeGetSnapshotConfigIndex(ths, pEntry->index); - cbMeta.term = pEntry->term; - cbMeta.newCfg = newSyncCfg; - cbMeta.oldCfg = oldSyncCfg; - cbMeta.seqNum = pEntry->seqNum; - cbMeta.flag = 0x11; - cbMeta.isDrop = isDrop; - ths->pFsm->FpReConfigCb(ths->pFsm, pRpcMsg, cbMeta); - } + return 0; +} +static int32_t syncNodeProposeConfigChangeFinish(SSyncNode* ths, SyncReconfigFinish* pFinish) { + SRpcMsg rpcMsg; + syncReconfigFinish2RpcMsg(pFinish, &rpcMsg); + + int32_t code = syncNodePropose(ths, &rpcMsg, false); + if (code != 0) { + sError("syncNodeProposeConfigChangeFinish error"); + ths->changing = false; + } return 0; } @@ -2292,9 +2377,21 @@ int32_t syncNodeCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex, // config change if (pEntry->originalRpcType == TDMT_SYNC_CONFIG_CHANGE) { - raftCfgAddConfigIndex(ths->pRaftCfg, pEntry->index); - raftCfgPersist(ths->pRaftCfg); - code = syncNodeConfigChange(ths, &rpcMsg, pEntry); + SyncReconfigFinish* pFinish = syncReconfigFinishBuild(ths->vgId); + ASSERT(pFinish != NULL); + + code = syncNodeConfigChange(ths, &rpcMsg, pEntry, 
pFinish); + ASSERT(code == 0); + + if (ths->state == TAOS_SYNC_STATE_LEADER) { + syncNodeProposeConfigChangeFinish(ths, pFinish); + } + syncReconfigFinishDestroy(pFinish); + } + + // config change finish + if (pEntry->originalRpcType == TDMT_SYNC_CONFIG_CHANGE_FINISH) { + code = syncNodeConfigChangeFinish(ths, &rpcMsg, pEntry); ASSERT(code == 0); } @@ -2345,3 +2442,28 @@ SSyncSnapshotSender* syncNodeGetSnapshotSender(SSyncNode* ths, SRaftId* pDestId) } return pSender; } + +bool syncNodeCanChange(SSyncNode* pSyncNode) { + if (pSyncNode->changing) { + sError("sync cannot change"); + return false; + } + + if ((pSyncNode->commitIndex >= SYNC_INDEX_BEGIN)) { + SyncIndex lastIndex = syncNodeGetLastIndex(pSyncNode); + if (pSyncNode->commitIndex != lastIndex) { + sError("sync cannot change2"); + return false; + } + } + + for (int i = 0; i < pSyncNode->peersNum; ++i) { + SSyncSnapshotSender* pSender = syncNodeGetSnapshotSender(pSyncNode, &(pSyncNode->peersId)[i]); + if (pSender->start) { + sError("sync cannot change3"); + return false; + } + } + + return true; +} \ No newline at end of file diff --git a/source/libs/sync/src/syncMessage.c b/source/libs/sync/src/syncMessage.c index 23165f6790..454609009c 100644 --- a/source/libs/sync/src/syncMessage.c +++ b/source/libs/sync/src/syncMessage.c @@ -2227,4 +2227,133 @@ void syncLeaderTransferLog2(char* s, const SyncLeaderTransfer* pMsg) { sTrace("syncLeaderTransferLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); taosMemoryFree(serialized); } +} + +// --------------------------------------------- +SyncReconfigFinish* syncReconfigFinishBuild(int32_t vgId) { + uint32_t bytes = sizeof(SyncReconfigFinish); + SyncReconfigFinish* pMsg = taosMemoryMalloc(bytes); + memset(pMsg, 0, bytes); + pMsg->bytes = bytes; + pMsg->vgId = vgId; + pMsg->msgType = TDMT_SYNC_CONFIG_CHANGE_FINISH; + return pMsg; +} + +void syncReconfigFinishDestroy(SyncReconfigFinish* pMsg) { + if (pMsg != NULL) { + taosMemoryFree(pMsg); + } +} + +void 
syncReconfigFinishSerialize(const SyncReconfigFinish* pMsg, char* buf, uint32_t bufLen) { + assert(pMsg->bytes <= bufLen); + memcpy(buf, pMsg, pMsg->bytes); +} + +void syncReconfigFinishDeserialize(const char* buf, uint32_t len, SyncReconfigFinish* pMsg) { + memcpy(pMsg, buf, len); + assert(len == pMsg->bytes); +} + +char* syncReconfigFinishSerialize2(const SyncReconfigFinish* pMsg, uint32_t* len) { + char* buf = taosMemoryMalloc(pMsg->bytes); + assert(buf != NULL); + syncReconfigFinishSerialize(pMsg, buf, pMsg->bytes); + if (len != NULL) { + *len = pMsg->bytes; + } + return buf; +} + +SyncReconfigFinish* syncReconfigFinishDeserialize2(const char* buf, uint32_t len) { + uint32_t bytes = *((uint32_t*)buf); + SyncReconfigFinish* pMsg = taosMemoryMalloc(bytes); + assert(pMsg != NULL); + syncReconfigFinishDeserialize(buf, len, pMsg); + assert(len == pMsg->bytes); + return pMsg; +} + +void syncReconfigFinish2RpcMsg(const SyncReconfigFinish* pMsg, SRpcMsg* pRpcMsg) { + memset(pRpcMsg, 0, sizeof(*pRpcMsg)); + pRpcMsg->msgType = pMsg->msgType; + pRpcMsg->contLen = pMsg->bytes; + pRpcMsg->pCont = rpcMallocCont(pRpcMsg->contLen); + syncReconfigFinishSerialize(pMsg, pRpcMsg->pCont, pRpcMsg->contLen); +} + +void syncReconfigFinishFromRpcMsg(const SRpcMsg* pRpcMsg, SyncReconfigFinish* pMsg) { + syncReconfigFinishDeserialize(pRpcMsg->pCont, pRpcMsg->contLen, pMsg); +} + +SyncReconfigFinish* syncReconfigFinishFromRpcMsg2(const SRpcMsg* pRpcMsg) { + SyncReconfigFinish* pMsg = syncReconfigFinishDeserialize2(pRpcMsg->pCont, pRpcMsg->contLen); + assert(pMsg != NULL); + return pMsg; +} + +cJSON* syncReconfigFinish2Json(const SyncReconfigFinish* pMsg) { + char u64buf[128]; + cJSON* pRoot = cJSON_CreateObject(); + + if (pMsg != NULL) { + cJSON_AddNumberToObject(pRoot, "bytes", pMsg->bytes); + cJSON_AddNumberToObject(pRoot, "vgId", pMsg->vgId); + cJSON_AddNumberToObject(pRoot, "msgType", pMsg->msgType); + + cJSON* pOldCfg = syncCfg2Json((SSyncCfg*)(&(pMsg->oldCfg))); + cJSON* pNewCfg = 
syncCfg2Json((SSyncCfg*)(&(pMsg->newCfg))); + cJSON_AddItemToObject(pRoot, "oldCfg", pOldCfg); + cJSON_AddItemToObject(pRoot, "newCfg", pNewCfg); + + snprintf(u64buf, sizeof(u64buf), "%ld", pMsg->newCfgIndex); + cJSON_AddStringToObject(pRoot, "newCfgIndex", u64buf); + + snprintf(u64buf, sizeof(u64buf), "%lu", pMsg->newCfgTerm); + cJSON_AddStringToObject(pRoot, "newCfgTerm", u64buf); + + snprintf(u64buf, sizeof(u64buf), "%lu", pMsg->newCfgSeqNum); + cJSON_AddStringToObject(pRoot, "newCfgSeqNum", u64buf); + } + + cJSON* pJson = cJSON_CreateObject(); + cJSON_AddItemToObject(pJson, "SyncReconfigFinish", pRoot); + return pJson; +} + +char* syncReconfigFinish2Str(const SyncReconfigFinish* pMsg) { + cJSON* pJson = syncReconfigFinish2Json(pMsg); + char* serialized = cJSON_Print(pJson); + cJSON_Delete(pJson); + return serialized; +} + +// for debug ---------------------- +void syncReconfigFinishPrint(const SyncReconfigFinish* pMsg) { + char* serialized = syncReconfigFinish2Str(pMsg); + printf("syncReconfigFinishPrint | len:%lu | %s \n", strlen(serialized), serialized); + fflush(NULL); + taosMemoryFree(serialized); +} + +void syncReconfigFinishPrint2(char* s, const SyncReconfigFinish* pMsg) { + char* serialized = syncReconfigFinish2Str(pMsg); + printf("syncReconfigFinishPrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized); + fflush(NULL); + taosMemoryFree(serialized); +} + +void syncReconfigFinishLog(const SyncReconfigFinish* pMsg) { + char* serialized = syncReconfigFinish2Str(pMsg); + sTrace("syncReconfigFinishLog | len:%lu | %s", strlen(serialized), serialized); + taosMemoryFree(serialized); +} + +void syncReconfigFinishLog2(char* s, const SyncReconfigFinish* pMsg) { + if (gRaftDetailLog) { + char* serialized = syncReconfigFinish2Str(pMsg); + sTrace("syncReconfigFinishLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized); + taosMemoryFree(serialized); + } } \ No newline at end of file diff --git a/source/libs/sync/src/syncRaftCfg.c 
b/source/libs/sync/src/syncRaftCfg.c index a8242d06e3..08c3e0126c 100644 --- a/source/libs/sync/src/syncRaftCfg.c +++ b/source/libs/sync/src/syncRaftCfg.c @@ -96,14 +96,14 @@ cJSON *syncCfg2Json(SSyncCfg *pSyncCfg) { char *syncCfg2Str(SSyncCfg *pSyncCfg) { cJSON *pJson = syncCfg2Json(pSyncCfg); - char *serialized = cJSON_Print(pJson); + char * serialized = cJSON_Print(pJson); cJSON_Delete(pJson); return serialized; } char *syncCfg2SimpleStr(SSyncCfg *pSyncCfg) { int32_t len = 512; - char *s = taosMemoryMalloc(len); + char * s = taosMemoryMalloc(len); memset(s, 0, len); snprintf(s, len, "{replica-num:%d, my-index:%d, ", pSyncCfg->replicaNum, pSyncCfg->myIndex); @@ -196,7 +196,7 @@ cJSON *raftCfg2Json(SRaftCfg *pRaftCfg) { char *raftCfg2Str(SRaftCfg *pRaftCfg) { cJSON *pJson = raftCfg2Json(pRaftCfg); - char *serialized = cJSON_Print(pJson); + char * serialized = cJSON_Print(pJson); cJSON_Delete(pJson); return serialized; } @@ -262,7 +262,7 @@ int32_t raftCfgFromJson(const cJSON *pRoot, SRaftCfg *pRaftCfg) { (pRaftCfg->configIndexArr)[i] = atoll(pIndex->valuestring); } - cJSON *pJsonSyncCfg = cJSON_GetObjectItem(pJson, "SSyncCfg"); + cJSON * pJsonSyncCfg = cJSON_GetObjectItem(pJson, "SSyncCfg"); int32_t code = syncCfgFromJson(pJsonSyncCfg, &(pRaftCfg->cfg)); ASSERT(code == 0); diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index 029891a692..a88125cf3e 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -21,7 +21,8 @@ #include "syncUtil.h" #include "wal.h" -static void snapshotReceiverDoStart(SSyncSnapshotReceiver *pReceiver, SyncTerm privateTerm, SRaftId fromId); +static void snapshotReceiverDoStart(SSyncSnapshotReceiver *pReceiver, SyncTerm privateTerm, + SyncSnapshotSend *pBeginMsg); //---------------------------------- SSyncSnapshotSender *snapshotSenderCreate(SSyncNode *pSyncNode, int32_t replicaIndex) { @@ -341,6 +342,10 @@ SSyncSnapshotReceiver *snapshotReceiverCreate(SSyncNode 
*pSyncNode, SRaftId from pReceiver->fromId = fromId; pReceiver->term = pSyncNode->pRaftStore->currentTerm; pReceiver->privateTerm = 0; + pReceiver->snapshot.data = NULL; + pReceiver->snapshot.lastApplyIndex = -1; + pReceiver->snapshot.lastApplyTerm = 0; + pReceiver->snapshot.lastConfigIndex = -1; } else { sInfo("snapshotReceiverCreate cannot create receiver"); @@ -358,11 +363,16 @@ void snapshotReceiverDestroy(SSyncSnapshotReceiver *pReceiver) { bool snapshotReceiverIsStart(SSyncSnapshotReceiver *pReceiver) { return pReceiver->start; } // begin receive snapshot msg (current term, seq begin) -static void snapshotReceiverDoStart(SSyncSnapshotReceiver *pReceiver, SyncTerm privateTerm, SRaftId fromId) { +static void snapshotReceiverDoStart(SSyncSnapshotReceiver *pReceiver, SyncTerm privateTerm, + SyncSnapshotSend *pBeginMsg) { pReceiver->term = pReceiver->pSyncNode->pRaftStore->currentTerm; pReceiver->privateTerm = privateTerm; pReceiver->ack = SYNC_SNAPSHOT_SEQ_BEGIN; - pReceiver->fromId = fromId; + pReceiver->fromId = pBeginMsg->srcId; + + pReceiver->snapshot.lastApplyIndex = pBeginMsg->lastIndex; + pReceiver->snapshot.lastApplyTerm = pBeginMsg->lastTerm; + pReceiver->snapshot.lastConfigIndex = pBeginMsg->lastConfigIndex; ASSERT(pReceiver->pWriter == NULL); int32_t ret = pReceiver->pSyncNode->pFsm->FpSnapshotStartWrite(pReceiver->pSyncNode->pFsm, &(pReceiver->pWriter)); @@ -371,10 +381,10 @@ static void snapshotReceiverDoStart(SSyncSnapshotReceiver *pReceiver, SyncTerm p // if receiver receive msg from seq = SYNC_SNAPSHOT_SEQ_BEGIN, start receiver // if already start, force close, start again -void snapshotReceiverStart(SSyncSnapshotReceiver *pReceiver, SyncTerm privateTerm, SRaftId fromId) { +void snapshotReceiverStart(SSyncSnapshotReceiver *pReceiver, SyncTerm privateTerm, SyncSnapshotSend *pBeginMsg) { if (!snapshotReceiverIsStart(pReceiver)) { // start - snapshotReceiverDoStart(pReceiver, privateTerm, fromId); + snapshotReceiverDoStart(pReceiver, privateTerm, 
pBeginMsg); pReceiver->start = true; } else { @@ -388,7 +398,7 @@ void snapshotReceiverStart(SSyncSnapshotReceiver *pReceiver, SyncTerm privateTer pReceiver->pWriter = NULL; // start again - snapshotReceiverDoStart(pReceiver, privateTerm, fromId); + snapshotReceiverDoStart(pReceiver, privateTerm, pBeginMsg); pReceiver->start = true; } @@ -449,6 +459,15 @@ cJSON *snapshotReceiver2Json(SSyncSnapshotReceiver *pReceiver) { cJSON_AddNumberToObject(pFromId, "vgId", pReceiver->fromId.vgId); cJSON_AddItemToObject(pRoot, "fromId", pFromId); + snprintf(u64buf, sizeof(u64buf), "%lu", pReceiver->snapshot.lastApplyIndex); + cJSON_AddStringToObject(pRoot, "snapshot.lastApplyIndex", u64buf); + + snprintf(u64buf, sizeof(u64buf), "%lu", pReceiver->snapshot.lastApplyTerm); + cJSON_AddStringToObject(pRoot, "snapshot.lastApplyTerm", u64buf); + + snprintf(u64buf, sizeof(u64buf), "%lu", pReceiver->snapshot.lastConfigIndex); + cJSON_AddStringToObject(pRoot, "snapshot.lastConfigIndex", u64buf); + snprintf(u64buf, sizeof(u64buf), "%lu", pReceiver->term); cJSON_AddStringToObject(pRoot, "term", u64buf); @@ -477,8 +496,9 @@ char *snapshotReceiver2SimpleStr(SSyncSnapshotReceiver *pReceiver, char *event) uint16_t port; syncUtilU642Addr(fromId.addr, host, sizeof(host), &port); - snprintf(s, len, "%s %p start:%d ack:%d term:%lu pterm:%lu %s:%d ", event, pReceiver, pReceiver->start, - pReceiver->ack, pReceiver->term, pReceiver->privateTerm, host, port); + snprintf(s, len, "%s %p start:%d ack:%d term:%lu pterm:%lu from:%s:%d laindex:%ld laterm:%lu lcindex:%ld", event, + pReceiver, pReceiver->start, pReceiver->ack, pReceiver->term, pReceiver->privateTerm, host, port, + pReceiver->snapshot.lastApplyIndex, pReceiver->snapshot.lastApplyTerm, pReceiver->snapshot.lastConfigIndex); return s; } @@ -495,7 +515,7 @@ int32_t syncNodeOnSnapshotSendCb(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { if (pMsg->term == pSyncNode->pRaftStore->currentTerm) { if (pMsg->seq == SYNC_SNAPSHOT_SEQ_BEGIN) { // begin - 
snapshotReceiverStart(pReceiver, pMsg->privateTerm, pMsg->srcId); + snapshotReceiverStart(pReceiver, pMsg->privateTerm, pMsg); pReceiver->ack = pMsg->seq; needRsp = true; @@ -529,7 +549,7 @@ int32_t syncNodeOnSnapshotSendCb(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { pMsg->lastTerm, pMsg->lastConfigIndex); syncNodeEventLog(pSyncNode, eventLog); - syncNodeUpdateConfig(pSyncNode, &newSyncCfg, pMsg->lastConfigIndex, &isDrop); + syncNodeDoConfigChange(pSyncNode, &newSyncCfg, pMsg->lastConfigIndex, &isDrop); } else { char eventLog[128]; diff --git a/source/libs/sync/src/syncUtil.c b/source/libs/sync/src/syncUtil.c index cbc1298113..1d1ff7ae53 100644 --- a/source/libs/sync/src/syncUtil.c +++ b/source/libs/sync/src/syncUtil.c @@ -261,23 +261,29 @@ bool syncUtilIsData(tmsg_t msgType) { #endif bool syncUtilUserPreCommit(tmsg_t msgType) { - if (msgType != TDMT_SYNC_NOOP && msgType != TDMT_SYNC_CONFIG_CHANGE && msgType != TDMT_SYNC_LEADER_TRANSFER) { + if (msgType != TDMT_SYNC_NOOP && msgType != TDMT_SYNC_CONFIG_CHANGE && msgType != TDMT_SYNC_CONFIG_CHANGE_FINISH && + msgType != TDMT_SYNC_LEADER_TRANSFER) { return true; } + return false; } bool syncUtilUserCommit(tmsg_t msgType) { - if (msgType != TDMT_SYNC_NOOP && msgType != TDMT_SYNC_CONFIG_CHANGE && msgType != TDMT_SYNC_LEADER_TRANSFER) { + if (msgType != TDMT_SYNC_NOOP && msgType != TDMT_SYNC_CONFIG_CHANGE && msgType != TDMT_SYNC_CONFIG_CHANGE_FINISH && + msgType != TDMT_SYNC_LEADER_TRANSFER) { return true; } + return false; } bool syncUtilUserRollback(tmsg_t msgType) { - if (msgType != TDMT_SYNC_NOOP && msgType != TDMT_SYNC_CONFIG_CHANGE && msgType != TDMT_SYNC_LEADER_TRANSFER) { + if (msgType != TDMT_SYNC_NOOP && msgType != TDMT_SYNC_CONFIG_CHANGE && msgType != TDMT_SYNC_CONFIG_CHANGE_FINISH && + msgType != TDMT_SYNC_LEADER_TRANSFER) { return true; } + return false; } diff --git a/source/libs/sync/test/CMakeLists.txt b/source/libs/sync/test/CMakeLists.txt index 725343e373..2057aa23a4 100644 --- 
a/source/libs/sync/test/CMakeLists.txt +++ b/source/libs/sync/test/CMakeLists.txt @@ -49,6 +49,7 @@ add_executable(syncRaftLogTest "") add_executable(syncRaftLogTest2 "") add_executable(syncRaftLogTest3 "") add_executable(syncLeaderTransferTest "") +add_executable(syncReconfigFinishTest "") target_sources(syncTest @@ -255,6 +256,10 @@ target_sources(syncLeaderTransferTest PRIVATE "syncLeaderTransferTest.cpp" ) +target_sources(syncReconfigFinishTest + PRIVATE + "syncReconfigFinishTest.cpp" +) target_include_directories(syncTest @@ -512,6 +517,11 @@ target_include_directories(syncLeaderTransferTest "${TD_SOURCE_DIR}/include/libs/sync" "${CMAKE_CURRENT_SOURCE_DIR}/../inc" ) +target_include_directories(syncReconfigFinishTest + PUBLIC + "${TD_SOURCE_DIR}/include/libs/sync" + "${CMAKE_CURRENT_SOURCE_DIR}/../inc" +) target_link_libraries(syncTest @@ -718,6 +728,10 @@ target_link_libraries(syncLeaderTransferTest sync gtest_main ) +target_link_libraries(syncReconfigFinishTest + sync + gtest_main +) enable_testing() diff --git a/source/libs/sync/test/syncConfigChangeSnapshotTest.cpp b/source/libs/sync/test/syncConfigChangeSnapshotTest.cpp index 6fd6944273..b4f173ff02 100644 --- a/source/libs/sync/test/syncConfigChangeSnapshotTest.cpp +++ b/source/libs/sync/test/syncConfigChangeSnapshotTest.cpp @@ -147,8 +147,8 @@ int32_t SnapshotDoWrite(struct SSyncFSM* pFsm, void* pWriter, void* pBuf, int32_ void RestoreFinishCb(struct SSyncFSM* pFsm) { sTrace("==callback== ==RestoreFinishCb=="); } void ReConfigCb(struct SSyncFSM* pFsm, const SRpcMsg* pMsg, SReConfigCbMeta cbMeta) { - sTrace("==callback== ==ReConfigCb== flag:0x%lX, isDrop:%d, index:%ld, code:%d, currentTerm:%lu, term:%lu", - cbMeta.flag, cbMeta.isDrop, cbMeta.index, cbMeta.code, cbMeta.currentTerm, cbMeta.term); + sTrace("==callback== ==ReConfigCb== flag:0x%lX, index:%ld, code:%d, currentTerm:%lu, term:%lu", cbMeta.flag, + cbMeta.index, cbMeta.code, cbMeta.currentTerm, cbMeta.term); } SSyncFSM* createFsm() { diff --git 
a/source/libs/sync/test/syncConfigChangeTest.cpp b/source/libs/sync/test/syncConfigChangeTest.cpp index a185870a3b..c96e337378 100644 --- a/source/libs/sync/test/syncConfigChangeTest.cpp +++ b/source/libs/sync/test/syncConfigChangeTest.cpp @@ -78,8 +78,8 @@ int32_t GetSnapshotCb(struct SSyncFSM* pFsm, SSnapshot* pSnapshot) { void RestoreFinishCb(struct SSyncFSM* pFsm) { sTrace("==callback== ==RestoreFinishCb=="); } void ReConfigCb(struct SSyncFSM* pFsm, const SRpcMsg* pMsg, SReConfigCbMeta cbMeta) { - sTrace("==callback== ==ReConfigCb== flag:0x%lX, isDrop:%d, index:%ld, code:%d, currentTerm:%lu, term:%lu", - cbMeta.flag, cbMeta.isDrop, cbMeta.index, cbMeta.code, cbMeta.currentTerm, cbMeta.term); + sTrace("==callback== ==ReConfigCb== flag:0x%lX, index:%ld, code:%d, currentTerm:%lu, term:%lu", cbMeta.flag, + cbMeta.index, cbMeta.code, cbMeta.currentTerm, cbMeta.term); } SSyncFSM* createFsm() { diff --git a/source/libs/sync/test/syncRaftCfgTest.cpp b/source/libs/sync/test/syncRaftCfgTest.cpp index 8171d266d4..0f111ef22c 100644 --- a/source/libs/sync/test/syncRaftCfgTest.cpp +++ b/source/libs/sync/test/syncRaftCfgTest.cpp @@ -137,6 +137,6 @@ int main() { test3(); test4(); test5(); - + return 0; } diff --git a/source/libs/sync/test/syncReconfigFinishTest.cpp b/source/libs/sync/test/syncReconfigFinishTest.cpp new file mode 100644 index 0000000000..22e22bb562 --- /dev/null +++ b/source/libs/sync/test/syncReconfigFinishTest.cpp @@ -0,0 +1,135 @@ +#include +#include +#include "syncIO.h" +#include "syncInt.h" +#include "syncMessage.h" +#include "syncUtil.h" + +void logTest() { + sTrace("--- sync log test: trace"); + sDebug("--- sync log test: debug"); + sInfo("--- sync log test: info"); + sWarn("--- sync log test: warn"); + sError("--- sync log test: error"); + sFatal("--- sync log test: fatal"); +} + +SSyncCfg* createSyncOldCfg() { + SSyncCfg* pCfg = (SSyncCfg*)taosMemoryMalloc(sizeof(SSyncCfg)); + memset(pCfg, 0, sizeof(SSyncCfg)); + + pCfg->replicaNum = 3; + pCfg->myIndex 
= 1; + for (int i = 0; i < pCfg->replicaNum; ++i) { + ((pCfg->nodeInfo)[i]).nodePort = i * 100; + snprintf(((pCfg->nodeInfo)[i]).nodeFqdn, sizeof(((pCfg->nodeInfo)[i]).nodeFqdn), "100.200.300.%d", i); + } + + return pCfg; +} + +SSyncCfg* createSyncNewCfg() { + SSyncCfg* pCfg = (SSyncCfg*)taosMemoryMalloc(sizeof(SSyncCfg)); + memset(pCfg, 0, sizeof(SSyncCfg)); + + pCfg->replicaNum = 3; + pCfg->myIndex = 1; + for (int i = 0; i < pCfg->replicaNum; ++i) { + ((pCfg->nodeInfo)[i]).nodePort = i * 100; + snprintf(((pCfg->nodeInfo)[i]).nodeFqdn, sizeof(((pCfg->nodeInfo)[i]).nodeFqdn), "500.600.700.%d", i); + } + + return pCfg; +} + +SyncReconfigFinish *createMsg() { + SyncReconfigFinish *pMsg = syncReconfigFinishBuild(1234); + + SSyncCfg* pOld = createSyncOldCfg(); + SSyncCfg* pNew = createSyncNewCfg(); + pMsg->oldCfg = *pOld; + pMsg->newCfg = *pNew; + + pMsg->newCfgIndex = 11; + pMsg->newCfgTerm = 22; + pMsg->newCfgSeqNum = 33; + + taosMemoryFree(pOld); + taosMemoryFree(pNew); + + return pMsg; +} + + +void test1() { + SyncReconfigFinish *pMsg = createMsg(); + syncReconfigFinishLog2((char *)"test1:", pMsg); + syncReconfigFinishDestroy(pMsg); +} + + +void test2() { + SyncReconfigFinish *pMsg = createMsg(); + uint32_t len = pMsg->bytes; + char * serialized = (char *)taosMemoryMalloc(len); + syncReconfigFinishSerialize(pMsg, serialized, len); + SyncReconfigFinish *pMsg2 = syncReconfigFinishBuild(1000); + syncReconfigFinishDeserialize(serialized, len, pMsg2); + syncReconfigFinishLog2((char *)"test2: syncReconfigFinishSerialize -> syncReconfigFinishDeserialize ", pMsg2); + + taosMemoryFree(serialized); + syncReconfigFinishDestroy(pMsg); + syncReconfigFinishDestroy(pMsg2); +} + +void test3() { + SyncReconfigFinish *pMsg = createMsg(); + uint32_t len; + char * serialized = syncReconfigFinishSerialize2(pMsg, &len); + SyncReconfigFinish *pMsg2 = syncReconfigFinishDeserialize2(serialized, len); + syncReconfigFinishLog2((char *)"test3: SyncReconfigFinishSerialize2 -> 
syncReconfigFinishDeserialize2 ", pMsg2); + + taosMemoryFree(serialized); + syncReconfigFinishDestroy(pMsg); + syncReconfigFinishDestroy(pMsg2); +} + +void test4() { + SyncReconfigFinish *pMsg = createMsg(); + SRpcMsg rpcMsg; + syncReconfigFinish2RpcMsg(pMsg, &rpcMsg); + SyncReconfigFinish *pMsg2 = (SyncReconfigFinish *)taosMemoryMalloc(rpcMsg.contLen); + syncReconfigFinishFromRpcMsg(&rpcMsg, pMsg2); + syncReconfigFinishLog2((char *)"test4: syncReconfigFinish2RpcMsg -> syncReconfigFinishFromRpcMsg ", pMsg2); + + rpcFreeCont(rpcMsg.pCont); + syncReconfigFinishDestroy(pMsg); + syncReconfigFinishDestroy(pMsg2); +} + +void test5() { + SyncReconfigFinish *pMsg = createMsg(); + SRpcMsg rpcMsg; + syncReconfigFinish2RpcMsg(pMsg, &rpcMsg); + SyncReconfigFinish *pMsg2 = syncReconfigFinishFromRpcMsg2(&rpcMsg); + syncReconfigFinishLog2((char *)"test5: syncReconfigFinish2RpcMsg -> syncReconfigFinishFromRpcMsg2 ", pMsg2); + + rpcFreeCont(rpcMsg.pCont); + syncReconfigFinishDestroy(pMsg); + syncReconfigFinishDestroy(pMsg2); +} + +int main() { + gRaftDetailLog = true; + tsAsyncLog = 0; + sDebugFlag = DEBUG_TRACE + DEBUG_SCREEN + DEBUG_FILE; + logTest(); + + test1(); + test2(); + test3(); + test4(); + test5(); + + return 0; +} diff --git a/source/libs/sync/test/syncTestTool.cpp b/source/libs/sync/test/syncTestTool.cpp index 74dfaa192a..91a16cc033 100644 --- a/source/libs/sync/test/syncTestTool.cpp +++ b/source/libs/sync/test/syncTestTool.cpp @@ -148,8 +148,8 @@ void RestoreFinishCb(struct SSyncFSM* pFsm) { sTrace("==callback== ==RestoreFini void ReConfigCb(struct SSyncFSM* pFsm, const SRpcMsg* pMsg, SReConfigCbMeta cbMeta) { char* s = syncCfg2Str(&(cbMeta.newCfg)); - sTrace("==callback== ==ReConfigCb== flag:0x%lX, isDrop:%d, index:%ld, code:%d, currentTerm:%lu, term:%lu, newCfg:%s", - cbMeta.flag, cbMeta.isDrop, cbMeta.index, cbMeta.code, cbMeta.currentTerm, cbMeta.term, s); + sTrace("==callback== ==ReConfigCb== flag:0x%lX, index:%ld, code:%d, currentTerm:%lu, term:%lu, newCfg:%s", 
+ cbMeta.flag, cbMeta.index, cbMeta.code, cbMeta.currentTerm, cbMeta.term, s); taosMemoryFree(s); } diff --git a/source/util/src/terror.c b/source/util/src/terror.c index 7f949e5b27..6afb240ab9 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -423,6 +423,9 @@ TAOS_DEFINE_ERROR(TSDB_CODE_SYN_IS_LEADER, "Sync is leader") TAOS_DEFINE_ERROR(TSDB_CODE_SYN_NOT_LEADER, "Sync not leader") TAOS_DEFINE_ERROR(TSDB_CODE_SYN_ONE_REPLICA, "Sync one replica") TAOS_DEFINE_ERROR(TSDB_CODE_SYN_NOT_IN_NEW_CONFIG, "Sync not in new config") +TAOS_DEFINE_ERROR(TSDB_CODE_SYN_NEW_CONFIG_ERROR, "Sync new config error") +TAOS_DEFINE_ERROR(TSDB_CODE_SYN_RECONFIG_NOT_READY, "Sync not ready for reconfig") +TAOS_DEFINE_ERROR(TSDB_CODE_SYN_PROPOSE_NOT_READY, "Sync not ready for propose") TAOS_DEFINE_ERROR(TSDB_CODE_SYN_INTERNAL_ERROR, "Sync internal error") // wal From 9ef9209a5c9de41824d2cce1b6dfcb1abc0f1a11 Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Mon, 20 Jun 2022 18:00:56 +0800 Subject: [PATCH 32/40] tdb: fix windows compiling issues --- source/libs/tdb/src/db/tdbBtree.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/source/libs/tdb/src/db/tdbBtree.c b/source/libs/tdb/src/db/tdbBtree.c index bac3554345..45e71f6c0d 100644 --- a/source/libs/tdb/src/db/tdbBtree.c +++ b/source/libs/tdb/src/db/tdbBtree.c @@ -933,7 +933,7 @@ static int tdbBtreeEncodePayload(SPage *pPage, SCell *pCell, int nHeader, const } // local buffer for cell - void *pBuf = tdbRealloc(NULL, pBt->pageSize); + SCell *pBuf = tdbRealloc(NULL, pBt->pageSize); if (pBuf == NULL) { return -1; } @@ -976,7 +976,7 @@ static int tdbBtreeEncodePayload(SPage *pPage, SCell *pCell, int nHeader, const pgno = 0; } - memcpy(pBuf, pVal + vLen - nLeft, bytes); + memcpy(pBuf, ((SCell *)pVal) + vLen - nLeft, bytes); memcpy(pBuf + bytes, &pgno, sizeof(pgno)); ret = tdbPageInsertCell(ofp, 0, pBuf, bytes + sizeof(pgno), 0); @@ -1011,7 +1011,7 @@ static int tdbBtreeEncodePayload(SPage *pPage, 
SCell *pCell, int nHeader, const } // cpy key - memcpy(pBuf, pKey + kLen - nLeftKey, bytes); + memcpy(pBuf, ((SCell *)pKey) + kLen - nLeftKey, bytes); if (lastKeyPage) { if (lastKeyPageSpace >= vLen) { @@ -1071,7 +1071,7 @@ static int tdbBtreeEncodePayload(SPage *pPage, SCell *pCell, int nHeader, const pgno = 0; } - memcpy(pBuf, pVal + vLen - nLeft, bytes); + memcpy(pBuf, ((SCell *)pVal) + vLen - nLeft, bytes); memcpy(pBuf + bytes, &pgno, sizeof(pgno)); ret = tdbPageInsertCell(ofp, 0, pBuf, bytes + sizeof(pgno), 0); From ba94a44bec9fe6351ead43c7a6d188cdbec73ebc Mon Sep 17 00:00:00 2001 From: Cary Xu Date: Mon, 20 Jun 2022 19:27:26 +0800 Subject: [PATCH 33/40] refactor: rsma support max_delay/watermark params --- include/common/tmsg.h | 10 ++-- source/common/src/tmsg.c | 47 +++++++-------- source/dnode/mnode/impl/inc/mndDef.h | 4 +- source/dnode/mnode/impl/inc/mndScheduler.h | 2 +- source/dnode/mnode/impl/src/mndScheduler.c | 2 +- source/dnode/mnode/impl/src/mndStb.c | 58 ++++++++++--------- source/dnode/vnode/src/sma/smaRollup.c | 66 +++++++++++----------- source/dnode/vnode/src/vnd/vnodeSvr.c | 2 +- tests/test/c/sdbDump.c | 6 +- 9 files changed, 98 insertions(+), 99 deletions(-) diff --git a/include/common/tmsg.h b/include/common/tmsg.h index 2a29d9b7c7..fd9ff3eb65 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -1776,12 +1776,10 @@ typedef struct { } SDDropTopicReq; typedef struct { - float xFilesFactor; - int32_t delay; - int32_t qmsg1Len; - int32_t qmsg2Len; - char* qmsg1; // pAst1:qmsg1:SRetention1 => trigger aggr task1 - char* qmsg2; // pAst2:qmsg2:SRetention2 => trigger aggr task2 + int64_t maxdelay[2]; + int64_t watermark[2]; + int32_t qmsgLen[2]; + char* qmsg[2]; // pAst:qmsg:SRetention => trigger aggr task1/2 } SRSmaParam; int32_t tEncodeSRSmaParam(SEncoder* pCoder, const SRSmaParam* pRSmaParam); diff --git a/source/common/src/tmsg.c b/source/common/src/tmsg.c index c3c96972b7..d078d22cdf 100644 --- a/source/common/src/tmsg.c +++ 
b/source/common/src/tmsg.c @@ -4273,39 +4273,34 @@ void tFreeSCMCreateStreamReq(SCMCreateStreamReq *pReq) { } int32_t tEncodeSRSmaParam(SEncoder *pCoder, const SRSmaParam *pRSmaParam) { - if (tEncodeFloat(pCoder, pRSmaParam->xFilesFactor) < 0) return -1; - if (tEncodeI32v(pCoder, pRSmaParam->delay) < 0) return -1; - if (tEncodeI32v(pCoder, pRSmaParam->qmsg1Len) < 0) return -1; - if (tEncodeI32v(pCoder, pRSmaParam->qmsg2Len) < 0) return -1; - if (pRSmaParam->qmsg1Len > 0) { - if (tEncodeBinary(pCoder, pRSmaParam->qmsg1, (uint64_t)pRSmaParam->qmsg1Len) < 0) // qmsg1Len contains len of '\0' - return -1; - } - if (pRSmaParam->qmsg2Len > 0) { - if (tEncodeBinary(pCoder, pRSmaParam->qmsg2, (uint64_t)pRSmaParam->qmsg2Len) < 0) // qmsg2Len contains len of '\0' - return -1; + for (int32_t i = 0; i < 2; ++i) { + if (tEncodeI64v(pCoder, pRSmaParam->maxdelay[i]) < 0) return -1; + if (tEncodeI64v(pCoder, pRSmaParam->watermark[i]) < 0) return -1; + if (tEncodeI32v(pCoder, pRSmaParam->qmsgLen[i]) < 0) return -1; + if (pRSmaParam->qmsgLen[i] > 0) { + if (tEncodeBinary(pCoder, pRSmaParam->qmsg[i], (uint64_t)pRSmaParam->qmsgLen[i]) < + 0) // qmsgLen contains len of '\0' + return -1; + } } return 0; } int32_t tDecodeSRSmaParam(SDecoder *pCoder, SRSmaParam *pRSmaParam) { - if (tDecodeFloat(pCoder, &pRSmaParam->xFilesFactor) < 0) return -1; - if (tDecodeI32v(pCoder, &pRSmaParam->delay) < 0) return -1; - if (tDecodeI32v(pCoder, &pRSmaParam->qmsg1Len) < 0) return -1; - if (tDecodeI32v(pCoder, &pRSmaParam->qmsg2Len) < 0) return -1; - if (pRSmaParam->qmsg1Len > 0) { - uint64_t len; - if (tDecodeBinaryAlloc(pCoder, (void **)&pRSmaParam->qmsg1, &len) < 0) return -1; // qmsg1Len contains len of '\0' - } else { - pRSmaParam->qmsg1 = NULL; - } - if (pRSmaParam->qmsg2Len > 0) { - uint64_t len; - if (tDecodeBinaryAlloc(pCoder, (void **)&pRSmaParam->qmsg2, &len) < 0) return -1; // qmsg2Len contains len of '\0' - } else { - pRSmaParam->qmsg2 = NULL; + for (int32_t i = 0; i < 2; ++i) { + if 
(tDecodeI64v(pCoder, &pRSmaParam->maxdelay[i]) < 0) return -1; + if (tDecodeI64v(pCoder, &pRSmaParam->watermark[i]) < 0) return -1; + if (tDecodeI32v(pCoder, &pRSmaParam->qmsgLen[i]) < 0) return -1; + if (pRSmaParam->qmsgLen[i] > 0) { + uint64_t len; + if (tDecodeBinaryAlloc(pCoder, (void **)&pRSmaParam->qmsg[i], &len) < 0) + return -1; // qmsgLen contains len of '\0' + } else { + pRSmaParam->qmsg[i] = NULL; + } } + return 0; } diff --git a/source/dnode/mnode/impl/inc/mndDef.h b/source/dnode/mnode/impl/inc/mndDef.h index 8963f6be39..987b01b96a 100644 --- a/source/dnode/mnode/impl/inc/mndDef.h +++ b/source/dnode/mnode/impl/inc/mndDef.h @@ -341,8 +341,8 @@ typedef struct { int32_t colVer; int32_t smaVer; int32_t nextColId; - float xFilesFactor; - int32_t delay; + int64_t watermark[2]; + int64_t maxdelay[2]; int32_t ttl; int32_t numOfColumns; int32_t numOfTags; diff --git a/source/dnode/mnode/impl/inc/mndScheduler.h b/source/dnode/mnode/impl/inc/mndScheduler.h index 8e816d2dd6..15d2c6cd5e 100644 --- a/source/dnode/mnode/impl/inc/mndScheduler.h +++ b/source/dnode/mnode/impl/inc/mndScheduler.h @@ -30,7 +30,7 @@ int32_t mndSchedInitSubEp(SMnode* pMnode, const SMqTopicObj* pTopic, SMqSubscrib int32_t mndScheduleStream(SMnode* pMnode, STrans* pTrans, SStreamObj* pStream); int32_t mndConvertRsmaTask(char** pDst, int32_t* pDstLen, const char* ast, int64_t uid, int8_t triggerType, - int64_t watermark, double filesFactor); + int64_t watermark); int32_t mndScheduleStream(SMnode* pMnode, STrans* pTrans, SStreamObj* pStream); diff --git a/source/dnode/mnode/impl/src/mndScheduler.c b/source/dnode/mnode/impl/src/mndScheduler.c index 37aa2d33d0..f417e2267b 100644 --- a/source/dnode/mnode/impl/src/mndScheduler.c +++ b/source/dnode/mnode/impl/src/mndScheduler.c @@ -43,7 +43,7 @@ static int32_t mndAddTaskToTaskSet(SArray* pArray, SStreamTask* pTask) { } int32_t mndConvertRsmaTask(char** pDst, int32_t* pDstLen, const char* ast, int64_t uid, int8_t triggerType, - int64_t watermark, 
double filesFactor) { + int64_t watermark) { SNode* pAst = NULL; SQueryPlan* pPlan = NULL; terrno = TSDB_CODE_SUCCESS; diff --git a/source/dnode/mnode/impl/src/mndStb.c b/source/dnode/mnode/impl/src/mndStb.c index 6c8021e3b3..3e50ea8262 100644 --- a/source/dnode/mnode/impl/src/mndStb.c +++ b/source/dnode/mnode/impl/src/mndStb.c @@ -89,8 +89,10 @@ SSdbRaw *mndStbActionEncode(SStbObj *pStb) { SDB_SET_INT32(pRaw, dataPos, pStb->tagVer, _OVER) SDB_SET_INT32(pRaw, dataPos, pStb->colVer, _OVER) SDB_SET_INT32(pRaw, dataPos, pStb->nextColId, _OVER) - SDB_SET_INT32(pRaw, dataPos, (int32_t)(pStb->xFilesFactor * 10000), _OVER) - SDB_SET_INT32(pRaw, dataPos, pStb->delay, _OVER) + SDB_SET_INT64(pRaw, dataPos, pStb->maxdelay[0], _OVER) + SDB_SET_INT64(pRaw, dataPos, pStb->maxdelay[1], _OVER) + SDB_SET_INT64(pRaw, dataPos, pStb->watermark[0], _OVER) + SDB_SET_INT64(pRaw, dataPos, pStb->watermark[1], _OVER) SDB_SET_INT32(pRaw, dataPos, pStb->ttl, _OVER) SDB_SET_INT32(pRaw, dataPos, pStb->numOfColumns, _OVER) SDB_SET_INT32(pRaw, dataPos, pStb->numOfTags, _OVER) @@ -168,10 +170,10 @@ static SSdbRow *mndStbActionDecode(SSdbRaw *pRaw) { SDB_GET_INT32(pRaw, dataPos, &pStb->tagVer, _OVER) SDB_GET_INT32(pRaw, dataPos, &pStb->colVer, _OVER) SDB_GET_INT32(pRaw, dataPos, &pStb->nextColId, _OVER) - int32_t xFilesFactor = 0; - SDB_GET_INT32(pRaw, dataPos, &xFilesFactor, _OVER) - pStb->xFilesFactor = xFilesFactor / 10000.0f; - SDB_GET_INT32(pRaw, dataPos, &pStb->delay, _OVER) + SDB_GET_INT64(pRaw, dataPos, &pStb->maxdelay[0], _OVER) + SDB_GET_INT64(pRaw, dataPos, &pStb->maxdelay[1], _OVER) + SDB_GET_INT64(pRaw, dataPos, &pStb->watermark[0], _OVER) + SDB_GET_INT64(pRaw, dataPos, &pStb->watermark[1], _OVER) SDB_GET_INT32(pRaw, dataPos, &pStb->ttl, _OVER) SDB_GET_INT32(pRaw, dataPos, &pStb->numOfColumns, _OVER) SDB_GET_INT32(pRaw, dataPos, &pStb->numOfTags, _OVER) @@ -399,18 +401,18 @@ static void *mndBuildVCreateStbReq(SMnode *pMnode, SVgObj *pVgroup, SStbObj *pSt req.schemaTag.pSchema = 
pStb->pTags; if (req.rollup) { - req.pRSmaParam.xFilesFactor = pStb->xFilesFactor; - req.pRSmaParam.delay = pStb->delay; + req.pRSmaParam.maxdelay[0] = pStb->maxdelay[0]; + req.pRSmaParam.maxdelay[1] = pStb->maxdelay[1]; if (pStb->ast1Len > 0) { - if (mndConvertRsmaTask(&req.pRSmaParam.qmsg1, &req.pRSmaParam.qmsg1Len, pStb->pAst1, pStb->uid, - STREAM_TRIGGER_WINDOW_CLOSE, 0, req.pRSmaParam.xFilesFactor) != TSDB_CODE_SUCCESS) { - return NULL; + if (mndConvertRsmaTask(&req.pRSmaParam.qmsg[0], &req.pRSmaParam.qmsgLen[0], pStb->pAst1, pStb->uid, + STREAM_TRIGGER_WINDOW_CLOSE, req.pRSmaParam.watermark[0]) < 0) { + goto _err; } } if (pStb->ast2Len > 0) { - if (mndConvertRsmaTask(&req.pRSmaParam.qmsg2, &req.pRSmaParam.qmsg2Len, pStb->pAst2, pStb->uid, - STREAM_TRIGGER_WINDOW_CLOSE, 0, req.pRSmaParam.xFilesFactor) != TSDB_CODE_SUCCESS) { - return NULL; + if (mndConvertRsmaTask(&req.pRSmaParam.qmsg[1], &req.pRSmaParam.qmsgLen[1], pStb->pAst2, pStb->uid, + STREAM_TRIGGER_WINDOW_CLOSE, req.pRSmaParam.watermark[1]) < 0) { + goto _err; } } } @@ -418,17 +420,15 @@ static void *mndBuildVCreateStbReq(SMnode *pMnode, SVgObj *pVgroup, SStbObj *pSt int32_t ret = 0; tEncodeSize(tEncodeSVCreateStbReq, &req, contLen, ret); if (ret < 0) { - return NULL; + goto _err; } contLen += sizeof(SMsgHead); SMsgHead *pHead = taosMemoryMalloc(contLen); if (pHead == NULL) { - taosMemoryFreeClear(req.pRSmaParam.qmsg1); - taosMemoryFreeClear(req.pRSmaParam.qmsg2); terrno = TSDB_CODE_OUT_OF_MEMORY; - return NULL; + goto _err; } pHead->contLen = htonl(contLen); @@ -438,17 +438,19 @@ static void *mndBuildVCreateStbReq(SMnode *pMnode, SVgObj *pVgroup, SStbObj *pSt tEncoderInit(&encoder, pBuf, contLen - sizeof(SMsgHead)); if (tEncodeSVCreateStbReq(&encoder, &req) < 0) { taosMemoryFreeClear(pHead); - taosMemoryFreeClear(req.pRSmaParam.qmsg1); - taosMemoryFreeClear(req.pRSmaParam.qmsg2); tEncoderClear(&encoder); - return NULL; + goto _err; } tEncoderClear(&encoder); *pContLen = contLen; - 
taosMemoryFreeClear(req.pRSmaParam.qmsg1); - taosMemoryFreeClear(req.pRSmaParam.qmsg2); + taosMemoryFreeClear(req.pRSmaParam.qmsg[0]); + taosMemoryFreeClear(req.pRSmaParam.qmsg[1]); return pHead; +_err: + taosMemoryFreeClear(req.pRSmaParam.qmsg[0]); + taosMemoryFreeClear(req.pRSmaParam.qmsg[1]); + return NULL; } static void *mndBuildVDropStbReq(SMnode *pMnode, SVgObj *pVgroup, SStbObj *pStb, int32_t *pContLen) { @@ -670,8 +672,10 @@ int32_t mndBuildStbFromReq(SMnode *pMnode, SStbObj *pDst, SMCreateStbReq *pCreat pDst->tagVer = 1; pDst->colVer = 1; pDst->nextColId = 1; - // pDst->xFilesFactor = pCreate->xFilesFactor; - // pDst->delay = pCreate->delay; + pDst->maxdelay[0] = pCreate->delay1; + pDst->maxdelay[1] = pCreate->delay2; + pDst->watermark[0] = pCreate->watermark1; + pDst->watermark[1] = pCreate->watermark2; pDst->ttl = pCreate->ttl; pDst->numOfColumns = pCreate->numOfColumns; pDst->numOfTags = pCreate->numOfTags; @@ -897,7 +901,7 @@ static int32_t mndUpdateStbCommentAndTTL(const SStbObj *pOld, SStbObj *pNew, cha return -1; } memcpy(pNew->comment, pComment, commentLen + 1); - } else if(commentLen == 0){ + } else if (commentLen == 0) { pNew->commentLen = 0; } @@ -1849,7 +1853,7 @@ static int32_t mndRetrieveStb(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBloc char comment[TSDB_TB_COMMENT_LEN + VARSTR_HEADER_SIZE] = {0}; STR_TO_VARSTR(comment, pStb->comment); colDataAppend(pColInfo, numOfRows, comment, false); - } else if(pStb->commentLen == 0) { + } else if (pStb->commentLen == 0) { char comment[VARSTR_HEADER_SIZE + VARSTR_HEADER_SIZE] = {0}; STR_TO_VARSTR(comment, ""); colDataAppend(pColInfo, numOfRows, comment, false); diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index ecd47c2303..1f18f6cb87 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -21,6 +21,31 @@ static FORCE_INLINE int32_t tdUpdateTbUidListImpl(SSma *pSma, tb_uid_t *suid, SA static FORCE_INLINE 
int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t inputType, SRSmaInfoItem *rsmaItem, tb_uid_t suid, int8_t level); +#define SET_RSMA_INFO_ITEM_PARAMS(__idx, __level) \ + if (param->qmsg[__idx]) { \ + pRSmaInfo->items[__idx].pRsmaInfo = pRSmaInfo; \ + pRSmaInfo->items[__idx].taskInfo = qCreateStreamExecTaskInfo(param->qmsg[0], &handle); \ + if (!pRSmaInfo->items[__idx].taskInfo) { \ + goto _err; \ + } \ + pRSmaInfo->items[__idx].triggerStatus = TASK_TRIGGER_STATUS__IN_ACTIVE; \ + if (param->maxdelay[__idx] < 1) { \ + int64_t msInterval = \ + convertTimeFromPrecisionToUnit(pRetention[__level].freq, pTsdbCfg->precision, TIME_UNIT_MILLISECOND); \ + pRSmaInfo->items[__idx].maxDelay = msInterval; \ + } else { \ + pRSmaInfo->items[__idx].maxDelay = param->maxdelay[__idx]; \ + } \ + if (pRSmaInfo->items[__idx].maxDelay > TSDB_MAX_ROLLUP_MAX_DELAY) { \ + pRSmaInfo->items[__idx].maxDelay = TSDB_MAX_ROLLUP_MAX_DELAY; \ + } \ + pRSmaInfo->items[__idx].level = TSDB_RETENTION_L##__level; \ + pRSmaInfo->items[__idx].tmrHandle = taosTmrInit(10000, 100, 10000, "RSMA"); \ + if (!pRSmaInfo->items[__idx].tmrHandle) { \ + goto _err; \ + } \ + } + struct SRSmaInfoItem { SRSmaInfo *pRsmaInfo; void *taskInfo; // qTaskInfo_t @@ -207,7 +232,7 @@ int32_t tdProcessRSmaCreate(SVnode *pVnode, SVCreateStbReq *pReq) { SMsgCb *pMsgCb = &pVnode->msgCb; SRSmaParam *param = &pReq->pRSmaParam; - if ((param->qmsg1Len == 0) && (param->qmsg2Len == 0)) { + if ((param->qmsgLen[0] == 0) && (param->qmsgLen[1] == 0)) { smaWarn("vgId:%d, no qmsg1/qmsg2 for rollup stable %s %" PRIi64, SMA_VID(pSma), pReq->name, pReq->suid); return TSDB_CODE_SUCCESS; } @@ -257,36 +282,11 @@ int32_t tdProcessRSmaCreate(SVnode *pVnode, SVCreateStbReq *pReq) { pRSmaInfo->pSma = pSma; pRSmaInfo->suid = pReq->suid; - if (param->qmsg1) { - pRSmaInfo->items[0].pRsmaInfo = pRSmaInfo; - pRSmaInfo->items[0].taskInfo = qCreateStreamExecTaskInfo(param->qmsg1, &handle); - if (!pRSmaInfo->items[0].taskInfo) { - goto _err; - } - 
pRSmaInfo->items[0].triggerStatus = TASK_TRIGGER_STATUS__IN_ACTIVE; - pRSmaInfo->items[0].maxDelay = 5000; - pRSmaInfo->items[0].level = TSDB_RETENTION_L1; - pRSmaInfo->items[0].tmrHandle = taosTmrInit(10000, 100, 10000, "RSMA_L1"); + SRetention *pRetention = SMA_RETENTION(pSma); + STsdbCfg *pTsdbCfg = SMA_TSDB_CFG(pSma); - if (!pRSmaInfo->items[0].tmrHandle) { - goto _err; - } - } - - if (param->qmsg2) { - pRSmaInfo->items[1].pRsmaInfo = pRSmaInfo; - pRSmaInfo->items[1].taskInfo = qCreateStreamExecTaskInfo(param->qmsg2, &handle); - if (!pRSmaInfo->items[1].taskInfo) { - goto _err; - } - pRSmaInfo->items[1].triggerStatus = TASK_TRIGGER_STATUS__IN_ACTIVE; - pRSmaInfo->items[1].maxDelay = 5000; - pRSmaInfo->items[1].level = TSDB_RETENTION_L2; - pRSmaInfo->items[1].tmrHandle = taosTmrInit(10000, 100, 10000, "RSMA_L2"); - if (!pRSmaInfo->items[1].tmrHandle) { - goto _err; - } - } + SET_RSMA_INFO_ITEM_PARAMS(0, 1); + SET_RSMA_INFO_ITEM_PARAMS(1, 2); if (taosHashPut(SMA_STAT_INFO_HASH(pStat), &pReq->suid, sizeof(tb_uid_t), &pRSmaInfo, sizeof(pRSmaInfo)) != TSDB_CODE_SUCCESS) { @@ -451,7 +451,7 @@ static int32_t tdFetchAndSubmitRSmaResult(SRSmaInfoItem *pItem, int8_t blkType) } if (taosArrayGetSize(pResult) > 0) { -#if 1 +#if 0 char flag[10] = {0}; snprintf(flag, 10, "level %" PRIi8, pItem->level); blockDebugShowData(pResult, flag); @@ -494,7 +494,7 @@ static void rsmaTriggerByTimer(void *param, void *tmrId) { SRSmaInfoItem *pItem = param; if (atomic_load_8(&pItem->triggerStatus) == TASK_TRIGGER_STATUS__ACTIVE) { - printf("%s:%d THREAD:%" PRIi64 " status = active\n", __func__, __LINE__, taosGetSelfPthreadId()); + smaTrace("level %" PRIi8 " status is active for tb suid:%" PRIi64, pItem->level, pItem->pRsmaInfo->suid); SSDataBlock dataBlock = {.info.type = STREAM_GET_ALL}; atomic_store_8(&pItem->triggerStatus, TASK_TRIGGER_STATUS__IN_ACTIVE); @@ -502,7 +502,7 @@ static void rsmaTriggerByTimer(void *param, void *tmrId) { tdFetchAndSubmitRSmaResult(pItem, 
STREAM_DATA_TYPE_SSDATA_BLOCK); } else { - printf("%s:%d THREAD:%" PRIi64 " status = in active\n", __func__, __LINE__, taosGetSelfPthreadId()); + smaTrace("level %" PRIi8 " status is inactive for tb suid:%" PRIi64, pItem->level, pItem->pRsmaInfo->suid); } // taosTmrReset(rsmaTriggerByTimer, pItem->maxDelay, pItem, pItem->tmrHandle, &pItem->tmrId); diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index 98fcee97c5..009c739693 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -106,7 +106,7 @@ int32_t vnodeProcessWriteReq(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRp int32_t len; int32_t ret; - vError("vgId:%d, start to process write request %s, index:%" PRId64, TD_VID(pVnode), TMSG_INFO(pMsg->msgType), + vTrace("vgId:%d, start to process write request %s, index:%" PRId64, TD_VID(pVnode), TMSG_INFO(pMsg->msgType), version); pVnode->state.applied = version; diff --git a/tests/test/c/sdbDump.c b/tests/test/c/sdbDump.c index 612b870b7e..a62c30660d 100644 --- a/tests/test/c/sdbDump.c +++ b/tests/test/c/sdbDump.c @@ -114,8 +114,10 @@ void dumpStb(SSdb *pSdb, SJson *json) { tjsonAddIntegerToObject(item, "tagVer", pObj->tagVer); tjsonAddIntegerToObject(item, "colVer", pObj->colVer); tjsonAddIntegerToObject(item, "nextColId", pObj->nextColId); - tjsonAddIntegerToObject(item, "xFilesFactor", pObj->xFilesFactor * 10000); - tjsonAddIntegerToObject(item, "delay", pObj->delay); + tjsonAddIntegerToObject(item, "watermark1", pObj->watermark[0]); + tjsonAddIntegerToObject(item, "watermark2", pObj->watermark[1]); + tjsonAddIntegerToObject(item, "maxdelay1", pObj->maxdelay[0]); + tjsonAddIntegerToObject(item, "maxdelay2", pObj->maxdelay[1]); tjsonAddIntegerToObject(item, "ttl", pObj->ttl); tjsonAddIntegerToObject(item, "numOfColumns", pObj->numOfColumns); tjsonAddIntegerToObject(item, "numOfTags", pObj->numOfTags); From 3573ae15648e123406047f0cfe68ac52349e5a8b Mon Sep 17 00:00:00 2001 From: 
Minghao Li Date: Mon, 20 Jun 2022 20:07:36 +0800 Subject: [PATCH 34/40] refactor(sync): block when changing --- source/libs/sync/inc/syncInt.h | 2 +- source/libs/sync/src/syncMain.c | 328 ++++++++++++++-------------- source/libs/sync/src/syncSnapshot.c | 37 +--- 3 files changed, 170 insertions(+), 197 deletions(-) diff --git a/source/libs/sync/inc/syncInt.h b/source/libs/sync/inc/syncInt.h index 02a9e189cf..63db395425 100644 --- a/source/libs/sync/inc/syncInt.h +++ b/source/libs/sync/inc/syncInt.h @@ -199,7 +199,7 @@ char* syncNode2Str(const SSyncNode* pSyncNode); void syncNodeEventLog(const SSyncNode* pSyncNode, char* str); char* syncNode2SimpleStr(const SSyncNode* pSyncNode); bool syncNodeInConfig(SSyncNode* pSyncNode, const SSyncCfg* config); -void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* newConfig, SyncIndex lastConfigChangeIndex, bool* isDrop); +void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* newConfig, SyncIndex lastConfigChangeIndex); SSyncNode* syncNodeAcquire(int64_t rid); void syncNodeRelease(SSyncNode* pNode); diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index 8da24d7c05..dd2f2d3eac 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -1366,146 +1366,188 @@ bool syncNodeInConfig(SSyncNode* pSyncNode, const SSyncCfg* config) { return b1; } -void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncIndex lastConfigChangeIndex, bool* isDrop) { +void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncIndex lastConfigChangeIndex) { SSyncCfg oldConfig = pSyncNode->pRaftCfg->cfg; pSyncNode->pRaftCfg->cfg = *pNewConfig; pSyncNode->pRaftCfg->lastConfigIndex = lastConfigChangeIndex; - int32_t ret = 0; - - // save snapshot senders - int32_t oldReplicaNum = pSyncNode->replicaNum; - SRaftId oldReplicasId[TSDB_MAX_REPLICA]; - memcpy(oldReplicasId, pSyncNode->replicasId, sizeof(oldReplicasId)); - SSyncSnapshotSender* 
oldSenders[TSDB_MAX_REPLICA]; - for (int i = 0; i < TSDB_MAX_REPLICA; ++i) { - oldSenders[i] = (pSyncNode->senders)[i]; - - char* eventLog = snapshotSender2SimpleStr(oldSenders[i], "snapshot sender save old"); - syncNodeEventLog(pSyncNode, eventLog); - taosMemoryFree(eventLog); - } - - // init internal - pSyncNode->myNodeInfo = pSyncNode->pRaftCfg->cfg.nodeInfo[pSyncNode->pRaftCfg->cfg.myIndex]; - syncUtilnodeInfo2raftId(&pSyncNode->myNodeInfo, pSyncNode->vgId, &pSyncNode->myRaftId); - - // init peersNum, peers, peersId - pSyncNode->peersNum = pSyncNode->pRaftCfg->cfg.replicaNum - 1; - int j = 0; - for (int i = 0; i < pSyncNode->pRaftCfg->cfg.replicaNum; ++i) { - if (i != pSyncNode->pRaftCfg->cfg.myIndex) { - pSyncNode->peersNodeInfo[j] = pSyncNode->pRaftCfg->cfg.nodeInfo[i]; - j++; - } - } - for (int i = 0; i < pSyncNode->peersNum; ++i) { - syncUtilnodeInfo2raftId(&pSyncNode->peersNodeInfo[i], pSyncNode->vgId, &pSyncNode->peersId[i]); - } - - // init replicaNum, replicasId - pSyncNode->replicaNum = pSyncNode->pRaftCfg->cfg.replicaNum; - for (int i = 0; i < pSyncNode->pRaftCfg->cfg.replicaNum; ++i) { - syncUtilnodeInfo2raftId(&pSyncNode->pRaftCfg->cfg.nodeInfo[i], pSyncNode->vgId, &pSyncNode->replicasId[i]); - } - - syncIndexMgrUpdate(pSyncNode->pNextIndex, pSyncNode); - syncIndexMgrUpdate(pSyncNode->pMatchIndex, pSyncNode); - voteGrantedUpdate(pSyncNode->pVotesGranted, pSyncNode); - votesRespondUpdate(pSyncNode->pVotesRespond, pSyncNode); - - pSyncNode->quorum = syncUtilQuorum(pSyncNode->pRaftCfg->cfg.replicaNum); - - // reset snapshot senders - - // clear new - for (int i = 0; i < TSDB_MAX_REPLICA; ++i) { - (pSyncNode->senders)[i] = NULL; - } - - // reset new - for (int i = 0; i < pSyncNode->replicaNum; ++i) { - // reset sender - bool reset = false; - for (int j = 0; j < TSDB_MAX_REPLICA; ++j) { - if (syncUtilSameId(&(pSyncNode->replicasId)[i], &oldReplicasId[j])) { - char host[128]; - uint16_t port; - syncUtilU642Addr((pSyncNode->replicasId)[i].addr, host, 
sizeof(host), &port); - - do { - char eventLog[256]; - snprintf(eventLog, sizeof(eventLog), "snapshot sender reset for %lu, newIndex:%d, %s:%d, %p", - (pSyncNode->replicasId)[i].addr, i, host, port, oldSenders[j]); - syncNodeEventLog(pSyncNode, eventLog); - } while (0); - - (pSyncNode->senders)[i] = oldSenders[j]; - oldSenders[j] = NULL; - reset = true; - - // reset replicaIndex - int32_t oldreplicaIndex = (pSyncNode->senders)[i]->replicaIndex; - (pSyncNode->senders)[i]->replicaIndex = i; - - do { - char eventLog[256]; - snprintf(eventLog, sizeof(eventLog), "snapshot sender udpate replicaIndex from %d to %d, %s:%d, %p, reset:%d", - oldreplicaIndex, i, host, port, (pSyncNode->senders)[i], reset); - syncNodeEventLog(pSyncNode, eventLog); - } while (0); - } - } - } - - // create new - for (int i = 0; i < TSDB_MAX_REPLICA; ++i) { - if ((pSyncNode->senders)[i] == NULL) { - (pSyncNode->senders)[i] = snapshotSenderCreate(pSyncNode, i); - - char* eventLog = snapshotSender2SimpleStr((pSyncNode->senders)[i], "snapshot sender create new"); - syncNodeEventLog(pSyncNode, eventLog); - taosMemoryFree(eventLog); - } - } - - // free old - for (int i = 0; i < TSDB_MAX_REPLICA; ++i) { - if (oldSenders[i] != NULL) { - snapshotSenderDestroy(oldSenders[i]); - - do { - char eventLog[128]; - snprintf(eventLog, sizeof(eventLog), "snapshot sender delete old %p replica-index:%d", oldSenders[i], i); - syncNodeEventLog(pSyncNode, eventLog); - } while (0); - - oldSenders[i] = NULL; - } - } - bool IamInOld = syncNodeInConfig(pSyncNode, &oldConfig); bool IamInNew = syncNodeInConfig(pSyncNode, pNewConfig); - *isDrop = true; + bool isDrop = false; + bool isAdd = false; + if (IamInOld && !IamInNew) { - *isDrop = true; + isDrop = true; } else { - *isDrop = false; + isDrop = false; } - // may be add me to a new raft group - if (IamInOld && IamInNew && oldConfig.replicaNum == 1) { + if (!IamInOld && IamInNew) { + isAdd = true; + } else { + isAdd = false; } if (IamInNew) { pSyncNode->pRaftCfg->isStandBy 
= 0; // change isStandBy to normal } - raftCfgPersist(pSyncNode->pRaftCfg); - - if (gRaftDetailLog) { - syncNodeLog2("==syncNodeDoConfigChange==", pSyncNode); + if (isDrop) { + pSyncNode->pRaftCfg->isStandBy = 1; // set standby } + + // persist last config index + raftCfgAddConfigIndex(pSyncNode->pRaftCfg, lastConfigChangeIndex); + + if (IamInNew) { + //----------------------------------------- + int32_t ret = 0; + + // save snapshot senders + int32_t oldReplicaNum = pSyncNode->replicaNum; + SRaftId oldReplicasId[TSDB_MAX_REPLICA]; + memcpy(oldReplicasId, pSyncNode->replicasId, sizeof(oldReplicasId)); + SSyncSnapshotSender* oldSenders[TSDB_MAX_REPLICA]; + for (int i = 0; i < TSDB_MAX_REPLICA; ++i) { + oldSenders[i] = (pSyncNode->senders)[i]; + + char* eventLog = snapshotSender2SimpleStr(oldSenders[i], "snapshot sender save old"); + syncNodeEventLog(pSyncNode, eventLog); + taosMemoryFree(eventLog); + } + + // init internal + pSyncNode->myNodeInfo = pSyncNode->pRaftCfg->cfg.nodeInfo[pSyncNode->pRaftCfg->cfg.myIndex]; + syncUtilnodeInfo2raftId(&pSyncNode->myNodeInfo, pSyncNode->vgId, &pSyncNode->myRaftId); + + // init peersNum, peers, peersId + pSyncNode->peersNum = pSyncNode->pRaftCfg->cfg.replicaNum - 1; + int j = 0; + for (int i = 0; i < pSyncNode->pRaftCfg->cfg.replicaNum; ++i) { + if (i != pSyncNode->pRaftCfg->cfg.myIndex) { + pSyncNode->peersNodeInfo[j] = pSyncNode->pRaftCfg->cfg.nodeInfo[i]; + j++; + } + } + for (int i = 0; i < pSyncNode->peersNum; ++i) { + syncUtilnodeInfo2raftId(&pSyncNode->peersNodeInfo[i], pSyncNode->vgId, &pSyncNode->peersId[i]); + } + + // init replicaNum, replicasId + pSyncNode->replicaNum = pSyncNode->pRaftCfg->cfg.replicaNum; + for (int i = 0; i < pSyncNode->pRaftCfg->cfg.replicaNum; ++i) { + syncUtilnodeInfo2raftId(&pSyncNode->pRaftCfg->cfg.nodeInfo[i], pSyncNode->vgId, &pSyncNode->replicasId[i]); + } + + syncIndexMgrUpdate(pSyncNode->pNextIndex, pSyncNode); + syncIndexMgrUpdate(pSyncNode->pMatchIndex, pSyncNode); + 
voteGrantedUpdate(pSyncNode->pVotesGranted, pSyncNode); + votesRespondUpdate(pSyncNode->pVotesRespond, pSyncNode); + + pSyncNode->quorum = syncUtilQuorum(pSyncNode->pRaftCfg->cfg.replicaNum); + + // reset snapshot senders + + // clear new + for (int i = 0; i < TSDB_MAX_REPLICA; ++i) { + (pSyncNode->senders)[i] = NULL; + } + + // reset new + for (int i = 0; i < pSyncNode->replicaNum; ++i) { + // reset sender + bool reset = false; + for (int j = 0; j < TSDB_MAX_REPLICA; ++j) { + if (syncUtilSameId(&(pSyncNode->replicasId)[i], &oldReplicasId[j])) { + char host[128]; + uint16_t port; + syncUtilU642Addr((pSyncNode->replicasId)[i].addr, host, sizeof(host), &port); + + do { + char eventLog[256]; + snprintf(eventLog, sizeof(eventLog), "snapshot sender reset for %lu, newIndex:%d, %s:%d, %p", + (pSyncNode->replicasId)[i].addr, i, host, port, oldSenders[j]); + syncNodeEventLog(pSyncNode, eventLog); + } while (0); + + (pSyncNode->senders)[i] = oldSenders[j]; + oldSenders[j] = NULL; + reset = true; + + // reset replicaIndex + int32_t oldreplicaIndex = (pSyncNode->senders)[i]->replicaIndex; + (pSyncNode->senders)[i]->replicaIndex = i; + + do { + char eventLog[256]; + snprintf(eventLog, sizeof(eventLog), + "snapshot sender udpate replicaIndex from %d to %d, %s:%d, %p, reset:%d", oldreplicaIndex, i, host, + port, (pSyncNode->senders)[i], reset); + syncNodeEventLog(pSyncNode, eventLog); + } while (0); + } + } + } + + // create new + for (int i = 0; i < TSDB_MAX_REPLICA; ++i) { + if ((pSyncNode->senders)[i] == NULL) { + (pSyncNode->senders)[i] = snapshotSenderCreate(pSyncNode, i); + + char* eventLog = snapshotSender2SimpleStr((pSyncNode->senders)[i], "snapshot sender create new"); + syncNodeEventLog(pSyncNode, eventLog); + taosMemoryFree(eventLog); + } + } + + // free old + for (int i = 0; i < TSDB_MAX_REPLICA; ++i) { + if (oldSenders[i] != NULL) { + snapshotSenderDestroy(oldSenders[i]); + + do { + char eventLog[128]; + snprintf(eventLog, sizeof(eventLog), "snapshot sender delete 
old %p replica-index:%d", oldSenders[i], i); + syncNodeEventLog(pSyncNode, eventLog); + } while (0); + + oldSenders[i] = NULL; + } + } + + // persist + raftCfgPersist(pSyncNode->pRaftCfg); + + char tmpbuf[512]; + char* oldStr = syncCfg2SimpleStr(&oldConfig); + char* newStr = syncCfg2SimpleStr(pNewConfig); + snprintf(tmpbuf, sizeof(tmpbuf), "config change from %d to %d, index:%ld, %s --> %s", oldConfig.replicaNum, + pNewConfig->replicaNum, lastConfigChangeIndex, oldStr, newStr); + taosMemoryFree(oldStr); + taosMemoryFree(newStr); + + // change isStandBy to normal (election timeout) + if (pSyncNode->state == TAOS_SYNC_STATE_LEADER) { + syncNodeBecomeLeader(pSyncNode, tmpbuf); + } else { + syncNodeBecomeFollower(pSyncNode, tmpbuf); + } + } else { + // persist + raftCfgPersist(pSyncNode->pRaftCfg); + + char tmpbuf[512]; + char* oldStr = syncCfg2SimpleStr(&oldConfig); + char* newStr = syncCfg2SimpleStr(pNewConfig); + snprintf(tmpbuf, sizeof(tmpbuf), "do not config change from %d to %d, index:%ld, %s --> %s", oldConfig.replicaNum, + pNewConfig->replicaNum, lastConfigChangeIndex, oldStr, newStr); + taosMemoryFree(oldStr); + taosMemoryFree(newStr); + syncNodeEventLog(pSyncNode, tmpbuf); + } + +_END: + return; } SSyncNode* syncNodeAcquire(int64_t rid) { @@ -2275,47 +2317,11 @@ static int32_t syncNodeConfigChange(SSyncNode* ths, SRpcMsg* pRpcMsg, SSyncRaftE int32_t ret = syncCfgFromStr(pRpcMsg->pCont, &newSyncCfg); ASSERT(ret == 0); - // persist last config index - raftCfgAddConfigIndex(ths->pRaftCfg, pEntry->index); - raftCfgPersist(ths->pRaftCfg); - // update new config myIndex syncNodeUpdateNewConfigIndex(ths, &newSyncCfg); - bool isDrop = false; - bool IamInNew = syncNodeInConfig(ths, &newSyncCfg); - if (IamInNew) { - // do config change - syncNodeDoConfigChange(ths, &newSyncCfg, pEntry->index, &isDrop); - - // change isStandBy to normal - if (!isDrop) { - char tmpbuf[512]; - char* oldStr = syncCfg2SimpleStr(&oldSyncCfg); - char* newStr = syncCfg2SimpleStr(&newSyncCfg); 
- snprintf(tmpbuf, sizeof(tmpbuf), "config change from %d to %d, index:%ld, %s --> %s", oldSyncCfg.replicaNum, - newSyncCfg.replicaNum, pEntry->index, oldStr, newStr); - taosMemoryFree(oldStr); - taosMemoryFree(newStr); - - if (ths->state == TAOS_SYNC_STATE_LEADER) { - syncNodeBecomeLeader(ths, tmpbuf); - } else { - syncNodeBecomeFollower(ths, tmpbuf); - } - } - - } else { - char tmpbuf[512]; - char* oldStr = syncCfg2SimpleStr(&oldSyncCfg); - char* newStr = syncCfg2SimpleStr(&newSyncCfg); - snprintf(tmpbuf, sizeof(tmpbuf), "config change2 from %d to %d, index:%ld, %s --> %s", oldSyncCfg.replicaNum, - newSyncCfg.replicaNum, pEntry->index, oldStr, newStr); - taosMemoryFree(oldStr); - taosMemoryFree(newStr); - - syncNodeBecomeFollower(ths, tmpbuf); - } + // do config change + syncNodeDoConfigChange(ths, &newSyncCfg, pEntry->index); // set pFinish pFinish->oldCfg = oldSyncCfg; diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index a88125cf3e..c694a0b715 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -539,42 +539,9 @@ int32_t syncNodeOnSnapshotSendCb(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { // update new config myIndex SSyncCfg newSyncCfg = pMsg->lastConfig; syncNodeUpdateNewConfigIndex(pSyncNode, &newSyncCfg); - bool IamInNew = syncNodeInConfig(pSyncNode, &newSyncCfg); - bool isDrop = false; - if (IamInNew) { - char eventLog[128]; - snprintf(eventLog, sizeof(eventLog), - "update config by snapshot, lastIndex:%ld, lastTerm:%lu, lastConfigIndex:%ld", pMsg->lastIndex, - pMsg->lastTerm, pMsg->lastConfigIndex); - syncNodeEventLog(pSyncNode, eventLog); - - syncNodeDoConfigChange(pSyncNode, &newSyncCfg, pMsg->lastConfigIndex, &isDrop); - - } else { - char eventLog[128]; - snprintf(eventLog, sizeof(eventLog), - "do not update config by snapshot, not in new, lastIndex:%ld, lastTerm:%lu, lastConfigIndex:%ld", - pMsg->lastIndex, pMsg->lastTerm, pMsg->lastConfigIndex); - 
syncNodeEventLog(pSyncNode, eventLog); - } - - // change isStandBy to normal - if (!isDrop) { - char tmpbuf[512]; - char *oldStr = syncCfg2SimpleStr(&oldSyncCfg); - char *newStr = syncCfg2SimpleStr(&newSyncCfg); - snprintf(tmpbuf, sizeof(tmpbuf), "config change3 from %d to %d, index:%ld, %s --> %s", - oldSyncCfg.replicaNum, newSyncCfg.replicaNum, pMsg->lastConfigIndex, oldStr, newStr); - taosMemoryFree(oldStr); - taosMemoryFree(newStr); - - if (pSyncNode->state == TAOS_SYNC_STATE_LEADER) { - syncNodeBecomeLeader(pSyncNode, tmpbuf); - } else { - syncNodeBecomeFollower(pSyncNode, tmpbuf); - } - } + // do config change + syncNodeDoConfigChange(pSyncNode, &newSyncCfg, pMsg->lastConfigIndex); } SSnapshot snapshot; From 9f765048309ce66493895644e2fbb263e088894a Mon Sep 17 00:00:00 2001 From: Minghao Li Date: Mon, 20 Jun 2022 20:10:13 +0800 Subject: [PATCH 35/40] refactor(sync): propose fail when changing --- source/libs/sync/src/syncMain.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index dd2f2d3eac..599410510d 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -1515,7 +1515,7 @@ void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncInde } } - // persist + // persist cfg raftCfgPersist(pSyncNode->pRaftCfg); char tmpbuf[512]; @@ -1533,7 +1533,7 @@ void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncInde syncNodeBecomeFollower(pSyncNode, tmpbuf); } } else { - // persist + // persist cfg raftCfgPersist(pSyncNode->pRaftCfg); char tmpbuf[512]; From 48d22c359f177386f8aff6153969e4ce47bb3deb Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Mon, 20 Jun 2022 20:17:56 +0800 Subject: [PATCH 36/40] feat: order group by group id generated by partition --- source/libs/executor/inc/executorimpl.h | 3 ++- source/libs/executor/src/groupoperator.c | 33 +++++++++++++++++++----- 2 files changed, 29 insertions(+), 7 
deletions(-) diff --git a/source/libs/executor/inc/executorimpl.h b/source/libs/executor/inc/executorimpl.h index c33b6622e3..9e6613d3c2 100644 --- a/source/libs/executor/inc/executorimpl.h +++ b/source/libs/executor/inc/executorimpl.h @@ -506,7 +506,8 @@ typedef struct SPartitionOperatorInfo { SDiskbasedBuf* pBuf; // query result buffer based on blocked-wised disk file int32_t rowCapacity; // maximum number of rows for each buffer page int32_t* columnOffset; // start position for each column data - void* pGroupIter; // group iterator + SArray* sortedGroupArray; // SDataGroupInfo sorted by group id + int32_t groupIndex; // group index int32_t pageIndex; // page index of current group SSDataBlock* pUpdateRes; SExprSupp scalarSup; diff --git a/source/libs/executor/src/groupoperator.c b/source/libs/executor/src/groupoperator.c index 527f4520bf..482326a30e 100644 --- a/source/libs/executor/src/groupoperator.c +++ b/source/libs/executor/src/groupoperator.c @@ -586,24 +586,30 @@ static void clearPartitionOperator(SPartitionOperatorInfo* pInfo) { while( (ite = taosHashIterate(pInfo->pGroupSet, ite)) != NULL ) { taosArrayDestroy( ((SDataGroupInfo *)ite)->pPageList); } - taosHashClear(pInfo->pGroupSet); + taosArrayClear(pInfo->sortedGroupArray); clearDiskbasedBuf(pInfo->pBuf); } +static int compareDataGroupInfo(const void* group1, const void* group2) { + const SDataGroupInfo* pGroupInfo1 = group1; + const SDataGroupInfo* pGroupInfo2 = group2; + return pGroupInfo1->groupId - pGroupInfo2->groupId; +} + static SSDataBlock* buildPartitionResult(SOperatorInfo* pOperator) { SPartitionOperatorInfo* pInfo = pOperator->info; - SDataGroupInfo* pGroupInfo = pInfo->pGroupIter; - if (pInfo->pGroupIter == NULL || pInfo->pageIndex >= taosArrayGetSize(pGroupInfo->pPageList)) { + SDataGroupInfo* pGroupInfo = (pInfo->groupIndex != -1) ? 
taosArrayGet(pInfo->sortedGroupArray, pInfo->groupIndex) : NULL; + if (pInfo->groupIndex == -1 || pInfo->pageIndex >= taosArrayGetSize(pGroupInfo->pPageList)) { // try next group data - pInfo->pGroupIter = taosHashIterate(pInfo->pGroupSet, pInfo->pGroupIter); - if (pInfo->pGroupIter == NULL) { + ++pInfo->groupIndex; + if (pInfo->groupIndex >= taosArrayGetSize(pInfo->sortedGroupArray)) { doSetOperatorCompleted(pOperator); clearPartitionOperator(pInfo); return NULL; } - pGroupInfo = pInfo->pGroupIter; + pGroupInfo = taosArrayGet(pInfo->sortedGroupArray, pInfo->groupIndex); pInfo->pageIndex = 0; } @@ -657,6 +663,20 @@ static SSDataBlock* hashPartition(SOperatorInfo* pOperator) { doHashPartition(pOperator, pBlock); } + SArray* groupArray = taosArrayInit(taosHashGetSize(pInfo->pGroupSet), sizeof(SDataGroupInfo)); + void* pGroupIter = NULL; + pGroupIter = taosHashIterate(pInfo->pGroupSet, NULL); + while (pGroupIter != NULL) { + SDataGroupInfo* pGroupInfo = pGroupIter; + taosArrayPush(groupArray, pGroupInfo); + pGroupIter = taosHashIterate(pInfo->pGroupSet, pGroupIter); + } + + taosArraySort(groupArray, compareDataGroupInfo); + pInfo->sortedGroupArray = groupArray; + pInfo->groupIndex = -1; + taosHashClear(pInfo->pGroupSet); + pOperator->cost.openCost = (taosGetTimestampUs() - st) / 1000.0; pOperator->status = OP_RES_TO_RETURN; @@ -676,6 +696,7 @@ static void destroyPartitionOperatorInfo(void* param, int32_t numOfOutput) { taosArrayDestroy(pInfo->pGroupColVals); taosMemoryFree(pInfo->keyBuf); + taosArrayDestroy(pInfo->sortedGroupArray); taosHashCleanup(pInfo->pGroupSet); taosMemoryFree(pInfo->columnOffset); From 450675f67224c2484180e4cedc44ae503d602086 Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Mon, 20 Jun 2022 19:53:48 +0800 Subject: [PATCH 37/40] feat(stream): support snode --- include/libs/stream/tstream.h | 13 +- source/common/src/tglobal.c | 2 +- source/dnode/mgmt/mgmt_snode/src/smHandle.c | 10 +- source/dnode/mgmt/mgmt_snode/src/smWorker.c | 2 + 
source/dnode/mgmt/mgmt_vnode/inc/vmInt.h | 2 - source/dnode/mgmt/mgmt_vnode/src/vmInt.c | 1 - source/dnode/mgmt/mgmt_vnode/src/vmWorker.c | 15 +- source/dnode/mnode/impl/src/mndScheduler.c | 66 ++++--- source/dnode/snode/src/snode.c | 23 ++- source/dnode/vnode/src/inc/vnodeInt.h | 40 ++-- source/dnode/vnode/src/tq/tq.c | 14 +- source/dnode/vnode/src/vnd/vnodeSvr.c | 13 +- source/libs/executor/src/executorimpl.c | 7 +- source/libs/stream/src/stream.c | 30 +-- source/libs/stream/src/streamDispatch.c | 4 +- source/libs/stream/src/streamExec.c | 11 +- tests/script/jenkins/basic.txt | 1 + .../tsim/stream/distributeInterval0.sim | 2 +- .../script/tsim/stream/distributesession0.sim | 2 +- tests/script/tsim/stream/schedSnode.sim | 173 ++++++++++++++++++ 20 files changed, 312 insertions(+), 119 deletions(-) create mode 100644 tests/script/tsim/stream/schedSnode.sim diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 2b3a1f2650..937ac2b408 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -152,7 +152,6 @@ void* streamDataBlockDecode(const void* buf, SStreamDataBlock* pInput); typedef struct { char* qmsg; // followings are not applicable to encoder and decoder - // void* inputHandle; void* executor; } STaskExec; @@ -400,15 +399,13 @@ typedef struct { int32_t tDecodeStreamDispatchReq(SDecoder* pDecoder, SStreamDispatchReq* pReq); -int32_t streamLaunchByWrite(SStreamTask* pTask, int32_t vgId, SMsgCb* pMsgCb); +int32_t streamLaunchByWrite(SStreamTask* pTask, int32_t vgId); int32_t streamSetupTrigger(SStreamTask* pTask); -int32_t streamTaskRun(SStreamTask* pTask); - -int32_t streamTaskProcessRunReq(SStreamTask* pTask, SMsgCb* pMsgCb); -int32_t streamProcessDispatchReq(SStreamTask* pTask, SMsgCb* pMsgCb, SStreamDispatchReq* pReq, SRpcMsg* pMsg); -int32_t streamProcessDispatchRsp(SStreamTask* pTask, SMsgCb* pMsgCb, SStreamDispatchRsp* pRsp); -int32_t streamProcessRecoverReq(SStreamTask* pTask, SMsgCb* pMsgCb, 
SStreamTaskRecoverReq* pReq, SRpcMsg* pMsg); +int32_t streamProcessRunReq(SStreamTask* pTask); +int32_t streamProcessDispatchReq(SStreamTask* pTask, SStreamDispatchReq* pReq, SRpcMsg* pMsg); +int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp); +int32_t streamProcessRecoverReq(SStreamTask* pTask, SStreamTaskRecoverReq* pReq, SRpcMsg* pMsg); int32_t streamProcessRecoverRsp(SStreamTask* pTask, SStreamTaskRecoverRsp* pRsp); #ifdef __cplusplus diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index 7457fe7eb6..92eda0c5e0 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -161,7 +161,7 @@ int32_t tsDiskCfgNum = 0; SDiskCfg tsDiskCfg[TFS_MAX_DISKS] = {0}; // stream scheduler -bool tsStreamSchedV = true; +bool tsSchedStreamToSnode = true; /* * minimum scale for whole system, millisecond by default diff --git a/source/dnode/mgmt/mgmt_snode/src/smHandle.c b/source/dnode/mgmt/mgmt_snode/src/smHandle.c index 52a69f95b4..81576e153e 100644 --- a/source/dnode/mgmt/mgmt_snode/src/smHandle.c +++ b/source/dnode/mgmt/mgmt_snode/src/smHandle.c @@ -96,11 +96,11 @@ SArray *smGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DEPLOY, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_DROP, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RUN, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DISPATCH, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DISPATCH_RSP, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RECOVER, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RECOVER_RSP, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RUN, smPutNodeMsgToSharedQueue, 1) == NULL) goto 
_OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DISPATCH, smPutNodeMsgToSharedQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DISPATCH_RSP, smPutNodeMsgToSharedQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RECOVER, smPutNodeMsgToSharedQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RECOVER_RSP, smPutNodeMsgToSharedQueue, 1) == NULL) goto _OVER; code = 0; _OVER: diff --git a/source/dnode/mgmt/mgmt_snode/src/smWorker.c b/source/dnode/mgmt/mgmt_snode/src/smWorker.c index 8d93ddd66c..19c1b9b5c7 100644 --- a/source/dnode/mgmt/mgmt_snode/src/smWorker.c +++ b/source/dnode/mgmt/mgmt_snode/src/smWorker.c @@ -58,6 +58,7 @@ static void smProcessUniqueQueue(SQueueInfo *pInfo, STaosQall *qall, int32_t num if (sndProcessUMsg(pMgmt->pSnode, pMsg) < 0) { ASSERT(0); } + smSendRsp(pMsg, 0); dTrace("msg:%p, is freed", pMsg); rpcFreeCont(pMsg->pCont); @@ -70,6 +71,7 @@ static void smProcessSharedQueue(SQueueInfo *pInfo, SRpcMsg *pMsg) { dTrace("msg:%p, get from snode-shared queue", pMsg); if (sndProcessSMsg(pMgmt->pSnode, pMsg) < 0) { + smSendRsp(pMsg, terrno); ASSERT(0); } diff --git a/source/dnode/mgmt/mgmt_vnode/inc/vmInt.h b/source/dnode/mgmt/mgmt_vnode/inc/vmInt.h index 7e5379b0f8..6f00767eb0 100644 --- a/source/dnode/mgmt/mgmt_vnode/inc/vmInt.h +++ b/source/dnode/mgmt/mgmt_vnode/inc/vmInt.h @@ -35,7 +35,6 @@ typedef struct SVnodeMgmt { SWWorkerPool syncPool; SWWorkerPool writePool; SWWorkerPool applyPool; - SWWorkerPool mergePool; SSingleWorker mgmtWorker; SSingleWorker monitorWorker; SHashObj *hash; @@ -63,7 +62,6 @@ typedef struct { STaosQueue *pApplyQ; STaosQueue *pQueryQ; STaosQueue *pFetchQ; - STaosQueue *pMergeQ; } SVnodeObj; typedef struct { diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmInt.c b/source/dnode/mgmt/mgmt_vnode/src/vmInt.c index 9e4e7713f2..3f053639aa 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmInt.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmInt.c @@ -86,7 
+86,6 @@ void vmCloseVnode(SVnodeMgmt *pMgmt, SVnodeObj *pVnode) { while (!taosQueueEmpty(pVnode->pApplyQ)) taosMsleep(10); while (!taosQueueEmpty(pVnode->pQueryQ)) taosMsleep(10); while (!taosQueueEmpty(pVnode->pFetchQ)) taosMsleep(10); - while (!taosQueueEmpty(pVnode->pMergeQ)) taosMsleep(10); vmFreeQueue(pMgmt, pVnode); vnodeClose(pVnode->pImpl); diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c b/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c index 95dd5732c6..71bbc8ddd4 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c @@ -98,7 +98,7 @@ static void vmProcessFetchQueue(SQueueInfo *pInfo, SRpcMsg *pMsg) { static void vmProcessSyncQueue(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) { SVnodeObj *pVnode = pInfo->ahandle; - SRpcMsg * pMsg = NULL; + SRpcMsg *pMsg = NULL; for (int32_t i = 0; i < numOfMsgs; ++i) { if (taosGetQitem(qall, (void **)&pMsg) == 0) continue; @@ -119,7 +119,7 @@ static void vmProcessSyncQueue(SQueueInfo *pInfo, STaosQall *qall, int32_t numOf static void vmProcessMergeQueue(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) { SVnodeObj *pVnode = pInfo->ahandle; - SRpcMsg * pMsg = NULL; + SRpcMsg *pMsg = NULL; for (int32_t i = 0; i < numOfMsgs; ++i) { if (taosGetQitem(qall, (void **)&pMsg) == 0) continue; @@ -251,10 +251,9 @@ int32_t vmAllocQueue(SVnodeMgmt *pMgmt, SVnodeObj *pVnode) { pVnode->pApplyQ = tWWorkerAllocQueue(&pMgmt->applyPool, pVnode->pImpl, (FItems)vnodeApplyMsg); pVnode->pQueryQ = tQWorkerAllocQueue(&pMgmt->queryPool, pVnode, (FItem)vmProcessQueryQueue); pVnode->pFetchQ = tQWorkerAllocQueue(&pMgmt->fetchPool, pVnode, (FItem)vmProcessFetchQueue); - pVnode->pMergeQ = tWWorkerAllocQueue(&pMgmt->mergePool, pVnode, (FItems)vmProcessMergeQueue); if (pVnode->pWriteQ == NULL || pVnode->pSyncQ == NULL || pVnode->pApplyQ == NULL || pVnode->pQueryQ == NULL || - pVnode->pFetchQ == NULL || pVnode->pMergeQ == NULL) { + pVnode->pFetchQ == NULL) { terrno = 
TSDB_CODE_OUT_OF_MEMORY; return -1; } @@ -269,13 +268,11 @@ void vmFreeQueue(SVnodeMgmt *pMgmt, SVnodeObj *pVnode) { tWWorkerFreeQueue(&pMgmt->syncPool, pVnode->pSyncQ); tQWorkerFreeQueue(&pMgmt->queryPool, pVnode->pQueryQ); tQWorkerFreeQueue(&pMgmt->fetchPool, pVnode->pFetchQ); - tWWorkerFreeQueue(&pMgmt->mergePool, pVnode->pMergeQ); pVnode->pWriteQ = NULL; pVnode->pSyncQ = NULL; pVnode->pApplyQ = NULL; pVnode->pQueryQ = NULL; pVnode->pFetchQ = NULL; - pVnode->pMergeQ = NULL; dDebug("vgId:%d, queue is freed", pVnode->vgId); } @@ -307,11 +304,6 @@ int32_t vmStartWorker(SVnodeMgmt *pMgmt) { pSPool->max = tsNumOfVnodeSyncThreads; if (tWWorkerInit(pSPool) != 0) return -1; - SWWorkerPool *pMPool = &pMgmt->mergePool; - pMPool->name = "vnode-merge"; - pMPool->max = tsNumOfVnodeMergeThreads; - if (tWWorkerInit(pMPool) != 0) return -1; - SSingleWorkerCfg mgmtCfg = { .min = 1, .max = 1, @@ -342,6 +334,5 @@ void vmStopWorker(SVnodeMgmt *pMgmt) { tWWorkerCleanup(&pMgmt->syncPool); tQWorkerCleanup(&pMgmt->queryPool); tQWorkerCleanup(&pMgmt->fetchPool); - tWWorkerCleanup(&pMgmt->mergePool); dDebug("vnode workers are closed"); } diff --git a/source/dnode/mnode/impl/src/mndScheduler.c b/source/dnode/mnode/impl/src/mndScheduler.c index 3ff0c39bc3..3bd9a9128d 100644 --- a/source/dnode/mnode/impl/src/mndScheduler.c +++ b/source/dnode/mnode/impl/src/mndScheduler.c @@ -33,7 +33,7 @@ #include "tname.h" #include "tuuid.h" -extern bool tsStreamSchedV; +extern bool tsSchedStreamToSnode; static int32_t mndAddTaskToTaskSet(SArray* pArray, SStreamTask* pTask) { int32_t childId = taosArrayGetSize(pArray); @@ -204,9 +204,11 @@ int32_t mndAssignTaskToVg(SMnode* pMnode, STrans* pTrans, SStreamTask* pTask, SS return 0; } -SSnodeObj* mndSchedFetchSnode(SMnode* pMnode) { +SSnodeObj* mndSchedFetchOneSnode(SMnode* pMnode) { SSnodeObj* pObj = NULL; - pObj = sdbFetch(pMnode->pSdb, SDB_SNODE, NULL, (void**)&pObj); + void* pIter = NULL; + // TODO random fetch + pIter = sdbFetch(pMnode->pSdb, SDB_SNODE, 
pIter, (void**)&pObj); return pObj; } @@ -214,7 +216,7 @@ int32_t mndAssignTaskToSnode(SMnode* pMnode, STrans* pTrans, SStreamTask* pTask, const SSnodeObj* pSnode) { int32_t msgLen; - pTask->nodeId = 0; + pTask->nodeId = SNODE_HANDLE; pTask->epSet = mndAcquireEpFromSnode(pMnode, pSnode); plan->execNode.nodeId = 0; @@ -224,7 +226,7 @@ int32_t mndAssignTaskToSnode(SMnode* pMnode, STrans* pTrans, SStreamTask* pTask, terrno = TSDB_CODE_QRY_INVALID_INPUT; return -1; } - mndPersistTaskDeployReq(pTrans, pTask, &plan->execNode.epSet, TDMT_STREAM_TASK_DEPLOY, 0); + mndPersistTaskDeployReq(pTrans, pTask, &plan->execNode.epSet, TDMT_STREAM_TASK_DEPLOY, SNODE_HANDLE); return 0; } @@ -370,8 +372,8 @@ int32_t mndScheduleStream(SMnode* pMnode, STrans* pTrans, SStreamObj* pStream) { } if (totLevel > 1) { - SStreamTask* pFinalTask; - // inner plan + SStreamTask* pInnerTask; + // inner level { SArray* taskInnerLevel = taosArrayInit(0, sizeof(void*)); taosArrayPush(pStream->tasks, &taskInnerLevel); @@ -380,31 +382,51 @@ int32_t mndScheduleStream(SMnode* pMnode, STrans* pTrans, SStreamObj* pStream) { SSubplan* plan = (SSubplan*)nodesListGetNode(inner->pNodeList, 0); ASSERT(plan->subplanType == SUBPLAN_TYPE_MERGE); - pFinalTask = tNewSStreamTask(pStream->uid); - mndAddTaskToTaskSet(taskInnerLevel, pFinalTask); + pInnerTask = tNewSStreamTask(pStream->uid); + mndAddTaskToTaskSet(taskInnerLevel, pInnerTask); // input - pFinalTask->inputType = TASK_INPUT_TYPE__DATA_BLOCK; + pInnerTask->inputType = TASK_INPUT_TYPE__DATA_BLOCK; // trigger - pFinalTask->triggerParam = pStream->triggerParam; + pInnerTask->triggerParam = pStream->triggerParam; // dispatch - if (mndAddDispatcherToInnerTask(pMnode, pTrans, pStream, pFinalTask) < 0) { + if (mndAddDispatcherToInnerTask(pMnode, pTrans, pStream, pInnerTask) < 0) { qDestroyQueryPlan(pPlan); return -1; } // exec - pFinalTask->execType = TASK_EXEC__PIPE; - SVgObj* pVgroup = mndSchedFetchOneVg(pMnode, pStream->sourceDbUid); - if 
(mndAssignTaskToVg(pMnode, pTrans, pFinalTask, plan, pVgroup) < 0) { - sdbRelease(pSdb, pVgroup); - qDestroyQueryPlan(pPlan); - return -1; + pInnerTask->execType = TASK_EXEC__PIPE; + + if (tsSchedStreamToSnode) { + SSnodeObj* pSnode = mndSchedFetchOneSnode(pMnode); + if (pSnode == NULL) { + SVgObj* pVgroup = mndSchedFetchOneVg(pMnode, pStream->sourceDbUid); + if (mndAssignTaskToVg(pMnode, pTrans, pInnerTask, plan, pVgroup) < 0) { + sdbRelease(pSdb, pVgroup); + qDestroyQueryPlan(pPlan); + return -1; + } + } else { + if (mndAssignTaskToSnode(pMnode, pTrans, pInnerTask, plan, pSnode) < 0) { + ASSERT(0); + sdbRelease(pSdb, pSnode); + qDestroyQueryPlan(pPlan); + return -1; + } + } + } else { + SVgObj* pVgroup = mndSchedFetchOneVg(pMnode, pStream->sourceDbUid); + if (mndAssignTaskToVg(pMnode, pTrans, pInnerTask, plan, pVgroup) < 0) { + sdbRelease(pSdb, pVgroup); + qDestroyQueryPlan(pPlan); + return -1; + } } } - // source plan + // source level SArray* taskSourceLevel = taosArrayInit(0, sizeof(void*)); taosArrayPush(pStream->tasks, &taskSourceLevel); @@ -434,9 +456,9 @@ int32_t mndScheduleStream(SMnode* pMnode, STrans* pTrans, SStreamObj* pStream) { pTask->dispatchMsgType = TDMT_STREAM_TASK_DISPATCH; pTask->dispatchType = TASK_DISPATCH__FIXED; - pTask->fixedEpDispatcher.taskId = pFinalTask->taskId; - pTask->fixedEpDispatcher.nodeId = pFinalTask->nodeId; - pTask->fixedEpDispatcher.epSet = pFinalTask->epSet; + pTask->fixedEpDispatcher.taskId = pInnerTask->taskId; + pTask->fixedEpDispatcher.nodeId = pInnerTask->nodeId; + pTask->fixedEpDispatcher.epSet = pInnerTask->epSet; // exec pTask->execType = TASK_EXEC__PIPE; diff --git a/source/dnode/snode/src/snode.c b/source/dnode/snode/src/snode.c index 8ef48ccbf9..84a66c680b 100644 --- a/source/dnode/snode/src/snode.c +++ b/source/dnode/snode/src/snode.c @@ -78,8 +78,8 @@ int32_t sndMetaRemoveTask(SStreamMeta *pMeta, int32_t taskId) { static int32_t sndProcessTaskDeployReq(SSnode *pNode, SRpcMsg *pMsg) { SStreamMeta *pMeta = 
pNode->pMeta; - char *msg = pMsg->pCont; - int32_t msgLen = pMsg->contLen; + char *msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); + int32_t msgLen = pMsg->contLen - sizeof(SMsgHead); SStreamTask *pTask = taosMemoryCalloc(1, sizeof(SStreamTask)); if (pTask == NULL) { @@ -105,23 +105,22 @@ static int32_t sndProcessTaskDeployReq(SSnode *pNode, SRpcMsg *pMsg) { ASSERT(pTask->execType != TASK_EXEC__NONE); - SReadHandle handle = { - .pMsgCb = &pNode->msgCb, - }; - - /*pTask->exec.inputHandle = NULL;*/ - pTask->exec.executor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &handle); + ASSERT(pTask->dataScan == 0); + pTask->exec.executor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, NULL); ASSERT(pTask->exec.executor); streamSetupTrigger(pTask); qInfo("deploy stream: stream id %ld task id %d child id %d on snode", pTask->streamId, pTask->taskId, pTask->childId); + taosHashPut(pMeta->pHash, &pTask->taskId, sizeof(int32_t), &pTask, sizeof(void *)); + return 0; FAIL: if (pTask->inputQueue) streamQueueClose(pTask->inputQueue); if (pTask->outputQueue) streamQueueClose(pTask->outputQueue); + if (pTask) taosMemoryFree(pTask); return -1; } @@ -130,7 +129,7 @@ static int32_t sndProcessTaskRunReq(SSnode *pNode, SRpcMsg *pMsg) { SStreamTaskRunReq *pReq = pMsg->pCont; int32_t taskId = pReq->taskId; SStreamTask *pTask = *(SStreamTask **)taosHashGet(pMeta->pHash, &taskId, sizeof(int32_t)); - streamTaskProcessRunReq(pTask, &pNode->msgCb); + streamProcessRunReq(pTask); return 0; } @@ -151,7 +150,7 @@ static int32_t sndProcessTaskDispatchReq(SSnode *pNode, SRpcMsg *pMsg) { .info = pMsg->info, .code = 0, }; - streamProcessDispatchReq(pTask, &pNode->msgCb, &req, &rsp); + streamProcessDispatchReq(pTask, &req, &rsp); return 0; } @@ -161,7 +160,7 @@ static int32_t sndProcessTaskRecoverReq(SSnode *pNode, SRpcMsg *pMsg) { SStreamTaskRecoverReq *pReq = pMsg->pCont; int32_t taskId = pReq->taskId; SStreamTask *pTask = *(SStreamTask **)taosHashGet(pMeta->pHash, &taskId, sizeof(int32_t)); - 
streamProcessRecoverReq(pTask, &pNode->msgCb, pReq, pMsg); + streamProcessRecoverReq(pTask, pReq, pMsg); return 0; } @@ -171,7 +170,7 @@ static int32_t sndProcessTaskDispatchRsp(SSnode *pNode, SRpcMsg *pMsg) { SStreamDispatchRsp *pRsp = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); int32_t taskId = pRsp->taskId; SStreamTask *pTask = *(SStreamTask **)taosHashGet(pMeta->pHash, &taskId, sizeof(int32_t)); - streamProcessDispatchRsp(pTask, &pNode->msgCb, pRsp); + streamProcessDispatchRsp(pTask, pRsp); return 0; } diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 70b6e24b07..c0dfebb08f 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -87,7 +87,7 @@ int metaAlterSTable(SMeta* pMeta, int64_t version, SVCreateStbReq* p int metaDropSTable(SMeta* pMeta, int64_t verison, SVDropStbReq* pReq); int metaCreateTable(SMeta* pMeta, int64_t version, SVCreateTbReq* pReq); int metaDropTable(SMeta* pMeta, int64_t version, SVDropTbReq* pReq, SArray* tbUids); -int metaTtlDropTable(SMeta *pMeta, int64_t ttl, SArray *tbUids); +int metaTtlDropTable(SMeta* pMeta, int64_t ttl, SArray* tbUids); int metaAlterTable(SMeta* pMeta, int64_t version, SVAlterTbReq* pReq, STableMetaRsp* pMetaRsp); SSchemaWrapper* metaGetTableSchema(SMeta* pMeta, tb_uid_t uid, int32_t sver, bool isinline); STSchema* metaGetTbTSchema(SMeta* pMeta, tb_uid_t uid, int32_t sver); @@ -106,28 +106,28 @@ int32_t metaSnapshotReaderClose(SMetaSnapshotReader* pReader); int32_t metaSnapshotRead(SMetaSnapshotReader* pReader, void** ppData, uint32_t* nData); void* metaGetIdx(SMeta* pMeta); void* metaGetIvtIdx(SMeta* pMeta); -int metaTtlSmaller(SMeta *pMeta, uint64_t time, SArray *uidList); +int metaTtlSmaller(SMeta* pMeta, uint64_t time, SArray* uidList); int32_t metaCreateTSma(SMeta* pMeta, int64_t version, SSmaCfg* pCfg); int32_t metaDropTSma(SMeta* pMeta, int64_t indexUid); // tsdb -int tsdbOpen(SVnode* pVnode, STsdb** ppTsdb, const char* 
dir, STsdbKeepCfg* pKeepCfg); -int tsdbClose(STsdb** pTsdb); -int32_t tsdbBegin(STsdb* pTsdb); -int32_t tsdbCommit(STsdb* pTsdb); -int tsdbScanAndConvertSubmitMsg(STsdb* pTsdb, SSubmitReq* pMsg); -int tsdbInsertData(STsdb* pTsdb, int64_t version, SSubmitReq* pMsg, SSubmitRsp* pRsp); -int32_t tsdbInsertTableData(STsdb* pTsdb, int64_t version, SSubmitMsgIter* pMsgIter, SSubmitBlk* pBlock, - SSubmitBlkRsp* pRsp); -int32_t tsdbDeleteTableData(STsdb* pTsdb, int64_t version, tb_uid_t suid, tb_uid_t uid, TSKEY sKey, TSKEY eKey); -tsdbReaderT tsdbReaderOpen(SVnode* pVnode, SQueryTableDataCond* pCond, STableListInfo* tableList, uint64_t qId, - uint64_t taskId); -tsdbReaderT tsdbQueryCacheLastT(STsdb* tsdb, SQueryTableDataCond* pCond, STableListInfo* tableList, uint64_t qId, - void* pMemRef); -int32_t tsdbSnapshotReaderOpen(STsdb* pTsdb, STsdbSnapshotReader** ppReader, int64_t sver, int64_t ever); -int32_t tsdbSnapshotReaderClose(STsdbSnapshotReader* pReader); -int32_t tsdbSnapshotRead(STsdbSnapshotReader* pReader, void** ppData, uint32_t* nData); +int tsdbOpen(SVnode* pVnode, STsdb** ppTsdb, const char* dir, STsdbKeepCfg* pKeepCfg); +int tsdbClose(STsdb** pTsdb); +int32_t tsdbBegin(STsdb* pTsdb); +int32_t tsdbCommit(STsdb* pTsdb); +int tsdbScanAndConvertSubmitMsg(STsdb* pTsdb, SSubmitReq* pMsg); +int tsdbInsertData(STsdb* pTsdb, int64_t version, SSubmitReq* pMsg, SSubmitRsp* pRsp); +int32_t tsdbInsertTableData(STsdb* pTsdb, int64_t version, SSubmitMsgIter* pMsgIter, SSubmitBlk* pBlock, + SSubmitBlkRsp* pRsp); +int32_t tsdbDeleteTableData(STsdb* pTsdb, int64_t version, tb_uid_t suid, tb_uid_t uid, TSKEY sKey, TSKEY eKey); +tsdbReaderT tsdbReaderOpen(SVnode* pVnode, SQueryTableDataCond* pCond, STableListInfo* tableList, uint64_t qId, + uint64_t taskId); +tsdbReaderT tsdbQueryCacheLastT(STsdb* tsdb, SQueryTableDataCond* pCond, STableListInfo* tableList, uint64_t qId, + void* pMemRef); +int32_t tsdbSnapshotReaderOpen(STsdb* pTsdb, STsdbSnapshotReader** ppReader, int64_t sver, 
int64_t ever); +int32_t tsdbSnapshotReaderClose(STsdbSnapshotReader* pReader); +int32_t tsdbSnapshotRead(STsdbSnapshotReader* pReader, void** ppData, uint32_t* nData); // tq int tqInit(); @@ -141,7 +141,7 @@ int32_t tqProcessVgChangeReq(STQ* pTq, char* msg, int32_t msgLen); int32_t tqProcessVgDeleteReq(STQ* pTq, char* msg, int32_t msgLen); int32_t tqProcessOffsetCommitReq(STQ* pTq, char* msg, int32_t msgLen); int32_t tqProcessPollReq(STQ* pTq, SRpcMsg* pMsg, int32_t workerId); -int32_t tqProcessTaskDeploy(STQ* pTq, char* msg, int32_t msgLen); +int32_t tqProcessTaskDeployReq(STQ* pTq, char* msg, int32_t msgLen); int32_t tqProcessTaskDropReq(STQ* pTq, char* msg, int32_t msgLen); int32_t tqProcessStreamTrigger(STQ* pTq, SSubmitReq* data); int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg); @@ -262,7 +262,7 @@ struct SSma { #define SMA_CFG(s) (&(s)->pVnode->config) #define SMA_TSDB_CFG(s) (&(s)->pVnode->config.tsdbCfg) -#define SMA_RETENTION(s) ((SRetention *)&(s)->pVnode->config.tsdbCfg.retentions) +#define SMA_RETENTION(s) ((SRetention*)&(s)->pVnode->config.tsdbCfg.retentions) #define SMA_LOCKED(s) ((s)->locked) #define SMA_META(s) ((s)->pVnode->pMeta) #define SMA_VID(s) TD_VID((s)->pVnode) diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 06a119b076..ece4b7e2a4 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -333,7 +333,6 @@ int32_t tqProcessVgChangeReq(STQ* pTq, char* msg, int32_t msgLen) { SReadHandle handle = { .reader = pHandle->execHandle.pExecReader[i], .meta = pTq->pVnode->pMeta, - .pMsgCb = &pTq->pVnode->msgCb, }; pHandle->execHandle.execCol.task[i] = qCreateStreamExecTaskInfo(pHandle->execHandle.execCol.qmsg, &handle); ASSERT(pHandle->execHandle.execCol.task[i]); @@ -373,7 +372,7 @@ int32_t tqProcessVgChangeReq(STQ* pTq, char* msg, int32_t msgLen) { return 0; } -int32_t tqProcessTaskDeploy(STQ* pTq, char* msg, int32_t msgLen) { +int32_t tqProcessTaskDeployReq(STQ* pTq, char* msg, int32_t 
msgLen) { SStreamTask* pTask = taosMemoryCalloc(1, sizeof(SStreamTask)); if (pTask == NULL) { return -1; @@ -404,7 +403,6 @@ int32_t tqProcessTaskDeploy(STQ* pTq, char* msg, int32_t msgLen) { SReadHandle handle = { .reader = pStreamReader, .meta = pTq->pVnode->pMeta, - .pMsgCb = &pTq->pVnode->msgCb, .vnode = pTq->pVnode, }; /*pTask->exec.inputHandle = pStreamReader;*/ @@ -468,7 +466,7 @@ int32_t tqProcessStreamTrigger(STQ* pTq, SSubmitReq* pReq) { continue; } - if (streamLaunchByWrite(pTask, TD_VID(pTq->pVnode), &pTq->pVnode->msgCb) < 0) { + if (streamLaunchByWrite(pTask, TD_VID(pTq->pVnode)) < 0) { continue; } } else { @@ -489,7 +487,7 @@ int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) { SStreamTaskRunReq* pReq = pMsg->pCont; int32_t taskId = pReq->taskId; SStreamTask* pTask = *(SStreamTask**)taosHashGet(pTq->pStreamTasks, &taskId, sizeof(int32_t)); - streamTaskProcessRunReq(pTask, &pTq->pVnode->msgCb); + streamProcessRunReq(pTask); return 0; } @@ -507,7 +505,7 @@ int32_t tqProcessTaskDispatchReq(STQ* pTq, SRpcMsg* pMsg) { .info = pMsg->info, .code = 0, }; - streamProcessDispatchReq(pTask, &pTq->pVnode->msgCb, &req, &rsp); + streamProcessDispatchReq(pTask, &req, &rsp); return 0; } @@ -515,7 +513,7 @@ int32_t tqProcessTaskRecoverReq(STQ* pTq, SRpcMsg* pMsg) { SStreamTaskRecoverReq* pReq = pMsg->pCont; int32_t taskId = pReq->taskId; SStreamTask* pTask = *(SStreamTask**)taosHashGet(pTq->pStreamTasks, &taskId, sizeof(int32_t)); - streamProcessRecoverReq(pTask, &pTq->pVnode->msgCb, pReq, pMsg); + streamProcessRecoverReq(pTask, pReq, pMsg); return 0; } @@ -523,7 +521,7 @@ int32_t tqProcessTaskDispatchRsp(STQ* pTq, SRpcMsg* pMsg) { SStreamDispatchRsp* pRsp = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); int32_t taskId = pRsp->taskId; SStreamTask* pTask = *(SStreamTask**)taosHashGet(pTq->pStreamTasks, &taskId, sizeof(int32_t)); - streamProcessDispatchRsp(pTask, &pTq->pVnode->msgCb, pRsp); + streamProcessDispatchRsp(pTask, pRsp); return 0; } diff --git 
a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index fb22b7c5bf..c097e2f929 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -167,8 +167,8 @@ int32_t vnodeProcessWriteReq(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRp } break; case TDMT_STREAM_TASK_DEPLOY: { - if (tqProcessTaskDeploy(pVnode->pTq, POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)), - pMsg->contLen - sizeof(SMsgHead)) < 0) { + if (tqProcessTaskDeployReq(pVnode->pTq, POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)), + pMsg->contLen - sizeof(SMsgHead)) < 0) { goto _err; } } break; @@ -304,18 +304,17 @@ void vnodeUpdateMetaRsp(SVnode *pVnode, STableMetaRsp *pMetaRsp) { pMetaRsp->precision = pVnode->config.tsdbCfg.precision; } -static int32_t vnodeProcessDropTtlTbReq(SVnode *pVnode, int64_t version, void *pReq, int32_t len, SRpcMsg *pRsp){ - +static int32_t vnodeProcessDropTtlTbReq(SVnode *pVnode, int64_t version, void *pReq, int32_t len, SRpcMsg *pRsp) { SArray *tbUids = taosArrayInit(8, sizeof(int64_t)); if (tbUids == NULL) return TSDB_CODE_OUT_OF_MEMORY; - int32_t t = ntohl(*(int32_t*)pReq); + int32_t t = ntohl(*(int32_t *)pReq); vError("rec ttl time:%d", t); int32_t ret = metaTtlDropTable(pVnode->pMeta, t, tbUids); - if(ret != 0){ + if (ret != 0) { goto end; } - if(taosArrayGetSize(tbUids) > 0){ + if (taosArrayGetSize(tbUids) > 0) { tqUpdateTbUidList(pVnode->pTq, tbUids, false); } diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index 5c038ed709..3828a26bc4 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -4052,14 +4052,19 @@ SOperatorInfo* createOperatorTree(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo SScanPhysiNode* pScanPhyNode = (SScanPhysiNode*)pPhyNode; // simple child table. 
STableScanPhysiNode* pTableScanNode = (STableScanPhysiNode*)pPhyNode; STimeWindowAggSupp twSup = { - .waterMark = pTableScanNode->watermark, .calTrigger = pTableScanNode->triggerType, .maxTs = INT64_MIN}; + .waterMark = pTableScanNode->watermark, + .calTrigger = pTableScanNode->triggerType, + .maxTs = INT64_MIN, + }; tsdbReaderT pDataReader = NULL; if (pHandle) { if (pHandle->vnode) { + // for stram pDataReader = doCreateDataReader(pTableScanNode, pHandle, pTableListInfo, (uint64_t)queryId, taskId, pTagCond); } else { + // for tq getTableList(pHandle->meta, pScanPhyNode, pTableListInfo, pTagCond); } } diff --git a/source/libs/stream/src/stream.c b/source/libs/stream/src/stream.c index 6dcbfad957..38a1ad14b1 100644 --- a/source/libs/stream/src/stream.c +++ b/source/libs/stream/src/stream.c @@ -64,7 +64,7 @@ void streamTriggerByTimer(void* param, void* tmrId) { atomic_store_8(&pTask->triggerStatus, TASK_TRIGGER_STATUS__IN_ACTIVE); streamTaskInput(pTask, (SStreamQueueItem*)trigger); - streamLaunchByWrite(pTask, pTask->nodeId, pTask->pMsgCb); + streamLaunchByWrite(pTask, pTask->nodeId); } taosTmrReset(streamTriggerByTimer, (int32_t)pTask->triggerParam, pTask, streamEnv.timer, &pTask->timer); @@ -81,7 +81,7 @@ int32_t streamSetupTrigger(SStreamTask* pTask) { return 0; } -int32_t streamLaunchByWrite(SStreamTask* pTask, int32_t vgId, SMsgCb* pMsgCb) { +int32_t streamLaunchByWrite(SStreamTask* pTask, int32_t vgId) { int8_t execStatus = atomic_load_8(&pTask->status); if (execStatus == TASK_STATUS__IDLE || execStatus == TASK_STATUS__CLOSING) { SStreamTaskRunReq* pRunReq = rpcMallocCont(sizeof(SStreamTaskRunReq)); @@ -96,7 +96,7 @@ int32_t streamLaunchByWrite(SStreamTask* pTask, int32_t vgId, SMsgCb* pMsgCb) { .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq), }; - tmsgPutToQueue(pMsgCb, FETCH_QUEUE, &msg); + tmsgPutToQueue(pTask->pMsgCb, FETCH_QUEUE, &msg); } return 0; } @@ -136,7 +136,9 @@ int32_t streamTaskEnqueue(SStreamTask* pTask, SStreamDispatchReq* pReq, SRpcMsg* 
return status == TASK_INPUT_STATUS__NORMAL ? 0 : -1; } -int32_t streamProcessDispatchReq(SStreamTask* pTask, SMsgCb* pMsgCb, SStreamDispatchReq* pReq, SRpcMsg* pRsp) { +int32_t streamProcessDispatchReq(SStreamTask* pTask, SStreamDispatchReq* pReq, SRpcMsg* pRsp) { + qInfo("task %d receive dispatch req from node %d task %d", pTask->taskId, pReq->upstreamNodeId, pReq->sourceTaskId); + // 1. handle input streamTaskEnqueue(pTask, pReq, pRsp); @@ -145,7 +147,7 @@ int32_t streamProcessDispatchReq(SStreamTask* pTask, SMsgCb* pMsgCb, SStreamDisp // 2.2. executing: return // 2.3. closing: keep trying if (pTask->execType != TASK_EXEC__NONE) { - streamExec(pTask, pMsgCb); + streamExec(pTask, pTask->pMsgCb); } else { ASSERT(pTask->sinkType != TASK_SINK__NONE); while (1) { @@ -161,34 +163,38 @@ int32_t streamProcessDispatchReq(SStreamTask* pTask, SMsgCb* pMsgCb, SStreamDisp // 3.1 check and set status // 3.2 dispatch / sink if (pTask->dispatchType != TASK_DISPATCH__NONE) { - streamDispatch(pTask, pMsgCb); + streamDispatch(pTask, pTask->pMsgCb); } return 0; } -int32_t streamProcessDispatchRsp(SStreamTask* pTask, SMsgCb* pMsgCb, SStreamDispatchRsp* pRsp) { +int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp) { ASSERT(pRsp->inputStatus == TASK_OUTPUT_STATUS__NORMAL || pRsp->inputStatus == TASK_OUTPUT_STATUS__BLOCKED); + + qInfo("task %d receive dispatch rsp", pTask->taskId); + int8_t old = atomic_exchange_8(&pTask->outputStatus, pRsp->inputStatus); ASSERT(old == TASK_OUTPUT_STATUS__WAIT); if (pRsp->inputStatus == TASK_INPUT_STATUS__BLOCKED) { // TODO: init recover timer + ASSERT(0); return 0; } // continue dispatch - streamDispatch(pTask, pMsgCb); + streamDispatch(pTask, pTask->pMsgCb); return 0; } -int32_t streamTaskProcessRunReq(SStreamTask* pTask, SMsgCb* pMsgCb) { - streamExec(pTask, pMsgCb); +int32_t streamProcessRunReq(SStreamTask* pTask) { + streamExec(pTask, pTask->pMsgCb); if (pTask->dispatchType != TASK_DISPATCH__NONE) { - streamDispatch(pTask, 
pMsgCb); + streamDispatch(pTask, pTask->pMsgCb); } return 0; } -int32_t streamProcessRecoverReq(SStreamTask* pTask, SMsgCb* pMsgCb, SStreamTaskRecoverReq* pReq, SRpcMsg* pMsg) { +int32_t streamProcessRecoverReq(SStreamTask* pTask, SStreamTaskRecoverReq* pReq, SRpcMsg* pMsg) { // return 0; } diff --git a/source/libs/stream/src/streamDispatch.c b/source/libs/stream/src/streamDispatch.c index ca10e7d956..1894f697c0 100644 --- a/source/libs/stream/src/streamDispatch.c +++ b/source/libs/stream/src/streamDispatch.c @@ -144,7 +144,7 @@ int32_t streamBuildDispatchMsg(SStreamTask* pTask, SStreamDataBlock* data, SRpcM } } - ASSERT(vgId != 0); + ASSERT(vgId > 0 || vgId == SNODE_HANDLE); req.taskId = downstreamTaskId; qInfo("dispatch from task %d (child id %d) to down stream task %d in vnode %d", pTask->taskId, pTask->childId, @@ -199,6 +199,8 @@ int32_t streamDispatch(SStreamTask* pTask, SMsgCb* pMsgCb) { } ASSERT(pBlock->type == STREAM_DATA_TYPE_SSDATA_BLOCK); + qInfo("stream continue dispatching: task %d", pTask->taskId); + SRpcMsg dispatchMsg = {0}; SEpSet* pEpSet = NULL; if (streamBuildDispatchMsg(pTask, pBlock, &dispatchMsg, &pEpSet) < 0) { diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index 04428136ae..5a71fccab8 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -107,18 +107,19 @@ int32_t streamExec(SStreamTask* pTask, SMsgCb* pMsgCb) { pRes = streamExecForQall(pTask, pRes); if (pRes == NULL) goto FAIL; - break; + taosArrayDestroy(pRes); + atomic_store_8(&pTask->status, TASK_STATUS__IDLE); + return 0; } else if (execStatus == TASK_STATUS__CLOSING) { continue; } else if (execStatus == TASK_STATUS__EXECUTING) { - break; + ASSERT(taosArrayGetSize(pRes) == 0); + taosArrayDestroy(pRes); + return 0; } else { ASSERT(0); } } - if (pRes) taosArrayDestroy(pRes); - atomic_store_8(&pTask->status, TASK_STATUS__IDLE); - return 0; FAIL: if (pRes) taosArrayDestroy(pRes); atomic_store_8(&pTask->status, 
TASK_STATUS__IDLE); diff --git a/tests/script/jenkins/basic.txt b/tests/script/jenkins/basic.txt index 720625a570..c1ab51bb27 100644 --- a/tests/script/jenkins/basic.txt +++ b/tests/script/jenkins/basic.txt @@ -80,6 +80,7 @@ # ./test.sh -f tsim/stream/triggerInterval0.sim # ./test.sh -f tsim/stream/triggerSession0.sim # ./test.sh -f tsim/stream/partitionby.sim +./test.sh -f tsim/stream/schedSnode.sim # ---- transaction diff --git a/tests/script/tsim/stream/distributeInterval0.sim b/tests/script/tsim/stream/distributeInterval0.sim index b720272116..91ce49bc8c 100644 --- a/tests/script/tsim/stream/distributeInterval0.sim +++ b/tests/script/tsim/stream/distributeInterval0.sim @@ -208,4 +208,4 @@ if $data11 != 2 then goto loop2 endi -system sh/stop_dnodes.sh \ No newline at end of file +system sh/stop_dnodes.sh diff --git a/tests/script/tsim/stream/distributesession0.sim b/tests/script/tsim/stream/distributesession0.sim index 78f65ed8a3..a165b86edd 100644 --- a/tests/script/tsim/stream/distributesession0.sim +++ b/tests/script/tsim/stream/distributesession0.sim @@ -55,4 +55,4 @@ if $data03 != 7 then return -1 endi -system sh/stop_dnodes.sh \ No newline at end of file +system sh/stop_dnodes.sh diff --git a/tests/script/tsim/stream/schedSnode.sim b/tests/script/tsim/stream/schedSnode.sim new file mode 100644 index 0000000000..dbf714a96f --- /dev/null +++ b/tests/script/tsim/stream/schedSnode.sim @@ -0,0 +1,173 @@ +system sh/stop_dnodes.sh +system sh/deploy.sh -n dnode1 -i 1 +system sh/deploy.sh -n dnode2 -i 2 + +system sh/exec.sh -n dnode1 -s start +sleep 50 +sql connect + +sql create snode on dnode 1 + +sql create database test vgroups 1; +sql create database target vgroups 1; +sql use test; +sql create stable st(ts timestamp, a int, b int , c int, d double) tags(ta int,tb int,tc int); +sql create table ts1 using st tags(1,1,1); +sql create table ts2 using st tags(2,2,2); +sql create table ts3 using st tags(3,2,2); +sql create table ts4 using st tags(4,2,2); +sql create 
stream stream_t1 trigger at_once into target.streamtST1 as select _wstartts, count(*) c1, count(d) c2 , sum(a) c3 , max(b) c4, min(c) c5 from st interval(10s); + +sleep 1000 + +sql insert into ts1 values(1648791213001,1,12,3,1.0); +sql insert into ts2 values(1648791213001,1,12,3,1.0); + +sql insert into ts3 values(1648791213001,1,12,3,1.0); +sql insert into ts4 values(1648791213001,1,12,3,1.0); + +sql insert into ts1 values(1648791213002,NULL,NULL,NULL,NULL); +sql insert into ts2 values(1648791213002,NULL,NULL,NULL,NULL); + +sql insert into ts3 values(1648791213002,NULL,NULL,NULL,NULL); +sql insert into ts4 values(1648791213002,NULL,NULL,NULL,NULL); + +sql insert into ts1 values(1648791223002,2,2,3,1.1); +sql insert into ts1 values(1648791233003,3,2,3,2.1); +sql insert into ts2 values(1648791243004,4,2,43,73.1); +sql insert into ts1 values(1648791213002,24,22,23,4.1); +sql insert into ts1 values(1648791243005,4,20,3,3.1); +sql insert into ts2 values(1648791243006,4,2,3,3.1) (1648791243007,4,2,3,3.1) ; +sql insert into ts1 values(1648791243008,4,2,30,3.1) (1648791243009,4,2,3,3.1) (1648791243010,4,2,3,3.1) ; +sql insert into ts2 values(1648791243011,4,2,3,3.1) (1648791243012,34,32,33,3.1) (1648791243013,4,2,3,3.1) (1648791243014,4,2,13,3.1); +sql insert into ts1 values(1648791243005,4,42,3,3.1) (1648791243003,4,2,33,3.1) (1648791243006,4,2,3,3.1) (1648791213001,1,52,13,1.0) (1648791223001,22,22,83,1.1) ; +sql insert into ts2 values(1648791243005,4,42,3,3.1) (1648791243003,4,2,33,3.1) (1648791243006,4,2,3,3.1) (1648791213001,1,52,13,1.0) (1648791223001,22,22,83,1.1) (1648791233004,13,12,13,2.1) ; +sql insert into ts1 values(1648791243006,4,2,3,3.1) (1648791213001,1,52,13,1.0) (1648791223001,22,22,83,1.1) ; + +sql insert into ts3 values(1648791223002,2,2,3,1.1); +sql insert into ts4 values(1648791233003,3,2,3,2.1); +sql insert into ts3 values(1648791243004,4,2,43,73.1); +sql insert into ts4 values(1648791213002,24,22,23,4.1); +sql insert into ts3 
values(1648791243005,4,20,3,3.1); +sql insert into ts4 values(1648791243006,4,2,3,3.1) (1648791243007,4,2,3,3.1) ; +sql insert into ts3 values(1648791243008,4,2,30,3.1) (1648791243009,4,2,3,3.1) (1648791243010,4,2,3,3.1) ; +sql insert into ts4 values(1648791243011,4,2,3,3.1) (1648791243012,34,32,33,3.1) (1648791243013,4,2,3,3.1) (1648791243014,4,2,13,3.1); +sql insert into ts3 values(1648791243005,4,42,3,3.1) (1648791243003,4,2,33,3.1) (1648791243006,4,2,3,3.1) (1648791213001,1,52,13,1.0) (1648791223001,22,22,83,1.1) ; +sql insert into ts4 values(1648791243005,4,42,3,3.1) (1648791243003,4,2,33,3.1) (1648791243006,4,2,3,3.1) (1648791213001,1,52,13,1.0) (1648791223001,22,22,83,1.1) (1648791233004,13,12,13,2.1) ; +sql insert into ts3 values(1648791243006,4,2,3,3.1) (1648791213001,1,52,13,1.0) (1648791223001,22,22,83,1.1) ; + +$loop_count = 0 +loop1: +sql select * from target.streamtST1; + +sleep 300 +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + +# row 0 +if $data01 != 8 then + print =====data01=$data01 + goto loop1 +endi + +if $data02 != 4 then + print =====data02=$data02 + goto loop1 +endi + +if $data03 != 4 then + print ======$data03 + return -1 +endi + +if $data04 != 52 then + print ======$data04 + return -1 +endi + +if $data05 != 13 then + print ======$data05 + return -1 +endi + +# row 1 +if $data11 != 6 then + print =====data11=$data11 + goto loop1 +endi + +if $data12 != 6 then + print =====data12=$data12 + goto loop1 +endi + +if $data13 != 92 then + print ======$data13 + return -1 +endi + +if $data14 != 22 then + print ======$data14 + return -1 +endi + +if $data15 != 3 then + print ======$data15 + return -1 +endi + +# row 2 +if $data21 != 4 then + print =====data21=$data21 + goto loop1 +endi + +if $data22 != 4 then + print =====data22=$data22 + goto loop1 +endi + +if $data23 != 32 then + print ======$data23 + return -1 +endi + +if $data24 != 12 then + print ======$data24 + return -1 +endi + +if $data25 != 3 then + print 
======$data25 + return -1 +endi + +# row 3 +if $data31 != 30 then + print =====data31=$data31 + goto loop1 +endi + +if $data32 != 30 then + print =====data32=$data32 + goto loop1 +endi + +if $data33 != 180 then + print ======$data33 + return -1 +endi + +if $data34 != 42 then + print ======$data34 + return -1 +endi + +if $data35 != 3 then + print ======$data35 + return -1 +endi + +sql select _wstartts, count(*) c1, count(d) c2 , sum(a) c3 , max(b) c4, min(c) c5, avg(d) from st interval(10s); From 286b9e40715547c6a80a7ded567846550ddeaea0 Mon Sep 17 00:00:00 2001 From: plum-lihui Date: Mon, 20 Jun 2022 20:54:43 +0800 Subject: [PATCH 38/40] test: add notify between main script and comsume processor --- tests/system-test/7-tmq/subscribeDb3.py | 38 +++++++++-- tests/test/c/tmqSim.c | 89 ++++++++++++++++--------- 2 files changed, 89 insertions(+), 38 deletions(-) diff --git a/tests/system-test/7-tmq/subscribeDb3.py b/tests/system-test/7-tmq/subscribeDb3.py index 6973f4c51f..b576a0ea70 100644 --- a/tests/system-test/7-tmq/subscribeDb3.py +++ b/tests/system-test/7-tmq/subscribeDb3.py @@ -54,9 +54,11 @@ class TDTestCase: tdSql.query("create database if not exists %s vgroups 1"%(cdbName)) tdSql.query("drop table if exists %s.consumeinfo "%(cdbName)) tdSql.query("drop table if exists %s.consumeresult "%(cdbName)) + tdSql.query("drop table if exists %s.notifyinfo "%(cdbName)) tdSql.query("create table %s.consumeinfo (ts timestamp, consumerid int, topiclist binary(1024), keylist binary(1024), expectmsgcnt bigint, ifcheckdata int, ifmanualcommit int)"%cdbName) tdSql.query("create table %s.consumeresult (ts timestamp, consumerid int, consummsgcnt bigint, consumrowcnt bigint, checkresult int)"%cdbName) + tdSql.query("create table %s.notifyinfo (ts timestamp, cmdid int, consumerid int)"%cdbName) def insertConsumerInfo(self,consumerId, expectrowcnt,topicList,keyList,ifcheckdata,ifmanualcommit,cdbName='cdb'): sql = "insert into %s.consumeinfo values "%cdbName @@ -64,6 +66,27 @@ class 
TDTestCase: tdLog.info("consume info sql: %s"%sql) tdSql.query(sql) + def getStartConsumeNotifyFromTmqsim(self,cdbName='cdb'): + while 1: + tdSql.query("select * from %s.notifyinfo"%cdbName) + #tdLog.info("row: %d, %l64d, %l64d"%(tdSql.getData(0, 1),tdSql.getData(0, 2),tdSql.getData(0, 3)) + if (tdSql.getRows() == 1) and (tdSql.getData(0, 1) == 0): + break + else: + time.sleep(0.1) + return + + def getStartCommitNotifyFromTmqsim(self,cdbName='cdb'): + while 1: + tdSql.query("select * from %s.notifyinfo"%cdbName) + #tdLog.info("row: %d, %l64d, %l64d"%(tdSql.getData(0, 1),tdSql.getData(0, 2),tdSql.getData(0, 3)) + if tdSql.getRows() == 2 : + print(tdSql.getData(0, 1), tdSql.getData(1, 1)) + if tdSql.getData(1, 1) == 1: + break + time.sleep(0.1) + return + def selectConsumeResult(self,expectRows,cdbName='cdb'): resultList=[] while 1: @@ -72,7 +95,7 @@ class TDTestCase: if tdSql.getRows() == expectRows: break else: - time.sleep(5) + time.sleep(1) for i in range(expectRows): tdLog.info ("ts: %s, consume id: %d, consume msgs: %d, consume rows: %d"%(tdSql.getData(i , 0), tdSql.getData(i , 1), tdSql.getData(i , 2), tdSql.getData(i , 3))) @@ -207,7 +230,9 @@ class TDTestCase: showRow = 1 self.startTmqSimProcess(buildPath,cfgPath,pollDelay,parameterDict["dbName"],showMsg, showRow) - time.sleep(2) + tdLog.info("wait the notify info of start consume") + self.getStartConsumeNotifyFromTmqsim() + tdLog.info("pkill consume processor") if (platform.system().lower() == 'windows'): os.system("TASKKILL /F /IM tmq_sim.exe") @@ -282,14 +307,17 @@ class TDTestCase: showRow = 1 self.startTmqSimProcess(buildPath,cfgPath,pollDelay,parameterDict["dbName"],showMsg, showRow) - time.sleep(6) + # time.sleep(6) + tdLog.info("start to wait commit notify") + self.getStartCommitNotifyFromTmqsim() + tdLog.info("pkill consume processor") if (platform.system().lower() == 'windows'): os.system("TASKKILL /F /IM tmq_sim.exe") else: os.system('pkill tmq_sim') - expectRows = 0 - resultList = 
self.selectConsumeResult(expectRows) + # expectRows = 0 + # resultList = self.selectConsumeResult(expectRows) # wait for data ready prepareEnvThread.join() diff --git a/tests/test/c/tmqSim.c b/tests/test/c/tmqSim.c index 0f78a003d6..948df3a40a 100644 --- a/tests/test/c/tmqSim.c +++ b/tests/test/c/tmqSim.c @@ -34,6 +34,12 @@ #define MAX_CONSUMER_THREAD_CNT (16) #define MAX_VGROUP_CNT (32) +typedef enum { + NOTIFY_CMD_START_CONSUM, + NOTIFY_CMD_START_COMMIT, + NOTIFY_CMD_ID_BUTT +}NOTIFY_CMD_ID; + typedef struct { TdThread thread; int32_t consumerId; @@ -67,6 +73,8 @@ typedef struct { int32_t rowsOfPerVgroups[MAX_VGROUP_CNT][2]; // [i][0]: vgroup id, [i][1]: rows of consume int64_t ts; + TAOS* taos; + } SThreadInfo; typedef struct { @@ -339,8 +347,37 @@ int queryDB(TAOS* taos, char* command) { return 0; } +static void appNothing(void* param, TAOS_RES* res, int32_t numOfRows) { +} + +int32_t notifyMainScript(SThreadInfo* pInfo, int32_t cmdId) { + char sqlStr[1024] = {0}; + + int64_t now = taosGetTimestampMs(); + + // schema: ts timestamp, consumerid int, consummsgcnt bigint, checkresult int + sprintf(sqlStr, "insert into %s.notifyinfo values (%"PRId64", %d, %d)", + g_stConfInfo.cdbName, + now, + cmdId, + pInfo->consumerId); + + taos_query_a(pInfo->taos, sqlStr, appNothing, NULL); + + taosFprintfFile(g_fp, "notifyMainScript success, sql: %s\n", sqlStr); + + return 0; +} + +static int32_t g_once_commit_flag = 0; static void tmq_commit_cb_print(tmq_t* tmq, int32_t code, void* param) { - pError("tmq_commit_cb_print() commit %d\n", code); + pError("tmq_commit_cb_print() commit %d\n", code); + + if (0 == g_once_commit_flag) { + g_once_commit_flag = 1; + notifyMainScript((SThreadInfo*)param, (int32_t)NOTIFY_CMD_START_COMMIT); + } + taosFprintfFile(g_fp, "tmq_commit_cb_print() be called\n"); } void build_consumer(SThreadInfo* pInfo) { @@ -353,7 +390,7 @@ void build_consumer(SThreadInfo* pInfo) { // tmq_conf_set(conf, "td.connect.db", g_stConfInfo.dbName); - 
tmq_conf_set_auto_commit_cb(conf, tmq_commit_cb_print, NULL); + tmq_conf_set_auto_commit_cb(conf, tmq_commit_cb_print, pInfo); // tmq_conf_set(conf, "group.id", "cgrp1"); for (int32_t i = 0; i < pInfo->numOfKey; i++) { @@ -392,9 +429,6 @@ void build_topic_list(SThreadInfo* pInfo) { int32_t saveConsumeResult(SThreadInfo* pInfo) { char sqlStr[1024] = {0}; - TAOS* pConn = taos_connect(NULL, "root", "taosdata", NULL, 0); - assert(pConn != NULL); - int64_t now = taosGetTimestampMs(); // schema: ts timestamp, consumerid int, consummsgcnt bigint, checkresult int @@ -404,7 +438,7 @@ int32_t saveConsumeResult(SThreadInfo* pInfo) { char tmpString[128]; taosFprintfFile(g_fp, "%s, consume id %d result: %s\n", getCurrentTimeString(tmpString), pInfo->consumerId, sqlStr); - TAOS_RES* pRes = taos_query(pConn, sqlStr); + TAOS_RES* pRes = taos_query(pInfo->taos, sqlStr); if (taos_errno(pRes) != 0) { pError("error in save consumeinfo, reason:%s\n", taos_errstr(pRes)); taos_free_result(pRes); @@ -413,38 +447,14 @@ int32_t saveConsumeResult(SThreadInfo* pInfo) { taos_free_result(pRes); -#if 0 - // vgroups - for (i = 0; i < pInfo->numOfVgroups; i++) { - // schema: ts timestamp, consumerid int, consummsgcnt bigint, checkresult int - sprintf(sqlStr, "insert into %s.vgroup_%d values (%"PRId64", %d, %" PRId64 ", %" PRId64 ", %d)", - g_stConfInfo.cdbName, - now, - pInfo->consumerId, - pInfo->consumeMsgCnt, - pInfo->consumeRowCnt, - pInfo->checkresult); - - char tmpString[128]; - taosFprintfFile(g_fp, "%s, consume id %d result: %s\n", getCurrentTimeString(tmpString), pInfo->consumerId ,sqlStr); - - TAOS_RES* pRes = taos_query(pConn, sqlStr); - if (taos_errno(pRes) != 0) { - pError("error in save consumeinfo, reason:%s\n", taos_errstr(pRes)); - taos_free_result(pRes); - exit(-1); - } - - taos_free_result(pRes); - } -#endif - return 0; } void loop_consume(SThreadInfo* pInfo) { int32_t code; + int32_t once_flag = 0; + int64_t totalMsgs = 0; int64_t totalRows = 0; @@ -465,6 +475,11 @@ void 
loop_consume(SThreadInfo* pInfo) { totalMsgs++; + if (0 == once_flag) { + once_flag = 1; + notifyMainScript(pInfo, NOTIFY_CMD_START_CONSUM); + } + if (totalRows >= pInfo->expectMsgCnt) { char tmpString[128]; taosFprintfFile(g_fp, "%s over than expect rows, so break consume\n", getCurrentTimeString(tmpString)); @@ -489,6 +504,12 @@ void* consumeThreadFunc(void* param) { SThreadInfo* pInfo = (SThreadInfo*)param; + pInfo->taos = taos_connect(NULL, "root", "taosdata", NULL, 0); + if (pInfo->taos == NULL) { + taosFprintfFile(g_fp, "taos_connect() fail, can not notify and save consume result to main scripte\n"); + exit(-1); + } + build_consumer(pInfo); build_topic_list(pInfo); if ((NULL == pInfo->tmq) || (NULL == pInfo->topicList)) { @@ -508,7 +529,6 @@ void* consumeThreadFunc(void* param) { loop_consume(pInfo); if (pInfo->ifManualCommit) { - taosFprintfFile(g_fp, "tmq_commit() manual commit when consume end.\n"); pPrint("tmq_commit() manual commit when consume end.\n"); /*tmq_commit(pInfo->tmq, NULL, 0);*/ tmq_commit_sync(pInfo->tmq, NULL); @@ -539,6 +559,9 @@ void* consumeThreadFunc(void* param) { taosFprintfFile(g_fp, "vgroups: %04d, rows: %d\n", pInfo->rowsOfPerVgroups[i][0], pInfo->rowsOfPerVgroups[i][1]); } + taos_close(pInfo->taos); + pInfo->taos = NULL; + return NULL; } From 3b7e0bfdccca787257055bbae92a978795007ed0 Mon Sep 17 00:00:00 2001 From: Minghao Li Date: Mon, 20 Jun 2022 20:55:40 +0800 Subject: [PATCH 39/40] fix(sync): set standby when not follower --- source/libs/sync/src/syncMain.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index 599410510d..33e7a8241f 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -154,10 +154,10 @@ int32_t syncSetStandby(int64_t rid) { return -1; } - if (pSyncNode->state == TAOS_SYNC_STATE_LEADER) { + if (pSyncNode->state != TAOS_SYNC_STATE_FOLLOWER) { taosReleaseRef(tsNodeRefId, pSyncNode->rid); terrno = 
TSDB_CODE_SYN_IS_LEADER; - sError("failed to set standby since it is leader, rid:%" PRId64, rid); + sError("failed to set standby since it is not follower, rid:%" PRId64, rid); return -1; } From 212144a3e6312e74689f66314a98897706fda087 Mon Sep 17 00:00:00 2001 From: Liu Jicong Date: Mon, 20 Jun 2022 21:50:13 +0800 Subject: [PATCH 40/40] test(stream): window close not work for stb --- examples/c/stream_demo.c | 10 ++++--- source/dnode/mgmt/mgmt_mnode/src/mmHandle.c | 1 + source/dnode/mnode/impl/src/mndScheduler.c | 3 ++ source/dnode/mnode/impl/src/mndSnode.c | 1 + source/dnode/mnode/impl/src/mndStream.c | 20 ++++++------- tests/script/tsim/stream/windowClose.sim | 32 +++++++++++++++++++++ 6 files changed, 51 insertions(+), 16 deletions(-) create mode 100644 tests/script/tsim/stream/windowClose.sim diff --git a/examples/c/stream_demo.c b/examples/c/stream_demo.c index 6d341c61c7..5f6e3b2aeb 100644 --- a/examples/c/stream_demo.c +++ b/examples/c/stream_demo.c @@ -25,19 +25,21 @@ int32_t init_env() { return -1; } - TAOS_RES* pRes = taos_query(pConn, "create database if not exists abc1 vgroups 2"); + TAOS_RES* pRes = taos_query(pConn, "create database if not exists abc1 vgroups 1"); if (taos_errno(pRes) != 0) { printf("error in create db, reason:%s\n", taos_errstr(pRes)); return -1; } taos_free_result(pRes); +#if 0 pRes = taos_query(pConn, "create database if not exists abc2 vgroups 20"); if (taos_errno(pRes) != 0) { printf("error in create db, reason:%s\n", taos_errstr(pRes)); return -1; } taos_free_result(pRes); +#endif pRes = taos_query(pConn, "use abc1"); if (taos_errno(pRes) != 0) { @@ -88,9 +90,9 @@ int32_t create_stream() { /*const char* sql = "select min(k), max(k), sum(k) as sum_of_k from st1";*/ /*const char* sql = "select sum(k) from tu1 interval(10m)";*/ /*pRes = tmq_create_stream(pConn, "stream1", "out1", sql);*/ - pRes = taos_query( - pConn, - "create stream stream1 trigger at_once into abc1.outstb as select _wstartts, sum(k) from st1 interval(10m) "); + 
pRes = taos_query(pConn, + "create stream stream1 trigger window_close into outstb as select _wstartts, sum(k) from st1 " + "interval(10s) "); if (taos_errno(pRes) != 0) { printf("failed to create stream stream1, reason:%s\n", taos_errstr(pRes)); return -1; diff --git a/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c b/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c index a845ae7b39..0112feedd2 100644 --- a/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c +++ b/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c @@ -183,6 +183,7 @@ SArray *mmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_MND_CREATE_SMA, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_DROP_SMA, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_CREATE_STREAM, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_MND_DROP_STREAM, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_GET_INDEX, mmPutMsgToReadQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_GET_TABLE_INDEX, mmPutMsgToReadQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_MND_CREATE_TOPIC, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; diff --git a/source/dnode/mnode/impl/src/mndScheduler.c b/source/dnode/mnode/impl/src/mndScheduler.c index 645634d7f3..2b6258b10a 100644 --- a/source/dnode/mnode/impl/src/mndScheduler.c +++ b/source/dnode/mnode/impl/src/mndScheduler.c @@ -496,6 +496,9 @@ int32_t mndScheduleStream(SMnode* pMnode, STrans* pTrans, SStreamObj* pStream) { // input pTask->inputType = TASK_INPUT_TYPE__SUMBIT_BLOCK; + // trigger + pTask->triggerParam = pStream->triggerParam; + // sink or dispatch if (hasExtraSink) { mndAddDispatcherToInnerTask(pMnode, pTrans, pStream, pTask); diff --git a/source/dnode/mnode/impl/src/mndSnode.c b/source/dnode/mnode/impl/src/mndSnode.c index 0a99f356b1..12188a3b3a 100644 --- a/source/dnode/mnode/impl/src/mndSnode.c +++ b/source/dnode/mnode/impl/src/mndSnode.c @@ 
-408,6 +408,7 @@ static int32_t mndProcessDropSnodeReq(SRpcMsg *pReq) { goto _OVER; } + // check deletable code = mndDropSnode(pMnode, pReq, pObj); if (code == 0) code = TSDB_CODE_ACTION_IN_PROGRESS; diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index d432256f15..fb92efecf6 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -518,7 +518,6 @@ static int32_t mndCreateStream(SMnode *pMnode, SRpcMsg *pReq, SCMCreateStreamReq // TODO streamObj.fixedSinkVgId = 0; streamObj.smaId = 0; - /*streamObj.physicalPlan = "";*/ streamObj.trigger = pCreate->triggerType; streamObj.watermark = pCreate->watermark; streamObj.triggerParam = pCreate->maxDelay; @@ -607,17 +606,6 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { } #endif - STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_ROLLBACK, TRN_CONFLICT_DB_INSIDE, pReq); - if (pTrans == NULL) { - mError("stream:%s, failed to create since %s", createStreamReq.name, terrstr()); - goto _OVER; - } - - mndTransSetDbName(pTrans, createStreamReq.sourceDB, NULL); - // TODO - /*mndTransSetDbName(pTrans, streamObj.targetDb, NULL);*/ - mDebug("trans:%d, used to create stream:%s", pTrans->id, createStreamReq.name); - // build stream obj from request SStreamObj streamObj = {0}; if (mndBuildStreamObjFromCreateReq(pMnode, &streamObj, &createStreamReq) < 0) { @@ -626,6 +614,14 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { goto _OVER; } + STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_ROLLBACK, TRN_CONFLICT_DB_INSIDE, pReq); + if (pTrans == NULL) { + mError("stream:%s, failed to create since %s", createStreamReq.name, terrstr()); + goto _OVER; + } + mndTransSetDbName(pTrans, createStreamReq.sourceDB, streamObj.targetDb); + mDebug("trans:%d, used to create stream:%s", pTrans->id, createStreamReq.name); + // create stb for stream if (mndCreateStbForStream(pMnode, pTrans, &streamObj, pReq->info.conn.user) < 0) { 
mError("trans:%d, failed to create stb for stream %s since %s", pTrans->id, createStreamReq.name, terrstr()); diff --git a/tests/script/tsim/stream/windowClose.sim b/tests/script/tsim/stream/windowClose.sim new file mode 100644 index 0000000000..07d7fb794e --- /dev/null +++ b/tests/script/tsim/stream/windowClose.sim @@ -0,0 +1,32 @@ +system sh/stop_dnodes.sh +system sh/deploy.sh -n dnode1 -i 1 +system sh/exec.sh -n dnode1 -s start +sleep 50 +sql connect + +print =============== create database +sql create database test vgroups 1 +sql show databases +if $rows != 3 then + return -1 +endi + +print $data00 $data01 $data02 + +sql use test +sql create stable st(ts timestamp, a int) tags(t int); +sql create table tu1 using st tags(1); +sql create table tu2 using st tags(2); + +sql create stream stream1 trigger window_close into streamt as select _wstartts, sum(a) from st interval(10s); + +sql insert into tu1 values(now, 1); + +sleep 300 +sql select * from streamt; +if $rows != 0 then + print ======$rows + return -1 +endi + +system sh/exec.sh -n dnode1 -s stop -x SIGINT