From a9d30f25d2e46deb304255abcbc98047a53eb060 Mon Sep 17 00:00:00 2001 From: kailixu Date: Wed, 20 Sep 2023 10:11:03 +0800 Subject: [PATCH 01/79] enh: rsma support delete raw data --- source/dnode/vnode/src/inc/vnodeInt.h | 1 + source/dnode/vnode/src/sma/smaRollup.c | 101 +++++++++++++++++++++---- source/dnode/vnode/src/vnd/vnodeSvr.c | 2 + source/libs/executor/src/executor.c | 9 +++ 4 files changed, 97 insertions(+), 16 deletions(-) diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 536273c044..6f0a7b171e 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -278,6 +278,7 @@ int32_t tdProcessTSmaInsert(SSma* pSma, int64_t indexUid, const char* msg); int32_t tdProcessRSmaCreate(SSma* pSma, SVCreateStbReq* pReq); int32_t tdProcessRSmaSubmit(SSma* pSma, int64_t version, void* pReq, void* pMsg, int32_t len, int32_t inputType); +int32_t tdProcessRSmaDelete(SSma* pSma, int64_t version, void* pReq, void* pMsg, int32_t len, int32_t inputType); int32_t tdProcessRSmaDrop(SSma* pSma, SVDropStbReq* pReq); int32_t tdFetchTbUidList(SSma* pSma, STbUidStore** ppStore, tb_uid_t suid, tb_uid_t uid); int32_t tdUpdateTbUidList(SSma* pSma, STbUidStore* pUidStore, bool isAdd); diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 8da2fff5a6..6dcba568f1 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -669,7 +669,7 @@ _exit: */ static int32_t tdExecuteRSmaImplAsync(SSma *pSma, int64_t version, const void *pMsg, int32_t len, int32_t inputType, SRSmaInfo *pInfo, tb_uid_t suid) { - int32_t size = sizeof(int32_t) + sizeof(int64_t) + len; + int32_t size = sizeof(int8_t) + sizeof(int32_t) + sizeof(int64_t) + len; // type + len + version + payload void *qItem = taosAllocateQitem(size, DEF_QITEM, 0); if (!qItem) { @@ -678,6 +678,8 @@ static int32_t tdExecuteRSmaImplAsync(SSma *pSma, int64_t version, const void 
*p void *pItem = qItem; + *(int8_t *)pItem = (int8_t)inputType; + pItem = POINTER_SHIFT(pItem, sizeof(int8_t)); *(int32_t *)pItem = len; pItem = POINTER_SHIFT(pItem, sizeof(int32_t)); *(int64_t *)pItem = version; @@ -852,7 +854,7 @@ static int32_t tdExecuteRSmaAsync(SSma *pSma, int64_t version, const void *pMsg, return TSDB_CODE_SUCCESS; } - if (inputType == STREAM_INPUT__DATA_SUBMIT) { + if (inputType == STREAM_INPUT__DATA_SUBMIT || inputType == STREAM_INPUT__REF_DATA_BLOCK) { if (tdExecuteRSmaImplAsync(pSma, version, pMsg, len, inputType, pRSmaInfo, suid) < 0) { tdReleaseRSmaInfo(pSma, pRSmaInfo); return TSDB_CODE_FAILED; @@ -918,6 +920,25 @@ _err: return TSDB_CODE_FAILED; } +int32_t tdProcessRSmaDelete(SSma *pSma, int64_t version, void *pReq, void *pMsg, int32_t len, int32_t inputType) { + SSmaEnv *pEnv = SMA_RSMA_ENV(pSma); + if (!pEnv) { + // only applicable when rsma env exists + return TSDB_CODE_SUCCESS; + } + + if (inputType == STREAM_INPUT__REF_DATA_BLOCK) { + SDeleteRes *pReq = pReq; + if (tdExecuteRSmaAsync(pSma, version, pMsg, len, inputType, pReq->suid) < 0) { + smaError("vgId:%d, failed to process rsma submit exec 1 since: %s", SMA_VID(pSma), terrstr()); + goto _err; + } + } + return TSDB_CODE_SUCCESS; +_err: + return TSDB_CODE_FAILED; +} + /** * @brief retrieve rsma meta and init * @@ -1203,7 +1224,7 @@ _end: static void tdFreeRSmaSubmitItems(SArray *pItems) { for (int32_t i = 0; i < taosArrayGetSize(pItems); ++i) { SPackedData *packData = taosArrayGet(pItems, i); - taosFreeQitem(POINTER_SHIFT(packData->msgStr, -sizeof(int32_t) - sizeof(int64_t))); + taosFreeQitem(POINTER_SHIFT(packData->msgStr, -sizeof(int8_t) - sizeof(int32_t) - sizeof(int64_t))); } taosArrayClear(pItems); } @@ -1267,33 +1288,81 @@ _err: } static int32_t tdRSmaBatchExec(SSma *pSma, SRSmaInfo *pInfo, STaosQall *qall, SArray *pSubmitArr, ERsmaExecType type) { + + void *msg = NULL; + int8_t resume = 0; + int32_t nSubmit = 0; + int32_t nDelete = 0; + + SPackedData packData; + 
taosArrayClear(pSubmitArr); + while (1) { - void *msg = NULL; taosGetQitem(qall, (void **)&msg); if (msg) { - SPackedData packData = {.msgLen = *(int32_t *)msg, - .ver = *(int64_t *)POINTER_SHIFT(msg, sizeof(int32_t)), - .msgStr = POINTER_SHIFT(msg, sizeof(int32_t) + sizeof(int64_t))}; + int8_t inputType = *(int8_t *)msg; - if (!taosArrayPush(pSubmitArr, &packData)) { - tdFreeRSmaSubmitItems(pSubmitArr); - goto _err; + msg = POINTER_SHIFT(msg, sizeof(int8_t)); + + if (inputType == STREAM_INPUT__DATA_SUBMIT) { + if (nDelete > 0) { + resume = 1; + break; + } + _resume_submit: + packData.msgLen = *(int32_t *)msg; + packData.ver = *(int64_t *)POINTER_SHIFT(msg, sizeof(int32_t)); + packData.msgStr = POINTER_SHIFT(msg, sizeof(int32_t) + sizeof(int64_t)); + if (!taosArrayPush(pSubmitArr, &packData)) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + ++nSubmit; + } else if (inputType == STREAM_INPUT__REF_DATA_BLOCK) { + if (nSubmit > 0) { + resume = 2; + break; + } + _resume_delete: + ++nDelete; } + } else { break; } } - int32_t size = taosArrayGetSize(pSubmitArr); - if (size > 0) { - for (int32_t i = 1; i <= TSDB_RETENTION_L2; ++i) { - if (tdExecuteRSmaImpl(pSma, pSubmitArr->pData, size, STREAM_INPUT__MERGED_SUBMIT, pInfo, type, i) < 0) { - goto _err; + if (nSubmit > 0) { + int32_t size = taosArrayGetSize(pSubmitArr); + if (size > 0) { + for (int32_t i = 1; i <= TSDB_RETENTION_L2; ++i) { + if (tdExecuteRSmaImpl(pSma, pSubmitArr->pData, size, STREAM_INPUT__MERGED_SUBMIT, pInfo, type, i) < 0) { + goto _err; + } } + tdFreeRSmaSubmitItems(pSubmitArr); } - tdFreeRSmaSubmitItems(pSubmitArr); + } else if (nDelete > 0) { } + + if (resume == 0) { + goto _rtn; + } else if (resume == 1) { + nSubmit = 0; + nDelete = 0; + resume = 0; + taosArrayClear(pSubmitArr); + goto _resume_submit; + } else { + nSubmit = 0; + nDelete = 0; + resume = 0; + taosArrayClear(pSubmitArr); + goto _resume_delete; + } + +_rtn: return TSDB_CODE_SUCCESS; _err: smaError("vgId:%d, batch exec for suid:%" 
PRIi64 " execType:%d size:%d failed since %s", SMA_VID(pSma), pInfo->suid, diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index 56d8b1ea45..5b5afbb976 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -1884,6 +1884,8 @@ static int32_t vnodeProcessDeleteReq(SVnode *pVnode, int64_t ver, void *pReq, in if (code) goto _err; } + tdProcessRSmaDelete(pVnode->pSma, ver, pRes, pReq, len, STREAM_INPUT__REF_DATA_BLOCK); + tDecoderClear(pCoder); taosArrayDestroy(pRes->uidList); diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index 60dc6f0185..f73f028758 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -75,6 +75,15 @@ static int32_t doSetSMABlock(SOperatorInfo* pOperator, void* input, size_t numOf taosArrayPush(pInfo->pBlockLists, &tmp); } pInfo->blockType = STREAM_INPUT__DATA_BLOCK; + } else if (type == STREAM_INPUT__REF_DATA_BLOCK) { + for (int32_t i = 0; i < numOfBlocks; ++i) { + SSDataBlock* pDataBlock = &((SSDataBlock*)input)[i]; + SPackedData tmp = { + .pDataBlock = pDataBlock, + }; + taosArrayPush(pInfo->pBlockLists, &tmp); + } + pInfo->blockType = STREAM_INPUT__REF_DATA_BLOCK; } return TSDB_CODE_SUCCESS; From 1bda5a722348d4210bf0263435da1cb61af6ddcb Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Fri, 27 Oct 2023 14:47:56 +0800 Subject: [PATCH 02/79] enhance: use tbname cond to set vgroup list of table node --- source/libs/parser/src/parTranslater.c | 105 +++++++++++++++++++++++++ 1 file changed, 105 insertions(+) diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index 72293e2f8c..15d0a1f511 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -3848,6 +3848,110 @@ static int32_t translatePartitionBy(STranslateContext* pCxt, SSelectStmt* pSelec return code; } +typedef struct SEqCondTbNameTableInfo { + 
SRealTableNode* pRealTable; + char tbName[TSDB_TABLE_NAME_LEN]; +} SEqCondTbNameTableInfo; + +//[tableAlias.]tbname = tbNamVal +static bool isOperatorEqTbnameCond(STranslateContext* pCxt, SOperatorNode* pOperator, char** ppTableAlias, char** ppTbNameVal) { + if (pOperator->opType != OP_TYPE_EQUAL) return false; + SFunctionNode* pTbnameFunc = NULL; + SValueNode* pValueNode = NULL; + if (nodeType(pOperator->pLeft) == QUERY_NODE_FUNCTION && + ((SFunctionNode*)(pOperator->pLeft))->funcType == FUNCTION_TYPE_TBNAME && + nodeType(pOperator->pRight) == QUERY_NODE_VALUE) { + pTbnameFunc = (SFunctionNode*)pOperator->pLeft; + pValueNode = (SValueNode*)pOperator->pRight; + } else if (nodeType(pOperator->pRight) == QUERY_NODE_FUNCTION && + ((SFunctionNode*)(pOperator->pRight))->funcType == FUNCTION_TYPE_TBNAME && + nodeType(pOperator->pLeft) == QUERY_NODE_VALUE) { + pTbnameFunc = (SFunctionNode*)pOperator->pRight; + pValueNode = (SValueNode*)pOperator->pLeft; + } else { + return false; + } + + if (LIST_LENGTH(pTbnameFunc->pParameterList) == 0) { + *ppTableAlias = NULL; + } else if (LIST_LENGTH(pTbnameFunc->pParameterList) == 1) { + SNode* pQualNode = nodesListGetNode(pTbnameFunc->pParameterList, 0); + if (nodeType(pQualNode) != QUERY_NODE_VALUE) return false; + SValueNode* pQualValNode = (SValueNode*)pQualNode; + *ppTableAlias = pQualValNode->literal; + } else { + return false; + } + *ppTbNameVal = pValueNode->literal; + return true; +} + +static int32_t findEqCondTbNameInOperatorNode(STranslateContext* pCxt, SNode* pWhere, SArray* aTables) { + char* pTableAlias = NULL; + char* pTbNameVal = NULL; + if (isOperatorEqTbnameCond(pCxt, (SOperatorNode*)pWhere, &pTableAlias, &pTbNameVal)) { + STableNode* pTable; + if (pTableAlias == NULL) { + pTable = (STableNode*)((SSelectStmt*)(pCxt->pCurrStmt))->pFromTable; + } else { + findTable(pCxt, pTableAlias, &pTable); + } + if (nodeType(pTable) == QUERY_NODE_REAL_TABLE) { + SEqCondTbNameTableInfo info = {0}; + strcpy(info.tbName, 
pTbNameVal); + info.pRealTable = (SRealTableNode*)pTable; + taosArrayPush(aTables, &info); + } + } + return TSDB_CODE_SUCCESS; +} + +static int32_t findEqualCondTbname(STranslateContext* pCxt, SNode* pWhere, SArray* aTables) { + if (nodeType(pWhere) == QUERY_NODE_OPERATOR) { + findEqCondTbNameInOperatorNode(pCxt, pWhere, aTables); + } else if (nodeType(pWhere) == QUERY_NODE_LOGIC_CONDITION && + ((SLogicConditionNode*)pWhere)->condType == LOGIC_COND_TYPE_AND) { + SNode* pTmpNode = NULL; + FOREACH(pTmpNode, ((SLogicConditionNode*)pWhere)->pParameterList) { + if (nodeType(pWhere) == QUERY_NODE_OPERATOR) { + findEqCondTbNameInOperatorNode(pCxt, pTmpNode, aTables); + } + } + } + return TSDB_CODE_SUCCESS; +} + +static int32_t setEqualTbnameTableVgroups(STranslateContext* pCxt, SSelectStmt* pSelect, SArray* aTables) { + int32_t code = TSDB_CODE_SUCCESS; + for (int i = 0; i < taosArrayGetSize(aTables); ++i) { + SEqCondTbNameTableInfo* pInfo = taosArrayGet(aTables, i); + char* dbName = pInfo->pRealTable->table.dbName; + SName snameTb; + toName(pCxt->pParseCxt->acctId, dbName, pInfo->tbName, &snameTb); + SVgroupInfo vgInfo; + bool bExists; + code = catalogGetCachedTableHashVgroup(pCxt->pParseCxt->pCatalog, &snameTb, &vgInfo, &bExists); + if (code == TSDB_CODE_SUCCESS && bExists) { + taosMemoryFree(pInfo->pRealTable->pVgroupList); + pInfo->pRealTable->pVgroupList = taosMemoryMalloc(sizeof(SVgroupsInfo) + sizeof(SVgroupInfo)); + pInfo->pRealTable->pVgroupList->numOfVgroups = 1; + pInfo->pRealTable->pVgroupList->vgroups[0] = vgInfo; + } + } + return TSDB_CODE_SUCCESS; +} + +static int32_t setTableVgroupsFromEqualTbnameCond(STranslateContext* pCxt, SSelectStmt* pSelect) { + int32_t code = TSDB_CODE_SUCCESS; + SArray* aTables = taosArrayInit(1, sizeof(SEqCondTbNameTableInfo)); + code = findEqualCondTbname(pCxt, pSelect->pWhere, aTables); + if (code == TSDB_CODE_SUCCESS) { + code = setEqualTbnameTableVgroups(pCxt, pSelect, aTables); + } + taosArrayDestroy(aTables); + return code; 
+} + static int32_t translateWhere(STranslateContext* pCxt, SSelectStmt* pSelect) { pCxt->currClause = SQL_CLAUSE_WHERE; int32_t code = translateExpr(pCxt, &pSelect->pWhere); @@ -3857,6 +3961,7 @@ static int32_t translateWhere(STranslateContext* pCxt, SSelectStmt* pSelect) { if (TSDB_CODE_SUCCESS == code && pSelect->timeRange.skey > pSelect->timeRange.ekey) { pSelect->isEmptyResult = true; } + setTableVgroupsFromEqualTbnameCond(pCxt, pSelect); return code; } From 15aedb8d8cd593518a2a91a2505c9197bf437ba4 Mon Sep 17 00:00:00 2001 From: slzhou Date: Fri, 27 Oct 2023 15:16:40 +0800 Subject: [PATCH 03/79] fix: fix bugs of pWhere is null --- source/libs/parser/src/parTranslater.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index 15d0a1f511..171d9757c1 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -3961,7 +3961,9 @@ static int32_t translateWhere(STranslateContext* pCxt, SSelectStmt* pSelect) { if (TSDB_CODE_SUCCESS == code && pSelect->timeRange.skey > pSelect->timeRange.ekey) { pSelect->isEmptyResult = true; } - setTableVgroupsFromEqualTbnameCond(pCxt, pSelect); + if (pSelect->pWhere != NULL) { + setTableVgroupsFromEqualTbnameCond(pCxt, pSelect); + } return code; } From 701a148ad1be97343cd863d497853c8d6aa28df3 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Tue, 31 Oct 2023 19:59:36 +0800 Subject: [PATCH 04/79] feat: init merge operator --- source/libs/executor/src/mergeoperator.c | 207 +++++++++++++++++++++++ source/libs/executor/src/sortoperator.c | 108 +----------- 2 files changed, 214 insertions(+), 101 deletions(-) create mode 100755 source/libs/executor/src/mergeoperator.c diff --git a/source/libs/executor/src/mergeoperator.c b/source/libs/executor/src/mergeoperator.c new file mode 100755 index 0000000000..9650ac4cb5 --- /dev/null +++ b/source/libs/executor/src/mergeoperator.c @@ -0,0 +1,207 @@ +/* + * 
Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "executorInt.h" +#include "filter.h" +#include "operator.h" +#include "querytask.h" +#include "tdatablock.h" + +typedef struct SSortMergeInfo { + SArray* pSortInfo; + SSortHandle* pSortHandle; + STupleHandle* prefetchedTuple; +} SSortMergeInfo; + +typedef struct SNonSortMergeInfo { + +} SNonSortMergeInfo; + +typedef struct SColumnMergeInfo { + +} SColumnMergeInfo; + +typedef struct SMultiwayMergeOperatorInfo { + SOptrBasicInfo binfo; + union { + SSortMergeInfo sortMergeInfo; + SNonSortMergeInfo nsortMergeInfo; + SColumnMergeInfo colMergeInfo; + }; + int32_t bufPageSize; + uint32_t sortBufSize; // max buffer size for in-memory sort + SLimitInfo limitInfo; + SColMatchInfo matchInfo; + SSDataBlock* pInputBlock; + SSDataBlock* pIntermediateBlock; // to hold the intermediate result + int64_t startTs; // sort start time + bool groupMerge; + bool ignoreGroupId; + uint64_t groupId; + bool inputWithGroupId; +} SMultiwayMergeOperatorInfo; + +int32_t openMultiwayMergeOperator(SOperatorInfo* pOperator) { + SMultiwayMergeOperatorInfo* pInfo = pOperator->info; + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + + if (OPTR_IS_OPENED(pOperator)) { + return TSDB_CODE_SUCCESS; + } + + pInfo->startTs = taosGetTimestampUs(); + int32_t numOfBufPage = pInfo->sortBufSize / pInfo->bufPageSize; + + pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_MULTISOURCE_MERGE, pInfo->bufPageSize, 
numOfBufPage, + pInfo->pInputBlock, pTaskInfo->id.str, 0, 0, 0); + + tsortSetFetchRawDataFp(pInfo->pSortHandle, loadNextDataBlock, NULL, NULL); + tsortSetCompareGroupId(pInfo->pSortHandle, pInfo->groupMerge); + + for (int32_t i = 0; i < pOperator->numOfDownstream; ++i) { + SOperatorInfo* pDownstream = pOperator->pDownstream[i]; + if (pDownstream->operatorType == QUERY_NODE_PHYSICAL_PLAN_EXCHANGE) { + pDownstream->fpSet._openFn(pDownstream); + } + + SSortSource* ps = taosMemoryCalloc(1, sizeof(SSortSource)); + ps->param = pDownstream; + ps->onlyRef = true; + + tsortAddSource(pInfo->pSortHandle, ps); + } + + int32_t code = tsortOpen(pInfo->pSortHandle); + if (code != TSDB_CODE_SUCCESS) { + T_LONG_JMP(pTaskInfo->env, terrno); + } + + pOperator->cost.openCost = (taosGetTimestampUs() - pInfo->startTs) / 1000.0; + pOperator->status = OP_RES_TO_RETURN; + + OPTR_SET_OPENED(pOperator); + return TSDB_CODE_SUCCESS; +} + +SSDataBlock* doMultiwayMerge(SOperatorInfo* pOperator) { + if (pOperator->status == OP_EXEC_DONE) { + return NULL; + } + + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + SMultiwayMergeOperatorInfo* pInfo = pOperator->info; + + int32_t code = pOperator->fpSet._openFn(pOperator); + if (code != TSDB_CODE_SUCCESS) { + T_LONG_JMP(pTaskInfo->env, code); + } + + qDebug("start to merge final sorted rows, %s", GET_TASKID(pTaskInfo)); + SSDataBlock* pBlock = getMultiwaySortedBlockData(pInfo->pSortHandle, pInfo->binfo.pRes, pInfo->matchInfo.pList, pOperator); + if (pBlock != NULL) { + pOperator->resultInfo.totalRows += pBlock->info.rows; + } else { + setOperatorCompleted(pOperator); + } + + return pBlock; +} + +void destroyMultiwayMergeOperatorInfo(void* param) { + SMultiwayMergeOperatorInfo* pInfo = (SMultiwayMergeOperatorInfo*)param; + pInfo->binfo.pRes = blockDataDestroy(pInfo->binfo.pRes); + pInfo->pInputBlock = blockDataDestroy(pInfo->pInputBlock); + pInfo->pIntermediateBlock = blockDataDestroy(pInfo->pIntermediateBlock); + + 
tsortDestroySortHandle(pInfo->pSortHandle); + taosArrayDestroy(pInfo->pSortInfo); + taosArrayDestroy(pInfo->matchInfo.pList); + + taosMemoryFreeClear(param); +} + +int32_t getMultiwayMergeExplainExecInfo(SOperatorInfo* pOptr, void** pOptrExplain, uint32_t* len) { + SSortExecInfo* pSortExecInfo = taosMemoryCalloc(1, sizeof(SSortExecInfo)); + + SMultiwayMergeOperatorInfo* pInfo = (SMultiwayMergeOperatorInfo*)pOptr->info; + + *pSortExecInfo = tsortGetSortExecInfo(pInfo->pSortHandle); + *pOptrExplain = pSortExecInfo; + + *len = sizeof(SSortExecInfo); + return TSDB_CODE_SUCCESS; +} + +SOperatorInfo* createMultiwayMergeOperatorInfo(SOperatorInfo** downStreams, size_t numStreams, + SMergePhysiNode* pMergePhyNode, SExecTaskInfo* pTaskInfo) { + SPhysiNode* pPhyNode = (SPhysiNode*)pMergePhyNode; + + SMultiwayMergeOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SMultiwayMergeOperatorInfo)); + SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo)); + SDataBlockDescNode* pDescNode = pPhyNode->pOutputDataBlockDesc; + + int32_t code = TSDB_CODE_SUCCESS; + if (pInfo == NULL || pOperator == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _error; + } + + initLimitInfo(pMergePhyNode->node.pLimit, pMergePhyNode->node.pSlimit, &pInfo->limitInfo); + pInfo->binfo.pRes = createDataBlockFromDescNode(pDescNode); + + int32_t rowSize = pInfo->binfo.pRes->info.rowSize; + int32_t numOfOutputCols = 0; + code = extractColMatchInfo(pMergePhyNode->pTargets, pDescNode, &numOfOutputCols, COL_MATCH_FROM_SLOT_ID, + &pInfo->matchInfo); + if (code != TSDB_CODE_SUCCESS) { + goto _error; + } + + SPhysiNode* pChildNode = (SPhysiNode*)nodesListGetNode(pPhyNode->pChildren, 0); + SSDataBlock* pInputBlock = createDataBlockFromDescNode(pChildNode->pOutputDataBlockDesc); + + initResultSizeInfo(&pOperator->resultInfo, 1024); + blockDataEnsureCapacity(pInfo->binfo.pRes, pOperator->resultInfo.capacity); + + pInfo->groupMerge = pMergePhyNode->groupSort; + pInfo->ignoreGroupId = 
pMergePhyNode->ignoreGroupId; + pInfo->pSortInfo = createSortInfo(pMergePhyNode->pMergeKeys); + pInfo->pInputBlock = pInputBlock; + size_t numOfCols = taosArrayGetSize(pInfo->binfo.pRes->pDataBlock); + pInfo->bufPageSize = getProperSortPageSize(rowSize, numOfCols); + pInfo->sortBufSize = pInfo->bufPageSize * (numStreams + 1); // one additional is reserved for merged result. + pInfo->binfo.inputTsOrder = pMergePhyNode->node.inputTsOrder; + pInfo->binfo.outputTsOrder = pMergePhyNode->node.outputTsOrder; + pInfo->inputWithGroupId = pMergePhyNode->inputWithGroupId; + + setOperatorInfo(pOperator, "MultiwayMergeOperator", QUERY_NODE_PHYSICAL_PLAN_MERGE, false, OP_NOT_OPENED, pInfo, pTaskInfo); + pOperator->fpSet = createOperatorFpSet(openMultiwayMergeOperator, doMultiwayMerge, NULL, + destroyMultiwayMergeOperatorInfo, optrDefaultBufFn, getMultiwayMergeExplainExecInfo, optrDefaultGetNextExtFn, NULL); + + code = appendDownstream(pOperator, downStreams, numStreams); + if (code != TSDB_CODE_SUCCESS) { + goto _error; + } + return pOperator; + +_error: + if (pInfo != NULL) { + destroyMultiwayMergeOperatorInfo(pInfo); + } + + pTaskInfo->code = code; + taosMemoryFree(pOperator); + return NULL; +} diff --git a/source/libs/executor/src/sortoperator.c b/source/libs/executor/src/sortoperator.c index ccef6640be..0ccdb2dd2b 100644 --- a/source/libs/executor/src/sortoperator.c +++ b/source/libs/executor/src/sortoperator.c @@ -675,27 +675,7 @@ _error: return NULL; } -//===================================================================================== -// Multiway Sort Merge operator -typedef struct SMultiwayMergeOperatorInfo { - SOptrBasicInfo binfo; - int32_t bufPageSize; - uint32_t sortBufSize; // max buffer size for in-memory sort - SLimitInfo limitInfo; - SArray* pSortInfo; - SSortHandle* pSortHandle; - SColMatchInfo matchInfo; - SSDataBlock* pInputBlock; - SSDataBlock* pIntermediateBlock; // to hold the intermediate result - int64_t startTs; // sort start time - bool groupSort; 
- bool ignoreGroupId; - uint64_t groupId; - STupleHandle* prefetchedTuple; - bool inputWithGroupId; -} SMultiwayMergeOperatorInfo; - -int32_t openMultiwayMergeOperator(SOperatorInfo* pOperator) { +int32_t openSortMergeOperator(SOperatorInfo* pOperator) { SMultiwayMergeOperatorInfo* pInfo = pOperator->info; SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; @@ -710,7 +690,7 @@ int32_t openMultiwayMergeOperator(SOperatorInfo* pOperator) { pInfo->pInputBlock, pTaskInfo->id.str, 0, 0, 0); tsortSetFetchRawDataFp(pInfo->pSortHandle, loadNextDataBlock, NULL, NULL); - tsortSetCompareGroupId(pInfo->pSortHandle, pInfo->groupSort); + tsortSetCompareGroupId(pInfo->pSortHandle, pInfo->groupMerge); for (int32_t i = 0; i < pOperator->numOfDownstream; ++i) { SOperatorInfo* pDownstream = pOperator->pDownstream[i]; @@ -743,7 +723,7 @@ static void doGetSortedBlockData(SMultiwayMergeOperatorInfo* pInfo, SSortHandle* while (1) { STupleHandle* pTupleHandle = NULL; - if (pInfo->groupSort || pInfo->inputWithGroupId) { + if (pInfo->groupMerge || pInfo->inputWithGroupId) { if (pInfo->prefetchedTuple == NULL) { pTupleHandle = tsortNextTuple(pHandle); } else { @@ -764,7 +744,7 @@ static void doGetSortedBlockData(SMultiwayMergeOperatorInfo* pInfo, SSortHandle* break; } - if (pInfo->groupSort || pInfo->inputWithGroupId) { + if (pInfo->groupMerge || pInfo->inputWithGroupId) { uint64_t tupleGroupId = tsortGetGroupId(pTupleHandle); if (pInfo->groupId == 0 || pInfo->groupId == tupleGroupId) { appendOneRowToDataBlock(p, pTupleHandle); @@ -789,7 +769,7 @@ static void doGetSortedBlockData(SMultiwayMergeOperatorInfo* pInfo, SSortHandle* } } -SSDataBlock* getMultiwaySortedBlockData(SSortHandle* pHandle, SSDataBlock* pDataBlock, SArray* pColMatchInfo, +SSDataBlock* getSortMergeSortedBlockData(SSortHandle* pHandle, SSDataBlock* pDataBlock, SArray* pColMatchInfo, SOperatorInfo* pOperator) { SMultiwayMergeOperatorInfo* pInfo = pOperator->info; @@ -855,7 +835,7 @@ SSDataBlock* 
getMultiwaySortedBlockData(SSortHandle* pHandle, SSDataBlock* pData return (pDataBlock->info.rows > 0) ? pDataBlock : NULL; } -SSDataBlock* doMultiwayMerge(SOperatorInfo* pOperator) { +SSDataBlock* doSortMerge(SOperatorInfo* pOperator) { if (pOperator->status == OP_EXEC_DONE) { return NULL; } @@ -879,20 +859,7 @@ SSDataBlock* doMultiwayMerge(SOperatorInfo* pOperator) { return pBlock; } -void destroyMultiwayMergeOperatorInfo(void* param) { - SMultiwayMergeOperatorInfo* pInfo = (SMultiwayMergeOperatorInfo*)param; - pInfo->binfo.pRes = blockDataDestroy(pInfo->binfo.pRes); - pInfo->pInputBlock = blockDataDestroy(pInfo->pInputBlock); - pInfo->pIntermediateBlock = blockDataDestroy(pInfo->pIntermediateBlock); - - tsortDestroySortHandle(pInfo->pSortHandle); - taosArrayDestroy(pInfo->pSortInfo); - taosArrayDestroy(pInfo->matchInfo.pList); - - taosMemoryFreeClear(param); -} - -int32_t getMultiwayMergeExplainExecInfo(SOperatorInfo* pOptr, void** pOptrExplain, uint32_t* len) { +int32_t getSortMergeExplainExecInfo(SOperatorInfo* pOptr, void** pOptrExplain, uint32_t* len) { SSortExecInfo* pSortExecInfo = taosMemoryCalloc(1, sizeof(SSortExecInfo)); SMultiwayMergeOperatorInfo* pInfo = (SMultiwayMergeOperatorInfo*)pOptr->info; @@ -904,64 +871,3 @@ int32_t getMultiwayMergeExplainExecInfo(SOperatorInfo* pOptr, void** pOptrExplai return TSDB_CODE_SUCCESS; } -SOperatorInfo* createMultiwayMergeOperatorInfo(SOperatorInfo** downStreams, size_t numStreams, - SMergePhysiNode* pMergePhyNode, SExecTaskInfo* pTaskInfo) { - SPhysiNode* pPhyNode = (SPhysiNode*)pMergePhyNode; - - SMultiwayMergeOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SMultiwayMergeOperatorInfo)); - SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo)); - SDataBlockDescNode* pDescNode = pPhyNode->pOutputDataBlockDesc; - - int32_t code = TSDB_CODE_SUCCESS; - if (pInfo == NULL || pOperator == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _error; - } - - initLimitInfo(pMergePhyNode->node.pLimit, 
pMergePhyNode->node.pSlimit, &pInfo->limitInfo); - pInfo->binfo.pRes = createDataBlockFromDescNode(pDescNode); - - int32_t rowSize = pInfo->binfo.pRes->info.rowSize; - int32_t numOfOutputCols = 0; - code = extractColMatchInfo(pMergePhyNode->pTargets, pDescNode, &numOfOutputCols, COL_MATCH_FROM_SLOT_ID, - &pInfo->matchInfo); - if (code != TSDB_CODE_SUCCESS) { - goto _error; - } - - SPhysiNode* pChildNode = (SPhysiNode*)nodesListGetNode(pPhyNode->pChildren, 0); - SSDataBlock* pInputBlock = createDataBlockFromDescNode(pChildNode->pOutputDataBlockDesc); - - initResultSizeInfo(&pOperator->resultInfo, 1024); - blockDataEnsureCapacity(pInfo->binfo.pRes, pOperator->resultInfo.capacity); - - pInfo->groupSort = pMergePhyNode->groupSort; - pInfo->ignoreGroupId = pMergePhyNode->ignoreGroupId; - pInfo->pSortInfo = createSortInfo(pMergePhyNode->pMergeKeys); - pInfo->pInputBlock = pInputBlock; - size_t numOfCols = taosArrayGetSize(pInfo->binfo.pRes->pDataBlock); - pInfo->bufPageSize = getProperSortPageSize(rowSize, numOfCols); - pInfo->sortBufSize = pInfo->bufPageSize * (numStreams + 1); // one additional is reserved for merged result. 
- pInfo->binfo.inputTsOrder = pMergePhyNode->node.inputTsOrder; - pInfo->binfo.outputTsOrder = pMergePhyNode->node.outputTsOrder; - pInfo->inputWithGroupId = pMergePhyNode->inputWithGroupId; - - setOperatorInfo(pOperator, "MultiwayMergeOperator", QUERY_NODE_PHYSICAL_PLAN_MERGE, false, OP_NOT_OPENED, pInfo, pTaskInfo); - pOperator->fpSet = createOperatorFpSet(openMultiwayMergeOperator, doMultiwayMerge, NULL, - destroyMultiwayMergeOperatorInfo, optrDefaultBufFn, getMultiwayMergeExplainExecInfo, optrDefaultGetNextExtFn, NULL); - - code = appendDownstream(pOperator, downStreams, numStreams); - if (code != TSDB_CODE_SUCCESS) { - goto _error; - } - return pOperator; - -_error: - if (pInfo != NULL) { - destroyMultiwayMergeOperatorInfo(pInfo); - } - - pTaskInfo->code = code; - taosMemoryFree(pOperator); - return NULL; -} From bab886b22984ae3ffd1ae529645783a72e7fb61a Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Fri, 3 Nov 2023 14:12:04 +0800 Subject: [PATCH 05/79] enhance: refact code --- source/libs/parser/src/parTranslater.c | 41 ++++++++++++++++---------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index 15d0a1f511..59239c08b5 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -3886,35 +3886,46 @@ static bool isOperatorEqTbnameCond(STranslateContext* pCxt, SOperatorNode* pOper return true; } -static int32_t findEqCondTbNameInOperatorNode(STranslateContext* pCxt, SNode* pWhere, SArray* aTables) { +static bool findEqCondTbNameInOperatorNode(STranslateContext* pCxt, SNode* pWhere, SEqCondTbNameTableInfo* pInfo) { + int32_t code = TSDB_CODE_SUCCESS; char* pTableAlias = NULL; char* pTbNameVal = NULL; + if (isOperatorEqTbnameCond(pCxt, (SOperatorNode*)pWhere, &pTableAlias, &pTbNameVal)) { STableNode* pTable; if (pTableAlias == NULL) { pTable = (STableNode*)((SSelectStmt*)(pCxt->pCurrStmt))->pFromTable; } else { - 
findTable(pCxt, pTableAlias, &pTable); + code = findTable(pCxt, pTableAlias, &pTable); } - if (nodeType(pTable) == QUERY_NODE_REAL_TABLE) { - SEqCondTbNameTableInfo info = {0}; - strcpy(info.tbName, pTbNameVal); - info.pRealTable = (SRealTableNode*)pTable; - taosArrayPush(aTables, &info); + if (code == TSDB_CODE_SUCCESS && nodeType(pTable) == QUERY_NODE_REAL_TABLE) { + strcpy(pInfo->tbName, pTbNameVal); + pInfo->pRealTable = (SRealTableNode*)pTable; + return true; } } - return TSDB_CODE_SUCCESS; + return false; } static int32_t findEqualCondTbname(STranslateContext* pCxt, SNode* pWhere, SArray* aTables) { if (nodeType(pWhere) == QUERY_NODE_OPERATOR) { - findEqCondTbNameInOperatorNode(pCxt, pWhere, aTables); - } else if (nodeType(pWhere) == QUERY_NODE_LOGIC_CONDITION && - ((SLogicConditionNode*)pWhere)->condType == LOGIC_COND_TYPE_AND) { - SNode* pTmpNode = NULL; - FOREACH(pTmpNode, ((SLogicConditionNode*)pWhere)->pParameterList) { - if (nodeType(pWhere) == QUERY_NODE_OPERATOR) { - findEqCondTbNameInOperatorNode(pCxt, pTmpNode, aTables); + SEqCondTbNameTableInfo info = {0}; + bool bIsEqTbnameCond = findEqCondTbNameInOperatorNode(pCxt, pWhere, &info); + if (bIsEqTbnameCond) { + taosArrayPush(aTables, &info); + } + } else if (nodeType(pWhere) == QUERY_NODE_LOGIC_CONDITION) { + if (((SLogicConditionNode*)pWhere)->condType == LOGIC_COND_TYPE_AND) { + SNode* pTmpNode = NULL; + FOREACH(pTmpNode, ((SLogicConditionNode*)pWhere)->pParameterList) { + if (nodeType(pWhere) == QUERY_NODE_OPERATOR) { + SEqCondTbNameTableInfo info = {0}; + bool bIsEqTbnameCond = findEqCondTbNameInOperatorNode(pCxt, pTmpNode, &info); + if (bIsEqTbnameCond) { + taosArrayPush(aTables, &info); + break; + } + } } } } From 893f7cc080060d124d5e518c0f4a7199d480f867 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Fri, 3 Nov 2023 16:35:43 +0800 Subject: [PATCH 06/79] enhance: prepare for or processing --- source/libs/parser/src/parTranslater.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 
4 deletions(-) diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index 75581c80af..f1808ff911 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -3907,12 +3907,22 @@ static bool findEqCondTbNameInOperatorNode(STranslateContext* pCxt, SNode* pWher return false; } -static int32_t findEqualCondTbname(STranslateContext* pCxt, SNode* pWhere, SArray* aTables) { +static bool isTableExistInTableTbnames(SArray* aTableTbNames, SRealTableNode* pTable) { + for (int i = 0; i < taosArrayGetSize(aTableTbNames); ++i) { + SEqCondTbNameTableInfo* info = taosArrayGet(aTableTbNames, i); + if (info->pRealTable == pTable) { + return true; + } + } + return false; +} + +static int32_t findEqualCondTbname(STranslateContext* pCxt, SNode* pWhere, SArray* aTableTbnames) { if (nodeType(pWhere) == QUERY_NODE_OPERATOR) { SEqCondTbNameTableInfo info = {0}; bool bIsEqTbnameCond = findEqCondTbNameInOperatorNode(pCxt, pWhere, &info); if (bIsEqTbnameCond) { - taosArrayPush(aTables, &info); + taosArrayPush(aTableTbnames, &info); } } else if (nodeType(pWhere) == QUERY_NODE_LOGIC_CONDITION) { if (((SLogicConditionNode*)pWhere)->condType == LOGIC_COND_TYPE_AND) { @@ -3921,12 +3931,14 @@ static int32_t findEqualCondTbname(STranslateContext* pCxt, SNode* pWhere, SArra if (nodeType(pWhere) == QUERY_NODE_OPERATOR) { SEqCondTbNameTableInfo info = {0}; bool bIsEqTbnameCond = findEqCondTbNameInOperatorNode(pCxt, pTmpNode, &info); - if (bIsEqTbnameCond) { - taosArrayPush(aTables, &info); + if (bIsEqTbnameCond && !isTableExistInTableTbnames(aTableTbnames, info.pRealTable)) { + taosArrayPush(aTableTbnames, &info); break; } } } + } else if (((SLogicConditionNode*)pWhere)->condType == LOGIC_COND_TYPE_OR) { + // deal with or condition } } return TSDB_CODE_SUCCESS; From 88c8463e18be24103fae595b02dfbd9f144ce257 Mon Sep 17 00:00:00 2001 From: slzhou Date: Mon, 6 Nov 2023 11:06:22 +0800 Subject: [PATCH 07/79] fix: process logic or 
conditions --- source/libs/parser/src/parTranslater.c | 116 +++++++++++++++++++------ 1 file changed, 91 insertions(+), 25 deletions(-) diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index f1808ff911..139dd75ed2 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -3851,6 +3851,7 @@ static int32_t translatePartitionBy(STranslateContext* pCxt, SSelectStmt* pSelec typedef struct SEqCondTbNameTableInfo { SRealTableNode* pRealTable; char tbName[TSDB_TABLE_NAME_LEN]; + bool done; } SEqCondTbNameTableInfo; //[tableAlias.]tbname = tbNamVal @@ -3898,7 +3899,8 @@ static bool findEqCondTbNameInOperatorNode(STranslateContext* pCxt, SNode* pWher } else { code = findTable(pCxt, pTableAlias, &pTable); } - if (code == TSDB_CODE_SUCCESS && nodeType(pTable) == QUERY_NODE_REAL_TABLE) { + if (code == TSDB_CODE_SUCCESS && nodeType(pTable) == QUERY_NODE_REAL_TABLE && + ((SRealTableNode*)pTable)->pMeta && ((SRealTableNode*)pTable)->pMeta->tableType == TSDB_SUPER_TABLE) { strcpy(pInfo->tbName, pTbNameVal); pInfo->pRealTable = (SRealTableNode*)pTable; return true; @@ -3917,6 +3919,43 @@ static bool isTableExistInTableTbnames(SArray* aTableTbNames, SRealTableNode* pT return false; } +static void findEqualCondTbnameInLogicCondAnd(STranslateContext* pCxt, SNode* pWhere, SArray* aTableTbnames) { + SNode* pTmpNode = NULL; + FOREACH(pTmpNode, ((SLogicConditionNode*)pWhere)->pParameterList) { + if (nodeType(pTmpNode) == QUERY_NODE_OPERATOR) { + SEqCondTbNameTableInfo info = {0}; + bool bIsEqTbnameCond = findEqCondTbNameInOperatorNode(pCxt, pTmpNode, &info); + if (bIsEqTbnameCond && !isTableExistInTableTbnames(aTableTbnames, info.pRealTable)) { + taosArrayPush(aTableTbnames, &info); + break; + } + } + } +} + +static void findEqualCondTbnameInLogicCondOr(STranslateContext* pCxt, SNode* pWhere, SArray* aTableTbnames) { + bool bAllTbName = true; + SNode* pTmpNode = NULL; + FOREACH(pTmpNode, 
((SLogicConditionNode*)pWhere)->pParameterList) { + if (nodeType(pTmpNode) == QUERY_NODE_OPERATOR) { + SEqCondTbNameTableInfo info = {0}; + bool bIsEqTbnameCond = findEqCondTbNameInOperatorNode(pCxt, pTmpNode, &info); + if (!bIsEqTbnameCond) { + bAllTbName = false; + break; + } else if (isTableExistInTableTbnames(aTableTbnames, info.pRealTable)) { + taosArrayPush(aTableTbnames, &info); + } + } else { + bAllTbName = false; + break; + } + } + if (!bAllTbName) { + taosArrayClear(aTableTbnames); + } +} + static int32_t findEqualCondTbname(STranslateContext* pCxt, SNode* pWhere, SArray* aTableTbnames) { if (nodeType(pWhere) == QUERY_NODE_OPERATOR) { SEqCondTbNameTableInfo info = {0}; @@ -3926,19 +3965,45 @@ static int32_t findEqualCondTbname(STranslateContext* pCxt, SNode* pWhere, SArra } } else if (nodeType(pWhere) == QUERY_NODE_LOGIC_CONDITION) { if (((SLogicConditionNode*)pWhere)->condType == LOGIC_COND_TYPE_AND) { - SNode* pTmpNode = NULL; - FOREACH(pTmpNode, ((SLogicConditionNode*)pWhere)->pParameterList) { - if (nodeType(pWhere) == QUERY_NODE_OPERATOR) { - SEqCondTbNameTableInfo info = {0}; - bool bIsEqTbnameCond = findEqCondTbNameInOperatorNode(pCxt, pTmpNode, &info); - if (bIsEqTbnameCond && !isTableExistInTableTbnames(aTableTbnames, info.pRealTable)) { - taosArrayPush(aTableTbnames, &info); - break; - } + findEqualCondTbnameInLogicCondAnd(pCxt, pWhere, aTableTbnames); + } else if (((SLogicConditionNode*)pWhere)->condType == LOGIC_COND_TYPE_OR) { + findEqualCondTbnameInLogicCondOr(pCxt, pWhere, aTableTbnames); + } + } + return TSDB_CODE_SUCCESS; +} + +static int32_t findVgroupsFromEqualTbname(STranslateContext* pCxt, SArray* aTables, int32_t start, SVgroupsInfo* vgsInfo) { + int32_t nVgroups = 0; + int32_t nTbls = taosArrayGetSize(aTables); + SEqCondTbNameTableInfo* pInfo1 = taosArrayGet(aTables, start); + + for (int j = start; j < nTbls; ++j) { + SEqCondTbNameTableInfo* pInfo = taosArrayGet(aTables, j); + if (pInfo->done || pInfo->pRealTable != 
pInfo1->pRealTable) { + continue; + } + char* dbName = pInfo->pRealTable->table.dbName; + SName snameTb; + toName(pCxt->pParseCxt->acctId, dbName, pInfo->tbName, &snameTb); + SVgroupInfo vgInfo; + bool bExists; + int32_t code = catalogGetCachedTableHashVgroup(pCxt->pParseCxt->pCatalog, &snameTb, &vgInfo, &bExists); + if (code == TSDB_CODE_SUCCESS && bExists) { + bool bFoundVg = false; + for (int32_t k = 0; k < nVgroups; ++k) { + if (vgsInfo->vgroups[k].vgId == vgInfo.vgId) { + bFoundVg = true; } } - } else if (((SLogicConditionNode*)pWhere)->condType == LOGIC_COND_TYPE_OR) { - // deal with or condition + if (!bFoundVg) { + vgsInfo->vgroups[nVgroups] = vgInfo; + ++nVgroups; + vgsInfo->numOfVgroups = nVgroups; + } + } else { + vgsInfo->numOfVgroups = 0; + break; } } return TSDB_CODE_SUCCESS; @@ -3946,20 +4011,21 @@ static int32_t findEqualCondTbname(STranslateContext* pCxt, SNode* pWhere, SArra static int32_t setEqualTbnameTableVgroups(STranslateContext* pCxt, SSelectStmt* pSelect, SArray* aTables) { int32_t code = TSDB_CODE_SUCCESS; - for (int i = 0; i < taosArrayGetSize(aTables); ++i) { - SEqCondTbNameTableInfo* pInfo = taosArrayGet(aTables, i); - char* dbName = pInfo->pRealTable->table.dbName; - SName snameTb; - toName(pCxt->pParseCxt->acctId, dbName, pInfo->tbName, &snameTb); - SVgroupInfo vgInfo; - bool bExists; - code = catalogGetCachedTableHashVgroup(pCxt->pParseCxt->pCatalog, &snameTb, &vgInfo, &bExists); - if (code == TSDB_CODE_SUCCESS && bExists) { - taosMemoryFree(pInfo->pRealTable->pVgroupList); - pInfo->pRealTable->pVgroupList = taosMemoryMalloc(sizeof(SVgroupsInfo) + sizeof(SVgroupInfo)); - pInfo->pRealTable->pVgroupList->numOfVgroups = 1; - pInfo->pRealTable->pVgroupList->vgroups[0] = vgInfo; + int32_t nTbls = taosArrayGetSize(aTables); + for (int i = 0; i < nTbls; ++i) { + SEqCondTbNameTableInfo* pInfo1 = taosArrayGet(aTables, i); + if (pInfo1->done) { + continue; } + SVgroupsInfo* vgsInfo = taosMemoryMalloc(sizeof(SVgroupsInfo) + nTbls * 
sizeof(SVgroupInfo)); + int32_t nVgroups = 0; + findVgroupsFromEqualTbname(pCxt, aTables, i, vgsInfo); + if (vgsInfo->numOfVgroups != 0) { + taosMemoryFree(pInfo1->pRealTable->pVgroupList); + pInfo1->pRealTable->pVgroupList = vgsInfo; + } else { + taosMemoryFree(vgsInfo); + } } return TSDB_CODE_SUCCESS; } From 6136b1d018b405c5fc9b629a57df2efa0efd3eef Mon Sep 17 00:00:00 2001 From: slzhou Date: Mon, 6 Nov 2023 13:49:01 +0800 Subject: [PATCH 08/79] fix: fix tbname or error --- source/libs/parser/src/parTranslater.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index 1aa0052007..398d31d750 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -4020,7 +4020,7 @@ static void findEqualCondTbnameInLogicCondOr(STranslateContext* pCxt, SNode* pWh if (!bIsEqTbnameCond) { bAllTbName = false; break; - } else if (isTableExistInTableTbnames(aTableTbnames, info.pRealTable)) { + } else { taosArrayPush(aTableTbnames, &info); } } else { @@ -4066,6 +4066,7 @@ static int32_t findVgroupsFromEqualTbname(STranslateContext* pCxt, SArray* aTabl SVgroupInfo vgInfo; bool bExists; int32_t code = catalogGetCachedTableHashVgroup(pCxt->pParseCxt->pCatalog, &snameTb, &vgInfo, &bExists); + pInfo->done = true; if (code == TSDB_CODE_SUCCESS && bExists) { bool bFoundVg = false; for (int32_t k = 0; k < nVgroups; ++k) { From 41fa664916e2f642531fc4f6ea758940f162199c Mon Sep 17 00:00:00 2001 From: slzhou Date: Mon, 6 Nov 2023 14:52:34 +0800 Subject: [PATCH 09/79] enhance: add test case --- tests/parallel_test/cases.task | 1 + tests/system-test/2-query/tbname_vgroup.py | 118 +++++++++++++++++++++ 2 files changed, 119 insertions(+) create mode 100644 tests/system-test/2-query/tbname_vgroup.py diff --git a/tests/parallel_test/cases.task b/tests/parallel_test/cases.task index 21dcd16441..e00ac4e089 100644 --- a/tests/parallel_test/cases.task +++ 
b/tests/parallel_test/cases.task @@ -21,6 +21,7 @@ ,,y,system-test,./pytest.sh python3 ./test.py -f 8-stream/partition_interval.py ,,y,system-test,./pytest.sh python3 ./test.py -f 8-stream/pause_resume_test.py +,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/tbname_vgroup.py ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/stbJoin.py ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/stbJoin.py -Q 2 ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/stbJoin.py -Q 3 diff --git a/tests/system-test/2-query/tbname_vgroup.py b/tests/system-test/2-query/tbname_vgroup.py new file mode 100644 index 0000000000..dedf473929 --- /dev/null +++ b/tests/system-test/2-query/tbname_vgroup.py @@ -0,0 +1,118 @@ +import sys +from util.log import * +from util.cases import * +from util.sql import * +from util.dnodes import tdDnodes +from math import inf + +class TDTestCase: + def caseDescription(self): + ''' + case1: [TD-] + ''' + return + + def init(self, conn, logSql, replicaVer=1): + tdLog.debug("start to execute %s" % __file__) + tdSql.init(conn.cursor(), True) + self.conn = conn + + def restartTaosd(self, index=1, dbname="db"): + tdDnodes.stop(index) + tdDnodes.startWithoutSleep(index) + tdSql.execute(f"use tbname_vgroup") + + def run(self): + print("running {}".format(__file__)) + tdSql.execute("drop database if exists tbname_vgroup") + tdSql.execute("create database if not exists tbname_vgroup") + tdSql.execute('use tbname_vgroup') + tdSql.execute('create database dbvg vgroups 8;') + + tdSql.execute('use dbvg;') + + tdSql.execute('create table st(ts timestamp, f int) tags (t int);') + + tdSql.execute("insert into ct1 using st tags(1) values('2021-04-19 00:00:01', 1)") + + tdSql.execute("insert into ct2 using st tags(2) values('2021-04-19 00:00:02', 2)") + + tdSql.execute("insert into ct3 using st tags(3) values('2021-04-19 00:00:03', 3)") + + tdSql.execute("insert into ct4 using st tags(4) values('2021-04-19 00:00:04', 4)") + + tdSql.query("select * 
from st where tbname='ct1'") + tdSql.checkRows(1) + tdSql.checkData(0, 0, datetime.datetime(2021, 4, 19, 0, 0, 1)) + tdSql.checkData(0, 1, 1) + tdSql.checkData(0, 2, 1) + + tdSql.query("select * from st where tbname='ct3'") + tdSql.checkRows(1) + tdSql.checkData(0, 0, datetime.datetime(2021, 4, 19, 0, 0, 3)) + tdSql.checkData(0, 1, 3) + tdSql.checkData(0, 2, 3) + + tdSql.query("select * from st where tbname='ct3' and f=2") + tdSql.checkRows(0) + + tdSql.query("select * from st where tbname='ct1' and tbname='ct4'") + tdSql.checkRows(0) + + tdSql.query("select * from st where tbname='ct1' or tbname='ct4' order by ts") + tdSql.checkRows(2) + tdSql.checkData(0, 0, datetime.datetime(2021, 4, 19, 0, 0, 1)) + tdSql.checkData(0, 1, 1) + tdSql.checkData(0, 2, 1) + tdSql.checkData(1, 0, datetime.datetime(2021, 4, 19, 0, 0, 4)) + tdSql.checkData(1, 1, 4) + tdSql.checkData(1, 2, 4) + + tdSql.query("select * from st where tbname='ct2' or tbname='ct3' order by ts") + tdSql.checkRows(2) + tdSql.checkData(0, 0, datetime.datetime(2021, 4, 19, 0, 0, 2)) + tdSql.checkData(0, 1, 2) + tdSql.checkData(0, 2, 2) + tdSql.checkData(1, 0, datetime.datetime(2021, 4, 19, 0, 0, 3)) + tdSql.checkData(1, 1, 3) + tdSql.checkData(1, 2, 3) + + tdSql.query("select * from st where tbname='ct1' or tbname='ct4' or tbname='ct3' or tbname='ct2' order by ts") + tdSql.checkRows(4) + tdSql.checkData(0, 0, datetime.datetime(2021, 4, 19, 0, 0, 1)) + tdSql.checkData(0, 1, 1) + tdSql.checkData(0, 2, 1) + tdSql.checkData(1, 0, datetime.datetime(2021, 4, 19, 0, 0, 2)) + tdSql.checkData(1, 1, 2) + tdSql.checkData(1, 2, 2) + tdSql.checkData(2, 0, datetime.datetime(2021, 4, 19, 0, 0, 3)) + tdSql.checkData(2, 1, 3) + tdSql.checkData(2, 2, 3) + tdSql.checkData(3, 0, datetime.datetime(2021, 4, 19, 0, 0, 4)) + tdSql.checkData(3, 1, 4) + tdSql.checkData(3, 2, 4) + + tdSql.query("select * from st where tbname='ct4' or 1=1 order by ts") + tdSql.checkRows(4) + tdSql.checkData(0, 0, datetime.datetime(2021, 4, 19, 0, 0, 1)) + 
tdSql.checkData(0, 1, 1) + tdSql.checkData(0, 2, 1) + tdSql.checkData(1, 0, datetime.datetime(2021, 4, 19, 0, 0, 2)) + tdSql.checkData(1, 1, 2) + tdSql.checkData(1, 2, 2) + tdSql.checkData(2, 0, datetime.datetime(2021, 4, 19, 0, 0, 3)) + tdSql.checkData(2, 1, 3) + tdSql.checkData(2, 2, 3) + tdSql.checkData(3, 0, datetime.datetime(2021, 4, 19, 0, 0, 4)) + tdSql.checkData(3, 1, 4) + tdSql.checkData(3, 2, 4) + + tdSql.execute('drop database dbvg;') + + tdSql.execute('drop database tbname_vgroup') + def stop(self): + tdSql.close() + tdLog.success("%s successfully executed" % __file__) + +tdCases.addWindows(__file__, TDTestCase()) +tdCases.addLinux(__file__, TDTestCase()) From 04a1f2ef4da0aec0d2a79668d9f81e45a5b5f3cb Mon Sep 17 00:00:00 2001 From: wangjiaming0909 <604227650@qq.com> Date: Fri, 3 Nov 2023 16:22:09 +0800 Subject: [PATCH 10/79] feat: cache scan for select last(c), ts/c --- source/dnode/vnode/src/tsdb/tsdbCacheRead.c | 19 +- source/libs/executor/src/cachescanoperator.c | 12 +- source/libs/function/src/builtins.c | 2 +- source/libs/planner/src/planOptimizer.c | 76 ++++- tests/parallel_test/cases.task | 4 + tests/system-test/2-query/last_cache_scan.py | 279 +++++++++++++++++++ 6 files changed, 378 insertions(+), 14 deletions(-) create mode 100644 tests/system-test/2-query/last_cache_scan.py diff --git a/source/dnode/vnode/src/tsdb/tsdbCacheRead.c b/source/dnode/vnode/src/tsdb/tsdbCacheRead.c index e9e848f1b0..e4a91b73d5 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCacheRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbCacheRead.c @@ -28,13 +28,16 @@ static int32_t saveOneRow(SArray* pRow, SSDataBlock* pBlock, SCacheRowsReader* p // bool allNullRow = true; if (HASTYPE(pReader->type, CACHESCAN_RETRIEVE_LAST)) { + uint64_t ts = 0; + SFirstLastRes* p; for (int32_t i = 0; i < pReader->numOfCols; ++i) { SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, dstSlotIds[i]); - SFirstLastRes* p = (SFirstLastRes*)varDataVal(pRes[i]); int32_t slotId = slotIds[i]; 
SLastCol* pColVal = (SLastCol*)taosArrayGet(pRow, i); + p = (SFirstLastRes*)varDataVal(pRes[i]); p->ts = pColVal->ts; + ts = p->ts; p->isNull = !COL_VAL_IS_VALUE(&pColVal->colVal); // allNullRow = p->isNull & allNullRow; @@ -55,6 +58,20 @@ static int32_t saveOneRow(SArray* pRow, SSDataBlock* pBlock, SCacheRowsReader* p varDataSetLen(pRes[i], pColInfoData->info.bytes - VARSTR_HEADER_SIZE); colDataSetVal(pColInfoData, numOfRows, (const char*)pRes[i], false); } + for (int32_t idx = 0; idx < taosArrayGetSize(pBlock->pDataBlock); ++idx) { + SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, idx); + if (pCol->info.type == TSDB_DATA_TYPE_TIMESTAMP) { + colDataSetVal(pCol, numOfRows, (const char*)&ts, false); + continue; + } + if (pReader->numOfCols == 1 && dstSlotIds[0] != idx) { + if (!p->isNull) { + colDataSetVal(pCol, numOfRows, p->buf, false); + } else { + colDataSetNULL(pCol, numOfRows); + } + } + } // pBlock->info.rows += allNullRow ? 0 : 1; ++pBlock->info.rows; diff --git a/source/libs/executor/src/cachescanoperator.c b/source/libs/executor/src/cachescanoperator.c index 29d098494b..a7b4fe02f6 100644 --- a/source/libs/executor/src/cachescanoperator.c +++ b/source/libs/executor/src/cachescanoperator.c @@ -191,9 +191,9 @@ SSDataBlock* doScanCache(SOperatorInfo* pOperator) { SSDataBlock* pRes = pInfo->pRes; if (pInfo->indexOfBufferedRes < pInfo->pBufferredRes->info.rows) { - for (int32_t i = 0; i < taosArrayGetSize(pInfo->matchInfo.pList); ++i) { - SColMatchItem* pMatchInfo = taosArrayGet(pInfo->matchInfo.pList, i); - int32_t slotId = pMatchInfo->dstSlotId; + for (int32_t i = 0; i < taosArrayGetSize(pInfo->pBufferredRes->pDataBlock); ++i) { + SColumnInfoData* pCol = taosArrayGet(pInfo->pBufferredRes->pDataBlock, i); + int32_t slotId = pCol->info.slotId; SColumnInfoData* pSrc = taosArrayGet(pInfo->pBufferredRes->pDataBlock, slotId); SColumnInfoData* pDst = taosArrayGet(pRes->pDataBlock, slotId); @@ -201,8 +201,10 @@ SSDataBlock* doScanCache(SOperatorInfo* 
pOperator) { if (colDataIsNull_s(pSrc, pInfo->indexOfBufferedRes)) { colDataSetNULL(pDst, 0); } else { - char* p = colDataGetData(pSrc, pInfo->indexOfBufferedRes); - colDataSetVal(pDst, 0, p, false); + if (pSrc->pData) { + char* p = colDataGetData(pSrc, pInfo->indexOfBufferedRes); + colDataSetVal(pDst, 0, p, false); + } } } diff --git a/source/libs/function/src/builtins.c b/source/libs/function/src/builtins.c index 84aff9fa88..74b7218591 100644 --- a/source/libs/function/src/builtins.c +++ b/source/libs/function/src/builtins.c @@ -2772,7 +2772,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { { .name = "_cache_last", .type = FUNCTION_TYPE_CACHE_LAST, - .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_MULTI_RES_FUNC | FUNC_MGT_FORBID_STREAM_FUNC | FUNC_MGT_FORBID_SYSTABLE_FUNC, + .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_MULTI_RES_FUNC | FUNC_MGT_SELECT_FUNC | FUNC_MGT_FORBID_STREAM_FUNC | FUNC_MGT_FORBID_SYSTABLE_FUNC, .translateFunc = translateFirstLast, .getEnvFunc = getFirstLastFuncEnv, .initFunc = functionSetup, diff --git a/source/libs/planner/src/planOptimizer.c b/source/libs/planner/src/planOptimizer.c index 8d4c042960..0b3a432bec 100644 --- a/source/libs/planner/src/planOptimizer.c +++ b/source/libs/planner/src/planOptimizer.c @@ -2478,6 +2478,27 @@ static bool hasSuitableCache(int8_t cacheLastMode, bool hasLastRow, bool hasLast return false; } +/// @brief check if we can apply last row scan optimization +/// @param lastColNum how many distinct last col specified +/// @param lastColId only used when lastColNum equals 1, the col id of the only one last col +/// @param selectNonPKColNum num of normal cols +/// @param selectNonPKColId only used when selectNonPKColNum equals 1, the col id of the only one select col +static bool lastRowScanOptCheckColNum(int32_t lastColNum, col_id_t lastColId, + int32_t selectNonPKColNum, col_id_t selectNonPKColId) { + // multi select non pk col + last func: select c1, c2, last(c1) + if (selectNonPKColNum > 1 && 
lastColNum > 0) return false; + + if (selectNonPKColNum == 1) { + // select last(c1), last(c2), c1 ... + // which is not possible currently + if (lastColNum > 1) return false; + + // select last(c1), c2 ... + if (lastColNum == 1 && lastColId != selectNonPKColId) return false; + } + return true; +} + static bool lastRowScanOptMayBeOptimized(SLogicNode* pNode) { if (QUERY_NODE_LOGIC_PLAN_AGG != nodeType(pNode) || 1 != LIST_LENGTH(pNode->pChildren) || QUERY_NODE_LOGIC_PLAN_SCAN != nodeType(nodesListGetNode(pNode->pChildren, 0))) { @@ -2493,9 +2514,10 @@ static bool lastRowScanOptMayBeOptimized(SLogicNode* pNode) { return false; } - bool hasLastFunc = false; - bool hasSelectFunc = false; - SNode* pFunc = NULL; + bool hasNonPKSelectFunc = false; + SNode* pFunc = NULL; + int32_t lastColNum = 0, selectNonPKColNum = 0; + col_id_t lastColId = -1, selectNonPKColId = -1; FOREACH(pFunc, ((SAggLogicNode*)pNode)->pAggFuncs) { SFunctionNode* pAggFunc = (SFunctionNode*)pFunc; if (FUNCTION_TYPE_LAST == pAggFunc->funcType) { @@ -2505,16 +2527,33 @@ static bool lastRowScanOptMayBeOptimized(SLogicNode* pNode) { if (pCol->colType != COLUMN_TYPE_COLUMN) { return false; } + if (lastColId != pCol->colId) { + lastColId = pCol->colId; + lastColNum++; + } } - if (hasSelectFunc || QUERY_NODE_VALUE == nodeType(nodesListGetNode(pAggFunc->pParameterList, 0))) { + if (QUERY_NODE_VALUE == nodeType(nodesListGetNode(pAggFunc->pParameterList, 0))) { return false; } - hasLastFunc = true; + if (!lastRowScanOptCheckColNum(lastColNum, lastColId, selectNonPKColNum, selectNonPKColId)) + return false; } else if (FUNCTION_TYPE_SELECT_VALUE == pAggFunc->funcType) { - if (hasLastFunc) { + SNode* pParam = nodesListGetNode(pAggFunc->pParameterList, 0); + if (QUERY_NODE_COLUMN == nodeType(pParam)) { + SColumnNode* pCol = (SColumnNode*)pParam; + if (PRIMARYKEY_TIMESTAMP_COL_ID != pCol->colId) { + if (selectNonPKColId != pCol->colId) { + selectNonPKColId = pCol->colId; + selectNonPKColNum++; + } + } else { + 
continue; + } + } else if (lastColNum > 0) { return false; } - hasSelectFunc = true; + if (!lastRowScanOptCheckColNum(lastColNum, lastColId, selectNonPKColNum, selectNonPKColId)) + return false; } else if (FUNCTION_TYPE_GROUP_KEY == pAggFunc->funcType) { if (!lastRowScanOptLastParaIsTag(nodesListGetNode(pAggFunc->pParameterList, 0))) { return false; @@ -2581,6 +2620,9 @@ static int32_t lastRowScanOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogic SLastRowScanOptSetColDataTypeCxt cxt = {.doAgg = true, .pLastCols = NULL}; SNode* pNode = NULL; + SColumnNode* pPKTsCol = NULL; + SColumnNode* pNonPKCol = NULL; + FOREACH(pNode, pAgg->pAggFuncs) { SFunctionNode* pFunc = (SFunctionNode*)pNode; int32_t funcType = pFunc->funcType; @@ -2597,6 +2639,16 @@ static int32_t lastRowScanOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogic nodesWalkExpr(nodesListGetNode(pFunc->pParameterList, 0), lastRowScanOptSetColDataType, &cxt); nodesListErase(pFunc->pParameterList, nodesListGetCell(pFunc->pParameterList, 1)); } + } else if (FUNCTION_TYPE_SELECT_VALUE) { + pNode = nodesListGetNode(pFunc->pParameterList, 0); + if (nodeType(pNode) == QUERY_NODE_COLUMN) { + SColumnNode* pCol = (SColumnNode*)pNode; + if (pCol->colId == PRIMARYKEY_TIMESTAMP_COL_ID) { + pPKTsCol = pCol; + } else { + pNonPKCol = pCol; + } + } } } @@ -2608,6 +2660,16 @@ static int32_t lastRowScanOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogic lastRowScanOptSetLastTargets(pScan->pScanCols, cxt.pLastCols, true); nodesWalkExprs(pScan->pScanPseudoCols, lastRowScanOptSetColDataType, &cxt); lastRowScanOptSetLastTargets(pScan->node.pTargets, cxt.pLastCols, false); + if (pPKTsCol && pScan->node.pTargets->length == 1) { + // when select last(ts),ts from ..., we add another ts to targets + sprintf(pPKTsCol->colName, "#sel_val.%p", pPKTsCol); + nodesListAppend(pScan->node.pTargets, nodesCloneNode((SNode*)pPKTsCol)); + } + if (pNonPKCol && cxt.pLastCols->length == 1 && nodesEqualNode((SNode*)pNonPKCol, 
nodesListGetNode(cxt.pLastCols, 0))) { + // when select last(c1), c1 from ..., we add c1 to targets + sprintf(pNonPKCol->colName, "#sel_val.%p", pNonPKCol); + nodesListAppend(pScan->node.pTargets, nodesCloneNode((SNode*)pNonPKCol)); + } nodesClearList(cxt.pLastCols); } pAgg->hasLastRow = false; diff --git a/tests/parallel_test/cases.task b/tests/parallel_test/cases.task index 15cb1f034f..2795c22a07 100644 --- a/tests/parallel_test/cases.task +++ b/tests/parallel_test/cases.task @@ -69,6 +69,10 @@ ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/func_to_char_timestamp.py -Q 2 ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/func_to_char_timestamp.py -Q 3 ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/func_to_char_timestamp.py -Q 4 +,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/last_cache_scan.py +,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/last_cache_scan.py -Q 2 +,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/last_cache_scan.py -Q 3 +,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/last_cache_scan.py -Q 4 ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/tmqShow.py ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/tmqDropStb.py ,,y,system-test,./pytest.sh python3 ./test.py -f 7-tmq/subscribeStb0.py diff --git a/tests/system-test/2-query/last_cache_scan.py b/tests/system-test/2-query/last_cache_scan.py new file mode 100644 index 0000000000..fb5c8bcee2 --- /dev/null +++ b/tests/system-test/2-query/last_cache_scan.py @@ -0,0 +1,279 @@ +import taos +import sys +import time +import socket +import os +import threading +import math +from datetime import datetime + +from util.log import * +from util.sql import * +from util.cases import * +from util.dnodes import * +from util.common import * +# from tmqCommon import * + +COMPARE_DATA = 0 +COMPARE_LEN = 1 + +class TDTestCase: + def __init__(self): + self.vgroups = 4 + self.ctbNum = 10 + self.rowsPerTbl = 10000 + self.duraion = '1h' + + def 
init(self, conn, logSql, replicaVar=1): + self.replicaVar = int(replicaVar) + tdLog.debug(f"start to excute {__file__}") + tdSql.init(conn.cursor(), False) + + def create_database(self,tsql, dbName,dropFlag=1,vgroups=2,replica=1, duration:str='1d'): + if dropFlag == 1: + tsql.execute("drop database if exists %s"%(dbName)) + + tsql.execute("create database if not exists %s vgroups %d replica %d duration %s CACHEMODEL 'both'"%(dbName, vgroups, replica, duration)) + tdLog.debug("complete to create database %s"%(dbName)) + return + + def create_stable(self,tsql, paraDict): + colString = tdCom.gen_column_type_str(colname_prefix=paraDict["colPrefix"], column_elm_list=paraDict["colSchema"]) + tagString = tdCom.gen_tag_type_str(tagname_prefix=paraDict["tagPrefix"], tag_elm_list=paraDict["tagSchema"]) + sqlString = f"create table if not exists %s.%s (%s) tags (%s)"%(paraDict["dbName"], paraDict["stbName"], colString, tagString) + tdLog.debug("%s"%(sqlString)) + tsql.execute(sqlString) + return + + def create_ctable(self,tsql=None, dbName='dbx',stbName='stb',ctbPrefix='ctb',ctbNum=1,ctbStartIdx=0): + for i in range(ctbNum): + sqlString = "create table %s.%s%d using %s.%s tags(%d, 'tb%d', 'tb%d', %d, %d, %d)" % \ + (dbName,ctbPrefix,i+ctbStartIdx,dbName,stbName,(i+ctbStartIdx) % 5,i+ctbStartIdx,i+ctbStartIdx,i+ctbStartIdx,i+ctbStartIdx,i+ctbStartIdx) + tsql.execute(sqlString) + + tdLog.debug("complete to create %d child tables by %s.%s" %(ctbNum, dbName, stbName)) + return + + def insert_data(self,tsql,dbName,ctbPrefix,ctbNum,rowsPerTbl,batchNum,startTs,tsStep): + tdLog.debug("start to insert data ............") + tsql.execute("use %s" %dbName) + pre_insert = "insert into " + sql = pre_insert + + for i in range(ctbNum): + rowsBatched = 0 + sql += " %s%d values "%(ctbPrefix,i) + for j in range(rowsPerTbl): + if (i < ctbNum/2): + sql += "(%d, %d, %d, %d,%d,%d,%d,true,'binary%d', 'nchar%d') "%(startTs + j*tsStep, j%1000, j%500, j%1000, j%5000, j%5400, j%128, j%10000, j%1000) + 
else: + sql += "(%d, %d, NULL, %d,NULL,%d,%d,true,'binary%d', 'nchar%d') "%(startTs + j*tsStep, j%1000, j%500, j%1000, j%128, j%10000, j%1000) + rowsBatched += 1 + if ((rowsBatched == batchNum) or (j == rowsPerTbl - 1)): + tsql.execute(sql) + rowsBatched = 0 + if j < rowsPerTbl - 1: + sql = "insert into %s%d values " %(ctbPrefix,i) + else: + sql = "insert into " + if sql != pre_insert: + tsql.execute(sql) + tdLog.debug("insert data ............ [OK]") + return + + def prepareTestEnv(self): + tdLog.printNoPrefix("======== prepare test env include database, stable, ctables, and insert data: ") + paraDict = {'dbName': 'test', + 'dropFlag': 1, + 'vgroups': 2, + 'stbName': 'meters', + 'colPrefix': 'c', + 'tagPrefix': 't', + 'colSchema': [{'type': 'INT', 'count':1}, + {'type': 'BIGINT', 'count':1}, + {'type': 'FLOAT', 'count':1}, + {'type': 'DOUBLE', 'count':1}, + {'type': 'smallint', 'count':1}, + {'type': 'tinyint', 'count':1}, + {'type': 'bool', 'count':1}, + {'type': 'binary', 'len':10, 'count':1}, + {'type': 'nchar', 'len':10, 'count':1}], + 'tagSchema': [{'type': 'INT', 'count':1},{'type': 'nchar', 'len':20, 'count':1},{'type': 'binary', 'len':20, 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'smallint', 'count':1},{'type': 'DOUBLE', 'count':1}], + 'ctbPrefix': 't', + 'ctbStartIdx': 0, + 'ctbNum': 100, + 'rowsPerTbl': 10000, + 'batchNum': 3000, + 'startTs': 1537146000000, + 'tsStep': 600000} + + paraDict['vgroups'] = self.vgroups + paraDict['ctbNum'] = self.ctbNum + paraDict['rowsPerTbl'] = self.rowsPerTbl + + tdLog.info("create database") + self.create_database(tsql=tdSql, dbName=paraDict["dbName"], dropFlag=paraDict["dropFlag"], vgroups=paraDict["vgroups"], replica=self.replicaVar, duration=self.duraion) + + tdLog.info("create stb") + self.create_stable(tsql=tdSql, paraDict=paraDict) + + tdLog.info("create child tables") + self.create_ctable(tsql=tdSql, dbName=paraDict["dbName"], \ + stbName=paraDict["stbName"],ctbPrefix=paraDict["ctbPrefix"],\ + 
ctbNum=paraDict["ctbNum"],ctbStartIdx=paraDict["ctbStartIdx"]) + self.insert_data(tsql=tdSql, dbName=paraDict["dbName"],\ + ctbPrefix=paraDict["ctbPrefix"],ctbNum=paraDict["ctbNum"],\ + rowsPerTbl=paraDict["rowsPerTbl"],batchNum=paraDict["batchNum"],\ + startTs=paraDict["startTs"],tsStep=paraDict["tsStep"]) + return + + def check_explain_res_has_row(self, plan_str_expect: str, rows, sql): + plan_found = False + for row in rows: + if str(row).find(plan_str_expect) >= 0: + tdLog.debug("plan: [%s] found in: [%s]" % (plan_str_expect, str(row))) + plan_found = True + break + if not plan_found: + tdLog.exit("plan: %s not found in res: [%s] in sql: %s" % (plan_str_expect, str(rows), sql)) + + def check_explain_res_no_row(self, plan_str_not_expect: str, res, sql): + for row in res: + if str(row).find(plan_str_not_expect) >= 0: + tdLog.exit('plan: [%s] found in: [%s] for sql: %s' % (plan_str_not_expect, str(row), sql)) + + def explain_sql(self, sql: str): + sql = "explain verbose true " + sql + tdSql.query(sql, queryTimes=1) + return tdSql.queryResult + + def explain_and_check_res(self, sqls, hasLastRowScanRes): + for sql, has_last in zip(sqls, hasLastRowScanRes): + res = self.explain_sql(sql) + if has_last == 1: + self.check_explain_res_has_row("Last Row Scan", res, sql) + else: + self.check_explain_res_no_row("Last Row Scan", res, sql) + + def format_sqls(self, sql_template, select_items): + sqls = [] + for item in select_items: + sqls.append(sql_template % item) + return sqls + + def query_check_one(self, sql, res_expect): + if res_expect is not None: + tdSql.query(sql, queryTimes=1) + tdSql.checkRows(1) + for i in range(0, tdSql.queryCols): + tdSql.checkData(0, i, res_expect[i]) + tdLog.info('%s check res col: %d succeed value: %s' % (sql, i, str(res_expect[i]))) + + def query_check_sqls(self, sqls, has_last_row_scan_res, res_expect): + for sql, has_last, res in zip(sqls, has_last_row_scan_res, res_expect): + if has_last == 1: + self.query_check_one(sql, res) + + def 
test_last_cache_scan(self): + sql_template = 'select %s from meters' + select_items = [ + "last(ts), ts", "last(ts), c1", "last(ts), c2", "last(ts), c3",\ + "last(ts), c4", "last(ts), tbname", "last(ts), t1", "last(ts), ts, ts"] + has_last_row_scan_res = [1, 0, 0, 0, 0, 0, 0, 1] + res_expect = [ + ["2018-11-25 19:30:00.000", "2018-11-25 19:30:00.000"], + None, None, None, None, None, None, + ["2018-11-25 19:30:00.000", "2018-11-25 19:30:00.000", "2018-11-25 19:30:00.000"] + ] + sqls = self.format_sqls(sql_template, select_items) + self.explain_and_check_res(sqls, has_last_row_scan_res) + self.query_check_sqls(sqls, has_last_row_scan_res, res_expect) + + select_items = ["last(c1),ts", "last(c1), c1", "last(c1), c2", "last(c1), c3",\ + "last(c1), c4", "last(c1), tbname", "last(c1), t1", "last(c1), ts, ts", "last(c1), c1, c1"] + has_last_row_scan_res = [1, 1, 0, 0, 0, 0, 0, 1, 1] + res_expect = [ + [999, "2018-11-25 19:30:00.000"], + [999, 999], None, None, None, None, None, + [999, "2018-11-25 19:30:00.000", "2018-11-25 19:30:00.000"], + [999,999,999] + ] + sqls = self.format_sqls(sql_template, select_items) + self.explain_and_check_res(sqls, has_last_row_scan_res) + self.query_check_sqls(sqls, has_last_row_scan_res, res_expect) + + select_items = ["last(c4),ts", "last(c4), c1", "last(c4), c2", "last(c4), c3",\ + "last(c4), c4", "last(c4), tbname", "last(c4), t1"] + has_last_row_scan_res = [1, 0, 0, 0, 1, 0, 0] + res_expect = [ + [4999.000000000000000, "2018-11-25 19:30:00.000"], + None,None,None, + [4999.000000000000000, 4999.000000000000000] + ] + sqls = self.format_sqls(sql_template, select_items) + self.explain_and_check_res(sqls, has_last_row_scan_res) + self.query_check_sqls(sqls, has_last_row_scan_res, res_expect) + + select_items = ["last(c8), ts", "last(c8), c1", "last(c8), c8", "last(c8), tbname", \ + "last(c8), t1", "last(c8), c8, c8", "last(c8), ts, ts"] + has_last_row_scan_res = [1, 0, 1, 0, 0, 1, 1] + res_expect = [ + ["binary9999", "2018-11-25 
19:30:00.000"], + None, + ["binary9999", "binary9999"], + None, None, + ["binary9999", "binary9999", "binary9999"], + ["binary9999", "2018-11-25 19:30:00.000", "2018-11-25 19:30:00.000"] + ] + sqls = self.format_sqls(sql_template, select_items) + self.explain_and_check_res(sqls, has_last_row_scan_res) + self.query_check_sqls(sqls, has_last_row_scan_res, res_expect) + + # c2, c4 in last row of t5,t6,t7,t8,t9 will always be NULL + sql_template = 'select %s from t5' + select_items = ["last(c4), ts", "last(c4), c4", "last(c4), c4, c4", "last(c4), ts, ts"] + has_last_row_scan_res = [1,1,1,1] + + sqls = self.format_sqls(sql_template, select_items) + self.explain_and_check_res(sqls, has_last_row_scan_res) + for sql in sqls: + tdSql.query(sql, queryTimes=1) + tdSql.checkRows(0) + + sql_template = 'select %s from meters' + select_items = [ + "last_row(ts), last(ts)", + "last_row(c1), last(c1)", + "last_row(c1), c1,c3, ts" + ] + has_last_row_scan_res = [0,0,1] + sqls = self.format_sqls(sql_template, select_items) + self.explain_and_check_res(sqls, has_last_row_scan_res) + #res_expect = [None, None, [999, 999, 499, "2018-11-25 19:30:00.000"]] + #self.query_check_sqls(sqls, has_last_row_scan_res, res_expect) + + sql = "select last(c1), c1, c1+1, c1+2, ts from meters" + res = self.explain_sql(sql) + self.check_explain_res_has_row("Last Row Scan", res, sql) + + tdSql.query(sql) + tdSql.checkRows(1) + tdSql.checkData(0, 0, 999) + tdSql.checkData(0, 1, 999) + tdSql.checkData(0, 2, 1000) + tdSql.checkData(0, 3, 1001) + tdSql.checkData(0, 4, "2018-11-25 19:30:00.000") + + def run(self): + self.prepareTestEnv() + #time.sleep(99999999) + self.test_last_cache_scan() + + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + +event = threading.Event() + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) From 0ed4d1507f35c0c17916ee36b234b2617da4ed95 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Tue, 7 Nov 2023 09:23:06 
+0800 Subject: [PATCH 11/79] feat: support non_sort mode --- include/common/tcommon.h | 5 + include/libs/nodes/plannodes.h | 10 + source/libs/command/inc/commandInt.h | 7 +- source/libs/command/src/explain.c | 52 ++- source/libs/executor/src/mergejoinoperator.c | 2 +- source/libs/executor/src/mergeoperator.c | 419 ++++++++++++++++--- source/libs/executor/src/sortoperator.c | 194 --------- source/libs/nodes/src/nodesCloneFuncs.c | 1 + source/libs/nodes/src/nodesCodeFuncs.c | 7 + source/libs/nodes/src/nodesMsgFuncs.c | 7 + source/libs/planner/inc/planInt.h | 1 + source/libs/planner/src/planPhysiCreater.c | 54 ++- source/libs/planner/src/planSpliter.c | 21 +- source/libs/planner/src/planValidator.c | 161 +++++++ source/libs/planner/src/planner.c | 3 + 15 files changed, 636 insertions(+), 308 deletions(-) create mode 100755 source/libs/planner/src/planValidator.c diff --git a/include/common/tcommon.h b/include/common/tcommon.h index 72aab9adf0..c7b5858409 100644 --- a/include/common/tcommon.h +++ b/include/common/tcommon.h @@ -364,6 +364,11 @@ typedef struct SSortExecInfo { int32_t readBytes; // read io bytes } SSortExecInfo; +typedef struct SNonSortExecInfo { + +} SNonSortExecInfo; + + typedef struct STUidTagInfo { char* name; uint64_t uid; diff --git a/include/libs/nodes/plannodes.h b/include/libs/nodes/plannodes.h index dbbe1d92dc..b1f2c4390c 100644 --- a/include/libs/nodes/plannodes.h +++ b/include/libs/nodes/plannodes.h @@ -40,6 +40,13 @@ typedef enum EGroupAction { GROUP_ACTION_CLEAR } EGroupAction; +typedef enum EMergeType { + MERGE_TYPE_SORT = 1, + MERGE_TYPE_NON_SORT, + MERGE_TYPE_COLUMNS, + MERGE_TYPE_MAX_VALUE +} EMergeType; + typedef struct SLogicNode { ENodeType type; bool dynamicOp; @@ -221,6 +228,8 @@ typedef struct SMergeLogicNode { SNodeList* pInputs; int32_t numOfChannels; int32_t srcGroupId; + bool colsMerge; + bool needSort; bool groupSort; bool ignoreGroupId; bool inputWithGroupId; @@ -531,6 +540,7 @@ typedef struct SExchangePhysiNode { typedef 
struct SMergePhysiNode { SPhysiNode node; + EMergeType type; SNodeList* pMergeKeys; SNodeList* pTargets; int32_t numOfChannels; diff --git a/source/libs/command/inc/commandInt.h b/source/libs/command/inc/commandInt.h index c704eb3951..1171e386d1 100644 --- a/source/libs/command/inc/commandInt.h +++ b/source/libs/command/inc/commandInt.h @@ -37,7 +37,7 @@ extern "C" { #define EXPLAIN_TABLE_COUNT_SCAN_FORMAT "Table Count Row Scan on %s" #define EXPLAIN_PROJECTION_FORMAT "Projection" #define EXPLAIN_JOIN_FORMAT "%s" -#define EXPLAIN_AGG_FORMAT "Aggragate" +#define EXPLAIN_AGG_FORMAT "%s" #define EXPLAIN_INDEF_ROWS_FORMAT "Indefinite Rows Function" #define EXPLAIN_EXCHANGE_FORMAT "Data Exchange %d:1" #define EXPLAIN_SORT_FORMAT "Sort" @@ -85,7 +85,8 @@ extern "C" { #define EXPLAIN_COLUMNS_FORMAT "columns=%d" #define EXPLAIN_PSEUDO_COLUMNS_FORMAT "pseudo_columns=%d" #define EXPLAIN_WIDTH_FORMAT "width=%d" -#define EXPLAIN_TABLE_SCAN_FORMAT "order=[asc|%d desc|%d]" +#define EXPLAIN_SCAN_ORDER_FORMAT "order=[asc|%d desc|%d]" +#define EXPLAIN_SCAN_MODE_FORMAT "mode=%s" #define EXPLAIN_GROUPS_FORMAT "groups=%d" #define EXPLAIN_WIDTH_FORMAT "width=%d" #define EXPLAIN_INTERVAL_VALUE_FORMAT "interval=%" PRId64 "%c" @@ -105,6 +106,7 @@ extern "C" { #define EXPLAIN_UID_SLOT_FORMAT "uid_slot=%d,%d" #define EXPLAIN_SRC_SCAN_FORMAT "src_scan=%d,%d" #define EXPLAIN_PLAN_BLOCKING "blocking=%d" +#define EXPLAIN_MERGE_MODE_FORMAT "mode=%s" #define COMMAND_RESET_LOG "resetLog" #define COMMAND_SCHEDULE_POLICY "schedulePolicy" @@ -157,6 +159,7 @@ typedef struct SExplainCtx { #define EXPLAIN_ORDER_STRING(_order) ((ORDER_ASC == _order) ? "asc" : ORDER_DESC == _order ? "desc" : "unknown") #define EXPLAIN_JOIN_STRING(_type) ((JOIN_TYPE_INNER == _type) ? "Inner join" : "Join") +#define EXPLAIN_MERGE_MODE_STRING(_mode) ((_mode) == MERGE_TYPE_SORT ? "sort" : ((_mode) == MERGE_TYPE_NON_SORT ? 
"merge" : "column")) #define INVERAL_TIME_FROM_PRECISION_TO_UNIT(_t, _u, _p) (((_u) == 'n' || (_u) == 'y') ? (_t) : (convertTimeFromPrecisionToUnit(_t, _p, _u))) diff --git a/source/libs/command/src/explain.c b/source/libs/command/src/explain.c index 27cfaab3cf..185e23590a 100644 --- a/source/libs/command/src/explain.c +++ b/source/libs/command/src/explain.c @@ -284,10 +284,34 @@ int32_t qExplainResAppendRow(SExplainCtx *ctx, char *tbuf, int32_t len, int32_t return TSDB_CODE_SUCCESS; } -static uint8_t getIntervalPrecision(SIntervalPhysiNode *pIntNode) { +static uint8_t qExplainGetIntervalPrecision(SIntervalPhysiNode *pIntNode) { return ((SColumnNode *)pIntNode->window.pTspk)->node.resType.precision; } +static char* qExplainGetScanMode(STableScanPhysiNode* pScan) { + bool isGroupByTbname = false; + bool isGroupByTag = false; + bool seq = false; + bool groupOrder = false; + if (pScan->pGroupTags && LIST_LENGTH(pScan->pGroupTags) == 1) { + SNode* p = nodesListGetNode(pScan->pGroupTags, 0); + if (QUERY_NODE_FUNCTION == nodeType(p) && (strcmp(((struct SFunctionNode*)p)->functionName, "tbname") == 0)) { + isGroupByTbname = true; + } + } + + isGroupByTag = (NULL != pScan->pGroupTags) && !isGroupByTbname; + if ((((!isGroupByTag) || isGroupByTbname) && pScan->groupSort) || (isGroupByTag && (pScan->groupSort || pScan->scan.groupOrderScan))) { + return "seq_grp_order"; + } + + if ((isGroupByTbname && (pScan->groupSort || pScan->scan.groupOrderScan)) || (isGroupByTag && (pScan->groupSort || pScan->scan.groupOrderScan))) { + return "grp_order"; + } + + return "ts_order"; +} + int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, int32_t level) { int32_t tlen = 0; bool isVerboseLine = false; @@ -360,7 +384,9 @@ int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, i } EXPLAIN_ROW_APPEND(EXPLAIN_WIDTH_FORMAT, pTblScanNode->scan.node.pOutputDataBlockDesc->totalRowSize); EXPLAIN_ROW_APPEND(EXPLAIN_BLANK_FORMAT); - 
EXPLAIN_ROW_APPEND(EXPLAIN_TABLE_SCAN_FORMAT, pTblScanNode->scanSeq[0], pTblScanNode->scanSeq[1]); + EXPLAIN_ROW_APPEND(EXPLAIN_SCAN_ORDER_FORMAT, pTblScanNode->scanSeq[0], pTblScanNode->scanSeq[1]); + EXPLAIN_ROW_APPEND(EXPLAIN_BLANK_FORMAT); + EXPLAIN_ROW_APPEND(EXPLAIN_SCAN_MODE_FORMAT, qExplainGetScanMode(pTblScanNode)); EXPLAIN_ROW_APPEND(EXPLAIN_RIGHT_PARENTHESIS_FORMAT); EXPLAIN_ROW_END(); QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level)); @@ -599,7 +625,7 @@ int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, i } case QUERY_NODE_PHYSICAL_PLAN_HASH_AGG: { SAggPhysiNode *pAggNode = (SAggPhysiNode *)pNode; - EXPLAIN_ROW_NEW(level, EXPLAIN_AGG_FORMAT); + EXPLAIN_ROW_NEW(level, EXPLAIN_AGG_FORMAT, (pAggNode->pGroupKeys ? "Group" : "Aggragate")); EXPLAIN_ROW_APPEND(EXPLAIN_LEFT_PARENTHESIS_FORMAT); if (pResNode->pExecInfo) { QRY_ERR_RET(qExplainBufAppendExecInfo(pResNode->pExecInfo, tbuf, &tlen)); @@ -841,7 +867,7 @@ int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, i EXPLAIN_ROW_APPEND_SLIMIT(pIntNode->window.node.pSlimit); EXPLAIN_ROW_END(); QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level + 1)); - uint8_t precision = getIntervalPrecision(pIntNode); + uint8_t precision = qExplainGetIntervalPrecision(pIntNode); EXPLAIN_ROW_NEW(level + 1, EXPLAIN_TIME_WINDOWS_FORMAT, INVERAL_TIME_FROM_PRECISION_TO_UNIT(pIntNode->interval, pIntNode->intervalUnit, precision), pIntNode->intervalUnit, pIntNode->offset, getPrecisionUnit(precision), @@ -893,7 +919,7 @@ int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, i EXPLAIN_ROW_APPEND_SLIMIT(pIntNode->window.node.pSlimit); EXPLAIN_ROW_END(); QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level + 1)); - uint8_t precision = getIntervalPrecision(pIntNode); + uint8_t precision = qExplainGetIntervalPrecision(pIntNode); EXPLAIN_ROW_NEW(level + 1, EXPLAIN_TIME_WINDOWS_FORMAT, INVERAL_TIME_FROM_PRECISION_TO_UNIT(pIntNode->interval, 
pIntNode->intervalUnit, precision), pIntNode->intervalUnit, pIntNode->offset, getPrecisionUnit(precision), @@ -1119,23 +1145,13 @@ int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, i EXPLAIN_ROW_APPEND(EXPLAIN_INPUT_ORDER_FORMAT, EXPLAIN_ORDER_STRING(pMergeNode->node.inputTsOrder)); EXPLAIN_ROW_APPEND(EXPLAIN_BLANK_FORMAT); EXPLAIN_ROW_APPEND(EXPLAIN_OUTPUT_ORDER_TYPE_FORMAT, EXPLAIN_ORDER_STRING(pMergeNode->node.outputTsOrder)); + EXPLAIN_ROW_APPEND(EXPLAIN_BLANK_FORMAT); + EXPLAIN_ROW_APPEND(EXPLAIN_MERGE_MODE_FORMAT, EXPLAIN_MERGE_MODE_STRING(pMergeNode->type)); EXPLAIN_ROW_APPEND(EXPLAIN_RIGHT_PARENTHESIS_FORMAT); EXPLAIN_ROW_END(); QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level)); if (EXPLAIN_MODE_ANALYZE == ctx->mode) { - // sort key - EXPLAIN_ROW_NEW(level + 1, "Merge Key: "); - if (pResNode->pExecInfo) { - for (int32_t i = 0; i < LIST_LENGTH(pMergeNode->pMergeKeys); ++i) { - SOrderByExprNode *ptn = (SOrderByExprNode *)nodesListGetNode(pMergeNode->pMergeKeys, i); - EXPLAIN_ROW_APPEND("%s ", nodesGetNameFromColumnNode(ptn->pExpr)); - } - } - - EXPLAIN_ROW_END(); - QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level)); - // sort method EXPLAIN_ROW_NEW(level + 1, "Sort Method: "); @@ -1419,7 +1435,7 @@ int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, i EXPLAIN_ROW_APPEND_SLIMIT(pIntNode->window.node.pSlimit); EXPLAIN_ROW_END(); QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level + 1)); - uint8_t precision = getIntervalPrecision(pIntNode); + uint8_t precision = qExplainGetIntervalPrecision(pIntNode); EXPLAIN_ROW_NEW(level + 1, EXPLAIN_TIME_WINDOWS_FORMAT, INVERAL_TIME_FROM_PRECISION_TO_UNIT(pIntNode->interval, pIntNode->intervalUnit, precision), pIntNode->intervalUnit, pIntNode->offset, getPrecisionUnit(precision), diff --git a/source/libs/executor/src/mergejoinoperator.c b/source/libs/executor/src/mergejoinoperator.c index 2348a3c97b..b4461f20b1 100644 --- 
a/source/libs/executor/src/mergejoinoperator.c +++ b/source/libs/executor/src/mergejoinoperator.c @@ -239,7 +239,7 @@ SOperatorInfo* createMergeJoinOperatorInfo(SOperatorInfo** pDownstream, int32_t numOfDownstream = 2; } else { pInfo->downstreamResBlkId[0] = getOperatorResultBlockId(pDownstream[0], 0); - pInfo->downstreamResBlkId[1] = getOperatorResultBlockId(pDownstream[1], 1); + pInfo->downstreamResBlkId[1] = getOperatorResultBlockId(pDownstream[1], 0); } int32_t numOfCols = 0; diff --git a/source/libs/executor/src/mergeoperator.c b/source/libs/executor/src/mergeoperator.c index 9650ac4cb5..204a9458b8 100755 --- a/source/libs/executor/src/mergeoperator.c +++ b/source/libs/executor/src/mergeoperator.c @@ -23,52 +23,57 @@ typedef struct SSortMergeInfo { SArray* pSortInfo; SSortHandle* pSortHandle; STupleHandle* prefetchedTuple; + int32_t bufPageSize; + uint32_t sortBufSize; // max buffer size for in-memory sort + SSDataBlock* pIntermediateBlock; // to hold the intermediate result + SSDataBlock* pInputBlock; + SColMatchInfo matchInfo; } SSortMergeInfo; typedef struct SNonSortMergeInfo { - + int32_t lastSourceIdx; + int32_t sourceWorkIdx; + int32_t sourceNum; + int32_t* pSourceStatus; } SNonSortMergeInfo; -typedef struct SColumnMergeInfo { - -} SColumnMergeInfo; +typedef struct SColsMergeInfo { + uint64_t srcBlkIds[2]; +} SColsMergeInfo; typedef struct SMultiwayMergeOperatorInfo { SOptrBasicInfo binfo; + EMergeType type; union { SSortMergeInfo sortMergeInfo; SNonSortMergeInfo nsortMergeInfo; - SColumnMergeInfo colMergeInfo; + SColsMergeInfo colsMergeInfo; }; - int32_t bufPageSize; - uint32_t sortBufSize; // max buffer size for in-memory sort SLimitInfo limitInfo; - SColMatchInfo matchInfo; - SSDataBlock* pInputBlock; - SSDataBlock* pIntermediateBlock; // to hold the intermediate result - int64_t startTs; // sort start time bool groupMerge; bool ignoreGroupId; uint64_t groupId; bool inputWithGroupId; } SMultiwayMergeOperatorInfo; -int32_t 
openMultiwayMergeOperator(SOperatorInfo* pOperator) { +SSDataBlock* sortMergeloadNextDataBlock(void* param) { + SOperatorInfo* pOperator = (SOperatorInfo*)param; + SSDataBlock* pBlock = pOperator->fpSet.getNextFn(pOperator); + return pBlock; +} + +int32_t openSortMergeOperator(SOperatorInfo* pOperator) { SMultiwayMergeOperatorInfo* pInfo = pOperator->info; SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + SSortMergeInfo* pSortMergeInfo = &pInfo->sortMergeInfo; - if (OPTR_IS_OPENED(pOperator)) { - return TSDB_CODE_SUCCESS; - } + int32_t numOfBufPage = pSortMergeInfo->sortBufSize / pSortMergeInfo->bufPageSize; - pInfo->startTs = taosGetTimestampUs(); - int32_t numOfBufPage = pInfo->sortBufSize / pInfo->bufPageSize; + pSortMergeInfo->pSortHandle = tsortCreateSortHandle(pSortMergeInfo->pSortInfo, SORT_MULTISOURCE_MERGE, pSortMergeInfo->bufPageSize, numOfBufPage, + pSortMergeInfo->pInputBlock, pTaskInfo->id.str, 0, 0, 0); - pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_MULTISOURCE_MERGE, pInfo->bufPageSize, numOfBufPage, - pInfo->pInputBlock, pTaskInfo->id.str, 0, 0, 0); - - tsortSetFetchRawDataFp(pInfo->pSortHandle, loadNextDataBlock, NULL, NULL); - tsortSetCompareGroupId(pInfo->pSortHandle, pInfo->groupMerge); + tsortSetFetchRawDataFp(pSortMergeInfo->pSortHandle, sortMergeloadNextDataBlock, NULL, NULL); + tsortSetCompareGroupId(pSortMergeInfo->pSortHandle, pInfo->groupMerge); for (int32_t i = 0; i < pOperator->numOfDownstream; ++i) { SOperatorInfo* pDownstream = pOperator->pDownstream[i]; @@ -80,19 +85,280 @@ int32_t openMultiwayMergeOperator(SOperatorInfo* pOperator) { ps->param = pDownstream; ps->onlyRef = true; - tsortAddSource(pInfo->pSortHandle, ps); + tsortAddSource(pSortMergeInfo->pSortHandle, ps); } - int32_t code = tsortOpen(pInfo->pSortHandle); + return tsortOpen(pSortMergeInfo->pSortHandle); +} + +static void doGetSortedBlockData(SMultiwayMergeOperatorInfo* pInfo, SSortHandle* pHandle, int32_t capacity, + SSDataBlock* p, bool* newgroup) 
{ + SSortMergeInfo* pSortMergeInfo = &pInfo->sortMergeInfo; + *newgroup = false; + + while (1) { + STupleHandle* pTupleHandle = NULL; + if (pInfo->groupMerge || pInfo->inputWithGroupId) { + if (pSortMergeInfo->prefetchedTuple == NULL) { + pTupleHandle = tsortNextTuple(pHandle); + } else { + pTupleHandle = pSortMergeInfo->prefetchedTuple; + pSortMergeInfo->prefetchedTuple = NULL; + uint64_t gid = tsortGetGroupId(pTupleHandle); + if (gid != pInfo->groupId) { + *newgroup = true; + pInfo->groupId = gid; + } + } + } else { + pTupleHandle = tsortNextTuple(pHandle); + pInfo->groupId = 0; + } + + if (pTupleHandle == NULL) { + break; + } + + if (pInfo->groupMerge || pInfo->inputWithGroupId) { + uint64_t tupleGroupId = tsortGetGroupId(pTupleHandle); + if (pInfo->groupId == 0 || pInfo->groupId == tupleGroupId) { + appendOneRowToDataBlock(p, pTupleHandle); + p->info.id.groupId = tupleGroupId; + pInfo->groupId = tupleGroupId; + } else { + if (p->info.rows == 0) { + appendOneRowToDataBlock(p, pTupleHandle); + p->info.id.groupId = pInfo->groupId = tupleGroupId; + } else { + pSortMergeInfo->prefetchedTuple = pTupleHandle; + break; + } + } + } else { + appendOneRowToDataBlock(p, pTupleHandle); + } + + if (p->info.rows >= capacity) { + break; + } + } +} + +SSDataBlock* doSortMerge(SOperatorInfo* pOperator) { + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + SMultiwayMergeOperatorInfo* pInfo = pOperator->info; + SSortMergeInfo* pSortMergeInfo = &pInfo->sortMergeInfo; + SSortHandle* pHandle = pSortMergeInfo->pSortHandle; + SSDataBlock* pDataBlock = pInfo->binfo.pRes; + SArray* pColMatchInfo = pInfo->matchInfo.pList; + int32_t capacity = pOperator->resultInfo.capacity; + + qDebug("start to merge final sorted rows, %s", GET_TASKID(pTaskInfo)); + + blockDataCleanup(pDataBlock); + + if (pSortMergeInfo->pIntermediateBlock == NULL) { + pSortMergeInfo->pIntermediateBlock = tsortGetSortedDataBlock(pHandle); + if (pSortMergeInfo->pIntermediateBlock == NULL) { + terrno = 
TSDB_CODE_OUT_OF_MEMORY; + return NULL; + } + blockDataEnsureCapacity(pSortMergeInfo->pIntermediateBlock, capacity); + } else { + blockDataCleanup(pSortMergeInfo->pIntermediateBlock); + } + + SSDataBlock* p = pSortMergeInfo->pIntermediateBlock; + bool newgroup = false; + + while (1) { + doGetSortedBlockData(pInfo, pHandle, capacity, p, &newgroup); + if (p->info.rows == 0) { + break; + } + + if (newgroup) { + resetLimitInfoForNextGroup(&pInfo->limitInfo); + } + + applyLimitOffset(&pInfo->limitInfo, p, pTaskInfo); + + if (p->info.rows > 0) { + break; + } + } + + if (p->info.rows > 0) { + int32_t numOfCols = taosArrayGetSize(pColMatchInfo); + for (int32_t i = 0; i < numOfCols; ++i) { + SColMatchItem* pmInfo = taosArrayGet(pColMatchInfo, i); + + SColumnInfoData* pSrc = taosArrayGet(p->pDataBlock, pmInfo->srcSlotId); + SColumnInfoData* pDst = taosArrayGet(pDataBlock->pDataBlock, pmInfo->dstSlotId); + colDataAssign(pDst, pSrc, p->info.rows, &pDataBlock->info); + } + + pDataBlock->info.rows = p->info.rows; + pDataBlock->info.scanFlag = p->info.scanFlag; + if (pInfo->ignoreGroupId) { + pDataBlock->info.id.groupId = 0; + } else { + pDataBlock->info.id.groupId = pInfo->groupId; + } + pDataBlock->info.dataLoad = 1; + } + + qDebug("%s get sorted block, groupId:0x%" PRIx64 " rows:%" PRId64 , GET_TASKID(pTaskInfo), pDataBlock->info.id.groupId, + pDataBlock->info.rows); + + return (pDataBlock->info.rows > 0) ? 
pDataBlock : NULL; +} + + +int32_t getSortMergeExplainExecInfo(SOperatorInfo* pOptr, void** pOptrExplain, uint32_t* len) { + SSortExecInfo* pSortExecInfo = taosMemoryCalloc(1, sizeof(SSortExecInfo)); + + SMultiwayMergeOperatorInfo* pInfo = (SMultiwayMergeOperatorInfo*)pOptr->info; + SSortMergeInfo* pSortMergeInfo = &pInfo->sortMergeInfo; + + *pSortExecInfo = tsortGetSortExecInfo(pSortMergeInfo->pSortHandle); + *pOptrExplain = pSortExecInfo; + + *len = sizeof(SSortExecInfo); + return TSDB_CODE_SUCCESS; +} + + +void destroySortMergeOperatorInfo(void* param) { + SSortMergeInfo* pSortMergeInfo = param; + pSortMergeInfo->pInputBlock = blockDataDestroy(pSortMergeInfo->pInputBlock); + pSortMergeInfo->pIntermediateBlock = blockDataDestroy(pSortMergeInfo->pIntermediateBlock); + + tsortDestroySortHandle(pSortMergeInfo->pSortHandle); + taosArrayDestroy(pSortMergeInfo->pSortInfo); +} + +#define NON_SORT_NEXT_SRC(_info, _idx) ((++(_idx) >= (_info)->sourceNum) ? ((_info)->sourceWorkIdx) : (_idx)) + +int32_t openNonSortMergeOperator(SOperatorInfo* pOperator) { + SMultiwayMergeOperatorInfo* pInfo = pOperator->info; + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + SNonSortMergeInfo* pNonSortMergeInfo = &pInfo->nsortMergeInfo; + + pNonSortMergeInfo->sourceWorkIdx = 0; + pNonSortMergeInfo->sourceNum = pOperator->numOfDownstream; + pNonSortMergeInfo->lastSourceIdx = -1; + pNonSortMergeInfo->pSourceStatus = taosMemoryCalloc(pOperator->numOfDownstream, sizeof(*pNonSortMergeInfo->pSourceStatus)); + if (NULL == pNonSortMergeInfo->pSourceStatus) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + + for (int32_t i = 0; i < pOperator->numOfDownstream; ++i) { + pNonSortMergeInfo->pSourceStatus[i] = i; + } + + return TSDB_CODE_SUCCESS; +} + +SSDataBlock* doNonSortMerge(SOperatorInfo* pOperator) { + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + SMultiwayMergeOperatorInfo* pInfo = pOperator->info; + SNonSortMergeInfo* pNonSortMerge = &pInfo->nsortMergeInfo; + SSDataBlock* pBlock = 
NULL; + + qDebug("start to merge no sorted rows, %s", GET_TASKID(pTaskInfo)); + + int32_t idx = NON_SORT_NEXT_SRC(pNonSortMerge, pNonSortMerge->lastSourceIdx); + while (idx < pNonSortMerge->sourceNum) { + pBlock = getNextBlockFromDownstream(pOperator, pNonSortMerge->pSourceStatus[idx]); + if (NULL == pBlock) { + TSWAP(pNonSortMerge->pSourceStatus[pNonSortMerge->sourceWorkIdx], pNonSortMerge->pSourceStatus[idx]); + pNonSortMerge->sourceWorkIdx++; + idx = NON_SORT_NEXT_SRC(pNonSortMerge, idx); + continue; + } + break; + } + + return pBlock; +} + +void destroyNonSortMergeOperatorInfo(void* param) { + SNonSortMergeInfo* pNonSortMerge = param; + taosMemoryFree(pNonSortMerge->pSourceStatus); +} + +int32_t getNonSortMergeExplainExecInfo(SOperatorInfo* pOptr, void** pOptrExplain, uint32_t* len) { + return TSDB_CODE_SUCCESS; +} + + +int32_t openColsMergeOperator(SOperatorInfo* pOperator) { + return TSDB_CODE_SUCCESS; +} + +SSDataBlock* doColsMerge(SOperatorInfo* pOperator) { + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + SMultiwayMergeOperatorInfo* pInfo = pOperator->info; + SSDataBlock* pBlock = NULL; + + qDebug("start to merge no sorted rows, %s", GET_TASKID(pTaskInfo)); + + for (int32_t i = 0; i < 2; ++i) { + pBlock = getNextBlockFromDownstream(pOperator, i); + if (NULL == pBlock) { + TSWAP(pNonSortMerge->pSourceStatus[pNonSortMerge->sourceWorkIdx], pNonSortMerge->pSourceStatus[idx]); + pNonSortMerge->sourceWorkIdx++; + idx = NON_SORT_NEXT_SRC(pNonSortMerge, idx); + continue; + } + break; + } + + return pBlock; +} + +void destroyColsMergeOperatorInfo(void* param) { +} + +int32_t getColsMergeExplainExecInfo(SOperatorInfo* pOptr, void** pOptrExplain, uint32_t* len) { + return TSDB_CODE_SUCCESS; +} + + +SOperatorFpSet gMultiwayMergeFps[MERGE_TYPE_MAX_VALUE] = { + {0}, + {._openFn = openSortMergeOperator, .getNextFn = doSortMerge, .closeFn = destroySortMergeOperatorInfo, .getExplainFn = getSortMergeExplainExecInfo}, + {._openFn = openNonSortMergeOperator, .getNextFn = 
doNonSortMerge, .closeFn = destroyNonSortMergeOperatorInfo, .getExplainFn = getNonSortMergeExplainExecInfo}, + {._openFn = openColsMergeOperator, .getNextFn = doColsMerge, .closeFn = destroyColsMergeOperatorInfo, .getExplainFn = getColsMergeExplainExecInfo}, +}; + + +int32_t openMultiwayMergeOperator(SOperatorInfo* pOperator) { + int32_t code = 0; + SMultiwayMergeOperatorInfo* pInfo = pOperator->info; + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + + if (OPTR_IS_OPENED(pOperator)) { + return TSDB_CODE_SUCCESS; + } + + int64_t startTs = taosGetTimestampUs(); + + if (NULL != gMultiwayMergeFps[pInfo->type]._openFn) { + code = (*gMultiwayMergeFps[pInfo->type]._openFn)(pOperator); + } + + pOperator->cost.openCost = (taosGetTimestampUs() - startTs) / 1000.0; + pOperator->status = OP_RES_TO_RETURN; + if (code != TSDB_CODE_SUCCESS) { T_LONG_JMP(pTaskInfo->env, terrno); } - pOperator->cost.openCost = (taosGetTimestampUs() - pInfo->startTs) / 1000.0; - pOperator->status = OP_RES_TO_RETURN; - OPTR_SET_OPENED(pOperator); - return TSDB_CODE_SUCCESS; + return code; } SSDataBlock* doMultiwayMerge(SOperatorInfo* pOperator) { @@ -100,7 +366,8 @@ SSDataBlock* doMultiwayMerge(SOperatorInfo* pOperator) { return NULL; } - SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + SSDataBlock* pBlock = NULL; + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; SMultiwayMergeOperatorInfo* pInfo = pOperator->info; int32_t code = pOperator->fpSet._openFn(pOperator); @@ -108,8 +375,9 @@ SSDataBlock* doMultiwayMerge(SOperatorInfo* pOperator) { T_LONG_JMP(pTaskInfo->env, code); } - qDebug("start to merge final sorted rows, %s", GET_TASKID(pTaskInfo)); - SSDataBlock* pBlock = getMultiwaySortedBlockData(pInfo->pSortHandle, pInfo->binfo.pRes, pInfo->matchInfo.pList, pOperator); + if (NULL != gMultiwayMergeFps[pInfo->type].getNextFn) { + pBlock = (*gMultiwayMergeFps[pInfo->type].getNextFn)(pOperator); + } if (pBlock != NULL) { pOperator->resultInfo.totalRows += pBlock->info.rows; } else { @@ -122,26 
+390,24 @@ SSDataBlock* doMultiwayMerge(SOperatorInfo* pOperator) { void destroyMultiwayMergeOperatorInfo(void* param) { SMultiwayMergeOperatorInfo* pInfo = (SMultiwayMergeOperatorInfo*)param; pInfo->binfo.pRes = blockDataDestroy(pInfo->binfo.pRes); - pInfo->pInputBlock = blockDataDestroy(pInfo->pInputBlock); - pInfo->pIntermediateBlock = blockDataDestroy(pInfo->pIntermediateBlock); - - tsortDestroySortHandle(pInfo->pSortHandle); - taosArrayDestroy(pInfo->pSortInfo); taosArrayDestroy(pInfo->matchInfo.pList); + if (NULL != gMultiwayMergeFps[pInfo->type].closeFn) { + (*gMultiwayMergeFps[pInfo->type].closeFn)(&pInfo->sortMergeInfo); + } + taosMemoryFreeClear(param); } int32_t getMultiwayMergeExplainExecInfo(SOperatorInfo* pOptr, void** pOptrExplain, uint32_t* len) { - SSortExecInfo* pSortExecInfo = taosMemoryCalloc(1, sizeof(SSortExecInfo)); - + int32_t code = 0; SMultiwayMergeOperatorInfo* pInfo = (SMultiwayMergeOperatorInfo*)pOptr->info; - *pSortExecInfo = tsortGetSortExecInfo(pInfo->pSortHandle); - *pOptrExplain = pSortExecInfo; + if (NULL != gMultiwayMergeFps[pInfo->type].getExplainFn) { + code = (*gMultiwayMergeFps[pInfo->type].getExplainFn)(pOptr, pOptrExplain, len); + } - *len = sizeof(SSortExecInfo); - return TSDB_CODE_SUCCESS; + return code; } SOperatorInfo* createMultiwayMergeOperatorInfo(SOperatorInfo** downStreams, size_t numStreams, @@ -158,34 +424,59 @@ SOperatorInfo* createMultiwayMergeOperatorInfo(SOperatorInfo** downStreams, size goto _error; } - initLimitInfo(pMergePhyNode->node.pLimit, pMergePhyNode->node.pSlimit, &pInfo->limitInfo); - pInfo->binfo.pRes = createDataBlockFromDescNode(pDescNode); - - int32_t rowSize = pInfo->binfo.pRes->info.rowSize; - int32_t numOfOutputCols = 0; - code = extractColMatchInfo(pMergePhyNode->pTargets, pDescNode, &numOfOutputCols, COL_MATCH_FROM_SLOT_ID, - &pInfo->matchInfo); - if (code != TSDB_CODE_SUCCESS) { - goto _error; - } - - SPhysiNode* pChildNode = (SPhysiNode*)nodesListGetNode(pPhyNode->pChildren, 0); - 
SSDataBlock* pInputBlock = createDataBlockFromDescNode(pChildNode->pOutputDataBlockDesc); - - initResultSizeInfo(&pOperator->resultInfo, 1024); - blockDataEnsureCapacity(pInfo->binfo.pRes, pOperator->resultInfo.capacity); - pInfo->groupMerge = pMergePhyNode->groupSort; pInfo->ignoreGroupId = pMergePhyNode->ignoreGroupId; - pInfo->pSortInfo = createSortInfo(pMergePhyNode->pMergeKeys); - pInfo->pInputBlock = pInputBlock; - size_t numOfCols = taosArrayGetSize(pInfo->binfo.pRes->pDataBlock); - pInfo->bufPageSize = getProperSortPageSize(rowSize, numOfCols); - pInfo->sortBufSize = pInfo->bufPageSize * (numStreams + 1); // one additional is reserved for merged result. pInfo->binfo.inputTsOrder = pMergePhyNode->node.inputTsOrder; pInfo->binfo.outputTsOrder = pMergePhyNode->node.outputTsOrder; pInfo->inputWithGroupId = pMergePhyNode->inputWithGroupId; + pInfo->type = pMergePhyNode->type; + switch (pInfo->type) { + case MERGE_TYPE_SORT: { + SSortMergeInfo* pSortMergeInfo = &pInfo->sortMergeInfo; + initLimitInfo(pMergePhyNode->node.pLimit, pMergePhyNode->node.pSlimit, &pInfo->limitInfo); + pInfo->binfo.pRes = createDataBlockFromDescNode(pDescNode); + + SPhysiNode* pChildNode = (SPhysiNode*)nodesListGetNode(pPhyNode->pChildren, 0); + SSDataBlock* pInputBlock = createDataBlockFromDescNode(pChildNode->pOutputDataBlockDesc); + + initResultSizeInfo(&pOperator->resultInfo, 1024); + blockDataEnsureCapacity(pInfo->binfo.pRes, pOperator->resultInfo.capacity); + + size_t numOfCols = taosArrayGetSize(pInfo->binfo.pRes->pDataBlock); + int32_t rowSize = pInfo->binfo.pRes->info.rowSize; + int32_t numOfOutputCols = 0; + pSortMergeInfo->pSortInfo = createSortInfo(pMergePhyNode->pMergeKeys); + pSortMergeInfo->bufPageSize = getProperSortPageSize(rowSize, numOfCols); + pSortMergeInfo->sortBufSize = pSortMergeInfo->bufPageSize * (numStreams + 1); // one additional is reserved for merged result. 
+ pSortMergeInfo->pInputBlock = pInputBlock; + code = extractColMatchInfo(pMergePhyNode->pTargets, pDescNode, &numOfOutputCols, COL_MATCH_FROM_SLOT_ID, + &pSortMergeInfo->matchInfo); + if (code != TSDB_CODE_SUCCESS) { + goto _error; + } + break; + } + case MERGE_TYPE_NON_SORT: { + SNonSortMergeInfo* pNonSortMerge = &pInfo->nsortMergeInfo; + break; + } + case MERGE_TYPE_COLUMNS: { + SColsMergeInfo* pColsMerge = &pInfo->colsMergeInfo; + pInfo->binfo.pRes = createDataBlockFromDescNode(pDescNode); + initResultSizeInfo(&pOperator->resultInfo, 1); + blockDataEnsureCapacity(pInfo->binfo.pRes, pOperator->resultInfo.capacity); + + pColsMerge->srcBlkIds[0] = getOperatorResultBlockId(downStreams[0], 0); + pColsMerge->srcBlkIds[1] = getOperatorResultBlockId(downStreams[1], 0); + break; + } + default: + qError("Invalid merge type: %d", pInfo->type); + code = TSDB_CODE_INVALID_PARA; + goto _error; + } + setOperatorInfo(pOperator, "MultiwayMergeOperator", QUERY_NODE_PHYSICAL_PLAN_MERGE, false, OP_NOT_OPENED, pInfo, pTaskInfo); pOperator->fpSet = createOperatorFpSet(openMultiwayMergeOperator, doMultiwayMerge, NULL, destroyMultiwayMergeOperatorInfo, optrDefaultBufFn, getMultiwayMergeExplainExecInfo, optrDefaultGetNextExtFn, NULL); diff --git a/source/libs/executor/src/sortoperator.c b/source/libs/executor/src/sortoperator.c index 0ccdb2dd2b..507dbe7ee2 100644 --- a/source/libs/executor/src/sortoperator.c +++ b/source/libs/executor/src/sortoperator.c @@ -675,199 +675,5 @@ _error: return NULL; } -int32_t openSortMergeOperator(SOperatorInfo* pOperator) { - SMultiwayMergeOperatorInfo* pInfo = pOperator->info; - SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; - if (OPTR_IS_OPENED(pOperator)) { - return TSDB_CODE_SUCCESS; - } - - pInfo->startTs = taosGetTimestampUs(); - int32_t numOfBufPage = pInfo->sortBufSize / pInfo->bufPageSize; - - pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_MULTISOURCE_MERGE, pInfo->bufPageSize, numOfBufPage, - pInfo->pInputBlock, 
pTaskInfo->id.str, 0, 0, 0); - - tsortSetFetchRawDataFp(pInfo->pSortHandle, loadNextDataBlock, NULL, NULL); - tsortSetCompareGroupId(pInfo->pSortHandle, pInfo->groupMerge); - - for (int32_t i = 0; i < pOperator->numOfDownstream; ++i) { - SOperatorInfo* pDownstream = pOperator->pDownstream[i]; - if (pDownstream->operatorType == QUERY_NODE_PHYSICAL_PLAN_EXCHANGE) { - pDownstream->fpSet._openFn(pDownstream); - } - - SSortSource* ps = taosMemoryCalloc(1, sizeof(SSortSource)); - ps->param = pDownstream; - ps->onlyRef = true; - - tsortAddSource(pInfo->pSortHandle, ps); - } - - int32_t code = tsortOpen(pInfo->pSortHandle); - if (code != TSDB_CODE_SUCCESS) { - T_LONG_JMP(pTaskInfo->env, terrno); - } - - pOperator->cost.openCost = (taosGetTimestampUs() - pInfo->startTs) / 1000.0; - pOperator->status = OP_RES_TO_RETURN; - - OPTR_SET_OPENED(pOperator); - return TSDB_CODE_SUCCESS; -} - -static void doGetSortedBlockData(SMultiwayMergeOperatorInfo* pInfo, SSortHandle* pHandle, int32_t capacity, - SSDataBlock* p, bool* newgroup) { - *newgroup = false; - - while (1) { - STupleHandle* pTupleHandle = NULL; - if (pInfo->groupMerge || pInfo->inputWithGroupId) { - if (pInfo->prefetchedTuple == NULL) { - pTupleHandle = tsortNextTuple(pHandle); - } else { - pTupleHandle = pInfo->prefetchedTuple; - pInfo->prefetchedTuple = NULL; - uint64_t gid = tsortGetGroupId(pTupleHandle); - if (gid != pInfo->groupId) { - *newgroup = true; - pInfo->groupId = gid; - } - } - } else { - pTupleHandle = tsortNextTuple(pHandle); - pInfo->groupId = 0; - } - - if (pTupleHandle == NULL) { - break; - } - - if (pInfo->groupMerge || pInfo->inputWithGroupId) { - uint64_t tupleGroupId = tsortGetGroupId(pTupleHandle); - if (pInfo->groupId == 0 || pInfo->groupId == tupleGroupId) { - appendOneRowToDataBlock(p, pTupleHandle); - p->info.id.groupId = tupleGroupId; - pInfo->groupId = tupleGroupId; - } else { - if (p->info.rows == 0) { - appendOneRowToDataBlock(p, pTupleHandle); - p->info.id.groupId = pInfo->groupId = 
tupleGroupId; - } else { - pInfo->prefetchedTuple = pTupleHandle; - break; - } - } - } else { - appendOneRowToDataBlock(p, pTupleHandle); - } - - if (p->info.rows >= capacity) { - break; - } - } -} - -SSDataBlock* getSortMergeSortedBlockData(SSortHandle* pHandle, SSDataBlock* pDataBlock, SArray* pColMatchInfo, - SOperatorInfo* pOperator) { - SMultiwayMergeOperatorInfo* pInfo = pOperator->info; - - int32_t capacity = pOperator->resultInfo.capacity; - - SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; - blockDataCleanup(pDataBlock); - - if (pInfo->pIntermediateBlock == NULL) { - pInfo->pIntermediateBlock = tsortGetSortedDataBlock(pHandle); - if (pInfo->pIntermediateBlock == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return NULL; - } - blockDataEnsureCapacity(pInfo->pIntermediateBlock, capacity); - } else { - blockDataCleanup(pInfo->pIntermediateBlock); - } - - SSDataBlock* p = pInfo->pIntermediateBlock; - bool newgroup = false; - - while (1) { - doGetSortedBlockData(pInfo, pHandle, capacity, p, &newgroup); - if (p->info.rows == 0) { - break; - } - - if (newgroup) { - resetLimitInfoForNextGroup(&pInfo->limitInfo); - } - - applyLimitOffset(&pInfo->limitInfo, p, pTaskInfo); - - if (p->info.rows > 0) { - break; - } - } - - if (p->info.rows > 0) { - int32_t numOfCols = taosArrayGetSize(pColMatchInfo); - for (int32_t i = 0; i < numOfCols; ++i) { - SColMatchItem* pmInfo = taosArrayGet(pColMatchInfo, i); - - SColumnInfoData* pSrc = taosArrayGet(p->pDataBlock, pmInfo->srcSlotId); - SColumnInfoData* pDst = taosArrayGet(pDataBlock->pDataBlock, pmInfo->dstSlotId); - colDataAssign(pDst, pSrc, p->info.rows, &pDataBlock->info); - } - - pDataBlock->info.rows = p->info.rows; - pDataBlock->info.scanFlag = p->info.scanFlag; - if (pInfo->ignoreGroupId) { - pDataBlock->info.id.groupId = 0; - } else { - pDataBlock->info.id.groupId = pInfo->groupId; - } - pDataBlock->info.dataLoad = 1; - } - - qDebug("%s get sorted block, groupId:0x%" PRIx64 " rows:%" PRId64 , GET_TASKID(pTaskInfo), 
pDataBlock->info.id.groupId, - pDataBlock->info.rows); - - return (pDataBlock->info.rows > 0) ? pDataBlock : NULL; -} - -SSDataBlock* doSortMerge(SOperatorInfo* pOperator) { - if (pOperator->status == OP_EXEC_DONE) { - return NULL; - } - - SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; - SMultiwayMergeOperatorInfo* pInfo = pOperator->info; - - int32_t code = pOperator->fpSet._openFn(pOperator); - if (code != TSDB_CODE_SUCCESS) { - T_LONG_JMP(pTaskInfo->env, code); - } - - qDebug("start to merge final sorted rows, %s", GET_TASKID(pTaskInfo)); - SSDataBlock* pBlock = getMultiwaySortedBlockData(pInfo->pSortHandle, pInfo->binfo.pRes, pInfo->matchInfo.pList, pOperator); - if (pBlock != NULL) { - pOperator->resultInfo.totalRows += pBlock->info.rows; - } else { - setOperatorCompleted(pOperator); - } - - return pBlock; -} - -int32_t getSortMergeExplainExecInfo(SOperatorInfo* pOptr, void** pOptrExplain, uint32_t* len) { - SSortExecInfo* pSortExecInfo = taosMemoryCalloc(1, sizeof(SSortExecInfo)); - - SMultiwayMergeOperatorInfo* pInfo = (SMultiwayMergeOperatorInfo*)pOptr->info; - - *pSortExecInfo = tsortGetSortExecInfo(pInfo->pSortHandle); - *pOptrExplain = pSortExecInfo; - - *len = sizeof(SSortExecInfo); - return TSDB_CODE_SUCCESS; -} diff --git a/source/libs/nodes/src/nodesCloneFuncs.c b/source/libs/nodes/src/nodesCloneFuncs.c index bd73b02c80..91f40da00e 100644 --- a/source/libs/nodes/src/nodesCloneFuncs.c +++ b/source/libs/nodes/src/nodesCloneFuncs.c @@ -488,6 +488,7 @@ static int32_t logicMergeCopy(const SMergeLogicNode* pSrc, SMergeLogicNode* pDst CLONE_NODE_LIST_FIELD(pInputs); COPY_SCALAR_FIELD(numOfChannels); COPY_SCALAR_FIELD(srcGroupId); + COPY_SCALAR_FIELD(needSort); COPY_SCALAR_FIELD(groupSort); COPY_SCALAR_FIELD(ignoreGroupId); COPY_SCALAR_FIELD(inputWithGroupId); diff --git a/source/libs/nodes/src/nodesCodeFuncs.c b/source/libs/nodes/src/nodesCodeFuncs.c index c2acf0dbdf..cf7ade1b64 100644 --- a/source/libs/nodes/src/nodesCodeFuncs.c +++ 
b/source/libs/nodes/src/nodesCodeFuncs.c @@ -2272,6 +2272,7 @@ static const char* jkMergePhysiPlanSrcGroupId = "SrcGroupId"; static const char* jkMergePhysiPlanGroupSort = "GroupSort"; static const char* jkMergePhysiPlanIgnoreGroupID = "IgnoreGroupID"; static const char* jkMergePhysiPlanInputWithGroupId = "InputWithGroupId"; +static const char* jkMergePhysiPlanType = "Type"; static int32_t physiMergeNodeToJson(const void* pObj, SJson* pJson) { const SMergePhysiNode* pNode = (const SMergePhysiNode*)pObj; @@ -2298,6 +2299,9 @@ static int32_t physiMergeNodeToJson(const void* pObj, SJson* pJson) { if (TSDB_CODE_SUCCESS == code) { code = tjsonAddBoolToObject(pJson, jkMergePhysiPlanInputWithGroupId, pNode->inputWithGroupId); } + if (TSDB_CODE_SUCCESS == code) { + code = tjsonAddIntegerToObject(pJson, jkMergePhysiPlanType, pNode->type); + } return code; } @@ -2324,6 +2328,9 @@ static int32_t jsonToPhysiMergeNode(const SJson* pJson, void* pObj) { if (TSDB_CODE_SUCCESS == code) { code = tjsonGetBoolValue(pJson, jkMergePhysiPlanIgnoreGroupID, &pNode->ignoreGroupId); } + if (TSDB_CODE_SUCCESS == code) { + code = tjsonGetIntValue(pJson, jkMergePhysiPlanType, (int32_t*)&pNode->type); + } return code; } diff --git a/source/libs/nodes/src/nodesMsgFuncs.c b/source/libs/nodes/src/nodesMsgFuncs.c index 99100b2a1d..28a7edd541 100644 --- a/source/libs/nodes/src/nodesMsgFuncs.c +++ b/source/libs/nodes/src/nodesMsgFuncs.c @@ -2683,6 +2683,7 @@ enum { PHY_MERGE_CODE_GROUP_SORT, PHY_MERGE_CODE_IGNORE_GROUP_ID, PHY_MERGE_CODE_INPUT_WITH_GROUP_ID, + PHY_MERGE_CODE_TYPE, }; static int32_t physiMergeNodeToMsg(const void* pObj, STlvEncoder* pEncoder) { @@ -2710,6 +2711,9 @@ static int32_t physiMergeNodeToMsg(const void* pObj, STlvEncoder* pEncoder) { if (TSDB_CODE_SUCCESS == code) { code = tlvEncodeBool(pEncoder, PHY_MERGE_CODE_INPUT_WITH_GROUP_ID, pNode->inputWithGroupId); } + if (TSDB_CODE_SUCCESS == code) { + code = tlvEncodeI32(pEncoder, PHY_MERGE_CODE_TYPE, pNode->type); + } return code; 
} @@ -2745,6 +2749,9 @@ static int32_t msgToPhysiMergeNode(STlvDecoder* pDecoder, void* pObj) { case PHY_MERGE_CODE_INPUT_WITH_GROUP_ID: code = tlvDecodeBool(pTlv, &pNode->inputWithGroupId); break; + case PHY_MERGE_CODE_TYPE: + code = tlvDecodeI32(pTlv, (int32_t*)&pNode->type); + break; default: break; } diff --git a/source/libs/planner/inc/planInt.h b/source/libs/planner/inc/planInt.h index 83a4e9ced8..e2a4ded5a9 100644 --- a/source/libs/planner/inc/planInt.h +++ b/source/libs/planner/inc/planInt.h @@ -43,6 +43,7 @@ int32_t optimizeLogicPlan(SPlanContext* pCxt, SLogicSubplan* pLogicSubplan); int32_t splitLogicPlan(SPlanContext* pCxt, SLogicSubplan* pLogicSubplan); int32_t scaleOutLogicPlan(SPlanContext* pCxt, SLogicSubplan* pLogicSubplan, SQueryLogicPlan** pLogicPlan); int32_t createPhysiPlan(SPlanContext* pCxt, SQueryLogicPlan* pLogicPlan, SQueryPlan** pPlan, SArray* pExecNodeList); +int32_t validateQueryPlan(SPlanContext* pCxt, SQueryPlan* pPlan); bool getBatchScanOptionFromHint(SNodeList* pList); bool getSortForGroupOptHint(SNodeList* pList); diff --git a/source/libs/planner/src/planPhysiCreater.c b/source/libs/planner/src/planPhysiCreater.c index d6799a25a7..0e80f5bcec 100644 --- a/source/libs/planner/src/planPhysiCreater.c +++ b/source/libs/planner/src/planPhysiCreater.c @@ -1950,41 +1950,57 @@ static int32_t createExchangePhysiNodeByMerge(SMergePhysiNode* pMerge) { return nodesListMakeStrictAppend(&pMerge->node.pChildren, (SNode*)pExchange); } -static int32_t createMergePhysiNode(SPhysiPlanContext* pCxt, SMergeLogicNode* pMergeLogicNode, SPhysiNode** pPhyNode) { +static int32_t createMergePhysiNode(SPhysiPlanContext* pCxt, SNodeList* pChildren, SMergeLogicNode* pMergeLogicNode, SPhysiNode** pPhyNode) { + int32_t code = TSDB_CODE_SUCCESS; SMergePhysiNode* pMerge = (SMergePhysiNode*)makePhysiNode(pCxt, (SLogicNode*)pMergeLogicNode, QUERY_NODE_PHYSICAL_PLAN_MERGE); if (NULL == pMerge) { return TSDB_CODE_OUT_OF_MEMORY; } + if (pMergeLogicNode->colsMerge) { + 
pMerge->type = MERGE_TYPE_COLUMNS; + } else if (pMergeLogicNode->needSort) { + pMerge->type = MERGE_TYPE_SORT; + } else { + pMerge->type = MERGE_TYPE_NON_SORT; + } + pMerge->numOfChannels = pMergeLogicNode->numOfChannels; pMerge->srcGroupId = pMergeLogicNode->srcGroupId; pMerge->groupSort = pMergeLogicNode->groupSort; pMerge->ignoreGroupId = pMergeLogicNode->ignoreGroupId; pMerge->inputWithGroupId = pMergeLogicNode->inputWithGroupId; - int32_t code = addDataBlockSlots(pCxt, pMergeLogicNode->pInputs, pMerge->node.pOutputDataBlockDesc); + if (!pMergeLogicNode->colsMerge) { + code = addDataBlockSlots(pCxt, pMergeLogicNode->pInputs, pMerge->node.pOutputDataBlockDesc); - if (TSDB_CODE_SUCCESS == code) { - for (int32_t i = 0; i < pMerge->numOfChannels; ++i) { - code = createExchangePhysiNodeByMerge(pMerge); - if (TSDB_CODE_SUCCESS != code) { - break; + if (TSDB_CODE_SUCCESS == code) { + for (int32_t i = 0; i < pMerge->numOfChannels; ++i) { + code = createExchangePhysiNodeByMerge(pMerge); + if (TSDB_CODE_SUCCESS != code) { + break; + } } } - } - if (TSDB_CODE_SUCCESS == code && NULL != pMergeLogicNode->pMergeKeys) { - code = setListSlotId(pCxt, pMerge->node.pOutputDataBlockDesc->dataBlockId, -1, pMergeLogicNode->pMergeKeys, - &pMerge->pMergeKeys); - } + if (TSDB_CODE_SUCCESS == code && NULL != pMergeLogicNode->pMergeKeys) { + code = setListSlotId(pCxt, pMerge->node.pOutputDataBlockDesc->dataBlockId, -1, pMergeLogicNode->pMergeKeys, + &pMerge->pMergeKeys); + } - if (TSDB_CODE_SUCCESS == code) { - code = setListSlotId(pCxt, pMerge->node.pOutputDataBlockDesc->dataBlockId, -1, pMergeLogicNode->node.pTargets, - &pMerge->pTargets); - } - if (TSDB_CODE_SUCCESS == code) { - code = addDataBlockSlots(pCxt, pMerge->pTargets, pMerge->node.pOutputDataBlockDesc); + if (TSDB_CODE_SUCCESS == code) { + code = setListSlotId(pCxt, pMerge->node.pOutputDataBlockDesc->dataBlockId, -1, pMergeLogicNode->node.pTargets, + &pMerge->pTargets); + } + if (TSDB_CODE_SUCCESS == code) { + code = 
addDataBlockSlots(pCxt, pMerge->pTargets, pMerge->node.pOutputDataBlockDesc); + } + } else { + SDataBlockDescNode* pLeftDesc = ((SPhysiNode*)nodesListGetNode(pChildren, 0))->pOutputDataBlockDesc; + SDataBlockDescNode* pRightDesc = ((SPhysiNode*)nodesListGetNode(pChildren, 1))->pOutputDataBlockDesc; + + code = setListSlotId(pCxt, pLeftDesc->dataBlockId, pRightDesc->dataBlockId, pMergeLogicNode->node.pTargets, &pMerge->pTargets); } if (TSDB_CODE_SUCCESS == code) { @@ -2022,7 +2038,7 @@ static int32_t doCreatePhysiNode(SPhysiPlanContext* pCxt, SLogicNode* pLogicNode case QUERY_NODE_LOGIC_PLAN_INTERP_FUNC: return createInterpFuncPhysiNode(pCxt, pChildren, (SInterpFuncLogicNode*)pLogicNode, pPhyNode); case QUERY_NODE_LOGIC_PLAN_MERGE: - return createMergePhysiNode(pCxt, (SMergeLogicNode*)pLogicNode, pPhyNode); + return createMergePhysiNode(pCxt, pChildren, (SMergeLogicNode*)pLogicNode, pPhyNode); case QUERY_NODE_LOGIC_PLAN_GROUP_CACHE: return createGroupCachePhysiNode(pCxt, pChildren, (SGroupCacheLogicNode*)pLogicNode, pPhyNode); case QUERY_NODE_LOGIC_PLAN_DYN_QUERY_CTRL: diff --git a/source/libs/planner/src/planSpliter.c b/source/libs/planner/src/planSpliter.c index d7b3f51961..bf5fe901a6 100644 --- a/source/libs/planner/src/planSpliter.c +++ b/source/libs/planner/src/planSpliter.c @@ -248,8 +248,6 @@ static bool stbSplHasMultiTbScan(bool streamQuery, SLogicNode* pNode) { } if (QUERY_NODE_LOGIC_PLAN_SCAN == nodeType(pChild) && stbSplIsMultiTbScan(streamQuery, (SScanLogicNode*)pChild)) { return true; - } else if (QUERY_NODE_LOGIC_PLAN_SORT == nodeType(pChild)) { - return stbSplHasMultiTbScan(streamQuery, (SLogicNode*)pChild); } return false; } @@ -540,11 +538,12 @@ static int32_t stbSplRewriteFromMergeNode(SMergeLogicNode* pMerge, SLogicNode* p } static int32_t stbSplCreateMergeNode(SSplitContext* pCxt, SLogicSubplan* pSubplan, SLogicNode* pSplitNode, - SNodeList* pMergeKeys, SLogicNode* pPartChild, bool groupSort) { + SNodeList* pMergeKeys, SLogicNode* pPartChild, bool 
groupSort, bool needSort) { SMergeLogicNode* pMerge = (SMergeLogicNode*)nodesMakeNode(QUERY_NODE_LOGIC_PLAN_MERGE); if (NULL == pMerge) { return TSDB_CODE_OUT_OF_MEMORY; } + pMerge->needSort = needSort; pMerge->numOfChannels = stbSplGetNumOfVgroups(pPartChild); pMerge->srcGroupId = pCxt->groupId; pMerge->node.precision = pPartChild->precision; @@ -621,7 +620,7 @@ static int32_t stbSplSplitIntervalForBatch(SSplitContext* pCxt, SStableSplitInfo code = stbSplCreateMergeKeysByPrimaryKey(((SWindowLogicNode*)pInfo->pSplitNode)->pTspk, ((SWindowLogicNode*)pInfo->pSplitNode)->node.outputTsOrder, &pMergeKeys); if (TSDB_CODE_SUCCESS == code) { - code = stbSplCreateMergeNode(pCxt, NULL, pInfo->pSplitNode, pMergeKeys, pPartWindow, true); + code = stbSplCreateMergeNode(pCxt, NULL, pInfo->pSplitNode, pMergeKeys, pPartWindow, true, true); } if (TSDB_CODE_SUCCESS != code) { nodesDestroyList(pMergeKeys); @@ -712,7 +711,7 @@ static int32_t stbSplSplitSessionOrStateForBatch(SSplitContext* pCxt, SStableSpl ((SWindowLogicNode*)pWindow)->node.inputTsOrder, &pMergeKeys); if (TSDB_CODE_SUCCESS == code) { - code = stbSplCreateMergeNode(pCxt, pInfo->pSubplan, pChild, pMergeKeys, (SLogicNode*)pChild, true); + code = stbSplCreateMergeNode(pCxt, pInfo->pSubplan, pChild, pMergeKeys, (SLogicNode*)pChild, true, true); } if (TSDB_CODE_SUCCESS == code) { @@ -982,7 +981,7 @@ static int32_t stbSplAggNodeCreateMerge(SSplitContext* pCtx, SStableSplitInfo* p } } } - code = stbSplCreateMergeNode(pCtx, NULL, pInfo->pSplitNode, pMergeKeys, pChildAgg, groupSort); + code = stbSplCreateMergeNode(pCtx, NULL, pInfo->pSplitNode, pMergeKeys, pChildAgg, groupSort, true); if (TSDB_CODE_SUCCESS == code && sortForGroup) { SMergeLogicNode* pMerge = (SMergeLogicNode*)nodesListGetNode(pInfo->pSplitNode->pChildren, LIST_LENGTH(pInfo->pSplitNode->pChildren) - 1); @@ -1145,7 +1144,7 @@ static int32_t stbSplSplitSortNode(SSplitContext* pCxt, SStableSplitInfo* pInfo) bool groupSort = 
((SSortLogicNode*)pInfo->pSplitNode)->groupSort; int32_t code = stbSplCreatePartSortNode((SSortLogicNode*)pInfo->pSplitNode, &pPartSort, &pMergeKeys); if (TSDB_CODE_SUCCESS == code) { - code = stbSplCreateMergeNode(pCxt, pInfo->pSubplan, pInfo->pSplitNode, pMergeKeys, pPartSort, groupSort); + code = stbSplCreateMergeNode(pCxt, pInfo->pSubplan, pInfo->pSplitNode, pMergeKeys, pPartSort, groupSort, true); } if (TSDB_CODE_SUCCESS == code) { nodesDestroyNode((SNode*)pInfo->pSplitNode); @@ -1195,7 +1194,7 @@ static int32_t stbSplSplitScanNodeWithPartTags(SSplitContext* pCxt, SStableSplit SLogicNode* pSplitNode = NULL; int32_t code = stbSplGetSplitNodeForScan(pInfo, &pSplitNode); if (TSDB_CODE_SUCCESS == code) { - code = stbSplCreateMergeNode(pCxt, pInfo->pSubplan, pSplitNode, NULL, pSplitNode, true); + code = stbSplCreateMergeNode(pCxt, pInfo->pSubplan, pSplitNode, NULL, pSplitNode, true, pSplitNode->requireDataOrder >= DATA_ORDER_LEVEL_IN_GROUP); } if (TSDB_CODE_SUCCESS == code) { code = nodesListMakeStrictAppend(&pInfo->pSubplan->pChildren, @@ -1269,7 +1268,7 @@ static int32_t stbSplSplitMergeScanNode(SSplitContext* pCxt, SLogicSubplan* pSub ((SLimitNode*)pMergeScan->pLimit)->limit += ((SLimitNode*)pMergeScan->pLimit)->offset; ((SLimitNode*)pMergeScan->pLimit)->offset = 0; } - code = stbSplCreateMergeNode(pCxt, pSubplan, (SLogicNode*)pScan, pMergeKeys, pMergeScan, groupSort); + code = stbSplCreateMergeNode(pCxt, pSubplan, (SLogicNode*)pScan, pMergeKeys, pMergeScan, groupSort, true); } if (TSDB_CODE_SUCCESS == code) { nodesDestroyNode((SNode*)pScan); @@ -1340,12 +1339,14 @@ static int32_t stbSplCreateMergeKeysForPartitionNode(SLogicNode* pPart, SNodeLis static int32_t stbSplSplitPartitionNode(SSplitContext* pCxt, SStableSplitInfo* pInfo) { int32_t code = TSDB_CODE_SUCCESS; + bool needSort = false; SNodeList* pMergeKeys = NULL; if (pInfo->pSplitNode->requireDataOrder >= DATA_ORDER_LEVEL_IN_GROUP) { + needSort = true; code = 
stbSplCreateMergeKeysForPartitionNode(pInfo->pSplitNode, &pMergeKeys); } if (TSDB_CODE_SUCCESS == code) { - code = stbSplCreateMergeNode(pCxt, pInfo->pSubplan, pInfo->pSplitNode, pMergeKeys, pInfo->pSplitNode, true); + code = stbSplCreateMergeNode(pCxt, pInfo->pSubplan, pInfo->pSplitNode, pMergeKeys, pInfo->pSplitNode, true, needSort); } if (TSDB_CODE_SUCCESS == code) { code = nodesListMakeStrictAppend(&pInfo->pSubplan->pChildren, diff --git a/source/libs/planner/src/planValidator.c b/source/libs/planner/src/planValidator.c new file mode 100755 index 0000000000..7461ee4f9a --- /dev/null +++ b/source/libs/planner/src/planValidator.c @@ -0,0 +1,161 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "planInt.h" + +#include "catalog.h" +#include "functionMgt.h" +#include "systable.h" +#include "tglobal.h" + +typedef struct SValidatePlanContext { + SPlanContext* pPlanCxt; + int32_t errCode; +} SValidatePlanContext; + +int32_t doValidatePhysiNode(SValidatePlanContext* pCxt, SNode* pNode); + +int32_t validateMergePhysiNode(SValidatePlanContext* pCxt, SMergePhysiNode* pMerge) { + if ((NULL != pMerge->node.pLimit || NULL != pMerge->node.pSlimit) && pMerge->type == MERGE_TYPE_NON_SORT) { + planError("no limit&slimit supported for non sort merge"); + return TSDB_CODE_PLAN_INTERNAL_ERROR; + } + + return TSDB_CODE_SUCCESS; +} + +int32_t validateSubplanNode(SValidatePlanContext* pCxt, SSubplan* pSubPlan) { + if (SUBPLAN_TYPE_MODIFY == pSubPlan->subplanType) { + return TSDB_CODE_SUCCESS; + } + return doValidatePhysiNode(pCxt, (SNode*)pSubPlan->pNode); +} + +int32_t validateQueryPlanNode(SValidatePlanContext* pCxt, SQueryPlan* pPlan) { + int32_t code = TSDB_CODE_SUCCESS; + SNode* pNode = NULL; + FOREACH(pNode, pPlan->pSubplans) { + if (QUERY_NODE_NODE_LIST != nodeType(pNode)) { + code = TSDB_CODE_PLAN_INTERNAL_ERROR; + break; + } + + SNode* pSubNode = NULL; + SNodeListNode* pSubplans = (SNodeListNode*)pNode; + FOREACH(pSubNode, pSubplans->pNodeList) { + if (QUERY_NODE_PHYSICAL_SUBPLAN != nodeType(pSubNode)) { /* test the list element; pNode is the enclosing NODE_LIST and can never be a subplan */ + code = TSDB_CODE_PLAN_INTERNAL_ERROR; + break; + } + + code = doValidatePhysiNode(pCxt, pSubNode); + if (code) { + break; + } + } + } + + return code; +} + +int32_t doValidatePhysiNode(SValidatePlanContext* pCxt, SNode* pNode) { + switch (nodeType(pNode)) { + case QUERY_NODE_PHYSICAL_PLAN_TAG_SCAN: + case QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN: + case QUERY_NODE_PHYSICAL_PLAN_TABLE_SEQ_SCAN: + case QUERY_NODE_PHYSICAL_PLAN_TABLE_MERGE_SCAN: + case QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN: + case QUERY_NODE_PHYSICAL_PLAN_SYSTABLE_SCAN: + case QUERY_NODE_PHYSICAL_PLAN_BLOCK_DIST_SCAN: + case QUERY_NODE_PHYSICAL_PLAN_LAST_ROW_SCAN: + case
QUERY_NODE_PHYSICAL_PLAN_PROJECT: + case QUERY_NODE_PHYSICAL_PLAN_MERGE_JOIN: + case QUERY_NODE_PHYSICAL_PLAN_HASH_AGG: + case QUERY_NODE_PHYSICAL_PLAN_EXCHANGE: + break; + case QUERY_NODE_PHYSICAL_PLAN_MERGE: + return validateMergePhysiNode(pCxt, (SMergePhysiNode*)pNode); + case QUERY_NODE_PHYSICAL_PLAN_SORT: + case QUERY_NODE_PHYSICAL_PLAN_GROUP_SORT: + case QUERY_NODE_PHYSICAL_PLAN_HASH_INTERVAL: + case QUERY_NODE_PHYSICAL_PLAN_MERGE_INTERVAL: + case QUERY_NODE_PHYSICAL_PLAN_MERGE_ALIGNED_INTERVAL: + case QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL: + case QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_INTERVAL: + case QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_INTERVAL: + case QUERY_NODE_PHYSICAL_PLAN_FILL: + case QUERY_NODE_PHYSICAL_PLAN_STREAM_FILL: + case QUERY_NODE_PHYSICAL_PLAN_MERGE_SESSION: + case QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION: + case QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_SESSION: + case QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_SESSION: + case QUERY_NODE_PHYSICAL_PLAN_MERGE_STATE: + case QUERY_NODE_PHYSICAL_PLAN_STREAM_STATE: + case QUERY_NODE_PHYSICAL_PLAN_PARTITION: + case QUERY_NODE_PHYSICAL_PLAN_STREAM_PARTITION: + case QUERY_NODE_PHYSICAL_PLAN_INDEF_ROWS_FUNC: + case QUERY_NODE_PHYSICAL_PLAN_INTERP_FUNC: + case QUERY_NODE_PHYSICAL_PLAN_DISPATCH: + case QUERY_NODE_PHYSICAL_PLAN_INSERT: + case QUERY_NODE_PHYSICAL_PLAN_QUERY_INSERT: + case QUERY_NODE_PHYSICAL_PLAN_DELETE: + case QUERY_NODE_PHYSICAL_PLAN_TABLE_COUNT_SCAN: + case QUERY_NODE_PHYSICAL_PLAN_MERGE_EVENT: + case QUERY_NODE_PHYSICAL_PLAN_STREAM_EVENT: + case QUERY_NODE_PHYSICAL_PLAN_HASH_JOIN: + case QUERY_NODE_PHYSICAL_PLAN_GROUP_CACHE: + case QUERY_NODE_PHYSICAL_PLAN_DYN_QUERY_CTRL: + break; + case QUERY_NODE_PHYSICAL_SUBPLAN: + return validateSubplanNode(pCxt, (SSubplan*)pNode); + case QUERY_NODE_PHYSICAL_PLAN: + return validateQueryPlanNode(pCxt, (SQueryPlan *)pNode); + default: + break; + } + + return TSDB_CODE_SUCCESS; +} + +static void destoryValidatePlanContext(SValidatePlanContext* pCxt) { + +} + 
+int32_t validateQueryPlan(SPlanContext* pCxt, SQueryPlan* pPlan) { + SValidatePlanContext cxt = {.pPlanCxt = pCxt, + .errCode = TSDB_CODE_SUCCESS + }; + + int32_t code = TSDB_CODE_SUCCESS; + SNode* pNode = NULL; + FOREACH(pNode, pPlan->pSubplans) { + if (QUERY_NODE_NODE_LIST != nodeType(pNode)) { + code = TSDB_CODE_PLAN_INTERNAL_ERROR; + break; + } + + SNode* pSubNode = NULL; + SNodeListNode* pSubplans = (SNodeListNode*)pNode; + FOREACH(pSubNode, pSubplans->pNodeList) { + code = doValidatePhysiNode(&cxt, pSubNode); + if (code) { + break; + } + } + } + + destoryValidatePlanContext(&cxt); + return code; +} diff --git a/source/libs/planner/src/planner.c b/source/libs/planner/src/planner.c index 6dd9c544cc..a4a33b30fd 100644 --- a/source/libs/planner/src/planner.c +++ b/source/libs/planner/src/planner.c @@ -57,6 +57,9 @@ int32_t qCreateQueryPlan(SPlanContext* pCxt, SQueryPlan** pPlan, SArray* pExecNo if (TSDB_CODE_SUCCESS == code) { code = createPhysiPlan(pCxt, pLogicPlan, pPlan, pExecNodeList); } + if (TSDB_CODE_SUCCESS == code) { + code = validateQueryPlan(pCxt, *pPlan); + } if (TSDB_CODE_SUCCESS == code) { dumpQueryPlan(*pPlan); } From 6cc40fa66ce7e85e9e5fabbc05f1feeb76c8973e Mon Sep 17 00:00:00 2001 From: wangjiaming0909 <604227650@qq.com> Date: Tue, 7 Nov 2023 15:53:30 +0800 Subject: [PATCH 12/79] fix: last(ts2), ts2, ts caused data err --- source/dnode/vnode/src/tsdb/tsdbCacheRead.c | 2 +- tests/system-test/2-query/last_cache_scan.py | 23 +++++++++++++++++--- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbCacheRead.c b/source/dnode/vnode/src/tsdb/tsdbCacheRead.c index e4a91b73d5..d3e76287c7 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCacheRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbCacheRead.c @@ -60,7 +60,7 @@ static int32_t saveOneRow(SArray* pRow, SSDataBlock* pBlock, SCacheRowsReader* p } for (int32_t idx = 0; idx < taosArrayGetSize(pBlock->pDataBlock); ++idx) { SColumnInfoData* pCol = 
taosArrayGet(pBlock->pDataBlock, idx); - if (pCol->info.type == TSDB_DATA_TYPE_TIMESTAMP) { + if (pCol->info.colId == PRIMARYKEY_TIMESTAMP_COL_ID) { colDataSetVal(pCol, numOfRows, (const char*)&ts, false); continue; } diff --git a/tests/system-test/2-query/last_cache_scan.py b/tests/system-test/2-query/last_cache_scan.py index fb5c8bcee2..ee4cc388ac 100644 --- a/tests/system-test/2-query/last_cache_scan.py +++ b/tests/system-test/2-query/last_cache_scan.py @@ -65,9 +65,9 @@ class TDTestCase: sql += " %s%d values "%(ctbPrefix,i) for j in range(rowsPerTbl): if (i < ctbNum/2): - sql += "(%d, %d, %d, %d,%d,%d,%d,true,'binary%d', 'nchar%d') "%(startTs + j*tsStep, j%1000, j%500, j%1000, j%5000, j%5400, j%128, j%10000, j%1000) + sql += "(%d, %d, %d, %d,%d,%d,%d,true,'binary%d', 'nchar%d', %d) "%(startTs + j*tsStep, j%1000, j%500, j%1000, j%5000, j%5400, j%128, j%10000, j%1000, startTs+j*tsStep+1000) else: - sql += "(%d, %d, NULL, %d,NULL,%d,%d,true,'binary%d', 'nchar%d') "%(startTs + j*tsStep, j%1000, j%500, j%1000, j%128, j%10000, j%1000) + sql += "(%d, %d, NULL, %d,NULL,%d,%d,true,'binary%d', 'nchar%d', %d) "%(startTs + j*tsStep, j%1000, j%500, j%1000, j%128, j%10000, j%1000, startTs + j*tsStep + 1000) rowsBatched += 1 if ((rowsBatched == batchNum) or (j == rowsPerTbl - 1)): tsql.execute(sql) @@ -97,7 +97,8 @@ class TDTestCase: {'type': 'tinyint', 'count':1}, {'type': 'bool', 'count':1}, {'type': 'binary', 'len':10, 'count':1}, - {'type': 'nchar', 'len':10, 'count':1}], + {'type': 'nchar', 'len':10, 'count':1}, + {'type': 'timestamp', 'count':1}], 'tagSchema': [{'type': 'INT', 'count':1},{'type': 'nchar', 'len':20, 'count':1},{'type': 'binary', 'len':20, 'count':1},{'type': 'BIGINT', 'count':1},{'type': 'smallint', 'count':1},{'type': 'DOUBLE', 'count':1}], 'ctbPrefix': 't', 'ctbStartIdx': 0, @@ -252,6 +253,22 @@ class TDTestCase: #res_expect = [None, None, [999, 999, 499, "2018-11-25 19:30:00.000"]] #self.query_check_sqls(sqls, has_last_row_scan_res, res_expect) + 
select_items = ["last(c10), c10", + "last(c10), ts", + "last(c10), c10, ts", + "last(c10), c10, ts, c10,ts", + "last(c10), ts, c1"] + has_last_row_scan_res = [1,1,1,1,0] + sqls = self.format_sqls(sql_template, select_items) + self.explain_and_check_res(sqls, has_last_row_scan_res) + res_expect = [ + ["2018-11-25 19:30:01.000", "2018-11-25 19:30:01.000"], + ["2018-11-25 19:30:01.000", "2018-11-25 19:30:00.000"], + ["2018-11-25 19:30:01.000", "2018-11-25 19:30:01.000", "2018-11-25 19:30:00.000"], + ["2018-11-25 19:30:01.000", "2018-11-25 19:30:01.000", "2018-11-25 19:30:00.000", "2018-11-25 19:30:01.000", "2018-11-25 19:30:00.000"] + ] + self.query_check_sqls(sqls, has_last_row_scan_res, res_expect) + sql = "select last(c1), c1, c1+1, c1+2, ts from meters" res = self.explain_sql(sql) self.check_explain_res_has_row("Last Row Scan", res, sql) From edeeb490b12357fbbed7365a9ea6c68fa48f6baf Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Tue, 7 Nov 2023 16:40:05 +0800 Subject: [PATCH 13/79] enhance: add tbname in values --- source/libs/parser/src/parTranslater.c | 113 ++++++++++++++++++------- 1 file changed, 84 insertions(+), 29 deletions(-) diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index 398d31d750..83feb22cc3 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -3927,12 +3927,11 @@ static int32_t translatePartitionBy(STranslateContext* pCxt, SSelectStmt* pSelec typedef struct SEqCondTbNameTableInfo { SRealTableNode* pRealTable; - char tbName[TSDB_TABLE_NAME_LEN]; - bool done; + SArray* aTbnames; } SEqCondTbNameTableInfo; //[tableAlias.]tbname = tbNamVal -static bool isOperatorEqTbnameCond(STranslateContext* pCxt, SOperatorNode* pOperator, char** ppTableAlias, char** ppTbNameVal) { +static bool isOperatorEqTbnameCond(STranslateContext* pCxt, SOperatorNode* pOperator, char** ppTableAlias, SArray* aTabNames) { if (pOperator->opType != OP_TYPE_EQUAL) return false; 
SFunctionNode* pTbnameFunc = NULL; SValueNode* pValueNode = NULL; @@ -3960,16 +3959,50 @@ static bool isOperatorEqTbnameCond(STranslateContext* pCxt, SOperatorNode* pOper } else { return false; } - *ppTbNameVal = pValueNode->literal; + taosArrayPush(aTabNames, &(pValueNode->literal)); return true; } +//[tableAlias.]tbname in (value1, value2, ...) +static bool isOperatorTbnameInCond(STranslateContext* pCxt, SOperatorNode* pOperator, char** ppTableAlias, SArray* aTbNames) { + if (pOperator->opType != OP_TYPE_IN) return false; + if (nodeType(pOperator->pLeft) != QUERY_NODE_FUNCTION || + ((SFunctionNode*)(pOperator->pLeft))->funcType != FUNCTION_TYPE_TBNAME || + nodeType(pOperator->pRight) == QUERY_NODE_NODE_LIST) { + return false; + } + + SFunctionNode* pTbnameFunc = (SFunctionNode*)pOperator->pLeft; + if (LIST_LENGTH(pTbnameFunc->pParameterList) == 0) { + *ppTableAlias = NULL; + } else if (LIST_LENGTH(pTbnameFunc->pParameterList) == 1) { + SNode* pQualNode = nodesListGetNode(pTbnameFunc->pParameterList, 0); + if (nodeType(pQualNode) != QUERY_NODE_VALUE) return false; + SValueNode* pQualValNode = (SValueNode*)pQualNode; + *ppTableAlias = pQualValNode->literal; + } else { + return false; + } + SNodeListNode* pValueListNode = (SNodeListNode*)pOperator->pRight; + SNodeList* pValueNodeList = pValueListNode->pNodeList; + SNode* pValNode = NULL; + FOREACH(pValNode, pValueNodeList) { + if (nodeType(pValNode) != QUERY_NODE_VALUE) { + return false; + } + taosArrayPush(aTbNames, &((SValueNode*)pValNode)->literal); + } + return true; + +} + static bool findEqCondTbNameInOperatorNode(STranslateContext* pCxt, SNode* pWhere, SEqCondTbNameTableInfo* pInfo) { int32_t code = TSDB_CODE_SUCCESS; char* pTableAlias = NULL; char* pTbNameVal = NULL; - - if (isOperatorEqTbnameCond(pCxt, (SOperatorNode*)pWhere, &pTableAlias, &pTbNameVal)) { + SArray* aTableNames = taosArrayInit(1, sizeof(void*)); + if (isOperatorEqTbnameCond(pCxt, (SOperatorNode*)pWhere, &pTableAlias, aTableNames) || + 
isOperatorTbnameInCond(pCxt, (SOperatorNode*)pWhere, &pTableAlias, aTableNames)) { STableNode* pTable; if (pTableAlias == NULL) { pTable = (STableNode*)((SSelectStmt*)(pCxt->pCurrStmt))->pFromTable; @@ -3978,11 +4011,12 @@ static bool findEqCondTbNameInOperatorNode(STranslateContext* pCxt, SNode* pWher } if (code == TSDB_CODE_SUCCESS && nodeType(pTable) == QUERY_NODE_REAL_TABLE && ((SRealTableNode*)pTable)->pMeta && ((SRealTableNode*)pTable)->pMeta->tableType == TSDB_SUPER_TABLE) { - strcpy(pInfo->tbName, pTbNameVal); pInfo->pRealTable = (SRealTableNode*)pTable; + taosArrayAddAll(pInfo->aTbnames, aTableNames); return true; } } + taosArrayDestroy(aTableNames); return false; } @@ -4001,27 +4035,47 @@ static void findEqualCondTbnameInLogicCondAnd(STranslateContext* pCxt, SNode* pW FOREACH(pTmpNode, ((SLogicConditionNode*)pWhere)->pParameterList) { if (nodeType(pTmpNode) == QUERY_NODE_OPERATOR) { SEqCondTbNameTableInfo info = {0}; + info.aTbnames = taosArrayInit(1, sizeof(void*)); bool bIsEqTbnameCond = findEqCondTbNameInOperatorNode(pCxt, pTmpNode, &info); if (bIsEqTbnameCond && !isTableExistInTableTbnames(aTableTbnames, info.pRealTable)) { + //TODO: intersect tbNames of same table? 
speed taosArrayPush(aTableTbnames, &info); - break; + } else { + taosArrayDestroy(info.aTbnames); } } } } +static void unionTbnamesOfTbNameCond(SArray* aTableTbnames, SEqCondTbNameTableInfo* pInfo) { + bool bFoundTable = false; + for (int i = 0; i < taosArrayGetSize(aTableTbnames); ++i) { + SEqCondTbNameTableInfo* info = taosArrayGet(aTableTbnames, i); + if (info->pRealTable == pInfo->pRealTable) { + taosArrayAddAll(info->aTbnames, pInfo->aTbnames); + taosArrayDestroy(pInfo->aTbnames); + bFoundTable = true; + break; + } + } + if (!bFoundTable) { + taosArrayPush(aTableTbnames, pInfo); + } +} + static void findEqualCondTbnameInLogicCondOr(STranslateContext* pCxt, SNode* pWhere, SArray* aTableTbnames) { bool bAllTbName = true; SNode* pTmpNode = NULL; FOREACH(pTmpNode, ((SLogicConditionNode*)pWhere)->pParameterList) { if (nodeType(pTmpNode) == QUERY_NODE_OPERATOR) { SEqCondTbNameTableInfo info = {0}; - bool bIsEqTbnameCond = findEqCondTbNameInOperatorNode(pCxt, pTmpNode, &info); + info.aTbnames = taosArrayInit(1, sizeof(void*)); + bool bIsEqTbnameCond = findEqCondTbNameInOperatorNode(pCxt, pTmpNode, &info); if (!bIsEqTbnameCond) { bAllTbName = false; break; } else { - taosArrayPush(aTableTbnames, &info); + unionTbnamesOfTbNameCond(aTableTbnames, &info); } } else { bAllTbName = false; @@ -4029,6 +4083,10 @@ static void findEqualCondTbnameInLogicCondOr(STranslateContext* pCxt, SNode* pWh } } if (!bAllTbName) { + for (int i = 0; i < taosArrayGetSize(aTableTbnames); ++i) { + SEqCondTbNameTableInfo* pInfo = taosArrayGet(aTableTbnames, i); + taosArrayDestroy(pInfo->aTbnames); + } taosArrayClear(aTableTbnames); } } @@ -4050,23 +4108,18 @@ static int32_t findEqualCondTbname(STranslateContext* pCxt, SNode* pWhere, SArra return TSDB_CODE_SUCCESS; } -static int32_t findVgroupsFromEqualTbname(STranslateContext* pCxt, SArray* aTables, int32_t start, SVgroupsInfo* vgsInfo) { +static int32_t findVgroupsFromEqualTbname(STranslateContext* pCxt, SEqCondTbNameTableInfo* pInfo, 
SVgroupsInfo* vgsInfo) { int32_t nVgroups = 0; - int32_t nTbls = taosArrayGetSize(aTables); - SEqCondTbNameTableInfo* pInfo1 = taosArrayGet(aTables, start); + int32_t nTbls = taosArrayGetSize(pInfo->aTbnames); - for (int j = start; j < nTbls; ++j) { - SEqCondTbNameTableInfo* pInfo = taosArrayGet(aTables, j); - if (pInfo->done || pInfo->pRealTable != pInfo1->pRealTable) { - continue; - } + for (int j = 0; j < nTbls; ++j) { char* dbName = pInfo->pRealTable->table.dbName; SName snameTb; - toName(pCxt->pParseCxt->acctId, dbName, pInfo->tbName, &snameTb); + char* tbName = taosArrayGetP(pInfo->aTbnames, j); + toName(pCxt->pParseCxt->acctId, dbName, tbName, &snameTb); SVgroupInfo vgInfo; bool bExists; int32_t code = catalogGetCachedTableHashVgroup(pCxt->pParseCxt->pCatalog, &snameTb, &vgInfo, &bExists); - pInfo->done = true; if (code == TSDB_CODE_SUCCESS && bExists) { bool bFoundVg = false; for (int32_t k = 0; k < nVgroups; ++k) { @@ -4089,18 +4142,16 @@ static int32_t findVgroupsFromEqualTbname(STranslateContext* pCxt, SArray* aTabl static int32_t setEqualTbnameTableVgroups(STranslateContext* pCxt, SSelectStmt* pSelect, SArray* aTables) { int32_t code = TSDB_CODE_SUCCESS; - int32_t nTbls = taosArrayGetSize(aTables); - for (int i = 0; i < nTbls; ++i) { - SEqCondTbNameTableInfo* pInfo1 = taosArrayGet(aTables, i); - if (pInfo1->done) { - continue; - } + for (int i = 0; i < taosArrayGetSize(aTables); ++i) { + SEqCondTbNameTableInfo* pInfo = taosArrayGet(aTables, i); + int32_t nTbls = taosArrayGetSize(pInfo->aTbnames); + SVgroupsInfo* vgsInfo = taosMemoryMalloc(sizeof(SVgroupsInfo) + nTbls * sizeof(SVgroupInfo)); int32_t nVgroups = 0; - findVgroupsFromEqualTbname(pCxt, aTables, i, vgsInfo); + findVgroupsFromEqualTbname(pCxt, pInfo, vgsInfo); if (vgsInfo->numOfVgroups != 0) { - taosMemoryFree(pInfo1->pRealTable->pVgroupList); - pInfo1->pRealTable->pVgroupList = vgsInfo; + taosMemoryFree(pInfo->pRealTable->pVgroupList); + pInfo->pRealTable->pVgroupList = vgsInfo; } else { 
taosMemoryFree(vgsInfo); } @@ -4115,6 +4166,10 @@ static int32_t setTableVgroupsFromEqualTbnameCond(STranslateContext* pCxt, SSele if (code == TSDB_CODE_SUCCESS) { code = setEqualTbnameTableVgroups(pCxt, pSelect, aTables); } + for (int i = 0; i < taosArrayGetSize(aTables); ++i) { + SEqCondTbNameTableInfo* pInfo = taosArrayGet(aTables, i); + taosArrayDestroy(pInfo->aTbnames); + } taosArrayDestroy(aTables); return code; } From 4ff81b84b0018b8ffe21a45ce32f93b74a2ff0d4 Mon Sep 17 00:00:00 2001 From: slzhou Date: Tue, 7 Nov 2023 20:47:14 +0800 Subject: [PATCH 14/79] fix: fix address sanitizer error --- source/libs/parser/src/parTranslater.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index 83feb22cc3..9e26f64c5d 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -4013,6 +4013,7 @@ static bool findEqCondTbNameInOperatorNode(STranslateContext* pCxt, SNode* pWher ((SRealTableNode*)pTable)->pMeta && ((SRealTableNode*)pTable)->pMeta->tableType == TSDB_SUPER_TABLE) { pInfo->pRealTable = (SRealTableNode*)pTable; taosArrayAddAll(pInfo->aTbnames, aTableNames); + taosArrayDestroy(aTableNames); return true; } } @@ -4072,6 +4073,7 @@ static void findEqualCondTbnameInLogicCondOr(STranslateContext* pCxt, SNode* pWh info.aTbnames = taosArrayInit(1, sizeof(void*)); bool bIsEqTbnameCond = findEqCondTbNameInOperatorNode(pCxt, pTmpNode, &info); if (!bIsEqTbnameCond) { + taosArrayDestroy(info.aTbnames); bAllTbName = false; break; } else { @@ -4094,9 +4096,12 @@ static void findEqualCondTbnameInLogicCondOr(STranslateContext* pCxt, SNode* pWh static int32_t findEqualCondTbname(STranslateContext* pCxt, SNode* pWhere, SArray* aTableTbnames) { if (nodeType(pWhere) == QUERY_NODE_OPERATOR) { SEqCondTbNameTableInfo info = {0}; + info.aTbnames = taosArrayInit(1, sizeof(void*)); bool bIsEqTbnameCond = findEqCondTbNameInOperatorNode(pCxt, pWhere, &info); if 
(bIsEqTbnameCond) { taosArrayPush(aTableTbnames, &info); + } else { + taosArrayDestroy(info.aTbnames); } } else if (nodeType(pWhere) == QUERY_NODE_LOGIC_CONDITION) { if (((SLogicConditionNode*)pWhere)->condType == LOGIC_COND_TYPE_AND) { From fabf4e8cf003a63c67dae6399cd1a21982f04709 Mon Sep 17 00:00:00 2001 From: slzhou Date: Wed, 8 Nov 2023 09:09:03 +0800 Subject: [PATCH 15/79] fix: tbname in (value...) apply --- source/libs/parser/src/parTranslater.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index 9e26f64c5d..4aa2e0263e 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -3968,7 +3968,7 @@ static bool isOperatorTbnameInCond(STranslateContext* pCxt, SOperatorNode* pOper if (pOperator->opType != OP_TYPE_IN) return false; if (nodeType(pOperator->pLeft) != QUERY_NODE_FUNCTION || ((SFunctionNode*)(pOperator->pLeft))->funcType != FUNCTION_TYPE_TBNAME || - nodeType(pOperator->pRight) == QUERY_NODE_NODE_LIST) { + nodeType(pOperator->pRight) != QUERY_NODE_NODE_LIST) { return false; } From 680afd41b0d6d3ea98f8220af15327a31f397398 Mon Sep 17 00:00:00 2001 From: slzhou Date: Wed, 8 Nov 2023 10:40:07 +0800 Subject: [PATCH 16/79] enhance: add test case --- tests/system-test/2-query/tbname_vgroup.py | 60 ++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/tests/system-test/2-query/tbname_vgroup.py b/tests/system-test/2-query/tbname_vgroup.py index dedf473929..b3cbd207d4 100644 --- a/tests/system-test/2-query/tbname_vgroup.py +++ b/tests/system-test/2-query/tbname_vgroup.py @@ -107,6 +107,66 @@ class TDTestCase: tdSql.checkData(3, 1, 4) tdSql.checkData(3, 2, 4) + tdSql.query("select * from st where tbname in ('ct1') order by ts") + tdSql.checkRows(1) + tdSql.checkData(0, 0, datetime.datetime(2021, 4, 19, 0, 0, 1)) + tdSql.checkData(0, 1, 1) + tdSql.checkData(0, 2, 1) + + tdSql.query("select * from st where tbname 
in ('ct1', 'ct2') order by ts") + tdSql.checkRows(2) + tdSql.checkData(0, 0, datetime.datetime(2021, 4, 19, 0, 0, 1)) + tdSql.checkData(0, 1, 1) + tdSql.checkData(0, 2, 1) + tdSql.checkData(1, 0, datetime.datetime(2021, 4, 19, 0, 0, 2)) + tdSql.checkData(1, 1, 2) + tdSql.checkData(1, 2, 2) + + tdSql.query("select * from st where tbname in ('ct1', 'ct2') or tbname in ('ct3', 'ct4') order by ts") + tdSql.checkRows(4) + tdSql.checkData(0, 0, datetime.datetime(2021, 4, 19, 0, 0, 1)) + tdSql.checkData(0, 1, 1) + tdSql.checkData(0, 2, 1) + tdSql.checkData(1, 0, datetime.datetime(2021, 4, 19, 0, 0, 2)) + tdSql.checkData(1, 1, 2) + tdSql.checkData(1, 2, 2) + tdSql.checkData(2, 0, datetime.datetime(2021, 4, 19, 0, 0, 3)) + tdSql.checkData(2, 1, 3) + tdSql.checkData(2, 2, 3) + tdSql.checkData(3, 0, datetime.datetime(2021, 4, 19, 0, 0, 4)) + tdSql.checkData(3, 1, 4) + tdSql.checkData(3, 2, 4) + + tdSql.query("select * from st where tbname in ('ct1', 'ct2') or tbname='ct3' order by ts") + tdSql.checkRows(3) + tdSql.checkData(0, 0, datetime.datetime(2021, 4, 19, 0, 0, 1)) + tdSql.checkData(0, 1, 1) + tdSql.checkData(0, 2, 1) + tdSql.checkData(1, 0, datetime.datetime(2021, 4, 19, 0, 0, 2)) + tdSql.checkData(1, 1, 2) + tdSql.checkData(1, 2, 2) + tdSql.checkData(2, 0, datetime.datetime(2021, 4, 19, 0, 0, 3)) + tdSql.checkData(2, 1, 3) + tdSql.checkData(2, 2, 3) + + tdSql.query("select * from st where tbname in ('ct1', 'ct2') and tbname='ct3' order by ts") + tdSql.checkRows(0) + + tdSql.query("select * from st where tbname in ('ct1') or 1=1 order by ts") + tdSql.checkRows(4) + tdSql.checkData(0, 0, datetime.datetime(2021, 4, 19, 0, 0, 1)) + tdSql.checkData(0, 1, 1) + tdSql.checkData(0, 2, 1) + tdSql.checkData(1, 0, datetime.datetime(2021, 4, 19, 0, 0, 2)) + tdSql.checkData(1, 1, 2) + tdSql.checkData(1, 2, 2) + tdSql.checkData(2, 0, datetime.datetime(2021, 4, 19, 0, 0, 3)) + tdSql.checkData(2, 1, 3) + tdSql.checkData(2, 2, 3) + tdSql.checkData(3, 0, datetime.datetime(2021, 4, 19, 0, 
0, 4)) + tdSql.checkData(3, 1, 4) + tdSql.checkData(3, 2, 4) + tdSql.execute('drop database dbvg;') tdSql.execute('drop database tbname_vgroup') From 144856893cceff2c349145ce903e29afacc818bc Mon Sep 17 00:00:00 2001 From: slzhou Date: Wed, 8 Nov 2023 14:19:30 +0800 Subject: [PATCH 17/79] enhance: check no exchange operator --- tests/system-test/2-query/tbname_vgroup.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/system-test/2-query/tbname_vgroup.py b/tests/system-test/2-query/tbname_vgroup.py index b3cbd207d4..1e489356e6 100644 --- a/tests/system-test/2-query/tbname_vgroup.py +++ b/tests/system-test/2-query/tbname_vgroup.py @@ -167,6 +167,9 @@ class TDTestCase: tdSql.checkData(3, 1, 4) tdSql.checkData(3, 2, 4) + tdSql.query("explain select * from st where tbname='ct1'") + tdSql.checkRows(2) + tdSql.execute('drop database dbvg;') tdSql.execute('drop database tbname_vgroup') From 49ebb7145e06435f0a635de65a7aeac60c64bd8c Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Wed, 8 Nov 2023 18:42:41 +0800 Subject: [PATCH 18/79] enh: support split cache last and other functions --- include/libs/nodes/nodes.h | 1 + include/libs/nodes/plannodes.h | 1 + source/libs/command/src/explain.c | 76 ++++--- source/libs/executor/src/mergeoperator.c | 46 +++- source/libs/nodes/src/nodesCloneFuncs.c | 8 + source/libs/nodes/src/nodesUtilFuncs.c | 13 ++ source/libs/planner/src/planLogicCreater.c | 3 +- source/libs/planner/src/planOptimizer.c | 249 +++++++++++++++++++-- source/libs/planner/src/planPhysiCreater.c | 3 + source/libs/planner/src/planSpliter.c | 9 +- 10 files changed, 342 insertions(+), 67 deletions(-) diff --git a/include/libs/nodes/nodes.h b/include/libs/nodes/nodes.h index 9725aa48c0..7fbdbfb211 100644 --- a/include/libs/nodes/nodes.h +++ b/include/libs/nodes/nodes.h @@ -121,6 +121,7 @@ int32_t nodesListMakeAppend(SNodeList** pList, SNode* pNode); int32_t nodesListMakeStrictAppend(SNodeList** pList, SNode* pNode); int32_t nodesListAppendList(SNodeList* pTarget, 
SNodeList* pSrc); int32_t nodesListStrictAppendList(SNodeList* pTarget, SNodeList* pSrc); +int32_t nodesListMakeStrictAppendList(SNodeList** pTarget, SNodeList* pSrc); int32_t nodesListPushFront(SNodeList* pList, SNode* pNode); SListCell* nodesListErase(SNodeList* pList, SListCell* pCell); void nodesListInsertList(SNodeList* pTarget, SListCell* pPos, SNodeList* pSrc); diff --git a/include/libs/nodes/plannodes.h b/include/libs/nodes/plannodes.h index b1f2c4390c..bb47120022 100644 --- a/include/libs/nodes/plannodes.h +++ b/include/libs/nodes/plannodes.h @@ -145,6 +145,7 @@ typedef struct SAggLogicNode { bool hasGroupKeyOptimized; bool isGroupTb; bool isPartTb; // true if partition keys has tbname + bool hasGroup; } SAggLogicNode; typedef struct SProjectLogicNode { diff --git a/source/libs/command/src/explain.c b/source/libs/command/src/explain.c index 185e23590a..0f2a1e2f29 100644 --- a/source/libs/command/src/explain.c +++ b/source/libs/command/src/explain.c @@ -625,7 +625,7 @@ int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, i } case QUERY_NODE_PHYSICAL_PLAN_HASH_AGG: { SAggPhysiNode *pAggNode = (SAggPhysiNode *)pNode; - EXPLAIN_ROW_NEW(level, EXPLAIN_AGG_FORMAT, (pAggNode->pGroupKeys ? "Group" : "Aggragate")); + EXPLAIN_ROW_NEW(level, EXPLAIN_AGG_FORMAT, (pAggNode->pGroupKeys ? 
"GroupAggragate" : "Aggragate")); EXPLAIN_ROW_APPEND(EXPLAIN_LEFT_PARENTHESIS_FORMAT); if (pResNode->pExecInfo) { QRY_ERR_RET(qExplainBufAppendExecInfo(pResNode->pExecInfo, tbuf, &tlen)); @@ -1152,24 +1152,26 @@ int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, i QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level)); if (EXPLAIN_MODE_ANALYZE == ctx->mode) { - // sort method - EXPLAIN_ROW_NEW(level + 1, "Sort Method: "); + if (MERGE_TYPE_SORT == pMergeNode->type) { + // sort method + EXPLAIN_ROW_NEW(level + 1, "Sort Method: "); - int32_t nodeNum = taosArrayGetSize(pResNode->pExecInfo); - SExplainExecInfo *execInfo = taosArrayGet(pResNode->pExecInfo, 0); - SSortExecInfo *pExecInfo = (SSortExecInfo *)execInfo->verboseInfo; - EXPLAIN_ROW_APPEND("%s", pExecInfo->sortMethod == SORT_QSORT_T ? "quicksort" : "merge sort"); - if (pExecInfo->sortBuffer > 1024 * 1024) { - EXPLAIN_ROW_APPEND(" Buffers:%.2f Mb", pExecInfo->sortBuffer / (1024 * 1024.0)); - } else if (pExecInfo->sortBuffer > 1024) { - EXPLAIN_ROW_APPEND(" Buffers:%.2f Kb", pExecInfo->sortBuffer / (1024.0)); - } else { - EXPLAIN_ROW_APPEND(" Buffers:%d b", pExecInfo->sortBuffer); + int32_t nodeNum = taosArrayGetSize(pResNode->pExecInfo); + SExplainExecInfo *execInfo = taosArrayGet(pResNode->pExecInfo, 0); + SSortExecInfo *pExecInfo = (SSortExecInfo *)execInfo->verboseInfo; + EXPLAIN_ROW_APPEND("%s", pExecInfo->sortMethod == SORT_QSORT_T ? 
"quicksort" : "merge sort"); + if (pExecInfo->sortBuffer > 1024 * 1024) { + EXPLAIN_ROW_APPEND(" Buffers:%.2f Mb", pExecInfo->sortBuffer / (1024 * 1024.0)); + } else if (pExecInfo->sortBuffer > 1024) { + EXPLAIN_ROW_APPEND(" Buffers:%.2f Kb", pExecInfo->sortBuffer / (1024.0)); + } else { + EXPLAIN_ROW_APPEND(" Buffers:%d b", pExecInfo->sortBuffer); + } + + EXPLAIN_ROW_APPEND(" loops:%d", pExecInfo->loops); + EXPLAIN_ROW_END(); + QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level)); } - - EXPLAIN_ROW_APPEND(" loops:%d", pExecInfo->loops); - EXPLAIN_ROW_END(); - QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level)); } if (verbose) { @@ -1183,29 +1185,31 @@ int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, i EXPLAIN_ROW_END(); QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level + 1)); - EXPLAIN_ROW_NEW(level + 1, EXPLAIN_OUTPUT_FORMAT); - EXPLAIN_ROW_APPEND(EXPLAIN_IGNORE_GROUPID_FORMAT, pMergeNode->ignoreGroupId ? "true" : "false"); - EXPLAIN_ROW_END(); - QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level + 1)); + if (MERGE_TYPE_SORT == pMergeNode->type) { + EXPLAIN_ROW_NEW(level + 1, EXPLAIN_OUTPUT_FORMAT); + EXPLAIN_ROW_APPEND(EXPLAIN_IGNORE_GROUPID_FORMAT, pMergeNode->ignoreGroupId ? 
"true" : "false"); + EXPLAIN_ROW_END(); + QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level + 1)); - EXPLAIN_ROW_NEW(level + 1, EXPLAIN_MERGE_KEYS_FORMAT); - if (pMergeNode->groupSort) { - EXPLAIN_ROW_APPEND(EXPLAIN_STRING_TYPE_FORMAT, "_group_id asc"); - if (LIST_LENGTH(pMergeNode->pMergeKeys) > 0) { - EXPLAIN_ROW_APPEND(EXPLAIN_COMMA_FORMAT); + EXPLAIN_ROW_NEW(level + 1, EXPLAIN_MERGE_KEYS_FORMAT); + if (pMergeNode->groupSort) { + EXPLAIN_ROW_APPEND(EXPLAIN_STRING_TYPE_FORMAT, "_group_id asc"); + if (LIST_LENGTH(pMergeNode->pMergeKeys) > 0) { + EXPLAIN_ROW_APPEND(EXPLAIN_COMMA_FORMAT); + } } - } - for (int32_t i = 0; i < LIST_LENGTH(pMergeNode->pMergeKeys); ++i) { - SOrderByExprNode *ptn = (SOrderByExprNode *)nodesListGetNode(pMergeNode->pMergeKeys, i); - EXPLAIN_ROW_APPEND(EXPLAIN_STRING_TYPE_FORMAT, nodesGetNameFromColumnNode(ptn->pExpr)); - EXPLAIN_ROW_APPEND(EXPLAIN_BLANK_FORMAT); - EXPLAIN_ROW_APPEND(EXPLAIN_STRING_TYPE_FORMAT, EXPLAIN_ORDER_STRING(ptn->order)); - if (i != LIST_LENGTH(pMergeNode->pMergeKeys) - 1) { - EXPLAIN_ROW_APPEND(EXPLAIN_COMMA_FORMAT); + for (int32_t i = 0; i < LIST_LENGTH(pMergeNode->pMergeKeys); ++i) { + SOrderByExprNode *ptn = (SOrderByExprNode *)nodesListGetNode(pMergeNode->pMergeKeys, i); + EXPLAIN_ROW_APPEND(EXPLAIN_STRING_TYPE_FORMAT, nodesGetNameFromColumnNode(ptn->pExpr)); + EXPLAIN_ROW_APPEND(EXPLAIN_BLANK_FORMAT); + EXPLAIN_ROW_APPEND(EXPLAIN_STRING_TYPE_FORMAT, EXPLAIN_ORDER_STRING(ptn->order)); + if (i != LIST_LENGTH(pMergeNode->pMergeKeys) - 1) { + EXPLAIN_ROW_APPEND(EXPLAIN_COMMA_FORMAT); + } } + EXPLAIN_ROW_END(); + QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level + 1)); } - EXPLAIN_ROW_END(); - QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level + 1)); if (pMergeNode->node.pConditions) { EXPLAIN_ROW_NEW(level + 1, EXPLAIN_FILTER_FORMAT); diff --git a/source/libs/executor/src/mergeoperator.c b/source/libs/executor/src/mergeoperator.c index 204a9458b8..a580524e87 100755 --- 
a/source/libs/executor/src/mergeoperator.c +++ b/source/libs/executor/src/mergeoperator.c @@ -38,7 +38,8 @@ typedef struct SNonSortMergeInfo { } SNonSortMergeInfo; typedef struct SColsMergeInfo { - uint64_t srcBlkIds[2]; + SNodeList* pTargets; + uint64_t srcBlkIds[2]; } SColsMergeInfo; typedef struct SMultiwayMergeOperatorInfo { @@ -150,7 +151,7 @@ SSDataBlock* doSortMerge(SOperatorInfo* pOperator) { SSortMergeInfo* pSortMergeInfo = &pInfo->sortMergeInfo; SSortHandle* pHandle = pSortMergeInfo->pSortHandle; SSDataBlock* pDataBlock = pInfo->binfo.pRes; - SArray* pColMatchInfo = pInfo->matchInfo.pList; + SArray* pColMatchInfo = pSortMergeInfo->matchInfo.pList; int32_t capacity = pOperator->resultInfo.capacity; qDebug("start to merge final sorted rows, %s", GET_TASKID(pTaskInfo)); @@ -234,6 +235,8 @@ void destroySortMergeOperatorInfo(void* param) { pSortMergeInfo->pInputBlock = blockDataDestroy(pSortMergeInfo->pInputBlock); pSortMergeInfo->pIntermediateBlock = blockDataDestroy(pSortMergeInfo->pIntermediateBlock); + taosArrayDestroy(pSortMergeInfo->matchInfo.pList); + tsortDestroySortHandle(pSortMergeInfo->pSortHandle); taosArrayDestroy(pSortMergeInfo->pSortInfo); } @@ -298,25 +301,46 @@ int32_t openColsMergeOperator(SOperatorInfo* pOperator) { return TSDB_CODE_SUCCESS; } +int32_t copyColumnsValue(SNodeList* pNodeList, uint64_t targetBlkId, SSDataBlock* pDst, SSDataBlock* pSrc) { + bool isNull = (NULL == pSrc || pSrc->info.rows <= 0); + size_t numOfCols = LIST_LENGTH(pNodeList); + for (int32_t i = 0; i < numOfCols; ++i) { + STargetNode* pNode = (STargetNode*)nodesListGetNode(pNodeList, i); + if (nodeType(pNode->pExpr) == QUERY_NODE_COLUMN && ((SColumnNode*)pNode->pExpr)->dataBlockId == targetBlkId) { + SColumnInfoData* pDstCol = taosArrayGet(pDst->pDataBlock, pNode->slotId); + if (isNull) { + colDataSetVal(pDstCol, 0, NULL, true); + } else { + SColumnInfoData* pSrcCol = taosArrayGet(pSrc->pDataBlock, ((SColumnNode*)pNode->pExpr)->slotId); + colDataAssign(pDstCol, 
pSrcCol, 1, &pDst->info); + } + } + } + + return TSDB_CODE_SUCCESS; +} + SSDataBlock* doColsMerge(SOperatorInfo* pOperator) { SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; SMultiwayMergeOperatorInfo* pInfo = pOperator->info; SSDataBlock* pBlock = NULL; + SColsMergeInfo* pColsMerge = &pInfo->colsMergeInfo; - qDebug("start to merge no sorted rows, %s", GET_TASKID(pTaskInfo)); + qDebug("start to merge columns, %s", GET_TASKID(pTaskInfo)); for (int32_t i = 0; i < 2; ++i) { pBlock = getNextBlockFromDownstream(pOperator, i); - if (NULL == pBlock) { - TSWAP(pNonSortMerge->pSourceStatus[pNonSortMerge->sourceWorkIdx], pNonSortMerge->pSourceStatus[idx]); - pNonSortMerge->sourceWorkIdx++; - idx = NON_SORT_NEXT_SRC(pNonSortMerge, idx); - continue; + if (pBlock && pBlock->info.rows > 1) { + qError("more than 1 row returned from downstream, rows:%" PRId64, pBlock->info.rows); + T_LONG_JMP(pTaskInfo->env, TSDB_CODE_QRY_EXECUTOR_INTERNAL_ERROR); } - break; + + copyColumnsValue(pColsMerge->pTargets, pColsMerge->srcBlkIds[i], pInfo->binfo.pRes, pBlock); } - return pBlock; + pInfo->binfo.pRes->info.rows = 1; + + return pInfo->binfo.pRes; } void destroyColsMergeOperatorInfo(void* param) { @@ -390,7 +414,6 @@ SSDataBlock* doMultiwayMerge(SOperatorInfo* pOperator) { void destroyMultiwayMergeOperatorInfo(void* param) { SMultiwayMergeOperatorInfo* pInfo = (SMultiwayMergeOperatorInfo*)param; pInfo->binfo.pRes = blockDataDestroy(pInfo->binfo.pRes); - taosArrayDestroy(pInfo->matchInfo.pList); if (NULL != gMultiwayMergeFps[pInfo->type].closeFn) { (*gMultiwayMergeFps[pInfo->type].closeFn)(&pInfo->sortMergeInfo); @@ -467,6 +490,7 @@ SOperatorInfo* createMultiwayMergeOperatorInfo(SOperatorInfo** downStreams, size initResultSizeInfo(&pOperator->resultInfo, 1); blockDataEnsureCapacity(pInfo->binfo.pRes, pOperator->resultInfo.capacity); + pColsMerge->pTargets = pMergePhyNode->pTargets; pColsMerge->srcBlkIds[0] = getOperatorResultBlockId(downStreams[0], 0); pColsMerge->srcBlkIds[1] = 
getOperatorResultBlockId(downStreams[1], 0); break; diff --git a/source/libs/nodes/src/nodesCloneFuncs.c b/source/libs/nodes/src/nodesCloneFuncs.c index 028745803b..ce23928268 100644 --- a/source/libs/nodes/src/nodesCloneFuncs.c +++ b/source/libs/nodes/src/nodesCloneFuncs.c @@ -419,6 +419,7 @@ static int32_t logicScanCopy(const SScanLogicNode* pSrc, SScanLogicNode* pDst) { COPY_SCALAR_FIELD(groupSort); CLONE_NODE_LIST_FIELD(pTags); CLONE_NODE_FIELD(pSubtable); + COPY_SCALAR_FIELD(cacheLastMode); COPY_SCALAR_FIELD(igLastNull); COPY_SCALAR_FIELD(groupOrderScan); COPY_SCALAR_FIELD(onlyMetaCtbIdx); @@ -443,8 +444,14 @@ static int32_t logicAggCopy(const SAggLogicNode* pSrc, SAggLogicNode* pDst) { COPY_BASE_OBJECT_FIELD(node, logicNodeCopy); CLONE_NODE_LIST_FIELD(pGroupKeys); CLONE_NODE_LIST_FIELD(pAggFuncs); + COPY_SCALAR_FIELD(hasLastRow); + COPY_SCALAR_FIELD(hasLast); + COPY_SCALAR_FIELD(hasTimeLineFunc); + COPY_SCALAR_FIELD(onlyHasKeepOrderFunc); COPY_SCALAR_FIELD(hasGroupKeyOptimized); + COPY_SCALAR_FIELD(isGroupTb); COPY_SCALAR_FIELD(isPartTb); + COPY_SCALAR_FIELD(hasGroup); return TSDB_CODE_SUCCESS; } @@ -488,6 +495,7 @@ static int32_t logicMergeCopy(const SMergeLogicNode* pSrc, SMergeLogicNode* pDst CLONE_NODE_LIST_FIELD(pInputs); COPY_SCALAR_FIELD(numOfChannels); COPY_SCALAR_FIELD(srcGroupId); + COPY_SCALAR_FIELD(colsMerge); COPY_SCALAR_FIELD(needSort); COPY_SCALAR_FIELD(groupSort); COPY_SCALAR_FIELD(ignoreGroupId); diff --git a/source/libs/nodes/src/nodesUtilFuncs.c b/source/libs/nodes/src/nodesUtilFuncs.c index 4f6d3d95e1..71263892a5 100644 --- a/source/libs/nodes/src/nodesUtilFuncs.c +++ b/source/libs/nodes/src/nodesUtilFuncs.c @@ -1571,6 +1571,19 @@ int32_t nodesListStrictAppendList(SNodeList* pTarget, SNodeList* pSrc) { return code; } + +int32_t nodesListMakeStrictAppendList(SNodeList** pTarget, SNodeList* pSrc) { + if (NULL == *pTarget) { + *pTarget = nodesMakeList(); + if (NULL == *pTarget) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return 
TSDB_CODE_OUT_OF_MEMORY; + } + } + return nodesListStrictAppendList(*pTarget, pSrc); +} + + int32_t nodesListPushFront(SNodeList* pList, SNode* pNode) { if (NULL == pList || NULL == pNode) { return TSDB_CODE_FAILED; diff --git a/source/libs/planner/src/planLogicCreater.c b/source/libs/planner/src/planLogicCreater.c index 222aec9813..bed75b84ac 100644 --- a/source/libs/planner/src/planLogicCreater.c +++ b/source/libs/planner/src/planLogicCreater.c @@ -747,7 +747,8 @@ static int32_t createAggLogicNode(SLogicPlanContext* pCxt, SSelectStmt* pSelect, pAgg->isGroupTb = pAgg->pGroupKeys ? keysHasTbname(pAgg->pGroupKeys) : 0; pAgg->isPartTb = pSelect->pPartitionByList ? keysHasTbname(pSelect->pPartitionByList) : 0; - + pAgg->hasGroup = pAgg->pGroupKeys || pSelect->pPartitionByList; + if (TSDB_CODE_SUCCESS == code) { *pLogicNode = (SLogicNode*)pAgg; } else { diff --git a/source/libs/planner/src/planOptimizer.c b/source/libs/planner/src/planOptimizer.c index 0b3a432bec..79747d44fe 100644 --- a/source/libs/planner/src/planOptimizer.c +++ b/source/libs/planner/src/planOptimizer.c @@ -2499,21 +2499,7 @@ static bool lastRowScanOptCheckColNum(int32_t lastColNum, col_id_t lastColId, return true; } -static bool lastRowScanOptMayBeOptimized(SLogicNode* pNode) { - if (QUERY_NODE_LOGIC_PLAN_AGG != nodeType(pNode) || 1 != LIST_LENGTH(pNode->pChildren) || - QUERY_NODE_LOGIC_PLAN_SCAN != nodeType(nodesListGetNode(pNode->pChildren, 0))) { - return false; - } - - SAggLogicNode* pAgg = (SAggLogicNode*)pNode; - SScanLogicNode* pScan = (SScanLogicNode*)nodesListGetNode(pNode->pChildren, 0); - // Only one of LAST and LASTROW can appear - if (pAgg->hasLastRow == pAgg->hasLast || NULL != pAgg->pGroupKeys || NULL != pScan->node.pConditions || - !hasSuitableCache(pScan->cacheLastMode, pAgg->hasLastRow, pAgg->hasLast) || - IS_TSWINDOW_SPECIFIED(pScan->scanRange)) { - return false; - } - +static bool lastRowScanOptCheckFuncList(SLogicNode* pNode, bool* hasOtherFunc) { bool hasNonPKSelectFunc = 
false; SNode* pFunc = NULL; int32_t lastColNum = 0, selectNonPKColNum = 0; @@ -2559,13 +2545,48 @@ static bool lastRowScanOptMayBeOptimized(SLogicNode* pNode) { return false; } } else if (FUNCTION_TYPE_LAST_ROW != pAggFunc->funcType) { - return false; + *hasOtherFunc = true; } } return true; } +static bool lastRowScanOptCheckLastCache(SAggLogicNode* pAgg, SScanLogicNode* pScan) { + // Only one of LAST and LASTROW can appear + if (pAgg->hasLastRow == pAgg->hasLast || (!pAgg->hasLast && !pAgg->hasLastRow) || NULL != pAgg->pGroupKeys || NULL != pScan->node.pConditions || + !hasSuitableCache(pScan->cacheLastMode, pAgg->hasLastRow, pAgg->hasLast) || + IS_TSWINDOW_SPECIFIED(pScan->scanRange)) { + return false; + } + + return true; +} + +static bool lastRowScanOptMayBeOptimized(SLogicNode* pNode) { + if (QUERY_NODE_LOGIC_PLAN_AGG != nodeType(pNode) || 1 != LIST_LENGTH(pNode->pChildren) || + QUERY_NODE_LOGIC_PLAN_SCAN != nodeType(nodesListGetNode(pNode->pChildren, 0))) { + return false; + } + + SAggLogicNode* pAgg = (SAggLogicNode*)pNode; + SScanLogicNode* pScan = (SScanLogicNode*)nodesListGetNode(pNode->pChildren, 0); + if (!lastRowScanOptCheckLastCache(pAgg, pScan)) { + return false; + } + + bool hasOtherFunc = false; + if (!lastRowScanOptCheckFuncList(pNode, &hasOtherFunc)) { + return false; + } + + if (hasOtherFunc) { + return false; + } + + return true; +} + typedef struct SLastRowScanOptSetColDataTypeCxt { bool doAgg; SNodeList* pLastCols; @@ -2679,6 +2700,201 @@ static int32_t lastRowScanOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogic return TSDB_CODE_SUCCESS; } + +static bool splitCacheLastFuncOptMayBeOptimized(SLogicNode* pNode) { + if (QUERY_NODE_LOGIC_PLAN_AGG != nodeType(pNode) || 1 != LIST_LENGTH(pNode->pChildren) || + QUERY_NODE_LOGIC_PLAN_SCAN != nodeType(nodesListGetNode(pNode->pChildren, 0))) { + return false; + } + + SAggLogicNode* pAgg = (SAggLogicNode*)pNode; + SScanLogicNode* pScan = (SScanLogicNode*)nodesListGetNode(pNode->pChildren, 0); + if 
(!lastRowScanOptCheckLastCache(pAgg, pScan)) { + return false; + } + + bool hasOtherFunc = false; + if (!lastRowScanOptCheckFuncList(pNode, &hasOtherFunc)) { + return false; + } + + if (pAgg->hasGroup || !hasOtherFunc) { + return false; + } + + return true; +} + +static int32_t splitCacheLastFuncOptCreateAggLogicNode(SAggLogicNode** pNewAgg, SAggLogicNode* pAgg, SNodeList* pFunc, SNodeList* pTargets) { + SAggLogicNode* pNew = (SAggLogicNode*)nodesMakeNode(QUERY_NODE_LOGIC_PLAN_AGG); + if (NULL == pNew) { + nodesDestroyList(pFunc); + nodesDestroyList(pTargets); + return TSDB_CODE_OUT_OF_MEMORY; + } + + pNew->hasLastRow = false; + pNew->hasLast = false; + pNew->hasTimeLineFunc = pAgg->hasTimeLineFunc; + pNew->hasGroupKeyOptimized = false; + pNew->onlyHasKeepOrderFunc = pAgg->onlyHasKeepOrderFunc; + pNew->node.groupAction = pAgg->node.groupAction; + pNew->node.requireDataOrder = pAgg->node.requireDataOrder; + pNew->node.resultDataOrder = pAgg->node.resultDataOrder; + pNew->node.pTargets = pTargets; + pNew->pAggFuncs = pFunc; + pNew->pGroupKeys = nodesCloneList(pAgg->pGroupKeys); + pNew->node.pConditions = nodesCloneNode(pAgg->node.pConditions); + pNew->isGroupTb = pAgg->isGroupTb; + pNew->isPartTb = pAgg->isPartTb; + pNew->hasGroup = pAgg->hasGroup; + pNew->node.pChildren = nodesCloneList(pAgg->node.pChildren); + + *pNewAgg = pNew; + + return TSDB_CODE_SUCCESS; +} + +static int32_t splitCacheLastFuncOptModifyAggLogicNode(SAggLogicNode* pAgg) { + pAgg->hasTimeLineFunc = false; + pAgg->onlyHasKeepOrderFunc = true; + + return TSDB_CODE_SUCCESS; +} + +static int32_t splitCacheLastFuncOptCreateMergeLogicNode(SMergeLogicNode** pNew, SAggLogicNode* pAgg1, SAggLogicNode* pAgg2) { + SMergeLogicNode* pMerge = (SMergeLogicNode*)nodesMakeNode(QUERY_NODE_LOGIC_PLAN_MERGE); + if (NULL == pMerge) { + return TSDB_CODE_OUT_OF_MEMORY; + } + pMerge->colsMerge = true; + pMerge->numOfChannels = 2; + pMerge->srcGroupId = -1; + pMerge->node.precision = pAgg1->node.precision; + + SNode* 
pNewAgg1 = nodesCloneNode((SNode*)pAgg1); + SNode* pNewAgg2 = nodesCloneNode((SNode*)pAgg2); + if (NULL == pNewAgg1 || NULL == pNewAgg2) { + nodesDestroyNode(pNewAgg1); + nodesDestroyNode(pNewAgg2); + return TSDB_CODE_OUT_OF_MEMORY; + } + + ((SAggLogicNode*)pNewAgg1)->node.pParent = (SLogicNode*)pMerge; + ((SAggLogicNode*)pNewAgg2)->node.pParent = (SLogicNode*)pMerge; + + SNode* pNode = NULL; + FOREACH(pNode, ((SAggLogicNode*)pNewAgg1)->node.pChildren) { + ((SLogicNode*)pNode)->pParent = (SLogicNode*)pNewAgg1; + } + FOREACH(pNode, ((SAggLogicNode*)pNewAgg2)->node.pChildren) { + ((SLogicNode*)pNode)->pParent = (SLogicNode*)pNewAgg2; + } + + int32_t code = nodesListMakeStrictAppendList(&pMerge->node.pTargets, nodesCloneList(pAgg1->node.pTargets)); + if (TSDB_CODE_SUCCESS == code) { + code = nodesListMakeStrictAppendList(&pMerge->node.pTargets, nodesCloneList(pAgg2->node.pTargets)); + } + if (TSDB_CODE_SUCCESS == code) { + code = nodesListMakeStrictAppend(&pMerge->node.pChildren, pNewAgg1); + } + if (TSDB_CODE_SUCCESS == code) { + code = nodesListMakeStrictAppend(&pMerge->node.pChildren, pNewAgg2); + } + + if (TSDB_CODE_SUCCESS != code) { + nodesDestroyNode(pNewAgg1); + nodesDestroyNode(pNewAgg2); + nodesDestroyNode((SNode*)pMerge); + } else { + *pNew = pMerge; + } + + return code; +} + +static int32_t splitCacheLastFuncOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogicSubplan) { + SAggLogicNode* pAgg = (SAggLogicNode*)optFindPossibleNode(pLogicSubplan->pNode, splitCacheLastFuncOptMayBeOptimized); + + if (NULL == pAgg) { + return TSDB_CODE_SUCCESS; + } + + SNode* pNode = NULL; + SNodeList* pAggFuncList = NULL; + { + WHERE_EACH(pNode, pAgg->pAggFuncs) { + SFunctionNode* pFunc = (SFunctionNode*)pNode; + int32_t funcType = pFunc->funcType; + if (FUNCTION_TYPE_LAST_ROW != funcType && FUNCTION_TYPE_LAST != funcType && + FUNCTION_TYPE_SELECT_VALUE != funcType && FUNCTION_TYPE_GROUP_KEY != funcType) { + nodesListMakeStrictAppend(&pAggFuncList, nodesCloneNode(pNode)); + 
ERASE_NODE(pAgg->pAggFuncs); + continue; + } + WHERE_NEXT; + } + } + + if (NULL == pAggFuncList) { + planError("empty agg func list while splite projections"); + return TSDB_CODE_PLAN_INTERNAL_ERROR; + } + + SNodeList* pTargets = NULL; + { + WHERE_EACH(pNode, pAgg->node.pTargets) { + SColumnNode* pCol = (SColumnNode*)pNode; + SNode* pFuncNode = NULL; + bool found = false; + FOREACH(pFuncNode, pAggFuncList) { + SFunctionNode* pFunc = (SFunctionNode*)pFuncNode; + if (0 == strcmp(pFunc->node.aliasName, pCol->colName)) { + nodesListMakeStrictAppend(&pTargets, nodesCloneNode(pNode)); + found = true; + break; + } + } + if (found) { + ERASE_NODE(pAgg->node.pTargets); + continue; + } + WHERE_NEXT; + } + } + + if (NULL == pTargets) { + planError("empty target func list while splite projections"); + nodesDestroyList(pAggFuncList); + return TSDB_CODE_PLAN_INTERNAL_ERROR; + } + + SMergeLogicNode* pMerge = NULL; + SAggLogicNode* pNewAgg = NULL; + int32_t code = splitCacheLastFuncOptCreateAggLogicNode(&pNewAgg, pAgg, pAggFuncList, pTargets); + if (TSDB_CODE_SUCCESS == code) { + code = splitCacheLastFuncOptModifyAggLogicNode(pAgg); + } + if (TSDB_CODE_SUCCESS == code) { + code = splitCacheLastFuncOptCreateMergeLogicNode(&pMerge, pNewAgg, pAgg); + } + if (TSDB_CODE_SUCCESS == code) { + code = replaceLogicNode(pLogicSubplan, (SLogicNode*)pAgg, (SLogicNode*)pMerge); + } + + nodesDestroyNode((SNode *)pAgg); + nodesDestroyNode((SNode *)pNewAgg); + + if (TSDB_CODE_SUCCESS != code) { + nodesDestroyNode((SNode *)pMerge); + } + + pCxt->optimized = true; + return code; +} + + + // merge projects static bool mergeProjectsMayBeOptimized(SLogicNode* pNode) { if (QUERY_NODE_LOGIC_PLAN_PROJECT != nodeType(pNode) || 1 != LIST_LENGTH(pNode->pChildren)) { @@ -3762,6 +3978,7 @@ static const SOptimizeRule optimizeRuleSet[] = { {.pName = "MergeProjects", .optimizeFunc = mergeProjectsOptimize}, {.pName = "RewriteTail", .optimizeFunc = rewriteTailOptimize}, {.pName = "RewriteUnique", .optimizeFunc = 
rewriteUniqueOptimize}, + {.pName = "splitCacheLastFunc", .optimizeFunc = splitCacheLastFuncOptimize}, {.pName = "LastRowScan", .optimizeFunc = lastRowScanOptimize}, {.pName = "TagScan", .optimizeFunc = tagScanOptimize}, {.pName = "TableCountScan", .optimizeFunc = tableCountScanOptimize}, diff --git a/source/libs/planner/src/planPhysiCreater.c b/source/libs/planner/src/planPhysiCreater.c index 0e80f5bcec..6780dcd681 100644 --- a/source/libs/planner/src/planPhysiCreater.c +++ b/source/libs/planner/src/planPhysiCreater.c @@ -2001,6 +2001,9 @@ static int32_t createMergePhysiNode(SPhysiPlanContext* pCxt, SNodeList* pChildre SDataBlockDescNode* pRightDesc = ((SPhysiNode*)nodesListGetNode(pChildren, 1))->pOutputDataBlockDesc; code = setListSlotId(pCxt, pLeftDesc->dataBlockId, pRightDesc->dataBlockId, pMergeLogicNode->node.pTargets, &pMerge->pTargets); + if (TSDB_CODE_SUCCESS == code) { + code = addDataBlockSlots(pCxt, pMerge->pTargets, pMerge->node.pOutputDataBlockDesc); + } } if (TSDB_CODE_SUCCESS == code) { diff --git a/source/libs/planner/src/planSpliter.c b/source/libs/planner/src/planSpliter.c index bf5fe901a6..fd6706e43e 100644 --- a/source/libs/planner/src/planSpliter.c +++ b/source/libs/planner/src/planSpliter.c @@ -1588,9 +1588,12 @@ typedef struct SSmaIndexSplitInfo { static bool smaIdxSplFindSplitNode(SSplitContext* pCxt, SLogicSubplan* pSubplan, SLogicNode* pNode, SSmaIndexSplitInfo* pInfo) { if (QUERY_NODE_LOGIC_PLAN_MERGE == nodeType(pNode) && LIST_LENGTH(pNode->pChildren) > 1) { - pInfo->pMerge = (SMergeLogicNode*)pNode; - pInfo->pSubplan = pSubplan; - return true; + int32_t nodeType = nodeType(nodesListGetNode(pNode->pChildren, 0)); + if (nodeType == QUERY_NODE_LOGIC_PLAN_EXCHANGE || nodeType == QUERY_NODE_LOGIC_PLAN_MERGE) { + pInfo->pMerge = (SMergeLogicNode*)pNode; + pInfo->pSubplan = pSubplan; + return true; + } } return false; } From 6352b28b4c8a1d89f387f0b1b5eab20d76041a25 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Thu, 9 Nov 2023 09:47:55 +0800 
Subject: [PATCH 19/79] fix: split scan columns from cache last scan --- source/libs/command/inc/commandInt.h | 2 +- source/libs/executor/src/mergeoperator.c | 9 +++++ source/libs/planner/src/planOptimizer.c | 49 ++++++++++++++++++++---- 3 files changed, 51 insertions(+), 9 deletions(-) diff --git a/source/libs/command/inc/commandInt.h b/source/libs/command/inc/commandInt.h index 5d4bf4e0ec..bb0d8a32dd 100644 --- a/source/libs/command/inc/commandInt.h +++ b/source/libs/command/inc/commandInt.h @@ -59,7 +59,7 @@ extern "C" { #define EXPLAIN_TIME_WINDOWS_FORMAT "Time Window: interval=%" PRId64 "%c offset=%" PRId64 "%c sliding=%" PRId64 "%c" #define EXPLAIN_WINDOW_FORMAT "Window: gap=%" PRId64 #define EXPLAIN_RATIO_TIME_FORMAT "Ratio: %f" -#define EXPLAIN_MERGE_FORMAT "SortMerge" +#define EXPLAIN_MERGE_FORMAT "Merge" #define EXPLAIN_MERGE_KEYS_FORMAT "Merge Key: " #define EXPLAIN_IGNORE_GROUPID_FORMAT "Ignore Group Id: %s" #define EXPLAIN_PARTITION_KETS_FORMAT "Partition Key: " diff --git a/source/libs/executor/src/mergeoperator.c b/source/libs/executor/src/mergeoperator.c index a580524e87..093b6ab11e 100755 --- a/source/libs/executor/src/mergeoperator.c +++ b/source/libs/executor/src/mergeoperator.c @@ -325,6 +325,7 @@ SSDataBlock* doColsMerge(SOperatorInfo* pOperator) { SMultiwayMergeOperatorInfo* pInfo = pOperator->info; SSDataBlock* pBlock = NULL; SColsMergeInfo* pColsMerge = &pInfo->colsMergeInfo; + int32_t nullBlkNum = 0; qDebug("start to merge columns, %s", GET_TASKID(pTaskInfo)); @@ -333,11 +334,19 @@ SSDataBlock* doColsMerge(SOperatorInfo* pOperator) { if (pBlock && pBlock->info.rows > 1) { qError("more than 1 row returned from downstream, rows:%" PRId64, pBlock->info.rows); T_LONG_JMP(pTaskInfo->env, TSDB_CODE_QRY_EXECUTOR_INTERNAL_ERROR); + } else if (NULL == pBlock) { + nullBlkNum++; } copyColumnsValue(pColsMerge->pTargets, pColsMerge->srcBlkIds[i], pInfo->binfo.pRes, pBlock); } + setOperatorCompleted(pOperator); + + if (2 == nullBlkNum) { + return NULL; + 
} + pInfo->binfo.pRes->info.rows = 1; return pInfo->binfo.pRes; diff --git a/source/libs/planner/src/planOptimizer.c b/source/libs/planner/src/planOptimizer.c index d9f24313d1..3871928f81 100644 --- a/source/libs/planner/src/planOptimizer.c +++ b/source/libs/planner/src/planOptimizer.c @@ -2590,6 +2590,7 @@ static bool lastRowScanOptMayBeOptimized(SLogicNode* pNode) { typedef struct SLastRowScanOptSetColDataTypeCxt { bool doAgg; SNodeList* pLastCols; + SNodeList* pOtherCols; } SLastRowScanOptSetColDataTypeCxt; static EDealRes lastRowScanOptSetColDataType(SNode* pNode, void* pContext) { @@ -2632,6 +2633,33 @@ static void lastRowScanOptSetLastTargets(SNodeList* pTargets, SNodeList* pLastCo } } +static void lastRowScanOptRemoveUslessTargets(SNodeList* pTargets, SNodeList* pList1, SNodeList* pList2) { + SNode* pTarget = NULL; + WHERE_EACH(pTarget, pTargets) { + bool found = false; + SNode* pCol = NULL; + FOREACH(pCol, pList1) { + if (nodesEqualNode(pCol, pTarget)) { + found = true; + break; + } + } + if (!found) { + FOREACH(pCol, pList2) { + if (nodesEqualNode(pCol, pTarget)) { + found = true; + break; + } + } + } + if (!found) { + ERASE_NODE(pTargets); + continue; + } + WHERE_NEXT; + } +} + static int32_t lastRowScanOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogicSubplan) { SAggLogicNode* pAgg = (SAggLogicNode*)optFindPossibleNode(pLogicSubplan->pNode, lastRowScanOptMayBeOptimized); @@ -2639,7 +2667,7 @@ static int32_t lastRowScanOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogic return TSDB_CODE_SUCCESS; } - SLastRowScanOptSetColDataTypeCxt cxt = {.doAgg = true, .pLastCols = NULL}; + SLastRowScanOptSetColDataTypeCxt cxt = {.doAgg = true, .pLastCols = NULL, .pOtherCols = NULL}; SNode* pNode = NULL; SColumnNode* pPKTsCol = NULL; SColumnNode* pNonPKCol = NULL; @@ -2660,14 +2688,18 @@ static int32_t lastRowScanOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogic nodesWalkExpr(nodesListGetNode(pFunc->pParameterList, 0), lastRowScanOptSetColDataType, &cxt); 
nodesListErase(pFunc->pParameterList, nodesListGetCell(pFunc->pParameterList, 1)); } - } else if (FUNCTION_TYPE_SELECT_VALUE == funcType) { + } else { pNode = nodesListGetNode(pFunc->pParameterList, 0); - if (nodeType(pNode) == QUERY_NODE_COLUMN) { - SColumnNode* pCol = (SColumnNode*)pNode; - if (pCol->colId == PRIMARYKEY_TIMESTAMP_COL_ID) { - pPKTsCol = pCol; - } else { - pNonPKCol = pCol; + nodesListMakeAppend(&cxt.pOtherCols, pNode); + + if (FUNCTION_TYPE_SELECT_VALUE == funcType) { + if (nodeType(pNode) == QUERY_NODE_COLUMN) { + SColumnNode* pCol = (SColumnNode*)pNode; + if (pCol->colId == PRIMARYKEY_TIMESTAMP_COL_ID) { + pPKTsCol = pCol; + } else { + pNonPKCol = pCol; + } } } } @@ -2681,6 +2713,7 @@ static int32_t lastRowScanOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogic lastRowScanOptSetLastTargets(pScan->pScanCols, cxt.pLastCols, true); nodesWalkExprs(pScan->pScanPseudoCols, lastRowScanOptSetColDataType, &cxt); lastRowScanOptSetLastTargets(pScan->node.pTargets, cxt.pLastCols, false); + lastRowScanOptRemoveUslessTargets(pScan->node.pTargets, cxt.pLastCols, cxt.pOtherCols); if (pPKTsCol && pScan->node.pTargets->length == 1) { // when select last(ts),ts from ..., we add another ts to targets sprintf(pPKTsCol->colName, "#sel_val.%p", pPKTsCol); From 30cbf9ae5c3b89d5f41e723a6e523f4d37efa269 Mon Sep 17 00:00:00 2001 From: slzhou Date: Thu, 9 Nov 2023 10:31:01 +0800 Subject: [PATCH 20/79] feature: if num of tbnames is greater than vgroups, do not update vgroupList of table --- source/libs/parser/src/parTranslater.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index 4aa2e0263e..d38122a6ad 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -4116,6 +4116,10 @@ static int32_t findEqualCondTbname(STranslateContext* pCxt, SNode* pWhere, SArra static int32_t findVgroupsFromEqualTbname(STranslateContext* pCxt, 
SEqCondTbNameTableInfo* pInfo, SVgroupsInfo* vgsInfo) { int32_t nVgroups = 0; int32_t nTbls = taosArrayGetSize(pInfo->aTbnames); + + if (nTbls >= pInfo->pRealTable->pVgroupList->numOfVgroups) { + vgsInfo->numOfVgroups = 0; + } for (int j = 0; j < nTbls; ++j) { char* dbName = pInfo->pRealTable->table.dbName; From c1f935bd124956fd83cf6e5e1772c367330296bf Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Thu, 9 Nov 2023 14:49:04 +0800 Subject: [PATCH 21/79] fix: const value replace issue --- source/libs/parser/src/parCalcConst.c | 11 +++++++++-- tests/parallel_test/cases.task | 1 + tests/script/tsim/query/const.sim | 11 +++++++++++ 3 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 tests/script/tsim/query/const.sim diff --git a/source/libs/parser/src/parCalcConst.c b/source/libs/parser/src/parCalcConst.c index 0657f1a43d..441f4da3b1 100644 --- a/source/libs/parser/src/parCalcConst.c +++ b/source/libs/parser/src/parCalcConst.c @@ -176,12 +176,15 @@ static int32_t calcConstStmtCondition(SCalcConstContext* pCxt, SNode** pCond, bo static EDealRes doFindAndReplaceNode(SNode** pNode, void* pContext) { SCalcConstContext* pCxt = pContext; if (pCxt->replaceCxt.pTarget == *pNode) { + char aliasName[TSDB_COL_NAME_LEN] = {0}; + strcpy(aliasName, ((SExprNode*)*pNode)->aliasName); nodesDestroyNode(*pNode); *pNode = nodesCloneNode(pCxt->replaceCxt.pNew); if (NULL == *pNode) { pCxt->code = TSDB_CODE_OUT_OF_MEMORY; return DEAL_RES_ERROR; } + strcpy(((SExprNode*)*pNode)->aliasName, aliasName); pCxt->replaceCxt.replaced = true; return DEAL_RES_END; @@ -211,7 +214,6 @@ static int32_t calcConstProject(SCalcConstContext* pCxt, SNode* pProject, bool d } char aliasName[TSDB_COL_NAME_LEN] = {0}; - strcpy(aliasName, ((SExprNode*)pProject)->aliasName); int32_t code = TSDB_CODE_SUCCESS; if (dual) { code = scalarCalculateConstantsFromDual(pProject, pNew); @@ -219,15 +221,20 @@ static int32_t calcConstProject(SCalcConstContext* pCxt, SNode* pProject, bool d code = 
scalarCalculateConstants(pProject, pNew); } if (TSDB_CODE_SUCCESS == code) { - strcpy(((SExprNode*)*pNew)->aliasName, aliasName); if (QUERY_NODE_VALUE == nodeType(*pNew) && NULL != pAssociation) { int32_t size = taosArrayGetSize(pAssociation); for (int32_t i = 0; i < size; ++i) { SAssociationNode* pAssNode = taosArrayGet(pAssociation, i); SNode** pCol = pAssNode->pPlace; if (*pCol == pAssNode->pAssociationNode) { + strcpy(aliasName, ((SExprNode*)*pCol)->aliasName); + SArray* pOrigAss = NULL; + TSWAP(((SExprNode*)*pCol)->pAssociation, pOrigAss); nodesDestroyNode(*pCol); *pCol = nodesCloneNode(*pNew); + TSWAP(pOrigAss, ((SExprNode*)*pCol)->pAssociation); + taosArrayDestroy(pOrigAss); + strcpy(((SExprNode*)*pCol)->aliasName, aliasName); if (NULL == *pCol) { code = TSDB_CODE_OUT_OF_MEMORY; break; diff --git a/tests/parallel_test/cases.task b/tests/parallel_test/cases.task index 28aa8744fd..9fd3625b4c 100644 --- a/tests/parallel_test/cases.task +++ b/tests/parallel_test/cases.task @@ -1294,6 +1294,7 @@ e ,,y,script,./test.sh -f tsim/tagindex/add_index.sim ,,n,script,./test.sh -f tsim/tagindex/sma_and_tag_index.sim ,,y,script,./test.sh -f tsim/view/view.sim +,,y,script,./test.sh -f tsim/query/const.sim #develop test diff --git a/tests/script/tsim/query/const.sim b/tests/script/tsim/query/const.sim new file mode 100644 index 0000000000..08f2b909c1 --- /dev/null +++ b/tests/script/tsim/query/const.sim @@ -0,0 +1,11 @@ +system sh/stop_dnodes.sh +system sh/deploy.sh -n dnode1 -i 1 +system sh/exec.sh -n dnode1 -s start +sql connect + +sql select b.z from (select c.a as z from (select 'a' as a) c) b; +if $rows != 1 then + return -1 +endi + +system sh/exec.sh -n dnode1 -s stop -x SIGINT From aa3ba74f6e1591c42f81e1a8316194815a4c9c56 Mon Sep 17 00:00:00 2001 From: kailixu Date: Thu, 9 Nov 2023 15:03:14 +0800 Subject: [PATCH 22/79] chore: code optimization --- source/common/src/tglobal.c | 2 +- source/dnode/vnode/src/sma/smaRollup.c | 5 +++-- 2 files changed, 4 insertions(+), 3 
deletions(-) diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index d441b22aa3..b4224e7364 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -605,7 +605,7 @@ static int32_t taosAddServerCfg(SConfig *pCfg) { 0) return -1; - tsNumOfVnodeRsmaThreads = tsNumOfCores; + tsNumOfVnodeRsmaThreads = tsNumOfCores / 4; tsNumOfVnodeRsmaThreads = TMAX(tsNumOfVnodeRsmaThreads, 4); if (cfgAddInt32(pCfg, "numOfVnodeRsmaThreads", tsNumOfVnodeRsmaThreads, 1, 1024, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 22c9cc6260..519f0dde4f 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -21,6 +21,7 @@ #define RSMA_FETCH_DELAY_MAX (120000) // ms #define RSMA_FETCH_ACTIVE_MAX (1000) // ms #define RSMA_FETCH_INTERVAL (5000) // ms +#define RSMA_SUBMIT_HEAD_LEN (13) // type(int8_t) + len(int32_t) + version(int64_t) #define RSMA_NEED_FETCH(r) (RSMA_INFO_ITEM((r), 0)->fetchLevel || RSMA_INFO_ITEM((r), 1)->fetchLevel) @@ -667,7 +668,7 @@ _exit: */ static int32_t tdExecuteRSmaImplAsync(SSma *pSma, int64_t version, const void *pMsg, int32_t len, int32_t inputType, SRSmaInfo *pInfo, tb_uid_t suid) { - int32_t size = sizeof(int8_t) + sizeof(int32_t) + sizeof(int64_t) + len; // type + len + version + payload + int32_t size = RSMA_SUBMIT_HEAD_LEN + len; // header(type+len+version) + payload void *qItem = taosAllocateQitem(size, DEF_QITEM, 0); if (!qItem) { @@ -1222,7 +1223,7 @@ _end: static void tdFreeRSmaSubmitItems(SArray *pItems) { for (int32_t i = 0; i < taosArrayGetSize(pItems); ++i) { SPackedData *packData = taosArrayGet(pItems, i); - taosFreeQitem(POINTER_SHIFT(packData->msgStr, -sizeof(int8_t) - sizeof(int32_t) - sizeof(int64_t))); + taosFreeQitem(POINTER_SHIFT(packData->msgStr, -RSMA_SUBMIT_HEAD_LEN)); } taosArrayClear(pItems); } From 97804695d214731c27c601cd7862dd0668c74864 Mon Sep 17 00:00:00 
2001 From: slzhou Date: Thu, 9 Nov 2023 15:59:49 +0800 Subject: [PATCH 23/79] query: add test case --- tests/system-test/2-query/tbname_vgroup.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/system-test/2-query/tbname_vgroup.py b/tests/system-test/2-query/tbname_vgroup.py index 1e489356e6..90be1bf394 100644 --- a/tests/system-test/2-query/tbname_vgroup.py +++ b/tests/system-test/2-query/tbname_vgroup.py @@ -169,6 +169,15 @@ class TDTestCase: tdSql.query("explain select * from st where tbname='ct1'") tdSql.checkRows(2) + + tdSql.query("select table_name, vgroup_id from information_schema.ins_tables where db_name='dbvg' and type='CHILD_TABLE'"); + print(tdSql.queryResult); + + tdSql.query("explain select * from st where tbname in ('ct1', 'ct2')") + if tdSql.queryResult[0][0].count("Data Exchange 2:1") == 0: + tdLog.exit("failed, not two vgroups") + else: + tdLog.info("select * from st where tbname in ('ct1', 'ct2') involves two vgroups") tdSql.execute('drop database dbvg;') From e269d042b6e3a6b83609075ceea97b3b0e86242d Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Thu, 9 Nov 2023 16:28:35 +0800 Subject: [PATCH 24/79] fix: add test cases --- tests/parallel_test/cases.task | 1 + tests/script/tsim/query/cache_last.sim | 105 +++++++++++++++++++++++++ 2 files changed, 106 insertions(+) create mode 100644 tests/script/tsim/query/cache_last.sim diff --git a/tests/parallel_test/cases.task b/tests/parallel_test/cases.task index 28aa8744fd..91f1ef1aa0 100644 --- a/tests/parallel_test/cases.task +++ b/tests/parallel_test/cases.task @@ -1294,6 +1294,7 @@ e ,,y,script,./test.sh -f tsim/tagindex/add_index.sim ,,n,script,./test.sh -f tsim/tagindex/sma_and_tag_index.sim ,,y,script,./test.sh -f tsim/view/view.sim +,,y,script,./test.sh -f tsim/query/cache_last.sim #develop test diff --git a/tests/script/tsim/query/cache_last.sim b/tests/script/tsim/query/cache_last.sim new file mode 100644 index 0000000000..8247a2f723 --- /dev/null +++ 
b/tests/script/tsim/query/cache_last.sim @@ -0,0 +1,105 @@ +system sh/stop_dnodes.sh +system sh/deploy.sh -n dnode1 -i 1 +system sh/exec.sh -n dnode1 -s start +sql connect + +sql drop database if exists db1; +sql create database if not exists db1 cachemodel 'both' cachesize 10; +sql use db1; +sql create stable sta (ts timestamp, f1 double, f2 binary(200)) tags(t1 int); +sql create table tba1 using sta tags(1); +sql insert into tba1 values ('2022-04-26 15:15:01', 1.0, "a"); +sql insert into tba1 values ('2022-04-26 15:15:02', 2.0, "b"); +sql insert into tba1 values ('2022-04-26 15:15:04', 4.0, "b"); +sql insert into tba1 values ('2022-04-26 15:15:05', 5.0, "b"); +sql create table tba2 using sta tags(2); +sql insert into tba2 values ('2022-04-26 15:15:01', 1.2, "a"); +sql insert into tba2 values ('2022-04-26 15:15:02', 2.2, "b"); +sql create table tba3 using sta tags(3); +sql insert into tba3 values ('2022-04-26 15:15:10', 1.3, "a"); +sql insert into tba3 values ('2022-04-26 15:15:11', 2.3, "b"); +sql select count(*), last(*) from sta; +if $rows != 1 then + return -1 +endi +if $data00 != 8 then + return -1 +endi +if $data01 != @22-04-26 15:15:11.000@ then + return -1 +endi +if $data02 != 2.300000000 then + return -1 +endi +if $data03 != b then + return -1 +endi +sql explain select count(*), last(*) from sta; +if $data00 != @-> Merge (columns=4 width=226 input_order=unknown output_order=unknown mode=column)@ then + return -1 +endi +sql explain select first(f1), last(*) from sta; +if $data00 != @-> Merge (columns=4 width=226 input_order=unknown output_order=unknown mode=column)@ then + return -1 +endi +sql select first(f1), last(*) from sta; +if $rows != 1 then + return -1 +endi +sql select last_row(f1), last(f1) from sta; +if $rows != 1 then + return -1 +endi +sql select count(*), last_row(f1), last(f1) from sta; +if $rows != 1 then + return -1 +endi +sql explain select count(*), last_row(f1), last(f1) from sta; +if $data00 != @-> Aggragate (functions=3 width=24 
input_order=desc )@ then + return -1 +endi +sql_error select count(*), last_row(f1), min(f1), f1 from sta; +sql select count(*), last_row(f1), min(f1),tbname from sta partition by tbname; +if $rows != 3 then + return -1 +endi +sql explain select count(*), last_row(f1), min(f1),tbname from sta partition by tbname; +if $data00 != @-> Data Exchange 2:1 (width=296)@ then + return -1 +endi +sql explain select count(*), last_row(f1), min(f1) from sta; +if $data00 != @-> Merge (columns=3 width=24 input_order=unknown output_order=unknown mode=column)@ then + return -1 +endi +sql explain select count(*), last_row(f1), min(f1),tbname from sta group by tbname; +if $data00 != @-> Data Exchange 2:1 (width=296)@ then + return -1 +endi +sql explain select count(*), last_row(f1), min(f1),t1 from sta partition by t1; +if $data00 != @-> Aggragate (functions=4 width=28 input_order=desc )@ then + return -1 +endi +sql explain select count(*), last_row(f1), min(f1),t1 from sta group by t1; +if $data00 != @-> Aggragate (functions=4 width=28 input_order=desc )@ then + return -1 +endi +sql explain select distinct count(*), last_row(f1), min(f1) from sta; +if $data10 != @ -> Merge (columns=3 width=24 input_order=unknown output_order=unknown mode=column)@ then + print $data10 + return -1 +endi +sql explain select count(*), last_row(f1), min(f1) from sta interval(1s); +if $data10 != @ -> Merge (columns=4 width=66 input_order=asc output_order=asc mode=sort)@ then + return -1 +endi +sql explain select distinct count(*), last_row(f1), min(f1) from tba1; +if $data10 != @ -> Merge (columns=3 width=24 input_order=unknown output_order=unknown mode=column)@ then + return -1 +endi +sql select distinct count(*), last_row(f1), min(f1) from tba1; +if $rows != 1 then + return -1 +endi + + +system sh/exec.sh -n dnode1 -s stop -x SIGINT From 2fbf0a532dd71428e386433ed4d512bee19c81e2 Mon Sep 17 00:00:00 2001 From: dmchen Date: Thu, 9 Nov 2023 09:51:35 +0000 Subject: [PATCH 25/79] fix/TD-27243 --- 
source/dnode/vnode/src/vnd/vnodeSvr.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index c6c93e3d3f..eadfd39d0b 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -928,6 +928,17 @@ static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq, goto _exit; } + if(tsEnableAuditCreateTable){ + char* str = taosMemoryCalloc(1, TSDB_TABLE_FNAME_LEN); + if (str == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + rcode = -1; + goto _exit; + } + strcpy(str, pCreateReq->name); + taosArrayPush(tbNames, &str); + } + // validate hash sprintf(tbName, "%s.%s", pVnode->config.dbname, pCreateReq->name); if (vnodeValidateTableHash(pVnode, tbName) < 0) { @@ -951,12 +962,6 @@ static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq, } taosArrayPush(rsp.pArray, &cRsp); - - if (tsEnableAuditCreateTable) { - char *str = taosMemoryCalloc(1, TSDB_TABLE_FNAME_LEN); - strcpy(str, pCreateReq->name); - taosArrayPush(tbNames, &str); - } } vDebug("vgId:%d, add %d new created tables into query table list", TD_VID(pVnode), (int32_t)taosArrayGetSize(tbUids)); @@ -985,10 +990,10 @@ static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq, tNameFromString(&name, pVnode->config.dbname, T_NAME_ACCT | T_NAME_DB); SStringBuilder sb = {0}; - for (int32_t iReq = 0; iReq < req.nReqs; iReq++) { - char **key = (char **)taosArrayGet(tbNames, iReq); + for(int32_t i = 0; i < tbNames->size; i++){ + char** key = (char**)taosArrayGet(tbNames, i); taosStringBuilderAppendStringLen(&sb, *key, strlen(*key)); - if (iReq < req.nReqs - 1) { + if(i < tbNames->size - 1){ taosStringBuilderAppendChar(&sb, ','); } taosMemoryFreeClear(*key); From dd0ac98a5f00a320da8983afd13e34eeee0fd37c Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Thu, 9 Nov 2023 19:25:44 +0800 Subject: [PATCH 26/79] fix: correct no 
data load --- source/libs/command/inc/commandInt.h | 1 + source/libs/command/src/explain.c | 18 ++++++++++++++++++ source/libs/planner/src/planOptimizer.c | 8 ++++++++ 3 files changed, 27 insertions(+) diff --git a/source/libs/command/inc/commandInt.h b/source/libs/command/inc/commandInt.h index bb0d8a32dd..d7ded9d6f1 100644 --- a/source/libs/command/inc/commandInt.h +++ b/source/libs/command/inc/commandInt.h @@ -87,6 +87,7 @@ extern "C" { #define EXPLAIN_WIDTH_FORMAT "width=%d" #define EXPLAIN_SCAN_ORDER_FORMAT "order=[asc|%d desc|%d]" #define EXPLAIN_SCAN_MODE_FORMAT "mode=%s" +#define EXPLAIN_SCAN_DATA_LOAD_FORMAT "data_load=%s" #define EXPLAIN_GROUPS_FORMAT "groups=%d" #define EXPLAIN_WIDTH_FORMAT "width=%d" #define EXPLAIN_INTERVAL_VALUE_FORMAT "interval=%" PRId64 "%c" diff --git a/source/libs/command/src/explain.c b/source/libs/command/src/explain.c index 0f2a1e2f29..66b50bcb47 100644 --- a/source/libs/command/src/explain.c +++ b/source/libs/command/src/explain.c @@ -20,6 +20,7 @@ #include "tcommon.h" #include "tdatablock.h" #include "systable.h" +#include "functionMgt.h" int32_t qExplainGenerateResNode(SPhysiNode *pNode, SExplainGroup *group, SExplainResNode **pRes); int32_t qExplainAppendGroupResRows(void *pCtx, int32_t groupId, int32_t level, bool singleChannel); @@ -312,6 +313,21 @@ static char* qExplainGetScanMode(STableScanPhysiNode* pScan) { return "ts_order"; } +static char* qExplainGetScanDataLoad(STableScanPhysiNode* pScan) { + switch (pScan->dataRequired) { + case FUNC_DATA_REQUIRED_DATA_LOAD: + return "data"; + case FUNC_DATA_REQUIRED_SMA_LOAD: + return "sma"; + case FUNC_DATA_REQUIRED_NOT_LOAD: + return "no"; + default: + break; + } + + return "unknown"; +} + int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, int32_t level) { int32_t tlen = 0; bool isVerboseLine = false; @@ -387,6 +403,8 @@ int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, i EXPLAIN_ROW_APPEND(EXPLAIN_SCAN_ORDER_FORMAT, 
pTblScanNode->scanSeq[0], pTblScanNode->scanSeq[1]); EXPLAIN_ROW_APPEND(EXPLAIN_BLANK_FORMAT); EXPLAIN_ROW_APPEND(EXPLAIN_SCAN_MODE_FORMAT, qExplainGetScanMode(pTblScanNode)); + EXPLAIN_ROW_APPEND(EXPLAIN_BLANK_FORMAT); + EXPLAIN_ROW_APPEND(EXPLAIN_SCAN_DATA_LOAD_FORMAT, qExplainGetScanDataLoad(pTblScanNode)); EXPLAIN_ROW_APPEND(EXPLAIN_RIGHT_PARENTHESIS_FORMAT); EXPLAIN_ROW_END(); QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level)); diff --git a/source/libs/planner/src/planOptimizer.c b/source/libs/planner/src/planOptimizer.c index 3871928f81..99fed47b92 100644 --- a/source/libs/planner/src/planOptimizer.c +++ b/source/libs/planner/src/planOptimizer.c @@ -26,6 +26,7 @@ #define OPTIMIZE_FLAG_PUSH_DOWN_CONDE OPTIMIZE_FLAG_MASK(1) #define OPTIMIZE_FLAG_SET_MASK(val, mask) (val) |= (mask) +#define OPTIMIZE_FLAG_CLEAR_MASK(val, mask) (val) &= (~(mask)) #define OPTIMIZE_FLAG_TEST_MASK(val, mask) (((val) & (mask)) != 0) typedef struct SOptimizeContext { @@ -2783,6 +2784,13 @@ static int32_t splitCacheLastFuncOptCreateAggLogicNode(SAggLogicNode** pNewAgg, pNew->hasGroup = pAgg->hasGroup; pNew->node.pChildren = nodesCloneList(pAgg->node.pChildren); + SNode* pNode = NULL; + FOREACH(pNode, pNew->node.pChildren) { + if (QUERY_NODE_LOGIC_PLAN_SCAN == nodeType(pNode)) { + OPTIMIZE_FLAG_CLEAR_MASK(((SScanLogicNode*)pNode)->node.optimizedFlag, OPTIMIZE_FLAG_SCAN_PATH); + } + } + *pNewAgg = pNew; return TSDB_CODE_SUCCESS; From 572eb691fb5afd2974389869621d400ce8b54fc9 Mon Sep 17 00:00:00 2001 From: kailixu Date: Thu, 9 Nov 2023 19:51:01 +0800 Subject: [PATCH 27/79] enh: support delete msg for rsma --- source/dnode/vnode/src/inc/tq.h | 2 +- source/dnode/vnode/src/inc/vnodeInt.h | 4 +- source/dnode/vnode/src/sma/smaCommit.c | 5 +- source/dnode/vnode/src/sma/smaRollup.c | 183 +++++++++++++------------ source/dnode/vnode/src/tq/tqRead.c | 2 +- source/dnode/vnode/src/tq/tqUtil.c | 21 +-- source/dnode/vnode/src/vnd/vnodeSvr.c | 6 +- source/libs/executor/src/executor.c | 9 +- 8 
files changed, 121 insertions(+), 111 deletions(-) diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index 675bfa334a..238407b26c 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -160,7 +160,7 @@ int32_t tqResetStreamTaskStatus(STQ* pTq); int32_t tqStopStreamTasks(STQ* pTq); // tq util -int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, SStreamRefDataBlock** pRefBlock); +int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, void** pRefBlock, int8_t type); int32_t tqExtractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, SRpcMsg* pMsg); int32_t tqDoSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const SMqDataRsp* pRsp, int32_t epoch, int64_t consumerId, int32_t type, int64_t sver, int64_t ever); diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 137c2f4f7e..df1720d4a7 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -286,8 +286,8 @@ int32_t tdProcessTSmaCreate(SSma* pSma, int64_t version, const char* msg); int32_t tdProcessTSmaInsert(SSma* pSma, int64_t indexUid, const char* msg); int32_t tdProcessRSmaCreate(SSma* pSma, SVCreateStbReq* pReq); -int32_t tdProcessRSmaSubmit(SSma* pSma, int64_t version, void* pReq, void* pMsg, int32_t len, int32_t inputType); -int32_t tdProcessRSmaDelete(SSma* pSma, int64_t version, void* pReq, void* pMsg, int32_t len, int32_t inputType); +int32_t tdProcessRSmaSubmit(SSma* pSma, int64_t version, void* pReq, void* pMsg, int32_t len); +int32_t tdProcessRSmaDelete(SSma* pSma, int64_t version, void* pReq, void* pMsg, int32_t len); int32_t tdProcessRSmaDrop(SSma* pSma, SVDropStbReq* pReq); int32_t tdFetchTbUidList(SSma* pSma, STbUidStore** ppStore, tb_uid_t suid, tb_uid_t uid); int32_t tdUpdateTbUidList(SSma* pSma, STbUidStore* pUidStore, bool isAdd); diff --git a/source/dnode/vnode/src/sma/smaCommit.c 
b/source/dnode/vnode/src/sma/smaCommit.c index 92b8c09fbc..3512f1476f 100644 --- a/source/dnode/vnode/src/sma/smaCommit.c +++ b/source/dnode/vnode/src/sma/smaCommit.c @@ -217,10 +217,7 @@ static int32_t tdProcessRSmaAsyncCommitImpl(SSma *pSma, SCommitInfo *pInfo) { int32_t lino = 0; SVnode *pVnode = pSma->pVnode; - SSmaEnv *pSmaEnv = SMA_RSMA_ENV(pSma); - if (!pSmaEnv) { - goto _exit; - } + if (!SMA_RSMA_ENV(pSma)) goto _exit; code = tsdbCommitBegin(VND_RSMA1(pVnode), pInfo); TSDB_CHECK_CODE(code, lino, _exit); diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index dd21ec2b30..6742f30d53 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -43,7 +43,7 @@ static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSiz ERsmaExecType type, int8_t level); static SRSmaInfo *tdAcquireRSmaInfoBySuid(SSma *pSma, int64_t suid); static void tdReleaseRSmaInfo(SSma *pSma, SRSmaInfo *pInfo); -static void tdFreeRSmaSubmitItems(SArray *pItems); +static void tdFreeRSmaSubmitItems(SArray *pItems, int32_t type); static int32_t tdRSmaFetchAllResult(SSma *pSma, SRSmaInfo *pInfo); static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSmaInfoItem *pItem, SRSmaInfo *pInfo, int32_t execType, int8_t *streamFlushed); @@ -723,7 +723,7 @@ _exit: */ static int32_t tdExecuteRSmaImplAsync(SSma *pSma, int64_t version, const void *pMsg, int32_t len, int32_t inputType, SRSmaInfo *pInfo, tb_uid_t suid) { - int32_t size = RSMA_SUBMIT_HEAD_LEN + len; // header(type+len+version) + payload + int32_t size = RSMA_SUBMIT_HEAD_LEN + len; // header + payload void *qItem = taosAllocateQitem(size, DEF_QITEM, 0); if (!qItem) { @@ -940,12 +940,8 @@ static int32_t tdExecuteRSmaAsync(SSma *pSma, int64_t version, const void *pMsg, return TSDB_CODE_SUCCESS; } -int32_t tdProcessRSmaSubmit(SSma *pSma, int64_t version, void *pReq, void *pMsg, int32_t len, int32_t inputType) { - SSmaEnv *pEnv = 
SMA_RSMA_ENV(pSma); - if (!pEnv) { - // only applicable when rsma env exists - return TDB_CODE_SUCCESS; - } +int32_t tdProcessRSmaSubmit(SSma *pSma, int64_t version, void *pReq, void *pMsg, int32_t len) { + if (!SMA_RSMA_ENV(pSma)) return TSDB_CODE_SUCCESS; if ((terrno = atomic_load_32(&SMA_RSMA_STAT(pSma)->execStat))) { smaError("vgId:%d, failed to process rsma submit since invalid exec code: %s", SMA_VID(pSma), terrstr()); @@ -954,27 +950,25 @@ int32_t tdProcessRSmaSubmit(SSma *pSma, int64_t version, void *pReq, void *pMsg, STbUidStore uidStore = {0}; - if (inputType == STREAM_INPUT__DATA_SUBMIT) { - if (tdFetchSubmitReqSuids(pReq, &uidStore) < 0) { - smaError("vgId:%d, failed to process rsma submit fetch suid since: %s", SMA_VID(pSma), terrstr()); + if (tdFetchSubmitReqSuids(pReq, &uidStore) < 0) { + smaError("vgId:%d, failed to process rsma submit fetch suid since: %s", SMA_VID(pSma), terrstr()); + goto _err; + } + + if (uidStore.suid != 0) { + if (tdExecuteRSmaAsync(pSma, version, pMsg, len, STREAM_INPUT__DATA_SUBMIT, uidStore.suid) < 0) { + smaError("vgId:%d, failed to process rsma submit exec 1 since: %s", SMA_VID(pSma), terrstr()); goto _err; } - if (uidStore.suid != 0) { - if (tdExecuteRSmaAsync(pSma, version, pMsg, len, inputType, uidStore.suid) < 0) { - smaError("vgId:%d, failed to process rsma submit exec 1 since: %s", SMA_VID(pSma), terrstr()); + void *pIter = NULL; + while ((pIter = taosHashIterate(uidStore.uidHash, pIter))) { + tb_uid_t *pTbSuid = (tb_uid_t *)taosHashGetKey(pIter, NULL); + if (tdExecuteRSmaAsync(pSma, version, pMsg, len, STREAM_INPUT__DATA_SUBMIT, *pTbSuid) < 0) { + smaError("vgId:%d, failed to process rsma submit exec 2 since: %s", SMA_VID(pSma), terrstr()); + taosHashCancelIterate(uidStore.uidHash, pIter); goto _err; } - - void *pIter = NULL; - while ((pIter = taosHashIterate(uidStore.uidHash, pIter))) { - tb_uid_t *pTbSuid = (tb_uid_t *)taosHashGetKey(pIter, NULL); - if (tdExecuteRSmaAsync(pSma, version, pMsg, len, inputType, 
*pTbSuid) < 0) { - smaError("vgId:%d, failed to process rsma submit exec 2 since: %s", SMA_VID(pSma), terrstr()); - taosHashCancelIterate(uidStore.uidHash, pIter); - goto _err; - } - } } } tdUidStoreDestory(&uidStore); @@ -984,19 +978,18 @@ _err: return terrno; } -int32_t tdProcessRSmaDelete(SSma *pSma, int64_t version, void *pReq, void *pMsg, int32_t len, int32_t inputType) { - SSmaEnv *pEnv = SMA_RSMA_ENV(pSma); - if (!pEnv) { - // only applicable when rsma env exists - return TSDB_CODE_SUCCESS; +int32_t tdProcessRSmaDelete(SSma *pSma, int64_t version, void *pReq, void *pMsg, int32_t len) { + if (!SMA_RSMA_ENV(pSma)) return TSDB_CODE_SUCCESS; + + if ((terrno = atomic_load_32(&SMA_RSMA_STAT(pSma)->execStat))) { + smaError("vgId:%d, failed to process rsma delete since invalid exec code: %s", SMA_VID(pSma), terrstr()); + goto _err; } - if (inputType == STREAM_INPUT__REF_DATA_BLOCK) { - SDeleteRes *pReq = pReq; - if (tdExecuteRSmaAsync(pSma, version, pMsg, len, inputType, pReq->suid) < 0) { - smaError("vgId:%d, failed to process rsma submit exec 1 since: %s", SMA_VID(pSma), terrstr()); - goto _err; - } + SDeleteRes *pDelRes = pReq; + if (tdExecuteRSmaAsync(pSma, version, pMsg, len, STREAM_INPUT__REF_DATA_BLOCK, pDelRes->suid) < 0) { + smaError("vgId:%d, failed to process rsma submit exec 1 since: %s", SMA_VID(pSma), terrstr()); + goto _err; } return TSDB_CODE_SUCCESS; _err: @@ -1381,10 +1374,20 @@ _end: tdReleaseSmaRef(smaMgmt.rsetId, pRSmaRef->refId); } -static void tdFreeRSmaSubmitItems(SArray *pItems) { - for (int32_t i = 0; i < taosArrayGetSize(pItems); ++i) { - SPackedData *packData = taosArrayGet(pItems, i); - taosFreeQitem(POINTER_SHIFT(packData->msgStr, -RSMA_SUBMIT_HEAD_LEN)); +static void tdFreeRSmaSubmitItems(SArray *pItems, int32_t type) { + int32_t arrSize = taosArrayGetSize(pItems); + if (type == STREAM_INPUT__MERGED_SUBMIT) { + for (int32_t i = 0; i < arrSize; ++i) { + SPackedData *packData = TARRAY_GET_ELEM(pItems, i); + 
taosFreeQitem(POINTER_SHIFT(packData->msgStr, -RSMA_SUBMIT_HEAD_LEN)); + } + } else if (type == STREAM_INPUT__REF_DATA_BLOCK) { + for (int32_t i = 0; i < arrSize; ++i) { + SPackedData *packData = TARRAY_GET_ELEM(pItems, i); + blockDataDestroy(packData->pDataBlock); + } + } else { + ASSERTS(0, "unknown type:%d", type); } taosArrayClear(pItems); } @@ -1448,34 +1451,37 @@ _err: return TSDB_CODE_FAILED; } +#define RSMA_SUBMIT_MSG_TYPE(msg) (*(int8_t *)(msg)) +#define RSMA_SUBMIT_MSG_LEN(msg) (*(int32_t *)POINTER_SHIFT((msg), sizeof(int8_t))) +#define RSMA_SUBMIT_MSG_VER(msg) (*(int64_t *)POINTER_SHIFT((msg), sizeof(int8_t) + sizeof(int32_t))) +#define RSMA_SUBMIT_MSG_BODY(msg) (POINTER_SHIFT((msg), sizeof(int8_t) + sizeof(int32_t) + sizeof(int64_t))) + static int32_t tdRSmaBatchExec(SSma *pSma, SRSmaInfo *pInfo, STaosQall *qall, SArray *pSubmitArr, ERsmaExecType type) { - - void *msg = NULL; - int8_t resume = 0; - int32_t nSubmit = 0; - int32_t nDelete = 0; + void *msg = NULL; + int8_t resume = 0; + int32_t nSubmit = 0; + int32_t nDelete = 0; SPackedData packData; taosArrayClear(pSubmitArr); + // the submitReq/deleteReq msg may exsit alternately in the msg queue, consume them sequentially in batch mode while (1) { taosGetQitem(qall, (void **)&msg); if (msg) { - int8_t inputType = *(int8_t *)msg; - - msg = POINTER_SHIFT(msg, sizeof(int8_t)); - + int8_t inputType = RSMA_SUBMIT_MSG_TYPE(msg); if (inputType == STREAM_INPUT__DATA_SUBMIT) { if (nDelete > 0) { resume = 1; break; } _resume_submit: - packData.msgLen = *(int32_t *)msg; - packData.ver = *(int64_t *)POINTER_SHIFT(msg, sizeof(int32_t)); - packData.msgStr = POINTER_SHIFT(msg, sizeof(int32_t) + sizeof(int64_t)); + packData.msgLen = RSMA_SUBMIT_MSG_LEN(msg); + packData.ver = RSMA_SUBMIT_MSG_VER(msg); + packData.msgStr = RSMA_SUBMIT_MSG_BODY(msg); if (!taosArrayPush(pSubmitArr, &packData)) { + taosFreeQitem(msg); terrno = TSDB_CODE_OUT_OF_MEMORY; goto _err; } @@ -1486,46 +1492,49 @@ static int32_t tdRSmaBatchExec(SSma 
*pSma, SRSmaInfo *pInfo, STaosQall *qall, SA break; } _resume_delete: - ++nDelete; -#if 0 - if (!taosArrayPush(pSubmitArr, &packData)) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - tdFreeRSmaSubmitItems(pSubmitArr); - goto _err; - } -#endif - } else { - break; - } - } - - if (nSubmit > 0) { - int32_t size = taosArrayGetSize(pSubmitArr); - if (size > 0) { - for (int32_t i = 1; i <= TSDB_RETENTION_L2; ++i) { - if (tdExecuteRSmaImpl(pSma, pSubmitArr->pData, size, STREAM_INPUT__MERGED_SUBMIT, pInfo, type, i) < 0) { + extractDelDataBlock(RSMA_SUBMIT_MSG_BODY(msg), RSMA_SUBMIT_MSG_LEN(msg), RSMA_SUBMIT_MSG_VER(msg), + &packData.pDataBlock, 1); + if (!taosArrayPush(pSubmitArr, &packData)) { + taosFreeQitem(msg); + terrno = TSDB_CODE_OUT_OF_MEMORY; goto _err; } + taosFreeQitem(msg); + ++nDelete; + } else { + ASSERTS(0, "unknown msg type:%d", inputType); + break; } - tdFreeRSmaSubmitItems(pSubmitArr); } - } else if (nDelete > 0) { - } - if (resume == 0) { - goto _rtn; - } else if (resume == 1) { - nSubmit = 0; - nDelete = 0; - resume = 0; - taosArrayClear(pSubmitArr); - goto _resume_submit; - } else { - nSubmit = 0; - nDelete = 0; - resume = 0; - taosArrayClear(pSubmitArr); - goto _resume_delete; + if (nSubmit > 0 || nDelete > 0) { + int32_t size = TARRAY_SIZE(pSubmitArr); + if (size > 0) { + int32_t inputType = nSubmit > 0 ? 
STREAM_INPUT__MERGED_SUBMIT : STREAM_INPUT__REF_DATA_BLOCK; + for (int32_t i = 1; i <= TSDB_RETENTION_L2; ++i) { + if (tdExecuteRSmaImpl(pSma, pSubmitArr->pData, size, inputType, pInfo, type, i) < 0) { + goto _err; + } + } + tdFreeRSmaSubmitItems(pSubmitArr, inputType); + } + } + + if (resume == 0) { + goto _rtn; + } else if (resume == 1) { + nSubmit = 0; + nDelete = 0; + resume = 0; + tdFreeRSmaSubmitItems(pSubmitArr, STREAM_INPUT__REF_DATA_BLOCK); + goto _resume_submit; + } else { + nSubmit = 0; + nDelete = 0; + resume = 0; + tdFreeRSmaSubmitItems(pSubmitArr, STREAM_INPUT__MERGED_SUBMIT); + goto _resume_delete; + } } _rtn: @@ -1534,7 +1543,7 @@ _err: atomic_store_32(&SMA_RSMA_STAT(pSma)->execStat, terrno); smaError("vgId:%d, batch exec for suid:%" PRIi64 " execType:%d size:%d failed since %s", SMA_VID(pSma), pInfo->suid, type, (int32_t)taosArrayGetSize(pSubmitArr), terrstr()); - tdFreeRSmaSubmitItems(pSubmitArr); + tdFreeRSmaSubmitItems(pSubmitArr, nSubmit ? STREAM_INPUT__MERGED_SUBMIT : STREAM_INPUT__REF_DATA_BLOCK); while (1) { void *msg = NULL; taosGetQitem(qall, (void **)&msg); diff --git a/source/dnode/vnode/src/tq/tqRead.c b/source/dnode/vnode/src/tq/tqRead.c index bd2a591a98..41b1aa7bd1 100644 --- a/source/dnode/vnode/src/tq/tqRead.c +++ b/source/dnode/vnode/src/tq/tqRead.c @@ -343,7 +343,7 @@ int32_t extractMsgFromWal(SWalReader* pReader, void** pItem, int64_t maxVer, con void* pBody = POINTER_SHIFT(pCont->body, sizeof(SMsgHead)); int32_t len = pCont->bodyLen - sizeof(SMsgHead); - code = extractDelDataBlock(pBody, len, ver, (SStreamRefDataBlock**)pItem); + code = extractDelDataBlock(pBody, len, ver, (void**)pItem, 0); if (code == TSDB_CODE_SUCCESS) { if (*pItem == NULL) { tqDebug("s-task:%s empty delete msg, discard it, len:%d, ver:%" PRId64, id, len, ver); diff --git a/source/dnode/vnode/src/tq/tqUtil.c b/source/dnode/vnode/src/tq/tqUtil.c index d8fe899bf6..f7d0ad54e9 100644 --- a/source/dnode/vnode/src/tq/tqUtil.c +++ 
b/source/dnode/vnode/src/tq/tqUtil.c @@ -399,7 +399,7 @@ int32_t tqDoSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const SMqDataRsp* return 0; } -int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, SStreamRefDataBlock** pRefBlock) { +int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, void** pRefBlock, int8_t type) { SDecoder* pCoder = &(SDecoder){0}; SDeleteRes* pRes = &(SDeleteRes){0}; @@ -442,14 +442,19 @@ int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, SStream } taosArrayDestroy(pRes->uidList); - *pRefBlock = taosAllocateQitem(sizeof(SStreamRefDataBlock), DEF_QITEM, 0); - if (*pRefBlock == NULL) { - blockDataCleanup(pDelBlock); - taosMemoryFree(pDelBlock); - return TSDB_CODE_OUT_OF_MEMORY; + if (type == 0) { + *pRefBlock = taosAllocateQitem(sizeof(SStreamRefDataBlock), DEF_QITEM, 0); + if (*pRefBlock == NULL) { + blockDataCleanup(pDelBlock); + taosMemoryFree(pDelBlock); + return TSDB_CODE_OUT_OF_MEMORY; + } + + ((SStreamRefDataBlock*)(*pRefBlock))->type = STREAM_INPUT__REF_DATA_BLOCK; + ((SStreamRefDataBlock*)(*pRefBlock))->pBlock = pDelBlock; + } else { + *pRefBlock = pDelBlock; } - (*pRefBlock)->type = STREAM_INPUT__REF_DATA_BLOCK; - (*pRefBlock)->pBlock = pDelBlock; return TSDB_CODE_SUCCESS; } diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index 44b6af4ff5..df42e02776 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -380,7 +380,7 @@ static int32_t vnodePreProcessDeleteMsg(SVnode *pVnode, SRpcMsg *pMsg) { SEncoder *pCoder = &(SEncoder){0}; SDeleteRes res = {0}; - SReadHandle handle = {.vnode = pVnode, .pMsgCb = &pVnode->msgCb}; + SReadHandle handle = {.vnode = pVnode, .pMsgCb = &pVnode->msgCb, .skipRollup = 1}; initStorageAPI(&handle.api); code = qWorkerProcessDeleteMsg(&handle, pVnode->pQuery, pMsg, &res); @@ -1669,7 +1669,7 @@ _exit: atomic_add_fetch_64(&pVnode->statis.nBatchInsert, 1); if (code 
== 0) { atomic_add_fetch_64(&pVnode->statis.nBatchInsertSuccess, 1); - code = tdProcessRSmaSubmit(pVnode->pSma, ver, pSubmitReq, pReq, len, STREAM_INPUT__DATA_SUBMIT); + code = tdProcessRSmaSubmit(pVnode->pSma, ver, pSubmitReq, pReq, len); } // clear @@ -1947,7 +1947,7 @@ static int32_t vnodeProcessDeleteReq(SVnode *pVnode, int64_t ver, void *pReq, in if (code) goto _err; } - tdProcessRSmaDelete(pVnode->pSma, ver, pRes, pReq, len, STREAM_INPUT__REF_DATA_BLOCK); + tdProcessRSmaDelete(pVnode->pSma, ver, pRes, pReq, len); tDecoderClear(pCoder); taosArrayDestroy(pRes->uidList); diff --git a/source/libs/executor/src/executor.c b/source/libs/executor/src/executor.c index d569f78322..1f82a9477b 100644 --- a/source/libs/executor/src/executor.c +++ b/source/libs/executor/src/executor.c @@ -78,12 +78,11 @@ static int32_t doSetSMABlock(SOperatorInfo* pOperator, void* input, size_t numOf taosArrayPush(pInfo->pBlockLists, &tmp); pInfo->blockType = STREAM_INPUT__CHECKPOINT; } else if (type == STREAM_INPUT__REF_DATA_BLOCK) { - for (int32_t i = 0; i < numOfBlocks; ++i) { - SSDataBlock* pDataBlock = &((SSDataBlock*)input)[i]; - SPackedData tmp = {.pDataBlock = pDataBlock}; - taosArrayPush(pInfo->pBlockLists, &tmp); + for (int32_t i = 0; i < numOfBlocks; ++i) { + SPackedData* pReq = POINTER_SHIFT(input, i * sizeof(SPackedData)); + taosArrayPush(pInfo->pBlockLists, pReq); } - pInfo->blockType = STREAM_INPUT__REF_DATA_BLOCK; + pInfo->blockType = STREAM_INPUT__DATA_BLOCK; } return TSDB_CODE_SUCCESS; From ff3621f38224cc6e9be85eb6b161d5172a9bb9a6 Mon Sep 17 00:00:00 2001 From: kailixu Date: Thu, 9 Nov 2023 20:09:39 +0800 Subject: [PATCH 28/79] enh: rsma delete logic --- source/dnode/vnode/src/inc/tq.h | 2 +- source/dnode/vnode/src/sma/smaRollup.c | 54 +++++++++++++------------- source/dnode/vnode/src/tq/tqUtil.c | 6 ++- 3 files changed, 33 insertions(+), 29 deletions(-) diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index 238407b26c..6b4f8d1431 100644 --- 
a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -160,7 +160,7 @@ int32_t tqResetStreamTaskStatus(STQ* pTq); int32_t tqStopStreamTasks(STQ* pTq); // tq util -int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, void** pRefBlock, int8_t type); +int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, void** pRefBlock, int32_t type); int32_t tqExtractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, SRpcMsg* pMsg); int32_t tqDoSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const SMqDataRsp* pRsp, int32_t epoch, int64_t consumerId, int32_t type, int64_t sver, int64_t ever); diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 6742f30d53..d530133159 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -17,13 +17,17 @@ #include "tq.h" #include "tstream.h" -#define RSMA_QTASKEXEC_SMOOTH_SIZE (100) // cnt -#define RSMA_SUBMIT_BATCH_SIZE (1024) // cnt -#define RSMA_FETCH_DELAY_MAX (120000) // ms -#define RSMA_FETCH_ACTIVE_MAX (1000) // ms -#define RSMA_FETCH_INTERVAL (5000) // ms -#define RSMA_SUBMIT_HEAD_LEN (13) // type(int8_t) + len(int32_t) + version(int64_t) -#define RSMA_TASK_FLAG "rsma" +#define RSMA_EXEC_SMOOTH_SIZE (100) // cnt +#define RSMA_EXEC_BATCH_SIZE (1024) // cnt +#define RSMA_FETCH_DELAY_MAX (120000) // ms +#define RSMA_FETCH_ACTIVE_MAX (1000) // ms +#define RSMA_FETCH_INTERVAL (5000) // ms +#define RSMA_EXEC_MSG_HLEN (13) // type(int8_t) + len(int32_t) + version(int64_t) +#define RSMA_EXEC_MSG_TYPE(msg) (*(int8_t *)(msg)) +#define RSMA_EXEC_MSG_LEN(msg) (*(int32_t *)POINTER_SHIFT((msg), sizeof(int8_t))) +#define RSMA_EXEC_MSG_VER(msg) (*(int64_t *)POINTER_SHIFT((msg), sizeof(int8_t) + sizeof(int32_t))) +#define RSMA_EXEC_MSG_BODY(msg) (POINTER_SHIFT((msg), sizeof(int8_t) + sizeof(int32_t) + sizeof(int64_t))) +#define RSMA_TASK_FLAG "rsma" #define RSMA_NEED_FETCH(r) 
(RSMA_INFO_ITEM((r), 0)->fetchLevel || RSMA_INFO_ITEM((r), 1)->fetchLevel) @@ -723,7 +727,7 @@ _exit: */ static int32_t tdExecuteRSmaImplAsync(SSma *pSma, int64_t version, const void *pMsg, int32_t len, int32_t inputType, SRSmaInfo *pInfo, tb_uid_t suid) { - int32_t size = RSMA_SUBMIT_HEAD_LEN + len; // header + payload + int32_t size = RSMA_EXEC_MSG_HLEN + len; // header + payload void *qItem = taosAllocateQitem(size, DEF_QITEM, 0); if (!qItem) { @@ -752,7 +756,7 @@ static int32_t tdExecuteRSmaImplAsync(SSma *pSma, int64_t version, const void *p } // smoothing consume - int32_t n = nItems / RSMA_QTASKEXEC_SMOOTH_SIZE; + int32_t n = nItems / RSMA_EXEC_SMOOTH_SIZE; if (n > 1) { if (n > 10) { n = 10; @@ -1379,7 +1383,7 @@ static void tdFreeRSmaSubmitItems(SArray *pItems, int32_t type) { if (type == STREAM_INPUT__MERGED_SUBMIT) { for (int32_t i = 0; i < arrSize; ++i) { SPackedData *packData = TARRAY_GET_ELEM(pItems, i); - taosFreeQitem(POINTER_SHIFT(packData->msgStr, -RSMA_SUBMIT_HEAD_LEN)); + taosFreeQitem(POINTER_SHIFT(packData->msgStr, -RSMA_EXEC_MSG_HLEN)); } } else if (type == STREAM_INPUT__REF_DATA_BLOCK) { for (int32_t i = 0; i < arrSize; ++i) { @@ -1451,11 +1455,6 @@ _err: return TSDB_CODE_FAILED; } -#define RSMA_SUBMIT_MSG_TYPE(msg) (*(int8_t *)(msg)) -#define RSMA_SUBMIT_MSG_LEN(msg) (*(int32_t *)POINTER_SHIFT((msg), sizeof(int8_t))) -#define RSMA_SUBMIT_MSG_VER(msg) (*(int64_t *)POINTER_SHIFT((msg), sizeof(int8_t) + sizeof(int32_t))) -#define RSMA_SUBMIT_MSG_BODY(msg) (POINTER_SHIFT((msg), sizeof(int8_t) + sizeof(int32_t) + sizeof(int64_t))) - static int32_t tdRSmaBatchExec(SSma *pSma, SRSmaInfo *pInfo, STaosQall *qall, SArray *pSubmitArr, ERsmaExecType type) { void *msg = NULL; int8_t resume = 0; @@ -1470,16 +1469,16 @@ static int32_t tdRSmaBatchExec(SSma *pSma, SRSmaInfo *pInfo, STaosQall *qall, SA while (1) { taosGetQitem(qall, (void **)&msg); if (msg) { - int8_t inputType = RSMA_SUBMIT_MSG_TYPE(msg); + int8_t inputType = RSMA_EXEC_MSG_TYPE(msg); if 
(inputType == STREAM_INPUT__DATA_SUBMIT) { if (nDelete > 0) { resume = 1; break; } _resume_submit: - packData.msgLen = RSMA_SUBMIT_MSG_LEN(msg); - packData.ver = RSMA_SUBMIT_MSG_VER(msg); - packData.msgStr = RSMA_SUBMIT_MSG_BODY(msg); + packData.msgLen = RSMA_EXEC_MSG_LEN(msg); + packData.ver = RSMA_EXEC_MSG_VER(msg); + packData.msgStr = RSMA_EXEC_MSG_BODY(msg); if (!taosArrayPush(pSubmitArr, &packData)) { taosFreeQitem(msg); terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -1492,8 +1491,11 @@ static int32_t tdRSmaBatchExec(SSma *pSma, SRSmaInfo *pInfo, STaosQall *qall, SA break; } _resume_delete: - extractDelDataBlock(RSMA_SUBMIT_MSG_BODY(msg), RSMA_SUBMIT_MSG_LEN(msg), RSMA_SUBMIT_MSG_VER(msg), - &packData.pDataBlock, 1); + if ((terrno = extractDelDataBlock(RSMA_EXEC_MSG_BODY(msg), RSMA_EXEC_MSG_LEN(msg), RSMA_EXEC_MSG_VER(msg), + &packData.pDataBlock, 1))) { + taosFreeQitem(msg); + goto _err; + } if (!taosArrayPush(pSubmitArr, &packData)) { taosFreeQitem(msg); terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -1518,17 +1520,17 @@ static int32_t tdRSmaBatchExec(SSma *pSma, SRSmaInfo *pInfo, STaosQall *qall, SA } tdFreeRSmaSubmitItems(pSubmitArr, inputType); } + } else { + goto _rtn; } - if (resume == 0) { - goto _rtn; - } else if (resume == 1) { + if (resume == 1) { nSubmit = 0; nDelete = 0; resume = 0; tdFreeRSmaSubmitItems(pSubmitArr, STREAM_INPUT__REF_DATA_BLOCK); goto _resume_submit; - } else { + } else if (resume == 2) { nSubmit = 0; nDelete = 0; resume = 0; @@ -1580,7 +1582,7 @@ int32_t tdRSmaProcessExecImpl(SSma *pSma, ERsmaExecType type) { } if (!(pSubmitArr = - taosArrayInit(TMIN(RSMA_SUBMIT_BATCH_SIZE, atomic_load_64(&pRSmaStat->nBufItems)), sizeof(SPackedData)))) { + taosArrayInit(TMIN(RSMA_EXEC_BATCH_SIZE, atomic_load_64(&pRSmaStat->nBufItems)), sizeof(SPackedData)))) { code = TSDB_CODE_OUT_OF_MEMORY; TSDB_CHECK_CODE(code, lino, _exit); } diff --git a/source/dnode/vnode/src/tq/tqUtil.c b/source/dnode/vnode/src/tq/tqUtil.c index f7d0ad54e9..8f62928d22 100644 --- 
a/source/dnode/vnode/src/tq/tqUtil.c +++ b/source/dnode/vnode/src/tq/tqUtil.c @@ -399,7 +399,7 @@ int32_t tqDoSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const SMqDataRsp* return 0; } -int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, void** pRefBlock, int8_t type) { +int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, void** pRefBlock, int32_t type) { SDecoder* pCoder = &(SDecoder){0}; SDeleteRes* pRes = &(SDeleteRes){0}; @@ -452,8 +452,10 @@ int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, void** ((SStreamRefDataBlock*)(*pRefBlock))->type = STREAM_INPUT__REF_DATA_BLOCK; ((SStreamRefDataBlock*)(*pRefBlock))->pBlock = pDelBlock; - } else { + } else if (type == 1) { *pRefBlock = pDelBlock; + } else { + ASSERTS(0, "unknown type:%d", type); } return TSDB_CODE_SUCCESS; From 7288b0fc770dffa888ec50833df1929f0c0d17c9 Mon Sep 17 00:00:00 2001 From: kailixu Date: Thu, 9 Nov 2023 20:16:29 +0800 Subject: [PATCH 29/79] enh: rsma delete error code --- source/dnode/vnode/src/sma/smaRollup.c | 2 +- source/dnode/vnode/src/vnd/vnodeSvr.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index d530133159..e1cada4665 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -997,7 +997,7 @@ int32_t tdProcessRSmaDelete(SSma *pSma, int64_t version, void *pReq, void *pMsg, } return TSDB_CODE_SUCCESS; _err: - return TSDB_CODE_FAILED; + return terrno; } /** diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index df42e02776..fbd59203dc 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -1947,7 +1947,7 @@ static int32_t vnodeProcessDeleteReq(SVnode *pVnode, int64_t ver, void *pReq, in if (code) goto _err; } - tdProcessRSmaDelete(pVnode->pSma, ver, pRes, pReq, len); + code = 
tdProcessRSmaDelete(pVnode->pSma, ver, pRes, pReq, len); tDecoderClear(pCoder); taosArrayDestroy(pRes->uidList); From ea5adf55114e2567eef16ba7ca75b599c76cfe22 Mon Sep 17 00:00:00 2001 From: wangjiaming0909 <604227650@qq.com> Date: Thu, 9 Nov 2023 20:20:00 +0800 Subject: [PATCH 30/79] report not supported error when DDD but no DD specified --- docs/en/12-taos-sql/10-function.md | 2 +- docs/zh/12-taos-sql/10-function.md | 2 +- include/util/taoserror.h | 1 + source/common/src/ttime.c | 13 +++++++++++++ source/common/test/commonTests.cpp | 1 + source/libs/scalar/src/sclfunc.c | 1 - source/util/src/terror.c | 1 + 7 files changed, 18 insertions(+), 3 deletions(-) diff --git a/docs/en/12-taos-sql/10-function.md b/docs/en/12-taos-sql/10-function.md index 2ea144c56a..3cf75f15a5 100644 --- a/docs/en/12-taos-sql/10-function.md +++ b/docs/en/12-taos-sql/10-function.md @@ -565,7 +565,7 @@ TO_TIMESTAMP(ts_str_literal, format_str_literal) - The uppercase or lowercase of `MONTH`, `MON`, `DAY`, `DY` and formtas that output digits have same effect when used in `to_timestamp`, like `to_timestamp('2023-JANUARY-01', 'YYYY-month-dd')`, `month` can be replaced by `MONTH`, or `month`. The cases are ignored. - If multi times are specified for one component, the previous will be overwritten. Like `to_timestamp('2023-22-10-10', 'yyyy-yy-MM-dd')`, the output year will be `2022`. - To avoid unexpected time zone used during the convertion, it's recommended to put time zone in the ts string, e.g. '2023-10-10 10:10:10+08'. If time zone not specified, default will be that in server or client. -- The default timestamp if some components are not specified will be: `1970-01-01 00:00:00` with the timezone specified or default to local timezone. +- The default timestamp if some components are not specified will be: `1970-01-01 00:00:00` with the timezone specified or default to local timezone. If `DDD` is specified without `DD`, server will report `not supported` error, e.g. 
format 'yyyy-mm-ddd' is not supported currently, specify month day instead. - If `AM` or `PM` is specified in formats, the Hour must between `1-12`. - In some cases, `to_timestamp` can convert correctly even the format and the timestamp string are not totally matched. Like `to_timetamp('200101/2', 'yyyyMM1/dd')`, the digit `1` in format string are ignored, and the output timestsamp is `2001-01-02 00:00:00`. Spaces and tabs in formats and tiemstamp string are also ignored automatically. - The precision of the output timestamp will be the same as the table in SELECT stmt, millisecond will be used if no table is specified. The output of `select to_timestamp('2023-08-1 10:10:10.123456789', 'yyyy-mm-dd hh:mi:ss.ns')` will be truncated to millisecond precision. If a nano precision table is specified, no truncation will be applied. Like `select to_timestamp('2023-08-1 10:10:10.123456789', 'yyyy-mm-dd hh:mi:ss.ns') from db_ns.table_ns limit 1`. diff --git a/docs/zh/12-taos-sql/10-function.md b/docs/zh/12-taos-sql/10-function.md index c1dc6a6363..2b20c85fd5 100644 --- a/docs/zh/12-taos-sql/10-function.md +++ b/docs/zh/12-taos-sql/10-function.md @@ -565,7 +565,7 @@ TO_TIMESTAMP(ts_str_literal, format_str_literal) - `MONTH`, `MON`, `DAY`, `DY` 以及其他输出为数字的格式的大小写意义相同, 如 `to_timestamp('2023-JANUARY-01', 'YYYY-month-dd')`, `month`可以被替换为`MONTH` 或者`Month`. - 如果同一字段被指定了多次, 那么前面的指定将会被覆盖. 如 `to_timestamp('2023-22-10-10', 'yyyy-yy-MM-dd')`, 输出年份是`2022`. - 为避免转换时使用了非预期的时区,推荐在时间中携带时区信息,例如'2023-10-10 10:10:10+08',如果未指定时区则默认时区为服务端或客户端指定的时区。 -- 如果没有指定完整的时间,那么默认时间值为指定或默认时区的 `1970-01-01 00:00:00`, 未指定部分使用该默认值中的对应部分. +- 如果没有指定完整的时间,那么默认时间值为指定或默认时区的 `1970-01-01 00:00:00`, 未指定部分使用该默认值中的对应部分. 目前格式中指定年日, 但是不指定月日会报不支持错误, 如'yyyy-mm-DDD'不支持, 由于目前无法将年日转换成具体的月日, 应指定月日`DD`. - 如果格式串中有`AM`, `PM`等, 那么小时必须是12小时制, 范围必须是01-12. - `to_timestamp`转换具有一定的容错机制, 在格式串和时间戳串不完全对应时, 有时也可转换, 如: `to_timestamp('200101/2', 'yyyyMM1/dd')`, 格式串中多出来的1会被丢弃. 格式串与时间戳串中多余的空格字符(空格, tab等)也会被 自动忽略. 
如`to_timestamp(' 23 年 - 1 月 - 01 日 ', 'yy 年-MM月-dd日')` 可以被成功转换. 虽然`MM`等字段需要两个数字对应(只有一位时前面补0), 在`to_timestamp`时, 一个数字也可以成功转换. - 输出时间戳的精度与查询表的精度相同, 若查询未指定表, 则输出精度为毫秒. 如`select to_timestamp('2023-08-1 10:10:10.123456789', 'yyyy-mm-dd hh:mi:ss.ns')`的输出将会把微妙和纳秒进行截断. 如果指定一张纳秒表, 那么就不会发生截断, 如`select to_timestamp('2023-08-1 10:10:10.123456789', 'yyyy-mm-dd hh:mi:ss.ns') from db_ns.table_ns limit 1`. diff --git a/include/util/taoserror.h b/include/util/taoserror.h index 980d5b9177..7665550153 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -754,6 +754,7 @@ int32_t* taosGetErrno(); #define TSDB_CODE_FUNC_DUP_TIMESTAMP TAOS_DEF_ERROR_CODE(0, 0x2805) #define TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_FORMAT_ERR TAOS_DEF_ERROR_CODE(0, 0x2806) #define TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_TS_ERR TAOS_DEF_ERROR_CODE(0, 0x2807) +#define TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_NOT_SUPPORTED TAOS_DEF_ERROR_CODE(0, 0x2808) //udf #define TSDB_CODE_UDF_STOPPING TAOS_DEF_ERROR_CODE(0, 0x2901) diff --git a/source/common/src/ttime.c b/source/common/src/ttime.c index 4b0848e5e9..ec2202f299 100644 --- a/source/common/src/ttime.c +++ b/source/common/src/ttime.c @@ -1580,6 +1580,7 @@ static bool needMoreDigits(SArray* formats, int32_t curIdx) { /// @retval 0 for success /// @retval -1 for format and s mismatch error /// @retval -2 if datetime err, like 2023-13-32 25:61:69 +/// @retval -3 if not supported static int32_t char2ts(const char* s, SArray* formats, int64_t* ts, int32_t precision, const char** sErrPos, int32_t* fErrIdx) { int32_t size = taosArrayGetSize(formats); @@ -1589,6 +1590,7 @@ static int32_t char2ts(const char* s, SArray* formats, int64_t* ts, int32_t prec int32_t hour = 0, min = 0, sec = 0, us = 0, ms = 0, ns = 0; int32_t tzSign = 1, tz = tsTimezone; int32_t err = 0; + bool withYD = false, withMD = false; for (int32_t i = 0; i < size && *s != '\0'; ++i) { while (isspace(*s) && *s != '\0') { @@ -1782,6 +1784,7 @@ static int32_t char2ts(const char* s, SArray* formats, 
int64_t* ts, int32_t prec } else { s = newPos; } + withYD = true; } break; case TSFKW_DD: { const char* newPos = tsFormatStr2Int32(&md, s, 2, needMoreDigits(formats, i)); @@ -1790,6 +1793,7 @@ static int32_t char2ts(const char* s, SArray* formats, int64_t* ts, int32_t prec } else { s = newPos; } + withMD = true; } break; case TSFKW_D: { const char* newPos = tsFormatStr2Int32(&wd, s, 1, needMoreDigits(formats, i)); @@ -1843,6 +1847,10 @@ static int32_t char2ts(const char* s, SArray* formats, int64_t* ts, int32_t prec return err; } } + if (!withMD) { + // yyyy-mm-DDD, currently, the c api can't convert to correct timestamp, return not supported + if (withYD) return -3; + } struct STm tm = {0}; tm.tm.tm_year = year - 1900; tm.tm.tm_mon = mon; @@ -1892,8 +1900,13 @@ int32_t taosChar2Ts(const char* format, SArray** formats, const char* tsStr, int TSFormatNode* fNode = (taosArrayGet(*formats, fErrIdx)); snprintf(errMsg, errMsgLen, "mismatch format for: %s and %s", sErrPos, fErrIdx < taosArrayGetSize(*formats) ? 
((TSFormatNode*)taosArrayGet(*formats, fErrIdx))->key->name : ""); + code = TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_FORMAT_ERR; } else if (code == -2) { snprintf(errMsg, errMsgLen, "timestamp format error: %s -> %s", tsStr, format); + code = TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_TS_ERR; + } else if (code == -3) { + snprintf(errMsg, errMsgLen, "not supported currently"); + code = TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_NOT_SUPPORTED; } return code; } diff --git a/source/common/test/commonTests.cpp b/source/common/test/commonTests.cpp index c65d8761b7..107276d7f9 100644 --- a/source/common/test/commonTests.cpp +++ b/source/common/test/commonTests.cpp @@ -489,6 +489,7 @@ TEST(timeTest, char2ts) { ASSERT_EQ(ts, 0); ASSERT_EQ(0, TEST_char2ts("yyyy年 a a a MM/ddTZH", &ts, TSDB_TIME_PRECISION_MICRO, "1970年 a a a 1/1+0")); ASSERT_EQ(0, TEST_char2ts("yyyy年 a a a a a a a a a a a a a a a MM/ddTZH", &ts, TSDB_TIME_PRECISION_MICRO, "1970年 a ")); + ASSERT_EQ(-3, TEST_char2ts("yyyy-mm-DDD", &ts, TSDB_TIME_PRECISION_MILLI, "1970-01-001")); } #pragma GCC diagnostic pop diff --git a/source/libs/scalar/src/sclfunc.c b/source/libs/scalar/src/sclfunc.c index dbdd79cc65..e7c6297f44 100644 --- a/source/libs/scalar/src/sclfunc.c +++ b/source/libs/scalar/src/sclfunc.c @@ -1230,7 +1230,6 @@ int32_t toTimestampFunction(SScalarParam* pInput, int32_t inputNum, SScalarParam code = taosChar2Ts(format, &formats, tsStr, &ts, precision, errMsg, 128); if (code) { qError("func to_timestamp failed %s", errMsg); - code = code == -1 ? 
TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_FORMAT_ERR : TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_TS_ERR; break; } colDataSetVal(pOutput->columnData, i, (char *)&ts, false); diff --git a/source/util/src/terror.c b/source/util/src/terror.c index 6c964c3da0..a957ff4b29 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -615,6 +615,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_FUNC_NOT_BUILTIN_FUNTION, "Not buildin function TAOS_DEFINE_ERROR(TSDB_CODE_FUNC_DUP_TIMESTAMP, "Duplicate timestamps not allowed in function") TAOS_DEFINE_ERROR(TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_FORMAT_ERR, "Func to_timestamp failed, format mismatch") TAOS_DEFINE_ERROR(TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_TS_ERR, "Func to_timestamp failed, wrong timestamp") +TAOS_DEFINE_ERROR(TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_NOT_SUPPORTED, "Func to_timestamp failed, not supported yet") //udf TAOS_DEFINE_ERROR(TSDB_CODE_UDF_STOPPING, "udf is stopping") From ee6e68a71e70cc84434d046f3e20bdc9f6d8d548 Mon Sep 17 00:00:00 2001 From: kailixu Date: Fri, 10 Nov 2023 07:02:46 +0800 Subject: [PATCH 31/79] enh: rsma support delete --- source/common/src/tdatablock.c | 4 ++ source/dnode/vnode/src/sma/smaRollup.c | 78 +++++++++++++++++++++----- source/dnode/vnode/src/vnd/vnodeSvr.c | 5 ++ 3 files changed, 73 insertions(+), 14 deletions(-) diff --git a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c index bf21b2eda0..4b777dac82 100644 --- a/source/common/src/tdatablock.c +++ b/source/common/src/tdatablock.c @@ -1978,6 +1978,10 @@ int32_t buildSubmitReqFromDataBlock(SSubmitReq2** ppReq, const SSDataBlock* pDat int32_t colNum = taosArrayGetSize(pDataBlock->pDataBlock); int32_t rows = pDataBlock->info.rows; + if(pDataBlock->info.type == STREAM_DELETE_RESULT) { + + } + if (colNum <= 1) { // invalid if only with TS col continue; } diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index e1cada4665..d0ef36d17f 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ 
b/source/dnode/vnode/src/sma/smaRollup.c @@ -629,6 +629,46 @@ _end: return code; } +static int32_t tdRSmaProcessDelReq(SSma *pSma, int64_t suid, int8_t level, SBatchDeleteReq *pDelReq) { + int32_t code = 0; + int32_t lino = 0; + + if (taosArrayGetSize(pDelReq->deleteReqs) > 0) { + int32_t len = 0; + tEncodeSize(tEncodeSBatchDeleteReq, pDelReq, len, code); + TSDB_CHECK_CODE(code, lino, _exit); + + void *pBuf = rpcMallocCont(len + sizeof(SMsgHead)); + if (!pBuf) { + code = terrno; + TSDB_CHECK_CODE(code, lino, _exit); + } + + SEncoder encoder; + tEncoderInit(&encoder, POINTER_SHIFT(pBuf, sizeof(SMsgHead)), len); + tEncodeSBatchDeleteReq(&encoder, pDelReq); + tEncoderClear(&encoder); + + ((SMsgHead *)pBuf)->vgId = TD_VID(pSma->pVnode); + + SRpcMsg delMsg = {.msgType = TDMT_VND_BATCH_DEL, + .pCont = pBuf, + .contLen = len + sizeof(SMsgHead), + .info.wrapper = level == 1 ? VND_RSMA1(pSma->pVnode) : VND_RSMA2(pSma->pVnode)}; + code = tmsgPutToQueue(&pSma->pVnode->msgCb, WRITE_QUEUE, &delMsg); + TSDB_CHECK_CODE(code, lino, _exit); + } + +_exit: + taosArrayDestroy(pDelReq->deleteReqs); + if (code) { + smaError("vgId:%d, failed at line %d to process delete req for table:%" PRIi64 ", level:%" PRIi8 " since %s", + SMA_VID(pSma), lino, suid, level, tstrerror(code)); + } + + return code; +} + static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSmaInfoItem *pItem, SRSmaInfo *pInfo, int32_t execType, int8_t *streamFlushed) { int32_t code = 0; @@ -657,10 +697,25 @@ static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSma if (output->info.type == STREAM_CHECKPOINT) { if (streamFlushed) *streamFlushed = 1; continue; + } else if (output->info.type == STREAM_DELETE_RESULT) { + SBatchDeleteReq *pDeleteReq = NULL; + pDeleteReq->suid = suid; + pDeleteReq->deleteReqs = taosArrayInit(0, sizeof(SSingleDeleteReq)); + if (!pDeleteReq->deleteReqs) { + code = terrno; + TSDB_CHECK_CODE(code, lino, _exit); + } + code = tqBuildDeleteReq("", output, 
pDeleteReq, ""); + TSDB_CHECK_CODE(code, lino, _exit); + code = tdRSmaProcessDelReq(pSma, suid, pItem->level, pDeleteReq); + TSDB_CHECK_CODE(code, lino, _exit); + continue; } + smaDebug("vgId:%d, result block, uid:%" PRIu64 ", groupid:%" PRIu64 ", rows:%" PRIi64, SMA_VID(pSma), output->info.id.uid, output->info.id.groupId, output->info.rows); + STsdb *sinkTsdb = (pItem->level == TSDB_RETENTION_L1 ? pSma->pRSmaTsdb[0] : pSma->pRSmaTsdb[1]); SSubmitReq2 *pReq = NULL; @@ -669,7 +724,7 @@ static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSma TSDB_CHECK_CODE(code, lino, _exit); } - // reset the output version to handle reboot + // reset the output version when reboot if (STREAM_GET_ALL == execType && output->info.version == 0) { // the submitReqVer keeps unchanged since tdExecuteRSmaImpl and tdRSmaFetchAllResult are executed synchronously output->info.version = pItem->submitReqVer; @@ -1511,30 +1566,25 @@ static int32_t tdRSmaBatchExec(SSma *pSma, SRSmaInfo *pInfo, STaosQall *qall, SA if (nSubmit > 0 || nDelete > 0) { int32_t size = TARRAY_SIZE(pSubmitArr); - if (size > 0) { - int32_t inputType = nSubmit > 0 ? STREAM_INPUT__MERGED_SUBMIT : STREAM_INPUT__REF_DATA_BLOCK; - for (int32_t i = 1; i <= TSDB_RETENTION_L2; ++i) { - if (tdExecuteRSmaImpl(pSma, pSubmitArr->pData, size, inputType, pInfo, type, i) < 0) { - goto _err; - } + ASSERTS(size > 0, "size is %d", size); + int32_t inputType = nSubmit > 0 ? 
STREAM_INPUT__MERGED_SUBMIT : STREAM_INPUT__REF_DATA_BLOCK; + for (int32_t i = 1; i <= TSDB_RETENTION_L2; ++i) { + if (tdExecuteRSmaImpl(pSma, pSubmitArr->pData, size, inputType, pInfo, type, i) < 0) { + goto _err; } - tdFreeRSmaSubmitItems(pSubmitArr, inputType); } + tdFreeRSmaSubmitItems(pSubmitArr, inputType); + nSubmit = 0; + nDelete = 0; } else { goto _rtn; } if (resume == 1) { - nSubmit = 0; - nDelete = 0; resume = 0; - tdFreeRSmaSubmitItems(pSubmitArr, STREAM_INPUT__REF_DATA_BLOCK); goto _resume_submit; } else if (resume == 2) { - nSubmit = 0; - nDelete = 0; resume = 0; - tdFreeRSmaSubmitItems(pSubmitArr, STREAM_INPUT__MERGED_SUBMIT); goto _resume_delete; } } diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index fbd59203dc..0eb6448fca 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -412,6 +412,10 @@ static int32_t vnodePreProcessBatchDeleteMsg(SVnode *pVnode, SRpcMsg *pMsg) { int32_t code = 0; int32_t lino = 0; + if (pMsg->info.wrapper) { // skip for rsma + return code; + } + int64_t ctimeMs = taosGetTimestampMs(); SBatchDeleteReq pReq = {0}; SDecoder *pCoder = &(SDecoder){0}; @@ -1905,6 +1909,7 @@ static int32_t vnodeProcessBatchDeleteReq(SVnode *pVnode, int64_t ver, void *pRe TD_VID(pVnode), terrstr(), deleteReq.suid, uid, pOneReq->startTs, pOneReq->endTs); } + if() code = metaUpdateChangeTimeWithLock(pVnode->pMeta, uid, deleteReq.ctimeMs); if (code < 0) { terrno = code; From 4cba023447a8f0b75a3bca6412e7b636ade1db78 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Fri, 10 Nov 2023 11:47:02 +0800 Subject: [PATCH 32/79] fix: case issue --- include/common/tcommon.h | 2 +- tests/system-test/2-query/partition_by_col_agg.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/common/tcommon.h b/include/common/tcommon.h index c7b5858409..518dda7b01 100644 --- a/include/common/tcommon.h +++ b/include/common/tcommon.h @@ -365,7 +365,7 @@ typedef struct 
SSortExecInfo { } SSortExecInfo; typedef struct SNonSortExecInfo { - + int32_t blkNums; } SNonSortExecInfo; diff --git a/tests/system-test/2-query/partition_by_col_agg.py b/tests/system-test/2-query/partition_by_col_agg.py index 011415867b..1bc7a2414a 100644 --- a/tests/system-test/2-query/partition_by_col_agg.py +++ b/tests/system-test/2-query/partition_by_col_agg.py @@ -210,7 +210,7 @@ class TDTestCase: #self.check_explain_res_has_row('SortMerge', explain_res) #self.check_explain_res_has_row("blocking=0", explain_res) explain_res = self.explain_sql(sql_hint) - self.check_explain_res_has_row('SortMerge', explain_res) + self.check_explain_res_has_row('Merge', explain_res) self.check_explain_res_has_row('blocking=0', explain_res) def test_pipelined_agg_plan_with_slimit(self): From 32528ab9cd003a3dadd5e28abaf3ce4a94f91f29 Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Fri, 10 Nov 2023 12:44:09 +0800 Subject: [PATCH 33/79] fix(tsdb/reader-writer): fix pgnoEnd calc --- source/dnode/vnode/src/tsdb/tsdbReaderWriter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c b/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c index adb72821e4..8b9cae42fc 100644 --- a/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c +++ b/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c @@ -338,7 +338,7 @@ static int32_t tsdbReadFileS3(STsdbFD *pFD, int64_t offset, uint8_t *pBuf, int64 // 2, retrieve pgs from s3 uint8_t *pBlock = NULL; int64_t retrieve_offset = PAGE_OFFSET(pgno, pFD->szPage); - int64_t pgnoEnd = pgno - 1 + (size - n + szPgCont - 1) / szPgCont; + int64_t pgnoEnd = pgno - 1 + (bOffset + size - n + szPgCont - 1) / szPgCont; int64_t retrieve_size = (pgnoEnd - pgno + 1) * pFD->szPage; code = s3GetObjectBlock(pFD->objName, retrieve_offset, retrieve_size, 1, &pBlock); if (code != TSDB_CODE_SUCCESS) { From 8937436b8a62ad14e2e60fee42c3ca7dcb55665f Mon Sep 17 00:00:00 2001 From: dapan1121 <72057773+dapan1121@users.noreply.github.com> 
Date: Fri, 10 Nov 2023 14:34:18 +0800 Subject: [PATCH 34/79] Update 10-function.md --- docs/zh/12-taos-sql/10-function.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/zh/12-taos-sql/10-function.md b/docs/zh/12-taos-sql/10-function.md index 2b20c85fd5..26313390a6 100644 --- a/docs/zh/12-taos-sql/10-function.md +++ b/docs/zh/12-taos-sql/10-function.md @@ -565,7 +565,7 @@ TO_TIMESTAMP(ts_str_literal, format_str_literal) - `MONTH`, `MON`, `DAY`, `DY` 以及其他输出为数字的格式的大小写意义相同, 如 `to_timestamp('2023-JANUARY-01', 'YYYY-month-dd')`, `month`可以被替换为`MONTH` 或者`Month`. - 如果同一字段被指定了多次, 那么前面的指定将会被覆盖. 如 `to_timestamp('2023-22-10-10', 'yyyy-yy-MM-dd')`, 输出年份是`2022`. - 为避免转换时使用了非预期的时区,推荐在时间中携带时区信息,例如'2023-10-10 10:10:10+08',如果未指定时区则默认时区为服务端或客户端指定的时区。 -- 如果没有指定完整的时间,那么默认时间值为指定或默认时区的 `1970-01-01 00:00:00`, 未指定部分使用该默认值中的对应部分. 目前格式中指定年日, 但是不指定月日会报不支持错误, 如'yyyy-mm-DDD'不支持, 由于目前无法将年日转换成具体的月日, 应指定月日`DD`. +- 如果没有指定完整的时间,那么默认时间值为指定或默认时区的 `1970-01-01 00:00:00`, 未指定部分使用该默认值中的对应部分. 暂不支持只指定年日而不指定月日的格式, 如'yyyy-mm-DDD', 支持'yyyy-mm-DD'. - 如果格式串中有`AM`, `PM`等, 那么小时必须是12小时制, 范围必须是01-12. - `to_timestamp`转换具有一定的容错机制, 在格式串和时间戳串不完全对应时, 有时也可转换, 如: `to_timestamp('200101/2', 'yyyyMM1/dd')`, 格式串中多出来的1会被丢弃. 格式串与时间戳串中多余的空格字符(空格, tab等)也会被 自动忽略. 如`to_timestamp(' 23 年 - 1 月 - 01 日 ', 'yy 年-MM月-dd日')` 可以被成功转换. 虽然`MM`等字段需要两个数字对应(只有一位时前面补0), 在`to_timestamp`时, 一个数字也可以成功转换. - 输出时间戳的精度与查询表的精度相同, 若查询未指定表, 则输出精度为毫秒. 如`select to_timestamp('2023-08-1 10:10:10.123456789', 'yyyy-mm-dd hh:mi:ss.ns')`的输出将会把微妙和纳秒进行截断. 如果指定一张纳秒表, 那么就不会发生截断, 如`select to_timestamp('2023-08-1 10:10:10.123456789', 'yyyy-mm-dd hh:mi:ss.ns') from db_ns.table_ns limit 1`. 
From 674e878ffad5e0d983db5af7742318a301a0fb41 Mon Sep 17 00:00:00 2001 From: kailixu Date: Fri, 10 Nov 2023 14:44:29 +0800 Subject: [PATCH 35/79] enh: rsma support delete --- include/common/tmsg.h | 1 + source/common/src/tdatablock.c | 4 - source/common/src/tmsg.c | 4 + source/dnode/vnode/src/inc/tq.h | 2 +- source/dnode/vnode/src/sma/smaRollup.c | 57 ++- source/dnode/vnode/src/sma/smaTimeRange.c | 2 +- source/dnode/vnode/src/tq/tqSink.c | 36 +- source/dnode/vnode/src/vnd/vnodeSvr.c | 29 +- source/libs/executor/src/scanoperator.c | 6 + tests/parallel_test/cases.task | 1 + .../tsim/sma/rsmaCreateInsertQueryDelete.sim | 446 ++++++++++++++++++ tests/script/tsim/testsuit.sim | 1 + tests/script/win-test-file | 1 + 13 files changed, 531 insertions(+), 59 deletions(-) create mode 100644 tests/script/tsim/sma/rsmaCreateInsertQueryDelete.sim diff --git a/include/common/tmsg.h b/include/common/tmsg.h index 4ef4273631..86d34502c6 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -3774,6 +3774,7 @@ typedef struct { int64_t suid; SArray* deleteReqs; // SArray int64_t ctimeMs; // fill by vnode + int8_t level; // 0 tsdb(default), 1 rsma1 , 2 rsma2 } SBatchDeleteReq; int32_t tEncodeSBatchDeleteReq(SEncoder* pCoder, const SBatchDeleteReq* pReq); diff --git a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c index 4b777dac82..bf21b2eda0 100644 --- a/source/common/src/tdatablock.c +++ b/source/common/src/tdatablock.c @@ -1978,10 +1978,6 @@ int32_t buildSubmitReqFromDataBlock(SSubmitReq2** ppReq, const SSDataBlock* pDat int32_t colNum = taosArrayGetSize(pDataBlock->pDataBlock); int32_t rows = pDataBlock->info.rows; - if(pDataBlock->info.type == STREAM_DELETE_RESULT) { - - } - if (colNum <= 1) { // invalid if only with TS col continue; } diff --git a/source/common/src/tmsg.c b/source/common/src/tmsg.c index 34b789fef8..dc3ba7934f 100644 --- a/source/common/src/tmsg.c +++ b/source/common/src/tmsg.c @@ -8337,6 +8337,7 @@ int32_t 
tEncodeSBatchDeleteReq(SEncoder *pEncoder, const SBatchDeleteReq *pReq) if (tEncodeSSingleDeleteReq(pEncoder, pOneReq) < 0) return -1; } if (tEncodeI64(pEncoder, pReq->ctimeMs) < 0) return -1; + if (tEncodeI8(pEncoder, pReq->level) < 0) return -1; return 0; } @@ -8361,6 +8362,9 @@ int32_t tDecodeSBatchDeleteReq(SDecoder *pDecoder, SBatchDeleteReq *pReq) { if (!tDecodeIsEnd(pDecoder)) { if (tDecodeI64(pDecoder, &pReq->ctimeMs) < 0) return -1; } + if (!tDecodeIsEnd(pDecoder)) { + if (tDecodeI8(pDecoder, &pReq->level) < 0) return -1; + } return 0; } diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index 6b4f8d1431..fdd449bf36 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -147,7 +147,7 @@ int32_t tqOffsetDelete(STqOffsetStore* pStore, const char* subscribeKey) int32_t tqOffsetCommitFile(STqOffsetStore* pStore); // tqSink -int32_t tqBuildDeleteReq(const char* stbFullName, const SSDataBlock* pDataBlock, SBatchDeleteReq* deleteReq, +int32_t tqBuildDeleteReq(STQ* pTq, const char* stbFullName, const SSDataBlock* pDataBlock, SBatchDeleteReq* deleteReq, const char* pIdStr); void tqSinkDataIntoDstTable(SStreamTask* pTask, void* vnode, void* data); diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index d0ef36d17f..99f5283df3 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -43,8 +43,8 @@ static void tdUidStoreDestory(STbUidStore *pStore); static int32_t tdUpdateTbUidListImpl(SSma *pSma, tb_uid_t *suid, SArray *tbUids, bool isAdd); static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat *pStat, SRSmaInfo *pRSmaInfo, int8_t idx); -static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, int32_t inputType, SRSmaInfo *pInfo, - ERsmaExecType type, int8_t level); +static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, int64_t version, int32_t 
inputType, + SRSmaInfo *pInfo, ERsmaExecType type, int8_t level); static SRSmaInfo *tdAcquireRSmaInfoBySuid(SSma *pSma, int64_t suid); static void tdReleaseRSmaInfo(SSma *pSma, SRSmaInfo *pInfo); static void tdFreeRSmaSubmitItems(SArray *pItems, int32_t type); @@ -654,7 +654,7 @@ static int32_t tdRSmaProcessDelReq(SSma *pSma, int64_t suid, int8_t level, SBatc SRpcMsg delMsg = {.msgType = TDMT_VND_BATCH_DEL, .pCont = pBuf, .contLen = len + sizeof(SMsgHead), - .info.wrapper = level == 1 ? VND_RSMA1(pSma->pVnode) : VND_RSMA2(pSma->pVnode)}; + .info.ahandle = level == 1 ? VND_RSMA1(pSma->pVnode) : VND_RSMA2(pSma->pVnode)}; code = tmsgPutToQueue(&pSma->pVnode->msgCb, WRITE_QUEUE, &delMsg); TSDB_CHECK_CODE(code, lino, _exit); } @@ -698,16 +698,15 @@ static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSma if (streamFlushed) *streamFlushed = 1; continue; } else if (output->info.type == STREAM_DELETE_RESULT) { - SBatchDeleteReq *pDeleteReq = NULL; - pDeleteReq->suid = suid; - pDeleteReq->deleteReqs = taosArrayInit(0, sizeof(SSingleDeleteReq)); - if (!pDeleteReq->deleteReqs) { + SBatchDeleteReq deleteReq = {.suid = suid, .level = pItem->level}; + deleteReq.deleteReqs = taosArrayInit(0, sizeof(SSingleDeleteReq)); + if (!deleteReq.deleteReqs) { code = terrno; TSDB_CHECK_CODE(code, lino, _exit); } - code = tqBuildDeleteReq("", output, pDeleteReq, ""); + code = tqBuildDeleteReq(pSma->pVnode->pTq, NULL, output, &deleteReq, ""); TSDB_CHECK_CODE(code, lino, _exit); - code = tdRSmaProcessDelReq(pSma, suid, pItem->level, pDeleteReq); + code = tdRSmaProcessDelReq(pSma, suid, pItem->level, &deleteReq); TSDB_CHECK_CODE(code, lino, _exit); continue; } @@ -715,6 +714,19 @@ static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSma smaDebug("vgId:%d, result block, uid:%" PRIu64 ", groupid:%" PRIu64 ", rows:%" PRIi64, SMA_VID(pSma), output->info.id.uid, output->info.id.groupId, output->info.rows); + if (STREAM_GET_ALL == execType) { + /** + * 
1. reset the output version when reboot + * 2. delete msg version not updated from the result + */ + if (output->info.version < pItem->submitReqVer) { + // submitReqVer keeps unchanged since tdExecuteRSmaImpl and tdRSmaFetchAllResult are executed synchronously + output->info.version = pItem->submitReqVer; + } else if (output->info.version == pItem->fetchResultVer) { + ASSERTS(0, "duplicated fetch version:%" PRIi64, pItem->fetchResultVer); + continue; + } + } STsdb *sinkTsdb = (pItem->level == TSDB_RETENTION_L1 ? pSma->pRSmaTsdb[0] : pSma->pRSmaTsdb[1]); SSubmitReq2 *pReq = NULL; @@ -724,12 +736,6 @@ static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSma TSDB_CHECK_CODE(code, lino, _exit); } - // reset the output version when reboot - if (STREAM_GET_ALL == execType && output->info.version == 0) { - // the submitReqVer keeps unchanged since tdExecuteRSmaImpl and tdRSmaFetchAllResult are executed synchronously - output->info.version = pItem->submitReqVer; - } - if (pReq && tdProcessSubmitReq(sinkTsdb, output->info.version, pReq) < 0) { if (terrno == TSDB_CODE_TDB_TIMESTAMP_OUT_OF_RANGE) { // TODO: reconfigure SSubmitReq2 @@ -858,7 +864,7 @@ static int32_t tdRsmaPrintSubmitReq(SSma *pSma, SSubmitReq *pReq) { * @param level * @return int32_t */ -static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, int32_t inputType, SRSmaInfo *pInfo, +static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, int64_t version, int32_t inputType, SRSmaInfo *pInfo, ERsmaExecType type, int8_t level) { int32_t idx = level - 1; void *qTaskInfo = RSMA_INFO_QTASK(pInfo, idx); @@ -878,22 +884,12 @@ static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, smaDebug("vgId:%d, execute rsma %" PRIi8 " task for qTaskInfo:%p suid:%" PRIu64 " nMsg:%d", SMA_VID(pSma), level, RSMA_INFO_QTASK(pInfo, idx), pInfo->suid, msgSize); -#if 0 - for (int32_t i = 0; i < msgSize; ++i) { - SSubmitReq *pReq = *(SSubmitReq 
**)((char *)pMsg + i * sizeof(void *)); - smaDebug("vgId:%d, [%d][%d] version %" PRIi64, SMA_VID(pSma), msgSize, i, pReq->version); - tdRsmaPrintSubmitReq(pSma, pReq); - } -#endif if ((terrno = qSetSMAInput(qTaskInfo, pMsg, msgSize, inputType)) < 0) { smaError("vgId:%d, rsma %" PRIi8 " qSetStreamInput failed since %s", SMA_VID(pSma), level, tstrerror(terrno)); return TSDB_CODE_FAILED; } - if (STREAM_INPUT__MERGED_SUBMIT == inputType) { - SPackedData *packData = POINTER_SHIFT(pMsg, sizeof(SPackedData) * (msgSize - 1)); - atomic_store_64(&pItem->submitReqVer, packData->ver); - } + atomic_store_64(&pItem->submitReqVer, version); terrno = tdRSmaExecAndSubmitResult(pSma, qTaskInfo, pItem, pInfo, STREAM_NORMAL, NULL); @@ -1515,6 +1511,7 @@ static int32_t tdRSmaBatchExec(SSma *pSma, SRSmaInfo *pInfo, STaosQall *qall, SA int8_t resume = 0; int32_t nSubmit = 0; int32_t nDelete = 0; + int64_t version = 0; SPackedData packData; @@ -1534,6 +1531,7 @@ static int32_t tdRSmaBatchExec(SSma *pSma, SRSmaInfo *pInfo, STaosQall *qall, SA packData.msgLen = RSMA_EXEC_MSG_LEN(msg); packData.ver = RSMA_EXEC_MSG_VER(msg); packData.msgStr = RSMA_EXEC_MSG_BODY(msg); + version = packData.ver; if (!taosArrayPush(pSubmitArr, &packData)) { taosFreeQitem(msg); terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -1546,7 +1544,8 @@ static int32_t tdRSmaBatchExec(SSma *pSma, SRSmaInfo *pInfo, STaosQall *qall, SA break; } _resume_delete: - if ((terrno = extractDelDataBlock(RSMA_EXEC_MSG_BODY(msg), RSMA_EXEC_MSG_LEN(msg), RSMA_EXEC_MSG_VER(msg), + version = RSMA_EXEC_MSG_VER(msg); + if ((terrno = extractDelDataBlock(RSMA_EXEC_MSG_BODY(msg), RSMA_EXEC_MSG_LEN(msg), version, &packData.pDataBlock, 1))) { taosFreeQitem(msg); goto _err; @@ -1569,7 +1568,7 @@ static int32_t tdRSmaBatchExec(SSma *pSma, SRSmaInfo *pInfo, STaosQall *qall, SA ASSERTS(size > 0, "size is %d", size); int32_t inputType = nSubmit > 0 ? 
STREAM_INPUT__MERGED_SUBMIT : STREAM_INPUT__REF_DATA_BLOCK; for (int32_t i = 1; i <= TSDB_RETENTION_L2; ++i) { - if (tdExecuteRSmaImpl(pSma, pSubmitArr->pData, size, inputType, pInfo, type, i) < 0) { + if (tdExecuteRSmaImpl(pSma, pSubmitArr->pData, size, version, inputType, pInfo, type, i) < 0) { goto _err; } } diff --git a/source/dnode/vnode/src/sma/smaTimeRange.c b/source/dnode/vnode/src/sma/smaTimeRange.c index 08ddc4bd7b..94ff5ef6b3 100644 --- a/source/dnode/vnode/src/sma/smaTimeRange.c +++ b/source/dnode/vnode/src/sma/smaTimeRange.c @@ -188,7 +188,7 @@ int32_t smaBlockToSubmit(SVnode *pVnode, const SArray *pBlocks, const STSchema * if (pDataBlock->info.type == STREAM_DELETE_RESULT) { pDeleteReq->suid = suid; pDeleteReq->deleteReqs = taosArrayInit(0, sizeof(SSingleDeleteReq)); - tqBuildDeleteReq(stbFullName, pDataBlock, pDeleteReq, ""); + tqBuildDeleteReq(pVnode->pTq, stbFullName, pDataBlock, pDeleteReq, ""); continue; } diff --git a/source/dnode/vnode/src/tq/tqSink.c b/source/dnode/vnode/src/tq/tqSink.c index 4b64737936..80ce867e70 100644 --- a/source/dnode/vnode/src/tq/tqSink.c +++ b/source/dnode/vnode/src/tq/tqSink.c @@ -43,7 +43,7 @@ static SArray* createDefaultTagColName(); static void setCreateTableMsgTableName(SVCreateTbReq* pCreateTableReq, SSDataBlock* pDataBlock, const char* stbFullName, int64_t gid); -int32_t tqBuildDeleteReq(const char* stbFullName, const SSDataBlock* pDataBlock, SBatchDeleteReq* deleteReq, +int32_t tqBuildDeleteReq(STQ* pTq, const char* stbFullName, const SSDataBlock* pDataBlock, SBatchDeleteReq* deleteReq, const char* pIdStr) { int32_t totalRows = pDataBlock->info.rows; SColumnInfoData* pStartTsCol = taosArrayGet(pDataBlock->pDataBlock, START_TS_COLUMN_INDEX); @@ -53,30 +53,44 @@ int32_t tqBuildDeleteReq(const char* stbFullName, const SSDataBlock* pDataBlock, tqDebug("s-task:%s build %d rows delete msg for table:%s", pIdStr, totalRows, stbFullName); + char tbName[TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE]; for (int32_t row = 0; row 
< totalRows; row++) { int64_t skey = *(int64_t*)colDataGetData(pStartTsCol, row); int64_t ekey = *(int64_t*)colDataGetData(pEndTsCol, row); int64_t groupId = *(int64_t*)colDataGetData(pGidCol, row); - char* name; - void* varTbName = NULL; + char* name = NULL; + char* pName = NULL; + void* varTbName = NULL; + tbName[0] = '\0'; if (!colDataIsNull(pTbNameCol, totalRows, row, NULL)) { varTbName = colDataGetVarData(pTbNameCol, row); } if (varTbName != NULL && varTbName != (void*)-1) { - name = taosMemoryCalloc(1, TSDB_TABLE_NAME_LEN); - memcpy(name, varDataVal(varTbName), varDataLen(varTbName)); + name = varDataVal(varTbName); + } else if (stbFullName) { + pName = buildCtbNameByGroupId(stbFullName, groupId); + name = pName; } else { - name = buildCtbNameByGroupId(stbFullName, groupId); + metaGetTableNameByUid(pTq->pVnode, groupId, tbName); + name = varDataVal(tbName); } - tqDebug("s-task:%s build delete msg groupId:%" PRId64 ", name:%s, skey:%" PRId64 " ekey:%" PRId64, - pIdStr, groupId, name, skey, ekey); + if (!name || *name == '\0') { + tqError("s-task:%s build delete msg groupId:%" PRId64 ", skey:%" PRId64 " ekey:%" PRId64 + " failed since invalid tbname:%s", + pIdStr, groupId, name, skey, ekey, name ? 
name : "NULL"); + taosArrayDestroy(deleteReq->deleteReqs); + return -1; + } - SSingleDeleteReq req = { .startTs = skey, .endTs = ekey}; + tqDebug("s-task:%s build delete msg groupId:%" PRId64 ", name:%s, skey:%" PRId64 " ekey:%" PRId64, pIdStr, groupId, + name, skey, ekey); + + SSingleDeleteReq req = {.startTs = skey, .endTs = ekey}; strncpy(req.tbname, name, TSDB_TABLE_NAME_LEN - 1); - taosMemoryFree(name); + if (pName) taosMemoryFree(pName); taosArrayPush(deleteReq->deleteReqs, &req); } @@ -345,7 +359,7 @@ int32_t doBuildAndSendDeleteMsg(SVnode* pVnode, char* stbFullName, SSDataBlock* int64_t suid) { SBatchDeleteReq deleteReq = {.suid = suid, .deleteReqs = taosArrayInit(0, sizeof(SSingleDeleteReq))}; - int32_t code = tqBuildDeleteReq(stbFullName, pDataBlock, &deleteReq, pTask->id.idStr); + int32_t code = tqBuildDeleteReq(pVnode->pTq, stbFullName, pDataBlock, &deleteReq, pTask->id.idStr); if (code != TSDB_CODE_SUCCESS) { return code; } diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index 0eb6448fca..c219508cd0 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -412,10 +412,6 @@ static int32_t vnodePreProcessBatchDeleteMsg(SVnode *pVnode, SRpcMsg *pMsg) { int32_t code = 0; int32_t lino = 0; - if (pMsg->info.wrapper) { // skip for rsma - return code; - } - int64_t ctimeMs = taosGetTimestampMs(); SBatchDeleteReq pReq = {0}; SDecoder *pCoder = &(SDecoder){0}; @@ -1890,6 +1886,13 @@ static int32_t vnodeProcessBatchDeleteReq(SVnode *pVnode, int64_t ver, void *pRe SMetaReader mr = {0}; metaReaderDoInit(&mr, pVnode->pMeta, META_READER_NOLOCK); + STsdb *pTsdb = pVnode->pTsdb; + + if (deleteReq.level == 1) { + pTsdb = VND_RSMA1(pVnode); + } else { + pTsdb = VND_RSMA2(pVnode); + } int32_t sz = taosArrayGetSize(deleteReq.deleteReqs); for (int32_t i = 0; i < sz; i++) { @@ -1902,22 +1905,22 @@ static int32_t vnodeProcessBatchDeleteReq(SVnode *pVnode, int64_t ver, void *pRe int64_t uid = 
mr.me.uid; - int32_t code = tsdbDeleteTableData(pVnode->pTsdb, ver, deleteReq.suid, uid, pOneReq->startTs, pOneReq->endTs); + int32_t code = tsdbDeleteTableData(pTsdb, ver, deleteReq.suid, uid, pOneReq->startTs, pOneReq->endTs); if (code < 0) { terrno = code; vError("vgId:%d, delete error since %s, suid:%" PRId64 ", uid:%" PRId64 ", start ts:%" PRId64 ", end ts:%" PRId64, TD_VID(pVnode), terrstr(), deleteReq.suid, uid, pOneReq->startTs, pOneReq->endTs); } - if() - code = metaUpdateChangeTimeWithLock(pVnode->pMeta, uid, deleteReq.ctimeMs); - if (code < 0) { - terrno = code; - vError("vgId:%d, update change time error since %s, suid:%" PRId64 ", uid:%" PRId64 ", start ts:%" PRId64 - ", end ts:%" PRId64, - TD_VID(pVnode), terrstr(), deleteReq.suid, uid, pOneReq->startTs, pOneReq->endTs); + if (deleteReq.level == 0) { + code = metaUpdateChangeTimeWithLock(pVnode->pMeta, uid, deleteReq.ctimeMs); + if (code < 0) { + terrno = code; + vError("vgId:%d, update change time error since %s, suid:%" PRId64 ", uid:%" PRId64 ", start ts:%" PRId64 + ", end ts:%" PRId64, + TD_VID(pVnode), terrstr(), deleteReq.suid, uid, pOneReq->startTs, pOneReq->endTs); + } } - tDecoderClear(&mr.coder); } metaReaderClear(&mr); diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index c47e14ad0d..c7f0bb3895 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -2105,6 +2105,12 @@ FETCH_NEXT_BLOCK: SPackedData* pPacked = taosArrayGet(pInfo->pBlockLists, current); SSDataBlock* pBlock = pPacked->pDataBlock; + + if(!pBlock) { + doClearBufferedBlocks(pInfo); + return NULL; + } + if (pBlock->info.parTbName[0]) { pAPI->stateStore.streamStatePutParName(pStreamInfo->pState, pBlock->info.id.groupId, pBlock->info.parTbName); } diff --git a/tests/parallel_test/cases.task b/tests/parallel_test/cases.task index d482e0fde9..3a1b935ee6 100644 --- a/tests/parallel_test/cases.task +++ b/tests/parallel_test/cases.task @@ -1180,6 
+1180,7 @@ e ,,y,script,./test.sh -f tsim/sma/sma_leak.sim ,,y,script,./test.sh -f tsim/sma/tsmaCreateInsertQuery.sim ,,y,script,./test.sh -f tsim/sma/rsmaCreateInsertQuery.sim +,,y,script,./test.sh -f tsim/sma/rsmaCreateInsertQueryDelete.sim ,,y,script,./test.sh -f tsim/sma/rsmaPersistenceRecovery.sim ,,y,script,./test.sh -f tsim/sync/vnodesnapshot-rsma-test.sim ,,n,script,./test.sh -f tsim/valgrind/checkError1.sim diff --git a/tests/script/tsim/sma/rsmaCreateInsertQueryDelete.sim b/tests/script/tsim/sma/rsmaCreateInsertQueryDelete.sim new file mode 100644 index 0000000000..5f5c840eb6 --- /dev/null +++ b/tests/script/tsim/sma/rsmaCreateInsertQueryDelete.sim @@ -0,0 +1,446 @@ +system sh/stop_dnodes.sh +system sh/deploy.sh -n dnode1 -i 1 +system sh/exec.sh -n dnode1 -s start +sleep 50 +sql connect + +print =============== create database with retentions +sql create database d0 retentions -:7d,10s:21d,15s:365d; +sql use d0 + +print =============== create super table and register rsma +sql create table if not exists stb (ts timestamp, c1 float, c2 double) tags (city binary(20),district binary(20)) rollup(sum) max_delay 1s,1s; + +sql show stables +if $rows != 1 then + return -1 +endi + +print =============== create child table +sql create table ct1 using stb tags("BeiJing", "ChaoYang"); + +sql show tables +if $rows != 1 then + return -1 +endi + +print =============== insert data and trigger rollup +sql insert into ct1 values(now, 10, NULL); +sql insert into ct1 values(now+60m, 1, NULL); +sql insert into ct1 values(now+120m, 100, NULL); + +print =============== wait 5 seconds for results +sleep 5000 + +print =============== select * from retention level 2 from memory +sql select * from ct1; +print $data00 $data01 $data02 +print $data10 $data11 $data12 +print $data20 $data21 $data22 +if $rows != 3 then + print retention level 2 file rows $rows != 3 + return -1 +endi + +if $data01 != 10.00000 then + return -1 +endi +if $data02 != NULL then + return -1 +endi +if $data11 != 
1.00000 then + return -1 +endi +if $data12 != NULL then + return -1 +endi +if $data21 != 100.00000 then + return -1 +endi +if $data22 != NULL then + return -1 +endi + +print =============== select * from retention level 1 from memory +sql select * from ct1 where ts > now-8d; +print $data00 $data01 $data02 +print $data10 $data11 $data12 +print $data20 $data21 $data22 + +if $rows != 3 then + print retention level 2 file rows $rows != 3 + return -1 +endi + +if $data01 != 10.00000 then + return -1 +endi +if $data02 != NULL then + return -1 +endi +if $data11 != 1.00000 then + return -1 +endi +if $data12 != NULL then + return -1 +endi +if $data21 != 100.00000 then + return -1 +endi +if $data22 != NULL then + return -1 +endi + +print =============== select * from retention level 0 from memory +sql select * from ct1 where ts > now-3d; +print $data00 $data01 $data02 +print $data10 $data11 $data12 +print $data20 $data21 $data22 + +if $rows != 3 then + print retention level 2 file rows $rows != 3 + return -1 +endi + +if $data01 != 10.00000 then + return -1 +endi +if $data02 != NULL then + return -1 +endi +if $data11 != 1.00000 then + return -1 +endi +if $data12 != NULL then + return -1 +endi +if $data21 != 100.00000 then + return -1 +endi +if $data22 != NULL then + return -1 +endi + +print =============== delete row 0 +sql delete from ct1 where ts < now; +sql delete from ct1 where ts < now; +sql delete from ct1 where ts < now; +print =============== wait 5 seconds for results +sleep 5000 + +print =============== select * from retention level 2 from memory after delete row 0 +sql select * from ct1; +print $data00 $data01 $data02 +print $data10 $data11 $data12 + +if $rows != 2 then + print retention level 2 file rows $rows != 2 + return -1 +endi + +if $data01 != 1.00000 then + return -1 +endi +if $data02 != NULL then + return -1 +endi +if $data11 != 100.00000 then + return -1 +endi +if $data12 != NULL then + return -1 +endi + +print =============== select * from retention level 
1 from memory after delete row 0 +sql select * from ct1 where ts > now-8d; +print $data00 $data01 $data02 +print $data10 $data11 $data12 + +if $rows != 2 then + print retention level 2 file rows $rows != 2 + return -1 +endi + +if $data01 != 1.00000 then + return -1 +endi +if $data02 != NULL then + return -1 +endi +if $data11 != 100.00000 then + return -1 +endi +if $data12 != NULL then + return -1 +endi + +print =============== select * from retention level 0 from memory after delete row 0 +sql select * from ct1 where ts > now-3d; +print $data00 $data01 $data02 +print $data10 $data11 $data12 + +if $rows != 2 then + print retention level 2 file rows $rows != 2 + return -1 +endi + +if $data01 != 1.00000 then + return -1 +endi +if $data02 != NULL then + return -1 +endi +if $data11 != 100.00000 then + return -1 +endi +if $data12 != NULL then + return -1 +endi + +print =============== delete row 1 +sql delete from ct1 where ts < now; +sql delete from ct1 where ts < now; +sql delete from ct1 where ts < now + 60m; +sql delete from ct1 where ts < now + 60m; +sql delete from ct1 where ts < now + 60m; +sql delete from ct1 where ts < now + 60m; +sql delete from ct1 where ts < now + 60m; + +print =============== wait 5 seconds for results +sleep 5000 + +print =============== select * from retention level 2 from memory after delete row 1 +sql select * from ct1; +print $data00 $data01 $data02 + +if $rows != 1 then + print retention level 2 file rows $rows != 1 + return -1 +endi + +if $data01 != 100.00000 then + return -1 +endi +if $data02 != NULL then + return -1 +endi + +print =============== select * from retention level 1 from memory after delete row 1 +sql select * from ct1 where ts > now-8d; +print $data00 $data01 $data02 + +if $rows != 1 then + print retention level 2 file rows $rows != 1 + return -1 +endi + +if $data01 != 100.00000 then + return -1 +endi +if $data02 != NULL then + return -1 +endi + +print =============== select * from retention level 0 from memory after 
delete row 1 +sql select * from ct1 where ts > now-3d; +print $data00 $data01 $data02 + +if $rows != 1 then + print retention level 2 file rows $rows != 1 + return -1 +endi + +if $data01 != 100.00000 then + return -1 +endi +if $data02 != NULL then + return -1 +endi + +#=================================================================== +system sh/exec.sh -n dnode1 -s stop -x SIGINT +system sh/exec.sh -n dnode1 -s start +print =============== wait 5 seconds for results after reboot +sleep 5000 + +print =============== select * from retention level 2 from memory after reboot +sql select * from ct1; +print $data00 $data01 $data02 + +if $rows != 1 then + print retention level 2 file rows $rows != 1 + return -1 +endi + +if $data01 != 100.00000 then + return -1 +endi +if $data02 != NULL then + return -1 +endi + +print =============== select * from retention level 1 from memory after reboot +sql select * from ct1 where ts > now-8d; +print $data00 $data01 $data02 + +if $rows != 1 then + print retention level 2 file rows $rows != 1 + return -1 +endi + +if $data01 != 100.00000 then + return -1 +endi +if $data02 != NULL then + return -1 +endi + +print =============== select * from retention level 0 from memory after reboot +sql select * from ct1 where ts > now-3d; +print $data00 $data01 $data02 + +if $rows != 1 then + print retention level 2 file rows $rows != 1 + return -1 +endi + +if $data01 != 100.00000 then + return -1 +endi +if $data02 != NULL then + return -1 +endi + +#==================== flush database to trigger commit data to file +sql flush database d0; +system sh/exec.sh -n dnode1 -s stop -x SIGINT +system sh/exec.sh -n dnode1 -s start + +print =============== select * from retention level 2 from file +sql select * from ct1 where ts > now-365d; +print $data00 $data01 $data02 + +if $rows != 1 then + print retention level 2 file rows $rows != 1 + return -1 +endi + +if $data01 != 100.00000 then + return -1 +endi +if $data02 != NULL then + return -1 +endi + +print 
=============== select * from retention level 1 from file +sql select * from ct1 where ts > now-8d; +print $data00 $data01 $data02 + +if $rows != 1 then + print retention level 2 file rows $rows != 1 + return -1 +endi + +if $data01 != 100.00000 then + return -1 +endi +if $data02 != NULL then + return -1 +endi + +print =============== select * from retention level 0 from file +sql select * from ct1 where ts > now-3d; +print $data00 $data01 $data02 + +if $rows != 1 then + print retention level 2 file rows $rows != 1 + return -1 +endi + +if $data01 != 100.00000 then + return -1 +endi +if $data02 != NULL then + return -1 +endi + +print =============== delete row 2 +sql delete from ct1 where ts < now; +sql delete from ct1 where ts < now; +sql delete from ct1 where ts < now + 60m; +sql delete from ct1 where ts < now + 60m; +sql delete from ct1 where ts < now + 60m; +sql delete from ct1 where ts < now + 60m; +sql delete from ct1 where ts < now + 60m; +sql delete from ct1 where ts < now + 120m; +sql delete from ct1 where ts < now + 200m; +sql delete from ct1 where ts < now + 300m; +sql delete from ct1 where ts < now + 60m; +sql delete from ct1 where ts < now; + +print =============== wait 5 seconds for results +sleep 5000 + +print =============== select * from retention level 2 from memory after delete row 2 +sql select * from ct1; +print $data00 $data01 $data02 + +if $rows != 0 then + print retention level 2 file rows $rows != 0 + return -1 +endi + +print =============== select * from retention level 1 from memory after delete row 2 +sql select * from ct1 where ts > now-8d; +print $data00 $data01 $data02 + +if $rows != 0 then + print retention level 2 file rows $rows != 0 + return -1 +endi + +print =============== select * from retention level 0 from memory after delete row 2 +sql select * from ct1 where ts > now-3d; +print $data00 $data01 $data02 + +if $rows != 0 then + print retention level 2 file rows $rows != 0 + return -1 +endi + 
+#=================================================================== +system sh/exec.sh -n dnode1 -s stop -x SIGINT +system sh/exec.sh -n dnode1 -s start +print =============== wait 5 seconds for results after reboot +sleep 5000 + +print =============== select * from retention level 2 from memory after delete row 2 +sql select * from ct1; +print $data00 $data01 $data02 + +if $rows != 0 then + print retention level 2 file rows $rows != 0 + return -1 +endi + +print =============== select * from retention level 1 from memory after delete row 2 +sql select * from ct1 where ts > now-8d; +print $data00 $data01 $data02 + +if $rows != 0 then + print retention level 2 file rows $rows != 0 + return -1 +endi + +print =============== select * from retention level 0 from memory after delete row 2 +sql select * from ct1 where ts > now-3d; +print $data00 $data01 $data02 + +if $rows != 0 then + print retention level 2 file rows $rows != 0 + return -1 +endi + +#=================================================================== +system sh/exec.sh -n dnode1 -s stop -x SIGINT +#=================================================================== \ No newline at end of file diff --git a/tests/script/tsim/testsuit.sim b/tests/script/tsim/testsuit.sim index 0abe56ab3c..c208a07488 100644 --- a/tests/script/tsim/testsuit.sim +++ b/tests/script/tsim/testsuit.sim @@ -130,5 +130,6 @@ run tsim/sync/3Replica1VgElect.sim run tsim/sync/threeReplica1VgElectWihtInsert.sim run tsim/sma/tsmaCreateInsertQuery.sim run tsim/sma/rsmaCreateInsertQuery.sim +run tsim/sma/rsmaCreateInsertQueryDelete.sim run tsim/valgrind/basic.sim run tsim/valgrind/checkError.sim \ No newline at end of file diff --git a/tests/script/win-test-file b/tests/script/win-test-file index fe5f5c39e3..b2d50ade8a 100644 --- a/tests/script/win-test-file +++ b/tests/script/win-test-file @@ -319,6 +319,7 @@ ./test.sh -f tsim/sma/sma_leak.sim ./test.sh -f tsim/sma/tsmaCreateInsertQuery.sim ./test.sh -f tsim/sma/rsmaCreateInsertQuery.sim 
+./test.sh -f tsim/sma/rsmaCreateInsertQueryDelete.sim ./test.sh -f tsim/sma/rsmaPersistenceRecovery.sim ./test.sh -f tsim/sync/vnodesnapshot-rsma-test.sim ./test.sh -f tsim/valgrind/checkError1.sim From 2dc92188df86fc63281ada7b79f8e584709a624d Mon Sep 17 00:00:00 2001 From: dapan1121 <72057773+dapan1121@users.noreply.github.com> Date: Fri, 10 Nov 2023 15:00:47 +0800 Subject: [PATCH 36/79] Update 10-function.md --- docs/en/12-taos-sql/10-function.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/12-taos-sql/10-function.md b/docs/en/12-taos-sql/10-function.md index 3cf75f15a5..4f8ccc418b 100644 --- a/docs/en/12-taos-sql/10-function.md +++ b/docs/en/12-taos-sql/10-function.md @@ -565,7 +565,7 @@ TO_TIMESTAMP(ts_str_literal, format_str_literal) - The uppercase or lowercase of `MONTH`, `MON`, `DAY`, `DY` and formtas that output digits have same effect when used in `to_timestamp`, like `to_timestamp('2023-JANUARY-01', 'YYYY-month-dd')`, `month` can be replaced by `MONTH`, or `month`. The cases are ignored. - If multi times are specified for one component, the previous will be overwritten. Like `to_timestamp('2023-22-10-10', 'yyyy-yy-MM-dd')`, the output year will be `2022`. - To avoid unexpected time zone used during the convertion, it's recommended to put time zone in the ts string, e.g. '2023-10-10 10:10:10+08'. If time zone not specified, default will be that in server or client. -- The default timestamp if some components are not specified will be: `1970-01-01 00:00:00` with the timezone specified or default to local timezone. If `DDD` is specified without `DD`, server will report `not supported` error, e.g. format 'yyyy-mm-ddd' is not supported currently, specify month day instead. +- The default timestamp if some components are not specified will be: `1970-01-01 00:00:00` with the timezone specified or default to local timezone. Specifying `DDD` without `DD` is not supported currently, e.g.
format 'yyyy-mm-ddd' is not supported, but 'yyyy-mm-dd' is supported. - If `AM` or `PM` is specified in formats, the Hour must between `1-12`. - In some cases, `to_timestamp` can convert correctly even the format and the timestamp string are not totally matched. Like `to_timetamp('200101/2', 'yyyyMM1/dd')`, the digit `1` in format string are ignored, and the output timestsamp is `2001-01-02 00:00:00`. Spaces and tabs in formats and tiemstamp string are also ignored automatically. - The precision of the output timestamp will be the same as the table in SELECT stmt, millisecond will be used if no table is specified. The output of `select to_timestamp('2023-08-1 10:10:10.123456789', 'yyyy-mm-dd hh:mi:ss.ns')` will be truncated to millisecond precision. If a nano precision table is specified, no truncation will be applied. Like `select to_timestamp('2023-08-1 10:10:10.123456789', 'yyyy-mm-dd hh:mi:ss.ns') from db_ns.table_ns limit 1`. From 6a09e40b4d019ac407d788afcec937a7df21f573 Mon Sep 17 00:00:00 2001 From: dapan1121 <72057773+dapan1121@users.noreply.github.com> Date: Fri, 10 Nov 2023 15:04:29 +0800 Subject: [PATCH 37/79] Update ttime.c --- source/common/src/ttime.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/common/src/ttime.c b/source/common/src/ttime.c index ec2202f299..a701c88a24 100644 --- a/source/common/src/ttime.c +++ b/source/common/src/ttime.c @@ -1905,7 +1905,7 @@ int32_t taosChar2Ts(const char* format, SArray** formats, const char* tsStr, int snprintf(errMsg, errMsgLen, "timestamp format error: %s -> %s", tsStr, format); code = TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_TS_ERR; } else if (code == -3) { - snprintf(errMsg, errMsgLen, "not supported currently"); + snprintf(errMsg, errMsgLen, "timestamp format not supported"); code = TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_NOT_SUPPORTED; } return code; From ea93915eaa037300b97564755e7dbdf45d63f4aa Mon Sep 17 00:00:00 2001 From: dapan1121 <72057773+dapan1121@users.noreply.github.com> Date: 
Fri, 10 Nov 2023 15:06:04 +0800 Subject: [PATCH 38/79] Update terror.c --- source/util/src/terror.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/util/src/terror.c b/source/util/src/terror.c index a957ff4b29..bcdbb3e3ac 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -613,9 +613,9 @@ TAOS_DEFINE_ERROR(TSDB_CODE_FUNC_FUNTION_PARA_TYPE, "Invalid function par TAOS_DEFINE_ERROR(TSDB_CODE_FUNC_FUNTION_PARA_VALUE, "Invalid function para value") TAOS_DEFINE_ERROR(TSDB_CODE_FUNC_NOT_BUILTIN_FUNTION, "Not buildin function") TAOS_DEFINE_ERROR(TSDB_CODE_FUNC_DUP_TIMESTAMP, "Duplicate timestamps not allowed in function") -TAOS_DEFINE_ERROR(TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_FORMAT_ERR, "Func to_timestamp failed, format mismatch") -TAOS_DEFINE_ERROR(TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_TS_ERR, "Func to_timestamp failed, wrong timestamp") -TAOS_DEFINE_ERROR(TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_NOT_SUPPORTED, "Func to_timestamp failed, not supported yet") +TAOS_DEFINE_ERROR(TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_FORMAT_ERR, "Func to_timestamp failed for format mismatch") +TAOS_DEFINE_ERROR(TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_TS_ERR, "Func to_timestamp failed for wrong timestamp") +TAOS_DEFINE_ERROR(TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_NOT_SUPPORTED, "Func to_timestamp failed for unsupported timestamp format") //udf TAOS_DEFINE_ERROR(TSDB_CODE_UDF_STOPPING, "udf is stopping") From 5fae10bc760f9758b48baeab1dedd2106327ebcf Mon Sep 17 00:00:00 2001 From: kailixu Date: Fri, 10 Nov 2023 15:09:01 +0800 Subject: [PATCH 39/79] enh: error code when build delete req --- source/common/src/tdatablock.c | 3 +++ source/common/src/tname.c | 5 ++++- source/dnode/vnode/src/sma/smaTimeRange.c | 3 ++- source/dnode/vnode/src/tq/tqSink.c | 15 ++++++++++----- 4 files changed, 19 insertions(+), 7 deletions(-) diff --git a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c index bf21b2eda0..054cff560f 100644 --- a/source/common/src/tdatablock.c 
+++ b/source/common/src/tdatablock.c @@ -2121,6 +2121,7 @@ _end: char* buildCtbNameByGroupId(const char* stbFullName, uint64_t groupId) { char* pBuf = taosMemoryCalloc(1, TSDB_TABLE_NAME_LEN + 1); if (!pBuf) { + terrno = TSDB_CODE_OUT_OF_MEMORY; return NULL; } int32_t code = buildCtbNameByGroupIdImpl(stbFullName, groupId, pBuf); @@ -2133,6 +2134,7 @@ char* buildCtbNameByGroupId(const char* stbFullName, uint64_t groupId) { int32_t buildCtbNameByGroupIdImpl(const char* stbFullName, uint64_t groupId, char* cname) { if (stbFullName[0] == 0) { + terrno = TSDB_CODE_INVALID_PARA; return TSDB_CODE_FAILED; } @@ -2142,6 +2144,7 @@ int32_t buildCtbNameByGroupIdImpl(const char* stbFullName, uint64_t groupId, cha } if (cname == NULL) { + terrno = TSDB_CODE_INVALID_PARA; taosArrayDestroy(tags); return TSDB_CODE_FAILED; } diff --git a/source/common/src/tname.c b/source/common/src/tname.c index c6210ca8c9..4fe2beb6aa 100644 --- a/source/common/src/tname.c +++ b/source/common/src/tname.c @@ -296,7 +296,10 @@ static int compareKv(const void* p1, const void* p2) { void buildChildTableName(RandTableName* rName) { SStringBuilder sb = {0}; taosStringBuilderAppendStringLen(&sb, rName->stbFullName, rName->stbFullNameLen); - if (sb.buf == NULL) return; + if (sb.buf == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return; + } taosArraySort(rName->tags, compareKv); for (int j = 0; j < taosArrayGetSize(rName->tags); ++j) { taosStringBuilderAppendChar(&sb, ','); diff --git a/source/dnode/vnode/src/sma/smaTimeRange.c b/source/dnode/vnode/src/sma/smaTimeRange.c index 94ff5ef6b3..289986e01f 100644 --- a/source/dnode/vnode/src/sma/smaTimeRange.c +++ b/source/dnode/vnode/src/sma/smaTimeRange.c @@ -188,7 +188,8 @@ int32_t smaBlockToSubmit(SVnode *pVnode, const SArray *pBlocks, const STSchema * if (pDataBlock->info.type == STREAM_DELETE_RESULT) { pDeleteReq->suid = suid; pDeleteReq->deleteReqs = taosArrayInit(0, sizeof(SSingleDeleteReq)); - tqBuildDeleteReq(pVnode->pTq, stbFullName, pDataBlock, 
pDeleteReq, ""); + code = tqBuildDeleteReq(pVnode->pTq, stbFullName, pDataBlock, pDeleteReq, ""); + TSDB_CHECK_CODE(code, lino, _exit); continue; } diff --git a/source/dnode/vnode/src/tq/tqSink.c b/source/dnode/vnode/src/tq/tqSink.c index 80ce867e70..cf77679478 100644 --- a/source/dnode/vnode/src/tq/tqSink.c +++ b/source/dnode/vnode/src/tq/tqSink.c @@ -45,6 +45,7 @@ static void setCreateTableMsgTableName(SVCreateTbReq* pCreateTableReq, SSData int32_t tqBuildDeleteReq(STQ* pTq, const char* stbFullName, const SSDataBlock* pDataBlock, SBatchDeleteReq* deleteReq, const char* pIdStr) { + int32_t code = 0; int32_t totalRows = pDataBlock->info.rows; SColumnInfoData* pStartTsCol = taosArrayGet(pDataBlock->pDataBlock, START_TS_COLUMN_INDEX); SColumnInfoData* pEndTsCol = taosArrayGet(pDataBlock->pDataBlock, END_TS_COLUMN_INDEX); @@ -73,16 +74,20 @@ int32_t tqBuildDeleteReq(STQ* pTq, const char* stbFullName, const SSDataBlock* p pName = buildCtbNameByGroupId(stbFullName, groupId); name = pName; } else { - metaGetTableNameByUid(pTq->pVnode, groupId, tbName); - name = varDataVal(tbName); + if (metaGetTableNameByUid(pTq->pVnode, groupId, tbName) == 0) { + name = varDataVal(tbName); + } else { + terrno = TSDB_CODE_OUT_OF_MEMORY; + } } if (!name || *name == '\0') { tqError("s-task:%s build delete msg groupId:%" PRId64 ", skey:%" PRId64 " ekey:%" PRId64 " failed since invalid tbname:%s", - pIdStr, groupId, name, skey, ekey, name ? name : "NULL"); + pIdStr, groupId, skey, ekey, name ? name : "NULL"); taosArrayDestroy(deleteReq->deleteReqs); - return -1; + code = terrno ? 
terrno : TSDB_CODE_APP_ERROR; + return code; } tqDebug("s-task:%s build delete msg groupId:%" PRId64 ", name:%s, skey:%" PRId64 " ekey:%" PRId64, pIdStr, groupId, @@ -95,7 +100,7 @@ int32_t tqBuildDeleteReq(STQ* pTq, const char* stbFullName, const SSDataBlock* p taosArrayPush(deleteReq->deleteReqs, &req); } - return 0; + return code; } static int32_t encodeCreateChildTableForRPC(SVCreateTbBatchReq* pReqs, int32_t vgId, void** pBuf, int32_t* contLen) { From b1bca4e1e2e89fddfd2023020679d0f69d93ddff Mon Sep 17 00:00:00 2001 From: kailixu Date: Fri, 10 Nov 2023 15:12:10 +0800 Subject: [PATCH 40/79] fix: batch delete logic --- source/dnode/vnode/src/vnd/vnodeSvr.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index d0c0e5f65e..29db7d602f 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -1893,10 +1893,8 @@ static int32_t vnodeProcessBatchDeleteReq(SVnode *pVnode, int64_t ver, void *pRe metaReaderDoInit(&mr, pVnode->pMeta, META_READER_NOLOCK); STsdb *pTsdb = pVnode->pTsdb; - if (deleteReq.level == 1) { - pTsdb = VND_RSMA1(pVnode); - } else { - pTsdb = VND_RSMA2(pVnode); + if (deleteReq.level) { + pTsdb = deleteReq.level == 1 ? 
VND_RSMA1(pVnode) : VND_RSMA2(pVnode); } int32_t sz = taosArrayGetSize(deleteReq.deleteReqs); From d5d8568bad8986252cd3208f6e4ad44e6324344f Mon Sep 17 00:00:00 2001 From: kailixu Date: Fri, 10 Nov 2023 15:30:49 +0800 Subject: [PATCH 41/79] fix: skip delete msg affects 0 row --- source/dnode/vnode/src/sma/smaRollup.c | 8 ++++++-- source/libs/executor/src/scanoperator.c | 5 ----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 99f5283df3..6d2f76bbe9 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -1550,13 +1550,17 @@ static int32_t tdRSmaBatchExec(SSma *pSma, SRSmaInfo *pInfo, STaosQall *qall, SA taosFreeQitem(msg); goto _err; } - if (!taosArrayPush(pSubmitArr, &packData)) { + + if (packData.pDataBlock && !taosArrayPush(pSubmitArr, &packData)) { taosFreeQitem(msg); terrno = TSDB_CODE_OUT_OF_MEMORY; goto _err; } taosFreeQitem(msg); - ++nDelete; + if (packData.pDataBlock) { + // packData.pDataBlock is NULL if delete affects 0 row + ++nDelete; + } } else { ASSERTS(0, "unknown msg type:%d", inputType); break; diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index c7f0bb3895..08138e7700 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -2106,11 +2106,6 @@ FETCH_NEXT_BLOCK: SPackedData* pPacked = taosArrayGet(pInfo->pBlockLists, current); SSDataBlock* pBlock = pPacked->pDataBlock; - if(!pBlock) { - doClearBufferedBlocks(pInfo); - return NULL; - } - if (pBlock->info.parTbName[0]) { pAPI->stateStore.streamStatePutParName(pStreamInfo->pState, pBlock->info.id.groupId, pBlock->info.parTbName); } From 57f7ff5b9c84b64f1bb7a9ab679934f15e9cd5f9 Mon Sep 17 00:00:00 2001 From: kailixu Date: Fri, 10 Nov 2023 15:34:41 +0800 Subject: [PATCH 42/79] chore: revert the code change --- source/libs/executor/src/scanoperator.c | 1 - 1 file changed, 
1 deletion(-) diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 08138e7700..c47e14ad0d 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -2105,7 +2105,6 @@ FETCH_NEXT_BLOCK: SPackedData* pPacked = taosArrayGet(pInfo->pBlockLists, current); SSDataBlock* pBlock = pPacked->pDataBlock; - if (pBlock->info.parTbName[0]) { pAPI->stateStore.streamStatePutParName(pStreamInfo->pState, pBlock->info.id.groupId, pBlock->info.parTbName); } From a8de040af21001ad4054bf2b691d8bce5b158e20 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Fri, 10 Nov 2023 16:13:23 +0800 Subject: [PATCH 43/79] fix: remove non sort usage for now --- source/libs/planner/src/planOptimizer.c | 4 ++-- source/libs/planner/src/planSpliter.c | 6 ++---- source/libs/planner/src/planValidator.c | 3 +++ 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/source/libs/planner/src/planOptimizer.c b/source/libs/planner/src/planOptimizer.c index 99fed47b92..4eda11a6a4 100644 --- a/source/libs/planner/src/planOptimizer.c +++ b/source/libs/planner/src/planOptimizer.c @@ -2878,7 +2878,7 @@ static int32_t splitCacheLastFuncOptimize(SOptimizeContext* pCxt, SLogicSubplan* } if (NULL == pAggFuncList) { - planError("empty agg func list while splite projections"); + planError("empty agg func list while splite projections, funcNum:%d", pAgg->pAggFuncs->length); return TSDB_CODE_PLAN_INTERNAL_ERROR; } @@ -2905,7 +2905,7 @@ static int32_t splitCacheLastFuncOptimize(SOptimizeContext* pCxt, SLogicSubplan* } if (NULL == pTargets) { - planError("empty target func list while splite projections"); + planError("empty target func list while splite projections, targetsNum:%d", pAgg->node.pTargets->length); nodesDestroyList(pAggFuncList); return TSDB_CODE_PLAN_INTERNAL_ERROR; } diff --git a/source/libs/planner/src/planSpliter.c b/source/libs/planner/src/planSpliter.c index fd6706e43e..43bd8a5589 100644 --- 
a/source/libs/planner/src/planSpliter.c +++ b/source/libs/planner/src/planSpliter.c @@ -1194,7 +1194,7 @@ static int32_t stbSplSplitScanNodeWithPartTags(SSplitContext* pCxt, SStableSplit SLogicNode* pSplitNode = NULL; int32_t code = stbSplGetSplitNodeForScan(pInfo, &pSplitNode); if (TSDB_CODE_SUCCESS == code) { - code = stbSplCreateMergeNode(pCxt, pInfo->pSubplan, pSplitNode, NULL, pSplitNode, true, pSplitNode->requireDataOrder >= DATA_ORDER_LEVEL_IN_GROUP); + code = stbSplCreateMergeNode(pCxt, pInfo->pSubplan, pSplitNode, NULL, pSplitNode, true, true); } if (TSDB_CODE_SUCCESS == code) { code = nodesListMakeStrictAppend(&pInfo->pSubplan->pChildren, @@ -1339,14 +1339,12 @@ static int32_t stbSplCreateMergeKeysForPartitionNode(SLogicNode* pPart, SNodeLis static int32_t stbSplSplitPartitionNode(SSplitContext* pCxt, SStableSplitInfo* pInfo) { int32_t code = TSDB_CODE_SUCCESS; - bool needSort = false; SNodeList* pMergeKeys = NULL; if (pInfo->pSplitNode->requireDataOrder >= DATA_ORDER_LEVEL_IN_GROUP) { - needSort = true; code = stbSplCreateMergeKeysForPartitionNode(pInfo->pSplitNode, &pMergeKeys); } if (TSDB_CODE_SUCCESS == code) { - code = stbSplCreateMergeNode(pCxt, pInfo->pSubplan, pInfo->pSplitNode, pMergeKeys, pInfo->pSplitNode, true, needSort); + code = stbSplCreateMergeNode(pCxt, pInfo->pSubplan, pInfo->pSplitNode, pMergeKeys, pInfo->pSplitNode, true, true); } if (TSDB_CODE_SUCCESS == code) { code = nodesListMakeStrictAppend(&pInfo->pSubplan->pChildren, diff --git a/source/libs/planner/src/planValidator.c b/source/libs/planner/src/planValidator.c index 7461ee4f9a..66e7defc1d 100755 --- a/source/libs/planner/src/planValidator.c +++ b/source/libs/planner/src/planValidator.c @@ -154,6 +154,9 @@ int32_t validateQueryPlan(SPlanContext* pCxt, SQueryPlan* pPlan) { break; } } + if (code) { + break; + } } destoryValidatePlanContext(&cxt); From 65870ae117095f0bc6a13a75a3d10147648ec44c Mon Sep 17 00:00:00 2001 From: dmchen Date: Fri, 10 Nov 2023 08:23:09 +0000 Subject: [PATCH 
44/79] TS-4249 --- source/dnode/vnode/src/vnd/vnodeSvr.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index eadfd39d0b..6a20d4a184 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -25,9 +25,11 @@ static int32_t vnodeProcessCreateStbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp); static int32_t vnodeProcessAlterStbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp); static int32_t vnodeProcessDropStbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp); -static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp); +static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp, + SRpcMsg *pOriginRpc); static int32_t vnodeProcessAlterTbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp); -static int32_t vnodeProcessDropTbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp); +static int32_t vnodeProcessDropTbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp, + SRpcMsg *pOriginRpc); static int32_t vnodeProcessSubmitReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp); static int32_t vnodeProcessCreateTSmaReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp); static int32_t vnodeProcessAlterConfirmReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp); @@ -509,13 +511,13 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t ver, SRpcMsg if (vnodeProcessDropStbReq(pVnode, ver, pReq, len, pRsp) < 0) goto _err; break; case TDMT_VND_CREATE_TABLE: - if (vnodeProcessCreateTbReq(pVnode, ver, pReq, len, pRsp) < 0) goto _err; + if (vnodeProcessCreateTbReq(pVnode, ver, pReq, len, pRsp, pMsg) < 0) goto _err; break; case 
TDMT_VND_ALTER_TABLE: if (vnodeProcessAlterTbReq(pVnode, ver, pReq, len, pRsp) < 0) goto _err; break; case TDMT_VND_DROP_TABLE: - if (vnodeProcessDropTbReq(pVnode, ver, pReq, len, pRsp) < 0) goto _err; + if (vnodeProcessDropTbReq(pVnode, ver, pReq, len, pRsp, pMsg) < 0) goto _err; break; case TDMT_VND_DROP_TTL_TABLE: if (vnodeProcessDropTtlTbReq(pVnode, ver, pReq, len, pRsp) < 0) goto _err; @@ -878,7 +880,8 @@ _err: return -1; } -static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp) { +static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp, + SRpcMsg *pOriginRpc) { SDecoder decoder = {0}; SEncoder encoder = {0}; int32_t rcode = 0; @@ -928,7 +931,7 @@ static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq, goto _exit; } - if(tsEnableAuditCreateTable){ + if(tsEnableAudit && tsEnableAuditCreateTable){ char* str = taosMemoryCalloc(1, TSDB_TABLE_FNAME_LEN); if (str == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -983,7 +986,7 @@ static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq, tEncoderInit(&encoder, pRsp->pCont, pRsp->contLen); tEncodeSVCreateTbBatchRsp(&encoder, &rsp); - if (tsEnableAuditCreateTable) { + if(tsEnableAudit && tsEnableAuditCreateTable){ int64_t clusterId = pVnode->config.syncCfg.nodeInfo[0].clusterId; SName name = {0}; @@ -1002,7 +1005,7 @@ static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq, size_t len = 0; char *keyJoined = taosStringBuilderGetResult(&sb, &len); - auditRecord(NULL, clusterId, "createTable", name.dbname, "", keyJoined, len); + auditRecord(pOriginRpc, clusterId, "createTable", name.dbname, "", keyJoined, len); taosStringBuilderDestroy(&sb); } @@ -1144,7 +1147,8 @@ _exit: return 0; } -static int32_t vnodeProcessDropTbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp) { +static int32_t vnodeProcessDropTbReq(SVnode *pVnode, int64_t 
ver, void *pReq, int32_t len, SRpcMsg *pRsp, + SRpcMsg *pOriginRpc) { SVDropTbBatchReq req = {0}; SVDropTbBatchRsp rsp = {0}; SDecoder decoder = {0}; @@ -1223,7 +1227,7 @@ static int32_t vnodeProcessDropTbReq(SVnode *pVnode, int64_t ver, void *pReq, in size_t len = 0; char *keyJoined = taosStringBuilderGetResult(&sb, &len); - auditRecord(NULL, clusterId, "dropTable", name.dbname, "", keyJoined, len); + auditRecord(pOriginRpc, clusterId, "dropTable", name.dbname, "", keyJoined, len); taosStringBuilderDestroy(&sb); } From 9d448c7c6c4fcf22506f4d18a924fca26070cac9 Mon Sep 17 00:00:00 2001 From: slzhou Date: Fri, 10 Nov 2023 16:30:16 +0800 Subject: [PATCH 45/79] fix: review comments --- source/libs/parser/src/parTranslater.c | 53 ++++++++++++---------- tests/system-test/2-query/tbname_vgroup.py | 17 ++++++- 2 files changed, 44 insertions(+), 26 deletions(-) diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index d38122a6ad..a55bd5663f 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -3931,7 +3931,7 @@ typedef struct SEqCondTbNameTableInfo { } SEqCondTbNameTableInfo; //[tableAlias.]tbname = tbNamVal -static bool isOperatorEqTbnameCond(STranslateContext* pCxt, SOperatorNode* pOperator, char** ppTableAlias, SArray* aTabNames) { +static bool isOperatorEqTbnameCond(STranslateContext* pCxt, SOperatorNode* pOperator, char** ppTableAlias, SArray** ppTabNames) { if (pOperator->opType != OP_TYPE_EQUAL) return false; SFunctionNode* pTbnameFunc = NULL; SValueNode* pValueNode = NULL; @@ -3959,12 +3959,13 @@ static bool isOperatorEqTbnameCond(STranslateContext* pCxt, SOperatorNode* pOper } else { return false; } - taosArrayPush(aTabNames, &(pValueNode->literal)); + *ppTabNames = taosArrayInit(1, sizeof(void*)); + taosArrayPush(*ppTabNames, &(pValueNode->literal)); return true; } //[tableAlias.]tbname in (value1, value2, ...) 
-static bool isOperatorTbnameInCond(STranslateContext* pCxt, SOperatorNode* pOperator, char** ppTableAlias, SArray* aTbNames) { +static bool isOperatorTbnameInCond(STranslateContext* pCxt, SOperatorNode* pOperator, char** ppTableAlias, SArray** ppTbNames) { if (pOperator->opType != OP_TYPE_IN) return false; if (nodeType(pOperator->pLeft) != QUERY_NODE_FUNCTION || ((SFunctionNode*)(pOperator->pLeft))->funcType != FUNCTION_TYPE_TBNAME || @@ -3983,6 +3984,7 @@ static bool isOperatorTbnameInCond(STranslateContext* pCxt, SOperatorNode* pOper } else { return false; } + *ppTbNames = taosArrayInit(1, sizeof(void*)); SNodeListNode* pValueListNode = (SNodeListNode*)pOperator->pRight; SNodeList* pValueNodeList = pValueListNode->pNodeList; SNode* pValNode = NULL; @@ -3990,7 +3992,7 @@ static bool isOperatorTbnameInCond(STranslateContext* pCxt, SOperatorNode* pOper if (nodeType(pValNode) != QUERY_NODE_VALUE) { return false; } - taosArrayPush(aTbNames, &((SValueNode*)pValNode)->literal); + taosArrayPush(*ppTbNames, &((SValueNode*)pValNode)->literal); } return true; @@ -4000,9 +4002,8 @@ static bool findEqCondTbNameInOperatorNode(STranslateContext* pCxt, SNode* pWher int32_t code = TSDB_CODE_SUCCESS; char* pTableAlias = NULL; char* pTbNameVal = NULL; - SArray* aTableNames = taosArrayInit(1, sizeof(void*)); - if (isOperatorEqTbnameCond(pCxt, (SOperatorNode*)pWhere, &pTableAlias, aTableNames) || - isOperatorTbnameInCond(pCxt, (SOperatorNode*)pWhere, &pTableAlias, aTableNames)) { + if (isOperatorEqTbnameCond(pCxt, (SOperatorNode*)pWhere, &pTableAlias, &pInfo->aTbnames) || + isOperatorTbnameInCond(pCxt, (SOperatorNode*)pWhere, &pTableAlias, &pInfo->aTbnames)) { STableNode* pTable; if (pTableAlias == NULL) { pTable = (STableNode*)((SSelectStmt*)(pCxt->pCurrStmt))->pFromTable; @@ -4012,12 +4013,11 @@ static bool findEqCondTbNameInOperatorNode(STranslateContext* pCxt, SNode* pWher if (code == TSDB_CODE_SUCCESS && nodeType(pTable) == QUERY_NODE_REAL_TABLE && 
((SRealTableNode*)pTable)->pMeta && ((SRealTableNode*)pTable)->pMeta->tableType == TSDB_SUPER_TABLE) { pInfo->pRealTable = (SRealTableNode*)pTable; - taosArrayAddAll(pInfo->aTbnames, aTableNames); - taosArrayDestroy(aTableNames); return true; } + taosArrayDestroy(pInfo->aTbnames); + pInfo->aTbnames = NULL; } - taosArrayDestroy(aTableNames); return false; } @@ -4036,25 +4036,28 @@ static void findEqualCondTbnameInLogicCondAnd(STranslateContext* pCxt, SNode* pW FOREACH(pTmpNode, ((SLogicConditionNode*)pWhere)->pParameterList) { if (nodeType(pTmpNode) == QUERY_NODE_OPERATOR) { SEqCondTbNameTableInfo info = {0}; - info.aTbnames = taosArrayInit(1, sizeof(void*)); - bool bIsEqTbnameCond = findEqCondTbNameInOperatorNode(pCxt, pTmpNode, &info); - if (bIsEqTbnameCond && !isTableExistInTableTbnames(aTableTbnames, info.pRealTable)) { - //TODO: intersect tbNames of same table? speed - taosArrayPush(aTableTbnames, &info); - } else { - taosArrayDestroy(info.aTbnames); + bool bIsEqTbnameCond = findEqCondTbNameInOperatorNode(pCxt, pTmpNode, &info); + if (bIsEqTbnameCond) { + if (!isTableExistInTableTbnames(aTableTbnames, info.pRealTable)) { + //TODO: intersect tbNames of same table? 
speed + taosArrayPush(aTableTbnames, &info); + } else { + taosArrayDestroy(info.aTbnames); + } } } + //TODO: logic cond } } -static void unionTbnamesOfTbNameCond(SArray* aTableTbnames, SEqCondTbNameTableInfo* pInfo) { +static void unionEqualCondTbnamesOfSameTable(SArray* aTableTbnames, SEqCondTbNameTableInfo* pInfo) { bool bFoundTable = false; for (int i = 0; i < taosArrayGetSize(aTableTbnames); ++i) { SEqCondTbNameTableInfo* info = taosArrayGet(aTableTbnames, i); if (info->pRealTable == pInfo->pRealTable) { taosArrayAddAll(info->aTbnames, pInfo->aTbnames); taosArrayDestroy(pInfo->aTbnames); + pInfo->aTbnames = NULL; bFoundTable = true; break; } @@ -4068,16 +4071,15 @@ static void findEqualCondTbnameInLogicCondOr(STranslateContext* pCxt, SNode* pWh bool bAllTbName = true; SNode* pTmpNode = NULL; FOREACH(pTmpNode, ((SLogicConditionNode*)pWhere)->pParameterList) { + //TODO: logic cond if (nodeType(pTmpNode) == QUERY_NODE_OPERATOR) { SEqCondTbNameTableInfo info = {0}; - info.aTbnames = taosArrayInit(1, sizeof(void*)); bool bIsEqTbnameCond = findEqCondTbNameInOperatorNode(pCxt, pTmpNode, &info); if (!bIsEqTbnameCond) { - taosArrayDestroy(info.aTbnames); bAllTbName = false; break; } else { - unionTbnamesOfTbNameCond(aTableTbnames, &info); + unionEqualCondTbnamesOfSameTable(aTableTbnames, &info); } } else { bAllTbName = false; @@ -4088,20 +4090,19 @@ static void findEqualCondTbnameInLogicCondOr(STranslateContext* pCxt, SNode* pWh for (int i = 0; i < taosArrayGetSize(aTableTbnames); ++i) { SEqCondTbNameTableInfo* pInfo = taosArrayGet(aTableTbnames, i); taosArrayDestroy(pInfo->aTbnames); + pInfo->aTbnames = NULL; } taosArrayClear(aTableTbnames); } } static int32_t findEqualCondTbname(STranslateContext* pCxt, SNode* pWhere, SArray* aTableTbnames) { + //TODO: optimize nested and/or condition. now only the fist level is processed. 
if (nodeType(pWhere) == QUERY_NODE_OPERATOR) { SEqCondTbNameTableInfo info = {0}; - info.aTbnames = taosArrayInit(1, sizeof(void*)); bool bIsEqTbnameCond = findEqCondTbNameInOperatorNode(pCxt, pWhere, &info); if (bIsEqTbnameCond) { taosArrayPush(aTableTbnames, &info); - } else { - taosArrayDestroy(info.aTbnames); } } else if (nodeType(pWhere) == QUERY_NODE_LOGIC_CONDITION) { if (((SLogicConditionNode*)pWhere)->condType == LOGIC_COND_TYPE_AND) { @@ -4116,9 +4117,10 @@ static int32_t findEqualCondTbname(STranslateContext* pCxt, SNode* pWhere, SArra static int32_t findVgroupsFromEqualTbname(STranslateContext* pCxt, SEqCondTbNameTableInfo* pInfo, SVgroupsInfo* vgsInfo) { int32_t nVgroups = 0; int32_t nTbls = taosArrayGetSize(pInfo->aTbnames); - + if (nTbls >= pInfo->pRealTable->pVgroupList->numOfVgroups) { vgsInfo->numOfVgroups = 0; + return TSDB_CODE_SUCCESS; } for (int j = 0; j < nTbls; ++j) { @@ -4134,6 +4136,7 @@ static int32_t findVgroupsFromEqualTbname(STranslateContext* pCxt, SEqCondTbName for (int32_t k = 0; k < nVgroups; ++k) { if (vgsInfo->vgroups[k].vgId == vgInfo.vgId) { bFoundVg = true; + break; } } if (!bFoundVg) { diff --git a/tests/system-test/2-query/tbname_vgroup.py b/tests/system-test/2-query/tbname_vgroup.py index 90be1bf394..a4deea8089 100644 --- a/tests/system-test/2-query/tbname_vgroup.py +++ b/tests/system-test/2-query/tbname_vgroup.py @@ -27,6 +27,7 @@ class TDTestCase: tdSql.execute("drop database if exists tbname_vgroup") tdSql.execute("create database if not exists tbname_vgroup") tdSql.execute('use tbname_vgroup') + tdSql.execute('drop database if exists dbvg') tdSql.execute('create database dbvg vgroups 8;') tdSql.execute('use dbvg;') @@ -179,7 +180,21 @@ class TDTestCase: else: tdLog.info("select * from st where tbname in ('ct1', 'ct2') involves two vgroups") - tdSql.execute('drop database dbvg;') + tdSql.execute('create table st2(ts timestamp, f int) tags (t int);') + + tdSql.execute("insert into ct21 using st2 tags(1) values('2021-04-19 
00:00:01', 1)") + + tdSql.execute("insert into ct22 using st2 tags(2) values('2021-04-19 00:00:02', 2)") + + tdSql.execute("insert into ct23 using st2 tags(3) values('2021-04-19 00:00:03', 3)") + + tdSql.execute("insert into ct24 using st2 tags(4) values('2021-04-19 00:00:04', 4)") + + tdSql.query("select * from st, st2 where st.ts=st2.ts and st.tbname in ('ct1', 'ct2') and st2.tbname in ('ct1', 'ct3')"); + tdSql.checkRows(0); + + + #tdSql.execute('drop database dbvg;') tdSql.execute('drop database tbname_vgroup') def stop(self): From 11c0427b22217c88ef43de5572436ff66397fa26 Mon Sep 17 00:00:00 2001 From: kailixu Date: Fri, 10 Nov 2023 16:41:41 +0800 Subject: [PATCH 46/79] enh: test case to delete mutiple tables --- source/dnode/vnode/src/sma/smaRollup.c | 21 ++-- .../tsim/sma/rsmaCreateInsertQueryDelete.sim | 96 ++++++++++++++++++- 2 files changed, 109 insertions(+), 8 deletions(-) diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 6d2f76bbe9..72738ec289 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -711,8 +711,10 @@ static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSma continue; } - smaDebug("vgId:%d, result block, uid:%" PRIu64 ", groupid:%" PRIu64 ", rows:%" PRIi64, SMA_VID(pSma), - output->info.id.uid, output->info.id.groupId, output->info.rows); + smaDebug("vgId:%d, result block, execType:%d, ver:%" PRIi64 ", submitReqVer:%" PRIi64 ", fetchResultVer:%" PRIi64 + ", suid:%" PRIi64 ", level:%" PRIi8 ", uid:%" PRIu64 ", groupid:%" PRIu64 ", rows:%" PRIi64, + SMA_VID(pSma), execType, output->info.version, pItem->submitReqVer, pItem->fetchResultVer, suid, + pItem->level, output->info.id.uid, output->info.id.groupId, output->info.rows); if (STREAM_GET_ALL == execType) { /** @@ -723,7 +725,11 @@ static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSma // submitReqVer keeps unchanged since tdExecuteRSmaImpl and 
tdRSmaFetchAllResult are executed synchronously output->info.version = pItem->submitReqVer; } else if (output->info.version == pItem->fetchResultVer) { - ASSERTS(0, "duplicated fetch version:%" PRIi64, pItem->fetchResultVer); + smaWarn("vgId:%d, result block, skip dup version, execType:%d, ver:%" PRIi64 ", submitReqVer:%" PRIi64 + ", fetchResultVer:%" PRIi64 ", suid:%" PRIi64 ", level:%" PRIi8 ", uid:%" PRIu64 ", groupid:%" PRIu64 + ", rows:%" PRIi64, + SMA_VID(pSma), execType, output->info.version, pItem->submitReqVer, pItem->fetchResultVer, suid, + pItem->level, output->info.id.uid, output->info.id.groupId, output->info.rows); continue; } } @@ -752,8 +758,9 @@ static int32_t tdRSmaExecAndSubmitResult(SSma *pSma, qTaskInfo_t taskInfo, SRSma atomic_store_64(&pItem->fetchResultVer, output->info.version); } - smaDebug("vgId:%d, process submit req for rsma suid:%" PRIu64 ",uid:%" PRIu64 ", level %" PRIi8 " ver %" PRIi64, - SMA_VID(pSma), suid, output->info.id.groupId, pItem->level, output->info.version); + smaDebug("vgId:%d, process submit req for rsma suid:%" PRIu64 ",uid:%" PRIu64 ", level:%" PRIi8 + ", execType:%d, ver:%" PRIi64, + SMA_VID(pSma), suid, output->info.id.groupId, pItem->level, execType, output->info.version); if (pReq) { tDestroySubmitReq(pReq, TSDB_MSG_FLG_ENCODE); @@ -881,8 +888,8 @@ static int32_t tdExecuteRSmaImpl(SSma *pSma, const void *pMsg, int32_t msgSize, return TSDB_CODE_FAILED; } - smaDebug("vgId:%d, execute rsma %" PRIi8 " task for qTaskInfo:%p suid:%" PRIu64 " nMsg:%d", SMA_VID(pSma), level, - RSMA_INFO_QTASK(pInfo, idx), pInfo->suid, msgSize); + smaDebug("vgId:%d, execute rsma %" PRIi8 " task for qTaskInfo:%p, suid:%" PRIu64 ", nMsg:%d, submitReqVer:%" PRIi64 ", inputType:%d", SMA_VID(pSma), level, + RSMA_INFO_QTASK(pInfo, idx), pInfo->suid, msgSize, version, inputType); if ((terrno = qSetSMAInput(qTaskInfo, pMsg, msgSize, inputType)) < 0) { smaError("vgId:%d, rsma %" PRIi8 " qSetStreamInput failed since %s", SMA_VID(pSma), level, 
tstrerror(terrno)); diff --git a/tests/script/tsim/sma/rsmaCreateInsertQueryDelete.sim b/tests/script/tsim/sma/rsmaCreateInsertQueryDelete.sim index 5f5c840eb6..594c062292 100644 --- a/tests/script/tsim/sma/rsmaCreateInsertQueryDelete.sim +++ b/tests/script/tsim/sma/rsmaCreateInsertQueryDelete.sim @@ -5,7 +5,7 @@ sleep 50 sql connect print =============== create database with retentions -sql create database d0 retentions -:7d,10s:21d,15s:365d; +sql create database d0 retentions -:7d,10s:21d,15s:365d vgroups 1; sql use d0 print =============== create super table and register rsma @@ -441,6 +441,100 @@ if $rows != 0 then return -1 endi +print =============== check delete multiple tables +sql create table ct2 using stb tags("BeiJing", "HaiDian"); +sql create table ct3 using stb tags("ShangHai", "PuDong"); + +sql insert into ct2 values(now, 10, NULL); +sql insert into ct2 values(now+60m, 1, NULL); +sql insert into ct2 values(now+120m, 100, NULL); +sql insert into ct3 values(now, 10, NULL); +sql insert into ct3 values(now+60m, 1, NULL); +sql insert into ct3 values(now+120m, 100, NULL); + +print =============== wait 5 seconds for results +sleep 5000 + +print =============== select * from retention level 2 from memory +sql select * from ct2; +print $data00 $data01 $data02 +print $data10 $data11 $data12 +print $data20 $data21 $data22 +if $rows != 3 then + print retention level 2 file rows $rows != 3 + return -1 +endi + +if $data01 != 10.00000 then + return -1 +endi +if $data02 != NULL then + return -1 +endi +if $data11 != 1.00000 then + return -1 +endi +if $data12 != NULL then + return -1 +endi +if $data21 != 100.00000 then + return -1 +endi +if $data22 != NULL then + return -1 +endi + +sql delete from ct1 where ts < now + 120m; +sql delete from ct3 where ts < now; +sql delete from ct2 where ts < now + 60m; +sql delete from ct2 where ts < now + 120m; +sql delete from ct3 where ts < now + 60m; +sql delete from ct3 where ts < now + 120m; +sql delete from ct3 where ts < now; 
+ +print =============== wait 5 seconds for results +sleep 5000 + +print =============== select * from retention level 2 from memory after delete ct2 +sql select * from ct2; +print $data00 $data01 $data02 +if $rows != 0 then + print retention level 2 file rows $rows != 0 + return -1 +endi + +print =============== select * from retention level 1 from memory after delete ct3 +sql select * from ct3 where ts > now - 8d; +print $data00 $data01 $data02 +if $rows != 0 then + print retention level 2 file rows $rows != 0 + return -1 +endi + + +#=================================================================== +system sh/exec.sh -n dnode1 -s stop -x SIGINT +#=================================================================== +system sh/exec.sh -n dnode1 -s start +print =============== wait 5 seconds for results after reboot +sleep 5000 + +print =============== select * from retention level 1 from memory after delete ct2 +sql select * from ct2 where ts > now - 8d; +print $data00 $data01 $data02 +if $rows != 0 then + print retention level 2 file rows $rows != 0 + return -1 +endi + +print =============== select * from retention level 2 from memory after delete ct3 +sql select * from ct3 where ts > now - 365d; +print $data00 $data01 $data02 +if $rows != 0 then + print retention level 2 file rows $rows != 0 + return -1 +endi + #=================================================================== system sh/exec.sh -n dnode1 -s stop -x SIGINT #=================================================================== \ No newline at end of file From 7fec8181945b47d5cf4e3922780ed66a2459fcba Mon Sep 17 00:00:00 2001 From: dmchen Date: Fri, 10 Nov 2023 08:43:24 +0000 Subject: [PATCH 47/79] password --- source/dnode/mnode/impl/src/mndUser.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/source/dnode/mnode/impl/src/mndUser.c b/source/dnode/mnode/impl/src/mndUser.c index 585263ef95..66abfd6bc1 100644 --- a/source/dnode/mnode/impl/src/mndUser.c +++ 
b/source/dnode/mnode/impl/src/mndUser.c @@ -1561,7 +1561,11 @@ static int32_t mndProcessCreateUserReq(SRpcMsg *pReq) { code = mndCreateUser(pMnode, pOperUser->acct, &createReq, pReq); if (code == 0) code = TSDB_CODE_ACTION_IN_PROGRESS; - auditRecord(pReq, pMnode->clusterId, "createUser", "", createReq.user, createReq.sql, createReq.sqlLen); + char detail[1000] = {0}; + sprintf(detail, "enable:%d, superUser:%d, sysInfo:%d, password:xxx", + createReq.enable, createReq.superUser, createReq.sysInfo); + + auditRecord(pReq, pMnode->clusterId, "createUser", "", createReq.user, detail, strlen(detail)); _OVER: if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) { From f69a98da9e65c55eca4bdca03f2d90e61ef3ff65 Mon Sep 17 00:00:00 2001 From: dmchen Date: Fri, 10 Nov 2023 08:53:29 +0000 Subject: [PATCH 48/79] login --- source/dnode/mnode/impl/src/mndProfile.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/dnode/mnode/impl/src/mndProfile.c b/source/dnode/mnode/impl/src/mndProfile.c index 471ed99b67..34ee18a5cc 100644 --- a/source/dnode/mnode/impl/src/mndProfile.c +++ b/source/dnode/mnode/impl/src/mndProfile.c @@ -313,7 +313,7 @@ _CONNECT: code = 0; char detail[1000] = {0}; - sprintf(detail, "%s:%d, app:%s", ip, pConn->port, connReq.app); + sprintf(detail, "app:%s", connReq.app); auditRecord(pReq, pMnode->clusterId, "login", "", "", detail, strlen(detail)); From 792e0db35091bd738cf9365aaf5f0c032485413d Mon Sep 17 00:00:00 2001 From: slzhou Date: Fri, 10 Nov 2023 17:00:10 +0800 Subject: [PATCH 49/79] enhance: add join test case --- tests/system-test/2-query/tbname_vgroup.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tests/system-test/2-query/tbname_vgroup.py b/tests/system-test/2-query/tbname_vgroup.py index a4deea8089..564c78b47a 100644 --- a/tests/system-test/2-query/tbname_vgroup.py +++ b/tests/system-test/2-query/tbname_vgroup.py @@ -190,9 +190,14 @@ class TDTestCase: tdSql.execute("insert into ct24 using st2 
tags(4) values('2021-04-19 00:00:04', 4)") - tdSql.query("select * from st, st2 where st.ts=st2.ts and st.tbname in ('ct1', 'ct2') and st2.tbname in ('ct1', 'ct3')"); - tdSql.checkRows(0); - + tdSql.query("select * from st, st2 where st.ts=st2.ts and st.tbname in ('ct1', 'ct2') and st2.tbname in ('ct21', 'ct23')"); + tdSql.checkRows(1); + tdSql.checkData(0, 0, datetime.datetime(2021, 4, 19, 0, 0, 1)) + tdSql.checkData(0, 1, 1) + tdSql.checkData(0, 2, 1) + tdSql.checkData(0, 3, datetime.datetime(2021, 4, 19, 0, 0, 1)) + tdSql.checkData(0, 4, 1) + tdSql.checkData(0, 5, 1) #tdSql.execute('drop database dbvg;') From bc446598267b205e3488cf9cddf4c2acc998855b Mon Sep 17 00:00:00 2001 From: kailixu Date: Fri, 10 Nov 2023 17:14:57 +0800 Subject: [PATCH 50/79] fix: buffer overflow --- source/dnode/vnode/src/tq/tqSink.c | 31 +++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/source/dnode/vnode/src/tq/tqSink.c b/source/dnode/vnode/src/tq/tqSink.c index cf77679478..3a4b567360 100644 --- a/source/dnode/vnode/src/tq/tqSink.c +++ b/source/dnode/vnode/src/tq/tqSink.c @@ -54,28 +54,27 @@ int32_t tqBuildDeleteReq(STQ* pTq, const char* stbFullName, const SSDataBlock* p tqDebug("s-task:%s build %d rows delete msg for table:%s", pIdStr, totalRows, stbFullName); - char tbName[TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE]; for (int32_t row = 0; row < totalRows; row++) { int64_t skey = *(int64_t*)colDataGetData(pStartTsCol, row); int64_t ekey = *(int64_t*)colDataGetData(pEndTsCol, row); int64_t groupId = *(int64_t*)colDataGetData(pGidCol, row); - char* name = NULL; - char* pName = NULL; - void* varTbName = NULL; - tbName[0] = '\0'; + char* name = NULL; + char* originName = NULL; + void* varTbName = NULL; if (!colDataIsNull(pTbNameCol, totalRows, row, NULL)) { varTbName = colDataGetVarData(pTbNameCol, row); } if (varTbName != NULL && varTbName != (void*)-1) { - name = varDataVal(varTbName); + name = taosMemoryCalloc(1, TSDB_TABLE_NAME_LEN); + 
memcpy(name, varDataVal(varTbName), varDataLen(varTbName)); } else if (stbFullName) { - pName = buildCtbNameByGroupId(stbFullName, groupId); - name = pName; + name = buildCtbNameByGroupId(stbFullName, groupId); } else { - if (metaGetTableNameByUid(pTq->pVnode, groupId, tbName) == 0) { - name = varDataVal(tbName); + originName = taosMemoryCalloc(1, TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE); + if (metaGetTableNameByUid(pTq->pVnode, groupId, originName) == 0) { + name = varDataVal(originName); } else { terrno = TSDB_CODE_OUT_OF_MEMORY; } @@ -90,17 +89,19 @@ int32_t tqBuildDeleteReq(STQ* pTq, const char* stbFullName, const SSDataBlock* p return code; } - tqDebug("s-task:%s build delete msg groupId:%" PRId64 ", name:%s, skey:%" PRId64 " ekey:%" PRId64, pIdStr, groupId, - name, skey, ekey); + tqDebug("s-task:%s build delete msg groupId:%" PRId64 ", name:%s, skey:%" PRId64 " ekey:%" PRId64, + pIdStr, groupId, name, skey, ekey); - SSingleDeleteReq req = {.startTs = skey, .endTs = ekey}; + SSingleDeleteReq req = { .startTs = skey, .endTs = ekey}; strncpy(req.tbname, name, TSDB_TABLE_NAME_LEN - 1); - if (pName) taosMemoryFree(pName); + + if (originName) name = originName; + taosMemoryFree(name); taosArrayPush(deleteReq->deleteReqs, &req); } - return code; + return 0; } static int32_t encodeCreateChildTableForRPC(SVCreateTbBatchReq* pReqs, int32_t vgId, void** pBuf, int32_t* contLen) { From c04fb6d543a8fb7c2a610ca7757d54a078073666 Mon Sep 17 00:00:00 2001 From: kailixu Date: Fri, 10 Nov 2023 17:23:17 +0800 Subject: [PATCH 51/79] enh: logic optimization --- source/dnode/vnode/src/tq/tqSink.c | 35 ++++++++++++------------------ 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/source/dnode/vnode/src/tq/tqSink.c b/source/dnode/vnode/src/tq/tqSink.c index 3a4b567360..20a72e6a28 100644 --- a/source/dnode/vnode/src/tq/tqSink.c +++ b/source/dnode/vnode/src/tq/tqSink.c @@ -59,9 +59,9 @@ int32_t tqBuildDeleteReq(STQ* pTq, const char* stbFullName, const SSDataBlock* p 
int64_t ekey = *(int64_t*)colDataGetData(pEndTsCol, row); int64_t groupId = *(int64_t*)colDataGetData(pGidCol, row); - char* name = NULL; - char* originName = NULL; - void* varTbName = NULL; + char* name = NULL; + char* originName = NULL; + void* varTbName = NULL; if (!colDataIsNull(pTbNameCol, totalRows, row, NULL)) { varTbName = colDataGetVarData(pTbNameCol, row); } @@ -75,30 +75,23 @@ int32_t tqBuildDeleteReq(STQ* pTq, const char* stbFullName, const SSDataBlock* p originName = taosMemoryCalloc(1, TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE); if (metaGetTableNameByUid(pTq->pVnode, groupId, originName) == 0) { name = varDataVal(originName); - } else { - terrno = TSDB_CODE_OUT_OF_MEMORY; } } if (!name || *name == '\0') { - tqError("s-task:%s build delete msg groupId:%" PRId64 ", skey:%" PRId64 " ekey:%" PRId64 - " failed since invalid tbname:%s", - pIdStr, groupId, skey, ekey, name ? name : "NULL"); - taosArrayDestroy(deleteReq->deleteReqs); - code = terrno ? terrno : TSDB_CODE_APP_ERROR; - return code; + tqWarn("s-task:%s failed to build delete msg groupId:%" PRId64 ", skey:%" PRId64 " ekey:%" PRId64 + " since invalid tbname:%s", + pIdStr, groupId, skey, ekey, name ? 
name : "NULL"); + } else { + tqDebug("s-task:%s build delete msg groupId:%" PRId64 ", name:%s, skey:%" PRId64 " ekey:%" PRId64, pIdStr, + groupId, name, skey, ekey); + + SSingleDeleteReq req = {.startTs = skey, .endTs = ekey}; + strncpy(req.tbname, name, TSDB_TABLE_NAME_LEN - 1); + taosArrayPush(deleteReq->deleteReqs, &req); } - - tqDebug("s-task:%s build delete msg groupId:%" PRId64 ", name:%s, skey:%" PRId64 " ekey:%" PRId64, - pIdStr, groupId, name, skey, ekey); - - SSingleDeleteReq req = { .startTs = skey, .endTs = ekey}; - strncpy(req.tbname, name, TSDB_TABLE_NAME_LEN - 1); - if (originName) name = originName; - taosMemoryFree(name); - - taosArrayPush(deleteReq->deleteReqs, &req); + taosMemoryFreeClear(name); } return 0; From 8bcb7f874147df963483fcef717bb2b97a729a52 Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Fri, 10 Nov 2023 17:29:52 +0800 Subject: [PATCH 52/79] fix: windows compile error --- source/libs/planner/src/planValidator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/planner/src/planValidator.c b/source/libs/planner/src/planValidator.c index 66e7defc1d..a5d729ab84 100755 --- a/source/libs/planner/src/planValidator.c +++ b/source/libs/planner/src/planValidator.c @@ -29,7 +29,7 @@ int32_t doValidatePhysiNode(SValidatePlanContext* pCxt, SNode* pNode); int32_t validateMergePhysiNode(SValidatePlanContext* pCxt, SMergePhysiNode* pMerge) { if ((NULL != pMerge->node.pLimit || NULL != pMerge->node.pSlimit) && pMerge->type == MERGE_TYPE_NON_SORT) { - planError("no limit&slimit supported for non sort merge"); + planError("no limit&slimit supported for non sort merge, pLimit:%p", pMerge->node.pLimit); return TSDB_CODE_PLAN_INTERNAL_ERROR; } From f06ea9fcebeed8344ebd1c2fe124c60809b7ccb5 Mon Sep 17 00:00:00 2001 From: kailixu Date: Fri, 10 Nov 2023 17:32:33 +0800 Subject: [PATCH 53/79] enh: wait more time for result --- .../tsim/sma/rsmaCreateInsertQueryDelete.sim | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 
deletions(-) diff --git a/tests/script/tsim/sma/rsmaCreateInsertQueryDelete.sim b/tests/script/tsim/sma/rsmaCreateInsertQueryDelete.sim index 594c062292..b93f6f0c44 100644 --- a/tests/script/tsim/sma/rsmaCreateInsertQueryDelete.sim +++ b/tests/script/tsim/sma/rsmaCreateInsertQueryDelete.sim @@ -29,8 +29,8 @@ sql insert into ct1 values(now, 10, NULL); sql insert into ct1 values(now+60m, 1, NULL); sql insert into ct1 values(now+120m, 100, NULL); -print =============== wait 5 seconds for results -sleep 5000 +print =============== wait 7 seconds for results +sleep 7000 print =============== select * from retention level 2 from memory sql select * from ct1; @@ -125,8 +125,8 @@ print =============== delete row 0 sql delete from ct1 where ts < now; sql delete from ct1 where ts < now; sql delete from ct1 where ts < now; -print =============== wait 5 seconds for results -sleep 5000 +print =============== wait 7 seconds for results +sleep 7000 print =============== select * from retention level 2 from memory after delete row 0 sql select * from ct1; @@ -206,8 +206,8 @@ sql delete from ct1 where ts < now + 60m; sql delete from ct1 where ts < now + 60m; sql delete from ct1 where ts < now + 60m; -print =============== wait 5 seconds for results -sleep 5000 +print =============== wait 7 seconds for results +sleep 7000 print =============== select * from retention level 2 from memory after delete row 1 sql select * from ct1; @@ -260,8 +260,8 @@ endi #=================================================================== system sh/exec.sh -n dnode1 -s stop -x SIGINT system sh/exec.sh -n dnode1 -s start -print =============== wait 5 seconds for results after reboot -sleep 5000 +print =============== wait 7 seconds for results after reboot +sleep 7000 print =============== select * from retention level 2 from memory after reboot sql select * from ct1; @@ -378,8 +378,8 @@ sql delete from ct1 where ts < now + 300m; sql delete from ct1 where ts < now + 60m; sql delete from ct1 where ts < 
now; -print =============== wait 5 seconds for results -sleep 5000 +print =============== wait 7 seconds for results +sleep 7000 print =============== select * from retention level 2 from memory after delete row 2 sql select * from ct1; @@ -411,8 +411,8 @@ endi #=================================================================== system sh/exec.sh -n dnode1 -s stop -x SIGINT system sh/exec.sh -n dnode1 -s start -print =============== wait 5 seconds for results after reboot -sleep 5000 +print =============== wait 7 seconds for results after reboot +sleep 7000 print =============== select * from retention level 2 from memory after delete row 2 sql select * from ct1; @@ -452,8 +452,8 @@ sql insert into ct3 values(now, 10, NULL); sql insert into ct3 values(now+60m, 1, NULL); sql insert into ct3 values(now+120m, 100, NULL); -print =============== wait 5 seconds for results -sleep 5000 +print =============== wait 7 seconds for results +sleep 7000 print =============== select * from retention level 2 from memory sql select * from ct2; @@ -492,8 +492,8 @@ sql delete from ct3 where ts < now + 60m; sql delete from ct3 where ts < now + 120m; sql delete from ct3 where ts < now; -print =============== wait 5 seconds for results -sleep 5000 +print =============== wait 7 seconds for results +sleep 7000 print =============== select * from retention level 2 from memory after delete ct2 sql select * from ct2; @@ -516,8 +516,8 @@ endi system sh/exec.sh -n dnode1 -s stop -x SIGINT #=================================================================== system sh/exec.sh -n dnode1 -s start -print =============== wait 5 seconds for results after reboot -sleep 5000 +print =============== wait 7 seconds for results after reboot +sleep 7000 print =============== select * from retention level 1 from memory after delete ct2 sql select * from ct2 where ts > now - 8d; From 9017e2ed689004c4788e9737636a5c6b7df0a05b Mon Sep 17 00:00:00 2001 From: kailixu Date: Fri, 10 Nov 2023 18:11:06 +0800 Subject: 
[PATCH 54/79] chore: code optimization --- source/dnode/vnode/src/sma/smaRollup.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 72738ec289..e813674c56 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -22,12 +22,12 @@ #define RSMA_FETCH_DELAY_MAX (120000) // ms #define RSMA_FETCH_ACTIVE_MAX (1000) // ms #define RSMA_FETCH_INTERVAL (5000) // ms -#define RSMA_EXEC_MSG_HLEN (13) // type(int8_t) + len(int32_t) + version(int64_t) +#define RSMA_EXEC_TASK_FLAG "rsma" +#define RSMA_EXEC_MSG_HLEN (13) // type(int8_t) + len(int32_t) + version(int64_t) #define RSMA_EXEC_MSG_TYPE(msg) (*(int8_t *)(msg)) #define RSMA_EXEC_MSG_LEN(msg) (*(int32_t *)POINTER_SHIFT((msg), sizeof(int8_t))) #define RSMA_EXEC_MSG_VER(msg) (*(int64_t *)POINTER_SHIFT((msg), sizeof(int8_t) + sizeof(int32_t))) -#define RSMA_EXEC_MSG_BODY(msg) (POINTER_SHIFT((msg), sizeof(int8_t) + sizeof(int32_t) + sizeof(int64_t))) -#define RSMA_TASK_FLAG "rsma" +#define RSMA_EXEC_MSG_BODY(msg) (POINTER_SHIFT((msg), RSMA_EXEC_MSG_HLEN) #define RSMA_NEED_FETCH(r) (RSMA_INFO_ITEM((r), 0)->fetchLevel || RSMA_INFO_ITEM((r), 1)->fetchLevel) @@ -293,8 +293,8 @@ static int32_t tdSetRSmaInfoItemParams(SSma *pSma, SRSmaParam *param, SRSmaStat pStreamTask->id.streamId = pRSmaInfo->suid + idx; pStreamTask->chkInfo.startTs = taosGetTimestampMs(); pStreamTask->pMeta = pVnode->pTq->pStreamMeta; - pStreamTask->exec.qmsg = taosMemoryMalloc(strlen(RSMA_TASK_FLAG) + 1); - sprintf(pStreamTask->exec.qmsg, "%s", RSMA_TASK_FLAG); + pStreamTask->exec.qmsg = taosMemoryMalloc(strlen(RSMA_EXEC_TASK_FLAG) + 1); + sprintf(pStreamTask->exec.qmsg, "%s", RSMA_EXEC_TASK_FLAG); pStreamTask->chkInfo.checkpointId = streamMetaGetLatestCheckpointId(pStreamTask->pMeta); tdRSmaTaskInit(pStreamTask->pMeta, pItem, &pStreamTask->id); pStreamState = streamStateOpen(taskInfDir, pStreamTask, true, -1, 
-1); From 42e4e2b7a4b8c713c1d0a803a2cfc0ec01dd2a09 Mon Sep 17 00:00:00 2001 From: kailixu Date: Fri, 10 Nov 2023 18:27:42 +0800 Subject: [PATCH 55/79] fix: macro define --- source/dnode/vnode/src/sma/smaRollup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index e813674c56..54a150145e 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -27,7 +27,7 @@ #define RSMA_EXEC_MSG_TYPE(msg) (*(int8_t *)(msg)) #define RSMA_EXEC_MSG_LEN(msg) (*(int32_t *)POINTER_SHIFT((msg), sizeof(int8_t))) #define RSMA_EXEC_MSG_VER(msg) (*(int64_t *)POINTER_SHIFT((msg), sizeof(int8_t) + sizeof(int32_t))) -#define RSMA_EXEC_MSG_BODY(msg) (POINTER_SHIFT((msg), RSMA_EXEC_MSG_HLEN) +#define RSMA_EXEC_MSG_BODY(msg) (POINTER_SHIFT((msg), RSMA_EXEC_MSG_HLEN)) #define RSMA_NEED_FETCH(r) (RSMA_INFO_ITEM((r), 0)->fetchLevel || RSMA_INFO_ITEM((r), 1)->fetchLevel) From 3707857c1220bdd57257be54f08a4f67b0cf446f Mon Sep 17 00:00:00 2001 From: kailixu Date: Fri, 10 Nov 2023 18:43:23 +0800 Subject: [PATCH 56/79] chore: remove obsolete code --- source/dnode/vnode/src/sma/smaRollup.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/source/dnode/vnode/src/sma/smaRollup.c b/source/dnode/vnode/src/sma/smaRollup.c index 54a150145e..5dc29509a0 100644 --- a/source/dnode/vnode/src/sma/smaRollup.c +++ b/source/dnode/vnode/src/sma/smaRollup.c @@ -653,8 +653,7 @@ static int32_t tdRSmaProcessDelReq(SSma *pSma, int64_t suid, int8_t level, SBatc SRpcMsg delMsg = {.msgType = TDMT_VND_BATCH_DEL, .pCont = pBuf, - .contLen = len + sizeof(SMsgHead), - .info.ahandle = level == 1 ? 
VND_RSMA1(pSma->pVnode) : VND_RSMA2(pSma->pVnode)}; + .contLen = len + sizeof(SMsgHead)}; code = tmsgPutToQueue(&pSma->pVnode->msgCb, WRITE_QUEUE, &delMsg); TSDB_CHECK_CODE(code, lino, _exit); } From 1c2e9c18d554affacc458a18e26bcf6f3cd573ba Mon Sep 17 00:00:00 2001 From: Cary Xu Date: Fri, 10 Nov 2023 22:01:28 +0800 Subject: [PATCH 57/79] Update tqSink.c --- source/dnode/vnode/src/tq/tqSink.c | 1 - 1 file changed, 1 deletion(-) diff --git a/source/dnode/vnode/src/tq/tqSink.c b/source/dnode/vnode/src/tq/tqSink.c index 20a72e6a28..c2e48d5d92 100644 --- a/source/dnode/vnode/src/tq/tqSink.c +++ b/source/dnode/vnode/src/tq/tqSink.c @@ -45,7 +45,6 @@ static void setCreateTableMsgTableName(SVCreateTbReq* pCreateTableReq, SSData int32_t tqBuildDeleteReq(STQ* pTq, const char* stbFullName, const SSDataBlock* pDataBlock, SBatchDeleteReq* deleteReq, const char* pIdStr) { - int32_t code = 0; int32_t totalRows = pDataBlock->info.rows; SColumnInfoData* pStartTsCol = taosArrayGet(pDataBlock->pDataBlock, START_TS_COLUMN_INDEX); SColumnInfoData* pEndTsCol = taosArrayGet(pDataBlock->pDataBlock, END_TS_COLUMN_INDEX); From 22808ce1b70efe3e0eb4cf675d8b76b834235773 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 7 Nov 2023 17:33:30 +0800 Subject: [PATCH 58/79] fix(stream): update the fill-time for quota limitation. 
--- source/libs/stream/src/streamQueue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/stream/src/streamQueue.c b/source/libs/stream/src/streamQueue.c index 63ee702ada..11e02e94b9 100644 --- a/source/libs/stream/src/streamQueue.c +++ b/source/libs/stream/src/streamQueue.c @@ -160,7 +160,7 @@ int32_t streamTaskGetDataFromInputQ(SStreamTask* pTask, SStreamQueueItem** pInpu // no available token in bucket for sink task, let's wait for a little bit if (taskLevel == TASK_LEVEL__SINK && (!streamTaskExtractAvailableToken(pTask->outputInfo.pTokenBucket, pTask->id.idStr))) { stDebug("s-task:%s no available token in bucket for sink data, wait for 10ms", id); - taosMsleep(10); +// taosMsleep(10); return TSDB_CODE_SUCCESS; } From 0463c0d7557ddb76591da03781936a45ad8a28b2 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 7 Nov 2023 17:49:49 +0800 Subject: [PATCH 59/79] refactor: wait for a while when no quota available. --- source/libs/stream/src/streamQueue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/stream/src/streamQueue.c b/source/libs/stream/src/streamQueue.c index 11e02e94b9..63ee702ada 100644 --- a/source/libs/stream/src/streamQueue.c +++ b/source/libs/stream/src/streamQueue.c @@ -160,7 +160,7 @@ int32_t streamTaskGetDataFromInputQ(SStreamTask* pTask, SStreamQueueItem** pInpu // no available token in bucket for sink task, let's wait for a little bit if (taskLevel == TASK_LEVEL__SINK && (!streamTaskExtractAvailableToken(pTask->outputInfo.pTokenBucket, pTask->id.idStr))) { stDebug("s-task:%s no available token in bucket for sink data, wait for 10ms", id); -// taosMsleep(10); + taosMsleep(10); return TSDB_CODE_SUCCESS; } From 608c72e901c75202ef2ea5b8fa0118f22ceef31e Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 8 Nov 2023 11:04:16 +0800 Subject: [PATCH 60/79] refactor(stream): create sim env for stream processing. 
--- source/dnode/vnode/src/tq/tq.c | 38 ++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 1c1a4a192c..9dc941eab1 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -1925,7 +1925,45 @@ int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg) { streamMetaWUnLock(pMeta); } else { streamMetaWUnLock(pMeta); +#if 0 tqStartStreamTaskAsync(pTq, true); +#else + // For debug purpose. + // the following procedure consume many CPU resource, result in the re-election of leader + // with high probability. So we employ it as a test case for the stream processing framework, with + // checkpoint/restart/nodeUpdate etc. + while (streamMetaTaskInTimer(pMeta)) { + tqDebug("vgId:%d some tasks in timer, wait for 100ms and recheck", pMeta->vgId); + taosMsleep(100); + } + + streamMetaWLock(pMeta); + + int32_t code = streamMetaReopen(pMeta); + if (code != 0) { + tqError("vgId:%d failed to reopen stream meta", vgId); + streamMetaWUnLock(pMeta); + taosArrayDestroy(req.pNodeList); + return -1; + } + + if (streamMetaLoadAllTasks(pTq->pStreamMeta) < 0) { + tqError("vgId:%d failed to load stream tasks", vgId); + streamMetaWUnLock(pMeta); + taosArrayDestroy(req.pNodeList); + return -1; + } + + if (vnodeIsRoleLeader(pTq->pVnode) && !tsDisableStream) { + tqInfo("vgId:%d restart all stream tasks after all tasks being updated", vgId); + tqResetStreamTaskStatus(pTq); + tqStartStreamTaskAsync(pTq, false); + } else { + tqInfo("vgId:%d, follower node not start stream tasks", vgId); + } + + streamMetaWUnLock(pMeta); +#endif } } From 77d6fd5394427123aa06c6373319f8299559cdaf Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 8 Nov 2023 14:02:08 +0800 Subject: [PATCH 61/79] refactor: do some internal refactor. 
--- source/dnode/mnode/impl/src/mndStream.c | 2 +- source/dnode/vnode/src/tq/tq.c | 6 ++++-- source/dnode/vnode/src/tq/tqStreamTask.c | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index fd0c349dd2..0cdb180645 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -2571,7 +2571,7 @@ int32_t doKillActiveCheckpointTrans(SMnode *pMnode) { } if (transId == 0) { - mError("failed to find the checkpoint trans, reset not executed"); + mDebug("failed to find the checkpoint trans, reset not executed"); return TSDB_CODE_SUCCESS; } diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 9dc941eab1..10d42dcbea 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -1924,6 +1924,8 @@ int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg) { pMeta->startInfo.tasksWillRestart = 0; streamMetaWUnLock(pMeta); } else { + tqDebug("vgId:%d all %d task(s) nodeEp updated and closed", vgId, numOfTasks); + streamMetaWUnLock(pMeta); #if 0 tqStartStreamTaskAsync(pTq, true); @@ -1955,13 +1957,13 @@ int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg) { } if (vnodeIsRoleLeader(pTq->pVnode) && !tsDisableStream) { - tqInfo("vgId:%d restart all stream tasks after all tasks being updated", vgId); + tqInfo("vgId:%d start all stream tasks after all being updated", vgId); tqResetStreamTaskStatus(pTq); tqStartStreamTaskAsync(pTq, false); } else { tqInfo("vgId:%d, follower node not start stream tasks", vgId); } - + taosArrayDestroy(req.pNodeList); streamMetaWUnLock(pMeta); #endif } diff --git a/source/dnode/vnode/src/tq/tqStreamTask.c b/source/dnode/vnode/src/tq/tqStreamTask.c index 26849f8578..0531557cc1 100644 --- a/source/dnode/vnode/src/tq/tqStreamTask.c +++ b/source/dnode/vnode/src/tq/tqStreamTask.c @@ -299,7 +299,7 @@ int32_t tqResetStreamTaskStatus(STQ* pTq) { int32_t vgId = 
TD_VID(pTq->pVnode); int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); - tqDebug("vgId:%d start all %d stream task(s)", vgId, numOfTasks); + tqDebug("vgId:%d reset all %d stream task(s) status to be uninit", vgId, numOfTasks); if (numOfTasks == 0) { return TSDB_CODE_SUCCESS; } From 801b211cd87c6068c30a4d4a283d83432f4aca56 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 8 Nov 2023 14:29:40 +0800 Subject: [PATCH 62/79] refactor:do some internal refactor. --- cmake/cmake.define | 10 +++++++++- include/os/osEnv.h | 3 ++- include/os/osSysinfo.h | 2 +- source/common/src/tglobal.c | 15 ++++++++------- source/libs/function/src/detail/tavgfunction.c | 2 +- source/libs/function/src/detail/tminmax.c | 10 +++++----- source/os/src/osEnv.c | 3 ++- source/os/src/osSysinfo.c | 11 +++-------- source/util/src/tcompression.c | 4 ++-- 9 files changed, 33 insertions(+), 27 deletions(-) diff --git a/cmake/cmake.define b/cmake/cmake.define index 3343798686..56b6b7e1de 100644 --- a/cmake/cmake.define +++ b/cmake/cmake.define @@ -149,6 +149,8 @@ ELSE () CHECK_C_COMPILER_FLAG("-mfma" COMPILER_SUPPORT_FMA) CHECK_C_COMPILER_FLAG("-mavx" COMPILER_SUPPORT_AVX) CHECK_C_COMPILER_FLAG("-mavx2" COMPILER_SUPPORT_AVX2) + CHECK_C_COMPILER_FLAG("-mavx512f" COMPILER_SUPPORT_AVX512F) + CHECK_C_COMPILER_FLAG("-mavx512vbmi" COMPILER_SUPPORT_AVX512BMI) IF (COMPILER_SUPPORT_SSE42) SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.2") @@ -168,7 +170,13 @@ ELSE () SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx2") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx2") ENDIF() - MESSAGE(STATUS "SIMD instructions (FMA/AVX/AVX2) is ACTIVATED") + MESSAGE(STATUS "SIMD instructions (FMA/AVX/AVX2/AVX512) is ACTIVATED") + + IF (COMPILER_SUPPORT_AVX512F AND COMPILER_SUPPORT_AVX512BMI) + SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512f -mavx512vbmi") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -mavx512vbmi") + MESSAGE(STATUS "avx512 supported by gcc") + ENDIF() ENDIF() # build mode diff --git a/include/os/osEnv.h 
b/include/os/osEnv.h index bc65da47a9..ac4ecd4212 100644 --- a/include/os/osEnv.h +++ b/include/os/osEnv.h @@ -36,11 +36,12 @@ extern int64_t tsStreamMax; extern float tsNumOfCores; extern int64_t tsTotalMemoryKB; extern char *tsProcPath; -extern char tsSIMDBuiltins; +extern char tsSIMDEnable; extern char tsSSE42Enable; extern char tsAVXEnable; extern char tsAVX2Enable; extern char tsFMAEnable; +extern char tsAVX512Enable; extern char tsTagFilterCache; extern char configDir[]; diff --git a/include/os/osSysinfo.h b/include/os/osSysinfo.h index 29b6f07dca..7a1df2b81c 100644 --- a/include/os/osSysinfo.h +++ b/include/os/osSysinfo.h @@ -41,7 +41,7 @@ int32_t taosGetOsReleaseName(char *releaseName, char* sName, char* ver, int32_t int32_t taosGetCpuInfo(char *cpuModel, int32_t maxLen, float *numOfCores); int32_t taosGetCpuCores(float *numOfCores, bool physical); void taosGetCpuUsage(double *cpu_system, double *cpu_engine); -int32_t taosGetCpuInstructions(char* sse42, char* avx, char* avx2, char* fma); +int32_t taosGetCpuInstructions(char* sse42, char* avx, char* avx2, char* fma, char* avx512); int32_t taosGetTotalMemory(int64_t *totalKB); int32_t taosGetProcMemory(int64_t *usedKB); int32_t taosGetSysMemory(int64_t *usedKB); diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index cb67fc1ba3..d12ebb13c2 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -511,12 +511,13 @@ static int32_t taosAddSystemCfg(SConfig *pCfg) { if (cfgAddBool(pCfg, "enableCoreFile", 1, CFG_SCOPE_BOTH, CFG_DYN_CLIENT) != 0) return -1; if (cfgAddFloat(pCfg, "numOfCores", tsNumOfCores, 1, 100000, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; - if (cfgAddBool(pCfg, "ssd42", tsSSE42Enable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; - if (cfgAddBool(pCfg, "avx", tsAVXEnable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; - if (cfgAddBool(pCfg, "avx2", tsAVX2Enable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; - if (cfgAddBool(pCfg, "fma", 
tsFMAEnable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; - if (cfgAddBool(pCfg, "simdEnable", tsSIMDBuiltins, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; - if (cfgAddBool(pCfg, "tagFilterCache", tsTagFilterCache, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; + if (cfgAddBool(pCfg, "ssd42", tsSSE42Enable, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddBool(pCfg, "avx", tsAVXEnable, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddBool(pCfg, "avx2", tsAVX2Enable, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddBool(pCfg, "fma", tsFMAEnable, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddBool(pCfg, "avx512", tsAVX512Enable, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddBool(pCfg, "simdEnable", tsSIMDEnable, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddBool(pCfg, "tagFilterCache", tsTagFilterCache, CFG_SCOPE_BOTH) != 0) return -1; if (cfgAddInt64(pCfg, "openMax", tsOpenMax, 0, INT64_MAX, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; #if !defined(_ALPINE) @@ -1080,7 +1081,7 @@ static int32_t taosSetServerCfg(SConfig *pCfg) { tsNumOfSnodeWriteThreads = cfgGetItem(pCfg, "numOfSnodeUniqueThreads")->i32; tsRpcQueueMemoryAllowed = cfgGetItem(pCfg, "rpcQueueMemoryAllowed")->i64; - tsSIMDBuiltins = (bool)cfgGetItem(pCfg, "simdEnable")->bval; + tsSIMDEnable = (bool)cfgGetItem(pCfg, "simdEnable")->bval; tsTagFilterCache = (bool)cfgGetItem(pCfg, "tagFilterCache")->bval; tsEnableMonitor = cfgGetItem(pCfg, "monitor")->bval; diff --git a/source/libs/function/src/detail/tavgfunction.c b/source/libs/function/src/detail/tavgfunction.c index 50df1b5067..e626c937da 100644 --- a/source/libs/function/src/detail/tavgfunction.c +++ b/source/libs/function/src/detail/tavgfunction.c @@ -565,7 +565,7 @@ int32_t avgFunction(SqlFunctionCtx* pCtx) { numOfElem = pInput->numOfRows; pAvgRes->count += pInput->numOfRows; - bool simdAvailable = tsAVXEnable && tsSIMDBuiltins && (numOfRows > THRESHOLD_SIZE); + bool simdAvailable = tsAVXEnable && tsSIMDEnable && (numOfRows > THRESHOLD_SIZE); switch(type) { case 
TSDB_DATA_TYPE_UTINYINT: diff --git a/source/libs/function/src/detail/tminmax.c b/source/libs/function/src/detail/tminmax.c index 3ca1c06303..a6c91a57ce 100644 --- a/source/libs/function/src/detail/tminmax.c +++ b/source/libs/function/src/detail/tminmax.c @@ -370,7 +370,7 @@ static int32_t findFirstValPosition(const SColumnInfoData* pCol, int32_t start, static void handleInt8Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc, bool signVal) { // AVX2 version to speedup the loop - if (tsAVX2Enable && tsSIMDBuiltins) { + if (tsAVX2Enable && tsSIMDEnable) { pBuf->v = i8VectorCmpAVX2(data, numOfRows, isMinFunc, signVal); } else { if (!pBuf->assign) { @@ -404,7 +404,7 @@ static void handleInt8Col(const void* data, int32_t start, int32_t numOfRows, SM static void handleInt16Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc, bool signVal) { // AVX2 version to speedup the loop - if (tsAVX2Enable && tsSIMDBuiltins) { + if (tsAVX2Enable && tsSIMDEnable) { pBuf->v = i16VectorCmpAVX2(data, numOfRows, isMinFunc, signVal); } else { if (!pBuf->assign) { @@ -438,7 +438,7 @@ static void handleInt16Col(const void* data, int32_t start, int32_t numOfRows, S static void handleInt32Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc, bool signVal) { // AVX2 version to speedup the loop - if (tsAVX2Enable && tsSIMDBuiltins) { + if (tsAVX2Enable && tsSIMDEnable) { pBuf->v = i32VectorCmpAVX2(data, numOfRows, isMinFunc, signVal); } else { if (!pBuf->assign) { @@ -502,7 +502,7 @@ static void handleFloatCol(SColumnInfoData* pCol, int32_t start, int32_t numOfRo float* val = (float*)&pBuf->v; // AVX version to speedup the loop - if (tsAVXEnable && tsSIMDBuiltins) { + if (tsAVXEnable && tsSIMDEnable) { *val = floatVectorCmpAVX(pData, numOfRows, isMinFunc); } else { if (!pBuf->assign) { @@ -533,7 +533,7 @@ static void handleDoubleCol(SColumnInfoData* pCol, int32_t start, 
int32_t numOfR double* val = (double*)&pBuf->v; // AVX version to speedup the loop - if (tsAVXEnable && tsSIMDBuiltins) { + if (tsAVXEnable && tsSIMDEnable) { *val = (double)doubleVectorCmpAVX(pData, numOfRows, isMinFunc); } else { if (!pBuf->assign) { diff --git a/source/os/src/osEnv.c b/source/os/src/osEnv.c index 0fc136c693..54107db325 100644 --- a/source/os/src/osEnv.c +++ b/source/os/src/osEnv.c @@ -37,11 +37,12 @@ float tsNumOfCores = 0; int64_t tsTotalMemoryKB = 0; char *tsProcPath = NULL; -char tsSIMDBuiltins = 0; +char tsSIMDEnable = 0; char tsSSE42Enable = 0; char tsAVXEnable = 0; char tsAVX2Enable = 0; char tsFMAEnable = 0; +char tsAVX512Enable = 0; void osDefaultInit() { taosSeedRand(taosSafeRand()); diff --git a/source/os/src/osSysinfo.c b/source/os/src/osSysinfo.c index 4816ec8f8b..fea7a4f63d 100644 --- a/source/os/src/osSysinfo.c +++ b/source/os/src/osSysinfo.c @@ -250,7 +250,7 @@ void taosGetSystemInfo() { taosGetCpuCores(&tsNumOfCores, false); taosGetTotalMemory(&tsTotalMemoryKB); taosGetCpuUsage(NULL, NULL); - taosGetCpuInstructions(&tsSSE42Enable, &tsAVXEnable, &tsAVX2Enable, &tsFMAEnable); + taosGetCpuInstructions(&tsSSE42Enable, &tsAVXEnable, &tsAVX2Enable, &tsFMAEnable, &tsAVX512Enable); #endif } @@ -602,7 +602,7 @@ void taosGetCpuUsage(double *cpu_system, double *cpu_engine) { : "0"(level)) // todo add for windows and mac -int32_t taosGetCpuInstructions(char* sse42, char* avx, char* avx2, char* fma) { +int32_t taosGetCpuInstructions(char* sse42, char* avx, char* avx2, char* fma, char* avx512) { #ifdef WINDOWS #elif defined(_TD_DARWIN_64) #else @@ -610,12 +610,6 @@ int32_t taosGetCpuInstructions(char* sse42, char* avx, char* avx2, char* fma) { #ifdef _TD_X86_ // Since the compiler is not support avx/avx2 instructions, the global variables always need to be // set to be false -//#if __AVX__ || __AVX2__ -// tsSIMDBuiltins = true; -//#else -// tsSIMDBuiltins = false; -//#endif - uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; int32_t ret = 
__get_cpuid(1, &eax, &ebx, &ecx, &edx); @@ -631,6 +625,7 @@ int32_t taosGetCpuInstructions(char* sse42, char* avx, char* avx2, char* fma) { // Ref to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77756 __cpuid_fix(7u, eax, ebx, ecx, edx); *avx2 = (char) ((ebx & bit_AVX2) == bit_AVX2); + *avx512 = (char)((ebx & bit_AVX512F) == bit_AVX512F); #endif // _TD_X86_ #endif diff --git a/source/util/src/tcompression.c b/source/util/src/tcompression.c index 3fc3ef6be6..dc89a24180 100644 --- a/source/util/src/tcompression.c +++ b/source/util/src/tcompression.c @@ -283,7 +283,7 @@ int32_t tsDecompressINTImp(const char *const input, const int32_t nelements, cha int32_t batch = num >> 2; int32_t remain = num & 0x03; if (selector == 0 || selector == 1) { - if (tsAVX2Enable && tsSIMDBuiltins) { + if (tsAVX2Enable && tsSIMDEnable) { for (int32_t i = 0; i < batch; ++i) { __m256i prev = _mm256_set1_epi64x(prev_value); _mm256_storeu_si256((__m256i *)&p[_pos], prev); @@ -300,7 +300,7 @@ int32_t tsDecompressINTImp(const char *const input, const int32_t nelements, cha } } } else { - if (tsAVX2Enable && tsSIMDBuiltins) { + if (tsAVX2Enable && tsSIMDEnable) { __m256i base = _mm256_set1_epi64x(w); __m256i maskVal = _mm256_set1_epi64x(mask); From 45ab92a02da7a355ae1227b9f7570e6d25b11dee Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 9 Nov 2023 10:10:37 +0800 Subject: [PATCH 63/79] fix(stream): remove invalid free. 
--- source/dnode/vnode/src/tq/tq.c | 1 - source/dnode/vnode/src/tq/tqStreamTask.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 10d42dcbea..b3a359a683 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -1963,7 +1963,6 @@ int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg) { } else { tqInfo("vgId:%d, follower node not start stream tasks", vgId); } - taosArrayDestroy(req.pNodeList); streamMetaWUnLock(pMeta); #endif } diff --git a/source/dnode/vnode/src/tq/tqStreamTask.c b/source/dnode/vnode/src/tq/tqStreamTask.c index 0531557cc1..e578638e9d 100644 --- a/source/dnode/vnode/src/tq/tqStreamTask.c +++ b/source/dnode/vnode/src/tq/tqStreamTask.c @@ -448,7 +448,7 @@ bool doPutDataIntoInputQFromWal(SStreamTask* pTask, int64_t maxVer, int32_t* num numOfNewItems += 1; int64_t ver = walReaderGetCurrentVer(pTask->exec.pWalReader); pTask->chkInfo.nextProcessVer = ver; - tqDebug("s-task:%s set the ver:%" PRId64 " from WALReader after extract block from WAL", id, ver); + tqDebug("s-task:%s set ver:%" PRId64 " for reader after extract data from WAL", id, ver); bool itemInFillhistory = handleFillhistoryScanComplete(pTask, ver); if (itemInFillhistory) { From 97772e9aabdb06ead7d9c52efdaed330d4e208cf Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 9 Nov 2023 17:11:37 +0800 Subject: [PATCH 64/79] fix(stream): the checkpoint version can only be updated when generating checkpoint. 
--- include/libs/stream/tstream.h | 1 + source/libs/stream/src/streamCheckpoint.c | 5 ++++- source/libs/stream/src/streamExec.c | 17 ++++++++++------- source/libs/stream/src/streamTask.c | 6 ++++-- 4 files changed, 19 insertions(+), 10 deletions(-) diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index eab3ecf04e..9d32912ece 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -304,6 +304,7 @@ typedef struct SCheckpointInfo { int64_t startTs; int64_t checkpointId; int64_t checkpointVer; // latest checkpointId version + int64_t processedVer; // already processed ver, that has generated results version. int64_t nextProcessVer; // current offset in WAL, not serialize it int64_t failedId; // record the latest failed checkpoint id } SCheckpointInfo; diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index 81840aaeb7..48b6486e05 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -297,9 +297,12 @@ int32_t streamSaveAllTaskStatus(SStreamMeta* pMeta, int64_t checkpointId) { continue; } - ASSERT(p->chkInfo.checkpointId < p->checkpointingId && p->checkpointingId == checkpointId); + ASSERT(p->chkInfo.checkpointId < p->checkpointingId && p->checkpointingId == checkpointId && + p->chkInfo.checkpointVer <= p->chkInfo.processedVer); p->chkInfo.checkpointId = p->checkpointingId; + p->chkInfo.checkpointVer = p->chkInfo.processedVer; + streamTaskClearCheckInfo(p); char* str = NULL; diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index a6101b0932..cae537a860 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -593,7 +593,7 @@ int32_t streamExecForAll(SStreamTask* pTask) { const SStreamQueueItem* pItem = pInput; stDebug("s-task:%s start to process batch of blocks, num:%d, type:%d", id, numOfBlocks, pItem->type); - int64_t ver = 
pTask->chkInfo.checkpointVer; + int64_t ver = pTask->chkInfo.processedVer; doSetStreamInputBlock(pTask, pInput, &ver, id); int64_t resSize = 0; @@ -604,13 +604,16 @@ int32_t streamExecForAll(SStreamTask* pTask) { stDebug("s-task:%s batch of input blocks exec end, elapsed time:%.2fs, result size:%.2fMiB, numOfBlocks:%d", id, el, SIZE_IN_MiB(resSize), totalBlocks); - // update the currentVer if processing the submit blocks. - ASSERT(pTask->chkInfo.checkpointVer <= pTask->chkInfo.nextProcessVer && ver >= pTask->chkInfo.checkpointVer); + SCheckpointInfo* pInfo = &pTask->chkInfo; - if (ver != pTask->chkInfo.checkpointVer) { - stDebug("s-task:%s update checkpointVer(unsaved) from %" PRId64 " to %" PRId64 ", nextProcessVer:%" PRId64, - pTask->id.idStr, pTask->chkInfo.checkpointVer, ver, pTask->chkInfo.nextProcessVer); - pTask->chkInfo.checkpointVer = ver; + // update the currentVer if processing the submit blocks. + ASSERT(pInfo->checkpointVer <= pInfo->nextProcessVer && ver >= pInfo->checkpointVer); + + if (ver != pInfo->processedVer) { + stDebug("s-task:%s update processedVer(unsaved) from %" PRId64 " to %" PRId64 " nextProcessVer:%" PRId64 + " ckpt:%" PRId64, + pTask->id.idStr, pInfo->processedVer, ver, pInfo->nextProcessVer, pInfo->checkpointVer); + pInfo->processedVer = ver; } streamFreeQitem(pInput); diff --git a/source/libs/stream/src/streamTask.c b/source/libs/stream/src/streamTask.c index a7fb590d1b..24228c0307 100644 --- a/source/libs/stream/src/streamTask.c +++ b/source/libs/stream/src/streamTask.c @@ -431,8 +431,10 @@ int32_t streamTaskInit(SStreamTask* pTask, SStreamMeta* pMeta, SMsgCb* pMsgCb, i pTask->outputq.status = TASK_OUTPUT_STATUS__NORMAL; pTask->pMeta = pMeta; - pTask->chkInfo.checkpointVer = ver - 1; - pTask->chkInfo.nextProcessVer = ver; + pTask->chkInfo.checkpointVer = ver - 1; // only update when generating checkpoint + pTask->chkInfo.processedVer = ver - 1; // already processed version + + pTask->chkInfo.nextProcessVer = ver; // next processed 
version pTask->dataRange.range.maxVer = ver; pTask->dataRange.range.minVer = ver; pTask->pMsgCb = pMsgCb; From e1de1de4214e6f28122a172ffa03f4be8a5f1d24 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 9 Nov 2023 18:14:27 +0800 Subject: [PATCH 65/79] fix(stream): fix the invalid free. --- source/libs/stream/src/streamData.c | 9 ++++----- source/libs/stream/src/streamQueue.c | 10 ++++------ source/libs/stream/src/streamStart.c | 1 - 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/source/libs/stream/src/streamData.c b/source/libs/stream/src/streamData.c index 80927b36b9..f6ec6e9fdb 100644 --- a/source/libs/stream/src/streamData.c +++ b/source/libs/stream/src/streamData.c @@ -129,6 +129,7 @@ SStreamDataSubmit* streamDataSubmitNew(SPackedData* pData, int32_t type) { void streamDataSubmitDestroy(SStreamDataSubmit* pDataSubmit) { ASSERT(pDataSubmit->type == STREAM_INPUT__DATA_SUBMIT); taosMemoryFree(pDataSubmit->submit.msgStr); + taosFreeQitem(pDataSubmit); } SStreamMergedSubmit* streamMergedSubmitNew() { @@ -208,12 +209,10 @@ void streamFreeQitem(SStreamQueueItem* data) { if (type == STREAM_INPUT__GET_RES) { blockDataDestroy(((SStreamTrigger*)data)->pBlock); taosFreeQitem(data); - } else if (type == STREAM_INPUT__DATA_BLOCK || type == STREAM_INPUT__DATA_RETRIEVE || type == STREAM_INPUT__TRANS_STATE) { - taosArrayDestroyEx(((SStreamDataBlock*)data)->blocks, (FDelete)blockDataFreeRes); - taosFreeQitem(data); + } else if (type == STREAM_INPUT__DATA_BLOCK || type == STREAM_INPUT__DATA_RETRIEVE) { + destroyStreamDataBlock((SStreamDataBlock*)data); } else if (type == STREAM_INPUT__DATA_SUBMIT) { streamDataSubmitDestroy((SStreamDataSubmit*)data); - taosFreeQitem(data); } else if (type == STREAM_INPUT__MERGED_SUBMIT) { SStreamMergedSubmit* pMerge = (SStreamMergedSubmit*)data; @@ -228,7 +227,7 @@ void streamFreeQitem(SStreamQueueItem* data) { SStreamRefDataBlock* pRefBlock = (SStreamRefDataBlock*)data; blockDataDestroy(pRefBlock->pBlock); 
taosFreeQitem(pRefBlock); - } else if (type == STREAM_INPUT__CHECKPOINT || type == STREAM_INPUT__CHECKPOINT_TRIGGER) { + } else if (type == STREAM_INPUT__CHECKPOINT || type == STREAM_INPUT__CHECKPOINT_TRIGGER || type == STREAM_INPUT__TRANS_STATE) { SStreamDataBlock* pBlock = (SStreamDataBlock*) data; taosArrayDestroyEx(pBlock->blocks, freeItems); taosFreeQitem(pBlock); diff --git a/source/libs/stream/src/streamQueue.c b/source/libs/stream/src/streamQueue.c index 63ee702ada..556de169b4 100644 --- a/source/libs/stream/src/streamQueue.c +++ b/source/libs/stream/src/streamQueue.c @@ -270,7 +270,6 @@ int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem) "s-task:%s inputQ is full, capacity(size:%d num:%dMiB), current(blocks:%d, size:%.2fMiB) stop to push data", pTask->id.idStr, STREAM_TASK_QUEUE_CAPACITY, STREAM_TASK_QUEUE_CAPACITY_IN_SIZE, total, size); streamDataSubmitDestroy(px); - taosFreeQitem(pItem); return -1; } @@ -280,7 +279,6 @@ int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem) int32_t code = taosWriteQitem(pQueue, pItem); if (code != TSDB_CODE_SUCCESS) { streamDataSubmitDestroy(px); - taosFreeQitem(pItem); return code; } @@ -296,13 +294,13 @@ int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem) stTrace("s-task:%s input queue is full, capacity:%d size:%d MiB, current(blocks:%d, size:%.2fMiB) abort", pTask->id.idStr, STREAM_TASK_QUEUE_CAPACITY, STREAM_TASK_QUEUE_CAPACITY_IN_SIZE, total, size); - destroyStreamDataBlock((SStreamDataBlock*)pItem); + streamFreeQitem(pItem); return -1; } int32_t code = taosWriteQitem(pQueue, pItem); if (code != TSDB_CODE_SUCCESS) { - destroyStreamDataBlock((SStreamDataBlock*)pItem); + streamFreeQitem(pItem); return code; } @@ -312,7 +310,7 @@ int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem) type == STREAM_INPUT__TRANS_STATE) { int32_t code = taosWriteQitem(pQueue, pItem); if (code != TSDB_CODE_SUCCESS) { - 
taosFreeQitem(pItem); + streamFreeQitem(pItem); return code; } @@ -323,7 +321,7 @@ int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem) // use the default memory limit, refactor later. int32_t code = taosWriteQitem(pQueue, pItem); if (code != TSDB_CODE_SUCCESS) { - taosFreeQitem(pItem); + streamFreeQitem(pItem); return code; } diff --git a/source/libs/stream/src/streamStart.c b/source/libs/stream/src/streamStart.c index e672b256da..da4aa02e9c 100644 --- a/source/libs/stream/src/streamStart.c +++ b/source/libs/stream/src/streamStart.c @@ -562,7 +562,6 @@ int32_t streamTaskPutTranstateIntoInputQ(SStreamTask* pTask) { taosMemoryFree(pBlock); if (streamTaskPutDataIntoInputQ(pTask, (SStreamQueueItem*)pTranstate) < 0) { - taosFreeQitem(pTranstate); return TSDB_CODE_OUT_OF_MEMORY; } From 4d9b4228742f008adb18f1f67e2ea93de8198e8a Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 9 Nov 2023 23:26:09 +0800 Subject: [PATCH 66/79] fix(stream): check for the repeatedly sent checkpoint-source msg. 
--- source/dnode/vnode/src/tq/tq.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index b3a359a683..438734e191 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -1668,7 +1668,7 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp) SStreamCheckpointSourceReq req = {0}; if (!vnodeIsRoleLeader(pTq->pVnode)) { - tqDebug("vgId:%d not leader, ignore checkpoint-source msg", vgId); + tqDebug("vgId:%d not leader, ignore checkpoint-source msg, s-task:0x%x", vgId, req.taskId); SRpcMsg rsp = {0}; buildCheckpointSourceRsp(&req, &pMsg->info, &rsp, 0); tmsgSendRsp(&rsp); // error occurs @@ -1676,7 +1676,7 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp) } if (!pTq->pVnode->restored) { - tqDebug("vgId:%d checkpoint-source msg received during restoring, ignore it", vgId); + tqDebug("vgId:%d checkpoint-source msg received during restoring, s-task:0x%x ignore it", vgId, req.taskId); SRpcMsg rsp = {0}; buildCheckpointSourceRsp(&req, &pMsg->info, &rsp, 0); tmsgSendRsp(&rsp); // error occurs @@ -1696,7 +1696,6 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp) } tDecoderClear(&decoder); - // todo handle failure to reset from checkpoint procedure SStreamTask* pTask = streamMetaAcquireTask(pMeta, req.streamId, req.taskId); if (pTask == NULL) { tqError("vgId:%d failed to find s-task:0x%x, ignore checkpoint msg. it may have been destroyed already", vgId, @@ -1707,7 +1706,6 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp) return TSDB_CODE_SUCCESS; } - // todo handle failure to reset from checkpoint procedure // downstream not ready, current the stream tasks are not all ready. Ignore this checkpoint req. 
if (pTask->status.downstreamReady != 1) { pTask->chkInfo.failedId = req.checkpointId; // record the latest failed checkpoint id @@ -1728,7 +1726,7 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp) ETaskStatus status = streamTaskGetStatus(pTask, NULL); if (status == TASK_STATUS__HALT || status == TASK_STATUS__PAUSE) { - qError("s-task:%s not ready for checkpoint, since it is halt, ignore this checkpoint:%" PRId64 ", set it failure", + tqError("s-task:%s not ready for checkpoint, since it is halt, ignore this checkpoint:%" PRId64 ", set it failure", pTask->id.idStr, req.checkpointId); taosThreadMutexUnlock(&pTask->lock); @@ -1739,6 +1737,18 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp) tmsgSendRsp(&rsp); // error occurs return TSDB_CODE_SUCCESS; } + + // check if the checkpoint msg already sent or not. + if (status == TASK_STATUS__CK) { + ASSERT(pTask->checkpointingId == req.checkpointId); + + tqWarn("s-task:%s recv checkpoint-source msg again checkpointId:%" PRId64 + " already received, ignore this msg and continue process checkpoint", + pTask->id.idStr, pTask->checkpointingId); + streamMetaReleaseTask(pMeta, pTask); + return code; + } + streamProcessCheckpointSourceReq(pTask, &req); taosThreadMutexUnlock(&pTask->lock); From af08a189c140bcc8c03d89ead1087aecee2c47ea Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 10 Nov 2023 09:24:08 +0800 Subject: [PATCH 67/79] fix(stream): disable concurrently restart stream tasks. 
--- source/dnode/vnode/src/tq/tq.c | 10 ++++++++++ source/libs/stream/inc/streamBackendRocksdb.h | 2 +- source/libs/stream/src/streamBackendRocksdb.c | 6 +++--- source/libs/stream/src/streamMeta.c | 7 ++++--- 4 files changed, 18 insertions(+), 7 deletions(-) diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 438734e191..dafd3aaa4a 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -1944,6 +1944,16 @@ int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg) { // the following procedure consume many CPU resource, result in the re-election of leader // with high probability. So we employ it as a test case for the stream processing framework, with // checkpoint/restart/nodeUpdate etc. + while(1) { + int32_t startVal = atomic_val_compare_exchange_32(&pMeta->startInfo.taskStarting, 0, 1); + if (startVal == 0) { + break; + } + + tqDebug("vgId:%d in start stream tasks procedure, wait for 500ms and recheck", vgId); + taosMsleep(500); + } + while (streamMetaTaskInTimer(pMeta)) { tqDebug("vgId:%d some tasks in timer, wait for 100ms and recheck", pMeta->vgId); taosMsleep(100); diff --git a/source/libs/stream/inc/streamBackendRocksdb.h b/source/libs/stream/inc/streamBackendRocksdb.h index b34b3420fe..441c71662e 100644 --- a/source/libs/stream/inc/streamBackendRocksdb.h +++ b/source/libs/stream/inc/streamBackendRocksdb.h @@ -44,7 +44,7 @@ typedef struct { int64_t defaultCfInit; } SBackendWrapper; -void* streamBackendInit(const char* path, int64_t chkpId); +void* streamBackendInit(const char* path, int64_t chkpId, int32_t vgId); void streamBackendCleanup(void* arg); void streamBackendHandleCleanup(void* arg); int32_t streamBackendLoadCheckpointInfo(void* pMeta); diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index b22c6c9b0f..63dc497c6f 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -469,11 
+469,11 @@ int32_t rebuildDirFromCheckpoint(const char* path, int64_t chkpId, char** dst) { return 0; } -void* streamBackendInit(const char* streamPath, int64_t chkpId) { +void* streamBackendInit(const char* streamPath, int64_t chkpId, int32_t vgId) { char* backendPath = NULL; int32_t code = rebuildDirFromCheckpoint(streamPath, chkpId, &backendPath); - stDebug("start to init stream backend at %s, checkpointid: %" PRId64 "", backendPath, chkpId); + stDebug("start to init stream backend at %s, checkpointid: %" PRId64 " vgId:%d", backendPath, chkpId, vgId); uint32_t dbMemLimit = nextPow2(tsMaxStreamBackendCache) << 20; SBackendWrapper* pHandle = taosMemoryCalloc(1, sizeof(SBackendWrapper)); @@ -534,7 +534,7 @@ void* streamBackendInit(const char* streamPath, int64_t chkpId) { if (cfs != NULL) { rocksdb_list_column_families_destroy(cfs, nCf); } - stDebug("succ to init stream backend at %s, backend:%p", backendPath, pHandle); + stDebug("succ to init stream backend at %s, backend:%p, vgId:%d", backendPath, pHandle, vgId); taosMemoryFreeClear(backendPath); return (void*)pHandle; diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 17cd9fac57..e6bbd89f02 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -195,10 +195,10 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF taosInitRWLatch(&pMeta->chkpDirLock); pMeta->chkpId = streamGetLatestCheckpointId(pMeta); - pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId); + pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId, vgId); while (pMeta->streamBackend == NULL) { taosMsleep(100); - pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId); + pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId, vgId); if (pMeta->streamBackend == NULL) { stInfo("vgId:%d failed to init stream backend, retry in 100ms", pMeta->vgId); } @@ -263,7 +263,8 @@ int32_t 
streamMetaReopen(SStreamMeta* pMeta) { } } - while ((pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId)) == NULL) { + // todo: not wait in a critical region + while ((pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId, pMeta->vgId)) == NULL) { stInfo("vgId:%d failed to init stream backend, retry in 100ms", pMeta->vgId); taosMsleep(100); } From cb0d244d5a6e965c96083f8fec41dbc92ef40919 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 10 Nov 2023 09:35:41 +0800 Subject: [PATCH 68/79] fix(stream): release lock. --- source/dnode/vnode/src/tq/tq.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index dafd3aaa4a..ba79844e40 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -1728,25 +1728,28 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp) if (status == TASK_STATUS__HALT || status == TASK_STATUS__PAUSE) { tqError("s-task:%s not ready for checkpoint, since it is halt, ignore this checkpoint:%" PRId64 ", set it failure", pTask->id.idStr, req.checkpointId); - taosThreadMutexUnlock(&pTask->lock); + taosThreadMutexUnlock(&pTask->lock); streamMetaReleaseTask(pMeta, pTask); SRpcMsg rsp = {0}; buildCheckpointSourceRsp(&req, &pMsg->info, &rsp, 0); tmsgSendRsp(&rsp); // error occurs + return TSDB_CODE_SUCCESS; } // check if the checkpoint msg already sent or not. 
if (status == TASK_STATUS__CK) { ASSERT(pTask->checkpointingId == req.checkpointId); - tqWarn("s-task:%s recv checkpoint-source msg again checkpointId:%" PRId64 " already received, ignore this msg and continue process checkpoint", pTask->id.idStr, pTask->checkpointingId); + + taosThreadMutexUnlock(&pTask->lock); streamMetaReleaseTask(pMeta, pTask); - return code; + + return TSDB_CODE_SUCCESS; } streamProcessCheckpointSourceReq(pTask, &req); From 5937bdddf58249ee65166d5caacc6da6beffcfaf Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 10 Nov 2023 09:46:20 +0800 Subject: [PATCH 69/79] refactor:disable test. --- source/dnode/vnode/src/tq/tq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index ba79844e40..065690dbfe 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -1940,7 +1940,7 @@ int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg) { tqDebug("vgId:%d all %d task(s) nodeEp updated and closed", vgId, numOfTasks); streamMetaWUnLock(pMeta); -#if 0 +#if 1 tqStartStreamTaskAsync(pTq, true); #else // For debug purpose. From 8e5db21c3f76c04c1b71fec5fb282a3bfc932149 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 10 Nov 2023 10:10:38 +0800 Subject: [PATCH 70/79] fix(test): fix syntax error. 
--- source/libs/stream/test/tstreamUpdateTest.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/stream/test/tstreamUpdateTest.cpp b/source/libs/stream/test/tstreamUpdateTest.cpp index f63939ac9e..1b999e5fb0 100644 --- a/source/libs/stream/test/tstreamUpdateTest.cpp +++ b/source/libs/stream/test/tstreamUpdateTest.cpp @@ -12,7 +12,7 @@ class StreamStateEnv : public ::testing::Test { protected: virtual void SetUp() { streamMetaInit(); - backend = streamBackendInit(path, 0); + backend = streamBackendInit(path, 0, 0); } virtual void TearDown() { streamMetaCleanup(); From 8925c721e54adedeb446f3a1b5328f822a54630c Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 10 Nov 2023 10:26:45 +0800 Subject: [PATCH 71/79] fix(stream): adjust critical section. --- source/dnode/vnode/src/tq/tq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 065690dbfe..3ae0eb1ddf 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -1939,10 +1939,12 @@ int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg) { } else { tqDebug("vgId:%d all %d task(s) nodeEp updated and closed", vgId, numOfTasks); - streamMetaWUnLock(pMeta); #if 1 tqStartStreamTaskAsync(pTq, true); + streamMetaWUnLock(pMeta); #else + streamMetaWUnLock(pMeta); + // For debug purpose. // the following procedure consume many CPU resource, result in the re-election of leader // with high probability. So we employ it as a test case for the stream processing framework, with From 7a23df4b1aee3fb5595d91f890dd79fb71f9b702 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 10 Nov 2023 13:36:35 +0800 Subject: [PATCH 72/79] fix(stream): set the correct updated nodeId. 
--- source/libs/stream/src/streamMeta.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index e6bbd89f02..042ff1d1d8 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -952,19 +952,20 @@ void metaHbToMnode(void* param, void* tmrId) { taosThreadMutexLock(&(*pTask)->lock); int32_t num = taosArrayGetSize((*pTask)->outputInfo.pDownstreamUpdateList); for (int j = 0; j < num; ++j) { - int32_t* pNodeId = taosArrayGet((*pTask)->outputInfo.pDownstreamUpdateList, j); + SDownstreamTaskEpset* pTaskEpset = taosArrayGet((*pTask)->outputInfo.pDownstreamUpdateList, j); bool exist = false; int32_t numOfExisted = taosArrayGetSize(hbMsg.pUpdateNodes); for (int k = 0; k < numOfExisted; ++k) { - if (*pNodeId == *(int32_t*)taosArrayGet(hbMsg.pUpdateNodes, k)) { + if (pTaskEpset->nodeId == *(int32_t*)taosArrayGet(hbMsg.pUpdateNodes, k)) { exist = true; break; } } if (!exist) { - taosArrayPush(hbMsg.pUpdateNodes, pNodeId); + taosArrayPush(hbMsg.pUpdateNodes, &pTaskEpset->nodeId); + stDebug("vgId:%d nodeId:%d added into the update list", pMeta->vgId, pTaskEpset->nodeId); } } From 5fcef5bd895ba0b570b28c2c5994bb6891c5d5b8 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 10 Nov 2023 14:36:18 +0800 Subject: [PATCH 73/79] refactor:add some logs. --- include/libs/stream/tstream.h | 2 +- source/dnode/mnode/impl/src/mndStream.c | 1 + source/libs/stream/src/streamMeta.c | 3 ++- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 9d32912ece..4b760f3f4e 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -461,7 +461,7 @@ typedef struct STaskStartInfo { int32_t taskStarting; // restart flag, sentinel to guard the restart procedure. 
SHashObj* pReadyTaskSet; // tasks that are all ready for running stream processing SHashObj* pFailedTaskSet; // tasks that are done the check downstream process, may be successful or failed - int32_t elapsedTime; + int64_t elapsedTime; } STaskStartInfo; typedef struct STaskUpdateInfo { diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index 0cdb180645..e589088c35 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -2670,6 +2670,7 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { doExtractTasksFromStream(pMnode); } + mDebug("%d stream nodes needs updated", (int32_t) taosArrayGetSize(req.pUpdateNodes)); setNodeEpsetExpiredFlag(req.pUpdateNodes); for (int32_t i = 0; i < req.numOfTasks; ++i) { diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 042ff1d1d8..fe157aaa24 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -965,7 +965,8 @@ void metaHbToMnode(void* param, void* tmrId) { if (!exist) { taosArrayPush(hbMsg.pUpdateNodes, &pTaskEpset->nodeId); - stDebug("vgId:%d nodeId:%d added into the update list", pMeta->vgId, pTaskEpset->nodeId); + stDebug("vgId:%d nodeId:%d added into the update list, total:%d", pMeta->vgId, pTaskEpset->nodeId, + (int32_t)taosArrayGetSize(hbMsg.pUpdateNodes)); } } From 15430f4d50703d90611ec84e016feba4f8fc92ff Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 10 Nov 2023 15:30:21 +0800 Subject: [PATCH 74/79] refactor: --- source/dnode/mnode/impl/src/mndStream.c | 11 +++-- source/libs/stream/src/streamMeta.c | 56 ++++++++++++++----------- source/libs/stream/src/streamStart.c | 2 +- 3 files changed, 41 insertions(+), 28 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index e589088c35..a537b4e501 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -2610,16 
+2610,18 @@ int32_t mndResetFromCheckpoint(SMnode* pMnode) { int32_t setNodeEpsetExpiredFlag(const SArray* pNodeList) { int32_t num = taosArrayGetSize(pNodeList); + mInfo("set node expired for %d nodes", num); for (int k = 0; k < num; ++k) { int32_t* pVgId = taosArrayGet(pNodeList, k); + mInfo("set node expired for nodeId:%d, total:%d", *pVgId, num); int32_t numOfNodes = taosArrayGetSize(execInfo.pNodeEntryList); for (int i = 0; i < numOfNodes; ++i) { SNodeEntry* pNodeEntry = taosArrayGet(execInfo.pNodeEntryList, i); if (pNodeEntry->nodeId == *pVgId) { - mInfo("vgId:%d expired in stream task, needs update nodeEp", *pVgId); + mInfo("vgId:%d expired for some stream tasks, needs update nodeEp", *pVgId); pNodeEntry->stageUpdated = true; break; } @@ -2670,8 +2672,11 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { doExtractTasksFromStream(pMnode); } - mDebug("%d stream nodes needs updated", (int32_t) taosArrayGetSize(req.pUpdateNodes)); - setNodeEpsetExpiredFlag(req.pUpdateNodes); + int32_t numOfUpdated = taosArrayGetSize(req.pUpdateNodes); + if (numOfUpdated > 0) { + mDebug("%d stream nodes needs updated from tasks' report", (int32_t)taosArrayGetSize(req.pUpdateNodes)); + setNodeEpsetExpiredFlag(req.pUpdateNodes); + } for (int32_t i = 0; i < req.numOfTasks; ++i) { STaskStatusEntry *p = taosArrayGet(req.pTaskStatus, i); diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index fe157aaa24..dfe5729b29 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -854,6 +854,37 @@ static void clearHbMsg(SStreamHbMsg* pMsg, SArray* pIdList) { taosArrayDestroy(pIdList); } +static bool existInHbMsg(SStreamHbMsg* pMsg, SDownstreamTaskEpset* pTaskEpset) { + int32_t numOfExisted = taosArrayGetSize(pMsg->pUpdateNodes); + for (int k = 0; k < numOfExisted; ++k) { + if (pTaskEpset->nodeId == *(int32_t*)taosArrayGet(pMsg->pUpdateNodes, k)) { + return true; + } + } + return false; +} + +static void 
addUpdateNodeIntoHbMsg(SStreamTask* pTask, SStreamHbMsg* pMsg) { + SStreamMeta* pMeta = pTask->pMeta; + + taosThreadMutexLock(&pTask->lock); + + int32_t num = taosArrayGetSize(pTask->outputInfo.pDownstreamUpdateList); + for (int j = 0; j < num; ++j) { + SDownstreamTaskEpset* pTaskEpset = taosArrayGet(pTask->outputInfo.pDownstreamUpdateList, j); + + bool exist = existInHbMsg(pMsg, pTaskEpset); + if (!exist) { + taosArrayPush(pMsg->pUpdateNodes, &pTaskEpset->nodeId); + stDebug("vgId:%d nodeId:%d added into hb update list, total:%d", pMeta->vgId, pTaskEpset->nodeId, + (int32_t)taosArrayGetSize(pMsg->pUpdateNodes)); + } + } + + taosArrayClear(pTask->outputInfo.pDownstreamUpdateList); + taosThreadMutexUnlock(&pTask->lock); +} + void metaHbToMnode(void* param, void* tmrId) { int64_t rid = *(int64_t*)param; @@ -949,30 +980,7 @@ void metaHbToMnode(void* param, void* tmrId) { walReaderValidVersionRange((*pTask)->exec.pWalReader, &entry.verStart, &entry.verEnd); } - taosThreadMutexLock(&(*pTask)->lock); - int32_t num = taosArrayGetSize((*pTask)->outputInfo.pDownstreamUpdateList); - for (int j = 0; j < num; ++j) { - SDownstreamTaskEpset* pTaskEpset = taosArrayGet((*pTask)->outputInfo.pDownstreamUpdateList, j); - - bool exist = false; - int32_t numOfExisted = taosArrayGetSize(hbMsg.pUpdateNodes); - for (int k = 0; k < numOfExisted; ++k) { - if (pTaskEpset->nodeId == *(int32_t*)taosArrayGet(hbMsg.pUpdateNodes, k)) { - exist = true; - break; - } - } - - if (!exist) { - taosArrayPush(hbMsg.pUpdateNodes, &pTaskEpset->nodeId); - stDebug("vgId:%d nodeId:%d added into the update list, total:%d", pMeta->vgId, pTaskEpset->nodeId, - (int32_t)taosArrayGetSize(hbMsg.pUpdateNodes)); - } - } - - taosArrayClear((*pTask)->outputInfo.pDownstreamUpdateList); - taosThreadMutexUnlock(&(*pTask)->lock); - + addUpdateNodeIntoHbMsg(*pTask, &hbMsg); taosArrayPush(hbMsg.pTaskStatus, &entry); if (!hasMnodeEpset) { epsetAssign(&epset, &(*pTask)->info.mnodeEpset); diff --git 
a/source/libs/stream/src/streamStart.c b/source/libs/stream/src/streamStart.c index da4aa02e9c..0b2bf6b4ba 100644 --- a/source/libs/stream/src/streamStart.c +++ b/source/libs/stream/src/streamStart.c @@ -1083,7 +1083,7 @@ int32_t streamMetaUpdateTaskDownstreamStatus(SStreamTask* pTask, int64_t startTs int32_t numOfTotal = streamMetaGetNumOfTasks(pMeta); if (taosHashGetSize(pStartInfo->pReadyTaskSet) + taosHashGetSize(pStartInfo->pFailedTaskSet) == numOfTotal) { - pStartInfo->readyTs = pTask->execInfo.start; + pStartInfo->readyTs = taosGetTimestampMs(); pStartInfo->elapsedTime = (pStartInfo->startTs != 0) ? pStartInfo->readyTs - pStartInfo->startTs : 0; stDebug("vgId:%d all %d task(s) check downstream completed, last completed task:%s level:%d, startTs:%" PRId64 From e957e4ad5f5c059ea55f1eec0b4db13cf8ab9aec Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 10 Nov 2023 15:39:55 +0800 Subject: [PATCH 75/79] refactor: update the node change check duration. --- source/common/src/tglobal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index d12ebb13c2..142f7f8078 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -240,7 +240,7 @@ int32_t tsTransPullupInterval = 2; int32_t tsMqRebalanceInterval = 2; int32_t tsStreamCheckpointInterval = 60; float tsSinkDataRate = 2.0; -int32_t tsStreamNodeCheckInterval = 30; +int32_t tsStreamNodeCheckInterval = 15; int32_t tsTtlUnit = 86400; int32_t tsTtlPushIntervalSec = 10; int32_t tsTrimVDbIntervalSec = 60 * 60; // interval of trimming db in all vgroups From 3bacd7516e09d32e5df94e3054a706fc6a0884fe Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 10 Nov 2023 16:24:20 +0800 Subject: [PATCH 76/79] fix(stream): extract stream nodes list if not initialized.
--- source/dnode/mnode/impl/src/mndStream.c | 85 ++++++++++++++----------- source/libs/stream/src/streamMeta.c | 2 +- 2 files changed, 48 insertions(+), 39 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index a537b4e501..0362b328ae 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -43,7 +43,7 @@ typedef struct SNodeEntry { } SNodeEntry; typedef struct SStreamExecInfo { - SArray *pNodeEntryList; + SArray *pNodeList; int64_t ts; // snapshot ts int64_t activeCheckpoint; // active check point id SHashObj *pTaskMap; @@ -850,7 +850,7 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { mndTransDrop(pTrans); taosThreadMutexLock(&execInfo.lock); - mDebug("register to stream task node list"); + mDebug("stream tasks register into node list"); keepStreamTasksInBuf(&streamObj, &execInfo); taosThreadMutexUnlock(&execInfo.lock); @@ -1125,6 +1125,15 @@ static const char *mndGetStreamDB(SMnode *pMnode) { return p; } +static int32_t initStreamNodeList(SMnode* pMnode) { + if (execInfo.pNodeList == NULL || (taosArrayGetSize(execInfo.pNodeList) == 0)) { + execInfo.pNodeList = taosArrayDestroy(execInfo.pNodeList); + execInfo.pNodeList = extractNodeListFromStream(pMnode); + } + + return taosArrayGetSize(execInfo.pNodeList); +} + static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) { SMnode *pMnode = pReq->info.node; SSdb *pSdb = pMnode->pSdb; @@ -1135,22 +1144,18 @@ static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) { { // check if the node update happens or not int64_t ts = taosGetTimestampSec(); - if (execInfo.pNodeEntryList == NULL || (taosArrayGetSize(execInfo.pNodeEntryList) == 0)) { - if (execInfo.pNodeEntryList != NULL) { - execInfo.pNodeEntryList = taosArrayDestroy(execInfo.pNodeEntryList); - } + taosThreadMutexLock(&execInfo.lock); + int32_t numOfNodes = initStreamNodeList(pMnode); + taosThreadMutexUnlock(&execInfo.lock); - 
execInfo.pNodeEntryList = extractNodeListFromStream(pMnode); - } - - if (taosArrayGetSize(execInfo.pNodeEntryList) == 0) { + if (numOfNodes == 0) { mDebug("stream task node change checking done, no vgroups exist, do nothing"); execInfo.ts = ts; return 0; } - for(int32_t i = 0; i < taosArrayGetSize(execInfo.pNodeEntryList); ++i) { - SNodeEntry* pNodeEntry = taosArrayGet(execInfo.pNodeEntryList, i); + for(int32_t i = 0; i < numOfNodes; ++i) { + SNodeEntry* pNodeEntry = taosArrayGet(execInfo.pNodeList, i); if (pNodeEntry->stageUpdated) { mDebug("stream task not ready due to node update detected, checkpoint not issued"); return 0; @@ -1165,7 +1170,7 @@ static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) { return 0; } - SVgroupChangeInfo changeInfo = mndFindChangedNodeInfo(pMnode, execInfo.pNodeEntryList, pNodeSnapshot); + SVgroupChangeInfo changeInfo = mndFindChangedNodeInfo(pMnode, execInfo.pNodeList, pNodeSnapshot); bool nodeUpdated = (taosArrayGetSize(changeInfo.pUpdateNodeList) > 0); taosArrayDestroy(changeInfo.pUpdateNodeList); taosHashCleanup(changeInfo.pDBMap); @@ -2080,20 +2085,21 @@ static SArray *mndTakeVgroupSnapshot(SMnode *pMnode, bool* allReady) { break; } - SNodeEntry entry = {0}; + SNodeEntry entry = {.nodeId = pVgroup->vgId, .hbTimestamp = pVgroup->updateTime}; entry.epset = mndGetVgroupEpset(pMnode, pVgroup); - entry.nodeId = pVgroup->vgId; - entry.hbTimestamp = pVgroup->updateTime; + // if not all ready till now, no need to check the remaining vgroups. 
if (*allReady) { for (int32_t i = 0; i < pVgroup->replica; ++i) { if (!pVgroup->vnodeGid[i].syncRestore) { + mInfo("vgId:%d not restored, not ready for checkpoint or other operations", pVgroup->vgId); *allReady = false; break; } ESyncState state = pVgroup->vnodeGid[i].syncState; if (state == TAOS_SYNC_STATE_OFFLINE || state == TAOS_SYNC_STATE_ERROR) { + mInfo("vgId:%d offline/err, not ready for checkpoint or other operations", pVgroup->vgId); *allReady = false; break; } @@ -2300,8 +2306,8 @@ int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot) { int32_t size = taosArrayGetSize(pNodeSnapshot); SArray* pValidNodeEntryList = taosArrayInit(4, sizeof(SNodeEntry)); - for(int32_t i = 0; i < taosArrayGetSize(execInfo.pNodeEntryList); ++i) { - SNodeEntry* p = taosArrayGet(execInfo.pNodeEntryList, i); + for(int32_t i = 0; i < taosArrayGetSize(execInfo.pNodeList); ++i) { + SNodeEntry* p = taosArrayGet(execInfo.pNodeList, i); for(int32_t j = 0; j < size; ++j) { SNodeEntry* pEntry = taosArrayGet(pNodeSnapshot, j); @@ -2312,8 +2318,8 @@ int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot) { } } - execInfo.pNodeEntryList = taosArrayDestroy(execInfo.pNodeEntryList); - execInfo.pNodeEntryList = pValidNodeEntryList; + taosArrayDestroy(execInfo.pNodeList); + execInfo.pNodeList = pValidNodeEntryList; mDebug("remain %d valid node entries", (int32_t) taosArrayGetSize(pValidNodeEntryList)); taosArrayDestroy(pRemovedTasks); @@ -2323,6 +2329,7 @@ int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot) { // this function runs by only one thread, so it is not multi-thread safe static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) { int32_t code = 0; + int32_t old = atomic_val_compare_exchange_32(&mndNodeCheckSentinel, 0, 1); if (old != 0) { mDebug("still in checking node change"); @@ -2333,23 +2340,21 @@ static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) { int64_t ts = taosGetTimestampSec(); SMnode *pMnode = pMsg->info.node; - if (execInfo.pNodeEntryList == NULL || 
(taosArrayGetSize(execInfo.pNodeEntryList) == 0)) { - if (execInfo.pNodeEntryList != NULL) { - execInfo.pNodeEntryList = taosArrayDestroy(execInfo.pNodeEntryList); - } - execInfo.pNodeEntryList = extractNodeListFromStream(pMnode); - } - if (taosArrayGetSize(execInfo.pNodeEntryList) == 0) { + taosThreadMutexLock(&execInfo.lock); + int32_t numOfNodes = initStreamNodeList(pMnode); + taosThreadMutexUnlock(&execInfo.lock); + + if (numOfNodes == 0) { mDebug("end to do stream task node change checking, no vgroup exists, do nothing"); execInfo.ts = ts; atomic_store_32(&mndNodeCheckSentinel, 0); return 0; } - bool allVnodeReady = true; - SArray *pNodeSnapshot = mndTakeVgroupSnapshot(pMnode, &allVnodeReady); - if (!allVnodeReady) { + bool allVgroupsReady = true; + SArray *pNodeSnapshot = mndTakeVgroupSnapshot(pMnode, &allVgroupsReady); + if (!allVgroupsReady) { taosArrayDestroy(pNodeSnapshot); atomic_store_32(&mndNodeCheckSentinel, 0); mWarn("not all vnodes are ready, ignore the exec nodeUpdate check"); @@ -2359,7 +2364,7 @@ static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) { taosThreadMutexLock(&execInfo.lock); removeExpirednodeEntryAndTask(pNodeSnapshot); - SVgroupChangeInfo changeInfo = mndFindChangedNodeInfo(pMnode, execInfo.pNodeEntryList, pNodeSnapshot); + SVgroupChangeInfo changeInfo = mndFindChangedNodeInfo(pMnode, execInfo.pNodeList, pNodeSnapshot); if (taosArrayGetSize(changeInfo.pUpdateNodeList) > 0) { // kill current active checkpoint transaction, since the transaction is vnode wide. 
@@ -2369,8 +2374,8 @@ static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) { // keep the new vnode snapshot if (code == TSDB_CODE_SUCCESS || code == TSDB_CODE_ACTION_IN_PROGRESS) { mDebug("create trans successfully, update cached node list"); - taosArrayDestroy(execInfo.pNodeEntryList); - execInfo.pNodeEntryList = pNodeSnapshot; + taosArrayDestroy(execInfo.pNodeList); + execInfo.pNodeList = pNodeSnapshot; execInfo.ts = ts; } else { mDebug("unexpect code during create nodeUpdate trans, code:%s", tstrerror(code)); @@ -2616,9 +2621,9 @@ int32_t setNodeEpsetExpiredFlag(const SArray* pNodeList) { int32_t* pVgId = taosArrayGet(pNodeList, k); mInfo("set node expired for nodeId:%d, total:%d", *pVgId, num); - int32_t numOfNodes = taosArrayGetSize(execInfo.pNodeEntryList); + int32_t numOfNodes = taosArrayGetSize(execInfo.pNodeList); for (int i = 0; i < numOfNodes; ++i) { - SNodeEntry* pNodeEntry = taosArrayGet(execInfo.pNodeEntryList, i); + SNodeEntry* pNodeEntry = taosArrayGet(execInfo.pNodeList, i); if (pNodeEntry->nodeId == *pVgId) { mInfo("vgId:%d expired for some stream tasks, needs update nodeEp", *pVgId); @@ -2632,9 +2637,9 @@ int32_t setNodeEpsetExpiredFlag(const SArray* pNodeList) { } static void updateStageInfo(STaskStatusEntry* pTaskEntry, int32_t stage) { - int32_t numOfNodes = taosArrayGetSize(execInfo.pNodeEntryList); + int32_t numOfNodes = taosArrayGetSize(execInfo.pNodeList); for(int32_t j = 0; j < numOfNodes; ++j) { - SNodeEntry* pNodeEntry = taosArrayGet(execInfo.pNodeEntryList, j); + SNodeEntry* pNodeEntry = taosArrayGet(execInfo.pNodeList, j); if (pNodeEntry->nodeId == pTaskEntry->nodeId) { mInfo("vgId:%d stage updated from %d to %d, nodeUpdate trigger by s-task:0x%" PRIx64, pTaskEntry->nodeId, @@ -2667,14 +2672,18 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { mTrace("receive stream-meta hb from vgId:%d, active numOfTasks:%d", req.vgId, req.numOfTasks); taosThreadMutexLock(&execInfo.lock); + + // extract stream task list int32_t numOfExisted = 
taosHashGetSize(execInfo.pTaskMap); if (numOfExisted == 0) { doExtractTasksFromStream(pMnode); } + initStreamNodeList(pMnode); + int32_t numOfUpdated = taosArrayGetSize(req.pUpdateNodes); if (numOfUpdated > 0) { - mDebug("%d stream nodes needs updated from tasks' report", (int32_t)taosArrayGetSize(req.pUpdateNodes)); + mDebug("%d stream node(s) need updated from report of hbMsg(vgId:%d)", numOfUpdated, req.vgId); setNodeEpsetExpiredFlag(req.pUpdateNodes); } diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index dfe5729b29..f364ed889d 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -1020,7 +1020,7 @@ void metaHbToMnode(void* param, void* tmrId) { pMeta->pHbInfo->hbCount += 1; - stDebug("vgId:%d, build and send hb to mnode, numOfTasks:%d total:%d", pMeta->vgId, hbMsg.numOfTasks, + stDebug("vgId:%d build and send hb to mnode, numOfTasks:%d total:%d", pMeta->vgId, hbMsg.numOfTasks, pMeta->pHbInfo->hbCount); tmsgSendReq(&epset, &msg); } else { From 4ff8907b5817af954a871e0b86b07c75f284d886 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sat, 11 Nov 2023 23:05:20 +0800 Subject: [PATCH 77/79] fix: fix syntax error. 
--- source/common/src/tglobal.c | 14 +++++++------- source/libs/stream/src/streamMeta.c | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index 76d8a0476e..d2e4e7b845 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -525,13 +525,13 @@ static int32_t taosAddSystemCfg(SConfig *pCfg) { if (cfgAddBool(pCfg, "enableCoreFile", 1, CFG_SCOPE_BOTH, CFG_DYN_CLIENT) != 0) return -1; if (cfgAddFloat(pCfg, "numOfCores", tsNumOfCores, 1, 100000, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; - if (cfgAddBool(pCfg, "ssd42", tsSSE42Enable, CFG_SCOPE_BOTH) != 0) return -1; - if (cfgAddBool(pCfg, "avx", tsAVXEnable, CFG_SCOPE_BOTH) != 0) return -1; - if (cfgAddBool(pCfg, "avx2", tsAVX2Enable, CFG_SCOPE_BOTH) != 0) return -1; - if (cfgAddBool(pCfg, "fma", tsFMAEnable, CFG_SCOPE_BOTH) != 0) return -1; - if (cfgAddBool(pCfg, "avx512", tsAVX512Enable, CFG_SCOPE_BOTH) != 0) return -1; - if (cfgAddBool(pCfg, "simdEnable", tsSIMDEnable, CFG_SCOPE_BOTH) != 0) return -1; - if (cfgAddBool(pCfg, "tagFilterCache", tsTagFilterCache, CFG_SCOPE_BOTH) != 0) return -1; + if (cfgAddBool(pCfg, "ssd42", tsSSE42Enable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; + if (cfgAddBool(pCfg, "avx", tsAVXEnable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; + if (cfgAddBool(pCfg, "avx2", tsAVX2Enable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; + if (cfgAddBool(pCfg, "fma", tsFMAEnable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; + if (cfgAddBool(pCfg, "avx512", tsAVX512Enable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; + if (cfgAddBool(pCfg, "simdEnable", tsSIMDEnable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; + if (cfgAddBool(pCfg, "tagFilterCache", tsTagFilterCache, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; if (cfgAddInt64(pCfg, "openMax", tsOpenMax, 0, INT64_MAX, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; #if !defined(_ALPINE) diff --git a/source/libs/stream/src/streamMeta.c 
b/source/libs/stream/src/streamMeta.c index 6b82e6683c..7013b43a6f 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -194,7 +194,7 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF taosInitRWLatch(&pMeta->chkpDirLock); pMeta->chkpId = streamMetaGetLatestCheckpointId(pMeta); - pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId); + pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId, pMeta->vgId); while (pMeta->streamBackend == NULL) { taosMsleep(100); pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId, vgId); From 2874346eca0d5af40b9f641863f66d17c060e14d Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Mon, 13 Nov 2023 08:57:13 +0800 Subject: [PATCH 78/79] fix(cos/multichunk): clear put object data to initial state --- source/common/src/cos.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/source/common/src/cos.c b/source/common/src/cos.c index 0b6b0db885..e54e7722e2 100644 --- a/source/common/src/cos.c +++ b/source/common/src/cos.c @@ -269,6 +269,7 @@ typedef struct list_parts_callback_data { typedef struct MultipartPartData { char err_msg[512]; S3Status status; + uint64_t content_length; put_object_callback_data put_object_data; int seq; UploadManager *manager; @@ -458,13 +459,13 @@ int32_t s3PutObjectFromFile2(const char *file, const char *object) { int metaPropertiesCount = 0; S3NameValue metaProperties[S3_MAX_METADATA_COUNT]; char useServerSideEncryption = 0; - int noStatus = 0; - put_object_callback_data data; + put_object_callback_data data = {0}; + // int noStatus = 0; // data.infile = 0; - data.infileFD = NULL; - data.gb = 0; - data.noStatus = noStatus; + // data.gb = 0; + // data.infileFD = NULL; + // data.noStatus = noStatus; if (taosStatFile(file, &contentLength, NULL, NULL) < 0) { uError("ERROR: %s Failed to stat file %s: ", __func__, file); From 244cedbc8f264835526979d3919f95f5e115b6d2 Mon Sep 17 
00:00:00 2001 From: Minglei Jin Date: Mon, 13 Nov 2023 08:57:13 +0800 Subject: [PATCH 79/79] fix(cos/multichunk): clear put object data to initial state --- source/common/src/cos.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/source/common/src/cos.c b/source/common/src/cos.c index 0b6b0db885..ea41afd8fb 100644 --- a/source/common/src/cos.c +++ b/source/common/src/cos.c @@ -267,8 +267,6 @@ typedef struct list_parts_callback_data { } list_parts_callback_data; typedef struct MultipartPartData { - char err_msg[512]; - S3Status status; put_object_callback_data put_object_data; int seq; UploadManager *manager; @@ -276,11 +274,12 @@ typedef struct MultipartPartData { static int putObjectDataCallback(int bufferSize, char *buffer, void *callbackData) { put_object_callback_data *data = (put_object_callback_data *)callbackData; + /* if (data->infileFD == 0) { MultipartPartData *mpd = (MultipartPartData *)callbackData; data = &mpd->put_object_data; } - + */ int ret = 0; if (data->contentLength) { @@ -458,13 +457,13 @@ int32_t s3PutObjectFromFile2(const char *file, const char *object) { int metaPropertiesCount = 0; S3NameValue metaProperties[S3_MAX_METADATA_COUNT]; char useServerSideEncryption = 0; - int noStatus = 0; - put_object_callback_data data; + put_object_callback_data data = {0}; + // int noStatus = 0; // data.infile = 0; - data.infileFD = NULL; - data.gb = 0; - data.noStatus = noStatus; + // data.gb = 0; + // data.infileFD = NULL; + // data.noStatus = noStatus; if (taosStatFile(file, &contentLength, NULL, NULL) < 0) { uError("ERROR: %s Failed to stat file %s: ", __func__, file); @@ -581,9 +580,9 @@ int32_t s3PutObjectFromFile2(const char *file, const char *object) { do { S3_upload_part(&bucketContext, key, &putProperties, &putObjectHandler, seq, manager.upload_id, partContentLength, 0, timeoutMsG, &partData); - } while (S3_status_is_retryable(partData.status) && should_retry()); - if (partData.status != S3StatusOK) { - 
s3PrintError(__func__, partData.status, partData.err_msg); + } while (S3_status_is_retryable(partData.put_object_data.status) && should_retry()); + if (partData.put_object_data.status != S3StatusOK) { + s3PrintError(__func__, partData.put_object_data.status, partData.put_object_data.err_msg); code = TAOS_SYSTEM_ERROR(EIO); goto clean; }