From 8bdf3df6a97e6e9e40d3603826c69fb416e5080d Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Fri, 22 Dec 2023 19:25:55 +0800 Subject: [PATCH] enh: left join ut --- include/common/tdatablock.h | 1 + include/os/osMath.h | 2 +- source/common/src/tdatablock.c | 83 +- source/libs/executor/inc/mergejoin.h | 23 +- source/libs/executor/src/mergejoin.c | 862 ++++++++++ source/libs/executor/src/mergejoinoperator.c | 696 +------- source/libs/executor/test/CMakeLists.txt | 36 +- source/libs/executor/test/joinTests.cpp | 1489 ++++++++++++------ source/libs/planner/src/planOptimizer.c | 5 +- 9 files changed, 1989 insertions(+), 1208 deletions(-) create mode 100755 source/libs/executor/src/mergejoin.c diff --git a/include/common/tdatablock.h b/include/common/tdatablock.h index e9e4ed403d..2378a2c5b8 100644 --- a/include/common/tdatablock.h +++ b/include/common/tdatablock.h @@ -210,6 +210,7 @@ size_t blockDataGetNumOfRows(const SSDataBlock* pBlock); int32_t blockDataMerge(SSDataBlock* pDest, const SSDataBlock* pSrc); int32_t blockDataMergeNRows(SSDataBlock* pDest, const SSDataBlock* pSrc, int32_t srcIdx, int32_t numOfRows); +void blockDataShrinkNRows(SSDataBlock* pBlock, int32_t numOfRows); int32_t blockDataSplitRows(SSDataBlock* pBlock, bool hasVarCol, int32_t startIndex, int32_t* stopIndex, int32_t pageSize); int32_t blockDataToBuf(char* buf, const SSDataBlock* pBlock); diff --git a/include/os/osMath.h b/include/os/osMath.h index ec17073e9b..3c05d15397 100644 --- a/include/os/osMath.h +++ b/include/os/osMath.h @@ -31,7 +31,7 @@ extern "C" { #define TSWAP(a, b) \ do { \ - char *__tmp = alloca(sizeof(a)); \ + char *__tmp = (char*)alloca(sizeof(a)); \ memcpy(__tmp, &(a), sizeof(a)); \ memcpy(&(a), &(b), sizeof(a)); \ memcpy(&(b), __tmp, sizeof(a)); \ diff --git a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c index a09680599b..b71a067692 100644 --- a/source/common/src/tdatablock.c +++ b/source/common/src/tdatablock.c @@ -242,10 +242,12 @@ int32_t 
colDataSetNItems(SColumnInfoData* pColumnInfoData, uint32_t currentRow, } void colDataSetNItemsNull(SColumnInfoData* pColumnInfoData, uint32_t currentRow, uint32_t numOfRows) { + pColumnInfoData->hasNull = true; + if (IS_VAR_DATA_TYPE(pColumnInfoData->info.type)) { memset(&pColumnInfoData->varmeta.offset[currentRow], -1, sizeof(int32_t) * numOfRows); } else { - if (numOfRows < sizeof(char) * 2) { + if (numOfRows < 16) { for (int32_t i = 0; i < numOfRows; ++i) { colDataSetNull_f(pColumnInfoData->nullbitmap, currentRow + i); } @@ -259,8 +261,9 @@ void colDataSetNItemsNull(SColumnInfoData* pColumnInfoData, uint32_t currentRow, } } - memset(&BMCharPos(pColumnInfoData->nullbitmap, currentRow + i), 0xFF, (numOfRows - i) / sizeof(char)); - i += (numOfRows - i) / sizeof(char) * sizeof(char); + int32_t bytes = (numOfRows - i) / 8; + memset(&BMCharPos(pColumnInfoData->nullbitmap, currentRow + i), 0xFF, bytes); + i += bytes * 8; for (; i < numOfRows; ++i) { colDataSetNull_f(pColumnInfoData->nullbitmap, currentRow + i); @@ -542,33 +545,14 @@ int32_t colDataAssignNRows(SColumnInfoData* pDst, int32_t dstIdx, const SColumnI } } else { if (pSrc->hasNull) { - if (0 == BitPos(dstIdx) && 0 == BitPos(srcIdx)) { - memcpy(&BMCharPos(pDst->nullbitmap, dstIdx), &BMCharPos(pSrc->nullbitmap, srcIdx), BitmapLen(numOfRows)); - if (!pDst->hasNull) { - int32_t nullBytes = BitmapLen(numOfRows); - int32_t startPos = CharPos(dstIdx); - for (int32_t i = 0; i < nullBytes; ++i) { - if (pDst->nullbitmap[startPos + i]) { - pDst->hasNull = true; - break; - } - } - } - } else if (BitPos(dstIdx) == BitPos(srcIdx)) { + if (BitPos(dstIdx) == BitPos(srcIdx)) { for (int32_t i = 0; i < numOfRows; ++i) { - if (0 == BitPos(dstIdx)) { - memcpy(&BMCharPos(pDst->nullbitmap, dstIdx + i), &BMCharPos(pSrc->nullbitmap, srcIdx + i), BitmapLen(numOfRows - i)); - if (!pDst->hasNull) { - int32_t nullBytes = BitmapLen(numOfRows - i); - int32_t startPos = CharPos(dstIdx + i); - for (int32_t m = 0; m < nullBytes; ++m) { - if 
(pDst->nullbitmap[startPos + m]) { - pDst->hasNull = true; - break; - } - } + if (0 == BitPos(dstIdx) && (i + (1 << NBIT) <= numOfRows)) { + BMCharPos(pDst->nullbitmap, dstIdx + i) = BMCharPos(pSrc->nullbitmap, srcIdx + i); + if (BMCharPos(pDst->nullbitmap, dstIdx + i)) { + pDst->hasNull = true; } - break; + i += (1 << NBIT) - 1; } else { if (colDataIsNull_f(pSrc->nullbitmap, srcIdx + i)) { colDataSetNull_f(pDst->nullbitmap, dstIdx + i); @@ -588,9 +572,7 @@ int32_t colDataAssignNRows(SColumnInfoData* pDst, int32_t dstIdx, const SColumnI } } } - } else { - memset(&BMCharPos(pDst->nullbitmap, dstIdx), 0, BitmapLen(numOfRows)); - } + } if (pSrc->pData != NULL) { memcpy(pDst->pData + pDst->info.bytes * dstIdx, pSrc->pData + pSrc->info.bytes * srcIdx, pDst->info.bytes * numOfRows); @@ -665,10 +647,45 @@ int32_t blockDataMergeNRows(SSDataBlock* pDest, const SSDataBlock* pSrc, int32_t colDataAssignNRows(pCol2, pDest->info.rows, pCol1, srcIdx, numOfRows); } - pDest->info.rows += pSrc->info.rows; + pDest->info.rows += numOfRows; return TSDB_CODE_SUCCESS; } +void blockDataShrinkNRows(SSDataBlock* pBlock, int32_t numOfRows) { + if (numOfRows >= pBlock->info.rows) { + blockDataCleanup(pBlock); + return; + } + + size_t numOfCols = taosArrayGetSize(pBlock->pDataBlock); + for (int32_t i = 0; i < numOfCols; ++i) { + SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, i); + if (IS_VAR_DATA_TYPE(pCol->info.type)) { + pCol->varmeta.length = pCol->varmeta.offset[pBlock->info.rows - numOfRows]; + memset(pCol->varmeta.offset + pBlock->info.rows - numOfRows, 0, sizeof(*pCol->varmeta.offset) * numOfRows); + } else { + int32_t i = pBlock->info.rows - numOfRows; + for (; i < pBlock->info.rows; ++i) { + if (BitPos(i)) { + colDataClearNull_f(pCol->nullbitmap, i); + } else { + break; + } + } + + int32_t bytes = (pBlock->info.rows - i) / 8; + memset(&BMCharPos(pCol->nullbitmap, i), 0, bytes); + i += bytes * 8; + + for (; i < pBlock->info.rows; ++i) { + colDataClearNull_f(pCol->nullbitmap, 
i); + } + } + } + + pBlock->info.rows -= numOfRows; +} + size_t blockDataGetSize(const SSDataBlock* pBlock) { size_t total = 0; @@ -2542,6 +2559,8 @@ void trimDataBlock(SSDataBlock* pBlock, int32_t totalRows, const bool* pBoolList int32_t numOfRows = 0; if (IS_VAR_DATA_TYPE(pDst->info.type)) { pDst->varmeta.length = 0; + } else { + memset(pDst->nullbitmap, 0, bmLen); } } return; diff --git a/source/libs/executor/inc/mergejoin.h b/source/libs/executor/inc/mergejoin.h index bbc5ef3d13..34a173a74b 100755 --- a/source/libs/executor/inc/mergejoin.h +++ b/source/libs/executor/inc/mergejoin.h @@ -19,9 +19,9 @@ extern "C" { #endif -#define MJOIN_DEFAULT_BLK_ROWS_NUM 4096 +#define MJOIN_DEFAULT_BLK_ROWS_NUM 4 #define MJOIN_HJOIN_CART_THRESHOLD 16 -#define MJOIN_BLK_SIZE_LIMIT 10485760 +#define MJOIN_BLK_SIZE_LIMIT 20 struct SMJoinOperatorInfo; @@ -86,6 +86,7 @@ typedef struct SMJoinTableCtx { SArray* valVarCols; bool valColExist; + bool newBlk; SSDataBlock* blk; int32_t blkRowIdx; @@ -199,9 +200,11 @@ typedef struct SMJoinOperatorInfo { #define MJOIN_GET_TB_COL_TS(_col, _ts, _tb) \ do { \ - if (NULL != (_tb)->blk) { \ + if (NULL != (_tb)->blk && (_tb)->blkRowIdx < (_tb)->blk->info.rows) { \ (_col) = taosArrayGet((_tb)->blk->pDataBlock, (_tb)->primCol->srcSlot); \ (_ts) = *((int64_t*)(_col)->pData + (_tb)->blkRowIdx); \ + } else { \ + (_ts) = INT64_MIN; \ } \ } while (0) @@ -228,6 +231,20 @@ typedef struct SMJoinOperatorInfo { goto _return; \ } \ } while (0) + + + + +int32_t mJoinInitMergeCtx(SMJoinOperatorInfo* pJoin, SSortMergeJoinPhysiNode* pJoinNode); +SSDataBlock* mLeftJoinDo(struct SOperatorInfo* pOperator); +bool mJoinRetrieveImpl(SMJoinOperatorInfo* pJoin, int32_t* pIdx, SSDataBlock** ppBlk, SMJoinTableCtx* pTb); +void mJoinSetDone(SOperatorInfo* pOperator); +bool mJoinCopyKeyColsDataToBuf(SMJoinTableCtx* pTable, int32_t rowIdx, size_t *pBufLen); +void mJoinBuildEqGroups(SMJoinTableCtx* pTable, int64_t timestamp, bool* wholeBlk, bool restart); +int32_t 
mJoinRetrieveEqGrpRows(SOperatorInfo* pOperator, SMJoinTableCtx* pTable, int64_t timestamp); +int32_t mJoinMakeBuildTbHash(SMJoinOperatorInfo* pJoin, SMJoinTableCtx* pTable); +int32_t mJoinSetKeyColsData(SSDataBlock* pBlock, SMJoinTableCtx* pTable); + #ifdef __cplusplus diff --git a/source/libs/executor/src/mergejoin.c b/source/libs/executor/src/mergejoin.c new file mode 100755 index 0000000000..d1f40e3c72 --- /dev/null +++ b/source/libs/executor/src/mergejoin.c @@ -0,0 +1,862 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "executorInt.h" +#include "filter.h" +#include "function.h" +#include "operator.h" +#include "os.h" +#include "querynodes.h" +#include "querytask.h" +#include "tcompare.h" +#include "tdatablock.h" +#include "thash.h" +#include "tmsg.h" +#include "ttypes.h" +#include "mergejoin.h" + +int32_t mJoinInitMergeCtx(SMJoinOperatorInfo* pJoin, SSortMergeJoinPhysiNode* pJoinNode) { + SMJoinMergeCtx* pCtx = &pJoin->ctx.mergeCtx; + + pCtx->pJoin = pJoin; + pCtx->lastEqTs = INT64_MIN; + pCtx->hashCan = pJoin->probe->keyNum > 0; + + pCtx->finBlk = createDataBlockFromDescNode(pJoinNode->node.pOutputDataBlockDesc); + blockDataEnsureCapacity(pCtx->finBlk, TMAX(MJOIN_DEFAULT_BLK_ROWS_NUM, MJOIN_BLK_SIZE_LIMIT/pJoinNode->node.pOutputDataBlockDesc->totalRowSize)); + + if (pJoin->pFPreFilter) { + pCtx->midBlk = createOneDataBlock(pCtx->finBlk, false); + blockDataEnsureCapacity(pCtx->midBlk, pCtx->finBlk->info.capacity); + } + + pCtx->blkThreshold = pCtx->finBlk->info.capacity * 0.5; + + return TSDB_CODE_SUCCESS; +} + +static int32_t mLeftJoinGrpNonEqCart(SMJoinOperatorInfo* pJoin, SSDataBlock* pRes, bool append, SMJoinGrpRows* pGrp) { + SMJoinTableCtx* probe = pJoin->probe; + SMJoinTableCtx* build = pJoin->build; + int32_t currRows = append ? pRes->info.rows : 0; + int32_t firstRows = GRP_REMAIN_ROWS(pGrp); + + for (int32_t c = 0; c < probe->finNum; ++c) { + SMJoinColMap* pFirstCol = probe->finCols + c; + SColumnInfoData* pInCol = taosArrayGet(pGrp->blk->pDataBlock, pFirstCol->srcSlot); + SColumnInfoData* pOutCol = taosArrayGet(pRes->pDataBlock, pFirstCol->dstSlot); + colDataAssignNRows(pOutCol, currRows, pInCol, pGrp->readIdx, firstRows); + } + + for (int32_t c = 0; c < build->finNum; ++c) { + SMJoinColMap* pSecondCol = build->finCols + c; + SColumnInfoData* pOutCol = taosArrayGet(pRes->pDataBlock, pSecondCol->dstSlot); + colDataSetNItemsNull(pOutCol, currRows, firstRows); + } + + pRes->info.rows = append ? 
(pRes->info.rows + firstRows) : firstRows; + return TSDB_CODE_SUCCESS; +} + +static int32_t mLeftJoinGrpEqCart(SMJoinOperatorInfo* pJoin, SSDataBlock* pRes, bool append, SMJoinGrpRows* pFirst, SMJoinGrpRows* pSecond) { + SMJoinTableCtx* probe = pJoin->probe; + SMJoinTableCtx* build = pJoin->build; + int32_t currRows = append ? pRes->info.rows : 0; + int32_t firstRows = GRP_REMAIN_ROWS(pFirst); + int32_t secondRows = GRP_REMAIN_ROWS(pSecond); + ASSERT(secondRows > 0); + + for (int32_t c = 0; c < probe->finNum; ++c) { + SMJoinColMap* pFirstCol = probe->finCols + c; + SColumnInfoData* pInCol = taosArrayGet(pFirst->blk->pDataBlock, pFirstCol->srcSlot); + SColumnInfoData* pOutCol = taosArrayGet(pRes->pDataBlock, pFirstCol->dstSlot); + for (int32_t r = 0; r < firstRows; ++r) { + if (colDataIsNull_s(pInCol, pFirst->readIdx + r)) { + colDataSetNItemsNull(pOutCol, currRows + r * secondRows, secondRows); + } else { + ASSERT(pRes->info.capacity >= (pRes->info.rows + firstRows * secondRows)); + uint32_t startOffset = (IS_VAR_DATA_TYPE(pOutCol->info.type)) ? pOutCol->varmeta.length : ((currRows + r * secondRows) * pOutCol->info.bytes); + ASSERT((startOffset + 1 * pOutCol->info.bytes) <= pRes->info.capacity * pOutCol->info.bytes); + colDataSetNItems(pOutCol, currRows + r * secondRows, colDataGetData(pInCol, pFirst->readIdx + r), secondRows, true); + } + } + } + + for (int32_t c = 0; c < build->finNum; ++c) { + SMJoinColMap* pSecondCol = build->finCols + c; + SColumnInfoData* pInCol = taosArrayGet(pSecond->blk->pDataBlock, pSecondCol->srcSlot); + SColumnInfoData* pOutCol = taosArrayGet(pRes->pDataBlock, pSecondCol->dstSlot); + for (int32_t r = 0; r < firstRows; ++r) { + colDataAssignNRows(pOutCol, currRows + r * secondRows, pInCol, pSecond->readIdx, secondRows); + } + } + + pRes->info.rows = append ? 
(pRes->info.rows + firstRows * secondRows) : firstRows * secondRows; + return TSDB_CODE_SUCCESS; +} + + +static int32_t mLeftJoinMergeFullCart(SMJoinMergeCtx* pCtx) { + int32_t rowsLeft = pCtx->finBlk->info.capacity - pCtx->finBlk->info.rows; + SMJoinTableCtx* probe = pCtx->pJoin->probe; + SMJoinTableCtx* build = pCtx->pJoin->build; + SMJoinGrpRows* probeGrp = taosArrayGet(probe->eqGrps, 0); + int32_t buildGrpNum = taosArrayGetSize(build->eqGrps); + int32_t probeRows = GRP_REMAIN_ROWS(probeGrp); + int32_t probeEndIdx = probeGrp->endIdx; + + if (0 == build->grpIdx && probeRows * build->grpTotalRows <= rowsLeft) { + SMJoinGrpRows* pFirstBuild = taosArrayGet(build->eqGrps, 0); + if (pFirstBuild->readIdx == pFirstBuild->beginIdx) { + for (; build->grpIdx < buildGrpNum; ++build->grpIdx) { + SMJoinGrpRows* buildGrp = taosArrayGet(build->eqGrps, build->grpIdx); + MJ_ERR_RET(mLeftJoinGrpEqCart(pCtx->pJoin, pCtx->finBlk, true, probeGrp, buildGrp)); + buildGrp->readIdx = buildGrp->beginIdx; + } + + pCtx->grpRemains = false; + return TSDB_CODE_SUCCESS; + } + } + + for (; !GRP_DONE(probeGrp); ) { + probeGrp->endIdx = probeGrp->readIdx; + for (; build->grpIdx < buildGrpNum && rowsLeft > 0; ++build->grpIdx) { + SMJoinGrpRows* buildGrp = taosArrayGet(build->eqGrps, build->grpIdx); + + if (rowsLeft >= GRP_REMAIN_ROWS(buildGrp)) { + MJ_ERR_RET(mLeftJoinGrpEqCart(pCtx->pJoin, pCtx->finBlk, true, probeGrp, buildGrp)); + rowsLeft -= GRP_REMAIN_ROWS(buildGrp); + buildGrp->readIdx = buildGrp->beginIdx; + continue; + } + + int32_t buildEndIdx = buildGrp->endIdx; + buildGrp->endIdx = buildGrp->readIdx + rowsLeft - 1; + mLeftJoinGrpEqCart(pCtx->pJoin, pCtx->finBlk, true, probeGrp, buildGrp); + buildGrp->readIdx += rowsLeft; + buildGrp->endIdx = buildEndIdx; + rowsLeft = 0; + break; + } + probeGrp->endIdx = probeEndIdx; + + if (build->grpIdx >= buildGrpNum) { + build->grpIdx = 0; + ++probeGrp->readIdx; + } + + if (rowsLeft <= 0) { + break; + } + } + + probeGrp->endIdx = probeEndIdx; + + 
pCtx->grpRemains = probeGrp->readIdx <= probeGrp->endIdx; + + return TSDB_CODE_SUCCESS; +} + +static int32_t mLeftJoinCopyMergeMidBlk(SMJoinMergeCtx* pCtx, SSDataBlock** ppMid, SSDataBlock** ppFin) { + SSDataBlock* pLess = NULL; + SSDataBlock* pMore = NULL; + if ((*ppMid)->info.rows < (*ppFin)->info.rows) { + pLess = (*ppMid); + pMore = (*ppFin); + } else { + pLess = (*ppFin); + pMore = (*ppMid); + } + + int32_t totalRows = pMore->info.rows + pLess->info.rows; + if (totalRows <= pMore->info.capacity) { + MJ_ERR_RET(blockDataMerge(pMore, pLess)); + blockDataCleanup(pLess); + pCtx->midRemains = false; + } else { + int32_t copyRows = pMore->info.capacity - pMore->info.rows; + MJ_ERR_RET(blockDataMergeNRows(pMore, pLess, pLess->info.rows - copyRows, copyRows)); + blockDataShrinkNRows(pLess, copyRows); + pCtx->midRemains = true; + } + + if (pMore != (*ppFin)) { + TSWAP(*ppMid, *ppFin); + } + + return TSDB_CODE_SUCCESS; +} + +static int32_t mLeftJoinMergeSeqCart(SMJoinMergeCtx* pCtx) { + SMJoinTableCtx* probe = pCtx->pJoin->probe; + SMJoinTableCtx* build = pCtx->pJoin->build; + SMJoinGrpRows* probeGrp = taosArrayGet(probe->eqGrps, probe->grpIdx); + int32_t buildGrpNum = taosArrayGetSize(build->eqGrps); + int32_t probeEndIdx = probeGrp->endIdx; + int32_t rowsLeft = pCtx->midBlk->info.capacity; + bool contLoop = true; + + blockDataCleanup(pCtx->midBlk); + + do { + for (; !GRP_DONE(probeGrp) && !BLK_IS_FULL(pCtx->finBlk); + ++probeGrp->readIdx, probeGrp->readMatch = false, probeGrp->endIdx = probeEndIdx, build->grpIdx = 0) { + probeGrp->endIdx = probeGrp->readIdx; + + rowsLeft = pCtx->midBlk->info.capacity; + for (; build->grpIdx < buildGrpNum && rowsLeft > 0; ++build->grpIdx) { + SMJoinGrpRows* buildGrp = taosArrayGet(build->eqGrps, build->grpIdx); + + if (rowsLeft >= GRP_REMAIN_ROWS(buildGrp)) { + MJ_ERR_RET(mLeftJoinGrpEqCart(pCtx->pJoin, pCtx->midBlk, true, probeGrp, buildGrp)); + rowsLeft -= GRP_REMAIN_ROWS(buildGrp); + buildGrp->readIdx = buildGrp->beginIdx; + 
continue; + } + + int32_t buildEndIdx = buildGrp->endIdx; + buildGrp->endIdx = buildGrp->readIdx + rowsLeft - 1; + ASSERT(buildGrp->endIdx >= buildGrp->readIdx); + MJ_ERR_RET(mLeftJoinGrpEqCart(pCtx->pJoin, pCtx->midBlk, true, probeGrp, buildGrp)); + buildGrp->readIdx += rowsLeft; + buildGrp->endIdx = buildEndIdx; + rowsLeft = 0; + break; + } + + if (pCtx->midBlk->info.rows > 0) { + MJ_ERR_RET(doFilter(pCtx->midBlk, pCtx->pJoin->pFPreFilter, NULL)); + if (pCtx->midBlk->info.rows > 0) { + probeGrp->readMatch = true; + } + } + + if (0 == pCtx->midBlk->info.rows) { + if (build->grpIdx == buildGrpNum) { + if (!probeGrp->readMatch) { + MJ_ERR_RET(mLeftJoinGrpNonEqCart(pCtx->pJoin, pCtx->finBlk, true, probeGrp)); + } + + continue; + } + } else { + MJ_ERR_RET(mLeftJoinCopyMergeMidBlk(pCtx, &pCtx->midBlk, &pCtx->finBlk)); + + if (pCtx->midRemains) { + contLoop = false; + } else if (build->grpIdx == buildGrpNum) { + continue; + } + } + + //need break + + probeGrp->endIdx = probeEndIdx; + + if (build->grpIdx >= buildGrpNum) { + build->grpIdx = 0; + ++probeGrp->readIdx; + probeGrp->readMatch = false; + } + + break; + } + + if (GRP_DONE(probeGrp) || BLK_IS_FULL(pCtx->finBlk)) { + break; + } + } while (contLoop); + + pCtx->grpRemains = probeGrp->readIdx <= probeGrp->endIdx; + + return TSDB_CODE_SUCCESS; +} + +static int32_t mLeftJoinMergeCart(SMJoinMergeCtx* pCtx) { + return (NULL == pCtx->pJoin->pFPreFilter) ? 
mLeftJoinMergeFullCart(pCtx) : mLeftJoinMergeSeqCart(pCtx); +} + +static int32_t mLeftJoinNonEqCart(SMJoinMergeCtx* pCtx) { + int32_t rowsLeft = pCtx->finBlk->info.capacity - pCtx->finBlk->info.rows; + SMJoinGrpRows* probeGrp = &pCtx->probeNEqGrp; + if (rowsLeft <= 0) { + pCtx->grpRemains = probeGrp->readIdx <= probeGrp->endIdx; + return TSDB_CODE_SUCCESS; + } + + int32_t probeRows = GRP_REMAIN_ROWS(probeGrp); + + pCtx->lastEqGrp = false; + + if (probeRows <= rowsLeft) { + MJ_ERR_RET(mLeftJoinGrpNonEqCart(pCtx->pJoin, pCtx->finBlk, true, probeGrp)); + probeGrp->readIdx = probeGrp->endIdx + 1; + pCtx->grpRemains = false; + } else { + int32_t probeEndIdx = probeGrp->endIdx; + probeGrp->endIdx = probeGrp->readIdx + rowsLeft - 1; + MJ_ERR_RET(mLeftJoinGrpNonEqCart(pCtx->pJoin, pCtx->finBlk, true, probeGrp)); + probeGrp->readIdx = probeGrp->endIdx + 1; + probeGrp->endIdx = probeEndIdx; + pCtx->grpRemains = true; + } + + return TSDB_CODE_SUCCESS; +} + + +static bool mLeftJoinRetrieve(SOperatorInfo* pOperator, SMJoinOperatorInfo* pJoin, SMJoinMergeCtx* pCtx) { + bool probeGot = mJoinRetrieveImpl(pJoin, &pJoin->probe->blkRowIdx, &pJoin->probe->blk, pJoin->probe); + bool buildGot = false; + + do { + if (probeGot || MJOIN_DS_NEED_INIT(pOperator, pJoin->build)) { + buildGot = mJoinRetrieveImpl(pJoin, &pJoin->build->blkRowIdx, &pJoin->build->blk, pJoin->build); + } + + if (!probeGot) { + mJoinSetDone(pOperator); + return false; + } + + if (buildGot) { + SColumnInfoData* pProbeCol = taosArrayGet(pJoin->probe->blk->pDataBlock, pJoin->probe->primCol->srcSlot); + SColumnInfoData* pBuildCol = taosArrayGet(pJoin->build->blk->pDataBlock, pJoin->build->primCol->srcSlot); + if (*((int64_t*)pProbeCol->pData + pJoin->probe->blkRowIdx) > *((int64_t*)pBuildCol->pData + pJoin->build->blk->info.rows - 1)) { + pJoin->build->blkRowIdx = pJoin->build->blk->info.rows; + continue; + } + } + + break; + } while (true); + + return true; +} + +static bool mLeftJoinHashGrpCart(SSDataBlock* pBlk, 
SMJoinGrpRows* probeGrp, bool append, SMJoinTableCtx* probe, SMJoinTableCtx* build) { + int32_t rowsLeft = append ? (pBlk->info.capacity - pBlk->info.rows) : pBlk->info.capacity; + if (rowsLeft <= 0) { + return false; + } + + int32_t buildGrpRows = taosArrayGetSize(build->pHashCurGrp); + int32_t grpRows = buildGrpRows - build->grpRowIdx; + if (grpRows <= 0 || build->grpRowIdx < 0) { + build->grpRowIdx = -1; + return true; + } + + int32_t actRows = TMIN(grpRows, rowsLeft); + int32_t currRows = append ? pBlk->info.rows : 0; + + for (int32_t c = 0; c < probe->finNum; ++c) { + SMJoinColMap* pFirstCol = probe->finCols + c; + SColumnInfoData* pInCol = taosArrayGet(probeGrp->blk->pDataBlock, pFirstCol->srcSlot); + SColumnInfoData* pOutCol = taosArrayGet(pBlk->pDataBlock, pFirstCol->dstSlot); + if (colDataIsNull_s(pInCol, probeGrp->readIdx)) { + colDataSetNItemsNull(pOutCol, currRows, actRows); + } else { + colDataSetNItems(pOutCol, currRows, colDataGetData(pInCol, probeGrp->readIdx), actRows, true); + } + } + + for (int32_t c = 0; c < build->finNum; ++c) { + SMJoinColMap* pSecondCol = build->finCols + c; + SColumnInfoData* pOutCol = taosArrayGet(pBlk->pDataBlock, pSecondCol->dstSlot); + for (int32_t r = 0; r < actRows; ++r) { + SMJoinRowPos* pRow = taosArrayGet(build->pHashCurGrp, build->grpRowIdx + r); + SColumnInfoData* pInCol = taosArrayGet(pRow->pBlk->pDataBlock, pSecondCol->srcSlot); + colDataAssignNRows(pOutCol, currRows + r, pInCol, pRow->pos, 1); + } + } + + pBlk->info.rows += actRows; + + if (actRows == grpRows) { + build->grpRowIdx = -1; + } else { + build->grpRowIdx += actRows; + } + + if (actRows == rowsLeft) { + return false; + } + + return true; +} + +static int32_t mLeftJoinHashFullCart(SMJoinMergeCtx* pCtx) { + SMJoinTableCtx* probe = pCtx->pJoin->probe; + SMJoinTableCtx* build = pCtx->pJoin->build; + SMJoinGrpRows* probeGrp = taosArrayGet(probe->eqGrps, probe->grpIdx); + + if (build->grpRowIdx >= 0) { + bool contLoop = mLeftJoinHashGrpCart(pCtx->finBlk, 
probeGrp, true, probe, build); + if (build->grpRowIdx < 0) { + probeGrp->readIdx++; + } + + if (!contLoop) { + goto _return; + } + } + + size_t bufLen = 0; + int32_t probeEndIdx = probeGrp->endIdx; + for (; !GRP_DONE(probeGrp) && !BLK_IS_FULL(pCtx->finBlk); ++probeGrp->readIdx) { + if (mJoinCopyKeyColsDataToBuf(probe, probeGrp->readIdx, &bufLen)) { + probeGrp->endIdx = probeGrp->readIdx; + MJ_ERR_RET(mLeftJoinGrpNonEqCart(pCtx->pJoin, pCtx->finBlk, true, probeGrp)); + probeGrp->endIdx = probeEndIdx; + continue; + } + + SArray** pGrp = tSimpleHashGet(build->pGrpHash, probe->keyData, bufLen); + if (NULL == pGrp) { + probeGrp->endIdx = probeGrp->readIdx; + MJ_ERR_RET(mLeftJoinGrpNonEqCart(pCtx->pJoin, pCtx->finBlk, true, probeGrp)); + probeGrp->endIdx = probeEndIdx; + } else { + build->pHashCurGrp = *pGrp; + build->grpRowIdx = 0; + bool contLoop = mLeftJoinHashGrpCart(pCtx->finBlk, probeGrp, true, probe, build); + if (!contLoop) { + if (build->grpRowIdx < 0) { + probeGrp->readIdx++; + } + goto _return; + } + } + } + +_return: + + pCtx->grpRemains = probeGrp->readIdx <= probeGrp->endIdx; + + return TSDB_CODE_SUCCESS; +} + +static int32_t mLeftJoinHashGrpCartFilter(SMJoinMergeCtx* pCtx, bool* contLoop) { + SMJoinTableCtx* probe = pCtx->pJoin->probe; + SMJoinTableCtx* build = pCtx->pJoin->build; + SMJoinGrpRows* probeGrp = taosArrayGet(probe->eqGrps, probe->grpIdx); + + blockDataCleanup(pCtx->midBlk); + + do { + mLeftJoinHashGrpCart(pCtx->midBlk, probeGrp, true, probe, build); + + if (pCtx->midBlk->info.rows > 0) { + MJ_ERR_RET(doFilter(pCtx->midBlk, pCtx->pJoin->pPreFilter, NULL)); + if (pCtx->midBlk->info.rows > 0) { + probeGrp->readMatch = true; + } + } + + if (0 == pCtx->midBlk->info.rows) { + if (build->grpRowIdx < 0) { + if (!probeGrp->readMatch) { + MJ_ERR_RET(mLeftJoinGrpNonEqCart(pCtx->pJoin, pCtx->finBlk, true, probeGrp)); + } + + break; + } + + continue; + } else { + MJ_ERR_RET(mLeftJoinCopyMergeMidBlk(pCtx, &pCtx->midBlk, &pCtx->finBlk)); + + if 
(pCtx->midRemains) { + pCtx->grpRemains = probeGrp->readIdx <= probeGrp->endIdx; + *contLoop = false; + return TSDB_CODE_SUCCESS; + } + + if (build->grpRowIdx < 0) { + break; + } + + continue; + } + } while (true); + + *contLoop = true; + return TSDB_CODE_SUCCESS; +} + + +static int32_t mLeftJoinHashSeqCart(SMJoinMergeCtx* pCtx) { + SMJoinTableCtx* probe = pCtx->pJoin->probe; + SMJoinTableCtx* build = pCtx->pJoin->build; + SMJoinGrpRows* probeGrp = taosArrayGet(probe->eqGrps, 0); + bool contLoop = false; + + if (build->grpRowIdx >= 0) { + MJ_ERR_RET(mLeftJoinHashGrpCartFilter(pCtx, &contLoop)); + if (build->grpRowIdx < 0) { + probeGrp->readIdx++; + probeGrp->readMatch = false; + } + + if (!contLoop) { + goto _return; + } + } + + size_t bufLen = 0; + int32_t probeEndIdx = probeGrp->endIdx; + for (; !GRP_DONE(probeGrp) && !BLK_IS_FULL(pCtx->finBlk);) { + if (mJoinCopyKeyColsDataToBuf(probe, probeGrp->readIdx, &bufLen)) { + probeGrp->endIdx = probeGrp->readIdx; + MJ_ERR_RET(mLeftJoinGrpNonEqCart(pCtx->pJoin, pCtx->finBlk, true, probeGrp)); + probeGrp->endIdx = probeEndIdx; + probeGrp->readIdx++; + probeGrp->readMatch = false; + continue; + } + + SArray** pGrp = tSimpleHashGet(build->pGrpHash, probe->keyData, bufLen); + if (NULL == pGrp) { + probeGrp->endIdx = probeGrp->readIdx; + MJ_ERR_RET(mLeftJoinGrpNonEqCart(pCtx->pJoin, pCtx->finBlk, true, probeGrp)); + probeGrp->endIdx = probeEndIdx; + probeGrp->readIdx++; + probeGrp->readMatch = false; + } else { + build->pHashCurGrp = *pGrp; + build->grpRowIdx = 0; + + probeGrp->endIdx = probeGrp->readIdx; + MJ_ERR_RET(mLeftJoinHashGrpCartFilter(pCtx, &contLoop)); + probeGrp->endIdx = probeEndIdx; + if (build->grpRowIdx < 0) { + probeGrp->readIdx++; + probeGrp->readMatch = false; + } + + if (!contLoop) { + break; + } + } + } + +_return: + + pCtx->grpRemains = probeGrp->readIdx <= probeGrp->endIdx; + + return TSDB_CODE_SUCCESS; +} + + +static int32_t mLeftJoinHashCart(SMJoinMergeCtx* pCtx) { + return (NULL == 
pCtx->pJoin->pPreFilter) ? mLeftJoinHashFullCart(pCtx) : mLeftJoinHashSeqCart(pCtx); +} + +static int32_t mLeftJoinProcessEqualGrp(SMJoinMergeCtx* pCtx, int64_t timestamp, bool lastBuildGrp) { + SMJoinOperatorInfo* pJoin = pCtx->pJoin; + + pCtx->lastEqGrp = true; + + mJoinBuildEqGroups(pJoin->probe, timestamp, NULL, true); + if (!lastBuildGrp) { + mJoinRetrieveEqGrpRows(pJoin->pOperator, pJoin->build, timestamp); + } else { + pJoin->build->grpIdx = 0; + } + + if (pCtx->hashCan && REACH_HJOIN_THRESHOLD(pJoin->probe, pJoin->build)) { + if (!lastBuildGrp || !pCtx->hashJoin) { + MJ_ERR_RET(mJoinMakeBuildTbHash(pJoin, pJoin->build)); + } + + if (pJoin->probe->newBlk) { + MJ_ERR_RET(mJoinSetKeyColsData(pJoin->probe->blk, pJoin->probe)); + pJoin->probe->newBlk = false; + } + + pCtx->hashJoin = true; + + return mLeftJoinHashCart(pCtx); + } + + pCtx->hashJoin = false; + + return mLeftJoinMergeCart(pCtx); +} + +static int32_t mLeftJoinHandleMidRemains(SMJoinMergeCtx* pCtx) { + ASSERT(0 < pCtx->midBlk->info.rows); + + TSWAP(pCtx->midBlk, pCtx->finBlk); + + pCtx->midRemains = false; + + return TSDB_CODE_SUCCESS; +} + + +static int32_t mLeftJoinHandleGrpRemains(SMJoinMergeCtx* pCtx) { + if (pCtx->lastEqGrp) { + return (pCtx->hashJoin) ? mLeftJoinHashCart(pCtx) : mLeftJoinMergeCart(pCtx); + } + + return mLeftJoinNonEqCart(pCtx); +} + +SSDataBlock* mLeftJoinDo(struct SOperatorInfo* pOperator) { + SMJoinOperatorInfo* pJoin = pOperator->info; + SMJoinMergeCtx* pCtx = &pJoin->ctx.mergeCtx; + int32_t code = TSDB_CODE_SUCCESS; + int64_t probeTs = 0; + int64_t buildTs = 0; + SColumnInfoData* pBuildCol = NULL; + SColumnInfoData* pProbeCol = NULL; + bool asc = (pJoin->inputTsOrder == TSDB_ORDER_ASC) ? 
true : false; + + blockDataCleanup(pCtx->finBlk); + + if (pCtx->midRemains) { + MJ_ERR_JRET(mLeftJoinHandleMidRemains(pCtx)); + if (pCtx->finBlk->info.rows >= pCtx->blkThreshold) { + return pCtx->finBlk; + } + pCtx->midRemains = false; + } + + if (pCtx->grpRemains) { + MJ_ERR_JRET(mLeftJoinHandleGrpRemains(pCtx)); + if (pCtx->finBlk->info.rows >= pCtx->blkThreshold) { + return pCtx->finBlk; + } + pCtx->grpRemains = false; + } + + do { + if (!mLeftJoinRetrieve(pOperator, pJoin, pCtx)) { + break; + } + + MJOIN_GET_TB_COL_TS(pBuildCol, buildTs, pJoin->build); + MJOIN_GET_TB_COL_TS(pProbeCol, probeTs, pJoin->probe); + + if (probeTs == pCtx->lastEqTs) { + MJ_ERR_JRET(mLeftJoinProcessEqualGrp(pCtx, probeTs, true)); + if (pCtx->finBlk->info.rows >= pCtx->blkThreshold) { + return pCtx->finBlk; + } + + if (MJOIN_PROBE_TB_ROWS_DONE(pJoin->probe)) { + continue; + } else { + MJOIN_GET_TB_CUR_TS(pProbeCol, probeTs, pJoin->probe); + } + } + + while (!MJOIN_PROBE_TB_ROWS_DONE(pJoin->probe) && !MJOIN_BUILD_TB_ROWS_DONE(pJoin->build)) { + if (probeTs == buildTs) { + pCtx->lastEqTs = probeTs; + MJ_ERR_JRET(mLeftJoinProcessEqualGrp(pCtx, probeTs, false)); + if (pCtx->finBlk->info.rows >= pCtx->blkThreshold) { + return pCtx->finBlk; + } + + MJOIN_GET_TB_COL_TS(pBuildCol, buildTs, pJoin->build); + MJOIN_GET_TB_COL_TS(pProbeCol, probeTs, pJoin->probe); + } else if (LEFT_JOIN_NO_EQUAL(asc, probeTs, buildTs)) { + pCtx->probeNEqGrp.blk = pJoin->probe->blk; + pCtx->probeNEqGrp.beginIdx = pJoin->probe->blkRowIdx; + pCtx->probeNEqGrp.readIdx = pCtx->probeNEqGrp.beginIdx; + pCtx->probeNEqGrp.endIdx = pCtx->probeNEqGrp.beginIdx; + + while (++pJoin->probe->blkRowIdx < pJoin->probe->blk->info.rows) { + MJOIN_GET_TB_CUR_TS(pProbeCol, probeTs, pJoin->probe); + if (LEFT_JOIN_NO_EQUAL(asc, probeTs, buildTs)) { + pCtx->probeNEqGrp.endIdx = pJoin->probe->blkRowIdx; + continue; + } + + break; + } + + MJ_ERR_JRET(mLeftJoinNonEqCart(pCtx)); + if (pCtx->finBlk->info.rows >= pCtx->blkThreshold) { + return 
pCtx->finBlk; + } + } else { + while (++pJoin->build->blkRowIdx < pJoin->build->blk->info.rows) { + MJOIN_GET_TB_CUR_TS(pBuildCol, buildTs, pJoin->build); + if (LEFT_JOIN_DISCRAD(asc, probeTs, buildTs)) { + continue; + } + + break; + } + } + } + + if (!MJOIN_PROBE_TB_ROWS_DONE(pJoin->probe) && pJoin->build->dsFetchDone) { + pCtx->probeNEqGrp.blk = pJoin->probe->blk; + pCtx->probeNEqGrp.beginIdx = pJoin->probe->blkRowIdx; + pCtx->probeNEqGrp.readIdx = pCtx->probeNEqGrp.beginIdx; + pCtx->probeNEqGrp.endIdx = pJoin->probe->blk->info.rows - 1; + + pJoin->probe->blkRowIdx = pJoin->probe->blk->info.rows; + + MJ_ERR_JRET(mLeftJoinNonEqCart(pCtx)); + if (pCtx->finBlk->info.rows >= pCtx->blkThreshold) { + return pCtx->finBlk; + } + } + } while (true); + +_return: + + if (code) { + pJoin->errCode = code; + return NULL; + } + + return pCtx->finBlk; +} + +static bool mInnerJoinRetrieve(SOperatorInfo* pOperator, SMJoinOperatorInfo* pJoin, SMJoinMergeCtx* pCtx) { + bool probeGot = mJoinRetrieveImpl(pJoin, &pJoin->probe->blkRowIdx, &pJoin->probe->blk, pJoin->probe); + bool buildGot = false; + + if (probeGot || MJOIN_DS_NEED_INIT(pOperator, pJoin->build)) { + buildGot = mJoinRetrieveImpl(pJoin, &pJoin->build->blkRowIdx, &pJoin->build->blk, pJoin->build); + } + + if (!probeGot || !buildGot) { + mJoinSetDone(pOperator); + return false; + } + + return true; +} + + +SSDataBlock* mInnerJoinDo(struct SOperatorInfo* pOperator) { + SMJoinOperatorInfo* pJoin = pOperator->info; + SMJoinMergeCtx* pCtx = &pJoin->ctx.mergeCtx; + int32_t code = TSDB_CODE_SUCCESS; + int64_t probeTs = 0; + int64_t buildTs = 0; + SColumnInfoData* pBuildCol = NULL; + SColumnInfoData* pProbeCol = NULL; + bool asc = (pJoin->inputTsOrder == TSDB_ORDER_ASC) ? 
true : false; + + blockDataCleanup(pCtx->finBlk); + + if (pCtx->grpRemains) { + MJ_ERR_JRET(mLeftJoinHandleGrpRemains(pCtx)); + if (pCtx->finBlk->info.rows >= pCtx->blkThreshold) { + return pCtx->finBlk; + } + pCtx->grpRemains = false; + } + + do { + if (!mInnerJoinRetrieve(pOperator, pJoin, pCtx)) { + break; + } + + MJOIN_GET_TB_COL_TS(pBuildCol, buildTs, pJoin->build); + MJOIN_GET_TB_COL_TS(pProbeCol, probeTs, pJoin->probe); + + if (probeTs == pCtx->lastEqTs) { + MJ_ERR_JRET(mLeftJoinProcessEqualGrp(pCtx, probeTs, true)); + if (pCtx->finBlk->info.rows >= pCtx->blkThreshold) { + return pCtx->finBlk; + } + + if (MJOIN_PROBE_TB_ROWS_DONE(pJoin->probe)) { + continue; + } else { + MJOIN_GET_TB_CUR_TS(pProbeCol, probeTs, pJoin->probe); + } + } + + while (!MJOIN_PROBE_TB_ROWS_DONE(pJoin->probe) && !MJOIN_BUILD_TB_ROWS_DONE(pJoin->build)) { + if (probeTs == buildTs) { + pCtx->lastEqTs = probeTs; + MJ_ERR_JRET(mLeftJoinProcessEqualGrp(pCtx, probeTs, false)); + if (pCtx->finBlk->info.rows >= pCtx->blkThreshold) { + return pCtx->finBlk; + } + + MJOIN_GET_TB_COL_TS(pBuildCol, buildTs, pJoin->build); + MJOIN_GET_TB_COL_TS(pProbeCol, probeTs, pJoin->probe); + } else if (LEFT_JOIN_NO_EQUAL(asc, probeTs, buildTs)) { + pCtx->probeNEqGrp.blk = pJoin->probe->blk; + pCtx->probeNEqGrp.beginIdx = pJoin->probe->blkRowIdx; + pCtx->probeNEqGrp.readIdx = pCtx->probeNEqGrp.beginIdx; + pCtx->probeNEqGrp.endIdx = pCtx->probeNEqGrp.beginIdx; + + while (++pJoin->probe->blkRowIdx < pJoin->probe->blk->info.rows) { + MJOIN_GET_TB_CUR_TS(pProbeCol, probeTs, pJoin->probe); + if (LEFT_JOIN_NO_EQUAL(asc, probeTs, buildTs)) { + pCtx->probeNEqGrp.endIdx = pJoin->probe->blkRowIdx; + continue; + } + + break; + } + + MJ_ERR_JRET(mLeftJoinNonEqCart(pCtx)); + if (pCtx->finBlk->info.rows >= pCtx->blkThreshold) { + return pCtx->finBlk; + } + } else { + while (++pJoin->build->blkRowIdx < pJoin->build->blk->info.rows) { + MJOIN_GET_TB_CUR_TS(pBuildCol, buildTs, pJoin->build); + if (LEFT_JOIN_DISCRAD(asc, 
probeTs, buildTs)) { + continue; + } + + break; + } + } + } + + if (!MJOIN_PROBE_TB_ROWS_DONE(pJoin->probe) && pJoin->build->dsFetchDone) { + pCtx->probeNEqGrp.blk = pJoin->probe->blk; + pCtx->probeNEqGrp.beginIdx = pJoin->probe->blkRowIdx; + pCtx->probeNEqGrp.readIdx = pCtx->probeNEqGrp.beginIdx; + pCtx->probeNEqGrp.endIdx = pJoin->probe->blk->info.rows - 1; + + pJoin->probe->blkRowIdx = pJoin->probe->blk->info.rows; + + MJ_ERR_JRET(mLeftJoinNonEqCart(pCtx)); + if (pCtx->finBlk->info.rows >= pCtx->blkThreshold) { + return pCtx->finBlk; + } + } + } while (true); + +_return: + + if (code) { + pJoin->errCode = code; + return NULL; + } + + return pCtx->finBlk; +} + + + diff --git a/source/libs/executor/src/mergejoinoperator.c b/source/libs/executor/src/mergejoinoperator.c index b382821691..d3162c91bc 100644 --- a/source/libs/executor/src/mergejoinoperator.c +++ b/source/libs/executor/src/mergejoinoperator.c @@ -141,7 +141,7 @@ static int32_t mJoinInitTableInfo(SMJoinOperatorInfo* pJoin, SSortMergeJoinPhysi if (E_JOIN_TB_BUILD == pTable->type) { pTable->createdBlks = taosArrayInit(8, POINTER_BYTES); pTable->pGrpArrays = taosArrayInit(32, POINTER_BYTES); - pTable->pGrpHash = tSimpleHashInit(1024, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT)); + pTable->pGrpHash = tSimpleHashInit(1024, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY)); if (NULL == pTable->createdBlks || NULL == pTable->pGrpArrays || NULL == pTable->pGrpHash) { return TSDB_CODE_OUT_OF_MEMORY; } @@ -190,26 +190,6 @@ static void mJoinSetBuildAndProbeTable(SMJoinOperatorInfo* pInfo, SSortMergeJoin pInfo->probe->type = E_JOIN_TB_PROBE; } -static int32_t mJoinInitMergeCtx(SMJoinOperatorInfo* pJoin, SSortMergeJoinPhysiNode* pJoinNode) { - SMJoinMergeCtx* pCtx = &pJoin->ctx.mergeCtx; - - pCtx->pJoin = pJoin; - pCtx->lastEqTs = INT64_MIN; - pCtx->hashCan = pJoin->probe->keyNum > 0; - - pCtx->finBlk = createDataBlockFromDescNode(pJoinNode->node.pOutputDataBlockDesc); - blockDataEnsureCapacity(pCtx->finBlk, 
TMAX(MJOIN_DEFAULT_BLK_ROWS_NUM, MJOIN_BLK_SIZE_LIMIT/pJoinNode->node.pOutputDataBlockDesc->totalRowSize)); - - if (pJoin->pFPreFilter) { - pCtx->midBlk = createOneDataBlock(pCtx->finBlk, false); - blockDataEnsureCapacity(pCtx->midBlk, pCtx->finBlk->info.capacity); - } - - pCtx->blkThreshold = pCtx->finBlk->info.capacity * 0.5; - - return TSDB_CODE_SUCCESS; -} - static int32_t mJoinInitCtx(SMJoinOperatorInfo* pJoin, SSortMergeJoinPhysiNode* pJoinNode) { #if 0 pJoin->joinFps = &gMJoinFps[pJoin->joinType][pJoin->subType]; @@ -223,7 +203,7 @@ static int32_t mJoinInitCtx(SMJoinOperatorInfo* pJoin, SSortMergeJoinPhysiNode* #endif } -static void mJoinSetDone(SOperatorInfo* pOperator) { +void mJoinSetDone(SOperatorInfo* pOperator) { setOperatorCompleted(pOperator); if (pOperator->pDownstreamGetParams) { freeOperatorParam(pOperator->pDownstreamGetParams[0], OP_GET_PARAM); @@ -233,252 +213,7 @@ static void mJoinSetDone(SOperatorInfo* pOperator) { } } -static int32_t mLeftJoinGrpNonEqCart(SMJoinOperatorInfo* pJoin, SSDataBlock* pRes, bool append, SMJoinGrpRows* pGrp) { - SMJoinTableCtx* probe = pJoin->probe; - SMJoinTableCtx* build = pJoin->build; - int32_t currRows = append ? pRes->info.rows : 0; - int32_t firstRows = GRP_REMAIN_ROWS(pGrp); - - for (int32_t c = 0; c < probe->finNum; ++c) { - SMJoinColMap* pFirstCol = probe->finCols + c; - SColumnInfoData* pInCol = taosArrayGet(pGrp->blk->pDataBlock, pFirstCol->srcSlot); - SColumnInfoData* pOutCol = taosArrayGet(pRes->pDataBlock, pFirstCol->dstSlot); - colDataAssignNRows(pOutCol, currRows, pInCol, pGrp->readIdx, firstRows); - } - - for (int32_t c = 0; c < build->finNum; ++c) { - SMJoinColMap* pSecondCol = build->finCols + c; - SColumnInfoData* pOutCol = taosArrayGet(pRes->pDataBlock, pSecondCol->dstSlot); - colDataSetNItemsNull(pOutCol, currRows, firstRows); - } - - pRes->info.rows = append ? 
(pRes->info.rows + firstRows) : firstRows; - return TSDB_CODE_SUCCESS; -} - -static int32_t mLeftJoinGrpEqCart(SMJoinOperatorInfo* pJoin, SSDataBlock* pRes, bool append, SMJoinGrpRows* pFirst, SMJoinGrpRows* pSecond) { - SMJoinTableCtx* probe = pJoin->probe; - SMJoinTableCtx* build = pJoin->build; - int32_t currRows = append ? pRes->info.rows : 0; - int32_t firstRows = GRP_REMAIN_ROWS(pFirst); - int32_t secondRows = GRP_REMAIN_ROWS(pSecond); - - for (int32_t c = 0; c < probe->finNum; ++c) { - SMJoinColMap* pFirstCol = probe->finCols + c; - SColumnInfoData* pInCol = taosArrayGet(pFirst->blk->pDataBlock, pFirstCol->srcSlot); - SColumnInfoData* pOutCol = taosArrayGet(pRes->pDataBlock, pFirstCol->dstSlot); - for (int32_t r = 0; r < firstRows; ++r) { - if (colDataIsNull_s(pInCol, pFirst->readIdx + r)) { - colDataSetNItemsNull(pOutCol, currRows + r * secondRows, secondRows); - } else { - colDataSetNItems(pOutCol, currRows + r * secondRows, colDataGetData(pInCol, pFirst->beginIdx + r), secondRows, true); - } - } - } - - for (int32_t c = 0; c < build->finNum; ++c) { - SMJoinColMap* pSecondCol = build->finCols + c; - SColumnInfoData* pInCol = taosArrayGet(pSecond->blk->pDataBlock, pSecondCol->srcSlot); - SColumnInfoData* pOutCol = taosArrayGet(pRes->pDataBlock, pSecondCol->dstSlot); - for (int32_t r = 0; r < firstRows; ++r) { - colDataAssignNRows(pOutCol, currRows + r * secondRows, pInCol, pSecond->readIdx, secondRows); - } - } - - pRes->info.rows = append ? 
(pRes->info.rows + firstRows * secondRows) : firstRows * secondRows; - return TSDB_CODE_SUCCESS; -} - - -static int32_t mLeftJoinMergeFullCart(SMJoinMergeCtx* pCtx) { - int32_t rowsLeft = pCtx->finBlk->info.capacity - pCtx->finBlk->info.rows; - SMJoinTableCtx* probe = pCtx->pJoin->probe; - SMJoinTableCtx* build = pCtx->pJoin->build; - SMJoinGrpRows* probeGrp = taosArrayGet(probe->eqGrps, 0); - int32_t buildGrpNum = taosArrayGetSize(build->eqGrps); - int32_t probeRows = GRP_REMAIN_ROWS(probeGrp); - int32_t probeEndIdx = probeGrp->endIdx; - - if (probeRows * build->grpTotalRows <= rowsLeft) { - for (; build->grpIdx < buildGrpNum; ++build->grpIdx) { - SMJoinGrpRows* buildGrp = taosArrayGet(build->eqGrps, build->grpIdx); - MJ_ERR_RET(mLeftJoinGrpEqCart(pCtx->pJoin, pCtx->finBlk, true, probeGrp, buildGrp)); - } - - pCtx->grpRemains = false; - return TSDB_CODE_SUCCESS; - } - - for (; !GRP_DONE(probeGrp); ++probeGrp->readIdx, build->grpIdx = 0) { - probeGrp->endIdx = probeGrp->readIdx; - for (; build->grpIdx < buildGrpNum; ++build->grpIdx) { - SMJoinGrpRows* buildGrp = taosArrayGet(build->eqGrps, build->grpIdx); - - if (rowsLeft >= GRP_REMAIN_ROWS(buildGrp)) { - MJ_ERR_RET(mLeftJoinGrpEqCart(pCtx->pJoin, pCtx->finBlk, true, probeGrp, buildGrp)); - rowsLeft -= GRP_REMAIN_ROWS(buildGrp); - continue; - } - - int32_t buildEndIdx = buildGrp->endIdx; - buildGrp->endIdx = buildGrp->readIdx + rowsLeft - 1; - mLeftJoinGrpEqCart(pCtx->pJoin, pCtx->finBlk, true, probeGrp, buildGrp); - buildGrp->readIdx += rowsLeft; - buildGrp->endIdx = buildEndIdx; - rowsLeft = 0; - break; - } - - if (rowsLeft <= 0) { - break; - } - } - - probeGrp->endIdx = probeEndIdx; - - pCtx->grpRemains = probeGrp->readIdx <= probeGrp->endIdx; - - return TSDB_CODE_SUCCESS; -} - -static int32_t mLeftJoinCopyMergeMidBlk(SMJoinMergeCtx* pCtx, SSDataBlock** ppMid, SSDataBlock** ppFin) { - SSDataBlock* pLess = NULL; - SSDataBlock* pMore = NULL; - if ((*ppMid)->info.rows < (*ppFin)->info.rows) { - pLess = (*ppMid); - 
pMore = (*ppFin); - } else { - pLess = (*ppFin); - pMore = (*ppMid); - } - - int32_t totalRows = pMore->info.rows + pLess->info.rows; - if (totalRows <= pMore->info.capacity) { - MJ_ERR_RET(blockDataMerge(pMore, pLess)); - blockDataReset(pLess); - pCtx->midRemains = false; - } else { - int32_t copyRows = pMore->info.capacity - pMore->info.rows; - MJ_ERR_RET(blockDataMergeNRows(pMore, pLess, pLess->info.rows - copyRows, copyRows)); - pCtx->midRemains = true; - } - - if (pMore != (*ppFin)) { - TSWAP(*ppMid, *ppFin); - } - - return TSDB_CODE_SUCCESS; -} - -static int32_t mLeftJoinMergeSeqCart(SMJoinMergeCtx* pCtx) { - SMJoinTableCtx* probe = pCtx->pJoin->probe; - SMJoinTableCtx* build = pCtx->pJoin->build; - SMJoinGrpRows* probeGrp = taosArrayGet(probe->eqGrps, probe->grpIdx); - int32_t buildGrpNum = taosArrayGetSize(build->eqGrps); - int32_t probeEndIdx = probeGrp->endIdx; - int32_t rowsLeft = pCtx->midBlk->info.capacity; - bool contLoop = true; - - blockDataReset(pCtx->midBlk); - - do { - for (; !GRP_DONE(probeGrp) && !BLK_IS_FULL(pCtx->finBlk); ++probeGrp->readIdx, probeGrp->readMatch = false, build->grpIdx = 0) { - probeGrp->endIdx = probeGrp->readIdx; - for (; build->grpIdx < buildGrpNum; ++build->grpIdx) { - SMJoinGrpRows* buildGrp = taosArrayGet(build->eqGrps, build->grpIdx); - - if (rowsLeft >= GRP_REMAIN_ROWS(buildGrp)) { - MJ_ERR_RET(mLeftJoinGrpEqCart(pCtx->pJoin, pCtx->midBlk, true, probeGrp, buildGrp)); - rowsLeft -= GRP_REMAIN_ROWS(buildGrp); - continue; - } - - int32_t buildEndIdx = buildGrp->endIdx; - buildGrp->endIdx = buildGrp->readIdx + rowsLeft - 1; - MJ_ERR_RET(mLeftJoinGrpEqCart(pCtx->pJoin, pCtx->midBlk, true, probeGrp, buildGrp)); - buildGrp->readIdx += rowsLeft; - buildGrp->endIdx = buildEndIdx; - rowsLeft = 0; - break; - } - - if (pCtx->midBlk->info.rows > 0) { - MJ_ERR_RET(doFilter(pCtx->midBlk, pCtx->pJoin->pFPreFilter, NULL)); - if (pCtx->midBlk->info.rows > 0) { - probeGrp->readMatch = true; - } - } - - if (0 == pCtx->midBlk->info.rows) { 
- if (build->grpIdx == buildGrpNum) { - if (!probeGrp->readMatch) { - MJ_ERR_RET(mLeftJoinGrpNonEqCart(pCtx->pJoin, pCtx->finBlk, true, probeGrp)); - } - - continue; - } - - break; - } else { - MJ_ERR_RET(mLeftJoinCopyMergeMidBlk(pCtx, &pCtx->midBlk, &pCtx->finBlk)); - - if (pCtx->midRemains) { - contLoop = false; - break; - } - - if (build->grpIdx == buildGrpNum) { - continue; - } - - break; - } - } - - if (GRP_DONE(probeGrp) || BLK_IS_FULL(pCtx->finBlk)) { - break; - } - - rowsLeft = pCtx->midBlk->info.capacity; - } while (contLoop); - - probeGrp->endIdx = probeEndIdx; - - pCtx->grpRemains = probeGrp->readIdx <= probeGrp->endIdx; - - return TSDB_CODE_SUCCESS; -} - -static int32_t mLeftJoinMergeCart(SMJoinMergeCtx* pCtx) { - return (NULL == pCtx->pJoin->pFPreFilter) ? mLeftJoinMergeFullCart(pCtx) : mLeftJoinMergeSeqCart(pCtx); -} - -static int32_t mLeftJoinNonEqCart(SMJoinMergeCtx* pCtx) { - int32_t rowsLeft = pCtx->finBlk->info.capacity - pCtx->finBlk->info.rows; - SMJoinGrpRows* probeGrp = &pCtx->probeNEqGrp; - int32_t probeRows = GRP_REMAIN_ROWS(probeGrp); - - pCtx->lastEqGrp = false; - - if (probeRows <= rowsLeft) { - MJ_ERR_RET(mLeftJoinGrpNonEqCart(pCtx->pJoin, pCtx->finBlk, true, probeGrp)); - probeGrp->readIdx = probeGrp->endIdx + 1; - pCtx->grpRemains = false; - } else { - int32_t probeEndIdx = probeGrp->endIdx; - probeGrp->endIdx = probeGrp->readIdx + rowsLeft - 1; - MJ_ERR_RET(mLeftJoinGrpNonEqCart(pCtx->pJoin, pCtx->finBlk, true, probeGrp)); - probeGrp->readIdx = probeGrp->endIdx + 1; - probeGrp->endIdx = probeEndIdx; - pCtx->grpRemains = true; - } - - return TSDB_CODE_SUCCESS; -} - - - -static bool mJoinRetrieveImpl(SMJoinOperatorInfo* pJoin, int32_t* pIdx, SSDataBlock** ppBlk, SMJoinTableCtx* pTb) { +bool mJoinRetrieveImpl(SMJoinOperatorInfo* pJoin, int32_t* pIdx, SSDataBlock** ppBlk, SMJoinTableCtx* pTb) { if (pTb->dsFetchDone) { return (NULL == (*ppBlk) || *pIdx >= (*ppBlk)->info.rows) ? 
false : true; } @@ -492,6 +227,8 @@ static bool mJoinRetrieveImpl(SMJoinOperatorInfo* pJoin, int32_t* pIdx, SSDataBl *pIdx = 0; if (NULL == (*ppBlk)) { pTb->dsFetchDone = true; + } else { + pTb->newBlk = true; } return ((*ppBlk) == NULL) ? false : true; @@ -500,35 +237,6 @@ static bool mJoinRetrieveImpl(SMJoinOperatorInfo* pJoin, int32_t* pIdx, SSDataBl return true; } - -static bool mLeftJoinRetrieve(SOperatorInfo* pOperator, SMJoinOperatorInfo* pJoin, SMJoinMergeCtx* pCtx) { - bool probeGot = mJoinRetrieveImpl(pJoin, &pJoin->probe->blkRowIdx, &pJoin->probe->blk, pJoin->probe); - bool buildGot = false; - - do { - if (probeGot || MJOIN_DS_NEED_INIT(pOperator, pJoin->build)) { - buildGot = mJoinRetrieveImpl(pJoin, &pJoin->build->blkRowIdx, &pJoin->build->blk, pJoin->build); - } - - if (!probeGot) { - mJoinSetDone(pOperator); - return false; - } - - if (buildGot) { - SColumnInfoData* pProbeCol = taosArrayGet(pJoin->probe->blk->pDataBlock, pJoin->probe->primCol->srcSlot); - SColumnInfoData* pBuildCol = taosArrayGet(pJoin->build->blk->pDataBlock, pJoin->build->primCol->srcSlot); - if (*((int64_t*)pProbeCol->pData + pJoin->probe->blkRowIdx) > *((int64_t*)pBuildCol->pData + pJoin->build->blk->info.rows - 1)) { - continue; - } - } - - break; - } while (true); - - return true; -} - static void mJoinDestroyCreatedBlks(SArray* pCreatedBlks) { int32_t blkNum = taosArrayGetSize(pCreatedBlks); for (int32_t i = 0; i < blkNum; ++i) { @@ -537,19 +245,23 @@ static void mJoinDestroyCreatedBlks(SArray* pCreatedBlks) { taosArrayClear(pCreatedBlks); } -static void mJoinBuildEqGroups(SMJoinTableCtx* pTable, int64_t timestamp, bool* wholeBlk, bool restart) { +void mJoinBuildEqGroups(SMJoinTableCtx* pTable, int64_t timestamp, bool* wholeBlk, bool restart) { SColumnInfoData* pCol = taosArrayGet(pTable->blk->pDataBlock, pTable->primCol->srcSlot); SMJoinGrpRows* pGrp = NULL; + if (*(int64_t*)colDataGetData(pCol, pTable->blkRowIdx) != timestamp) { + return; + } + if (restart) { 
pTable->grpTotalRows = 0; pTable->grpIdx = 0; mJoinDestroyCreatedBlks(pTable->createdBlks); - pGrp = taosArrayGet(pTable->eqGrps, 0); - } else { - pGrp = taosArrayReserve(pTable->eqGrps, 1); + taosArrayClear(pTable->eqGrps); } + pGrp = taosArrayReserve(pTable->eqGrps, 1); + pGrp->beginIdx = pTable->blkRowIdx++; pGrp->readIdx = pGrp->beginIdx; pGrp->endIdx = pGrp->beginIdx; @@ -584,7 +296,7 @@ static void mJoinBuildEqGroups(SMJoinTableCtx* pTable, int64_t timestamp, bool* } -static int32_t mJoinRetrieveEqGrpRows(SOperatorInfo* pOperator, SMJoinTableCtx* pTable, int64_t timestamp) { +int32_t mJoinRetrieveEqGrpRows(SOperatorInfo* pOperator, SMJoinTableCtx* pTable, int64_t timestamp) { bool wholeBlk = false; mJoinBuildEqGroups(pTable, timestamp, &wholeBlk, true); @@ -607,7 +319,7 @@ static int32_t mJoinRetrieveEqGrpRows(SOperatorInfo* pOperator, SMJoinTableCtx* return TSDB_CODE_SUCCESS; } -static int32_t mJoinSetKeyColsData(SSDataBlock* pBlock, SMJoinTableCtx* pTable) { +int32_t mJoinSetKeyColsData(SSDataBlock* pBlock, SMJoinTableCtx* pTable) { for (int32_t i = 0; i < pTable->keyNum; ++i) { SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, pTable->keyCols[i].srcSlot); if (pTable->keyCols[i].vardata != IS_VAR_DATA_TYPE(pCol->info.type)) { @@ -628,7 +340,7 @@ static int32_t mJoinSetKeyColsData(SSDataBlock* pBlock, SMJoinTableCtx* pTable) return TSDB_CODE_SUCCESS; } -static FORCE_INLINE bool mJoinCopyKeyColsDataToBuf(SMJoinTableCtx* pTable, int32_t rowIdx, size_t *pBufLen) { +bool mJoinCopyKeyColsDataToBuf(SMJoinTableCtx* pTable, int32_t rowIdx, size_t *pBufLen) { char *pData = NULL; size_t bufLen = 0; @@ -705,11 +417,13 @@ static int32_t mJoinAddRowToHash(SMJoinOperatorInfo* pJoin, size_t keyLen, SSDat } -static int32_t mJoinMakeBuildTbHash(SMJoinOperatorInfo* pJoin, SMJoinTableCtx* pTable) { +int32_t mJoinMakeBuildTbHash(SMJoinOperatorInfo* pJoin, SMJoinTableCtx* pTable) { size_t bufLen = 0; tSimpleHashClear(pJoin->build->pGrpHash); pJoin->build->grpArrayIdx = 0; 
+ + pJoin->build->grpRowIdx = -1; int32_t grpNum = taosArrayGetSize(pTable->eqGrps); for (int32_t g = 0; g < grpNum; ++g) { @@ -718,7 +432,7 @@ static int32_t mJoinMakeBuildTbHash(SMJoinOperatorInfo* pJoin, SMJoinTableCtx* p int32_t grpRows = GRP_REMAIN_ROWS(pGrp); for (int32_t r = 0; r < grpRows; ++r) { - if (mJoinCopyKeyColsDataToBuf(pTable, r, &bufLen)) { + if (mJoinCopyKeyColsDataToBuf(pTable, pGrp->beginIdx + r, &bufLen)) { continue; } @@ -729,362 +443,6 @@ static int32_t mJoinMakeBuildTbHash(SMJoinOperatorInfo* pJoin, SMJoinTableCtx* p return TSDB_CODE_SUCCESS; } -static bool mLeftJoinHashGrpCart(SSDataBlock* pBlk, SMJoinGrpRows* probeGrp, bool append, SMJoinTableCtx* probe, SMJoinTableCtx* build) { - int32_t rowsLeft = append ? (pBlk->info.capacity - pBlk->info.rows) : pBlk->info.capacity; - if (rowsLeft <= 0) { - return false; - } - - int32_t buildGrpRows = taosArrayGetSize(build->pHashCurGrp); - int32_t grpRows = buildGrpRows - build->grpRowIdx; - if (grpRows <= 0) { - return true; - } - - int32_t actRows = TMIN(grpRows, rowsLeft); - int32_t currRows = append ? 
pBlk->info.rows : 0; - - for (int32_t c = 0; c < probe->finNum; ++c) { - SMJoinColMap* pFirstCol = probe->finCols + c; - SColumnInfoData* pInCol = taosArrayGet(probeGrp->blk->pDataBlock, pFirstCol->srcSlot); - SColumnInfoData* pOutCol = taosArrayGet(pBlk->pDataBlock, pFirstCol->dstSlot); - if (colDataIsNull_s(pInCol, probeGrp->readIdx)) { - colDataSetNItemsNull(pOutCol, currRows, actRows); - } else { - colDataSetNItems(pOutCol, currRows, colDataGetData(pInCol, probeGrp->readIdx), actRows, true); - } - } - - for (int32_t c = 0; c < build->finNum; ++c) { - SMJoinColMap* pSecondCol = build->finCols + c; - SColumnInfoData* pOutCol = taosArrayGet(pBlk->pDataBlock, pSecondCol->dstSlot); - for (int32_t r = 0; r < actRows; ++r) { - SMJoinRowPos* pRow = taosArrayGet(build->pHashCurGrp, r); - SColumnInfoData* pInCol = taosArrayGet(pRow->pBlk->pDataBlock, pSecondCol->srcSlot); - colDataAssignNRows(pOutCol, currRows + r, pInCol, pRow->pos, 1); - } - } - - pBlk->info.rows += actRows; - - if (actRows == grpRows) { - build->grpRowIdx = -1; - } else { - build->grpRowIdx += actRows; - } - - if (actRows == rowsLeft) { - return false; - } - - return true; -} - -static int32_t mLeftJoinHashFullCart(SMJoinMergeCtx* pCtx) { - SMJoinTableCtx* probe = pCtx->pJoin->probe; - SMJoinTableCtx* build = pCtx->pJoin->build; - SMJoinGrpRows* probeGrp = taosArrayGet(probe->eqGrps, probe->grpIdx); - - if (build->grpRowIdx >= 0) { - bool contLoop = mLeftJoinHashGrpCart(pCtx->finBlk, probeGrp, true, probe, build); - if (build->grpRowIdx < 0) { - probeGrp->readIdx++; - } - - if (!contLoop) { - goto _return; - } - } - - size_t bufLen = 0; - int32_t probeEndIdx = probeGrp->endIdx; - for (; !GRP_DONE(probeGrp) && !BLK_IS_FULL(pCtx->finBlk); ++probeGrp->readIdx) { - if (mJoinCopyKeyColsDataToBuf(probe, probeGrp->readIdx, &bufLen)) { - continue; - } - - SArray** pGrp = tSimpleHashGet(build->pGrpHash, probe->keyData, bufLen); - if (NULL == pGrp) { - probeGrp->endIdx = probeGrp->readIdx; - 
MJ_ERR_RET(mLeftJoinGrpNonEqCart(pCtx->pJoin, pCtx->finBlk, true, probeGrp)); - probeGrp->endIdx = probeEndIdx; - } else { - build->pHashCurGrp = *pGrp; - build->grpRowIdx = 0; - if (!mLeftJoinHashGrpCart(pCtx->finBlk, probeGrp, true, probe, build)) { - break; - } - } - } - -_return: - - pCtx->grpRemains = probeGrp->readIdx <= probeGrp->endIdx; - - return TSDB_CODE_SUCCESS; -} - -static int32_t mLeftJoinHashGrpCartFilter(SMJoinMergeCtx* pCtx, bool* contLoop) { - SMJoinTableCtx* probe = pCtx->pJoin->probe; - SMJoinTableCtx* build = pCtx->pJoin->build; - SMJoinGrpRows* probeGrp = taosArrayGet(probe->eqGrps, probe->grpIdx); - - blockDataReset(pCtx->midBlk); - - do { - mLeftJoinHashGrpCart(pCtx->midBlk, probeGrp, true, probe, build); - if (build->grpRowIdx < 0) { - probeGrp->readIdx++; - } - - if (pCtx->midBlk->info.rows > 0) { - MJ_ERR_RET(doFilter(pCtx->midBlk, pCtx->pJoin->pPreFilter, NULL)); - if (pCtx->midBlk->info.rows > 0) { - probeGrp->readMatch = true; - } - } - - if (0 == pCtx->midBlk->info.rows) { - if (build->grpRowIdx < 0) { - if (!probeGrp->readMatch) { - MJ_ERR_RET(mLeftJoinGrpNonEqCart(pCtx->pJoin, pCtx->finBlk, true, probeGrp)); - } - - probeGrp->readMatch = false; - break; - } - - continue; - } else { - MJ_ERR_RET(mLeftJoinCopyMergeMidBlk(pCtx, &pCtx->midBlk, &pCtx->finBlk)); - - if (pCtx->midRemains) { - pCtx->grpRemains = probeGrp->readIdx <= probeGrp->endIdx; - *contLoop = false; - return TSDB_CODE_SUCCESS; - } - - if (build->grpRowIdx < 0) { - probeGrp->readMatch = false; - break; - } - - continue; - } - } while (true); - - *contLoop = true; - return TSDB_CODE_SUCCESS; -} - - -static int32_t mLeftJoinHashSeqCart(SMJoinMergeCtx* pCtx) { - SMJoinTableCtx* probe = pCtx->pJoin->probe; - SMJoinTableCtx* build = pCtx->pJoin->build; - SMJoinGrpRows* probeGrp = taosArrayGet(probe->eqGrps, 0); - bool contLoop = false; - - if (build->grpRowIdx >= 0) { - MJ_ERR_RET(mLeftJoinHashGrpCartFilter(pCtx, &contLoop)); - if (!contLoop) { - goto _return; - } - } - - 
size_t bufLen = 0; - int32_t probeEndIdx = probeGrp->endIdx; - for (; !GRP_DONE(probeGrp) && !BLK_IS_FULL(pCtx->finBlk);) { - if (mJoinCopyKeyColsDataToBuf(probe, probeGrp->readIdx, &bufLen)) { - continue; - } - - SArray** pGrp = tSimpleHashGet(build->pGrpHash, probe->keyData, bufLen); - if (NULL == pGrp) { - probeGrp->endIdx = probeGrp->readIdx; - MJ_ERR_RET(mLeftJoinGrpNonEqCart(pCtx->pJoin, pCtx->finBlk, true, probeGrp)); - probeGrp->endIdx = probeEndIdx; - probeGrp->readIdx++; - probeGrp->readMatch = false; - } else { - build->pHashCurGrp = *pGrp; - build->grpRowIdx = 0; - - MJ_ERR_RET(mLeftJoinHashGrpCartFilter(pCtx, &contLoop)); - if (!contLoop) { - break; - } - } - } - -_return: - - pCtx->grpRemains = probeGrp->readIdx <= probeGrp->endIdx; - - return TSDB_CODE_SUCCESS; -} - - -static int32_t mLeftJoinHashCart(SMJoinMergeCtx* pCtx) { - return (NULL == pCtx->pJoin->pPreFilter) ? mLeftJoinHashFullCart(pCtx) : mLeftJoinHashSeqCart(pCtx); -} - -static int32_t mLeftJoinProcessEqualGrp(SMJoinMergeCtx* pCtx, int64_t timestamp, bool lastBuildGrp) { - SMJoinOperatorInfo* pJoin = pCtx->pJoin; - - pCtx->lastEqGrp = true; - - mJoinBuildEqGroups(pJoin->probe, timestamp, NULL, true); - if (!lastBuildGrp) { - mJoinRetrieveEqGrpRows(pJoin->pOperator, pJoin->build, timestamp); - } else { - pJoin->build->grpIdx = 0; - } - - if (pCtx->hashCan && REACH_HJOIN_THRESHOLD(pJoin->probe, pJoin->build)) { - if (!lastBuildGrp || NULL == pJoin->build->pGrpHash) { - MJ_ERR_RET(mJoinMakeBuildTbHash(pJoin, pJoin->build)); - MJ_ERR_RET(mJoinSetKeyColsData(pJoin->probe->blk, pJoin->probe)); - } - - pCtx->hashJoin = true; - - return mLeftJoinHashCart(pCtx); - } - - return mLeftJoinMergeCart(pCtx); -} - -static bool mLeftJoinHandleMidRemains(SMJoinMergeCtx* pCtx) { - ASSERT(0 < pCtx->midBlk->info.rows); - - TSWAP(pCtx->midBlk, pCtx->finBlk); - - return (pCtx->finBlk->info.rows >= pCtx->blkThreshold) ? 
false : true; -} - - -static int32_t mLeftJoinHandleGrpRemains(SMJoinMergeCtx* pCtx) { - if (pCtx->lastEqGrp) { - return (pCtx->hashJoin) ? mLeftJoinHashCart(pCtx) : mLeftJoinMergeCart(pCtx); - } - - return mLeftJoinNonEqCart(pCtx); -} - -static SSDataBlock* mLeftJoinDo(struct SOperatorInfo* pOperator) { - SMJoinOperatorInfo* pJoin = pOperator->info; - SMJoinMergeCtx* pCtx = &pJoin->ctx.mergeCtx; - int32_t code = TSDB_CODE_SUCCESS; - int64_t probeTs = 0; - int64_t buildTs = 0; - SColumnInfoData* pBuildCol = NULL; - SColumnInfoData* pProbeCol = NULL; - bool asc = (pJoin->inputTsOrder == TSDB_ORDER_ASC) ? true : false; - - blockDataReset(pCtx->finBlk); - - if (pCtx->midRemains) { - MJ_ERR_JRET(mLeftJoinHandleMidRemains(pCtx)); - if (pCtx->finBlk->info.rows >= pCtx->blkThreshold) { - return pCtx->finBlk; - } - } - - if (pCtx->grpRemains) { - MJ_ERR_JRET(mLeftJoinHandleGrpRemains(pCtx)); - if (pCtx->finBlk->info.rows >= pCtx->blkThreshold) { - return pCtx->finBlk; - } - } - - do { - if (!mLeftJoinRetrieve(pOperator, pJoin, pCtx)) { - break; - } - - MJOIN_GET_TB_COL_TS(pBuildCol, buildTs, pJoin->build); - MJOIN_GET_TB_COL_TS(pProbeCol, probeTs, pJoin->probe); - - if (probeTs == pCtx->lastEqTs) { - MJ_ERR_JRET(mLeftJoinProcessEqualGrp(pCtx, probeTs, true)); - if (pCtx->finBlk->info.rows >= pCtx->blkThreshold) { - return pCtx->finBlk; - } - - if (MJOIN_PROBE_TB_ROWS_DONE(pJoin->probe)) { - continue; - } else { - MJOIN_GET_TB_CUR_TS(pProbeCol, probeTs, pJoin->probe); - } - } - - while (!MJOIN_PROBE_TB_ROWS_DONE(pJoin->probe) && !MJOIN_BUILD_TB_ROWS_DONE(pJoin->build)) { - if (probeTs == buildTs) { - pCtx->lastEqTs = probeTs; - MJ_ERR_JRET(mLeftJoinProcessEqualGrp(pCtx, probeTs, false)); - if (pCtx->finBlk->info.rows >= pCtx->blkThreshold) { - return pCtx->finBlk; - } - - MJOIN_GET_TB_COL_TS(pBuildCol, buildTs, pJoin->build); - MJOIN_GET_TB_COL_TS(pProbeCol, probeTs, pJoin->probe); - } else if (LEFT_JOIN_NO_EQUAL(asc, probeTs, buildTs)) { - pCtx->probeNEqGrp.blk = 
pJoin->probe->blk; - pCtx->probeNEqGrp.beginIdx = pJoin->probe->blkRowIdx; - pCtx->probeNEqGrp.readIdx = pCtx->probeNEqGrp.beginIdx; - pCtx->probeNEqGrp.endIdx = pCtx->probeNEqGrp.beginIdx; - - while (++pJoin->probe->blkRowIdx < pJoin->probe->blk->info.rows) { - MJOIN_GET_TB_CUR_TS(pProbeCol, probeTs, pJoin->probe); - if (LEFT_JOIN_NO_EQUAL(asc, probeTs, buildTs)) { - pCtx->probeNEqGrp.endIdx = pJoin->probe->blkRowIdx; - continue; - } - - break; - } - - MJ_ERR_JRET(mLeftJoinNonEqCart(pCtx)); - if (pCtx->finBlk->info.rows >= pCtx->blkThreshold) { - return pCtx->finBlk; - } - } else { - while (++pJoin->build->blkRowIdx < pJoin->build->blk->info.rows) { - MJOIN_GET_TB_CUR_TS(pBuildCol, buildTs, pJoin->build); - if (LEFT_JOIN_DISCRAD(asc, probeTs, buildTs)) { - continue; - } - - break; - } - } - } - - if (!MJOIN_PROBE_TB_ROWS_DONE(pJoin->probe)) { - pCtx->probeNEqGrp.blk = pJoin->probe->blk; - pCtx->probeNEqGrp.beginIdx = pJoin->probe->blkRowIdx; - pCtx->probeNEqGrp.readIdx = pCtx->probeNEqGrp.beginIdx; - pCtx->probeNEqGrp.endIdx = pJoin->probe->blk->info.rows - 1; - - pJoin->probe->blkRowIdx = pJoin->probe->blk->info.rows; - - MJ_ERR_JRET(mLeftJoinNonEqCart(pCtx)); - if (pCtx->finBlk->info.rows >= pCtx->blkThreshold) { - return pCtx->finBlk; - } - } - } while (true); - -_return: - - if (code) { - pJoin->errCode = code; - return NULL; - } - - return pCtx->finBlk; -} - void mJoinResetTableCtx(SMJoinTableCtx* pCtx) { pCtx->dsInitDone = false; pCtx->dsFetchDone = false; @@ -1140,6 +498,7 @@ SSDataBlock* mJoinMainProcess(struct SOperatorInfo* pOperator) { pBlock = mLeftJoinDo(pOperator); if (NULL == pBlock) { if (pJoin->errCode) { + ASSERT(0); T_LONG_JMP(pOperator->pTaskInfo->env, pJoin->errCode); } break; @@ -1162,7 +521,18 @@ SSDataBlock* mJoinMainProcess(struct SOperatorInfo* pOperator) { void destroyMergeJoinOperator(void* param) { - SMJoinOperatorInfo* pJoinOperator = (SMJoinOperatorInfo*)param; + SOperatorInfo* pOperator = (SOperatorInfo*)param; + SMJoinOperatorInfo* 
pJoin = pOperator->info; + pJoin->ctx.mergeCtx.finBlk = blockDataDestroy(pJoin->ctx.mergeCtx.finBlk); + pJoin->ctx.mergeCtx.midBlk = blockDataDestroy(pJoin->ctx.mergeCtx.midBlk); + + mJoinDestroyCreatedBlks(pJoin->probe->createdBlks); + taosArrayDestroy(pJoin->probe->createdBlks); + tSimpleHashCleanup(pJoin->probe->pGrpHash); + + mJoinDestroyCreatedBlks(pJoin->build->createdBlks); + taosArrayDestroy(pJoin->build->createdBlks); + tSimpleHashCleanup(pJoin->build->pGrpHash); taosMemoryFreeClear(param); } diff --git a/source/libs/executor/test/CMakeLists.txt b/source/libs/executor/test/CMakeLists.txt index bb53213431..ebf7131aa5 100644 --- a/source/libs/executor/test/CMakeLists.txt +++ b/source/libs/executor/test/CMakeLists.txt @@ -1,23 +1,23 @@ MESSAGE(STATUS "build parser unit test") -IF(NOT TD_DARWIN) - # GoogleTest requires at least C++11 - SET(CMAKE_CXX_STANDARD 11) - AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR} SOURCE_LIST) - - ADD_EXECUTABLE(executorTest ${SOURCE_LIST}) - TARGET_LINK_LIBRARIES( - executorTest - PRIVATE os util common transport gtest taos_static qcom executor function planner scalar nodes vnode - ) - - TARGET_INCLUDE_DIRECTORIES( - executorTest - PUBLIC "${TD_SOURCE_DIR}/include/libs/executor/" - PRIVATE "${TD_SOURCE_DIR}/source/libs/executor/inc" - ) -ENDIF () +# IF(NOT TD_DARWIN) +# # GoogleTest requires at least C++11 +# SET(CMAKE_CXX_STANDARD 11) +# AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR} SOURCE_LIST) +# +# ADD_EXECUTABLE(executorTest ${SOURCE_LIST}) +# TARGET_LINK_LIBRARIES( +# executorTest +# PRIVATE os util common transport gtest taos_static qcom executor function planner scalar nodes vnode +# ) +# +# TARGET_INCLUDE_DIRECTORIES( +# executorTest +# PUBLIC "${TD_SOURCE_DIR}/include/libs/executor/" +# PRIVATE "${TD_SOURCE_DIR}/source/libs/executor/inc" +# ) +# ENDIF () SET(CMAKE_CXX_STANDARD 11) AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR} SOURCE_LIST) @@ -25,7 +25,7 @@ AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR} 
SOURCE_LIST) ADD_EXECUTABLE(joinTests joinTests.cpp) TARGET_LINK_LIBRARIES( joinTests - PRIVATE os util common executor gtest_main qcom + PRIVATE os util common executor gtest_main qcom function planner scalar nodes vnode ) TARGET_INCLUDE_DIRECTORIES( diff --git a/source/libs/executor/test/joinTests.cpp b/source/libs/executor/test/joinTests.cpp index 47a3fe367c..417f691d14 100755 --- a/source/libs/executor/test/joinTests.cpp +++ b/source/libs/executor/test/joinTests.cpp @@ -21,6 +21,9 @@ #pragma GCC diagnostic ignored "-Wunused-function" #pragma GCC diagnostic ignored "-Wunused-variable" #pragma GCC diagnostic ignored "-Wsign-compare" +#pragma GCC diagnostic ignored "-Wformat" +#include + #include "os.h" #include "executor.h" @@ -31,6 +34,9 @@ #include "tdatablock.h" #include "tdef.h" #include "tvariant.h" +#include "stub.h" +#include "querytask.h" + namespace { @@ -40,10 +46,57 @@ typedef struct { int32_t rowNum; int32_t addRowNum; int32_t subRowNum; - int32_t mismatchNum; int32_t matchNum; } SJoinTestResInfo; +typedef struct { + int32_t maxResRows; + int32_t maxResBlkRows; + int64_t totalResRows; + int64_t useMSecs; + SArray* pHistory; +} SJoinTestStat; + + +enum { + TEST_NO_COND = 1, + TEST_EQ_COND, + TEST_ON_COND, + TEST_FULL_COND +}; + +#define COL_DISPLAY_WIDTH 18 +#define JT_MAX_LOOP 3000 + +#define LEFT_BLK_ID 0 +#define RIGHT_BLK_ID 1 +#define RES_BLK_ID 2 +#define MAX_SLOT_NUM 4 + +#define JT_KEY_SOLT_ID (MAX_SLOT_NUM - 1) +int32_t jtInputColType[MAX_SLOT_NUM] = {TSDB_DATA_TYPE_TIMESTAMP, TSDB_DATA_TYPE_INT, TSDB_DATA_TYPE_INT, TSDB_DATA_TYPE_BIGINT}; + +char* jtColCondStr[] = {"", "NO COND", "EQ COND", "ON COND", "FULL COND"}; +char* jtJoinTypeStr[] = {"INNER", "LEFT", "RIGHT", "FULL"}; +char* jtSubTypeStr[] = {"", "OUTER", "SEMI", "ANTI", "ASOF", "WINDOW"}; + +int64_t TIMESTAMP_FILTER_VALUE = 10000000000; +int32_t INT_FILTER_VALUE = 200000000; +int64_t BIGINT_FILTER_VALUE = 3000000000000000; + +int64_t jtFilterValue[] = {TIMESTAMP_FILTER_VALUE, 
INT_FILTER_VALUE, INT_FILTER_VALUE, BIGINT_FILTER_VALUE}; + +bool jtErrorRerun = false; +bool jtInRerun = false; + +typedef struct { + bool printTestInfo; + bool printInputRow; + bool printResRow; + bool logHistory; +} SJoinTestCtrl; + + typedef struct { bool filter; bool asc; @@ -59,6 +112,7 @@ typedef struct { int32_t leftTotalRows; int32_t rightTotalRows; int32_t blkRowSize; + int32_t inputStat; int32_t colEqNum; int32_t colEqList[MAX_SLOT_NUM]; @@ -81,139 +135,209 @@ typedef struct { int32_t resColList[MAX_SLOT_NUM * 2]; int32_t resColOffset[MAX_SLOT_NUM * 2]; int32_t resColSize; - void* resColBuf; + char* resColBuf; int32_t colRowDataBufSize; - void* colRowDataBuf; + char* colRowDataBuf; int32_t colRowOffset[MAX_SLOT_NUM]; int64_t curTs; + int64_t curKeyOffset; + int32_t grpOffset[MAX_SLOT_NUM]; int32_t leftBlkReadIdx; SArray* leftBlkList; int32_t rightBlkReadIdx; SArray* rightBlkList; + int64_t resRows; SSHashObj* jtResRows; SOperatorInfo* pJoinOp; -} SJoinTestCtrlInfo; -enum { - TEST_NO_COND = 1, - TEST_EQ_COND, - TEST_ON_COND, - TEST_FULL_COND -}; + int32_t loopIdx; +} SJoinTestCtx; -#define LEFT_BLK_ID 0 -#define RIGHT_BLK_ID 1 -#define RES_BLK_ID 2 -#define MAX_SLOT_NUM 4 +typedef struct { + SJoinTestResInfo res; + SJoinTestCtx ctx; +} SJoinTestHistory; -#define JT_KEY_SOLT_ID 3 -int32_t jtInputColType[MAX_SLOT_NUM] = {TSDB_DATA_TYPE_TIMESTAMP, TSDB_DATA_TYPE_INT, TSDB_DATA_TYPE_INT, TSDB_DATA_TYPE_BIGINT}; +typedef struct { + EJoinType joinType; + EJoinSubType subType; + int32_t cond; + bool filter; + bool asc; + SOperatorInfo* pDownstreams; + SExecTaskInfo* pTask; +} SJoinTestParam; -char* jtColCondStr[] = {"", "NO COND", "EQ COND", "ON COND", "FULL COND"}; -char* jtJoinTypeStr[] = {"INNER", "LEFT", "RIGHT", "FULL"}; -char* jtSubTypeStr[] = {"", "OUTER", "SEMI", "ANTI", "ASOF", "WINDOW"}; -int64_t TIMESTAMP_FILTER_VALUE = 1000000000; -int32_t INT_FILTER_VALUE = 32767; -int64_t BIGINT_FILTER_VALUE = 1000000000000000; - -int64_t jtFilterValue[] = 
{TIMESTAMP_FILTER_VALUE, INT_FILTER_VALUE, INT_FILTER_VALUE, BIGINT_FILTER_VALUE}; - -bool jtErrorRerun = true; -bool jtInRerun = false; - -SJoinTestCtrlInfo jtCtrl = {0}; +SJoinTestCtx jtCtx = {0}; +SJoinTestCtrl jtCtrl = {1, 1, 1, 0}; +SJoinTestStat jtStat = {0}; SJoinTestResInfo jtRes = {0}; -void printResRow(void* value, int32_t type) { - for (int32_t i = 0; i < jtCtrl.resColNum; ++i) { - int32_t slot = jtCtrl.resColInSlot[i]; +void printResRow(char* value, int32_t type) { + if (!jtCtrl.printResRow) { + return; + } + + printf(" "); + for (int32_t i = 0; i < jtCtx.resColNum; ++i) { + int32_t slot = jtCtx.resColInSlot[i]; + if (*(bool*)(value + slot)) { + printf("%18s", " NULL"); + continue; + } + switch (jtInputColType[slot % MAX_SLOT_NUM]) { case TSDB_DATA_TYPE_TIMESTAMP: - printf("\t%" PRId64 , *(int64_t*)(value + jtCtrl.resColOffset[slot])); + printf("%18" PRId64 , *(int64_t*)(value + jtCtx.resColOffset[slot])); break; case TSDB_DATA_TYPE_INT: - printf("\t%d", *(int32_t*)(value + jtCtrl.resColOffset[slot])); + printf("%18d", *(int32_t*)(value + jtCtx.resColOffset[slot])); break; case TSDB_DATA_TYPE_BIGINT: - printf("\t%d", *(int64_t*)(value + jtCtrl.resColOffset[slot])); + printf("%18" PRId64, *(int64_t*)(value + jtCtx.resColOffset[slot])); break; } } printf("\t %s\n", 0 == type ? "-" : (1 == type ? 
"+" : "")); } +void pushResRow() { + jtCtx.resRows++; + + int32_t* rows = (int32_t*)tSimpleHashGet(jtCtx.jtResRows, jtCtx.resColBuf, jtCtx.resColSize); + if (rows) { + (*rows)++; + } else { + int32_t n = 1; + tSimpleHashPut(jtCtx.jtResRows, jtCtx.resColBuf, jtCtx.resColSize, &n, sizeof(n)); + } +} + +void rmResRow() { + int32_t* rows = (int32_t*)tSimpleHashGet(jtCtx.jtResRows, jtCtx.resColBuf, jtCtx.resColSize); + if (rows) { + (*rows)--; + if ((*rows) == 0) { + tSimpleHashRemove(jtCtx.jtResRows, jtCtx.resColBuf, jtCtx.resColSize); + } + } else { + ASSERT(0); + } +} + +static int32_t jtMergeEqCond(SNode** ppDst, SNode** ppSrc) { + if (NULL == *ppSrc) { + return TSDB_CODE_SUCCESS; + } + if (NULL == *ppDst) { + *ppDst = *ppSrc; + *ppSrc = NULL; + return TSDB_CODE_SUCCESS; + } + if (QUERY_NODE_LOGIC_CONDITION == nodeType(*ppSrc)) { + TSWAP(*ppDst, *ppSrc); + } + if (QUERY_NODE_LOGIC_CONDITION == nodeType(*ppDst)) { + SLogicConditionNode* pLogic = (SLogicConditionNode*)*ppDst; + if (QUERY_NODE_LOGIC_CONDITION == nodeType(*ppSrc)) { + nodesListStrictAppendList(pLogic->pParameterList, ((SLogicConditionNode*)(*ppSrc))->pParameterList); + ((SLogicConditionNode*)(*ppSrc))->pParameterList = NULL; + } else { + nodesListStrictAppend(pLogic->pParameterList, *ppSrc); + *ppSrc = NULL; + } + nodesDestroyNode(*ppSrc); + *ppSrc = NULL; + return TSDB_CODE_SUCCESS; + } + + SLogicConditionNode* pLogicCond = (SLogicConditionNode*)nodesMakeNode(QUERY_NODE_LOGIC_CONDITION); + if (NULL == pLogicCond) { + return TSDB_CODE_OUT_OF_MEMORY; + } + pLogicCond->node.resType.type = TSDB_DATA_TYPE_BOOL; + pLogicCond->node.resType.bytes = tDataTypes[TSDB_DATA_TYPE_BOOL].bytes; + pLogicCond->condType = LOGIC_COND_TYPE_AND; + pLogicCond->pParameterList = nodesMakeList(); + nodesListStrictAppend(pLogicCond->pParameterList, *ppSrc); + nodesListStrictAppend(pLogicCond->pParameterList, *ppDst); + + *ppDst = (SNode*)pLogicCond; + *ppSrc = NULL; + + return TSDB_CODE_SUCCESS; +} + + SOperatorInfo* 
createDummyDownstreamOperators(int32_t num) { - SOperatorInfo* p = taosMemoryCalloc(num, sizeof(SOperatorInfo)); + SOperatorInfo* p = (SOperatorInfo*)taosMemoryCalloc(num, sizeof(SOperatorInfo)); for (int32_t i = 0; i < num; ++i) { - p->resultDataBlockId = i; + (p + i)->resultDataBlockId = i; } return p; } void createTargetSlotList(SSortMergeJoinPhysiNode* p) { - int32_t leftTargetNum = taosRand() % MAX_SLOT_NUM; - int32_t rightTargetNum = 0; - if (0 == leftTargetNum) { - do { - rightTargetNum = taosRand() % MAX_SLOT_NUM; - } while (0 == rightTargetNum); - } else { - leftTargetNum = taosRand() % MAX_SLOT_NUM; - } + jtCtx.resColNum = 0; + memset(jtCtx.resColList, 0, sizeof(jtCtx.resColList)); + jtCtx.resColSize = MAX_SLOT_NUM * 2 * sizeof(bool); + jtCtx.keyInSlotIdx = -1; - memset(jtCtrl.resColList, 0, sizeof(jtCtrl.resColList)); - jtCtrl.resColSize = MAX_SLOT_NUM * 2 * sizeof(bool); - - int32_t idx = 0; - int32_t dstIdx = 0; - int32_t dstOffset = jtCtrl.resColSize; - for (int32_t i = 0; i < leftTargetNum; ) { - if (0 == i) { - jtCtrl.resColList[JT_KEY_SOLT_ID] = 1; - ++i; - jtCtrl.keyInSlotIdx = JT_KEY_SOLT_ID; - continue; + for (int32_t i = 0; i < MAX_SLOT_NUM; ++i) { + if (jtCtx.colOnList[i] || jtCtx.colEqList[i] || jtCtx.leftFilterColList[i]) { + jtCtx.resColList[i] = 1; } - - idx = taosRand() % MAX_SLOT_NUM; - if (jtCtrl.resColList[idx]) { - continue; - } - jtCtrl.resColList[idx] = 1; - ++i; - } - - for (int32_t i = 0; i < rightTargetNum; ) { - if (0 == i && leftTargetNum <= 0) { - jtCtrl.resColList[MAX_SLOT_NUM + JT_KEY_SOLT_ID] = 1; - ++i; - jtCtrl.keyInSlotIdx = MAX_SLOT_NUM + JT_KEY_SOLT_ID; - continue; - } - - idx = taosRand() % MAX_SLOT_NUM; - if (jtCtrl.resColList[MAX_SLOT_NUM + idx]) { - continue; - } - jtCtrl.resColList[MAX_SLOT_NUM + idx] = 1; - ++i; } for (int32_t i = 0; i < MAX_SLOT_NUM; ++i) { - if (jtCtrl.resColList[i]) { - jtCtrl.resColOffset[dstIdx] = dstOffset; - jtCtrl.resColInSlot[dstIdx] = i; - if (jtCtrl.keyInSlotIdx == i) { - 
jtCtrl.keyColOffset = dstOffset; - jtCtrl.keyOutSlotIdx = dstIdx; + if (jtCtx.colOnList[i] || jtCtx.colEqList[i] || jtCtx.rightFilterColList[i]) { + jtCtx.resColList[MAX_SLOT_NUM + i] = 1; + } + } + + for (int32_t i = 0; i < MAX_SLOT_NUM; ++i) { + if (0 == jtCtx.resColList[i]) { + jtCtx.resColList[i]= taosRand() % 2; + } + + if ((jtCtx.joinType == JOIN_TYPE_LEFT || jtCtx.joinType == JOIN_TYPE_FULL) && (i == JT_KEY_SOLT_ID)) { + jtCtx.resColList[i] = 1; + } + + if (jtCtx.resColList[i] && i == JT_KEY_SOLT_ID && (jtCtx.joinType == JOIN_TYPE_LEFT || jtCtx.joinType == JOIN_TYPE_FULL)) { + jtCtx.keyInSlotIdx = JT_KEY_SOLT_ID; + } + } + + if (jtCtx.keyInSlotIdx < 0 || ((jtCtx.joinType == JOIN_TYPE_RIGHT || jtCtx.joinType == JOIN_TYPE_FULL))) { + jtCtx.resColList[MAX_SLOT_NUM + JT_KEY_SOLT_ID]= 1; + jtCtx.keyInSlotIdx = JT_KEY_SOLT_ID + MAX_SLOT_NUM; + } + + for (int32_t i = 0; i < MAX_SLOT_NUM; ++i) { + if (0 == jtCtx.resColList[MAX_SLOT_NUM + i]) { + jtCtx.resColList[MAX_SLOT_NUM + i]= taosRand() % 2; + } + } + + int32_t idx = 0; + int32_t dstIdx = 0; + int32_t dstOffset = jtCtx.resColSize; + + for (int32_t i = 0; i < MAX_SLOT_NUM; ++i) { + if (jtCtx.resColList[i]) { + jtCtx.resColOffset[i] = dstOffset; + jtCtx.resColInSlot[dstIdx] = i; + if (jtCtx.keyInSlotIdx == i) { + jtCtx.keyColOffset = dstOffset; } STargetNode* pTarget = (STargetNode*)nodesMakeNode(QUERY_NODE_TARGET); @@ -224,19 +348,20 @@ void createTargetSlotList(SSortMergeJoinPhysiNode* p) { pTarget->slotId = dstIdx++; pTarget->pExpr = (SNode*)pCol; dstOffset += tDataTypes[jtInputColType[i]].bytes; - jtCtrl.resColSize += tDataTypes[jtInputColType[i]].bytes; + jtCtx.resColSize += tDataTypes[jtInputColType[i]].bytes; - nodesListMakeStrictAppend(p->pTargets, pTarget); + nodesListMakeStrictAppend(&p->pTargets, (SNode*)pTarget); + + jtCtx.resColNum++; } } for (int32_t i = 0; i < MAX_SLOT_NUM; ++i) { - if (jtCtrl.resColList[MAX_SLOT_NUM + i]) { - jtCtrl.resColOffset[dstIdx] = dstOffset; - jtCtrl.resColInSlot[dstIdx] = 
i + MAX_SLOT_NUM; - if (jtCtrl.keyInSlotIdx == (i + MAX_SLOT_NUM)) { - jtCtrl.keyColOffset = dstOffset; - jtCtrl.keyOutSlotIdx = dstIdx; + if (jtCtx.resColList[MAX_SLOT_NUM + i]) { + jtCtx.resColOffset[MAX_SLOT_NUM + i] = dstOffset; + jtCtx.resColInSlot[dstIdx] = i + MAX_SLOT_NUM; + if (jtCtx.keyInSlotIdx == (i + MAX_SLOT_NUM)) { + jtCtx.keyColOffset = dstOffset; } STargetNode* pTarget = (STargetNode*)nodesMakeNode(QUERY_NODE_TARGET); @@ -247,48 +372,45 @@ void createTargetSlotList(SSortMergeJoinPhysiNode* p) { pTarget->slotId = dstIdx++; pTarget->pExpr = (SNode*)pCol; dstOffset += tDataTypes[jtInputColType[i]].bytes; - jtCtrl.resColSize += tDataTypes[jtInputColType[i]].bytes; + jtCtx.resColSize += tDataTypes[jtInputColType[i]].bytes; - nodesListMakeStrictAppend(p->pTargets, pTarget); + nodesListMakeStrictAppend(&p->pTargets, (SNode*)pTarget); + jtCtx.resColNum++; } } - jtCtrl.resColNum = leftTargetNum + rightTargetNum; - jtCtrl.resColBuf = taosMemoryCalloc(jtCtrl.resColSize, 1); + jtCtx.resColBuf = (char*)taosMemoryRealloc(jtCtx.resColBuf, jtCtx.resColSize); } -void createColEqCond(SSortMergeJoinPhysiNode* p) { - jtCtrl.colEqNum = 0; +void createColEqCondStart(SSortMergeJoinPhysiNode* p) { + jtCtx.colEqNum = 0; do { - jtCtrl.colEqNum = taosRand() % MAX_SLOT_NUM; - } while (0 == jtCtrl.colEqNum); + jtCtx.colEqNum = taosRand() % MAX_SLOT_NUM; // except TIMESTAMP + } while (0 == jtCtx.colEqNum); int32_t idx = 0; - memset(jtCtrl.colEqList, 0, sizeof(jtCtrl.colEqList)); - for (int32_t i = 0; i < jtCtrl.colEqNum; ) { + memset(jtCtx.colEqList, 0, sizeof(jtCtx.colEqList)); + for (int32_t i = 0; i < jtCtx.colEqNum; ) { idx = taosRand() % MAX_SLOT_NUM; - if (jtCtrl.colEqList[idx]) { + if (jtCtx.colEqList[idx]) { continue; } - jtCtrl.colEqList[idx] = 1; + if (TSDB_DATA_TYPE_TIMESTAMP == jtInputColType[idx]) { + continue; + } + jtCtx.colEqList[idx] = 1; ++i; } - SLogicConditionNode* pLogic = NULL; - if (jtCtrl.colEqNum > 1) { - pLogic = 
nodesMakeNode(QUERY_NODE_LOGIC_CONDITION); - pLogic->condType = LOGIC_COND_TYPE_AND; - } - for (int32_t i = 0; i < MAX_SLOT_NUM; ++i) { - if (jtCtrl.colEqList[i]) { + if (jtCtx.colEqList[i]) { SColumnNode* pCol1 = (SColumnNode*)nodesMakeNode(QUERY_NODE_COLUMN); pCol1->dataBlockId = LEFT_BLK_ID; pCol1->slotId = i; pCol1->node.resType.type = jtInputColType[i]; pCol1->node.resType.bytes = tDataTypes[jtInputColType[i]].bytes; - nodesListMakeStrictAppend(p->pEqLeft, pCol1); + nodesListMakeStrictAppend(&p->pEqLeft, (SNode*)pCol1); SColumnNode* pCol2 = (SColumnNode*)nodesMakeNode(QUERY_NODE_COLUMN); pCol2->dataBlockId = RIGHT_BLK_ID; @@ -296,112 +418,163 @@ void createColEqCond(SSortMergeJoinPhysiNode* p) { pCol2->node.resType.type = jtInputColType[i]; pCol2->node.resType.bytes = tDataTypes[jtInputColType[i]].bytes; - nodesListMakeStrictAppend(p->pEqRight, pCol2); + nodesListMakeStrictAppend(&p->pEqRight, (SNode*)pCol2); + } + } +} - SOperatorNode* pOp = nodesMakeNode(QUERY_NODE_OPERATOR); - pOp->opType = OP_TYPE_EQUAL; - pOp->pLeft = nodesCloneNode(pCol1); - pOp->pRight = nodesCloneNode(pCol2); +void createColOnCondStart(SSortMergeJoinPhysiNode* p) { + jtCtx.colOnNum = 0; + do { + jtCtx.colOnNum = taosRand() % (MAX_SLOT_NUM + 1); + } while (0 == jtCtx.colOnNum || (jtCtx.colOnNum + jtCtx.colEqNum) > MAX_SLOT_NUM); - if (jtCtrl.colEqNum > 1) { - nodesListMakeStrictAppend(&pLogic->pParameterList, pOp); - } else { - p->pFullOnCond = pOp; - break; - } + int32_t idx = 0; + memset(jtCtx.colOnList, 0, sizeof(jtCtx.colOnList)); + for (int32_t i = 0; i < jtCtx.colOnNum; ) { + idx = taosRand() % MAX_SLOT_NUM; + if (jtCtx.colOnList[idx] || jtCtx.colEqList[idx]) { + continue; + } + jtCtx.colOnList[idx] = 1; + ++i; + } +} + +int32_t getDstSlotId(int32_t srcIdx) { + for (int32_t i = 0; i < jtCtx.resColNum; ++i) { + if (jtCtx.resColInSlot[i] == srcIdx) { + return i; } } - if (jtCtrl.colEqNum > 1) { - p->pFullOnCond = pLogic; - } + return -1; } -void 
createColOnCond(SSortMergeJoinPhysiNode* p) { - jtCtrl.colOnNum = 0; - do { - jtCtrl.colOnNum = taosRand() % MAX_SLOT_NUM; - } while (0 == jtCtrl.colOnNum || (jtCtrl.colOnNum + jtCtrl.colEqNum) > MAX_SLOT_NUM); - int32_t idx = 0; - memset(jtCtrl.colOnList, 0, sizeof(jtCtrl.colOnList)); - for (int32_t i = 0; i < jtCtrl.colOnNum; ) { - idx = taosRand() % MAX_SLOT_NUM; - if (jtCtrl.colOnList[idx] || jtCtrl.colEqList[idx]) { - continue; - } - jtCtrl.colOnList[idx] = 1; - ++i; +void createColEqCondEnd(SSortMergeJoinPhysiNode* p) { + if (jtCtx.colEqNum <= 0) { + return; } SLogicConditionNode* pLogic = NULL; - if (jtCtrl.colOnNum > 1) { - pLogic = nodesMakeNode(QUERY_NODE_LOGIC_CONDITION); + if (jtCtx.colEqNum > 1) { + pLogic = (SLogicConditionNode*)nodesMakeNode(QUERY_NODE_LOGIC_CONDITION); + pLogic->node.resType.type = TSDB_DATA_TYPE_BOOL; + pLogic->node.resType.bytes = tDataTypes[TSDB_DATA_TYPE_BOOL].bytes; pLogic->condType = LOGIC_COND_TYPE_AND; } for (int32_t i = 0; i < MAX_SLOT_NUM; ++i) { - if (jtCtrl.colOnList[i]) { + if (jtCtx.colEqList[i]) { SColumnNode* pCol1 = (SColumnNode*)nodesMakeNode(QUERY_NODE_COLUMN); - pCol1->dataBlockId = LEFT_BLK_ID; - pCol1->slotId = i; + pCol1->dataBlockId = RES_BLK_ID; + pCol1->slotId = getDstSlotId(i); pCol1->node.resType.type = jtInputColType[i]; - pCol1->node.resType.bytes = (TSDB_DATA_TYPE_BINARY != jtInputColType[i]) ? tDataTypes[jtInputColType[i]].bytes : MJ_TEST_BINARY_BYTES; + pCol1->node.resType.bytes = tDataTypes[jtInputColType[i]].bytes; - nodesListMakeStrictAppend(p->pEqLeft, pCol1); - SColumnNode* pCol2 = (SColumnNode*)nodesMakeNode(QUERY_NODE_COLUMN); - pCol2->dataBlockId = RIGHT_BLK_ID; - pCol2->slotId = i; + pCol2->dataBlockId = RES_BLK_ID; + pCol2->slotId = getDstSlotId(MAX_SLOT_NUM + i); pCol2->node.resType.type = jtInputColType[i]; - pCol2->node.resType.bytes = (TSDB_DATA_TYPE_BINARY != jtInputColType[i]) ? 
tDataTypes[jtInputColType[i]].bytes : MJ_TEST_BINARY_BYTES; - - nodesListMakeStrictAppend(p->pEqRight, pCol2); + pCol2->node.resType.bytes = tDataTypes[jtInputColType[i]].bytes; - SOperatorNode* pOp = nodesMakeNode(QUERY_NODE_OPERATOR); - pOp->opType = OP_TYPE_GREATER_THAN; - pOp->pLeft = nodesCloneNode(pCol1); - pOp->pRight = nodesCloneNode(pCol2); + SOperatorNode* pOp = (SOperatorNode*)nodesMakeNode(QUERY_NODE_OPERATOR); + pOp->opType = OP_TYPE_EQUAL; + pOp->node.resType.type = TSDB_DATA_TYPE_BOOL; + pOp->node.resType.bytes = tDataTypes[TSDB_DATA_TYPE_BOOL].bytes; + pOp->pLeft = (SNode*)pCol1; + pOp->pRight = (SNode*)pCol2; - if (jtCtrl.colOnNum > 1) { - nodesListMakeStrictAppend(&pLogic->pParameterList, pOp); + if (jtCtx.colEqNum > 1) { + nodesListMakeStrictAppend(&pLogic->pParameterList, (SNode*)pOp); } else { - p->pColOnCond = pOp; + p->pFullOnCond = (SNode*)pOp; break; } } } - if (jtCtrl.colOnNum > 1) { - p->pColOnCond = pLogic; + if (jtCtx.colEqNum > 1) { + p->pFullOnCond = (SNode*)pLogic; + } +} + +void createColOnCondEnd(SSortMergeJoinPhysiNode* p) { + if (jtCtx.colOnNum <= 0) { + return; + } + + SLogicConditionNode* pLogic = NULL; + if (jtCtx.colOnNum > 1) { + pLogic = (SLogicConditionNode*)nodesMakeNode(QUERY_NODE_LOGIC_CONDITION); + pLogic->node.resType.type = TSDB_DATA_TYPE_BOOL; + pLogic->node.resType.bytes = tDataTypes[TSDB_DATA_TYPE_BOOL].bytes; + pLogic->condType = LOGIC_COND_TYPE_AND; + } + + for (int32_t i = 0; i < MAX_SLOT_NUM; ++i) { + if (jtCtx.colOnList[i]) { + SColumnNode* pCol1 = (SColumnNode*)nodesMakeNode(QUERY_NODE_COLUMN); + pCol1->dataBlockId = RES_BLK_ID; + pCol1->slotId = getDstSlotId(i); + pCol1->node.resType.type = jtInputColType[i]; + pCol1->node.resType.bytes = tDataTypes[jtInputColType[i]].bytes; + + SColumnNode* pCol2 = (SColumnNode*)nodesMakeNode(QUERY_NODE_COLUMN); + pCol2->dataBlockId = RES_BLK_ID; + pCol2->slotId = getDstSlotId(MAX_SLOT_NUM + i); + pCol2->node.resType.type = jtInputColType[i]; + pCol2->node.resType.bytes = 
tDataTypes[jtInputColType[i]].bytes; + + SOperatorNode* pOp = (SOperatorNode*)nodesMakeNode(QUERY_NODE_OPERATOR); + pOp->opType = OP_TYPE_GREATER_THAN; + pOp->node.resType.type = TSDB_DATA_TYPE_BOOL; + pOp->node.resType.bytes = tDataTypes[TSDB_DATA_TYPE_BOOL].bytes; + pOp->pLeft = (SNode*)pCol1; + pOp->pRight = (SNode*)pCol2; + + if (jtCtx.colOnNum > 1) { + nodesListMakeStrictAppend(&pLogic->pParameterList, (SNode*)pOp); + } else { + p->pColOnCond = (SNode*)pOp; + break; + } + } + } + + if (jtCtx.colOnNum > 1) { + p->pColOnCond = (SNode*)pLogic; } - mergeEqCond(&p->pFullOnCond, nodesCloneNode(p->pColOnCond)); + SNode* pTmp = nodesCloneNode(p->pColOnCond); + jtMergeEqCond(&p->pFullOnCond, &pTmp); } + void createColCond(SSortMergeJoinPhysiNode* p, int32_t cond) { - jtCtrl.colCond = cond; + jtCtx.colCond = cond; switch (cond) { case TEST_NO_COND: - jtCtrl.colEqNum = 0; - jtCtrl.colOnNum = 0; - memset(jtCtrl.colEqList, 0, sizeof(jtCtrl.colEqList)); - memset(jtCtrl.colOnList, 0, sizeof(jtCtrl.colOnList)); + jtCtx.colEqNum = 0; + jtCtx.colOnNum = 0; + memset(jtCtx.colEqList, 0, sizeof(jtCtx.colEqList)); + memset(jtCtx.colOnList, 0, sizeof(jtCtx.colOnList)); break; case TEST_EQ_COND: - createColEqCond(p); - jtCtrl.colOnNum = 0; - memset(jtCtrl.colOnList, 0, sizeof(jtCtrl.colOnList)); + createColEqCondStart(p); + jtCtx.colOnNum = 0; + memset(jtCtx.colOnList, 0, sizeof(jtCtx.colOnList)); break; case TEST_ON_COND: - createColOnCond(p); - jtCtrl.colEqNum = 0; - memset(jtCtrl.colEqList, 0, sizeof(jtCtrl.colEqList)); + createColOnCondStart(p); + jtCtx.colEqNum = 0; + memset(jtCtx.colEqList, 0, sizeof(jtCtx.colEqList)); break; case TEST_FULL_COND: - createColEqCond(p); - createColOnCond(p); + createColEqCondStart(p); + createColOnCondStart(p); break; default: break; @@ -421,143 +594,177 @@ void* getFilterValue(int32_t type) { } } -void createFilterCond(SSortMergeJoinPhysiNode* p, bool filter) { - jtCtrl.filter = filter; +void createFilterStart(SSortMergeJoinPhysiNode* p, bool 
filter) { + jtCtx.filter = filter; if (!filter) { - jtCtrl.leftFilterNum = 0; - jtCtrl.rightFilterNum = 0; - memset(jtCtrl.leftFilterColList, 0, sizeof(jtCtrl.leftFilterColList)); - memset(jtCtrl.rightFilterColList, 0, sizeof(jtCtrl.rightFilterColList)); + jtCtx.leftFilterNum = 0; + jtCtx.rightFilterNum = 0; + memset(jtCtx.leftFilterColList, 0, sizeof(jtCtx.leftFilterColList)); + memset(jtCtx.rightFilterColList, 0, sizeof(jtCtx.rightFilterColList)); return; } - jtCtrl.leftFilterNum = taosRand() % MAX_SLOT_NUM; - if (0 == jtCtrl.leftFilterNum) { + jtCtx.leftFilterNum = taosRand() % (MAX_SLOT_NUM + 1); + if (0 == jtCtx.leftFilterNum) { do { - jtCtrl.rightFilterNum = taosRand() % MAX_SLOT_NUM; - } while (0 == jtCtrl.rightFilterNum); + jtCtx.rightFilterNum = taosRand() % (MAX_SLOT_NUM + 1); + } while (0 == jtCtx.rightFilterNum); } else { - jtCtrl.rightFilterNum = taosRand() % MAX_SLOT_NUM; + jtCtx.rightFilterNum = taosRand() % (MAX_SLOT_NUM + 1); } int32_t idx = 0; - memset(jtCtrl.leftFilterColList, 0, sizeof(jtCtrl.leftFilterColList)); - memset(jtCtrl.rightFilterColList, 0, sizeof(jtCtrl.rightFilterColList)); - for (int32_t i = 0; i < jtCtrl.leftFilterNum; ) { + memset(jtCtx.leftFilterColList, 0, sizeof(jtCtx.leftFilterColList)); + memset(jtCtx.rightFilterColList, 0, sizeof(jtCtx.rightFilterColList)); + for (int32_t i = 0; i < jtCtx.leftFilterNum; ) { idx = taosRand() % MAX_SLOT_NUM; - if (jtCtrl.leftFilterColList[idx]) { + if (jtCtx.leftFilterColList[idx]) { continue; } - jtCtrl.leftFilterColList[idx] = 1; + jtCtx.leftFilterColList[idx] = 1; ++i; } - for (int32_t i = 0; i < jtCtrl.rightFilterNum; ) { + for (int32_t i = 0; i < jtCtx.rightFilterNum; ) { idx = taosRand() % MAX_SLOT_NUM; - if (jtCtrl.rightFilterColList[idx]) { + if (jtCtx.rightFilterColList[idx]) { continue; } - jtCtrl.rightFilterColList[idx] = 1; + jtCtx.rightFilterColList[idx] = 1; ++i; } +} +void createFilterEnd(SSortMergeJoinPhysiNode* p, bool filter) { + if (!filter || (jtCtx.leftFilterNum <= 0 && 
jtCtx.rightFilterNum <= 0)) { + return; + } + SLogicConditionNode* pLogic = NULL; - if ((jtCtrl.leftFilterNum + jtCtrl.rightFilterNum) > 1) { - pLogic = nodesMakeNode(QUERY_NODE_LOGIC_CONDITION); - pLogic->condType = taosRand() % 2 ? LOGIC_COND_TYPE_AND : LOGIC_COND_TYPE_OR; + if ((jtCtx.leftFilterNum + jtCtx.rightFilterNum) > 1) { + pLogic = (SLogicConditionNode*)nodesMakeNode(QUERY_NODE_LOGIC_CONDITION); + pLogic->condType = LOGIC_COND_TYPE_AND; } for (int32_t i = 0; i < MAX_SLOT_NUM; ++i) { - if (jtCtrl.leftFilterColList[i]) { + if (jtCtx.leftFilterColList[i]) { SColumnNode* pCol = (SColumnNode*)nodesMakeNode(QUERY_NODE_COLUMN); - pCol->dataBlockId = LEFT_BLK_ID; - pCol->slotId = i; + pCol->dataBlockId = RES_BLK_ID; + pCol->slotId = getDstSlotId(i); pCol->node.resType.type = jtInputColType[i]; pCol->node.resType.bytes = tDataTypes[jtInputColType[i]].bytes; + sprintf(pCol->colName, "l%d", i); SValueNode* pVal = (SValueNode*)nodesMakeNode(QUERY_NODE_VALUE); - nodesSetValueNodeValue(pVal, getFilterValue(jtInputColType[i])); pVal->node.resType.type = jtInputColType[i]; pVal->node.resType.bytes = tDataTypes[jtInputColType[i]].bytes; + nodesSetValueNodeValue(pVal, getFilterValue(jtInputColType[i])); - SOperatorNode* pOp = nodesMakeNode(QUERY_NODE_OPERATOR); + SOperatorNode* pOp = (SOperatorNode*)nodesMakeNode(QUERY_NODE_OPERATOR); pOp->opType = OP_TYPE_GREATER_THAN; - pOp->pLeft = pCol; - pOp->pRight = pVal; + pOp->pLeft = (SNode*)pCol; + pOp->pRight = (SNode*)pVal; - if ((jtCtrl.leftFilterNum + jtCtrl.rightFilterNum) > 1) { - nodesListMakeStrictAppend(&pLogic->pParameterList, pOp); + if ((jtCtx.leftFilterNum + jtCtx.rightFilterNum) > 1) { + nodesListMakeStrictAppend(&pLogic->pParameterList, (SNode*)pOp); } else { - p->node.pConditions = pOp; + p->node.pConditions = (SNode*)pOp; break; } } } for (int32_t i = 0; i < MAX_SLOT_NUM; ++i) { - if (jtCtrl.rightFilterColList[i]) { + if (jtCtx.rightFilterColList[i]) { SColumnNode* pCol = 
(SColumnNode*)nodesMakeNode(QUERY_NODE_COLUMN); - pCol->dataBlockId = RIGHT_BLK_ID; - pCol->slotId = i; + pCol->dataBlockId = RES_BLK_ID; + pCol->slotId = getDstSlotId(MAX_SLOT_NUM + i); pCol->node.resType.type = jtInputColType[i]; pCol->node.resType.bytes = tDataTypes[jtInputColType[i]].bytes; + sprintf(pCol->colName, "r%d", i); SValueNode* pVal = (SValueNode*)nodesMakeNode(QUERY_NODE_VALUE); - nodesSetValueNodeValue(pVal, getFilterValue(jtInputColType[i])); pVal->node.resType.type = jtInputColType[i]; pVal->node.resType.bytes = tDataTypes[jtInputColType[i]].bytes; + nodesSetValueNodeValue(pVal, getFilterValue(jtInputColType[i])); - SOperatorNode* pOp = nodesMakeNode(QUERY_NODE_OPERATOR); + SOperatorNode* pOp = (SOperatorNode*)nodesMakeNode(QUERY_NODE_OPERATOR); pOp->opType = OP_TYPE_GREATER_THAN; - pOp->pLeft = pCol; - pOp->pRight = pVal; + pOp->pLeft = (SNode*)pCol; + pOp->pRight = (SNode*)pVal; - if ((jtCtrl.leftFilterNum + jtCtrl.rightFilterNum) > 1) { - nodesListMakeStrictAppend(&pLogic->pParameterList, pOp); + if ((jtCtx.leftFilterNum + jtCtx.rightFilterNum) > 1) { + nodesListMakeStrictAppend(&pLogic->pParameterList, (SNode*)pOp); } else { - p->node.pConditions = pOp; + p->node.pConditions = (SNode*)pOp; break; } } } - if ((jtCtrl.leftFilterNum + jtCtrl.rightFilterNum) > 1) { - p->node.pConditions = pLogic; + if ((jtCtx.leftFilterNum + jtCtx.rightFilterNum) > 1) { + p->node.pConditions = (SNode*)pLogic; } } + void updateColRowInfo() { - jtCtrl.blkRowSize = MAX_SLOT_NUM * sizeof(bool); + jtCtx.blkRowSize = MAX_SLOT_NUM * sizeof(bool); for (int32_t i = 0; i < MAX_SLOT_NUM; ++i) { - jtCtrl.colRowOffset[i] = jtCtrl.blkRowSize; - jtCtrl.blkRowSize += tDataTypes[jtInputColType[i]].bytes; + jtCtx.colRowOffset[i] = jtCtx.blkRowSize; + jtCtx.blkRowSize += tDataTypes[jtInputColType[i]].bytes; } } +void createBlockDescNode(SDataBlockDescNode** ppNode) { + SDataBlockDescNode* pDesc = (SDataBlockDescNode*)nodesMakeNode(QUERY_NODE_DATABLOCK_DESC); + pDesc->dataBlockId = 
RES_BLK_ID; + pDesc->totalRowSize = jtCtx.resColSize - MAX_SLOT_NUM * 2 * sizeof(bool); + pDesc->outputRowSize = pDesc->totalRowSize; + for (int32_t i = 0; i < jtCtx.resColNum; ++i) { + SSlotDescNode* pSlot = (SSlotDescNode*)nodesMakeNode(QUERY_NODE_SLOT_DESC); + pSlot->slotId = i; + int32_t slotIdx = jtCtx.resColInSlot[i] >= MAX_SLOT_NUM ? jtCtx.resColInSlot[i] - MAX_SLOT_NUM : jtCtx.resColInSlot[i]; + pSlot->dataType.type = jtInputColType[slotIdx]; + pSlot->dataType.bytes = tDataTypes[pSlot->dataType.type].bytes; + + nodesListMakeStrictAppend(&pDesc->pSlots, (SNode *)pSlot); + } + + *ppNode = pDesc; +} + SSortMergeJoinPhysiNode* createDummySortMergeJoinPhysiNode(EJoinType type, EJoinSubType sub, int32_t cond, bool filter, bool asc) { - SSortMergeJoinPhysiNode* p = taosMemoryCalloc(1, sizeof(SSortMergeJoinPhysiNode)); + char* t = (char*)taosMemoryCalloc(1, 1 + sizeof(SSortMergeJoinPhysiNode)); + SSortMergeJoinPhysiNode* p = (SSortMergeJoinPhysiNode*)(t + 1); p->joinType = type; p->subType = sub; - p->leftPrimSlotId = LEFT_BLK_ID; - p->rightPrimSlotId = RIGHT_BLK_ID; + p->leftPrimSlotId = 0; + p->rightPrimSlotId = 0; p->node.inputTsOrder = asc ? 
ORDER_ASC : ORDER_DESC; - jtCtrl.joinType = type; - jtCtrl.subType = sub; - jtCtrl.asc = asc; + jtCtx.joinType = type; + jtCtx.subType = sub; + jtCtx.asc = asc; createColCond(p, cond); + createFilterStart(p, filter); createTargetSlotList(p); - createFilterCond(p, filter); + createColEqCondEnd(p); + createColOnCondEnd(p); + createFilterEnd(p, filter); updateColRowInfo(); + createBlockDescNode(&p->node.pOutputDataBlockDesc); return p; } SExecTaskInfo* createDummyTaskInfo(char* taskId) { - SExecTaskInfo* p = taosMemoryCalloc(1, sizeof(SExecTaskInfo)); + SExecTaskInfo* p = (SExecTaskInfo*)taosMemoryCalloc(1, sizeof(SExecTaskInfo)); p->id.str = taskId; + + return p; } SSDataBlock* createDummyBlock(int32_t blkId) { @@ -570,7 +777,7 @@ SSDataBlock* createDummyBlock(int32_t blkId) { for (int32_t i = 0; i < MAX_SLOT_NUM; ++i) { SColumnInfoData idata = - createColumnInfoData(jtInputColType[i]), (TSDB_DATA_TYPE_BINARY != jtInputColType[i]) ? tDataTypes[jtInputColType[i]].bytes : MJ_TEST_BINARY_BYTES, i); + createColumnInfoData(jtInputColType[i], tDataTypes[jtInputColType[i]].bytes, i); blockDataAppendColInfo(p, &idata); } @@ -578,86 +785,88 @@ SSDataBlock* createDummyBlock(int32_t blkId) { return p; } -void initJoinTest() { - jtCtrl.leftBlkList = taosArrayInit(10, POINTER_BYTES); - jtCtrl.rightBlkList = taosArrayInit(10, POINTER_BYTES); - - jtCtrl.jtResRows = tSimpleHashInit(10000000, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT)); -} - void createGrpRows(SSDataBlock** ppBlk, int32_t blkId, int32_t grpRows) { if (grpRows <= 0) { return; } - if (NULL == ppBlk) { + if (NULL == *ppBlk) { *ppBlk = createDummyBlock((blkId == LEFT_BLK_ID) ? LEFT_BLK_ID : RIGHT_BLK_ID); - blockDataEnsureCapacity(*ppBlk, jtCtrl.blkRows); - taosArrayPush((blkId == LEFT_BLK_ID) ? jtCtrl.leftBlkList : jtCtrl.rightBlkList, ppBlk); + blockDataEnsureCapacity(*ppBlk, jtCtx.blkRows); + taosArrayPush((blkId == LEFT_BLK_ID) ? 
jtCtx.leftBlkList : jtCtx.rightBlkList, ppBlk); } + jtCtx.inputStat |= (1 << blkId); + int32_t tableOffset = 0; int32_t peerOffset = 0; bool keepRes = false; if (blkId == LEFT_BLK_ID) { - if ((jtCtrl.joinType == JOIN_TYPE_LEFT || jtCtrl.joinType == JOIN_TYPE_FULL) && jtCtrl.subType != JOIN_STYPE_SEMI) { + if ((jtCtx.joinType == JOIN_TYPE_LEFT || jtCtx.joinType == JOIN_TYPE_FULL) && jtCtx.subType != JOIN_STYPE_SEMI) { keepRes = true; } peerOffset = MAX_SLOT_NUM; } else { - if ((jtCtrl.joinType == JOIN_TYPE_RIGHT || jtCtrl.joinType == JOIN_TYPE_FULL) && jtCtrl.subType != JOIN_STYPE_SEMI) { + if ((jtCtx.joinType == JOIN_TYPE_RIGHT || jtCtx.joinType == JOIN_TYPE_FULL) && jtCtx.subType != JOIN_STYPE_SEMI) { keepRes = true; } tableOffset = MAX_SLOT_NUM; } - int32_t filterNum = (blkId == LEFT_BLK_ID) ? jtCtrl.leftFilterNum : jtCtrl.rightFilterNum; - int32_t peerFilterNum = (blkId == LEFT_BLK_ID) ? jtCtrl.rightFilterNum : jtCtrl.leftFilterNum; - int32_t* filterCol = (blkId == LEFT_BLK_ID) ? jtCtrl.leftFilterColList : jtCtrl.rightFilterColList; + int32_t filterNum = (blkId == LEFT_BLK_ID) ? jtCtx.leftFilterNum : jtCtx.rightFilterNum; + int32_t peerFilterNum = (blkId == LEFT_BLK_ID) ? jtCtx.rightFilterNum : jtCtx.leftFilterNum; + int32_t* filterCol = (blkId == LEFT_BLK_ID) ? jtCtx.leftFilterColList : jtCtx.rightFilterColList; - void* pData = NULL; + char* pData = NULL; int32_t tmpInt = 0; int64_t tmpBigint = 0; bool isNull = false; bool filterOut = false; + int32_t vRange = TMAX(grpRows / 3, 3); + for (int32_t c = 0; c < MAX_SLOT_NUM; ++c) { + jtCtx.grpOffset[c] = c * TMAX(100, grpRows); + } + for (int32_t i = 0; i < grpRows; ++i) { if ((*ppBlk)->info.rows >= (*ppBlk)->info.capacity) { *ppBlk = createDummyBlock((blkId == LEFT_BLK_ID) ? LEFT_BLK_ID : RIGHT_BLK_ID); - blockDataEnsureCapacity(*ppBlk, jtCtrl.blkRows); - taosArrayPush((blkId == LEFT_BLK_ID) ? 
jtCtrl.leftBlkList : jtCtrl.rightBlkList, ppBlk); + blockDataEnsureCapacity(*ppBlk, jtCtx.blkRows); + taosArrayPush((blkId == LEFT_BLK_ID) ? jtCtx.leftBlkList : jtCtx.rightBlkList, ppBlk); } - filterOut = peerFilterNum > 0 ? true : false; + filterOut = (peerFilterNum > 0) ? true : false; if (!filterOut) { - memset(jtCtrl.resColBuf, 0, jtCtrl.resColSize); + memset(jtCtx.resColBuf, 0, jtCtx.resColSize); } for (int32_t c = 0; c < MAX_SLOT_NUM; ++c) { switch (jtInputColType[c]) { case TSDB_DATA_TYPE_TIMESTAMP: - ++jtCtrl.curTs; - pData = &jtCtrl.curTs; + ++jtCtx.curTs; + pData = (char*)&jtCtx.curTs; isNull = false; - if (!filterOut && filterNum && filterCol[c] && jtCtrl.curTs <= TIMESTAMP_FILTER_VALUE) { + if (!filterOut && filterNum && filterCol[c] && jtCtx.curTs <= TIMESTAMP_FILTER_VALUE) { filterOut = true; } break; case TSDB_DATA_TYPE_INT: - if (taosRand() % 2) { - tmpInt = (taosRand() % 2) ? INT_FILTER_VALUE + taosRand() % 3 : INT_FILTER_VALUE - taosRand() % 3; - pData = &tmpInt; + if (taosRand() % 10) { + tmpInt = (taosRand() % 2) ? INT_FILTER_VALUE + jtCtx.grpOffset[c] + taosRand() % vRange : INT_FILTER_VALUE - jtCtx.grpOffset[c] - taosRand() % vRange; + pData = (char*)&tmpInt; isNull = false; if (filterNum && filterCol[c] && tmpInt <= INT_FILTER_VALUE) { filterOut = true; } } else { isNull = true; - filterOut = (filterNum && filterCol[c]) ? true : false; + if (filterNum && filterCol[c]) { + filterOut = true; + } } break; case TSDB_DATA_TYPE_BIGINT: - tmpBigint = (taosRand() % 2) ? BIGINT_FILTER_VALUE + taosRand() % 3 : BIGINT_FILTER_VALUE - taosRand() % 3; - pData = &tmpBigint; + tmpBigint = (taosRand() % 2) ? 
BIGINT_FILTER_VALUE + jtCtx.curKeyOffset++ : BIGINT_FILTER_VALUE - jtCtx.curKeyOffset++; + pData = (char*)&tmpBigint; isNull = false; if (filterNum && filterCol[c] && tmpBigint <= BIGINT_FILTER_VALUE) { filterOut = true; @@ -667,251 +876,269 @@ void createGrpRows(SSDataBlock** ppBlk, int32_t blkId, int32_t grpRows) { break; } - SColumnInfoData* pCol = taosArrayGet((*ppBlk)->pDataBlock, c); + SColumnInfoData* pCol = (SColumnInfoData*)taosArrayGet((*ppBlk)->pDataBlock, c); colDataSetVal(pCol, (*ppBlk)->info.rows, pData, isNull); - if (keepRes && !filterOut && jtCtrl.resColList[tableOffset + c]) { + if (keepRes && !filterOut && jtCtx.resColList[tableOffset + c]) { if (isNull) { - *(char*)(jtCtrl.resColBuf + tableOffset + c) = true; + *(char*)(jtCtx.resColBuf + tableOffset + c) = true; } else { - memcpy(jtCtrl.resColBuf + jtCtrl.resColOffset[tableOffset + c], pData, tDataTypes[jtInputColType[c]].bytes); + memcpy(jtCtx.resColBuf + jtCtx.resColOffset[tableOffset + c], pData, tDataTypes[jtInputColType[c]].bytes); } } } if (keepRes && !filterOut) { for (int32_t c = 0; c < MAX_SLOT_NUM; ++c) { - if (jtCtrl.resColList[peerOffset + c]) { - *(char*)(jtCtrl.resColBuf + peerOffset + c) = true; + if (jtCtx.resColList[peerOffset + c]) { + *(char*)(jtCtx.resColBuf + peerOffset + c) = true; } } - tSimpleHashPut(jtCtrl.jtResRows, jtCtrl.resColBuf + jtCtrl.keyColOffset, sizeof(int64_t), jtCtrl.resColBuf, jtCtrl.resColSize); + pushResRow(); } (*ppBlk)->info.rows++; } } -void createRowData(SSDataBlock* pBlk, int64_t tbOffset, int32_t rowIdx) { +void createRowData(SSDataBlock* pBlk, int64_t tbOffset, int32_t rowIdx, int32_t vRange) { int32_t tmpInt = 0; int64_t tmpBig = 0; - void *pData = NULL; for (int32_t c = 0; c < MAX_SLOT_NUM; ++c) { - SColumnInfoData* pCol = taosArrayGet(pBlk->pDataBlock, c); + SColumnInfoData* pCol = (SColumnInfoData*)taosArrayGet(pBlk->pDataBlock, c); - int32_t rv = taosRand() % 2; + int32_t rv = taosRand() % 10; switch (jtInputColType[c]) { case 
TSDB_DATA_TYPE_TIMESTAMP: - *(int64_t*)(jtCtrl.colRowDataBuf + tbOffset + rowIdx * jtCtrl.blkRowSize + jtCtrl.colRowOffset[c]) = jtCtrl.curTs; - colDataSetVal(pCol, pBlk->info.rows, &jtCtrl.curTs, false); + *(int64_t*)(jtCtx.colRowDataBuf + tbOffset + rowIdx * jtCtx.blkRowSize + jtCtx.colRowOffset[c]) = jtCtx.curTs; + colDataSetVal(pCol, pBlk->info.rows, (char*)&jtCtx.curTs, false); break; case TSDB_DATA_TYPE_INT: - if (0 == rv) { - tmpInt = (taosRand() % 2) ? INT_FILTER_VALUE + taosRand() % 3 : INT_FILTER_VALUE - taosRand() % 3; - *(int32_t*)(jtCtrl.colRowDataBuf + tbOffset + rowIdx * jtCtrl.blkRowSize + jtCtrl.colRowOffset[c]) = tmpInt; - colDataSetVal(pCol, pBlk->info.rows, &tmpInt, false); + if (rv) { + tmpInt = (taosRand() % 2) ? INT_FILTER_VALUE + jtCtx.grpOffset[c] + taosRand() % vRange : INT_FILTER_VALUE - taosRand() % vRange; + *(int32_t*)(jtCtx.colRowDataBuf + tbOffset + rowIdx * jtCtx.blkRowSize + jtCtx.colRowOffset[c]) = tmpInt; + colDataSetVal(pCol, pBlk->info.rows, (char*)&tmpInt, false); } else { - *(bool*)(jtCtrl.colRowDataBuf + tbOffset + rowIdx * jtCtrl.blkRowSize + c) = true; + *(bool*)(jtCtx.colRowDataBuf + tbOffset + rowIdx * jtCtx.blkRowSize + c) = true; colDataSetVal(pCol, pBlk->info.rows, NULL, true); } break; case TSDB_DATA_TYPE_BIGINT: - if (0 == rv) { - tmpBig = (taosRand() % 2) ? BIGINT_FILTER_VALUE + taosRand() % 3 : BIGINT_FILTER_VALUE - taosRand() % 3; - *(int64_t*)(jtCtrl.colRowDataBuf + tbOffset + rowIdx * jtCtrl.blkRowSize + jtCtrl.colRowOffset[c]) = tmpBig; - colDataSetVal(pCol, pBlk->info.rows, &tmpBig, false); - } else { - *(bool*)(jtCtrl.colRowDataBuf + tbOffset + rowIdx * jtCtrl.blkRowSize + c) = true; - colDataSetVal(pCol, pBlk->info.rows, NULL, true); - } + tmpBig = (taosRand() % 2) ? 
BIGINT_FILTER_VALUE + jtCtx.curKeyOffset++ : BIGINT_FILTER_VALUE - jtCtx.curKeyOffset++; + *(int64_t*)(jtCtx.colRowDataBuf + tbOffset + rowIdx * jtCtx.blkRowSize + jtCtx.colRowOffset[c]) = tmpBig; + colDataSetVal(pCol, pBlk->info.rows, (char*)&tmpBig, false); break; default: break; } } + pBlk->info.rows++; } void makeAppendBlkData(SSDataBlock** ppLeft, SSDataBlock** ppRight, int32_t leftGrpRows, int32_t rightGrpRows) { - int64_t totalSize = (leftGrpRows + rightGrpRows) * jtCtrl.blkRowSize; - int64_t rightOffset = leftGrpRows * jtCtrl.blkRowSize; + int64_t totalSize = (leftGrpRows + rightGrpRows) * jtCtx.blkRowSize; + int64_t rightOffset = leftGrpRows * jtCtx.blkRowSize; - if (jtCtrl.colRowDataBufSize < totalSize) { - jtCtrl.colRowDataBuf = taosMemoryRealloc(jtCtrl.colRowDataBuf, totalSize); + if (jtCtx.colRowDataBufSize < totalSize) { + jtCtx.colRowDataBuf = (char*)taosMemoryRealloc(jtCtx.colRowDataBuf, totalSize); } - memset(jtCtrl.colRowDataBuf, 0, totalSize); - + memset(jtCtx.colRowDataBuf, 0, totalSize); + + for (int32_t c = 0; c < MAX_SLOT_NUM; ++c) { + jtCtx.grpOffset[c] = c * TMAX(leftGrpRows, rightGrpRows); + } + + int32_t vRange = TMAX(leftGrpRows / 100, 3); for (int32_t i = 0; i < leftGrpRows; ++i) { if ((*ppLeft)->info.rows >= (*ppLeft)->info.capacity) { *ppLeft = createDummyBlock(LEFT_BLK_ID); - blockDataEnsureCapacity(*ppLeft, jtCtrl.blkRows); - taosArrayPush(jtCtrl.leftBlkList, ppLeft); + blockDataEnsureCapacity(*ppLeft, jtCtx.blkRows); + taosArrayPush(jtCtx.leftBlkList, ppLeft); } - createRowData(*ppLeft, 0, i); + createRowData(*ppLeft, 0, i, vRange); } + vRange = TMAX(rightGrpRows / 100, 3); for (int32_t i = 0; i < rightGrpRows; ++i) { if ((*ppRight)->info.rows >= (*ppRight)->info.capacity) { *ppRight = createDummyBlock(RIGHT_BLK_ID); - blockDataEnsureCapacity(*ppRight, jtCtrl.blkRows); - taosArrayPush(jtCtrl.rightBlkList, ppRight); + blockDataEnsureCapacity(*ppRight, jtCtx.blkRows); + taosArrayPush(jtCtx.rightBlkList, ppRight); } - 
createRowData(*ppRight, rightOffset, i); + createRowData(*ppRight, rightOffset, i, vRange); } } -void putNMatchRowToRes(void* lrow, int32_t tableOffset, int32_t peerOffset) { +void putNMatchRowToRes(char* lrow, int32_t tableOffset, int32_t peerOffset) { + memset(jtCtx.resColBuf, 0, jtCtx.resColSize); + for (int32_t c = 0; c < MAX_SLOT_NUM; ++c) { - if (jtCtrl.resColList[tableOffset + c]) { + if (jtCtx.resColList[tableOffset + c]) { if (*(bool*)(lrow + c)) { - *(bool*)(jtCtrl.resColBuf + tableOffset + c) = true; + *(bool*)(jtCtx.resColBuf + tableOffset + c) = true; } else { - memcpy(jtCtrl.resColBuf + jtCtrl.resColOffset[tableOffset + c], lrow + jtCtrl.colRowOffset[c], tDataTypes[jtInputColType[c]].bytes); + memcpy(jtCtx.resColBuf + jtCtx.resColOffset[tableOffset + c], lrow + jtCtx.colRowOffset[c], tDataTypes[jtInputColType[c]].bytes); } } } for (int32_t c = 0; c < MAX_SLOT_NUM; ++c) { - if (jtCtrl.resColList[peerOffset + c]) { - *(bool*)(jtCtrl.resColBuf + peerOffset + c) = true; + if (jtCtx.resColList[peerOffset + c]) { + *(bool*)(jtCtx.resColBuf + peerOffset + c) = true; } } - tSimpleHashPut(jtCtrl.jtResRows, jtCtrl.resColBuf + jtCtrl.keyColOffset, sizeof(int64_t), jtCtrl.resColBuf, jtCtrl.resColSize); + pushResRow(); } -void putMatchRowToRes(void* lrow, void* rrow) { +void putMatchRowToRes(char* lrow, char* rrow) { + memset(jtCtx.resColBuf, 0, jtCtx.resColSize); + for (int32_t c = 0; c < MAX_SLOT_NUM; ++c) { - if (jtCtrl.resColList[c]) { + if (jtCtx.resColList[c]) { if (*(bool*)(lrow + c)) { - *(bool*)(jtCtrl.resColBuf + c) = true; + *(bool*)(jtCtx.resColBuf + c) = true; } else { - memcpy(jtCtrl.resColBuf + jtCtrl.resColOffset[c], lrow + jtCtrl.colRowOffset[c], tDataTypes[jtInputColType[c]].bytes); + memcpy(jtCtx.resColBuf + jtCtx.resColOffset[c], lrow + jtCtx.colRowOffset[c], tDataTypes[jtInputColType[c]].bytes); } } } for (int32_t c = 0; c < MAX_SLOT_NUM; ++c) { - if (jtCtrl.resColList[MAX_SLOT_NUM + c]) { - if (*(bool*)(lrow + c)) { - 
*(bool*)(jtCtrl.resColBuf + MAX_SLOT_NUM + c) = true; + if (jtCtx.resColList[MAX_SLOT_NUM + c]) { + if (*(bool*)(rrow + c)) { + *(bool*)(jtCtx.resColBuf + MAX_SLOT_NUM + c) = true; } else { - memcpy(jtCtrl.resColBuf + jtCtrl.resColOffset[MAX_SLOT_NUM + c], rrow + jtCtrl.colRowOffset[c], tDataTypes[jtInputColType[c]].bytes); + memcpy(jtCtx.resColBuf + jtCtx.resColOffset[MAX_SLOT_NUM + c], rrow + jtCtx.colRowOffset[c], tDataTypes[jtInputColType[c]].bytes); } } } - tSimpleHashPut(jtCtrl.jtResRows, jtCtrl.resColBuf + jtCtrl.keyColOffset, sizeof(int64_t), jtCtrl.resColBuf, jtCtrl.resColSize); + pushResRow(); } void leftJoinAppendEqGrpRes(int32_t leftGrpRows, int32_t rightGrpRows) { - bool rowMatch = false, filterOut = false; - bool lNullValue = false, rNullValue = false; + bool leftMatch = false, rightMatch = false, filterOut = false; void* lValue = NULL, *rValue = NULL, *filterValue = NULL; - int64_t rightTbOffset = jtCtrl.blkRowSize * leftGrpRows; + int64_t lBig = 0, rBig = 0, fbig = 0; + int64_t rightTbOffset = jtCtx.blkRowSize * leftGrpRows; for (int32_t l = 0; l < leftGrpRows; ++l) { - void* lrow = jtCtrl.colRowDataBuf + jtCtrl.blkRowSize * l; + char* lrow = jtCtx.colRowDataBuf + jtCtx.blkRowSize * l; - rowMatch = false; - lNullValue= false; filterOut = false; + leftMatch = true; for (int32_t c = 0; c < MAX_SLOT_NUM; ++c) { - if (*(bool*)(lrow + c)) { - lNullValue = true; - } - + lValue = lrow + jtCtx.colRowOffset[c]; switch (jtInputColType[c]) { case TSDB_DATA_TYPE_TIMESTAMP: - filterValue = &TIMESTAMP_FILTER_VALUE; + fbig = TIMESTAMP_FILTER_VALUE; + lBig = *(int64_t*)lValue; break; case TSDB_DATA_TYPE_INT: - filterValue = &INT_FILTER_VALUE; + fbig = INT_FILTER_VALUE; + lBig = *(int32_t*)lValue; break; case TSDB_DATA_TYPE_BIGINT: - filterValue = &BIGINT_FILTER_VALUE; + fbig = BIGINT_FILTER_VALUE; + lBig = *(int64_t*)lValue; break; default: - filterValue = NULL; break; } - if (jtCtrl.leftFilterNum && jtCtrl.leftFilterColList[c] && ((*(bool*)(lrow + c)) || 
memcmp(lrow + jtCtrl.colRowOffset[c], filterValue, tDataTypes[jtInputColType[c]].bytes) <= 0)) { + if (jtCtx.leftFilterNum && jtCtx.leftFilterColList[c] && ((*(bool*)(lrow + c)) || lBig <= fbig)) { filterOut = true; break; } + + if (jtCtx.colEqNum && jtCtx.colEqList[c] && (*(bool*)(lrow + c))) { + leftMatch = false; + } } if (filterOut) { continue; } - if (lNullValue) { - putNMatchRowToRes(lrow, 0, MAX_SLOT_NUM); + if (false == leftMatch) { + if (0 == jtCtx.rightFilterNum) { + putNMatchRowToRes(lrow, 0, MAX_SLOT_NUM); + } continue; } - lValue = lrow + jtCtrl.colRowOffset[c]; - + leftMatch = false; for (int32_t r = 0; r < rightGrpRows; ++r) { - void* rrow = jtCtrl.colRowDataBuf + rightTbOffset + jtCtrl.blkRowSize * r; + char* rrow = jtCtx.colRowDataBuf + rightTbOffset + jtCtx.blkRowSize * r; + rightMatch = true; filterOut = false; for (int32_t c = 0; c < MAX_SLOT_NUM; ++c) { - if (*(bool*)(rrow + c)) { - rNullValue = true; - } else { - rValue = rrow + jtCtrl.colRowOffset[c]; + lValue = lrow + jtCtx.colRowOffset[c]; + + if (!*(bool*)(rrow + c)) { + rValue = rrow + jtCtx.colRowOffset[c]; } switch (jtInputColType[c]) { case TSDB_DATA_TYPE_TIMESTAMP: - filterValue = &TIMESTAMP_FILTER_VALUE; + fbig = TIMESTAMP_FILTER_VALUE; + lBig = *(int64_t*)lValue; + rBig = *(int64_t*)rValue; break; case TSDB_DATA_TYPE_INT: - filterValue = &INT_FILTER_VALUE; + fbig = INT_FILTER_VALUE; + lBig = *(int32_t*)lValue; + rBig = *(int32_t*)rValue; break; case TSDB_DATA_TYPE_BIGINT: - filterValue = &BIGINT_FILTER_VALUE; + fbig = BIGINT_FILTER_VALUE; + lBig = *(int64_t*)lValue; + rBig = *(int64_t*)rValue; break; default: - filterValue = NULL; break; } - if (jtCtrl.colEqNum && jtCtrl.colEqList[c] && ((*(bool*)(rrow + c)) || memcmp(lValue, rValue, tDataTypes[jtInputColType[c]].bytes))) { - filterOut = true; + if (jtCtx.colEqNum && jtCtx.colEqList[c] && ((*(bool*)(rrow + c)) || lBig != rBig)) { + rightMatch = false; break; } - if (jtCtrl.colOnNum && jtCtrl.colOnList[c] && ((*(bool*)(rrow + c)) || 
memcmp(lValue, rValue, tDataTypes[jtInputColType[c]].bytes) <= 0)) { - filterOut = true; + if (jtCtx.colOnNum && jtCtx.colOnList[c] && ((*(bool*)(rrow + c)) || lBig <= rBig)) { + rightMatch = false; break; } - if (jtCtrl.rightFilterNum && jtCtrl.rightFilterColList[c] && ((*(bool*)(rrow + c)) || memcmp(rValue, filterValue, tDataTypes[jtInputColType[c]].bytes) <= 0)) { + if (jtCtx.rightFilterNum && jtCtx.rightFilterColList[c] && ((*(bool*)(rrow + c)) || rBig <= fbig)) { filterOut = true; - break; } } + if (rightMatch) { + leftMatch = true; + } + if (filterOut) { continue; } - putMatchRowToRes(lrow, rrow); - rowMatch = true; + if (rightMatch) { + putMatchRowToRes(lrow, rrow); + } } - if (!rowMatch) { + if (!leftMatch && 0 == jtCtx.rightFilterNum) { putNMatchRowToRes(lrow, 0, MAX_SLOT_NUM); } } @@ -919,23 +1146,27 @@ void leftJoinAppendEqGrpRes(int32_t leftGrpRows, int32_t rightGrpRows) { } -void createTsEqGrpRows(SSDataBlock** ppLeft, SSDataBlock** ppRight, int32_t probeBlkId, int32_t leftGrpRows, int32_t rightGrpRows) { +void createTsEqGrpRows(SSDataBlock** ppLeft, SSDataBlock** ppRight, int32_t leftGrpRows, int32_t rightGrpRows) { if (leftGrpRows <= 0 && rightGrpRows <= 0) { return; } - ++jtCtrl.curTs; - - if (NULL == ppLeft) { - *ppLeft = createDummyBlock(LEFT_BLK_ID); - blockDataEnsureCapacity(*ppLeft, jtCtrl.blkRows); - taosArrayPush(jtCtrl.leftBlkList, ppLeft); + if (leftGrpRows > 0 && rightGrpRows > 0) { + jtCtx.inputStat |= (1 << 2); } - if (NULL == ppRight) { + ++jtCtx.curTs; + + if (NULL == *ppLeft && leftGrpRows > 0) { + *ppLeft = createDummyBlock(LEFT_BLK_ID); + blockDataEnsureCapacity(*ppLeft, jtCtx.blkRows); + taosArrayPush(jtCtx.leftBlkList, ppLeft); + } + + if (NULL == *ppRight && rightGrpRows > 0) { *ppRight = createDummyBlock(RIGHT_BLK_ID); - blockDataEnsureCapacity(*ppRight, jtCtrl.blkRows); - taosArrayPush(jtCtrl.rightBlkList, ppRight); + blockDataEnsureCapacity(*ppRight, jtCtx.blkRows); + taosArrayPush(jtCtx.rightBlkList, ppRight); } @@ -949,36 
+1180,41 @@ void createBothBlkRowsData(void) { SSDataBlock* pLeft = NULL; SSDataBlock* pRight = NULL; - bool leftEnd = taosRand() % 2 == 0; - bool rightEnd = taosRand() % 2 == 0; + jtCtx.leftTotalRows = taosRand() % jtCtx.leftMaxRows; + jtCtx.rightTotalRows = taosRand() % jtCtx.rightMaxRows; - jtCtrl.leftTotalRows = taosRand() % jtCtrl.leftMaxRows; - jtCtrl.rightTotalRows = taosRand() % jtCtrl.rightMaxRows; - - int32_t minTotalRows = TMIN(jtCtrl.leftTotalRows, jtCtrl.rightTotalRows); - jtCtrl.curTs = TIMESTAMP_FILTER_VALUE - minTotalRows / 5; + int32_t minTotalRows = TMIN(jtCtx.leftTotalRows, jtCtx.rightTotalRows); + jtCtx.curTs = TIMESTAMP_FILTER_VALUE - minTotalRows / 5; int32_t leftTotalRows = 0, rightTotalRows = 0; int32_t leftGrpRows = 0, rightGrpRows = 0; int32_t grpType = 0; - while (leftTotalRows < jtCtrl.leftTotalRows && rightTotalRows < jtCtrl.rightTotalRows) { - if (leftTotalRows >= jtCtrl.leftTotalRows) { + while (leftTotalRows < jtCtx.leftTotalRows || rightTotalRows < jtCtx.rightTotalRows) { + if (leftTotalRows >= jtCtx.leftTotalRows) { grpType = 1; - } else if (rightTotalRows >= jtCtrl.rightTotalRows) { - grpType = 0 + } else if (rightTotalRows >= jtCtx.rightTotalRows) { + grpType = 0; } else { - grpType = taosRand() % 3; + grpType = taosRand() % 10; } - leftGrpRows = taosRand() % jtCtrl.leftMaxGrpRows; - rightGrpRows = taosRand() % jtCtrl.rightMaxGrpRows; + leftGrpRows = taosRand() % jtCtx.leftMaxGrpRows; + rightGrpRows = taosRand() % jtCtx.rightMaxGrpRows; - if ((leftTotalRows + leftGrpRows) > jtCtrl.leftTotalRows) { - leftGrpRows = jtCtrl.leftTotalRows - leftTotalRows; + if ((leftTotalRows + leftGrpRows) > jtCtx.leftTotalRows) { + leftGrpRows = jtCtx.leftTotalRows - leftTotalRows; } - if ((rightTotalRows + rightGrpRows) > jtCtrl.rightTotalRows) { - rightGrpRows = jtCtrl.rightTotalRows - rightTotalRows; + if ((rightTotalRows + rightGrpRows) > jtCtx.rightTotalRows) { + rightGrpRows = jtCtx.rightTotalRows - rightTotalRows; + } + + if (0 != grpType && 
1 != grpType && (leftGrpRows <= 0 || rightGrpRows <= 0)) { + if (leftGrpRows <= 0) { + grpType = 1; + } else { + grpType = 0; + } } switch (grpType) { @@ -990,7 +1226,7 @@ void createBothBlkRowsData(void) { createGrpRows(&pRight, RIGHT_BLK_ID, rightGrpRows); rightTotalRows += rightGrpRows; break; - case 2: + default: createTsEqGrpRows(&pLeft, &pRight, leftGrpRows, rightGrpRows); leftTotalRows += leftGrpRows; rightTotalRows += rightGrpRows; @@ -1000,11 +1236,11 @@ void createBothBlkRowsData(void) { } void createDummyBlkList(int32_t leftMaxRows, int32_t leftMaxGrpRows, int32_t rightMaxRows, int32_t rightMaxGrpRows, int32_t blkRows) { - jtCtrl.leftMaxRows = leftMaxRows; - jtCtrl.leftMaxGrpRows = leftMaxGrpRows; - jtCtrl.rightMaxRows = rightMaxRows; - jtCtrl.rightMaxGrpRows = rightMaxGrpRows; - jtCtrl.blkRows = blkRows; + jtCtx.leftMaxRows = leftMaxRows; + jtCtx.leftMaxGrpRows = leftMaxGrpRows; + jtCtx.rightMaxRows = rightMaxRows; + jtCtx.rightMaxGrpRows = rightMaxGrpRows; + jtCtx.blkRows = blkRows; createBothBlkRowsData(); } @@ -1019,16 +1255,16 @@ void rerunBlockedHere() { SSDataBlock* getDummyInputBlock(struct SOperatorInfo* pOperator, int32_t idx) { switch (idx) { case LEFT_BLK_ID: - if (jtCtrl.leftBlkReadIdx >= taosArrayGetSize(jtCtrl.leftBlkList)) { + if (jtCtx.leftBlkReadIdx >= taosArrayGetSize(jtCtx.leftBlkList)) { return NULL; } - return taosArrayGet(jtCtrl.leftBlkList, jtCtrl.leftBlkReadIdx++); + return (SSDataBlock*)taosArrayGetP(jtCtx.leftBlkList, jtCtx.leftBlkReadIdx++); break; case RIGHT_BLK_ID: - if (jtCtrl.rightBlkReadIdx >= taosArrayGetSize(jtCtrl.rightBlkList)) { + if (jtCtx.rightBlkReadIdx >= taosArrayGetSize(jtCtx.rightBlkList)) { return NULL; } - return taosArrayGet(jtCtrl.rightBlkList, jtCtrl.rightBlkReadIdx++); + return (SSDataBlock*)taosArrayGetP(jtCtx.rightBlkList, jtCtx.rightBlkReadIdx++); break; default: return NULL; @@ -1043,15 +1279,15 @@ void joinTestReplaceRetrieveFp() { #ifdef WINDOWS AddrAny any; std::map result; - 
any.get_func_addr("rpcSendRecv", result); + any.get_func_addr("getNextBlockFromDownstreamRemain", result); #endif #ifdef LINUX - AddrAny any("libtransport.so"); + AddrAny any("libexecutor.so"); std::map result; - any.get_global_func_addr_dynsym("^rpcSendRecv$", result); + any.get_global_func_addr_dynsym("^getNextBlockFromDownstreamRemain$", result); #endif for (const auto &f : result) { - stub.set(f.second, ctgTestRspDbVgroups); + stub.set(f.second, getDummyInputBlock); } } } @@ -1059,7 +1295,7 @@ void joinTestReplaceRetrieveFp() { void printColList(char* title, bool left, int32_t* colList, bool filter, char* opStr) { bool first = true; - printf("\t%s:", title); + printf("\t %s:", title); for (int32_t i = 0; i < MAX_SLOT_NUM; ++i) { if (colList[i]) { if (!first) { @@ -1076,106 +1312,218 @@ void printColList(char* title, bool left, int32_t* colList, bool filter, char* o printf("\n"); } +void printInputRowData(SSDataBlock* pBlk, int32_t* rowIdx) { + for (int32_t c = 0; c < MAX_SLOT_NUM; ++c) { + SColumnInfoData* pCol = (SColumnInfoData*)taosArrayGet(pBlk->pDataBlock, c); + ASSERT(pCol->info.type == jtInputColType[c]); + if (colDataIsNull_s(pCol, *rowIdx)) { + printf("%18s", " NULL"); + } else { + switch (jtInputColType[c]) { + case TSDB_DATA_TYPE_TIMESTAMP: + case TSDB_DATA_TYPE_BIGINT: + printf("%18" PRId64, *(int64_t*)colDataGetData(pCol, *rowIdx)); + break; + case TSDB_DATA_TYPE_INT: + printf("%18d", *(int32_t*)colDataGetData(pCol, *rowIdx)); + break; + default: + ASSERT(0); + } + } + } + + (*rowIdx)++; +} + +void printInputData() { + int32_t leftRowIdx = 0, rightRowIdx = 0; + + printf("\nInput Data:\n"); + while (jtCtx.leftBlkReadIdx < taosArrayGetSize(jtCtx.leftBlkList) || jtCtx.rightBlkReadIdx < taosArrayGetSize(jtCtx.rightBlkList)) { + if (jtCtx.leftBlkReadIdx < taosArrayGetSize(jtCtx.leftBlkList)) { + while (true) { + SSDataBlock* pBlk = (SSDataBlock*)taosArrayGetP(jtCtx.leftBlkList, jtCtx.leftBlkReadIdx); + if (leftRowIdx < pBlk->info.rows) { + 
printInputRowData(pBlk, &leftRowIdx); + break; + } + + printf("\t--------------------------blk end-------------------------------"); + jtCtx.leftBlkReadIdx++; + leftRowIdx = 0; + break; + } + } else { + printf("%72s", " "); + } + + if (jtCtx.rightBlkReadIdx < taosArrayGetSize(jtCtx.rightBlkList)) { + while (true) { + SSDataBlock* pBlk = (SSDataBlock*)taosArrayGetP(jtCtx.rightBlkList, jtCtx.rightBlkReadIdx); + if (rightRowIdx < pBlk->info.rows) { + printInputRowData(pBlk, &rightRowIdx); + break; + } + + printf("\t--------------------------blk end----------------------------\t"); + jtCtx.rightBlkReadIdx++; + rightRowIdx = 0; + break; + } + } + + printf("\n"); + } + + jtCtx.leftBlkReadIdx = jtCtx.rightBlkReadIdx = 0; +} + +char* getInputStatStr(char* inputStat) { + if (jtCtx.inputStat & (1 << LEFT_BLK_ID)) { + strcat(inputStat, "L"); + } + if (jtCtx.inputStat & (1 << RIGHT_BLK_ID)) { + strcat(inputStat, "R"); + } + if (jtCtx.inputStat & (1 << 2)) { + strcat(inputStat, "E"); + } + return inputStat; +} + void printBasicInfo(char* caseName) { - printf("\n TEST %s START\nBasic Info:\n\t asc:%d\n\t filter:%d\n\t leftMaxRows:%d\n\t leftMaxGrpRows:%d\n\t " - "rightMaxRows:%d\n\t rightMaxGrpRows:%d\n\t blkRows:%d\n\t colCond:%s\n\t \n\tjoinType:%s\n\t " - "subType:%s\n ", caseName, jtCtrl.asc, jtCtrl.filter, jtCtrl.leftMaxRows, jtCtrl.leftMaxGrpRows, - jtCtrl.rightMaxRows, jtCtrl.rightMaxGrpRows, jtCtrl.blkRows, jtColCondStr[jtCtrl.colCond], jtJoinTypeStr[jtCtrl.joinType], - jtSubTypeStr[jtCtrl.subType]); + if (!jtCtrl.printTestInfo) { + return; + } + + char inputStat[4] = {0}; + printf("\n%dth TEST [%s] START\nBasic Info:\n\t asc:%d\n\t filter:%d\n\t maxRows:left-%d right-%d\n\t " + "maxGrpRows:left-%d right-%d\n\t blkRows:%d\n\t colCond:%s\n\t joinType:%s\n\t " + "subType:%s\n\t inputStat:%s\n", jtCtx.loopIdx, caseName, jtCtx.asc, jtCtx.filter, jtCtx.leftMaxRows, jtCtx.rightMaxRows, + jtCtx.leftMaxGrpRows, jtCtx.rightMaxGrpRows, jtCtx.blkRows, jtColCondStr[jtCtx.colCond], 
jtJoinTypeStr[jtCtx.joinType], + jtSubTypeStr[jtCtx.subType], getInputStatStr(inputStat)); - printf("\nInput Info:\n\t leftBlkRead:%d\n\t leftTotalBlk:%d\n\t leftTotalRows:%d\n\t rightBlkRead:%d\n\t " - "rightTotalBlk:%d\n\t rightTotalRows:%d\n\t blkRowSize:%d\n\t leftCols:%s %s %s %s\n\t rightCols:%s %s %s %s\n", - caseName, jtCtrl.leftBlkReadIdx, taosArrayGetSize(jtCtrl.leftBlkList), - jtCtrl.leftTotalRows, jtCtrl.rightBlkReadIdx, taosArrayGetSize(jtCtrl.rightBlkList), jtCtrl.rightTotalRows, - jtCtrl.blkRowSize, tDataTypes[jtInputColType[0]].name, tDataTypes[jtInputColType[1]].name, + printf("Input Info:\n\t totalBlk:left-%d right-%d\n\t totalRows:left-%d right-%d\n\t " + "blkRowSize:%d\n\t inputCols:left-%s %s %s %s right-%s %s %s %s\n", + (int32_t)taosArrayGetSize(jtCtx.leftBlkList), (int32_t)taosArrayGetSize(jtCtx.rightBlkList), + jtCtx.leftTotalRows, jtCtx.rightTotalRows, + jtCtx.blkRowSize, tDataTypes[jtInputColType[0]].name, tDataTypes[jtInputColType[1]].name, tDataTypes[jtInputColType[2]].name, tDataTypes[jtInputColType[3]].name, tDataTypes[jtInputColType[0]].name, - tDataTypes[jtInputColType[1]].name, tDataTypes[jtInputColType[2]].name, tDataTypes[jtInputColType[3]].name, - jtCtrl.colEqNum); + tDataTypes[jtInputColType[1]].name, tDataTypes[jtInputColType[2]].name, tDataTypes[jtInputColType[3]].name); - if (jtCtrl.colEqNum) { - printf("colEqNum:%d\n", jtCtrl.colEqNum); - printColList("colEqList", false, jtCtrl.colEqList, false, "="); + if (jtCtx.colEqNum) { + printf("\t colEqNum:%d\n", jtCtx.colEqNum); + printColList("colEqList", false, jtCtx.colEqList, false, "="); } - if (jtCtrl.colOnNum) { - printf("colOnNum:%d\n", jtCtrl.colOnNum); - printColList("colOnList", false, jtCtrl.colOnList, false, ">"); + if (jtCtx.colOnNum) { + printf("\t colOnNum:%d\n", jtCtx.colOnNum); + printColList("colOnList", false, jtCtx.colOnList, false, ">"); } - if (jtCtrl.colOnNum) { - printf("colOnNum:%d\n", jtCtrl.colOnNum); - printColList("colOnList", false, jtCtrl.colOnList, 
false, ">"); - } - - if (jtCtrl.leftFilterNum) { - printf("leftFilterNum:%d\n", jtCtrl.leftFilterNum); - printColList("leftFilterList", true, jtCtrl.leftFilterColList, true, ">"); + if (jtCtx.leftFilterNum) { + printf("\t leftFilterNum:%d\n", jtCtx.leftFilterNum); + printColList("leftFilterList", true, jtCtx.leftFilterColList, true, ">"); } - if (jtCtrl.rightFilterNum) { - printf("rightFilterNum:%d\n", jtCtrl.rightFilterNum); - printColList("rightFilterList", false, jtCtrl.rightFilterColList, true, ">"); + if (jtCtx.rightFilterNum) { + printf("\t rightFilterNum:%d\n", jtCtx.rightFilterNum); + printColList("rightFilterList", false, jtCtx.rightFilterColList, true, ">"); } - printf("\tresColSize:%d\n\t resColNum:%d\n\t resColList:"); - for (int32_t i = 0; i < jtCtrl.resColNum; ++i) { - int32_t s = jtCtrl.resColInSlot[i]; - printf("%sc%d[%s]\t", s >= MAX_SLOT_NUM ? "r" : "l", s, tDataTypes[jtInputColType[s]].name); + printf("\t resColSize:%d\n\t resColNum:%d\n\t resColList:", jtCtx.resColSize, jtCtx.resColNum); + for (int32_t i = 0; i < jtCtx.resColNum; ++i) { + int32_t s = jtCtx.resColInSlot[i]; + int32_t idx = s >= MAX_SLOT_NUM ? s - MAX_SLOT_NUM : s; + printf("%sc%d[%s]\t", s >= MAX_SLOT_NUM ? "r" : "l", s, tDataTypes[jtInputColType[idx]].name); + } + + if (jtCtrl.printInputRow) { + printInputData(); } } void printOutputInfo() { - printf("\nOutput Info:\n\t expectedRows:%d\n\t ", tSimpleHashGetSize(jtCtrl.jtResRows)); - printf("Actual Result:\n\t"); + if (!jtCtrl.printTestInfo) { + return; + } + + printf("\nOutput Info:\n\t expectedRows:%d\n\t ", jtCtx.resRows); + printf("Actual Result:\n"); } -void printActualResInfo(int32_t expectedRows) { - printf("\nActual Result Summary:\n\t blkNum:%d\n\t rowNum:%d%s\n\t +rows:%d%s\n\t " - "-rows:%d%s\n\t mismatchRows:%d%s\n\t matchRows:%d%s\n", - jtRes.blkNum, jtRes.rowNum, jtRes.rowNum == expectedRows ? 
"" : "*", +void printActualResInfo() { + if (!jtCtrl.printTestInfo) { + return; + } + + printf("Actual Result Summary:\n\t blkNum:%d\n\t rowNum:%d%s\n\t leftBlkRead:%d\n\t rightBlkRead:%d\n\t +rows:%d%s\n\t " + "-rows:%d%s\n\t matchRows:%d%s\n", + jtRes.blkNum, jtRes.rowNum, + jtRes.rowNum == jtCtx.resRows ? "" : "*", + jtCtx.leftBlkReadIdx, jtCtx.rightBlkReadIdx, jtRes.addRowNum, jtRes.addRowNum ? "*" : "", jtRes.subRowNum, jtRes.subRowNum ? "*" : "", - jtRes.mismatchNum, jtRes.mismatchNum ? "*" : "", - jtRes.matchNum, jtRes.matchNum == expectedRows ? "" : "*"); + jtRes.matchNum, jtRes.matchNum == jtCtx.resRows ? "" : "*"); +} + +void printStatInfo(char* caseName) { + printf("\n TEST [%s] Stat:\n\t maxResRows:%d\n\t maxResBlkRows:%d\n\t totalResRows:%" PRId64 "\n\t useMSecs:%" PRId64 "\n", + caseName, jtStat.maxResRows, jtStat.maxResBlkRows, jtStat.totalResRows, jtStat.useMSecs); + } void checkJoinDone(char* caseName) { int32_t iter = 0; void* p = NULL; - while (NULL != (p = tSimpleHashIterate(jtCtrl.jtResRows, p, &iter))) { + void* key = NULL; + while (NULL != (p = tSimpleHashIterate(jtCtx.jtResRows, p, &iter))) { + key = tSimpleHashGetKey(p, NULL); jtRes.succeed = false; - jtRes.subRowNum++; - printResRow(p, 0); + jtRes.subRowNum += *(int32_t*)p; + for (int32_t i = 0; i < *(int32_t*)p; ++i) { + printResRow((char*)key, 0); + } } - printActualResInfo(tSimpleHashGetSize(jtCtrl.jtResRows)); - printf("\n%s Final Result: %s\n", caseName, jtRes.succeed ? "SUCCEED" : "FAILED"); - + printActualResInfo(); + + printf("\n%dth TEST [%s] Final Result: %s\n", jtCtx.loopIdx, caseName, jtRes.succeed ? 
"SUCCEED" : "FAILED"); } void checkJoinRes(SSDataBlock* pBlock) { jtRes.rowNum += pBlock->info.rows; + if (jtRes.rowNum > jtStat.maxResRows) { + jtStat.maxResRows = jtRes.rowNum; + } jtRes.blkNum++; + if (pBlock->info.rows > jtStat.maxResBlkRows) { + jtStat.maxResBlkRows = pBlock->info.rows; + } + + jtStat.totalResRows += pBlock->info.rows; for (int32_t r = 0; r < pBlock->info.rows; ++r) { - for (int32_t c = 0; c < jtCtrl.resColNum; ++c) { - int32_t slot = jtCtrl.resColInSlot[c]; - SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, c); - memset(jtCtrl.resColBuf, 0, jtCtrl.resColSize); + memset(jtCtx.resColBuf, 0, jtCtx.resColSize); + + for (int32_t c = 0; c < jtCtx.resColNum; ++c) { + int32_t slot = jtCtx.resColInSlot[c]; + SColumnInfoData* pCol = (SColumnInfoData*)taosArrayGet(pBlock->pDataBlock, c); switch (jtInputColType[slot % MAX_SLOT_NUM]) { case TSDB_DATA_TYPE_TIMESTAMP: case TSDB_DATA_TYPE_BIGINT: if (colDataIsNull_s(pCol, r)) { - *(bool*)(jtCtrl.resColBuf + slot) = true; + *(bool*)(jtCtx.resColBuf + slot) = true; } else { - *(int64_t*)(jtCtrl.resColBuf + jtCtrl.resColOffset[slot]) = *(int64_t*)colDataGetData(pCol, r); + *(int64_t*)(jtCtx.resColBuf + jtCtx.resColOffset[slot]) = *(int64_t*)colDataGetData(pCol, r); } break; case TSDB_DATA_TYPE_INT: if (colDataIsNull_s(pCol, r)) { - *(bool*)(jtCtrl.resColBuf + slot) = true; + *(bool*)(jtCtx.resColBuf + slot) = true; } else { - *(int32_t*)(jtCtrl.resColBuf + jtCtrl.resColOffset[slot]) = *(int32_t*)colDataGetData(pCol, r); + *(int32_t*)(jtCtx.resColBuf + jtCtx.resColOffset[slot]) = *(int32_t*)colDataGetData(pCol, r); } break; default: @@ -1183,36 +1531,34 @@ void checkJoinRes(SSDataBlock* pBlock) { } } - void* value = tSimpleHashGet(jtCtrl.jtResRows, jtCtrl.resColBuf + jtCtrl.keyColOffset, sizeof(int64_t)); + char* value = (char*)tSimpleHashGet(jtCtx.jtResRows, jtCtx.resColBuf, jtCtx.resColSize); if (NULL == value) { - printResRow(jtCtrl.resColBuf, 1); + printResRow(jtCtx.resColBuf, 1); jtRes.succeed = 
false; jtRes.addRowNum++; continue; } - if (memcmp(value, jtCtrl.resColBuf, jtCtrl.resColSize)) { - printResRow(jtCtrl.resColBuf, 1); - printResRow(value, 0); - tSimpleHashRemove(jtCtrl.jtResRows, jtCtrl.resColBuf + jtCtrl.keyColOffset, sizeof(int64_t)); - jtRes.succeed = false; - jtRes.mismatchNum++; - continue; - } - + printResRow(jtCtx.resColBuf, 2); jtRes.matchNum++; + rmResRow(); } } void resetForJoinRerun(SOperatorInfo* pDownstreams, int32_t dsNum, SSortMergeJoinPhysiNode* pNode, SExecTaskInfo* pTask) { - jtCtrl.leftBlkReadIdx = 0; - jtCtrl.rightBlkReadIdx = 0; + jtCtx.leftBlkReadIdx = 0; + jtCtx.rightBlkReadIdx = 0; + jtCtx.curKeyOffset = 0; - jtCtrl.pJoinOp = createMergeJoinOperatorInfo(&pDownstreams, 2, pNode, pTask); - ASSERT_TRUE(NULL != jtCtrl.pJoinOp); + memset(&jtRes, 0, sizeof(jtRes)); + jtRes.succeed = true; + + SOperatorInfo* ppDownstreams[] = {pDownstreams, pDownstreams + 1}; + jtCtx.pJoinOp = createMergeJoinOperatorInfo(ppDownstreams, 2, pNode, pTask); + ASSERT_TRUE(NULL != jtCtx.pJoinOp); } -void handleJoinError(bool* contLoop) { +void handleJoinDone(bool* contLoop) { if (jtRes.succeed) { *contLoop = false; return; @@ -1225,42 +1571,205 @@ void handleJoinError(bool* contLoop) { jtInRerun = true; - destroyMergeJoinOperator(jtCtrl.pJoinOp); - jtCtrl.pJoinOp = NULL; + jtCtx.pJoinOp->fpSet.closeFn(jtCtx.pJoinOp); + jtCtx.pJoinOp = NULL; } -} // namespace +void jtInitLogFile() { + const char *defaultLogFileNamePrefix = "jtlog"; + const int32_t maxLogFileNum = 10; -TEST(leftOuterJoin, noCondTest) { - char* caseName = "leftOuterJoin:noCondTest"; - SOperatorInfo* pDownstreams = createDummyDownstreamOperators(2); - SSortMergeJoinPhysiNode* pNode = createDummySortMergeJoinPhysiNode(JOIN_TYPE_LEFT, JOIN_STYPE_OUTER, TEST_NO_COND, false, true); - SExecTaskInfo* pTask = createDummyTaskInfo(caseName); + tsAsyncLog = 0; + qDebugFlag = 159; + strcpy(tsLogDir, TD_LOG_DIR_PATH); + + if (taosInitLog(defaultLogFileNamePrefix, maxLogFileNum) < 0) { + printf("failed 
to open log file in directory:%s\n", tsLogDir); + } +} + + + +void initJoinTest() { + jtCtx.leftBlkList = taosArrayInit(10, POINTER_BYTES); + jtCtx.rightBlkList = taosArrayInit(10, POINTER_BYTES); + + jtCtx.jtResRows = tSimpleHashInit(10000000, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY)); + + joinTestReplaceRetrieveFp(); + + if (jtCtrl.logHistory) { + jtStat.pHistory = taosArrayInit(100000, sizeof(SJoinTestHistory)); + } + + jtInitLogFile(); +} + +void handleTestDone() { + if (jtCtrl.logHistory) { + SJoinTestHistory h; + memcpy(&h.ctx, &jtCtx, sizeof(h.ctx)); + memcpy(&h.res, &jtRes, sizeof(h.res)); + taosArrayPush(jtStat.pHistory, &h); + } + + int32_t blkNum = taosArrayGetSize(jtCtx.leftBlkList); + for (int32_t i = 0; i < blkNum; ++i) { + SSDataBlock* pBlk = (SSDataBlock*)taosArrayGetP(jtCtx.leftBlkList, i); + blockDataDestroy(pBlk); + } + taosArrayClear(jtCtx.leftBlkList); + + blkNum = taosArrayGetSize(jtCtx.rightBlkList); + for (int32_t i = 0; i < blkNum; ++i) { + SSDataBlock* pBlk = (SSDataBlock*)taosArrayGetP(jtCtx.rightBlkList, i); + blockDataDestroy(pBlk); + } + taosArrayClear(jtCtx.rightBlkList); + + tSimpleHashClear(jtCtx.jtResRows); + jtCtx.resRows = 0; + + jtCtx.inputStat = 0; +} + +void runSingleTest(char* caseName, SJoinTestParam* param) { bool contLoop = true; - createDummyBlkList(10, 10, 10, 10, 2); + SSortMergeJoinPhysiNode* pNode = createDummySortMergeJoinPhysiNode(param->joinType, param->subType, param->cond, param->filter, param->asc); + createDummyBlkList(200, 200, 200, 200, 10); while (contLoop) { rerunBlockedHere(); - resetForJoinRerun(pDownstreams, 2, pNode, pTask); - printBasicInfo(); + resetForJoinRerun(param->pDownstreams, 2, pNode, param->pTask); + printBasicInfo(caseName); printOutputInfo(); - - SSDataBlock* pBlock = jtCtrl.pJoinOp->fpSet.getNextFn(jtCtrl.pJoinOp); - if (NULL == pBlock) { - checkJoinDone(caseName); - } else { - checkJoinRes(pBlock); - continue; + + while (true) { + SSDataBlock* pBlock = 
jtCtx.pJoinOp->fpSet.getNextFn(jtCtx.pJoinOp); + if (NULL == pBlock) { + checkJoinDone(caseName); + break; + } else { + checkJoinRes(pBlock); + } } - - handleJoinError(&contLoop); + + handleJoinDone(&contLoop); } - //ASSERT_EQ(num, ekeyNum - pos + 1); + nodesDestroyNode((SNode*)pNode); + handleTestDone(); } +} // namespace + +#if 1 +TEST(leftOuterJoin, noCondTest) { + char* caseName = "leftOuterJoin:noCondTest"; + SOperatorInfo* pDownstreams = createDummyDownstreamOperators(2); + SExecTaskInfo* pTask = createDummyTaskInfo(caseName); + SJoinTestParam param; + + param.pDownstreams = pDownstreams; + param.pTask = pTask; + param.joinType = JOIN_TYPE_LEFT; + param.subType = JOIN_STYPE_OUTER; + param.cond = TEST_NO_COND; + param.asc = true; + + for (jtCtx.loopIdx = 0; jtCtx.loopIdx < JT_MAX_LOOP; ++jtCtx.loopIdx) { + param.filter = false; + runSingleTest(caseName, ¶m); + + param.filter = true; + runSingleTest(caseName, ¶m); + } + + printStatInfo(caseName); +} +#endif + +#if 1 +TEST(leftOuterJoin, eqCondTest) { + char* caseName = "leftOuterJoin:eqCondTest"; + SOperatorInfo* pDownstreams = createDummyDownstreamOperators(2); + SExecTaskInfo* pTask = createDummyTaskInfo(caseName); + SJoinTestParam param; + + param.pDownstreams = pDownstreams; + param.pTask = pTask; + param.joinType = JOIN_TYPE_LEFT; + param.subType = JOIN_STYPE_OUTER; + param.cond = TEST_EQ_COND; + param.asc = true; + + for (jtCtx.loopIdx = 0; jtCtx.loopIdx < JT_MAX_LOOP; ++jtCtx.loopIdx) { + param.filter = false; + runSingleTest(caseName, ¶m); + + param.filter = true; + runSingleTest(caseName, ¶m); + } + + printStatInfo(caseName); +} +#endif + +#if 1 +TEST(leftOuterJoin, onCondTest) { + char* caseName = "leftOuterJoin:onCondTest"; + SOperatorInfo* pDownstreams = createDummyDownstreamOperators(2); + SExecTaskInfo* pTask = createDummyTaskInfo(caseName); + SJoinTestParam param; + + param.pDownstreams = pDownstreams; + param.pTask = pTask; + param.joinType = JOIN_TYPE_LEFT; + param.subType = JOIN_STYPE_OUTER; + 
param.cond = TEST_ON_COND; + param.asc = true; + + for (jtCtx.loopIdx = 0; jtCtx.loopIdx < JT_MAX_LOOP; ++jtCtx.loopIdx) { + param.filter = false; + runSingleTest(caseName, ¶m); + + param.filter = true; + runSingleTest(caseName, ¶m); + } + + printStatInfo(caseName); +} +#endif + +#if 1 +TEST(leftOuterJoin, fullCondTest) { + char* caseName = "leftOuterJoin:fullCondTest"; + SOperatorInfo* pDownstreams = createDummyDownstreamOperators(2); + SExecTaskInfo* pTask = createDummyTaskInfo(caseName); + SJoinTestParam param; + + param.pDownstreams = pDownstreams; + param.pTask = pTask; + param.joinType = JOIN_TYPE_LEFT; + param.subType = JOIN_STYPE_OUTER; + param.cond = TEST_FULL_COND; + param.asc = true; + + for (jtCtx.loopIdx = 0; jtCtx.loopIdx < JT_MAX_LOOP; ++jtCtx.loopIdx) { + param.filter = false; + runSingleTest(caseName, ¶m); + + param.filter = true; + runSingleTest(caseName, ¶m); + } + + printStatInfo(caseName); + +} +#endif + int main(int argc, char** argv) { taosSeedRand(taosGetTimestampSec()); diff --git a/source/libs/planner/src/planOptimizer.c b/source/libs/planner/src/planOptimizer.c index cf4db4cb70..c44c8e89b9 100644 --- a/source/libs/planner/src/planOptimizer.c +++ b/source/libs/planner/src/planOptimizer.c @@ -1079,7 +1079,10 @@ static int32_t pdcJoinAddPreFilterColsToTarget(SOptimizeContext* pCxt, SJoinLogi if (NULL == pCondCols) { code = TSDB_CODE_OUT_OF_MEMORY; } else { - code = nodesCollectColumnsFromNode(pJoin->pFullOnCond, NULL, COLLECT_COL_TYPE_ALL, &pCondCols); + code = nodesCollectColumnsFromNode(pJoin->pColOnCond, NULL, COLLECT_COL_TYPE_ALL, &pCondCols); + } + if (TSDB_CODE_SUCCESS == code) { + code = nodesCollectColumnsFromNode(pJoin->pTagOnCond, NULL, COLLECT_COL_TYPE_ALL, &pCondCols); } if (TSDB_CODE_SUCCESS == code) { code = createColumnByRewriteExprs(pCondCols, &pTargets);