diff --git a/source/libs/executor/inc/mergejoin.h b/source/libs/executor/inc/mergejoin.h index ae3893b965..a25c26a389 100755 --- a/source/libs/executor/inc/mergejoin.h +++ b/source/libs/executor/inc/mergejoin.h @@ -19,10 +19,17 @@ extern "C" { #endif +#if 0 #define MJOIN_DEFAULT_BLK_ROWS_NUM 2 //4096 -#define MJOIN_HJOIN_CART_THRESHOLD 1024 //16 +#define MJOIN_HJOIN_CART_THRESHOLD 16 #define MJOIN_BLK_SIZE_LIMIT 0 //10485760 #define MJOIN_ROW_BITMAP_SIZE (2 * 1048576) +#else +#define MJOIN_DEFAULT_BLK_ROWS_NUM 4096 +#define MJOIN_HJOIN_CART_THRESHOLD 16 +#define MJOIN_BLK_SIZE_LIMIT 10485760 +#define MJOIN_ROW_BITMAP_SIZE (2 * 1048576) +#endif struct SMJoinOperatorInfo; @@ -94,6 +101,7 @@ typedef struct SMJoinTableCtx { SMJoinColMap* finCols; int32_t keyNum; + int32_t keyNullSize; SMJoinColInfo* keyCols; char* keyBuf; char* keyData; @@ -212,7 +220,7 @@ typedef struct SMJoinOperatorInfo { #define MJOIN_DS_NEED_INIT(_pOp, _tbctx) (MJOIN_DS_REQ_INIT(_pOp) && (!(_tbctx)->dsInitDone)) #define MJOIN_TB_LOW_BLK(_tbctx) ((_tbctx)->blkNum <= 0 || ((_tbctx)->blkNum == 1 && (_tbctx)->pHeadBlk->cloned)) -#define REACH_HJOIN_THRESHOLD(_prb, _bld) ((_prb)->grpTotalRows * (_bld)->grpTotalRows >= MJOIN_HJOIN_CART_THRESHOLD) +#define REACH_HJOIN_THRESHOLD(_prb, _bld) ((_bld)->grpTotalRows >= MJOIN_HJOIN_CART_THRESHOLD) #define SET_SAME_TS_GRP_HJOIN(_pair, _octx) ((_pair)->hashJoin = (_octx)->hashCan && REACH_HJOIN_THRESHOLD(_pair)) diff --git a/source/libs/executor/src/mergejoinoperator.c b/source/libs/executor/src/mergejoinoperator.c index 36fa07ea52..4b5cca0ad7 100644 --- a/source/libs/executor/src/mergejoinoperator.c +++ b/source/libs/executor/src/mergejoinoperator.c @@ -472,12 +472,18 @@ static int32_t mJoinInitColsInfo(int32_t* colNum, int64_t* rowSize, SMJoinColInf } -static int32_t mJoinInitKeyColsInfo(SMJoinTableCtx* pTable, SNodeList* pList) { +static int32_t mJoinInitKeyColsInfo(SMJoinTableCtx* pTable, SNodeList* pList, bool allocKeyBuf) { int64_t rowSize = 0; MJ_ERR_RET(mJoinInitColsInfo(&pTable->keyNum, &rowSize, &pTable->keyCols, pList)); - if (pTable->keyNum > 1) { - pTable->keyBuf = taosMemoryMalloc(rowSize); + if (pTable->keyNum > 1 || allocKeyBuf) { + if (rowSize > 1) { + pTable->keyNullSize = 1; + } else { + pTable->keyNullSize = 2; + } + + pTable->keyBuf = taosMemoryMalloc(TMAX(rowSize, pTable->keyNullSize)); if (NULL == pTable->keyBuf) { return TSDB_CODE_OUT_OF_MEMORY; } @@ -516,7 +522,7 @@ static int32_t mJoinInitTableInfo(SMJoinOperatorInfo* pJoin, SSortMergeJoinPhysi pTable->blkId = pDownstream[idx]->resultDataBlockId; MJ_ERR_RET(mJoinInitPrimKeyInfo(pTable, (0 == idx) ? pJoinNode->leftPrimSlotId : pJoinNode->rightPrimSlotId)); - MJ_ERR_RET(mJoinInitKeyColsInfo(pTable, (0 == idx) ? pJoinNode->pEqLeft : pJoinNode->pEqRight)); + MJ_ERR_RET(mJoinInitKeyColsInfo(pTable, (0 == idx) ? pJoinNode->pEqLeft : pJoinNode->pEqRight, JOIN_TYPE_FULL == pJoin->joinType)); MJ_ERR_RET(mJoinInitColsMap(&pTable->finNum, &pTable->finCols, pTable->blkId, pJoinNode->pTargets)); memcpy(&pTable->inputStat, pStat, sizeof(*pStat)); @@ -870,7 +876,9 @@ int32_t mJoinCreateFullBuildTbHash(SMJoinOperatorInfo* pJoin, SMJoinTableCtx* pT int32_t grpRows = GRP_REMAIN_ROWS(pGrp); for (int32_t r = 0; r < grpRows; ++r) { if (mJoinCopyKeyColsDataToBuf(pTable, pGrp->beginIdx + r, &bufLen)) { - continue; + *(int16_t *)pTable->keyBuf = 0; + pTable->keyData = pTable->keyBuf; + bufLen = pTable->keyNullSize; } MJ_ERR_RET(mJoinAddRowToFullHash(pJoin, bufLen, pGrp->blk, pGrp->beginIdx + r)); diff --git a/source/libs/executor/test/joinTests.cpp b/source/libs/executor/test/joinTests.cpp index aff129edc0..d0691b0467 100755 --- a/source/libs/executor/test/joinTests.cpp +++ b/source/libs/executor/test/joinTests.cpp @@ -66,7 +66,7 @@ enum { }; #define COL_DISPLAY_WIDTH 18 -#define JT_MAX_LOOP 3000 +#define JT_MAX_LOOP 1000 #define LEFT_BLK_ID 0 #define RIGHT_BLK_ID 1 @@ -1777,7 +1777,7 @@ void runSingleTest(char* caseName, SJoinTestParam* param) { bool contLoop = true; SSortMergeJoinPhysiNode* pNode = createDummySortMergeJoinPhysiNode(param->joinType, param->subType, param->cond, param->filter, param->asc); - createDummyBlkList(10, 10, 10, 10, 2); + createDummyBlkList(10, 10, 10, 10, 3); while (contLoop) { rerunBlockedHere(); @@ -1803,7 +1803,7 @@ void runSingleTest(char* caseName, SJoinTestParam* param) { } void handleCaseEnd() { - taosMemoryFree(jtCtx.rightFinMatch); + taosMemoryFreeClear(jtCtx.rightFinMatch); jtCtx.rightFinMatchNum = 0; } @@ -1911,7 +1911,7 @@ TEST(leftOuterJoin, fullCondTest) { #endif #endif -#if 0 +#if 1 TEST(fullOuterJoin, noCondTest) { SJoinTestParam param; char* caseName = "fullOuterJoin:noCondTest"; @@ -1936,7 +1936,7 @@ TEST(fullOuterJoin, noCondTest) { } #endif -#if 0 +#if 1 TEST(fullOuterJoin, eqCondTest) { SJoinTestParam param; char* caseName = "fullOuterJoin:eqCondTest"; diff --git a/tests/script/tsim/join/full_join.sim b/tests/script/tsim/join/full_join.sim new file mode 100644 index 0000000000..23b198dd5d --- /dev/null +++ b/tests/script/tsim/join/full_join.sim @@ -0,0 +1,188 @@ +sql connect +sql use test0; + +sql select a.col1, b.col1 from sta a full join sta b on a.ts = b.ts and a.ts < '2023-11-17 16:29:02' and b.ts < '2023-11-17 16:29:01' order by a.col1, b.col1; +if $rows != 10 then + return -1 +endi +if $data00 != 1 then + return -1 +endi +if $data01 != 1 then + return -1 +endi +if $data10 != 1 then + return -1 +endi +if $data11 != 2 then + return -1 +endi +if $data20 != 2 then + return -1 +endi +if $data21 != 1 then + return -1 +endi +if $data30 != 2 then + return -1 +endi +if $data31 != 2 then + return -1 +endi +if $data40 != 3 then + return -1 +endi +if $data41 != NULL then + return -1 +endi +if $data50 != 3 then + return -1 +endi +if $data51 != NULL then + return -1 +endi +if $data60 != 4 then + return -1 +endi +if $data61 != NULL then + return -1 +endi +if $data70 != 5 then + return -1 +endi +if $data71 != NULL then + return -1 +endi +if $data80 != 5 then + return -1 +endi +if $data81 != NULL then + return -1 +endi +if $data90 != 7 then + return -1 +endi +if $data91 != NULL then + return -1 +endi + +sql select a.col1, b.col1 from sta a full join sta b on a.ts = b.ts where a.ts < '2023-11-17 16:29:02' and b.ts < '2023-11-17 16:29:01' order by a.col1, b.col1; +if $rows != 4 then + return -1 +endi +if $data00 != 1 then + return -1 +endi +if $data01 != 1 then + return -1 +endi +if $data10 != 1 then + return -1 +endi +if $data11 != 2 then + return -1 +endi +if $data20 != 2 then + return -1 +endi +if $data21 != 1 then + return -1 +endi +if $data30 != 2 then + return -1 +endi +if $data31 != 2 then + return -1 +endi + +sql select a.col1, b.col1 from sta a full join sta b on a.ts = b.ts; +if $rows != 12 then + return -1 +endi + +sql select a.col1, b.col1 from tba1 a full join tba2 b on a.ts = b.ts order by a.col1, b.col1; +if $rows != 4 then + return -1 +endi +if $data00 != 1 then + return -1 +endi +if $data01 != 2 then + return -1 +endi +if $data10 != 3 then + return -1 +endi +if $data11 != NULL then + return -1 +endi +if $data20 != 4 then + return -1 +endi +if $data21 != 5 then + return -1 +endi +if $data30 != 5 then + return -1 +endi +if $data31 != NULL then + return -1 +endi + +sql select a.col1, b.col1 from tba2 a full join tba1 b on a.ts = b.ts order by a.col1, b.col1; +if $rows != 4 then + return -1 +endi +if $data00 != 2 then + return -1 +endi +if $data01 != 1 then + return -1 +endi +if $data10 != 3 then + return -1 +endi +if $data11 != NULL then + return -1 +endi +if $data20 != 5 then + return -1 +endi +if $data21 != 4 then + return -1 +endi +if $data30 != 7 then + return -1 +endi +if $data31 != NULL then + return -1 +endi + +sql select a.ts, b.ts from tba1 a full join tba2 b on a.ts = b.ts and a.ts < '2023-11-17 16:29:03' and b.ts < '2023-11-17 16:29:03'; +if $rows != 4 then + return -1 +endi +if $data00 != 2 then + return -1 +endi +if $data01 != 1 then + return -1 +endi +if $data10 != 3 then + return -1 +endi +if $data11 != NULL then + return -1 +endi +if $data20 != 5 then + return -1 +endi +if $data21 != 4 then + return -1 +endi +if $data30 != 7 then + return -1 +endi +if $data31 != NULL then + return -1 +endi