Merge pull request #25444 from taosdata/enh/groupJoinOpt

enh: optimize group join performance
This commit is contained in:
dapan1121 2024-04-23 10:19:47 +08:00 committed by GitHub
commit 92a7801d09
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 18 additions and 11 deletions

View File

@ -153,6 +153,7 @@ typedef struct SJoinLogicNode {
bool seqWinGroup; bool seqWinGroup;
bool grpJoin; bool grpJoin;
bool hashJoinHint; bool hashJoinHint;
bool batchScanHint;
// FOR HASH JOIN // FOR HASH JOIN
int32_t timeRangeTarget; //table onCond filter int32_t timeRangeTarget; //table onCond filter

View File

@ -511,6 +511,7 @@ static int32_t logicJoinCopy(const SJoinLogicNode* pSrc, SJoinLogicNode* pDst) {
COPY_SCALAR_FIELD(seqWinGroup); COPY_SCALAR_FIELD(seqWinGroup);
COPY_SCALAR_FIELD(grpJoin); COPY_SCALAR_FIELD(grpJoin);
COPY_SCALAR_FIELD(hashJoinHint); COPY_SCALAR_FIELD(hashJoinHint);
COPY_SCALAR_FIELD(batchScanHint);
CLONE_NODE_FIELD(pLeftOnCond); CLONE_NODE_FIELD(pLeftOnCond);
CLONE_NODE_FIELD(pRightOnCond); CLONE_NODE_FIELD(pRightOnCond);
COPY_SCALAR_FIELD(timeRangeTarget); COPY_SCALAR_FIELD(timeRangeTarget);

View File

@ -582,6 +582,7 @@ static int32_t createJoinLogicNode(SLogicPlanContext* pCxt, SSelectStmt* pSelect
pJoin->node.inputTsOrder = ORDER_ASC; pJoin->node.inputTsOrder = ORDER_ASC;
pJoin->node.groupAction = GROUP_ACTION_CLEAR; pJoin->node.groupAction = GROUP_ACTION_CLEAR;
pJoin->hashJoinHint = getHashJoinOptHint(pSelect->pHint); pJoin->hashJoinHint = getHashJoinOptHint(pSelect->pHint);
pJoin->batchScanHint = getBatchScanOptionFromHint(pSelect->pHint);
pJoin->node.requireDataOrder = pJoin->hashJoinHint ? DATA_ORDER_LEVEL_NONE : DATA_ORDER_LEVEL_GLOBAL; pJoin->node.requireDataOrder = pJoin->hashJoinHint ? DATA_ORDER_LEVEL_NONE : DATA_ORDER_LEVEL_GLOBAL;
pJoin->node.resultDataOrder = DATA_ORDER_LEVEL_NONE; pJoin->node.resultDataOrder = DATA_ORDER_LEVEL_NONE;
pJoin->isLowLevelJoin = pJoinTable->isLowLevelJoin; pJoin->isLowLevelJoin = pJoinTable->isLowLevelJoin;

View File

@ -2751,16 +2751,20 @@ static bool partTagsIsOptimizableNode(SLogicNode* pNode) {
if (!ret) return ret; if (!ret) return ret;
switch (nodeType(pNode)) { switch (nodeType(pNode)) {
case QUERY_NODE_LOGIC_PLAN_PARTITION: { case QUERY_NODE_LOGIC_PLAN_PARTITION: {
if (pNode->pParent && nodeType(pNode->pParent) == QUERY_NODE_LOGIC_PLAN_WINDOW) { if (pNode->pParent) {
SWindowLogicNode* pWindow = (SWindowLogicNode*)pNode->pParent; if (nodeType(pNode->pParent) == QUERY_NODE_LOGIC_PLAN_WINDOW) {
if (pWindow->winType == WINDOW_TYPE_INTERVAL) { SWindowLogicNode* pWindow = (SWindowLogicNode*)pNode->pParent;
// if interval has slimit, we push down partition node to scan, and scan will set groupOrderScan to true if (pWindow->winType == WINDOW_TYPE_INTERVAL) {
// we want to skip groups of blocks after slimit satisfied // if interval has slimit, we push down partition node to scan, and scan will set groupOrderScan to true
// if interval only has limit, we do not push down partition node to scan // we want to skip groups of blocks after slimit satisfied
// we want to get grouped output from partition node and make use of limit // if interval only has limit, we do not push down partition node to scan
// if no slimit and no limit, we push down partition node and groupOrderScan is false, cause we do not need // we want to get grouped output from partition node and make use of limit
// group ordered output // if no slimit and no limit, we push down partition node and groupOrderScan is false, cause we do not need
if (!pWindow->node.pSlimit && pWindow->node.pLimit) ret = false; // group ordered output
if (!pWindow->node.pSlimit && pWindow->node.pLimit) ret = false;
}
} else if (nodeType(pNode->pParent) == QUERY_NODE_LOGIC_PLAN_JOIN) {
ret = false;
} }
} }
} break; } break;
@ -5607,7 +5611,7 @@ static int32_t grpJoinOptPartByTags(SLogicNode* pNode) {
static int32_t grpJoinOptRewriteGroupJoin(SOptimizeContext* pCxt, SLogicNode* pNode, SLogicSubplan* pLogicSubplan) { static int32_t grpJoinOptRewriteGroupJoin(SOptimizeContext* pCxt, SLogicNode* pNode, SLogicSubplan* pLogicSubplan) {
SJoinLogicNode* pJoin = (SJoinLogicNode*)pNode; SJoinLogicNode* pJoin = (SJoinLogicNode*)pNode;
int32_t code = (pJoin->allEqTags && !pJoin->hasSubQuery) ? grpJoinOptPartByTags(pNode) : grpJoinOptInsertPartitionNode(pNode); int32_t code = (pJoin->allEqTags && !pJoin->hasSubQuery && !pJoin->batchScanHint) ? grpJoinOptPartByTags(pNode) : grpJoinOptInsertPartitionNode(pNode);
if (TSDB_CODE_SUCCESS == code) { if (TSDB_CODE_SUCCESS == code) {
pJoin->grpJoin = true; pJoin->grpJoin = true;
pCxt->optimized = true; pCxt->optimized = true;