From 8407eb3791def9d4ff83e5c2bef1df1433e8620a Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Tue, 9 Apr 2024 09:48:38 +0800 Subject: [PATCH] enh: outer join performance optimization --- include/libs/function/functionMgt.h | 1 + source/libs/function/inc/functionMgtInt.h | 2 + source/libs/function/src/builtins.c | 32 ++-- source/libs/function/src/functionMgt.c | 2 + source/libs/planner/src/planOptimizer.c | 173 +++++++++++++++++++++- 5 files changed, 193 insertions(+), 17 deletions(-) diff --git a/include/libs/function/functionMgt.h b/include/libs/function/functionMgt.h index 3836c631d5..5cd4edc589 100644 --- a/include/libs/function/functionMgt.h +++ b/include/libs/function/functionMgt.h @@ -238,6 +238,7 @@ bool fmIsCumulativeFunc(int32_t funcId); bool fmIsInterpPseudoColumnFunc(int32_t funcId); bool fmIsGroupKeyFunc(int32_t funcId); bool fmIsBlockDistFunc(int32_t funcId); +bool fmIsIgnoreNullFunc(int32_t funcId); bool fmIsConstantResFunc(SFunctionNode* pFunc); bool fmIsSkipScanCheckFunc(int32_t funcId); diff --git a/source/libs/function/inc/functionMgtInt.h b/source/libs/function/inc/functionMgtInt.h index 30bd38e7ba..8c127effcd 100644 --- a/source/libs/function/inc/functionMgtInt.h +++ b/source/libs/function/inc/functionMgtInt.h @@ -53,6 +53,8 @@ extern "C" { #define FUNC_MGT_GEOMETRY_FUNC FUNC_MGT_FUNC_CLASSIFICATION_MASK(24) #define FUNC_MGT_FORBID_SYSTABLE_FUNC FUNC_MGT_FUNC_CLASSIFICATION_MASK(25) #define FUNC_MGT_SKIP_SCAN_CHECK_FUNC FUNC_MGT_FUNC_CLASSIFICATION_MASK(26) +#define FUNC_MGT_IGNORE_NULL_FUNC FUNC_MGT_FUNC_CLASSIFICATION_MASK(27) + #define FUNC_MGT_TEST_MASK(val, mask) (((val) & (mask)) != 0) diff --git a/source/libs/function/src/builtins.c b/source/libs/function/src/builtins.c index df9c72dcc1..23b42096b1 100644 --- a/source/libs/function/src/builtins.c +++ b/source/libs/function/src/builtins.c @@ -2391,7 +2391,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { { .name = "count", .type = FUNCTION_TYPE_COUNT, - .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SPECIAL_DATA_REQUIRED, + .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SPECIAL_DATA_REQUIRED | FUNC_MGT_IGNORE_NULL_FUNC, .translateFunc = translateCount, .dataRequiredFunc = countDataRequired, .getEnvFunc = getCountFuncEnv, @@ -2409,7 +2409,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { { .name = "sum", .type = FUNCTION_TYPE_SUM, - .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SPECIAL_DATA_REQUIRED, + .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SPECIAL_DATA_REQUIRED | FUNC_MGT_IGNORE_NULL_FUNC, .translateFunc = translateSum, .dataRequiredFunc = statisDataRequired, .getEnvFunc = getSumFuncEnv, @@ -2427,7 +2427,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { { .name = "min", .type = FUNCTION_TYPE_MIN, - .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SPECIAL_DATA_REQUIRED | FUNC_MGT_SELECT_FUNC, + .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SPECIAL_DATA_REQUIRED | FUNC_MGT_SELECT_FUNC | FUNC_MGT_IGNORE_NULL_FUNC, .translateFunc = translateInOutNum, .dataRequiredFunc = statisDataRequired, .getEnvFunc = getMinmaxFuncEnv, @@ -2442,7 +2442,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { { .name = "max", .type = FUNCTION_TYPE_MAX, - .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SPECIAL_DATA_REQUIRED | FUNC_MGT_SELECT_FUNC, + .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SPECIAL_DATA_REQUIRED | FUNC_MGT_SELECT_FUNC | FUNC_MGT_IGNORE_NULL_FUNC, .translateFunc = translateInOutNum, .dataRequiredFunc = statisDataRequired, .getEnvFunc = getMinmaxFuncEnv, @@ -2517,7 +2517,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { { .name = "avg", .type = FUNCTION_TYPE_AVG, - .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SPECIAL_DATA_REQUIRED, + .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SPECIAL_DATA_REQUIRED | FUNC_MGT_IGNORE_NULL_FUNC, .translateFunc = translateInNumOutDou, .dataRequiredFunc = statisDataRequired, .getEnvFunc = getAvgFuncEnv, @@ -2536,7 +2536,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { { .name = "_avg_partial", .type = FUNCTION_TYPE_AVG_PARTIAL, - .classification = FUNC_MGT_AGG_FUNC, + .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_IGNORE_NULL_FUNC, .translateFunc = translateAvgPartial, .dataRequiredFunc = statisDataRequired, .getEnvFunc = getAvgFuncEnv, @@ -2551,7 +2551,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { { .name = "_avg_merge", .type = FUNCTION_TYPE_AVG_MERGE, - .classification = FUNC_MGT_AGG_FUNC, + .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_IGNORE_NULL_FUNC, .translateFunc = translateAvgMerge, .getEnvFunc = getAvgFuncEnv, .initFunc = avgFunctionSetup, @@ -2628,7 +2628,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .name = "top", .type = FUNCTION_TYPE_TOP, .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SELECT_FUNC | FUNC_MGT_MULTI_ROWS_FUNC | FUNC_MGT_KEEP_ORDER_FUNC | - FUNC_MGT_FORBID_STREAM_FUNC | FUNC_MGT_FORBID_FILL_FUNC, + FUNC_MGT_FORBID_STREAM_FUNC | FUNC_MGT_FORBID_FILL_FUNC | FUNC_MGT_IGNORE_NULL_FUNC, .translateFunc = translateTopBot, .getEnvFunc = getTopBotFuncEnv, .initFunc = topBotFunctionSetup, @@ -2644,7 +2644,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .name = "bottom", .type = FUNCTION_TYPE_BOTTOM, .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SELECT_FUNC | FUNC_MGT_MULTI_ROWS_FUNC | FUNC_MGT_KEEP_ORDER_FUNC | - FUNC_MGT_FORBID_STREAM_FUNC | FUNC_MGT_FORBID_FILL_FUNC, + FUNC_MGT_FORBID_STREAM_FUNC | FUNC_MGT_FORBID_FILL_FUNC | FUNC_MGT_IGNORE_NULL_FUNC, .translateFunc = translateTopBot, .getEnvFunc = getTopBotFuncEnv, .initFunc = topBotFunctionSetup, @@ -2842,7 +2842,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { { .name = "_cache_last", .type = FUNCTION_TYPE_CACHE_LAST, - .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_MULTI_RES_FUNC | FUNC_MGT_SELECT_FUNC | FUNC_MGT_FORBID_STREAM_FUNC | FUNC_MGT_FORBID_SYSTABLE_FUNC, + .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_MULTI_RES_FUNC | FUNC_MGT_SELECT_FUNC | FUNC_MGT_FORBID_STREAM_FUNC | FUNC_MGT_FORBID_SYSTABLE_FUNC | FUNC_MGT_IGNORE_NULL_FUNC, .translateFunc = translateFirstLast, .getEnvFunc = getFirstLastFuncEnv, .initFunc = functionSetup, @@ -2876,7 +2876,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .name = "first", .type = FUNCTION_TYPE_FIRST, .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SELECT_FUNC | FUNC_MGT_MULTI_RES_FUNC | FUNC_MGT_IMPLICIT_TS_FUNC | - FUNC_MGT_KEEP_ORDER_FUNC | FUNC_MGT_FORBID_SYSTABLE_FUNC, + FUNC_MGT_KEEP_ORDER_FUNC | FUNC_MGT_FORBID_SYSTABLE_FUNC | FUNC_MGT_IGNORE_NULL_FUNC, .translateFunc = translateFirstLast, .dynDataRequiredFunc = firstDynDataReq, .getEnvFunc = getFirstLastFuncEnv, @@ -2892,7 +2892,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .name = "_first_partial", .type = FUNCTION_TYPE_FIRST_PARTIAL, .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SELECT_FUNC | FUNC_MGT_MULTI_RES_FUNC | FUNC_MGT_IMPLICIT_TS_FUNC | - FUNC_MGT_FORBID_SYSTABLE_FUNC, + FUNC_MGT_FORBID_SYSTABLE_FUNC | FUNC_MGT_IGNORE_NULL_FUNC, .translateFunc = translateFirstLastPartial, .dynDataRequiredFunc = firstDynDataReq, .getEnvFunc = getFirstLastFuncEnv, @@ -2905,7 +2905,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .name = "_first_merge", .type = FUNCTION_TYPE_FIRST_MERGE, .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SELECT_FUNC | FUNC_MGT_MULTI_RES_FUNC | FUNC_MGT_IMPLICIT_TS_FUNC | - FUNC_MGT_FORBID_SYSTABLE_FUNC, + FUNC_MGT_FORBID_SYSTABLE_FUNC | FUNC_MGT_IGNORE_NULL_FUNC, .translateFunc = translateFirstLastMerge, .getEnvFunc = getFirstLastFuncEnv, .initFunc = functionSetup, @@ -2917,7 +2917,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .name = "last", .type = FUNCTION_TYPE_LAST, .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SELECT_FUNC | FUNC_MGT_MULTI_RES_FUNC | FUNC_MGT_IMPLICIT_TS_FUNC | - FUNC_MGT_KEEP_ORDER_FUNC | FUNC_MGT_FORBID_SYSTABLE_FUNC, + FUNC_MGT_KEEP_ORDER_FUNC | FUNC_MGT_FORBID_SYSTABLE_FUNC | FUNC_MGT_IGNORE_NULL_FUNC, .translateFunc = translateFirstLast, .dynDataRequiredFunc = lastDynDataReq, .getEnvFunc = getFirstLastFuncEnv, @@ -2933,7 +2933,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .name = "_last_partial", .type = FUNCTION_TYPE_LAST_PARTIAL, .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SELECT_FUNC | FUNC_MGT_MULTI_RES_FUNC | FUNC_MGT_IMPLICIT_TS_FUNC | - FUNC_MGT_FORBID_SYSTABLE_FUNC, + FUNC_MGT_FORBID_SYSTABLE_FUNC | FUNC_MGT_IGNORE_NULL_FUNC, .translateFunc = translateFirstLastPartial, .dynDataRequiredFunc = lastDynDataReq, .getEnvFunc = getFirstLastFuncEnv, @@ -2946,7 +2946,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .name = "_last_merge", .type = FUNCTION_TYPE_LAST_MERGE, .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SELECT_FUNC | FUNC_MGT_MULTI_RES_FUNC | FUNC_MGT_IMPLICIT_TS_FUNC | - FUNC_MGT_FORBID_SYSTABLE_FUNC, + FUNC_MGT_FORBID_SYSTABLE_FUNC | FUNC_MGT_IGNORE_NULL_FUNC, .translateFunc = translateFirstLastMerge, .getEnvFunc = getFirstLastFuncEnv, .initFunc = functionSetup, diff --git a/source/libs/function/src/functionMgt.c b/source/libs/function/src/functionMgt.c index 068d1532c0..8a19696f3c 100644 --- a/source/libs/function/src/functionMgt.c +++ b/source/libs/function/src/functionMgt.c @@ -274,6 +274,8 @@ bool fmIsBlockDistFunc(int32_t funcId) { return FUNCTION_TYPE_BLOCK_DIST == funcMgtBuiltins[funcId].type; } +bool fmIsIgnoreNullFunc(int32_t funcId) { return isSpecificClassifyFunc(funcId, FUNC_MGT_IGNORE_NULL_FUNC); } + void fmFuncMgtDestroy() { void* m = gFunMgtService.pFuncNameHashTable; if (m != NULL && atomic_val_compare_exchange_ptr((void**)&gFunMgtService.pFuncNameHashTable, m, 0) == m) { diff --git a/source/libs/planner/src/planOptimizer.c b/source/libs/planner/src/planOptimizer.c index d0d71d3fe8..7892e66a44 100644 --- a/source/libs/planner/src/planOptimizer.c +++ b/source/libs/planner/src/planOptimizer.c @@ -61,6 +61,26 @@ typedef struct SCpdIsMultiTableCondCxt { bool condIsNull; } SCpdIsMultiTableCondCxt; +typedef struct SCpdIsMultiTableResCxt { + SSHashObj* pLeftTbls; + SSHashObj* pRightTbls; + bool haveLeftCol; + bool haveRightCol; + bool leftColOp; + bool rightColOp; + bool leftColNonNull; + bool rightColNonNull; +} SCpdIsMultiTableResCxt; + +typedef struct SCpdCollRewriteTableColsCxt { + int32_t code; + SSHashObj* pLeftTbls; + SSHashObj* pRightTbls; + SSHashObj* pLeftCols; + SSHashObj* pRightCols; +} SCpdCollRewriteTableColsCxt; + + typedef struct SCpdCollectTableColCxt { SSHashObj* pTables; SNodeList* pResCols; @@ -1338,7 +1358,9 @@ static EDealRes pdcCheckTableCondType(SNode* pNode, void* pContext) { } case QUERY_NODE_OPERATOR: { SOperatorNode* pOp = (SOperatorNode*)pNode; - pCxt->condIsNull = (OP_TYPE_IS_NULL == pOp->opType); + if (OP_TYPE_IS_NULL == pOp->opType) { + pCxt->condIsNull = true; + } break; } default: @@ -1462,6 +1484,151 @@ static int32_t pdcRewriteTypeBasedOnConds(SOptimizeContext* pCxt, SJoinLogicNode return code; } +static EDealRes pdcCheckTableResType(SNode* pNode, void* pContext) { + SCpdIsMultiTableResCxt* pCxt = pContext; + switch (nodeType(pNode)) { + case QUERY_NODE_COLUMN: { + if (pdcJoinColInTableList(pNode, pCxt->pLeftTbls)) { + pCxt->haveLeftCol = true; + } else if (pdcJoinColInTableList(pNode, pCxt->pRightTbls)) { + pCxt->haveRightCol = true; + } + break; + } + case QUERY_NODE_VALUE: + case QUERY_NODE_GROUPING_SET: + break; + case QUERY_NODE_FUNCTION: { + SFunctionNode* pFunc = (SFunctionNode*)pNode; + SCpdIsMultiTableResCxt cxt = {.pLeftTbls = pCxt->pLeftTbls, .pRightTbls = pCxt->pRightTbls, + .haveLeftCol = false, .haveRightCol = false, .leftColNonNull = true, .rightColNonNull = true}; + + nodesWalkExprs(pFunc->pParameterList, pdcCheckTableResType, &cxt); + if (!cxt.leftColNonNull) { + pCxt->leftColNonNull = false; + } + if (!cxt.rightColNonNull) { + pCxt->rightColNonNull = false; + } + if (cxt.leftColOp) { + pCxt->leftColOp = true; + } + if (cxt.rightColOp) { + pCxt->rightColOp = true; + } + if (!cxt.haveLeftCol && !cxt.haveRightCol) { + pCxt->leftColNonNull = false; + pCxt->rightColNonNull = false; + return DEAL_RES_END; + } else if (!fmIsIgnoreNullFunc(pFunc->funcId)) { + if (cxt.haveLeftCol) { + pCxt->leftColNonNull = false; + } + if (cxt.haveRightCol) { + pCxt->rightColNonNull = false; + } + } else { + if (cxt.haveLeftCol) { + pCxt->leftColOp = true; + } else if (cxt.haveRightCol) { + pCxt->rightColOp = true; + } + } + if (!pCxt->leftColNonNull && !pCxt->rightColNonNull) { + return DEAL_RES_END; + } + break; + } + default: + pCxt->leftColNonNull = false; + pCxt->rightColNonNull = false; + return DEAL_RES_END; + } + + return DEAL_RES_CONTINUE; +} + +static int32_t pdcRewriteTypeBasedOnJoinRes(SOptimizeContext* pCxt, SJoinLogicNode* pJoin) { + if (JOIN_TYPE_INNER == pJoin->joinType || JOIN_STYPE_OUTER != pJoin->subType) { + return TSDB_CODE_SUCCESS; + } + + int32_t code = 0; + SSHashObj* pLeftTables = NULL; + SSHashObj* pRightTables = NULL; + collectTableAliasFromNodes(nodesListGetNode(pJoin->node.pChildren, 0), &pLeftTables); + collectTableAliasFromNodes(nodesListGetNode(pJoin->node.pChildren, 1), &pRightTables); + + SLogicNode* pParent = pJoin->node.pParent; + bool tableResNonNull[2] = {true, true}; + bool tableResOp[2] = {false, false}; + if (QUERY_NODE_LOGIC_PLAN_AGG == nodeType(pParent)) { + SAggLogicNode* pAgg = (SAggLogicNode*)pParent; + if (NULL != pAgg->pGroupKeys) { + tableResNonNull[0] = false; + tableResNonNull[1] = false; + } else { + SCpdIsMultiTableResCxt cxt = {.pLeftTbls = pLeftTables, .pRightTbls = pRightTables, + .haveLeftCol = false, .haveRightCol = false, .leftColNonNull = true, .rightColNonNull = true, .leftColOp = false, .rightColOp = false}; + + nodesWalkExprs(pAgg->pAggFuncs, pdcCheckTableResType, &cxt); + if (!cxt.leftColNonNull) { + tableResNonNull[0] = false; + } + if (!cxt.rightColNonNull) { + tableResNonNull[1] = false; + } + if (cxt.leftColOp) { + tableResOp[0] = true; + } + if (cxt.rightColOp) { + tableResOp[1] = true; + } + } + } else { + tableResNonNull[0] = false; + tableResNonNull[1] = false; + } + + tSimpleHashCleanup(pLeftTables); + tSimpleHashCleanup(pRightTables); + + if (TSDB_CODE_SUCCESS != code) { + return code; + } + + switch (pJoin->joinType) { + case JOIN_TYPE_LEFT: + if (tableResNonNull[1] && !tableResOp[0]) { + pJoin->joinType = JOIN_TYPE_INNER; + pJoin->subType = JOIN_STYPE_NONE; + } + break; + case JOIN_TYPE_RIGHT: + if (tableResNonNull[0] && !tableResOp[1]) { + pJoin->joinType = JOIN_TYPE_INNER; + pJoin->subType = JOIN_STYPE_NONE; + } + break; + case JOIN_TYPE_FULL: + if (tableResNonNull[1] && !tableResOp[0]) { + if (tableResNonNull[0] && !tableResOp[1]) { + pJoin->joinType = JOIN_TYPE_INNER; + pJoin->subType = JOIN_STYPE_NONE; + } else { + pJoin->joinType = JOIN_TYPE_RIGHT; + } + } else if (tableResNonNull[0] && !tableResOp[1]) { + pJoin->joinType = JOIN_TYPE_LEFT; + } + break; + default: + break; + } + + return TSDB_CODE_SUCCESS; +} + static int32_t pdcDealJoin(SOptimizeContext* pCxt, SJoinLogicNode* pJoin) { if (OPTIMIZE_FLAG_TEST_MASK(pJoin->node.optimizedFlag, OPTIMIZE_FLAG_PUSH_DOWN_CONDE)) { return TSDB_CODE_SUCCESS; @@ -1495,6 +1662,10 @@ static int32_t pdcDealJoin(SOptimizeContext* pCxt, SJoinLogicNode* pJoin) { } } + if (TSDB_CODE_SUCCESS == code) { + code = pdcRewriteTypeBasedOnJoinRes(pCxt, pJoin); + } + if (TSDB_CODE_SUCCESS != code || t == pJoin->joinType) { break; }