enh: outer join performance optimization

This commit is contained in:
dapan1121 2024-04-09 09:48:38 +08:00
parent 6d52120bf7
commit 8407eb3791
5 changed files with 193 additions and 17 deletions

View File

@ -238,6 +238,7 @@ bool fmIsCumulativeFunc(int32_t funcId);
bool fmIsInterpPseudoColumnFunc(int32_t funcId);
bool fmIsGroupKeyFunc(int32_t funcId);
bool fmIsBlockDistFunc(int32_t funcId);
bool fmIsIgnoreNullFunc(int32_t funcId);
bool fmIsConstantResFunc(SFunctionNode* pFunc);
bool fmIsSkipScanCheckFunc(int32_t funcId);

View File

@ -53,6 +53,8 @@ extern "C" {
#define FUNC_MGT_GEOMETRY_FUNC FUNC_MGT_FUNC_CLASSIFICATION_MASK(24)
#define FUNC_MGT_FORBID_SYSTABLE_FUNC FUNC_MGT_FUNC_CLASSIFICATION_MASK(25)
#define FUNC_MGT_SKIP_SCAN_CHECK_FUNC FUNC_MGT_FUNC_CLASSIFICATION_MASK(26)
#define FUNC_MGT_IGNORE_NULL_FUNC FUNC_MGT_FUNC_CLASSIFICATION_MASK(27)
#define FUNC_MGT_TEST_MASK(val, mask) (((val) & (mask)) != 0)

View File

@ -2391,7 +2391,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = {
{
.name = "count",
.type = FUNCTION_TYPE_COUNT,
.classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SPECIAL_DATA_REQUIRED,
.classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SPECIAL_DATA_REQUIRED | FUNC_MGT_IGNORE_NULL_FUNC,
.translateFunc = translateCount,
.dataRequiredFunc = countDataRequired,
.getEnvFunc = getCountFuncEnv,
@ -2409,7 +2409,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = {
{
.name = "sum",
.type = FUNCTION_TYPE_SUM,
.classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SPECIAL_DATA_REQUIRED,
.classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SPECIAL_DATA_REQUIRED | FUNC_MGT_IGNORE_NULL_FUNC,
.translateFunc = translateSum,
.dataRequiredFunc = statisDataRequired,
.getEnvFunc = getSumFuncEnv,
@ -2427,7 +2427,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = {
{
.name = "min",
.type = FUNCTION_TYPE_MIN,
.classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SPECIAL_DATA_REQUIRED | FUNC_MGT_SELECT_FUNC,
.classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SPECIAL_DATA_REQUIRED | FUNC_MGT_SELECT_FUNC | FUNC_MGT_IGNORE_NULL_FUNC,
.translateFunc = translateInOutNum,
.dataRequiredFunc = statisDataRequired,
.getEnvFunc = getMinmaxFuncEnv,
@ -2442,7 +2442,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = {
{
.name = "max",
.type = FUNCTION_TYPE_MAX,
.classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SPECIAL_DATA_REQUIRED | FUNC_MGT_SELECT_FUNC,
.classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SPECIAL_DATA_REQUIRED | FUNC_MGT_SELECT_FUNC | FUNC_MGT_IGNORE_NULL_FUNC,
.translateFunc = translateInOutNum,
.dataRequiredFunc = statisDataRequired,
.getEnvFunc = getMinmaxFuncEnv,
@ -2517,7 +2517,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = {
{
.name = "avg",
.type = FUNCTION_TYPE_AVG,
.classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SPECIAL_DATA_REQUIRED,
.classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SPECIAL_DATA_REQUIRED | FUNC_MGT_IGNORE_NULL_FUNC,
.translateFunc = translateInNumOutDou,
.dataRequiredFunc = statisDataRequired,
.getEnvFunc = getAvgFuncEnv,
@ -2536,7 +2536,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = {
{
.name = "_avg_partial",
.type = FUNCTION_TYPE_AVG_PARTIAL,
.classification = FUNC_MGT_AGG_FUNC,
.classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_IGNORE_NULL_FUNC,
.translateFunc = translateAvgPartial,
.dataRequiredFunc = statisDataRequired,
.getEnvFunc = getAvgFuncEnv,
@ -2551,7 +2551,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = {
{
.name = "_avg_merge",
.type = FUNCTION_TYPE_AVG_MERGE,
.classification = FUNC_MGT_AGG_FUNC,
.classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_IGNORE_NULL_FUNC,
.translateFunc = translateAvgMerge,
.getEnvFunc = getAvgFuncEnv,
.initFunc = avgFunctionSetup,
@ -2628,7 +2628,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = {
.name = "top",
.type = FUNCTION_TYPE_TOP,
.classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SELECT_FUNC | FUNC_MGT_MULTI_ROWS_FUNC | FUNC_MGT_KEEP_ORDER_FUNC |
FUNC_MGT_FORBID_STREAM_FUNC | FUNC_MGT_FORBID_FILL_FUNC,
FUNC_MGT_FORBID_STREAM_FUNC | FUNC_MGT_FORBID_FILL_FUNC | FUNC_MGT_IGNORE_NULL_FUNC,
.translateFunc = translateTopBot,
.getEnvFunc = getTopBotFuncEnv,
.initFunc = topBotFunctionSetup,
@ -2644,7 +2644,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = {
.name = "bottom",
.type = FUNCTION_TYPE_BOTTOM,
.classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SELECT_FUNC | FUNC_MGT_MULTI_ROWS_FUNC | FUNC_MGT_KEEP_ORDER_FUNC |
FUNC_MGT_FORBID_STREAM_FUNC | FUNC_MGT_FORBID_FILL_FUNC,
FUNC_MGT_FORBID_STREAM_FUNC | FUNC_MGT_FORBID_FILL_FUNC | FUNC_MGT_IGNORE_NULL_FUNC,
.translateFunc = translateTopBot,
.getEnvFunc = getTopBotFuncEnv,
.initFunc = topBotFunctionSetup,
@ -2842,7 +2842,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = {
{
.name = "_cache_last",
.type = FUNCTION_TYPE_CACHE_LAST,
.classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_MULTI_RES_FUNC | FUNC_MGT_SELECT_FUNC | FUNC_MGT_FORBID_STREAM_FUNC | FUNC_MGT_FORBID_SYSTABLE_FUNC,
.classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_MULTI_RES_FUNC | FUNC_MGT_SELECT_FUNC | FUNC_MGT_FORBID_STREAM_FUNC | FUNC_MGT_FORBID_SYSTABLE_FUNC | FUNC_MGT_IGNORE_NULL_FUNC,
.translateFunc = translateFirstLast,
.getEnvFunc = getFirstLastFuncEnv,
.initFunc = functionSetup,
@ -2876,7 +2876,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = {
.name = "first",
.type = FUNCTION_TYPE_FIRST,
.classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SELECT_FUNC | FUNC_MGT_MULTI_RES_FUNC | FUNC_MGT_IMPLICIT_TS_FUNC |
FUNC_MGT_KEEP_ORDER_FUNC | FUNC_MGT_FORBID_SYSTABLE_FUNC,
FUNC_MGT_KEEP_ORDER_FUNC | FUNC_MGT_FORBID_SYSTABLE_FUNC | FUNC_MGT_IGNORE_NULL_FUNC,
.translateFunc = translateFirstLast,
.dynDataRequiredFunc = firstDynDataReq,
.getEnvFunc = getFirstLastFuncEnv,
@ -2892,7 +2892,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = {
.name = "_first_partial",
.type = FUNCTION_TYPE_FIRST_PARTIAL,
.classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SELECT_FUNC | FUNC_MGT_MULTI_RES_FUNC | FUNC_MGT_IMPLICIT_TS_FUNC |
FUNC_MGT_FORBID_SYSTABLE_FUNC,
FUNC_MGT_FORBID_SYSTABLE_FUNC | FUNC_MGT_IGNORE_NULL_FUNC,
.translateFunc = translateFirstLastPartial,
.dynDataRequiredFunc = firstDynDataReq,
.getEnvFunc = getFirstLastFuncEnv,
@ -2905,7 +2905,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = {
.name = "_first_merge",
.type = FUNCTION_TYPE_FIRST_MERGE,
.classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SELECT_FUNC | FUNC_MGT_MULTI_RES_FUNC | FUNC_MGT_IMPLICIT_TS_FUNC |
FUNC_MGT_FORBID_SYSTABLE_FUNC,
FUNC_MGT_FORBID_SYSTABLE_FUNC | FUNC_MGT_IGNORE_NULL_FUNC,
.translateFunc = translateFirstLastMerge,
.getEnvFunc = getFirstLastFuncEnv,
.initFunc = functionSetup,
@ -2917,7 +2917,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = {
.name = "last",
.type = FUNCTION_TYPE_LAST,
.classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SELECT_FUNC | FUNC_MGT_MULTI_RES_FUNC | FUNC_MGT_IMPLICIT_TS_FUNC |
FUNC_MGT_KEEP_ORDER_FUNC | FUNC_MGT_FORBID_SYSTABLE_FUNC,
FUNC_MGT_KEEP_ORDER_FUNC | FUNC_MGT_FORBID_SYSTABLE_FUNC | FUNC_MGT_IGNORE_NULL_FUNC,
.translateFunc = translateFirstLast,
.dynDataRequiredFunc = lastDynDataReq,
.getEnvFunc = getFirstLastFuncEnv,
@ -2933,7 +2933,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = {
.name = "_last_partial",
.type = FUNCTION_TYPE_LAST_PARTIAL,
.classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SELECT_FUNC | FUNC_MGT_MULTI_RES_FUNC | FUNC_MGT_IMPLICIT_TS_FUNC |
FUNC_MGT_FORBID_SYSTABLE_FUNC,
FUNC_MGT_FORBID_SYSTABLE_FUNC | FUNC_MGT_IGNORE_NULL_FUNC,
.translateFunc = translateFirstLastPartial,
.dynDataRequiredFunc = lastDynDataReq,
.getEnvFunc = getFirstLastFuncEnv,
@ -2946,7 +2946,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = {
.name = "_last_merge",
.type = FUNCTION_TYPE_LAST_MERGE,
.classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_SELECT_FUNC | FUNC_MGT_MULTI_RES_FUNC | FUNC_MGT_IMPLICIT_TS_FUNC |
FUNC_MGT_FORBID_SYSTABLE_FUNC,
FUNC_MGT_FORBID_SYSTABLE_FUNC | FUNC_MGT_IGNORE_NULL_FUNC,
.translateFunc = translateFirstLastMerge,
.getEnvFunc = getFirstLastFuncEnv,
.initFunc = functionSetup,

View File

@ -274,6 +274,8 @@ bool fmIsBlockDistFunc(int32_t funcId) {
return FUNCTION_TYPE_BLOCK_DIST == funcMgtBuiltins[funcId].type;
}
bool fmIsIgnoreNullFunc(int32_t funcId) { return isSpecificClassifyFunc(funcId, FUNC_MGT_IGNORE_NULL_FUNC); }
void fmFuncMgtDestroy() {
void* m = gFunMgtService.pFuncNameHashTable;
if (m != NULL && atomic_val_compare_exchange_ptr((void**)&gFunMgtService.pFuncNameHashTable, m, 0) == m) {

View File

@ -61,6 +61,26 @@ typedef struct SCpdIsMultiTableCondCxt {
bool condIsNull;
} SCpdIsMultiTableCondCxt;
typedef struct SCpdIsMultiTableResCxt {
SSHashObj* pLeftTbls;
SSHashObj* pRightTbls;
bool haveLeftCol;
bool haveRightCol;
bool leftColOp;
bool rightColOp;
bool leftColNonNull;
bool rightColNonNull;
} SCpdIsMultiTableResCxt;
typedef struct SCpdCollRewriteTableColsCxt {
int32_t code;
SSHashObj* pLeftTbls;
SSHashObj* pRightTbls;
SSHashObj* pLeftCols;
SSHashObj* pRightCols;
} SCpdCollRewriteTableColsCxt;
typedef struct SCpdCollectTableColCxt {
SSHashObj* pTables;
SNodeList* pResCols;
@ -1338,7 +1358,9 @@ static EDealRes pdcCheckTableCondType(SNode* pNode, void* pContext) {
}
case QUERY_NODE_OPERATOR: {
SOperatorNode* pOp = (SOperatorNode*)pNode;
pCxt->condIsNull = (OP_TYPE_IS_NULL == pOp->opType);
if (OP_TYPE_IS_NULL == pOp->opType) {
pCxt->condIsNull = true;
}
break;
}
default:
@ -1462,6 +1484,151 @@ static int32_t pdcRewriteTypeBasedOnConds(SOptimizeContext* pCxt, SJoinLogicNode
return code;
}
static EDealRes pdcCheckTableResType(SNode* pNode, void* pContext) {
SCpdIsMultiTableResCxt* pCxt = pContext;
switch (nodeType(pNode)) {
case QUERY_NODE_COLUMN: {
if (pdcJoinColInTableList(pNode, pCxt->pLeftTbls)) {
pCxt->haveLeftCol = true;
} else if (pdcJoinColInTableList(pNode, pCxt->pRightTbls)) {
pCxt->haveRightCol = true;
}
break;
}
case QUERY_NODE_VALUE:
case QUERY_NODE_GROUPING_SET:
break;
case QUERY_NODE_FUNCTION: {
SFunctionNode* pFunc = (SFunctionNode*)pNode;
SCpdIsMultiTableResCxt cxt = {.pLeftTbls = pCxt->pLeftTbls, .pRightTbls = pCxt->pRightTbls,
.haveLeftCol = false, .haveRightCol = false, .leftColNonNull = true, .rightColNonNull = true};
nodesWalkExprs(pFunc->pParameterList, pdcCheckTableResType, &cxt);
if (!cxt.leftColNonNull) {
pCxt->leftColNonNull = false;
}
if (!cxt.rightColNonNull) {
pCxt->rightColNonNull = false;
}
if (cxt.leftColOp) {
pCxt->leftColOp = true;
}
if (cxt.rightColOp) {
pCxt->rightColOp = true;
}
if (!cxt.haveLeftCol && !cxt.haveRightCol) {
pCxt->leftColNonNull = false;
pCxt->rightColNonNull = false;
return DEAL_RES_END;
} else if (!fmIsIgnoreNullFunc(pFunc->funcId)) {
if (cxt.haveLeftCol) {
pCxt->leftColNonNull = false;
}
if (cxt.haveRightCol) {
pCxt->rightColNonNull = false;
}
} else {
if (cxt.haveLeftCol) {
pCxt->leftColOp = true;
} else if (cxt.haveRightCol) {
pCxt->rightColOp = true;
}
}
if (!pCxt->leftColNonNull && !pCxt->rightColNonNull) {
return DEAL_RES_END;
}
break;
}
default:
pCxt->leftColNonNull = false;
pCxt->rightColNonNull = false;
return DEAL_RES_END;
}
return DEAL_RES_CONTINUE;
}
static int32_t pdcRewriteTypeBasedOnJoinRes(SOptimizeContext* pCxt, SJoinLogicNode* pJoin) {
if (JOIN_TYPE_INNER == pJoin->joinType || JOIN_STYPE_OUTER != pJoin->subType) {
return TSDB_CODE_SUCCESS;
}
int32_t code = 0;
SSHashObj* pLeftTables = NULL;
SSHashObj* pRightTables = NULL;
collectTableAliasFromNodes(nodesListGetNode(pJoin->node.pChildren, 0), &pLeftTables);
collectTableAliasFromNodes(nodesListGetNode(pJoin->node.pChildren, 1), &pRightTables);
SLogicNode* pParent = pJoin->node.pParent;
bool tableResNonNull[2] = {true, true};
bool tableResOp[2] = {false, false};
if (QUERY_NODE_LOGIC_PLAN_AGG == nodeType(pParent)) {
SAggLogicNode* pAgg = (SAggLogicNode*)pParent;
if (NULL != pAgg->pGroupKeys) {
tableResNonNull[0] = false;
tableResNonNull[1] = false;
} else {
SCpdIsMultiTableResCxt cxt = {.pLeftTbls = pLeftTables, .pRightTbls = pRightTables,
.haveLeftCol = false, .haveRightCol = false, .leftColNonNull = true, .rightColNonNull = true, .leftColOp = false, .rightColOp = false};
nodesWalkExprs(pAgg->pAggFuncs, pdcCheckTableResType, &cxt);
if (!cxt.leftColNonNull) {
tableResNonNull[0] = false;
}
if (!cxt.rightColNonNull) {
tableResNonNull[1] = false;
}
if (cxt.leftColOp) {
tableResOp[0] = true;
}
if (cxt.rightColOp) {
tableResOp[1] = true;
}
}
} else {
tableResNonNull[0] = false;
tableResNonNull[1] = false;
}
tSimpleHashCleanup(pLeftTables);
tSimpleHashCleanup(pRightTables);
if (TSDB_CODE_SUCCESS != code) {
return code;
}
switch (pJoin->joinType) {
case JOIN_TYPE_LEFT:
if (tableResNonNull[1] && !tableResOp[0]) {
pJoin->joinType = JOIN_TYPE_INNER;
pJoin->subType = JOIN_STYPE_NONE;
}
break;
case JOIN_TYPE_RIGHT:
if (tableResNonNull[0] && !tableResOp[1]) {
pJoin->joinType = JOIN_TYPE_INNER;
pJoin->subType = JOIN_STYPE_NONE;
}
break;
case JOIN_TYPE_FULL:
if (tableResNonNull[1] && !tableResOp[0]) {
if (tableResNonNull[0] && !tableResOp[1]) {
pJoin->joinType = JOIN_TYPE_INNER;
pJoin->subType = JOIN_STYPE_NONE;
} else {
pJoin->joinType = JOIN_TYPE_RIGHT;
}
} else if (tableResNonNull[0] && !tableResOp[1]) {
pJoin->joinType = JOIN_TYPE_LEFT;
}
break;
default:
break;
}
return TSDB_CODE_SUCCESS;
}
static int32_t pdcDealJoin(SOptimizeContext* pCxt, SJoinLogicNode* pJoin) {
if (OPTIMIZE_FLAG_TEST_MASK(pJoin->node.optimizedFlag, OPTIMIZE_FLAG_PUSH_DOWN_CONDE)) {
return TSDB_CODE_SUCCESS;
@ -1495,6 +1662,10 @@ static int32_t pdcDealJoin(SOptimizeContext* pCxt, SJoinLogicNode* pJoin) {
}
}
if (TSDB_CODE_SUCCESS == code) {
code = pdcRewriteTypeBasedOnJoinRes(pCxt, pJoin);
}
if (TSDB_CODE_SUCCESS != code || t == pJoin->joinType) {
break;
}