diff --git a/include/libs/function/functionMgt.h b/include/libs/function/functionMgt.h index 569c16675d..6968f1712c 100644 --- a/include/libs/function/functionMgt.h +++ b/include/libs/function/functionMgt.h @@ -129,6 +129,7 @@ typedef enum EFunctionType { FUNCTION_TYPE_TO_COLUMN, FUNCTION_TYPE_GROUP_KEY, FUNCTION_TYPE_CACHE_LAST_ROW, + FUNCTION_TYPE_CACHE_LAST, // distributed splitting functions FUNCTION_TYPE_APERCENTILE_PARTIAL = 4000, @@ -216,6 +217,8 @@ bool fmIsKeepOrderFunc(int32_t funcId); bool fmIsCumulativeFunc(int32_t funcId); bool fmIsInterpPseudoColumnFunc(int32_t funcId); +void getLastCacheDataType(SDataType* pType); + int32_t fmGetDistMethod(const SFunctionNode* pFunc, SFunctionNode** pPartialFunc, SFunctionNode** pMergeFunc); typedef enum EFuncDataRequired { diff --git a/source/libs/function/src/builtins.c b/source/libs/function/src/builtins.c index 54e8c27d5b..f03d8354fa 100644 --- a/source/libs/function/src/builtins.c +++ b/source/libs/function/src/builtins.c @@ -2405,6 +2405,16 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .processFunc = lastFunctionMerge, .finalizeFunc = firstLastFinalize }, + { + .name = "_cache_last", + .type = FUNCTION_TYPE_CACHE_LAST, + .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_MULTI_RES_FUNC | FUNC_MGT_FORBID_STREAM_FUNC, + .translateFunc = translateFirstLast, + .getEnvFunc = getFirstLastFuncEnv, + .initFunc = functionSetup, + .processFunc = lastFunctionMerge, + .finalizeFunc = firstLastFinalize + }, { .name = "_last_row_partial", .type = FUNCTION_TYPE_LAST_PARTIAL, diff --git a/source/libs/function/src/functionMgt.c b/source/libs/function/src/functionMgt.c index ca8ddbc60a..40af7bb567 100644 --- a/source/libs/function/src/functionMgt.c +++ b/source/libs/function/src/functionMgt.c @@ -16,6 +16,7 @@ #include "functionMgt.h" #include "builtins.h" +#include "builtinsimpl.h" #include "functionMgtInt.h" #include "taos.h" #include "taoserror.h" @@ -314,6 +315,11 @@ bool fmIsSameInOutType(int32_t funcId) { return res; } +void getLastCacheDataType(SDataType* pType) { + pType->bytes = getFirstLastInfoSize(pType->bytes) + VARSTR_HEADER_SIZE; + pType->type = TSDB_DATA_TYPE_BINARY; +} + static int32_t getFuncInfo(SFunctionNode* pFunc) { char msg[128] = {0}; return fmGetFuncInfo(pFunc, msg, sizeof(msg)); diff --git a/source/libs/planner/src/planOptimizer.c b/source/libs/planner/src/planOptimizer.c index 9a2cfa3339..0fc1be8a70 100644 --- a/source/libs/planner/src/planOptimizer.c +++ b/source/libs/planner/src/planOptimizer.c @@ -2120,6 +2120,22 @@ static int32_t rewriteUniqueOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLog return rewriteUniqueOptimizeImpl(pCxt, pLogicSubplan, pIndef); } +static EDealRes lastRowScanOptHasTagImpl(SNode* pNode, void* pContext) { + if (QUERY_NODE_COLUMN == nodeType(pNode)) { + if (COLUMN_TYPE_TAG == ((SColumnNode*)pNode)->colType || COLUMN_TYPE_TBNAME == ((SColumnNode*)pNode)->colType) { + *(bool*)pContext = true; + return DEAL_RES_END; + } + } + return DEAL_RES_CONTINUE; +} + +static bool lastRowScanOptHasTag(SNode* pExpr) { + bool hasTag = false; + nodesWalkExpr(pExpr, lastRowScanOptHasTagImpl, &hasTag); + return hasTag; +} + static bool lastRowScanOptMayBeOptimized(SLogicNode* pNode) { if (QUERY_NODE_LOGIC_PLAN_AGG != nodeType(pNode) || 1 != LIST_LENGTH(pNode->pChildren) || QUERY_NODE_LOGIC_PLAN_SCAN != nodeType(nodesListGetNode(pNode->pChildren, 0))) { @@ -2134,12 +2150,22 @@ static bool lastRowScanOptMayBeOptimized(SLogicNode* pNode) { return false; } + bool hasLastFunc = false; + bool hasSelectFunc = false; SNode* pFunc = NULL; FOREACH(pFunc, ((SAggLogicNode*)pNode)->pAggFuncs) { - if (FUNCTION_TYPE_LAST_ROW != ((SFunctionNode*)pFunc)->funcType && - FUNCTION_TYPE_LAST != ((SFunctionNode*)pFunc)->funcType && - FUNCTION_TYPE_SELECT_VALUE != ((SFunctionNode*)pFunc)->funcType && - FUNCTION_TYPE_GROUP_KEY != ((SFunctionNode*)pFunc)->funcType) { + SFunctionNode* pAggFunc = (SFunctionNode*)pFunc; + if (FUNCTION_TYPE_LAST == pAggFunc->funcType) { + if (hasSelectFunc || lastRowScanOptHasTag(nodesListGetNode(pAggFunc->pParameterList, 0))) { + return false; + } + hasLastFunc = true; + } else if (FUNCTION_TYPE_SELECT_VALUE == pAggFunc->funcType || FUNCTION_TYPE_GROUP_KEY == pAggFunc->funcType) { + if (hasLastFunc) { + return false; + } + hasSelectFunc = true; + } else if (FUNCTION_TYPE_LAST_ROW != pAggFunc->funcType) { return false; } } @@ -2147,6 +2173,31 @@ static bool lastRowScanOptMayBeOptimized(SLogicNode* pNode) { return true; } +typedef struct SLastRowScanOptSetColDataTypeCxt { + bool doAgg; + SNodeList* pLastCols; +} SLastRowScanOptSetColDataTypeCxt; + +static EDealRes lastRowScanOptSetColDataType(SNode* pNode, void* pContext) { + if (QUERY_NODE_COLUMN == nodeType(pNode)) { + SLastRowScanOptSetColDataTypeCxt* pCxt = pContext; + if (pCxt->doAgg) { + nodesListMakeAppend(&pCxt->pLastCols, pNode); + getLastCacheDataType(&(((SColumnNode*)pNode)->node.resType)); + } else { + SNode* pCol = NULL; + FOREACH(pCol, pCxt->pLastCols) { + if (nodesEqualNode(pCol, pNode)) { + getLastCacheDataType(&(((SColumnNode*)pNode)->node.resType)); + break; + } + } + } + return DEAL_RES_IGNORE_CHILD; + } + return DEAL_RES_CONTINUE; +} + static int32_t lastRowScanOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogicSubplan) { SAggLogicNode* pAgg = (SAggLogicNode*)optFindPossibleNode(pLogicSubplan->pNode, lastRowScanOptMayBeOptimized); @@ -2154,22 +2205,35 @@ static int32_t lastRowScanOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogic return TSDB_CODE_SUCCESS; } - SNode* pNode = NULL; + SLastRowScanOptSetColDataTypeCxt cxt = {.doAgg = true, .pLastCols = NULL}; + SNode* pNode = NULL; FOREACH(pNode, pAgg->pAggFuncs) { SFunctionNode* pFunc = (SFunctionNode*)pNode; - if (FUNCTION_TYPE_LAST_ROW == pFunc->funcType || FUNCTION_TYPE_LAST == pFunc->funcType) { - int32_t len = snprintf(pFunc->functionName, sizeof(pFunc->functionName), "_cache_last_row"); + int32_t funcType = pFunc->funcType; + if (FUNCTION_TYPE_LAST_ROW == funcType || FUNCTION_TYPE_LAST == funcType) { + int32_t len = snprintf(pFunc->functionName, sizeof(pFunc->functionName), + FUNCTION_TYPE_LAST_ROW == funcType ? "_cache_last_row" : "_cache_last"); pFunc->functionName[len] = '\0'; int32_t code = fmGetFuncInfo(pFunc, NULL, 0); if (TSDB_CODE_SUCCESS != code) { + nodesClearList(cxt.pLastCols); return code; } + if (FUNCTION_TYPE_LAST == funcType) { + nodesWalkExpr(nodesListGetNode(pFunc->pParameterList, 0), lastRowScanOptSetColDataType, &cxt); + } } } SScanLogicNode* pScan = (SScanLogicNode*)nodesListGetNode(pAgg->node.pChildren, 0); pScan->scanType = SCAN_TYPE_LAST_ROW; pScan->igLastNull = pAgg->hasLast ? true : false; + if (NULL != cxt.pLastCols) { + cxt.doAgg = false; + nodesWalkExprs(pScan->pScanCols, lastRowScanOptSetColDataType, &cxt); + nodesWalkExprs(pScan->pScanPseudoCols, lastRowScanOptSetColDataType, &cxt); + nodesClearList(cxt.pLastCols); + } pAgg->hasLastRow = false; pAgg->hasLast = false; diff --git a/source/libs/planner/test/planBasicTest.cpp b/source/libs/planner/test/planBasicTest.cpp index 0baec147a2..c5a9a447c7 100644 --- a/source/libs/planner/test/planBasicTest.cpp +++ b/source/libs/planner/test/planBasicTest.cpp @@ -105,24 +105,6 @@ TEST_F(PlanBasicTest, interpFunc) { run("SELECT _IROWTS, INTERP(c1) FROM t1 RANGE('2017-7-14 18:00:00', '2017-7-14 19:00:00') EVERY(5s) FILL(LINEAR)"); } -TEST_F(PlanBasicTest, lastRowFunc) { - useDb("root", "cache_db"); - - run("SELECT LAST_ROW(c1) FROM t1"); - - run("SELECT LAST_ROW(*) FROM t1"); - - run("SELECT LAST_ROW(c1, c2) FROM t1"); - - run("SELECT LAST_ROW(c1), c2 FROM t1"); - - run("SELECT LAST_ROW(c1) FROM st1"); - - run("SELECT LAST_ROW(c1) FROM st1 PARTITION BY TBNAME"); - - run("SELECT LAST_ROW(c1), SUM(c3) FROM t1"); -} - TEST_F(PlanBasicTest, lastRowFuncWithoutCache) { useDb("root", "test"); diff --git a/source/libs/planner/test/planOptimizeTest.cpp b/source/libs/planner/test/planOptimizeTest.cpp index c2a0aee847..fb4f32a9bf 100644 --- a/source/libs/planner/test/planOptimizeTest.cpp +++ b/source/libs/planner/test/planOptimizeTest.cpp @@ -109,9 +109,28 @@ TEST_F(PlanOptimizeTest, mergeProjects) { TEST_F(PlanOptimizeTest, pushDownProjectCond) { useDb("root", "test"); + run("select 1-abs(c1) from (select unique(c1) c1 from st1s3) where 1-c1>5 order by 1 nulls first"); } +TEST_F(PlanOptimizeTest, LastRowScan) { + useDb("root", "cache_db"); + + run("SELECT LAST_ROW(c1), c2 FROM t1"); + + run("SELECT LAST_ROW(c1), c2, tag1, tbname FROM st1"); + + run("SELECT LAST_ROW(c1) FROM st1 PARTITION BY TBNAME"); + + run("SELECT LAST_ROW(c1), SUM(c3) FROM t1"); + + run("SELECT LAST_ROW(tag1) FROM st1"); + + run("SELECT LAST(c1) FROM st1"); + + run("SELECT LAST(c1), c2 FROM st1"); +} + TEST_F(PlanOptimizeTest, tagScan) { useDb("root", "test"); run("select tag1 from st1 group by tag1");