From 5d58c27f56e11b2dc00b3b3a84157e894292be4f Mon Sep 17 00:00:00 2001 From: dapan1121 Date: Mon, 22 Jan 2024 14:03:11 +0800 Subject: [PATCH] fix: join subquery timestamp order mis-match issue --- source/libs/planner/src/planOptimizer.c | 128 ++++++++++++++++++++++++ tests/parallel_test/cases.task | 1 + tests/script/tsim/query/join_order.sim | 51 ++++++++++ 3 files changed, 180 insertions(+) create mode 100644 tests/script/tsim/query/join_order.sim diff --git a/source/libs/planner/src/planOptimizer.c b/source/libs/planner/src/planOptimizer.c index dfb9343cc8..231e2fa32f 100644 --- a/source/libs/planner/src/planOptimizer.c +++ b/source/libs/planner/src/planOptimizer.c @@ -338,6 +338,7 @@ static void scanPathOptSetScanOrder(EScanOrder scanOrder, SScanLogicNode* pScan) if (pScan->sortPrimaryKey || pScan->scanSeq[0] > 1 || pScan->scanSeq[1] > 1) { return; } + pScan->node.outputTsOrder = scanOrder; switch (scanOrder) { case SCAN_ORDER_ASC: pScan->scanSeq[0] = 1; @@ -1450,6 +1451,132 @@ static int32_t sortPrimaryKeyOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLo return sortPrimaryKeyOptimizeImpl(pCxt, pLogicSubplan, pSort); } +static int32_t sortForJoinOptimizeImpl(SOptimizeContext* pCxt, SLogicSubplan* pLogicSubplan, SJoinLogicNode* pJoin) { + SLogicNode* pLeft = (SLogicNode*)nodesListGetNode(pJoin->node.pChildren, 0); + SLogicNode* pRight = (SLogicNode*)nodesListGetNode(pJoin->node.pChildren, 1); + SScanLogicNode* pScan = NULL; + + if (QUERY_NODE_LOGIC_PLAN_SCAN == nodeType(pLeft) && ((SScanLogicNode*)pLeft)->node.outputTsOrder != SCAN_ORDER_BOTH) { + pScan = (SScanLogicNode*)pLeft; + } else if (QUERY_NODE_LOGIC_PLAN_SCAN == nodeType(pRight) && ((SScanLogicNode*)pRight)->node.outputTsOrder != SCAN_ORDER_BOTH) { + pScan = (SScanLogicNode*)pRight; + } + + if (NULL != pScan) { + switch (pScan->node.outputTsOrder) { + case SCAN_ORDER_ASC: + pScan->scanSeq[0] = 0; + pScan->scanSeq[1] = 1; + pScan->node.outputTsOrder = SCAN_ORDER_DESC; + goto _return; + case SCAN_ORDER_DESC: + pScan->scanSeq[0] = 1; + pScan->scanSeq[1] = 0; + pScan->node.outputTsOrder = SCAN_ORDER_ASC; + goto _return; + default: + break; + } + } + + if (QUERY_NODE_OPERATOR != nodeType(pJoin->pPrimKeyEqCond)) { + return TSDB_CODE_PLAN_INTERNAL_ERROR; + } + + bool res = false; + SOperatorNode* pOp = (SOperatorNode*)pJoin->pPrimKeyEqCond; + if (QUERY_NODE_COLUMN != nodeType(pOp->pLeft) || QUERY_NODE_COLUMN != nodeType(pOp->pRight)) { + return TSDB_CODE_PLAN_INTERNAL_ERROR; + } + + SNode* pOrderByNode = NULL; + SSHashObj* pLeftTables = NULL; + collectTableAliasFromNodes(nodesListGetNode(pJoin->node.pChildren, 0), &pLeftTables); + + if (NULL != tSimpleHashGet(pLeftTables, ((SColumnNode*)pOp->pLeft)->tableAlias, strlen(((SColumnNode*)pOp->pLeft)->tableAlias))) { + pOrderByNode = pOp->pLeft; + } else if (NULL != tSimpleHashGet(pLeftTables, ((SColumnNode*)pOp->pRight)->tableAlias, strlen(((SColumnNode*)pOp->pRight)->tableAlias))) { + pOrderByNode = pOp->pRight; + } + + tSimpleHashCleanup(pLeftTables); + + if (NULL == pOrderByNode) { + return TSDB_CODE_PLAN_INTERNAL_ERROR; + } + + SSortLogicNode* pSort = (SSortLogicNode*)nodesMakeNode(QUERY_NODE_LOGIC_PLAN_SORT); + if (NULL == pSort) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + pSort->node.outputTsOrder = (ORDER_ASC == pLeft->outputTsOrder) ? ORDER_DESC : ORDER_ASC; + pSort->groupSort = false; + SOrderByExprNode* pOrder = (SOrderByExprNode*)nodesMakeNode(QUERY_NODE_ORDER_BY_EXPR); + if (NULL == pOrder) { + nodesDestroyNode((SNode *)pSort); + return TSDB_CODE_OUT_OF_MEMORY; + } + + nodesListMakeAppend(&pSort->pSortKeys, (SNode*)pOrder); + pOrder->order = (ORDER_ASC == pLeft->outputTsOrder) ? ORDER_DESC : ORDER_ASC; + pOrder->pExpr = nodesCloneNode(pOrderByNode); + pOrder->nullOrder = (ORDER_ASC == pOrder->order) ? NULL_ORDER_FIRST : NULL_ORDER_LAST; + if (!pOrder->pExpr) { + nodesDestroyNode((SNode *)pSort); + return TSDB_CODE_OUT_OF_MEMORY; + } + + pLeft->pParent = (SLogicNode*)pSort; + nodesListMakeAppend(&pSort->node.pChildren, (SNode*)pLeft); + pJoin->node.pChildren->pHead->pNode = (SNode*)pSort; + pSort->node.pParent = (SLogicNode*)pJoin;; + +_return: + + pCxt->optimized = true; + + return TSDB_CODE_SUCCESS; +} + + +static bool sortForJoinOptMayBeOptimized(SLogicNode* pNode) { + if (QUERY_NODE_LOGIC_PLAN_JOIN != nodeType(pNode)) { + return false; + } + + SJoinLogicNode* pJoin = (SJoinLogicNode*)pNode; + if (pNode->pChildren->length != 2 || !pJoin->hasSubQuery || pJoin->isLowLevelJoin) { + return false; + } + + SLogicNode* pLeft = (SLogicNode*)nodesListGetNode(pJoin->node.pChildren, 0); + SLogicNode* pRight = (SLogicNode*)nodesListGetNode(pJoin->node.pChildren, 1); + + if (ORDER_ASC != pLeft->outputTsOrder && ORDER_DESC != pLeft->outputTsOrder) { + return false; + } + if (ORDER_ASC != pRight->outputTsOrder && ORDER_DESC != pRight->outputTsOrder) { + return false; + } + + if (pLeft->outputTsOrder == pRight->outputTsOrder) { + return false; + } + + return true; +} + + +static int32_t sortForJoinOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogicSubplan) { + SJoinLogicNode* pJoin = (SJoinLogicNode*)optFindPossibleNode(pLogicSubplan->pNode, sortForJoinOptMayBeOptimized); + if (NULL == pJoin) { + return TSDB_CODE_SUCCESS; + } + return sortForJoinOptimizeImpl(pCxt, pLogicSubplan, pJoin); +} + + static bool smaIndexOptMayBeOptimized(SLogicNode* pNode) { if (QUERY_NODE_LOGIC_PLAN_SCAN != nodeType(pNode) || NULL == pNode->pParent || QUERY_NODE_LOGIC_PLAN_WINDOW != nodeType(pNode->pParent) || @@ -4197,6 +4324,7 @@ static const SOptimizeRule optimizeRuleSet[] = { {.pName = "StableJoin", .optimizeFunc = stableJoinOptimize}, {.pName = "sortNonPriKeyOptimize", .optimizeFunc = sortNonPriKeyOptimize}, {.pName = "SortPrimaryKey", .optimizeFunc = sortPrimaryKeyOptimize}, + {.pName = "SortForjoin", .optimizeFunc = sortForJoinOptimize}, {.pName = "SmaIndex", .optimizeFunc = smaIndexOptimize}, {.pName = "PushDownLimit", .optimizeFunc = pushDownLimitOptimize}, {.pName = "PartitionTags", .optimizeFunc = partTagsOptimize}, diff --git a/tests/parallel_test/cases.task b/tests/parallel_test/cases.task index 0687a3271c..8d9794b0dd 100644 --- a/tests/parallel_test/cases.task +++ b/tests/parallel_test/cases.task @@ -1108,6 +1108,7 @@ e ,,y,script,./test.sh -f tsim/query/udf_with_const.sim ,,y,script,./test.sh -f tsim/query/join_interval.sim ,,y,script,./test.sh -f tsim/query/join_pk.sim +,,y,script,./test.sh -f tsim/query/join_order.sim ,,y,script,./test.sh -f tsim/query/count_spread.sim ,,y,script,./test.sh -f tsim/query/unionall_as_table.sim ,,y,script,./test.sh -f tsim/query/multi_order_by.sim diff --git a/tests/script/tsim/query/join_order.sim b/tests/script/tsim/query/join_order.sim new file mode 100644 index 0000000000..bd772ff407 --- /dev/null +++ b/tests/script/tsim/query/join_order.sim @@ -0,0 +1,51 @@ +system sh/stop_dnodes.sh +system sh/deploy.sh -n dnode1 -i 1 +system sh/exec.sh -n dnode1 -s start +sql connect + +sql drop database if exists db1; +sql create database db1 vgroups 1; +sql use db1; +sql create stable sta (ts timestamp, col1 int) tags(t1 int); +sql create table tba1 using sta tags(1); + +sql insert into tba1 values ('2023-11-17 16:29:00', 1); +sql insert into tba1 values ('2023-11-17 16:29:02', 3); +sql insert into tba1 values ('2023-11-17 16:29:03', 4); +sql insert into tba1 values ('2023-11-17 16:29:04', 5); + + +sql select a.*,b.* from tba1 a, (select * from tba1 order by ts) b where a.ts=b.ts; +if $rows != 4 then + return -1 +endi +sql select a.*,b.* from (select * from tba1 order by ts) a, tba1 b where a.ts=b.ts; +if $rows != 4 then + return -1 +endi +sql select a.*,b.* from tba1 a, (select * from tba1 order by ts desc) b where a.ts=b.ts; +if $rows != 4 then + return -1 +endi +sql select a.*,b.* from (select * from tba1 order by ts desc) a, tba1 b where a.ts=b.ts; +if $rows != 4 then + return -1 +endi +sql select a.*,b.* from (select * from tba1 order by ts) a, (select * from tba1 order by ts) b where a.ts=b.ts; +if $rows != 4 then + return -1 +endi +sql select a.*,b.* from (select * from tba1 order by ts desc) a, (select * from tba1 order by ts desc) b where a.ts=b.ts; +if $rows != 4 then + return -1 +endi +sql select a.*,b.* from (select * from tba1 order by ts) a, (select * from tba1 order by ts desc) b where a.ts=b.ts; +if $rows != 4 then + return -1 +endi +sql select a.*,b.* from (select * from tba1 order by ts desc) a, (select * from tba1 order by ts) b where a.ts=b.ts; +if $rows != 4 then + return -1 +endi + +system sh/exec.sh -n dnode1 -s stop -x SIGINT