add PARTITION_BEFORE_GROUP hint to use partition node before agg

This commit is contained in:
wangjiaming0909 2024-01-11 15:48:07 +08:00
parent a518cba133
commit bc1eacf626
10 changed files with 59 additions and 3 deletions

View File

@ -91,13 +91,15 @@ The list of currently supported Hints is as follows:
| :-----------: | -------------- | -------------------------- | -----------------------------------|
| BATCH_SCAN | None | Batch table scan | JOIN statement for stable |
| NO_BATCH_SCAN | None | Sequential table scan | JOIN statement for stable |
| SORT_FOR_GROUP| None | Use sort for partition | With normal column in partition by list |
| SORT_FOR_GROUP| None | Use sort for partition; conflicts with PARTITION_FIRST | With normal column in partition by list |
| PARTITION_FIRST| None | Use partition before aggregation; conflicts with SORT_FOR_GROUP | With normal column in partition by list |
For example:
```sql
SELECT /*+ BATCH_SCAN() */ a.ts FROM stable1 a, stable2 b where a.tag0 = b.tag0 and a.ts = b.ts;
SELECT /*+ SORT_FOR_GROUP() */ count(*), c1 FROM stable1 PARTITION BY c1;
SELECT /*+ PARTITION_FIRST() */ count(*), c1 FROM stable1 PARTITION BY c1;
```
## Lists

View File

@ -91,13 +91,15 @@ Hints 是用户控制单个语句查询优化的一种手段,当 Hint 不适
| :-----------: | -------------- | -------------------------- | -----------------------------|
| BATCH_SCAN | 无 | 采用批量读表的方式 | 超级表 JOIN 语句 |
| NO_BATCH_SCAN | 无 | 采用顺序读表的方式 | 超级表 JOIN 语句 |
| SORT_FOR_GROUP| 无 | 采用sort方式进行分组 | partition by 列表有普通列时 |
| SORT_FOR_GROUP| 无 | 采用 sort 方式进行分组，与 PARTITION_FIRST 冲突 | partition by 列表有普通列时 |
| PARTITION_FIRST| 无 | 在聚合之前使用 PARTITION 计算分组，与 SORT_FOR_GROUP 冲突 | partition by 列表有普通列时 |
举例:
```sql
SELECT /*+ BATCH_SCAN() */ a.ts FROM stable1 a, stable2 b where a.tag0 = b.tag0 and a.ts = b.ts;
SELECT /*+ SORT_FOR_GROUP() */ count(*), c1 FROM stable1 PARTITION BY c1;
SELECT /*+ PARTITION_FIRST() */ count(*), c1 FROM stable1 PARTITION BY c1;
```
## 列表

View File

@ -373,6 +373,7 @@
#define TK_BATCH_SCAN 606
#define TK_NO_BATCH_SCAN 607
#define TK_SORT_FOR_GROUP 608
#define TK_PARTITION_FIRST 609
#define TK_NK_NIL 65535

View File

@ -126,6 +126,7 @@ typedef enum EHintOption {
HINT_NO_BATCH_SCAN = 1,
HINT_BATCH_SCAN,
HINT_SORT_FOR_GROUP,
HINT_PARTITION_FIRST,
} EHintOption;
typedef struct SHintNode {

View File

@ -371,6 +371,18 @@ SNode* createValueNode(SAstCreateContext* pCxt, int32_t dataType, const SToken*
return (SNode*)val;
}
/**
 * Report whether a hint list already contains a given hint option.
 *
 * @param pHintList  list whose nodes are read as SHintNode; may be NULL
 * @param hint       option value to look for
 * @return true if some node's `option` equals `hint`; false otherwise,
 *         including when pHintList is NULL
 */
static bool hasHint(SNodeList* pHintList, EHintOption hint) {
  if (NULL == pHintList) {
    return false;
  }

  SNode* pItem;
  FOREACH(pItem, pHintList) {
    const SHintNode* pCandidate = (const SHintNode*)pItem;
    if (hint == pCandidate->option) return true;
  }

  /* Walked the whole list without a match. */
  return false;
}
bool addHintNodeToList(SAstCreateContext* pCxt, SNodeList** ppHintList, EHintOption opt, SToken* paramList,
int32_t paramNum) {
void* value = NULL;
@ -384,6 +396,10 @@ bool addHintNodeToList(SAstCreateContext* pCxt, SNodeList** ppHintList, EHintOpt
}
case HINT_SORT_FOR_GROUP:
if (paramNum > 0) return true;
if (hasHint(*ppHintList, HINT_PARTITION_FIRST)) return true;
break;
case HINT_PARTITION_FIRST:
if (paramNum > 0 || hasHint(*ppHintList, HINT_SORT_FOR_GROUP)) return true;
break;
default:
return true;
@ -455,6 +471,14 @@ SNodeList* createHintNodeList(SAstCreateContext* pCxt, const SToken* pLiteral) {
}
opt = HINT_SORT_FOR_GROUP;
break;
case TK_PARTITION_FIRST:
lastComma = false;
if (0 != opt || inParamList) {
quit = true;
break;
}
opt = HINT_PARTITION_FIRST;
break;
case TK_NK_LP:
lastComma = false;
if (0 == opt || inParamList) {

View File

@ -170,6 +170,7 @@ static SKeyword keywordTable[] = {
{"PAGES", TK_PAGES},
{"PAGESIZE", TK_PAGESIZE},
{"PARTITION", TK_PARTITION},
{"PARTITION_FIRST", TK_PARTITION_FIRST},
{"PASS", TK_PASS},
{"PORT", TK_PORT},
{"PPS", TK_PPS},

View File

@ -47,6 +47,7 @@ int32_t validateQueryPlan(SPlanContext* pCxt, SQueryPlan* pPlan);
bool getBatchScanOptionFromHint(SNodeList* pList);
bool getSortForGroupOptHint(SNodeList* pList);
bool getOptHint(SNodeList* pList, EHintOption hint);
SLogicNode* getLogicNodeRootNode(SLogicNode* pCurr);
int32_t collectTableAliasFromNodes(SNode* pNode, SSHashObj** ppRes);
bool isPartTableAgg(SAggLogicNode* pAgg);

View File

@ -3997,7 +3997,8 @@ static int32_t partitionColsOpt(SOptimizeContext* pCxt, SLogicSubplan* pLogicSub
}
}
return code;
} else if (pNode->node.pParent && nodeType(pNode->node.pParent) == QUERY_NODE_LOGIC_PLAN_AGG) {
} else if (pNode->node.pParent && nodeType(pNode->node.pParent) == QUERY_NODE_LOGIC_PLAN_AGG &&
!getOptHint(pRootNode->pHint, HINT_PARTITION_FIRST)) {
// Check if we can delete partition node
SAggLogicNode* pAgg = (SAggLogicNode*)pNode->node.pParent;
FOREACH(node, pNode->pPartitionKeys) {

View File

@ -432,6 +432,7 @@ bool getBatchScanOptionFromHint(SNodeList* pList) {
}
bool getSortForGroupOptHint(SNodeList* pList) {
if (!pList) return false;
SNode* pNode;
FOREACH(pNode, pList) {
SHintNode* pHint = (SHintNode*)pNode;
@ -442,6 +443,18 @@ bool getSortForGroupOptHint(SNodeList* pList) {
return false;
}
/**
 * Check whether a hint list carries the requested hint option.
 *
 * @param pList  list whose nodes are read as SHintNode; may be NULL
 * @param hint   option value to search for
 * @return true when a node with a matching `option` is found; false when
 *         the list is NULL or no node matches
 */
bool getOptHint(SNodeList* pList, EHintOption hint) {
  SNode* pEntry;

  if (NULL != pList) {
    FOREACH(pEntry, pList) {
      if (((SHintNode*)pEntry)->option == hint) {
        return true;
      }
    }
  }
  return false;
}
int32_t collectTableAliasFromNodes(SNode* pNode, SSHashObj** ppRes) {
int32_t code = TSDB_CODE_SUCCESS;
SLogicNode* pCurr = (SLogicNode*)pNode;

View File

@ -169,6 +169,16 @@ class TDTestCase:
self.check_explain_res_has_row("Partition on", self.explain_sql(sql))
self.check_explain_res_has_row("Sort", self.explain_sql(sql_hint))
sql = 'select count(*), c1 from meters partition by c1'
sql_hint = 'select /*+ sort_for_group() partition_first()*/ count(*), c1 from meters partition by c1'
self.check_explain_res_has_row("Sort", self.explain_sql(sql_hint))
sql_hint = 'select /*+ partition_first()*/ count(*), c1 from meters partition by c1'
self.check_explain_res_has_row("Partition on", self.explain_sql(sql_hint))
sql_hint = 'select /*+ partition_first() sort_for_group()*/ count(*), c1 from meters partition by c1'
self.check_explain_res_has_row("Partition on", self.explain_sql(sql_hint))
sql_hint = 'select /*+ sort_for_group() partition_first()*/ count(*), c1 from meters partition by c1'
self.check_explain_res_has_row("Sort", self.explain_sql(sql_hint))
def add_order_by(self, sql: str, order_by: str, select_list: str = "*") -> str:
    """Wrap `sql` as a subquery aliased `t` and order its rows.

    Returns a statement that selects `select_list` (default `*`) from the
    wrapped query and appends `order by <order_by>`.
    """
    template = "select %s from (%s)t order by %s"
    pieces = (select_list, sql, order_by)
    return template % pieces