Merge pull request #23431 from taosdata/szhou/tbname-prune-vgroup

enhance: use tbname equal cond to prune vgroups
This commit is contained in:
dapan1121 2023-11-13 08:34:31 +08:00 committed by GitHub
commit 40ab3d7cc6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 475 additions and 0 deletions

View File

@ -3925,6 +3925,267 @@ static int32_t translatePartitionBy(STranslateContext* pCxt, SSelectStmt* pSelec
return code;
}
typedef struct SEqCondTbNameTableInfo {
SRealTableNode* pRealTable;
SArray* aTbnames;
} SEqCondTbNameTableInfo;
//[tableAlias.]tbname = tbNamVal
static bool isOperatorEqTbnameCond(STranslateContext* pCxt, SOperatorNode* pOperator, char** ppTableAlias, SArray** ppTabNames) {
if (pOperator->opType != OP_TYPE_EQUAL) return false;
SFunctionNode* pTbnameFunc = NULL;
SValueNode* pValueNode = NULL;
if (nodeType(pOperator->pLeft) == QUERY_NODE_FUNCTION &&
((SFunctionNode*)(pOperator->pLeft))->funcType == FUNCTION_TYPE_TBNAME &&
nodeType(pOperator->pRight) == QUERY_NODE_VALUE) {
pTbnameFunc = (SFunctionNode*)pOperator->pLeft;
pValueNode = (SValueNode*)pOperator->pRight;
} else if (nodeType(pOperator->pRight) == QUERY_NODE_FUNCTION &&
((SFunctionNode*)(pOperator->pRight))->funcType == FUNCTION_TYPE_TBNAME &&
nodeType(pOperator->pLeft) == QUERY_NODE_VALUE) {
pTbnameFunc = (SFunctionNode*)pOperator->pRight;
pValueNode = (SValueNode*)pOperator->pLeft;
} else {
return false;
}
if (LIST_LENGTH(pTbnameFunc->pParameterList) == 0) {
*ppTableAlias = NULL;
} else if (LIST_LENGTH(pTbnameFunc->pParameterList) == 1) {
SNode* pQualNode = nodesListGetNode(pTbnameFunc->pParameterList, 0);
if (nodeType(pQualNode) != QUERY_NODE_VALUE) return false;
SValueNode* pQualValNode = (SValueNode*)pQualNode;
*ppTableAlias = pQualValNode->literal;
} else {
return false;
}
*ppTabNames = taosArrayInit(1, sizeof(void*));
taosArrayPush(*ppTabNames, &(pValueNode->literal));
return true;
}
//[tableAlias.]tbname in (value1, value2, ...)
static bool isOperatorTbnameInCond(STranslateContext* pCxt, SOperatorNode* pOperator, char** ppTableAlias, SArray** ppTbNames) {
if (pOperator->opType != OP_TYPE_IN) return false;
if (nodeType(pOperator->pLeft) != QUERY_NODE_FUNCTION ||
((SFunctionNode*)(pOperator->pLeft))->funcType != FUNCTION_TYPE_TBNAME ||
nodeType(pOperator->pRight) != QUERY_NODE_NODE_LIST) {
return false;
}
SFunctionNode* pTbnameFunc = (SFunctionNode*)pOperator->pLeft;
if (LIST_LENGTH(pTbnameFunc->pParameterList) == 0) {
*ppTableAlias = NULL;
} else if (LIST_LENGTH(pTbnameFunc->pParameterList) == 1) {
SNode* pQualNode = nodesListGetNode(pTbnameFunc->pParameterList, 0);
if (nodeType(pQualNode) != QUERY_NODE_VALUE) return false;
SValueNode* pQualValNode = (SValueNode*)pQualNode;
*ppTableAlias = pQualValNode->literal;
} else {
return false;
}
*ppTbNames = taosArrayInit(1, sizeof(void*));
SNodeListNode* pValueListNode = (SNodeListNode*)pOperator->pRight;
SNodeList* pValueNodeList = pValueListNode->pNodeList;
SNode* pValNode = NULL;
FOREACH(pValNode, pValueNodeList) {
if (nodeType(pValNode) != QUERY_NODE_VALUE) {
return false;
}
taosArrayPush(*ppTbNames, &((SValueNode*)pValNode)->literal);
}
return true;
}
static bool findEqCondTbNameInOperatorNode(STranslateContext* pCxt, SNode* pWhere, SEqCondTbNameTableInfo* pInfo) {
int32_t code = TSDB_CODE_SUCCESS;
char* pTableAlias = NULL;
char* pTbNameVal = NULL;
if (isOperatorEqTbnameCond(pCxt, (SOperatorNode*)pWhere, &pTableAlias, &pInfo->aTbnames) ||
isOperatorTbnameInCond(pCxt, (SOperatorNode*)pWhere, &pTableAlias, &pInfo->aTbnames)) {
STableNode* pTable;
if (pTableAlias == NULL) {
pTable = (STableNode*)((SSelectStmt*)(pCxt->pCurrStmt))->pFromTable;
} else {
code = findTable(pCxt, pTableAlias, &pTable);
}
if (code == TSDB_CODE_SUCCESS && nodeType(pTable) == QUERY_NODE_REAL_TABLE &&
((SRealTableNode*)pTable)->pMeta && ((SRealTableNode*)pTable)->pMeta->tableType == TSDB_SUPER_TABLE) {
pInfo->pRealTable = (SRealTableNode*)pTable;
return true;
}
taosArrayDestroy(pInfo->aTbnames);
pInfo->aTbnames = NULL;
}
return false;
}
static bool isTableExistInTableTbnames(SArray* aTableTbNames, SRealTableNode* pTable) {
for (int i = 0; i < taosArrayGetSize(aTableTbNames); ++i) {
SEqCondTbNameTableInfo* info = taosArrayGet(aTableTbNames, i);
if (info->pRealTable == pTable) {
return true;
}
}
return false;
}
static void findEqualCondTbnameInLogicCondAnd(STranslateContext* pCxt, SNode* pWhere, SArray* aTableTbnames) {
SNode* pTmpNode = NULL;
FOREACH(pTmpNode, ((SLogicConditionNode*)pWhere)->pParameterList) {
if (nodeType(pTmpNode) == QUERY_NODE_OPERATOR) {
SEqCondTbNameTableInfo info = {0};
bool bIsEqTbnameCond = findEqCondTbNameInOperatorNode(pCxt, pTmpNode, &info);
if (bIsEqTbnameCond) {
if (!isTableExistInTableTbnames(aTableTbnames, info.pRealTable)) {
//TODO: intersect tbNames of same table? speed
taosArrayPush(aTableTbnames, &info);
} else {
taosArrayDestroy(info.aTbnames);
}
}
}
//TODO: logic cond
}
}
static void unionEqualCondTbnamesOfSameTable(SArray* aTableTbnames, SEqCondTbNameTableInfo* pInfo) {
bool bFoundTable = false;
for (int i = 0; i < taosArrayGetSize(aTableTbnames); ++i) {
SEqCondTbNameTableInfo* info = taosArrayGet(aTableTbnames, i);
if (info->pRealTable == pInfo->pRealTable) {
taosArrayAddAll(info->aTbnames, pInfo->aTbnames);
taosArrayDestroy(pInfo->aTbnames);
pInfo->aTbnames = NULL;
bFoundTable = true;
break;
}
}
if (!bFoundTable) {
taosArrayPush(aTableTbnames, pInfo);
}
}
static void findEqualCondTbnameInLogicCondOr(STranslateContext* pCxt, SNode* pWhere, SArray* aTableTbnames) {
bool bAllTbName = true;
SNode* pTmpNode = NULL;
FOREACH(pTmpNode, ((SLogicConditionNode*)pWhere)->pParameterList) {
//TODO: logic cond
if (nodeType(pTmpNode) == QUERY_NODE_OPERATOR) {
SEqCondTbNameTableInfo info = {0};
bool bIsEqTbnameCond = findEqCondTbNameInOperatorNode(pCxt, pTmpNode, &info);
if (!bIsEqTbnameCond) {
bAllTbName = false;
break;
} else {
unionEqualCondTbnamesOfSameTable(aTableTbnames, &info);
}
} else {
bAllTbName = false;
break;
}
}
if (!bAllTbName) {
for (int i = 0; i < taosArrayGetSize(aTableTbnames); ++i) {
SEqCondTbNameTableInfo* pInfo = taosArrayGet(aTableTbnames, i);
taosArrayDestroy(pInfo->aTbnames);
pInfo->aTbnames = NULL;
}
taosArrayClear(aTableTbnames);
}
}
static int32_t findEqualCondTbname(STranslateContext* pCxt, SNode* pWhere, SArray* aTableTbnames) {
//TODO: optimize nested and/or condition. now only the fist level is processed.
if (nodeType(pWhere) == QUERY_NODE_OPERATOR) {
SEqCondTbNameTableInfo info = {0};
bool bIsEqTbnameCond = findEqCondTbNameInOperatorNode(pCxt, pWhere, &info);
if (bIsEqTbnameCond) {
taosArrayPush(aTableTbnames, &info);
}
} else if (nodeType(pWhere) == QUERY_NODE_LOGIC_CONDITION) {
if (((SLogicConditionNode*)pWhere)->condType == LOGIC_COND_TYPE_AND) {
findEqualCondTbnameInLogicCondAnd(pCxt, pWhere, aTableTbnames);
} else if (((SLogicConditionNode*)pWhere)->condType == LOGIC_COND_TYPE_OR) {
findEqualCondTbnameInLogicCondOr(pCxt, pWhere, aTableTbnames);
}
}
return TSDB_CODE_SUCCESS;
}
static int32_t findVgroupsFromEqualTbname(STranslateContext* pCxt, SEqCondTbNameTableInfo* pInfo, SVgroupsInfo* vgsInfo) {
int32_t nVgroups = 0;
int32_t nTbls = taosArrayGetSize(pInfo->aTbnames);
if (nTbls >= pInfo->pRealTable->pVgroupList->numOfVgroups) {
vgsInfo->numOfVgroups = 0;
return TSDB_CODE_SUCCESS;
}
for (int j = 0; j < nTbls; ++j) {
char* dbName = pInfo->pRealTable->table.dbName;
SName snameTb;
char* tbName = taosArrayGetP(pInfo->aTbnames, j);
toName(pCxt->pParseCxt->acctId, dbName, tbName, &snameTb);
SVgroupInfo vgInfo;
bool bExists;
int32_t code = catalogGetCachedTableHashVgroup(pCxt->pParseCxt->pCatalog, &snameTb, &vgInfo, &bExists);
if (code == TSDB_CODE_SUCCESS && bExists) {
bool bFoundVg = false;
for (int32_t k = 0; k < nVgroups; ++k) {
if (vgsInfo->vgroups[k].vgId == vgInfo.vgId) {
bFoundVg = true;
break;
}
}
if (!bFoundVg) {
vgsInfo->vgroups[nVgroups] = vgInfo;
++nVgroups;
vgsInfo->numOfVgroups = nVgroups;
}
} else {
vgsInfo->numOfVgroups = 0;
break;
}
}
return TSDB_CODE_SUCCESS;
}
static int32_t setEqualTbnameTableVgroups(STranslateContext* pCxt, SSelectStmt* pSelect, SArray* aTables) {
int32_t code = TSDB_CODE_SUCCESS;
for (int i = 0; i < taosArrayGetSize(aTables); ++i) {
SEqCondTbNameTableInfo* pInfo = taosArrayGet(aTables, i);
int32_t nTbls = taosArrayGetSize(pInfo->aTbnames);
SVgroupsInfo* vgsInfo = taosMemoryMalloc(sizeof(SVgroupsInfo) + nTbls * sizeof(SVgroupInfo));
int32_t nVgroups = 0;
findVgroupsFromEqualTbname(pCxt, pInfo, vgsInfo);
if (vgsInfo->numOfVgroups != 0) {
taosMemoryFree(pInfo->pRealTable->pVgroupList);
pInfo->pRealTable->pVgroupList = vgsInfo;
} else {
taosMemoryFree(vgsInfo);
}
}
return TSDB_CODE_SUCCESS;
}
static int32_t setTableVgroupsFromEqualTbnameCond(STranslateContext* pCxt, SSelectStmt* pSelect) {
int32_t code = TSDB_CODE_SUCCESS;
SArray* aTables = taosArrayInit(1, sizeof(SEqCondTbNameTableInfo));
code = findEqualCondTbname(pCxt, pSelect->pWhere, aTables);
if (code == TSDB_CODE_SUCCESS) {
code = setEqualTbnameTableVgroups(pCxt, pSelect, aTables);
}
for (int i = 0; i < taosArrayGetSize(aTables); ++i) {
SEqCondTbNameTableInfo* pInfo = taosArrayGet(aTables, i);
taosArrayDestroy(pInfo->aTbnames);
}
taosArrayDestroy(aTables);
return code;
}
static int32_t translateWhere(STranslateContext* pCxt, SSelectStmt* pSelect) {
pCxt->currClause = SQL_CLAUSE_WHERE;
int32_t code = translateExpr(pCxt, &pSelect->pWhere);
@ -3934,6 +4195,9 @@ static int32_t translateWhere(STranslateContext* pCxt, SSelectStmt* pSelect) {
if (TSDB_CODE_SUCCESS == code && pSelect->timeRange.skey > pSelect->timeRange.ekey) {
pSelect->isEmptyResult = true;
}
if (pSelect->pWhere != NULL) {
setTableVgroupsFromEqualTbnameCond(pCxt, pSelect);
}
return code;
}

View File

@ -21,6 +21,7 @@
,,y,system-test,./pytest.sh python3 ./test.py -f 8-stream/partition_interval.py
,,y,system-test,./pytest.sh python3 ./test.py -f 8-stream/pause_resume_test.py
,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/tbname_vgroup.py
,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/stbJoin.py
,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/stbJoin.py -Q 2
,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/stbJoin.py -Q 3

View File

@ -0,0 +1,210 @@
import sys
from util.log import *
from util.cases import *
from util.sql import *
from util.dnodes import tdDnodes
from math import inf
class TDTestCase:
def caseDescription(self):
'''
case1<shenglian zhou>: [TD-]
'''
return
def init(self, conn, logSql, replicaVer=1):
tdLog.debug("start to execute %s" % __file__)
tdSql.init(conn.cursor(), True)
self.conn = conn
def restartTaosd(self, index=1, dbname="db"):
tdDnodes.stop(index)
tdDnodes.startWithoutSleep(index)
tdSql.execute(f"use tbname_vgroup")
def run(self):
print("running {}".format(__file__))
tdSql.execute("drop database if exists tbname_vgroup")
tdSql.execute("create database if not exists tbname_vgroup")
tdSql.execute('use tbname_vgroup')
tdSql.execute('drop database if exists dbvg')
tdSql.execute('create database dbvg vgroups 8;')
tdSql.execute('use dbvg;')
tdSql.execute('create table st(ts timestamp, f int) tags (t int);')
tdSql.execute("insert into ct1 using st tags(1) values('2021-04-19 00:00:01', 1)")
tdSql.execute("insert into ct2 using st tags(2) values('2021-04-19 00:00:02', 2)")
tdSql.execute("insert into ct3 using st tags(3) values('2021-04-19 00:00:03', 3)")
tdSql.execute("insert into ct4 using st tags(4) values('2021-04-19 00:00:04', 4)")
tdSql.query("select * from st where tbname='ct1'")
tdSql.checkRows(1)
tdSql.checkData(0, 0, datetime.datetime(2021, 4, 19, 0, 0, 1))
tdSql.checkData(0, 1, 1)
tdSql.checkData(0, 2, 1)
tdSql.query("select * from st where tbname='ct3'")
tdSql.checkRows(1)
tdSql.checkData(0, 0, datetime.datetime(2021, 4, 19, 0, 0, 3))
tdSql.checkData(0, 1, 3)
tdSql.checkData(0, 2, 3)
tdSql.query("select * from st where tbname='ct3' and f=2")
tdSql.checkRows(0)
tdSql.query("select * from st where tbname='ct1' and tbname='ct4'")
tdSql.checkRows(0)
tdSql.query("select * from st where tbname='ct1' or tbname='ct4' order by ts")
tdSql.checkRows(2)
tdSql.checkData(0, 0, datetime.datetime(2021, 4, 19, 0, 0, 1))
tdSql.checkData(0, 1, 1)
tdSql.checkData(0, 2, 1)
tdSql.checkData(1, 0, datetime.datetime(2021, 4, 19, 0, 0, 4))
tdSql.checkData(1, 1, 4)
tdSql.checkData(1, 2, 4)
tdSql.query("select * from st where tbname='ct2' or tbname='ct3' order by ts")
tdSql.checkRows(2)
tdSql.checkData(0, 0, datetime.datetime(2021, 4, 19, 0, 0, 2))
tdSql.checkData(0, 1, 2)
tdSql.checkData(0, 2, 2)
tdSql.checkData(1, 0, datetime.datetime(2021, 4, 19, 0, 0, 3))
tdSql.checkData(1, 1, 3)
tdSql.checkData(1, 2, 3)
tdSql.query("select * from st where tbname='ct1' or tbname='ct4' or tbname='ct3' or tbname='ct2' order by ts")
tdSql.checkRows(4)
tdSql.checkData(0, 0, datetime.datetime(2021, 4, 19, 0, 0, 1))
tdSql.checkData(0, 1, 1)
tdSql.checkData(0, 2, 1)
tdSql.checkData(1, 0, datetime.datetime(2021, 4, 19, 0, 0, 2))
tdSql.checkData(1, 1, 2)
tdSql.checkData(1, 2, 2)
tdSql.checkData(2, 0, datetime.datetime(2021, 4, 19, 0, 0, 3))
tdSql.checkData(2, 1, 3)
tdSql.checkData(2, 2, 3)
tdSql.checkData(3, 0, datetime.datetime(2021, 4, 19, 0, 0, 4))
tdSql.checkData(3, 1, 4)
tdSql.checkData(3, 2, 4)
tdSql.query("select * from st where tbname='ct4' or 1=1 order by ts")
tdSql.checkRows(4)
tdSql.checkData(0, 0, datetime.datetime(2021, 4, 19, 0, 0, 1))
tdSql.checkData(0, 1, 1)
tdSql.checkData(0, 2, 1)
tdSql.checkData(1, 0, datetime.datetime(2021, 4, 19, 0, 0, 2))
tdSql.checkData(1, 1, 2)
tdSql.checkData(1, 2, 2)
tdSql.checkData(2, 0, datetime.datetime(2021, 4, 19, 0, 0, 3))
tdSql.checkData(2, 1, 3)
tdSql.checkData(2, 2, 3)
tdSql.checkData(3, 0, datetime.datetime(2021, 4, 19, 0, 0, 4))
tdSql.checkData(3, 1, 4)
tdSql.checkData(3, 2, 4)
tdSql.query("select * from st where tbname in ('ct1') order by ts")
tdSql.checkRows(1)
tdSql.checkData(0, 0, datetime.datetime(2021, 4, 19, 0, 0, 1))
tdSql.checkData(0, 1, 1)
tdSql.checkData(0, 2, 1)
tdSql.query("select * from st where tbname in ('ct1', 'ct2') order by ts")
tdSql.checkRows(2)
tdSql.checkData(0, 0, datetime.datetime(2021, 4, 19, 0, 0, 1))
tdSql.checkData(0, 1, 1)
tdSql.checkData(0, 2, 1)
tdSql.checkData(1, 0, datetime.datetime(2021, 4, 19, 0, 0, 2))
tdSql.checkData(1, 1, 2)
tdSql.checkData(1, 2, 2)
tdSql.query("select * from st where tbname in ('ct1', 'ct2') or tbname in ('ct3', 'ct4') order by ts")
tdSql.checkRows(4)
tdSql.checkData(0, 0, datetime.datetime(2021, 4, 19, 0, 0, 1))
tdSql.checkData(0, 1, 1)
tdSql.checkData(0, 2, 1)
tdSql.checkData(1, 0, datetime.datetime(2021, 4, 19, 0, 0, 2))
tdSql.checkData(1, 1, 2)
tdSql.checkData(1, 2, 2)
tdSql.checkData(2, 0, datetime.datetime(2021, 4, 19, 0, 0, 3))
tdSql.checkData(2, 1, 3)
tdSql.checkData(2, 2, 3)
tdSql.checkData(3, 0, datetime.datetime(2021, 4, 19, 0, 0, 4))
tdSql.checkData(3, 1, 4)
tdSql.checkData(3, 2, 4)
tdSql.query("select * from st where tbname in ('ct1', 'ct2') or tbname='ct3' order by ts")
tdSql.checkRows(3)
tdSql.checkData(0, 0, datetime.datetime(2021, 4, 19, 0, 0, 1))
tdSql.checkData(0, 1, 1)
tdSql.checkData(0, 2, 1)
tdSql.checkData(1, 0, datetime.datetime(2021, 4, 19, 0, 0, 2))
tdSql.checkData(1, 1, 2)
tdSql.checkData(1, 2, 2)
tdSql.checkData(2, 0, datetime.datetime(2021, 4, 19, 0, 0, 3))
tdSql.checkData(2, 1, 3)
tdSql.checkData(2, 2, 3)
tdSql.query("select * from st where tbname in ('ct1', 'ct2') and tbname='ct3' order by ts")
tdSql.checkRows(0)
tdSql.query("select * from st where tbname in ('ct1') or 1=1 order by ts")
tdSql.checkRows(4)
tdSql.checkData(0, 0, datetime.datetime(2021, 4, 19, 0, 0, 1))
tdSql.checkData(0, 1, 1)
tdSql.checkData(0, 2, 1)
tdSql.checkData(1, 0, datetime.datetime(2021, 4, 19, 0, 0, 2))
tdSql.checkData(1, 1, 2)
tdSql.checkData(1, 2, 2)
tdSql.checkData(2, 0, datetime.datetime(2021, 4, 19, 0, 0, 3))
tdSql.checkData(2, 1, 3)
tdSql.checkData(2, 2, 3)
tdSql.checkData(3, 0, datetime.datetime(2021, 4, 19, 0, 0, 4))
tdSql.checkData(3, 1, 4)
tdSql.checkData(3, 2, 4)
tdSql.query("explain select * from st where tbname='ct1'")
tdSql.checkRows(2)
tdSql.query("select table_name, vgroup_id from information_schema.ins_tables where db_name='dbvg' and type='CHILD_TABLE'");
print(tdSql.queryResult);
tdSql.query("explain select * from st where tbname in ('ct1', 'ct2')")
if tdSql.queryResult[0][0].count("Data Exchange 2:1") == 0:
tdLog.exit("failed, not two vgroups")
else:
tdLog.info("select * from st where tbname in ('ct1', 'ct2') involves two vgroups")
tdSql.execute('create table st2(ts timestamp, f int) tags (t int);')
tdSql.execute("insert into ct21 using st2 tags(1) values('2021-04-19 00:00:01', 1)")
tdSql.execute("insert into ct22 using st2 tags(2) values('2021-04-19 00:00:02', 2)")
tdSql.execute("insert into ct23 using st2 tags(3) values('2021-04-19 00:00:03', 3)")
tdSql.execute("insert into ct24 using st2 tags(4) values('2021-04-19 00:00:04', 4)")
tdSql.query("select * from st, st2 where st.ts=st2.ts and st.tbname in ('ct1', 'ct2') and st2.tbname in ('ct21', 'ct23')");
tdSql.checkRows(1);
tdSql.checkData(0, 0, datetime.datetime(2021, 4, 19, 0, 0, 1))
tdSql.checkData(0, 1, 1)
tdSql.checkData(0, 2, 1)
tdSql.checkData(0, 3, datetime.datetime(2021, 4, 19, 0, 0, 1))
tdSql.checkData(0, 4, 1)
tdSql.checkData(0, 5, 1)
#tdSql.execute('drop database dbvg;')
tdSql.execute('drop database tbname_vgroup')
def stop(self):
tdSql.close()
tdLog.success("%s successfully executed" % __file__)
tdCases.addWindows(__file__, TDTestCase())
tdCases.addLinux(__file__, TDTestCase())