Merge pull request #13869 from taosdata/feature/3.0_debug_wxy

feat: partition by tags optimize
This commit is contained in:
Xiaoyu Wang 2022-06-16 10:39:15 +08:00 committed by GitHub
commit a729c159a3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 133 additions and 74 deletions

View File

@ -65,6 +65,12 @@ extern "C" {
(list) = NULL; \
} while (0)
#define NODES_CLEAR_LIST(list) \
do { \
nodesClearList((list)); \
(list) = NULL; \
} while (0)
typedef enum ENodeType {
// Syntax nodes are used in parser and planner module, and some are also used in executor module, such as COLUMN,
// VALUE, OPERATOR, FUNCTION and so on.

View File

@ -69,6 +69,7 @@ typedef struct SScanLogicNode {
int16_t tsColId;
double filesFactor;
SArray* pSmaIndexes;
SNodeList* pPartTags;
} SScanLogicNode;
typedef struct SJoinLogicNode {
@ -257,7 +258,7 @@ typedef struct STableScanPhysiNode {
double ratio;
int32_t dataRequired;
SNodeList* pDynamicScanFuncs;
SNodeList* pPartitionKeys;
SNodeList* pPartitionTags;
int64_t interval;
int64_t offset;
int64_t sliding;

View File

@ -13,7 +13,6 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tref.h"
#include "filter.h"
#include "function.h"
#include "functionMgt.h"
@ -21,6 +20,7 @@
#include "querynodes.h"
#include "tfill.h"
#include "tname.h"
#include "tref.h"
#include "tdatablock.h"
#include "tglobal.h"
@ -2663,8 +2663,9 @@ static SSDataBlock* concurrentlyLoadRemoteDataImpl(SOperatorInfo* pOperator, SEx
}
if (pRsp->completed == 1) {
qDebug("%s fetch msg rsp from vgId:%d, taskId:0x%" PRIx64 " index:%d completed, numOfRows:%d, rowsOfSource:%" PRIu64
", totalRows:%" PRIu64 ", totalBytes:%" PRIu64 ", completed:%d try next %d/%" PRIzu,
qDebug("%s fetch msg rsp from vgId:%d, taskId:0x%" PRIx64
" index:%d completed, numOfRows:%d, rowsOfSource:%" PRIu64 ", totalRows:%" PRIu64 ", totalBytes:%" PRIu64
", completed:%d try next %d/%" PRIzu,
GET_TASKID(pTaskInfo), pSource->addr.nodeId, pSource->taskId, i, pRes->info.rows, pDataInfo->totalRows,
pLoadInfo->totalRows, pLoadInfo->totalSize, completed + 1, i + 1, totalSources);
completed += 1;
@ -4653,7 +4654,7 @@ SOperatorInfo* createOperatorTree(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo
return NULL;
}
SArray* groupKeys = extractPartitionColInfo(pTableScanNode->pPartitionKeys);
SArray* groupKeys = extractPartitionColInfo(pTableScanNode->pPartitionTags);
code = generateGroupIdMap(pTableListInfo, pHandle, groupKeys); // todo for json
taosArrayDestroy(groupKeys);
if (code) {
@ -4671,11 +4672,10 @@ SOperatorInfo* createOperatorTree(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo
SArray* dataReaders = taosArrayInit(8, POINTER_BYTES);
createMultipleDataReaders(pTableScanNode, pHandle, pTableListInfo, dataReaders, queryId, taskId, pTagCond);
extractTableSchemaVersion(pHandle, pTableScanNode->scan.uid, pTaskInfo);
SArray* groupKeys = extractPartitionColInfo(pTableScanNode->pPartitionKeys);
SArray* groupKeys = extractPartitionColInfo(pTableScanNode->pPartitionTags);
generateGroupIdMap(pTableListInfo, pHandle, groupKeys); // todo for json
taosArrayDestroy(groupKeys);
SOperatorInfo* pOperator =
createTableMergeScanOperatorInfo(pTableScanNode, dataReaders, pHandle, pTaskInfo);
SOperatorInfo* pOperator = createTableMergeScanOperatorInfo(pTableScanNode, dataReaders, pHandle, pTaskInfo);
STableScanInfo* pScanInfo = pOperator->info;
pTaskInfo->cost.pRecoder = &pScanInfo->readRecorder;
return pOperator;
@ -4700,7 +4700,7 @@ SOperatorInfo* createOperatorTree(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo
qDebug("%s pDataReader is not NULL", GET_TASKID(pTaskInfo));
}
SArray* groupKeys = extractPartitionColInfo(pTableScanNode->pPartitionKeys);
SArray* groupKeys = extractPartitionColInfo(pTableScanNode->pPartitionTags);
int32_t code = generateGroupIdMap(pTableListInfo, pHandle, groupKeys); // todo for json
taosArrayDestroy(groupKeys);
if (code) {
@ -4708,8 +4708,7 @@ SOperatorInfo* createOperatorTree(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo
return NULL;
}
SOperatorInfo* pOperator =
createStreamScanOperatorInfo(pDataReader, pHandle, pTableScanNode, pTaskInfo, &twSup);
SOperatorInfo* pOperator = createStreamScanOperatorInfo(pDataReader, pHandle, pTableScanNode, pTaskInfo, &twSup);
return pOperator;
} else if (QUERY_NODE_PHYSICAL_PLAN_SYSTABLE_SCAN == type) {
@ -4718,7 +4717,8 @@ SOperatorInfo* createOperatorTree(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo
} else if (QUERY_NODE_PHYSICAL_PLAN_TAG_SCAN == type) {
STagScanPhysiNode* pScanPhyNode = (STagScanPhysiNode*)pPhyNode;
int32_t code = getTableList(pHandle->meta, pScanPhyNode->tableType, pScanPhyNode->uid, pTableListInfo, pScanPhyNode->node.pConditions);
int32_t code = getTableList(pHandle->meta, pScanPhyNode->tableType, pScanPhyNode->uid, pTableListInfo,
pScanPhyNode->node.pConditions);
if (code != TSDB_CODE_SUCCESS) {
return NULL;
}
@ -5499,4 +5499,3 @@ int32_t initStreamAggSupporter(SStreamAggSupporter* pSup, const char* pKey, SqlF
}
return code;
}

View File

@ -355,6 +355,7 @@ static SNode* logicScanCopy(const SScanLogicNode* pSrc, SScanLogicNode* pDst) {
COPY_SCALAR_FIELD(watermark);
COPY_SCALAR_FIELD(tsColId);
COPY_SCALAR_FIELD(filesFactor);
CLONE_NODE_LIST_FIELD(pPartTags);
return (SNode*)pDst;
}
@ -495,7 +496,7 @@ static SNode* physiTableScanCopy(const STableScanPhysiNode* pSrc, STableScanPhys
COPY_SCALAR_FIELD(ratio);
COPY_SCALAR_FIELD(dataRequired);
CLONE_NODE_LIST_FIELD(pDynamicScanFuncs);
CLONE_NODE_LIST_FIELD(pPartitionKeys);
CLONE_NODE_LIST_FIELD(pPartitionTags);
COPY_SCALAR_FIELD(interval);
COPY_SCALAR_FIELD(offset);
COPY_SCALAR_FIELD(sliding);

View File

@ -515,6 +515,7 @@ static const char* jkScanLogicPlanScanPseudoCols = "ScanPseudoCols";
static const char* jkScanLogicPlanTableId = "TableId";
static const char* jkScanLogicPlanTableType = "TableType";
static const char* jkScanLogicPlanTagCond = "TagCond";
static const char* jkScanLogicPlanPartTags = "PartTags";
static int32_t logicScanNodeToJson(const void* pObj, SJson* pJson) {
const SScanLogicNode* pNode = (const SScanLogicNode*)pObj;
@ -535,6 +536,9 @@ static int32_t logicScanNodeToJson(const void* pObj, SJson* pJson) {
if (TSDB_CODE_SUCCESS == code) {
code = tjsonAddObject(pJson, jkScanLogicPlanTagCond, nodeToJson, pNode->pTagCond);
}
if (TSDB_CODE_SUCCESS == code) {
code = nodeListToJson(pJson, jkScanLogicPlanPartTags, pNode->pPartTags);
}
return code;
}
@ -559,6 +563,9 @@ static int32_t jsonToLogicScanNode(const SJson* pJson, void* pObj) {
if (TSDB_CODE_SUCCESS == code) {
code = jsonToNodeObject(pJson, jkScanLogicPlanTagCond, &pNode->pTagCond);
}
if (TSDB_CODE_SUCCESS == code) {
code = jsonToNodeList(pJson, jkScanLogicPlanPartTags, &pNode->pPartTags);
}
return code;
}
@ -1368,6 +1375,7 @@ static const char* jkTableScanPhysiPlanTriggerType = "triggerType";
static const char* jkTableScanPhysiPlanWatermark = "watermark";
static const char* jkTableScanPhysiPlanTsColId = "tsColId";
static const char* jkTableScanPhysiPlanFilesFactor = "FilesFactor";
static const char* jkTableScanPhysiPlanPartitionTags = "PartitionTags";
static int32_t physiTableScanNodeToJson(const void* pObj, SJson* pJson) {
const STableScanPhysiNode* pNode = (const STableScanPhysiNode*)pObj;
@ -1421,6 +1429,9 @@ static int32_t physiTableScanNodeToJson(const void* pObj, SJson* pJson) {
if (TSDB_CODE_SUCCESS == code) {
code = tjsonAddDoubleToObject(pJson, jkTableScanPhysiPlanFilesFactor, pNode->filesFactor);
}
if (TSDB_CODE_SUCCESS == code) {
code = nodeListToJson(pJson, jkTableScanPhysiPlanPartitionTags, pNode->pPartitionTags);
}
return code;
}
@ -1446,30 +1457,24 @@ static int32_t jsonToPhysiTableScanNode(const SJson* pJson, void* pObj) {
}
if (TSDB_CODE_SUCCESS == code) {
tjsonGetNumberValue(pJson, jkTableScanPhysiPlanDataRequired, pNode->dataRequired, code);
;
}
if (TSDB_CODE_SUCCESS == code) {
code = jsonToNodeList(pJson, jkTableScanPhysiPlanDynamicScanFuncs, &pNode->pDynamicScanFuncs);
}
if (TSDB_CODE_SUCCESS == code) {
tjsonGetNumberValue(pJson, jkTableScanPhysiPlanInterval, pNode->interval, code);
;
}
if (TSDB_CODE_SUCCESS == code) {
tjsonGetNumberValue(pJson, jkTableScanPhysiPlanOffset, pNode->offset, code);
;
}
if (TSDB_CODE_SUCCESS == code) {
tjsonGetNumberValue(pJson, jkTableScanPhysiPlanSliding, pNode->sliding, code);
;
}
if (TSDB_CODE_SUCCESS == code) {
tjsonGetNumberValue(pJson, jkTableScanPhysiPlanIntervalUnit, pNode->intervalUnit, code);
;
}
if (TSDB_CODE_SUCCESS == code) {
tjsonGetNumberValue(pJson, jkTableScanPhysiPlanSlidingUnit, pNode->slidingUnit, code);
;
}
if (TSDB_CODE_SUCCESS == code) {
tjsonGetNumberValue(pJson, jkTableScanPhysiPlanTriggerType, pNode->triggerType, code);
@ -1483,6 +1488,10 @@ static int32_t jsonToPhysiTableScanNode(const SJson* pJson, void* pObj) {
if (TSDB_CODE_SUCCESS == code) {
code = tjsonGetDoubleValue(pJson, jkTableScanPhysiPlanFilesFactor, &pNode->filesFactor);
}
if (TSDB_CODE_SUCCESS == code) {
code = jsonToNodeList(pJson, jkTableScanPhysiPlanPartitionTags, &pNode->pPartitionTags);
}
return code;
}

View File

@ -109,8 +109,7 @@ static bool osdMayBeOptimized(SLogicNode* pNode) {
return false;
}
if (QUERY_NODE_LOGIC_PLAN_WINDOW == nodeType(pNode->pParent) ||
(QUERY_NODE_LOGIC_PLAN_PARTITION == nodeType(pNode->pParent) &&
pNode->pParent->pParent &&
(QUERY_NODE_LOGIC_PLAN_PARTITION == nodeType(pNode->pParent) && pNode->pParent->pParent &&
QUERY_NODE_LOGIC_PLAN_WINDOW == nodeType(pNode->pParent->pParent))) {
return true;
}
@ -222,8 +221,7 @@ static int32_t osdGetDataRequired(SNodeList* pFuncs) {
static void setScanWindowInfo(SScanLogicNode* pScan) {
SLogicNode* pParent = pScan->node.pParent;
if (QUERY_NODE_LOGIC_PLAN_PARTITION == nodeType(pParent) &&
pParent->pParent &&
if (QUERY_NODE_LOGIC_PLAN_PARTITION == nodeType(pParent) && pParent->pParent &&
QUERY_NODE_LOGIC_PLAN_WINDOW == nodeType(pParent->pParent)) {
pParent = pParent->pParent;
}
@ -1041,12 +1039,55 @@ static int32_t smaOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogicSubplan)
return smaOptimizeImpl(pCxt, pLogicSubplan, pScan);
}
static EDealRes partTagsOptHasColImpl(SNode* pNode, void* pContext) {
if (QUERY_NODE_COLUMN == nodeType(pNode)) {
if (COLUMN_TYPE_TAG != ((SColumnNode*)pNode)->colType) {
*(bool*)pContext = true;
return DEAL_RES_END;
}
}
return DEAL_RES_CONTINUE;
}
static bool partTagsOptHasCol(SNodeList* pPartKeys) {
bool hasCol = false;
nodesWalkExprs(pPartKeys, partTagsOptHasColImpl, &hasCol);
return hasCol;
}
static bool partTagsOptMayBeOptimized(SLogicNode* pNode) {
if (QUERY_NODE_LOGIC_PLAN_PARTITION != nodeType(pNode) || 1 != LIST_LENGTH(pNode->pChildren) ||
QUERY_NODE_LOGIC_PLAN_SCAN != nodeType(nodesListGetNode(pNode->pChildren, 0))) {
return false;
}
return !partTagsOptHasCol(((SPartitionLogicNode*)pNode)->pPartitionKeys);
}
static int32_t partTagsOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogicSubplan) {
SPartitionLogicNode* pPart =
(SPartitionLogicNode*)optFindPossibleNode(pLogicSubplan->pNode, partTagsOptMayBeOptimized);
if (NULL == pPart) {
return TSDB_CODE_SUCCESS;
}
SScanLogicNode* pScan = (SScanLogicNode*)nodesListGetNode(pPart->node.pChildren, 0);
TSWAP(pPart->pPartitionKeys, pScan->pPartTags);
int32_t code = replaceLogicNode(pLogicSubplan, (SLogicNode*)pPart, (SLogicNode*)pScan);
if (TSDB_CODE_SUCCESS == code) {
NODES_CLEAR_LIST(pPart->node.pChildren);
nodesDestroyNode((SNode*)pPart);
}
return code;
}
// clang-format off
static const SOptimizeRule optimizeRuleSet[] = {
{.pName = "OptimizeScanData", .optimizeFunc = osdOptimize},
{.pName = "ConditionPushDown", .optimizeFunc = cpdOptimize},
{.pName = "OrderByPrimaryKey", .optimizeFunc = opkOptimize},
{.pName = "SmaIndex", .optimizeFunc = smaOptimize}
{.pName = "SmaIndex", .optimizeFunc = smaOptimize},
{.pName = "PartitionByTags", .optimizeFunc = partTagsOptimize}
};
// clang-format on

View File

@ -500,7 +500,9 @@ static int32_t createTableScanPhysiNode(SPhysiPlanContext* pCxt, SSubplan* pSubp
tNameGetFullDbName(&pScanLogicNode->tableName, pSubplan->dbFName);
pTableScan->dataRequired = pScanLogicNode->dataRequired;
pTableScan->pDynamicScanFuncs = nodesCloneList(pScanLogicNode->pDynamicScanFuncs);
if (NULL != pScanLogicNode->pDynamicScanFuncs && NULL == pTableScan->pDynamicScanFuncs) {
pTableScan->pPartitionTags = nodesCloneList(pScanLogicNode->pPartTags);
if ((NULL != pScanLogicNode->pDynamicScanFuncs && NULL == pTableScan->pDynamicScanFuncs) ||
(NULL != pScanLogicNode->pPartTags && NULL == pTableScan->pPartitionTags)) {
nodesDestroyNode((SNode*)pTableScan);
return TSDB_CODE_OUT_OF_MEMORY;
}