From 8b6d7db7adc727f69d380a7eae108ba82c7fcb9a Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Sun, 16 Jul 2023 17:54:39 +0800 Subject: [PATCH 01/23] enhance: use one tsdb reader to read table sequentially --- source/libs/executor/inc/executorInt.h | 2 +- source/libs/executor/inc/tsort.h | 1 + source/libs/executor/src/scanoperator.c | 84 +++++-------------------- source/libs/executor/src/tsort.c | 84 +++++++++++++++++++++++++ 4 files changed, 102 insertions(+), 69 deletions(-) diff --git a/source/libs/executor/inc/executorInt.h b/source/libs/executor/inc/executorInt.h index b3d0ff8225..3b3f2cbb8a 100644 --- a/source/libs/executor/inc/executorInt.h +++ b/source/libs/executor/inc/executorInt.h @@ -232,7 +232,6 @@ typedef struct STableMergeScanInfo { int32_t tableEndIndex; bool hasGroupId; uint64_t groupId; - SArray* queryConds; // array of queryTableDataCond STableScanBase base; int32_t bufPageSize; uint32_t sortBufSize; // max buffer size for in-memory sort @@ -245,6 +244,7 @@ typedef struct STableMergeScanInfo { int64_t numOfRows; SScanInfo scanInfo; int32_t scanTimes; + int32_t readIdx; SSDataBlock* pResBlock; SSampleExecInfo sample; // sample execution info SSortExecInfo sortExecInfo; diff --git a/source/libs/executor/inc/tsort.h b/source/libs/executor/inc/tsort.h index 627aa825c6..a2cb1234ff 100644 --- a/source/libs/executor/inc/tsort.h +++ b/source/libs/executor/inc/tsort.h @@ -26,6 +26,7 @@ extern "C" { enum { SORT_MULTISOURCE_MERGE = 0x1, SORT_SINGLESOURCE_SORT = 0x2, + SORT_TABLE_MERGE_SCAN = 0x3 }; typedef struct SMultiMergeSource { diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 9abe4ffef6..cf1e93d3f2 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -13,8 +13,6 @@ * along with this program. If not, see . 
*/ -// clang-format off - #include "executorInt.h" #include "filter.h" #include "function.h" @@ -56,7 +54,6 @@ typedef struct STableMergeScanSortSourceParam { int32_t readerIdx; uint64_t uid; SSDataBlock* inputBlock; - STsdbReader* dataReader; } STableMergeScanSortSourceParam; typedef struct STableCountScanOperatorInfo { @@ -2741,28 +2738,28 @@ static SSDataBlock* getTableDataBlockImpl(void* param) { SSDataBlock* pBlock = source->inputBlock; int32_t code = 0; - SQueryTableDataCond* pQueryCond = taosArrayGet(pInfo->queryConds, readIdx); - int64_t st = taosGetTimestampUs(); void* p = tableListGetInfo(pInfo->base.pTableListInfo, readIdx + pInfo->tableStartIndex); SReadHandle* pHandle = &pInfo->base.readHandle; - if (NULL == source->dataReader) { - code = pAPI->tsdReader.tsdReaderOpen(pHandle->vnode, pQueryCond, p, 1, pBlock, (void**)&source->dataReader, GET_TASKID(pTaskInfo), false, NULL); + + bool hasNext = false; + + if (NULL == pInfo->base.dataReader) { + code = pAPI->tsdReader.tsdReaderOpen(pHandle->vnode, &pInfo->base.cond, p, 1, pBlock, (void**)&pInfo->base.dataReader, GET_TASKID(pTaskInfo), false, NULL); if (code != 0) { T_LONG_JMP(pTaskInfo->env, code); } + pInfo->readIdx = readIdx + pInfo->tableStartIndex ; + } else if (pInfo->readIdx != readIdx + pInfo->tableStartIndex) { + pAPI->tsdReader.tsdSetQueryTableList(pInfo->base.dataReader, p, 1); + pAPI->tsdReader.tsdReaderResetStatus(pInfo->base.dataReader, &pInfo->base.cond); } - pInfo->base.dataReader = source->dataReader; STsdbReader* reader = pInfo->base.dataReader; - bool hasNext = false; - qTrace("tsdb/read-table-data: %p, enter next reader", reader); - while (true) { code = pAPI->tsdReader.tsdNextDataBlock(reader, &hasNext); if (code != 0) { pAPI->tsdReader.tsdReaderReleaseDataBlock(reader); - pInfo->base.dataReader = NULL; T_LONG_JMP(pTaskInfo->env, code); } @@ -2772,7 +2769,6 @@ static SSDataBlock* getTableDataBlockImpl(void* param) { if (isTaskKilled(pTaskInfo)) { pAPI->tsdReader.tsdReaderReleaseDataBlock(reader); - pInfo->base.dataReader = NULL; T_LONG_JMP(pTaskInfo->env, pTaskInfo->code); } @@ -2782,12 +2778,6 @@ static SSDataBlock* getTableDataBlockImpl(void* param) { continue; } - if (pQueryCond->order == TSDB_ORDER_ASC) { - pQueryCond->twindows.skey = pBlock->info.window.ekey + 1; - } else { - pQueryCond->twindows.ekey = pBlock->info.window.skey - 1; - } - uint32_t status = 0; code = loadDataBlock(pOperator, &pInfo->base, pBlock, &status); // code = loadDataBlockFromOneTable(pOperator, pTableScanInfo, pBlock, &status); @@ -2809,14 +2799,9 @@ static SSDataBlock* getTableDataBlockImpl(void* param) { pOperator->resultInfo.totalRows += pBlock->info.rows; pInfo->base.readRecorder.elapsedTime += (taosGetTimestampUs() - st) / 1000.0; - qTrace("tsdb/read-table-data: %p, close reader", reader); - pInfo->base.dataReader = NULL; return pBlock; } - pAPI->tsdReader.tsdReaderClose(source->dataReader); - source->dataReader = NULL; - pInfo->base.dataReader = NULL; blockDataDestroy(source->inputBlock); source->inputBlock = NULL; return NULL; @@ -2870,32 +2855,18 @@ int32_t startGroupTableMergeScan(SOperatorInfo* pOperator) { int32_t tableStartIdx = pInfo->tableStartIndex; int32_t tableEndIdx = pInfo->tableEndIndex; - pInfo->base.dataReader = NULL; - // todo the total available buffer should be determined by total capacity of buffer of this task. 
// the additional one is reserved for merge result - // pInfo->sortBufSize = pInfo->bufPageSize * (tableEndIdx - tableStartIdx + 1 + 1); - int32_t kWay = (TSDB_MAX_BYTES_PER_ROW * 2) / (pInfo->pResBlock->info.rowSize); - if (kWay >= 128) { - kWay = 128; - } else if (kWay <= 2) { - kWay = 2; - } else { - int i = 2; - while (i * 2 <= kWay) i = i * 2; - kWay = i; - } + pInfo->sortBufSize = pInfo->bufPageSize * (tableEndIdx - tableStartIdx + 1 + 1); - pInfo->sortBufSize = pInfo->bufPageSize * (kWay + 1); int32_t numOfBufPage = pInfo->sortBufSize / pInfo->bufPageSize; - pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_MULTISOURCE_MERGE, pInfo->bufPageSize, numOfBufPage, + pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_TABLE_MERGE_SCAN, pInfo->bufPageSize, numOfBufPage, pInfo->pSortInputBlock, pTaskInfo->id.str, 0, 0, 0); tsortSetFetchRawDataFp(pInfo->pSortHandle, getTableDataBlockImpl, NULL, NULL); // one table has one data block int32_t numOfTable = tableEndIdx - tableStartIdx + 1; - pInfo->queryConds = taosArrayInit(numOfTable, sizeof(SQueryTableDataCond)); for (int32_t i = 0; i < numOfTable; ++i) { STableMergeScanSortSourceParam param = {0}; @@ -2904,10 +2875,6 @@ int32_t startGroupTableMergeScan(SOperatorInfo* pOperator) { param.inputBlock = createOneDataBlock(pInfo->pResBlock, false); taosArrayPush(pInfo->sortSourceParams, ¶m); - - SQueryTableDataCond cond; - dumpQueryTableCond(&pInfo->base.cond, &cond); - taosArrayPush(pInfo->queryConds, &cond); } for (int32_t i = 0; i < numOfTable; ++i) { @@ -2932,8 +2899,6 @@ int32_t stopGroupTableMergeScan(SOperatorInfo* pOperator) { SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; SStorageAPI* pAPI = &pTaskInfo->storageAPI; - int32_t numOfTable = taosArrayGetSize(pInfo->queryConds); - SSortExecInfo sortExecInfo = tsortGetSortExecInfo(pInfo->pSortHandle); pInfo->sortExecInfo.sortMethod = sortExecInfo.sortMethod; pInfo->sortExecInfo.sortBuffer = sortExecInfo.sortBuffer; @@ -2941,24 +2906,15 @@ int32_t stopGroupTableMergeScan(SOperatorInfo* pOperator) { pInfo->sortExecInfo.readBytes += sortExecInfo.readBytes; pInfo->sortExecInfo.writeBytes += sortExecInfo.writeBytes; - for (int32_t i = 0; i < numOfTable; ++i) { + for (int32_t i = 0; i < taosArrayGetSize(pInfo->sortSourceParams); ++i) { STableMergeScanSortSourceParam* param = taosArrayGet(pInfo->sortSourceParams, i); blockDataDestroy(param->inputBlock); - pAPI->tsdReader.tsdReaderClose(param->dataReader); - param->dataReader = NULL; } taosArrayClear(pInfo->sortSourceParams); tsortDestroySortHandle(pInfo->pSortHandle); pInfo->pSortHandle = NULL; - for (int32_t i = 0; i < taosArrayGetSize(pInfo->queryConds); i++) { - SQueryTableDataCond* cond = taosArrayGet(pInfo->queryConds, i); - taosMemoryFree(cond->colList); - } - taosArrayDestroy(pInfo->queryConds); - pInfo->queryConds = NULL; - resetLimitInfoForNextGroup(&pInfo->limitInfo); return TSDB_CODE_SUCCESS; } @@ -3056,13 +3012,11 @@ void destroyTableMergeScanOperatorInfo(void* param) { STableMergeScanInfo* pTableScanInfo = (STableMergeScanInfo*)param; cleanupQueryTableDataCond(&pTableScanInfo->base.cond); - int32_t numOfTable = taosArrayGetSize(pTableScanInfo->queryConds); + int32_t numOfTable = taosArrayGetSize(pTableScanInfo->sortSourceParams); for (int32_t i = 0; i < numOfTable; i++) { STableMergeScanSortSourceParam* p = taosArrayGet(pTableScanInfo->sortSourceParams, i); blockDataDestroy(p->inputBlock); - pTableScanInfo->base.readerAPI.tsdReaderClose(p->dataReader); - p->dataReader = NULL; } 
pTableScanInfo->base.readerAPI.tsdReaderClose(pTableScanInfo->base.dataReader); @@ -3072,12 +3026,6 @@ void destroyTableMergeScanOperatorInfo(void* param) { tsortDestroySortHandle(pTableScanInfo->pSortHandle); pTableScanInfo->pSortHandle = NULL; - for (int i = 0; i < taosArrayGetSize(pTableScanInfo->queryConds); i++) { - SQueryTableDataCond* pCond = taosArrayGet(pTableScanInfo->queryConds, i); - taosMemoryFree(pCond->colList); - } - - taosArrayDestroy(pTableScanInfo->queryConds); destroyTableScanBase(&pTableScanInfo->base, &pTableScanInfo->base.readerAPI); pTableScanInfo->pResBlock = blockDataDestroy(pTableScanInfo->pResBlock); @@ -3143,6 +3091,8 @@ SOperatorInfo* createTableMergeScanOperatorInfo(STableScanPhysiNode* pTableScanN pInfo->base.scanFlag = MAIN_SCAN; pInfo->base.readHandle = *readHandle; + pInfo->readIdx = -1; + pInfo->base.limitInfo.limit.limit = -1; pInfo->base.limitInfo.slimit.limit = -1; pInfo->base.pTableListInfo = pTableListInfo; @@ -3573,6 +3523,4 @@ static void destoryTableCountScanOperator(void* param) { taosArrayDestroy(pTableCountScanInfo->stbUidList); taosMemoryFreeClear(param); -} - -// clang-format on +} \ No newline at end of file diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index d26db6536f..c8266ea0d9 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -769,6 +769,61 @@ int32_t getProperSortPageSize(size_t rowSize, uint32_t numOfCols) { return pgSize; } +static int32_t createPageBuf(SSortHandle* pHandle) { + if (pHandle->pBuf == NULL) { + if (!osTempSpaceAvailable()) { + terrno = TSDB_CODE_NO_DISKSPACE; + qError("create page buf failed since %s, tempDir:%s", terrstr(), tsTempDir); + return terrno; + } + + int32_t code = createDiskbasedBuf(&pHandle->pBuf, pHandle->pageSize, pHandle->numOfPages * pHandle->pageSize, + "tableBlocksBuf", tsTempDir); + dBufSetPrintInfo(pHandle->pBuf); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + return 0; +} + +static int32_t addDataBlockToPageBuf(SSortHandle * pHandle, SSDataBlock* pDataBlock, SArray* aPgId) { + int32_t start = 0; + while (start < pDataBlock->info.rows) { + int32_t stop = 0; + blockDataSplitRows(pDataBlock, pDataBlock->info.hasVarCol, start, &stop, pHandle->pageSize); + SSDataBlock* p = blockDataExtractBlock(pDataBlock, start, stop - start + 1); + if (p == NULL) { + taosArrayDestroy(aPgId); + return terrno; + } + + int32_t pageId = -1; + void* pPage = getNewBufPage(pHandle->pBuf, &pageId); + if (pPage == NULL) { + taosArrayDestroy(aPgId); + blockDataDestroy(p); + return terrno; + } + + taosArrayPush(aPgId, &pageId); + + int32_t size = blockDataGetSize(p) + sizeof(int32_t) + taosArrayGetSize(p->pDataBlock) * sizeof(int32_t); + ASSERT(size <= getBufPageSize(pHandle->pBuf)); + + blockDataToBuf(pPage, p); + + setBufPageDirty(pPage, true); + releaseBufPage(pHandle->pBuf, pPage); + + blockDataDestroy(p); + start = stop + 1; + } + + blockDataCleanup(pDataBlock); + return 0; +} + static int32_t createInitialSources(SSortHandle* pHandle) { size_t sortBufSize = pHandle->numOfPages * pHandle->pageSize; int32_t code = 0; @@ -875,6 +930,35 @@ static int32_t createInitialSources(SSortHandle* pHandle) { code = doAddToBuf(pHandle->pDataBlock, pHandle); } } + } else if (pHandle->type == SORT_TABLE_MERGE_SCAN) { + size_t nSrc = taosArrayGetSize(pHandle->pOrderedSource); + SArray* aExtSrc = taosArrayInit(nSrc, POINTER_BYTES); + + // pHandle->numOfPages = 1024; //todo check sortbufsize + createPageBuf(pHandle); + + for (int i = 0; i < nSrc; ++i) { + 
SArray* aPgId = taosArrayInit(8, sizeof(int32_t)); + + SSortSource* pSrc = taosArrayGetP(pHandle->pOrderedSource, i); + SSDataBlock* pBlk = pHandle->fetchfp(pSrc->param); + while (pBlk != NULL) { + addDataBlockToPageBuf(pHandle, pBlk, aPgId); + pBlk = pHandle->fetchfp(pSrc->param); + } + SSDataBlock* pBlock = createOneDataBlock(pHandle->pDataBlock, false); + code = doAddNewExternalMemSource(pHandle->pBuf, aExtSrc, pBlock, &pHandle->sourceId, aPgId); + if (code != TSDB_CODE_SUCCESS) { + taosArrayDestroy(aExtSrc); + return code; + } + } + tsortClearOrderdSource(pHandle->pOrderedSource, NULL, NULL); + taosArrayAddAll(pHandle->pOrderedSource, aExtSrc); + taosArrayDestroy(aExtSrc); + + pHandle->type = SORT_SINGLESOURCE_SORT; + } return code; From f93af4d2e0f9d39d45ad196e526575c3879d29d0 Mon Sep 17 00:00:00 2001 From: slzhou Date: Sun, 16 Jul 2023 20:28:54 +0800 Subject: [PATCH 02/23] enhance: pass simple test --- source/libs/executor/inc/executorInt.h | 1 + source/libs/executor/src/scanoperator.c | 19 +++++-------------- 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/source/libs/executor/inc/executorInt.h b/source/libs/executor/inc/executorInt.h index 3b3f2cbb8a..6aa93a9c5e 100644 --- a/source/libs/executor/inc/executorInt.h +++ b/source/libs/executor/inc/executorInt.h @@ -238,6 +238,7 @@ typedef struct STableMergeScanInfo { SArray* pSortInfo; SSortHandle* pSortHandle; SSDataBlock* pSortInputBlock; + SSDataBlock* pReaderBlock; int64_t startTs; // sort start time SArray* sortSourceParams; SLimitInfo limitInfo; diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index cf1e93d3f2..93addef91d 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -53,7 +53,6 @@ typedef struct STableMergeScanSortSourceParam { SOperatorInfo* pOperator; int32_t readerIdx; uint64_t uid; - SSDataBlock* inputBlock; } STableMergeScanSortSourceParam; typedef struct STableCountScanOperatorInfo { @@ -2735,7 +2734,7 @@ static SSDataBlock* getTableDataBlockImpl(void* param) { SStorageAPI* pAPI = &pTaskInfo->storageAPI; int32_t readIdx = source->readerIdx; - SSDataBlock* pBlock = source->inputBlock; + SSDataBlock* pBlock = pInfo->pReaderBlock; int32_t code = 0; int64_t st = taosGetTimestampUs(); @@ -2753,6 +2752,7 @@ static SSDataBlock* getTableDataBlockImpl(void* param) { } else if (pInfo->readIdx != readIdx + pInfo->tableStartIndex) { pAPI->tsdReader.tsdSetQueryTableList(pInfo->base.dataReader, p, 1); pAPI->tsdReader.tsdReaderResetStatus(pInfo->base.dataReader, &pInfo->base.cond); + pInfo->readIdx = readIdx + pInfo->tableStartIndex ; } STsdbReader* reader = pInfo->base.dataReader; @@ -2802,8 +2802,6 @@ static SSDataBlock* getTableDataBlockImpl(void* param) { return pBlock; } - blockDataDestroy(source->inputBlock); - source->inputBlock = NULL; return NULL; } @@ -2872,7 +2870,6 @@ int32_t startGroupTableMergeScan(SOperatorInfo* pOperator) { STableMergeScanSortSourceParam param = {0}; param.readerIdx = i; param.pOperator = pOperator; - param.inputBlock = createOneDataBlock(pInfo->pResBlock, false); taosArrayPush(pInfo->sortSourceParams, ¶m); } @@ -2906,10 +2903,6 @@ int32_t stopGroupTableMergeScan(SOperatorInfo* pOperator) { pInfo->sortExecInfo.readBytes += sortExecInfo.readBytes; pInfo->sortExecInfo.writeBytes += sortExecInfo.writeBytes; - for (int32_t i = 0; i < taosArrayGetSize(pInfo->sortSourceParams); ++i) { - STableMergeScanSortSourceParam* param = taosArrayGet(pInfo->sortSourceParams, i); - 
blockDataDestroy(param->inputBlock); - } taosArrayClear(pInfo->sortSourceParams); tsortDestroySortHandle(pInfo->pSortHandle); @@ -3014,11 +3007,6 @@ void destroyTableMergeScanOperatorInfo(void* param) { int32_t numOfTable = taosArrayGetSize(pTableScanInfo->sortSourceParams); - for (int32_t i = 0; i < numOfTable; i++) { - STableMergeScanSortSourceParam* p = taosArrayGet(pTableScanInfo->sortSourceParams, i); - blockDataDestroy(p->inputBlock); - } - pTableScanInfo->base.readerAPI.tsdReaderClose(pTableScanInfo->base.dataReader); pTableScanInfo->base.dataReader = NULL; @@ -3030,6 +3018,7 @@ void destroyTableMergeScanOperatorInfo(void* param) { pTableScanInfo->pResBlock = blockDataDestroy(pTableScanInfo->pResBlock); pTableScanInfo->pSortInputBlock = blockDataDestroy(pTableScanInfo->pSortInputBlock); + pTableScanInfo->pReaderBlock = blockDataDestroy(pTableScanInfo->pReaderBlock); taosArrayDestroy(pTableScanInfo->pSortInfo); taosMemoryFreeClear(param); @@ -3115,6 +3104,8 @@ SOperatorInfo* createTableMergeScanOperatorInfo(STableScanPhysiNode* pTableScanN pInfo->pSortInputBlock = createOneDataBlock(pInfo->pResBlock, false); initLimitInfo(pTableScanNode->scan.node.pLimit, pTableScanNode->scan.node.pSlimit, &pInfo->limitInfo); + pInfo->pReaderBlock = createOneDataBlock(pInfo->pResBlock, false); + int32_t rowSize = pInfo->pResBlock->info.rowSize; uint32_t nCols = taosArrayGetSize(pInfo->pResBlock->pDataBlock); pInfo->bufPageSize = getProperSortPageSize(rowSize, nCols); From 9a0e9df5678774ec586e6c7a5e367a331f55aad0 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Sun, 16 Jul 2023 23:39:41 +0800 Subject: [PATCH 03/23] enhance: block is ts sorted and each book is a source --- source/libs/executor/src/scanoperator.c | 46 ++++++++----------------- source/libs/executor/src/tsort.c | 22 ++++++------ 2 files changed, 25 insertions(+), 43 deletions(-) diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 93addef91d..7babf68e56 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -53,6 +53,7 @@ typedef struct STableMergeScanSortSourceParam { SOperatorInfo* pOperator; int32_t readerIdx; uint64_t uid; + STsdbReader* reader; } STableMergeScanSortSourceParam; typedef struct STableCountScanOperatorInfo { @@ -2733,28 +2734,12 @@ static SSDataBlock* getTableDataBlockImpl(void* param) { SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; SStorageAPI* pAPI = &pTaskInfo->storageAPI; - int32_t readIdx = source->readerIdx; SSDataBlock* pBlock = pInfo->pReaderBlock; int32_t code = 0; int64_t st = taosGetTimestampUs(); - void* p = tableListGetInfo(pInfo->base.pTableListInfo, readIdx + pInfo->tableStartIndex); - SReadHandle* pHandle = &pInfo->base.readHandle; - bool hasNext = false; - if (NULL == pInfo->base.dataReader) { - code = pAPI->tsdReader.tsdReaderOpen(pHandle->vnode, &pInfo->base.cond, p, 1, pBlock, (void**)&pInfo->base.dataReader, GET_TASKID(pTaskInfo), false, NULL); - if (code != 0) { - T_LONG_JMP(pTaskInfo->env, code); - } - pInfo->readIdx = readIdx + pInfo->tableStartIndex ; - } else if (pInfo->readIdx != readIdx + pInfo->tableStartIndex) { - pAPI->tsdReader.tsdSetQueryTableList(pInfo->base.dataReader, p, 1); - pAPI->tsdReader.tsdReaderResetStatus(pInfo->base.dataReader, &pInfo->base.cond); - pInfo->readIdx = readIdx + pInfo->tableStartIndex ; - } - STsdbReader* reader = pInfo->base.dataReader; while (true) { code = pAPI->tsdReader.tsdNextDataBlock(reader, &hasNext); @@ -2837,6 +2822,8 @@ int32_t 
dumpQueryTableCond(const SQueryTableDataCond* src, SQueryTableDataCond* int32_t startGroupTableMergeScan(SOperatorInfo* pOperator) { STableMergeScanInfo* pInfo = pOperator->info; SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + SReadHandle* pHandle = &pInfo->base.readHandle; + SStorageAPI* pAPI = &pTaskInfo->storageAPI; { size_t numOfTables = tableListGetSize(pInfo->base.pTableListInfo); @@ -2866,21 +2853,15 @@ int32_t startGroupTableMergeScan(SOperatorInfo* pOperator) { // one table has one data block int32_t numOfTable = tableEndIdx - tableStartIdx + 1; - for (int32_t i = 0; i < numOfTable; ++i) { - STableMergeScanSortSourceParam param = {0}; - param.readerIdx = i; - param.pOperator = pOperator; + STableMergeScanSortSourceParam param = {0}; + param.pOperator = pOperator; + STableKeyInfo* startKeyInfo = tableListGetInfo(pInfo->base.pTableListInfo, tableStartIdx); + pAPI->tsdReader.tsdReaderOpen(pHandle->vnode, &pInfo->base.cond, startKeyInfo, numOfTable, pInfo->pReaderBlock, (void**)&pInfo->base.dataReader, GET_TASKID(pTaskInfo), false, NULL); - taosArrayPush(pInfo->sortSourceParams, ¶m); - } - - for (int32_t i = 0; i < numOfTable; ++i) { - SSortSource* ps = taosMemoryCalloc(1, sizeof(SSortSource)); - STableMergeScanSortSourceParam* param = taosArrayGet(pInfo->sortSourceParams, i); - ps->param = param; - ps->onlyRef = true; - tsortAddSource(pInfo->pSortHandle, ps); - } + SSortSource* ps = taosMemoryCalloc(1, sizeof(SSortSource)); + ps->param = ¶m; + ps->onlyRef = true; + tsortAddSource(pInfo->pSortHandle, ps); int32_t code = tsortOpen(pInfo->pSortHandle); @@ -2903,7 +2884,10 @@ int32_t stopGroupTableMergeScan(SOperatorInfo* pOperator) { pInfo->sortExecInfo.readBytes += sortExecInfo.readBytes; pInfo->sortExecInfo.writeBytes += sortExecInfo.writeBytes; - taosArrayClear(pInfo->sortSourceParams); + if (pInfo->base.dataReader != NULL) { + pAPI->tsdReader.tsdReaderClose(pInfo->base.dataReader); + pInfo->base.dataReader = NULL; + } tsortDestroySortHandle(pInfo->pSortHandle); pInfo->pSortHandle = NULL; diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index c8266ea0d9..c262153464 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -937,22 +937,20 @@ static int32_t createInitialSources(SSortHandle* pHandle) { // pHandle->numOfPages = 1024; //todo check sortbufsize createPageBuf(pHandle); - for (int i = 0; i < nSrc; ++i) { + SSortSource* pSrc = taosArrayGetP(pHandle->pOrderedSource, 0); + SSDataBlock* pBlk = pHandle->fetchfp(pSrc->param); + while (pBlk != NULL) { SArray* aPgId = taosArrayInit(8, sizeof(int32_t)); - - SSortSource* pSrc = taosArrayGetP(pHandle->pOrderedSource, i); - SSDataBlock* pBlk = pHandle->fetchfp(pSrc->param); - while (pBlk != NULL) { - addDataBlockToPageBuf(pHandle, pBlk, aPgId); - pBlk = pHandle->fetchfp(pSrc->param); - } - SSDataBlock* pBlock = createOneDataBlock(pHandle->pDataBlock, false); - code = doAddNewExternalMemSource(pHandle->pBuf, aExtSrc, pBlock, &pHandle->sourceId, aPgId); + addDataBlockToPageBuf(pHandle, pBlk, aPgId); + SSDataBlock* pMemSrcBlk = createOneDataBlock(pHandle->pDataBlock, false); + code = doAddNewExternalMemSource(pHandle->pBuf, aExtSrc, pMemSrcBlk, &pHandle->sourceId, aPgId); if (code != TSDB_CODE_SUCCESS) { - taosArrayDestroy(aExtSrc); - return code; + taosArrayDestroy(aExtSrc); + return code; } + pBlk = pHandle->fetchfp(pSrc->param); } + tsortClearOrderdSource(pHandle->pOrderedSource, NULL, NULL); taosArrayAddAll(pHandle->pOrderedSource, aExtSrc); taosArrayDestroy(aExtSrc); From 
655233fd4fe8541a1af30c4ea24634307f7124c9 Mon Sep 17 00:00:00 2001 From: slzhou Date: Mon, 17 Jul 2023 15:05:30 +0800 Subject: [PATCH 04/23] fix: create initial source with blocks --- source/libs/executor/src/tsort.c | 172 +++++++++++++++++++++++-------- 1 file changed, 129 insertions(+), 43 deletions(-) diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index c262153464..e710b619b8 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -787,40 +787,106 @@ static int32_t createPageBuf(SSortHandle* pHandle) { return 0; } -static int32_t addDataBlockToPageBuf(SSortHandle * pHandle, SSDataBlock* pDataBlock, SArray* aPgId) { - int32_t start = 0; - while (start < pDataBlock->info.rows) { - int32_t stop = 0; - blockDataSplitRows(pDataBlock, pDataBlock->info.hasVarCol, start, &stop, pHandle->pageSize); - SSDataBlock* p = blockDataExtractBlock(pDataBlock, start, stop - start + 1); - if (p == NULL) { - taosArrayDestroy(aPgId); - return terrno; - } +typedef struct SBlkMergeSupport { + int64_t** aTs; + int32_t* aRowIdx; + int32_t order; +} SBlkMergeSupport; - int32_t pageId = -1; - void* pPage = getNewBufPage(pHandle->pBuf, &pageId); - if (pPage == NULL) { - taosArrayDestroy(aPgId); - blockDataDestroy(p); - return terrno; - } +static int32_t blockCompareTsFn(const void* pLeft, const void* pRight, void* param) { + int32_t left = *(int32_t*)pLeft; + int32_t right = *(int32_t*)pRight; - taosArrayPush(aPgId, &pageId); - - int32_t size = blockDataGetSize(p) + sizeof(int32_t) + taosArrayGetSize(p->pDataBlock) * sizeof(int32_t); - ASSERT(size <= getBufPageSize(pHandle->pBuf)); - - blockDataToBuf(pPage, p); - - setBufPageDirty(pPage, true); - releaseBufPage(pHandle->pBuf, pPage); - - blockDataDestroy(p); - start = stop + 1; + SBlkMergeSupport* pSup = (SBlkMergeSupport*)param; + if (pSup->aRowIdx[left] == -1) { + return 1; + } else if (pSup->aRowIdx[right] == -1) { + return -1; } - blockDataCleanup(pDataBlock); + int64_t leftTs = pSup->aTs[left][pSup->aRowIdx[left]]; + int64_t rightTs = pSup->aTs[right][pSup->aRowIdx[right]]; + + int32_t ret = leftTs>rightTs ? 1 : ((leftTs < rightTs) ? 
-1 : 0); + if (pSup->order == TSDB_ORDER_DESC) { + ret = -1 * ret; + } + return ret; +} + +static int32_t appendDataBlockToPageBuf(SSortHandle* pHandle, SSDataBlock* blk, SArray* aPgId) { + int32_t pageId = -1; + void* pPage = getNewBufPage(pHandle->pBuf, &pageId); + taosArrayPush(aPgId, &pageId); + blockDataToBuf(pPage, blk); + + setBufPageDirty(pPage, true); + releaseBufPage(pHandle->pBuf, pPage); + blockDataCleanup(blk); + + return 0; +} + + +static int32_t sortBlocksToExtSource(SSortHandle* pHandle, SArray* aBlk, SBlockOrderInfo* order, SArray* aExtSrc) { + int32_t rowCap = blockDataGetCapacityInRow(pHandle->pDataBlock, pHandle->pageSize, + blockDataGetSerialMetaSize(taosArrayGetSize(pHandle->pDataBlock->pDataBlock))); + blockDataEnsureCapacity(pHandle->pDataBlock, rowCap); + blockDataCleanup(pHandle->pDataBlock); + + int32_t numBlks = taosArrayGetSize(aBlk); + + SBlkMergeSupport sup; + sup.aRowIdx = taosMemoryCalloc(numBlks, sizeof(int32_t)); + sup.aTs = taosMemoryCalloc(numBlks, sizeof(int64_t*)); + sup.order = order->order; + for (int i = 0; i < numBlks; ++i) { + SSDataBlock* blk = taosArrayGetP(aBlk, i); + SColumnInfoData* col = taosArrayGet(blk->pDataBlock, order->slotId); + sup.aTs[i] = (int64_t*)col->pData; + sup.aRowIdx[i] = 0; + } + + int32_t totalRows = 0; + for (int i = 0; i < numBlks; ++i) { + SSDataBlock* blk = taosArrayGetP(aBlk, i); + totalRows += blk->info.rows; + } + + SArray* aPgId = taosArrayInit(8, sizeof(int32_t)); + + SMultiwayMergeTreeInfo* pTree = NULL; + tMergeTreeCreate(&pTree, taosArrayGetSize(aBlk), &sup, blockCompareTsFn); + int32_t numEnded = 0; + int32_t nRows = 0; + while (nRows < totalRows) { + int32_t minIdx = tMergeTreeGetChosenIndex(pTree); + SSDataBlock* minBlk = taosArrayGetP(aBlk, minIdx); + int32_t minRow = sup.aRowIdx[minIdx]; + appendOneRowToDataBlock(pHandle->pDataBlock, minBlk, &minRow); + ++nRows; + if (pHandle->pDataBlock->info.rows >= rowCap) { + appendDataBlockToPageBuf(pHandle, pHandle->pDataBlock, aPgId); + } + if (sup.aRowIdx[minIdx] == minBlk->info.rows - 1) { + sup.aRowIdx[minIdx] = -1; + ++numEnded; + } else { + ++sup.aRowIdx[minIdx]; + } + tMergeTreeAdjust(pTree, tMergeTreeGetAdjustIndex(pTree)); + } + if (pHandle->pDataBlock->info.rows > 0) { + appendDataBlockToPageBuf(pHandle, pHandle->pDataBlock, aPgId); + } + SSDataBlock* pMemSrcBlk = createOneDataBlock(pHandle->pDataBlock, false); + doAddNewExternalMemSource(pHandle->pBuf, aExtSrc, pMemSrcBlk, &pHandle->sourceId, aPgId); + + taosMemoryFree(sup.aRowIdx); + taosMemoryFree(sup.aTs); + + tMergeTreeDestroy(&pTree); + return 0; } @@ -931,26 +997,46 @@ static int32_t createInitialSources(SSortHandle* pHandle) { } } } else if (pHandle->type == SORT_TABLE_MERGE_SCAN) { + SBlockOrderInfo* pOrder = taosArrayGet(pHandle->pSortInfo, 0); size_t nSrc = taosArrayGetSize(pHandle->pOrderedSource); SArray* aExtSrc = taosArrayInit(nSrc, POINTER_BYTES); - // pHandle->numOfPages = 1024; //todo check sortbufsize + pHandle->numOfPages = 1024; //todo check sortbufsize + size_t maxBufSize = pHandle->numOfPages * pHandle->pageSize; createPageBuf(pHandle); SSortSource* pSrc = taosArrayGetP(pHandle->pOrderedSource, 0); - SSDataBlock* pBlk = pHandle->fetchfp(pSrc->param); - while (pBlk != NULL) { - SArray* aPgId = taosArrayInit(8, sizeof(int32_t)); - addDataBlockToPageBuf(pHandle, pBlk, aPgId); - SSDataBlock* pMemSrcBlk = createOneDataBlock(pHandle->pDataBlock, false); - code = doAddNewExternalMemSource(pHandle->pBuf, aExtSrc, pMemSrcBlk, &pHandle->sourceId, aPgId); - if (code != TSDB_CODE_SUCCESS) { - 
taosArrayDestroy(aExtSrc); - return code; - } - pBlk = pHandle->fetchfp(pSrc->param); - } + int32_t szSort = 0; + SArray* aBlkSort = taosArrayInit(8, POINTER_BYTES); + while (1) { + SSDataBlock* pBlk = pHandle->fetchfp(pSrc->param); + if (pBlk == NULL) { + break; + }; + + szSort += blockDataGetSize(pBlk); + SSDataBlock* blk = createOneDataBlock(pBlk, true); + taosArrayPush(aBlkSort, &blk); + + if (szSort > maxBufSize) { + sortBlocksToExtSource(pHandle, aBlkSort, pOrder, aExtSrc); + for (int i = 0; i < taosArrayGetSize(aBlkSort); ++i) { + blockDataDestroy(taosArrayGetP(aBlkSort, i)); + } + taosArrayClear(aBlkSort); + szSort = 0; + } + } + if (szSort > 0) { + sortBlocksToExtSource(pHandle, aBlkSort, pOrder, aExtSrc); + for (int i = 0; i < taosArrayGetSize(aBlkSort); ++i) { + blockDataDestroy(taosArrayGetP(aBlkSort, i)); + } + taosArrayClear(aBlkSort); + szSort = 0; + } + taosArrayDestroy(aBlkSort); tsortClearOrderdSource(pHandle->pOrderedSource, NULL, NULL); taosArrayAddAll(pHandle->pOrderedSource, aExtSrc); taosArrayDestroy(aExtSrc); From 1b9754f1bc41dc74b6eed22816f5a3d23e509f90 Mon Sep 17 00:00:00 2001 From: slzhou Date: Mon, 17 Jul 2023 16:18:06 +0800 Subject: [PATCH 05/23] enhance: add log with info level. change to debug later --- source/libs/executor/src/scanoperator.c | 5 +---- source/libs/executor/src/tsort.c | 7 +++++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 7babf68e56..0c90c654f4 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -2840,10 +2840,7 @@ int32_t startGroupTableMergeScan(SOperatorInfo* pOperator) { int32_t tableStartIdx = pInfo->tableStartIndex; int32_t tableEndIdx = pInfo->tableEndIndex; - // todo the total available buffer should be determined by total capacity of buffer of this task. - // the additional one is reserved for merge result - pInfo->sortBufSize = pInfo->bufPageSize * (tableEndIdx - tableStartIdx + 1 + 1); - + pInfo->sortBufSize = 2048 * pInfo->bufPageSize; int32_t numOfBufPage = pInfo->sortBufSize / pInfo->bufPageSize; pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_TABLE_MERGE_SCAN, pInfo->bufPageSize, numOfBufPage, pInfo->pSortInputBlock, pTaskInfo->id.str, 0, 0, 0); diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index e710b619b8..86e2477282 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -473,6 +473,7 @@ static int32_t adjustMergeTreeForNextTuple(SSortSource* pSource, SMultiwayMergeT pSource->src.rowIndex = -1; pSource->pageIndex = -1; pSource->src.pBlock = blockDataDestroy(pSource->src.pBlock); + uInfo("adjust merge tree. %d source completed", *numOfCompleted); } else { int32_t* pPgId = taosArrayGet(pSource->pageIdList, pSource->pageIndex); @@ -668,6 +669,7 @@ static int32_t doInternalMergeSort(SSortHandle* pHandle) { // Only *numOfInputSources* can be loaded into buffer to perform the external sort. for (int32_t i = 0; i < sortGroup; ++i) { + uInfo("internal merge sort pass %d group %d. 
num input sources %d ", t, i, numOfInputSources); pHandle->sourceId += 1; int32_t end = (i + 1) * numOfInputSources - 1; @@ -1001,7 +1003,6 @@ static int32_t createInitialSources(SSortHandle* pHandle) { size_t nSrc = taosArrayGetSize(pHandle->pOrderedSource); SArray* aExtSrc = taosArrayInit(nSrc, POINTER_BYTES); - pHandle->numOfPages = 1024; //todo check sortbufsize size_t maxBufSize = pHandle->numOfPages * pHandle->pageSize; createPageBuf(pHandle); @@ -1021,6 +1022,8 @@ static int32_t createInitialSources(SSortHandle* pHandle) { if (szSort > maxBufSize) { sortBlocksToExtSource(pHandle, aBlkSort, pOrder, aExtSrc); + uInfo("initial source %zu created for %zu blocks", taosArrayGetSize(aExtSrc), taosArrayGetSize(aBlkSort)); + for (int i = 0; i < taosArrayGetSize(aBlkSort); ++i) { blockDataDestroy(taosArrayGetP(aBlkSort, i)); } @@ -1042,7 +1045,7 @@ static int32_t createInitialSources(SSortHandle* pHandle) { taosArrayDestroy(aExtSrc); pHandle->type = SORT_SINGLESOURCE_SORT; - + uInfo("create initial sources for table merge scan ended"); } return code; From ba2b4042950cfa4c95e9ddcbc2c4160b07be5067 Mon Sep 17 00:00:00 2001 From: slzhou Date: Mon, 17 Jul 2023 21:07:55 +0800 Subject: [PATCH 06/23] enhance: optimize msortComparFn for table merge scan --- source/libs/executor/inc/tsort.h | 6 ++ source/libs/executor/src/tsort.c | 95 +++++++++++++++++++------------- 2 files changed, 63 insertions(+), 38 deletions(-) diff --git a/source/libs/executor/inc/tsort.h b/source/libs/executor/inc/tsort.h index a2cb1234ff..a15b1e0eaf 100644 --- a/source/libs/executor/inc/tsort.h +++ b/source/libs/executor/inc/tsort.h @@ -54,6 +54,12 @@ typedef struct SMsortComparParam { int32_t numOfSources; SArray* orderInfo; // SArray bool cmpGroupId; + + int32_t sortType; + // the following field to speed up when sortType == SORT_TABLE_MERGE_SCAN + int32_t tsSlotId; + int32_t order; + __compar_fn_t cmpFn; } SMsortComparParam; typedef struct SSortHandle SSortHandle; diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 86e2477282..f274e9717b 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -197,7 +197,13 @@ SSortHandle* tsortCreateSortHandle(SArray* pSortInfo, int32_t type, int32_t page pSortHandle->pOrderedSource = taosArrayInit(4, POINTER_BYTES); pSortHandle->cmpParam.orderInfo = pSortInfo; pSortHandle->cmpParam.cmpGroupId = false; - + pSortHandle->cmpParam.sortType = type; + if (type == SORT_TABLE_MERGE_SCAN) { + SBlockOrderInfo* pOrder = TARRAY_GET_ELEM(pSortInfo, 0); + pSortHandle->cmpParam.tsSlotId = pOrder->slotId; + pSortHandle->cmpParam.order = pOrder->order; + pSortHandle->cmpParam.cmpFn = (pOrder->order == TSDB_ORDER_ASC) ? compareInt64Val : compareInt64ValDesc; + } tsortSetComparFp(pSortHandle, msortComparFn); if (idstr != NULL) { @@ -489,6 +495,8 @@ static int32_t adjustMergeTreeForNextTuple(SSortSource* pSource, SMultiwayMergeT } releaseBufPage(pHandle->pBuf, pPage); + if (pSource->pageIndex % 256 == 0) + uInfo("got block from page %d from ext mem source %p", pSource->pageIndex, pSource); } } else { int64_t st = taosGetTimestampUs(); @@ -498,6 +506,7 @@ static int32_t adjustMergeTreeForNextTuple(SSortSource* pSource, SMultiwayMergeT if (pSource->src.pBlock == NULL) { (*numOfCompleted) += 1; pSource->src.rowIndex = -1; + uInfo("adjust merge tree. 
%d source completed", *numOfCompleted); } } } @@ -578,53 +587,63 @@ int32_t msortComparFn(const void* pLeft, const void* pRight, void* param) { } } - for (int32_t i = 0; i < pInfo->size; ++i) { - SBlockOrderInfo* pOrder = TARRAY_GET_ELEM(pInfo, i); - SColumnInfoData* pLeftColInfoData = TARRAY_GET_ELEM(pLeftBlock->pDataBlock, pOrder->slotId); + if (pParam->sortType == SORT_TABLE_MERGE_SCAN) { + SColumnInfoData* pLeftColInfoData = TARRAY_GET_ELEM(pLeftBlock->pDataBlock, pParam->tsSlotId); + SColumnInfoData* pRightColInfoData = TARRAY_GET_ELEM(pRightBlock->pDataBlock, pParam->tsSlotId); + int64_t* left1 = (int64_t*)(pLeftColInfoData->pData) + pLeftSource->src.rowIndex; + int64_t* right1 = (int64_t*)(pRightColInfoData->pData) + pRightSource->src.rowIndex; - bool leftNull = false; - if (pLeftColInfoData->hasNull) { - if (pLeftBlock->pBlockAgg == NULL) { - leftNull = colDataIsNull_s(pLeftColInfoData, pLeftSource->src.rowIndex); - } else { - leftNull = - colDataIsNull(pLeftColInfoData, pLeftBlock->info.rows, pLeftSource->src.rowIndex, pLeftBlock->pBlockAgg[i]); + int ret = pParam->cmpFn(left1, right1); + return ret; + } else { + for (int32_t i = 0; i < pInfo->size; ++i) { + SBlockOrderInfo* pOrder = TARRAY_GET_ELEM(pInfo, i); + SColumnInfoData* pLeftColInfoData = TARRAY_GET_ELEM(pLeftBlock->pDataBlock, pOrder->slotId); + SColumnInfoData* pRightColInfoData = TARRAY_GET_ELEM(pRightBlock->pDataBlock, pOrder->slotId); + + bool leftNull = false; + if (pLeftColInfoData->hasNull) { + if (pLeftBlock->pBlockAgg == NULL) { + leftNull = colDataIsNull_s(pLeftColInfoData, pLeftSource->src.rowIndex); + } else { + leftNull = colDataIsNull(pLeftColInfoData, pLeftBlock->info.rows, pLeftSource->src.rowIndex, + pLeftBlock->pBlockAgg[i]); + } } - } - SColumnInfoData* pRightColInfoData = TARRAY_GET_ELEM(pRightBlock->pDataBlock, pOrder->slotId); - bool rightNull = false; - if (pRightColInfoData->hasNull) { - if (pRightBlock->pBlockAgg == NULL) { - rightNull = colDataIsNull_s(pRightColInfoData, pRightSource->src.rowIndex); - } else { - rightNull = colDataIsNull(pRightColInfoData, pRightBlock->info.rows, pRightSource->src.rowIndex, - pRightBlock->pBlockAgg[i]); + bool rightNull = false; + if (pRightColInfoData->hasNull) { + if (pRightBlock->pBlockAgg == NULL) { + rightNull = colDataIsNull_s(pRightColInfoData, pRightSource->src.rowIndex); + } else { + rightNull = colDataIsNull(pRightColInfoData, pRightBlock->info.rows, pRightSource->src.rowIndex, + pRightBlock->pBlockAgg[i]); + } } - } - if (leftNull && rightNull) { - continue; // continue to next slot - } + if (leftNull && rightNull) { + continue; // continue to next slot + } - if (rightNull) { - return pOrder->nullFirst ? 1 : -1; - } + if (rightNull) { + return pOrder->nullFirst ? 1 : -1; + } - if (leftNull) { - return pOrder->nullFirst ? -1 : 1; - } + if (leftNull) { + return pOrder->nullFirst ? 
-1 : 1; + } - void* left1 = colDataGetData(pLeftColInfoData, pLeftSource->src.rowIndex); - void* right1 = colDataGetData(pRightColInfoData, pRightSource->src.rowIndex); + void* left1 = colDataGetData(pLeftColInfoData, pLeftSource->src.rowIndex); + void* right1 = colDataGetData(pRightColInfoData, pRightSource->src.rowIndex); - __compar_fn_t fn = getKeyComparFunc(pLeftColInfoData->info.type, pOrder->order); + __compar_fn_t fn = getKeyComparFunc(pLeftColInfoData->info.type, pOrder->order); - int ret = fn(left1, right1); - if (ret == 0) { - continue; - } else { - return ret; + int ret = fn(left1, right1); + if (ret == 0) { + continue; + } else { + return ret; + } } } return 0; From 97a6e89d1102cbbea99490be58e6a26f3b8874b7 Mon Sep 17 00:00:00 2001 From: slzhou Date: Tue, 18 Jul 2023 10:38:40 +0800 Subject: [PATCH 07/23] enhance: refactor create initial sources --- source/libs/executor/src/scanoperator.c | 2 +- source/libs/executor/src/tsort.c | 287 ++++++++++++------------ 2 files changed, 150 insertions(+), 139 deletions(-) diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 0c90c654f4..ab8991bd05 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -2840,7 +2840,7 @@ int32_t startGroupTableMergeScan(SOperatorInfo* pOperator) { int32_t tableStartIdx = pInfo->tableStartIndex; int32_t tableEndIdx = pInfo->tableEndIndex; - pInfo->sortBufSize = 2048 * pInfo->bufPageSize; + pInfo->sortBufSize = 1024 * pInfo->bufPageSize; int32_t numOfBufPage = pInfo->sortBufSize / pInfo->bufPageSize; pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_TABLE_MERGE_SCAN, pInfo->bufPageSize, numOfBufPage, pInfo->pSortInputBlock, pTaskInfo->id.str, 0, 0, 0); diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index f274e9717b..34f9dfc233 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -475,11 +475,11 @@ static int32_t adjustMergeTreeForNextTuple(SSortSource* pSource, SMultiwayMergeT if (pHandle->type == SORT_SINGLESOURCE_SORT) { pSource->pageIndex++; if (pSource->pageIndex >= taosArrayGetSize(pSource->pageIdList)) { + qInfo("adjust merge tree. %d source completed %d", *numOfCompleted, pSource->pageIndex); (*numOfCompleted) += 1; pSource->src.rowIndex = -1; pSource->pageIndex = -1; pSource->src.pBlock = blockDataDestroy(pSource->src.pBlock); - uInfo("adjust merge tree. %d source completed", *numOfCompleted); } else { int32_t* pPgId = taosArrayGet(pSource->pageIdList, pSource->pageIndex); @@ -495,8 +495,6 @@ static int32_t adjustMergeTreeForNextTuple(SSortSource* pSource, SMultiwayMergeT } releaseBufPage(pHandle->pBuf, pPage); - if (pSource->pageIndex % 256 == 0) - uInfo("got block from page %d from ext mem source %p", pSource->pageIndex, pSource); } } else { int64_t st = taosGetTimestampUs(); @@ -506,7 +504,7 @@ static int32_t adjustMergeTreeForNextTuple(SSortSource* pSource, SMultiwayMergeT if (pSource->src.pBlock == NULL) { (*numOfCompleted) += 1; pSource->src.rowIndex = -1; - uInfo("adjust merge tree. %d source completed", *numOfCompleted); + qInfo("adjust merge tree. %d source completed", *numOfCompleted); } } } @@ -688,7 +686,7 @@ static int32_t doInternalMergeSort(SSortHandle* pHandle) { // Only *numOfInputSources* can be loaded into buffer to perform the external sort. for (int32_t i = 0; i < sortGroup; ++i) { - uInfo("internal merge sort pass %d group %d. 
num input sources %d ", t, i, numOfInputSources); + qInfo("internal merge sort pass %d group %d. num input sources %d ", t, i, numOfInputSources); pHandle->sourceId += 1; int32_t end = (i + 1) * numOfInputSources - 1; @@ -884,11 +882,13 @@ static int32_t sortBlocksToExtSource(SSortHandle* pHandle, SArray* aBlk, SBlockO int32_t minIdx = tMergeTreeGetChosenIndex(pTree); SSDataBlock* minBlk = taosArrayGetP(aBlk, minIdx); int32_t minRow = sup.aRowIdx[minIdx]; + appendOneRowToDataBlock(pHandle->pDataBlock, minBlk, &minRow); ++nRows; if (pHandle->pDataBlock->info.rows >= rowCap) { appendDataBlockToPageBuf(pHandle, pHandle->pDataBlock, aPgId); - } + } + if (sup.aRowIdx[minIdx] == minBlk->info.rows - 1) { sup.aRowIdx[minIdx] = -1; ++numEnded; @@ -911,39 +911,101 @@ static int32_t sortBlocksToExtSource(SSortHandle* pHandle, SArray* aBlk, SBlockO return 0; } -static int32_t createInitialSources(SSortHandle* pHandle) { - size_t sortBufSize = pHandle->numOfPages * pHandle->pageSize; +static int32_t createBlocksMergeSortInitialSources(SSortHandle* pHandle) { + SBlockOrderInfo* pOrder = taosArrayGet(pHandle->pSortInfo, 0); + size_t nSrc = taosArrayGetSize(pHandle->pOrderedSource); + SArray* aExtSrc = taosArrayInit(nSrc, POINTER_BYTES); + + size_t maxBufSize = pHandle->numOfPages * pHandle->pageSize; + createPageBuf(pHandle); + + SSortSource* pSrc = taosArrayGetP(pHandle->pOrderedSource, 0); + int32_t szSort = 0; + + SArray* aBlkSort = taosArrayInit(8, POINTER_BYTES); + while (1) { + SSDataBlock* pBlk = pHandle->fetchfp(pSrc->param); + + if (pBlk != NULL) { + szSort += blockDataGetSize(pBlk); + SSDataBlock* blk = createOneDataBlock(pBlk, true); + taosArrayPush(aBlkSort, &blk); + } + if ((pBlk != NULL && szSort > maxBufSize) || (pBlk == NULL && szSort > 0)) { + int64_t p = taosGetTimestampUs(); + sortBlocksToExtSource(pHandle, aBlkSort, pOrder, aExtSrc); + int64_t el = taosGetTimestampUs() - p; + pHandle->sortElapsed += el; + + for (int i = 0; i < taosArrayGetSize(aBlkSort); ++i) { + blockDataDestroy(taosArrayGetP(aBlkSort, i)); + } + taosArrayClear(aBlkSort); + szSort = 0; + } + if (pBlk == NULL) { + break; + }; + } + + taosArrayDestroy(aBlkSort); + tsortClearOrderdSource(pHandle->pOrderedSource, NULL, NULL); + taosArrayAddAll(pHandle->pOrderedSource, aExtSrc); + taosArrayDestroy(aExtSrc); + + pHandle->type = SORT_SINGLESOURCE_SORT; + return 0; +} + +static int32_t createBlocksQuickSortInitialSources(SSortHandle* pHandle) { int32_t code = 0; + size_t sortBufSize = pHandle->numOfPages * pHandle->pageSize; - if (pHandle->type == SORT_SINGLESOURCE_SORT) { - SSortSource** pSource = taosArrayGet(pHandle->pOrderedSource, 0); - SSortSource* source = *pSource; - *pSource = NULL; + SSortSource** pSource = taosArrayGet(pHandle->pOrderedSource, 0); + SSortSource* source = *pSource; + *pSource = NULL; - tsortClearOrderdSource(pHandle->pOrderedSource, NULL, NULL); + tsortClearOrderdSource(pHandle->pOrderedSource, NULL, NULL); - while (1) { - SSDataBlock* pBlock = pHandle->fetchfp(source->param); - if (pBlock == NULL) { - break; + while (1) { + SSDataBlock* pBlock = pHandle->fetchfp(source->param); + if (pBlock == NULL) { + break; + } + + if (pHandle->pDataBlock == NULL) { + uint32_t numOfCols = taosArrayGetSize(pBlock->pDataBlock); + pHandle->pageSize = getProperSortPageSize(blockDataGetRowSize(pBlock), numOfCols); + + // todo, number of pages are set according to the total available sort buffer + pHandle->numOfPages = 1024; + sortBufSize = pHandle->numOfPages * pHandle->pageSize; + pHandle->pDataBlock = 
createOneDataBlock(pBlock, false); + } + + if (pHandle->beforeFp != NULL) { + pHandle->beforeFp(pBlock, pHandle->param); + } + + code = blockDataMerge(pHandle->pDataBlock, pBlock); + if (code != TSDB_CODE_SUCCESS) { + if (source->param && !source->onlyRef) { + taosMemoryFree(source->param); } - - if (pHandle->pDataBlock == NULL) { - uint32_t numOfCols = taosArrayGetSize(pBlock->pDataBlock); - pHandle->pageSize = getProperSortPageSize(blockDataGetRowSize(pBlock), numOfCols); - - // todo, number of pages are set according to the total available sort buffer - pHandle->numOfPages = 1024; - sortBufSize = pHandle->numOfPages * pHandle->pageSize; - pHandle->pDataBlock = createOneDataBlock(pBlock, false); + if (!source->onlyRef && source->src.pBlock) { + blockDataDestroy(source->src.pBlock); + source->src.pBlock = NULL; } + taosMemoryFree(source); + return code; + } - if (pHandle->beforeFp != NULL) { - pHandle->beforeFp(pBlock, pHandle->param); - } - - code = blockDataMerge(pHandle->pDataBlock, pBlock); - if (code != TSDB_CODE_SUCCESS) { + size_t size = blockDataGetSize(pHandle->pDataBlock); + if (size > sortBufSize) { + // Perform the in-memory sort and then flush data in the buffer into disk. + int64_t p = taosGetTimestampUs(); + code = blockDataSort(pHandle->pDataBlock, pHandle->pSortInfo); + if (code != 0) { if (source->param && !source->onlyRef) { taosMemoryFree(source->param); } @@ -951,122 +1013,71 @@ static int32_t createInitialSources(SSortHandle* pHandle) { blockDataDestroy(source->src.pBlock); source->src.pBlock = NULL; } + taosMemoryFree(source); return code; } - size_t size = blockDataGetSize(pHandle->pDataBlock); - if (size > sortBufSize) { - // Perform the in-memory sort and then flush data in the buffer into disk. - int64_t p = taosGetTimestampUs(); - code = blockDataSort(pHandle->pDataBlock, pHandle->pSortInfo); - if (code != 0) { - if (source->param && !source->onlyRef) { - taosMemoryFree(source->param); - } - if (!source->onlyRef && source->src.pBlock) { - blockDataDestroy(source->src.pBlock); - source->src.pBlock = NULL; - } - - taosMemoryFree(source); - return code; - } - - int64_t el = taosGetTimestampUs() - p; - pHandle->sortElapsed += el; - if (pHandle->maxRows > 0) blockDataKeepFirstNRows(pHandle->pDataBlock, pHandle->maxRows); - code = doAddToBuf(pHandle->pDataBlock, pHandle); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - } - - if (source->param && !source->onlyRef) { - taosMemoryFree(source->param); - } - - taosMemoryFree(source); - - if (pHandle->pDataBlock != NULL && pHandle->pDataBlock->info.rows > 0) { - size_t size = blockDataGetSize(pHandle->pDataBlock); - - // Perform the in-memory sort and then flush data in the buffer into disk. - int64_t p = taosGetTimestampUs(); - - code = blockDataSort(pHandle->pDataBlock, pHandle->pSortInfo); - if (code != 0) { - return code; - } - - if (pHandle->maxRows > 0) blockDataKeepFirstNRows(pHandle->pDataBlock, pHandle->maxRows); int64_t el = taosGetTimestampUs() - p; pHandle->sortElapsed += el; - - // All sorted data can fit in memory, external memory sort is not needed. 
Return to directly - if (size <= sortBufSize && pHandle->pBuf == NULL) { - pHandle->cmpParam.numOfSources = 1; - pHandle->inMemSort = true; - - pHandle->loops = 1; - pHandle->tupleHandle.rowIndex = -1; - pHandle->tupleHandle.pBlock = pHandle->pDataBlock; - return 0; - } else { - code = doAddToBuf(pHandle->pDataBlock, pHandle); + if (pHandle->maxRows > 0) blockDataKeepFirstNRows(pHandle->pDataBlock, pHandle->maxRows); + code = doAddToBuf(pHandle->pDataBlock, pHandle); + if (code != TSDB_CODE_SUCCESS) { + return code; } } - } else if (pHandle->type == SORT_TABLE_MERGE_SCAN) { - SBlockOrderInfo* pOrder = taosArrayGet(pHandle->pSortInfo, 0); - size_t nSrc = taosArrayGetSize(pHandle->pOrderedSource); - SArray* aExtSrc = taosArrayInit(nSrc, POINTER_BYTES); - - size_t maxBufSize = pHandle->numOfPages * pHandle->pageSize; - createPageBuf(pHandle); - - SSortSource* pSrc = taosArrayGetP(pHandle->pOrderedSource, 0); - int32_t szSort = 0; - - SArray* aBlkSort = taosArrayInit(8, POINTER_BYTES); - while (1) { - SSDataBlock* pBlk = pHandle->fetchfp(pSrc->param); - if (pBlk == NULL) { - break; - }; - - szSort += blockDataGetSize(pBlk); - SSDataBlock* blk = createOneDataBlock(pBlk, true); - taosArrayPush(aBlkSort, &blk); - - if (szSort > maxBufSize) { - sortBlocksToExtSource(pHandle, aBlkSort, pOrder, aExtSrc); - uInfo("initial source %zu created for %zu blocks", taosArrayGetSize(aExtSrc), taosArrayGetSize(aBlkSort)); - - for (int i = 0; i < taosArrayGetSize(aBlkSort); ++i) { - blockDataDestroy(taosArrayGetP(aBlkSort, i)); - } - taosArrayClear(aBlkSort); - szSort = 0; - } - } - if (szSort > 0) { - sortBlocksToExtSource(pHandle, aBlkSort, pOrder, aExtSrc); - for (int i = 0; i < taosArrayGetSize(aBlkSort); ++i) { - blockDataDestroy(taosArrayGetP(aBlkSort, i)); - } - taosArrayClear(aBlkSort); - szSort = 0; - } - taosArrayDestroy(aBlkSort); - tsortClearOrderdSource(pHandle->pOrderedSource, NULL, NULL); - taosArrayAddAll(pHandle->pOrderedSource, aExtSrc); - taosArrayDestroy(aExtSrc); - - pHandle->type = SORT_SINGLESOURCE_SORT; - uInfo("create initial sources for table merge scan ended"); } + if (source->param && !source->onlyRef) { + taosMemoryFree(source->param); + } + + taosMemoryFree(source); + + if (pHandle->pDataBlock != NULL && pHandle->pDataBlock->info.rows > 0) { + size_t size = blockDataGetSize(pHandle->pDataBlock); + + // Perform the in-memory sort and then flush data in the buffer into disk. + int64_t p = taosGetTimestampUs(); + + code = blockDataSort(pHandle->pDataBlock, pHandle->pSortInfo); + if (code != 0) { + return code; + } + + if (pHandle->maxRows > 0) blockDataKeepFirstNRows(pHandle->pDataBlock, pHandle->maxRows); + int64_t el = taosGetTimestampUs() - p; + pHandle->sortElapsed += el; + + // All sorted data can fit in memory, external memory sort is not needed. 
Return to directly + if (size <= sortBufSize && pHandle->pBuf == NULL) { + pHandle->cmpParam.numOfSources = 1; + pHandle->inMemSort = true; + + pHandle->loops = 1; + pHandle->tupleHandle.rowIndex = -1; + pHandle->tupleHandle.pBlock = pHandle->pDataBlock; + return 0; + } else { + code = doAddToBuf(pHandle->pDataBlock, pHandle); + } + } + return code; +} + +static int32_t createInitialSources(SSortHandle* pHandle) { + int32_t code = 0; + + if (pHandle->type == SORT_SINGLESOURCE_SORT) { + code = createBlocksQuickSortInitialSources(pHandle); + } else if (pHandle->type == SORT_TABLE_MERGE_SCAN) { + code = createBlocksMergeSortInitialSources(pHandle); + } + qInfo("%zu sources created", taosArrayGetSize(pHandle->pOrderedSource)); + for (int i = 0; i < taosArrayGetSize(pHandle->pOrderedSource); ++i) { + SSortSource* pSrc = taosArrayGetP(pHandle->pOrderedSource, i); + qInfo("source %d, num of pages %zu", i,taosArrayGetSize(pSrc->pageIdList)); + } return code; } From f79fc81d9c2d53d8cfe7e0e8d4c48bb189705c45 Mon Sep 17 00:00:00 2001 From: slzhou Date: Tue, 18 Jul 2023 15:46:38 +0800 Subject: [PATCH 08/23] fix: add to page buf in the same way as single source sort --- source/libs/executor/src/tsort.c | 57 +++++++++++++++++++++++++++----- 1 file changed, 48 insertions(+), 9 deletions(-) diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 34f9dfc233..6b93f95099 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -837,6 +837,10 @@ static int32_t appendDataBlockToPageBuf(SSortHandle* pHandle, SSDataBlock* blk, int32_t pageId = -1; void* pPage = getNewBufPage(pHandle->pBuf, &pageId); taosArrayPush(aPgId, &pageId); + + int32_t size = blockDataGetSize(blk) + sizeof(int32_t) + taosArrayGetSize(blk->pDataBlock) * sizeof(int32_t); + ASSERT(size <= getBufPageSize(pHandle->pBuf)); + blockDataToBuf(pPage, blk); setBufPageDirty(pPage, true); @@ -846,13 +850,39 @@ static int32_t appendDataBlockToPageBuf(SSortHandle* pHandle, SSDataBlock* blk, return 0; } +static int32_t getPageBufIncForRow(SSDataBlock* blk, int32_t row, int32_t rowIdxInPage) { + int sz = 0; + int numCols = taosArrayGetSize(blk->pDataBlock); + if (!blk->info.hasVarCol) { + sz += numCols * ((rowIdxInPage & 0x7) == 0 ? 
1: 0); + sz += blockDataGetRowSize(blk); + } else { + for (int32_t i = 0; i < numCols; ++i) { + SColumnInfoData* pColInfoData = TARRAY_GET_ELEM(blk->pDataBlock, i); + if (IS_VAR_DATA_TYPE(pColInfoData->info.type)) { + if (pColInfoData->varmeta.offset[row] != -1) { + char* p = colDataGetData(pColInfoData, row); + sz += varDataTLen(p); + } + + sz += sizeof(pColInfoData->varmeta.offset[0]); + } else { + sz += pColInfoData->info.bytes; + + if (((rowIdxInPage) & 0x07) == 0) { + sz += 1; // bitmap + } + } + } + } + return sz; +} static int32_t sortBlocksToExtSource(SSortHandle* pHandle, SArray* aBlk, SBlockOrderInfo* order, SArray* aExtSrc) { - int32_t rowCap = blockDataGetCapacityInRow(pHandle->pDataBlock, pHandle->pageSize, - blockDataGetSerialMetaSize(taosArrayGetSize(pHandle->pDataBlock->pDataBlock))); + int pgHeaderSz = sizeof(int32_t) + sizeof(int32_t) * taosArrayGetSize(pHandle->pDataBlock->pDataBlock); + int32_t rowCap = blockDataGetCapacityInRow(pHandle->pDataBlock, pHandle->pageSize, pgHeaderSz); blockDataEnsureCapacity(pHandle->pDataBlock, rowCap); blockDataCleanup(pHandle->pDataBlock); - int32_t numBlks = taosArrayGetSize(aBlk); SBlkMergeSupport sup; @@ -878,16 +908,25 @@ static int32_t sortBlocksToExtSource(SSortHandle* pHandle, SArray* aBlk, SBlockO tMergeTreeCreate(&pTree, taosArrayGetSize(aBlk), &sup, blockCompareTsFn); int32_t numEnded = 0; int32_t nRows = 0; + + size_t blkPgSz = pgHeaderSz; + while (nRows < totalRows) { int32_t minIdx = tMergeTreeGetChosenIndex(pTree); SSDataBlock* minBlk = taosArrayGetP(aBlk, minIdx); int32_t minRow = sup.aRowIdx[minIdx]; + int32_t bufInc = getPageBufIncForRow(minBlk, minRow, pHandle->pDataBlock->info.rows); - appendOneRowToDataBlock(pHandle->pDataBlock, minBlk, &minRow); - ++nRows; - if (pHandle->pDataBlock->info.rows >= rowCap) { - appendDataBlockToPageBuf(pHandle, pHandle->pDataBlock, aPgId); + if (blkPgSz <= pHandle->pageSize && blkPgSz + bufInc > pHandle->pageSize) { + appendDataBlockToPageBuf(pHandle, pHandle->pDataBlock, aPgId); + blkPgSz = pgHeaderSz; + bufInc = getPageBufIncForRow(minBlk, minRow, 0); } + blockDataEnsureCapacity(pHandle->pDataBlock, pHandle->pDataBlock->info.rows + 1); + appendOneRowToDataBlock(pHandle->pDataBlock, minBlk, &minRow); + blkPgSz += bufInc; + + ++nRows; if (sup.aRowIdx[minIdx] == minBlk->info.rows - 1) { sup.aRowIdx[minIdx] = -1; @@ -1073,10 +1112,10 @@ static int32_t createInitialSources(SSortHandle* pHandle) { } else if (pHandle->type == SORT_TABLE_MERGE_SCAN) { code = createBlocksMergeSortInitialSources(pHandle); } - qInfo("%zu sources created", taosArrayGetSize(pHandle->pOrderedSource)); + uInfo("%zu sources created", taosArrayGetSize(pHandle->pOrderedSource)); for (int i = 0; i < taosArrayGetSize(pHandle->pOrderedSource); ++i) { SSortSource* pSrc = taosArrayGetP(pHandle->pOrderedSource, i); - qInfo("source %d, num of pages %zu", i,taosArrayGetSize(pSrc->pageIdList)); + uInfo("source %d, num of pages %zu", i,taosArrayGetSize(pSrc->pageIdList)); } return code; } From 4348b9c51dfbc059ec408dfd505adaebdbbc0fee Mon Sep 17 00:00:00 2001 From: slzhou Date: Tue, 18 Jul 2023 16:47:31 +0800 Subject: [PATCH 09/23] fix: use uinfo to show on stdout --- source/libs/executor/src/tsort.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 6b93f95099..4bcf455ef7 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -475,12 +475,14 @@ static int32_t adjustMergeTreeForNextTuple(SSortSource* 
pSource, SMultiwayMergeT if (pHandle->type == SORT_SINGLESOURCE_SORT) { pSource->pageIndex++; if (pSource->pageIndex >= taosArrayGetSize(pSource->pageIdList)) { - qInfo("adjust merge tree. %d source completed %d", *numOfCompleted, pSource->pageIndex); + uInfo("adjust merge tree. %d source completed %d", *numOfCompleted, pSource->pageIndex); (*numOfCompleted) += 1; pSource->src.rowIndex = -1; pSource->pageIndex = -1; pSource->src.pBlock = blockDataDestroy(pSource->src.pBlock); } else { + if (pSource->pageIndex % 512 == 0) uInfo("begin source %p page %d", pSource, pSource->pageIndex); + int32_t* pPgId = taosArrayGet(pSource->pageIdList, pSource->pageIndex); void* pPage = getBufPage(pHandle->pBuf, *pPgId); @@ -493,7 +495,6 @@ static int32_t adjustMergeTreeForNextTuple(SSortSource* pSource, SMultiwayMergeT if (code != TSDB_CODE_SUCCESS) { return code; } - releaseBufPage(pHandle->pBuf, pPage); } } else { @@ -504,7 +505,7 @@ static int32_t adjustMergeTreeForNextTuple(SSortSource* pSource, SMultiwayMergeT if (pSource->src.pBlock == NULL) { (*numOfCompleted) += 1; pSource->src.rowIndex = -1; - qInfo("adjust merge tree. %d source completed", *numOfCompleted); + uInfo("adjust merge tree. %d source completed", *numOfCompleted); } } } @@ -686,7 +687,7 @@ static int32_t doInternalMergeSort(SSortHandle* pHandle) { // Only *numOfInputSources* can be loaded into buffer to perform the external sort. for (int32_t i = 0; i < sortGroup; ++i) { - qInfo("internal merge sort pass %d group %d. num input sources %d ", t, i, numOfInputSources); + uInfo("internal merge sort pass %d group %d. num input sources %d ", t, i, numOfInputSources); pHandle->sourceId += 1; int32_t end = (i + 1) * numOfInputSources - 1; @@ -981,6 +982,7 @@ static int32_t createBlocksMergeSortInitialSources(SSortHandle* pHandle) { } taosArrayClear(aBlkSort); szSort = 0; + uInfo("source %zu created", taosArrayGetSize(aExtSrc)); } if (pBlk == NULL) { break; From b9aeda263c8385c11681ebef4db99d36191f9885 Mon Sep 17 00:00:00 2001 From: slzhou Date: Wed, 19 Jul 2023 09:48:52 +0800 Subject: [PATCH 10/23] fix: fix ubsan error of null argument --- source/common/src/tdatablock.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c index b2f03fa7ba..887a110831 100644 --- a/source/common/src/tdatablock.c +++ b/source/common/src/tdatablock.c @@ -632,7 +632,10 @@ int32_t blockDataToBuf(char* buf, const SSDataBlock* pBlock) { pStart += colSize; } } else { - memcpy(pStart, pCol->pData, dataSize); + if (dataSize != 0) { + // ubsan reports error if pCol->pData==NULL && dataSize==0 + memcpy(pStart, pCol->pData, dataSize); + } pStart += dataSize; } } @@ -684,8 +687,10 @@ int32_t blockDataFromBuf(SSDataBlock* pBlock, const char* buf) { return TSDB_CODE_FAILED; } } - - memcpy(pCol->pData, pStart, colLength); + if (colLength != 0) { + // ubsan reports error if colLength==0 && pCol->pData == 0 + memcpy(pCol->pData, pStart, colLength); + } pStart += colLength; } From d1a1976aa47cdac58e4fe56e3634c121ac43c96c Mon Sep 17 00:00:00 2001 From: slzhou Date: Wed, 19 Jul 2023 13:59:43 +0800 Subject: [PATCH 11/23] enhance: add limit to merge sort --- source/libs/executor/inc/tsort.h | 12 +++-- source/libs/executor/src/scanoperator.c | 8 +++- source/libs/executor/src/tsort.c | 58 ++++++++++++++++--------- 3 files changed, 51 insertions(+), 27 deletions(-) diff --git a/source/libs/executor/inc/tsort.h b/source/libs/executor/inc/tsort.h index a15b1e0eaf..538a9f18f6 100644 --- 
a/source/libs/executor/inc/tsort.h +++ b/source/libs/executor/inc/tsort.h @@ -26,7 +26,7 @@ extern "C" { enum { SORT_MULTISOURCE_MERGE = 0x1, SORT_SINGLESOURCE_SORT = 0x2, - SORT_TABLE_MERGE_SCAN = 0x3 + SORT_BLOCK_TS_MERGE = 0x3 }; typedef struct SMultiMergeSource { @@ -56,7 +56,7 @@ typedef struct SMsortComparParam { bool cmpGroupId; int32_t sortType; - // the following field to speed up when sortType == SORT_TABLE_MERGE_SCAN + // the following field to speed up when sortType == SORT_BLOCK_TS_MERGE int32_t tsSlotId; int32_t order; __compar_fn_t cmpFn; @@ -77,8 +77,8 @@ typedef int32_t (*_sort_merge_compar_fn_t)(const void* p1, const void* p2, void* * @return */ SSortHandle* tsortCreateSortHandle(SArray* pOrderInfo, int32_t type, int32_t pageSize, int32_t numOfPages, - SSDataBlock* pBlock, const char* idstr, uint64_t maxRows, uint32_t maxTupleLength, - uint32_t sortBufSize); + SSDataBlock* pBlock, const char* idstr, uint64_t pqMaxRows, uint32_t pqMaxTupleLength, + uint32_t pqSortBufSize); void tsortSetForceUsePQSort(SSortHandle* pHandle); @@ -117,6 +117,10 @@ int32_t tsortSetFetchRawDataFp(SSortHandle* pHandle, _sort_fetch_block_fn_t fetc */ int32_t tsortSetComparFp(SSortHandle* pHandle, _sort_merge_compar_fn_t fp); +/** + * +*/ +void tsortSetMergeLimit(SSortHandle* pHandle, int64_t mergeLimit); /** * */ diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index ab8991bd05..6edd7b2d47 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -2842,9 +2842,13 @@ int32_t startGroupTableMergeScan(SOperatorInfo* pOperator) { pInfo->sortBufSize = 1024 * pInfo->bufPageSize; int32_t numOfBufPage = pInfo->sortBufSize / pInfo->bufPageSize; - pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_TABLE_MERGE_SCAN, pInfo->bufPageSize, numOfBufPage, + pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_BLOCK_TS_MERGE, pInfo->bufPageSize, numOfBufPage, pInfo->pSortInputBlock, pTaskInfo->id.str, 0, 0, 0); - + int64_t mergeLimit = -1; + if (pInfo->limitInfo.limit.limit != -1 || pInfo->limitInfo.limit.offset != -1) { + mergeLimit = pInfo->limitInfo.limit.limit + pInfo->limitInfo.limit.offset; + } + tsortSetMergeLimit(pInfo->pSortHandle, mergeLimit); tsortSetFetchRawDataFp(pInfo->pSortHandle, getTableDataBlockImpl, NULL, NULL); // one table has one data block diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 4bcf455ef7..c56dab1e33 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -42,13 +42,15 @@ struct SSortHandle { int64_t startTs; uint64_t totalElapsed; - uint64_t maxRows; - uint32_t maxTupleLength; - uint32_t sortBufSize; + uint64_t pqMaxRows; + uint32_t pqMaxTupleLength; + uint32_t pqSortBufSize; bool forceUsePQSort; BoundedQueue* pBoundedQueue; uint32_t tmpRowIdx; + int64_t mergeLimit; + int32_t sourceId; SSDataBlock* pDataBlock; SMsortComparParam cmpParam; @@ -173,8 +175,8 @@ void destroyTuple(void* t) { * @return */ SSortHandle* tsortCreateSortHandle(SArray* pSortInfo, int32_t type, int32_t pageSize, int32_t numOfPages, - SSDataBlock* pBlock, const char* idstr, uint64_t maxRows, uint32_t maxTupleLength, - uint32_t sortBufSize) { + SSDataBlock* pBlock, const char* idstr, uint64_t pqMaxRows, uint32_t pqMaxTupleLength, + uint32_t pqSortBufSize) { SSortHandle* pSortHandle = taosMemoryCalloc(1, sizeof(SSortHandle)); pSortHandle->type = type; @@ -183,10 +185,10 @@ SSortHandle* tsortCreateSortHandle(SArray* pSortInfo, 
int32_t type, int32_t page pSortHandle->pSortInfo = pSortInfo; pSortHandle->loops = 0; - pSortHandle->maxTupleLength = maxTupleLength; - if (maxRows != 0) { - pSortHandle->sortBufSize = sortBufSize; - pSortHandle->maxRows = maxRows; + pSortHandle->pqMaxTupleLength = pqMaxTupleLength; + if (pqMaxRows != 0) { + pSortHandle->pqSortBufSize = pqSortBufSize; + pSortHandle->pqMaxRows = pqMaxRows; } pSortHandle->forceUsePQSort = false; @@ -194,11 +196,13 @@ SSortHandle* tsortCreateSortHandle(SArray* pSortInfo, int32_t type, int32_t page pSortHandle->pDataBlock = createOneDataBlock(pBlock, false); } + pSortHandle->mergeLimit = -1; + pSortHandle->pOrderedSource = taosArrayInit(4, POINTER_BYTES); pSortHandle->cmpParam.orderInfo = pSortInfo; pSortHandle->cmpParam.cmpGroupId = false; pSortHandle->cmpParam.sortType = type; - if (type == SORT_TABLE_MERGE_SCAN) { + if (type == SORT_BLOCK_TS_MERGE) { SBlockOrderInfo* pOrder = TARRAY_GET_ELEM(pSortInfo, 0); pSortHandle->cmpParam.tsSlotId = pOrder->slotId; pSortHandle->cmpParam.order = pOrder->order; @@ -586,7 +590,7 @@ int32_t msortComparFn(const void* pLeft, const void* pRight, void* param) { } } - if (pParam->sortType == SORT_TABLE_MERGE_SCAN) { + if (pParam->sortType == SORT_BLOCK_TS_MERGE) { SColumnInfoData* pLeftColInfoData = TARRAY_GET_ELEM(pLeftBlock->pDataBlock, pParam->tsSlotId); SColumnInfoData* pRightColInfoData = TARRAY_GET_ELEM(pRightBlock->pDataBlock, pParam->tsSlotId); int64_t* left1 = (int64_t*)(pLeftColInfoData->pData) + pLeftSource->src.rowIndex; @@ -709,18 +713,23 @@ static int32_t doInternalMergeSort(SSortHandle* pHandle) { taosArrayDestroy(pResList); return code; } - + int nRows = 0; SArray* pPageIdList = taosArrayInit(4, sizeof(int32_t)); while (1) { if (tsortIsClosed(pHandle)) { code = terrno = TSDB_CODE_TSC_QUERY_CANCELLED; return code; } - + + if (pHandle->mergeLimit != -1 && nRows >= pHandle->mergeLimit) { + break; + } + SSDataBlock* pDataBlock = getSortedBlockDataInner(pHandle, &pHandle->cmpParam, numOfRows); if (pDataBlock == NULL) { break; } + nRows += pDataBlock->info.rows; int32_t pageId = -1; void* pPage = getNewBufPage(pHandle->pBuf, &pageId); @@ -740,7 +749,6 @@ static int32_t doInternalMergeSort(SSortHandle* pHandle) { setBufPageDirty(pPage, true); releaseBufPage(pHandle->pBuf, pPage); - blockDataCleanup(pDataBlock); } @@ -846,7 +854,6 @@ static int32_t appendDataBlockToPageBuf(SSortHandle* pHandle, SSDataBlock* blk, setBufPageDirty(pPage, true); releaseBufPage(pHandle->pBuf, pPage); - blockDataCleanup(blk); return 0; } @@ -913,6 +920,9 @@ static int32_t sortBlocksToExtSource(SSortHandle* pHandle, SArray* aBlk, SBlockO size_t blkPgSz = pgHeaderSz; while (nRows < totalRows) { + if (pHandle->mergeLimit != -1 && nRows >= pHandle->mergeLimit) { + break; + } int32_t minIdx = tMergeTreeGetChosenIndex(pTree); SSDataBlock* minBlk = taosArrayGetP(aBlk, minIdx); int32_t minRow = sup.aRowIdx[minIdx]; @@ -920,6 +930,7 @@ static int32_t sortBlocksToExtSource(SSortHandle* pHandle, SArray* aBlk, SBlockO if (blkPgSz <= pHandle->pageSize && blkPgSz + bufInc > pHandle->pageSize) { appendDataBlockToPageBuf(pHandle, pHandle->pDataBlock, aPgId); + blockDataCleanup(pHandle->pDataBlock); blkPgSz = pgHeaderSz; bufInc = getPageBufIncForRow(minBlk, minRow, 0); } @@ -939,6 +950,7 @@ static int32_t sortBlocksToExtSource(SSortHandle* pHandle, SArray* aBlk, SBlockO } if (pHandle->pDataBlock->info.rows > 0) { appendDataBlockToPageBuf(pHandle, pHandle->pDataBlock, aPgId); + blockDataCleanup(pHandle->pDataBlock); } SSDataBlock* pMemSrcBlk = 
createOneDataBlock(pHandle->pDataBlock, false); doAddNewExternalMemSource(pHandle->pBuf, aExtSrc, pMemSrcBlk, &pHandle->sourceId, aPgId); @@ -1061,7 +1073,7 @@ static int32_t createBlocksQuickSortInitialSources(SSortHandle* pHandle) { int64_t el = taosGetTimestampUs() - p; pHandle->sortElapsed += el; - if (pHandle->maxRows > 0) blockDataKeepFirstNRows(pHandle->pDataBlock, pHandle->maxRows); + if (pHandle->pqMaxRows > 0) blockDataKeepFirstNRows(pHandle->pDataBlock, pHandle->pqMaxRows); code = doAddToBuf(pHandle->pDataBlock, pHandle); if (code != TSDB_CODE_SUCCESS) { return code; @@ -1086,7 +1098,7 @@ static int32_t createBlocksQuickSortInitialSources(SSortHandle* pHandle) { return code; } - if (pHandle->maxRows > 0) blockDataKeepFirstNRows(pHandle->pDataBlock, pHandle->maxRows); + if (pHandle->pqMaxRows > 0) blockDataKeepFirstNRows(pHandle->pDataBlock, pHandle->pqMaxRows); int64_t el = taosGetTimestampUs() - p; pHandle->sortElapsed += el; @@ -1111,7 +1123,7 @@ static int32_t createInitialSources(SSortHandle* pHandle) { if (pHandle->type == SORT_SINGLESOURCE_SORT) { code = createBlocksQuickSortInitialSources(pHandle); - } else if (pHandle->type == SORT_TABLE_MERGE_SCAN) { + } else if (pHandle->type == SORT_BLOCK_TS_MERGE) { code = createBlocksMergeSortInitialSources(pHandle); } uInfo("%zu sources created", taosArrayGetSize(pHandle->pOrderedSource)); @@ -1165,6 +1177,10 @@ void tsortSetClosed(SSortHandle* pHandle) { atomic_store_8(&pHandle->closed, 2); } +void tsortSetMergeLimit(SSortHandle* pHandle, int64_t mergeLimit) { + pHandle->mergeLimit = mergeLimit; +} + int32_t tsortSetFetchRawDataFp(SSortHandle* pHandle, _sort_fetch_block_fn_t fetchFp, void (*fp)(SSDataBlock*, void*), void* param) { pHandle->fetchfp = fetchFp; @@ -1244,8 +1260,8 @@ void tsortSetForceUsePQSort(SSortHandle* pHandle) { static bool tsortIsPQSortApplicable(SSortHandle* pHandle) { if (pHandle->type != SORT_SINGLESOURCE_SORT) return false; if (tsortIsForceUsePQSort(pHandle)) return true; - uint64_t maxRowsFitInMemory = pHandle->sortBufSize / (pHandle->maxTupleLength + sizeof(char*)); - return maxRowsFitInMemory > pHandle->maxRows; + uint64_t maxRowsFitInMemory = pHandle->pqSortBufSize / (pHandle->pqMaxTupleLength + sizeof(char*)); + return maxRowsFitInMemory > pHandle->pqMaxRows; } static bool tsortPQCompFn(void* a, void* b, void* param) { @@ -1291,7 +1307,7 @@ static int32_t tupleComparFn(const void* pLeft, const void* pRight, void* param) } static int32_t tsortOpenForPQSort(SSortHandle* pHandle) { - pHandle->pBoundedQueue = createBoundedQueue(pHandle->maxRows, tsortPQCompFn, destroyTuple, pHandle); + pHandle->pBoundedQueue = createBoundedQueue(pHandle->pqMaxRows, tsortPQCompFn, destroyTuple, pHandle); if (NULL == pHandle->pBoundedQueue) return TSDB_CODE_OUT_OF_MEMORY; tsortSetComparFp(pHandle, tupleComparFn); From 265f2a0416f85a0c676b9ddc1fb396f3b8e8f8db Mon Sep 17 00:00:00 2001 From: Alex Duan <417921451@qq.com> Date: Wed, 19 Jul 2023 14:20:52 +0800 Subject: [PATCH 12/23] test: add create sma index case timeRangeWise.py --- tests/parallel_test/cases.task | 1 + tests/pytest/util/sql.py | 4 +- tests/system-test/0-others/timeRangeWise.py | 309 ++++++++++++++++++++ 3 files changed, 313 insertions(+), 1 deletion(-) create mode 100644 tests/system-test/0-others/timeRangeWise.py diff --git a/tests/parallel_test/cases.task b/tests/parallel_test/cases.task index 620748bc26..9ec12197ec 100644 --- a/tests/parallel_test/cases.task +++ b/tests/parallel_test/cases.task @@ -160,6 +160,7 @@ ,,n,system-test,python3 ./test.py -f 
0-others/udfpy_main.py ,,n,system-test,python3 ./test.py -N 3 -f 0-others/walRetention.py ,,n,system-test,python3 ./test.py -f 0-others/splitVGroup.py -N 5 +,,n,system-test,python3 ./test.py -f 0-others/timeRangeWise.py -N 3 ,,y,system-test,./pytest.sh python3 ./test.py -f 1-insert/alter_database.py ,,y,system-test,./pytest.sh python3 ./test.py -f 1-insert/alter_replica.py -N 3 ,,y,system-test,./pytest.sh python3 ./test.py -f 1-insert/influxdb_line_taosc_insert.py diff --git a/tests/pytest/util/sql.py b/tests/pytest/util/sql.py index 2af8f721b6..2fa21b1983 100644 --- a/tests/pytest/util/sql.py +++ b/tests/pytest/util/sql.py @@ -440,8 +440,10 @@ class TDSql: time.sleep(1) continue - def execute(self, sql,queryTimes=30): + def execute(self, sql, queryTimes=30, show=False): self.sql = sql + if show: + tdLog.info(sql) i=1 while i <= queryTimes: try: diff --git a/tests/system-test/0-others/timeRangeWise.py b/tests/system-test/0-others/timeRangeWise.py new file mode 100644 index 0000000000..a7dc18aa82 --- /dev/null +++ b/tests/system-test/0-others/timeRangeWise.py @@ -0,0 +1,309 @@ +################################################################### +# Copyright (c) 2016 by TAOS Technologies, Inc. +# All rights reserved. +# +# This file is proprietary and confidential to TAOS Technologies. +# No part of this file may be reproduced, stored, transmitted, +# disclosed or used in any form or by any means other than as +# expressly provided by the written permission from Jianhui Tao +# +################################################################### + +# -*- coding: utf-8 -*- + +import sys +import random +import time +import copy +import string + +import taos +from util.log import * +from util.cases import * +from util.sql import * + +class TDTestCase: + + # random string + def random_string(self, count): + letters = string.ascii_letters + return ''.join(random.choice(letters) for i in range(count)) + + # get col value and total max min ... 
+ def getColsValue(self, i, j): + # c1 value + if random.randint(1, 10) == 5: + c1 = None + else: + c1 = 1 + + # c2 value + if j % 3200 == 0: + c2 = 8764231 + elif random.randint(1, 10) == 5: + c2 = None + else: + c2 = random.randint(-87654297, 98765321) + + + value = f"({self.ts}, " + + # c1 + if c1 is None: + value += "null," + else: + self.c1Cnt += 1 + value += f"{c1}," + # c2 + if c2 is None: + value += "null," + else: + value += f"{c2}," + # total count + self.c2Cnt += 1 + # max + if self.c2Max is None: + self.c2Max = c2 + else: + if c2 > self.c2Max: + self.c2Max = c2 + # min + if self.c2Min is None: + self.c2Min = c2 + else: + if c2 < self.c2Min: + self.c2Min = c2 + # sum + if self.c2Sum is None: + self.c2Sum = c2 + else: + self.c2Sum += c2 + + # c3 same with ts + value += f"{self.ts})" + + # move next 1s interval + self.ts += 1 + + return value + + # insert data + def insertData(self): + tdLog.info("insert data ....") + sqls = "" + for i in range(self.childCnt): + # insert child table + values = "" + pre_insert = f"insert into @db_name.t{i} values " + for j in range(self.childRow): + if values == "": + values = self.getColsValue(i, j) + else: + values += "," + self.getColsValue(i, j) + + # batch insert + if j % self.batchSize == 0 and values != "": + sql = pre_insert + values + self.exeDouble(sql) + values = "" + # append last + if values != "": + sql = pre_insert + values + self.exeDouble(sql) + values = "" + + # insert finished + tdLog.info(f"insert data successfully.\n" + f" inserted child table = {self.childCnt}\n" + f" inserted child rows = {self.childRow}\n" + f" total inserted rows = {self.childCnt*self.childRow}\n") + return + + def exeDouble(self, sql): + # dbname replace + sql1 = sql.replace("@db_name", self.db1) + + if len(sql1) > 100: + tdLog.info(sql1[:100]) + else: + tdLog.info(sql1) + tdSql.execute(sql1) + + sql2 = sql.replace("@db_name", self.db2) + if len(sql2) > 100: + tdLog.info(sql2[:100]) + else: + tdLog.info(sql2) + tdSql.execute(sql2) + + + # prepareEnv + def prepareEnv(self): + # init + self.ts = 1680000000000 + self.childCnt = 2 + self.childRow = 100000 + self.batchSize = 5000 + self.vgroups1 = 4 + self.vgroups2 = 4 + self.db1 = "db1" # no sma + self.db2 = "db2" # have sma + self.smaClause = "interval(10s)" + + # total + self.c1Cnt = 0 + self.c2Cnt = 0 + self.c2Max = None + self.c2Min = None + self.c2Sum = None + + # alter local optimization to treu + sql = "alter local 'querysmaoptimize 1'" + tdSql.execute(sql, 5, True) + + # check forbid mulit-replic on create sma index + sql = f"create database db vgroups {self.vgroups1} replica 3" + tdSql.execute(sql, 5, True) + sql = f"create table db.st(ts timestamp, c1 int, c2 bigint, ts1 timestamp) tags(area int)" + tdSql.execute(sql, 5, True) + + sql = f"create sma index sma_test on db.st function(max(c1),max(c2),min(c1),min(c2)) {self.smaClause};" + tdLog.info(sql) + tdSql.error(sql) + + + # create database db + sql = f"create database @db_name vgroups {self.vgroups1} replica 1" + self.exeDouble(sql) + + # create super talbe st + sql = f"create table @db_name.st(ts timestamp, c1 int, c2 bigint, ts1 timestamp) tags(area int)" + self.exeDouble(sql) + + # create child table + for i in range(self.childCnt): + sql = f"create table @db_name.t{i} using @db_name.st tags({i}) " + self.exeDouble(sql) + + # create sma index on db2 + sql = f"use {self.db2}" + tdSql.execute(sql) + sql = f"create sma index sma_index_maxmin on {self.db2}.st function(max(c1),max(c2),min(c1),min(c2)) {self.smaClause};" + tdLog.info(sql) + 
tdSql.execute(sql) + + # insert data + self.insertData() + + # check data correct + def checkExpect(self, sql, expectVal): + tdSql.query(sql) + rowCnt = tdSql.getRows() + for i in range(rowCnt): + val = tdSql.getData(i,0) + if val != expectVal: + tdLog.exit(f"Not expect . query={val} expect={expectVal} i={i} sql={sql}") + return False + + tdLog.info(f"check expect ok. sql={sql} expect ={expectVal} rowCnt={rowCnt}") + return True + + # init + def init(self, conn, logSql, replicaVar=1): + seed = time.clock_gettime(time.CLOCK_REALTIME) + random.seed(seed) + self.replicaVar = int(replicaVar) + tdLog.debug(f"start to excute {__file__}") + tdSql.init(conn.cursor(), True) + + # check query result same + def queryDoubleImpl(self, sql): + # sql + sql1 = sql.replace('@db_name', self.db1) + tdLog.info(sql1) + start1 = time.time() + rows1 = tdSql.query(sql1) + spend1 = time.time() - start1 + res1 = copy.copy(tdSql.queryResult) + + sql2 = sql.replace('@db_name', self.db2) + tdLog.info(sql2) + start2 = time.time() + tdSql.query(sql2) + spend2 = time.time() - start2 + res2 = tdSql.queryResult + + rowlen1 = len(res1) + rowlen2 = len(res2) + + if rowlen1 != rowlen2: + tdLog.info(f"check error. rowlen1={rowlen1} rowlen2={rowlen2} both not equal.") + return False + + for i in range(rowlen1): + row1 = res1[i] + row2 = res2[i] + collen1 = len(row1) + collen2 = len(row2) + if collen1 != collen2: + tdLog.info(f"checkerror. collen1={collen1} collen2={collen2} both not equal.") + return False + for j in range(collen1): + if row1[j] != row2[j]: + tdLog.exit(f"col={j} col1={row1[j]} col2={row2[j]} both col not equal.") + return False + + # warning performance + multiple = spend1/spend2 + tdLog.info("spend1=%.6fs spend2=%.6fs multiple=%.1f"%(spend1, spend2, multiple)) + if spend2 > spend1 and multiple < 4: + tdLog.info(f"performace not reached: multiple(spend1/spend)={multiple} require is >=4 ") + return False + + return True + + # check query result same + def queryDouble(self, sql, tryCount=60, gap=1): + for i in range(tryCount): + if self.queryDoubleImpl(sql): + return True + # error + tdLog.info(f"queryDouble return false, try loop={i}") + time.sleep(gap) + + tdLog.exit(f"queryDouble try {tryCount} times, but all failed.") + return False + + # check result + def checkResult(self): + + # max + sql = f"select max(c1) from @db_name.st {self.smaClause}" + self.queryDouble(sql) + + # min + sql = f"select max(c2) from @db_name.st {self.smaClause}" + self.queryDouble(sql) + + # mix + sql = f"select max(c1),max(c2),min(c1),min(c2) from @db_name.st {self.smaClause}" + self.queryDouble(sql) + + + # run + def run(self): + # prepare env + self.prepareEnv() + + # check two db query result same + tdLog.info(f"check have sma(db1) and no sma(db2) performace...") + self.checkResult() + + # stop + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) From 84817255179df77c6d4f6dfa3d9bcff6952eff2e Mon Sep 17 00:00:00 2001 From: liuyao <54liuyao@163.com> Date: Wed, 19 Jul 2023 14:58:28 +0800 Subject: [PATCH 13/23] remove redundant results --- source/libs/executor/src/timewindowoperator.c | 57 +++++++++++-------- source/libs/stream/src/streamBackendRocksdb.c | 5 -- 2 files changed, 33 insertions(+), 29 deletions(-) diff --git a/source/libs/executor/src/timewindowoperator.c b/source/libs/executor/src/timewindowoperator.c index 51da73d4d7..5d1fc0530f 100644 --- a/source/libs/executor/src/timewindowoperator.c +++ 
b/source/libs/executor/src/timewindowoperator.c @@ -2914,6 +2914,7 @@ void destroyStreamSessionAggOperatorInfo(void* param) { blockDataDestroy(pInfo->pDelRes); blockDataDestroy(pInfo->pWinBlock); blockDataDestroy(pInfo->pUpdateRes); + tSimpleHashCleanup(pInfo->pStUpdated); tSimpleHashCleanup(pInfo->pStDeleted); taosArrayDestroy(pInfo->historyWins); @@ -3008,14 +3009,6 @@ int32_t initStreamAggSupporter(SStreamAggSupporter* pSup, SqlFunctionCtx* pCtx, pCtx[i].saveHandle.pBuf = pSup->pResultBuf; } - if (pHandle) { - pSup->winRange = pHandle->winRange; - // temporary - if (pSup->winRange.ekey <= 0) { - pSup->winRange.ekey = INT64_MAX; - } - } - pSup->pSessionAPI = pApi; return TSDB_CODE_SUCCESS; @@ -3063,11 +3056,12 @@ void setSessionOutputBuf(SStreamAggSupporter* pAggSup, TSKEY startTs, TSKEY endT if (code == TSDB_CODE_SUCCESS && !inWinRange(&pAggSup->winRange, &pCurWin->sessionWin.win)) { code = TSDB_CODE_FAILED; releaseOutputBuf(pAggSup->pState, NULL, (SResultRow*)pCurWin->pOutputBuf, &pAggSup->pSessionAPI->stateStore); - pCurWin->pOutputBuf = taosMemoryMalloc(size); + pCurWin->pOutputBuf = taosMemoryCalloc(1, size); } if (code == TSDB_CODE_SUCCESS) { pCurWin->isOutput = true; + pAggSup->stateStore.streamStateSessionDel(pAggSup->pState, &pCurWin->sessionWin); } else { pCurWin->sessionWin.win.skey = startTs; pCurWin->sessionWin.win.ekey = endTs; @@ -3198,7 +3192,7 @@ SStreamStateCur* getNextSessionWinInfo(SStreamAggSupporter* pAggSup, SSHashObj* } static void compactSessionWindow(SOperatorInfo* pOperator, SResultWindowInfo* pCurWin, SSHashObj* pStUpdated, - SSHashObj* pStDeleted) { + SSHashObj* pStDeleted, bool addGap) { SExprSupp* pSup = &pOperator->exprSupp; SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; SStorageAPI* pAPI = &pOperator->pTaskInfo->storageAPI; @@ -3222,7 +3216,7 @@ static void compactSessionWindow(SOperatorInfo* pOperator, SResultWindowInfo* pC initSessionOutputBuf(&winInfo, &pWinResult, pAggSup->pDummyCtx, numOfOutput, pSup->rowEntryInfoOffset); pCurWin->sessionWin.win.ekey = TMAX(pCurWin->sessionWin.win.ekey, winInfo.sessionWin.win.ekey); int64_t winDelta = 0; - if (IS_FINAL_OP(pInfo)) { + if (addGap) { winDelta = pAggSup->gap; } updateTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &pCurWin->sessionWin.win, winDelta); @@ -3240,6 +3234,7 @@ static void compactSessionWindow(SOperatorInfo* pOperator, SResultWindowInfo* pC int32_t saveSessionOutputBuf(SStreamAggSupporter* pAggSup, SResultWindowInfo* pWinInfo) { saveSessionDiscBuf(pAggSup->pState, &pWinInfo->sessionWin, pWinInfo->pOutputBuf, pAggSup->resultRowSize, &pAggSup->stateStore); + pWinInfo->pOutputBuf = NULL; return TSDB_CODE_SUCCESS; } @@ -3253,8 +3248,10 @@ static void doStreamSessionAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSData SResultRow* pResult = NULL; int32_t rows = pSDataBlock->info.rows; int32_t winRows = 0; + SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; pInfo->dataVersion = TMAX(pInfo->dataVersion, pSDataBlock->info.version); + pAggSup->winRange = pTaskInfo->streamInfo.fillHistoryWindow; SColumnInfoData* pStartTsCol = taosArrayGet(pSDataBlock->pDataBlock, pInfo->primaryTsIndex); TSKEY* startTsCols = (int64_t*)pStartTsCol->pData; @@ -3266,7 +3263,6 @@ static void doStreamSessionAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSData } TSKEY* endTsCols = (int64_t*)pEndTsCol->pData; - SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; for (int32_t i = 0; i < rows;) { if (pInfo->ignoreExpiredData && isOverdue(endTsCols[i], &pInfo->twAggSup)) { i++; @@ -3291,7 +3287,7 @@ static void 
doStreamSessionAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSData if (code != TSDB_CODE_SUCCESS || pResult == NULL) { T_LONG_JMP(pTaskInfo->env, TSDB_CODE_OUT_OF_MEMORY); } - compactSessionWindow(pOperator, &winInfo, pStUpdated, pStDeleted); + compactSessionWindow(pOperator, &winInfo, pStUpdated, pStDeleted, addGap); saveSessionOutputBuf(pAggSup, &winInfo); if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE && pStUpdated) { @@ -3455,7 +3451,7 @@ static void rebuildSessionWindow(SOperatorInfo* pOperator, SArray* pWinArray, SS initSessionOutputBuf(&childWin, &pChResult, pChild->exprSupp.pCtx, numOfOutput, pChild->exprSupp.rowEntryInfoOffset); compactFunctions(pSup->pCtx, pChild->exprSupp.pCtx, numOfOutput, pTaskInfo, &pInfo->twAggSup.timeWindowData); - compactSessionWindow(pOperator, &parentWin, pStUpdated, NULL); + compactSessionWindow(pOperator, &parentWin, pStUpdated, NULL, true); saveResult(parentWin, pStUpdated); } else { break; @@ -3707,8 +3703,8 @@ void streamSessionReleaseState(SOperatorInfo* pOperator) { } void resetWinRange(STimeWindow* winRange) { - winRange->skey = INT16_MIN; - winRange->skey = INT16_MAX; + winRange->skey = INT64_MIN; + winRange->ekey = INT64_MAX; } void streamSessionReloadState(SOperatorInfo* pOperator) { @@ -3724,10 +3720,16 @@ void streamSessionReloadState(SOperatorInfo* pOperator) { int32_t num = size / sizeof(SSessionKey); SSessionKey* pSeKeyBuf = (SSessionKey*) pBuf; ASSERT(size == num * sizeof(SSessionKey)); + if (!pInfo->pStUpdated && num > 0) { + _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); + pInfo->pStUpdated = tSimpleHashInit(64, hashFn); + } for (int32_t i = 0; i < num; i++) { SResultWindowInfo winInfo = {0}; setSessionOutputBuf(pAggSup, pSeKeyBuf[i].win.skey, pSeKeyBuf[i].win.ekey, pSeKeyBuf[i].groupId, &winInfo); - compactSessionWindow(pOperator, &winInfo, pInfo->pStUpdated, pInfo->pStDeleted); + compactSessionWindow(pOperator, &winInfo, pInfo->pStUpdated, pInfo->pStDeleted, true); + saveSessionOutputBuf(pAggSup, &winInfo); + saveResult(winInfo, pInfo->pStUpdated); } taosMemoryFree(pBuf); @@ -4019,6 +4021,7 @@ void destroyStreamStateOperatorInfo(void* param) { colDataDestroy(&pInfo->twAggSup.timeWindowData); blockDataDestroy(pInfo->pDelRes); taosArrayDestroy(pInfo->historyWins); + tSimpleHashCleanup(pInfo->pSeUpdated); tSimpleHashCleanup(pInfo->pSeDeleted); taosMemoryFreeClear(param); } @@ -4073,6 +4076,7 @@ void setStateOutputBuf(SStreamAggSupporter* pAggSup, TSKEY ts, uint64_t groupId, if (code == TSDB_CODE_SUCCESS) { pCurWin->winInfo.isOutput = true; + pAggSup->stateStore.streamStateSessionDel(pAggSup->pState, &pCurWin->winInfo.sessionWin); } else if (pKeyData) { if (IS_VAR_DATA_TYPE(pAggSup->stateKeyType)) { varDataCopy(pCurWin->pStateKey->pData, pKeyData); @@ -4145,8 +4149,10 @@ static void doStreamStateAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSDataBl TSKEY* tsCols = NULL; SResultRow* pResult = NULL; int32_t winRows = 0; + SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; pInfo->dataVersion = TMAX(pInfo->dataVersion, pSDataBlock->info.version); + pAggSup->winRange = pTaskInfo->streamInfo.fillHistoryWindow; if (pSDataBlock->pDataBlock != NULL) { SColumnInfoData* pColDataInfo = taosArrayGet(pSDataBlock->pDataBlock, pInfo->primaryTsIndex); @@ -4155,7 +4161,6 @@ static void doStreamStateAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSDataBl return; } - SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; int32_t rows = pSDataBlock->info.rows; blockDataEnsureCapacity(pAggSup->pScanBlock, rows); 
SColumnInfoData* pKeyColInfo = taosArrayGet(pSDataBlock->pDataBlock, pInfo->stateCol.slotId); @@ -4335,7 +4340,7 @@ static void compactStateWindow(SOperatorInfo* pOperator, SResultWindowInfo* pCur SResultRow* pWinResult = NULL; initSessionOutputBuf(pNextWin, &pWinResult, pAggSup->pDummyCtx, numOfOutput, pSup->rowEntryInfoOffset); - updateTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &pCurWin->sessionWin.win, true); + updateTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &pCurWin->sessionWin.win, 1); compactFunctions(pSup->pCtx, pAggSup->pDummyCtx, numOfOutput, pTaskInfo, &pInfo->twAggSup.timeWindowData); tSimpleHashRemove(pStUpdated, &pNextWin->sessionWin, sizeof(SSessionKey)); if (pNextWin->isOutput && pStDeleted) { @@ -4344,11 +4349,10 @@ static void compactStateWindow(SOperatorInfo* pOperator, SResultWindowInfo* pCur removeSessionResult(pStUpdated, pAggSup->pResultRows, pNextWin->sessionWin); doDeleteSessionWindow(pAggSup, &pNextWin->sessionWin); taosMemoryFree(pNextWin->pOutputBuf); - saveSessionOutputBuf(pAggSup, pCurWin); } void streamStateReloadState(SOperatorInfo* pOperator) { - SStreamSessionAggOperatorInfo* pInfo = pOperator->info; + SStreamStateAggOperatorInfo* pInfo = pOperator->info; SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; resetWinRange(&pAggSup->winRange); @@ -4360,14 +4364,19 @@ void streamStateReloadState(SOperatorInfo* pOperator) { int32_t num = size / sizeof(SSessionKey); SSessionKey* pSeKeyBuf = (SSessionKey*) pBuf; ASSERT(size == num * sizeof(SSessionKey)); + if (!pInfo->pSeUpdated && num > 0) { + _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); + pInfo->pSeUpdated = tSimpleHashInit(64, hashFn); + } for (int32_t i = 0; i < num; i++) { SStateWindowInfo curInfo = {0}; SStateWindowInfo nextInfo = {0}; SStateWindowInfo dummy = {0}; setStateOutputBuf(pAggSup, pSeKeyBuf[i].win.skey, pSeKeyBuf[i].groupId, NULL, &curInfo, &nextInfo); if (compareStateKey(curInfo.pStateKey,nextInfo.pStateKey)) { - compactStateWindow(pOperator, &curInfo.winInfo, &nextInfo.winInfo, pInfo->pStUpdated, pInfo->pStDeleted); - saveResult(curInfo.winInfo, pInfo->pStUpdated); + compactStateWindow(pOperator, &curInfo.winInfo, &nextInfo.winInfo, pInfo->pSeUpdated, pInfo->pSeUpdated); + saveSessionOutputBuf(pAggSup, &curInfo.winInfo); + saveResult(curInfo.winInfo, pInfo->pSeUpdated); } if (IS_VALID_SESSION_WIN(curInfo.winInfo)) { @@ -4855,7 +4864,7 @@ static void doMergeIntervalAggImpl(SOperatorInfo* pOperatorInfo, SResultRowInfo* doWindowBorderInterpolation(iaInfo, pBlock, pResult, &win, startPos, forwardRows, pExprSup); } - updateTimeWindowInfo(&iaInfo->twAggSup.timeWindowData, &win, true); + updateTimeWindowInfo(&iaInfo->twAggSup.timeWindowData, &win, 1); applyAggFunctionOnPartialTuples(pTaskInfo, pExprSup->pCtx, &iaInfo->twAggSup.timeWindowData, startPos, forwardRows, pBlock->info.rows, numOfOutput); doCloseWindow(pResultRowInfo, iaInfo, pResult); diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index 18ec80e87a..ffefdb0003 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -1864,7 +1864,6 @@ int32_t streamStateSessionAddIfNotExist_rocksdb(SStreamState* pState, SSessionKe if (sessionRangeKeyCmpr(&searchKey, key) == 0) { memcpy(tmp, *pVal, valSize); taosMemoryFreeClear(*pVal); - streamStateSessionDel_rocksdb(pState, key); goto _end; } taosMemoryFreeClear(*pVal); @@ -1880,7 +1879,6 @@ int32_t streamStateSessionAddIfNotExist_rocksdb(SStreamState* 
pState, SSessionKe if (code == 0) { if (sessionRangeKeyCmpr(&searchKey, key) == 0) { memcpy(tmp, *pVal, valSize); - streamStateSessionDel_rocksdb(pState, key); goto _end; } } @@ -1938,14 +1936,12 @@ int32_t streamStateStateAddIfNotExist_rocksdb(SStreamState* pState, SSessionKey* if (code == 0) { if (key->win.skey <= tmpKey.win.skey && tmpKey.win.ekey <= key->win.ekey) { memcpy(tmp, *pVal, valSize); - streamStateSessionDel_rocksdb(pState, key); goto _end; } void* stateKey = (char*)(*pVal) + (valSize - keyDataLen); if (fn(pKeyData, stateKey) == true) { memcpy(tmp, *pVal, valSize); - streamStateSessionDel_rocksdb(pState, key); goto _end; } @@ -1961,7 +1957,6 @@ int32_t streamStateStateAddIfNotExist_rocksdb(SStreamState* pState, SSessionKey* void* stateKey = (char*)(*pVal) + (valSize - keyDataLen); if (fn(pKeyData, stateKey) == true) { memcpy(tmp, *pVal, valSize); - streamStateSessionDel_rocksdb(pState, key); goto _end; } } From c7e4aa4f7751ec919911d3ef85e1f882753cfe8c Mon Sep 17 00:00:00 2001 From: liuyao <54liuyao@163.com> Date: Wed, 19 Jul 2023 15:51:22 +0800 Subject: [PATCH 14/23] set win range --- source/libs/executor/src/timewindowoperator.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/source/libs/executor/src/timewindowoperator.c b/source/libs/executor/src/timewindowoperator.c index 5d1fc0530f..e8059afb2f 100644 --- a/source/libs/executor/src/timewindowoperator.c +++ b/source/libs/executor/src/timewindowoperator.c @@ -3252,6 +3252,9 @@ static void doStreamSessionAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSData pInfo->dataVersion = TMAX(pInfo->dataVersion, pSDataBlock->info.version); pAggSup->winRange = pTaskInfo->streamInfo.fillHistoryWindow; + if (pAggSup->winRange.ekey <= 0) { + pAggSup->winRange.ekey = INT64_MAX; + } SColumnInfoData* pStartTsCol = taosArrayGet(pSDataBlock->pDataBlock, pInfo->primaryTsIndex); TSKEY* startTsCols = (int64_t*)pStartTsCol->pData; @@ -4153,6 +4156,9 @@ static void doStreamStateAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSDataBl pInfo->dataVersion = TMAX(pInfo->dataVersion, pSDataBlock->info.version); pAggSup->winRange = pTaskInfo->streamInfo.fillHistoryWindow; + if (pAggSup->winRange.ekey <= 0) { + pAggSup->winRange.ekey = INT64_MAX; + } if (pSDataBlock->pDataBlock != NULL) { SColumnInfoData* pColDataInfo = taosArrayGet(pSDataBlock->pDataBlock, pInfo->primaryTsIndex); From af18afba9d0d1fc822c3f06934376faed16a4561 Mon Sep 17 00:00:00 2001 From: liuyao <54liuyao@163.com> Date: Wed, 19 Jul 2023 17:19:55 +0800 Subject: [PATCH 15/23] fill history --- source/libs/executor/src/scanoperator.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 9abe4ffef6..b8d94695ca 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -1820,11 +1820,6 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) { if (pStreamInfo->recoverStep == STREAM_RECOVER_STEP__SCAN1 || pStreamInfo->recoverStep == STREAM_RECOVER_STEP__SCAN2) { - if (pInfo->blockRecoverContiCnt > 100) { - pInfo->blockRecoverTotCnt += pInfo->blockRecoverContiCnt; - pInfo->blockRecoverContiCnt = 0; - return NULL; - } switch (pInfo->scanMode) { case STREAM_SCAN_FROM_RES: { From 96a9d033e54ca01d78b65252b9d7a7f1906c6ebf Mon Sep 17 00:00:00 2001 From: Shungang Li Date: Wed, 19 Jul 2023 17:41:45 +0800 Subject: [PATCH 16/23] docs: add info for geometry --- docs/en/12-taos-sql/01-data-type.md | 10 +++++++++ docs/en/12-taos-sql/03-table.md | 32 
++++++++++++++--------------- docs/en/12-taos-sql/16-operators.md | 2 +- docs/en/12-taos-sql/29-changes.md | 1 + docs/zh/12-taos-sql/01-data-type.md | 11 +++++++++- docs/zh/12-taos-sql/03-table.md | 4 ++-- docs/zh/12-taos-sql/16-operators.md | 2 +- docs/zh/12-taos-sql/29-changes.md | 1 + 8 files changed, 42 insertions(+), 21 deletions(-) diff --git a/docs/en/12-taos-sql/01-data-type.md b/docs/en/12-taos-sql/01-data-type.md index 13007d5bb1..b9d51bcfcd 100644 --- a/docs/en/12-taos-sql/01-data-type.md +++ b/docs/en/12-taos-sql/01-data-type.md @@ -42,10 +42,20 @@ In TDengine, the data types below can be used when specifying a column or tag. | 14 | NCHAR | User Defined | Multi-byte string that can include multi byte characters like Chinese characters. Each character of NCHAR type consumes 4 bytes storage. The string value should be quoted with single quotes. Literal single quote inside the string must be preceded with backslash, like `\'`. The length must be specified when defining a column or tag of NCHAR type, for example nchar(10) means it can store at most 10 characters of nchar type and will consume fixed storage of 40 bytes. An error will be reported if the string value exceeds the length defined. | | 15 | JSON | | JSON type can only be used on tags. A tag of json type is excluded with any other tags of any other type. | | 16 | VARCHAR | User-defined | Alias of BINARY | +| 16 | GEOMETRY | User-defined | Geometry | :::note +- Each row of the table cannot be longer than 48KB (64KB since version 3.0.5.0) (note that each BINARY/NCHAR/GEOMETRY column takes up an additional 2 bytes of storage space). - Only ASCII visible characters are suggested to be used in a column or tag of BINARY type. Multi-byte characters must be stored in NCHAR type. - The length of BINARY can be up to 16,374(data column is 65,517 and tag column is 16,382 since version 3.0.5.0) bytes. The string value must be quoted with single quotes. You must specify a length in bytes for a BINARY value, for example binary(20) for up to twenty single-byte characters. If the data exceeds the specified length, an error will occur. The literal single quote inside the string must be preceded with back slash like `\'` +- The maximum length of the GEOMETRY data column is 65,517 bytes, and the maximum length of the tag column is 16,382 bytes. Supports POINT, LINESTRING, and POLYGON subtypes of 2D. The following table describes the length calculation method: + + | # | **Syntax** | **MinLen** | **MaxLen** | **Growth of each point** | + |---|--------------------------------------|------------|------------|--------------------------| + | 1 | POINT(1.0 1.0) | 21 | 21 | NA | + | 2 | LINESTRING(1.0 1.0, 2.0 2.0) | 9+2*16 | 9+4094*16 | +16 | + | 3 | POLYGON((1.0 1.0, 2.0 2.0, 1.0 1.0)) | 13+3*16 | 13+4094*16 | +16 | + - Numeric values in SQL statements will be determined as integer or float type according to whether there is decimal point or whether scientific notation is used, so attention must be paid to avoid overflow. For example, 9999999999999999999 will be considered as overflow because it exceeds the upper limit of long integer, but 9999999999999999999.0 will be considered as a legal float number. ::: diff --git a/docs/en/12-taos-sql/03-table.md b/docs/en/12-taos-sql/03-table.md index 7f39fb5867..10c44848c9 100644 --- a/docs/en/12-taos-sql/03-table.md +++ b/docs/en/12-taos-sql/03-table.md @@ -9,27 +9,27 @@ You create standard tables and subtables with the `CREATE TABLE` statement. 
```sql CREATE TABLE [IF NOT EXISTS] [db_name.]tb_name (create_definition [, create_definition] ...) [table_options] - + CREATE TABLE create_subtable_clause - + CREATE TABLE [IF NOT EXISTS] [db_name.]tb_name (create_definition [, create_definition] ...) [TAGS (create_definition [, create_definition] ...)] [table_options] - + create_subtable_clause: { create_subtable_clause [create_subtable_clause] ... | [IF NOT EXISTS] [db_name.]tb_name USING [db_name.]stb_name [(tag_name [, tag_name] ...)] TAGS (tag_value [, tag_value] ...) } - + create_definition: col_name column_definition - + column_definition: type_name [comment 'string_value'] - + table_options: table_option ... - + table_option: { COMMENT 'string_value' | WATERMARK duration[,duration] @@ -45,9 +45,9 @@ table_option: { 1. The first column of a table MUST be of type TIMESTAMP. It is automatically set as the primary key. 2. The maximum length of the table name is 192 bytes. -3. The maximum length of each row is 48k(64k since version 3.0.5.0) bytes, please note that the extra 2 bytes used by each BINARY/NCHAR column are also counted. +3. The maximum length of each row is 48k(64k since version 3.0.5.0) bytes, please note that the extra 2 bytes used by each BINARY/NCHAR/GEOMETRY column are also counted. 4. The name of the subtable can only consist of characters from the English alphabet, digits and underscore. Table names can't start with a digit. Table names are case insensitive. -5. The maximum length in bytes must be specified when using BINARY or NCHAR types. +5. The maximum length in bytes must be specified when using BINARY/NCHAR/GEOMETRY types. 6. Escape character "\`" can be used to avoid the conflict between table names and reserved keywords, above rules will be bypassed when using escape character on table names, but the upper limit for the name length is still valid. The table names specified using escape character are case sensitive. For example \`aBc\` and \`abc\` are different table names but `abc` and `aBc` are same table names because they are both converted to `abc` internally. Only ASCII visible characters can be used with escape character. @@ -58,7 +58,7 @@ table_option: { 3. MAX_DELAY: specifies the maximum latency for pushing computation results. The default value is 15 minutes or the value of the INTERVAL parameter, whichever is smaller. Enter a value between 0 and 15 minutes in milliseconds, seconds, or minutes. You can enter multiple values separated by commas (,). Note: Retain the default value if possible. Configuring a small MAX_DELAY may cause results to be frequently pushed, affecting storage and query performance. This parameter applies only to supertables and takes effect only when the RETENTIONS parameter has been specified for the database. 4. ROLLUP: specifies aggregate functions to roll up. Rolling up a function provides downsampled results based on multiple axes. This parameter applies only to supertables and takes effect only when the RETENTIONS parameter has been specified for the database. You can specify only one function to roll up. The rollup takes effect on all columns except TS. Enter one of the following values: avg, sum, min, max, last, or first. 5. SMA: specifies functions on which to enable small materialized aggregates (SMA). SMA is user-defined precomputation of aggregates based on data blocks. Enter one of the following values: max, min, or sum This parameter can be used with supertables and standard tables. -6. TTL: specifies the time to live (TTL) for the table. 
If TTL is specified when creatinga table, after the time period for which the table has been existing is over TTL, TDengine will automatically delete the table. Please be noted that the system may not delete the table at the exact moment that the TTL expires but guarantee there is such a system and finally the table will be deleted. The unit of TTL is in days. The default value is 0, i.e. never expire. +6. TTL: specifies the time to live (TTL) for the table. If TTL is specified when creatinga table, after the time period for which the table has been existing is over TTL, TDengine will automatically delete the table. Please be noted that the system may not delete the table at the exact moment that the TTL expires but guarantee there is such a system and finally the table will be deleted. The unit of TTL is in days. The default value is 0, i.e. never expire. ## Create Subtables @@ -88,7 +88,7 @@ You can create multiple subtables in a single SQL statement provided that all su ```sql ALTER TABLE [db_name.]tb_name alter_table_clause - + alter_table_clause: { alter_table_options | ADD COLUMN col_name column_type @@ -96,10 +96,10 @@ alter_table_clause: { | MODIFY COLUMN col_name column_type | RENAME COLUMN old_col_name new_col_name } - + alter_table_options: alter_table_option ... - + alter_table_option: { TTL value | COMMENT 'string_value' @@ -142,15 +142,15 @@ ALTER TABLE tb_name RENAME COLUMN old_col_name new_col_name ```sql ALTER TABLE [db_name.]tb_name alter_table_clause - + alter_table_clause: { alter_table_options | SET TAG tag_name = new_tag_value } - + alter_table_options: alter_table_option ... - + alter_table_option: { TTL value | COMMENT 'string_value' diff --git a/docs/en/12-taos-sql/16-operators.md b/docs/en/12-taos-sql/16-operators.md index 32ad4e7075..9328d1688a 100644 --- a/docs/en/12-taos-sql/16-operators.md +++ b/docs/en/12-taos-sql/16-operators.md @@ -39,7 +39,7 @@ TDengine supports the `UNION` and `UNION ALL` operations. UNION ALL collects all | 3 | \>, < | All types except BLOB, MEDIUMBLOB, and JSON | Greater than and less than | | 4 | \>=, <= | All types except BLOB, MEDIUMBLOB, and JSON | Greater than or equal to and less than or equal to | | 5 | IS [NOT] NULL | All types | Indicates whether the value is null | -| 6 | [NOT] BETWEEN AND | All types except BLOB, MEDIUMBLOB, and JSON | Closed interval comparison | +| 6 | [NOT] BETWEEN AND | All types except BLOB, MEDIUMBLOB, JSON and GEOMETRY | Closed interval comparison | | 7 | IN | All types except BLOB, MEDIUMBLOB, and JSON; the primary key (timestamp) is also not supported | Equal to any value in the list | | 8 | LIKE | BINARY, NCHAR, and VARCHAR | Wildcard match | | 9 | MATCH, NMATCH | BINARY, NCHAR, and VARCHAR | Regular expression match | diff --git a/docs/en/12-taos-sql/29-changes.md b/docs/en/12-taos-sql/29-changes.md index d668aa8345..bbb52db4d9 100644 --- a/docs/en/12-taos-sql/29-changes.md +++ b/docs/en/12-taos-sql/29-changes.md @@ -18,6 +18,7 @@ description: This document describes how TDengine SQL has changed in version 3.0 | 8 | Mixed operations | Enhanced | Mixing scalar and vector operations in queries has been enhanced and is supported in all SELECT clauses. | 9 | Tag operations | Added | Tag columns can be used in queries and clauses like data columns. | 10 | Timeline clauses and time functions in supertables | Enhanced | When PARTITION BY is not used, data in supertables is merged into a single timeline. 
+| 11 | GEOMETRY | Added | Geometry ## SQL Syntax diff --git a/docs/zh/12-taos-sql/01-data-type.md b/docs/zh/12-taos-sql/01-data-type.md index 4a4c1d6ec6..1df07e7e7f 100644 --- a/docs/zh/12-taos-sql/01-data-type.md +++ b/docs/zh/12-taos-sql/01-data-type.md @@ -42,12 +42,21 @@ CREATE DATABASE db_name PRECISION 'ns'; | 14 | NCHAR | 自定义 | 记录包含多字节字符在内的字符串,如中文字符。每个 NCHAR 字符占用 4 字节的存储空间。字符串两端使用单引号引用,字符串内的单引号需用转义字符 `\'`。NCHAR 使用时须指定字符串大小,类型为 NCHAR(10) 的列表示此列的字符串最多存储 10 个 NCHAR 字符。如果用户字符串长度超出声明长度,将会报错。 | | 15 | JSON | | JSON 数据类型, 只有 Tag 可以是 JSON 格式 | | 16 | VARCHAR | 自定义 | BINARY 类型的别名 | +| 17 | GEOMETRY | 自定义 | 几何类型 | :::note -- 表的每行长度不能超过 48KB(从 3.0.5.0 版本开始为 64KB)(注意:每个 BINARY/NCHAR 类型的列还会额外占用 2 个字节的存储位置)。 +- 表的每行长度不能超过 48KB(从 3.0.5.0 版本开始为 64KB)(注意:每个 BINARY/NCHAR/GEOMETRY 类型的列还会额外占用 2 个字节的存储位置)。 - 虽然 BINARY 类型在底层存储上支持字节型的二进制字符,但不同编程语言对二进制数据的处理方式并不保证一致,因此建议在 BINARY 类型中只存储 ASCII 可见字符,而避免存储不可见字符。多字节的数据,例如中文字符,则需要使用 NCHAR 类型进行保存。如果强行使用 BINARY 类型保存中文字符,虽然有时也能正常读写,但并不带有字符集信息,很容易出现数据乱码甚至数据损坏等情况。 - BINARY 类型理论上最长可以有 16,374(从 3.0.5.0 版本开始,数据列为 65,517,标签列为 16,382) 字节。BINARY 仅支持字符串输入,字符串两端需使用单引号引用。使用时须指定大小,如 BINARY(20) 定义了最长为 20 个单字节字符的字符串,每个字符占 1 字节的存储空间,总共固定占用 20 字节的空间,此时如果用户字符串超出 20 字节将会报错。对于字符串内的单引号,可以用转义字符反斜线加单引号来表示,即 `\'`。 +- GEOMETRY 类型数据列为最大长度为 65,517 字节,标签列最大长度为 16,382 字节。支持 2D 的 POINT、LINESTRING 和 POLYGON 子类型数据。长度计算方式如下表所示: + + | # | **语法** | **最小长度** | **最大长度** | **每组坐标长度增长** | + |---|--------------------------------------|----------|------------|--------------| + | 1 | POINT(1.0 1.0) | 21 | 21 | 无 | + | 2 | LINESTRING(1.0 1.0, 2.0 2.0) | 9+2*16 | 9+4094*16 | +16 | + | 3 | POLYGON((1.0 1.0, 2.0 2.0, 1.0 1.0)) | 13+3*16 | 13+4094*16 | +16 | + - SQL 语句中的数值类型将依据是否存在小数点,或使用科学计数法表示,来判断数值类型是否为整型或者浮点型,因此在使用时要注意相应类型越界的情况。例如,9999999999999999999 会认为超过长整型的上边界而溢出,而 9999999999999999999.0 会被认为是有效的浮点数。 ::: diff --git a/docs/zh/12-taos-sql/03-table.md b/docs/zh/12-taos-sql/03-table.md index 2e66ac4002..f13a4ac94a 100644 --- a/docs/zh/12-taos-sql/03-table.md +++ b/docs/zh/12-taos-sql/03-table.md @@ -43,9 +43,9 @@ table_option: { 1. 表的第一个字段必须是 TIMESTAMP,并且系统自动将其设为主键; 2. 表名最大长度为 192; -3. 表的每行长度不能超过 48KB(从 3.0.5.0 版本开始为 64KB);(注意:每个 BINARY/NCHAR 类型的列还会额外占用 2 个字节的存储位置) +3. 表的每行长度不能超过 48KB(从 3.0.5.0 版本开始为 64KB);(注意:每个 BINARY/NCHAR/GEOMETRY 类型的列还会额外占用 2 个字节的存储位置) 4. 子表名只能由字母、数字和下划线组成,且不能以数字开头,不区分大小写 -5. 使用数据类型 binary 或 nchar,需指定其最长的字节数,如 binary(20),表示 20 字节; +5. 使用数据类型 BINARY/NCHAR/GEOMETRY,需指定其最长的字节数,如 BINARY(20),表示 20 字节; 6. 
为了兼容支持更多形式的表名,TDengine 引入新的转义符 "\`",可以让表名与关键词不冲突,同时不受限于上述表名称合法性约束检查。但是同样具有长度限制要求。使用转义字符以后,不再对转义字符中的内容进行大小写统一。 例如:\`aBc\` 和 \`abc\` 是不同的表名,但是 abc 和 aBc 是相同的表名。 需要注意的是转义字符中的内容必须是可打印字符。 diff --git a/docs/zh/12-taos-sql/16-operators.md b/docs/zh/12-taos-sql/16-operators.md index 48e9991799..0636121edd 100644 --- a/docs/zh/12-taos-sql/16-operators.md +++ b/docs/zh/12-taos-sql/16-operators.md @@ -39,7 +39,7 @@ TDengine 支持 `UNION ALL` 和 `UNION` 操作符。UNION ALL 将查询返回的 | 3 | \>, < | 除 BLOB、MEDIUMBLOB 和 JSON 外的所有类型 | 大于,小于 | | 4 | \>=, <= | 除 BLOB、MEDIUMBLOB 和 JSON 外的所有类型 | 大于等于,小于等于 | | 5 | IS [NOT] NULL | 所有类型 | 是否为空值 | -| 6 | [NOT] BETWEEN AND | 除 BOOL、BLOB、MEDIUMBLOB 和 JSON 外的所有类型 | 闭区间比较 | +| 6 | [NOT] BETWEEN AND | 除 BOOL、BLOB、MEDIUMBLOB、JSON 和 GEOMETRY 外的所有类型 | 闭区间比较 | | 7 | IN | 除 BLOB、MEDIUMBLOB 和 JSON 外的所有类型,且不可以为表的时间戳主键列 | 与列表内的任意值相等 | | 8 | LIKE | BINARY、NCHAR 和 VARCHAR | 通配符匹配 | | 9 | MATCH, NMATCH | BINARY、NCHAR 和 VARCHAR | 正则表达式匹配 | diff --git a/docs/zh/12-taos-sql/29-changes.md b/docs/zh/12-taos-sql/29-changes.md index 4177fa547e..2a1e5f092c 100644 --- a/docs/zh/12-taos-sql/29-changes.md +++ b/docs/zh/12-taos-sql/29-changes.md @@ -18,6 +18,7 @@ description: "TDengine 3.0 版本的语法变更说明" | 8 | 混合运算 | 增强 | 查询中的混合运算(标量运算和矢量运算混合)全面增强,SELECT的各个子句均全面支持符合语法语义的混合运算。 | 9 | 标签运算 | 新增 |在查询中,标签列可以像普通列一样参与各种运算,用于各种子句。 | 10 | 时间线子句和时间函数用于超级表查询 | 增强 |没有PARTITION BY时,超级表的数据会被合并成一条时间线。 +| 11 | GEOMETRY | 新增 | 几何类型。 ## SQL 语句变更 From 8a6ed67f9df4e779a286fa7270e918b0ed3c187b Mon Sep 17 00:00:00 2001 From: liuyao <54liuyao@163.com> Date: Thu, 20 Jul 2023 09:13:30 +0800 Subject: [PATCH 17/23] fill history --- source/libs/executor/inc/executorInt.h | 1 - source/libs/executor/src/scanoperator.c | 4 +++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/source/libs/executor/inc/executorInt.h b/source/libs/executor/inc/executorInt.h index b3d0ff8225..9c41eb2bf0 100644 --- a/source/libs/executor/inc/executorInt.h +++ b/source/libs/executor/inc/executorInt.h @@ -366,7 +366,6 @@ typedef struct SStreamScanInfo { SNode* pTagIndexCond; // recover - int32_t blockRecoverContiCnt; int32_t blockRecoverTotCnt; SSDataBlock* pRecoverRes; diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index b8d94695ca..f1de80ec4c 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -1820,6 +1820,9 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) { if (pStreamInfo->recoverStep == STREAM_RECOVER_STEP__SCAN1 || pStreamInfo->recoverStep == STREAM_RECOVER_STEP__SCAN2) { + if (isTaskKilled(pTaskInfo)) { + return NULL; + } switch (pInfo->scanMode) { case STREAM_SCAN_FROM_RES: { @@ -1862,7 +1865,6 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) { pInfo->pRecoverRes = doTableScan(pInfo->pTableScanOp); if (pInfo->pRecoverRes != NULL) { - pInfo->blockRecoverContiCnt++; calBlockTbName(pInfo, pInfo->pRecoverRes); if (!pInfo->igCheckUpdate && pInfo->pUpdateInfo) { if (pStreamInfo->recoverStep == STREAM_RECOVER_STEP__SCAN1) { From 30129f64da3da67b56f99951ddde7ae0a784c14d Mon Sep 17 00:00:00 2001 From: slzhou Date: Thu, 20 Jul 2023 09:28:19 +0800 Subject: [PATCH 18/23] enhance: merge blocks of the same table before sort --- source/libs/executor/src/tsort.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index c56dab1e33..18c46cd03f 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -24,6 
+24,7 @@ #include "tpagedbuf.h" #include "tsort.h" #include "tutil.h" +#include "tsimplehash.h" struct STupleHandle { SSDataBlock* pBlock; @@ -975,15 +976,27 @@ static int32_t createBlocksMergeSortInitialSources(SSortHandle* pHandle) { int32_t szSort = 0; SArray* aBlkSort = taosArrayInit(8, POINTER_BYTES); + SSHashObj* mUidBlk = tSimpleHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT)); while (1) { SSDataBlock* pBlk = pHandle->fetchfp(pSrc->param); if (pBlk != NULL) { szSort += blockDataGetSize(pBlk); - SSDataBlock* blk = createOneDataBlock(pBlk, true); - taosArrayPush(aBlkSort, &blk); + + void* ppBlk = tSimpleHashGet(mUidBlk, &pBlk->info.id.uid, sizeof(pBlk->info.id.uid)); + if (ppBlk != NULL) { + SSDataBlock* tBlk = *(SSDataBlock**)(ppBlk); + blockDataMerge(tBlk, pBlk); + } else { + SSDataBlock* tBlk = createOneDataBlock(pBlk, true); + tSimpleHashPut(mUidBlk, &pBlk->info.id.uid, sizeof(pBlk->info.id.uid), &tBlk, POINTER_BYTES); + taosArrayPush(aBlkSort, &tBlk); + } } + if ((pBlk != NULL && szSort > maxBufSize) || (pBlk == NULL && szSort > 0)) { + tSimpleHashClear(mUidBlk); + int64_t p = taosGetTimestampUs(); sortBlocksToExtSource(pHandle, aBlkSort, pOrder, aExtSrc); int64_t el = taosGetTimestampUs() - p; @@ -1000,7 +1013,7 @@ static int32_t createBlocksMergeSortInitialSources(SSortHandle* pHandle) { break; }; } - + tSimpleHashCleanup(mUidBlk); taosArrayDestroy(aBlkSort); tsortClearOrderdSource(pHandle->pOrderedSource, NULL, NULL); taosArrayAddAll(pHandle->pOrderedSource, aExtSrc); From 5d9f6fd4812db0bdee7f328c955e1aeb60af5864 Mon Sep 17 00:00:00 2001 From: slzhou Date: Thu, 20 Jul 2023 12:29:34 +0800 Subject: [PATCH 19/23] fix: fix limit/offset bugs --- source/libs/executor/src/scanoperator.c | 38 ++++++++++++++----------- source/libs/executor/src/tsort.c | 33 ++++++++++++--------- 2 files changed, 41 insertions(+), 30 deletions(-) diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 6edd7b2d47..27cbdb66cf 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -2905,28 +2905,32 @@ SSDataBlock* getSortedTableMergeScanBlockData(SSortHandle* pHandle, SSDataBlock* SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; blockDataCleanup(pResBlock); - + STupleHandle* pTupleHandle = NULL; while (1) { - STupleHandle* pTupleHandle = tsortNextTuple(pHandle); - if (pTupleHandle == NULL) { - break; + while (1) { + pTupleHandle = tsortNextTuple(pHandle); + if (pTupleHandle == NULL) { + break; + } + + appendOneRowToDataBlock(pResBlock, pTupleHandle); + if (pResBlock->info.rows >= capacity) { + break; + } } - appendOneRowToDataBlock(pResBlock, pTupleHandle); - if (pResBlock->info.rows >= capacity) { - break; + if (tsortIsClosed(pHandle)) { + terrno = TSDB_CODE_TSC_QUERY_CANCELLED; + T_LONG_JMP(pOperator->pTaskInfo->env, terrno); } + + bool limitReached = applyLimitOffset(&pInfo->limitInfo, pResBlock, pTaskInfo); + qDebug("%s get sorted row block, rows:%" PRId64 ", limit:%" PRId64, GET_TASKID(pTaskInfo), pResBlock->info.rows, + pInfo->limitInfo.numOfOutputRows); + if (pTupleHandle == NULL || limitReached || pResBlock->info.rows > 0) { + break; + } } - - if (tsortIsClosed(pHandle)) { - terrno = TSDB_CODE_TSC_QUERY_CANCELLED; - T_LONG_JMP(pOperator->pTaskInfo->env, terrno); - } - - bool limitReached = applyLimitOffset(&pInfo->limitInfo, pResBlock, pTaskInfo); - qDebug("%s get sorted row block, rows:%" PRId64 ", limit:%" PRId64, GET_TASKID(pTaskInfo), pResBlock->info.rows, - 
pInfo->limitInfo.numOfOutputRows); - return (pResBlock->info.rows > 0) ? pResBlock : NULL; } diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 18c46cd03f..b51f515240 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -714,7 +714,9 @@ static int32_t doInternalMergeSort(SSortHandle* pHandle) { taosArrayDestroy(pResList); return code; } - int nRows = 0; + + int nMergedRows = 0; + SArray* pPageIdList = taosArrayInit(4, sizeof(int32_t)); while (1) { if (tsortIsClosed(pHandle)) { @@ -722,15 +724,10 @@ static int32_t doInternalMergeSort(SSortHandle* pHandle) { return code; } - if (pHandle->mergeLimit != -1 && nRows >= pHandle->mergeLimit) { - break; - } - SSDataBlock* pDataBlock = getSortedBlockDataInner(pHandle, &pHandle->cmpParam, numOfRows); if (pDataBlock == NULL) { break; } - nRows += pDataBlock->info.rows; int32_t pageId = -1; void* pPage = getNewBufPage(pHandle->pBuf, &pageId); @@ -750,7 +747,12 @@ static int32_t doInternalMergeSort(SSortHandle* pHandle) { setBufPageDirty(pPage, true); releaseBufPage(pHandle->pBuf, pPage); + nMergedRows += pDataBlock->info.rows; + blockDataCleanup(pDataBlock); + if ((pHandle->mergeLimit != -1) && (nMergedRows >= pHandle->mergeLimit)) { + break; + } } sortComparCleanup(&pHandle->cmpParam); @@ -915,15 +917,12 @@ static int32_t sortBlocksToExtSource(SSortHandle* pHandle, SArray* aBlk, SBlockO SMultiwayMergeTreeInfo* pTree = NULL; tMergeTreeCreate(&pTree, taosArrayGetSize(aBlk), &sup, blockCompareTsFn); - int32_t numEnded = 0; int32_t nRows = 0; - + int32_t nMergedRows = 0; + bool mergeLimitReached = false; size_t blkPgSz = pgHeaderSz; while (nRows < totalRows) { - if (pHandle->mergeLimit != -1 && nRows >= pHandle->mergeLimit) { - break; - } int32_t minIdx = tMergeTreeGetChosenIndex(pTree); SSDataBlock* minBlk = taosArrayGetP(aBlk, minIdx); int32_t minRow = sup.aRowIdx[minIdx]; @@ -931,9 +930,15 @@ static int32_t sortBlocksToExtSource(SSortHandle* pHandle, SArray* aBlk, SBlockO if (blkPgSz <= pHandle->pageSize && blkPgSz + bufInc > pHandle->pageSize) { appendDataBlockToPageBuf(pHandle, pHandle->pDataBlock, aPgId); + nMergedRows += pHandle->pDataBlock->info.rows; + blockDataCleanup(pHandle->pDataBlock); blkPgSz = pgHeaderSz; bufInc = getPageBufIncForRow(minBlk, minRow, 0); + if ((pHandle->mergeLimit != -1) && (nMergedRows >= pHandle->mergeLimit)) { + mergeLimitReached = true; + break; + } } blockDataEnsureCapacity(pHandle->pDataBlock, pHandle->pDataBlock->info.rows + 1); appendOneRowToDataBlock(pHandle->pDataBlock, minBlk, &minRow); @@ -943,14 +948,16 @@ static int32_t sortBlocksToExtSource(SSortHandle* pHandle, SArray* aBlk, SBlockO if (sup.aRowIdx[minIdx] == minBlk->info.rows - 1) { sup.aRowIdx[minIdx] = -1; - ++numEnded; } else { ++sup.aRowIdx[minIdx]; } tMergeTreeAdjust(pTree, tMergeTreeGetAdjustIndex(pTree)); } if (pHandle->pDataBlock->info.rows > 0) { - appendDataBlockToPageBuf(pHandle, pHandle->pDataBlock, aPgId); + if (!mergeLimitReached) { + appendDataBlockToPageBuf(pHandle, pHandle->pDataBlock, aPgId); + nMergedRows += pHandle->pDataBlock->info.rows; + } blockDataCleanup(pHandle->pDataBlock); } SSDataBlock* pMemSrcBlk = createOneDataBlock(pHandle->pDataBlock, false); From 1a0031a43c48291473fd9254fbcb033a9466a349 Mon Sep 17 00:00:00 2001 From: slzhou Date: Thu, 20 Jul 2023 13:53:13 +0800 Subject: [PATCH 20/23] enhance: uinfo to qdebug and increase pages num to 2048 --- source/libs/executor/src/scanoperator.c | 2 +- source/libs/executor/src/tsort.c | 18 +++++++----------- 2 files 
changed, 8 insertions(+), 12 deletions(-) diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 27cbdb66cf..dbec81e6a4 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -2840,7 +2840,7 @@ int32_t startGroupTableMergeScan(SOperatorInfo* pOperator) { int32_t tableStartIdx = pInfo->tableStartIndex; int32_t tableEndIdx = pInfo->tableEndIndex; - pInfo->sortBufSize = 1024 * pInfo->bufPageSize; + pInfo->sortBufSize = 2048 * pInfo->bufPageSize; int32_t numOfBufPage = pInfo->sortBufSize / pInfo->bufPageSize; pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_BLOCK_TS_MERGE, pInfo->bufPageSize, numOfBufPage, pInfo->pSortInputBlock, pTaskInfo->id.str, 0, 0, 0); diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index b51f515240..30e7148736 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -480,13 +480,13 @@ static int32_t adjustMergeTreeForNextTuple(SSortSource* pSource, SMultiwayMergeT if (pHandle->type == SORT_SINGLESOURCE_SORT) { pSource->pageIndex++; if (pSource->pageIndex >= taosArrayGetSize(pSource->pageIdList)) { - uInfo("adjust merge tree. %d source completed %d", *numOfCompleted, pSource->pageIndex); + qDebug("adjust merge tree. %d source completed %d", *numOfCompleted, pSource->pageIndex); (*numOfCompleted) += 1; pSource->src.rowIndex = -1; pSource->pageIndex = -1; pSource->src.pBlock = blockDataDestroy(pSource->src.pBlock); } else { - if (pSource->pageIndex % 512 == 0) uInfo("begin source %p page %d", pSource, pSource->pageIndex); + if (pSource->pageIndex % 512 == 0) qDebug("begin source %p page %d", pSource, pSource->pageIndex); int32_t* pPgId = taosArrayGet(pSource->pageIdList, pSource->pageIndex); @@ -510,7 +510,7 @@ static int32_t adjustMergeTreeForNextTuple(SSortSource* pSource, SMultiwayMergeT if (pSource->src.pBlock == NULL) { (*numOfCompleted) += 1; pSource->src.rowIndex = -1; - uInfo("adjust merge tree. %d source completed", *numOfCompleted); + qDebug("adjust merge tree. %d source completed", *numOfCompleted); } } } @@ -692,7 +692,7 @@ static int32_t doInternalMergeSort(SSortHandle* pHandle) { // Only *numOfInputSources* can be loaded into buffer to perform the external sort. for (int32_t i = 0; i < sortGroup; ++i) { - uInfo("internal merge sort pass %d group %d. num input sources %d ", t, i, numOfInputSources); + qDebug("internal merge sort pass %d group %d. 
num input sources %d ", t, i, numOfInputSources); pHandle->sourceId += 1; int32_t end = (i + 1) * numOfInputSources - 1; @@ -716,7 +716,7 @@ static int32_t doInternalMergeSort(SSortHandle* pHandle) { } int nMergedRows = 0; - + SArray* pPageIdList = taosArrayInit(4, sizeof(int32_t)); while (1) { if (tsortIsClosed(pHandle)) { @@ -1014,7 +1014,7 @@ static int32_t createBlocksMergeSortInitialSources(SSortHandle* pHandle) { } taosArrayClear(aBlkSort); szSort = 0; - uInfo("source %zu created", taosArrayGetSize(aExtSrc)); + qDebug("source %zu created", taosArrayGetSize(aExtSrc)); } if (pBlk == NULL) { break; @@ -1146,11 +1146,7 @@ static int32_t createInitialSources(SSortHandle* pHandle) { } else if (pHandle->type == SORT_BLOCK_TS_MERGE) { code = createBlocksMergeSortInitialSources(pHandle); } - uInfo("%zu sources created", taosArrayGetSize(pHandle->pOrderedSource)); - for (int i = 0; i < taosArrayGetSize(pHandle->pOrderedSource); ++i) { - SSortSource* pSrc = taosArrayGetP(pHandle->pOrderedSource, i); - uInfo("source %d, num of pages %zu", i,taosArrayGetSize(pSrc->pageIdList)); - } + qDebug("%zu sources created", taosArrayGetSize(pHandle->pOrderedSource)); return code; } From 7d850c1a5e8fc283baef118e8090ec8b7709a2ee Mon Sep 17 00:00:00 2001 From: wangjiaming0909 <604227650@qq.com> Date: Thu, 20 Jul 2023 15:07:55 +0800 Subject: [PATCH 21/23] feat: optimize partition by tbname slimit --- include/libs/nodes/plannodes.h | 4 + source/libs/executor/src/aggregateoperator.c | 93 +++++++++++-------- source/libs/executor/src/executil.c | 6 ++ source/libs/nodes/src/nodesCloneFuncs.c | 4 + source/libs/nodes/src/nodesCodeFuncs.c | 7 ++ source/libs/nodes/src/nodesMsgFuncs.c | 17 +++- source/libs/planner/inc/planInt.h | 3 + source/libs/planner/src/planOptimizer.c | 14 +++ source/libs/planner/src/planPhysiCreater.c | 2 + source/libs/planner/src/planSpliter.c | 58 +----------- source/libs/planner/src/planUtil.c | 54 +++++++++++ tests/system-test/2-query/columnLenUpdated.py | 2 +- .../5-taos-tools/taosbenchmark/insertMix.py | 5 + 13 files changed, 172 insertions(+), 97 deletions(-) diff --git a/include/libs/nodes/plannodes.h b/include/libs/nodes/plannodes.h index 453c5d4914..821defc797 100644 --- a/include/libs/nodes/plannodes.h +++ b/include/libs/nodes/plannodes.h @@ -55,6 +55,7 @@ typedef struct SLogicNode { EGroupAction groupAction; EOrder inputTsOrder; EOrder outputTsOrder; + bool forceCreateNonBlockingOptr; // true if the operator can use non-blocking(pipeline) mode } SLogicNode; typedef enum EScanType { @@ -105,6 +106,7 @@ typedef struct SScanLogicNode { bool hasNormalCols; // neither tag column nor primary key tag column bool sortPrimaryKey; bool igLastNull; + bool groupOrderScan; } SScanLogicNode; typedef struct SJoinLogicNode { @@ -316,6 +318,7 @@ typedef struct SPhysiNode { struct SPhysiNode* pParent; SNode* pLimit; SNode* pSlimit; + bool forceCreateNonBlockingOptr; } SPhysiNode; typedef struct SScanPhysiNode { @@ -326,6 +329,7 @@ typedef struct SScanPhysiNode { uint64_t suid; int8_t tableType; SName tableName; + bool groupOrderScan; } SScanPhysiNode; typedef SScanPhysiNode STagScanPhysiNode; diff --git a/source/libs/executor/src/aggregateoperator.c b/source/libs/executor/src/aggregateoperator.c index be0ad1c239..176c4b53be 100644 --- a/source/libs/executor/src/aggregateoperator.c +++ b/source/libs/executor/src/aggregateoperator.c @@ -45,6 +45,8 @@ typedef struct SAggOperatorInfo { SGroupResInfo groupResInfo; SExprSupp scalarExprSup; bool groupKeyOptimized; + bool hasValidBlock; + SSDataBlock* 
pNewGroupBlock; } SAggOperatorInfo; static void destroyAggOperatorInfo(void* param); @@ -53,7 +55,6 @@ static void setExecutionContext(SOperatorInfo* pOperator, int32_t numOfOutput, u static int32_t createDataBlockForEmptyInput(SOperatorInfo* pOperator, SSDataBlock** ppBlock); static void destroyDataBlockForEmptyInput(bool blockAllocated, SSDataBlock** ppBlock); -static int32_t doOpenAggregateOptr(SOperatorInfo* pOperator); static int32_t doAggregateImpl(SOperatorInfo* pOperator, SqlFunctionCtx* pCtx); static SSDataBlock* getAggregateResult(SOperatorInfo* pOperator); @@ -111,9 +112,9 @@ SOperatorInfo* createAggregateOperatorInfo(SOperatorInfo* downstream, SAggPhysiN pInfo->binfo.inputTsOrder = pAggNode->node.inputTsOrder; pInfo->binfo.outputTsOrder = pAggNode->node.outputTsOrder; - setOperatorInfo(pOperator, "TableAggregate", QUERY_NODE_PHYSICAL_PLAN_HASH_AGG, true, OP_NOT_OPENED, pInfo, - pTaskInfo); - pOperator->fpSet = createOperatorFpSet(doOpenAggregateOptr, getAggregateResult, NULL, destroyAggOperatorInfo, + setOperatorInfo(pOperator, "TableAggregate", QUERY_NODE_PHYSICAL_PLAN_HASH_AGG, + !pAggNode->node.forceCreateNonBlockingOptr, OP_NOT_OPENED, pInfo, pTaskInfo); + pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, getAggregateResult, NULL, destroyAggOperatorInfo, optrDefaultBufFn, NULL); if (downstream->operatorType == QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN) { @@ -153,28 +154,42 @@ void destroyAggOperatorInfo(void* param) { taosMemoryFreeClear(param); } -// this is a blocking operator -int32_t doOpenAggregateOptr(SOperatorInfo* pOperator) { - if (OPTR_IS_OPENED(pOperator)) { - return TSDB_CODE_SUCCESS; - } - +/** + * @brief get blocks from downstream and fill results into groupedRes after aggregation + * @retval false if no more groups + * @retval true if there could be new groups coming + * @note if pOperator.blocking is true, scan all blocks from downstream, all groups are handled + * if false, fill results of ONE GROUP + * */ +static bool nextGroupedResult(SOperatorInfo* pOperator) { SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; SAggOperatorInfo* pAggInfo = pOperator->info; + if (pOperator->blocking && pAggInfo->hasValidBlock) return false; + SExprSupp* pSup = &pOperator->exprSupp; SOperatorInfo* downstream = pOperator->pDownstream[0]; - int64_t st = taosGetTimestampUs(); - int32_t code = TSDB_CODE_SUCCESS; - int32_t order = pAggInfo->binfo.inputTsOrder; - bool hasValidBlock = false; + int64_t st = taosGetTimestampUs(); + int32_t code = TSDB_CODE_SUCCESS; + int32_t order = pAggInfo->binfo.inputTsOrder; + SSDataBlock* pBlock = pAggInfo->pNewGroupBlock; + if (pBlock) { + pAggInfo->pNewGroupBlock = NULL; + tSimpleHashClear(pAggInfo->aggSup.pResultRowHashTable); + setExecutionContext(pOperator, pOperator->exprSupp.numOfExprs, pBlock->info.id.groupId); + setInputDataBlock(pSup, pBlock, order, pBlock->info.scanFlag, true); + code = doAggregateImpl(pOperator, pSup->pCtx); + if (code != TSDB_CODE_SUCCESS) { + T_LONG_JMP(pTaskInfo->env, code); + } + } while (1) { bool blockAllocated = false; - SSDataBlock* pBlock = downstream->fpSet.getNextFn(downstream); + pBlock = downstream->fpSet.getNextFn(downstream); if (pBlock == NULL) { - if (!hasValidBlock) { + if (!pAggInfo->hasValidBlock) { createDataBlockForEmptyInput(pOperator, &pBlock); if (pBlock == NULL) { break; } @@ -184,7 +199,7 @@ int32_t doOpenAggregateOptr(SOperatorInfo* pOperator) { break; } } - hasValidBlock = true; + pAggInfo->hasValidBlock = true; pAggInfo->binfo.pRes->info.scanFlag = pBlock->info.scanFlag; // there is an
scalar expression that needs to be calculated before apply the group aggregation. @@ -196,7 +211,11 @@ int32_t doOpenAggregateOptr(SOperatorInfo* pOperator) { T_LONG_JMP(pTaskInfo->env, code); } } - + // if non-blocking mode and new group arrived, save the block and break + if (!pOperator->blocking && pAggInfo->groupId != UINT64_MAX && pBlock->info.id.groupId != pAggInfo->groupId) { + pAggInfo->pNewGroupBlock = pBlock; + break; + } // the pDataBlock are always the same one, no need to call this again setExecutionContext(pOperator, pOperator->exprSupp.numOfExprs, pBlock->info.id.groupId); setInputDataBlock(pSup, pBlock, order, pBlock->info.scanFlag, true); @@ -215,10 +234,7 @@ int32_t doOpenAggregateOptr(SOperatorInfo* pOperator) { } initGroupedResultInfo(&pAggInfo->groupResInfo, pAggInfo->aggSup.pResultRowHashTable, 0); - OPTR_SET_OPENED(pOperator); - - pOperator->cost.openCost = (taosGetTimestampUs() - st) / 1000.0; - return pTaskInfo->code; + return pBlock != NULL; } SSDataBlock* getAggregateResult(SOperatorInfo* pOperator) { @@ -230,26 +246,25 @@ SSDataBlock* getAggregateResult(SOperatorInfo* pOperator) { } SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; - pTaskInfo->code = pOperator->fpSet._openFn(pOperator); - if (pTaskInfo->code != TSDB_CODE_SUCCESS) { - setOperatorCompleted(pOperator); - return NULL; - } + bool hasNewGroups = false; + do { + hasNewGroups = nextGroupedResult(pOperator); + blockDataEnsureCapacity(pInfo->pRes, pOperator->resultInfo.capacity); - blockDataEnsureCapacity(pInfo->pRes, pOperator->resultInfo.capacity); - while (1) { - doBuildResultDatablock(pOperator, pInfo, &pAggInfo->groupResInfo, pAggInfo->aggSup.pResultBuf); - doFilter(pInfo->pRes, pOperator->exprSupp.pFilterInfo, NULL); + while (1) { + doBuildResultDatablock(pOperator, pInfo, &pAggInfo->groupResInfo, pAggInfo->aggSup.pResultBuf); + doFilter(pInfo->pRes, pOperator->exprSupp.pFilterInfo, NULL); - if (!hasRemainResults(&pAggInfo->groupResInfo)) { - setOperatorCompleted(pOperator); - break; + if (!hasRemainResults(&pAggInfo->groupResInfo)) { + if (!hasNewGroups) setOperatorCompleted(pOperator); + break; + } + + if (pInfo->pRes->info.rows > 0) { + break; + } } - - if (pInfo->pRes->info.rows > 0) { - break; - } - } + } while (pInfo->pRes->info.rows == 0 && hasNewGroups); size_t rows = blockDataGetNumOfRows(pInfo->pRes); pOperator->resultInfo.totalRows += rows; diff --git a/source/libs/executor/src/executil.c b/source/libs/executor/src/executil.c index 832750e967..e1bf4e7cb0 100644 --- a/source/libs/executor/src/executil.c +++ b/source/libs/executor/src/executil.c @@ -127,6 +127,10 @@ void initGroupedResultInfo(SGroupResInfo* pGroupResInfo, SSHashObj* pHashmap, in if (pGroupResInfo->pRows != NULL) { taosArrayDestroy(pGroupResInfo->pRows); } + if (pGroupResInfo->pBuf) { + taosMemoryFree(pGroupResInfo->pBuf); + pGroupResInfo->pBuf = NULL; + } // extract the result rows information from the hash map int32_t size = tSimpleHashGetSize(pHashmap); @@ -2104,6 +2108,8 @@ int32_t buildGroupIdMapForAllTables(STableListInfo* pTableListInfo, SReadHandle* if (groupSort && groupByTbname) { taosArraySort(pTableListInfo->pTableList, orderbyGroupIdComparFn); pTableListInfo->numOfOuputGroups = numOfTables; + } else if (groupByTbname && pScanNode->groupOrderScan){ + pTableListInfo->numOfOuputGroups = numOfTables; } else { pTableListInfo->numOfOuputGroups = 1; } diff --git a/source/libs/nodes/src/nodesCloneFuncs.c b/source/libs/nodes/src/nodesCloneFuncs.c index 6e4dde4ec1..f5eacf0bd5 100644 --- 
a/source/libs/nodes/src/nodesCloneFuncs.c +++ b/source/libs/nodes/src/nodesCloneFuncs.c @@ -361,6 +361,7 @@ static int32_t logicNodeCopy(const SLogicNode* pSrc, SLogicNode* pDst) { COPY_SCALAR_FIELD(groupAction); COPY_SCALAR_FIELD(inputTsOrder); COPY_SCALAR_FIELD(outputTsOrder); + COPY_SCALAR_FIELD(forceCreateNonBlockingOptr); return TSDB_CODE_SUCCESS; } @@ -397,6 +398,7 @@ static int32_t logicScanCopy(const SScanLogicNode* pSrc, SScanLogicNode* pDst) { CLONE_NODE_LIST_FIELD(pTags); CLONE_NODE_FIELD(pSubtable); COPY_SCALAR_FIELD(igLastNull); + COPY_SCALAR_FIELD(groupOrderScan); return TSDB_CODE_SUCCESS; } @@ -545,6 +547,7 @@ static int32_t physiNodeCopy(const SPhysiNode* pSrc, SPhysiNode* pDst) { CLONE_NODE_LIST_FIELD(pChildren); COPY_SCALAR_FIELD(inputTsOrder); COPY_SCALAR_FIELD(outputTsOrder); + COPY_SCALAR_FIELD(forceCreateNonBlockingOptr); return TSDB_CODE_SUCCESS; } @@ -556,6 +559,7 @@ static int32_t physiScanCopy(const SScanPhysiNode* pSrc, SScanPhysiNode* pDst) { COPY_SCALAR_FIELD(suid); COPY_SCALAR_FIELD(tableType); COPY_OBJECT_FIELD(tableName, sizeof(SName)); + COPY_SCALAR_FIELD(groupOrderScan); return TSDB_CODE_SUCCESS; } diff --git a/source/libs/nodes/src/nodesCodeFuncs.c b/source/libs/nodes/src/nodesCodeFuncs.c index 81116a60b0..f25616065e 100644 --- a/source/libs/nodes/src/nodesCodeFuncs.c +++ b/source/libs/nodes/src/nodesCodeFuncs.c @@ -1559,6 +1559,7 @@ static const char* jkScanPhysiPlanTableId = "TableId"; static const char* jkScanPhysiPlanSTableId = "STableId"; static const char* jkScanPhysiPlanTableType = "TableType"; static const char* jkScanPhysiPlanTableName = "TableName"; +static const char* jkScanPhysiPlanGroupOrderScan = "GroupOrderScan"; static int32_t physiScanNodeToJson(const void* pObj, SJson* pJson) { const STagScanPhysiNode* pNode = (const STagScanPhysiNode*)pObj; @@ -1582,6 +1583,9 @@ static int32_t physiScanNodeToJson(const void* pObj, SJson* pJson) { if (TSDB_CODE_SUCCESS == code) { code = tjsonAddObject(pJson, jkScanPhysiPlanTableName, nameToJson, &pNode->tableName); } + if (TSDB_CODE_SUCCESS == code) { + code = tjsonAddBoolToObject(pJson, jkScanPhysiPlanGroupOrderScan, pNode->groupOrderScan); + } return code; } @@ -1608,6 +1612,9 @@ static int32_t jsonToPhysiScanNode(const SJson* pJson, void* pObj) { if (TSDB_CODE_SUCCESS == code) { code = tjsonToObject(pJson, jkScanPhysiPlanTableName, jsonToName, &pNode->tableName); } + if (TSDB_CODE_SUCCESS == code) { + code = tjsonGetBoolValue(pJson, jkScanPhysiPlanGroupOrderScan, &pNode->groupOrderScan); + } return code; } diff --git a/source/libs/nodes/src/nodesMsgFuncs.c b/source/libs/nodes/src/nodesMsgFuncs.c index 1ca37defa4..20e829766d 100644 --- a/source/libs/nodes/src/nodesMsgFuncs.c +++ b/source/libs/nodes/src/nodesMsgFuncs.c @@ -1853,7 +1853,8 @@ enum { PHY_NODE_CODE_LIMIT, PHY_NODE_CODE_SLIMIT, PHY_NODE_CODE_INPUT_TS_ORDER, - PHY_NODE_CODE_OUTPUT_TS_ORDER + PHY_NODE_CODE_OUTPUT_TS_ORDER, + PHY_NODE_CODE_FORCE_NONBLOCKING_OPTR }; static int32_t physiNodeToMsg(const void* pObj, STlvEncoder* pEncoder) { @@ -1878,6 +1879,9 @@ static int32_t physiNodeToMsg(const void* pObj, STlvEncoder* pEncoder) { if (TSDB_CODE_SUCCESS == code) { code = tlvEncodeEnum(pEncoder, PHY_NODE_CODE_OUTPUT_TS_ORDER, pNode->outputTsOrder); } + if (TSDB_CODE_SUCCESS == code) { + code = tlvEncodeBool(pEncoder, PHY_NODE_CODE_FORCE_NONBLOCKING_OPTR, pNode->forceCreateNonBlockingOptr); + } return code; } @@ -1910,6 +1914,8 @@ static int32_t msgToPhysiNode(STlvDecoder* pDecoder, void* pObj) { case PHY_NODE_CODE_OUTPUT_TS_ORDER: code = 
tlvDecodeEnum(pTlv, &pNode->outputTsOrder, sizeof(pNode->outputTsOrder)); break; + case PHY_NODE_CODE_FORCE_NONBLOCKING_OPTR: + code = tlvDecodeBool(pTlv, &pNode->forceCreateNonBlockingOptr); default: break; } @@ -1925,7 +1931,8 @@ enum { PHY_SCAN_CODE_BASE_UID, PHY_SCAN_CODE_BASE_SUID, PHY_SCAN_CODE_BASE_TABLE_TYPE, - PHY_SCAN_CODE_BASE_TABLE_NAME + PHY_SCAN_CODE_BASE_TABLE_NAME, + PHY_SCAN_CODE_BASE_GROUP_ORDER_SCAN }; static int32_t physiScanNodeToMsg(const void* pObj, STlvEncoder* pEncoder) { @@ -1950,6 +1957,9 @@ static int32_t physiScanNodeToMsg(const void* pObj, STlvEncoder* pEncoder) { if (TSDB_CODE_SUCCESS == code) { code = tlvEncodeObj(pEncoder, PHY_SCAN_CODE_BASE_TABLE_NAME, nameToMsg, &pNode->tableName); } + if (TSDB_CODE_SUCCESS == code) { + code = tlvEncodeBool(pEncoder, PHY_SCAN_CODE_BASE_GROUP_ORDER_SCAN, pNode->groupOrderScan); + } return code; } @@ -1982,6 +1992,9 @@ static int32_t msgToPhysiScanNode(STlvDecoder* pDecoder, void* pObj) { case PHY_SCAN_CODE_BASE_TABLE_NAME: code = tlvDecodeObjFromTlv(pTlv, msgToName, &pNode->tableName); break; + case PHY_SCAN_CODE_BASE_GROUP_ORDER_SCAN: + code = tlvDecodeBool(pTlv, &pNode->groupOrderScan); + break; default: break; } diff --git a/source/libs/planner/inc/planInt.h b/source/libs/planner/inc/planInt.h index 82abc5d1a9..092fe17411 100644 --- a/source/libs/planner/inc/planInt.h +++ b/source/libs/planner/inc/planInt.h @@ -43,6 +43,9 @@ int32_t splitLogicPlan(SPlanContext* pCxt, SLogicSubplan* pLogicSubplan); int32_t scaleOutLogicPlan(SPlanContext* pCxt, SLogicSubplan* pLogicSubplan, SQueryLogicPlan** pLogicPlan); int32_t createPhysiPlan(SPlanContext* pCxt, SQueryLogicPlan* pLogicPlan, SQueryPlan** pPlan, SArray* pExecNodeList); +bool isPartTableAgg(SAggLogicNode* pAgg); +bool isPartTableWinodw(SWindowLogicNode* pWindow); + #ifdef __cplusplus } #endif diff --git a/source/libs/planner/src/planOptimizer.c b/source/libs/planner/src/planOptimizer.c index 05f478b116..32721d8060 100644 --- a/source/libs/planner/src/planOptimizer.c +++ b/source/libs/planner/src/planOptimizer.c @@ -363,6 +363,18 @@ static void scanPathOptSetScanOrder(EScanOrder scanOrder, SScanLogicNode* pScan) } } +static void scanPathOptSetGroupOrderScan(SScanLogicNode* pScan) { + if (pScan->tableType != TSDB_SUPER_TABLE) return; + + if (pScan->node.pParent && nodeType(pScan->node.pParent) == QUERY_NODE_LOGIC_PLAN_AGG) { + SAggLogicNode* pAgg = (SAggLogicNode*)pScan->node.pParent; + bool withSlimit = pAgg->node.pSlimit != NULL || (pAgg->node.pParent && pAgg->node.pParent->pSlimit); + if (withSlimit && isPartTableAgg(pAgg)) { + pScan->groupOrderScan = pAgg->node.forceCreateNonBlockingOptr = true; + } + } +} + static int32_t scanPathOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogicSubplan) { SOsdInfo info = {.scanOrder = SCAN_ORDER_ASC}; int32_t code = scanPathOptMatch(pCxt, pLogicSubplan->pNode, &info); @@ -371,6 +383,7 @@ static int32_t scanPathOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogicSub if (!pCxt->pPlanCxt->streamQuery) { scanPathOptSetScanOrder(info.scanOrder, info.pScan); } + scanPathOptSetGroupOrderScan(info.pScan); } if (TSDB_CODE_SUCCESS == code && (NULL != info.pDsoFuncs || NULL != info.pSdrFuncs)) { info.pScan->dataRequired = scanPathOptGetDataRequired(info.pSdrFuncs); @@ -1675,6 +1688,7 @@ static int32_t partTagsOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogicSub if (TSDB_CODE_SUCCESS == code) { if (QUERY_NODE_LOGIC_PLAN_AGG == pNode->pParent->type) { SAggLogicNode* pParent = (SAggLogicNode*)(pNode->pParent); + 
scanPathOptSetGroupOrderScan(pScan); pParent->hasGroupKeyOptimized = true; } diff --git a/source/libs/planner/src/planPhysiCreater.c b/source/libs/planner/src/planPhysiCreater.c index b3d94a5e47..1b92dcd2e7 100644 --- a/source/libs/planner/src/planPhysiCreater.c +++ b/source/libs/planner/src/planPhysiCreater.c @@ -447,6 +447,7 @@ static int32_t createScanPhysiNodeFinalize(SPhysiPlanContext* pCxt, SSubplan* pS pScanPhysiNode->uid = pScanLogicNode->tableId; pScanPhysiNode->suid = pScanLogicNode->stableId; pScanPhysiNode->tableType = pScanLogicNode->tableType; + pScanPhysiNode->groupOrderScan = pScanLogicNode->groupOrderScan; memcpy(&pScanPhysiNode->tableName, &pScanLogicNode->tableName, sizeof(SName)); if (NULL != pScanLogicNode->pTagCond) { pSubplan->pTagCond = nodesCloneNode(pScanLogicNode->pTagCond); @@ -880,6 +881,7 @@ static int32_t createAggPhysiNode(SPhysiPlanContext* pCxt, SNodeList* pChildren, pAgg->mergeDataBlock = (GROUP_ACTION_KEEP == pAggLogicNode->node.groupAction ? false : true); pAgg->groupKeyOptimized = pAggLogicNode->hasGroupKeyOptimized; + pAgg->node.forceCreateNonBlockingOptr = pAggLogicNode->node.forceCreateNonBlockingOptr; SNodeList* pPrecalcExprs = NULL; SNodeList* pGroupKeys = NULL; diff --git a/source/libs/planner/src/planSpliter.c b/source/libs/planner/src/planSpliter.c index 246ee13fb0..84a486649e 100644 --- a/source/libs/planner/src/planSpliter.c +++ b/source/libs/planner/src/planSpliter.c @@ -306,54 +306,6 @@ static bool stbSplIsTableCountQuery(SLogicNode* pNode) { return QUERY_NODE_LOGIC_PLAN_SCAN == nodeType(pChild) && SCAN_TYPE_TABLE_COUNT == ((SScanLogicNode*)pChild)->scanType; } -static SNodeList* stbSplGetPartKeys(SLogicNode* pNode) { - if (QUERY_NODE_LOGIC_PLAN_SCAN == nodeType(pNode)) { - return ((SScanLogicNode*)pNode)->pGroupTags; - } else if (QUERY_NODE_LOGIC_PLAN_PARTITION == nodeType(pNode)) { - return ((SPartitionLogicNode*)pNode)->pPartitionKeys; - } else { - return NULL; - } -} - -static bool stbSplHasPartTbname(SNodeList* pPartKeys) { - if (NULL == pPartKeys) { - return false; - } - SNode* pPartKey = NULL; - FOREACH(pPartKey, pPartKeys) { - if (QUERY_NODE_GROUPING_SET == nodeType(pPartKey)) { - pPartKey = nodesListGetNode(((SGroupingSetNode*)pPartKey)->pParameterList, 0); - } - if ((QUERY_NODE_FUNCTION == nodeType(pPartKey) && FUNCTION_TYPE_TBNAME == ((SFunctionNode*)pPartKey)->funcType) || - (QUERY_NODE_COLUMN == nodeType(pPartKey) && COLUMN_TYPE_TBNAME == ((SColumnNode*)pPartKey)->colType)) { - return true; - } - } - return false; -} - -static bool stbSplNotSystemScan(SLogicNode* pNode) { - if (QUERY_NODE_LOGIC_PLAN_SCAN == nodeType(pNode)) { - return SCAN_TYPE_SYSTEM_TABLE != ((SScanLogicNode*)pNode)->scanType; - } else if (QUERY_NODE_LOGIC_PLAN_PARTITION == nodeType(pNode)) { - return stbSplNotSystemScan((SLogicNode*)nodesListGetNode(pNode->pChildren, 0)); - } else { - return true; - } -} - -static bool stbSplIsPartTableAgg(SAggLogicNode* pAgg) { - if (1 != LIST_LENGTH(pAgg->node.pChildren)) { - return false; - } - if (NULL != pAgg->pGroupKeys) { - return stbSplHasPartTbname(pAgg->pGroupKeys) && - stbSplNotSystemScan((SLogicNode*)nodesListGetNode(pAgg->node.pChildren, 0)); - } - return stbSplHasPartTbname(stbSplGetPartKeys((SLogicNode*)nodesListGetNode(pAgg->node.pChildren, 0))); -} - static bool stbSplNeedSplit(bool streamQuery, SLogicNode* pNode) { switch (nodeType(pNode)) { case QUERY_NODE_LOGIC_PLAN_SCAN: @@ -364,7 +316,7 @@ static bool stbSplNeedSplit(bool streamQuery, SLogicNode* pNode) { return streamQuery ? 
false : stbSplIsMultiTbScanChild(streamQuery, pNode); case QUERY_NODE_LOGIC_PLAN_AGG: return (!stbSplHasGatherExecFunc(((SAggLogicNode*)pNode)->pAggFuncs) || - stbSplIsPartTableAgg((SAggLogicNode*)pNode)) && + isPartTableAgg((SAggLogicNode*)pNode)) && stbSplHasMultiTbScan(streamQuery, pNode) && !stbSplIsTableCountQuery(pNode); case QUERY_NODE_LOGIC_PLAN_WINDOW: return stbSplNeedSplitWindow(streamQuery, pNode); @@ -778,10 +730,6 @@ static int32_t stbSplSplitEvent(SSplitContext* pCxt, SStableSplitInfo* pInfo) { } } -static bool stbSplIsPartTableWinodw(SWindowLogicNode* pWindow) { - return stbSplHasPartTbname(stbSplGetPartKeys((SLogicNode*)nodesListGetNode(pWindow->node.pChildren, 0))); -} - static int32_t stbSplSplitWindowForCrossTable(SSplitContext* pCxt, SStableSplitInfo* pInfo) { switch (((SWindowLogicNode*)pInfo->pSplitNode)->winType) { case WINDOW_TYPE_INTERVAL: @@ -834,7 +782,7 @@ static int32_t stbSplSplitWindowForPartTable(SSplitContext* pCxt, SStableSplitIn } static int32_t stbSplSplitWindowNode(SSplitContext* pCxt, SStableSplitInfo* pInfo) { - if (stbSplIsPartTableWinodw((SWindowLogicNode*)pInfo->pSplitNode)) { + if (isPartTableWinodw((SWindowLogicNode*)pInfo->pSplitNode)) { return stbSplSplitWindowForPartTable(pCxt, pInfo); } else { return stbSplSplitWindowForCrossTable(pCxt, pInfo); @@ -920,7 +868,7 @@ static int32_t stbSplSplitAggNodeForCrossTable(SSplitContext* pCxt, SStableSplit } static int32_t stbSplSplitAggNode(SSplitContext* pCxt, SStableSplitInfo* pInfo) { - if (stbSplIsPartTableAgg((SAggLogicNode*)pInfo->pSplitNode)) { + if (isPartTableAgg((SAggLogicNode*)pInfo->pSplitNode)) { return stbSplSplitAggNodeForPartTable(pCxt, pInfo); } return stbSplSplitAggNodeForCrossTable(pCxt, pInfo); diff --git a/source/libs/planner/src/planUtil.c b/source/libs/planner/src/planUtil.c index 29e87b34ce..88086cde1d 100644 --- a/source/libs/planner/src/planUtil.c +++ b/source/libs/planner/src/planUtil.c @@ -321,3 +321,57 @@ int32_t adjustLogicNodeDataRequirement(SLogicNode* pNode, EDataOrderLevel requir } return code; } + +static bool stbNotSystemScan(SLogicNode* pNode) { + if (QUERY_NODE_LOGIC_PLAN_SCAN == nodeType(pNode)) { + return SCAN_TYPE_SYSTEM_TABLE != ((SScanLogicNode*)pNode)->scanType; + } else if (QUERY_NODE_LOGIC_PLAN_PARTITION == nodeType(pNode)) { + return stbNotSystemScan((SLogicNode*)nodesListGetNode(pNode->pChildren, 0)); + } else { + return true; + } +} + +static bool stbHasPartTbname(SNodeList* pPartKeys) { + if (NULL == pPartKeys) { + return false; + } + SNode* pPartKey = NULL; + FOREACH(pPartKey, pPartKeys) { + if (QUERY_NODE_GROUPING_SET == nodeType(pPartKey)) { + pPartKey = nodesListGetNode(((SGroupingSetNode*)pPartKey)->pParameterList, 0); + } + if ((QUERY_NODE_FUNCTION == nodeType(pPartKey) && FUNCTION_TYPE_TBNAME == ((SFunctionNode*)pPartKey)->funcType) || + (QUERY_NODE_COLUMN == nodeType(pPartKey) && COLUMN_TYPE_TBNAME == ((SColumnNode*)pPartKey)->colType)) { + return true; + } + } + return false; +} + +static SNodeList* stbSplGetPartKeys(SLogicNode* pNode) { + if (QUERY_NODE_LOGIC_PLAN_SCAN == nodeType(pNode)) { + return ((SScanLogicNode*)pNode)->pGroupTags; + } else if (QUERY_NODE_LOGIC_PLAN_PARTITION == nodeType(pNode)) { + return ((SPartitionLogicNode*)pNode)->pPartitionKeys; + } else { + return NULL; + } +} + +bool isPartTableAgg(SAggLogicNode* pAgg) { + if (1 != LIST_LENGTH(pAgg->node.pChildren)) { + return false; + } + if (NULL != pAgg->pGroupKeys) { + return stbHasPartTbname(pAgg->pGroupKeys) && + 
stbNotSystemScan((SLogicNode*)nodesListGetNode(pAgg->node.pChildren, 0)); + } + return stbHasPartTbname(stbSplGetPartKeys((SLogicNode*)nodesListGetNode(pAgg->node.pChildren, 0))); +} + +bool isPartTableWinodw(SWindowLogicNode* pWindow) { + return stbHasPartTbname(stbSplGetPartKeys((SLogicNode*)nodesListGetNode(pWindow->node.pChildren, 0))); +} + + diff --git a/tests/system-test/2-query/columnLenUpdated.py b/tests/system-test/2-query/columnLenUpdated.py index e43b32a716..93d9a492f9 100644 --- a/tests/system-test/2-query/columnLenUpdated.py +++ b/tests/system-test/2-query/columnLenUpdated.py @@ -202,7 +202,7 @@ class TDTestCase: if retCode != "TAOS_OK": tdLog.exit("taos -s fail") - tdSql.query("select count(*) from stb group by tg1") + tdSql.query("select count(*) from stb group by tg1 order by count(*) desc") tdSql.checkData(0, 0, 2) tdSql.checkData(1, 0, 1) diff --git a/tests/system-test/5-taos-tools/taosbenchmark/insertMix.py b/tests/system-test/5-taos-tools/taosbenchmark/insertMix.py index 60daa8cdc2..b4046b8c98 100644 --- a/tests/system-test/5-taos-tools/taosbenchmark/insertMix.py +++ b/tests/system-test/5-taos-tools/taosbenchmark/insertMix.py @@ -79,6 +79,11 @@ class TDTestCase: tdSql.query("select count(*) from (select * from meters order by ts desc)") tdSql.checkData(0, 0, allCnt) + rowCnt = tdSql.query("select tbname, count(*) from meters partition by tbname slimit 11") + if rowCnt != 10: + tdLog.exit("partition by tbname should return 10 rows of table data which is " + str(rowCnt)) + return + def run(self): binPath = self.getPath() From 076a425d5329028a0a87b39d05884d2599f61f48 Mon Sep 17 00:00:00 2001 From: Ganlin Zhao Date: Thu, 20 Jul 2023 17:21:24 +0800 Subject: [PATCH 22/23] fix: fix show create table issue for system tables --- source/dnode/mnode/impl/inc/mndStb.h | 1 + source/dnode/mnode/impl/src/mndStb.c | 13 +++++++++++-- source/libs/catalog/src/ctgAsync.c | 4 ++-- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/source/dnode/mnode/impl/inc/mndStb.h b/source/dnode/mnode/impl/inc/mndStb.h index 99af413539..db960d790f 100644 --- a/source/dnode/mnode/impl/inc/mndStb.h +++ b/source/dnode/mnode/impl/inc/mndStb.h @@ -39,6 +39,7 @@ int32_t mndBuildSMCreateStbRsp(SMnode *pMnode, char *dbFName, char *stbFName, vo void mndExtractDbNameFromStbFullName(const char *stbFullName, char *dst); void mndExtractShortDbNameFromStbFullName(const char *stbFullName, char *dst); +void mndExtractShortDbNameFromDbFullName(const char *stbFullName, char *dst); void mndExtractTbNameFromStbFullName(const char *stbFullName, char *dst, int32_t dstSize); const char *mndGetStbStr(const char *src); diff --git a/source/dnode/mnode/impl/src/mndStb.c b/source/dnode/mnode/impl/src/mndStb.c index 162e75d783..f146e5189e 100644 --- a/source/dnode/mnode/impl/src/mndStb.c +++ b/source/dnode/mnode/impl/src/mndStb.c @@ -2495,12 +2495,14 @@ static int32_t mndProcessTableCfgReq(SRpcMsg *pReq) { goto _OVER; } - if (0 == strcmp(cfgReq.dbFName, TSDB_INFORMATION_SCHEMA_DB)) { + char dbName[TSDB_DB_NAME_LEN] = {0}; + mndExtractShortDbNameFromDbFullName(cfgReq.dbFName, dbName); + if (0 == strcmp(dbName, TSDB_INFORMATION_SCHEMA_DB)) { mInfo("information_schema table:%s.%s, start to retrieve cfg", cfgReq.dbFName, cfgReq.tbName); if (mndBuildInsTableCfg(pMnode, cfgReq.dbFName, cfgReq.tbName, &cfgRsp) != 0) { goto _OVER; } - } else if (0 == strcmp(cfgReq.dbFName, TSDB_PERFORMANCE_SCHEMA_DB)) { + } else if (0 == strcmp(dbName, TSDB_PERFORMANCE_SCHEMA_DB)) { mInfo("performance_schema table:%s.%s, start to retrieve cfg", 
cfgReq.dbFName, cfgReq.tbName); if (mndBuildPerfsTableCfg(pMnode, cfgReq.dbFName, cfgReq.tbName, &cfgRsp) != 0) { goto _OVER; @@ -2669,6 +2671,13 @@ void mndExtractShortDbNameFromStbFullName(const char *stbFullName, char *dst) { tNameGetDbName(&name, dst); } +void mndExtractShortDbNameFromDbFullName(const char *stbFullName, char *dst) { + SName name = {0}; + tNameFromString(&name, stbFullName, T_NAME_ACCT | T_NAME_DB); + + tNameGetDbName(&name, dst); +} + void mndExtractTbNameFromStbFullName(const char *stbFullName, char *dst, int32_t dstSize) { int32_t pos = -1; int32_t num = 0; diff --git a/source/libs/catalog/src/ctgAsync.c b/source/libs/catalog/src/ctgAsync.c index 562343c9c7..784126eee9 100644 --- a/source/libs/catalog/src/ctgAsync.c +++ b/source/libs/catalog/src/ctgAsync.c @@ -2090,7 +2090,7 @@ int32_t ctgLaunchGetTbCfgTask(SCtgTask* pTask) { } CTG_CACHE_NHIT_INC(CTG_CI_TBL_CFG, 1); - + if (pCtx->tbType <= 0) { CTG_ERR_JRET(ctgReadTbTypeFromCache(pCtg, dbFName, pCtx->pName->tname, &pCtx->tbType)); if (pCtx->tbType <= 0) { @@ -2102,7 +2102,7 @@ int32_t ctgLaunchGetTbCfgTask(SCtgTask* pTask) { } } - if (TSDB_SUPER_TABLE == pCtx->tbType) { + if (TSDB_SUPER_TABLE == pCtx->tbType || TSDB_SYSTEM_TABLE == pCtx->tbType) { CTG_ERR_JRET(ctgGetTableCfgFromMnode(pCtg, pConn, pCtx->pName, NULL, pTask)); } else { if (NULL == pCtx->pVgInfo) { From ded3ee67b3faea04c257deeb0a51d754dc0d4647 Mon Sep 17 00:00:00 2001 From: Ganlin Zhao Date: Thu, 20 Jul 2023 17:39:10 +0800 Subject: [PATCH 23/23] add test cases --- tests/system-test/0-others/show.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/system-test/0-others/show.py b/tests/system-test/0-others/show.py index 50a1662ba0..9d26b3a2ae 100644 --- a/tests/system-test/0-others/show.py +++ b/tests/system-test/0-others/show.py @@ -95,6 +95,23 @@ class TDTestCase: tdSql.checkEqual(f'{db}',tdSql.queryResult[0][0]) tdSql.checkEqual(f'CREATE DATABASE `{db}`',tdSql.queryResult[0][1]) + def show_create_systb_sql(self): + for param in self.ins_param_list: + tdSql.query(f'show create table information_schema.ins_{param}') + tdSql.checkEqual(f'ins_{param}',tdSql.queryResult[0][0]) + + tdSql.execute(f'use information_schema') + tdSql.query(f'show create table ins_{param}') + tdSql.checkEqual(f'ins_{param}',tdSql.queryResult[0][0]) + + for param in self.perf_param_list: + tdSql.query(f'show create table performance_schema.perf_{param}') + tdSql.checkEqual(f'perf_{param}',tdSql.queryResult[0][0]) + + tdSql.execute(f'use performance_schema') + tdSql.query(f'show create table perf_{param}') + tdSql.checkEqual(f'perf_{param}',tdSql.queryResult[0][0]) + def show_create_sql(self): create_db_sql = self.set_create_database_sql(self.db_param) print(create_db_sql) @@ -200,6 +217,7 @@ class TDTestCase: self.perf_check() self.show_create_sql() self.show_create_sysdb_sql() + self.show_create_systb_sql() def stop(self): tdSql.close()