From 854766d9869639d71434e04cfa63ae49d0e0c0aa Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Fri, 8 Dec 2023 15:57:29 +0800 Subject: [PATCH 01/58] enhance: save block then sort by row id --- source/libs/executor/inc/executorInt.h | 13 ++ source/libs/executor/src/scanoperator.c | 205 +++++++++++++++++++++++- 2 files changed, 212 insertions(+), 6 deletions(-) diff --git a/source/libs/executor/inc/executorInt.h b/source/libs/executor/inc/executorInt.h index cba26c46b5..c103d8eee7 100644 --- a/source/libs/executor/inc/executorInt.h +++ b/source/libs/executor/inc/executorInt.h @@ -273,6 +273,17 @@ typedef struct STableScanInfo { bool filesetDelimited; } STableScanInfo; +typedef struct STmsSortRowIdInfo { + int32_t blkId; + int64_t dataFileOffset; + TdFilePtr idxFile; + char idxPath[PATH_MAX]; + TdFilePtr dataFile; + char dataPath[PATH_MAX]; + SLRUCache* pBlkInfoCache; // blkId->(offset, len) + SLRUCache* pBlkDataCache; // blkId->SSDataBlock* +} STmsSortRowIdInfo; + typedef struct STableMergeScanInfo { int32_t tableStartIndex; int32_t tableEndIndex; @@ -301,6 +312,8 @@ typedef struct STableMergeScanInfo { bool bNewFileset; bool bOnlyRetrieveBlock; bool filesetDelimited; + bool bSortRowId; + STmsSortRowIdInfo tmsSortRowIdInfo; } STableMergeScanInfo; typedef struct STagScanFilterContext { diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index a92770b1f9..d88433c5aa 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -3219,6 +3219,141 @@ _error: return NULL; } +// ========================= table merge scan +typedef struct STmsSortBlockInfo { + int32_t blkId; + int32_t length; + int64_t offset; +} STmsSortBlockInfo; + +static int32_t saveSourceBlock(STmsSortRowIdInfo* pSortInfo, const SSDataBlock* pSrcBlock, int32_t *pSzBlk) { + int32_t szBlk = blockDataGetSize(pSrcBlock) + sizeof(int32_t) + taosArrayGetSize(pSrcBlock->pDataBlock) * sizeof(int32_t); + char* buf = 
taosMemoryMalloc(szBlk); + if (buf == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + blockDataToBuf(buf, pSrcBlock); + *pSzBlk = szBlk; + + taosLSeekFile(pSortInfo->dataFile, pSortInfo->dataFileOffset, SEEK_SET); + taosWriteFile(pSortInfo->dataFile, buf, szBlk); + + STmsSortBlockInfo info = {.blkId = pSortInfo->blkId + , .offset = pSortInfo->dataFileOffset, .length = szBlk}; + taosLSeekFile(pSortInfo->idxFile, pSortInfo->blkId*sizeof(STmsSortBlockInfo), SEEK_SET); + taosWriteFile(pSortInfo->idxFile, &info, sizeof(info)); + + return 0; +} + +static int32_t fillSortInputBlock(const STableMergeScanInfo* pInfo, + const SSDataBlock* pSrcBlock, SSDataBlock* pSortInputBlk) { + const STmsSortRowIdInfo* pSortInfo = &pInfo->tmsSortRowIdInfo; + + int32_t nRows = pSrcBlock->info.rows; + pSortInputBlk->info = pSrcBlock->info; + blockDataEnsureCapacity(pSortInputBlk, nRows); + + int32_t tsSlotId = ((SBlockOrderInfo*)taosArrayGet(pInfo->pSortInfo, 0))->slotId; + SColumnInfoData* tsCol = taosArrayGet(pSortInputBlk->pDataBlock, 0); + SColumnInfoData* pSrcTsCol = taosArrayGet(pSrcBlock->pDataBlock, tsSlotId); + colDataAssign(tsCol, pSrcTsCol, nRows, &pSortInputBlk->info); + + SColumnInfoData* blkIdCol = taosArrayGet(pSortInputBlk->pDataBlock, 1); + colDataSetNItems(blkIdCol, 0, (char*)&pSortInfo->blkId, nRows, false); + + SColumnInfoData* rowIdxCol = taosArrayGet(pSortInputBlk->pDataBlock, 2); + for (int32_t i = 0; i < nRows; ++i) { + colDataSetInt32(rowIdxCol, i, &i); + } + return 0; +} + +static int32_t transformIntoSortInputBlock(STableMergeScanInfo* pInfo, SSDataBlock* pSrcBlock, SSDataBlock* pSortInputBlk) { + //TODO: batch save + int32_t code = 0; + STmsSortRowIdInfo* pSortInfo = &pInfo->tmsSortRowIdInfo; + int32_t szBlk = 0; + code = saveSourceBlock(pSortInfo, pSrcBlock, &szBlk); + + fillSortInputBlock(pInfo, pSrcBlock, pSortInputBlk); + + ++pSortInfo->blkId; + pSortInfo->dataFileOffset = ((pSortInfo->dataFileOffset + szBlk) + 4096) & ~4096; + + return code; +} + +static 
void deleteBlockInfoCache(const void *key, size_t keyLen, void *value, void *ud) { + taosMemoryFree(value); +} + +static void deleteBlockDataCache(const void *key, size_t keyLen, void *value, void *ud) { + SSDataBlock* pBlock = value; + blockDataDestroy(pBlock); +} + +static int32_t retrieveSourceBlock(STableMergeScanInfo* pInfo, int32_t blockId, SSDataBlock** ppBlock) { + STmsSortRowIdInfo* pSortInfo = &pInfo->tmsSortRowIdInfo; + + LRUHandle* hBlk = taosLRUCacheLookup(pSortInfo->pBlkDataCache, &blockId, sizeof(blockId)); + if (hBlk) { + SSDataBlock* pBlk = taosLRUCacheValue(pSortInfo->pBlkDataCache, hBlk); + *ppBlock = pBlk; + } else { + STmsSortBlockInfo* blkInfo = NULL; + LRUHandle* hBlkInfo = taosLRUCacheLookup(pSortInfo->pBlkInfoCache, &blockId, sizeof(blockId)); + if (hBlkInfo) { + blkInfo = taosLRUCacheValue(pSortInfo->pBlkInfoCache, hBlkInfo); + } else { + blkInfo = taosMemoryMalloc(sizeof(STmsSortBlockInfo)); + taosLSeekFile(pSortInfo->idxFile, blockId * sizeof(STmsSortBlockInfo), SEEK_SET); + taosReadFile(pSortInfo->idxFile, &blkInfo, sizeof(blkInfo)); + ASSERT(blkInfo->blkId == blockId); + taosLRUCacheInsert(pSortInfo->pBlkInfoCache, &blockId, sizeof(blockId), blkInfo, 1, deleteBlockInfoCache, + &hBlkInfo, TAOS_LRU_PRIORITY_LOW, NULL); + } + { + taosLSeekFile(pSortInfo->dataFile, blkInfo->offset, SEEK_SET); + char* buf = taosMemoryMalloc(blkInfo->length); + taosReadFile(pSortInfo->dataFile, buf, blkInfo->length); + SSDataBlock* pBlock = createOneDataBlock(pInfo->pReaderBlock, false); + blockDataFromBuf(pBlock, buf); + *ppBlock = pBlock; + + taosLRUCacheInsert(pSortInfo->pBlkInfoCache, &blockId, sizeof(blockId), pBlock, 1, deleteBlockDataCache, + &hBlk, TAOS_LRU_PRIORITY_LOW, NULL); + } + } + return 0; +} + +void appendOneRowIdRowToDataBlock(STableMergeScanInfo* pInfo, SSDataBlock* pBlock, STupleHandle* pTupleHandle) { + int32_t blkId = *(int32_t*)tsortGetValue(pTupleHandle, 1); + int32_t rowIdx = *(int32_t*)tsortGetValue(pTupleHandle, 2); + SSDataBlock* 
pSrcBlk = NULL; + retrieveSourceBlock(pInfo, blkId, &pSrcBlk); + + for (int32_t i = 0; i < taosArrayGetSize(pBlock->pDataBlock); ++i) { + SColumnInfoData* pColInfo = taosArrayGet(pBlock->pDataBlock, i); + SColumnInfoData* pSrcColInfo = taosArrayGet(pSrcBlk->pDataBlock, i); + + bool isNull = colDataIsNull_s(pSrcColInfo, rowIdx); + if (isNull) { + colDataSetNULL(pColInfo, pBlock->info.rows); + } else { + char* pData = colDataGetData(pSrcColInfo, i); + if (pData != NULL) { + colDataSetVal(pColInfo, pBlock->info.rows, pData, false); + } + } + } + + pBlock->info.dataLoad = 1; + pBlock->info.scanFlag = ((SDataBlockInfo*)tsortGetBlockInfo(pTupleHandle))->scanFlag; + pBlock->info.rows += 1; +} + static int32_t tableMergeScanDoSkipTable(STableMergeScanInfo* pInfo, SSDataBlock* pBlock) { int64_t nRows = 0; void* pNum = tSimpleHashGet(pInfo->mTableNumRows, &pBlock->info.id.uid, sizeof(pBlock->info.id.uid)); @@ -3308,11 +3443,17 @@ static SSDataBlock* getBlockForTableMergeScan(void* param) { if (pInfo->mergeLimit != -1) { tableMergeScanDoSkipTable(pInfo, pBlock); } - + SSDataBlock* pSortInputBlk = NULL; + if (pInfo->bSortRowId) { + pSortInputBlk = createOneDataBlock(pInfo->pSortInputBlock, false); + transformIntoSortInputBlock(pInfo, pBlock, pSortInputBlk); + } else { + pSortInputBlk = pBlock; + } pOperator->resultInfo.totalRows += pBlock->info.rows; pInfo->base.readRecorder.elapsedTime += (taosGetTimestampUs() - st) / 1000.0; - return pBlock; + return pSortInputBlk; } return NULL; @@ -3353,6 +3494,33 @@ void tableMergeScanTsdbNotifyCb(ETsdReaderNotifyType type, STsdReaderNotifyInfo* return; } +int32_t startRowIdSort(STableMergeScanInfo *pInfo) { + STmsSortRowIdInfo* pSort = &pInfo->tmsSortRowIdInfo; + pSort->blkId = 0; + pSort->dataFileOffset = 0; + taosGetTmpfilePath(tsTempDir, "tms-block-info", pSort->idxPath); + pSort->idxFile = taosOpenFile(pSort->idxPath, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_READ | TD_FILE_TRUNC | TD_FILE_AUTO_DEL); + taosGetTmpfilePath(tsTempDir, 
"tms-block-data", pSort->dataPath); + pSort->dataFile = taosOpenFile(pSort->dataPath, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_READ | TD_FILE_TRUNC | TD_FILE_AUTO_DEL); + pSort->pBlkInfoCache = taosLRUCacheInit(2048, 0, 0.5); + taosLRUCacheSetStrictCapacity(pSort->pBlkInfoCache, false); + pSort->pBlkDataCache = taosLRUCacheInit(2048, 0, 0.5); + taosLRUCacheSetStrictCapacity(pSort->pBlkInfoCache, false); + return 0; +} + +int32_t stopRowIdSort(STableMergeScanInfo *pInfo) { + STmsSortRowIdInfo* pSort = &pInfo->tmsSortRowIdInfo; + taosCloseFile(&pSort->idxFile); + taosRemoveFile(pSort->idxPath); + taosCloseFile(&pSort->dataFile); + taosRemoveFile(pSort->dataPath); + + taosLRUCacheCleanup(pSort->pBlkInfoCache); + taosLRUCacheCleanup(pSort->pBlkDataCache); + return 0; +} + int32_t startDurationForGroupTableMergeScan(SOperatorInfo* pOperator) { STableMergeScanInfo* pInfo = pOperator->info; SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; @@ -3361,6 +3529,7 @@ int32_t startDurationForGroupTableMergeScan(SOperatorInfo* pOperator) { pInfo->bNewFileset = false; + startRowIdSort(pInfo); pInfo->sortBufSize = 2048 * pInfo->bufPageSize; int32_t numOfBufPage = pInfo->sortBufSize / pInfo->bufPageSize; pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_BLOCK_TS_MERGE, pInfo->bufPageSize, numOfBufPage, @@ -3399,6 +3568,8 @@ void stopDurationForGroupTableMergeScan(SOperatorInfo* pOperator) { tsortDestroySortHandle(pInfo->pSortHandle); pInfo->pSortHandle = NULL; + + stopRowIdSort(pInfo); } int32_t startGroupTableMergeScan(SOperatorInfo* pOperator) { @@ -3475,8 +3646,11 @@ SSDataBlock* getSortedTableMergeScanBlockData(SSortHandle* pHandle, SSDataBlock* if (pTupleHandle == NULL) { break; } - - appendOneRowToDataBlock(pResBlock, pTupleHandle); + if (!pInfo->bSortRowId) { + appendOneRowToDataBlock(pResBlock, pTupleHandle); + } else { + appendOneRowIdRowToDataBlock(pInfo, pResBlock, pTupleHandle); + } if (pResBlock->info.rows >= capacity) { break; } @@ -3659,9 +3833,27 @@ 
SOperatorInfo* createTableMergeScanOperatorInfo(STableScanPhysiNode* pTableScanN blockDataEnsureCapacity(pInfo->pResBlock, pOperator->resultInfo.capacity); pInfo->sortSourceParams = taosArrayInit(64, sizeof(STableMergeScanSortSourceParam)); + if (!pInfo->bSortRowId) { + pInfo->pSortInfo = generateSortByTsInfo(pInfo->base.matchInfo.pList, pInfo->base.cond.order); + pInfo->pSortInputBlock = createOneDataBlock(pInfo->pResBlock, false); + } else { + SSDataBlock* pSortInput = createDataBlock(); + SColumnInfoData tsCol = createColumnInfoData(TSDB_DATA_TYPE_TIMESTAMP, 8, 1); + blockDataAppendColInfo(pSortInput, &tsCol); + SColumnInfoData blkIdCol = createColumnInfoData(TSDB_DATA_TYPE_INT, 4, 2); + blockDataAppendColInfo(pSortInput, &blkIdCol); + SColumnInfoData rowIdxCol = createColumnInfoData(TSDB_DATA_TYPE_INT, 4, 3); + blockDataAppendColInfo(pSortInput, &rowIdxCol); + pInfo->pSortInputBlock = pSortInput; - pInfo->pSortInfo = generateSortByTsInfo(pInfo->base.matchInfo.pList, pInfo->base.cond.order); - pInfo->pSortInputBlock = createOneDataBlock(pInfo->pResBlock, false); + SArray* pList = taosArrayInit(1, sizeof(SBlockOrderInfo)); + SBlockOrderInfo bi = {0}; + bi.order = pInfo->base.cond.order; + bi.slotId = 0; + bi.nullFirst = NULL_ORDER_FIRST; + taosArrayPush(pList, &bi); + pInfo->pSortInfo = pList; + } initLimitInfo(pTableScanNode->scan.node.pLimit, pTableScanNode->scan.node.pSlimit, &pInfo->limitInfo); pInfo->mTableNumRows = tSimpleHashInit(1024, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT)); @@ -3678,6 +3870,7 @@ SOperatorInfo* createTableMergeScanOperatorInfo(STableScanPhysiNode* pTableScanN pInfo->bufPageSize = getProperSortPageSize(rowSize, nCols); pInfo->filesetDelimited = pTableScanNode->filesetDelimited; + setOperatorInfo(pOperator, "TableMergeScanOperator", QUERY_NODE_PHYSICAL_PLAN_TABLE_MERGE_SCAN, false, OP_NOT_OPENED, pInfo, pTaskInfo); pOperator->exprSupp.numOfExprs = numOfCols; From 83139b8d483b5d64ae4ba61b2ec1c8636c6c2029 Mon Sep 17 00:00:00 2001 
From: slzhou Date: Fri, 8 Dec 2023 17:45:54 +0800 Subject: [PATCH 02/58] fix: save work --- source/libs/executor/src/scanoperator.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index d88433c5aa..07c6c02ba3 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -3251,7 +3251,8 @@ static int32_t fillSortInputBlock(const STableMergeScanInfo* pInfo, const STmsSortRowIdInfo* pSortInfo = &pInfo->tmsSortRowIdInfo; int32_t nRows = pSrcBlock->info.rows; - pSortInputBlk->info = pSrcBlock->info; + pSortInputBlk->info.window = pSrcBlock->info.window; + pSortInputBlk->info.id = pSrcBlock->info.id; blockDataEnsureCapacity(pSortInputBlk, nRows); int32_t tsSlotId = ((SBlockOrderInfo*)taosArrayGet(pInfo->pSortInfo, 0))->slotId; @@ -3266,6 +3267,8 @@ static int32_t fillSortInputBlock(const STableMergeScanInfo* pInfo, for (int32_t i = 0; i < nRows; ++i) { colDataSetInt32(rowIdxCol, i, &i); } + + pSortInputBlk->info.rows = nRows; return 0; } @@ -3305,15 +3308,17 @@ static int32_t retrieveSourceBlock(STableMergeScanInfo* pInfo, int32_t blockId, LRUHandle* hBlkInfo = taosLRUCacheLookup(pSortInfo->pBlkInfoCache, &blockId, sizeof(blockId)); if (hBlkInfo) { blkInfo = taosLRUCacheValue(pSortInfo->pBlkInfoCache, hBlkInfo); + uInfo("found block info: %d for %d, offset: %"PRId64", length: %d", blkInfo->blkId, blockId, blkInfo->offset, blkInfo->length) } else { blkInfo = taosMemoryMalloc(sizeof(STmsSortBlockInfo)); taosLSeekFile(pSortInfo->idxFile, blockId * sizeof(STmsSortBlockInfo), SEEK_SET); - taosReadFile(pSortInfo->idxFile, &blkInfo, sizeof(blkInfo)); + taosReadFile(pSortInfo->idxFile, blkInfo, sizeof(STmsSortBlockInfo)); ASSERT(blkInfo->blkId == blockId); taosLRUCacheInsert(pSortInfo->pBlkInfoCache, &blockId, sizeof(blockId), blkInfo, 1, deleteBlockInfoCache, &hBlkInfo, TAOS_LRU_PRIORITY_LOW, NULL); } { + 
uInfo("retrieve block info: %d, offset: %"PRId64", length: %d", blkInfo->blkId, blkInfo->offset, blkInfo->length) taosLSeekFile(pSortInfo->dataFile, blkInfo->offset, SEEK_SET); char* buf = taosMemoryMalloc(blkInfo->length); taosReadFile(pSortInfo->dataFile, buf, blkInfo->length); @@ -3443,14 +3448,17 @@ static SSDataBlock* getBlockForTableMergeScan(void* param) { if (pInfo->mergeLimit != -1) { tableMergeScanDoSkipTable(pInfo, pBlock); } - SSDataBlock* pSortInputBlk = NULL; + + pOperator->resultInfo.totalRows += pBlock->info.rows; + + SSDataBlock* pSortInputBlk = pInfo->pSortInputBlock; if (pInfo->bSortRowId) { - pSortInputBlk = createOneDataBlock(pInfo->pSortInputBlock, false); + blockDataCleanup(pSortInputBlk); transformIntoSortInputBlock(pInfo, pBlock, pSortInputBlk); } else { pSortInputBlk = pBlock; } - pOperator->resultInfo.totalRows += pBlock->info.rows; + pInfo->base.readRecorder.elapsedTime += (taosGetTimestampUs() - st) / 1000.0; return pSortInputBlk; @@ -3833,6 +3841,7 @@ SOperatorInfo* createTableMergeScanOperatorInfo(STableScanPhysiNode* pTableScanN blockDataEnsureCapacity(pInfo->pResBlock, pOperator->resultInfo.capacity); pInfo->sortSourceParams = taosArrayInit(64, sizeof(STableMergeScanSortSourceParam)); + pInfo->bSortRowId = true; if (!pInfo->bSortRowId) { pInfo->pSortInfo = generateSortByTsInfo(pInfo->base.matchInfo.pList, pInfo->base.cond.order); pInfo->pSortInputBlock = createOneDataBlock(pInfo->pResBlock, false); From 6acbbc3cffb93b7dc551a565d488c7338b0f0917 Mon Sep 17 00:00:00 2001 From: slzhou Date: Fri, 8 Dec 2023 21:15:06 +0800 Subject: [PATCH 03/58] fix: typo error --- source/libs/executor/src/scanoperator.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 07c6c02ba3..efa054cb9a 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -3308,7 +3308,6 @@ static int32_t 
retrieveSourceBlock(STableMergeScanInfo* pInfo, int32_t blockId, LRUHandle* hBlkInfo = taosLRUCacheLookup(pSortInfo->pBlkInfoCache, &blockId, sizeof(blockId)); if (hBlkInfo) { blkInfo = taosLRUCacheValue(pSortInfo->pBlkInfoCache, hBlkInfo); - uInfo("found block info: %d for %d, offset: %"PRId64", length: %d", blkInfo->blkId, blockId, blkInfo->offset, blkInfo->length) } else { blkInfo = taosMemoryMalloc(sizeof(STmsSortBlockInfo)); taosLSeekFile(pSortInfo->idxFile, blockId * sizeof(STmsSortBlockInfo), SEEK_SET); @@ -3318,7 +3317,6 @@ static int32_t retrieveSourceBlock(STableMergeScanInfo* pInfo, int32_t blockId, &hBlkInfo, TAOS_LRU_PRIORITY_LOW, NULL); } { - uInfo("retrieve block info: %d, offset: %"PRId64", length: %d", blkInfo->blkId, blkInfo->offset, blkInfo->length) taosLSeekFile(pSortInfo->dataFile, blkInfo->offset, SEEK_SET); char* buf = taosMemoryMalloc(blkInfo->length); taosReadFile(pSortInfo->dataFile, buf, blkInfo->length); @@ -3326,7 +3324,7 @@ static int32_t retrieveSourceBlock(STableMergeScanInfo* pInfo, int32_t blockId, blockDataFromBuf(pBlock, buf); *ppBlock = pBlock; - taosLRUCacheInsert(pSortInfo->pBlkInfoCache, &blockId, sizeof(blockId), pBlock, 1, deleteBlockDataCache, + taosLRUCacheInsert(pSortInfo->pBlkDataCache, &blockId, sizeof(blockId), pBlock, 1, deleteBlockDataCache, &hBlk, TAOS_LRU_PRIORITY_LOW, NULL); } } From 205e3ecf271027b458c7a0666be1f53309a8145a Mon Sep 17 00:00:00 2001 From: slzhou Date: Fri, 8 Dec 2023 21:49:36 +0800 Subject: [PATCH 04/58] fix: fix mem leak of block data buf --- source/libs/executor/src/scanoperator.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index efa054cb9a..77a4119be0 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -3233,15 +3233,16 @@ static int32_t saveSourceBlock(STmsSortRowIdInfo* pSortInfo, const SSDataBlock* return TSDB_CODE_OUT_OF_MEMORY; } 
blockDataToBuf(buf, pSrcBlock); - *pSzBlk = szBlk; - taosLSeekFile(pSortInfo->dataFile, pSortInfo->dataFileOffset, SEEK_SET); taosWriteFile(pSortInfo->dataFile, buf, szBlk); + taosMemoryFree(buf); STmsSortBlockInfo info = {.blkId = pSortInfo->blkId , .offset = pSortInfo->dataFileOffset, .length = szBlk}; taosLSeekFile(pSortInfo->idxFile, pSortInfo->blkId*sizeof(STmsSortBlockInfo), SEEK_SET); taosWriteFile(pSortInfo->idxFile, &info, sizeof(info)); + + *pSzBlk = szBlk; return 0; } @@ -3322,6 +3323,8 @@ static int32_t retrieveSourceBlock(STableMergeScanInfo* pInfo, int32_t blockId, taosReadFile(pSortInfo->dataFile, buf, blkInfo->length); SSDataBlock* pBlock = createOneDataBlock(pInfo->pReaderBlock, false); blockDataFromBuf(pBlock, buf); + taosMemoryFree(buf); + *ppBlock = pBlock; taosLRUCacheInsert(pSortInfo->pBlkDataCache, &blockId, sizeof(blockId), pBlock, 1, deleteBlockDataCache, From 92a695cd2f3b68ca51ccb160d7967b5a6f7fd6e3 Mon Sep 17 00:00:00 2001 From: slzhou Date: Fri, 8 Dec 2023 22:12:24 +0800 Subject: [PATCH 05/58] fix: memory leaks --- source/libs/executor/src/scanoperator.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 77a4119be0..bea202f96d 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -3241,7 +3241,7 @@ static int32_t saveSourceBlock(STmsSortRowIdInfo* pSortInfo, const SSDataBlock* , .offset = pSortInfo->dataFileOffset, .length = szBlk}; taosLSeekFile(pSortInfo->idxFile, pSortInfo->blkId*sizeof(STmsSortBlockInfo), SEEK_SET); taosWriteFile(pSortInfo->idxFile, &info, sizeof(info)); - + *pSzBlk = szBlk; return 0; @@ -3525,7 +3525,9 @@ int32_t stopRowIdSort(STableMergeScanInfo *pInfo) { taosCloseFile(&pSort->dataFile); taosRemoveFile(pSort->dataPath); + taosLRUCacheEraseUnrefEntries(pSort->pBlkInfoCache); taosLRUCacheCleanup(pSort->pBlkInfoCache); + 
taosLRUCacheEraseUnrefEntries(pSort->pBlkDataCache); taosLRUCacheCleanup(pSort->pBlkDataCache); return 0; } From 9a9264e03a2e197c67b03fb61fe7b5321eab9fd6 Mon Sep 17 00:00:00 2001 From: slzhou Date: Sat, 9 Dec 2023 19:00:23 +0800 Subject: [PATCH 06/58] fix: fix memory leak --- source/libs/executor/src/scanoperator.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index bea202f96d..a33cc11a6e 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -3303,19 +3303,21 @@ static int32_t retrieveSourceBlock(STableMergeScanInfo* pInfo, int32_t blockId, LRUHandle* hBlk = taosLRUCacheLookup(pSortInfo->pBlkDataCache, &blockId, sizeof(blockId)); if (hBlk) { SSDataBlock* pBlk = taosLRUCacheValue(pSortInfo->pBlkDataCache, hBlk); + taosLRUCacheRelease(pSortInfo->pBlkDataCache, hBlk, false); *ppBlock = pBlk; } else { STmsSortBlockInfo* blkInfo = NULL; LRUHandle* hBlkInfo = taosLRUCacheLookup(pSortInfo->pBlkInfoCache, &blockId, sizeof(blockId)); if (hBlkInfo) { blkInfo = taosLRUCacheValue(pSortInfo->pBlkInfoCache, hBlkInfo); + taosLRUCacheRelease(pSortInfo->pBlkInfoCache, hBlkInfo, false); } else { blkInfo = taosMemoryMalloc(sizeof(STmsSortBlockInfo)); taosLSeekFile(pSortInfo->idxFile, blockId * sizeof(STmsSortBlockInfo), SEEK_SET); taosReadFile(pSortInfo->idxFile, blkInfo, sizeof(STmsSortBlockInfo)); ASSERT(blkInfo->blkId == blockId); taosLRUCacheInsert(pSortInfo->pBlkInfoCache, &blockId, sizeof(blockId), blkInfo, 1, deleteBlockInfoCache, - &hBlkInfo, TAOS_LRU_PRIORITY_LOW, NULL); + NULL, TAOS_LRU_PRIORITY_LOW, NULL); } { taosLSeekFile(pSortInfo->dataFile, blkInfo->offset, SEEK_SET); @@ -3328,7 +3330,7 @@ static int32_t retrieveSourceBlock(STableMergeScanInfo* pInfo, int32_t blockId, *ppBlock = pBlock; taosLRUCacheInsert(pSortInfo->pBlkDataCache, &blockId, sizeof(blockId), pBlock, 1, deleteBlockDataCache, - &hBlk, 
TAOS_LRU_PRIORITY_LOW, NULL); + NULL, TAOS_LRU_PRIORITY_LOW, NULL); } } return 0; @@ -3511,9 +3513,9 @@ int32_t startRowIdSort(STableMergeScanInfo *pInfo) { pSort->idxFile = taosOpenFile(pSort->idxPath, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_READ | TD_FILE_TRUNC | TD_FILE_AUTO_DEL); taosGetTmpfilePath(tsTempDir, "tms-block-data", pSort->dataPath); pSort->dataFile = taosOpenFile(pSort->dataPath, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_READ | TD_FILE_TRUNC | TD_FILE_AUTO_DEL); - pSort->pBlkInfoCache = taosLRUCacheInit(2048, 0, 0.5); + pSort->pBlkInfoCache = taosLRUCacheInit(2048, -1, 0.5); taosLRUCacheSetStrictCapacity(pSort->pBlkInfoCache, false); - pSort->pBlkDataCache = taosLRUCacheInit(2048, 0, 0.5); + pSort->pBlkDataCache = taosLRUCacheInit(2048, -1, 0.5); taosLRUCacheSetStrictCapacity(pSort->pBlkInfoCache, false); return 0; } From d01e82439e2e232998cc98b4ea3efdb0a9654522 Mon Sep 17 00:00:00 2001 From: slzhou Date: Sun, 10 Dec 2023 19:11:39 +0800 Subject: [PATCH 07/58] feature: replace two lru cache with hash --- source/libs/executor/inc/executorInt.h | 3 +- source/libs/executor/src/scanoperator.c | 69 ++++++++----------------- 2 files changed, 23 insertions(+), 49 deletions(-) diff --git a/source/libs/executor/inc/executorInt.h b/source/libs/executor/inc/executorInt.h index c103d8eee7..9a9338a6c7 100644 --- a/source/libs/executor/inc/executorInt.h +++ b/source/libs/executor/inc/executorInt.h @@ -280,8 +280,7 @@ typedef struct STmsSortRowIdInfo { char idxPath[PATH_MAX]; TdFilePtr dataFile; char dataPath[PATH_MAX]; - SLRUCache* pBlkInfoCache; // blkId->(offset, len) - SLRUCache* pBlkDataCache; // blkId->SSDataBlock* + SSHashObj* pBlkDataHash; // blkId->SSDataBlock* } STmsSortRowIdInfo; typedef struct STableMergeScanInfo { diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index a33cc11a6e..469d119523 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -3288,50 
+3288,28 @@ static int32_t transformIntoSortInputBlock(STableMergeScanInfo* pInfo, SSDataBlo return code; } -static void deleteBlockInfoCache(const void *key, size_t keyLen, void *value, void *ud) { - taosMemoryFree(value); -} - -static void deleteBlockDataCache(const void *key, size_t keyLen, void *value, void *ud) { - SSDataBlock* pBlock = value; - blockDataDestroy(pBlock); -} static int32_t retrieveSourceBlock(STableMergeScanInfo* pInfo, int32_t blockId, SSDataBlock** ppBlock) { STmsSortRowIdInfo* pSortInfo = &pInfo->tmsSortRowIdInfo; - LRUHandle* hBlk = taosLRUCacheLookup(pSortInfo->pBlkDataCache, &blockId, sizeof(blockId)); - if (hBlk) { - SSDataBlock* pBlk = taosLRUCacheValue(pSortInfo->pBlkDataCache, hBlk); - taosLRUCacheRelease(pSortInfo->pBlkDataCache, hBlk, false); - *ppBlock = pBlk; - } else { - STmsSortBlockInfo* blkInfo = NULL; - LRUHandle* hBlkInfo = taosLRUCacheLookup(pSortInfo->pBlkInfoCache, &blockId, sizeof(blockId)); - if (hBlkInfo) { - blkInfo = taosLRUCacheValue(pSortInfo->pBlkInfoCache, hBlkInfo); - taosLRUCacheRelease(pSortInfo->pBlkInfoCache, hBlkInfo, false); - } else { - blkInfo = taosMemoryMalloc(sizeof(STmsSortBlockInfo)); - taosLSeekFile(pSortInfo->idxFile, blockId * sizeof(STmsSortBlockInfo), SEEK_SET); - taosReadFile(pSortInfo->idxFile, blkInfo, sizeof(STmsSortBlockInfo)); - ASSERT(blkInfo->blkId == blockId); - taosLRUCacheInsert(pSortInfo->pBlkInfoCache, &blockId, sizeof(blockId), blkInfo, 1, deleteBlockInfoCache, - NULL, TAOS_LRU_PRIORITY_LOW, NULL); - } - { - taosLSeekFile(pSortInfo->dataFile, blkInfo->offset, SEEK_SET); - char* buf = taosMemoryMalloc(blkInfo->length); - taosReadFile(pSortInfo->dataFile, buf, blkInfo->length); - SSDataBlock* pBlock = createOneDataBlock(pInfo->pReaderBlock, false); - blockDataFromBuf(pBlock, buf); - taosMemoryFree(buf); + void* pBlkHashVal = tSimpleHashGet(pSortInfo->pBlkDataHash, &blockId, sizeof(blockId)); + if (pBlkHashVal) { + *ppBlock = *(SSDataBlock**)pBlkHashVal; + } + else { + 
STmsSortBlockInfo blkInfo = {0}; - *ppBlock = pBlock; + taosLSeekFile(pSortInfo->idxFile, blockId * sizeof(STmsSortBlockInfo), SEEK_SET); + taosReadFile(pSortInfo->idxFile, &blkInfo, sizeof(STmsSortBlockInfo)); + taosLSeekFile(pSortInfo->dataFile, blkInfo.offset, SEEK_SET); + char* buf = taosMemoryMalloc(blkInfo.length); + taosReadFile(pSortInfo->dataFile, buf, blkInfo.length); + SSDataBlock* pBlock = createOneDataBlock(pInfo->pReaderBlock, false); + blockDataFromBuf(pBlock, buf); + taosMemoryFree(buf); - taosLRUCacheInsert(pSortInfo->pBlkDataCache, &blockId, sizeof(blockId), pBlock, 1, deleteBlockDataCache, - NULL, TAOS_LRU_PRIORITY_LOW, NULL); - } + *ppBlock = pBlock; + tSimpleHashPut(pSortInfo->pBlkDataHash, &blockId, sizeof(blockId), &pBlock, sizeof(pBlock)); } return 0; } @@ -3356,7 +3334,10 @@ void appendOneRowIdRowToDataBlock(STableMergeScanInfo* pInfo, SSDataBlock* pBloc } } } - + if (rowIdx == pSrcBlk->info.rows - 1) { + tSimpleHashRemove(pInfo->tmsSortRowIdInfo.pBlkDataHash, &blkId, sizeof(blkId)); + blockDataDestroy(pSrcBlk); + } pBlock->info.dataLoad = 1; pBlock->info.scanFlag = ((SDataBlockInfo*)tsortGetBlockInfo(pTupleHandle))->scanFlag; pBlock->info.rows += 1; @@ -3513,10 +3494,7 @@ int32_t startRowIdSort(STableMergeScanInfo *pInfo) { pSort->idxFile = taosOpenFile(pSort->idxPath, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_READ | TD_FILE_TRUNC | TD_FILE_AUTO_DEL); taosGetTmpfilePath(tsTempDir, "tms-block-data", pSort->dataPath); pSort->dataFile = taosOpenFile(pSort->dataPath, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_READ | TD_FILE_TRUNC | TD_FILE_AUTO_DEL); - pSort->pBlkInfoCache = taosLRUCacheInit(2048, -1, 0.5); - taosLRUCacheSetStrictCapacity(pSort->pBlkInfoCache, false); - pSort->pBlkDataCache = taosLRUCacheInit(2048, -1, 0.5); - taosLRUCacheSetStrictCapacity(pSort->pBlkInfoCache, false); + pSort->pBlkDataHash = tSimpleHashInit(2048, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT)); return 0; } @@ -3527,10 +3505,7 @@ int32_t 
stopRowIdSort(STableMergeScanInfo *pInfo) { taosCloseFile(&pSort->dataFile); taosRemoveFile(pSort->dataPath); - taosLRUCacheEraseUnrefEntries(pSort->pBlkInfoCache); - taosLRUCacheCleanup(pSort->pBlkInfoCache); - taosLRUCacheEraseUnrefEntries(pSort->pBlkDataCache); - taosLRUCacheCleanup(pSort->pBlkDataCache); + tSimpleHashCleanup(pSort->pBlkDataHash); return 0; } From c705a71bd9eaf44feadf8b7137d3afb567315081 Mon Sep 17 00:00:00 2001 From: slzhou Date: Sun, 10 Dec 2023 22:25:46 +0800 Subject: [PATCH 08/58] feat: use disk based buf for src block storage --- source/libs/executor/inc/executorInt.h | 7 +- source/libs/executor/src/scanoperator.c | 109 +++++++++--------------- 2 files changed, 40 insertions(+), 76 deletions(-) diff --git a/source/libs/executor/inc/executorInt.h b/source/libs/executor/inc/executorInt.h index 9a9338a6c7..7a9160b392 100644 --- a/source/libs/executor/inc/executorInt.h +++ b/source/libs/executor/inc/executorInt.h @@ -274,12 +274,7 @@ typedef struct STableScanInfo { } STableScanInfo; typedef struct STmsSortRowIdInfo { - int32_t blkId; - int64_t dataFileOffset; - TdFilePtr idxFile; - char idxPath[PATH_MAX]; - TdFilePtr dataFile; - char dataPath[PATH_MAX]; + SDiskbasedBuf* pExtSrcBlkBuf; SSHashObj* pBlkDataHash; // blkId->SSDataBlock* } STmsSortRowIdInfo; diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 469d119523..6f2e19e694 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -3220,36 +3220,8 @@ _error: } // ========================= table merge scan -typedef struct STmsSortBlockInfo { - int32_t blkId; - int32_t length; - int64_t offset; -} STmsSortBlockInfo; - -static int32_t saveSourceBlock(STmsSortRowIdInfo* pSortInfo, const SSDataBlock* pSrcBlock, int32_t *pSzBlk) { - int32_t szBlk = blockDataGetSize(pSrcBlock) + sizeof(int32_t) + taosArrayGetSize(pSrcBlock->pDataBlock) * sizeof(int32_t); - char* buf = taosMemoryMalloc(szBlk); - if (buf == 
NULL) { - return TSDB_CODE_OUT_OF_MEMORY; - } - blockDataToBuf(buf, pSrcBlock); - taosLSeekFile(pSortInfo->dataFile, pSortInfo->dataFileOffset, SEEK_SET); - taosWriteFile(pSortInfo->dataFile, buf, szBlk); - taosMemoryFree(buf); - - STmsSortBlockInfo info = {.blkId = pSortInfo->blkId - , .offset = pSortInfo->dataFileOffset, .length = szBlk}; - taosLSeekFile(pSortInfo->idxFile, pSortInfo->blkId*sizeof(STmsSortBlockInfo), SEEK_SET); - taosWriteFile(pSortInfo->idxFile, &info, sizeof(info)); - - *pSzBlk = szBlk; - - return 0; -} - -static int32_t fillSortInputBlock(const STableMergeScanInfo* pInfo, - const SSDataBlock* pSrcBlock, SSDataBlock* pSortInputBlk) { - const STmsSortRowIdInfo* pSortInfo = &pInfo->tmsSortRowIdInfo; +static int32_t transformIntoSortInputBlock(STableMergeScanInfo* pInfo, SSDataBlock* pSrcBlock, SSDataBlock* pSortInputBlk) { + STmsSortRowIdInfo* pSortInfo = &pInfo->tmsSortRowIdInfo; int32_t nRows = pSrcBlock->info.rows; pSortInputBlk->info.window = pSrcBlock->info.window; @@ -3262,32 +3234,41 @@ static int32_t fillSortInputBlock(const STableMergeScanInfo* pInfo, colDataAssign(tsCol, pSrcTsCol, nRows, &pSortInputBlk->info); SColumnInfoData* blkIdCol = taosArrayGet(pSortInputBlk->pDataBlock, 1); - colDataSetNItems(blkIdCol, 0, (char*)&pSortInfo->blkId, nRows, false); - SColumnInfoData* rowIdxCol = taosArrayGet(pSortInputBlk->pDataBlock, 2); - for (int32_t i = 0; i < nRows; ++i) { - colDataSetInt32(rowIdxCol, i, &i); + + int32_t start = 0; + while (start < pSrcBlock->info.rows) { + int32_t stop = 0; + blockDataSplitRows(pSrcBlock, pSrcBlock->info.hasVarCol, start, &stop, pInfo->bufPageSize); + SSDataBlock* p = blockDataExtractBlock(pSrcBlock, start, stop-start+1); + + int32_t pageId = -1; + void* pPage = getNewBufPage(pSortInfo->pExtSrcBlkBuf, &pageId); + + int32_t size = blockDataGetSize(p) + sizeof(int32_t) + taosArrayGetSize(p->pDataBlock) * sizeof(int32_t); + ASSERT(size <= getBufPageSize(pSortInfo->pExtSrcBlkBuf)); + + blockDataToBuf(pPage, p); + 
+ setBufPageDirty(pPage, true); + releaseBufPage(pSortInfo->pExtSrcBlkBuf, pPage); + + blockDataDestroy(p); + uInfo("sort input block pageId %d start %d, stop %d", pageId, start, stop); + colDataSetNItems(blkIdCol, start, (char*)&pageId, stop-start+1, false); + + for (int32_t i = start; i <= stop; ++i) { + int32_t rowIdx = i - start; + colDataSetInt32(rowIdxCol, i, &rowIdx); + } + start = stop + 1; } pSortInputBlk->info.rows = nRows; + return 0; } -static int32_t transformIntoSortInputBlock(STableMergeScanInfo* pInfo, SSDataBlock* pSrcBlock, SSDataBlock* pSortInputBlk) { - //TODO: batch save - int32_t code = 0; - STmsSortRowIdInfo* pSortInfo = &pInfo->tmsSortRowIdInfo; - int32_t szBlk = 0; - code = saveSourceBlock(pSortInfo, pSrcBlock, &szBlk); - - fillSortInputBlock(pInfo, pSrcBlock, pSortInputBlk); - - ++pSortInfo->blkId; - pSortInfo->dataFileOffset = ((pSortInfo->dataFileOffset + szBlk) + 4096) & ~4096; - - return code; -} - static int32_t retrieveSourceBlock(STableMergeScanInfo* pInfo, int32_t blockId, SSDataBlock** ppBlock) { STmsSortRowIdInfo* pSortInfo = &pInfo->tmsSortRowIdInfo; @@ -3297,16 +3278,10 @@ static int32_t retrieveSourceBlock(STableMergeScanInfo* pInfo, int32_t blockId, *ppBlock = *(SSDataBlock**)pBlkHashVal; } else { - STmsSortBlockInfo blkInfo = {0}; - - taosLSeekFile(pSortInfo->idxFile, blockId * sizeof(STmsSortBlockInfo), SEEK_SET); - taosReadFile(pSortInfo->idxFile, &blkInfo, sizeof(STmsSortBlockInfo)); - taosLSeekFile(pSortInfo->dataFile, blkInfo.offset, SEEK_SET); - char* buf = taosMemoryMalloc(blkInfo.length); - taosReadFile(pSortInfo->dataFile, buf, blkInfo.length); + void* pPage = getBufPage(pSortInfo->pExtSrcBlkBuf, blockId); SSDataBlock* pBlock = createOneDataBlock(pInfo->pReaderBlock, false); - blockDataFromBuf(pBlock, buf); - taosMemoryFree(buf); + blockDataFromBuf(pBlock, pPage); + releaseBufPage(pSortInfo->pExtSrcBlkBuf, pPage); *ppBlock = pBlock; tSimpleHashPut(pSortInfo->pBlkDataHash, &blockId, sizeof(blockId), &pBlock, 
sizeof(pBlock)); @@ -3318,8 +3293,9 @@ void appendOneRowIdRowToDataBlock(STableMergeScanInfo* pInfo, SSDataBlock* pBloc int32_t blkId = *(int32_t*)tsortGetValue(pTupleHandle, 1); int32_t rowIdx = *(int32_t*)tsortGetValue(pTupleHandle, 2); SSDataBlock* pSrcBlk = NULL; + uInfo("sort tuple blkId %d, row idx %d", blkId, rowIdx); retrieveSourceBlock(pInfo, blkId, &pSrcBlk); - + for (int32_t i = 0; i < taosArrayGetSize(pBlock->pDataBlock); ++i) { SColumnInfoData* pColInfo = taosArrayGet(pBlock->pDataBlock, i); SColumnInfoData* pSrcColInfo = taosArrayGet(pSrcBlk->pDataBlock, i); @@ -3328,7 +3304,7 @@ void appendOneRowIdRowToDataBlock(STableMergeScanInfo* pInfo, SSDataBlock* pBloc if (isNull) { colDataSetNULL(pColInfo, pBlock->info.rows); } else { - char* pData = colDataGetData(pSrcColInfo, i); + char* pData = colDataGetData(pSrcColInfo, rowIdx); if (pData != NULL) { colDataSetVal(pColInfo, pBlock->info.rows, pData, false); } @@ -3488,23 +3464,16 @@ void tableMergeScanTsdbNotifyCb(ETsdReaderNotifyType type, STsdReaderNotifyInfo* int32_t startRowIdSort(STableMergeScanInfo *pInfo) { STmsSortRowIdInfo* pSort = &pInfo->tmsSortRowIdInfo; - pSort->blkId = 0; - pSort->dataFileOffset = 0; - taosGetTmpfilePath(tsTempDir, "tms-block-info", pSort->idxPath); - pSort->idxFile = taosOpenFile(pSort->idxPath, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_READ | TD_FILE_TRUNC | TD_FILE_AUTO_DEL); - taosGetTmpfilePath(tsTempDir, "tms-block-data", pSort->dataPath); - pSort->dataFile = taosOpenFile(pSort->dataPath, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_READ | TD_FILE_TRUNC | TD_FILE_AUTO_DEL); + createDiskbasedBuf(&pSort->pExtSrcBlkBuf, pInfo->bufPageSize, pInfo->sortBufSize, "tms-ext-src-block", tsTempDir); + dBufSetPrintInfo(pSort->pExtSrcBlkBuf); pSort->pBlkDataHash = tSimpleHashInit(2048, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT)); return 0; } int32_t stopRowIdSort(STableMergeScanInfo *pInfo) { STmsSortRowIdInfo* pSort = &pInfo->tmsSortRowIdInfo; - taosCloseFile(&pSort->idxFile); - 
taosRemoveFile(pSort->idxPath); - taosCloseFile(&pSort->dataFile); - taosRemoveFile(pSort->dataPath); + destroyDiskbasedBuf(pSort->pExtSrcBlkBuf); tSimpleHashCleanup(pSort->pBlkDataHash); return 0; } From 3b1a1859496741a35bacd49d2cf1edad7fd5d3d9 Mon Sep 17 00:00:00 2001 From: slzhou Date: Mon, 11 Dec 2023 09:12:16 +0800 Subject: [PATCH 09/58] fix: coldataSetNItems error --- source/libs/executor/src/scanoperator.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 6f2e19e694..8914c2aeac 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -3254,10 +3254,8 @@ static int32_t transformIntoSortInputBlock(STableMergeScanInfo* pInfo, SSDataBlo releaseBufPage(pSortInfo->pExtSrcBlkBuf, pPage); blockDataDestroy(p); - uInfo("sort input block pageId %d start %d, stop %d", pageId, start, stop); - colDataSetNItems(blkIdCol, start, (char*)&pageId, stop-start+1, false); - for (int32_t i = start; i <= stop; ++i) { + colDataSetInt32(blkIdCol, i, &pageId); int32_t rowIdx = i - start; colDataSetInt32(rowIdxCol, i, &rowIdx); } @@ -3293,7 +3291,6 @@ void appendOneRowIdRowToDataBlock(STableMergeScanInfo* pInfo, SSDataBlock* pBloc int32_t blkId = *(int32_t*)tsortGetValue(pTupleHandle, 1); int32_t rowIdx = *(int32_t*)tsortGetValue(pTupleHandle, 2); SSDataBlock* pSrcBlk = NULL; - uInfo("sort tuple blkId %d, row idx %d", blkId, rowIdx); retrieveSourceBlock(pInfo, blkId, &pSrcBlk); for (int32_t i = 0; i < taosArrayGetSize(pBlock->pDataBlock); ++i) { @@ -3411,10 +3408,11 @@ static SSDataBlock* getBlockForTableMergeScan(void* param) { pOperator->resultInfo.totalRows += pBlock->info.rows; - SSDataBlock* pSortInputBlk = pInfo->pSortInputBlock; + SSDataBlock* pSortInputBlk = NULL; if (pInfo->bSortRowId) { - blockDataCleanup(pSortInputBlk); - transformIntoSortInputBlock(pInfo, pBlock, pSortInputBlk); + 
blockDataCleanup(pInfo->pSortInputBlock); + transformIntoSortInputBlock(pInfo, pBlock, pInfo->pSortInputBlock); + pSortInputBlk = pInfo->pSortInputBlock; } else { pSortInputBlk = pBlock; } @@ -3474,7 +3472,9 @@ int32_t stopRowIdSort(STableMergeScanInfo *pInfo) { STmsSortRowIdInfo* pSort = &pInfo->tmsSortRowIdInfo; destroyDiskbasedBuf(pSort->pExtSrcBlkBuf); + pSort->pExtSrcBlkBuf = NULL; tSimpleHashCleanup(pSort->pBlkDataHash); + pSort->pBlkDataHash = NULL; return 0; } From 7f93cb9f1a5f0524dcb917c626d119bdc0f528f1 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Wed, 13 Dec 2023 11:38:14 +0800 Subject: [PATCH 10/58] fix: use pageid, offset, length as row index --- source/libs/executor/inc/executorInt.h | 3 +- source/libs/executor/src/scanoperator.c | 197 +++++++++++++++--------- 2 files changed, 124 insertions(+), 76 deletions(-) diff --git a/source/libs/executor/inc/executorInt.h b/source/libs/executor/inc/executorInt.h index 7a9160b392..35f2e28d36 100644 --- a/source/libs/executor/inc/executorInt.h +++ b/source/libs/executor/inc/executorInt.h @@ -274,8 +274,7 @@ typedef struct STableScanInfo { } STableScanInfo; typedef struct STmsSortRowIdInfo { - SDiskbasedBuf* pExtSrcBlkBuf; - SSHashObj* pBlkDataHash; // blkId->SSDataBlock* + SDiskbasedBuf* pExtSrcRowsBuf; } STmsSortRowIdInfo; typedef struct STableMergeScanInfo { diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 8914c2aeac..be5cf298a4 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -3220,6 +3220,80 @@ _error: } // ========================= table merge scan + +static int32_t saveBlockRowToBuf(STableMergeScanInfo* pInfo, SSDataBlock* pBlock, int32_t rowIdx, int32_t* pPageId, int32_t* pOffset, int32_t* pLength) { + SDiskbasedBuf* pResultBuf = pInfo->tmsSortRowIdInfo.pExtSrcRowsBuf; + int32_t rowBytes = blockDataGetRowSize(pBlock); + + SFilePage* pFilePage = NULL; + + // in the first scan, new space needed for 
results + int32_t pageId = -1; + SArray* list = getDataBufPagesIdList(pResultBuf); + + if (taosArrayGetSize(list) == 0) { + pFilePage = getNewBufPage(pResultBuf, &pageId); + pFilePage->num = sizeof(SFilePage); + } else { + SPageInfo* pi = getLastPageInfo(list); + pFilePage = getBufPage(pResultBuf, getPageId(pi)); + if (pFilePage == NULL) { + qError("failed to get buffer, code:%s", tstrerror(terrno)); + return terrno; + } + + pageId = getPageId(pi); + + if (pFilePage->num + rowBytes > getBufPageSize(pResultBuf)) { + // release current page first, and prepare the next one + releaseBufPageInfo(pResultBuf, pi); + + pFilePage = getNewBufPage(pResultBuf, &pageId); + if (pFilePage != NULL) { + pFilePage->num = sizeof(SFilePage); + } + } + } + + if (pFilePage == NULL) { + return -1; + } + *pPageId = pageId; + *pOffset = pFilePage->num; + char* buf = (char*)pFilePage + (*pOffset); + size_t numOfCols = taosArrayGetSize(pBlock->pDataBlock); + + char* isNull = (char*)buf; + char* pStart = (char*)buf + sizeof(int8_t) * numOfCols; + for (int32_t i = 0; i < numOfCols; ++i) { + SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, i); + if (colDataIsNull_s(pCol, rowIdx)) { + isNull[i] = 1; + continue; + } + + isNull[i] = 0; + char* pData = colDataGetData(pCol, rowIdx); + if (pCol->info.type == TSDB_DATA_TYPE_JSON) { + int32_t dataLen = getJsonValueLen(pData); + memcpy(pStart, pData, dataLen); + pStart += dataLen; + } else if (IS_VAR_DATA_TYPE(pCol->info.type)) { + varDataCopy(pStart, pData); + pStart += varDataTLen(pData); + } else { + int32_t bytes = pCol->info.bytes; + memcpy(pStart, pData, bytes); + pStart += bytes; + } + } + *pLength = (int32_t)(pStart - (char*)buf); + pFilePage->num += (*pLength); + setBufPageDirty(pFilePage, true); + releaseBufPage(pResultBuf, pFilePage); + return 0; +} + static int32_t transformIntoSortInputBlock(STableMergeScanInfo* pInfo, SSDataBlock* pSrcBlock, SSDataBlock* pSortInputBlk) { STmsSortRowIdInfo* pSortInfo = &pInfo->tmsSortRowIdInfo; @@ 
-3233,33 +3307,18 @@ static int32_t transformIntoSortInputBlock(STableMergeScanInfo* pInfo, SSDataBlo SColumnInfoData* pSrcTsCol = taosArrayGet(pSrcBlock->pDataBlock, tsSlotId); colDataAssign(tsCol, pSrcTsCol, nRows, &pSortInputBlk->info); - SColumnInfoData* blkIdCol = taosArrayGet(pSortInputBlk->pDataBlock, 1); - SColumnInfoData* rowIdxCol = taosArrayGet(pSortInputBlk->pDataBlock, 2); - - int32_t start = 0; - while (start < pSrcBlock->info.rows) { - int32_t stop = 0; - blockDataSplitRows(pSrcBlock, pSrcBlock->info.hasVarCol, start, &stop, pInfo->bufPageSize); - SSDataBlock* p = blockDataExtractBlock(pSrcBlock, start, stop-start+1); + SColumnInfoData* pageIdCol = taosArrayGet(pSortInputBlk->pDataBlock, 1); + SColumnInfoData* offsetCol = taosArrayGet(pSortInputBlk->pDataBlock, 2); + SColumnInfoData* lengthCol = taosArrayGet(pSortInputBlk->pDataBlock, 3); + for (int32_t i = 0; i < pSrcBlock->info.rows; ++i) { int32_t pageId = -1; - void* pPage = getNewBufPage(pSortInfo->pExtSrcBlkBuf, &pageId); - - int32_t size = blockDataGetSize(p) + sizeof(int32_t) + taosArrayGetSize(p->pDataBlock) * sizeof(int32_t); - ASSERT(size <= getBufPageSize(pSortInfo->pExtSrcBlkBuf)); - - blockDataToBuf(pPage, p); - - setBufPageDirty(pPage, true); - releaseBufPage(pSortInfo->pExtSrcBlkBuf, pPage); - - blockDataDestroy(p); - for (int32_t i = start; i <= stop; ++i) { - colDataSetInt32(blkIdCol, i, &pageId); - int32_t rowIdx = i - start; - colDataSetInt32(rowIdxCol, i, &rowIdx); - } - start = stop + 1; + int32_t offset = -1; + int32_t length = -1; + saveBlockRowToBuf(pInfo, pSrcBlock, i, &pageId, &offset, &length); + colDataSetInt32(pageIdCol, i, &pageId); + colDataSetInt32(pageIdCol, i, &offset); + colDataSetInt32(pageIdCol, i, &length); } pSortInputBlk->info.rows = nRows; @@ -3267,50 +3326,38 @@ static int32_t transformIntoSortInputBlock(STableMergeScanInfo* pInfo, SSDataBlo return 0; } +void appendOneRowIdRowToDataBlock(STableMergeScanInfo* pInfo, SSDataBlock* pBlock, STupleHandle* 
pTupleHandle) { + int32_t pageId = *(int32_t*)tsortGetValue(pTupleHandle, 1); + int32_t offset = *(int32_t*)tsortGetValue(pTupleHandle, 2); + int32_t length = *(int32_t*)tsortGetValue(pTupleHandle, 2); + void* page = getBufPage(pInfo->tmsSortRowIdInfo.pExtSrcRowsBuf, pageId); -static int32_t retrieveSourceBlock(STableMergeScanInfo* pInfo, int32_t blockId, SSDataBlock** ppBlock) { STmsSortRowIdInfo* pSortInfo = &pInfo->tmsSortRowIdInfo; - void* pBlkHashVal = tSimpleHashGet(pSortInfo->pBlkDataHash, &blockId, sizeof(blockId)); - if (pBlkHashVal) { - *ppBlock = *(SSDataBlock**)pBlkHashVal; - } - else { - void* pPage = getBufPage(pSortInfo->pExtSrcBlkBuf, blockId); - SSDataBlock* pBlock = createOneDataBlock(pInfo->pReaderBlock, false); - blockDataFromBuf(pBlock, pPage); - releaseBufPage(pSortInfo->pExtSrcBlkBuf, pPage); - - *ppBlock = pBlock; - tSimpleHashPut(pSortInfo->pBlkDataHash, &blockId, sizeof(blockId), &pBlock, sizeof(pBlock)); - } - return 0; -} - -void appendOneRowIdRowToDataBlock(STableMergeScanInfo* pInfo, SSDataBlock* pBlock, STupleHandle* pTupleHandle) { - int32_t blkId = *(int32_t*)tsortGetValue(pTupleHandle, 1); - int32_t rowIdx = *(int32_t*)tsortGetValue(pTupleHandle, 2); - SSDataBlock* pSrcBlk = NULL; - retrieveSourceBlock(pInfo, blkId, &pSrcBlk); - - for (int32_t i = 0; i < taosArrayGetSize(pBlock->pDataBlock); ++i) { + int32_t numOfCols = taosArrayGetSize(pBlock->pDataBlock); + char* buf = (char*)page + offset; + char* isNull = (char*)buf; + char* pStart = (char*)buf + sizeof(int8_t) * numOfCols; + for (int32_t i = 0; i < numOfCols; ++i) { SColumnInfoData* pColInfo = taosArrayGet(pBlock->pDataBlock, i); - SColumnInfoData* pSrcColInfo = taosArrayGet(pSrcBlk->pDataBlock, i); - bool isNull = colDataIsNull_s(pSrcColInfo, rowIdx); - if (isNull) { - colDataSetNULL(pColInfo, pBlock->info.rows); - } else { - char* pData = colDataGetData(pSrcColInfo, rowIdx); - if (pData != NULL) { - colDataSetVal(pColInfo, pBlock->info.rows, pData, false); + if (!isNull[i]) 
{ + colDataSetVal(pColInfo, pBlock->info.rows, pStart, false); + if (pColInfo->info.type == TSDB_DATA_TYPE_JSON) { + int32_t dataLen = getJsonValueLen(pStart); + pStart += dataLen; + } else if (IS_VAR_DATA_TYPE(pColInfo->info.type)) { + pStart += varDataTLen(pStart); + } else { + int32_t bytes = pColInfo->info.bytes; + pStart += bytes; } + } else { + colDataSetNULL(pColInfo, pBlock->info.rows); } } - if (rowIdx == pSrcBlk->info.rows - 1) { - tSimpleHashRemove(pInfo->tmsSortRowIdInfo.pBlkDataHash, &blkId, sizeof(blkId)); - blockDataDestroy(pSrcBlk); - } + releaseBufPage(pInfo->tmsSortRowIdInfo.pExtSrcRowsBuf, page); + pBlock->info.dataLoad = 1; pBlock->info.scanFlag = ((SDataBlockInfo*)tsortGetBlockInfo(pTupleHandle))->scanFlag; pBlock->info.rows += 1; @@ -3462,19 +3509,19 @@ void tableMergeScanTsdbNotifyCb(ETsdReaderNotifyType type, STsdReaderNotifyInfo* int32_t startRowIdSort(STableMergeScanInfo *pInfo) { STmsSortRowIdInfo* pSort = &pInfo->tmsSortRowIdInfo; - createDiskbasedBuf(&pSort->pExtSrcBlkBuf, pInfo->bufPageSize, pInfo->sortBufSize, "tms-ext-src-block", tsTempDir); - dBufSetPrintInfo(pSort->pExtSrcBlkBuf); - pSort->pBlkDataHash = tSimpleHashInit(2048, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT)); + int32_t pageSize = getProperSortPageSize(blockDataGetRowSize(pInfo->pResBlock), + taosArrayGetSize(pInfo->pResBlock->pDataBlock)); + int32_t memSize = pageSize * 2048; + createDiskbasedBuf(&pSort->pExtSrcRowsBuf, pageSize, memSize, "tms-ext-src-block", tsTempDir); + dBufSetPrintInfo(pSort->pExtSrcRowsBuf); return 0; } int32_t stopRowIdSort(STableMergeScanInfo *pInfo) { STmsSortRowIdInfo* pSort = &pInfo->tmsSortRowIdInfo; - destroyDiskbasedBuf(pSort->pExtSrcBlkBuf); - pSort->pExtSrcBlkBuf = NULL; - tSimpleHashCleanup(pSort->pBlkDataHash); - pSort->pBlkDataHash = NULL; + destroyDiskbasedBuf(pSort->pExtSrcRowsBuf); + pSort->pExtSrcRowsBuf = NULL; return 0; } @@ -3798,10 +3845,12 @@ SOperatorInfo* createTableMergeScanOperatorInfo(STableScanPhysiNode* pTableScanN 
SSDataBlock* pSortInput = createDataBlock(); SColumnInfoData tsCol = createColumnInfoData(TSDB_DATA_TYPE_TIMESTAMP, 8, 1); blockDataAppendColInfo(pSortInput, &tsCol); - SColumnInfoData blkIdCol = createColumnInfoData(TSDB_DATA_TYPE_INT, 4, 2); - blockDataAppendColInfo(pSortInput, &blkIdCol); - SColumnInfoData rowIdxCol = createColumnInfoData(TSDB_DATA_TYPE_INT, 4, 3); - blockDataAppendColInfo(pSortInput, &rowIdxCol); + SColumnInfoData pageIdCol = createColumnInfoData(TSDB_DATA_TYPE_INT, 4, 2); + blockDataAppendColInfo(pSortInput, &pageIdCol); + SColumnInfoData offsetCol = createColumnInfoData(TSDB_DATA_TYPE_INT, 4, 3); + blockDataAppendColInfo(pSortInput, &offsetCol); + SColumnInfoData lengthCol = createColumnInfoData(TSDB_DATA_TYPE_INT, 4, 3); + blockDataAppendColInfo(pSortInput, &lengthCol); pInfo->pSortInputBlock = pSortInput; SArray* pList = taosArrayInit(1, sizeof(SBlockOrderInfo)); @@ -3823,8 +3872,8 @@ SOperatorInfo* createTableMergeScanOperatorInfo(STableScanPhysiNode* pTableScanN } pInfo->pReaderBlock = createOneDataBlock(pInfo->pResBlock, false); - int32_t rowSize = pInfo->pResBlock->info.rowSize; - uint32_t nCols = taosArrayGetSize(pInfo->pResBlock->pDataBlock); + int32_t rowSize = pInfo->pSortInputBlock->info.rowSize; + uint32_t nCols = taosArrayGetSize(pInfo->pSortInputBlock->pDataBlock); pInfo->bufPageSize = getProperSortPageSize(rowSize, nCols); pInfo->filesetDelimited = pTableScanNode->filesetDelimited; From b309c251dab1bac953be4121be78ce5a6141447c Mon Sep 17 00:00:00 2001 From: slzhou Date: Wed, 13 Dec 2023 13:21:26 +0800 Subject: [PATCH 11/58] fix: pass simple test --- source/libs/executor/src/scanoperator.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index be5cf298a4..90ab994e1c 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -3317,8 +3317,8 @@ static int32_t 
transformIntoSortInputBlock(STableMergeScanInfo* pInfo, SSDataBlo int32_t length = -1; saveBlockRowToBuf(pInfo, pSrcBlock, i, &pageId, &offset, &length); colDataSetInt32(pageIdCol, i, &pageId); - colDataSetInt32(pageIdCol, i, &offset); - colDataSetInt32(pageIdCol, i, &length); + colDataSetInt32(offsetCol, i, &offset); + colDataSetInt32(lengthCol, i, &length); } pSortInputBlk->info.rows = nRows; @@ -3327,13 +3327,13 @@ static int32_t transformIntoSortInputBlock(STableMergeScanInfo* pInfo, SSDataBlo } void appendOneRowIdRowToDataBlock(STableMergeScanInfo* pInfo, SSDataBlock* pBlock, STupleHandle* pTupleHandle) { + STmsSortRowIdInfo* pSortInfo = &pInfo->tmsSortRowIdInfo; + int32_t pageId = *(int32_t*)tsortGetValue(pTupleHandle, 1); int32_t offset = *(int32_t*)tsortGetValue(pTupleHandle, 2); - int32_t length = *(int32_t*)tsortGetValue(pTupleHandle, 2); + int32_t length = *(int32_t*)tsortGetValue(pTupleHandle, 3); void* page = getBufPage(pInfo->tmsSortRowIdInfo.pExtSrcRowsBuf, pageId); - STmsSortRowIdInfo* pSortInfo = &pInfo->tmsSortRowIdInfo; - int32_t numOfCols = taosArrayGetSize(pBlock->pDataBlock); char* buf = (char*)page + offset; char* isNull = (char*)buf; @@ -3849,7 +3849,7 @@ SOperatorInfo* createTableMergeScanOperatorInfo(STableScanPhysiNode* pTableScanN blockDataAppendColInfo(pSortInput, &pageIdCol); SColumnInfoData offsetCol = createColumnInfoData(TSDB_DATA_TYPE_INT, 4, 3); blockDataAppendColInfo(pSortInput, &offsetCol); - SColumnInfoData lengthCol = createColumnInfoData(TSDB_DATA_TYPE_INT, 4, 3); + SColumnInfoData lengthCol = createColumnInfoData(TSDB_DATA_TYPE_INT, 4, 4); blockDataAppendColInfo(pSortInput, &lengthCol); pInfo->pSortInputBlock = pSortInput; From 5be6ed95534e8dc7f7adcccff45ba67ef3e54acc Mon Sep 17 00:00:00 2001 From: slzhou Date: Mon, 18 Dec 2023 14:23:30 +0800 Subject: [PATCH 12/58] fix: increase page size and mem size --- source/libs/executor/src/scanoperator.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 478504934c..4f26bc8910 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -3566,8 +3566,8 @@ int32_t startRowIdSort(STableMergeScanInfo *pInfo) { STmsSortRowIdInfo* pSort = &pInfo->tmsSortRowIdInfo; int32_t pageSize = getProperSortPageSize(blockDataGetRowSize(pInfo->pResBlock), taosArrayGetSize(pInfo->pResBlock->pDataBlock)); - int32_t memSize = pageSize * 2048; - createDiskbasedBuf(&pSort->pExtSrcRowsBuf, pageSize, memSize, "tms-ext-src-block", tsTempDir); + int32_t memSize = pageSize * 4 * 8192; + createDiskbasedBuf(&pSort->pExtSrcRowsBuf, pageSize * 4, memSize, "tms-ext-src-block", tsTempDir); dBufSetPrintInfo(pSort->pExtSrcRowsBuf); return 0; } From 1aa94a8b3d77faa6f24694187deef8ef74809bd5 Mon Sep 17 00:00:00 2001 From: slzhou Date: Tue, 19 Dec 2023 14:55:31 +0800 Subject: [PATCH 13/58] fix: ci error --- source/libs/executor/inc/executorInt.h | 1 + source/libs/executor/src/scanoperator.c | 22 ++++++++++++++++++---- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/source/libs/executor/inc/executorInt.h b/source/libs/executor/inc/executorInt.h index 65444bfedd..e3eeab4e98 100644 --- a/source/libs/executor/inc/executorInt.h +++ b/source/libs/executor/inc/executorInt.h @@ -275,6 +275,7 @@ typedef struct STableScanInfo { typedef struct STmsSortRowIdInfo { SDiskbasedBuf* pExtSrcRowsBuf; + int32_t srcTsSlotId; } STmsSortRowIdInfo; typedef struct STableMergeScanInfo { diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 4f26bc8910..0c24869921 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -3278,8 +3278,13 @@ static int32_t saveBlockRowToBuf(STableMergeScanInfo* pInfo, SSDataBlock* pBlock memcpy(pStart, pData, dataLen); pStart += dataLen; } else if (IS_VAR_DATA_TYPE(pCol->info.type)) { - varDataCopy(pStart, pData); - 
pStart += varDataTLen(pData); + if (colDataGetLength(pCol, blockDataGetNumOfRows(pBlock)) != 0) { + varDataCopy(pStart, pData); + pStart += varDataTLen(pData); + } else { + *(VarDataLenT*)(pStart) = 0; + pStart += VARSTR_HEADER_SIZE; + } } else { int32_t bytes = pCol->info.bytes; memcpy(pStart, pData, bytes); @@ -3301,9 +3306,8 @@ static int32_t transformIntoSortInputBlock(STableMergeScanInfo* pInfo, SSDataBlo pSortInputBlk->info.id = pSrcBlock->info.id; blockDataEnsureCapacity(pSortInputBlk, nRows); - int32_t tsSlotId = ((SBlockOrderInfo*)taosArrayGet(pInfo->pSortInfo, 0))->slotId; SColumnInfoData* tsCol = taosArrayGet(pSortInputBlk->pDataBlock, 0); - SColumnInfoData* pSrcTsCol = taosArrayGet(pSrcBlock->pDataBlock, tsSlotId); + SColumnInfoData* pSrcTsCol = taosArrayGet(pSrcBlock->pDataBlock, pSortInfo->srcTsSlotId); colDataAssign(tsCol, pSrcTsCol, nRows, &pSortInputBlk->info); SColumnInfoData* pageIdCol = taosArrayGet(pSortInputBlk->pDataBlock, 1); @@ -3412,6 +3416,7 @@ static void doGetBlockForTableMergeScan(SOperatorInfo* pOperator, bool* pFinishe uint32_t status = 0; code = loadDataBlock(pOperator, &pInfo->base, pBlock, &status); + if (code != TSDB_CODE_SUCCESS) { qInfo("table merge scan load datablock code %d, %s", code, GET_TASKID(pTaskInfo)); T_LONG_JMP(pTaskInfo->env, code); @@ -3918,6 +3923,7 @@ SOperatorInfo* createTableMergeScanOperatorInfo(STableScanPhysiNode* pTableScanN pInfo->pSortInfo = generateSortByTsInfo(pInfo->base.matchInfo.pList, pInfo->base.cond.order); pInfo->pSortInputBlock = createOneDataBlock(pInfo->pResBlock, false); } else { + SSDataBlock* pSortInput = createDataBlock(); SColumnInfoData tsCol = createColumnInfoData(TSDB_DATA_TYPE_TIMESTAMP, 8, 1); blockDataAppendColInfo(pSortInput, &tsCol); @@ -3929,6 +3935,14 @@ SOperatorInfo* createTableMergeScanOperatorInfo(STableScanPhysiNode* pTableScanN blockDataAppendColInfo(pSortInput, &lengthCol); pInfo->pSortInputBlock = pSortInput; + int32_t srcTsSlotId = 0; + for (int32_t i = 0; i < 
taosArrayGetSize(pInfo->base.matchInfo.pList); ++i) { + SColMatchItem* colInfo = taosArrayGet(pInfo->base.matchInfo.pList, i); + if (colInfo->colId == PRIMARYKEY_TIMESTAMP_COL_ID) { + srcTsSlotId = colInfo->dstSlotId; + } + } + pInfo->tmsSortRowIdInfo.srcTsSlotId = srcTsSlotId; SArray* pList = taosArrayInit(1, sizeof(SBlockOrderInfo)); SBlockOrderInfo bi = {0}; bi.order = pInfo->base.cond.order; From f3847a00c65a7f44a1062a58d81261b63aa60501 Mon Sep 17 00:00:00 2001 From: slzhou Date: Wed, 20 Dec 2023 08:10:45 +0800 Subject: [PATCH 14/58] fix: more space is required for a row --- source/libs/executor/src/scanoperator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 0c24869921..7a778624b2 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -3222,7 +3222,7 @@ _error: static int32_t saveBlockRowToBuf(STableMergeScanInfo* pInfo, SSDataBlock* pBlock, int32_t rowIdx, int32_t* pPageId, int32_t* pOffset, int32_t* pLength) { SDiskbasedBuf* pResultBuf = pInfo->tmsSortRowIdInfo.pExtSrcRowsBuf; - int32_t rowBytes = blockDataGetRowSize(pBlock); + int32_t rowBytes = blockDataGetRowSize(pBlock) + taosArrayGetSize(pBlock->pDataBlock); SFilePage* pFilePage = NULL; From bde2cf7b34f8121059c0fb1eee7c022dc29d5760 Mon Sep 17 00:00:00 2001 From: slzhou Date: Wed, 20 Dec 2023 09:54:27 +0800 Subject: [PATCH 15/58] enhance: remove length col and refactor --- source/libs/executor/inc/executorInt.h | 2 +- source/libs/executor/src/scanoperator.c | 49 ++++++++++++++----------- 2 files changed, 28 insertions(+), 23 deletions(-) diff --git a/source/libs/executor/inc/executorInt.h b/source/libs/executor/inc/executorInt.h index e3eeab4e98..fa5ad6ff7c 100644 --- a/source/libs/executor/inc/executorInt.h +++ b/source/libs/executor/inc/executorInt.h @@ -312,7 +312,7 @@ typedef struct STableMergeScanInfo { bool rtnNextDurationBlocks; int32_t 
nextDurationBlocksIdx; bool bSortRowId; - STmsSortRowIdInfo tmsSortRowIdInfo; + STmsSortRowIdInfo sortRowIdInfo; } STableMergeScanInfo; typedef struct STagScanFilterContext { diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 7a778624b2..43cfdedd46 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -3221,12 +3221,11 @@ _error: // ========================= table merge scan static int32_t saveBlockRowToBuf(STableMergeScanInfo* pInfo, SSDataBlock* pBlock, int32_t rowIdx, int32_t* pPageId, int32_t* pOffset, int32_t* pLength) { - SDiskbasedBuf* pResultBuf = pInfo->tmsSortRowIdInfo.pExtSrcRowsBuf; + SDiskbasedBuf* pResultBuf = pInfo->sortRowIdInfo.pExtSrcRowsBuf; int32_t rowBytes = blockDataGetRowSize(pBlock) + taosArrayGetSize(pBlock->pDataBlock); SFilePage* pFilePage = NULL; - // in the first scan, new space needed for results int32_t pageId = -1; SArray* list = getDataBufPagesIdList(pResultBuf); @@ -3244,7 +3243,6 @@ static int32_t saveBlockRowToBuf(STableMergeScanInfo* pInfo, SSDataBlock* pBlock pageId = getPageId(pi); if (pFilePage->num + rowBytes > getBufPageSize(pResultBuf)) { - // release current page first, and prepare the next one releaseBufPageInfo(pResultBuf, pi); pFilePage = getNewBufPage(pResultBuf, &pageId); @@ -3255,8 +3253,10 @@ static int32_t saveBlockRowToBuf(STableMergeScanInfo* pInfo, SSDataBlock* pBlock } if (pFilePage == NULL) { - return -1; + qError("failed to get buffer, code:%s", tstrerror(terrno)); + return terrno; } + *pPageId = pageId; *pOffset = pFilePage->num; char* buf = (char*)pFilePage + (*pOffset); @@ -3298,8 +3298,8 @@ static int32_t saveBlockRowToBuf(STableMergeScanInfo* pInfo, SSDataBlock* pBlock return 0; } -static int32_t transformIntoSortInputBlock(STableMergeScanInfo* pInfo, SSDataBlock* pSrcBlock, SSDataBlock* pSortInputBlk) { - STmsSortRowIdInfo* pSortInfo = &pInfo->tmsSortRowIdInfo; +static int32_t 
fillSortInputBlock(STableMergeScanInfo* pInfo, SSDataBlock* pSrcBlock, SSDataBlock* pSortInputBlk) { + STmsSortRowIdInfo* pSortInfo = &pInfo->sortRowIdInfo; int32_t nRows = pSrcBlock->info.rows; pSortInputBlk->info.window = pSrcBlock->info.window; @@ -3321,7 +3321,6 @@ static int32_t transformIntoSortInputBlock(STableMergeScanInfo* pInfo, SSDataBlo saveBlockRowToBuf(pInfo, pSrcBlock, i, &pageId, &offset, &length); colDataSetInt32(pageIdCol, i, &pageId); colDataSetInt32(offsetCol, i, &offset); - colDataSetInt32(lengthCol, i, &length); } pSortInputBlk->info.rows = nRows; @@ -3329,13 +3328,12 @@ static int32_t transformIntoSortInputBlock(STableMergeScanInfo* pInfo, SSDataBlo return 0; } -void appendOneRowIdRowToDataBlock(STableMergeScanInfo* pInfo, SSDataBlock* pBlock, STupleHandle* pTupleHandle) { - STmsSortRowIdInfo* pSortInfo = &pInfo->tmsSortRowIdInfo; +static void appendOneRowIdRowToDataBlock(STableMergeScanInfo* pInfo, SSDataBlock* pBlock, STupleHandle* pTupleHandle) { + STmsSortRowIdInfo* pSortInfo = &pInfo->sortRowIdInfo; int32_t pageId = *(int32_t*)tsortGetValue(pTupleHandle, 1); int32_t offset = *(int32_t*)tsortGetValue(pTupleHandle, 2); - int32_t length = *(int32_t*)tsortGetValue(pTupleHandle, 3); - void* page = getBufPage(pInfo->tmsSortRowIdInfo.pExtSrcRowsBuf, pageId); + void* page = getBufPage(pInfo->sortRowIdInfo.pExtSrcRowsBuf, pageId); int32_t numOfCols = taosArrayGetSize(pBlock->pDataBlock); char* buf = (char*)page + offset; @@ -3359,7 +3357,7 @@ void appendOneRowIdRowToDataBlock(STableMergeScanInfo* pInfo, SSDataBlock* pBloc colDataSetNULL(pColInfo, pBlock->info.rows); } } - releaseBufPage(pInfo->tmsSortRowIdInfo.pExtSrcRowsBuf, page); + releaseBufPage(pInfo->sortRowIdInfo.pExtSrcRowsBuf, page); pBlock->info.dataLoad = 1; pBlock->info.scanFlag = ((SDataBlockInfo*)tsortGetBlockInfo(pTupleHandle))->scanFlag; @@ -3511,7 +3509,7 @@ static SSDataBlock* getBlockForTableMergeScan(void* param) { SSDataBlock* pSortInputBlk = NULL; if (pInfo->bSortRowId) { 
blockDataCleanup(pInfo->pSortInputBlock); - transformIntoSortInputBlock(pInfo, pBlock, pInfo->pSortInputBlock); + fillSortInputBlock(pInfo, pBlock, pInfo->pSortInputBlock); pSortInputBlk = pInfo->pSortInputBlock; } else { pSortInputBlk = pBlock; @@ -3568,17 +3566,17 @@ void tableMergeScanTsdbNotifyCb(ETsdReaderNotifyType type, STsdReaderNotifyInfo* } int32_t startRowIdSort(STableMergeScanInfo *pInfo) { - STmsSortRowIdInfo* pSort = &pInfo->tmsSortRowIdInfo; + STmsSortRowIdInfo* pSort = &pInfo->sortRowIdInfo; int32_t pageSize = getProperSortPageSize(blockDataGetRowSize(pInfo->pResBlock), taosArrayGetSize(pInfo->pResBlock->pDataBlock)); - int32_t memSize = pageSize * 4 * 8192; - createDiskbasedBuf(&pSort->pExtSrcRowsBuf, pageSize * 4, memSize, "tms-ext-src-block", tsTempDir); + int32_t memSize = pageSize * 1024; + int32_t code = createDiskbasedBuf(&pSort->pExtSrcRowsBuf, pageSize, memSize, "tms-ext-src-block", tsTempDir); dBufSetPrintInfo(pSort->pExtSrcRowsBuf); - return 0; + return code; } int32_t stopRowIdSort(STableMergeScanInfo *pInfo) { - STmsSortRowIdInfo* pSort = &pInfo->tmsSortRowIdInfo; + STmsSortRowIdInfo* pSort = &pInfo->sortRowIdInfo; destroyDiskbasedBuf(pSort->pExtSrcRowsBuf); pSort->pExtSrcRowsBuf = NULL; @@ -3595,7 +3593,11 @@ int32_t startDurationForGroupTableMergeScan(SOperatorInfo* pOperator) { pInfo->bNewFilesetEvent = false; pInfo->bNextDurationBlockEvent = false; - startRowIdSort(pInfo); + code = startRowIdSort(pInfo); + if (code != TSDB_CODE_SUCCESS) { + T_LONG_JMP(pTaskInfo->env, code); + } + pInfo->sortBufSize = 2048 * pInfo->bufPageSize; int32_t numOfBufPage = pInfo->sortBufSize / pInfo->bufPageSize; pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_BLOCK_TS_MERGE, pInfo->bufPageSize, numOfBufPage, @@ -3814,6 +3816,11 @@ void destroyTableMergeScanOperatorInfo(void* param) { STableMergeScanInfo* pTableScanInfo = (STableMergeScanInfo*)param; cleanupQueryTableDataCond(&pTableScanInfo->base.cond); + if 
(pTableScanInfo->sortRowIdInfo.pExtSrcRowsBuf != NULL) { + destroyDiskbasedBuf(pTableScanInfo->sortRowIdInfo.pExtSrcRowsBuf); + pTableScanInfo->sortRowIdInfo.pExtSrcRowsBuf = NULL; + } + int32_t numOfTable = taosArrayGetSize(pTableScanInfo->sortSourceParams); pTableScanInfo->base.readerAPI.tsdReaderClose(pTableScanInfo->base.dataReader); @@ -3931,8 +3938,6 @@ SOperatorInfo* createTableMergeScanOperatorInfo(STableScanPhysiNode* pTableScanN blockDataAppendColInfo(pSortInput, &pageIdCol); SColumnInfoData offsetCol = createColumnInfoData(TSDB_DATA_TYPE_INT, 4, 3); blockDataAppendColInfo(pSortInput, &offsetCol); - SColumnInfoData lengthCol = createColumnInfoData(TSDB_DATA_TYPE_INT, 4, 4); - blockDataAppendColInfo(pSortInput, &lengthCol); pInfo->pSortInputBlock = pSortInput; int32_t srcTsSlotId = 0; @@ -3942,7 +3947,7 @@ SOperatorInfo* createTableMergeScanOperatorInfo(STableScanPhysiNode* pTableScanN srcTsSlotId = colInfo->dstSlotId; } } - pInfo->tmsSortRowIdInfo.srcTsSlotId = srcTsSlotId; + pInfo->sortRowIdInfo.srcTsSlotId = srcTsSlotId; SArray* pList = taosArrayInit(1, sizeof(SBlockOrderInfo)); SBlockOrderInfo bi = {0}; bi.order = pInfo->base.cond.order; From 9ebca3eaff8e49abfaa554284571ea249f50f105 Mon Sep 17 00:00:00 2001 From: slzhou Date: Wed, 20 Dec 2023 10:06:22 +0800 Subject: [PATCH 16/58] fix: access length col removed --- source/libs/executor/src/scanoperator.c | 1 - 1 file changed, 1 deletion(-) diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 43cfdedd46..44182a1e41 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -3312,7 +3312,6 @@ static int32_t fillSortInputBlock(STableMergeScanInfo* pInfo, SSDataBlock* pSrcB SColumnInfoData* pageIdCol = taosArrayGet(pSortInputBlk->pDataBlock, 1); SColumnInfoData* offsetCol = taosArrayGet(pSortInputBlk->pDataBlock, 2); - SColumnInfoData* lengthCol = taosArrayGet(pSortInputBlk->pDataBlock, 3); for (int32_t i = 0; i < 
pSrcBlock->info.rows; ++i) { int32_t pageId = -1; From d6445d2d5b707c3147270256372da6f5b14a37c3 Mon Sep 17 00:00:00 2001 From: slzhou Date: Wed, 20 Dec 2023 16:20:43 +0800 Subject: [PATCH 17/58] enhance: add length verification --- source/libs/executor/src/scanoperator.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 44182a1e41..308c318e04 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -3222,7 +3222,7 @@ _error: static int32_t saveBlockRowToBuf(STableMergeScanInfo* pInfo, SSDataBlock* pBlock, int32_t rowIdx, int32_t* pPageId, int32_t* pOffset, int32_t* pLength) { SDiskbasedBuf* pResultBuf = pInfo->sortRowIdInfo.pExtSrcRowsBuf; - int32_t rowBytes = blockDataGetRowSize(pBlock) + taosArrayGetSize(pBlock->pDataBlock); + int32_t rowBytes = blockDataGetRowSize(pBlock) + taosArrayGetSize(pBlock->pDataBlock) + sizeof(int32_t); SFilePage* pFilePage = NULL; @@ -3291,6 +3291,8 @@ static int32_t saveBlockRowToBuf(STableMergeScanInfo* pInfo, SSDataBlock* pBlock pStart += bytes; } } + *(int32_t*)pStart = (char*)pStart - (char*)buf; + pStart += sizeof(int32_t); *pLength = (int32_t)(pStart - (char*)buf); pFilePage->num += (*pLength); setBufPageDirty(pFilePage, true); @@ -3356,6 +3358,11 @@ static void appendOneRowIdRowToDataBlock(STableMergeScanInfo* pInfo, SSDataBlock colDataSetNULL(pColInfo, pBlock->info.rows); } } + + if (*(int32_t*)pStart != pStart-buf) { + qError("table merge scan row buf deserialization. 
length error %d != %d ", *(int32_t*)pStart, (int32_t)(pStart-buf)); + }; + releaseBufPage(pInfo->sortRowIdInfo.pExtSrcRowsBuf, page); pBlock->info.dataLoad = 1; From 084245e6d2ee9fcb643f51e702add85e83da50a3 Mon Sep 17 00:00:00 2001 From: slzhou Date: Thu, 21 Dec 2023 08:05:09 +0800 Subject: [PATCH 18/58] fix: row id sort when no limit and row size is more than 256 --- source/libs/executor/src/scanoperator.c | 83 ++++++++++++++----------- 1 file changed, 47 insertions(+), 36 deletions(-) diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 44182a1e41..d0bde7f219 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -3568,8 +3568,9 @@ int32_t startRowIdSort(STableMergeScanInfo *pInfo) { STmsSortRowIdInfo* pSort = &pInfo->sortRowIdInfo; int32_t pageSize = getProperSortPageSize(blockDataGetRowSize(pInfo->pResBlock), taosArrayGetSize(pInfo->pResBlock->pDataBlock)); - int32_t memSize = pageSize * 1024; - int32_t code = createDiskbasedBuf(&pSort->pExtSrcRowsBuf, pageSize, memSize, "tms-ext-src-block", tsTempDir); + pageSize *= 2; + int32_t memSize = MIN(pageSize * 2048, 256 * 1024 * 1024); + int32_t code = createDiskbasedBuf(&pSort->pExtSrcRowsBuf, pageSize * 2, memSize, "tms-ext-src-block", tsTempDir); dBufSetPrintInfo(pSort->pExtSrcRowsBuf); return code; } @@ -3863,6 +3864,34 @@ int32_t getTableMergeScanExplainExecInfo(SOperatorInfo* pOptr, void** pOptrExpla return TSDB_CODE_SUCCESS; } +static void initRowIdSortputBlock(STableMergeScanInfo* pInfo) { + SSDataBlock* pSortInput = createDataBlock(); + SColumnInfoData tsCol = createColumnInfoData(TSDB_DATA_TYPE_TIMESTAMP, 8, 1); + blockDataAppendColInfo(pSortInput, &tsCol); + SColumnInfoData pageIdCol = createColumnInfoData(TSDB_DATA_TYPE_INT, 4, 2); + blockDataAppendColInfo(pSortInput, &pageIdCol); + SColumnInfoData offsetCol = createColumnInfoData(TSDB_DATA_TYPE_INT, 4, 3); + blockDataAppendColInfo(pSortInput, &offsetCol); + 
pInfo->pSortInputBlock = pSortInput; + + int32_t srcTsSlotId = 0; + for (int32_t i = 0; i < taosArrayGetSize(pInfo->base.matchInfo.pList); ++i) { + SColMatchItem* colInfo = taosArrayGet(pInfo->base.matchInfo.pList, i); + if (colInfo->colId == PRIMARYKEY_TIMESTAMP_COL_ID) { + srcTsSlotId = colInfo->dstSlotId; + } + } + pInfo->sortRowIdInfo.srcTsSlotId = srcTsSlotId; + SArray* pList = taosArrayInit(1, sizeof(SBlockOrderInfo)); + SBlockOrderInfo bi = {0}; + bi.order = pInfo->base.cond.order; + bi.slotId = 0; + bi.nullFirst = NULL_ORDER_FIRST; + taosArrayPush(pList, &bi); + pInfo->pSortInfo = pList; + return; +} + SOperatorInfo* createTableMergeScanOperatorInfo(STableScanPhysiNode* pTableScanNode, SReadHandle* readHandle, STableListInfo* pTableListInfo, SExecTaskInfo* pTaskInfo) { STableMergeScanInfo* pInfo = taosMemoryCalloc(1, sizeof(STableMergeScanInfo)); @@ -3922,39 +3951,8 @@ SOperatorInfo* createTableMergeScanOperatorInfo(STableScanPhysiNode* pTableScanN initResultSizeInfo(&pOperator->resultInfo, 1024); pInfo->pResBlock = createDataBlockFromDescNode(pDescNode); blockDataEnsureCapacity(pInfo->pResBlock, pOperator->resultInfo.capacity); + pInfo->pReaderBlock = createOneDataBlock(pInfo->pResBlock, false); - pInfo->sortSourceParams = taosArrayInit(64, sizeof(STableMergeScanSortSourceParam)); - pInfo->bSortRowId = true; - if (!pInfo->bSortRowId) { - pInfo->pSortInfo = generateSortByTsInfo(pInfo->base.matchInfo.pList, pInfo->base.cond.order); - pInfo->pSortInputBlock = createOneDataBlock(pInfo->pResBlock, false); - } else { - - SSDataBlock* pSortInput = createDataBlock(); - SColumnInfoData tsCol = createColumnInfoData(TSDB_DATA_TYPE_TIMESTAMP, 8, 1); - blockDataAppendColInfo(pSortInput, &tsCol); - SColumnInfoData pageIdCol = createColumnInfoData(TSDB_DATA_TYPE_INT, 4, 2); - blockDataAppendColInfo(pSortInput, &pageIdCol); - SColumnInfoData offsetCol = createColumnInfoData(TSDB_DATA_TYPE_INT, 4, 3); - blockDataAppendColInfo(pSortInput, &offsetCol); - 
pInfo->pSortInputBlock = pSortInput; - - int32_t srcTsSlotId = 0; - for (int32_t i = 0; i < taosArrayGetSize(pInfo->base.matchInfo.pList); ++i) { - SColMatchItem* colInfo = taosArrayGet(pInfo->base.matchInfo.pList, i); - if (colInfo->colId == PRIMARYKEY_TIMESTAMP_COL_ID) { - srcTsSlotId = colInfo->dstSlotId; - } - } - pInfo->sortRowIdInfo.srcTsSlotId = srcTsSlotId; - SArray* pList = taosArrayInit(1, sizeof(SBlockOrderInfo)); - SBlockOrderInfo bi = {0}; - bi.order = pInfo->base.cond.order; - bi.slotId = 0; - bi.nullFirst = NULL_ORDER_FIRST; - taosArrayPush(pList, &bi); - pInfo->pSortInfo = pList; - } initLimitInfo(pTableScanNode->scan.node.pLimit, pTableScanNode->scan.node.pSlimit, &pInfo->limitInfo); pInfo->mTableNumRows = tSimpleHashInit(1024, taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT)); @@ -3964,9 +3962,22 @@ SOperatorInfo* createTableMergeScanOperatorInfo(STableScanPhysiNode* pTableScanN pInfo->mergeLimit = pInfo->limitInfo.limit.limit + pInfo->limitInfo.limit.offset; pInfo->mSkipTables = NULL; } - pInfo->pReaderBlock = createOneDataBlock(pInfo->pResBlock, false); - int32_t rowSize = pInfo->pSortInputBlock->info.rowSize; + if (!hasLimit && blockDataGetRowSize(pInfo->pResBlock) >= 256) { + pInfo->bSortRowId = true; + } else { + pInfo->bSortRowId = false; + } + + if (!pInfo->bSortRowId) { + pInfo->pSortInfo = generateSortByTsInfo(pInfo->base.matchInfo.pList, pInfo->base.cond.order); + pInfo->pSortInputBlock = createOneDataBlock(pInfo->pResBlock, false); + } else { + initRowIdSortputBlock(pInfo); + } + + pInfo->sortSourceParams = taosArrayInit(64, sizeof(STableMergeScanSortSourceParam)); + int32_t rowSize = blockDataGetRowSize(pInfo->pSortInputBlock); uint32_t nCols = taosArrayGetSize(pInfo->pSortInputBlock->pDataBlock); pInfo->bufPageSize = getProperSortPageSize(rowSize, nCols); if (!tsExperimental) { From 70419dcbc65f0be7fab754b63de0bc862d207fee Mon Sep 17 00:00:00 2001 From: slzhou Date: Thu, 21 Dec 2023 08:22:22 +0800 Subject: [PATCH 19/58] fix: change 
memory size for row storage in memory pages --- source/libs/executor/src/scanoperator.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 39572648e9..b567d46a4a 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -3575,8 +3575,9 @@ int32_t startRowIdSort(STableMergeScanInfo *pInfo) { STmsSortRowIdInfo* pSort = &pInfo->sortRowIdInfo; int32_t pageSize = getProperSortPageSize(blockDataGetRowSize(pInfo->pResBlock), taosArrayGetSize(pInfo->pResBlock->pDataBlock)); - pageSize *= 2; - int32_t memSize = MIN(pageSize * 2048, 256 * 1024 * 1024); + pageSize *= 2; + int numOfTables = pInfo->tableEndIndex - pInfo->tableStartIndex + 1; + int32_t memSize = MIN(pageSize * numOfTables, 512 * 1024 * 1024); int32_t code = createDiskbasedBuf(&pSort->pExtSrcRowsBuf, pageSize * 2, memSize, "tms-ext-src-block", tsTempDir); dBufSetPrintInfo(pSort->pExtSrcRowsBuf); return code; From 7a237504f95ec2950fa200cd83e9a0888aa7f721 Mon Sep 17 00:00:00 2001 From: slzhou Date: Thu, 21 Dec 2023 09:36:11 +0800 Subject: [PATCH 20/58] fix: change pagesize of row storage disk based buf --- source/libs/executor/src/scanoperator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index b567d46a4a..2b95fa8e4d 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -3578,7 +3578,7 @@ int32_t startRowIdSort(STableMergeScanInfo *pInfo) { pageSize *= 2; int numOfTables = pInfo->tableEndIndex - pInfo->tableStartIndex + 1; int32_t memSize = MIN(pageSize * numOfTables, 512 * 1024 * 1024); - int32_t code = createDiskbasedBuf(&pSort->pExtSrcRowsBuf, pageSize * 2, memSize, "tms-ext-src-block", tsTempDir); + int32_t code = createDiskbasedBuf(&pSort->pExtSrcRowsBuf, pageSize, memSize, "tms-ext-src-block", tsTempDir); 
dBufSetPrintInfo(pSort->pExtSrcRowsBuf); return code; } From 1725986d9fd3103e2051f5748207da2356684579 Mon Sep 17 00:00:00 2001 From: slzhou Date: Thu, 21 Dec 2023 11:45:04 +0800 Subject: [PATCH 21/58] fix: columns pre-allocated has no data and offsets --- source/libs/executor/src/scanoperator.c | 45 +++++++++++++++++++------ 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 2b95fa8e4d..a44108dc50 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -3220,10 +3220,7 @@ _error: // ========================= table merge scan -static int32_t saveBlockRowToBuf(STableMergeScanInfo* pInfo, SSDataBlock* pBlock, int32_t rowIdx, int32_t* pPageId, int32_t* pOffset, int32_t* pLength) { - SDiskbasedBuf* pResultBuf = pInfo->sortRowIdInfo.pExtSrcRowsBuf; - int32_t rowBytes = blockDataGetRowSize(pBlock) + taosArrayGetSize(pBlock->pDataBlock) + sizeof(int32_t); - +static int32_t getPageFromExtSrcRowsBuf(SDiskbasedBuf* pResultBuf, int32_t rowBytes, int32_t* pPageId, SFilePage** ppFilePage) { SFilePage* pFilePage = NULL; int32_t pageId = -1; @@ -3258,8 +3255,11 @@ static int32_t saveBlockRowToBuf(STableMergeScanInfo* pInfo, SSDataBlock* pBlock } *pPageId = pageId; - *pOffset = pFilePage->num; - char* buf = (char*)pFilePage + (*pOffset); + *ppFilePage = pFilePage; + return TSDB_CODE_SUCCESS; +} + +static int32_t blockRowToBuf(SSDataBlock* pBlock, int32_t rowIdx, char* buf) { size_t numOfCols = taosArrayGetSize(pBlock->pDataBlock); char* isNull = (char*)buf; @@ -3274,14 +3274,21 @@ static int32_t saveBlockRowToBuf(STableMergeScanInfo* pInfo, SSDataBlock* pBlock isNull[i] = 0; char* pData = colDataGetData(pCol, rowIdx); if (pCol->info.type == TSDB_DATA_TYPE_JSON) { - int32_t dataLen = getJsonValueLen(pData); - memcpy(pStart, pData, dataLen); - pStart += dataLen; + if (colDataGetLength(pCol, blockDataGetNumOfRows(pBlock)) != 0) { + int32_t 
dataLen = getJsonValueLen(pData); + memcpy(pStart, pData, dataLen); + pStart += dataLen; + } else { + // the column that is pre-allocated, has no data and has offset + *pStart = 0; + pStart += 1; + } } else if (IS_VAR_DATA_TYPE(pCol->info.type)) { if (colDataGetLength(pCol, blockDataGetNumOfRows(pBlock)) != 0) { varDataCopy(pStart, pData); pStart += varDataTLen(pData); } else { + // the column that is pre-allocated, has no data and has offset *(VarDataLenT*)(pStart) = 0; pStart += VARSTR_HEADER_SIZE; } @@ -3293,7 +3300,23 @@ static int32_t saveBlockRowToBuf(STableMergeScanInfo* pInfo, SSDataBlock* pBlock } *(int32_t*)pStart = (char*)pStart - (char*)buf; pStart += sizeof(int32_t); - *pLength = (int32_t)(pStart - (char*)buf); + return (int32_t)(pStart - (char*)buf); +} + +static int32_t saveBlockRowToExtRowsBuf(STableMergeScanInfo* pInfo, SSDataBlock* pBlock, int32_t rowIdx, int32_t* pPageId, int32_t* pOffset, int32_t* pLength) { + SDiskbasedBuf* pResultBuf = pInfo->sortRowIdInfo.pExtSrcRowsBuf; + int32_t rowBytes = blockDataGetRowSize(pBlock) + taosArrayGetSize(pBlock->pDataBlock) + sizeof(int32_t); + int32_t pageId = -1; + SFilePage* pFilePage = NULL; + int32_t code = getPageFromExtSrcRowsBuf(pResultBuf, rowBytes, &pageId, &pFilePage); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + *pPageId = pageId; + *pOffset = pFilePage->num; + *pLength = blockRowToBuf(pBlock, rowIdx, (char*)pFilePage + (*pOffset)); + pFilePage->num += (*pLength); setBufPageDirty(pFilePage, true); releaseBufPage(pResultBuf, pFilePage); @@ -3319,7 +3342,7 @@ static int32_t fillSortInputBlock(STableMergeScanInfo* pInfo, SSDataBlock* pSrcB int32_t pageId = -1; int32_t offset = -1; int32_t length = -1; - saveBlockRowToBuf(pInfo, pSrcBlock, i, &pageId, &offset, &length); + saveBlockRowToExtRowsBuf(pInfo, pSrcBlock, i, &pageId, &offset, &length); colDataSetInt32(pageIdCol, i, &pageId); colDataSetInt32(offsetCol, i, &offset); } From d2b0ef75a5798bc2bdbf7ea5fcdb107438e72131 Mon Sep 17 
00:00:00 2001 From: slzhou Date: Thu, 21 Dec 2023 13:10:37 +0800 Subject: [PATCH 22/58] fix: windows compilation --- source/libs/executor/src/scanoperator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index a44108dc50..d24b01db2c 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -3600,7 +3600,7 @@ int32_t startRowIdSort(STableMergeScanInfo *pInfo) { taosArrayGetSize(pInfo->pResBlock->pDataBlock)); pageSize *= 2; int numOfTables = pInfo->tableEndIndex - pInfo->tableStartIndex + 1; - int32_t memSize = MIN(pageSize * numOfTables, 512 * 1024 * 1024); + int32_t memSize = TMIN(pageSize * numOfTables, 512 * 1024 * 1024); int32_t code = createDiskbasedBuf(&pSort->pExtSrcRowsBuf, pageSize, memSize, "tms-ext-src-block", tsTempDir); dBufSetPrintInfo(pSort->pExtSrcRowsBuf); return code; From a802d35af49f5ee7c5646607a4659b787f942dcd Mon Sep 17 00:00:00 2001 From: slzhou Date: Thu, 21 Dec 2023 16:36:56 +0800 Subject: [PATCH 23/58] fix: change the pre-allocated column check --- source/libs/executor/src/scanoperator.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index d24b01db2c..b29d707cc9 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -3274,21 +3274,21 @@ static int32_t blockRowToBuf(SSDataBlock* pBlock, int32_t rowIdx, char* buf) { isNull[i] = 0; char* pData = colDataGetData(pCol, rowIdx); if (pCol->info.type == TSDB_DATA_TYPE_JSON) { - if (colDataGetLength(pCol, blockDataGetNumOfRows(pBlock)) != 0) { + if (pCol->pData) { int32_t dataLen = getJsonValueLen(pData); memcpy(pStart, pData, dataLen); pStart += dataLen; } else { - // the column that is pre-allocated, has no data and has offset + // the column that is pre-allocated has no data and has offset *pStart = 
0; pStart += 1; } } else if (IS_VAR_DATA_TYPE(pCol->info.type)) { - if (colDataGetLength(pCol, blockDataGetNumOfRows(pBlock)) != 0) { + if (pCol->pData) { varDataCopy(pStart, pData); pStart += varDataTLen(pData); } else { - // the column that is pre-allocated, has no data and has offset + // the column that is pre-allocated has no data and has offset *(VarDataLenT*)(pStart) = 0; pStart += VARSTR_HEADER_SIZE; } From fbf67e42943913787b525efcbe3edf020217899e Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Wed, 3 Jan 2024 14:03:22 +0800 Subject: [PATCH 24/58] fix: save data when creating intial sources --- source/libs/executor/inc/executorInt.h | 6 - source/libs/executor/inc/tsort.h | 3 + source/libs/executor/src/scanoperator.c | 270 ++--------------------- source/libs/executor/src/tsort.c | 277 ++++++++++++++++++++++-- 4 files changed, 276 insertions(+), 280 deletions(-) diff --git a/source/libs/executor/inc/executorInt.h b/source/libs/executor/inc/executorInt.h index 1ab27e42a0..5b014bef63 100644 --- a/source/libs/executor/inc/executorInt.h +++ b/source/libs/executor/inc/executorInt.h @@ -273,11 +273,6 @@ typedef struct STableScanInfo { bool filesetDelimited; } STableScanInfo; -typedef struct STmsSortRowIdInfo { - SDiskbasedBuf* pExtSrcRowsBuf; - int32_t srcTsSlotId; -} STmsSortRowIdInfo; - typedef struct STableMergeScanInfo { int32_t tableStartIndex; int32_t tableEndIndex; @@ -312,7 +307,6 @@ typedef struct STableMergeScanInfo { bool rtnNextDurationBlocks; int32_t nextDurationBlocksIdx; bool bSortRowId; - STmsSortRowIdInfo sortRowIdInfo; } STableMergeScanInfo; typedef struct STagScanFilterContext { diff --git a/source/libs/executor/inc/tsort.h b/source/libs/executor/inc/tsort.h index 365acf2bff..1ed4d7baa5 100644 --- a/source/libs/executor/inc/tsort.h +++ b/source/libs/executor/inc/tsort.h @@ -194,6 +194,9 @@ void tsortSetClosed(SSortHandle* pHandle); void tsortSetSingleTableMerge(SSortHandle* pHandle); void tsortSetAbortCheckFn(SSortHandle* pHandle, bool 
(*checkFn)(void* param), void* param); +int32_t tsortSetSortByRowId(SSortHandle* pHandle, int32_t extRowsPageSize, int32_t extRowsSize); + +void tsortAppendTupleToBlock(SSortHandle* pHandle, SSDataBlock* pBlock, STupleHandle* pTupleHandle); /** * @brief comp the tuple with keyBuf, if not equal, new keys will be built in keyBuf, newLen will be stored in keyLen * @param [in] pSortCols cols to comp and build diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index f690edcf07..36e810bd23 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -3220,179 +3220,6 @@ _error: // ========================= table merge scan -static int32_t getPageFromExtSrcRowsBuf(SDiskbasedBuf* pResultBuf, int32_t rowBytes, int32_t* pPageId, SFilePage** ppFilePage) { - SFilePage* pFilePage = NULL; - - int32_t pageId = -1; - SArray* list = getDataBufPagesIdList(pResultBuf); - - if (taosArrayGetSize(list) == 0) { - pFilePage = getNewBufPage(pResultBuf, &pageId); - pFilePage->num = sizeof(SFilePage); - } else { - SPageInfo* pi = getLastPageInfo(list); - pFilePage = getBufPage(pResultBuf, getPageId(pi)); - if (pFilePage == NULL) { - qError("failed to get buffer, code:%s", tstrerror(terrno)); - return terrno; - } - - pageId = getPageId(pi); - - if (pFilePage->num + rowBytes > getBufPageSize(pResultBuf)) { - releaseBufPageInfo(pResultBuf, pi); - - pFilePage = getNewBufPage(pResultBuf, &pageId); - if (pFilePage != NULL) { - pFilePage->num = sizeof(SFilePage); - } - } - } - - if (pFilePage == NULL) { - qError("failed to get buffer, code:%s", tstrerror(terrno)); - return terrno; - } - - *pPageId = pageId; - *ppFilePage = pFilePage; - return TSDB_CODE_SUCCESS; -} - -static int32_t blockRowToBuf(SSDataBlock* pBlock, int32_t rowIdx, char* buf) { - size_t numOfCols = taosArrayGetSize(pBlock->pDataBlock); - - char* isNull = (char*)buf; - char* pStart = (char*)buf + sizeof(int8_t) * numOfCols; - for (int32_t i = 0; i < 
numOfCols; ++i) { - SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, i); - if (colDataIsNull_s(pCol, rowIdx)) { - isNull[i] = 1; - continue; - } - - isNull[i] = 0; - char* pData = colDataGetData(pCol, rowIdx); - if (pCol->info.type == TSDB_DATA_TYPE_JSON) { - if (pCol->pData) { - int32_t dataLen = getJsonValueLen(pData); - memcpy(pStart, pData, dataLen); - pStart += dataLen; - } else { - // the column that is pre-allocated has no data and has offset - *pStart = 0; - pStart += 1; - } - } else if (IS_VAR_DATA_TYPE(pCol->info.type)) { - if (pCol->pData) { - varDataCopy(pStart, pData); - pStart += varDataTLen(pData); - } else { - // the column that is pre-allocated has no data and has offset - *(VarDataLenT*)(pStart) = 0; - pStart += VARSTR_HEADER_SIZE; - } - } else { - int32_t bytes = pCol->info.bytes; - memcpy(pStart, pData, bytes); - pStart += bytes; - } - } - *(int32_t*)pStart = (char*)pStart - (char*)buf; - pStart += sizeof(int32_t); - return (int32_t)(pStart - (char*)buf); -} - -static int32_t saveBlockRowToExtRowsBuf(STableMergeScanInfo* pInfo, SSDataBlock* pBlock, int32_t rowIdx, int32_t* pPageId, int32_t* pOffset, int32_t* pLength) { - SDiskbasedBuf* pResultBuf = pInfo->sortRowIdInfo.pExtSrcRowsBuf; - int32_t rowBytes = blockDataGetRowSize(pBlock) + taosArrayGetSize(pBlock->pDataBlock) + sizeof(int32_t); - int32_t pageId = -1; - SFilePage* pFilePage = NULL; - int32_t code = getPageFromExtSrcRowsBuf(pResultBuf, rowBytes, &pageId, &pFilePage); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - *pPageId = pageId; - *pOffset = pFilePage->num; - *pLength = blockRowToBuf(pBlock, rowIdx, (char*)pFilePage + (*pOffset)); - - pFilePage->num += (*pLength); - setBufPageDirty(pFilePage, true); - releaseBufPage(pResultBuf, pFilePage); - return 0; -} - -static int32_t fillSortInputBlock(STableMergeScanInfo* pInfo, SSDataBlock* pSrcBlock, SSDataBlock* pSortInputBlk) { - STmsSortRowIdInfo* pSortInfo = &pInfo->sortRowIdInfo; - - int32_t nRows = 
pSrcBlock->info.rows; - pSortInputBlk->info.window = pSrcBlock->info.window; - pSortInputBlk->info.id = pSrcBlock->info.id; - blockDataEnsureCapacity(pSortInputBlk, nRows); - - SColumnInfoData* tsCol = taosArrayGet(pSortInputBlk->pDataBlock, 0); - SColumnInfoData* pSrcTsCol = taosArrayGet(pSrcBlock->pDataBlock, pSortInfo->srcTsSlotId); - colDataAssign(tsCol, pSrcTsCol, nRows, &pSortInputBlk->info); - - SColumnInfoData* pageIdCol = taosArrayGet(pSortInputBlk->pDataBlock, 1); - SColumnInfoData* offsetCol = taosArrayGet(pSortInputBlk->pDataBlock, 2); - - for (int32_t i = 0; i < pSrcBlock->info.rows; ++i) { - int32_t pageId = -1; - int32_t offset = -1; - int32_t length = -1; - saveBlockRowToExtRowsBuf(pInfo, pSrcBlock, i, &pageId, &offset, &length); - colDataSetInt32(pageIdCol, i, &pageId); - colDataSetInt32(offsetCol, i, &offset); - } - - pSortInputBlk->info.rows = nRows; - - return 0; -} - -static void appendOneRowIdRowToDataBlock(STableMergeScanInfo* pInfo, SSDataBlock* pBlock, STupleHandle* pTupleHandle) { - STmsSortRowIdInfo* pSortInfo = &pInfo->sortRowIdInfo; - - int32_t pageId = *(int32_t*)tsortGetValue(pTupleHandle, 1); - int32_t offset = *(int32_t*)tsortGetValue(pTupleHandle, 2); - void* page = getBufPage(pInfo->sortRowIdInfo.pExtSrcRowsBuf, pageId); - - int32_t numOfCols = taosArrayGetSize(pBlock->pDataBlock); - char* buf = (char*)page + offset; - char* isNull = (char*)buf; - char* pStart = (char*)buf + sizeof(int8_t) * numOfCols; - for (int32_t i = 0; i < numOfCols; ++i) { - SColumnInfoData* pColInfo = taosArrayGet(pBlock->pDataBlock, i); - - if (!isNull[i]) { - colDataSetVal(pColInfo, pBlock->info.rows, pStart, false); - if (pColInfo->info.type == TSDB_DATA_TYPE_JSON) { - int32_t dataLen = getJsonValueLen(pStart); - pStart += dataLen; - } else if (IS_VAR_DATA_TYPE(pColInfo->info.type)) { - pStart += varDataTLen(pStart); - } else { - int32_t bytes = pColInfo->info.bytes; - pStart += bytes; - } - } else { - colDataSetNULL(pColInfo, pBlock->info.rows); - } - } 
- - if (*(int32_t*)pStart != pStart-buf) { - qError("table merge scan row buf deserialization. length error %d != %d ", *(int32_t*)pStart, (int32_t)(pStart-buf)); - }; - - releaseBufPage(pInfo->sortRowIdInfo.pExtSrcRowsBuf, page); - - pBlock->info.dataLoad = 1; - pBlock->info.scanFlag = ((SDataBlockInfo*)tsortGetBlockInfo(pTupleHandle))->scanFlag; - pBlock->info.rows += 1; -} - static int32_t tableMergeScanDoSkipTable(STableMergeScanInfo* pInfo, SSDataBlock* pBlock) { int64_t nRows = 0; void* pNum = tSimpleHashGet(pInfo->mTableNumRows, &pBlock->info.id.uid, sizeof(pBlock->info.id.uid)); @@ -3535,18 +3362,9 @@ static SSDataBlock* getBlockForTableMergeScan(void* param) { pOperator->resultInfo.totalRows += pBlock->info.rows; - SSDataBlock* pSortInputBlk = NULL; - if (pInfo->bSortRowId) { - blockDataCleanup(pInfo->pSortInputBlock); - fillSortInputBlock(pInfo, pBlock, pInfo->pSortInputBlock); - pSortInputBlk = pInfo->pSortInputBlock; - } else { - pSortInputBlk = pBlock; - } - pInfo->base.readRecorder.elapsedTime += (taosGetTimestampUs() - st) / 1000.0; - return pSortInputBlk; + return pBlock; } return NULL; @@ -3594,26 +3412,6 @@ void tableMergeScanTsdbNotifyCb(ETsdReaderNotifyType type, STsdReaderNotifyInfo* return; } -int32_t startRowIdSort(STableMergeScanInfo *pInfo) { - STmsSortRowIdInfo* pSort = &pInfo->sortRowIdInfo; - int32_t pageSize = getProperSortPageSize(blockDataGetRowSize(pInfo->pResBlock), - taosArrayGetSize(pInfo->pResBlock->pDataBlock)); - pageSize *= 2; - int numOfTables = pInfo->tableEndIndex - pInfo->tableStartIndex + 1; - int32_t memSize = TMIN(pageSize * numOfTables, 512 * 1024 * 1024); - int32_t code = createDiskbasedBuf(&pSort->pExtSrcRowsBuf, pageSize, memSize, "tms-ext-src-block", tsTempDir); - dBufSetPrintInfo(pSort->pExtSrcRowsBuf); - return code; -} - -int32_t stopRowIdSort(STableMergeScanInfo *pInfo) { - STmsSortRowIdInfo* pSort = &pInfo->sortRowIdInfo; - - destroyDiskbasedBuf(pSort->pExtSrcRowsBuf); - pSort->pExtSrcRowsBuf = NULL; - return 
0; -} - int32_t startDurationForGroupTableMergeScan(SOperatorInfo* pOperator) { STableMergeScanInfo* pInfo = pOperator->info; SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; @@ -3624,16 +3422,20 @@ int32_t startDurationForGroupTableMergeScan(SOperatorInfo* pOperator) { pInfo->bNewFilesetEvent = false; pInfo->bNextDurationBlockEvent = false; - code = startRowIdSort(pInfo); - if (code != TSDB_CODE_SUCCESS) { - T_LONG_JMP(pTaskInfo->env, code); - } - pInfo->sortBufSize = 2048 * pInfo->bufPageSize; int32_t numOfBufPage = pInfo->sortBufSize / pInfo->bufPageSize; - pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_BLOCK_TS_MERGE, pInfo->bufPageSize, numOfBufPage, - pInfo->pSortInputBlock, pTaskInfo->id.str, 0, 0, 0); + if (pInfo->bSortRowId) { + pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_BLOCK_TS_MERGE, pInfo->bufPageSize, numOfBufPage, + pInfo->pSortInputBlock, pTaskInfo->id.str, 0, 0, 0); + int32_t memSize = 512 * 1024 * 1024; + int32_t rowBytes = blockDataGetRowSize(pInfo->pResBlock) + taosArrayGetSize(pInfo->pResBlock->pDataBlock) + sizeof(int32_t); + int32_t pageSize = TMAX(memSize/numOfTable, rowBytes); + tsortSetSortByRowId(pInfo->pSortHandle, pageSize, memSize); + } else { + pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_BLOCK_TS_MERGE, pInfo->bufPageSize, numOfBufPage, + pInfo->pSortInputBlock, pTaskInfo->id.str, 0, 0, 0); + } tsortSetMergeLimit(pInfo->pSortHandle, pInfo->mergeLimit); tsortSetAbortCheckFn(pInfo->pSortHandle, isTaskKilled, pOperator->pTaskInfo); @@ -3670,7 +3472,6 @@ void stopDurationForGroupTableMergeScan(SOperatorInfo* pOperator) { tsortDestroySortHandle(pInfo->pSortHandle); pInfo->pSortHandle = NULL; - stopRowIdSort(pInfo); } int32_t startGroupTableMergeScan(SOperatorInfo* pOperator) { @@ -3757,11 +3558,7 @@ SSDataBlock* getSortedTableMergeScanBlockData(SSortHandle* pHandle, SSDataBlock* if (pTupleHandle == NULL) { break; } - if (!pInfo->bSortRowId) { - 
appendOneRowToDataBlock(pResBlock, pTupleHandle); - } else { - appendOneRowIdRowToDataBlock(pInfo, pResBlock, pTupleHandle); - } + tsortAppendTupleToBlock(pInfo->pSortHandle, pResBlock, pTupleHandle); if (pResBlock->info.rows >= capacity) { break; } @@ -3847,11 +3644,6 @@ void destroyTableMergeScanOperatorInfo(void* param) { STableMergeScanInfo* pTableScanInfo = (STableMergeScanInfo*)param; cleanupQueryTableDataCond(&pTableScanInfo->base.cond); - if (pTableScanInfo->sortRowIdInfo.pExtSrcRowsBuf != NULL) { - destroyDiskbasedBuf(pTableScanInfo->sortRowIdInfo.pExtSrcRowsBuf); - pTableScanInfo->sortRowIdInfo.pExtSrcRowsBuf = NULL; - } - int32_t numOfTable = taosArrayGetSize(pTableScanInfo->sortSourceParams); pTableScanInfo->base.readerAPI.tsdReaderClose(pTableScanInfo->base.dataReader); @@ -3895,34 +3687,6 @@ int32_t getTableMergeScanExplainExecInfo(SOperatorInfo* pOptr, void** pOptrExpla return TSDB_CODE_SUCCESS; } -static void initRowIdSortputBlock(STableMergeScanInfo* pInfo) { - SSDataBlock* pSortInput = createDataBlock(); - SColumnInfoData tsCol = createColumnInfoData(TSDB_DATA_TYPE_TIMESTAMP, 8, 1); - blockDataAppendColInfo(pSortInput, &tsCol); - SColumnInfoData pageIdCol = createColumnInfoData(TSDB_DATA_TYPE_INT, 4, 2); - blockDataAppendColInfo(pSortInput, &pageIdCol); - SColumnInfoData offsetCol = createColumnInfoData(TSDB_DATA_TYPE_INT, 4, 3); - blockDataAppendColInfo(pSortInput, &offsetCol); - pInfo->pSortInputBlock = pSortInput; - - int32_t srcTsSlotId = 0; - for (int32_t i = 0; i < taosArrayGetSize(pInfo->base.matchInfo.pList); ++i) { - SColMatchItem* colInfo = taosArrayGet(pInfo->base.matchInfo.pList, i); - if (colInfo->colId == PRIMARYKEY_TIMESTAMP_COL_ID) { - srcTsSlotId = colInfo->dstSlotId; - } - } - pInfo->sortRowIdInfo.srcTsSlotId = srcTsSlotId; - SArray* pList = taosArrayInit(1, sizeof(SBlockOrderInfo)); - SBlockOrderInfo bi = {0}; - bi.order = pInfo->base.cond.order; - bi.slotId = 0; - bi.nullFirst = NULL_ORDER_FIRST; - taosArrayPush(pList, &bi); - 
pInfo->pSortInfo = pList; - return; -} - SOperatorInfo* createTableMergeScanOperatorInfo(STableScanPhysiNode* pTableScanNode, SReadHandle* readHandle, STableListInfo* pTableListInfo, SExecTaskInfo* pTaskInfo) { STableMergeScanInfo* pInfo = taosMemoryCalloc(1, sizeof(STableMergeScanInfo)); @@ -4000,12 +3764,8 @@ SOperatorInfo* createTableMergeScanOperatorInfo(STableScanPhysiNode* pTableScanN pInfo->bSortRowId = false; } - if (!pInfo->bSortRowId) { - pInfo->pSortInfo = generateSortByTsInfo(pInfo->base.matchInfo.pList, pInfo->base.cond.order); - pInfo->pSortInputBlock = createOneDataBlock(pInfo->pResBlock, false); - } else { - initRowIdSortputBlock(pInfo); - } + pInfo->pSortInfo = generateSortByTsInfo(pInfo->base.matchInfo.pList, pInfo->base.cond.order); + pInfo->pSortInputBlock = createOneDataBlock(pInfo->pResBlock, false); pInfo->sortSourceParams = taosArrayInit(64, sizeof(STableMergeScanSortSourceParam)); int32_t rowSize = blockDataGetRowSize(pInfo->pSortInputBlock); diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 205cd7d3ef..0b3a7c180e 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -75,6 +75,13 @@ struct SSortHandle { bool (*abortCheckFn)(void* param); void* abortCheckParam; + + bool bSortByRowId; + SDiskbasedBuf* pExtRowsBuf; + int32_t extRowsPageSize; + int32_t extRowsMemSize; + int32_t srcTsSlotId; + SBlockOrderInfo extRowsOrderInfo; }; void tsortSetSingleTableMerge(SSortHandle* pHandle) { @@ -199,7 +206,7 @@ SSortHandle* tsortCreateSortHandle(SArray* pSortInfo, int32_t type, int32_t page pSortHandle->type = type; pSortHandle->pageSize = pageSize; pSortHandle->numOfPages = numOfPages; - pSortHandle->pSortInfo = pSortInfo; + pSortHandle->pSortInfo = taosArrayDup(pSortInfo, NULL); pSortHandle->loops = 0; pSortHandle->pqMaxTupleLength = pqMaxTupleLength; @@ -303,6 +310,10 @@ void tsortDestroySortHandle(SSortHandle* pSortHandle) { taosArrayDestroy(pSortHandle->pOrderedSource); 
taosMemoryFreeClear(pSortHandle); + if (pSortHandle->pExtRowsBuf != NULL) { + destroyDiskbasedBuf(pSortHandle->pExtRowsBuf); + } + taosArrayDestroy(pSortHandle->pSortInfo); } int32_t tsortAddSource(SSortHandle* pSortHandle, void* pSource) { @@ -848,6 +859,228 @@ static int32_t createPageBuf(SSortHandle* pHandle) { return 0; } +void tsortAppendTupleToBlock(SSortHandle* pHandle, SSDataBlock* pBlock, STupleHandle* pTupleHandle) { + if (pHandle->bSortByRowId) { + int32_t pageId = *(int32_t*)tsortGetValue(pTupleHandle, 1); + int32_t offset = *(int32_t*)tsortGetValue(pTupleHandle, 2); + void* page = getBufPage(pHandle->pExtRowsBuf, pageId); + + int32_t numOfCols = taosArrayGetSize(pBlock->pDataBlock); + char* buf = (char*)page + offset; + char* isNull = (char*)buf; + char* pStart = (char*)buf + sizeof(int8_t) * numOfCols; + for (int32_t i = 0; i < numOfCols; ++i) { + SColumnInfoData* pColInfo = taosArrayGet(pBlock->pDataBlock, i); + + if (!isNull[i]) { + colDataSetVal(pColInfo, pBlock->info.rows, pStart, false); + if (pColInfo->info.type == TSDB_DATA_TYPE_JSON) { + int32_t dataLen = getJsonValueLen(pStart); + pStart += dataLen; + } else if (IS_VAR_DATA_TYPE(pColInfo->info.type)) { + pStart += varDataTLen(pStart); + } else { + int32_t bytes = pColInfo->info.bytes; + pStart += bytes; + } + } else { + colDataSetNULL(pColInfo, pBlock->info.rows); + } + } + + if (*(int32_t*)pStart != pStart - buf) { + qError("table merge scan row buf deserialization. 
length error %d != %d ", *(int32_t*)pStart, + (int32_t)(pStart - buf)); + }; + + releaseBufPage(pHandle->pExtRowsBuf, page); + + pBlock->info.dataLoad = 1; + pBlock->info.scanFlag = ((SDataBlockInfo*)tsortGetBlockInfo(pTupleHandle))->scanFlag; + pBlock->info.rows += 1; + } else { + for (int32_t i = 0; i < taosArrayGetSize(pBlock->pDataBlock); ++i) { + SColumnInfoData* pColInfo = taosArrayGet(pBlock->pDataBlock, i); + bool isNull = tsortIsNullVal(pTupleHandle, i); + if (isNull) { + colDataSetNULL(pColInfo, pBlock->info.rows); + } else { + char* pData = tsortGetValue(pTupleHandle, i); + if (pData != NULL) { + colDataSetVal(pColInfo, pBlock->info.rows, pData, false); + } + } + } + + pBlock->info.dataLoad = 1; + pBlock->info.scanFlag = ((SDataBlockInfo*)tsortGetBlockInfo(pTupleHandle))->scanFlag; + pBlock->info.rows += 1; + } +} + +static int32_t getPageFromExtSrcRowsBuf(SDiskbasedBuf* pResultBuf, int32_t rowBytes, int32_t* pPageId, SFilePage** ppFilePage) { + SFilePage* pFilePage = NULL; + + int32_t pageId = -1; + SArray* list = getDataBufPagesIdList(pResultBuf); + + if (taosArrayGetSize(list) == 0) { + pFilePage = getNewBufPage(pResultBuf, &pageId); + pFilePage->num = sizeof(SFilePage); + } else { + SPageInfo* pi = getLastPageInfo(list); + pFilePage = getBufPage(pResultBuf, getPageId(pi)); + if (pFilePage == NULL) { + qError("failed to get buffer, code:%s", tstrerror(terrno)); + return terrno; + } + + pageId = getPageId(pi); + + if (pFilePage->num + rowBytes > getBufPageSize(pResultBuf)) { + releaseBufPageInfo(pResultBuf, pi); + + pFilePage = getNewBufPage(pResultBuf, &pageId); + if (pFilePage != NULL) { + pFilePage->num = sizeof(SFilePage); + } + } + } + + if (pFilePage == NULL) { + qError("failed to get buffer, code:%s", tstrerror(terrno)); + return terrno; + } + + *pPageId = pageId; + *ppFilePage = pFilePage; + return TSDB_CODE_SUCCESS; +} + +static int32_t blockRowToBuf(SSDataBlock* pBlock, int32_t rowIdx, char* buf) { + size_t numOfCols = 
taosArrayGetSize(pBlock->pDataBlock); + + char* isNull = (char*)buf; + char* pStart = (char*)buf + sizeof(int8_t) * numOfCols; + for (int32_t i = 0; i < numOfCols; ++i) { + SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, i); + if (colDataIsNull_s(pCol, rowIdx)) { + isNull[i] = 1; + continue; + } + + isNull[i] = 0; + char* pData = colDataGetData(pCol, rowIdx); + if (pCol->info.type == TSDB_DATA_TYPE_JSON) { + if (pCol->pData) { + int32_t dataLen = getJsonValueLen(pData); + memcpy(pStart, pData, dataLen); + pStart += dataLen; + } else { + // the column that is pre-allocated has no data and has offset + *pStart = 0; + pStart += 1; + } + } else if (IS_VAR_DATA_TYPE(pCol->info.type)) { + if (pCol->pData) { + varDataCopy(pStart, pData); + pStart += varDataTLen(pData); + } else { + // the column that is pre-allocated has no data and has offset + *(VarDataLenT*)(pStart) = 0; + pStart += VARSTR_HEADER_SIZE; + } + } else { + int32_t bytes = pCol->info.bytes; + memcpy(pStart, pData, bytes); + pStart += bytes; + } + } + *(int32_t*)pStart = (char*)pStart - (char*)buf; + pStart += sizeof(int32_t); + return (int32_t)(pStart - (char*)buf); +} + +static int32_t saveBlockRowToExtRowsBuf(SSortHandle* pHandle, SSDataBlock* pBlock, int32_t rowIdx, int32_t* pPageId, int32_t* pOffset, int32_t* pLength) { + SDiskbasedBuf* pResultBuf = pHandle->pExtRowsBuf; + int32_t rowBytes = blockDataGetRowSize(pBlock) + taosArrayGetSize(pBlock->pDataBlock) + sizeof(int32_t); + int32_t pageId = -1; + SFilePage* pFilePage = NULL; + int32_t code = getPageFromExtSrcRowsBuf(pResultBuf, rowBytes, &pageId, &pFilePage); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + *pPageId = pageId; + *pOffset = pFilePage->num; + *pLength = blockRowToBuf(pBlock, rowIdx, (char*)pFilePage + (*pOffset)); + + pFilePage->num += (*pLength); + setBufPageDirty(pFilePage, true); + releaseBufPage(pResultBuf, pFilePage); + return 0; +} + + +static void appendToRowIndexDataBlock(SSortHandle* pHandle, SSDataBlock* 
pSource, int32_t* rowIndex) { + int32_t pageId = -1; + int32_t offset = -1; + int32_t length = -1; + saveBlockRowToExtRowsBuf(pHandle, pSource, *rowIndex, &pageId, &offset, &length); + + SSDataBlock* pBlock = pHandle->pDataBlock; + SColumnInfoData* pSrcTsCol = taosArrayGet(pSource->pDataBlock, pHandle->extRowsOrderInfo.slotId); + SColumnInfoData* pTsCol = taosArrayGet(pBlock->pDataBlock, 0); + char* pData = colDataGetData(pSrcTsCol, *rowIndex); + colDataSetVal(pTsCol, pBlock->info.rows, pData, false); + + SColumnInfoData* pPageIdCol = taosArrayGet(pBlock->pDataBlock, 1); + colDataSetInt32(pPageIdCol, pBlock->info.rows, &pageId); + + SColumnInfoData* pOffsetCol = taosArrayGet(pBlock->pDataBlock, 2); + colDataSetInt32(pOffsetCol, pBlock->info.rows, &offset); + + pBlock->info.rows += 1; + *rowIndex += 1; +} + +static void initRowIdSort(SSortHandle* pHandle) { + blockDataDestroy(pHandle->pDataBlock); + + SSDataBlock* pSortInput = createDataBlock(); + SColumnInfoData tsCol = createColumnInfoData(TSDB_DATA_TYPE_TIMESTAMP, 8, 1); + blockDataAppendColInfo(pSortInput, &tsCol); + SColumnInfoData pageIdCol = createColumnInfoData(TSDB_DATA_TYPE_INT, 4, 2); + blockDataAppendColInfo(pSortInput, &pageIdCol); + SColumnInfoData offsetCol = createColumnInfoData(TSDB_DATA_TYPE_INT, 4, 3); + blockDataAppendColInfo(pSortInput, &offsetCol); + pHandle->pDataBlock = pSortInput; + + SBlockOrderInfo* pOrder = taosArrayGet(pHandle->pSortInfo, 0); + SBlockOrderInfo bi = {0}; + bi.order = pOrder->order; + bi.slotId = 0; + bi.nullFirst = NULL_ORDER_FIRST; + + SArray* aOrder = taosArrayInit(1, sizeof(SBlockOrderInfo)); + taosArrayPush(aOrder, &bi); + + taosArrayDestroy(pHandle->pSortInfo); + pHandle->pSortInfo = aOrder; + return; +} + +int32_t tsortSetSortByRowId(SSortHandle* pHandle, int32_t extRowsPageSize, int32_t extRowsMemSize) { + int32_t code = createDiskbasedBuf(&pHandle->pExtRowsBuf, extRowsPageSize, extRowsMemSize, "sort-ext-rows", tsTempDir); + pHandle->extRowsPageSize = 
extRowsPageSize; + pHandle->extRowsMemSize = extRowsMemSize; + SBlockOrderInfo* pOrder = taosArrayGet(pHandle->pSortInfo, 0); + pHandle->extRowsOrderInfo = *pOrder; + initRowIdSort(pHandle); + pHandle->bSortByRowId = true; + return code; +} + typedef struct SBlkMergeSupport { int64_t** aTs; int32_t* aRowIdx; @@ -919,7 +1152,7 @@ static int32_t getPageBufIncForRow(SSDataBlock* blk, int32_t row, int32_t rowIdx return sz; } -static int32_t sortBlocksToExtSource(SSortHandle* pHandle, SArray* aBlk, SBlockOrderInfo* order, SArray* aExtSrc) { +static int32_t sortBlocksToExtSource(SSortHandle* pHandle, SArray* aBlk, SArray* aExtSrc) { int32_t code = TSDB_CODE_SUCCESS; int pgHeaderSz = sizeof(int32_t) + sizeof(int32_t) * taosArrayGetSize(pHandle->pDataBlock->pDataBlock); int32_t rowCap = blockDataGetCapacityInRow(pHandle->pDataBlock, pHandle->pageSize, pgHeaderSz); @@ -927,13 +1160,15 @@ static int32_t sortBlocksToExtSource(SSortHandle* pHandle, SArray* aBlk, SBlockO blockDataCleanup(pHandle->pDataBlock); int32_t numBlks = taosArrayGetSize(aBlk); + SBlockOrderInfo* pOrigBlockOrder = (!pHandle->bSortByRowId) ? taosArrayGet(pHandle->pSortInfo, 0) : &pHandle->extRowsOrderInfo; + SBlockOrderInfo* pHandleBlockOrder = taosArrayGet(pHandle->pSortInfo, 0); SBlkMergeSupport sup; sup.aRowIdx = taosMemoryCalloc(numBlks, sizeof(int32_t)); sup.aTs = taosMemoryCalloc(numBlks, sizeof(int64_t*)); - sup.order = order->order; + sup.order = pOrigBlockOrder->order; for (int i = 0; i < numBlks; ++i) { SSDataBlock* blk = taosArrayGetP(aBlk, i); - SColumnInfoData* col = taosArrayGet(blk->pDataBlock, order->slotId); + SColumnInfoData* col = taosArrayGet(blk->pDataBlock, pOrigBlockOrder->slotId); sup.aTs[i] = (int64_t*)col->pData; sup.aRowIdx[i] = 0; } @@ -958,8 +1193,8 @@ static int32_t sortBlocksToExtSource(SSortHandle* pHandle, SArray* aBlk, SBlockO int32_t nMergedRows = 0; bool mergeLimitReached = false; size_t blkPgSz = pgHeaderSz; - int64_t lastPageBufTs = (order->order == TSDB_ORDER_ASC) ? 
INT64_MAX : INT64_MIN; - int64_t currTs = (order->order == TSDB_ORDER_ASC) ? INT64_MAX : INT64_MIN; + int64_t lastPageBufTs = (pHandleBlockOrder->order == TSDB_ORDER_ASC) ? INT64_MAX : INT64_MIN; + int64_t currTs = (pHandleBlockOrder->order == TSDB_ORDER_ASC) ? INT64_MAX : INT64_MIN; while (nRows < totalRows) { int32_t minIdx = tMergeTreeGetChosenIndex(pTree); SSDataBlock* minBlk = taosArrayGetP(aBlk, minIdx); @@ -967,7 +1202,7 @@ static int32_t sortBlocksToExtSource(SSortHandle* pHandle, SArray* aBlk, SBlockO int32_t bufInc = getPageBufIncForRow(minBlk, minRow, pHandle->pDataBlock->info.rows); if (blkPgSz <= pHandle->pageSize && blkPgSz + bufInc > pHandle->pageSize) { - SColumnInfoData* tsCol = taosArrayGet(pHandle->pDataBlock->pDataBlock, order->slotId); + SColumnInfoData* tsCol = taosArrayGet(pHandle->pDataBlock->pDataBlock, pHandleBlockOrder->slotId); lastPageBufTs = ((int64_t*)tsCol->pData)[pHandle->pDataBlock->info.rows - 1]; appendDataBlockToPageBuf(pHandle, pHandle->pDataBlock, aPgId); nMergedRows += pHandle->pDataBlock->info.rows; @@ -977,15 +1212,19 @@ static int32_t sortBlocksToExtSource(SSortHandle* pHandle, SArray* aBlk, SBlockO if ((pHandle->mergeLimit != -1) && (nMergedRows >= pHandle->mergeLimit)) { mergeLimitReached = true; - if ((lastPageBufTs < pHandle->currMergeLimitTs && order->order == TSDB_ORDER_ASC) || - (lastPageBufTs > pHandle->currMergeLimitTs && order->order == TSDB_ORDER_DESC)) { + if ((lastPageBufTs < pHandle->currMergeLimitTs && pHandleBlockOrder->order == TSDB_ORDER_ASC) || + (lastPageBufTs > pHandle->currMergeLimitTs && pHandleBlockOrder->order == TSDB_ORDER_DESC)) { pHandle->currMergeLimitTs = lastPageBufTs; } break; } } blockDataEnsureCapacity(pHandle->pDataBlock, pHandle->pDataBlock->info.rows + 1); - appendOneRowToDataBlock(pHandle->pDataBlock, minBlk, &minRow); + if (!pHandle->bSortByRowId) { + appendOneRowToDataBlock(pHandle->pDataBlock, minBlk, &minRow); + } else { + appendToRowIndexDataBlock(pHandle, minBlk, &minRow); + } 
blkPgSz += bufInc; ++nRows; @@ -999,14 +1238,14 @@ static int32_t sortBlocksToExtSource(SSortHandle* pHandle, SArray* aBlk, SBlockO } if (pHandle->pDataBlock->info.rows > 0) { if (!mergeLimitReached) { - SColumnInfoData* tsCol = taosArrayGet(pHandle->pDataBlock->pDataBlock, order->slotId); + SColumnInfoData* tsCol = taosArrayGet(pHandle->pDataBlock->pDataBlock, pHandleBlockOrder->slotId); lastPageBufTs = ((int64_t*)tsCol->pData)[pHandle->pDataBlock->info.rows - 1]; appendDataBlockToPageBuf(pHandle, pHandle->pDataBlock, aPgId); nMergedRows += pHandle->pDataBlock->info.rows; if ((pHandle->mergeLimit != -1) && (nMergedRows >= pHandle->mergeLimit)) { mergeLimitReached = true; - if ((lastPageBufTs < pHandle->currMergeLimitTs && order->order == TSDB_ORDER_ASC) || - (lastPageBufTs > pHandle->currMergeLimitTs && order->order == TSDB_ORDER_DESC)) { + if ((lastPageBufTs < pHandle->currMergeLimitTs && pHandleBlockOrder->order == TSDB_ORDER_ASC) || + (lastPageBufTs > pHandle->currMergeLimitTs && pHandleBlockOrder->order == TSDB_ORDER_DESC)) { pHandle->currMergeLimitTs = lastPageBufTs; } } @@ -1025,7 +1264,6 @@ static int32_t sortBlocksToExtSource(SSortHandle* pHandle, SArray* aBlk, SBlockO } static int32_t createBlocksMergeSortInitialSources(SSortHandle* pHandle) { - SBlockOrderInfo* pOrder = taosArrayGet(pHandle->pSortInfo, 0); size_t nSrc = taosArrayGetSize(pHandle->pOrderedSource); SArray* aExtSrc = taosArrayInit(nSrc, POINTER_BYTES); @@ -1040,7 +1278,8 @@ static int32_t createBlocksMergeSortInitialSources(SSortHandle* pHandle) { SSortSource* pSrc = taosArrayGetP(pHandle->pOrderedSource, 0); int32_t szSort = 0; - if (pOrder->order == TSDB_ORDER_ASC) { + SBlockOrderInfo* pOrigOrder = (!pHandle->bSortByRowId) ? 
taosArrayGet(pHandle->pSortInfo, 0) : &pHandle->extRowsOrderInfo; + if (pOrigOrder->order == TSDB_ORDER_ASC) { pHandle->currMergeLimitTs = INT64_MAX; } else { pHandle->currMergeLimitTs = INT64_MIN; @@ -1051,10 +1290,10 @@ static int32_t createBlocksMergeSortInitialSources(SSortHandle* pHandle) { while (1) { SSDataBlock* pBlk = pHandle->fetchfp(pSrc->param); if (pBlk != NULL) { - SColumnInfoData* tsCol = taosArrayGet(pBlk->pDataBlock, pOrder->slotId); + SColumnInfoData* tsCol = taosArrayGet(pBlk->pDataBlock, pOrigOrder->slotId); int64_t firstRowTs = *(int64_t*)tsCol->pData; - if ((pOrder->order == TSDB_ORDER_ASC && firstRowTs > pHandle->currMergeLimitTs) || - (pOrder->order == TSDB_ORDER_DESC && firstRowTs < pHandle->currMergeLimitTs)) { + if ((pOrigOrder->order == TSDB_ORDER_ASC && firstRowTs > pHandle->currMergeLimitTs) || + (pOrigOrder->order == TSDB_ORDER_DESC && firstRowTs < pHandle->currMergeLimitTs)) { continue; } } @@ -1076,7 +1315,7 @@ static int32_t createBlocksMergeSortInitialSources(SSortHandle* pHandle) { tSimpleHashClear(mUidBlk); int64_t p = taosGetTimestampUs(); - code = sortBlocksToExtSource(pHandle, aBlkSort, pOrder, aExtSrc); + code = sortBlocksToExtSource(pHandle, aBlkSort, aExtSrc); if (code != TSDB_CODE_SUCCESS) { tSimpleHashCleanup(mUidBlk); taosArrayDestroy(aBlkSort); From c9b2bb714d77470fe81a5ee31fd2a2b0faa0163a Mon Sep 17 00:00:00 2001 From: slzhou Date: Wed, 3 Jan 2024 14:39:06 +0800 Subject: [PATCH 25/58] fix: bugs during self-test --- source/libs/executor/src/tsort.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 0b3a7c180e..714b268008 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -309,11 +309,11 @@ void tsortDestroySortHandle(SSortHandle* pSortHandle) { qDebug("all source fetch time: %" PRId64 "us num:%" PRId64 " %s", fetchUs, fetchNum, pSortHandle->idStr); 
taosArrayDestroy(pSortHandle->pOrderedSource); - taosMemoryFreeClear(pSortHandle); if (pSortHandle->pExtRowsBuf != NULL) { destroyDiskbasedBuf(pSortHandle->pExtRowsBuf); } - taosArrayDestroy(pSortHandle->pSortInfo); + taosArrayDestroy(pSortHandle->pSortInfo); + taosMemoryFreeClear(pSortHandle); } int32_t tsortAddSource(SSortHandle* pSortHandle, void* pSource) { @@ -1072,6 +1072,7 @@ static void initRowIdSort(SSortHandle* pHandle) { int32_t tsortSetSortByRowId(SSortHandle* pHandle, int32_t extRowsPageSize, int32_t extRowsMemSize) { int32_t code = createDiskbasedBuf(&pHandle->pExtRowsBuf, extRowsPageSize, extRowsMemSize, "sort-ext-rows", tsTempDir); + dBufSetPrintInfo(pHandle->pExtRowsBuf); pHandle->extRowsPageSize = extRowsPageSize; pHandle->extRowsMemSize = extRowsMemSize; SBlockOrderInfo* pOrder = taosArrayGet(pHandle->pSortInfo, 0); @@ -1267,7 +1268,7 @@ static int32_t createBlocksMergeSortInitialSources(SSortHandle* pHandle) { size_t nSrc = taosArrayGetSize(pHandle->pOrderedSource); SArray* aExtSrc = taosArrayInit(nSrc, POINTER_BYTES); - size_t maxBufSize = pHandle->numOfPages * pHandle->pageSize; + size_t maxBufSize = (pHandle->bSortByRowId) ? 
pHandle->extRowsMemSize : (pHandle->numOfPages * pHandle->pageSize); int32_t code = createPageBuf(pHandle); if (code != TSDB_CODE_SUCCESS) { From fb732be16dc9b48db8a969f9a04b194ed212d546 Mon Sep 17 00:00:00 2001 From: slzhou Date: Wed, 3 Jan 2024 15:09:24 +0800 Subject: [PATCH 26/58] fix: change pageSize and buf size --- source/libs/executor/src/tsort.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 714b268008..59146f85a5 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -1045,7 +1045,6 @@ static void appendToRowIndexDataBlock(SSortHandle* pHandle, SSDataBlock* pSource } static void initRowIdSort(SSortHandle* pHandle) { - blockDataDestroy(pHandle->pDataBlock); SSDataBlock* pSortInput = createDataBlock(); SColumnInfoData tsCol = createColumnInfoData(TSDB_DATA_TYPE_TIMESTAMP, 8, 1); @@ -1054,7 +1053,14 @@ static void initRowIdSort(SSortHandle* pHandle) { blockDataAppendColInfo(pSortInput, &pageIdCol); SColumnInfoData offsetCol = createColumnInfoData(TSDB_DATA_TYPE_INT, 4, 3); blockDataAppendColInfo(pSortInput, &offsetCol); + + blockDataDestroy(pHandle->pDataBlock); pHandle->pDataBlock = pSortInput; + + int32_t rowSize = blockDataGetRowSize(pHandle->pDataBlock); + size_t nCols = taosArrayGetSize(pHandle->pDataBlock->pDataBlock); + pHandle->pageSize = getProperSortPageSize(rowSize, nCols); + pHandle->numOfPages = 2048; SBlockOrderInfo* pOrder = taosArrayGet(pHandle->pSortInfo, 0); SBlockOrderInfo bi = {0}; From 06fd7ee96ff9e868f700127a76eba4fddf4b1dec Mon Sep 17 00:00:00 2001 From: slzhou Date: Wed, 3 Jan 2024 15:34:38 +0800 Subject: [PATCH 27/58] fix: use the correct block to calc page size --- source/libs/executor/src/tsort.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 59146f85a5..21c9aeeff3 100644 --- 
a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -1056,7 +1056,7 @@ static void initRowIdSort(SSortHandle* pHandle) { blockDataDestroy(pHandle->pDataBlock); pHandle->pDataBlock = pSortInput; - + int32_t rowSize = blockDataGetRowSize(pHandle->pDataBlock); size_t nCols = taosArrayGetSize(pHandle->pDataBlock->pDataBlock); pHandle->pageSize = getProperSortPageSize(rowSize, nCols); @@ -1206,7 +1206,8 @@ static int32_t sortBlocksToExtSource(SSortHandle* pHandle, SArray* aBlk, SArray* int32_t minIdx = tMergeTreeGetChosenIndex(pTree); SSDataBlock* minBlk = taosArrayGetP(aBlk, minIdx); int32_t minRow = sup.aRowIdx[minIdx]; - int32_t bufInc = getPageBufIncForRow(minBlk, minRow, pHandle->pDataBlock->info.rows); + SSDataBlock* incBlock = (pHandle->bSortByRowId) ? pHandle->pDataBlock : minBlk; + int32_t bufInc = getPageBufIncForRow(incBlock, minRow, pHandle->pDataBlock->info.rows); if (blkPgSz <= pHandle->pageSize && blkPgSz + bufInc > pHandle->pageSize) { SColumnInfoData* tsCol = taosArrayGet(pHandle->pDataBlock->pDataBlock, pHandleBlockOrder->slotId); @@ -1215,7 +1216,8 @@ static int32_t sortBlocksToExtSource(SSortHandle* pHandle, SArray* aBlk, SArray* nMergedRows += pHandle->pDataBlock->info.rows; blockDataCleanup(pHandle->pDataBlock); blkPgSz = pgHeaderSz; - bufInc = getPageBufIncForRow(minBlk, minRow, 0); + incBlock = (pHandle->bSortByRowId) ? 
pHandle->pDataBlock : minBlk; + bufInc = getPageBufIncForRow(incBlock, minRow, 0); if ((pHandle->mergeLimit != -1) && (nMergedRows >= pHandle->mergeLimit)) { mergeLimitReached = true; From 0f9be1033ac8fcbaafc4b854cfa3785a77f22b52 Mon Sep 17 00:00:00 2001 From: slzhou Date: Fri, 5 Jan 2024 19:22:17 +0800 Subject: [PATCH 28/58] fix: use page size of sort --- source/libs/executor/src/scanoperator.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 36e810bd23..b3e6c64953 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -3429,9 +3429,9 @@ int32_t startDurationForGroupTableMergeScan(SOperatorInfo* pOperator) { pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_BLOCK_TS_MERGE, pInfo->bufPageSize, numOfBufPage, pInfo->pSortInputBlock, pTaskInfo->id.str, 0, 0, 0); int32_t memSize = 512 * 1024 * 1024; - int32_t rowBytes = blockDataGetRowSize(pInfo->pResBlock) + taosArrayGetSize(pInfo->pResBlock->pDataBlock) + sizeof(int32_t); - int32_t pageSize = TMAX(memSize/numOfTable, rowBytes); - tsortSetSortByRowId(pInfo->pSortHandle, pageSize, memSize); + // int32_t rowBytes = blockDataGetRowSize(pInfo->pResBlock) + taosArrayGetSize(pInfo->pResBlock->pDataBlock) + sizeof(int32_t); + // int32_t pageSize = TMAX(memSize/numOfTable, rowBytes); + tsortSetSortByRowId(pInfo->pSortHandle, pInfo->bufPageSize, memSize); } else { pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_BLOCK_TS_MERGE, pInfo->bufPageSize, numOfBufPage, pInfo->pSortInputBlock, pTaskInfo->id.str, 0, 0, 0); From 7c58a9a280a1df92ab1361b814d1a049554c09f7 Mon Sep 17 00:00:00 2001 From: slzhou Date: Mon, 8 Jan 2024 08:21:27 +0800 Subject: [PATCH 29/58] fix: disable sort by row id when num of tables is 1 --- source/libs/executor/src/scanoperator.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index b3e6c64953..76d63e8896 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -3425,7 +3425,7 @@ int32_t startDurationForGroupTableMergeScan(SOperatorInfo* pOperator) { pInfo->sortBufSize = 2048 * pInfo->bufPageSize; int32_t numOfBufPage = pInfo->sortBufSize / pInfo->bufPageSize; - if (pInfo->bSortRowId) { + if (pInfo->bSortRowId && numOfTable != 1) { pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_BLOCK_TS_MERGE, pInfo->bufPageSize, numOfBufPage, pInfo->pSortInputBlock, pTaskInfo->id.str, 0, 0, 0); int32_t memSize = 512 * 1024 * 1024; From b27bf263af954db61f835a998a3620f3fe284a15 Mon Sep 17 00:00:00 2001 From: slzhou Date: Tue, 30 Jan 2024 10:26:24 +0800 Subject: [PATCH 30/58] fix: calculate row bytes once --- source/libs/executor/src/tsort.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 45dfa2e026..4e5cc37d99 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -78,6 +78,7 @@ struct SSortHandle { bool bSortByRowId; SDiskbasedBuf* pExtRowsBuf; + int32_t extRowBytes; int32_t extRowsPageSize; int32_t extRowsMemSize; int32_t srcTsSlotId; @@ -1006,7 +1007,7 @@ static int32_t blockRowToBuf(SSDataBlock* pBlock, int32_t rowIdx, char* buf) { static int32_t saveBlockRowToExtRowsBuf(SSortHandle* pHandle, SSDataBlock* pBlock, int32_t rowIdx, int32_t* pPageId, int32_t* pOffset, int32_t* pLength) { SDiskbasedBuf* pResultBuf = pHandle->pExtRowsBuf; - int32_t rowBytes = blockDataGetRowSize(pBlock) + taosArrayGetSize(pBlock->pDataBlock) + sizeof(int32_t); + int32_t rowBytes = pHandle->extRowBytes; int32_t pageId = -1; SFilePage* pFilePage = NULL; int32_t code = getPageFromExtSrcRowsBuf(pResultBuf, rowBytes, &pageId, &pFilePage); @@ -1082,6 +1083,7 @@ static void initRowIdSort(SSortHandle* pHandle) 
{ int32_t tsortSetSortByRowId(SSortHandle* pHandle, int32_t extRowsPageSize, int32_t extRowsMemSize) { int32_t code = createDiskbasedBuf(&pHandle->pExtRowsBuf, extRowsPageSize, extRowsMemSize, "sort-ext-rows", tsTempDir); dBufSetPrintInfo(pHandle->pExtRowsBuf); + pHandle->extRowBytes = blockDataGetRowSize(pHandle->pDataBlock) + taosArrayGetSize(pHandle->pDataBlock->pDataBlock) + sizeof(int32_t); pHandle->extRowsPageSize = extRowsPageSize; pHandle->extRowsMemSize = extRowsMemSize; SBlockOrderInfo* pOrder = taosArrayGet(pHandle->pSortInfo, 0); From 3facbd43dbf961b673b804d9e8132615424c5dbd Mon Sep 17 00:00:00 2001 From: slzhou Date: Wed, 31 Jan 2024 13:10:42 +0800 Subject: [PATCH 31/58] feat: first commit to optimize out tpagedbuf --- source/libs/executor/src/tsort.c | 64 ++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 4e5cc37d99..2a2d8cf2fc 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -32,6 +32,34 @@ struct STupleHandle { int32_t rowIndex; }; +typedef struct SSortMemPageEntry { + int32_t pageId; + bool active; + + void* data; + + struct SSortMemPageEntry* next; + struct SSortMemPageEntry* prev; + +} SSortMemPageEntry; + +typedef struct SSortMemFile { + int32_t pageSize; + int32_t cacheSize; + char* writeBuf; + + int32_t currPageId; + int32_t currPageOffset; + bool bDirty; + + int32_t totalMemPages; + SSortMemPageEntry* memPages; + int32_t numMemPages; + SSHashObj* mActivePages; + + TdFilePtr pBufFile; +} SSortMemFile; + struct SSortHandle { int32_t type; int32_t pageSize; @@ -78,6 +106,7 @@ struct SSortHandle { bool bSortByRowId; SDiskbasedBuf* pExtRowsBuf; + SSortMemFile* pExtRowsMemFile; int32_t extRowBytes; int32_t extRowsPageSize; int32_t extRowsMemSize; @@ -1025,6 +1054,41 @@ static int32_t saveBlockRowToExtRowsBuf(SSortHandle* pHandle, SSDataBlock* pBloc return 0; } +// pageId * pageSize == pageStartOffset in file. 
write in pages +// when pass the page boundaries, the page is move to the front(old). +// find hash from pageid to page entry. if the page can not be found, +// 1) nuse inactive pages, 2) then new pages if not exceeding mem limit, 3) then active pages +// new pages is added or moved to the back. + +static int32_t createSortMemFile(SSortHandle* pHandle) { + if (pHandle->pExtRowsMemFile != NULL) { + return TSDB_CODE_SUCCESS; + } + SSortMemFile* pMemFile = taosMemoryCalloc(1, sizeof(SSortMemFile)); + pMemFile->pBufFile = + taosOpenFile(pBuf->path, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_READ | TD_FILE_TRUNC | TD_FILE_AUTO_DEL); + return TSDB_CODE_SUCCESS; +} + +static int32_t saveBlockRowToExtRowsMemFile(SSortHandle* pHandle, SSDataBlock* pBlock, int32_t rowIdx, int32_t* pPageId, int32_t* pOffset, int32_t* pLength) { + SDiskbasedBuf* pResultBuf = pHandle->pExtRowsBuf; + int32_t rowBytes = pHandle->extRowBytes; + int32_t pageId = -1; + SFilePage* pFilePage = NULL; + int32_t code = getPageFromExtSrcRowsBuf(pResultBuf, rowBytes, &pageId, &pFilePage); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + *pPageId = pageId; + *pOffset = pFilePage->num; + *pLength = blockRowToBuf(pBlock, rowIdx, (char*)pFilePage + (*pOffset)); + + pFilePage->num += (*pLength); + setBufPageDirty(pFilePage, true); + releaseBufPage(pResultBuf, pFilePage); + return 0; +} static void appendToRowIndexDataBlock(SSortHandle* pHandle, SSDataBlock* pSource, int32_t* rowIndex) { int32_t pageId = -1; From 539736cfc935c9bfbdf573ccfba01d1f9510c005 Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Sun, 4 Feb 2024 16:30:34 +0800 Subject: [PATCH 32/58] fix: first version of mem file= --- source/libs/executor/src/tsort.c | 238 +++++++++++++++++++------------ 1 file changed, 150 insertions(+), 88 deletions(-) diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 2a2d8cf2fc..c01612675a 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c 
@@ -46,18 +46,20 @@ typedef struct SSortMemPageEntry { typedef struct SSortMemFile { int32_t pageSize; int32_t cacheSize; - char* writeBuf; + char* pageBuf; int32_t currPageId; int32_t currPageOffset; bool bDirty; int32_t totalMemPages; - SSortMemPageEntry* memPages; + SSortMemPageEntry* pagesHead; + SSortMemPageEntry* pagesTail; int32_t numMemPages; SSHashObj* mActivePages; - TdFilePtr pBufFile; + TdFilePtr pTdFile; + char memFilePath[PATH_MAX]; } SSortMemFile; struct SSortHandle { @@ -105,7 +107,6 @@ struct SSortHandle { void* abortCheckParam; bool bSortByRowId; - SDiskbasedBuf* pExtRowsBuf; SSortMemFile* pExtRowsMemFile; int32_t extRowBytes; int32_t extRowsPageSize; @@ -117,6 +118,11 @@ struct SSortHandle { void* mergeLimitReachedParam; }; +static int32_t destroySortMemFile(SSortHandle* pHandle); +static int32_t getPageFromExtMemFile(SSortHandle* pHandle, int32_t pageId, char** ppPage); +static void setExtMemFilePageUnused(SSortMemFile* pMemFile, int32_t pageId); +static int32_t saveLastPageToExtRowsMemFile(SSortHandle* pHandle); + void tsortSetSingleTableMerge(SSortHandle* pHandle) { pHandle->singleTableMerge = true; } @@ -342,8 +348,8 @@ void tsortDestroySortHandle(SSortHandle* pSortHandle) { qDebug("all source fetch time: %" PRId64 "us num:%" PRId64 " %s", fetchUs, fetchNum, pSortHandle->idStr); taosArrayDestroy(pSortHandle->pOrderedSource); - if (pSortHandle->pExtRowsBuf != NULL) { - destroyDiskbasedBuf(pSortHandle->pExtRowsBuf); + if (pSortHandle->pExtRowsMemFile != NULL) { + destroySortMemFile(pSortHandle); } taosArrayDestroy(pSortHandle->pSortInfo); taosMemoryFreeClear(pSortHandle); @@ -896,7 +902,9 @@ void tsortAppendTupleToBlock(SSortHandle* pHandle, SSDataBlock* pBlock, STupleHa if (pHandle->bSortByRowId) { int32_t pageId = *(int32_t*)tsortGetValue(pTupleHandle, 1); int32_t offset = *(int32_t*)tsortGetValue(pTupleHandle, 2); - void* page = getBufPage(pHandle->pExtRowsBuf, pageId); + + char* page = NULL; + getPageFromExtMemFile(pHandle, pageId, &page); 
int32_t numOfCols = taosArrayGetSize(pBlock->pDataBlock); char* buf = (char*)page + offset; @@ -926,11 +934,13 @@ void tsortAppendTupleToBlock(SSortHandle* pHandle, SSDataBlock* pBlock, STupleHa (int32_t)(pStart - buf)); }; - releaseBufPage(pHandle->pExtRowsBuf, page); - pBlock->info.dataLoad = 1; pBlock->info.scanFlag = ((SDataBlockInfo*)tsortGetBlockInfo(pTupleHandle))->scanFlag; pBlock->info.rows += 1; + + if (offset + pHandle->extRowBytes >= pHandle->pExtRowsMemFile->pageSize) { + setExtMemFilePageUnused(pHandle->pExtRowsMemFile, pageId); + } } else { for (int32_t i = 0; i < taosArrayGetSize(pBlock->pDataBlock); ++i) { SColumnInfoData* pColInfo = taosArrayGet(pBlock->pDataBlock, i); @@ -951,45 +961,6 @@ void tsortAppendTupleToBlock(SSortHandle* pHandle, SSDataBlock* pBlock, STupleHa } } -static int32_t getPageFromExtSrcRowsBuf(SDiskbasedBuf* pResultBuf, int32_t rowBytes, int32_t* pPageId, SFilePage** ppFilePage) { - SFilePage* pFilePage = NULL; - - int32_t pageId = -1; - SArray* list = getDataBufPagesIdList(pResultBuf); - - if (taosArrayGetSize(list) == 0) { - pFilePage = getNewBufPage(pResultBuf, &pageId); - pFilePage->num = sizeof(SFilePage); - } else { - SPageInfo* pi = getLastPageInfo(list); - pFilePage = getBufPage(pResultBuf, getPageId(pi)); - if (pFilePage == NULL) { - qError("failed to get buffer, code:%s", tstrerror(terrno)); - return terrno; - } - - pageId = getPageId(pi); - - if (pFilePage->num + rowBytes > getBufPageSize(pResultBuf)) { - releaseBufPageInfo(pResultBuf, pi); - - pFilePage = getNewBufPage(pResultBuf, &pageId); - if (pFilePage != NULL) { - pFilePage->num = sizeof(SFilePage); - } - } - } - - if (pFilePage == NULL) { - qError("failed to get buffer, code:%s", tstrerror(terrno)); - return terrno; - } - - *pPageId = pageId; - *ppFilePage = pFilePage; - return TSDB_CODE_SUCCESS; -} - static int32_t blockRowToBuf(SSDataBlock* pBlock, int32_t rowIdx, char* buf) { size_t numOfCols = taosArrayGetSize(pBlock->pDataBlock); @@ -1034,67 +1005,156 @@ 
static int32_t blockRowToBuf(SSDataBlock* pBlock, int32_t rowIdx, char* buf) { return (int32_t)(pStart - (char*)buf); } -static int32_t saveBlockRowToExtRowsBuf(SSortHandle* pHandle, SSDataBlock* pBlock, int32_t rowIdx, int32_t* pPageId, int32_t* pOffset, int32_t* pLength) { - SDiskbasedBuf* pResultBuf = pHandle->pExtRowsBuf; - int32_t rowBytes = pHandle->extRowBytes; - int32_t pageId = -1; - SFilePage* pFilePage = NULL; - int32_t code = getPageFromExtSrcRowsBuf(pResultBuf, rowBytes, &pageId, &pFilePage); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - *pPageId = pageId; - *pOffset = pFilePage->num; - *pLength = blockRowToBuf(pBlock, rowIdx, (char*)pFilePage + (*pOffset)); - - pFilePage->num += (*pLength); - setBufPageDirty(pFilePage, true); - releaseBufPage(pResultBuf, pFilePage); - return 0; -} - // pageId * pageSize == pageStartOffset in file. write in pages // when pass the page boundaries, the page is move to the front(old). // find hash from pageid to page entry. if the page can not be found, -// 1) nuse inactive pages, 2) then new pages if not exceeding mem limit, 3) then active pages +// 1) unused inactive pages, 2) then new pages if not exceeding mem limit, 3) then active pages // new pages is added or moved to the back. 
+static int32_t getPageFromExtMemFile(SSortHandle* pHandle, int32_t pageId, char** ppPage) { + SSortMemFile* pMemFile = pHandle->pExtRowsMemFile; + SSortMemPageEntry** ppPageEntry = tSimpleHashGet(pMemFile->mActivePages, &pageId, sizeof(pageId)); + if (ppPageEntry) { + *ppPage = (char*)((*ppPageEntry)->data); + } else { + SSortMemPageEntry* pEntry = pMemFile->pagesHead->next; + if (pEntry && !pEntry->active || pMemFile->numMemPages >= pMemFile->totalMemPages) { + if (pEntry->active) { + tSimpleHashRemove(pMemFile->mActivePages, &pEntry->pageId, sizeof(pEntry->pageId)); + } + pEntry->prev->next = pEntry->next; + pEntry->next->prev = pEntry->prev; + taosLSeekFile(pMemFile->pTdFile, pageId * pMemFile->pageSize, SEEK_SET); + taosReadFile(pMemFile->pTdFile, pEntry->data, pMemFile->pageSize); + pEntry->active = false; + } else if (pMemFile->numMemPages < pMemFile->totalMemPages) { + pEntry = taosMemoryCalloc(1, sizeof(SSortMemPageEntry)); + pEntry->data = taosMemoryMalloc(pMemFile->pageSize); + ++pMemFile->numMemPages; + } + { + SSortMemPageEntry* tail = pMemFile->pagesTail; + tail->next = pEntry; + pEntry->next = NULL; + pEntry->prev = tail; + pEntry->active = true; + pMemFile->pagesTail = pEntry; + tSimpleHashPut(pMemFile->mActivePages, &pageId, sizeof(pageId), &pEntry, POINTER_BYTES); + *ppPage = pEntry->data; + } + } + return TSDB_CODE_SUCCESS; +} + +static void setExtMemFilePageUnused(SSortMemFile* pMemFile, int32_t pageId) { + SSortMemPageEntry** ppPageEntry = tSimpleHashGet(pMemFile->mActivePages, &pageId, sizeof(pageId)); + SSortMemPageEntry* pEntry = *ppPageEntry; + if (pEntry == pMemFile->pagesTail) { + pMemFile->pagesTail = pEntry->prev; + } + + pEntry->prev->next = pEntry->next; + pEntry->next->prev = pEntry->prev; + + SSortMemPageEntry* first = pMemFile->pagesHead->next; + SSortMemPageEntry* head = pMemFile->pagesHead; + head->next = pEntry; + pEntry->next = first; + first->prev = pEntry; + pEntry->prev = head; + + pEntry->active = false; + 
tSimpleHashRemove(pMemFile->mActivePages, &pageId, sizeof(pageId)); + return; +} + static int32_t createSortMemFile(SSortHandle* pHandle) { if (pHandle->pExtRowsMemFile != NULL) { return TSDB_CODE_SUCCESS; } SSortMemFile* pMemFile = taosMemoryCalloc(1, sizeof(SSortMemFile)); - pMemFile->pBufFile = - taosOpenFile(pBuf->path, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_READ | TD_FILE_TRUNC | TD_FILE_AUTO_DEL); + + taosGetTmpfilePath(tsTempDir, "sort-ext-mem", pMemFile->memFilePath); + pMemFile->pTdFile = + taosOpenFile(pMemFile->memFilePath, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_READ | TD_FILE_TRUNC); + pMemFile->currPageId = -1; + pMemFile->currPageOffset = -1; + + pMemFile->pageSize = pHandle->extRowsPageSize; + pMemFile->cacheSize = pHandle->extRowsMemSize; + pMemFile->pageBuf = taosMemoryMalloc(pMemFile->pageSize); + pMemFile->bDirty = false; + + pMemFile->mActivePages = tSimpleHashInit(8192, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT)); + pMemFile->pagesHead = taosMemoryCalloc(1, sizeof(SSortMemPageEntry)); + pMemFile->pagesTail = pMemFile->pagesHead; + + pMemFile->totalMemPages = pMemFile->cacheSize / pMemFile->pageSize; + pMemFile->numMemPages = 0; + + pHandle->pExtRowsMemFile = pMemFile; return TSDB_CODE_SUCCESS; } +static int32_t destroySortMemFile(SSortHandle* pHandle) { + if (pHandle->pExtRowsMemFile == NULL) return TSDB_CODE_SUCCESS; + + SSortMemFile* pMemFile = pHandle->pExtRowsMemFile; + SSortMemPageEntry* pEntry = pMemFile->pagesHead; + while (pEntry != NULL) { + if (pEntry->data) { + taosMemoryFree(pEntry->data); + } + SSortMemPageEntry* pCurr = pEntry; + pEntry = pEntry->next; + taosMemoryFree(pCurr); + } + tSimpleHashCleanup(pMemFile->mActivePages); + taosMemoryFree(pMemFile->pageBuf); + taosCloseFile(pMemFile->pTdFile); + return TSDB_CODE_SUCCESS; +} + static int32_t saveBlockRowToExtRowsMemFile(SSortHandle* pHandle, SSDataBlock* pBlock, int32_t rowIdx, int32_t* pPageId, int32_t* pOffset, int32_t* pLength) { - SDiskbasedBuf* pResultBuf = 
pHandle->pExtRowsBuf; - int32_t rowBytes = pHandle->extRowBytes; - int32_t pageId = -1; - SFilePage* pFilePage = NULL; - int32_t code = getPageFromExtSrcRowsBuf(pResultBuf, rowBytes, &pageId, &pFilePage); - if (code != TSDB_CODE_SUCCESS) { - return code; + SSortMemFile* pMemFile = pHandle->pExtRowsMemFile; + if (pMemFile->currPageId == -1) { + pMemFile->currPageId = 0; + pMemFile->currPageOffset = 0; + } else { + if (pMemFile->currPageOffset + pHandle->extRowBytes >= pMemFile->pageSize) { + taosLSeekFile(pMemFile->pTdFile, pMemFile->currPageId * pMemFile->pageSize, SEEK_SET); + taosWriteFile(pMemFile->pTdFile, pMemFile->pageBuf, pMemFile->currPageOffset + 1); + + ++pMemFile->currPageId; + pMemFile->currPageOffset = 0; + } } - *pPageId = pageId; - *pOffset = pFilePage->num; - *pLength = blockRowToBuf(pBlock, rowIdx, (char*)pFilePage + (*pOffset)); + *pPageId = pMemFile->currPageId; + *pOffset = pMemFile->currPageOffset; + int32_t blockLen = blockRowToBuf(pBlock, rowIdx, pMemFile->pageBuf + pMemFile->currPageOffset); + *pLength = blockLen; + pMemFile->currPageOffset += blockLen; + pMemFile->bDirty = true; + return TSDB_CODE_SUCCESS; +} - pFilePage->num += (*pLength); - setBufPageDirty(pFilePage, true); - releaseBufPage(pResultBuf, pFilePage); - return 0; +static int32_t saveLastPageToExtRowsMemFile(SSortHandle* pHandle) { + SSortMemFile* pMemFile = pHandle->pExtRowsMemFile; + if (!pMemFile->bDirty) { + return TSDB_CODE_SUCCESS; + } + taosLSeekFile(pMemFile->pTdFile, pMemFile->currPageId * pMemFile->pageSize, SEEK_SET); + taosWriteFile(pMemFile->pTdFile, pMemFile->pageBuf, pMemFile->currPageOffset + 1); + pMemFile->bDirty = false; + return TSDB_CODE_SUCCESS; } static void appendToRowIndexDataBlock(SSortHandle* pHandle, SSDataBlock* pSource, int32_t* rowIndex) { int32_t pageId = -1; int32_t offset = -1; int32_t length = -1; - saveBlockRowToExtRowsBuf(pHandle, pSource, *rowIndex, &pageId, &offset, &length); + saveBlockRowToExtRowsMemFile(pHandle, pSource, *rowIndex, 
&pageId, &offset, &length); SSDataBlock* pBlock = pHandle->pDataBlock; SColumnInfoData* pSrcTsCol = taosArrayGet(pSource->pDataBlock, pHandle->extRowsOrderInfo.slotId); @@ -1145,14 +1205,13 @@ static void initRowIdSort(SSortHandle* pHandle) { } int32_t tsortSetSortByRowId(SSortHandle* pHandle, int32_t extRowsPageSize, int32_t extRowsMemSize) { - int32_t code = createDiskbasedBuf(&pHandle->pExtRowsBuf, extRowsPageSize, extRowsMemSize, "sort-ext-rows", tsTempDir); - dBufSetPrintInfo(pHandle->pExtRowsBuf); pHandle->extRowBytes = blockDataGetRowSize(pHandle->pDataBlock) + taosArrayGetSize(pHandle->pDataBlock->pDataBlock) + sizeof(int32_t); pHandle->extRowsPageSize = extRowsPageSize; pHandle->extRowsMemSize = extRowsMemSize; SBlockOrderInfo* pOrder = taosArrayGet(pHandle->pSortInfo, 0); pHandle->extRowsOrderInfo = *pOrder; initRowIdSort(pHandle); + int32_t code = createSortMemFile(pHandle); pHandle->bSortByRowId = true; return code; } @@ -1346,6 +1405,9 @@ static int32_t sortBlocksToExtSource(SSortHandle* pHandle, SArray* aBlk, SArray* } blockDataCleanup(pHandle->pDataBlock); } + + saveLastPageToExtRowsMemFile(pHandle); + SSDataBlock* pMemSrcBlk = createOneDataBlock(pHandle->pDataBlock, false); doAddNewExternalMemSource(pHandle->pBuf, aExtSrc, pMemSrcBlk, &pHandle->sourceId, aPgId); From fa55a32e7761a6c277c68dea7e17797ee2bfbf8c Mon Sep 17 00:00:00 2001 From: slzhou Date: Sun, 4 Feb 2024 16:53:47 +0800 Subject: [PATCH 33/58] fix: pass compilation --- source/libs/executor/src/tsort.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index c01612675a..e633af12ed 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -1111,7 +1111,7 @@ static int32_t destroySortMemFile(SSortHandle* pHandle) { } tSimpleHashCleanup(pMemFile->mActivePages); taosMemoryFree(pMemFile->pageBuf); - taosCloseFile(pMemFile->pTdFile); + taosCloseFile(&pMemFile->pTdFile); return 
TSDB_CODE_SUCCESS; } From 8e0f578dba5984db83c3f7b4155303bc54cdbc4b Mon Sep 17 00:00:00 2001 From: slzhou Date: Sun, 4 Feb 2024 16:57:48 +0800 Subject: [PATCH 34/58] fix: memory leak --- source/libs/executor/src/tsort.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index e633af12ed..22c3f14b43 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -1112,6 +1112,8 @@ static int32_t destroySortMemFile(SSortHandle* pHandle) { tSimpleHashCleanup(pMemFile->mActivePages); taosMemoryFree(pMemFile->pageBuf); taosCloseFile(&pMemFile->pTdFile); + taosMemoryFree(pMemFile); + pHandle->pExtRowsMemFile = NULL; return TSDB_CODE_SUCCESS; } From 23bd2aa525801e2343ce89d36ae470787f249953 Mon Sep 17 00:00:00 2001 From: slzhou Date: Sun, 4 Feb 2024 17:51:29 +0800 Subject: [PATCH 35/58] fix: first pass --- source/libs/executor/src/tsort.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 22c3f14b43..3f30e3fe89 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -1024,8 +1024,6 @@ static int32_t getPageFromExtMemFile(SSortHandle* pHandle, int32_t pageId, char* } pEntry->prev->next = pEntry->next; pEntry->next->prev = pEntry->prev; - taosLSeekFile(pMemFile->pTdFile, pageId * pMemFile->pageSize, SEEK_SET); - taosReadFile(pMemFile->pTdFile, pEntry->data, pMemFile->pageSize); pEntry->active = false; } else if (pMemFile->numMemPages < pMemFile->totalMemPages) { pEntry = taosMemoryCalloc(1, sizeof(SSortMemPageEntry)); @@ -1033,6 +1031,8 @@ static int32_t getPageFromExtMemFile(SSortHandle* pHandle, int32_t pageId, char* ++pMemFile->numMemPages; } { + taosLSeekFile(pMemFile->pTdFile, pageId * pMemFile->pageSize, SEEK_SET); + taosReadFile(pMemFile->pTdFile, pEntry->data, pMemFile->pageSize); SSortMemPageEntry* tail = pMemFile->pagesTail; tail->next = pEntry; 
pEntry->next = NULL; From 18366934c30c5b46651d26ebd97e33da7133597b Mon Sep 17 00:00:00 2001 From: slzhou Date: Sun, 4 Feb 2024 20:16:19 +0800 Subject: [PATCH 36/58] fix: use c api --- source/libs/executor/src/tsort.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 3f30e3fe89..022a420c06 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -13,6 +13,8 @@ * along with this program. If not, see . */ +#define ALLOW_FORBID_FUNC + #include "query.h" #include "tcommon.h" @@ -58,7 +60,7 @@ typedef struct SSortMemFile { int32_t numMemPages; SSHashObj* mActivePages; - TdFilePtr pTdFile; + FILE* pTdFile; char memFilePath[PATH_MAX]; } SSortMemFile; @@ -1031,8 +1033,8 @@ static int32_t getPageFromExtMemFile(SSortHandle* pHandle, int32_t pageId, char* ++pMemFile->numMemPages; } { - taosLSeekFile(pMemFile->pTdFile, pageId * pMemFile->pageSize, SEEK_SET); - taosReadFile(pMemFile->pTdFile, pEntry->data, pMemFile->pageSize); + fseek(pMemFile->pTdFile, pageId * pMemFile->pageSize, SEEK_SET); + fread(pEntry->data, pMemFile->pageSize, 1, pMemFile->pTdFile); SSortMemPageEntry* tail = pMemFile->pagesTail; tail->next = pEntry; pEntry->next = NULL; @@ -1076,7 +1078,7 @@ static int32_t createSortMemFile(SSortHandle* pHandle) { taosGetTmpfilePath(tsTempDir, "sort-ext-mem", pMemFile->memFilePath); pMemFile->pTdFile = - taosOpenFile(pMemFile->memFilePath, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_READ | TD_FILE_TRUNC); + fopen(pMemFile->memFilePath, "wb+"); pMemFile->currPageId = -1; pMemFile->currPageOffset = -1; @@ -1111,7 +1113,8 @@ static int32_t destroySortMemFile(SSortHandle* pHandle) { } tSimpleHashCleanup(pMemFile->mActivePages); taosMemoryFree(pMemFile->pageBuf); - taosCloseFile(&pMemFile->pTdFile); + fclose(pMemFile->pTdFile); + taosRemoveFile(pMemFile->memFilePath); taosMemoryFree(pMemFile); pHandle->pExtRowsMemFile = NULL; return 
TSDB_CODE_SUCCESS; @@ -1124,8 +1127,8 @@ static int32_t saveBlockRowToExtRowsMemFile(SSortHandle* pHandle, SSDataBlock* p pMemFile->currPageOffset = 0; } else { if (pMemFile->currPageOffset + pHandle->extRowBytes >= pMemFile->pageSize) { - taosLSeekFile(pMemFile->pTdFile, pMemFile->currPageId * pMemFile->pageSize, SEEK_SET); - taosWriteFile(pMemFile->pTdFile, pMemFile->pageBuf, pMemFile->currPageOffset + 1); + fseek(pMemFile->pTdFile, pMemFile->currPageId * pMemFile->pageSize, SEEK_SET); + fwrite(pMemFile->pageBuf, pMemFile->currPageOffset + 1, 1, pMemFile->pTdFile); ++pMemFile->currPageId; pMemFile->currPageOffset = 0; @@ -1146,8 +1149,8 @@ static int32_t saveLastPageToExtRowsMemFile(SSortHandle* pHandle) { if (!pMemFile->bDirty) { return TSDB_CODE_SUCCESS; } - taosLSeekFile(pMemFile->pTdFile, pMemFile->currPageId * pMemFile->pageSize, SEEK_SET); - taosWriteFile(pMemFile->pTdFile, pMemFile->pageBuf, pMemFile->currPageOffset + 1); + fseek(pMemFile->pTdFile, pMemFile->currPageId * pMemFile->pageSize, SEEK_SET); + fwrite(pMemFile->pageBuf, pMemFile->currPageOffset + 1, 1, pMemFile->pTdFile); pMemFile->bDirty = false; return TSDB_CODE_SUCCESS; } From 247bfae03997d052d694d63de7e3a9a0df7b7649 Mon Sep 17 00:00:00 2001 From: slzhou Date: Sun, 4 Feb 2024 21:12:19 +0800 Subject: [PATCH 37/58] fix: use fseeko --- source/libs/executor/src/tsort.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 022a420c06..1935bb68bb 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -1127,7 +1127,7 @@ static int32_t saveBlockRowToExtRowsMemFile(SSortHandle* pHandle, SSDataBlock* p pMemFile->currPageOffset = 0; } else { if (pMemFile->currPageOffset + pHandle->extRowBytes >= pMemFile->pageSize) { - fseek(pMemFile->pTdFile, pMemFile->currPageId * pMemFile->pageSize, SEEK_SET); + fseeko(pMemFile->pTdFile, ((int64_t)pMemFile->currPageId) * pMemFile->pageSize, 
SEEK_SET); fwrite(pMemFile->pageBuf, pMemFile->currPageOffset + 1, 1, pMemFile->pTdFile); ++pMemFile->currPageId; @@ -1149,7 +1149,7 @@ static int32_t saveLastPageToExtRowsMemFile(SSortHandle* pHandle) { if (!pMemFile->bDirty) { return TSDB_CODE_SUCCESS; } - fseek(pMemFile->pTdFile, pMemFile->currPageId * pMemFile->pageSize, SEEK_SET); + fseeko(pMemFile->pTdFile, ((int64_t)pMemFile->currPageId) * pMemFile->pageSize, SEEK_SET); fwrite(pMemFile->pageBuf, pMemFile->currPageOffset + 1, 1, pMemFile->pTdFile); pMemFile->bDirty = false; return TSDB_CODE_SUCCESS; From 83e44f07355e09a17ed5dfa1d68d6483bfcefda8 Mon Sep 17 00:00:00 2001 From: slzhou Date: Sun, 4 Feb 2024 22:36:25 +0800 Subject: [PATCH 38/58] fix: more big batch writes than page writes --- source/libs/executor/src/tsort.c | 35 ++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 1935bb68bb..25c69c35c9 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -48,7 +48,10 @@ typedef struct SSortMemPageEntry { typedef struct SSortMemFile { int32_t pageSize; int32_t cacheSize; - char* pageBuf; + + char* writePageBuf; + int32_t startPageId; + int32_t numWritePages; int32_t currPageId; int32_t currPageOffset; @@ -123,7 +126,7 @@ struct SSortHandle { static int32_t destroySortMemFile(SSortHandle* pHandle); static int32_t getPageFromExtMemFile(SSortHandle* pHandle, int32_t pageId, char** ppPage); static void setExtMemFilePageUnused(SSortMemFile* pMemFile, int32_t pageId); -static int32_t saveLastPageToExtRowsMemFile(SSortHandle* pHandle); +static int32_t saveDirtyPagesToExtRowsMemFile(SSortHandle* pHandle); void tsortSetSingleTableMerge(SSortHandle* pHandle) { pHandle->singleTableMerge = true; @@ -1084,7 +1087,8 @@ static int32_t createSortMemFile(SSortHandle* pHandle) { pMemFile->pageSize = pHandle->extRowsPageSize; pMemFile->cacheSize = pHandle->extRowsMemSize; - 
pMemFile->pageBuf = taosMemoryMalloc(pMemFile->pageSize); + pMemFile->numWritePages = pMemFile->cacheSize/pMemFile->pageSize; + pMemFile->writePageBuf = taosMemoryMalloc(pMemFile->pageSize * pMemFile->numWritePages); pMemFile->bDirty = false; pMemFile->mActivePages = tSimpleHashInit(8192, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT)); @@ -1112,7 +1116,7 @@ static int32_t destroySortMemFile(SSortHandle* pHandle) { taosMemoryFree(pCurr); } tSimpleHashCleanup(pMemFile->mActivePages); - taosMemoryFree(pMemFile->pageBuf); + taosMemoryFree(pMemFile->writePageBuf); fclose(pMemFile->pTdFile); taosRemoveFile(pMemFile->memFilePath); taosMemoryFree(pMemFile); @@ -1125,32 +1129,41 @@ static int32_t saveBlockRowToExtRowsMemFile(SSortHandle* pHandle, SSDataBlock* p if (pMemFile->currPageId == -1) { pMemFile->currPageId = 0; pMemFile->currPageOffset = 0; + pMemFile->startPageId = 0; } else { if (pMemFile->currPageOffset + pHandle->extRowBytes >= pMemFile->pageSize) { - fseeko(pMemFile->pTdFile, ((int64_t)pMemFile->currPageId) * pMemFile->pageSize, SEEK_SET); - fwrite(pMemFile->pageBuf, pMemFile->currPageOffset + 1, 1, pMemFile->pTdFile); ++pMemFile->currPageId; pMemFile->currPageOffset = 0; + + if (pMemFile->currPageId - pMemFile->startPageId >= pMemFile->numWritePages) { + fseeko(pMemFile->pTdFile, ((int64_t)pMemFile->startPageId) * pMemFile->pageSize, SEEK_SET); + fwrite(pMemFile->writePageBuf, pMemFile->pageSize * pMemFile->numWritePages, 1, pMemFile->pTdFile); + + pMemFile->startPageId = pMemFile->currPageId; + } } } *pPageId = pMemFile->currPageId; *pOffset = pMemFile->currPageOffset; - int32_t blockLen = blockRowToBuf(pBlock, rowIdx, pMemFile->pageBuf + pMemFile->currPageOffset); + int32_t offsetPages = (pMemFile->currPageId - pMemFile->startPageId) * pMemFile->pageSize; + int32_t blockLen = blockRowToBuf(pBlock, rowIdx, + pMemFile->writePageBuf + offsetPages + pMemFile->currPageOffset); *pLength = blockLen; pMemFile->currPageOffset += blockLen; pMemFile->bDirty = true; 
return TSDB_CODE_SUCCESS; } -static int32_t saveLastPageToExtRowsMemFile(SSortHandle* pHandle) { +static int32_t saveDirtyPagesToExtRowsMemFile(SSortHandle* pHandle) { SSortMemFile* pMemFile = pHandle->pExtRowsMemFile; if (!pMemFile->bDirty) { return TSDB_CODE_SUCCESS; } - fseeko(pMemFile->pTdFile, ((int64_t)pMemFile->currPageId) * pMemFile->pageSize, SEEK_SET); - fwrite(pMemFile->pageBuf, pMemFile->currPageOffset + 1, 1, pMemFile->pTdFile); + fseeko(pMemFile->pTdFile, ((int64_t)pMemFile->startPageId) * pMemFile->pageSize, SEEK_SET); + int32_t numWriteBytes = pMemFile->pageSize * (pMemFile->currPageId - pMemFile->startPageId) + pMemFile->currPageOffset + 1; + fwrite(pMemFile->writePageBuf, numWriteBytes, 1, pMemFile->pTdFile); pMemFile->bDirty = false; return TSDB_CODE_SUCCESS; } @@ -1411,7 +1424,7 @@ static int32_t sortBlocksToExtSource(SSortHandle* pHandle, SArray* aBlk, SArray* blockDataCleanup(pHandle->pDataBlock); } - saveLastPageToExtRowsMemFile(pHandle); + saveDirtyPagesToExtRowsMemFile(pHandle); SSDataBlock* pMemSrcBlk = createOneDataBlock(pHandle->pDataBlock, false); doAddNewExternalMemSource(pHandle->pBuf, aExtSrc, pMemSrcBlk, &pHandle->sourceId, aPgId); From 9e1446aa88fbb74340ad6f30ec6b97d990c3fa6e Mon Sep 17 00:00:00 2001 From: slzhou Date: Thu, 22 Feb 2024 10:03:29 +0800 Subject: [PATCH 39/58] fix: initialize result block --- source/libs/executor/src/scanoperator.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 68743a9d7a..496d2a9f43 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -4348,6 +4348,9 @@ SOperatorInfo* createTableMergeScanOperatorInfo(STableScanPhysiNode* pTableScanN pInfo->mSkipTables = NULL; } + initResultSizeInfo(&pOperator->resultInfo, 1024); + pInfo->pResBlock = createDataBlockFromDescNode(pDescNode); + blockDataEnsureCapacity(pInfo->pResBlock, 
pOperator->resultInfo.capacity); if (!hasLimit && blockDataGetRowSize(pInfo->pResBlock) >= 256) { pInfo->bSortRowId = true; } else { @@ -4355,9 +4358,6 @@ SOperatorInfo* createTableMergeScanOperatorInfo(STableScanPhysiNode* pTableScanN } - initResultSizeInfo(&pOperator->resultInfo, 1024); - pInfo->pResBlock = createDataBlockFromDescNode(pDescNode); - blockDataEnsureCapacity(pInfo->pResBlock, pOperator->resultInfo.capacity); pInfo->pSortInfo = generateSortByTsInfo(pInfo->base.matchInfo.pList, pInfo->base.cond.order); pInfo->pReaderBlock = createOneDataBlock(pInfo->pResBlock, false); From b5bc45c73baf12de8586003ce7416fccedb10f86 Mon Sep 17 00:00:00 2001 From: slzhou Date: Thu, 22 Feb 2024 11:21:44 +0800 Subject: [PATCH 40/58] fix: save to ext mem file only when sort by row id --- source/libs/executor/src/tsort.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 363125c216..b313e46270 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -1423,8 +1423,9 @@ static int32_t sortBlocksToExtSource(SSortHandle* pHandle, SArray* aBlk, SArray* } blockDataCleanup(pHandle->pDataBlock); } - - saveDirtyPagesToExtRowsMemFile(pHandle); + if (pHandle->bSortByRowId) { + saveDirtyPagesToExtRowsMemFile(pHandle); + } SSDataBlock* pMemSrcBlk = createOneDataBlock(pHandle->pDataBlock, false); doAddNewExternalMemSource(pHandle->pBuf, aExtSrc, pMemSrcBlk, &pHandle->sourceId, aPgId); From 0ae2e2905d2f0a15d56da7866ec3167bd2b0050b Mon Sep 17 00:00:00 2001 From: slzhou Date: Fri, 23 Feb 2024 08:24:16 +0800 Subject: [PATCH 41/58] fix: change fseek to fseeko for 64 bit offset --- source/libs/executor/src/tsort.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index b313e46270..68b3bd2c48 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -1036,7 
+1036,7 @@ static int32_t getPageFromExtMemFile(SSortHandle* pHandle, int32_t pageId, char* ++pMemFile->numMemPages; } { - fseek(pMemFile->pTdFile, pageId * pMemFile->pageSize, SEEK_SET); + fseeko(pMemFile->pTdFile, ((int64_t)pageId) * pMemFile->pageSize, SEEK_SET); fread(pEntry->data, pMemFile->pageSize, 1, pMemFile->pTdFile); SSortMemPageEntry* tail = pMemFile->pagesTail; tail->next = pEntry; From 3fb6ed8a426ab08f95c06cc2279b4240696e4f07 Mon Sep 17 00:00:00 2001 From: slzhou Date: Fri, 23 Feb 2024 08:35:01 +0800 Subject: [PATCH 42/58] feat: free write buf after reading all blocks --- source/libs/executor/src/tsort.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 68b3bd2c48..7f44b5e433 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -127,6 +127,7 @@ static int32_t destroySortMemFile(SSortHandle* pHandle); static int32_t getPageFromExtMemFile(SSortHandle* pHandle, int32_t pageId, char** ppPage); static void setExtMemFilePageUnused(SSortMemFile* pMemFile, int32_t pageId); static int32_t saveDirtyPagesToExtRowsMemFile(SSortHandle* pHandle); +static int32_t freeExtRowMemFileWriteBuf(SSortHandle* pHandle); void tsortSetSingleTableMerge(SSortHandle* pHandle) { pHandle->singleTableMerge = true; @@ -1116,7 +1117,11 @@ static int32_t destroySortMemFile(SSortHandle* pHandle) { taosMemoryFree(pCurr); } tSimpleHashCleanup(pMemFile->mActivePages); + pMemFile->mActivePages = NULL; + taosMemoryFree(pMemFile->writePageBuf); + pMemFile->writePageBuf = NULL; + fclose(pMemFile->pTdFile); taosRemoveFile(pMemFile->memFilePath); taosMemoryFree(pMemFile); @@ -1168,6 +1173,17 @@ static int32_t saveDirtyPagesToExtRowsMemFile(SSortHandle* pHandle) { return TSDB_CODE_SUCCESS; } +static int32_t freeExtRowMemFileWriteBuf(SSortHandle* pHandle) { + SSortMemFile* pMemFile = pHandle->pExtRowsMemFile; + + if (pMemFile == NULL) return TSDB_CODE_SUCCESS; + + 
taosMemoryFree(pMemFile->writePageBuf); + pMemFile->writePageBuf = NULL; + taosMemoryTrim(0); + return TSDB_CODE_SUCCESS; +} + static void appendToRowIndexDataBlock(SSortHandle* pHandle, SSDataBlock* pSource, int32_t* rowIndex) { int32_t pageId = -1; int32_t offset = -1; @@ -1585,6 +1601,9 @@ static int32_t createBlocksMergeSortInitialSources(SSortHandle* pHandle) { } taosArrayDestroy(aExtSrc); tSimpleHashCleanup(mTableNumRows); + if (pHandle->bSortByRowId) { + freeExtRowMemFileWriteBuf(pHandle); + } pHandle->type = SORT_SINGLESOURCE_SORT; return TSDB_CODE_SUCCESS; } From 754a15cac8d6f42166b0bb41dfbadc116882d23d Mon Sep 17 00:00:00 2001 From: slzhou Date: Fri, 23 Feb 2024 10:55:19 +0800 Subject: [PATCH 43/58] feat: change from FILE* to TDFilePtr --- source/libs/executor/src/tsort.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 7f44b5e433..8ed255313e 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -63,7 +63,8 @@ typedef struct SSortMemFile { int32_t numMemPages; SSHashObj* mActivePages; - FILE* pTdFile; + //FILE* pTdFile; + TdFilePtr pTdFile; char memFilePath[PATH_MAX]; } SSortMemFile; @@ -1037,8 +1038,12 @@ static int32_t getPageFromExtMemFile(SSortHandle* pHandle, int32_t pageId, char* ++pMemFile->numMemPages; } { - fseeko(pMemFile->pTdFile, ((int64_t)pageId) * pMemFile->pageSize, SEEK_SET); - fread(pEntry->data, pMemFile->pageSize, 1, pMemFile->pTdFile); + // fseeko(pMemFile->pTdFile, ((int64_t)pageId) * pMemFile->pageSize, SEEK_SET); + // fread(pEntry->data, pMemFile->pageSize, 1, pMemFile->pTdFile); + + taosLSeekFile(pMemFile->pTdFile, ((int64_t)pageId) * pMemFile->pageSize, SEEK_SET); + taosReadFile(pMemFile->pTdFile, pEntry->data, pMemFile->pageSize); + SSortMemPageEntry* tail = pMemFile->pagesTail; tail->next = pEntry; pEntry->next = NULL; @@ -1082,7 +1087,7 @@ static int32_t 
createSortMemFile(SSortHandle* pHandle) { taosGetTmpfilePath(tsTempDir, "sort-ext-mem", pMemFile->memFilePath); pMemFile->pTdFile = - fopen(pMemFile->memFilePath, "wb+"); + taosOpenFile(pMemFile->memFilePath, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_READ | TD_FILE_TRUNC); pMemFile->currPageId = -1; pMemFile->currPageOffset = -1; @@ -1122,7 +1127,8 @@ static int32_t destroySortMemFile(SSortHandle* pHandle) { taosMemoryFree(pMemFile->writePageBuf); pMemFile->writePageBuf = NULL; - fclose(pMemFile->pTdFile); + // fclose(pMemFile->pTdFile); + taosCloseFile(&pMemFile->pTdFile); taosRemoveFile(pMemFile->memFilePath); taosMemoryFree(pMemFile); pHandle->pExtRowsMemFile = NULL; @@ -1142,9 +1148,10 @@ static int32_t saveBlockRowToExtRowsMemFile(SSortHandle* pHandle, SSDataBlock* p pMemFile->currPageOffset = 0; if (pMemFile->currPageId - pMemFile->startPageId >= pMemFile->numWritePages) { - fseeko(pMemFile->pTdFile, ((int64_t)pMemFile->startPageId) * pMemFile->pageSize, SEEK_SET); - fwrite(pMemFile->writePageBuf, pMemFile->pageSize * pMemFile->numWritePages, 1, pMemFile->pTdFile); - + // fseeko(pMemFile->pTdFile, ((int64_t)pMemFile->startPageId) * pMemFile->pageSize, SEEK_SET); + // fwrite(pMemFile->writePageBuf, pMemFile->pageSize * pMemFile->numWritePages, 1, pMemFile->pTdFile); + taosLSeekFile(pMemFile->pTdFile, ((int64_t)pMemFile->startPageId) * pMemFile->pageSize, SEEK_SET); + taosWriteFile(pMemFile->pTdFile, pMemFile->writePageBuf, pMemFile->pageSize * pMemFile->numWritePages); pMemFile->startPageId = pMemFile->currPageId; } } @@ -1166,9 +1173,11 @@ static int32_t saveDirtyPagesToExtRowsMemFile(SSortHandle* pHandle) { if (!pMemFile->bDirty) { return TSDB_CODE_SUCCESS; } - fseeko(pMemFile->pTdFile, ((int64_t)pMemFile->startPageId) * pMemFile->pageSize, SEEK_SET); + // fseeko(pMemFile->pTdFile, ((int64_t)pMemFile->startPageId) * pMemFile->pageSize, SEEK_SET); + taosLSeekFile(pMemFile->pTdFile, ((int64_t)pMemFile->startPageId) * pMemFile->pageSize, SEEK_SET); int32_t 
numWriteBytes = pMemFile->pageSize * (pMemFile->currPageId - pMemFile->startPageId) + pMemFile->currPageOffset + 1; - fwrite(pMemFile->writePageBuf, numWriteBytes, 1, pMemFile->pTdFile); + // fwrite(pMemFile->writePageBuf, numWriteBytes, 1, pMemFile->pTdFile); + taosWriteFile(pMemFile->pTdFile, pMemFile->writePageBuf, numWriteBytes); pMemFile->bDirty = false; return TSDB_CODE_SUCCESS; } From 830de66111f24b08d687ccc809af8b63bae9cb52 Mon Sep 17 00:00:00 2001 From: slzhou Date: Fri, 23 Feb 2024 13:55:10 +0800 Subject: [PATCH 44/58] fix: compilation error and force sort by row id for CI --- source/libs/executor/src/scanoperator.c | 2 +- source/libs/executor/src/tsort.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index b71d63a22d..43a716a079 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -4351,7 +4351,7 @@ SOperatorInfo* createTableMergeScanOperatorInfo(STableScanPhysiNode* pTableScanN pInfo->bSortRowId = false; } - + pInfo->bSortRowId = true; pInfo->pSortInfo = generateSortByTsInfo(pInfo->base.matchInfo.pList, pInfo->base.cond.order); pInfo->pReaderBlock = createOneDataBlock(pInfo->pResBlock, false); diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index fbde32a701..e1cd8b822f 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -1543,8 +1543,8 @@ static int32_t createBlocksMergeSortInitialSources(SSortHandle* pHandle) { if (pBlk != NULL) { SColumnInfoData* tsCol = taosArrayGet(pBlk->pDataBlock, pOrigOrder->slotId); int64_t firstRowTs = *(int64_t*)tsCol->pData; - if ((pOrder->order == TSDB_ORDER_ASC && firstRowTs > pHandle->currMergeLimitTs) || - (pOrder->order == TSDB_ORDER_DESC && firstRowTs < pHandle->currMergeLimitTs)) { + if ((pOrigOrder->order == TSDB_ORDER_ASC && firstRowTs > pHandle->currMergeLimitTs) || + (pOrigOrder->order == 
TSDB_ORDER_DESC && firstRowTs < pHandle->currMergeLimitTs)) { if (bExtractedBlock) { blockDataDestroy(pBlk); } From dc6d96da70aceffaa68bb16077da1a52cf4ccf9f Mon Sep 17 00:00:00 2001 From: slzhou Date: Mon, 26 Feb 2024 16:45:15 +0800 Subject: [PATCH 45/58] fix: remove taosMemoryTrim when free write buf --- source/libs/executor/src/tsort.c | 1 - 1 file changed, 1 deletion(-) diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index e1cd8b822f..a779a278cd 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -1189,7 +1189,6 @@ static int32_t freeExtRowMemFileWriteBuf(SSortHandle* pHandle) { taosMemoryFree(pMemFile->writePageBuf); pMemFile->writePageBuf = NULL; - taosMemoryTrim(0); return TSDB_CODE_SUCCESS; } From d021138f3b7515feca1d27ae35f711ac7c0eb85d Mon Sep 17 00:00:00 2001 From: slzhou Date: Tue, 27 Feb 2024 09:47:55 +0800 Subject: [PATCH 46/58] remove sort by row id and comments --- source/libs/executor/src/scanoperator.c | 2 -- source/libs/executor/src/tsort.c | 6 ------ 2 files changed, 8 deletions(-) diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 43a716a079..7f1f0700d0 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -4351,8 +4351,6 @@ SOperatorInfo* createTableMergeScanOperatorInfo(STableScanPhysiNode* pTableScanN pInfo->bSortRowId = false; } - pInfo->bSortRowId = true; - pInfo->pSortInfo = generateSortByTsInfo(pInfo->base.matchInfo.pList, pInfo->base.cond.order); pInfo->pReaderBlock = createOneDataBlock(pInfo->pResBlock, false); diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index a779a278cd..2505223273 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -1012,12 +1012,6 @@ static int32_t blockRowToBuf(SSDataBlock* pBlock, int32_t rowIdx, char* buf) { return (int32_t)(pStart - (char*)buf); } -// pageId * pageSize == 
pageStartOffset in file. write in pages -// when pass the page boundaries, the page is move to the front(old). -// find hash from pageid to page entry. if the page can not be found, -// 1) unused inactive pages, 2) then new pages if not exceeding mem limit, 3) then active pages -// new pages is added or moved to the back. - static int32_t getPageFromExtMemFile(SSortHandle* pHandle, int32_t pageId, char** ppPage) { SSortMemFile* pMemFile = pHandle->pExtRowsMemFile; SSortMemPageEntry** ppPageEntry = tSimpleHashGet(pMemFile->mActivePages, &pageId, sizeof(pageId)); From 4474fcdd695912c48d47671a681cd7d3406b3b6c Mon Sep 17 00:00:00 2001 From: slzhou Date: Wed, 28 Feb 2024 11:11:42 +0800 Subject: [PATCH 47/58] fix: add error processing related to files --- source/libs/executor/src/tsort.c | 49 ++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 15 deletions(-) diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 2505223273..1d89e6d15c 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -1019,6 +1019,7 @@ static int32_t getPageFromExtMemFile(SSortHandle* pHandle, int32_t pageId, char* *ppPage = (char*)((*ppPageEntry)->data); } else { SSortMemPageEntry* pEntry = pMemFile->pagesHead->next; + bool freeEntryWhenError = false; if (pEntry && !pEntry->active || pMemFile->numMemPages >= pMemFile->totalMemPages) { if (pEntry->active) { tSimpleHashRemove(pMemFile->mActivePages, &pEntry->pageId, sizeof(pEntry->pageId)); @@ -1029,15 +1030,22 @@ static int32_t getPageFromExtMemFile(SSortHandle* pHandle, int32_t pageId, char* } else if (pMemFile->numMemPages < pMemFile->totalMemPages) { pEntry = taosMemoryCalloc(1, sizeof(SSortMemPageEntry)); pEntry->data = taosMemoryMalloc(pMemFile->pageSize); + freeEntryWhenError = true; ++pMemFile->numMemPages; } { - // fseeko(pMemFile->pTdFile, ((int64_t)pageId) * pMemFile->pageSize, SEEK_SET); - // fread(pEntry->data, pMemFile->pageSize, 1, pMemFile->pTdFile); - - 
taosLSeekFile(pMemFile->pTdFile, ((int64_t)pageId) * pMemFile->pageSize, SEEK_SET); - taosReadFile(pMemFile->pTdFile, pEntry->data, pMemFile->pageSize); - + int64_t ret = taosLSeekFile(pMemFile->pTdFile, ((int64_t)pageId) * pMemFile->pageSize, SEEK_SET); + if (ret == 0) { + ret = taosReadFile(pMemFile->pTdFile, pEntry->data, pMemFile->pageSize); + } + if (ret != pMemFile->pageSize) { + terrno = TAOS_SYSTEM_ERROR(errno); + if (freeEntryWhenError) { + taosMemoryFreeClear(pEntry->data); + taosMemoryFreeClear(pEntry); + } + return terrno; + } SSortMemPageEntry* tail = pMemFile->pagesTail; tail->next = pEntry; pEntry->next = NULL; @@ -1082,6 +1090,10 @@ static int32_t createSortMemFile(SSortHandle* pHandle) { taosGetTmpfilePath(tsTempDir, "sort-ext-mem", pMemFile->memFilePath); pMemFile->pTdFile = taosOpenFile(pMemFile->memFilePath, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_READ | TD_FILE_TRUNC); + if (pMemFile->pTdFile == NULL) { + taosMemoryFree(pMemFile); + return TAOS_SYSTEM_ERROR(errno); + } pMemFile->currPageId = -1; pMemFile->currPageOffset = -1; @@ -1121,7 +1133,6 @@ static int32_t destroySortMemFile(SSortHandle* pHandle) { taosMemoryFree(pMemFile->writePageBuf); pMemFile->writePageBuf = NULL; - // fclose(pMemFile->pTdFile); taosCloseFile(&pMemFile->pTdFile); taosRemoveFile(pMemFile->memFilePath); taosMemoryFree(pMemFile); @@ -1142,10 +1153,14 @@ static int32_t saveBlockRowToExtRowsMemFile(SSortHandle* pHandle, SSDataBlock* p pMemFile->currPageOffset = 0; if (pMemFile->currPageId - pMemFile->startPageId >= pMemFile->numWritePages) { - // fseeko(pMemFile->pTdFile, ((int64_t)pMemFile->startPageId) * pMemFile->pageSize, SEEK_SET); - // fwrite(pMemFile->writePageBuf, pMemFile->pageSize * pMemFile->numWritePages, 1, pMemFile->pTdFile); - taosLSeekFile(pMemFile->pTdFile, ((int64_t)pMemFile->startPageId) * pMemFile->pageSize, SEEK_SET); - taosWriteFile(pMemFile->pTdFile, pMemFile->writePageBuf, pMemFile->pageSize * pMemFile->numWritePages); + int64_t ret = 
taosLSeekFile(pMemFile->pTdFile, ((int64_t)pMemFile->startPageId) * pMemFile->pageSize, SEEK_SET); + if (ret == 0) { + ret = taosWriteFile(pMemFile->pTdFile, pMemFile->writePageBuf, pMemFile->pageSize * pMemFile->numWritePages); + } + if (ret != pMemFile->pageSize * pMemFile->numWritePages) { + terrno = TAOS_SYSTEM_ERROR(errno); + return terrno; + } pMemFile->startPageId = pMemFile->currPageId; } } @@ -1167,11 +1182,15 @@ static int32_t saveDirtyPagesToExtRowsMemFile(SSortHandle* pHandle) { if (!pMemFile->bDirty) { return TSDB_CODE_SUCCESS; } - // fseeko(pMemFile->pTdFile, ((int64_t)pMemFile->startPageId) * pMemFile->pageSize, SEEK_SET); - taosLSeekFile(pMemFile->pTdFile, ((int64_t)pMemFile->startPageId) * pMemFile->pageSize, SEEK_SET); + int64_t ret = taosLSeekFile(pMemFile->pTdFile, ((int64_t)pMemFile->startPageId) * pMemFile->pageSize, SEEK_SET); int32_t numWriteBytes = pMemFile->pageSize * (pMemFile->currPageId - pMemFile->startPageId) + pMemFile->currPageOffset + 1; - // fwrite(pMemFile->writePageBuf, numWriteBytes, 1, pMemFile->pTdFile); - taosWriteFile(pMemFile->pTdFile, pMemFile->writePageBuf, numWriteBytes); + if (ret == 0) { + ret = taosWriteFile(pMemFile->pTdFile, pMemFile->writePageBuf, numWriteBytes); + } + if (ret != numWriteBytes) { + terrno = TAOS_SYSTEM_ERROR(errno); + return terrno; + } pMemFile->bDirty = false; return TSDB_CODE_SUCCESS; } From 67559afcae58959b753476c2f63b178df0af0b37 Mon Sep 17 00:00:00 2001 From: slzhou Date: Wed, 28 Feb 2024 14:29:30 +0800 Subject: [PATCH 48/58] fix: check ret value of lseek --- source/libs/executor/src/tsort.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 1d89e6d15c..3b2915f81d 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -1035,7 +1035,7 @@ static int32_t getPageFromExtMemFile(SSortHandle* pHandle, int32_t pageId, char* } { int64_t ret = 
taosLSeekFile(pMemFile->pTdFile, ((int64_t)pageId) * pMemFile->pageSize, SEEK_SET); - if (ret == 0) { + if (ret >= 0) { ret = taosReadFile(pMemFile->pTdFile, pEntry->data, pMemFile->pageSize); } if (ret != pMemFile->pageSize) { @@ -1154,7 +1154,7 @@ static int32_t saveBlockRowToExtRowsMemFile(SSortHandle* pHandle, SSDataBlock* p if (pMemFile->currPageId - pMemFile->startPageId >= pMemFile->numWritePages) { int64_t ret = taosLSeekFile(pMemFile->pTdFile, ((int64_t)pMemFile->startPageId) * pMemFile->pageSize, SEEK_SET); - if (ret == 0) { + if (ret >= 0) { ret = taosWriteFile(pMemFile->pTdFile, pMemFile->writePageBuf, pMemFile->pageSize * pMemFile->numWritePages); } if (ret != pMemFile->pageSize * pMemFile->numWritePages) { @@ -1183,8 +1183,8 @@ static int32_t saveDirtyPagesToExtRowsMemFile(SSortHandle* pHandle) { return TSDB_CODE_SUCCESS; } int64_t ret = taosLSeekFile(pMemFile->pTdFile, ((int64_t)pMemFile->startPageId) * pMemFile->pageSize, SEEK_SET); - int32_t numWriteBytes = pMemFile->pageSize * (pMemFile->currPageId - pMemFile->startPageId) + pMemFile->currPageOffset + 1; - if (ret == 0) { + int32_t numWriteBytes = pMemFile->pageSize * (pMemFile->currPageId - pMemFile->startPageId + 1); + if (ret >= 0) { ret = taosWriteFile(pMemFile->pTdFile, pMemFile->writePageBuf, numWriteBytes); } if (ret != numWriteBytes) { @@ -1460,9 +1460,6 @@ static int32_t sortBlocksToExtSource(SSortHandle* pHandle, SArray* aBlk, SArray* } blockDataCleanup(pHandle->pDataBlock); } - if (pHandle->bSortByRowId) { - saveDirtyPagesToExtRowsMemFile(pHandle); - } SSDataBlock* pMemSrcBlk = createOneDataBlock(pHandle->pDataBlock, false); doAddNewExternalMemSource(pHandle->pBuf, aExtSrc, pMemSrcBlk, &pHandle->sourceId, aPgId); @@ -1631,6 +1628,7 @@ static int32_t createBlocksMergeSortInitialSources(SSortHandle* pHandle) { taosArrayDestroy(aExtSrc); tSimpleHashCleanup(mTableNumRows); if (pHandle->bSortByRowId) { + saveDirtyPagesToExtRowsMemFile(pHandle); freeExtRowMemFileWriteBuf(pHandle); } 
pHandle->type = SORT_SINGLESOURCE_SORT; From 2fb0c1a00f1136f2981101bb20cabd7772646cd6 Mon Sep 17 00:00:00 2001 From: slzhou Date: Wed, 28 Feb 2024 14:31:31 +0800 Subject: [PATCH 49/58] feat: check temp space availability --- source/libs/executor/src/scanoperator.c | 12 ++++++++---- source/libs/executor/src/tsort.c | 5 +++++ 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 7f1f0700d0..1d64b066e9 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -4015,9 +4015,10 @@ int32_t startDurationForGroupTableMergeScan(SOperatorInfo* pOperator) { pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_BLOCK_TS_MERGE, pInfo->bufPageSize, numOfBufPage, pInfo->pSortInputBlock, pTaskInfo->id.str, 0, 0, 0); int32_t memSize = 512 * 1024 * 1024; - // int32_t rowBytes = blockDataGetRowSize(pInfo->pResBlock) + taosArrayGetSize(pInfo->pResBlock->pDataBlock) + sizeof(int32_t); - // int32_t pageSize = TMAX(memSize/numOfTable, rowBytes); - tsortSetSortByRowId(pInfo->pSortHandle, pInfo->bufPageSize, memSize); + code = tsortSetSortByRowId(pInfo->pSortHandle, pInfo->bufPageSize, memSize); + if (code != TSDB_CODE_SUCCESS) { + return code; + } } else { pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_BLOCK_TS_MERGE, pInfo->bufPageSize, numOfBufPage, pInfo->pSortInputBlock, pTaskInfo->id.str, 0, 0, 0); @@ -4210,7 +4211,10 @@ SSDataBlock* doTableMergeScan(SOperatorInfo* pOperator) { } else { if (pInfo->bNewFilesetEvent) { stopDurationForGroupTableMergeScan(pOperator); - startDurationForGroupTableMergeScan(pOperator); + code = startDurationForGroupTableMergeScan(pOperator); + if (code != TSDB_CODE_SUCCESS) { + T_LONG_JMP(pTaskInfo->env, terrno); + } } else { // Data of this group are all dumped, let's try the next group stopGroupTableMergeScan(pOperator); diff --git a/source/libs/executor/src/tsort.c 
b/source/libs/executor/src/tsort.c index 3b2915f81d..506c79ebaf 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -1266,6 +1266,11 @@ int32_t tsortSetSortByRowId(SSortHandle* pHandle, int32_t extRowsPageSize, int32 SBlockOrderInfo* pOrder = taosArrayGet(pHandle->pSortInfo, 0); pHandle->extRowsOrderInfo = *pOrder; initRowIdSort(pHandle); + if (!osTempSpaceAvailable()) { + terrno = TSDB_CODE_NO_DISKSPACE; + qError("create sort mem file failed since %s, tempDir:%s", terrstr(), tsTempDir); + return terrno; + } int32_t code = createSortMemFile(pHandle); pHandle->bSortByRowId = true; return code; From 049f27bf3e52e1f1346e31182aa714d14b38ad92 Mon Sep 17 00:00:00 2001 From: slzhou Date: Thu, 29 Feb 2024 09:47:16 +0800 Subject: [PATCH 50/58] fix: use C File API --- source/libs/executor/src/tsort.c | 42 +++++++++++++++++++------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 506c79ebaf..1ceb5403c9 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -63,8 +63,8 @@ typedef struct SSortMemFile { int32_t numMemPages; SSHashObj* mActivePages; - //FILE* pTdFile; - TdFilePtr pTdFile; + FILE* pTdFile; + // TdFilePtr pTdFile; char memFilePath[PATH_MAX]; } SSortMemFile; @@ -239,6 +239,14 @@ void destroyTuple(void* t) { } } +int tsortSeekFile(FILE* file, int64_t offset, int whence) { + #ifdef WINDOWS + return _fseeki64(file, offset, whence); + #else + return fseeko(file, offset, whence); + #endif +} + /** * * @param type @@ -1034,11 +1042,11 @@ static int32_t getPageFromExtMemFile(SSortHandle* pHandle, int32_t pageId, char* ++pMemFile->numMemPages; } { - int64_t ret = taosLSeekFile(pMemFile->pTdFile, ((int64_t)pageId) * pMemFile->pageSize, SEEK_SET); - if (ret >= 0) { - ret = taosReadFile(pMemFile->pTdFile, pEntry->data, pMemFile->pageSize); + int ret = tsortSeekFile(pMemFile->pTdFile, ((int64_t)pageId) * 
pMemFile->pageSize, SEEK_SET); + if (ret == 0) { + ret = fread(pEntry->data, pMemFile->pageSize, 1, pMemFile->pTdFile); } - if (ret != pMemFile->pageSize) { + if (ret != 1) { terrno = TAOS_SYSTEM_ERROR(errno); if (freeEntryWhenError) { taosMemoryFreeClear(pEntry->data); @@ -1088,8 +1096,7 @@ static int32_t createSortMemFile(SSortHandle* pHandle) { SSortMemFile* pMemFile = taosMemoryCalloc(1, sizeof(SSortMemFile)); taosGetTmpfilePath(tsTempDir, "sort-ext-mem", pMemFile->memFilePath); - pMemFile->pTdFile = - taosOpenFile(pMemFile->memFilePath, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_READ | TD_FILE_TRUNC); + pMemFile->pTdFile = fopen(pMemFile->memFilePath, "w+"); if (pMemFile->pTdFile == NULL) { taosMemoryFree(pMemFile); return TAOS_SYSTEM_ERROR(errno); @@ -1133,7 +1140,8 @@ static int32_t destroySortMemFile(SSortHandle* pHandle) { taosMemoryFree(pMemFile->writePageBuf); pMemFile->writePageBuf = NULL; - taosCloseFile(&pMemFile->pTdFile); + fclose(pMemFile->pTdFile); + pMemFile->pTdFile = NULL; taosRemoveFile(pMemFile->memFilePath); taosMemoryFree(pMemFile); pHandle->pExtRowsMemFile = NULL; @@ -1153,11 +1161,11 @@ static int32_t saveBlockRowToExtRowsMemFile(SSortHandle* pHandle, SSDataBlock* p pMemFile->currPageOffset = 0; if (pMemFile->currPageId - pMemFile->startPageId >= pMemFile->numWritePages) { - int64_t ret = taosLSeekFile(pMemFile->pTdFile, ((int64_t)pMemFile->startPageId) * pMemFile->pageSize, SEEK_SET); - if (ret >= 0) { - ret = taosWriteFile(pMemFile->pTdFile, pMemFile->writePageBuf, pMemFile->pageSize * pMemFile->numWritePages); + int ret = tsortSeekFile(pMemFile->pTdFile, ((int64_t)pMemFile->startPageId) * pMemFile->pageSize, SEEK_SET); + if (ret == 0) { + ret = fwrite(pMemFile->writePageBuf, pMemFile->pageSize * pMemFile->numWritePages, 1, pMemFile->pTdFile); } - if (ret != pMemFile->pageSize * pMemFile->numWritePages) { + if (ret != 1) { terrno = TAOS_SYSTEM_ERROR(errno); return terrno; } @@ -1182,12 +1190,12 @@ static int32_t 
saveDirtyPagesToExtRowsMemFile(SSortHandle* pHandle) { if (!pMemFile->bDirty) { return TSDB_CODE_SUCCESS; } - int64_t ret = taosLSeekFile(pMemFile->pTdFile, ((int64_t)pMemFile->startPageId) * pMemFile->pageSize, SEEK_SET); + int ret = tsortSeekFile(pMemFile->pTdFile, ((int64_t)pMemFile->startPageId) * pMemFile->pageSize, SEEK_SET); int32_t numWriteBytes = pMemFile->pageSize * (pMemFile->currPageId - pMemFile->startPageId + 1); - if (ret >= 0) { - ret = taosWriteFile(pMemFile->pTdFile, pMemFile->writePageBuf, numWriteBytes); + if (ret == 0) { + ret = fwrite(pMemFile->writePageBuf, numWriteBytes, 1, pMemFile->pTdFile); } - if (ret != numWriteBytes) { + if (ret != 1) { terrno = TAOS_SYSTEM_ERROR(errno); return terrno; } From b6095261f238e486c0530dec0bd3674c19b47daa Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Fri, 1 Mar 2024 17:05:24 +0800 Subject: [PATCH 51/58] feat: ordered region and blocks --- source/libs/executor/inc/tsort.h | 2 +- source/libs/executor/src/scanoperator.c | 2 +- source/libs/executor/src/tsort.c | 347 +++++++++++------------- 3 files changed, 166 insertions(+), 185 deletions(-) diff --git a/source/libs/executor/inc/tsort.h b/source/libs/executor/inc/tsort.h index b4e0c70f31..ca799673ea 100644 --- a/source/libs/executor/inc/tsort.h +++ b/source/libs/executor/inc/tsort.h @@ -194,7 +194,7 @@ void tsortSetClosed(SSortHandle* pHandle); void tsortSetSingleTableMerge(SSortHandle* pHandle); void tsortSetAbortCheckFn(SSortHandle* pHandle, bool (*checkFn)(void* param), void* param); -int32_t tsortSetSortByRowId(SSortHandle* pHandle, int32_t extRowsPageSize, int32_t extRowsSize); +int32_t tsortSetSortByRowId(SSortHandle* pHandle, int32_t extRowsSize); void tsortAppendTupleToBlock(SSortHandle* pHandle, SSDataBlock* pBlock, STupleHandle* pTupleHandle); /** diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 1d64b066e9..1636cd21f0 100644 --- a/source/libs/executor/src/scanoperator.c +++ 
b/source/libs/executor/src/scanoperator.c @@ -4015,7 +4015,7 @@ int32_t startDurationForGroupTableMergeScan(SOperatorInfo* pOperator) { pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_BLOCK_TS_MERGE, pInfo->bufPageSize, numOfBufPage, pInfo->pSortInputBlock, pTaskInfo->id.str, 0, 0, 0); int32_t memSize = 512 * 1024 * 1024; - code = tsortSetSortByRowId(pInfo->pSortHandle, pInfo->bufPageSize, memSize); + code = tsortSetSortByRowId(pInfo->pSortHandle, memSize); if (code != TSDB_CODE_SUCCESS) { return code; } diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 1ceb5403c9..38ab506918 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -45,23 +45,27 @@ typedef struct SSortMemPageEntry { } SSortMemPageEntry; -typedef struct SSortMemFile { - int32_t pageSize; - int32_t cacheSize; - - char* writePageBuf; - int32_t startPageId; - int32_t numWritePages; - - int32_t currPageId; - int32_t currPageOffset; - bool bDirty; +typedef struct SSortMemFileRegion { + int64_t fileOffset; + int32_t regionSize; - int32_t totalMemPages; - SSortMemPageEntry* pagesHead; - SSortMemPageEntry* pagesTail; - int32_t numMemPages; - SSHashObj* mActivePages; + int32_t bufRegOffset; + int32_t bufLen; + char* buf; +} SSortMemFileRegion; + +typedef struct SSortMemFile { + char* writeBuf; + int32_t writeBufSize; + int64_t writeFileOffset; + + int32_t currRegionId; + int32_t currRegionOffset; + bool bRegionDirty; + + SArray* aFileRegions; + int32_t cacheSize; + int32_t blockSize; FILE* pTdFile; // TdFilePtr pTdFile; @@ -125,11 +129,8 @@ struct SSortHandle { }; static int32_t destroySortMemFile(SSortHandle* pHandle); -static int32_t getPageFromExtMemFile(SSortHandle* pHandle, int32_t pageId, char** ppPage); -static void setExtMemFilePageUnused(SSortMemFile* pMemFile, int32_t pageId); -static int32_t saveDirtyPagesToExtRowsMemFile(SSortHandle* pHandle); -static int32_t freeExtRowMemFileWriteBuf(SSortHandle* pHandle); - 
+static int32_t getRowBufFromExtMemFile(SSortHandle* pHandle, int32_t regionId, int32_t tupleOffset, int32_t rowLen, + char** ppRow, bool* pFreeRow); void tsortSetSingleTableMerge(SSortHandle* pHandle) { pHandle->singleTableMerge = true; } @@ -915,14 +916,14 @@ static int32_t createPageBuf(SSortHandle* pHandle) { void tsortAppendTupleToBlock(SSortHandle* pHandle, SSDataBlock* pBlock, STupleHandle* pTupleHandle) { if (pHandle->bSortByRowId) { - int32_t pageId = *(int32_t*)tsortGetValue(pTupleHandle, 1); + int32_t regionId = *(int32_t*)tsortGetValue(pTupleHandle, 1); int32_t offset = *(int32_t*)tsortGetValue(pTupleHandle, 2); + int32_t length = *(int32_t*)tsortGetValue(pTupleHandle, 3); - char* page = NULL; - getPageFromExtMemFile(pHandle, pageId, &page); - + char* buf = NULL; + bool bFreeRow = false; + getRowBufFromExtMemFile(pHandle, regionId, offset, length, &buf, &bFreeRow); int32_t numOfCols = taosArrayGetSize(pBlock->pDataBlock); - char* buf = (char*)page + offset; char* isNull = (char*)buf; char* pStart = (char*)buf + sizeof(int8_t) * numOfCols; for (int32_t i = 0; i < numOfCols; ++i) { @@ -943,7 +944,9 @@ void tsortAppendTupleToBlock(SSortHandle* pHandle, SSDataBlock* pBlock, STupleHa colDataSetNULL(pColInfo, pBlock->info.rows); } } - + if (bFreeRow) { + taosMemoryFree(buf); + } if (*(int32_t*)pStart != pStart - buf) { qError("table merge scan row buf deserialization. 
length error %d != %d ", *(int32_t*)pStart, (int32_t)(pStart - buf)); @@ -953,9 +956,6 @@ void tsortAppendTupleToBlock(SSortHandle* pHandle, SSDataBlock* pBlock, STupleHa pBlock->info.scanFlag = ((SDataBlockInfo*)tsortGetBlockInfo(pTupleHandle))->scanFlag; pBlock->info.rows += 1; - if (offset + pHandle->extRowBytes >= pHandle->pExtRowsMemFile->pageSize) { - setExtMemFilePageUnused(pHandle->pExtRowsMemFile, pageId); - } } else { for (int32_t i = 0; i < taosArrayGetSize(pBlock->pDataBlock); ++i) { SColumnInfoData* pColInfo = taosArrayGet(pBlock->pDataBlock, i); @@ -1020,102 +1020,60 @@ static int32_t blockRowToBuf(SSDataBlock* pBlock, int32_t rowIdx, char* buf) { return (int32_t)(pStart - (char*)buf); } -static int32_t getPageFromExtMemFile(SSortHandle* pHandle, int32_t pageId, char** ppPage) { +static int32_t getRowBufFromExtMemFile(SSortHandle* pHandle, int32_t regionId, int32_t tupleOffset, int32_t rowLen, + char** ppRow, bool* pFreeRow) { SSortMemFile* pMemFile = pHandle->pExtRowsMemFile; - SSortMemPageEntry** ppPageEntry = tSimpleHashGet(pMemFile->mActivePages, &pageId, sizeof(pageId)); - if (ppPageEntry) { - *ppPage = (char*)((*ppPageEntry)->data); + SSortMemFileRegion* pRegion = taosArrayGet(pMemFile->aFileRegions, regionId); + if (pRegion->buf == NULL) { + pRegion->bufRegOffset = 0; + pRegion->buf = taosMemoryMalloc(pMemFile->blockSize); + tsortSeekFile(pMemFile->pTdFile, pRegion->fileOffset, SEEK_SET); + int32_t readBytes = TMIN(pMemFile->blockSize, pRegion->regionSize); + fread(pRegion->buf, readBytes, 1, pMemFile->pTdFile); + pRegion->bufLen = readBytes; + } + // TODO: ASSERT(pRegion->offset < tupleOffset); + if (pRegion->bufRegOffset + pRegion->bufLen >= tupleOffset + rowLen) { + *pFreeRow = false; + *ppRow = pRegion->buf + tupleOffset - pRegion->bufRegOffset; } else { - SSortMemPageEntry* pEntry = pMemFile->pagesHead->next; - bool freeEntryWhenError = false; - if (pEntry && !pEntry->active || pMemFile->numMemPages >= pMemFile->totalMemPages) { - if 
(pEntry->active) { - tSimpleHashRemove(pMemFile->mActivePages, &pEntry->pageId, sizeof(pEntry->pageId)); - } - pEntry->prev->next = pEntry->next; - pEntry->next->prev = pEntry->prev; - pEntry->active = false; - } else if (pMemFile->numMemPages < pMemFile->totalMemPages) { - pEntry = taosMemoryCalloc(1, sizeof(SSortMemPageEntry)); - pEntry->data = taosMemoryMalloc(pMemFile->pageSize); - freeEntryWhenError = true; - ++pMemFile->numMemPages; - } - { - int ret = tsortSeekFile(pMemFile->pTdFile, ((int64_t)pageId) * pMemFile->pageSize, SEEK_SET); - if (ret == 0) { - ret = fread(pEntry->data, pMemFile->pageSize, 1, pMemFile->pTdFile); - } - if (ret != 1) { - terrno = TAOS_SYSTEM_ERROR(errno); - if (freeEntryWhenError) { - taosMemoryFreeClear(pEntry->data); - taosMemoryFreeClear(pEntry); - } - return terrno; - } - SSortMemPageEntry* tail = pMemFile->pagesTail; - tail->next = pEntry; - pEntry->next = NULL; - pEntry->prev = tail; - pEntry->active = true; - pMemFile->pagesTail = pEntry; - tSimpleHashPut(pMemFile->mActivePages, &pageId, sizeof(pageId), &pEntry, POINTER_BYTES); - *ppPage = pEntry->data; - } + *ppRow = taosMemoryMalloc(rowLen); + int32_t szThisBlock = pRegion->bufLen - (tupleOffset - pRegion->bufRegOffset); + memcpy(*ppRow, pRegion->buf + tupleOffset - pRegion->bufRegOffset, + szThisBlock); + tsortSeekFile(pMemFile->pTdFile, pRegion->fileOffset + pRegion->bufRegOffset + pRegion->bufLen, SEEK_SET); + int32_t readBytes = TMIN(pMemFile->blockSize, pRegion->regionSize - (pRegion->bufRegOffset + pRegion->bufLen)); + fread(pRegion->buf, readBytes, 1, pMemFile->pTdFile); + memcpy(*ppRow + szThisBlock, pRegion->buf, rowLen - szThisBlock); + *pFreeRow = true; + pRegion->bufRegOffset += pRegion->bufLen; + pRegion->bufLen = readBytes; } + //TODO: free region memory return TSDB_CODE_SUCCESS; } -static void setExtMemFilePageUnused(SSortMemFile* pMemFile, int32_t pageId) { - SSortMemPageEntry** ppPageEntry = tSimpleHashGet(pMemFile->mActivePages, &pageId, sizeof(pageId)); - 
SSortMemPageEntry* pEntry = *ppPageEntry; - if (pEntry == pMemFile->pagesTail) { - pMemFile->pagesTail = pEntry->prev; - } - - pEntry->prev->next = pEntry->next; - pEntry->next->prev = pEntry->prev; - - SSortMemPageEntry* first = pMemFile->pagesHead->next; - SSortMemPageEntry* head = pMemFile->pagesHead; - head->next = pEntry; - pEntry->next = first; - first->prev = pEntry; - pEntry->prev = head; - - pEntry->active = false; - tSimpleHashRemove(pMemFile->mActivePages, &pageId, sizeof(pageId)); - return; -} - static int32_t createSortMemFile(SSortHandle* pHandle) { if (pHandle->pExtRowsMemFile != NULL) { return TSDB_CODE_SUCCESS; } SSortMemFile* pMemFile = taosMemoryCalloc(1, sizeof(SSortMemFile)); - + pMemFile->cacheSize = pHandle->extRowsMemSize; taosGetTmpfilePath(tsTempDir, "sort-ext-mem", pMemFile->memFilePath); pMemFile->pTdFile = fopen(pMemFile->memFilePath, "w+"); if (pMemFile->pTdFile == NULL) { taosMemoryFree(pMemFile); return TAOS_SYSTEM_ERROR(errno); } - pMemFile->currPageId = -1; - pMemFile->currPageOffset = -1; + pMemFile->currRegionId = -1; + pMemFile->currRegionOffset = -1; - pMemFile->pageSize = pHandle->extRowsPageSize; - pMemFile->cacheSize = pHandle->extRowsMemSize; - pMemFile->numWritePages = pMemFile->cacheSize/pMemFile->pageSize; - pMemFile->writePageBuf = taosMemoryMalloc(pMemFile->pageSize * pMemFile->numWritePages); - pMemFile->bDirty = false; + pMemFile->writeBufSize = 64 * 1024 * 1024; + pMemFile->writeBuf = taosMemoryMalloc(pMemFile->writeBufSize); + pMemFile->writeFileOffset = -1; + pMemFile->bRegionDirty = false; - pMemFile->mActivePages = tSimpleHashInit(8192, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT)); - pMemFile->pagesHead = taosMemoryCalloc(1, sizeof(SSortMemPageEntry)); - pMemFile->pagesTail = pMemFile->pagesHead; - - pMemFile->totalMemPages = pMemFile->cacheSize / pMemFile->pageSize; - pMemFile->numMemPages = 0; + pMemFile->aFileRegions = taosArrayInit(64, sizeof(SSortMemFileRegion)); pHandle->pExtRowsMemFile = pMemFile; 
return TSDB_CODE_SUCCESS; @@ -1125,20 +1083,15 @@ static int32_t destroySortMemFile(SSortHandle* pHandle) { if (pHandle->pExtRowsMemFile == NULL) return TSDB_CODE_SUCCESS; SSortMemFile* pMemFile = pHandle->pExtRowsMemFile; - SSortMemPageEntry* pEntry = pMemFile->pagesHead; - while (pEntry != NULL) { - if (pEntry->data) { - taosMemoryFree(pEntry->data); - } - SSortMemPageEntry* pCurr = pEntry; - pEntry = pEntry->next; - taosMemoryFree(pCurr); + for (int32_t i = 0; i < taosArrayGetSize(pMemFile->aFileRegions); ++i) { + SSortMemFileRegion* pRegion = taosArrayGet(pMemFile->aFileRegions, i); + taosMemoryFree(pRegion->buf); } - tSimpleHashCleanup(pMemFile->mActivePages); - pMemFile->mActivePages = NULL; + taosArrayDestroy(pMemFile->aFileRegions); + pMemFile->aFileRegions = NULL; - taosMemoryFree(pMemFile->writePageBuf); - pMemFile->writePageBuf = NULL; + taosMemoryFree(pMemFile->writeBuf); + pMemFile->writeBuf = NULL; fclose(pMemFile->pTdFile); pMemFile->pTdFile = NULL; @@ -1148,68 +1101,89 @@ static int32_t destroySortMemFile(SSortHandle* pHandle) { return TSDB_CODE_SUCCESS; } -static int32_t saveBlockRowToExtRowsMemFile(SSortHandle* pHandle, SSDataBlock* pBlock, int32_t rowIdx, int32_t* pPageId, int32_t* pOffset, int32_t* pLength) { +static int32_t tsortOpenRegion(SSortHandle* pHandle) { SSortMemFile* pMemFile = pHandle->pExtRowsMemFile; - if (pMemFile->currPageId == -1) { - pMemFile->currPageId = 0; - pMemFile->currPageOffset = 0; - pMemFile->startPageId = 0; + if (pMemFile->currRegionId == -1) { + SSortMemFileRegion region = {0}; + region.fileOffset = 0; + region.bufRegOffset = 0; + taosArrayPush(pMemFile->aFileRegions, ®ion); + pMemFile->currRegionId = 0; + pMemFile->currRegionOffset = 0; + pMemFile->writeFileOffset = 0; } else { - if (pMemFile->currPageOffset + pHandle->extRowBytes >= pMemFile->pageSize) { + SSortMemFileRegion regionNew = {0}; + SSortMemFileRegion* pRegion = taosArrayGet(pMemFile->aFileRegions, pMemFile->currRegionId); + regionNew.fileOffset = 
pRegion->fileOffset + pRegion->regionSize; + regionNew.bufRegOffset = 0; + taosArrayPush(pMemFile->aFileRegions, ®ionNew); + ++pMemFile->currRegionId; + pMemFile->currRegionOffset = 0; + pMemFile->writeFileOffset = regionNew.fileOffset; + } + return TSDB_CODE_SUCCESS; +} - ++pMemFile->currPageId; - pMemFile->currPageOffset = 0; +static int32_t tsortCloseRegion(SSortHandle* pHandle) { + SSortMemFile* pMemFile = pHandle->pExtRowsMemFile; + SSortMemFileRegion* pRegion = taosArrayGet(pMemFile->aFileRegions, pMemFile->currRegionId); + pRegion->regionSize = pMemFile->currRegionOffset; + int32_t writeBytes = pRegion->regionSize - (pMemFile->writeFileOffset - pRegion->fileOffset); + if (writeBytes > 0) { + int ret = tsortSeekFile(pMemFile->pTdFile, pMemFile->writeFileOffset, SEEK_SET); + if (ret == 0) { + ret = fwrite(pMemFile->writeBuf, writeBytes, 1, pMemFile->pTdFile); + } + if (ret != 1) { + terrno = TAOS_SYSTEM_ERROR(errno); + return terrno; + } + pMemFile->bRegionDirty = false; + } + return TSDB_CODE_SUCCESS; +} - if (pMemFile->currPageId - pMemFile->startPageId >= pMemFile->numWritePages) { - int ret = tsortSeekFile(pMemFile->pTdFile, ((int64_t)pMemFile->startPageId) * pMemFile->pageSize, SEEK_SET); - if (ret == 0) { - ret = fwrite(pMemFile->writePageBuf, pMemFile->pageSize * pMemFile->numWritePages, 1, pMemFile->pTdFile); - } - if (ret != 1) { - terrno = TAOS_SYSTEM_ERROR(errno); - return terrno; - } - pMemFile->startPageId = pMemFile->currPageId; +static int32_t tsortFinalizeRegions(SSortHandle* pHandle) { + SSortMemFile* pMemFile = pHandle->pExtRowsMemFile; + size_t numRegions = taosArrayGetSize(pMemFile->aFileRegions); + ASSERT(numRegions == (pMemFile->currRegionId + 1)); + if (numRegions == 0) return TSDB_CODE_SUCCESS; + int32_t blockReadBytes = (pMemFile->cacheSize / numRegions + 4095) & ~4095; + pMemFile->blockSize = blockReadBytes; + + for (int32_t i = 0; i < numRegions; ++i) { + SSortMemFileRegion* pRegion = taosArrayGet(pMemFile->aFileRegions, i); + 
pRegion->bufRegOffset = 0; + } + taosMemoryFree(pMemFile->writeBuf); + pMemFile->writeBuf = NULL; + return TSDB_CODE_SUCCESS; +} + +static int32_t saveBlockRowToExtRowsMemFile(SSortHandle* pHandle, SSDataBlock* pBlock, int32_t rowIdx, int32_t* pRegionId, int32_t* pOffset, int32_t* pLength) { + SSortMemFile* pMemFile = pHandle->pExtRowsMemFile; + SSortMemFileRegion* pRegion = taosArrayGet(pMemFile->aFileRegions, pMemFile->currRegionId); + { + if (pMemFile->currRegionOffset + pHandle->extRowBytes >= pMemFile->writeBufSize) { + int ret = tsortSeekFile(pMemFile->pTdFile, pMemFile->writeFileOffset, SEEK_SET); + int32_t writeBytes = pMemFile->currRegionOffset - (pMemFile->writeFileOffset - pRegion->fileOffset); + if (ret == 0) { + ret = fwrite(pMemFile->writeBuf, writeBytes, 1, pMemFile->pTdFile); } + if (ret != 1) { + terrno = TAOS_SYSTEM_ERROR(errno); + return terrno; + } + pMemFile->writeFileOffset = pRegion->fileOffset + pMemFile->currRegionOffset; } } - - *pPageId = pMemFile->currPageId; - *pOffset = pMemFile->currPageOffset; - int32_t offsetPages = (pMemFile->currPageId - pMemFile->startPageId) * pMemFile->pageSize; - int32_t blockLen = blockRowToBuf(pBlock, rowIdx, - pMemFile->writePageBuf + offsetPages + pMemFile->currPageOffset); + *pRegionId = pMemFile->currRegionId; + *pOffset = pMemFile->currRegionOffset; + int32_t writeBufOffset = pMemFile->currRegionOffset - (pMemFile->writeFileOffset - pRegion->fileOffset); + int32_t blockLen = blockRowToBuf(pBlock, rowIdx, pMemFile->writeBuf + writeBufOffset); *pLength = blockLen; - pMemFile->currPageOffset += blockLen; - pMemFile->bDirty = true; - return TSDB_CODE_SUCCESS; -} - -static int32_t saveDirtyPagesToExtRowsMemFile(SSortHandle* pHandle) { - SSortMemFile* pMemFile = pHandle->pExtRowsMemFile; - if (!pMemFile->bDirty) { - return TSDB_CODE_SUCCESS; - } - int ret = tsortSeekFile(pMemFile->pTdFile, ((int64_t)pMemFile->startPageId) * pMemFile->pageSize, SEEK_SET); - int32_t numWriteBytes = pMemFile->pageSize * 
(pMemFile->currPageId - pMemFile->startPageId + 1); - if (ret == 0) { - ret = fwrite(pMemFile->writePageBuf, numWriteBytes, 1, pMemFile->pTdFile); - } - if (ret != 1) { - terrno = TAOS_SYSTEM_ERROR(errno); - return terrno; - } - pMemFile->bDirty = false; - return TSDB_CODE_SUCCESS; -} - -static int32_t freeExtRowMemFileWriteBuf(SSortHandle* pHandle) { - SSortMemFile* pMemFile = pHandle->pExtRowsMemFile; - - if (pMemFile == NULL) return TSDB_CODE_SUCCESS; - - taosMemoryFree(pMemFile->writePageBuf); - pMemFile->writePageBuf = NULL; + pMemFile->currRegionOffset += blockLen; + pMemFile->bRegionDirty = true; return TSDB_CODE_SUCCESS; } @@ -1225,12 +1199,15 @@ static void appendToRowIndexDataBlock(SSortHandle* pHandle, SSDataBlock* pSource char* pData = colDataGetData(pSrcTsCol, *rowIndex); colDataSetVal(pTsCol, pBlock->info.rows, pData, false); - SColumnInfoData* pPageIdCol = taosArrayGet(pBlock->pDataBlock, 1); - colDataSetInt32(pPageIdCol, pBlock->info.rows, &pageId); + SColumnInfoData* pRegionIdCol = taosArrayGet(pBlock->pDataBlock, 1); + colDataSetInt32(pRegionIdCol, pBlock->info.rows, &pageId); SColumnInfoData* pOffsetCol = taosArrayGet(pBlock->pDataBlock, 2); colDataSetInt32(pOffsetCol, pBlock->info.rows, &offset); + SColumnInfoData* pLengthCol = taosArrayGet(pBlock->pDataBlock, 3); + colDataSetInt32(pLengthCol, pBlock->info.rows, &length); + pBlock->info.rows += 1; *rowIndex += 1; } @@ -1240,10 +1217,12 @@ static void initRowIdSort(SSortHandle* pHandle) { SSDataBlock* pSortInput = createDataBlock(); SColumnInfoData tsCol = createColumnInfoData(TSDB_DATA_TYPE_TIMESTAMP, 8, 1); blockDataAppendColInfo(pSortInput, &tsCol); - SColumnInfoData pageIdCol = createColumnInfoData(TSDB_DATA_TYPE_INT, 4, 2); - blockDataAppendColInfo(pSortInput, &pageIdCol); + SColumnInfoData regionIdCol = createColumnInfoData(TSDB_DATA_TYPE_INT, 4, 2); + blockDataAppendColInfo(pSortInput, ®ionIdCol); SColumnInfoData offsetCol = createColumnInfoData(TSDB_DATA_TYPE_INT, 4, 3); 
blockDataAppendColInfo(pSortInput, &offsetCol); + SColumnInfoData lengthCol = createColumnInfoData(TSDB_DATA_TYPE_INT, 4, 4); + blockDataAppendColInfo(pSortInput, &lengthCol); blockDataDestroy(pHandle->pDataBlock); pHandle->pDataBlock = pSortInput; @@ -1267,9 +1246,8 @@ static void initRowIdSort(SSortHandle* pHandle) { return; } -int32_t tsortSetSortByRowId(SSortHandle* pHandle, int32_t extRowsPageSize, int32_t extRowsMemSize) { +int32_t tsortSetSortByRowId(SSortHandle* pHandle, int32_t extRowsMemSize) { pHandle->extRowBytes = blockDataGetRowSize(pHandle->pDataBlock) + taosArrayGetSize(pHandle->pDataBlock->pDataBlock) + sizeof(int32_t); - pHandle->extRowsPageSize = extRowsPageSize; pHandle->extRowsMemSize = extRowsMemSize; SBlockOrderInfo* pOrder = taosArrayGet(pHandle->pSortInfo, 0); pHandle->extRowsOrderInfo = *pOrder; @@ -1552,7 +1530,6 @@ static int32_t createBlocksMergeSortInitialSources(SSortHandle* pHandle) { while (1) { SSDataBlock* pBlk = pHandle->fetchfp(pSrc->param); - int64_t p = taosGetTimestampUs(); bool bExtractedBlock = false; bool bSkipBlock = false; if (pBlk != NULL && pHandle->mergeLimit > 0) { @@ -1594,6 +1571,9 @@ static int32_t createBlocksMergeSortInitialSources(SSortHandle* pHandle) { tSimpleHashClear(mUidBlk); int64_t p = taosGetTimestampUs(); + if (pHandle->bSortByRowId) { + tsortOpenRegion(pHandle); + } code = sortBlocksToExtSource(pHandle, aBlkSort, aExtSrc); if (code != TSDB_CODE_SUCCESS) { @@ -1603,7 +1583,9 @@ static int32_t createBlocksMergeSortInitialSources(SSortHandle* pHandle) { taosArrayClear(aBlkSort); break; } - + if (pHandle->bSortByRowId) { + tsortCloseRegion(pHandle); + } int64_t el = taosGetTimestampUs() - p; pHandle->sortElapsed += el; @@ -1641,8 +1623,7 @@ static int32_t createBlocksMergeSortInitialSources(SSortHandle* pHandle) { taosArrayDestroy(aExtSrc); tSimpleHashCleanup(mTableNumRows); if (pHandle->bSortByRowId) { - saveDirtyPagesToExtRowsMemFile(pHandle); - freeExtRowMemFileWriteBuf(pHandle); + 
tsortFinalizeRegions(pHandle); } pHandle->type = SORT_SINGLESOURCE_SORT; return code; From 4f8ff5b1a679f58eee318e3bed65ee6057ffe3d0 Mon Sep 17 00:00:00 2001 From: slzhou Date: Wed, 6 Mar 2024 09:44:14 +0800 Subject: [PATCH 52/58] feat: remove seek when write to file and use a small write buf --- source/libs/executor/src/tsort.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 38ab506918..822c997cf0 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -1068,7 +1068,7 @@ static int32_t createSortMemFile(SSortHandle* pHandle) { pMemFile->currRegionId = -1; pMemFile->currRegionOffset = -1; - pMemFile->writeBufSize = 64 * 1024 * 1024; + pMemFile->writeBufSize = 4 * 1024 * 1024; pMemFile->writeBuf = taosMemoryMalloc(pMemFile->writeBufSize); pMemFile->writeFileOffset = -1; pMemFile->bRegionDirty = false; @@ -1130,10 +1130,7 @@ static int32_t tsortCloseRegion(SSortHandle* pHandle) { pRegion->regionSize = pMemFile->currRegionOffset; int32_t writeBytes = pRegion->regionSize - (pMemFile->writeFileOffset - pRegion->fileOffset); if (writeBytes > 0) { - int ret = tsortSeekFile(pMemFile->pTdFile, pMemFile->writeFileOffset, SEEK_SET); - if (ret == 0) { - ret = fwrite(pMemFile->writeBuf, writeBytes, 1, pMemFile->pTdFile); - } + int ret = fwrite(pMemFile->writeBuf, writeBytes, 1, pMemFile->pTdFile); if (ret != 1) { terrno = TAOS_SYSTEM_ERROR(errno); return terrno; @@ -1165,11 +1162,8 @@ static int32_t saveBlockRowToExtRowsMemFile(SSortHandle* pHandle, SSDataBlock* p SSortMemFileRegion* pRegion = taosArrayGet(pMemFile->aFileRegions, pMemFile->currRegionId); { if (pMemFile->currRegionOffset + pHandle->extRowBytes >= pMemFile->writeBufSize) { - int ret = tsortSeekFile(pMemFile->pTdFile, pMemFile->writeFileOffset, SEEK_SET); int32_t writeBytes = pMemFile->currRegionOffset - (pMemFile->writeFileOffset - pRegion->fileOffset); - if (ret == 0) { - ret = 
fwrite(pMemFile->writeBuf, writeBytes, 1, pMemFile->pTdFile); - } + int ret = fwrite(pMemFile->writeBuf, writeBytes, 1, pMemFile->pTdFile); if (ret != 1) { terrno = TAOS_SYSTEM_ERROR(errno); return terrno; From ffc8c8d1485ba3de170e22b82375767955e21f36 Mon Sep 17 00:00:00 2001 From: slzhou Date: Thu, 7 Mar 2024 09:52:56 +0800 Subject: [PATCH 53/58] feat: use tdfile api --- source/libs/executor/src/tsort.c | 58 ++++++++++++++++++++------------ 1 file changed, 36 insertions(+), 22 deletions(-) diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 822c997cf0..e8eb511ebe 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -67,8 +67,7 @@ typedef struct SSortMemFile { int32_t cacheSize; int32_t blockSize; - FILE* pTdFile; - // TdFilePtr pTdFile; + TdFilePtr pTdFile; char memFilePath[PATH_MAX]; } SSortMemFile; @@ -1027,9 +1026,17 @@ static int32_t getRowBufFromExtMemFile(SSortHandle* pHandle, int32_t regionId, i if (pRegion->buf == NULL) { pRegion->bufRegOffset = 0; pRegion->buf = taosMemoryMalloc(pMemFile->blockSize); - tsortSeekFile(pMemFile->pTdFile, pRegion->fileOffset, SEEK_SET); + if (pRegion->buf == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + int64_t ret = taosLSeekFile(pMemFile->pTdFile, pRegion->fileOffset, SEEK_SET); int32_t readBytes = TMIN(pMemFile->blockSize, pRegion->regionSize); - fread(pRegion->buf, readBytes, 1, pMemFile->pTdFile); + if (ret >= 0) { + ret = taosReadFile(pMemFile->pTdFile, pRegion->buf, readBytes); + } + if (ret != readBytes) { + return TAOS_SYSTEM_ERROR(errno); + } pRegion->bufLen = readBytes; } // TODO: ASSERT(pRegion->offset < tupleOffset); @@ -1037,17 +1044,25 @@ static int32_t getRowBufFromExtMemFile(SSortHandle* pHandle, int32_t regionId, i *pFreeRow = false; *ppRow = pRegion->buf + tupleOffset - pRegion->bufRegOffset; } else { - *ppRow = taosMemoryMalloc(rowLen); - int32_t szThisBlock = pRegion->bufLen - (tupleOffset - pRegion->bufRegOffset); - memcpy(*ppRow, 
pRegion->buf + tupleOffset - pRegion->bufRegOffset, - szThisBlock); - tsortSeekFile(pMemFile->pTdFile, pRegion->fileOffset + pRegion->bufRegOffset + pRegion->bufLen, SEEK_SET); - int32_t readBytes = TMIN(pMemFile->blockSize, pRegion->regionSize - (pRegion->bufRegOffset + pRegion->bufLen)); - fread(pRegion->buf, readBytes, 1, pMemFile->pTdFile); - memcpy(*ppRow + szThisBlock, pRegion->buf, rowLen - szThisBlock); - *pFreeRow = true; - pRegion->bufRegOffset += pRegion->bufLen; - pRegion->bufLen = readBytes; + *ppRow = taosMemoryMalloc(rowLen); + if (*ppRow == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + int32_t szThisBlock = pRegion->bufLen - (tupleOffset - pRegion->bufRegOffset); + memcpy(*ppRow, pRegion->buf + tupleOffset - pRegion->bufRegOffset, + szThisBlock); + int64_t ret = taosLSeekFile(pMemFile->pTdFile, pRegion->fileOffset + pRegion->bufRegOffset + pRegion->bufLen, SEEK_SET); + int32_t readBytes = TMIN(pMemFile->blockSize, pRegion->regionSize - (pRegion->bufRegOffset + pRegion->bufLen)); + if (ret >= 0) { + ret = taosReadFile(pMemFile->pTdFile, pRegion->buf, readBytes); + } + if (ret != readBytes) { + return TAOS_SYSTEM_ERROR(errno); + } + memcpy(*ppRow + szThisBlock, pRegion->buf, rowLen - szThisBlock); + *pFreeRow = true; + pRegion->bufRegOffset += pRegion->bufLen; + pRegion->bufLen = readBytes; } //TODO: free region memory return TSDB_CODE_SUCCESS; @@ -1060,7 +1075,7 @@ static int32_t createSortMemFile(SSortHandle* pHandle) { SSortMemFile* pMemFile = taosMemoryCalloc(1, sizeof(SSortMemFile)); pMemFile->cacheSize = pHandle->extRowsMemSize; taosGetTmpfilePath(tsTempDir, "sort-ext-mem", pMemFile->memFilePath); - pMemFile->pTdFile = fopen(pMemFile->memFilePath, "w+"); + pMemFile->pTdFile = taosOpenFile(pMemFile->memFilePath, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_READ | TD_FILE_TRUNC);; if (pMemFile->pTdFile == NULL) { taosMemoryFree(pMemFile); return TAOS_SYSTEM_ERROR(errno); @@ -1093,8 +1108,7 @@ static int32_t destroySortMemFile(SSortHandle* pHandle) { 
taosMemoryFree(pMemFile->writeBuf); pMemFile->writeBuf = NULL; - fclose(pMemFile->pTdFile); - pMemFile->pTdFile = NULL; + taosCloseFile(&pMemFile->pTdFile); taosRemoveFile(pMemFile->memFilePath); taosMemoryFree(pMemFile); pHandle->pExtRowsMemFile = NULL; @@ -1130,8 +1144,8 @@ static int32_t tsortCloseRegion(SSortHandle* pHandle) { pRegion->regionSize = pMemFile->currRegionOffset; int32_t writeBytes = pRegion->regionSize - (pMemFile->writeFileOffset - pRegion->fileOffset); if (writeBytes > 0) { - int ret = fwrite(pMemFile->writeBuf, writeBytes, 1, pMemFile->pTdFile); - if (ret != 1) { + int64_t ret = taosWriteFile(pMemFile->pTdFile, pMemFile->writeBuf, writeBytes); + if (ret != writeBytes) { terrno = TAOS_SYSTEM_ERROR(errno); return terrno; } @@ -1163,8 +1177,8 @@ static int32_t saveBlockRowToExtRowsMemFile(SSortHandle* pHandle, SSDataBlock* p { if (pMemFile->currRegionOffset + pHandle->extRowBytes >= pMemFile->writeBufSize) { int32_t writeBytes = pMemFile->currRegionOffset - (pMemFile->writeFileOffset - pRegion->fileOffset); - int ret = fwrite(pMemFile->writeBuf, writeBytes, 1, pMemFile->pTdFile); - if (ret != 1) { + int64_t ret = taosWriteFile(pMemFile->pTdFile, pMemFile->writeBuf, writeBytes); + if (ret != writeBytes) { terrno = TAOS_SYSTEM_ERROR(errno); return terrno; } From 53aa45e22577d0a41fcd100f422bc455e89f1daa Mon Sep 17 00:00:00 2001 From: slzhou Date: Thu, 7 Mar 2024 11:31:02 +0800 Subject: [PATCH 54/58] Revert "feat: use tdfile api" This reverts commit ffc8c8d1485ba3de170e22b82375767955e21f36. 
--- source/libs/executor/src/tsort.c | 58 ++++++++++++-------------------- 1 file changed, 22 insertions(+), 36 deletions(-) diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index e8eb511ebe..822c997cf0 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -67,7 +67,8 @@ typedef struct SSortMemFile { int32_t cacheSize; int32_t blockSize; - TdFilePtr pTdFile; + FILE* pTdFile; + // TdFilePtr pTdFile; char memFilePath[PATH_MAX]; } SSortMemFile; @@ -1026,17 +1027,9 @@ static int32_t getRowBufFromExtMemFile(SSortHandle* pHandle, int32_t regionId, i if (pRegion->buf == NULL) { pRegion->bufRegOffset = 0; pRegion->buf = taosMemoryMalloc(pMemFile->blockSize); - if (pRegion->buf == NULL) { - return TSDB_CODE_OUT_OF_MEMORY; - } - int64_t ret = taosLSeekFile(pMemFile->pTdFile, pRegion->fileOffset, SEEK_SET); + tsortSeekFile(pMemFile->pTdFile, pRegion->fileOffset, SEEK_SET); int32_t readBytes = TMIN(pMemFile->blockSize, pRegion->regionSize); - if (ret >= 0) { - ret = taosReadFile(pMemFile->pTdFile, pRegion->buf, readBytes); - } - if (ret != readBytes) { - return TAOS_SYSTEM_ERROR(errno); - } + fread(pRegion->buf, readBytes, 1, pMemFile->pTdFile); pRegion->bufLen = readBytes; } // TODO: ASSERT(pRegion->offset < tupleOffset); @@ -1044,25 +1037,17 @@ static int32_t getRowBufFromExtMemFile(SSortHandle* pHandle, int32_t regionId, i *pFreeRow = false; *ppRow = pRegion->buf + tupleOffset - pRegion->bufRegOffset; } else { - *ppRow = taosMemoryMalloc(rowLen); - if (*ppRow == NULL) { - return TSDB_CODE_OUT_OF_MEMORY; - } - int32_t szThisBlock = pRegion->bufLen - (tupleOffset - pRegion->bufRegOffset); - memcpy(*ppRow, pRegion->buf + tupleOffset - pRegion->bufRegOffset, - szThisBlock); - int64_t ret = taosLSeekFile(pMemFile->pTdFile, pRegion->fileOffset + pRegion->bufRegOffset + pRegion->bufLen, SEEK_SET); - int32_t readBytes = TMIN(pMemFile->blockSize, pRegion->regionSize - (pRegion->bufRegOffset + pRegion->bufLen)); - if 
(ret >= 0) { - ret = taosReadFile(pMemFile->pTdFile, pRegion->buf, readBytes); - } - if (ret != readBytes) { - return TAOS_SYSTEM_ERROR(errno); - } - memcpy(*ppRow + szThisBlock, pRegion->buf, rowLen - szThisBlock); - *pFreeRow = true; - pRegion->bufRegOffset += pRegion->bufLen; - pRegion->bufLen = readBytes; + *ppRow = taosMemoryMalloc(rowLen); + int32_t szThisBlock = pRegion->bufLen - (tupleOffset - pRegion->bufRegOffset); + memcpy(*ppRow, pRegion->buf + tupleOffset - pRegion->bufRegOffset, + szThisBlock); + tsortSeekFile(pMemFile->pTdFile, pRegion->fileOffset + pRegion->bufRegOffset + pRegion->bufLen, SEEK_SET); + int32_t readBytes = TMIN(pMemFile->blockSize, pRegion->regionSize - (pRegion->bufRegOffset + pRegion->bufLen)); + fread(pRegion->buf, readBytes, 1, pMemFile->pTdFile); + memcpy(*ppRow + szThisBlock, pRegion->buf, rowLen - szThisBlock); + *pFreeRow = true; + pRegion->bufRegOffset += pRegion->bufLen; + pRegion->bufLen = readBytes; } //TODO: free region memory return TSDB_CODE_SUCCESS; @@ -1075,7 +1060,7 @@ static int32_t createSortMemFile(SSortHandle* pHandle) { SSortMemFile* pMemFile = taosMemoryCalloc(1, sizeof(SSortMemFile)); pMemFile->cacheSize = pHandle->extRowsMemSize; taosGetTmpfilePath(tsTempDir, "sort-ext-mem", pMemFile->memFilePath); - pMemFile->pTdFile = taosOpenFile(pMemFile->memFilePath, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_READ | TD_FILE_TRUNC);; + pMemFile->pTdFile = fopen(pMemFile->memFilePath, "w+"); if (pMemFile->pTdFile == NULL) { taosMemoryFree(pMemFile); return TAOS_SYSTEM_ERROR(errno); @@ -1108,7 +1093,8 @@ static int32_t destroySortMemFile(SSortHandle* pHandle) { taosMemoryFree(pMemFile->writeBuf); pMemFile->writeBuf = NULL; - taosCloseFile(&pMemFile->pTdFile); + fclose(pMemFile->pTdFile); + pMemFile->pTdFile = NULL; taosRemoveFile(pMemFile->memFilePath); taosMemoryFree(pMemFile); pHandle->pExtRowsMemFile = NULL; @@ -1144,8 +1130,8 @@ static int32_t tsortCloseRegion(SSortHandle* pHandle) { pRegion->regionSize = 
pMemFile->currRegionOffset; int32_t writeBytes = pRegion->regionSize - (pMemFile->writeFileOffset - pRegion->fileOffset); if (writeBytes > 0) { - int64_t ret = taosWriteFile(pMemFile->pTdFile, pMemFile->writeBuf, writeBytes); - if (ret != writeBytes) { + int ret = fwrite(pMemFile->writeBuf, writeBytes, 1, pMemFile->pTdFile); + if (ret != 1) { terrno = TAOS_SYSTEM_ERROR(errno); return terrno; } @@ -1177,8 +1163,8 @@ static int32_t saveBlockRowToExtRowsMemFile(SSortHandle* pHandle, SSDataBlock* p { if (pMemFile->currRegionOffset + pHandle->extRowBytes >= pMemFile->writeBufSize) { int32_t writeBytes = pMemFile->currRegionOffset - (pMemFile->writeFileOffset - pRegion->fileOffset); - int64_t ret = taosWriteFile(pMemFile->pTdFile, pMemFile->writeBuf, writeBytes); - if (ret != writeBytes) { + int ret = fwrite(pMemFile->writeBuf, writeBytes, 1, pMemFile->pTdFile); + if (ret != 1) { terrno = TAOS_SYSTEM_ERROR(errno); return terrno; } From f690e5cfb991f59bf1766d4058b957faf9fa9ee0 Mon Sep 17 00:00:00 2001 From: slzhou Date: Thu, 7 Mar 2024 14:26:46 +0800 Subject: [PATCH 55/58] fix: add error processing --- source/libs/executor/src/tsort.c | 95 ++++++++++++++++++++++++-------- 1 file changed, 73 insertions(+), 22 deletions(-) diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 822c997cf0..6d5886730f 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -241,11 +241,19 @@ void destroyTuple(void* t) { } int tsortSeekFile(FILE* file, int64_t offset, int whence) { - #ifdef WINDOWS +#ifdef WINDOWS return _fseeki64(file, offset, whence); - #else +#else return fseeko(file, offset, whence); - #endif +#endif +} + +int tsortSetAutoDelFile(char* path) { +#ifdef WINDOWS + return SetFileAttributes(path, FILE_ATTRIBUTE_TEMPORARY); +#else + return unlink(path); +#endif } /** @@ -1027,9 +1035,16 @@ static int32_t getRowBufFromExtMemFile(SSortHandle* pHandle, int32_t regionId, i if (pRegion->buf == NULL) { 
pRegion->bufRegOffset = 0; pRegion->buf = taosMemoryMalloc(pMemFile->blockSize); + if (pRegion->buf == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } tsortSeekFile(pMemFile->pTdFile, pRegion->fileOffset, SEEK_SET); int32_t readBytes = TMIN(pMemFile->blockSize, pRegion->regionSize); - fread(pRegion->buf, readBytes, 1, pMemFile->pTdFile); + int ret = fread(pRegion->buf, readBytes, 1, pMemFile->pTdFile); + if (ret != 1) { + terrno = TAOS_SYSTEM_ERROR(errno); + return terrno; + } pRegion->bufLen = readBytes; } // TODO: ASSERT(pRegion->offset < tupleOffset); @@ -1038,12 +1053,20 @@ static int32_t getRowBufFromExtMemFile(SSortHandle* pHandle, int32_t regionId, i *ppRow = pRegion->buf + tupleOffset - pRegion->bufRegOffset; } else { *ppRow = taosMemoryMalloc(rowLen); + if (*ppRow == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } int32_t szThisBlock = pRegion->bufLen - (tupleOffset - pRegion->bufRegOffset); memcpy(*ppRow, pRegion->buf + tupleOffset - pRegion->bufRegOffset, szThisBlock); tsortSeekFile(pMemFile->pTdFile, pRegion->fileOffset + pRegion->bufRegOffset + pRegion->bufLen, SEEK_SET); int32_t readBytes = TMIN(pMemFile->blockSize, pRegion->regionSize - (pRegion->bufRegOffset + pRegion->bufLen)); - fread(pRegion->buf, readBytes, 1, pMemFile->pTdFile); + int ret = fread(pRegion->buf, readBytes, 1, pMemFile->pTdFile); + if (ret != 1) { + taosMemoryFreeClear(*ppRow); + terrno = TAOS_SYSTEM_ERROR(errno); + return terrno; + } memcpy(*ppRow + szThisBlock, pRegion->buf, rowLen - szThisBlock); *pFreeRow = true; pRegion->bufRegOffset += pRegion->bufLen; @@ -1057,27 +1080,55 @@ static int32_t createSortMemFile(SSortHandle* pHandle) { if (pHandle->pExtRowsMemFile != NULL) { return TSDB_CODE_SUCCESS; } + int32_t code = TSDB_CODE_SUCCESS; SSortMemFile* pMemFile = taosMemoryCalloc(1, sizeof(SSortMemFile)); - pMemFile->cacheSize = pHandle->extRowsMemSize; - taosGetTmpfilePath(tsTempDir, "sort-ext-mem", pMemFile->memFilePath); - pMemFile->pTdFile = fopen(pMemFile->memFilePath, "w+"); - 
if (pMemFile->pTdFile == NULL) { - taosMemoryFree(pMemFile); - return TAOS_SYSTEM_ERROR(errno); + if (pMemFile == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; } - pMemFile->currRegionId = -1; - pMemFile->currRegionOffset = -1; + if (code == TSDB_CODE_SUCCESS) { + taosGetTmpfilePath(tsTempDir, "sort-ext-mem", pMemFile->memFilePath); + pMemFile->pTdFile = fopen(pMemFile->memFilePath, "w+"); + if (pMemFile->pTdFile == NULL) { + code = terrno = TAOS_SYSTEM_ERROR(errno); + } + } + if (code == TSDB_CODE_SUCCESS) { + tsortSetAutoDelFile(pMemFile->memFilePath); - pMemFile->writeBufSize = 4 * 1024 * 1024; - pMemFile->writeBuf = taosMemoryMalloc(pMemFile->writeBufSize); - pMemFile->writeFileOffset = -1; - pMemFile->bRegionDirty = false; + pMemFile->currRegionId = -1; + pMemFile->currRegionOffset = -1; - pMemFile->aFileRegions = taosArrayInit(64, sizeof(SSortMemFileRegion)); - - pHandle->pExtRowsMemFile = pMemFile; - return TSDB_CODE_SUCCESS; -} + pMemFile->writeBufSize = 4 * 1024 * 1024; + pMemFile->writeFileOffset = -1; + pMemFile->bRegionDirty = false; + + pMemFile->writeBuf = taosMemoryMalloc(pMemFile->writeBufSize); + if (pMemFile->writeBuf == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + } + } + if (code == TSDB_CODE_SUCCESS) { + pMemFile->cacheSize = pHandle->extRowsMemSize; + pMemFile->aFileRegions = taosArrayInit(64, sizeof(SSortMemFileRegion)); + if (pMemFile->aFileRegions == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + } + } + if (code == TSDB_CODE_SUCCESS) { + pHandle->pExtRowsMemFile = pMemFile; + } else { + if (pMemFile) { + if (pMemFile->aFileRegions) taosMemoryFreeClear(pMemFile->aFileRegions); + if (pMemFile->writeBuf) taosMemoryFreeClear(pMemFile->writeBuf); + if (pMemFile->pTdFile) { + fclose(pMemFile->pTdFile); + pMemFile->pTdFile = NULL; + } + taosMemoryFreeClear(pMemFile); + } + } + return code; +} static int32_t destroySortMemFile(SSortHandle* pHandle) { if (pHandle->pExtRowsMemFile == NULL) return TSDB_CODE_SUCCESS; From 
a42a897c85d662ae0036feb5d80ecaa59527d9a5 Mon Sep 17 00:00:00 2001 From: slzhou Date: Fri, 8 Mar 2024 10:47:32 +0800 Subject: [PATCH 56/58] enhance: assert the region offset is keep increasing --- source/libs/executor/src/tsort.c | 42 +++++++++++++++----------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 6d5886730f..2c8dc7fb04 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -927,7 +927,7 @@ void tsortAppendTupleToBlock(SSortHandle* pHandle, SSDataBlock* pBlock, STupleHa int32_t regionId = *(int32_t*)tsortGetValue(pTupleHandle, 1); int32_t offset = *(int32_t*)tsortGetValue(pTupleHandle, 2); int32_t length = *(int32_t*)tsortGetValue(pTupleHandle, 3); - + char* buf = NULL; bool bFreeRow = false; getRowBufFromExtMemFile(pHandle, regionId, offset, length, &buf, &bFreeRow); @@ -1047,32 +1047,30 @@ static int32_t getRowBufFromExtMemFile(SSortHandle* pHandle, int32_t regionId, i } pRegion->bufLen = readBytes; } - // TODO: ASSERT(pRegion->offset < tupleOffset); + ASSERT(pRegion->bufRegOffset <= tupleOffset); if (pRegion->bufRegOffset + pRegion->bufLen >= tupleOffset + rowLen) { *pFreeRow = false; *ppRow = pRegion->buf + tupleOffset - pRegion->bufRegOffset; } else { - *ppRow = taosMemoryMalloc(rowLen); - if (*ppRow == NULL) { - return TSDB_CODE_OUT_OF_MEMORY; - } - int32_t szThisBlock = pRegion->bufLen - (tupleOffset - pRegion->bufRegOffset); - memcpy(*ppRow, pRegion->buf + tupleOffset - pRegion->bufRegOffset, - szThisBlock); - tsortSeekFile(pMemFile->pTdFile, pRegion->fileOffset + pRegion->bufRegOffset + pRegion->bufLen, SEEK_SET); - int32_t readBytes = TMIN(pMemFile->blockSize, pRegion->regionSize - (pRegion->bufRegOffset + pRegion->bufLen)); - int ret = fread(pRegion->buf, readBytes, 1, pMemFile->pTdFile); - if (ret != 1) { - taosMemoryFreeClear(*ppRow); - terrno = TAOS_SYSTEM_ERROR(errno); - return terrno; - } - memcpy(*ppRow + 
szThisBlock, pRegion->buf, rowLen - szThisBlock); - *pFreeRow = true; - pRegion->bufRegOffset += pRegion->bufLen; - pRegion->bufLen = readBytes; + *ppRow = taosMemoryMalloc(rowLen); + if (*ppRow == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + int32_t szThisBlock = pRegion->bufLen - (tupleOffset - pRegion->bufRegOffset); + memcpy(*ppRow, pRegion->buf + tupleOffset - pRegion->bufRegOffset, szThisBlock); + tsortSeekFile(pMemFile->pTdFile, pRegion->fileOffset + pRegion->bufRegOffset + pRegion->bufLen, SEEK_SET); + int32_t readBytes = TMIN(pMemFile->blockSize, pRegion->regionSize - (pRegion->bufRegOffset + pRegion->bufLen)); + int ret = fread(pRegion->buf, readBytes, 1, pMemFile->pTdFile); + if (ret != 1) { + taosMemoryFreeClear(*ppRow); + terrno = TAOS_SYSTEM_ERROR(errno); + return terrno; + } + memcpy(*ppRow + szThisBlock, pRegion->buf, rowLen - szThisBlock); + *pFreeRow = true; + pRegion->bufRegOffset += pRegion->bufLen; + pRegion->bufLen = readBytes; } - //TODO: free region memory return TSDB_CODE_SUCCESS; } From 3db2c1edf5e8aa7250d3c7d5dd54dd10435d812f Mon Sep 17 00:00:00 2001 From: shenglian zhou Date: Fri, 8 Mar 2024 14:36:55 +0800 Subject: [PATCH 57/58] feat: add hint smalldata_ts_sort to disable row id sort --- include/common/ttokendef.h | 2 +- include/libs/nodes/plannodes.h | 2 ++ include/libs/nodes/querynodes.h | 3 ++- source/libs/executor/src/scanoperator.c | 2 +- source/libs/nodes/src/nodesCloneFuncs.c | 2 ++ source/libs/nodes/src/nodesCodeFuncs.c | 16 +++++++++++++++- source/libs/nodes/src/nodesMsgFuncs.c | 6 ++++++ source/libs/parser/src/parAstCreater.c | 11 +++++++++++ source/libs/parser/src/parTokenizer.c | 1 + source/libs/planner/inc/planInt.h | 3 ++- source/libs/planner/src/planLogicCreater.c | 3 ++- source/libs/planner/src/planPhysiCreater.c | 1 + source/libs/planner/src/planUtil.c | 14 +++++++++++++- 13 files changed, 59 insertions(+), 7 deletions(-) diff --git a/include/common/ttokendef.h b/include/common/ttokendef.h index 8f89857d33..020be9d447 
100644 --- a/include/common/ttokendef.h +++ b/include/common/ttokendef.h @@ -380,7 +380,7 @@ #define TK_SORT_FOR_GROUP 608 #define TK_PARTITION_FIRST 609 #define TK_PARA_TABLES_SORT 610 - +#define TK_SMALLDATA_TS_SORT 611 #define TK_NK_NIL 65535 diff --git a/include/libs/nodes/plannodes.h b/include/libs/nodes/plannodes.h index 0bc3ce04ef..cbf38102de 100644 --- a/include/libs/nodes/plannodes.h +++ b/include/libs/nodes/plannodes.h @@ -122,6 +122,7 @@ typedef struct SScanLogicNode { bool isCountByTag; // true if selectstmt hasCountFunc & part by tag/tbname SArray* pFuncTypes; // for last, last_row bool paraTablesSort; // for table merge scan + bool smallDataTsSort; // disable row id sort for table merge scan } SScanLogicNode; typedef struct SJoinLogicNode { @@ -445,6 +446,7 @@ typedef struct STableScanPhysiNode { bool filesetDelimited; bool needCountEmptyTable; bool paraTablesSort; + bool smallDataTsSort; } STableScanPhysiNode; typedef STableScanPhysiNode STableSeqScanPhysiNode; diff --git a/include/libs/nodes/querynodes.h b/include/libs/nodes/querynodes.h index 7ceb7e0278..97ac4ff3b9 100644 --- a/include/libs/nodes/querynodes.h +++ b/include/libs/nodes/querynodes.h @@ -128,7 +128,8 @@ typedef enum EHintOption { HINT_BATCH_SCAN, HINT_SORT_FOR_GROUP, HINT_PARTITION_FIRST, - HINT_PARA_TABLES_SORT + HINT_PARA_TABLES_SORT, + HINT_SMALLDATA_TS_SORT, } EHintOption; typedef struct SHintNode { diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 1636cd21f0..f44f8756a4 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -4349,7 +4349,7 @@ SOperatorInfo* createTableMergeScanOperatorInfo(STableScanPhysiNode* pTableScanN initResultSizeInfo(&pOperator->resultInfo, 1024); pInfo->pResBlock = createDataBlockFromDescNode(pDescNode); blockDataEnsureCapacity(pInfo->pResBlock, pOperator->resultInfo.capacity); - if (!hasLimit && blockDataGetRowSize(pInfo->pResBlock) >= 256) { + if (!hasLimit 
&& blockDataGetRowSize(pInfo->pResBlock) >= 256 && !pTableScanNode->smallDataTsSort) { pInfo->bSortRowId = true; } else { pInfo->bSortRowId = false; diff --git a/source/libs/nodes/src/nodesCloneFuncs.c b/source/libs/nodes/src/nodesCloneFuncs.c index 3f5ffcae32..453d927378 100644 --- a/source/libs/nodes/src/nodesCloneFuncs.c +++ b/source/libs/nodes/src/nodesCloneFuncs.c @@ -457,6 +457,7 @@ static int32_t logicScanCopy(const SScanLogicNode* pSrc, SScanLogicNode* pDst) { COPY_SCALAR_FIELD(isCountByTag); CLONE_OBJECT_FIELD(pFuncTypes, functParamClone); COPY_SCALAR_FIELD(paraTablesSort); + COPY_SCALAR_FIELD(smallDataTsSort); return TSDB_CODE_SUCCESS; } @@ -690,6 +691,7 @@ static int32_t physiTableScanCopy(const STableScanPhysiNode* pSrc, STableScanPhy COPY_SCALAR_FIELD(filesetDelimited); COPY_SCALAR_FIELD(needCountEmptyTable); COPY_SCALAR_FIELD(paraTablesSort); + COPY_SCALAR_FIELD(smallDataTsSort); return TSDB_CODE_SUCCESS; } diff --git a/source/libs/nodes/src/nodesCodeFuncs.c b/source/libs/nodes/src/nodesCodeFuncs.c index 689886c366..019ef6f18b 100644 --- a/source/libs/nodes/src/nodesCodeFuncs.c +++ b/source/libs/nodes/src/nodesCodeFuncs.c @@ -699,6 +699,7 @@ static const char* jkScanLogicPlanGroupTags = "GroupTags"; static const char* jkScanLogicPlanOnlyMetaCtbIdx = "OnlyMetaCtbIdx"; static const char* jkScanLogicPlanFilesetDelimited = "FilesetDelimited"; static const char* jkScanLogicPlanParaTablesSort = "ParaTablesSort"; +static const char* jkScanLogicPlanSmallDataTsSort = "SmallDataTsSort"; static int32_t logicScanNodeToJson(const void* pObj, SJson* pJson) { const SScanLogicNode* pNode = (const SScanLogicNode*)pObj; @@ -749,6 +750,9 @@ static int32_t logicScanNodeToJson(const void* pObj, SJson* pJson) { if (TSDB_CODE_SUCCESS == code) { code = tjsonAddBoolToObject(pJson, jkScanLogicPlanParaTablesSort, pNode->paraTablesSort); } + if (TSDB_CODE_SUCCESS == code) { + code = tjsonAddBoolToObject(pJson, jkScanLogicPlanSmallDataTsSort, pNode->paraTablesSort); + } return 
code; } @@ -800,7 +804,10 @@ static int32_t jsonToLogicScanNode(const SJson* pJson, void* pObj) { code = tjsonGetBoolValue(pJson, jkScanLogicPlanFilesetDelimited, &pNode->filesetDelimited); } if (TSDB_CODE_SUCCESS == code) { - code = tjsonGetBoolValue(pJson, jkScanLogicPlanParaTablesSort, &pNode->paraTablesSort); + code = tjsonGetBoolValue(pJson, jkScanLogicPlanParaTablesSort, &pNode->smallDataTsSort); + } + if (TSDB_CODE_SUCCESS == code) { + code = tjsonGetBoolValue(pJson, jkScanLogicPlanSmallDataTsSort, &pNode->smallDataTsSort); } return code; } @@ -1896,6 +1903,7 @@ static const char* jkTableScanPhysiPlanIgnoreUpdate = "IgnoreUpdate"; static const char* jkTableScanPhysiPlanFilesetDelimited = "FilesetDelimited"; static const char* jkTableScanPhysiPlanNeedCountEmptyTable = "NeedCountEmptyTable"; static const char* jkTableScanPhysiPlanParaTablesSort = "ParaTablesSort"; +static const char* jkTableScanPhysiPlanSmallDataTsSort = "SmallDataTsSort"; static int32_t physiTableScanNodeToJson(const void* pObj, SJson* pJson) { const STableScanPhysiNode* pNode = (const STableScanPhysiNode*)pObj; @@ -1973,6 +1981,9 @@ static int32_t physiTableScanNodeToJson(const void* pObj, SJson* pJson) { if (TSDB_CODE_SUCCESS == code) { code = tjsonAddBoolToObject(pJson, jkTableScanPhysiPlanParaTablesSort, pNode->paraTablesSort); } + if (TSDB_CODE_SUCCESS == code) { + code = tjsonAddBoolToObject(pJson, jkTableScanPhysiPlanSmallDataTsSort, pNode->smallDataTsSort); + } return code; } @@ -2052,6 +2063,9 @@ static int32_t jsonToPhysiTableScanNode(const SJson* pJson, void* pObj) { if (TSDB_CODE_SUCCESS == code) { code = tjsonGetBoolValue(pJson, jkTableScanPhysiPlanParaTablesSort, &pNode->paraTablesSort); } + if (TSDB_CODE_SUCCESS == code) { + code = tjsonGetBoolValue(pJson, jkTableScanPhysiPlanSmallDataTsSort, &pNode->smallDataTsSort); + } return code; } diff --git a/source/libs/nodes/src/nodesMsgFuncs.c b/source/libs/nodes/src/nodesMsgFuncs.c index 357abc2858..95a5c2f51e 100644 --- 
a/source/libs/nodes/src/nodesMsgFuncs.c +++ b/source/libs/nodes/src/nodesMsgFuncs.c @@ -2188,6 +2188,9 @@ static int32_t physiTableScanNodeInlineToMsg(const void* pObj, STlvEncoder* pEnc if (TSDB_CODE_SUCCESS == code) { code = tlvEncodeValueBool(pEncoder, pNode->paraTablesSort); } + if (TSDB_CODE_SUCCESS == code) { + code = tlvEncodeValueBool(pEncoder, pNode->smallDataTsSort); + } return code; } @@ -2275,6 +2278,9 @@ static int32_t msgToPhysiTableScanNodeInline(STlvDecoder* pDecoder, void* pObj) if (TSDB_CODE_SUCCESS == code) { code = tlvDecodeValueBool(pDecoder, &pNode->paraTablesSort); } + if (TSDB_CODE_SUCCESS == code) { + code = tlvDecodeValueBool(pDecoder, &pNode->smallDataTsSort); + } return code; } diff --git a/source/libs/parser/src/parAstCreater.c b/source/libs/parser/src/parAstCreater.c index 1d6c5e800e..74a7934985 100644 --- a/source/libs/parser/src/parAstCreater.c +++ b/source/libs/parser/src/parAstCreater.c @@ -404,6 +404,9 @@ bool addHintNodeToList(SAstCreateContext* pCxt, SNodeList** ppHintList, EHintOpt case HINT_PARA_TABLES_SORT: if (paramNum > 0 || hasHint(*ppHintList, HINT_PARA_TABLES_SORT)) return true; break; + case HINT_SMALLDATA_TS_SORT: + if (paramNum > 0 || hasHint(*ppHintList, HINT_SMALLDATA_TS_SORT)) return true; + break; default: return true; } @@ -490,6 +493,14 @@ SNodeList* createHintNodeList(SAstCreateContext* pCxt, const SToken* pLiteral) { } opt = HINT_PARA_TABLES_SORT; break; + case TK_SMALLDATA_TS_SORT: + lastComma = false; + if (0 != opt || inParamList) { + quit = true; + break; + } + opt = HINT_SMALLDATA_TS_SORT; + break; case TK_NK_LP: lastComma = false; if (0 == opt || inParamList) { diff --git a/source/libs/parser/src/parTokenizer.c b/source/libs/parser/src/parTokenizer.c index f1013d6157..0d943dd9db 100644 --- a/source/libs/parser/src/parTokenizer.c +++ b/source/libs/parser/src/parTokenizer.c @@ -213,6 +213,7 @@ static SKeyword keywordTable[] = { {"SLIDING", TK_SLIDING}, {"SLIMIT", TK_SLIMIT}, {"SMA", TK_SMA}, + 
{"SMALLDATA_TS_SORT", TK_SMALLDATA_TS_SORT}, {"SMALLINT", TK_SMALLINT}, {"SNODE", TK_SNODE}, {"SNODES", TK_SNODES}, diff --git a/source/libs/planner/inc/planInt.h b/source/libs/planner/inc/planInt.h index fcccdcf23e..3f1cb0fbd3 100644 --- a/source/libs/planner/inc/planInt.h +++ b/source/libs/planner/inc/planInt.h @@ -47,7 +47,8 @@ int32_t validateQueryPlan(SPlanContext* pCxt, SQueryPlan* pPlan); bool getBatchScanOptionFromHint(SNodeList* pList); bool getSortForGroupOptHint(SNodeList* pList); -bool getparaTablesSortOptHint(SNodeList* pList); +bool getParaTablesSortOptHint(SNodeList* pList); +bool getSmallDataTsSortOptHint(SNodeList* pList); bool getOptHint(SNodeList* pList, EHintOption hint); SLogicNode* getLogicNodeRootNode(SLogicNode* pCurr); int32_t collectTableAliasFromNodes(SNode* pNode, SSHashObj** ppRes); diff --git a/source/libs/planner/src/planLogicCreater.c b/source/libs/planner/src/planLogicCreater.c index c5e84898ed..a796c9cdb2 100644 --- a/source/libs/planner/src/planLogicCreater.c +++ b/source/libs/planner/src/planLogicCreater.c @@ -501,7 +501,8 @@ static int32_t createScanLogicNode(SLogicPlanContext* pCxt, SSelectStmt* pSelect } else { nodesDestroyNode((SNode*)pScan); } - pScan->paraTablesSort = getparaTablesSortOptHint(pSelect->pHint); + pScan->paraTablesSort = getParaTablesSortOptHint(pSelect->pHint); + pScan->smallDataTsSort = getSmallDataTsSortOptHint(pSelect->pHint); pCxt->hasScan = true; return code; diff --git a/source/libs/planner/src/planPhysiCreater.c b/source/libs/planner/src/planPhysiCreater.c index baef39144c..5895f57acd 100644 --- a/source/libs/planner/src/planPhysiCreater.c +++ b/source/libs/planner/src/planPhysiCreater.c @@ -652,6 +652,7 @@ static int32_t createTableScanPhysiNode(SPhysiPlanContext* pCxt, SSubplan* pSubp pTableScan->filesetDelimited = pScanLogicNode->filesetDelimited; pTableScan->needCountEmptyTable = pScanLogicNode->isCountByTag; pTableScan->paraTablesSort = pScanLogicNode->paraTablesSort; + pTableScan->smallDataTsSort 
= pScanLogicNode->smallDataTsSort; int32_t code = createScanPhysiNodeFinalize(pCxt, pSubplan, pScanLogicNode, (SScanPhysiNode*)pTableScan, pPhyNode); if (TSDB_CODE_SUCCESS == code) { diff --git a/source/libs/planner/src/planUtil.c b/source/libs/planner/src/planUtil.c index f31bf23bc9..a6109cdacb 100644 --- a/source/libs/planner/src/planUtil.c +++ b/source/libs/planner/src/planUtil.c @@ -466,7 +466,7 @@ bool getOptHint(SNodeList* pList, EHintOption hint) { return false; } -bool getparaTablesSortOptHint(SNodeList* pList) { +bool getParaTablesSortOptHint(SNodeList* pList) { if (!pList) return false; SNode* pNode; FOREACH(pNode, pList) { @@ -478,6 +478,18 @@ bool getparaTablesSortOptHint(SNodeList* pList) { return false; } +bool getSmallDataTsSortOptHint(SNodeList* pList) { + if (!pList) return false; + SNode* pNode; + FOREACH(pNode, pList) { + SHintNode* pHint = (SHintNode*)pNode; + if (pHint->option == HINT_SMALLDATA_TS_SORT) { + return true; + } + } + return false; +} + int32_t collectTableAliasFromNodes(SNode* pNode, SSHashObj** ppRes) { int32_t code = TSDB_CODE_SUCCESS; SLogicNode* pCurr = (SLogicNode*)pNode; From 5c691685ba83e2b3ff572a17d37b64ef94da90cc Mon Sep 17 00:00:00 2001 From: slzhou Date: Tue, 12 Mar 2024 16:53:56 +0800 Subject: [PATCH 58/58] fix: review comments --- docs/en/12-taos-sql/06-select.md | 4 +- docs/zh/12-taos-sql/06-select.md | 5 ++- include/os/osFile.h | 7 ++++ source/libs/executor/src/scanoperator.c | 7 +--- source/libs/executor/src/tsort.c | 49 +++++-------------------- source/os/src/osFile.c | 32 ++++++++++++++++ 6 files changed, 58 insertions(+), 46 deletions(-) diff --git a/docs/en/12-taos-sql/06-select.md b/docs/en/12-taos-sql/06-select.md index a2e6bca46c..074fbfbc8d 100755 --- a/docs/en/12-taos-sql/06-select.md +++ b/docs/en/12-taos-sql/06-select.md @@ -24,7 +24,7 @@ SELECT [hints] [DISTINCT] [TAGS] select_list hints: /*+ [hint([hint_param_list])] [hint([hint_param_list])] */ hint: - BATCH_SCAN | NO_BATCH_SCAN | SORT_FOR_GROUP | 
PARA_TABLES_SORT + BATCH_SCAN | NO_BATCH_SCAN | SORT_FOR_GROUP | PARA_TABLES_SORT | PARTITION_FIRST | SMALLDATA_TS_SORT select_list: select_expr [, select_expr] ... @@ -94,6 +94,7 @@ The list of currently supported Hints is as follows: | SORT_FOR_GROUP| None | Use sort for partition, conflict with PARTITION_FIRST | With normal column in partition by list | | PARTITION_FIRST| None | Use Partition before aggregate, conflict with SORT_FOR_GROUP | With normal column in partition by list | | PARA_TABLES_SORT| None | When sorting the supertable rows by timestamp, No temporary disk space is used. When there are numerous tables, each with long rows, the corresponding algorithm associated with this prompt may consume a substantial amount of memory, potentially leading to an Out Of Memory (OOM) situation. | Sorting the supertable rows by timestamp | +| SMALLDATA_TS_SORT| None | When sorting the supertable rows by timestamp, if the length of query columns >= 256, and there are relatively few rows, this hint can improve performance. 
| Sorting the supertable rows by timestamp | For example: @@ -102,6 +103,7 @@ SELECT /*+ BATCH_SCAN() */ a.ts FROM stable1 a, stable2 b where a.tag0 = b.tag0 SELECT /*+ SORT_FOR_GROUP() */ count(*), c1 FROM stable1 PARTITION BY c1; SELECT /*+ PARTITION_FIRST() */ count(*), c1 FROM stable1 PARTITION BY c1; SELECT /*+ PARA_TABLES_SORT() */ * from stable1 order by ts; +SELECT /*+ SMALLDATA_TS_SORT() */ * from stable1 order by ts; ``` ## Lists diff --git a/docs/zh/12-taos-sql/06-select.md b/docs/zh/12-taos-sql/06-select.md index eec947ea23..573e854864 100755 --- a/docs/zh/12-taos-sql/06-select.md +++ b/docs/zh/12-taos-sql/06-select.md @@ -24,7 +24,7 @@ SELECT [hints] [DISTINCT] [TAGS] select_list hints: /*+ [hint([hint_param_list])] [hint([hint_param_list])] */ hint: - BATCH_SCAN | NO_BATCH_SCAN | SORT_FOR_GROUP | PARA_TABLES_SORT + BATCH_SCAN | NO_BATCH_SCAN | SORT_FOR_GROUP | PARTITION_FIRST | PARA_TABLES_SORT | SMALLDATA_TS_SORT select_list: select_expr [, select_expr] ... @@ -94,6 +94,8 @@ Hints 是用户控制单个语句查询优化的一种手段,当 Hint 不适 | SORT_FOR_GROUP| 无 | 采用sort方式进行分组, 与PARTITION_FIRST冲突 | partition by 列表有普通列时 | | PARTITION_FIRST| 无 | 在聚合之前使用PARTITION计算分组, 与SORT_FOR_GROUP冲突 | partition by 列表有普通列时 | | PARA_TABLES_SORT| 无 | 超级表的数据按时间戳排序时, 不使用临时磁盘空间, 只使用内存。当子表数量多, 行长比较大时候, 会使用大量内存, 可能发生OOM | 超级表的数据按时间戳排序时 | +| SMALLDATA_TS_SORT| 无 | 超级表的数据按时间戳排序时, 查询列长度大于等于256, 但是行数不多, 使用这个提示, 可以提高性能 | 超级表的数据按时间戳排序时 | + 举例: ```sql @@ -101,6 +103,7 @@ SELECT /*+ BATCH_SCAN() */ a.ts FROM stable1 a, stable2 b where a.tag0 = b.tag0 SELECT /*+ SORT_FOR_GROUP() */ count(*), c1 FROM stable1 PARTITION BY c1; SELECT /*+ PARTITION_FIRST() */ count(*), c1 FROM stable1 PARTITION BY c1; SELECT /*+ PARA_TABLES_SORT() */ * from stable1 order by ts; +SELECT /*+ SMALLDATA_TS_SORT() */ * from stable1 order by ts; ``` ## 列表 diff --git a/include/os/osFile.h b/include/os/osFile.h index eb0862a719..9c9027e931 100644 --- a/include/os/osFile.h +++ b/include/os/osFile.h @@ -119,6 +119,13 @@ int32_t 
taosSetFileHandlesLimit(); int32_t taosLinkFile(char *src, char *dst); +FILE* taosOpenCFile(const char* filename, const char* mode); +int taosSeekCFile(FILE* file, int64_t offset, int whence); +size_t taosReadFromCFile(void *buffer, size_t size, size_t count, FILE *stream ); +size_t taosWriteToCFile(const void* ptr, size_t size, size_t nitems, FILE* stream); +int taosCloseCFile(FILE *); +int taosSetAutoDelFile(char* path); + bool lastErrorIsFileNotExist(); #ifdef __cplusplus diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index f44f8756a4..4a910c9f79 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -4011,17 +4011,14 @@ int32_t startDurationForGroupTableMergeScan(SOperatorInfo* pOperator) { pInfo->sortBufSize = 2048 * pInfo->bufPageSize; int32_t numOfBufPage = pInfo->sortBufSize / pInfo->bufPageSize; - if (pInfo->bSortRowId && numOfTable != 1) { - pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_BLOCK_TS_MERGE, pInfo->bufPageSize, numOfBufPage, + pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_BLOCK_TS_MERGE, pInfo->bufPageSize, numOfBufPage, pInfo->pSortInputBlock, pTaskInfo->id.str, 0, 0, 0); + if (pInfo->bSortRowId && numOfTable != 1) { int32_t memSize = 512 * 1024 * 1024; code = tsortSetSortByRowId(pInfo->pSortHandle, memSize); if (code != TSDB_CODE_SUCCESS) { return code; } - } else { - pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_BLOCK_TS_MERGE, pInfo->bufPageSize, numOfBufPage, - pInfo->pSortInputBlock, pTaskInfo->id.str, 0, 0, 0); } tsortSetMergeLimit(pInfo->pSortHandle, pInfo->mergeLimit); tsortSetMergeLimitReachedFp(pInfo->pSortHandle, tableMergeScanDoSkipTable, pInfo); diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 2c8dc7fb04..15c42334e3 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -13,8 +13,6 @@ * along with this 
program. If not, see . */ -#define ALLOW_FORBID_FUNC - #include "query.h" #include "tcommon.h" @@ -34,17 +32,6 @@ struct STupleHandle { int32_t rowIndex; }; -typedef struct SSortMemPageEntry { - int32_t pageId; - bool active; - - void* data; - - struct SSortMemPageEntry* next; - struct SSortMemPageEntry* prev; - -} SSortMemPageEntry; - typedef struct SSortMemFileRegion { int64_t fileOffset; int32_t regionSize; @@ -68,7 +55,6 @@ typedef struct SSortMemFile { int32_t blockSize; FILE* pTdFile; - // TdFilePtr pTdFile; char memFilePath[PATH_MAX]; } SSortMemFile; @@ -240,21 +226,6 @@ void destroyTuple(void* t) { } } -int tsortSeekFile(FILE* file, int64_t offset, int whence) { -#ifdef WINDOWS - return _fseeki64(file, offset, whence); -#else - return fseeko(file, offset, whence); -#endif -} - -int tsortSetAutoDelFile(char* path) { -#ifdef WINDOWS - return SetFileAttributes(path, FILE_ATTRIBUTE_TEMPORARY); -#else - return unlink(path); -#endif -} /** * @@ -1038,9 +1009,9 @@ static int32_t getRowBufFromExtMemFile(SSortHandle* pHandle, int32_t regionId, i if (pRegion->buf == NULL) { return TSDB_CODE_OUT_OF_MEMORY; } - tsortSeekFile(pMemFile->pTdFile, pRegion->fileOffset, SEEK_SET); + taosSeekCFile(pMemFile->pTdFile, pRegion->fileOffset, SEEK_SET); int32_t readBytes = TMIN(pMemFile->blockSize, pRegion->regionSize); - int ret = fread(pRegion->buf, readBytes, 1, pMemFile->pTdFile); + int ret = taosReadFromCFile(pRegion->buf, readBytes, 1, pMemFile->pTdFile); if (ret != 1) { terrno = TAOS_SYSTEM_ERROR(errno); return terrno; @@ -1058,9 +1029,9 @@ static int32_t getRowBufFromExtMemFile(SSortHandle* pHandle, int32_t regionId, i } int32_t szThisBlock = pRegion->bufLen - (tupleOffset - pRegion->bufRegOffset); memcpy(*ppRow, pRegion->buf + tupleOffset - pRegion->bufRegOffset, szThisBlock); - tsortSeekFile(pMemFile->pTdFile, pRegion->fileOffset + pRegion->bufRegOffset + pRegion->bufLen, SEEK_SET); + taosSeekCFile(pMemFile->pTdFile, pRegion->fileOffset + pRegion->bufRegOffset + 
pRegion->bufLen, SEEK_SET); int32_t readBytes = TMIN(pMemFile->blockSize, pRegion->regionSize - (pRegion->bufRegOffset + pRegion->bufLen)); - int ret = fread(pRegion->buf, readBytes, 1, pMemFile->pTdFile); + int ret = taosReadFromCFile(pRegion->buf, readBytes, 1, pMemFile->pTdFile); if (ret != 1) { taosMemoryFreeClear(*ppRow); terrno = TAOS_SYSTEM_ERROR(errno); @@ -1085,13 +1056,13 @@ static int32_t createSortMemFile(SSortHandle* pHandle) { } if (code == TSDB_CODE_SUCCESS) { taosGetTmpfilePath(tsTempDir, "sort-ext-mem", pMemFile->memFilePath); - pMemFile->pTdFile = fopen(pMemFile->memFilePath, "w+"); + pMemFile->pTdFile = taosOpenCFile(pMemFile->memFilePath, "w+"); if (pMemFile->pTdFile == NULL) { code = terrno = TAOS_SYSTEM_ERROR(errno); } } if (code == TSDB_CODE_SUCCESS) { - tsortSetAutoDelFile(pMemFile->memFilePath); + taosSetAutoDelFile(pMemFile->memFilePath); pMemFile->currRegionId = -1; pMemFile->currRegionOffset = -1; @@ -1119,7 +1090,7 @@ static int32_t createSortMemFile(SSortHandle* pHandle) { if (pMemFile->aFileRegions) taosMemoryFreeClear(pMemFile->aFileRegions); if (pMemFile->writeBuf) taosMemoryFreeClear(pMemFile->writeBuf); if (pMemFile->pTdFile) { - fclose(pMemFile->pTdFile); + taosCloseCFile(pMemFile->pTdFile); pMemFile->pTdFile = NULL; } taosMemoryFreeClear(pMemFile); @@ -1142,7 +1113,7 @@ static int32_t destroySortMemFile(SSortHandle* pHandle) { taosMemoryFree(pMemFile->writeBuf); pMemFile->writeBuf = NULL; - fclose(pMemFile->pTdFile); + taosCloseCFile(pMemFile->pTdFile); pMemFile->pTdFile = NULL; taosRemoveFile(pMemFile->memFilePath); taosMemoryFree(pMemFile); @@ -1272,8 +1243,8 @@ static void initRowIdSort(SSortHandle* pHandle) { int32_t rowSize = blockDataGetRowSize(pHandle->pDataBlock); size_t nCols = taosArrayGetSize(pHandle->pDataBlock->pDataBlock); - pHandle->pageSize = getProperSortPageSize(rowSize, nCols); - pHandle->numOfPages = 2048; + pHandle->pageSize = 256 * 1024; // 256k + pHandle->numOfPages = 256; SBlockOrderInfo* pOrder = 
taosArrayGet(pHandle->pSortInfo, 0); SBlockOrderInfo bi = {0}; diff --git a/source/os/src/osFile.c b/source/os/src/osFile.c index e6491639dc..bdd43fe9fa 100644 --- a/source/os/src/osFile.c +++ b/source/os/src/osFile.c @@ -1404,3 +1404,35 @@ int32_t taosLinkFile(char *src, char *dst) { #endif return 0; } + +FILE* taosOpenCFile(const char* filename, const char* mode) { + return fopen(filename, mode); +} + +int taosSeekCFile(FILE* file, int64_t offset, int whence) { +#ifdef WINDOWS + return _fseeki64(file, offset, whence); +#else + return fseeko(file, offset, whence); +#endif +} + +size_t taosReadFromCFile(void *buffer, size_t size, size_t count, FILE *stream ) { + return fread(buffer, size, count, stream); +} + +size_t taosWriteToCFile(const void* ptr, size_t size, size_t nitems, FILE* stream) { + return fwrite(ptr, size, nitems, stream); +} + +int taosCloseCFile(FILE *f) { + return fclose(f); +} + +int taosSetAutoDelFile(char* path) { +#ifdef WINDOWS + return SetFileAttributes(path, FILE_ATTRIBUTE_TEMPORARY); +#else + return unlink(path); +#endif +} \ No newline at end of file