From 5c691685ba83e2b3ff572a17d37b64ef94da90cc Mon Sep 17 00:00:00 2001 From: slzhou Date: Tue, 12 Mar 2024 16:53:56 +0800 Subject: [PATCH] fix: review comments --- docs/en/12-taos-sql/06-select.md | 4 +- docs/zh/12-taos-sql/06-select.md | 5 ++- include/os/osFile.h | 7 ++++ source/libs/executor/src/scanoperator.c | 7 +--- source/libs/executor/src/tsort.c | 49 +++++-------------------- source/os/src/osFile.c | 32 ++++++++++++++++ 6 files changed, 58 insertions(+), 46 deletions(-) diff --git a/docs/en/12-taos-sql/06-select.md b/docs/en/12-taos-sql/06-select.md index a2e6bca46c..074fbfbc8d 100755 --- a/docs/en/12-taos-sql/06-select.md +++ b/docs/en/12-taos-sql/06-select.md @@ -24,7 +24,7 @@ SELECT [hints] [DISTINCT] [TAGS] select_list hints: /*+ [hint([hint_param_list])] [hint([hint_param_list])] */ hint: - BATCH_SCAN | NO_BATCH_SCAN | SORT_FOR_GROUP | PARA_TABLES_SORT + BATCH_SCAN | NO_BATCH_SCAN | SORT_FOR_GROUP | PARA_TABLES_SORT | PARTITION_FIRST | SMALLDATA_TS_SORT select_list: select_expr [, select_expr] ... @@ -94,6 +94,7 @@ The list of currently supported Hints is as follows: | SORT_FOR_GROUP| None | Use sort for partition, conflict with PARTITION_FIRST | With normal column in partition by list | | PARTITION_FIRST| None | Use Partition before aggregate, conflict with SORT_FOR_GROUP | With normal column in partition by list | | PARA_TABLES_SORT| None | When sorting the supertable rows by timestamp, No temporary disk space is used. When there are numerous tables, each with long rows, the corresponding algorithm associated with this prompt may consume a substantial amount of memory, potentially leading to an Out Of Memory (OOM) situation. | Sorting the supertable rows by timestamp | +| SMALLDATA_TS_SORT| None | When sorting the supertable rows by timestamp, if the length of query columns >= 256, and there are relatively few rows, this hint can improve performance. | Sorting the supertable rows by timestamp | For example: @@ -102,6 +103,7 @@ SELECT /*+ BATCH_SCAN() */ a.ts FROM stable1 a, stable2 b where a.tag0 = b.tag0 SELECT /*+ SORT_FOR_GROUP() */ count(*), c1 FROM stable1 PARTITION BY c1; SELECT /*+ PARTITION_FIRST() */ count(*), c1 FROM stable1 PARTITION BY c1; SELECT /*+ PARA_TABLES_SORT() */ * from stable1 order by ts; +SELECT /*+ SMALLDATA_TS_SORT() */ * from stable1 order by ts; ``` ## Lists diff --git a/docs/zh/12-taos-sql/06-select.md b/docs/zh/12-taos-sql/06-select.md index eec947ea23..573e854864 100755 --- a/docs/zh/12-taos-sql/06-select.md +++ b/docs/zh/12-taos-sql/06-select.md @@ -24,7 +24,7 @@ SELECT [hints] [DISTINCT] [TAGS] select_list hints: /*+ [hint([hint_param_list])] [hint([hint_param_list])] */ hint: - BATCH_SCAN | NO_BATCH_SCAN | SORT_FOR_GROUP | PARA_TABLES_SORT + BATCH_SCAN | NO_BATCH_SCAN | SORT_FOR_GROUP | PARTITION_FIRST | PARA_TABLES_SORT | SMALLDATA_TS_SORT select_list: select_expr [, select_expr] ... @@ -94,6 +94,8 @@ Hints 是用户控制单个语句查询优化的一种手段,当 Hint 不适 | SORT_FOR_GROUP| 无 | 采用sort方式进行分组, 与PARTITION_FIRST冲突 | partition by 列表有普通列时 | | PARTITION_FIRST| 无 | 在聚合之前使用PARTITION计算分组, 与SORT_FOR_GROUP冲突 | partition by 列表有普通列时 | | PARA_TABLES_SORT| 无 | 超级表的数据按时间戳排序时, 不使用临时磁盘空间, 只使用内存。当子表数量多, 行长比较大时候, 会使用大量内存, 可能发生OOM | 超级表的数据按时间戳排序时 | +| SMALLDATA_TS_SORT| 无 | 超级表的数据按时间戳排序时, 查询列长度大于等于256, 但是行数不多, 使用这个提示, 可以提高性能 | 超级表的数据按时间戳排序时 | + 举例: ```sql @@ -101,6 +103,7 @@ SELECT /*+ BATCH_SCAN() */ a.ts FROM stable1 a, stable2 b where a.tag0 = b.tag0 SELECT /*+ SORT_FOR_GROUP() */ count(*), c1 FROM stable1 PARTITION BY c1; SELECT /*+ PARTITION_FIRST() */ count(*), c1 FROM stable1 PARTITION BY c1; SELECT /*+ PARA_TABLES_SORT() */ * from stable1 order by ts; +SELECT /*+ SMALLDATA_TS_SORT() */ * from stable1 order by ts; ``` ## 列表 diff --git a/include/os/osFile.h b/include/os/osFile.h index eb0862a719..9c9027e931 100644 --- a/include/os/osFile.h +++ b/include/os/osFile.h @@ -119,6 +119,13 @@ int32_t taosSetFileHandlesLimit(); int32_t taosLinkFile(char *src, char *dst); +FILE* taosOpenCFile(const char* filename, const char* mode); +int taosSeekCFile(FILE* file, int64_t offset, int whence); +size_t taosReadFromCFile(void *buffer, size_t size, size_t count, FILE *stream ); +size_t taosWriteToCFile(const void* ptr, size_t size, size_t nitems, FILE* stream); +int taosCloseCFile(FILE *); +int taosSetAutoDelFile(char* path); + bool lastErrorIsFileNotExist(); #ifdef __cplusplus diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index f44f8756a4..4a910c9f79 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -4011,17 +4011,14 @@ int32_t startDurationForGroupTableMergeScan(SOperatorInfo* pOperator) { pInfo->sortBufSize = 2048 * pInfo->bufPageSize; int32_t numOfBufPage = pInfo->sortBufSize / pInfo->bufPageSize; - if (pInfo->bSortRowId && numOfTable != 1) { - pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_BLOCK_TS_MERGE, pInfo->bufPageSize, numOfBufPage, + pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_BLOCK_TS_MERGE, pInfo->bufPageSize, numOfBufPage, pInfo->pSortInputBlock, pTaskInfo->id.str, 0, 0, 0); + if (pInfo->bSortRowId && numOfTable != 1) { int32_t memSize = 512 * 1024 * 1024; code = tsortSetSortByRowId(pInfo->pSortHandle, memSize); if (code != TSDB_CODE_SUCCESS) { return code; } - } else { - pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_BLOCK_TS_MERGE, pInfo->bufPageSize, numOfBufPage, - pInfo->pSortInputBlock, pTaskInfo->id.str, 0, 0, 0); } tsortSetMergeLimit(pInfo->pSortHandle, pInfo->mergeLimit); tsortSetMergeLimitReachedFp(pInfo->pSortHandle, tableMergeScanDoSkipTable, pInfo); diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 2c8dc7fb04..15c42334e3 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -13,8 +13,6 @@ * along with this program. If not, see . */ -#define ALLOW_FORBID_FUNC - #include "query.h" #include "tcommon.h" @@ -34,17 +32,6 @@ struct STupleHandle { int32_t rowIndex; }; -typedef struct SSortMemPageEntry { - int32_t pageId; - bool active; - - void* data; - - struct SSortMemPageEntry* next; - struct SSortMemPageEntry* prev; - -} SSortMemPageEntry; - typedef struct SSortMemFileRegion { int64_t fileOffset; int32_t regionSize; @@ -68,7 +55,6 @@ typedef struct SSortMemFile { int32_t blockSize; FILE* pTdFile; - // TdFilePtr pTdFile; char memFilePath[PATH_MAX]; } SSortMemFile; @@ -240,21 +226,6 @@ void destroyTuple(void* t) { } } -int tsortSeekFile(FILE* file, int64_t offset, int whence) { -#ifdef WINDOWS - return _fseeki64(file, offset, whence); -#else - return fseeko(file, offset, whence); -#endif -} - -int tsortSetAutoDelFile(char* path) { -#ifdef WINDOWS - return SetFileAttributes(path, FILE_ATTRIBUTE_TEMPORARY); -#else - return unlink(path); -#endif -} /** * @@ -1038,9 +1009,9 @@ static int32_t getRowBufFromExtMemFile(SSortHandle* pHandle, int32_t regionId, i if (pRegion->buf == NULL) { return TSDB_CODE_OUT_OF_MEMORY; } - tsortSeekFile(pMemFile->pTdFile, pRegion->fileOffset, SEEK_SET); + taosSeekCFile(pMemFile->pTdFile, pRegion->fileOffset, SEEK_SET); int32_t readBytes = TMIN(pMemFile->blockSize, pRegion->regionSize); - int ret = fread(pRegion->buf, readBytes, 1, pMemFile->pTdFile); + int ret = taosReadFromCFile(pRegion->buf, readBytes, 1, pMemFile->pTdFile); if (ret != 1) { terrno = TAOS_SYSTEM_ERROR(errno); return terrno; @@ -1058,9 +1029,9 @@ static int32_t getRowBufFromExtMemFile(SSortHandle* pHandle, int32_t regionId, i } int32_t szThisBlock = pRegion->bufLen - (tupleOffset - pRegion->bufRegOffset); memcpy(*ppRow, pRegion->buf + tupleOffset - pRegion->bufRegOffset, szThisBlock); - tsortSeekFile(pMemFile->pTdFile, pRegion->fileOffset + pRegion->bufRegOffset + pRegion->bufLen, SEEK_SET); + taosSeekCFile(pMemFile->pTdFile, pRegion->fileOffset + pRegion->bufRegOffset + pRegion->bufLen, SEEK_SET); int32_t readBytes = TMIN(pMemFile->blockSize, pRegion->regionSize - (pRegion->bufRegOffset + pRegion->bufLen)); - int ret = fread(pRegion->buf, readBytes, 1, pMemFile->pTdFile); + int ret = taosReadFromCFile(pRegion->buf, readBytes, 1, pMemFile->pTdFile); if (ret != 1) { taosMemoryFreeClear(*ppRow); terrno = TAOS_SYSTEM_ERROR(errno); @@ -1085,13 +1056,13 @@ static int32_t createSortMemFile(SSortHandle* pHandle) { } if (code == TSDB_CODE_SUCCESS) { taosGetTmpfilePath(tsTempDir, "sort-ext-mem", pMemFile->memFilePath); - pMemFile->pTdFile = fopen(pMemFile->memFilePath, "w+"); + pMemFile->pTdFile = taosOpenCFile(pMemFile->memFilePath, "w+"); if (pMemFile->pTdFile == NULL) { code = terrno = TAOS_SYSTEM_ERROR(errno); } } if (code == TSDB_CODE_SUCCESS) { - tsortSetAutoDelFile(pMemFile->memFilePath); + taosSetAutoDelFile(pMemFile->memFilePath); pMemFile->currRegionId = -1; pMemFile->currRegionOffset = -1; @@ -1119,7 +1090,7 @@ static int32_t createSortMemFile(SSortHandle* pHandle) { if (pMemFile->aFileRegions) taosMemoryFreeClear(pMemFile->aFileRegions); if (pMemFile->writeBuf) taosMemoryFreeClear(pMemFile->writeBuf); if (pMemFile->pTdFile) { - fclose(pMemFile->pTdFile); + taosCloseCFile(pMemFile->pTdFile); pMemFile->pTdFile = NULL; } taosMemoryFreeClear(pMemFile); @@ -1142,7 +1113,7 @@ static int32_t destroySortMemFile(SSortHandle* pHandle) { taosMemoryFree(pMemFile->writeBuf); pMemFile->writeBuf = NULL; - fclose(pMemFile->pTdFile); + taosCloseCFile(pMemFile->pTdFile); pMemFile->pTdFile = NULL; taosRemoveFile(pMemFile->memFilePath); taosMemoryFree(pMemFile); @@ -1272,8 +1243,8 @@ static void initRowIdSort(SSortHandle* pHandle) { int32_t rowSize = blockDataGetRowSize(pHandle->pDataBlock); size_t nCols = taosArrayGetSize(pHandle->pDataBlock->pDataBlock); - pHandle->pageSize = getProperSortPageSize(rowSize, nCols); - pHandle->numOfPages = 2048; + pHandle->pageSize = 256 * 1024; // 256k + pHandle->numOfPages = 256; SBlockOrderInfo* pOrder = taosArrayGet(pHandle->pSortInfo, 0); SBlockOrderInfo bi = {0}; diff --git a/source/os/src/osFile.c b/source/os/src/osFile.c index e6491639dc..bdd43fe9fa 100644 --- a/source/os/src/osFile.c +++ b/source/os/src/osFile.c @@ -1404,3 +1404,35 @@ int32_t taosLinkFile(char *src, char *dst) { #endif return 0; } + +FILE* taosOpenCFile(const char* filename, const char* mode) { + return fopen(filename, mode); +} + +int taosSeekCFile(FILE* file, int64_t offset, int whence) { +#ifdef WINDOWS + return _fseeki64(file, offset, whence); +#else + return fseeko(file, offset, whence); +#endif +} + +size_t taosReadFromCFile(void *buffer, size_t size, size_t count, FILE *stream ) { + return fread(buffer, size, count, stream); +} + +size_t taosWriteToCFile(const void* ptr, size_t size, size_t nitems, FILE* stream) { + return fwrite(ptr, size, nitems, stream); +} + +int taosCloseCFile(FILE *f) { + return fclose(f); +} + +int taosSetAutoDelFile(char* path) { +#ifdef WINDOWS + return SetFileAttributes(path, FILE_ATTRIBUTE_TEMPORARY); +#else + return unlink(path); +#endif +} \ No newline at end of file