From b4fce76b27167489dea1c71a5198685848e2db26 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 21 Jul 2020 00:04:07 +0800 Subject: [PATCH 01/42] [td-225] fix bugs in query. and refactor some codes. --- src/client/inc/tsclient.h | 2 +- src/client/src/tscSystem.c | 2 +- src/query/inc/qExecutor.h | 2 +- src/query/inc/qResultbuf.h | 19 ++- src/query/inc/{qsqlparser.h => qSqlparser.h} | 0 src/query/src/qAst.c | 2 +- src/query/src/qExecutor.c | 14 +- src/query/src/qParserImpl.c | 2 +- src/query/src/qResultbuf.c | 28 ++-- src/query/src/sql.c | 2 +- src/tsdb/src/tsdbRead.c | 162 ++++++++++--------- src/util/src/tcache.c | 1 + 12 files changed, 127 insertions(+), 109 deletions(-) rename src/query/inc/{qsqlparser.h => qSqlparser.h} (100%) diff --git a/src/client/inc/tsclient.h b/src/client/inc/tsclient.h index 17840df4a4..7efcd54cfd 100644 --- a/src/client/inc/tsclient.h +++ b/src/client/inc/tsclient.h @@ -31,8 +31,8 @@ extern "C" { #include "tutil.h" #include "qExecutor.h" +#include "qSqlparser.h" #include "qTsbuf.h" -#include "qsqlparser.h" #include "tcmdtype.h" // forward declaration diff --git a/src/client/src/tscSystem.c b/src/client/src/tscSystem.c index 5662b4a885..42bf27c45a 100644 --- a/src/client/src/tscSystem.c +++ b/src/client/src/tscSystem.c @@ -148,7 +148,7 @@ void taos_init_imp() { refreshTime = refreshTime < 10 ? 10 : refreshTime; if (tscCacheHandle == NULL) { - tscCacheHandle = taosCacheInit(TSDB_DATA_TYPE_BINARY, refreshTime, false, NULL, "client"); + tscCacheHandle = taosCacheInit(TSDB_DATA_TYPE_BINARY, refreshTime, false, NULL, "tableMeta"); } tscDebug("client is initialized successfully"); diff --git a/src/query/inc/qExecutor.h b/src/query/inc/qExecutor.h index 127c38a6f8..92fe078c3f 100644 --- a/src/query/inc/qExecutor.h +++ b/src/query/inc/qExecutor.h @@ -20,8 +20,8 @@ #include "hash.h" #include "qFill.h" #include "qResultbuf.h" +#include "qSqlparser.h" #include "qTsbuf.h" -#include "qsqlparser.h" #include "query.h" #include "taosdef.h" #include "tarray.h" diff --git a/src/query/inc/qResultbuf.h b/src/query/inc/qResultbuf.h index 8c8afb0957..d9da6bb63e 100644 --- a/src/query/inc/qResultbuf.h +++ b/src/query/inc/qResultbuf.h @@ -13,8 +13,8 @@ * along with this program. If not, see . */ -#ifndef TDENGINE_VNODEQUERYUTIL_H -#define TDENGINE_VNODEQUERYUTIL_H +#ifndef TDENGINE_QRESULTBUF_H +#define TDENGINE_QRESULTBUF_H #ifdef __cplusplus extern "C" { @@ -26,11 +26,18 @@ extern "C" { typedef struct SArray* SIDList; +typedef struct SPageInfo { + int32_t pageId; + int32_t offset; + int32_t lengthOnDisk; +} SPageInfo; + typedef struct SDiskbasedResultBuf { int32_t numOfRowsPerPage; int32_t numOfPages; int64_t totalBufSize; - int32_t fd; // data file fd + FILE* file; +// int32_t fd; // data file fd int32_t allocateId; // allocated page id int32_t incStep; // minimum allocated pages void* pBuf; // mmap buffer pointer @@ -43,6 +50,8 @@ typedef struct SDiskbasedResultBuf { void* iBuf; // inmemory buf void* handle; // for debug purpose void* emptyDummyIdList; // dummy id list + bool comp; + } SDiskbasedResultBuf; #define DEFAULT_INTERN_BUF_PAGE_SIZE (1024L) @@ -56,7 +65,7 @@ typedef struct SDiskbasedResultBuf { * @return */ int32_t createDiskbasedResultBuffer(SDiskbasedResultBuf** pResultBuf, int32_t numOfPages, int32_t rowSize, int32_t pagesize, - int32_t inMemPages, void* handle); + int32_t inMemPages, const void* handle); /** * @@ -126,4 +135,4 @@ int32_t getLastPageId(SIDList pList); } #endif -#endif // TDENGINE_VNODEQUERYUTIL_H +#endif // TDENGINE_QRESULTBUF_H diff --git a/src/query/inc/qsqlparser.h b/src/query/inc/qSqlparser.h similarity index 100% rename from src/query/inc/qsqlparser.h rename to src/query/inc/qSqlparser.h diff --git a/src/query/src/qAst.c b/src/query/src/qAst.c index c2578c15c0..e3c0c1dbb0 100644 --- a/src/query/src/qAst.c +++ b/src/query/src/qAst.c @@ -18,8 +18,8 @@ #include "exception.h" #include "qAst.h" +#include "qSqlparser.h" #include "qSyntaxtreefunction.h" -#include "qsqlparser.h" #include "taosdef.h" #include "taosmsg.h" #include "tarray.h" diff --git a/src/query/src/qExecutor.c b/src/query/src/qExecutor.c index 906d0cfe67..30d6cc288f 100644 --- a/src/query/src/qExecutor.c +++ b/src/query/src/qExecutor.c @@ -6617,14 +6617,16 @@ void* qOpenQueryMgmt(int32_t vgId) { char cacheName[128] = {0}; sprintf(cacheName, "qhandle_%d", vgId); - SQueryMgmt* pQueryHandle = calloc(1, sizeof(SQueryMgmt)); + SQueryMgmt* pQueryMgmt = calloc(1, sizeof(SQueryMgmt)); - pQueryHandle->qinfoPool = taosCacheInit(TSDB_DATA_TYPE_BIGINT, REFRESH_HANDLE_INTERVAL, true, freeqinfoFn, cacheName); - pQueryHandle->closed = false; - pthread_mutex_init(&pQueryHandle->lock, NULL); + pQueryMgmt->qinfoPool = taosCacheInit(TSDB_DATA_TYPE_BIGINT, REFRESH_HANDLE_INTERVAL, true, freeqinfoFn, cacheName); + pQueryMgmt->closed = false; + pQueryMgmt->vgId = vgId; + + pthread_mutex_init(&pQueryMgmt->lock, NULL); qDebug("vgId:%d, open querymgmt success", vgId); - return pQueryHandle; + return pQueryMgmt; } static void queryMgmtKillQueryFn(void* handle) { @@ -6664,7 +6666,7 @@ void qCleanupQueryMgmt(void* pQMgmt) { pthread_mutex_destroy(&pQueryMgmt->lock); tfree(pQueryMgmt); - qDebug("vgId:%d querymgmt cleanup completed", vgId); + qDebug("vgId:%d queryMgmt cleanup completed", vgId); } void** qRegisterQInfo(void* pMgmt, uint64_t qInfo) { diff --git a/src/query/src/qParserImpl.c b/src/query/src/qParserImpl.c index ecc11f8f4d..1e58dbbe0b 100644 --- a/src/query/src/qParserImpl.c +++ b/src/query/src/qParserImpl.c @@ -14,7 +14,7 @@ */ #include "os.h" -#include "qsqlparser.h" +#include "qSqlparser.h" #include "queryLog.h" #include "taosdef.h" #include "taosmsg.h" diff --git a/src/query/src/qResultbuf.c b/src/query/src/qResultbuf.c index de59676e59..2443381194 100644 --- a/src/query/src/qResultbuf.c +++ b/src/query/src/qResultbuf.c @@ -5,7 +5,7 @@ #include "taoserror.h" int32_t createDiskbasedResultBuffer(SDiskbasedResultBuf** pResultBuf, int32_t numOfPages, int32_t rowSize, - int32_t pagesize, int32_t inMemPages, void* handle) { + int32_t pagesize, int32_t inMemPages, const void* handle) { *pResultBuf = calloc(1, sizeof(SDiskbasedResultBuf)); SDiskbasedResultBuf* pResBuf = *pResultBuf; @@ -24,6 +24,7 @@ int32_t createDiskbasedResultBuffer(SDiskbasedResultBuf** pResultBuf, int32_t nu pResBuf->incStep = 4; pResBuf->allocateId = -1; + // todo opt perf by on demand create in memory buffer pResBuf->iBuf = calloc(pResBuf->inMemPages, pResBuf->pageSize); // init id hash table @@ -31,10 +32,10 @@ int32_t createDiskbasedResultBuffer(SDiskbasedResultBuf** pResultBuf, int32_t nu pResBuf->list = taosArrayInit(numOfPages, POINTER_BYTES); char path[PATH_MAX] = {0}; - getTmpfilePath("tsdb_qbuf", path); + getTmpfilePath("qbuf", path); pResBuf->path = strdup(path); - pResBuf->fd = FD_INITIALIZER; + pResBuf->file = NULL; pResBuf->pBuf = NULL; pResBuf->emptyDummyIdList = taosArrayInit(1, sizeof(int32_t)); @@ -52,8 +53,9 @@ int32_t getResBufSize(SDiskbasedResultBuf* pResultBuf) { return pResultBuf->tota #define FILE_SIZE_ON_DISK(_r) (NUM_OF_PAGES_ON_DISK(_r) * (_r)->pageSize) static int32_t createDiskResidesBuf(SDiskbasedResultBuf* pResultBuf) { - pResultBuf->fd = open(pResultBuf->path, O_CREAT | O_RDWR, 0666); - if (!FD_VALID(pResultBuf->fd)) { +// pResultBuf->fd = open(pResultBuf->path, O_CREAT | O_RDWR, 0666); + pResultBuf->file = fopen(pResultBuf->path, "r+"); + if (pResultBuf->file == NULL) { qError("failed to create tmp file: %s on disk. %s", pResultBuf->path, strerror(errno)); return TAOS_SYSTEM_ERROR(errno); } @@ -61,13 +63,15 @@ static int32_t createDiskResidesBuf(SDiskbasedResultBuf* pResultBuf) { assert(pResultBuf->numOfPages == pResultBuf->inMemPages); pResultBuf->numOfPages += pResultBuf->incStep; - int32_t ret = ftruncate(pResultBuf->fd, NUM_OF_PAGES_ON_DISK(pResultBuf) * pResultBuf->pageSize); + int32_t ret = ftruncate(fileno(pResultBuf->file), NUM_OF_PAGES_ON_DISK(pResultBuf) * pResultBuf->pageSize); if (ret != TSDB_CODE_SUCCESS) { qError("failed to create tmp file: %s on disk. %s", pResultBuf->path, strerror(errno)); return TAOS_SYSTEM_ERROR(errno); } - pResultBuf->pBuf = mmap(NULL, FILE_SIZE_ON_DISK(pResultBuf), PROT_READ | PROT_WRITE, MAP_SHARED, pResultBuf->fd, 0); + pResultBuf->pBuf = mmap(NULL, FILE_SIZE_ON_DISK(pResultBuf), PROT_READ | PROT_WRITE, MAP_SHARED, + fileno(pResultBuf->file), 0); + if (pResultBuf->pBuf == MAP_FAILED) { qError("QInfo:%p failed to map temp file: %s. %s", pResultBuf->handle, pResultBuf->path, strerror(errno)); return TAOS_SYSTEM_ERROR(errno); @@ -82,7 +86,7 @@ static int32_t extendDiskFileSize(SDiskbasedResultBuf* pResultBuf, int32_t incNu int32_t ret = TSDB_CODE_SUCCESS; if (pResultBuf->pBuf == NULL) { - assert(pResultBuf->fd == FD_INITIALIZER); + assert(pResultBuf->file == NULL); if ((ret = createDiskResidesBuf(pResultBuf)) != TSDB_CODE_SUCCESS) { return ret; @@ -95,7 +99,7 @@ static int32_t extendDiskFileSize(SDiskbasedResultBuf* pResultBuf, int32_t incNu * disk-based output buffer is exhausted, try to extend the disk-based buffer, the available disk space may * be insufficient */ - ret = ftruncate(pResultBuf->fd, NUM_OF_PAGES_ON_DISK(pResultBuf) * pResultBuf->pageSize); + ret = ftruncate(fileno(pResultBuf->file), NUM_OF_PAGES_ON_DISK(pResultBuf) * pResultBuf->pageSize); if (ret != TSDB_CODE_SUCCESS) { // dError("QInfo:%p failed to create intermediate result output file:%s. %s", pQInfo, pSupporter->extBufFile, // strerror(errno)); @@ -103,7 +107,7 @@ static int32_t extendDiskFileSize(SDiskbasedResultBuf* pResultBuf, int32_t incNu } pResultBuf->totalBufSize = pResultBuf->numOfPages * pResultBuf->pageSize; - pResultBuf->pBuf = mmap(NULL, FILE_SIZE_ON_DISK(pResultBuf), PROT_READ | PROT_WRITE, MAP_SHARED, pResultBuf->fd, 0); + pResultBuf->pBuf = mmap(NULL, FILE_SIZE_ON_DISK(pResultBuf), PROT_READ | PROT_WRITE, MAP_SHARED, fileno(pResultBuf->file), 0); if (pResultBuf->pBuf == MAP_FAILED) { // dError("QInfo:%p failed to map temp file: %s. %s", pQInfo, pSupporter->extBufFile, strerror(errno)); @@ -185,11 +189,11 @@ void destroyResultBuf(SDiskbasedResultBuf* pResultBuf, void* handle) { return; } - if (FD_VALID(pResultBuf->fd)) { + if (pResultBuf->file != NULL) { qDebug("QInfo:%p disk-based output buffer closed, total:%" PRId64 " bytes, file created:%s, file size:%d", handle, pResultBuf->totalBufSize, pResultBuf->path, FILE_SIZE_ON_DISK(pResultBuf)); - close(pResultBuf->fd); + fclose(pResultBuf->file); munmap(pResultBuf->pBuf, FILE_SIZE_ON_DISK(pResultBuf)); pResultBuf->pBuf = NULL; } else { diff --git a/src/query/src/sql.c b/src/query/src/sql.c index ac9952bb97..307d5203b3 100644 --- a/src/query/src/sql.c +++ b/src/query/src/sql.c @@ -30,7 +30,7 @@ #include #include #include -#include "qsqlparser.h" +#include "qSqlparser.h" #include "tcmdtype.h" #include "tstoken.h" #include "ttokendef.h" diff --git a/src/tsdb/src/tsdbRead.c b/src/tsdb/src/tsdbRead.c index 37784577c4..3eeca85db7 100644 --- a/src/tsdb/src/tsdbRead.c +++ b/src/tsdb/src/tsdbRead.c @@ -654,8 +654,9 @@ static bool doLoadFileDataBlock(STsdbQueryHandle* pQueryHandle, SCompBlock* pBlo int64_t elapsedTime = (taosGetTimestampUs() - st); pQueryHandle->cost.blockLoadTime += elapsedTime; - tsdbDebug("%p load file block into buffer, elapsed time:%"PRId64 " us", pQueryHandle, elapsedTime); + tsdbDebug("%p load file block into buffer, brange:%"PRId64"-%"PRId64" , rows:%d, elapsed time:%"PRId64 " us", + pQueryHandle, pBlock->keyFirst, pBlock->keyLast, pBlock->numOfRows, elapsedTime); return blockLoaded; } @@ -971,6 +972,52 @@ static void copyOneRowFromMem(STsdbQueryHandle* pQueryHandle, int32_t capacity, } } +static void moveDataToFront(STsdbQueryHandle* pQueryHandle, int32_t numOfRows, int32_t numOfCols) { + if (ASCENDING_TRAVERSE(pQueryHandle->order)) { + return; + } + + // if the buffer is not full in case of descending order query, move the data in the front of the buffer + if (numOfRows < pQueryHandle->outputCapacity) { + int32_t emptySize = pQueryHandle->outputCapacity - numOfRows; + for(int32_t i = 0; i < numOfCols; ++i) { + SColumnInfoData* pColInfo = taosArrayGet(pQueryHandle->pColumns, i); + memmove(pColInfo->pData, pColInfo->pData + emptySize * pColInfo->info.bytes, numOfRows * pColInfo->info.bytes); + } + } +} + +static void getQualifiedRowsPos(STsdbQueryHandle* pQueryHandle, int32_t startPos, int32_t endPos, + int32_t numOfExisted, int32_t *start, int32_t *end) { + *start = -1; + + if (ASCENDING_TRAVERSE(pQueryHandle->order)) { + int32_t remain = endPos - startPos + 1; + if (remain + numOfExisted > pQueryHandle->outputCapacity) { + *end = (pQueryHandle->outputCapacity - numOfExisted) + startPos - 1; + } + + *start = startPos; + } else { + int32_t remain = (startPos - endPos) + 1; + if (remain + numOfExisted > pQueryHandle->outputCapacity) { + *end = startPos + 1 - (pQueryHandle->outputCapacity - numOfExisted); + } + + *start = *end; + *end = startPos; + } +} + +static void updateInfoAfterMerge(STsdbQueryHandle* pQueryHandle, STableCheckInfo* pCheckInfo, int32_t numOfRows, int32_t endPos) { + SQueryFilePos* cur = &pQueryHandle->cur; + + pCheckInfo->lastKey = cur->lastKey; + pQueryHandle->realNumOfRows = numOfRows; + cur->rows = numOfRows; + cur->pos = endPos; +} + // only return the qualified data to client in terms of query time window, data rows in the same block but do not // be included in the query time window will be discarded static void doMergeTwoLevelData(STsdbQueryHandle* pQueryHandle, STableCheckInfo* pCheckInfo, SCompBlock* pBlock) { @@ -978,7 +1025,10 @@ static void doMergeTwoLevelData(STsdbQueryHandle* pQueryHandle, STableCheckInfo* SDataBlockInfo blockInfo = GET_FILE_DATA_BLOCK_INFO(pCheckInfo, pBlock); initTableMemIterator(pQueryHandle, pCheckInfo); + SDataCols* pCols = pQueryHandle->rhelper.pDataCols[0]; + assert(pCols->cols[0].type == TSDB_DATA_TYPE_TIMESTAMP && pCols->cols[0].colId == PRIMARYKEY_TIMESTAMP_COL_INDEX); + TSKEY* tsArray = pCols->cols[0].pData; // for search the endPos, so the order needs to reverse int32_t order = (pQueryHandle->order == TSDB_ORDER_ASC)? TSDB_ORDER_DESC:TSDB_ORDER_ASC; @@ -1004,9 +1054,6 @@ static void doMergeTwoLevelData(STsdbQueryHandle* pQueryHandle, STableCheckInfo* // compared with the data from in-memory buffer, to generate the correct timestamp array list int32_t pos = cur->pos; - assert(pCols->cols[0].type == TSDB_DATA_TYPE_TIMESTAMP && pCols->cols[0].colId == 0); - TSKEY* tsArray = pCols->cols[0].pData; - int32_t numOfRows = 0; pQueryHandle->cur.win = TSWINDOW_INITIALIZER; @@ -1014,34 +1061,22 @@ static void doMergeTwoLevelData(STsdbQueryHandle* pQueryHandle, STableCheckInfo* if (pCheckInfo->iiter == NULL && pCheckInfo->iter == NULL) { int32_t start = cur->pos; int32_t end = endPos; - if (!ASCENDING_TRAVERSE(pQueryHandle->order)) { - end = cur->pos; - start = endPos; - } - - cur->win.skey = tsArray[start]; - cur->win.ekey = tsArray[end]; - - // todo opt in case of no data in buffer - numOfRows = copyDataFromFileBlock(pQueryHandle, pQueryHandle->outputCapacity, numOfRows, start, end); - - // if the buffer is not full in case of descending order query, move the data in the front of the buffer - if (!ASCENDING_TRAVERSE(pQueryHandle->order) && numOfRows < pQueryHandle->outputCapacity) { - int32_t emptySize = pQueryHandle->outputCapacity - numOfRows; - for(int32_t i = 0; i < numOfCols; ++i) { - SColumnInfoData* pColInfo = taosArrayGet(pQueryHandle->pColumns, i); - memmove(pColInfo->pData, pColInfo->pData + emptySize * pColInfo->info.bytes, numOfRows * pColInfo->info.bytes); - } + if (!ASCENDING_TRAVERSE(pQueryHandle->order)) { + SWAP(start, end, int32_t); } - + + numOfRows = copyDataFromFileBlock(pQueryHandle, pQueryHandle->outputCapacity, numOfRows, start, end); + cur->win = (STimeWindow) {.skey = tsArray[start], .ekey = tsArray[end]}; + pos += (end - start + 1) * step; cur->blockCompleted = (((pos >= endPos || cur->lastKey > pQueryHandle->window.ekey) && ASCENDING_TRAVERSE(pQueryHandle->order)) || ((pos <= endPos || cur->lastKey < pQueryHandle->window.ekey) && !ASCENDING_TRAVERSE(pQueryHandle->order))); - - pCheckInfo->lastKey = cur->lastKey; - pQueryHandle->realNumOfRows = numOfRows; - cur->rows = numOfRows; + + // if the buffer is not full in case of descending order query, move the data in the front of the buffer + moveDataToFront(pQueryHandle, numOfRows, numOfCols); + updateInfoAfterMerge(pQueryHandle, pCheckInfo, numOfRows, pos); + return; } else if (pCheckInfo->iter != NULL || pCheckInfo->iiter != NULL) { SSkipListNode* node = NULL; @@ -1087,27 +1122,15 @@ static void doMergeTwoLevelData(STsdbQueryHandle* pQueryHandle, STableCheckInfo* if (tsArray[end] == key) { // the value of key in cache equals to the end timestamp value, ignore it moveToNextRowInMem(pCheckInfo); } - - int32_t start = -1; - if (ASCENDING_TRAVERSE(pQueryHandle->order)) { - int32_t remain = end - pos + 1; - if (remain + numOfRows > pQueryHandle->outputCapacity) { - end = (pQueryHandle->outputCapacity - numOfRows) + pos - 1; - } - start = pos; - } else { - int32_t remain = (pos - end) + 1; - if (remain + numOfRows > pQueryHandle->outputCapacity) { - end = pos + 1 - (pQueryHandle->outputCapacity - numOfRows); - } + int32_t qstart = 0, qend = 0; + getQualifiedRowsPos(pQueryHandle, pos, end, numOfRows, &qstart, &qend); - start = end; - end = pos; - } + numOfRows = copyDataFromFileBlock(pQueryHandle, pQueryHandle->outputCapacity, numOfRows, qstart, qend); + pos += (qend - qstart + 1) * step; - numOfRows = copyDataFromFileBlock(pQueryHandle, pQueryHandle->outputCapacity, numOfRows, start, end); - pos += (end - start + 1) * step; + cur->win.ekey = tsArray[end]; + cur->lastKey = cur->win.ekey + step; } } while (numOfRows < pQueryHandle->outputCapacity); @@ -1124,30 +1147,14 @@ static void doMergeTwoLevelData(STsdbQueryHandle* pQueryHandle, STableCheckInfo* cur->win.skey = tsArray[pos]; } - int32_t start = -1; - int32_t end = -1; - - // all remain data are qualified, but check the remain capacity in the first place. - if (ASCENDING_TRAVERSE(pQueryHandle->order)) { - int32_t remain = endPos - pos + 1; - if (remain + numOfRows > pQueryHandle->outputCapacity) { - endPos = (pQueryHandle->outputCapacity - numOfRows) + pos - 1; - } - - start = pos; - end = endPos; - } else { - int32_t remain = pos + 1; - if (remain + numOfRows > pQueryHandle->outputCapacity) { - endPos = pos + 1 - (pQueryHandle->outputCapacity - numOfRows); - } - - start = endPos; - end = pos; - } + int32_t start = -1, end = -1; + getQualifiedRowsPos(pQueryHandle, pos, endPos, numOfRows, &start, &end); numOfRows = copyDataFromFileBlock(pQueryHandle, pQueryHandle->outputCapacity, numOfRows, start, end); pos += (end - start + 1) * step; + + cur->win.ekey = tsArray[end]; + cur->lastKey = cur->win.ekey + step; } } } @@ -1157,21 +1164,16 @@ static void doMergeTwoLevelData(STsdbQueryHandle* pQueryHandle, STableCheckInfo* if (!ASCENDING_TRAVERSE(pQueryHandle->order)) { SWAP(cur->win.skey, cur->win.ekey, TSKEY); - - // if the buffer is not full in case of descending order query, move the data in the front of the buffer - if (numOfRows < pQueryHandle->outputCapacity) { - int32_t emptySize = pQueryHandle->outputCapacity - numOfRows; - for(int32_t i = 0; i < numOfCols; ++i) { - SColumnInfoData* pColInfo = taosArrayGet(pQueryHandle->pColumns, i); - memmove(pColInfo->pData, pColInfo->pData + emptySize * pColInfo->info.bytes, numOfRows * pColInfo->info.bytes); - } - } } - - pCheckInfo->lastKey = cur->lastKey; - pQueryHandle->realNumOfRows = numOfRows; - cur->rows = numOfRows; - cur->pos = pos; + + moveDataToFront(pQueryHandle, numOfRows, numOfCols); + updateInfoAfterMerge(pQueryHandle, pCheckInfo, numOfRows, pos); + + if (ASCENDING_TRAVERSE(pQueryHandle->order)) { + assert(cur->win.skey >= pQueryHandle->window.skey && cur->win.ekey <= pQueryHandle->window.ekey); + } else { + assert(cur->win.skey >= pQueryHandle->window.ekey && cur->win.ekey <= pQueryHandle->window.skey); + } tsdbDebug("%p uid:%" PRIu64",tid:%d data block created, brange:%"PRIu64"-%"PRIu64" rows:%d, %p", pQueryHandle, pCheckInfo->tableId.uid, pCheckInfo->tableId.tid, cur->win.skey, cur->win.ekey, cur->rows, pQueryHandle->qinfo); diff --git a/src/util/src/tcache.c b/src/util/src/tcache.c index 92d4b2caac..89199f035d 100644 --- a/src/util/src/tcache.c +++ b/src/util/src/tcache.c @@ -674,6 +674,7 @@ void* taosCacheTimedRefresh(void *handle) { // check if current cache object will be deleted every 500ms. if (pCacheObj->deleting) { + uDebug("%s refresh threads quit", pCacheObj->name); break; } From 431945fa2def3ed582d35a51846136cf9516df55 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 21 Jul 2020 00:37:12 +0800 Subject: [PATCH 02/42] [td-225] fix bugs in query. --- src/query/inc/qResultbuf.h | 1 - src/tsdb/src/tsdbRead.c | 4 ++++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/query/inc/qResultbuf.h b/src/query/inc/qResultbuf.h index d9da6bb63e..e8b3af7ccf 100644 --- a/src/query/inc/qResultbuf.h +++ b/src/query/inc/qResultbuf.h @@ -37,7 +37,6 @@ typedef struct SDiskbasedResultBuf { int32_t numOfPages; int64_t totalBufSize; FILE* file; -// int32_t fd; // data file fd int32_t allocateId; // allocated page id int32_t incStep; // minimum allocated pages void* pBuf; // mmap buffer pointer diff --git a/src/tsdb/src/tsdbRead.c b/src/tsdb/src/tsdbRead.c index 3eeca85db7..71d3fd9cc4 100644 --- a/src/tsdb/src/tsdbRead.c +++ b/src/tsdb/src/tsdbRead.c @@ -995,6 +995,8 @@ static void getQualifiedRowsPos(STsdbQueryHandle* pQueryHandle, int32_t startPos int32_t remain = endPos - startPos + 1; if (remain + numOfExisted > pQueryHandle->outputCapacity) { *end = (pQueryHandle->outputCapacity - numOfExisted) + startPos - 1; + } else { + *end = endPos; } *start = startPos; @@ -1002,6 +1004,8 @@ static void getQualifiedRowsPos(STsdbQueryHandle* pQueryHandle, int32_t startPos int32_t remain = (startPos - endPos) + 1; if (remain + numOfExisted > pQueryHandle->outputCapacity) { *end = startPos + 1 - (pQueryHandle->outputCapacity - numOfExisted); + } else { + *end = endPos; } *start = *end; From ae65941ce8945f0c21c550260c43292f9e5449be Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 21 Jul 2020 11:33:31 +0800 Subject: [PATCH 03/42] [td-225] add check --- src/tsdb/src/tsdbRead.c | 344 ++++++++++++++++++++-------------------- 1 file changed, 175 insertions(+), 169 deletions(-) diff --git a/src/tsdb/src/tsdbRead.c b/src/tsdb/src/tsdbRead.c index 71d3fd9cc4..1d15860912 100644 --- a/src/tsdb/src/tsdbRead.c +++ b/src/tsdb/src/tsdbRead.c @@ -198,38 +198,38 @@ TsdbQueryHandleT* tsdbQueryTables(TSDB_REPO_T* tsdb, STsdbQueryCond* pCond, STab // allocate buffer in order to load data blocks from file int32_t numOfCols = pCond->numOfCols; - + pQueryHandle->statis = calloc(numOfCols, sizeof(SDataStatis)); pQueryHandle->pColumns = taosArrayInit(numOfCols, sizeof(SColumnInfoData)); // todo: use list instead of array? - + for (int32_t i = 0; i < numOfCols; ++i) { SColumnInfoData colInfo = {{0}, 0}; - + colInfo.info = pCond->colList[i]; colInfo.pData = calloc(1, EXTRA_BYTES + pQueryHandle->outputCapacity * pCond->colList[i].bytes); taosArrayPush(pQueryHandle->pColumns, &colInfo); pQueryHandle->statis[i].colId = colInfo.info.colId; } - + pQueryHandle->pTableCheckInfo = taosArrayInit(groupList->numOfTables, sizeof(STableCheckInfo)); STsdbMeta* pMeta = tsdbGetMeta(tsdb); assert(pMeta != NULL); - + for (int32_t i = 0; i < sizeOfGroup; ++i) { SArray* group = *(SArray**) taosArrayGet(groupList->pGroupList, i); - + size_t gsize = taosArrayGetSize(group); assert(gsize > 0); - + for (int32_t j = 0; j < gsize; ++j) { STable* pTable = (STable*) taosArrayGetP(group, j); - + STableCheckInfo info = { .lastKey = pQueryHandle->window.skey, .tableId = pTable->tableId, .pTableObj = pTable, }; - + assert(info.pTableObj != NULL && (info.pTableObj->type == TSDB_NORMAL_TABLE || info.pTableObj->type == TSDB_CHILD_TABLE || info.pTableObj->type == TSDB_STREAM_TABLE)); @@ -259,17 +259,17 @@ TsdbQueryHandleT tsdbQueryLastRow(TSDB_REPO_T *tsdb, STsdbQueryCond *pCond, STab SArray* tsdbGetQueriedTableList(TsdbQueryHandleT *pHandle) { assert(pHandle != NULL); - + STsdbQueryHandle *pQueryHandle = (STsdbQueryHandle*) pHandle; - + size_t size = taosArrayGetSize(pQueryHandle->pTableCheckInfo); SArray* res = taosArrayInit(size, POINTER_BYTES); - + for(int32_t i = 0; i < size; ++i) { STableCheckInfo* pCheckInfo = taosArrayGet(pQueryHandle->pTableCheckInfo, i); taosArrayPush(res, &pCheckInfo->pTableObj); } - + return res; } @@ -285,11 +285,11 @@ TsdbQueryHandleT tsdbQueryRowsInExternalWindow(TSDB_REPO_T *tsdb, STsdbQueryCond static bool initTableMemIterator(STsdbQueryHandle* pHandle, STableCheckInfo* pCheckInfo) { STable* pTable = pCheckInfo->pTableObj; assert(pTable != NULL); - + if (pCheckInfo->initBuf) { return true; } - + pCheckInfo->initBuf = true; int32_t order = pHandle->order; @@ -297,34 +297,34 @@ static bool initTableMemIterator(STsdbQueryHandle* pHandle, STableCheckInfo* pCh if (pHandle->mem == NULL && pHandle->imem == NULL) { return false; } - + assert(pCheckInfo->iter == NULL && pCheckInfo->iiter == NULL); - + if (pHandle->mem && pHandle->mem->tData[pCheckInfo->tableId.tid] != NULL) { pCheckInfo->iter = tSkipListCreateIterFromVal(pHandle->mem->tData[pCheckInfo->tableId.tid]->pData, (const char*) &pCheckInfo->lastKey, TSDB_DATA_TYPE_TIMESTAMP, order); } - + if (pHandle->imem && pHandle->imem->tData[pCheckInfo->tableId.tid] != NULL) { pCheckInfo->iiter = tSkipListCreateIterFromVal(pHandle->imem->tData[pCheckInfo->tableId.tid]->pData, (const char*) &pCheckInfo->lastKey, TSDB_DATA_TYPE_TIMESTAMP, order); } - + // both iterators are NULL, no data in buffer right now if (pCheckInfo->iter == NULL && pCheckInfo->iiter == NULL) { return false; } - + bool memEmpty = (pCheckInfo->iter == NULL) || (pCheckInfo->iter != NULL && !tSkipListIterNext(pCheckInfo->iter)); bool imemEmpty = (pCheckInfo->iiter == NULL) || (pCheckInfo->iiter != NULL && !tSkipListIterNext(pCheckInfo->iiter)); if (memEmpty && imemEmpty) { // buffer is empty return false; } - + if (!memEmpty) { SSkipListNode* node = tSkipListIterGet(pCheckInfo->iter); assert(node != NULL); - + SDataRow row = SL_GET_NODE_DATA(node); TSKEY key = dataRowKey(row); // first timestamp in buffer tsdbDebug("%p uid:%" PRId64", tid:%d check data in mem from skey:%" PRId64 ", order:%d, %p", pHandle, @@ -333,11 +333,11 @@ static bool initTableMemIterator(STsdbQueryHandle* pHandle, STableCheckInfo* pCh tsdbDebug("%p uid:%"PRId64", tid:%d no data in mem, %p", pHandle, pCheckInfo->tableId.uid, pCheckInfo->tableId.tid, pHandle->qinfo); } - + if (!imemEmpty) { SSkipListNode* node = tSkipListIterGet(pCheckInfo->iiter); assert(node != NULL); - + SDataRow row = SL_GET_NODE_DATA(node); TSKEY key = dataRowKey(row); // first timestamp in buffer tsdbDebug("%p uid:%" PRId64", tid:%d check data in imem from skey:%" PRId64 ", order:%d, %p", pHandle, @@ -346,7 +346,7 @@ static bool initTableMemIterator(STsdbQueryHandle* pHandle, STableCheckInfo* pCh tsdbDebug("%p uid:%"PRId64", tid:%d no data in imem, %p", pHandle, pCheckInfo->tableId.uid, pCheckInfo->tableId.tid, pHandle->qinfo); } - + return true; } @@ -449,7 +449,7 @@ static bool hasMoreDataInCache(STsdbQueryHandle* pHandle) { size_t size = taosArrayGetSize(pHandle->pTableCheckInfo); assert(pHandle->activeIndex < size && pHandle->activeIndex >= 0 && size >= 1); pHandle->cur.fid = -1; - + STableCheckInfo* pCheckInfo = taosArrayGet(pHandle->pTableCheckInfo, pHandle->activeIndex); STable* pTable = pCheckInfo->pTableObj; @@ -467,17 +467,17 @@ static bool hasMoreDataInCache(STsdbQueryHandle* pHandle) { pCheckInfo->lastKey = dataRowKey(row); // first timestamp in buffer tsdbDebug("%p uid:%" PRId64", tid:%d check data in buffer from skey:%" PRId64 ", order:%d, %p", pHandle, pCheckInfo->tableId.uid, pCheckInfo->tableId.tid, pCheckInfo->lastKey, pHandle->order, pHandle->qinfo); - + // all data in mem are checked already. if ((pCheckInfo->lastKey > pHandle->window.ekey && ASCENDING_TRAVERSE(pHandle->order)) || (pCheckInfo->lastKey < pHandle->window.ekey && !ASCENDING_TRAVERSE(pHandle->order))) { return false; } - + int32_t step = ASCENDING_TRAVERSE(pHandle->order)? 1:-1; STimeWindow* win = &pHandle->cur.win; pHandle->cur.rows = tsdbReadRowsFromCache(pCheckInfo, pHandle->window.ekey, pHandle->outputCapacity, win, pHandle); - + // update the last key value pCheckInfo->lastKey = win->ekey + step; pHandle->cur.lastKey = win->ekey + step; @@ -486,7 +486,7 @@ static bool hasMoreDataInCache(STsdbQueryHandle* pHandle) { if (!ASCENDING_TRAVERSE(pHandle->order)) { SWAP(win->skey, win->ekey, TSKEY); } - + return true; } @@ -495,31 +495,31 @@ static int32_t getFileIdFromKey(TSKEY key, int32_t daysPerFile, int32_t precisio if (key == TSKEY_INITIAL_VAL) { return INT32_MIN; } - + int64_t fid = (int64_t)(key / (daysPerFile * tsMsPerDay[precision])); // set the starting fileId if (fid < 0L && llabs(fid) > INT32_MAX) { // data value overflow for INT32 fid = INT32_MIN; } - + if (fid > 0L && fid > INT32_MAX) { fid = INT32_MAX; } - + return fid; } static int32_t binarySearchForBlock(SCompBlock* pBlock, int32_t numOfBlocks, TSKEY skey, int32_t order) { int32_t firstSlot = 0; int32_t lastSlot = numOfBlocks - 1; - + int32_t midSlot = firstSlot; - + while (1) { numOfBlocks = lastSlot - firstSlot + 1; midSlot = (firstSlot + (numOfBlocks >> 1)); - + if (numOfBlocks == 1) break; - + if (skey > pBlock[midSlot].keyLast) { if (numOfBlocks == 2) break; if ((order == TSDB_ORDER_DESC) && (skey < pBlock[midSlot + 1].keyFirst)) break; @@ -531,7 +531,7 @@ static int32_t binarySearchForBlock(SCompBlock* pBlock, int32_t numOfBlocks, TSK break; // got the slot } } - + return midSlot; } @@ -669,10 +669,10 @@ static void handleDataMergeIfNeeded(STsdbQueryHandle* pQueryHandle, SCompBlock* TSKEY key = (row != NULL)? dataRowKey(row):TSKEY_INITIAL_VAL; cur->pos = ASCENDING_TRAVERSE(pQueryHandle->order)? 0:(binfo.rows-1); - + if ((ASCENDING_TRAVERSE(pQueryHandle->order) && (key != TSKEY_INITIAL_VAL && key <= binfo.window.ekey)) || (!ASCENDING_TRAVERSE(pQueryHandle->order) && (key != TSKEY_INITIAL_VAL && key >= binfo.window.skey))) { - + if ((ASCENDING_TRAVERSE(pQueryHandle->order) && (key != TSKEY_INITIAL_VAL && key < binfo.window.skey)) || (!ASCENDING_TRAVERSE(pQueryHandle->order) && (key != TSKEY_INITIAL_VAL && key > binfo.window.ekey))) { @@ -688,12 +688,12 @@ static void handleDataMergeIfNeeded(STsdbQueryHandle* pQueryHandle, SCompBlock* if (!ASCENDING_TRAVERSE(pQueryHandle->order)) { SWAP(cur->win.skey, cur->win.ekey, TSKEY); } - + cur->mixBlock = true; cur->blockCompleted = false; return; } - + doLoadFileDataBlock(pQueryHandle, pBlock, pCheckInfo); doMergeTwoLevelData(pQueryHandle, pCheckInfo, pBlock); } else { @@ -727,14 +727,14 @@ static bool loadFileDataBlock(STsdbQueryHandle* pQueryHandle, SCompBlock* pBlock SDataCols* pTSCol = pQueryHandle->rhelper.pDataCols[0]; assert(pTSCol->cols->type == TSDB_DATA_TYPE_TIMESTAMP && pTSCol->numOfRows == pBlock->numOfRows); - + if (pCheckInfo->lastKey > pBlock->keyFirst) { cur->pos = binarySearchForKey(pTSCol->cols[0].pData, pBlock->numOfRows, pCheckInfo->lastKey, pQueryHandle->order); } else { cur->pos = 0; } - + doMergeTwoLevelData(pQueryHandle, pCheckInfo, pBlock); } else { // the whole block is loaded in to buffer handleDataMergeIfNeeded(pQueryHandle, pBlock, pCheckInfo); @@ -744,14 +744,14 @@ static bool loadFileDataBlock(STsdbQueryHandle* pQueryHandle, SCompBlock* pBlock if (!doLoadFileDataBlock(pQueryHandle, pBlock, pCheckInfo)) { return false; } - + SDataCols* pTSCol = pQueryHandle->rhelper.pDataCols[0]; if (pCheckInfo->lastKey < pBlock->keyLast) { cur->pos = binarySearchForKey(pTSCol->cols[0].pData, pBlock->numOfRows, pCheckInfo->lastKey, pQueryHandle->order); } else { cur->pos = pBlock->numOfRows - 1; } - + doMergeTwoLevelData(pQueryHandle, pCheckInfo, pBlock); } else { handleDataMergeIfNeeded(pQueryHandle, pBlock, pCheckInfo); @@ -767,7 +767,7 @@ static int doBinarySearchKey(char* pValue, int num, TSKEY key, int order) { TSKEY* keyList; assert(order == TSDB_ORDER_ASC || order == TSDB_ORDER_DESC); - + if (num <= 0) return -1; keyList = (TSKEY*)pValue; @@ -826,13 +826,13 @@ static int doBinarySearchKey(char* pValue, int num, TSKEY key, int order) { static int32_t copyDataFromFileBlock(STsdbQueryHandle* pQueryHandle, int32_t capacity, int32_t numOfRows, int32_t start, int32_t end) { char* pData = NULL; int32_t step = ASCENDING_TRAVERSE(pQueryHandle->order)? 1 : -1; - + SDataCols* pCols = pQueryHandle->rhelper.pDataCols[0]; TSKEY* tsArray = pCols->cols[0].pData; - + int32_t num = end - start + 1; int32_t requiredNumOfCols = taosArrayGetSize(pQueryHandle->pColumns); - + //data in buffer has greater timestamp, copy data in file block int32_t i = 0, j = 0; while(i < requiredNumOfCols && j < pCols->numOfCols) { @@ -905,7 +905,7 @@ static int32_t copyDataFromFileBlock(STsdbQueryHandle* pQueryHandle, int32_t cap i++; } - + pQueryHandle->cur.win.ekey = tsArray[end]; pQueryHandle->cur.lastKey = tsArray[end] + step; @@ -1027,7 +1027,7 @@ static void updateInfoAfterMerge(STsdbQueryHandle* pQueryHandle, STableCheckInfo static void doMergeTwoLevelData(STsdbQueryHandle* pQueryHandle, STableCheckInfo* pCheckInfo, SCompBlock* pBlock) { SQueryFilePos* cur = &pQueryHandle->cur; SDataBlockInfo blockInfo = GET_FILE_DATA_BLOCK_INFO(pCheckInfo, pBlock); - + initTableMemIterator(pQueryHandle, pCheckInfo); SDataCols* pCols = pQueryHandle->rhelper.pDataCols[0]; @@ -1038,7 +1038,7 @@ static void doMergeTwoLevelData(STsdbQueryHandle* pQueryHandle, STableCheckInfo* int32_t order = (pQueryHandle->order == TSDB_ORDER_ASC)? TSDB_ORDER_DESC:TSDB_ORDER_ASC; int32_t step = ASCENDING_TRAVERSE(pQueryHandle->order)? 1:-1; - int32_t numOfCols = taosArrayGetSize(pQueryHandle->pColumns); + int32_t numOfCols = QH_GET_NUM_OF_COLS(pQueryHandle); STable* pTable = pCheckInfo->pTableObj; @@ -1054,12 +1054,11 @@ static void doMergeTwoLevelData(STsdbQueryHandle* pQueryHandle, STableCheckInfo* endPos = doBinarySearchKey(pCols->cols[0].pData, pCols->numOfRows, pQueryHandle->window.ekey, order); cur->mixBlock = true; } - + // compared with the data from in-memory buffer, to generate the correct timestamp array list - int32_t pos = cur->pos; - int32_t numOfRows = 0; - pQueryHandle->cur.win = TSWINDOW_INITIALIZER; + int32_t pos = cur->pos; + cur->win = TSWINDOW_INITIALIZER; // no data in buffer, load data from file directly if (pCheckInfo->iiter == NULL && pCheckInfo->iter == NULL) { @@ -1069,13 +1068,16 @@ static void doMergeTwoLevelData(STsdbQueryHandle* pQueryHandle, STableCheckInfo* if (!ASCENDING_TRAVERSE(pQueryHandle->order)) { SWAP(start, end, int32_t); } - - numOfRows = copyDataFromFileBlock(pQueryHandle, pQueryHandle->outputCapacity, numOfRows, start, end); - cur->win = (STimeWindow) {.skey = tsArray[start], .ekey = tsArray[end]}; + numOfRows = copyDataFromFileBlock(pQueryHandle, pQueryHandle->outputCapacity, numOfRows, start, end); + + // the time window should always be right order: skey <= ekey + cur->win = (STimeWindow) {.skey = tsArray[start], .ekey = tsArray[end]}; pos += (end - start + 1) * step; - cur->blockCompleted = (((pos >= endPos || cur->lastKey > pQueryHandle->window.ekey) && ASCENDING_TRAVERSE(pQueryHandle->order)) || - ((pos <= endPos || cur->lastKey < pQueryHandle->window.ekey) && !ASCENDING_TRAVERSE(pQueryHandle->order))); + + cur->blockCompleted = + (((pos >= endPos || cur->lastKey > pQueryHandle->window.ekey) && ASCENDING_TRAVERSE(pQueryHandle->order)) || + ((pos <= endPos || cur->lastKey < pQueryHandle->window.ekey) && !ASCENDING_TRAVERSE(pQueryHandle->order))); // if the buffer is not full in case of descending order query, move the data in the front of the buffer moveDataToFront(pQueryHandle, numOfRows, numOfCols); @@ -1133,11 +1135,11 @@ static void doMergeTwoLevelData(STsdbQueryHandle* pQueryHandle, STableCheckInfo* numOfRows = copyDataFromFileBlock(pQueryHandle, pQueryHandle->outputCapacity, numOfRows, qstart, qend); pos += (qend - qstart + 1) * step; - cur->win.ekey = tsArray[end]; + cur->win.ekey = ASCENDING_TRAVERSE(pQueryHandle->order)? tsArray[qend]:tsArray[qstart]; cur->lastKey = cur->win.ekey + step; } } while (numOfRows < pQueryHandle->outputCapacity); - + if (numOfRows < pQueryHandle->outputCapacity) { /** * if cache is empty, load remain file block data. In contrast, if there are remain data in cache, do NOT @@ -1157,14 +1159,15 @@ static void doMergeTwoLevelData(STsdbQueryHandle* pQueryHandle, STableCheckInfo* numOfRows = copyDataFromFileBlock(pQueryHandle, pQueryHandle->outputCapacity, numOfRows, start, end); pos += (end - start + 1) * step; - cur->win.ekey = tsArray[end]; + cur->win.ekey = ASCENDING_TRAVERSE(pQueryHandle->order)? tsArray[end]:tsArray[start]; cur->lastKey = cur->win.ekey + step; } } } - - cur->blockCompleted = (((pos >= endPos || cur->lastKey > pQueryHandle->window.ekey) && ASCENDING_TRAVERSE(pQueryHandle->order)) || - ((pos <= endPos || cur->lastKey < pQueryHandle->window.ekey) && !ASCENDING_TRAVERSE(pQueryHandle->order))); + + cur->blockCompleted = + (((pos >= endPos || cur->lastKey > pQueryHandle->window.ekey) && ASCENDING_TRAVERSE(pQueryHandle->order)) || + ((pos <= endPos || cur->lastKey < pQueryHandle->window.ekey) && !ASCENDING_TRAVERSE(pQueryHandle->order))); if (!ASCENDING_TRAVERSE(pQueryHandle->order)) { SWAP(cur->win.skey, cur->win.ekey, TSKEY); @@ -1179,6 +1182,9 @@ static void doMergeTwoLevelData(STsdbQueryHandle* pQueryHandle, STableCheckInfo* assert(cur->win.skey >= pQueryHandle->window.ekey && cur->win.ekey <= pQueryHandle->window.skey); } + SColumnInfoData* pColInfoData = taosArrayGet(pQueryHandle->pColumns, 0); + assert(cur->win.skey == ((TSKEY*)pColInfoData->pData)[0] && cur->win.ekey == ((TSKEY*)pColInfoData->pData)[cur->rows-1]); + tsdbDebug("%p uid:%" PRIu64",tid:%d data block created, brange:%"PRIu64"-%"PRIu64" rows:%d, %p", pQueryHandle, pCheckInfo->tableId.uid, pCheckInfo->tableId.tid, cur->win.skey, cur->win.ekey, cur->rows, pQueryHandle->qinfo); } @@ -1314,16 +1320,16 @@ static int32_t createDataBlocksInfo(STsdbQueryHandle* pQueryHandle, int32_t numO cleanBlockOrderSupporter(&sup, 0); return TSDB_CODE_TDB_OUT_OF_MEMORY; } - + int32_t cnt = 0; int32_t numOfQualTables = 0; - + for (int32_t j = 0; j < numOfTables; ++j) { STableCheckInfo* pTableCheck = (STableCheckInfo*)taosArrayGet(pQueryHandle->pTableCheckInfo, j); if (pTableCheck->numOfBlocks <= 0) { continue; } - + SCompBlock* pBlock = pTableCheck->pCompInfo->blocks; sup.numOfBlocksPerTable[numOfQualTables] = pTableCheck->numOfBlocks; @@ -1428,26 +1434,26 @@ static int32_t getDataBlocksInFilesImpl(STsdbQueryHandle* pQueryHandle, bool* ex if ((code = getFileCompInfo(pQueryHandle, &numOfBlocks)) != TSDB_CODE_SUCCESS) { break; } - + tsdbDebug("%p %d blocks found in file for %d table(s), fid:%d, %p", pQueryHandle, numOfBlocks, numOfTables, pQueryHandle->pFileGroup->fileId, pQueryHandle->qinfo); - + assert(numOfBlocks >= 0); if (numOfBlocks == 0) { continue; } - + // todo return error code to query engine if (createDataBlocksInfo(pQueryHandle, numOfBlocks, &pQueryHandle->numOfBlocks) != TSDB_CODE_SUCCESS) { break; } - + assert(numOfBlocks >= pQueryHandle->numOfBlocks); if (pQueryHandle->numOfBlocks > 0) { break; } } - + // no data in file anymore if (pQueryHandle->numOfBlocks <= 0) { if (code == TSDB_CODE_SUCCESS) { @@ -1458,10 +1464,10 @@ static int32_t getDataBlocksInFilesImpl(STsdbQueryHandle* pQueryHandle, bool* ex *exists = false; return code; } - + cur->slot = ASCENDING_TRAVERSE(pQueryHandle->order)? 0:pQueryHandle->numOfBlocks-1; cur->fid = pQueryHandle->pFileGroup->fileId; - + STableBlockInfo* pBlockInfo = &pQueryHandle->pDataBlockInfo[cur->slot]; *exists = loadFileDataBlock(pQueryHandle, pBlockInfo->compBlock, pBlockInfo->pTableCheckInfo); @@ -1477,7 +1483,7 @@ static int32_t getDataBlocksInFiles(STsdbQueryHandle* pQueryHandle, bool* exists pQueryHandle->locateStart = true; STsdbCfg* pCfg = &pQueryHandle->pTsdb->config; int32_t fid = getFileIdFromKey(pQueryHandle->window.skey, pCfg->daysPerFile, pCfg->precision); - + tsdbInitFileGroupIter(pFileHandle, &pQueryHandle->fileIter, pQueryHandle->order); tsdbSeekFileGroupIter(&pQueryHandle->fileIter, fid); @@ -1486,7 +1492,7 @@ static int32_t getDataBlocksInFiles(STsdbQueryHandle* pQueryHandle, bool* exists // check if current file block is all consumed STableBlockInfo* pBlockInfo = &pQueryHandle->pDataBlockInfo[cur->slot]; STableCheckInfo* pCheckInfo = pBlockInfo->pTableCheckInfo; - + // current block is done, try next if (!cur->mixBlock || cur->blockCompleted) { if ((cur->slot == pQueryHandle->numOfBlocks - 1 && ASCENDING_TRAVERSE(pQueryHandle->order)) || @@ -1497,10 +1503,10 @@ static int32_t getDataBlocksInFiles(STsdbQueryHandle* pQueryHandle, bool* exists // next block of the same file int32_t step = ASCENDING_TRAVERSE(pQueryHandle->order) ? 1 : -1; cur->slot += step; - + cur->mixBlock = false; cur->blockCompleted = false; - + STableBlockInfo* pNext = &pQueryHandle->pDataBlockInfo[cur->slot]; *exists = loadFileDataBlock(pQueryHandle, pNext->compBlock, pNext->pTableCheckInfo); @@ -1518,15 +1524,15 @@ static int32_t getDataBlocksInFiles(STsdbQueryHandle* pQueryHandle, bool* exists static bool doHasDataInBuffer(STsdbQueryHandle* pQueryHandle) { size_t numOfTables = taosArrayGetSize(pQueryHandle->pTableCheckInfo); assert(numOfTables <= ((STsdbRepo*)pQueryHandle->pTsdb)->config.maxTables); - + while (pQueryHandle->activeIndex < numOfTables) { if (hasMoreDataInCache(pQueryHandle)) { return true; } - + pQueryHandle->activeIndex += 1; } - + return false; } @@ -1544,14 +1550,14 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) { if (pQueryHandle->type == TSDB_QUERY_TYPE_EXTERNAL) { pQueryHandle->type = TSDB_QUERY_TYPE_ALL; pQueryHandle->order = TSDB_ORDER_DESC; - + if (!tsdbNextDataBlock(pHandle)) { return false; } - + /*SDataBlockInfo* pBlockInfo =*/ tsdbRetrieveDataBlockInfo(pHandle, &blockInfo); /*SArray *pDataBlock = */tsdbRetrieveDataBlock(pHandle, pQueryHandle->defaultLoadColumn); - + if (pQueryHandle->cur.win.ekey == pQueryHandle->window.skey) { // data already retrieve, discard other data rows and return int32_t numOfCols = QH_GET_NUM_OF_COLS(pQueryHandle); @@ -1559,7 +1565,7 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) { SColumnInfoData* pCol = taosArrayGet(pQueryHandle->pColumns, i); memcpy(pCol->pData, pCol->pData + pCol->info.bytes * (pQueryHandle->cur.rows-1), pCol->info.bytes); } - + pQueryHandle->cur.win = (STimeWindow){pQueryHandle->window.skey, pQueryHandle->window.skey}; pQueryHandle->window = pQueryHandle->cur.win; pQueryHandle->cur.rows = 1; @@ -1576,7 +1582,7 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) { pSecQueryHandle->checkFiles = true; pSecQueryHandle->activeIndex = 0; pSecQueryHandle->outputCapacity = ((STsdbRepo*)pSecQueryHandle->pTsdb)->config.maxRowsPerFileBlock; - + if (tsdbInitReadHelper(&pSecQueryHandle->rhelper, (STsdbRepo*) pSecQueryHandle->pTsdb) != 0) { free(pSecQueryHandle); return false; @@ -1586,24 +1592,24 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) { // allocate buffer in order to load data blocks from file int32_t numOfCols = QH_GET_NUM_OF_COLS(pQueryHandle); - + pSecQueryHandle->statis = calloc(numOfCols, sizeof(SDataStatis)); pSecQueryHandle->pColumns = taosArrayInit(numOfCols, sizeof(SColumnInfoData)); for (int32_t i = 0; i < numOfCols; ++i) { SColumnInfoData colInfo = {{0}, 0}; SColumnInfoData* pCol = taosArrayGet(pQueryHandle->pColumns, i); - + colInfo.info = pCol->info; colInfo.pData = calloc(1, EXTRA_BYTES + pQueryHandle->outputCapacity * pCol->info.bytes); taosArrayPush(pSecQueryHandle->pColumns, &colInfo); } - + size_t si = taosArrayGetSize(pQueryHandle->pTableCheckInfo); pSecQueryHandle->pTableCheckInfo = taosArrayInit(si, sizeof(STableCheckInfo)); STsdbMeta* pMeta = tsdbGetMeta(pQueryHandle->pTsdb); assert(pMeta != NULL); - + for (int32_t j = 0; j < si; ++j) { STableCheckInfo* pCheckInfo = (STableCheckInfo*) taosArrayGet(pQueryHandle->pTableCheckInfo, j); STableCheckInfo info = { @@ -1611,10 +1617,10 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) { .tableId = pCheckInfo->tableId, .pTableObj = pCheckInfo->pTableObj, }; - + taosArrayPush(pSecQueryHandle->pTableCheckInfo, &info); } - + tsdbInitDataBlockLoadInfo(&pSecQueryHandle->dataBlockLoadInfo); tsdbInitCompBlockLoadInfo(&pSecQueryHandle->compBlockLoadInfo); pSecQueryHandle->defaultLoadColumn = taosArrayClone(pQueryHandle->defaultLoadColumn); @@ -1624,17 +1630,17 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) { tsdbRetrieveDataBlockInfo((void*) pSecQueryHandle, &blockInfo); tsdbRetrieveDataBlock((void*) pSecQueryHandle, pSecQueryHandle->defaultLoadColumn); - + for (int32_t i = 0; i < numOfCols; ++i) { SColumnInfoData* pCol = taosArrayGet(pQueryHandle->pColumns, i); memcpy(pCol->pData, pCol->pData + pCol->info.bytes * (pQueryHandle->cur.rows-1), pCol->info.bytes); - + SColumnInfoData* pCol1 = taosArrayGet(pSecQueryHandle->pColumns, i); assert(pCol->info.colId == pCol1->info.colId); - + memcpy(pCol->pData + pCol->info.bytes, pCol1->pData, pCol1->info.bytes); } - + SColumnInfoData* pTSCol = taosArrayGet(pQueryHandle->pColumns, 0); // it is ascending order @@ -1658,7 +1664,7 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) { pQueryHandle->checkFiles = false; return true; } - + if (pQueryHandle->checkFiles) { bool exists = true; int32_t code = getDataBlocksInFiles(pQueryHandle, &exists); @@ -1671,11 +1677,11 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) { pQueryHandle->cost.checkForNextTime += elapsedTime; return exists; } - + pQueryHandle->activeIndex = 0; pQueryHandle->checkFiles = false; } - + // TODO: opt by consider the scan order bool ret = doHasDataInBuffer(pQueryHandle); terrno = TSDB_CODE_SUCCESS; @@ -1688,15 +1694,15 @@ bool tsdbNextDataBlock(TsdbQueryHandleT* pHandle) { void changeQueryHandleForLastrowQuery(TsdbQueryHandleT pqHandle) { STsdbQueryHandle* pQueryHandle = (STsdbQueryHandle*) pqHandle; assert(!ASCENDING_TRAVERSE(pQueryHandle->order)); - + // starts from the buffer in case of descending timestamp order check data blocks - + // todo consider the query time window, current last_row does not apply the query time window size_t numOfTables = taosArrayGetSize(pQueryHandle->pTableCheckInfo); - + TSKEY key = TSKEY_INITIAL_VAL; int32_t index = -1; - + for(int32_t i = 0; i < numOfTables; ++i) { STableCheckInfo* pCheckInfo = taosArrayGet(pQueryHandle->pTableCheckInfo, i); if (pCheckInfo->pTableObj->lastKey > key) { @@ -1704,36 +1710,36 @@ void changeQueryHandleForLastrowQuery(TsdbQueryHandleT pqHandle) { index = i; } } - + if (index == -1) { // todo add failure test cases return; } - + // erase all other elements in array list size_t size = taosArrayGetSize(pQueryHandle->pTableCheckInfo); for (int32_t i = 0; i < size; ++i) { if (i == index) { continue; } - + STableCheckInfo* pTableCheckInfo = taosArrayGet(pQueryHandle->pTableCheckInfo, i); tSkipListDestroyIter(pTableCheckInfo->iter); - + if (pTableCheckInfo->pDataCols != NULL) { tfree(pTableCheckInfo->pDataCols->buf); } - + tfree(pTableCheckInfo->pDataCols); tfree(pTableCheckInfo->pCompInfo); } - + STableCheckInfo info = *(STableCheckInfo*) taosArrayGet(pQueryHandle->pTableCheckInfo, index); taosArrayClear(pQueryHandle->pTableCheckInfo); - + info.lastKey = key; taosArrayPush(pQueryHandle->pTableCheckInfo, &info); - + // update the query time window according to the chosen last timestamp pQueryHandle->window = (STimeWindow) {key, key}; } @@ -1742,13 +1748,13 @@ static void changeQueryHandleForInterpQuery(TsdbQueryHandleT pHandle) { // filter the queried time stamp in the first place STsdbQueryHandle* pQueryHandle = (STsdbQueryHandle*) pHandle; pQueryHandle->order = TSDB_ORDER_DESC; - + assert(pQueryHandle->window.skey == pQueryHandle->window.ekey); - + // starts from the buffer in case of descending timestamp order check data blocks // todo consider the query time window, current last_row does not apply the query time window size_t numOfTables = taosArrayGetSize(pQueryHandle->pTableCheckInfo); - + int32_t i = 0; while(i < numOfTables) { STableCheckInfo* pCheckInfo = taosArrayGet(pQueryHandle->pTableCheckInfo, i); @@ -1756,21 +1762,21 @@ static void changeQueryHandleForInterpQuery(TsdbQueryHandleT pHandle) { pCheckInfo->pTableObj->lastKey != TSKEY_INITIAL_VAL) { break; } - + i++; } - + // there are no data in all the tables if (i == numOfTables) { return; } - + STableCheckInfo info = *(STableCheckInfo*) taosArrayGet(pQueryHandle->pTableCheckInfo, i); taosArrayClear(pQueryHandle->pTableCheckInfo); - + info.lastKey = pQueryHandle->window.skey; taosArrayPush(pQueryHandle->pTableCheckInfo, &info); - + // update the query time window according to the chosen last timestamp pQueryHandle->window = (STimeWindow) {info.lastKey, TSKEY_INITIAL_VAL}; } @@ -1794,7 +1800,7 @@ static int tsdbReadRowsFromCache(STableCheckInfo* pCheckInfo, TSKEY maxKey, int if ((key > maxKey && ASCENDING_TRAVERSE(pQueryHandle->order)) || (key < maxKey && !ASCENDING_TRAVERSE(pQueryHandle->order))) { tsdbDebug("%p key:%"PRIu64" beyond qrange:%"PRId64" - %"PRId64", no more data in buffer", pQueryHandle, key, pQueryHandle->window.skey, pQueryHandle->window.ekey); - + break; } @@ -1809,21 +1815,21 @@ static int tsdbReadRowsFromCache(STableCheckInfo* pCheckInfo, TSKEY maxKey, int moveToNextRowInMem(pCheckInfo); break; } - + } while(moveToNextRowInMem(pCheckInfo)); assert(numOfRows <= maxRowsToRead); - + // if the buffer is not full in case of descending order query, move the data in the front of the buffer if (!ASCENDING_TRAVERSE(pQueryHandle->order) && numOfRows < maxRowsToRead) { int32_t emptySize = maxRowsToRead - numOfRows; - + for(int32_t i = 0; i < numOfCols; ++i) { SColumnInfoData* pColInfo = taosArrayGet(pQueryHandle->pColumns, i); memmove(pColInfo->pData, pColInfo->pData + emptySize * pColInfo->info.bytes, numOfRows * pColInfo->info.bytes); } } - + int64_t elapsedTime = taosGetTimestampUs() - st; tsdbDebug("%p build data block from cache completed, elapsed time:%"PRId64" us, numOfRows:%d, numOfCols:%d", pQueryHandle, elapsedTime, numOfRows, numOfCols); @@ -1835,7 +1841,7 @@ void tsdbRetrieveDataBlockInfo(TsdbQueryHandleT* pQueryHandle, SDataBlockInfo* p STsdbQueryHandle* pHandle = (STsdbQueryHandle*)pQueryHandle; SQueryFilePos* cur = &pHandle->cur; STable* pTable = NULL; - + // there are data in file if (pHandle->cur.fid >= 0) { STableBlockInfo* pBlockInfo = &pHandle->pDataBlockInfo[cur->slot]; @@ -1857,13 +1863,13 @@ void tsdbRetrieveDataBlockInfo(TsdbQueryHandleT* pQueryHandle, SDataBlockInfo* p */ int32_t tsdbRetrieveDataBlockStatisInfo(TsdbQueryHandleT* pQueryHandle, SDataStatis** pBlockStatis) { STsdbQueryHandle* pHandle = (STsdbQueryHandle*) pQueryHandle; - + SQueryFilePos* c = &pHandle->cur; if (c->mixBlock) { *pBlockStatis = NULL; return TSDB_CODE_SUCCESS; } - + STableBlockInfo* pBlockInfo = &pHandle->pDataBlockInfo[c->slot]; assert((c->slot >= 0 && c->slot < pHandle->numOfBlocks) || ((c->slot == pHandle->numOfBlocks) && (c->slot == 0))); @@ -1883,7 +1889,7 @@ int32_t tsdbRetrieveDataBlockStatisInfo(TsdbQueryHandleT* pQueryHandle, SDataSta for(int32_t i = 0; i < numOfCols; ++i) { pHandle->statis[i].colId = colIds[i]; } - + tsdbGetDataStatis(&pHandle->rhelper, pHandle->statis, numOfCols); // always load the first primary timestamp column data @@ -1932,31 +1938,31 @@ SArray* tsdbRetrieveDataBlock(TsdbQueryHandleT* pQueryHandle, SArray* pIdList) { } else { SDataBlockInfo binfo = GET_FILE_DATA_BLOCK_INFO(pCheckInfo, pBlockInfo->compBlock); assert(pHandle->realNumOfRows <= binfo.rows); - + // data block has been loaded, todo extract method SDataBlockLoadInfo* pBlockLoadInfo = &pHandle->dataBlockLoadInfo; - + if (pBlockLoadInfo->slot == pHandle->cur.slot && pBlockLoadInfo->fileGroup->fileId == pHandle->cur.fid && pBlockLoadInfo->tid == pCheckInfo->pTableObj->tableId.tid) { return pHandle->pColumns; } else { // only load the file block SCompBlock* pBlock = pBlockInfo->compBlock; doLoadFileDataBlock(pHandle, pBlock, pCheckInfo); - + // todo refactor int32_t numOfRows = copyDataFromFileBlock(pHandle, pHandle->outputCapacity, 0, 0, pBlock->numOfRows - 1); - + // if the buffer is not full in case of descending order query, move the data in the front of the buffer if (!ASCENDING_TRAVERSE(pHandle->order) && numOfRows < pHandle->outputCapacity) { int32_t emptySize = pHandle->outputCapacity - numOfRows; int32_t reqNumOfCols = taosArrayGetSize(pHandle->pColumns); - + for(int32_t i = 0; i < reqNumOfCols; ++i) { SColumnInfoData* pColInfo = taosArrayGet(pHandle->pColumns, i); memmove(pColInfo->pData, pColInfo->pData + emptySize * pColInfo->info.bytes, numOfRows * pColInfo->info.bytes); } } - + return pHandle->pColumns; } } @@ -1967,11 +1973,11 @@ static int32_t getAllTableList(STable* pSuperTable, SArray* list) { SSkipListIterator* iter = tSkipListCreateIter(pSuperTable->pIndex); while (tSkipListIterNext(iter)) { SSkipListNode* pNode = tSkipListIterGet(iter); - + STable** pTable = (STable**) SL_GET_NODE_DATA((SSkipListNode*) pNode); taosArrayPush(list, pTable); } - + tSkipListDestroyIter(iter); return TSDB_CODE_SUCCESS; } @@ -1981,12 +1987,12 @@ static void destroyHelper(void* param) { return; } - + tQueryInfo* pInfo = (tQueryInfo*)param; if (pInfo->optr != TSDB_RELATION_IN) { tfree(pInfo->q); } - + // tVariantDestroy(&(pInfo->q)); free(param); } @@ -1998,7 +2004,7 @@ void filterPrepare(void* expr, void* param) { } pExpr->_node.info = calloc(1, sizeof(tQueryInfo)); - + STSchema* pTSSchema = (STSchema*) param; tQueryInfo* pInfo = pExpr->_node.info; tVariant* pCond = pExpr->_node.pRight->pVal; @@ -2008,7 +2014,7 @@ void filterPrepare(void* expr, void* param) { pInfo->optr = pExpr->_node.optr; pInfo->compare = getComparFunc(pSchema->type, pInfo->optr); pInfo->param = pTSSchema; - + if (pInfo->optr == TSDB_RELATION_IN) { pInfo->q = (char*) pCond->arr; } else { @@ -2028,18 +2034,18 @@ int32_t tableGroupComparFn(const void *p1, const void *p2, const void *param) { STableGroupSupporter* pTableGroupSupp = (STableGroupSupporter*) param; STable* pTable1 = *(STable**) p1; STable* pTable2 = *(STable**) p2; - + for (int32_t i = 0; i < pTableGroupSupp->numOfCols; ++i) { SColIndex* pColIndex = &pTableGroupSupp->pCols[i]; int32_t colIndex = pColIndex->colIndex; - + assert(colIndex >= TSDB_TBNAME_COLUMN_INDEX); - + char * f1 = NULL; char * f2 = NULL; int32_t type = 0; int32_t bytes = 0; - + if (colIndex == TSDB_TBNAME_COLUMN_INDEX) { f1 = (char*) TABLE_NAME(pTable1); f2 = (char*) TABLE_NAME(pTable2); @@ -2073,14 +2079,14 @@ int32_t tableGroupComparFn(const void *p1, const void *p2, const void *param) { return ret; } } - + return 0; } void createTableGroupImpl(SArray* pGroups, SArray* pTableList, size_t numOfTables, STableGroupSupporter* pSupp, __ext_compar_fn_t compareFn) { STable* pTable = taosArrayGetP(pTableList, 0); - + SArray* g = taosArrayInit(16, POINTER_BYTES); taosArrayPush(g, &pTable); tsdbRefTable(pTable); @@ -2088,10 +2094,10 @@ void createTableGroupImpl(SArray* pGroups, SArray* pTableList, size_t numOfTable for (int32_t i = 1; i < numOfTables; ++i) { STable** prev = taosArrayGet(pTableList, i - 1); STable** p = taosArrayGet(pTableList, i); - + int32_t ret = compareFn(prev, p, pSupp); assert(ret == 0 || ret == -1); - + tsdbRefTable(*p); assert((*p)->type == TSDB_CHILD_TABLE); @@ -2103,20 +2109,20 @@ void createTableGroupImpl(SArray* pGroups, SArray* pTableList, size_t numOfTable taosArrayPush(g, p); } } - + taosArrayPush(pGroups, &g); } SArray* createTableGroup(SArray* pTableList, STSchema* pTagSchema, SColIndex* pCols, int32_t numOfOrderCols) { assert(pTableList != NULL); SArray* pTableGroup = taosArrayInit(1, POINTER_BYTES); - + size_t size = taosArrayGetSize(pTableList); if (size == 0) { tsdbDebug("no qualified tables"); return pTableGroup; } - + if (numOfOrderCols == 0 || size == 1) { // no group by tags clause or only one table SArray* sa = taosArrayInit(size, POINTER_BYTES); for(int32_t i = 0; i < size; ++i) { @@ -2126,7 +2132,7 @@ SArray* createTableGroup(SArray* pTableList, STSchema* pTagSchema, SColIndex* pC tsdbRefTable(*pTable); taosArrayPush(sa, pTable); } - + taosArrayPush(pTableGroup, &sa); tsdbDebug("all %zu tables belong to one group", size); } else { @@ -2134,18 +2140,18 @@ SArray* createTableGroup(SArray* pTableList, STSchema* pTagSchema, SColIndex* pC pSupp->numOfCols = numOfOrderCols; pSupp->pTagSchema = pTagSchema; pSupp->pCols = pCols; - + taosqsort(pTableList->pData, size, POINTER_BYTES, pSupp, tableGroupComparFn); createTableGroupImpl(pTableGroup, pTableList, size, pSupp, tableGroupComparFn); tfree(pSupp); } - + return pTableGroup; } bool indexedNodeFilterFp(const void* pNode, void* param) { tQueryInfo* pInfo = (tQueryInfo*) param; - + STable* pTable = *(STable**)(SL_GET_NODE_DATA((SSkipListNode*)pNode)); char* val = NULL; @@ -2155,7 +2161,7 @@ bool indexedNodeFilterFp(const void* pNode, void* param) { } else { val = tdGetKVRowValOfCol(pTable->tagVal, pInfo->sch.colId); } - + int32_t ret = 0; if (val == NULL) { //the val is possible to be null, so check it out carefully ret = -1; // val is missing in table tags value pairs @@ -2192,7 +2198,7 @@ bool indexedNodeFilterFp(const void* pNode, void* param) { default: assert(false); } - + return true; } @@ -2222,7 +2228,7 @@ int32_t tsdbQuerySTableByTagCond(TSDB_REPO_T* tsdb, uint64_t uid, const char* pT goto _error; } - + if (pTable->type != TSDB_SUPER_TABLE) { tsdbError("%p query normal tag not allowed, uid:%" PRIu64 ", tid:%d, name:%s", tsdb, uid, pTable->tableId.tid, pTable->name->data); @@ -2235,7 +2241,7 @@ int32_t tsdbQuerySTableByTagCond(TSDB_REPO_T* tsdb, uint64_t uid, const char* pT //NOTE: not add ref count for super table SArray* res = taosArrayInit(8, POINTER_BYTES); STSchema* pTagSchema = tsdbGetTableTagSchema(pTable); - + // no tags and tbname condition, all child tables of this stable are involved if (tbnameCond == NULL && (pTagCond == NULL || len == 0)) { int32_t ret = getAllTableList(pTable, res); @@ -2246,7 +2252,7 @@ int32_t tsdbQuerySTableByTagCond(TSDB_REPO_T* tsdb, uint64_t uid, const char* pT pGroupInfo->numOfTables = taosArrayGetSize(res); pGroupInfo->pGroupList = createTableGroup(res, pTagSchema, pColIndex, numOfCols); - + tsdbDebug("%p no table name/tag condition, all tables belong to one group, numOfTables:%zu", tsdb, pGroupInfo->numOfTables); taosArrayDestroy(res); @@ -2282,7 +2288,7 @@ int32_t tsdbQuerySTableByTagCond(TSDB_REPO_T* tsdb, uint64_t uid, const char* pT } CATCH( code ) { CLEANUP_EXECUTE(); terrno = code; - goto _error; + goto _error; // TODO: more error handling } END_TRY @@ -2318,12 +2324,12 @@ int32_t tsdbGetOneTableGroup(TSDB_REPO_T* tsdb, uint64_t uid, STableGroupInfo* p pGroupInfo->numOfTables = 1; pGroupInfo->pGroupList = taosArrayInit(1, POINTER_BYTES); - + SArray* group = taosArrayInit(1, POINTER_BYTES); - + taosArrayPush(group, &pTable); taosArrayPush(pGroupInfo->pGroupList, &group); - + return TSDB_CODE_SUCCESS; _error: @@ -2375,7 +2381,7 @@ void tsdbCleanupQueryHandle(TsdbQueryHandleT queryHandle) { if (pQueryHandle == NULL) { return; } - + size_t size = taosArrayGetSize(pQueryHandle->pTableCheckInfo); for (int32_t i = 0; i < size; ++i) { STableCheckInfo* pTableCheckInfo = taosArrayGet(pQueryHandle->pTableCheckInfo, i); From dbe47d986fdc7d52e97601801d540d64b5c4dbbf Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 21 Jul 2020 12:03:36 +0800 Subject: [PATCH 04/42] [td-225] --- src/query/src/qResultbuf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/query/src/qResultbuf.c b/src/query/src/qResultbuf.c index 2443381194..c1ce43b9dd 100644 --- a/src/query/src/qResultbuf.c +++ b/src/query/src/qResultbuf.c @@ -54,7 +54,7 @@ int32_t getResBufSize(SDiskbasedResultBuf* pResultBuf) { return pResultBuf->tota static int32_t createDiskResidesBuf(SDiskbasedResultBuf* pResultBuf) { // pResultBuf->fd = open(pResultBuf->path, O_CREAT | O_RDWR, 0666); - pResultBuf->file = fopen(pResultBuf->path, "r+"); + pResultBuf->file = fopen(pResultBuf->path, "w"); if (pResultBuf->file == NULL) { qError("failed to create tmp file: %s on disk. %s", pResultBuf->path, strerror(errno)); return TAOS_SYSTEM_ERROR(errno); From c129e42232ac74dd8a009412943b0d73513377a9 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 21 Jul 2020 12:22:10 +0800 Subject: [PATCH 05/42] [td-225] fix error in taoscache. update the resbuf file descriptor. --- src/query/inc/qResultbuf.h | 3 ++- src/query/src/qResultbuf.c | 23 +++++++++++------------ src/util/src/tcache.c | 2 +- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/query/inc/qResultbuf.h b/src/query/inc/qResultbuf.h index e8b3af7ccf..9d60870962 100644 --- a/src/query/inc/qResultbuf.h +++ b/src/query/inc/qResultbuf.h @@ -36,7 +36,8 @@ typedef struct SDiskbasedResultBuf { int32_t numOfRowsPerPage; int32_t numOfPages; int64_t totalBufSize; - FILE* file; + int32_t fd; +// FILE* file; int32_t allocateId; // allocated page id int32_t incStep; // minimum allocated pages void* pBuf; // mmap buffer pointer diff --git a/src/query/src/qResultbuf.c b/src/query/src/qResultbuf.c index c1ce43b9dd..9e597aea5c 100644 --- a/src/query/src/qResultbuf.c +++ b/src/query/src/qResultbuf.c @@ -35,7 +35,7 @@ int32_t createDiskbasedResultBuffer(SDiskbasedResultBuf** pResultBuf, int32_t nu getTmpfilePath("qbuf", path); pResBuf->path = strdup(path); - pResBuf->file = NULL; + pResBuf->fd = FD_INITIALIZER; pResBuf->pBuf = NULL; pResBuf->emptyDummyIdList = taosArrayInit(1, sizeof(int32_t)); @@ -53,9 +53,9 @@ int32_t getResBufSize(SDiskbasedResultBuf* pResultBuf) { return pResultBuf->tota #define FILE_SIZE_ON_DISK(_r) (NUM_OF_PAGES_ON_DISK(_r) * (_r)->pageSize) static int32_t createDiskResidesBuf(SDiskbasedResultBuf* pResultBuf) { -// pResultBuf->fd = open(pResultBuf->path, O_CREAT | O_RDWR, 0666); - pResultBuf->file = fopen(pResultBuf->path, "w"); - if (pResultBuf->file == NULL) { + pResultBuf->fd = open(pResultBuf->path, O_CREAT | O_RDWR, 0666); +// pResultBuf->file = fopen(pResultBuf->path, "w"); + if (!FD_VALID(pResultBuf->fd)) { qError("failed to create tmp file: %s on disk. %s", pResultBuf->path, strerror(errno)); return TAOS_SYSTEM_ERROR(errno); } @@ -63,14 +63,13 @@ static int32_t createDiskResidesBuf(SDiskbasedResultBuf* pResultBuf) { assert(pResultBuf->numOfPages == pResultBuf->inMemPages); pResultBuf->numOfPages += pResultBuf->incStep; - int32_t ret = ftruncate(fileno(pResultBuf->file), NUM_OF_PAGES_ON_DISK(pResultBuf) * pResultBuf->pageSize); + int32_t ret = ftruncate(pResultBuf->fd, NUM_OF_PAGES_ON_DISK(pResultBuf) * pResultBuf->pageSize); if (ret != TSDB_CODE_SUCCESS) { qError("failed to create tmp file: %s on disk. %s", pResultBuf->path, strerror(errno)); return TAOS_SYSTEM_ERROR(errno); } - pResultBuf->pBuf = mmap(NULL, FILE_SIZE_ON_DISK(pResultBuf), PROT_READ | PROT_WRITE, MAP_SHARED, - fileno(pResultBuf->file), 0); + pResultBuf->pBuf = mmap(NULL, FILE_SIZE_ON_DISK(pResultBuf), PROT_READ | PROT_WRITE, MAP_SHARED, pResultBuf->fd, 0); if (pResultBuf->pBuf == MAP_FAILED) { qError("QInfo:%p failed to map temp file: %s. %s", pResultBuf->handle, pResultBuf->path, strerror(errno)); @@ -86,7 +85,7 @@ static int32_t extendDiskFileSize(SDiskbasedResultBuf* pResultBuf, int32_t incNu int32_t ret = TSDB_CODE_SUCCESS; if (pResultBuf->pBuf == NULL) { - assert(pResultBuf->file == NULL); + assert(!FD_VALID(pResultBuf->fd)); if ((ret = createDiskResidesBuf(pResultBuf)) != TSDB_CODE_SUCCESS) { return ret; @@ -99,7 +98,7 @@ static int32_t extendDiskFileSize(SDiskbasedResultBuf* pResultBuf, int32_t incNu * disk-based output buffer is exhausted, try to extend the disk-based buffer, the available disk space may * be insufficient */ - ret = ftruncate(fileno(pResultBuf->file), NUM_OF_PAGES_ON_DISK(pResultBuf) * pResultBuf->pageSize); + ret = ftruncate(pResultBuf->fd, NUM_OF_PAGES_ON_DISK(pResultBuf) * pResultBuf->pageSize); if (ret != TSDB_CODE_SUCCESS) { // dError("QInfo:%p failed to create intermediate result output file:%s. %s", pQInfo, pSupporter->extBufFile, // strerror(errno)); @@ -107,7 +106,7 @@ static int32_t extendDiskFileSize(SDiskbasedResultBuf* pResultBuf, int32_t incNu } pResultBuf->totalBufSize = pResultBuf->numOfPages * pResultBuf->pageSize; - pResultBuf->pBuf = mmap(NULL, FILE_SIZE_ON_DISK(pResultBuf), PROT_READ | PROT_WRITE, MAP_SHARED, fileno(pResultBuf->file), 0); + pResultBuf->pBuf = mmap(NULL, FILE_SIZE_ON_DISK(pResultBuf), PROT_READ | PROT_WRITE, MAP_SHARED, pResultBuf->fd, 0); if (pResultBuf->pBuf == MAP_FAILED) { // dError("QInfo:%p failed to map temp file: %s. %s", pQInfo, pSupporter->extBufFile, strerror(errno)); @@ -189,11 +188,11 @@ void destroyResultBuf(SDiskbasedResultBuf* pResultBuf, void* handle) { return; } - if (pResultBuf->file != NULL) { + if (FD_VALID(pResultBuf->fd)) { qDebug("QInfo:%p disk-based output buffer closed, total:%" PRId64 " bytes, file created:%s, file size:%d", handle, pResultBuf->totalBufSize, pResultBuf->path, FILE_SIZE_ON_DISK(pResultBuf)); - fclose(pResultBuf->file); + close(pResultBuf->fd); munmap(pResultBuf->pBuf, FILE_SIZE_ON_DISK(pResultBuf)); pResultBuf->pBuf = NULL; } else { diff --git a/src/util/src/tcache.c b/src/util/src/tcache.c index 89199f035d..7c2a6b3219 100644 --- a/src/util/src/tcache.c +++ b/src/util/src/tcache.c @@ -381,7 +381,7 @@ void *taosCacheAcquireByData(SCacheObj *pCacheObj, void *data) { } void *taosCacheTransfer(SCacheObj *pCacheObj, void **data) { - if (pCacheObj == NULL || data == NULL) return NULL; + if (pCacheObj == NULL || data == NULL || *data == NULL) return NULL; size_t offset = offsetof(SCacheDataNode, data); SCacheDataNode *ptNode = (SCacheDataNode *)((char *)(*data) - offset); From df80c010a921eede036767944d3a097c23b0ea39 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 21 Jul 2020 12:48:30 +0800 Subject: [PATCH 06/42] [td-225] set correct error code when errors occuring. --- src/client/src/tscAsync.c | 2 +- src/client/src/tscSubquery.c | 7 +++---- src/client/src/tscUtil.c | 14 +++++++++++++- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/src/client/src/tscAsync.c b/src/client/src/tscAsync.c index 9dd33e03cb..85cff4ba17 100644 --- a/src/client/src/tscAsync.c +++ b/src/client/src/tscAsync.c @@ -430,7 +430,7 @@ void tscTableMetaCallBack(void *param, TAOS_RES *res, int code) { pRes->code = code; if (code != TSDB_CODE_SUCCESS) { - tscError("%p ge tableMeta failed, code:%s", pSql, tstrerror(code)); + tscError("%p get tableMeta failed, code:%s", pSql, tstrerror(code)); goto _error; } else { tscDebug("%p get tableMeta successfully", pSql); diff --git a/src/client/src/tscSubquery.c b/src/client/src/tscSubquery.c index 5d26d09fae..95d65f4aff 100644 --- a/src/client/src/tscSubquery.c +++ b/src/client/src/tscSubquery.c @@ -1505,12 +1505,11 @@ static int32_t tscReissueSubquery(SRetrieveSupport *trsupport, SSqlObj *pSql, in SSqlObj *pNew = tscCreateSqlObjForSubquery(trsupport->pParentSql, trsupport, pSql); - // todo add to async res or not?? if (pNew == NULL) { - tscError("%p sub:%p failed to create new subquery due to out of memory, abort retry, vgId:%d, orderOfSub:%d", - trsupport->pParentSql, pSql, pVgroup->vgId, trsupport->subqueryIndex); + tscError("%p sub:%p failed to create new subquery due to error:%s, abort retry, vgId:%d, orderOfSub:%d", + trsupport->pParentSql, pSql, tstrerror(terrno), pVgroup->vgId, trsupport->subqueryIndex); - pParentSql->res.code = TSDB_CODE_TSC_OUT_OF_MEMORY; + pParentSql->res.code = terrno; trsupport->numOfRetry = MAX_NUM_OF_SUBQUERY_RETRY; return pParentSql->res.code; diff --git a/src/client/src/tscUtil.c b/src/client/src/tscUtil.c index 17adc0c03d..27f2535bdc 100644 --- a/src/client/src/tscUtil.c +++ b/src/client/src/tscUtil.c @@ -1675,6 +1675,7 @@ SSqlObj* createSubqueryObj(SSqlObj* pSql, int16_t tableIndex, void (*fp)(), void SSqlObj* pNew = (SSqlObj*)calloc(1, sizeof(SSqlObj)); if (pNew == NULL) { tscError("%p new subquery failed, tableIndex:%d", pSql, tableIndex); + terrno = TSDB_CODE_TSC_OUT_OF_MEMORY; return NULL; } @@ -1688,6 +1689,7 @@ SSqlObj* createSubqueryObj(SSqlObj* pSql, int16_t tableIndex, void (*fp)(), void tscError("%p new subquery failed, tableIndex:%d, vgroupIndex:%d", pSql, tableIndex, pTableMetaInfo->vgroupIndex); free(pNew); + terrno = TSDB_CODE_TSC_OUT_OF_MEMORY; return NULL; } @@ -1706,6 +1708,7 @@ SSqlObj* createSubqueryObj(SSqlObj* pSql, int16_t tableIndex, void (*fp)(), void if (tscAddSubqueryInfo(pnCmd) != TSDB_CODE_SUCCESS) { tscFreeSqlObj(pNew); + terrno = TSDB_CODE_TSC_OUT_OF_MEMORY; return NULL; } @@ -1743,6 +1746,7 @@ SSqlObj* createSubqueryObj(SSqlObj* pSql, int16_t tableIndex, void (*fp)(), void if (tscAllocPayload(pnCmd, TSDB_DEFAULT_PAYLOAD_SIZE) != TSDB_CODE_SUCCESS) { tscError("%p new subquery failed, tableIndex:%d, vgroupIndex:%d", pSql, tableIndex, pTableMetaInfo->vgroupIndex); tscFreeSqlObj(pNew); + terrno = TSDB_CODE_TSC_OUT_OF_MEMORY; return NULL; } @@ -1827,8 +1831,16 @@ SSqlObj* createSubqueryObj(SSqlObj* pSql, int16_t tableIndex, void (*fp)(), void } if (pFinalInfo->pTableMeta == NULL) { - tscError("%p new subquery failed for get tableMeta is NULL from cache", pSql); + tscError("%p new subquery failed since no tableMeta in cache, name:%s", pSql, name); tscFreeSqlObj(pNew); + + if (pPrevSql != NULL) { + assert(pPrevSql->res.code != TSDB_CODE_SUCCESS); + terrno = pPrevSql->res.code; + } else { + terrno = TSDB_CODE_TSC_APP_ERROR; + } + return NULL; } From f72183fea5d047adfc3e9e95bc667f79fc8b065a Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 21 Jul 2020 14:35:49 +0800 Subject: [PATCH 07/42] [td-225] enable block ts check. --- src/tsdb/src/tsdbRead.c | 42 +++++++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/src/tsdb/src/tsdbRead.c b/src/tsdb/src/tsdbRead.c index 1d15860912..7538c6f7e1 100644 --- a/src/tsdb/src/tsdbRead.c +++ b/src/tsdb/src/tsdbRead.c @@ -831,6 +831,12 @@ static int32_t copyDataFromFileBlock(STsdbQueryHandle* pQueryHandle, int32_t cap TSKEY* tsArray = pCols->cols[0].pData; int32_t num = end - start + 1; + assert(num >= 0); + + if (num == 0) { + return numOfRows; + } + int32_t requiredNumOfCols = taosArrayGetSize(pQueryHandle->pColumns); //data in buffer has greater timestamp, copy data in file block @@ -973,7 +979,7 @@ static void copyOneRowFromMem(STsdbQueryHandle* pQueryHandle, int32_t capacity, } static void moveDataToFront(STsdbQueryHandle* pQueryHandle, int32_t numOfRows, int32_t numOfCols) { - if (ASCENDING_TRAVERSE(pQueryHandle->order)) { + if (numOfRows == 0 || ASCENDING_TRAVERSE(pQueryHandle->order)) { return; } @@ -1022,6 +1028,26 @@ static void updateInfoAfterMerge(STsdbQueryHandle* pQueryHandle, STableCheckInfo cur->pos = endPos; } +static void doCheckGeneratedBlockRange(STsdbQueryHandle* pQueryHandle) { + SQueryFilePos* cur = &pQueryHandle->cur; + + if (cur->rows > 0) { + if (ASCENDING_TRAVERSE(pQueryHandle->order)) { + assert(cur->win.skey >= pQueryHandle->window.skey && cur->win.ekey <= pQueryHandle->window.ekey); + } else { + assert(cur->win.skey >= pQueryHandle->window.ekey && cur->win.ekey <= pQueryHandle->window.skey); + } + + SColumnInfoData* pColInfoData = taosArrayGet(pQueryHandle->pColumns, 0); + assert(cur->win.skey == ((TSKEY*)pColInfoData->pData)[0] && cur->win.ekey == ((TSKEY*)pColInfoData->pData)[cur->rows-1]); + } else { + cur->win = pQueryHandle->window; + + int32_t step = ASCENDING_TRAVERSE(pQueryHandle->order)? 1:-1; + cur->lastKey = pQueryHandle->window.ekey + step; + } +} + // only return the qualified data to client in terms of query time window, data rows in the same block but do not // be included in the query time window will be discarded static void doMergeTwoLevelData(STsdbQueryHandle* pQueryHandle, STableCheckInfo* pCheckInfo, SCompBlock* pBlock) { @@ -1073,6 +1099,7 @@ static void doMergeTwoLevelData(STsdbQueryHandle* pQueryHandle, STableCheckInfo* // the time window should always be right order: skey <= ekey cur->win = (STimeWindow) {.skey = tsArray[start], .ekey = tsArray[end]}; + cur->lastKey = tsArray[endPos]; pos += (end - start + 1) * step; cur->blockCompleted = @@ -1082,7 +1109,7 @@ static void doMergeTwoLevelData(STsdbQueryHandle* pQueryHandle, STableCheckInfo* // if the buffer is not full in case of descending order query, move the data in the front of the buffer moveDataToFront(pQueryHandle, numOfRows, numOfCols); updateInfoAfterMerge(pQueryHandle, pCheckInfo, numOfRows, pos); - + doCheckGeneratedBlockRange(pQueryHandle); return; } else if (pCheckInfo->iter != NULL || pCheckInfo->iiter != NULL) { SSkipListNode* node = NULL; @@ -1175,15 +1202,7 @@ static void doMergeTwoLevelData(STsdbQueryHandle* pQueryHandle, STableCheckInfo* moveDataToFront(pQueryHandle, numOfRows, numOfCols); updateInfoAfterMerge(pQueryHandle, pCheckInfo, numOfRows, pos); - - if (ASCENDING_TRAVERSE(pQueryHandle->order)) { - assert(cur->win.skey >= pQueryHandle->window.skey && cur->win.ekey <= pQueryHandle->window.ekey); - } else { - assert(cur->win.skey >= pQueryHandle->window.ekey && cur->win.ekey <= pQueryHandle->window.skey); - } - - SColumnInfoData* pColInfoData = taosArrayGet(pQueryHandle->pColumns, 0); - assert(cur->win.skey == ((TSKEY*)pColInfoData->pData)[0] && cur->win.ekey == ((TSKEY*)pColInfoData->pData)[cur->rows-1]); + doCheckGeneratedBlockRange(pQueryHandle); tsdbDebug("%p uid:%" PRIu64",tid:%d data block created, brange:%"PRIu64"-%"PRIu64" rows:%d, %p", pQueryHandle, pCheckInfo->tableId.uid, pCheckInfo->tableId.tid, cur->win.skey, cur->win.ekey, cur->rows, pQueryHandle->qinfo); @@ -2027,7 +2046,6 @@ typedef struct STableGroupSupporter { int32_t numOfCols; SColIndex* pCols; STSchema* pTagSchema; -// void* tsdbMeta; } STableGroupSupporter; int32_t tableGroupComparFn(const void *p1, const void *p2, const void *param) { From 7ff6bc2eb02ad0c9367e5db1673f631461e49a36 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 22 Jul 2020 13:08:32 +0800 Subject: [PATCH 08/42] [td-225] add checks. --- src/tsdb/src/tsdbRead.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/tsdb/src/tsdbRead.c b/src/tsdb/src/tsdbRead.c index 3b6e90b740..6107c73921 100644 --- a/src/tsdb/src/tsdbRead.c +++ b/src/tsdb/src/tsdbRead.c @@ -721,7 +721,7 @@ static bool loadFileDataBlock(STsdbQueryHandle* pQueryHandle, SCompBlock* pBlock SQueryFilePos* cur = &pQueryHandle->cur; if (ASCENDING_TRAVERSE(pQueryHandle->order)) { - // query ended in current block + // query ended in/started from current block if (pQueryHandle->window.ekey < pBlock->keyLast || pCheckInfo->lastKey > pBlock->keyFirst) { if (!doLoadFileDataBlock(pQueryHandle, pBlock, pCheckInfo)) { return false; @@ -737,6 +737,7 @@ static bool loadFileDataBlock(STsdbQueryHandle* pQueryHandle, SCompBlock* pBlock cur->pos = 0; } + assert(pCheckInfo->lastKey <= pBlock->keyLast); doMergeTwoLevelData(pQueryHandle, pCheckInfo, pBlock); } else { // the whole block is loaded in to buffer handleDataMergeIfNeeded(pQueryHandle, pBlock, pCheckInfo); @@ -754,6 +755,7 @@ static bool loadFileDataBlock(STsdbQueryHandle* pQueryHandle, SCompBlock* pBlock cur->pos = pBlock->numOfRows - 1; } + assert(pCheckInfo->lastKey >= pBlock->keyFirst); doMergeTwoLevelData(pQueryHandle, pCheckInfo, pBlock); } else { handleDataMergeIfNeeded(pQueryHandle, pBlock, pCheckInfo); @@ -1059,7 +1061,9 @@ static void doMergeTwoLevelData(STsdbQueryHandle* pQueryHandle, STableCheckInfo* initTableMemIterator(pQueryHandle, pCheckInfo); SDataCols* pCols = pQueryHandle->rhelper.pDataCols[0]; - assert(pCols->cols[0].type == TSDB_DATA_TYPE_TIMESTAMP && pCols->cols[0].colId == PRIMARYKEY_TIMESTAMP_COL_INDEX); + assert(pCols->cols[0].type == TSDB_DATA_TYPE_TIMESTAMP && pCols->cols[0].colId == PRIMARYKEY_TIMESTAMP_COL_INDEX && + cur->pos >= 0 && cur->pos < pBlock->numOfRows); + TSKEY* tsArray = pCols->cols[0].pData; // for search the endPos, so the order needs to reverse @@ -1069,8 +1073,8 @@ static void doMergeTwoLevelData(STsdbQueryHandle* pQueryHandle, STableCheckInfo* int32_t numOfCols = QH_GET_NUM_OF_COLS(pQueryHandle); STable* pTable = pCheckInfo->pTableObj; - int32_t endPos = cur->pos; + if (ASCENDING_TRAVERSE(pQueryHandle->order) && pQueryHandle->window.ekey > blockInfo.window.ekey) { endPos = blockInfo.rows - 1; cur->mixBlock = (cur->pos != 0); From 7e7f91b8fc20f3128d9d741b3ee72b8565eabecd Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 22 Jul 2020 13:09:53 +0800 Subject: [PATCH 09/42] [td-225] add checks. --- src/tsdb/src/tsdbRead.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/tsdb/src/tsdbRead.c b/src/tsdb/src/tsdbRead.c index 6107c73921..9962019591 100644 --- a/src/tsdb/src/tsdbRead.c +++ b/src/tsdb/src/tsdbRead.c @@ -1490,6 +1490,7 @@ static int32_t getDataBlocksInFilesImpl(STsdbQueryHandle* pQueryHandle, bool* ex return code; } + assert(pQueryHandle->pFileGroup != NULL); cur->slot = ASCENDING_TRAVERSE(pQueryHandle->order)? 0:pQueryHandle->numOfBlocks-1; cur->fid = pQueryHandle->pFileGroup->fileId; From 1f3571926e41a573146bbd373ca2354a987030d4 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 22 Jul 2020 13:21:49 +0800 Subject: [PATCH 10/42] [td-225] refactor codes. --- src/tsdb/src/tsdbRead.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/tsdb/src/tsdbRead.c b/src/tsdb/src/tsdbRead.c index 9962019591..7577a8e8fc 100644 --- a/src/tsdb/src/tsdbRead.c +++ b/src/tsdb/src/tsdbRead.c @@ -1450,9 +1450,11 @@ static int32_t getDataBlocksInFilesImpl(STsdbQueryHandle* pQueryHandle, bool* ex // current file are not overlapped with query time window, ignore remain files if ((ASCENDING_TRAVERSE(pQueryHandle->order) && win.skey > pQueryHandle->window.ekey) || - (!ASCENDING_TRAVERSE(pQueryHandle->order) && win.ekey < pQueryHandle->window.ekey)) { - tsdbDebug("%p remain files are not qualified for qrange:%"PRId64"-%"PRId64", ignore, %p", pQueryHandle, pQueryHandle->window.skey, pQueryHandle->window.ekey, pQueryHandle->qinfo) + (!ASCENDING_TRAVERSE(pQueryHandle->order) && win.ekey < pQueryHandle->window.ekey)) { + tsdbDebug("%p remain files are not qualified for qrange:%" PRId64 "-%" PRId64 ", ignore, %p", pQueryHandle, + pQueryHandle->window.skey, pQueryHandle->window.ekey, pQueryHandle->qinfo); pQueryHandle->pFileGroup = NULL; + assert(pQueryHandle->numOfBlocks == 0); break; } @@ -1460,8 +1462,8 @@ static int32_t getDataBlocksInFilesImpl(STsdbQueryHandle* pQueryHandle, bool* ex break; } - tsdbDebug("%p %d blocks found in file for %d table(s), fid:%d, %p", pQueryHandle, numOfBlocks, - numOfTables, pQueryHandle->pFileGroup->fileId, pQueryHandle->qinfo); + tsdbDebug("%p %d blocks found in file for %d table(s), fid:%d, %p", pQueryHandle, numOfBlocks, numOfTables, + pQueryHandle->pFileGroup->fileId, pQueryHandle->qinfo); assert(numOfBlocks >= 0); if (numOfBlocks == 0) { @@ -1469,7 +1471,7 @@ static int32_t getDataBlocksInFilesImpl(STsdbQueryHandle* pQueryHandle, bool* ex } // todo return error code to query engine - if (createDataBlocksInfo(pQueryHandle, numOfBlocks, &pQueryHandle->numOfBlocks) != TSDB_CODE_SUCCESS) { + if ((code = createDataBlocksInfo(pQueryHandle, numOfBlocks, &pQueryHandle->numOfBlocks)) != TSDB_CODE_SUCCESS) { break; } @@ -1480,7 +1482,7 @@ static int32_t getDataBlocksInFilesImpl(STsdbQueryHandle* pQueryHandle, bool* ex } // no data in file anymore - if (pQueryHandle->numOfBlocks <= 0) { + if (pQueryHandle->numOfBlocks <= 0 || code != TSDB_CODE_SUCCESS) { if (code == TSDB_CODE_SUCCESS) { assert(pQueryHandle->pFileGroup == NULL); } From e8b0eb7f224d96acc7ba7bdcd5e62c1af717d50f Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 22 Jul 2020 13:29:31 +0800 Subject: [PATCH 11/42] [td-225] fix compiler errors. --- src/query/inc/qResultbuf.h | 9 ++------- src/query/src/qResultbuf.c | 8 ++++++++ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/query/inc/qResultbuf.h b/src/query/inc/qResultbuf.h index 9d60870962..2e1126b517 100644 --- a/src/query/inc/qResultbuf.h +++ b/src/query/inc/qResultbuf.h @@ -97,13 +97,8 @@ SIDList getDataBufPagesIdList(SDiskbasedResultBuf* pResultBuf, int32_t groupId); * @param id * @return */ -static FORCE_INLINE tFilePage* getResBufPage(SDiskbasedResultBuf* pResultBuf, int32_t id) { - if (id < pResultBuf->inMemPages) { - return (tFilePage*) ((char*) pResultBuf->iBuf + id * pResultBuf->pageSize); - } else { - return (tFilePage*) ((char*) pResultBuf->pBuf + (id - pResultBuf->inMemPages) * pResultBuf->pageSize); - } -} +tFilePage* getResBufPage(SDiskbasedResultBuf* pResultBuf, int32_t id); + /** * get the total buffer size in the format of disk file * @param pResultBuf diff --git a/src/query/src/qResultbuf.c b/src/query/src/qResultbuf.c index 9e597aea5c..a4696ea62c 100644 --- a/src/query/src/qResultbuf.c +++ b/src/query/src/qResultbuf.c @@ -144,6 +144,14 @@ static int32_t addNewGroupId(SDiskbasedResultBuf* pResultBuf, int32_t groupId) { return num; } +tFilePage* getResBufPage(SDiskbasedResultBuf* pResultBuf, int32_t id) { + if (id < pResultBuf->inMemPages) { + return (tFilePage*) ((char*) pResultBuf->iBuf + id * pResultBuf->pageSize); + } else { + return (tFilePage*) ((char*) pResultBuf->pBuf + (id - pResultBuf->inMemPages) * pResultBuf->pageSize); + } +} + static void registerPageId(SDiskbasedResultBuf* pResultBuf, int32_t groupId, int32_t pageId) { int32_t slot = getGroupIndex(pResultBuf, groupId); if (slot < 0) { From 9a9ea692ed538c07d306ee414ca87da87e4bf45a Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 22 Jul 2020 14:02:23 +0800 Subject: [PATCH 12/42] [td-225] update resbuf --- src/query/inc/qResultbuf.h | 47 ++-- src/query/src/qResultbuf.c | 331 ++++++++++++++++++--------- src/query/tests/resultBufferTest.cpp | 28 ++- src/util/src/tlist.c | 1 + 4 files changed, 281 insertions(+), 126 deletions(-) diff --git a/src/query/inc/qResultbuf.h b/src/query/inc/qResultbuf.h index 9d60870962..b4c830370c 100644 --- a/src/query/inc/qResultbuf.h +++ b/src/query/inc/qResultbuf.h @@ -20,42 +20,56 @@ extern "C" { #endif +#include #include "hash.h" #include "os.h" #include "qExtbuffer.h" +#include "tlockfree.h" typedef struct SArray* SIDList; -typedef struct SPageInfo { - int32_t pageId; +typedef struct SPageDiskInfo { int32_t offset; - int32_t lengthOnDisk; + int32_t length; +} SPageDiskInfo; + +typedef struct SPageInfo { + int32_t pageId; + SPageDiskInfo info; + void* pData; + T_REF_DECLARE(); } SPageInfo; +typedef struct SFreeListItem { + int32_t offset; + int32_t len; +} SFreeListItem; + typedef struct SDiskbasedResultBuf { int32_t numOfRowsPerPage; int32_t numOfPages; int64_t totalBufSize; - int32_t fd; -// FILE* file; +// int32_t fd; + FILE* file; int32_t allocateId; // allocated page id - int32_t incStep; // minimum allocated pages +// int32_t incStep; // minimum allocated pages void* pBuf; // mmap buffer pointer char* path; // file path int32_t pageSize; // current used page size int32_t inMemPages; // numOfPages that are allocated in memory SHashObj* idsTable; // id hash table - SIDList list; // for each id, there is a page id list - - void* iBuf; // inmemory buf + SHashObj* all; + SList* pPageList; void* handle; // for debug purpose void* emptyDummyIdList; // dummy id list bool comp; - + SArray* pFree; // free area in file + int32_t nextPos; // next page flush position } SDiskbasedResultBuf; #define DEFAULT_INTERN_BUF_PAGE_SIZE (1024L) #define DEFAULT_INMEM_BUF_PAGES 10 +#define PAGE_INFO_INITIALIZER (SPageDiskInfo){-1, -1} /** * create disk-based result buffer @@ -65,7 +79,7 @@ typedef struct SDiskbasedResultBuf { * @return */ int32_t createDiskbasedResultBuffer(SDiskbasedResultBuf** pResultBuf, int32_t numOfPages, int32_t rowSize, int32_t pagesize, - int32_t inMemPages, const void* handle); + int32_t inMemPages, const void* handle); /** * @@ -97,13 +111,10 @@ SIDList getDataBufPagesIdList(SDiskbasedResultBuf* pResultBuf, int32_t groupId); * @param id * @return */ -static FORCE_INLINE tFilePage* getResBufPage(SDiskbasedResultBuf* pResultBuf, int32_t id) { - if (id < pResultBuf->inMemPages) { - return (tFilePage*) ((char*) pResultBuf->iBuf + id * pResultBuf->pageSize); - } else { - return (tFilePage*) ((char*) pResultBuf->pBuf + (id - pResultBuf->inMemPages) * pResultBuf->pageSize); - } -} +tFilePage* getResBufPage(SDiskbasedResultBuf* pResultBuf, int32_t id); + +void releaseResBufPage(SDiskbasedResultBuf* pResultBuf, void* page); + /** * get the total buffer size in the format of disk file * @param pResultBuf diff --git a/src/query/src/qResultbuf.c b/src/query/src/qResultbuf.c index 9e597aea5c..dff629e6a8 100644 --- a/src/query/src/qResultbuf.c +++ b/src/query/src/qResultbuf.c @@ -1,4 +1,5 @@ #include "qResultbuf.h" +#include #include "hash.h" #include "qExtbuffer.h" #include "queryLog.h" @@ -14,29 +15,26 @@ int32_t createDiskbasedResultBuffer(SDiskbasedResultBuf** pResultBuf, int32_t nu } pResBuf->pageSize = pagesize; - pResBuf->numOfPages = inMemPages; // all pages are in buffer in the first place + pResBuf->numOfPages = 0; // all pages are in buffer in the first place pResBuf->inMemPages = inMemPages; assert(inMemPages <= numOfPages); pResBuf->numOfRowsPerPage = (pagesize - sizeof(tFilePage)) / rowSize; pResBuf->totalBufSize = pResBuf->numOfPages * pagesize; - pResBuf->incStep = 4; pResBuf->allocateId = -1; - // todo opt perf by on demand create in memory buffer - pResBuf->iBuf = calloc(pResBuf->inMemPages, pResBuf->pageSize); + pResBuf->pPageList = tdListNew(POINTER_BYTES); // init id hash table - pResBuf->idsTable = taosHashInit(numOfPages, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false); - pResBuf->list = taosArrayInit(numOfPages, POINTER_BYTES); + pResBuf->idsTable = taosHashInit(10, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false); + pResBuf->all = taosHashInit(10, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false); char path[PATH_MAX] = {0}; getTmpfilePath("qbuf", path); pResBuf->path = strdup(path); - pResBuf->fd = FD_INITIALIZER; - pResBuf->pBuf = NULL; + pResBuf->file = NULL; pResBuf->emptyDummyIdList = taosArrayInit(1, sizeof(int32_t)); qDebug("QInfo:%p create resBuf for output, page size:%d, initial pages:%d, %" PRId64 "bytes", handle, @@ -53,133 +51,258 @@ int32_t getResBufSize(SDiskbasedResultBuf* pResultBuf) { return pResultBuf->tota #define FILE_SIZE_ON_DISK(_r) (NUM_OF_PAGES_ON_DISK(_r) * (_r)->pageSize) static int32_t createDiskResidesBuf(SDiskbasedResultBuf* pResultBuf) { - pResultBuf->fd = open(pResultBuf->path, O_CREAT | O_RDWR, 0666); -// pResultBuf->file = fopen(pResultBuf->path, "w"); - if (!FD_VALID(pResultBuf->fd)) { +// pResultBuf->fd = open(pResultBuf->path, O_CREAT | O_RDWR, 0666); + pResultBuf->file = fopen(pResultBuf->path, "w"); + if (pResultBuf->file == NULL) { qError("failed to create tmp file: %s on disk. %s", pResultBuf->path, strerror(errno)); return TAOS_SYSTEM_ERROR(errno); } - - assert(pResultBuf->numOfPages == pResultBuf->inMemPages); - pResultBuf->numOfPages += pResultBuf->incStep; - - int32_t ret = ftruncate(pResultBuf->fd, NUM_OF_PAGES_ON_DISK(pResultBuf) * pResultBuf->pageSize); - if (ret != TSDB_CODE_SUCCESS) { - qError("failed to create tmp file: %s on disk. %s", pResultBuf->path, strerror(errno)); - return TAOS_SYSTEM_ERROR(errno); - } - - pResultBuf->pBuf = mmap(NULL, FILE_SIZE_ON_DISK(pResultBuf), PROT_READ | PROT_WRITE, MAP_SHARED, pResultBuf->fd, 0); - - if (pResultBuf->pBuf == MAP_FAILED) { - qError("QInfo:%p failed to map temp file: %s. %s", pResultBuf->handle, pResultBuf->path, strerror(errno)); - return TAOS_SYSTEM_ERROR(errno); - } - - pResultBuf->totalBufSize = pResultBuf->numOfPages * pResultBuf->pageSize; return TSDB_CODE_SUCCESS; } -static int32_t extendDiskFileSize(SDiskbasedResultBuf* pResultBuf, int32_t incNumOfPages) { - assert(pResultBuf->numOfPages * pResultBuf->pageSize == pResultBuf->totalBufSize); +static char* doCompressData(void* data, int32_t srcSize, int32_t *dst) { // do nothing + *dst = srcSize; + return data; +} + +static int32_t allocatePositionInFile(SDiskbasedResultBuf* pResultBuf, size_t size) { + if (pResultBuf->pFree == NULL) { + return pResultBuf->nextPos; + } else { //todo speed up the search procedure + size_t num = taosArrayGetSize(pResultBuf->pFree); + + int32_t offset = -1; + + for(int32_t i = 0; i < num; ++i) { + SFreeListItem* pi = taosArrayGet(pResultBuf->pFree, i); + if (pi->len >= size) { + offset = pi->offset; + pi->offset += size; + pi->len -= size; + + return offset; + } + } + + // no available recycle space, allocate new area in file + return pResultBuf->nextPos; + } +} + +static void doFlushPageToDisk(SDiskbasedResultBuf* pResultBuf, SPageInfo* pg) { + assert(T_REF_VAL_GET(pg) == 0); + + int32_t size = -1; + char* t = doCompressData(pg->pData + POINTER_BYTES, pResultBuf->pageSize, &size); + + // this page is flushed to disk for the first time + if (pg->info.offset == -1) { + int32_t offset = allocatePositionInFile(pResultBuf, size); + pResultBuf->nextPos += size; + + fseek(pResultBuf->file, offset, SEEK_SET); + fwrite(t, size, 1, pResultBuf->file); + } else { + if (pg->info.length < size) { // length becomes greater, current space is not enough, allocate new place. + //1. add current space to free list + taosArrayPush(pResultBuf->pFree, &pg->info); + + //2. allocate new position, and update the info + int32_t offset = allocatePositionInFile(pResultBuf, size); + pResultBuf->nextPos += size; + + //3. write to disk. + fseek(pResultBuf->file, offset, SEEK_SET); + fwrite(t, size, 1, pResultBuf->file); + } + } +} + +static int32_t flushPageToDisk(SDiskbasedResultBuf* pResultBuf, SPageInfo* pg) { int32_t ret = TSDB_CODE_SUCCESS; + assert(pResultBuf->numOfPages * pResultBuf->pageSize == pResultBuf->totalBufSize && pResultBuf->numOfPages >= pResultBuf->inMemPages); if (pResultBuf->pBuf == NULL) { - assert(!FD_VALID(pResultBuf->fd)); - + assert(pResultBuf->file == NULL); if ((ret = createDiskResidesBuf(pResultBuf)) != TSDB_CODE_SUCCESS) { return ret; } - } else { - ret = munmap(pResultBuf->pBuf, FILE_SIZE_ON_DISK(pResultBuf)); - pResultBuf->numOfPages += incNumOfPages; - - /* - * disk-based output buffer is exhausted, try to extend the disk-based buffer, the available disk space may - * be insufficient - */ - ret = ftruncate(pResultBuf->fd, NUM_OF_PAGES_ON_DISK(pResultBuf) * pResultBuf->pageSize); - if (ret != TSDB_CODE_SUCCESS) { - // dError("QInfo:%p failed to create intermediate result output file:%s. %s", pQInfo, pSupporter->extBufFile, - // strerror(errno)); - return TSDB_CODE_QRY_NO_DISKSPACE; - } - - pResultBuf->totalBufSize = pResultBuf->numOfPages * pResultBuf->pageSize; - pResultBuf->pBuf = mmap(NULL, FILE_SIZE_ON_DISK(pResultBuf), PROT_READ | PROT_WRITE, MAP_SHARED, pResultBuf->fd, 0); - - if (pResultBuf->pBuf == MAP_FAILED) { - // dError("QInfo:%p failed to map temp file: %s. %s", pQInfo, pSupporter->extBufFile, strerror(errno)); - return TSDB_CODE_QRY_OUT_OF_MEMORY; - } } + doFlushPageToDisk(pResultBuf, pg); return TSDB_CODE_SUCCESS; } -#define NO_AVAILABLE_PAGES(_b) ((_b)->allocateId == (_b)->numOfPages - 1) +#define NO_AVAILABLE_PAGES(_b) ((_b)->numOfPages >= (_b)->inMemPages) -static FORCE_INLINE int32_t getGroupIndex(SDiskbasedResultBuf* pResultBuf, int32_t groupId) { - assert(pResultBuf != NULL); +static SIDList addNewGroup(SDiskbasedResultBuf* pResultBuf, int32_t groupId) { + assert(taosHashGet(pResultBuf->idsTable, (const char*) &groupId, sizeof(int32_t)) == NULL); - char* p = taosHashGet(pResultBuf->idsTable, (const char*)&groupId, sizeof(int32_t)); + SArray* pa = taosArrayInit(1, sizeof(SPageInfo)); + int32_t ret = taosHashPut(pResultBuf->idsTable, (const char*)&groupId, sizeof(int32_t), &pa, POINTER_BYTES); + assert(ret == 0); + + return pa; +} + +static SPageInfo* registerPage(SDiskbasedResultBuf* pResultBuf, int32_t groupId, int32_t pageId) { + SIDList list = NULL; + + char** p = taosHashGet(pResultBuf->idsTable, (const char*)&groupId, sizeof(int32_t)); if (p == NULL) { // it is a new group id - return -1; + list = addNewGroup(pResultBuf, groupId); + } else { + list = (SIDList) (*p); } - int32_t slot = GET_INT32_VAL(p); - assert(slot >= 0 && slot < taosHashGetSize(pResultBuf->idsTable)); + pResultBuf->numOfPages += 1; - return slot; -} - -static int32_t addNewGroupId(SDiskbasedResultBuf* pResultBuf, int32_t groupId) { - int32_t num = getNumOfResultBufGroupId(pResultBuf); // the num is the newest allocated group id slot - taosHashPut(pResultBuf->idsTable, (const char*)&groupId, sizeof(int32_t), &num, sizeof(int32_t)); - - SArray* pa = taosArrayInit(1, sizeof(int32_t)); - taosArrayPush(pResultBuf->list, &pa); - - assert(taosArrayGetSize(pResultBuf->list) == taosHashGetSize(pResultBuf->idsTable)); - return num; -} - -static void registerPageId(SDiskbasedResultBuf* pResultBuf, int32_t groupId, int32_t pageId) { - int32_t slot = getGroupIndex(pResultBuf, groupId); - if (slot < 0) { - slot = addNewGroupId(pResultBuf, groupId); - } - - SIDList pList = taosArrayGetP(pResultBuf->list, slot); - taosArrayPush(pList, &pageId); + SPageInfo ppi = { .info = PAGE_INFO_INITIALIZER, .pageId = pageId, }; + return taosArrayPush(list, &ppi); } tFilePage* getNewDataBuf(SDiskbasedResultBuf* pResultBuf, int32_t groupId, int32_t* pageId) { if (NO_AVAILABLE_PAGES(pResultBuf)) { - if (extendDiskFileSize(pResultBuf, pResultBuf->incStep) != TSDB_CODE_SUCCESS) { - return NULL; + // get the last page in linked list + SListIter iter = {0}; + tdListInitIter(pResultBuf->pPageList, &iter, TD_LIST_BACKWARD); + + SListNode* pn = NULL; + while((pn = tdListNext(&iter)) != NULL) { + assert(pn != NULL); + if (T_REF_VAL_GET(*(SPageInfo**)pn->data) == 0) { + break; + } + } + + // all pages are referenced by user, try to allocate new space + if (pn == NULL) { + int32_t prev = pResultBuf->inMemPages; + pResultBuf->inMemPages = pResultBuf->inMemPages * 1.5; + + qWarn("%p in memory buf page not sufficient, expand from %d to %d, page size:%d", pResultBuf, prev, + pResultBuf->inMemPages, pResultBuf->pageSize); + } else { + tdListPopNode(pResultBuf->pPageList, pn); + if (flushPageToDisk(pResultBuf, *(SPageInfo**)pn->data) != TSDB_CODE_SUCCESS) { + return NULL; + } } } // register new id in this group *pageId = (++pResultBuf->allocateId); - registerPageId(pResultBuf, groupId, *pageId); - // clear memory for the new page - tFilePage* page = getResBufPage(pResultBuf, *pageId); - memset(page, 0, pResultBuf->pageSize); - - return page; + // register page id info + SPageInfo* pi = registerPage(pResultBuf, groupId, *pageId); + + // add to LRU list + assert(listNEles(pResultBuf->pPageList) < pResultBuf->inMemPages); + tdListPrepend(pResultBuf->pPageList, &pi); + + // add to hash map + taosHashPut(pResultBuf->all, pageId, sizeof(int32_t), &pi, POINTER_BYTES); + + // allocate buf + pi->pData = calloc(1, pResultBuf->pageSize + POINTER_BYTES); + pResultBuf->totalBufSize += pResultBuf->pageSize; + + T_REF_INC(pi); // add ref count + ((void**)pi->pData)[0] = pi; + + return pi->pData + POINTER_BYTES; } +tFilePage* getResBufPage(SDiskbasedResultBuf* pResultBuf, int32_t id) { + assert(pResultBuf != NULL && id >= 0); + + SPageInfo** pi = taosHashGet(pResultBuf->all, &id, sizeof(int32_t)); + assert(pi != NULL && *pi != NULL); + + if ((*pi)->pData != NULL) { // it is in memory + // no need to update the LRU list + if (pResultBuf->numOfPages == 1) { + return (*pi)->pData + POINTER_BYTES; + } + + SListNode* pnode = NULL; // todo speed up + + SListIter iter = {0}; + tdListInitIter(pResultBuf->pPageList, &iter, TD_LIST_FORWARD); + + while((pnode = tdListNext(&iter)) != NULL) { + SPageInfo** pInfo = (SPageInfo**) pnode->data; + + // remove it and add it into the front of linked-list + if ((*pInfo)->pageId == id) { + tdListPopNode(pResultBuf->pPageList, pnode); + tdListPrependNode(pResultBuf->pPageList, pnode); + T_REF_INC(*(SPageInfo**)pnode->data); + + return ((*(SPageInfo**)pnode->data)->pData + POINTER_BYTES); + } + } + } else { // not in memory + // choose the be flushed page + // get the last page in linked list + SListIter iter1 = {0}; + tdListInitIter(pResultBuf->pPageList, &iter1, TD_LIST_BACKWARD); + + SListNode* pn = NULL; + while((pn = tdListNext(&iter1)) != NULL) { + assert(pn != NULL); + if (T_REF_VAL_GET(*(SPageInfo**)pn->data) == 0) { + break; + } + } + + // all pages are referenced by user, try to allocate new space + if (pn == NULL) { + pResultBuf->inMemPages = pResultBuf->inMemPages * 1.5; + assert(0); + return NULL; + } else { + tdListPopNode(pResultBuf->pPageList, pn); + if (flushPageToDisk(pResultBuf, *(SPageInfo**)pn->data) != TSDB_CODE_SUCCESS) { + return NULL; + } + + char* buf = (*(SPageInfo**)pn->data)->pData; + (*(SPageInfo**)pn->data)->pData = NULL; + + // load file in disk + fseek(pResultBuf->file, (*pi)->info.offset, SEEK_SET); + fread(buf, (*pi)->info.length, 1, pResultBuf->file); + + (*pi)->pData = buf; + return (*pi)->pData; + } + } + + return NULL; +} + +void releaseResBufPage(SDiskbasedResultBuf* pResultBuf, void* page) { + assert(pResultBuf != NULL && page != NULL); + char* p = (char*) page - POINTER_BYTES; + + SPageInfo* ppi = ((SPageInfo**) p)[0]; + + assert(T_REF_VAL_GET(ppi) > 0); + T_REF_DEC(ppi); +} + + int32_t getNumOfRowsPerPage(SDiskbasedResultBuf* pResultBuf) { return pResultBuf->numOfRowsPerPage; } SIDList getDataBufPagesIdList(SDiskbasedResultBuf* pResultBuf, int32_t groupId) { - int32_t slot = getGroupIndex(pResultBuf, groupId); - if (slot < 0) { + assert(pResultBuf != NULL); + + char** p = taosHashGet(pResultBuf->idsTable, (const char*)&groupId, sizeof(int32_t)); + if (p == NULL) { // it is a new group id return pResultBuf->emptyDummyIdList; } else { - return taosArrayGetP(pResultBuf->list, slot); + return (SArray*) (*p); } } @@ -188,12 +311,11 @@ void destroyResultBuf(SDiskbasedResultBuf* pResultBuf, void* handle) { return; } - if (FD_VALID(pResultBuf->fd)) { + if (pResultBuf->file != NULL) { qDebug("QInfo:%p disk-based output buffer closed, total:%" PRId64 " bytes, file created:%s, file size:%d", handle, pResultBuf->totalBufSize, pResultBuf->path, FILE_SIZE_ON_DISK(pResultBuf)); - close(pResultBuf->fd); - munmap(pResultBuf->pBuf, FILE_SIZE_ON_DISK(pResultBuf)); + fclose(pResultBuf->file); pResultBuf->pBuf = NULL; } else { qDebug("QInfo:%p disk-based output buffer closed, total:%" PRId64 " bytes, no file created", handle, @@ -203,17 +325,16 @@ void destroyResultBuf(SDiskbasedResultBuf* pResultBuf, void* handle) { unlink(pResultBuf->path); tfree(pResultBuf->path); - size_t size = taosArrayGetSize(pResultBuf->list); - for (int32_t i = 0; i < size; ++i) { - SArray* pa = taosArrayGetP(pResultBuf->list, i); - taosArrayDestroy(pa); - } +// size_t size = taosArrayGetSize(pResultBuf->list); +// for (int32_t i = 0; i < size; ++i) { +// SArray* pa = taosArrayGetP(pResultBuf->list, i); +// taosArrayDestroy(pa); +// } - taosArrayDestroy(pResultBuf->list); + tdListFree(pResultBuf->pPageList); taosArrayDestroy(pResultBuf->emptyDummyIdList); taosHashCleanup(pResultBuf->idsTable); - tfree(pResultBuf->iBuf); tfree(pResultBuf); } diff --git a/src/query/tests/resultBufferTest.cpp b/src/query/tests/resultBufferTest.cpp index 63ed89ab9f..93c1a65218 100644 --- a/src/query/tests/resultBufferTest.cpp +++ b/src/query/tests/resultBufferTest.cpp @@ -18,13 +18,35 @@ void simpleTest() { tFilePage* pBufPage = getNewDataBuf(pResultBuf, groupId, &pageId); ASSERT_TRUE(pBufPage != NULL); - ASSERT_EQ(getNumOfRowsPerPage(pResultBuf), (16384L - sizeof(int64_t))/64); - ASSERT_EQ(getResBufSize(pResultBuf), 1000*16384L); + ASSERT_EQ(getResBufSize(pResultBuf), 1024); SIDList list = getDataBufPagesIdList(pResultBuf, groupId); ASSERT_EQ(taosArrayGetSize(list), 1); ASSERT_EQ(getNumOfResultBufGroupId(pResultBuf), 1); - + + releaseResBufPage(pResultBuf, pBufPage); + + tFilePage* pBufPage1 = getNewDataBuf(pResultBuf, groupId, &pageId); + + tFilePage* t = getResBufPage(pResultBuf, pageId); + assert(t == pBufPage1); + + tFilePage* pBufPage2 = getNewDataBuf(pResultBuf, groupId, &pageId); + tFilePage* t1 = getResBufPage(pResultBuf, pageId); + assert(t1 == pBufPage2); + + tFilePage* pBufPage3 = getNewDataBuf(pResultBuf, groupId, &pageId); + tFilePage* t2 = getResBufPage(pResultBuf, pageId); + assert(t2 == pBufPage3); + + tFilePage* pBufPage4 = getNewDataBuf(pResultBuf, groupId, &pageId); + tFilePage* t3 = getResBufPage(pResultBuf, pageId); + assert(t3 == pBufPage4); + + tFilePage* pBufPage5 = getNewDataBuf(pResultBuf, groupId, &pageId); + tFilePage* t4 = getResBufPage(pResultBuf, pageId); + assert(t4 == pBufPage5); + destroyResultBuf(pResultBuf, NULL); } } // namespace diff --git a/src/util/src/tlist.c b/src/util/src/tlist.c index f402c2307e..93293b8b91 100644 --- a/src/util/src/tlist.c +++ b/src/util/src/tlist.c @@ -76,6 +76,7 @@ int tdListPrepend(SList *list, void *data) { SListNode *node = (SListNode *)malloc(sizeof(SListNode) + list->eleSize); if (node == NULL) return -1; + node->next = node->prev = NULL; memcpy((void *)(node->data), data, list->eleSize); tdListPrependNode(list, node); From 85def32807a982751c3aa760dbf9577699c01a4c Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 22 Jul 2020 14:18:18 +0800 Subject: [PATCH 13/42] [td-225] add check. --- src/tsdb/src/tsdbRead.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tsdb/src/tsdbRead.c b/src/tsdb/src/tsdbRead.c index 7577a8e8fc..63ad8701ff 100644 --- a/src/tsdb/src/tsdbRead.c +++ b/src/tsdb/src/tsdbRead.c @@ -1492,7 +1492,7 @@ static int32_t getDataBlocksInFilesImpl(STsdbQueryHandle* pQueryHandle, bool* ex return code; } - assert(pQueryHandle->pFileGroup != NULL); + assert(pQueryHandle->pFileGroup != NULL && pQueryHandle->numOfBlocks > 0); cur->slot = ASCENDING_TRAVERSE(pQueryHandle->order)? 0:pQueryHandle->numOfBlocks-1; cur->fid = pQueryHandle->pFileGroup->fileId; From e41aeb834903d2dc49fdf9400d7eb989925ef536 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 22 Jul 2020 15:15:37 +0800 Subject: [PATCH 14/42] [td-225] update intermediate buf page size. --- src/query/inc/qExtbuffer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/query/inc/qExtbuffer.h b/src/query/inc/qExtbuffer.h index b57c48933f..bde5fc35ac 100644 --- a/src/query/inc/qExtbuffer.h +++ b/src/query/inc/qExtbuffer.h @@ -28,9 +28,9 @@ extern "C" { #include "tdataformat.h" #include "talgo.h" -#define DEFAULT_PAGE_SIZE (1024L*4) // 16k larger than the SHistoInfo #define MAX_TMPFILE_PATH_LENGTH PATH_MAX -#define INITIAL_ALLOCATION_BUFFER_SIZE 64 +#define INITIAL_ALLOCATION_BUFFER_SIZE 64L +#define DEFAULT_PAGE_SIZE (1024L*(INITIAL_ALLOCATION_BUFFER_SIZE)) // 16k larger than the SHistoInfo typedef enum EXT_BUFFER_FLUSH_MODEL { /* From 8e97949a143d27ab7d15c73833687aca4d0f3f05 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 22 Jul 2020 17:04:05 +0800 Subject: [PATCH 15/42] [td-225] fix memory leaks. --- src/query/src/qResultbuf.c | 21 ++++++++++++++------- src/query/tests/CMakeLists.txt | 2 +- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/query/src/qResultbuf.c b/src/query/src/qResultbuf.c index dff629e6a8..e54217f7e5 100644 --- a/src/query/src/qResultbuf.c +++ b/src/query/src/qResultbuf.c @@ -184,7 +184,10 @@ tFilePage* getNewDataBuf(SDiskbasedResultBuf* pResultBuf, int32_t groupId, int32 pResultBuf->inMemPages, pResultBuf->pageSize); } else { tdListPopNode(pResultBuf->pPageList, pn); - if (flushPageToDisk(pResultBuf, *(SPageInfo**)pn->data) != TSDB_CODE_SUCCESS) { + SPageInfo* d = *(SPageInfo**) pn->data; + tfree(pn); + + if (flushPageToDisk(pResultBuf, d) != TSDB_CODE_SUCCESS) { return NULL; } } @@ -275,6 +278,8 @@ tFilePage* getResBufPage(SDiskbasedResultBuf* pResultBuf, int32_t id) { fread(buf, (*pi)->info.length, 1, pResultBuf->file); (*pi)->pData = buf; + + tfree(pn); return (*pi)->pData; } } @@ -292,7 +297,6 @@ void releaseResBufPage(SDiskbasedResultBuf* pResultBuf, void* page) { T_REF_DEC(ppi); } - int32_t getNumOfRowsPerPage(SDiskbasedResultBuf* pResultBuf) { return pResultBuf->numOfRowsPerPage; } SIDList getDataBufPagesIdList(SDiskbasedResultBuf* pResultBuf, int32_t groupId) { @@ -325,15 +329,18 @@ void destroyResultBuf(SDiskbasedResultBuf* pResultBuf, void* handle) { unlink(pResultBuf->path); tfree(pResultBuf->path); -// size_t size = taosArrayGetSize(pResultBuf->list); -// for (int32_t i = 0; i < size; ++i) { -// SArray* pa = taosArrayGetP(pResultBuf->list, i); -// taosArrayDestroy(pa); -// } + SHashMutableIterator* iter = taosHashCreateIter(pResultBuf->idsTable); + while(taosHashIterNext(iter)) { + SArray** p = (SArray**) taosHashIterGet(iter); + taosArrayDestroy(*p); + } + + taosHashDestroyIter(iter); tdListFree(pResultBuf->pPageList); taosArrayDestroy(pResultBuf->emptyDummyIdList); taosHashCleanup(pResultBuf->idsTable); + taosHashCleanup(pResultBuf->all); tfree(pResultBuf); } diff --git a/src/query/tests/CMakeLists.txt b/src/query/tests/CMakeLists.txt index 0ae8600756..86715a4a0f 100644 --- a/src/query/tests/CMakeLists.txt +++ b/src/query/tests/CMakeLists.txt @@ -10,6 +10,6 @@ IF (HEADER_GTEST_INCLUDE_DIR AND LIB_GTEST_STATIC_DIR) INCLUDE_DIRECTORIES(${HEADER_GTEST_INCLUDE_DIR}) AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR} SOURCE_LIST) - ADD_EXECUTABLE(queryTest ${SOURCE_LIST}) + ADD_EXECUTABLE(queryTest ./unitTest.cpp ./resultBufferTest.cpp) TARGET_LINK_LIBRARIES(queryTest taos query gtest pthread) ENDIF() \ No newline at end of file From b337c07e8dc670fbbd7135bca005a39a04603950 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 22 Jul 2020 17:08:56 +0800 Subject: [PATCH 16/42] [td-225] fix concurrent exec query. --- src/query/inc/qExtbuffer.h | 1 - src/query/src/qExecutor.c | 6 ++---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/src/query/inc/qExtbuffer.h b/src/query/inc/qExtbuffer.h index bde5fc35ac..a992f6153c 100644 --- a/src/query/inc/qExtbuffer.h +++ b/src/query/inc/qExtbuffer.h @@ -19,7 +19,6 @@ extern "C" { #endif - #include "os.h" #include "taosmsg.h" diff --git a/src/query/src/qExecutor.c b/src/query/src/qExecutor.c index 30d6cc288f..458ce228bb 100644 --- a/src/query/src/qExecutor.c +++ b/src/query/src/qExecutor.c @@ -5963,8 +5963,6 @@ static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQ pQuery->window.ekey, pQuery->order.order); setQueryStatus(pQuery, QUERY_COMPLETED); pQInfo->tableqinfoGroupInfo.numOfTables = 0; - - sem_post(&pQInfo->dataReady); return TSDB_CODE_SUCCESS; } @@ -5973,8 +5971,6 @@ static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQ if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) { qDebug("QInfo:%p no table qualified for tag filter, abort query", pQInfo); setQueryStatus(pQuery, QUERY_COMPLETED); - - sem_post(&pQInfo->dataReady); return TSDB_CODE_SUCCESS; } @@ -6319,6 +6315,8 @@ void qTableQuery(qinfo_t qinfo) { } if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) { + setQueryStatus(pQInfo->runtimeEnv.pQuery, QUERY_COMPLETED); + qDebug("QInfo:%p no table exists for query, abort", pQInfo); sem_post(&pQInfo->dataReady); return; From 057e808930c97d9f16ec8f51ef88f40f2a3e5126 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Wed, 22 Jul 2020 18:17:38 +0800 Subject: [PATCH 17/42] [td-225] for expriments. --- src/vnode/src/vnodeRead.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/src/vnode/src/vnodeRead.c b/src/vnode/src/vnodeRead.c index 973df7c5a1..83986de682 100644 --- a/src/vnode/src/vnodeRead.c +++ b/src/vnode/src/vnodeRead.c @@ -122,7 +122,8 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { // current connect is broken if (code == TSDB_CODE_SUCCESS) { - handle = qRegisterQInfo(pVnode->qMgmt, (uint64_t) pQInfo); +// handle = qRegisterQInfo(pVnode->qMgmt, (uint64_t) pQInfo); + handle = &pQInfo; if (handle == NULL) { // failed to register qhandle vError("vgId:%d QInfo:%p register qhandle failed, return to app, code:%s", pVnode->vgId, (void *)pQInfo, tstrerror(pRsp->code)); @@ -133,11 +134,11 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { pRsp->qhandle = htobe64((uint64_t) pQInfo); } - pQInfo = NULL; +// pQInfo = NULL; if (handle != NULL && vnodeNotifyCurrentQhandle(pReadMsg->rpcMsg.handle, *handle, pVnode->vgId) != TSDB_CODE_SUCCESS) { vError("vgId:%d, QInfo:%p, query discarded since link is broken, %p", pVnode->vgId, *handle, pReadMsg->rpcMsg.handle); pRsp->code = TSDB_CODE_RPC_NETWORK_UNAVAIL; - qReleaseQInfo(pVnode->qMgmt, (void**) &handle, true); +// qReleaseQInfo(pVnode->qMgmt, (void**) &handle, true); return pRsp->code; } } else { @@ -148,12 +149,14 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { vDebug("vgId:%d, QInfo:%p, dnode query msg disposed, register qhandle and return to app", vgId, *handle); vnodePutItemIntoReadQueue(pVnode, *handle); - qReleaseQInfo(pVnode->qMgmt, (void**) &handle, false); +// qReleaseQInfo(pVnode->qMgmt, (void**) &handle, false); } } else { assert(pCont != NULL); - handle = qAcquireQInfo(pVnode->qMgmt, (uint64_t) pCont); + void* p = (void*) pCont; + handle = &p; +// handle = qAcquireQInfo(pVnode->qMgmt, (uint64_t) pCont); if (handle == NULL) { vWarn("QInfo:%p invalid qhandle in continuing exec query, conn:%p", (void*) pCont, pReadMsg->rpcMsg.handle); code = TSDB_CODE_QRY_INVALID_QHANDLE; @@ -162,7 +165,7 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { code = TSDB_CODE_VND_ACTION_IN_PROGRESS; qTableQuery(*handle); // do execute query } - qReleaseQInfo(pVnode->qMgmt, (void**) &handle, false); +// qReleaseQInfo(pVnode->qMgmt, (void**) &handle, false); } return code; @@ -181,7 +184,11 @@ static int32_t vnodeProcessFetchMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { memset(pRet, 0, sizeof(SRspRet)); int32_t code = TSDB_CODE_SUCCESS; - void** handle = qAcquireQInfo(pVnode->qMgmt, pRetrieve->qhandle); + void** handle = NULL; + void* p1 = (void*) pRetrieve->qhandle; + handle = &p1; + +// void** handle = qAcquireQInfo(pVnode->qMgmt, pRetrieve->qhandle); if (handle == NULL || (*handle) != (void*) pRetrieve->qhandle) { code = TSDB_CODE_QRY_INVALID_QHANDLE; vDebug("vgId:%d, invalid qhandle in fetch result, QInfo:%p", pVnode->vgId, (void*) pRetrieve->qhandle); @@ -201,7 +208,7 @@ static int32_t vnodeProcessFetchMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { if (pRetrieve->free == 1) { vDebug("vgId:%d, QInfo:%p, retrieve msg received to kill query and free qhandle", pVnode->vgId, *handle); qKillQuery(*handle); - qReleaseQInfo(pVnode->qMgmt, (void**) &handle, true); +// qReleaseQInfo(pVnode->qMgmt, (void**) &handle, true); pRet->rsp = (SRetrieveTableRsp *)rpcMallocCont(sizeof(SRetrieveTableRsp)); pRet->len = sizeof(SRetrieveTableRsp); @@ -234,7 +241,8 @@ static int32_t vnodeProcessFetchMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { } } - qReleaseQInfo(pVnode->qMgmt, (void**) &handle, freeHandle); + UNUSED(freeHandle); +// qReleaseQInfo(pVnode->qMgmt, (void**) &handle, freeHandle); return code; } From a6d92173d937d1cf5e276d5a014b5e9f02b36979 Mon Sep 17 00:00:00 2001 From: Liu Tao Date: Wed, 22 Jul 2020 18:53:49 +0800 Subject: [PATCH 18/42] performance tune --- src/client/src/tscUtil.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/client/src/tscUtil.c b/src/client/src/tscUtil.c index 27f2535bdc..1ef5d3feaf 100644 --- a/src/client/src/tscUtil.c +++ b/src/client/src/tscUtil.c @@ -351,14 +351,14 @@ void tscPartiallyFreeSqlObj(SSqlObj* pSql) { int32_t cmd = pCmd->command; if (cmd < TSDB_SQL_INSERT || cmd == TSDB_SQL_RETRIEVE_LOCALMERGE || cmd == TSDB_SQL_RETRIEVE_EMPTY_RESULT || cmd == TSDB_SQL_TABLE_JOIN_RETRIEVE) { - tscRemoveFromSqlList(pSql); + //tscRemoveFromSqlList(pSql); } // pSql->sqlstr will be used by tscBuildQueryStreamDesc if (pObj->signature == pObj) { - pthread_mutex_lock(&pObj->mutex); + //pthread_mutex_lock(&pObj->mutex); tfree(pSql->sqlstr); - pthread_mutex_unlock(&pObj->mutex); + //pthread_mutex_unlock(&pObj->mutex); } tscFreeSqlResult(pSql); @@ -1885,7 +1885,7 @@ void tscDoQuery(SSqlObj* pSql) { } if (pCmd->command == TSDB_SQL_SELECT) { - tscAddIntoSqlList(pSql); + //tscAddIntoSqlList(pSql); } if (pCmd->dataSourceType == DATA_FROM_DATA_FILE) { From 10b9968d4a0a690b7ebac0c9b6f23b868f7d5bd3 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 23 Jul 2020 13:40:34 +0800 Subject: [PATCH 19/42] [td-225] fix bugs in resbuf --- src/query/inc/qResultbuf.h | 22 ++-- src/query/src/qResultbuf.c | 153 +++++++++++++++++---------- src/query/tests/resultBufferTest.cpp | 62 ++++++++++- src/util/src/tcompare.c | 2 +- 4 files changed, 170 insertions(+), 69 deletions(-) diff --git a/src/query/inc/qResultbuf.h b/src/query/inc/qResultbuf.h index b4c830370c..da5f46b5e9 100644 --- a/src/query/inc/qResultbuf.h +++ b/src/query/inc/qResultbuf.h @@ -49,20 +49,19 @@ typedef struct SDiskbasedResultBuf { int32_t numOfRowsPerPage; int32_t numOfPages; int64_t totalBufSize; -// int32_t fd; + int64_t diskFileSize; // disk file size FILE* file; int32_t allocateId; // allocated page id -// int32_t incStep; // minimum allocated pages - void* pBuf; // mmap buffer pointer char* path; // file path int32_t pageSize; // current used page size int32_t inMemPages; // numOfPages that are allocated in memory - SHashObj* idsTable; // id hash table + SHashObj* groupSet; // id hash table SHashObj* all; - SList* pPageList; + SList* lruList; void* handle; // for debug purpose void* emptyDummyIdList; // dummy id list - bool comp; + bool comp; // compressed before flushed to disk + void* assistBuf; // assistant buffer for compress data SArray* pFree; // free area in file int32_t nextPos; // next page flush position } SDiskbasedResultBuf; @@ -95,7 +94,7 @@ tFilePage* getNewDataBuf(SDiskbasedResultBuf* pResultBuf, int32_t groupId, int32 * @param pResultBuf * @return */ -int32_t getNumOfRowsPerPage(SDiskbasedResultBuf* pResultBuf); +size_t getNumOfRowsPerPage(const SDiskbasedResultBuf* pResultBuf); /** * @@ -113,6 +112,11 @@ SIDList getDataBufPagesIdList(SDiskbasedResultBuf* pResultBuf, int32_t groupId); */ tFilePage* getResBufPage(SDiskbasedResultBuf* pResultBuf, int32_t id); +/** + * release the referenced buf pages + * @param pResultBuf + * @param page + */ void releaseResBufPage(SDiskbasedResultBuf* pResultBuf, void* page); /** @@ -120,14 +124,14 @@ void releaseResBufPage(SDiskbasedResultBuf* pResultBuf, void* page); * @param pResultBuf * @return */ -int32_t getResBufSize(SDiskbasedResultBuf* pResultBuf); +size_t getResBufSize(const SDiskbasedResultBuf* pResultBuf); /** * get the number of groups in the result buffer * @param pResultBuf * @return */ -int32_t getNumOfResultBufGroupId(SDiskbasedResultBuf* pResultBuf); +size_t getNumOfResultBufGroupId(const SDiskbasedResultBuf* pResultBuf); /** * destroy result buffer diff --git a/src/query/src/qResultbuf.c b/src/query/src/qResultbuf.c index e54217f7e5..fa2bb814f2 100644 --- a/src/query/src/qResultbuf.c +++ b/src/query/src/qResultbuf.c @@ -1,5 +1,6 @@ #include "qResultbuf.h" #include +#include #include "hash.h" #include "qExtbuffer.h" #include "queryLog.h" @@ -24,11 +25,13 @@ int32_t createDiskbasedResultBuffer(SDiskbasedResultBuf** pResultBuf, int32_t nu pResBuf->totalBufSize = pResBuf->numOfPages * pagesize; pResBuf->allocateId = -1; - pResBuf->pPageList = tdListNew(POINTER_BYTES); + pResBuf->lruList = tdListNew(POINTER_BYTES); // init id hash table - pResBuf->idsTable = taosHashInit(10, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false); + pResBuf->groupSet = taosHashInit(10, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false); pResBuf->all = taosHashInit(10, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false); + pResBuf->assistBuf = malloc(pResBuf->pageSize + 2); // EXTRA BYTES + pResBuf->comp = true; char path[PATH_MAX] = {0}; getTmpfilePath("qbuf", path); @@ -43,25 +46,28 @@ int32_t createDiskbasedResultBuffer(SDiskbasedResultBuf** pResultBuf, int32_t nu return TSDB_CODE_SUCCESS; } -int32_t getNumOfResultBufGroupId(SDiskbasedResultBuf* pResultBuf) { return taosHashGetSize(pResultBuf->idsTable); } - -int32_t getResBufSize(SDiskbasedResultBuf* pResultBuf) { return pResultBuf->totalBufSize; } - #define NUM_OF_PAGES_ON_DISK(_r) ((_r)->numOfPages - (_r)->inMemPages) #define FILE_SIZE_ON_DISK(_r) (NUM_OF_PAGES_ON_DISK(_r) * (_r)->pageSize) static int32_t createDiskResidesBuf(SDiskbasedResultBuf* pResultBuf) { -// pResultBuf->fd = open(pResultBuf->path, O_CREAT | O_RDWR, 0666); - pResultBuf->file = fopen(pResultBuf->path, "w"); + pResultBuf->file = fopen(pResultBuf->path, "wb+"); if (pResultBuf->file == NULL) { qError("failed to create tmp file: %s on disk. %s", pResultBuf->path, strerror(errno)); return TAOS_SYSTEM_ERROR(errno); } + return TSDB_CODE_SUCCESS; } -static char* doCompressData(void* data, int32_t srcSize, int32_t *dst) { // do nothing - *dst = srcSize; +static char* doCompressData(void* data, int32_t srcSize, int32_t *dst, bool comp, void* assistBuf) { // do nothing + if (!comp) { + *dst = srcSize; + return data; + } + + *dst = tsCompressString(data, srcSize, 1, assistBuf, srcSize, ONE_STAGE_COMP, NULL, 0); + + memcpy(data, assistBuf, *dst); return data; } @@ -89,57 +95,64 @@ static int32_t allocatePositionInFile(SDiskbasedResultBuf* pResultBuf, size_t si } } -static void doFlushPageToDisk(SDiskbasedResultBuf* pResultBuf, SPageInfo* pg) { - assert(T_REF_VAL_GET(pg) == 0); +static char* doFlushPageToDisk(SDiskbasedResultBuf* pResultBuf, SPageInfo* pg) { + assert(T_REF_VAL_GET(pg) == 0 && pg->pData != NULL); int32_t size = -1; - char* t = doCompressData(pg->pData + POINTER_BYTES, pResultBuf->pageSize, &size); + char* t = doCompressData(pg->pData + POINTER_BYTES, pResultBuf->pageSize, &size, pResultBuf->comp, pResultBuf->assistBuf); + pg->info.length = size; // this page is flushed to disk for the first time if (pg->info.offset == -1) { - int32_t offset = allocatePositionInFile(pResultBuf, size); + pg->info.offset = allocatePositionInFile(pResultBuf, size); pResultBuf->nextPos += size; - fseek(pResultBuf->file, offset, SEEK_SET); - fwrite(t, size, 1, pResultBuf->file); + fseek(pResultBuf->file, pg->info.offset, SEEK_SET); + int32_t ret = fwrite(t, 1, size, pResultBuf->file); + + UNUSED(ret); } else { if (pg->info.length < size) { // length becomes greater, current space is not enough, allocate new place. //1. add current space to free list taosArrayPush(pResultBuf->pFree, &pg->info); //2. allocate new position, and update the info - int32_t offset = allocatePositionInFile(pResultBuf, size); + pg->info.offset = allocatePositionInFile(pResultBuf, size); pResultBuf->nextPos += size; //3. write to disk. - fseek(pResultBuf->file, offset, SEEK_SET); + fseek(pResultBuf->file, pg->info.offset, SEEK_SET); fwrite(t, size, 1, pResultBuf->file); } } + + char* ret = pg->pData; + pg->pData = NULL; + + return ret; } -static int32_t flushPageToDisk(SDiskbasedResultBuf* pResultBuf, SPageInfo* pg) { +static char* flushPageToDisk(SDiskbasedResultBuf* pResultBuf, SPageInfo* pg) { int32_t ret = TSDB_CODE_SUCCESS; assert(pResultBuf->numOfPages * pResultBuf->pageSize == pResultBuf->totalBufSize && pResultBuf->numOfPages >= pResultBuf->inMemPages); - if (pResultBuf->pBuf == NULL) { - assert(pResultBuf->file == NULL); + if (pResultBuf->file == NULL) { if ((ret = createDiskResidesBuf(pResultBuf)) != TSDB_CODE_SUCCESS) { - return ret; + terrno = ret; + return NULL; } } - doFlushPageToDisk(pResultBuf, pg); - return TSDB_CODE_SUCCESS; + return doFlushPageToDisk(pResultBuf, pg); } #define NO_AVAILABLE_PAGES(_b) ((_b)->numOfPages >= (_b)->inMemPages) static SIDList addNewGroup(SDiskbasedResultBuf* pResultBuf, int32_t groupId) { - assert(taosHashGet(pResultBuf->idsTable, (const char*) &groupId, sizeof(int32_t)) == NULL); + assert(taosHashGet(pResultBuf->groupSet, (const char*) &groupId, sizeof(int32_t)) == NULL); SArray* pa = taosArrayInit(1, sizeof(SPageInfo)); - int32_t ret = taosHashPut(pResultBuf->idsTable, (const char*)&groupId, sizeof(int32_t), &pa, POINTER_BYTES); + int32_t ret = taosHashPut(pResultBuf->groupSet, (const char*)&groupId, sizeof(int32_t), &pa, POINTER_BYTES); assert(ret == 0); return pa; @@ -148,7 +161,7 @@ static SIDList addNewGroup(SDiskbasedResultBuf* pResultBuf, int32_t groupId) { static SPageInfo* registerPage(SDiskbasedResultBuf* pResultBuf, int32_t groupId, int32_t pageId) { SIDList list = NULL; - char** p = taosHashGet(pResultBuf->idsTable, (const char*)&groupId, sizeof(int32_t)); + char** p = taosHashGet(pResultBuf->groupSet, (const char*)&groupId, sizeof(int32_t)); if (p == NULL) { // it is a new group id list = addNewGroup(pResultBuf, groupId); } else { @@ -162,10 +175,13 @@ static SPageInfo* registerPage(SDiskbasedResultBuf* pResultBuf, int32_t groupId, } tFilePage* getNewDataBuf(SDiskbasedResultBuf* pResultBuf, int32_t groupId, int32_t* pageId) { + char* allocPg = NULL; + if (NO_AVAILABLE_PAGES(pResultBuf)) { + // get the last page in linked list SListIter iter = {0}; - tdListInitIter(pResultBuf->pPageList, &iter, TD_LIST_BACKWARD); + tdListInitIter(pResultBuf->lruList, &iter, TD_LIST_BACKWARD); SListNode* pn = NULL; while((pn = tdListNext(&iter)) != NULL) { @@ -183,11 +199,12 @@ tFilePage* getNewDataBuf(SDiskbasedResultBuf* pResultBuf, int32_t groupId, int32 qWarn("%p in memory buf page not sufficient, expand from %d to %d, page size:%d", pResultBuf, prev, pResultBuf->inMemPages, pResultBuf->pageSize); } else { - tdListPopNode(pResultBuf->pPageList, pn); + tdListPopNode(pResultBuf->lruList, pn); SPageInfo* d = *(SPageInfo**) pn->data; tfree(pn); - if (flushPageToDisk(pResultBuf, d) != TSDB_CODE_SUCCESS) { + allocPg = flushPageToDisk(pResultBuf, d); + if (allocPg == NULL) { return NULL; } } @@ -200,14 +217,19 @@ tFilePage* getNewDataBuf(SDiskbasedResultBuf* pResultBuf, int32_t groupId, int32 SPageInfo* pi = registerPage(pResultBuf, groupId, *pageId); // add to LRU list - assert(listNEles(pResultBuf->pPageList) < pResultBuf->inMemPages); - tdListPrepend(pResultBuf->pPageList, &pi); + assert(listNEles(pResultBuf->lruList) < pResultBuf->inMemPages); + tdListPrepend(pResultBuf->lruList, &pi); // add to hash map taosHashPut(pResultBuf->all, pageId, sizeof(int32_t), &pi, POINTER_BYTES); // allocate buf - pi->pData = calloc(1, pResultBuf->pageSize + POINTER_BYTES); + if (allocPg == NULL) { + pi->pData = calloc(1, pResultBuf->pageSize + POINTER_BYTES); + } else { + pi->pData = allocPg; + } + pResultBuf->totalBufSize += pResultBuf->pageSize; T_REF_INC(pi); // add ref count @@ -231,41 +253,47 @@ tFilePage* getResBufPage(SDiskbasedResultBuf* pResultBuf, int32_t id) { SListNode* pnode = NULL; // todo speed up SListIter iter = {0}; - tdListInitIter(pResultBuf->pPageList, &iter, TD_LIST_FORWARD); + tdListInitIter(pResultBuf->lruList, &iter, TD_LIST_FORWARD); while((pnode = tdListNext(&iter)) != NULL) { SPageInfo** pInfo = (SPageInfo**) pnode->data; // remove it and add it into the front of linked-list if ((*pInfo)->pageId == id) { - tdListPopNode(pResultBuf->pPageList, pnode); - tdListPrependNode(pResultBuf->pPageList, pnode); + tdListPopNode(pResultBuf->lruList, pnode); + tdListPrependNode(pResultBuf->lruList, pnode); T_REF_INC(*(SPageInfo**)pnode->data); return ((*(SPageInfo**)pnode->data)->pData + POINTER_BYTES); } } } else { // not in memory - // choose the be flushed page - // get the last page in linked list + assert((*pi)->pData == NULL && (*pi)->info.length >= 0 && (*pi)->info.offset >= 0); + + // choose the be flushed page: get the last page in linked list SListIter iter1 = {0}; - tdListInitIter(pResultBuf->pPageList, &iter1, TD_LIST_BACKWARD); + tdListInitIter(pResultBuf->lruList, &iter1, TD_LIST_BACKWARD); SListNode* pn = NULL; while((pn = tdListNext(&iter1)) != NULL) { assert(pn != NULL); - if (T_REF_VAL_GET(*(SPageInfo**)pn->data) == 0) { + if (T_REF_VAL_GET(*(SPageInfo**)(pn->data)) == 0) { break; } } // all pages are referenced by user, try to allocate new space if (pn == NULL) { + int32_t prev = pResultBuf->inMemPages; pResultBuf->inMemPages = pResultBuf->inMemPages * 1.5; - assert(0); - return NULL; + + qWarn("%p in memory buf page not sufficient, expand from %d to %d, page size:%d", pResultBuf, prev, + pResultBuf->inMemPages, pResultBuf->pageSize); + + (*pi)->pData = calloc(1, pResultBuf->pageSize + POINTER_BYTES); } else { - tdListPopNode(pResultBuf->pPageList, pn); + tdListPopNode(pResultBuf->lruList, pn); + if (flushPageToDisk(pResultBuf, *(SPageInfo**)pn->data) != TSDB_CODE_SUCCESS) { return NULL; } @@ -273,15 +301,23 @@ tFilePage* getResBufPage(SDiskbasedResultBuf* pResultBuf, int32_t id) { char* buf = (*(SPageInfo**)pn->data)->pData; (*(SPageInfo**)pn->data)->pData = NULL; - // load file in disk - fseek(pResultBuf->file, (*pi)->info.offset, SEEK_SET); - fread(buf, (*pi)->info.length, 1, pResultBuf->file); - (*pi)->pData = buf; + ((void**)((*pi)->pData))[0] = (*pi); tfree(pn); - return (*pi)->pData; } + + // load file in disk + int32_t ret = fseek(pResultBuf->file, (*pi)->info.offset, SEEK_SET); + ret = fread((*pi)->pData + POINTER_BYTES, 1, (*pi)->info.length, pResultBuf->file); + if (ret != (*pi)->info.length) { + terrno = errno; + return NULL; + } + + // todo do decomp + + return (*pi)->pData + POINTER_BYTES; } return NULL; @@ -297,12 +333,16 @@ void releaseResBufPage(SDiskbasedResultBuf* pResultBuf, void* page) { T_REF_DEC(ppi); } -int32_t getNumOfRowsPerPage(SDiskbasedResultBuf* pResultBuf) { return pResultBuf->numOfRowsPerPage; } +size_t getNumOfRowsPerPage(const SDiskbasedResultBuf* pResultBuf) { return pResultBuf->numOfRowsPerPage; } + +size_t getNumOfResultBufGroupId(const SDiskbasedResultBuf* pResultBuf) { return taosHashGetSize(pResultBuf->groupSet); } + +size_t getResBufSize(const SDiskbasedResultBuf* pResultBuf) { return pResultBuf->totalBufSize; } SIDList getDataBufPagesIdList(SDiskbasedResultBuf* pResultBuf, int32_t groupId) { assert(pResultBuf != NULL); - char** p = taosHashGet(pResultBuf->idsTable, (const char*)&groupId, sizeof(int32_t)); + char** p = taosHashGet(pResultBuf->groupSet, (const char*)&groupId, sizeof(int32_t)); if (p == NULL) { // it is a new group id return pResultBuf->emptyDummyIdList; } else { @@ -320,7 +360,6 @@ void destroyResultBuf(SDiskbasedResultBuf* pResultBuf, void* handle) { pResultBuf->totalBufSize, pResultBuf->path, FILE_SIZE_ON_DISK(pResultBuf)); fclose(pResultBuf->file); - pResultBuf->pBuf = NULL; } else { qDebug("QInfo:%p disk-based output buffer closed, total:%" PRId64 " bytes, no file created", handle, pResultBuf->totalBufSize); @@ -329,19 +368,25 @@ void destroyResultBuf(SDiskbasedResultBuf* pResultBuf, void* handle) { unlink(pResultBuf->path); tfree(pResultBuf->path); - SHashMutableIterator* iter = taosHashCreateIter(pResultBuf->idsTable); + SHashMutableIterator* iter = taosHashCreateIter(pResultBuf->groupSet); while(taosHashIterNext(iter)) { SArray** p = (SArray**) taosHashIterGet(iter); + size_t n = taosArrayGetSize(*p); + for(int32_t i = 0; i < n; ++i) { + SPageInfo* pi = taosArrayGet(*p, i); + tfree(pi->pData); + } taosArrayDestroy(*p); } taosHashDestroyIter(iter); - tdListFree(pResultBuf->pPageList); + tdListFree(pResultBuf->lruList); taosArrayDestroy(pResultBuf->emptyDummyIdList); - taosHashCleanup(pResultBuf->idsTable); + taosHashCleanup(pResultBuf->groupSet); taosHashCleanup(pResultBuf->all); + tfree(pResultBuf->assistBuf); tfree(pResultBuf); } diff --git a/src/query/tests/resultBufferTest.cpp b/src/query/tests/resultBufferTest.cpp index 93c1a65218..53a05925c2 100644 --- a/src/query/tests/resultBufferTest.cpp +++ b/src/query/tests/resultBufferTest.cpp @@ -29,28 +29,80 @@ void simpleTest() { tFilePage* pBufPage1 = getNewDataBuf(pResultBuf, groupId, &pageId); tFilePage* t = getResBufPage(pResultBuf, pageId); - assert(t == pBufPage1); + ASSERT_TRUE(t == pBufPage1); tFilePage* pBufPage2 = getNewDataBuf(pResultBuf, groupId, &pageId); tFilePage* t1 = getResBufPage(pResultBuf, pageId); - assert(t1 == pBufPage2); + ASSERT_TRUE(t1 == pBufPage2); tFilePage* pBufPage3 = getNewDataBuf(pResultBuf, groupId, &pageId); tFilePage* t2 = getResBufPage(pResultBuf, pageId); - assert(t2 == pBufPage3); + ASSERT_TRUE(t2 == pBufPage3); tFilePage* pBufPage4 = getNewDataBuf(pResultBuf, groupId, &pageId); tFilePage* t3 = getResBufPage(pResultBuf, pageId); - assert(t3 == pBufPage4); + ASSERT_TRUE(t3 == pBufPage4); tFilePage* pBufPage5 = getNewDataBuf(pResultBuf, groupId, &pageId); tFilePage* t4 = getResBufPage(pResultBuf, pageId); - assert(t4 == pBufPage5); + ASSERT_TRUE(t4 == pBufPage5); + + destroyResultBuf(pResultBuf, NULL); +} + +void writeDownTest() { + SDiskbasedResultBuf* pResultBuf = NULL; + int32_t ret = createDiskbasedResultBuffer(&pResultBuf, 1000, 64, 1024, 4, NULL); + + int32_t pageId = 0; + int32_t writePageId = 0; + int32_t groupId = 0; + int32_t nx = 12345; + + tFilePage* pBufPage = getNewDataBuf(pResultBuf, groupId, &pageId); + ASSERT_TRUE(pBufPage != NULL); + + *(int32_t*)(pBufPage->data) = nx; + writePageId = pageId; + releaseResBufPage(pResultBuf, pBufPage); + + tFilePage* pBufPage1 = getNewDataBuf(pResultBuf, groupId, &pageId); + tFilePage* t1 = getResBufPage(pResultBuf, pageId); + ASSERT_TRUE(t1 == pBufPage1); + ASSERT_TRUE(pageId == 1); + + tFilePage* pBufPage2 = getNewDataBuf(pResultBuf, groupId, &pageId); + tFilePage* t2 = getResBufPage(pResultBuf, pageId); + ASSERT_TRUE(t2 == pBufPage2); + ASSERT_TRUE(pageId == 2); + + tFilePage* pBufPage3 = getNewDataBuf(pResultBuf, groupId, &pageId); + tFilePage* t3 = getResBufPage(pResultBuf, pageId); + ASSERT_TRUE(t3 == pBufPage3); + ASSERT_TRUE(pageId == 3); + + tFilePage* pBufPage4 = getNewDataBuf(pResultBuf, groupId, &pageId); + tFilePage* t4 = getResBufPage(pResultBuf, pageId); + ASSERT_TRUE(t4 == pBufPage4); + ASSERT_TRUE(pageId == 4); + releaseResBufPage(pResultBuf, t4); + + // flush the written page to disk, and read it out again + tFilePage* pBufPagex = getResBufPage(pResultBuf, writePageId); + ASSERT_EQ(*(int32_t*)pBufPagex->data, nx); + + SArray* pa = getDataBufPagesIdList(pResultBuf, groupId); + ASSERT_EQ(taosArrayGetSize(pa), 5); + + destroyResultBuf(pResultBuf, NULL); } } // namespace + TEST(testCase, resultBufferTest) { + srand(time(NULL)); simpleTest(); + writeDownTest(); } diff --git a/src/util/src/tcompare.c b/src/util/src/tcompare.c index 889d38ff20..9564588254 100644 --- a/src/util/src/tcompare.c +++ b/src/util/src/tcompare.c @@ -1,6 +1,6 @@ #include "taosdef.h" #include "tcompare.h" -#include +#include "tarray.h" #include "tutil.h" int32_t compareInt32Val(const void *pLeft, const void *pRight) { From 74a5a231ad4a372c7da16d3ef643bb6892aa9710 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 23 Jul 2020 14:31:44 +0800 Subject: [PATCH 20/42] [td-225]add some logs. --- src/tsdb/src/tsdbRead.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/tsdb/src/tsdbRead.c b/src/tsdb/src/tsdbRead.c index 63ad8701ff..ccfcff3599 100644 --- a/src/tsdb/src/tsdbRead.c +++ b/src/tsdb/src/tsdbRead.c @@ -622,7 +622,7 @@ static int32_t getFileCompInfo(STsdbQueryHandle* pQueryHandle, int32_t* numOfBlo .uid = (_checkInfo)->tableId.uid}) -static bool doLoadFileDataBlock(STsdbQueryHandle* pQueryHandle, SCompBlock* pBlock, STableCheckInfo* pCheckInfo) { +static bool doLoadFileDataBlock(STsdbQueryHandle* pQueryHandle, SCompBlock* pBlock, STableCheckInfo* pCheckInfo, int32_t slotIndex) { STsdbRepo *pRepo = pQueryHandle->pTsdb; bool blockLoaded = false; int64_t st = taosGetTimestampUs(); @@ -657,8 +657,8 @@ static bool doLoadFileDataBlock(STsdbQueryHandle* pQueryHandle, SCompBlock* pBlo int64_t elapsedTime = (taosGetTimestampUs() - st); pQueryHandle->cost.blockLoadTime += elapsedTime; - tsdbDebug("%p load file block into buffer, brange:%"PRId64"-%"PRId64" , rows:%d, elapsed time:%"PRId64 " us", - pQueryHandle, pBlock->keyFirst, pBlock->keyLast, pBlock->numOfRows, elapsedTime); + tsdbDebug("%p load file block into buffer, index:%d, brange:%"PRId64"-%"PRId64" , rows:%d, elapsed time:%"PRId64 " us, %p", + pQueryHandle, slotIndex, pBlock->keyFirst, pBlock->keyLast, pBlock->numOfRows, elapsedTime, pQueryHandle->qinfo); return blockLoaded; } @@ -681,8 +681,7 @@ static void handleDataMergeIfNeeded(STsdbQueryHandle* pQueryHandle, SCompBlock* // do not load file block into buffer int32_t step = ASCENDING_TRAVERSE(pQueryHandle->order) ? 1 : -1; - cur->rows = tsdbReadRowsFromCache(pCheckInfo, binfo.window.skey - step, - pQueryHandle->outputCapacity, &cur->win, pQueryHandle); + cur->rows = tsdbReadRowsFromCache(pCheckInfo, binfo.window.skey - step, pQueryHandle->outputCapacity, &cur->win, pQueryHandle); pQueryHandle->realNumOfRows = cur->rows; // update the last key value @@ -696,7 +695,7 @@ static void handleDataMergeIfNeeded(STsdbQueryHandle* pQueryHandle, SCompBlock* return; } - doLoadFileDataBlock(pQueryHandle, pBlock, pCheckInfo); + doLoadFileDataBlock(pQueryHandle, pBlock, pCheckInfo, cur->slot); doMergeTwoLevelData(pQueryHandle, pCheckInfo, pBlock); } else { /* @@ -723,7 +722,7 @@ static bool loadFileDataBlock(STsdbQueryHandle* pQueryHandle, SCompBlock* pBlock if (ASCENDING_TRAVERSE(pQueryHandle->order)) { // query ended in/started from current block if (pQueryHandle->window.ekey < pBlock->keyLast || pCheckInfo->lastKey > pBlock->keyFirst) { - if (!doLoadFileDataBlock(pQueryHandle, pBlock, pCheckInfo)) { + if (!doLoadFileDataBlock(pQueryHandle, pBlock, pCheckInfo, cur->slot)) { return false; } @@ -744,7 +743,7 @@ static bool loadFileDataBlock(STsdbQueryHandle* pQueryHandle, SCompBlock* pBlock } } else { //desc order, query ended in current block if (pQueryHandle->window.ekey > pBlock->keyFirst || pCheckInfo->lastKey < pBlock->keyLast) { - if (!doLoadFileDataBlock(pQueryHandle, pBlock, pCheckInfo)) { + if (!doLoadFileDataBlock(pQueryHandle, pBlock, pCheckInfo, cur->slot)) { return false; } @@ -1859,8 +1858,8 @@ static int tsdbReadRowsFromCache(STableCheckInfo* pCheckInfo, TSKEY maxKey, int } int64_t elapsedTime = taosGetTimestampUs() - st; - tsdbDebug("%p build data block from cache completed, elapsed time:%"PRId64" us, numOfRows:%d, numOfCols:%d", pQueryHandle, - elapsedTime, numOfRows, numOfCols); + tsdbDebug("%p build data block from cache completed, elapsed time:%"PRId64" us, numOfRows:%d, numOfCols:%d, %p", pQueryHandle, + elapsedTime, numOfRows, numOfCols, pQueryHandle->qinfo); return numOfRows; } @@ -1975,7 +1974,7 @@ SArray* tsdbRetrieveDataBlock(TsdbQueryHandleT* pQueryHandle, SArray* pIdList) { return pHandle->pColumns; } else { // only load the file block SCompBlock* pBlock = pBlockInfo->compBlock; - doLoadFileDataBlock(pHandle, pBlock, pCheckInfo); + doLoadFileDataBlock(pHandle, pBlock, pCheckInfo, pHandle->cur.slot); // todo refactor int32_t numOfRows = copyDataFromFileBlock(pHandle, pHandle->outputCapacity, 0, 0, pBlock->numOfRows - 1); From b472a553830a89735b5383d277e074fe9119847c Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 23 Jul 2020 14:50:25 +0800 Subject: [PATCH 21/42] [td-225] destory qhandle. --- src/vnode/src/vnodeRead.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/vnode/src/vnodeRead.c b/src/vnode/src/vnodeRead.c index 83986de682..2586a2db99 100644 --- a/src/vnode/src/vnodeRead.c +++ b/src/vnode/src/vnodeRead.c @@ -236,6 +236,7 @@ static int32_t vnodeProcessFetchMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { freeHandle = false; } else { qKillQuery(*handle); + qDestroyQueryInfo(*handle); freeHandle = true; } } From da88d370db238662dfeed1d3788efb1bd2153c60 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 23 Jul 2020 17:51:06 +0800 Subject: [PATCH 22/42] [td-225] update test scripts. --- src/query/inc/qExtbuffer.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/query/inc/qExtbuffer.h b/src/query/inc/qExtbuffer.h index b57c48933f..aa30de4d0f 100644 --- a/src/query/inc/qExtbuffer.h +++ b/src/query/inc/qExtbuffer.h @@ -19,7 +19,6 @@ extern "C" { #endif - #include "os.h" #include "taosmsg.h" @@ -28,9 +27,9 @@ extern "C" { #include "tdataformat.h" #include "talgo.h" -#define DEFAULT_PAGE_SIZE (1024L*4) // 16k larger than the SHistoInfo #define MAX_TMPFILE_PATH_LENGTH PATH_MAX -#define INITIAL_ALLOCATION_BUFFER_SIZE 64 +#define INITIAL_ALLOCATION_BUFFER_SIZE 1L +#define DEFAULT_PAGE_SIZE (4096L*(INITIAL_ALLOCATION_BUFFER_SIZE)) // 16k larger than the SHistoInfo typedef enum EXT_BUFFER_FLUSH_MODEL { /* From 26bbbe221d0dcacda58b0d6d314278efecbcca49 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Fri, 24 Jul 2020 03:02:18 +0000 Subject: [PATCH 23/42] add query not ready msg --- src/dnode/src/dnodeVRead.c | 2 +- src/inc/taoserror.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/dnode/src/dnodeVRead.c b/src/dnode/src/dnodeVRead.c index bbea1a5e0b..ebbad525dd 100644 --- a/src/dnode/src/dnodeVRead.c +++ b/src/dnode/src/dnodeVRead.c @@ -206,7 +206,7 @@ static void *dnodeProcessReadQueue(void *param) { taosMsg[pReadMsg->rpcMsg.msgType], type); int32_t code = vnodeProcessRead(pVnode, pReadMsg); - if (type == TAOS_QTYPE_RPC) { + if (type == TAOS_QTYPE_RPC && code != TSDB_CODE_QRY_NOT_READY) { dnodeSendRpcReadRsp(pVnode, pReadMsg, code); } else { dnodeDispatchNonRspMsg(pVnode, pReadMsg, code); diff --git a/src/inc/taoserror.h b/src/inc/taoserror.h index 59b2c0220b..5a49f24804 100644 --- a/src/inc/taoserror.h +++ b/src/inc/taoserror.h @@ -210,6 +210,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_QRY_OUT_OF_MEMORY, 0, 0x0703, "query out TAOS_DEFINE_ERROR(TSDB_CODE_QRY_APP_ERROR, 0, 0x0704, "query app error") TAOS_DEFINE_ERROR(TSDB_CODE_QRY_DUP_JOIN_KEY, 0, 0x0705, "query duplicated join key") TAOS_DEFINE_ERROR(TSDB_CODE_QRY_EXCEED_TAGS_LIMIT, 0, 0x0706, "query tag conditon too many") +TAOS_DEFINE_ERROR(TSDB_CODE_QRY_NOT_READY, 0, 0x0707, "query not ready") // grant TAOS_DEFINE_ERROR(TSDB_CODE_GRANT_EXPIRED, 0, 0x0800, "grant expired") From 1a9fce812657ef47ac7323330b338bb1d086f013 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 24 Jul 2020 14:38:57 +0800 Subject: [PATCH 24/42] [td-225] avoid the retrieved thread being blocked. --- src/dnode/src/dnodeVRead.c | 6 ++- src/inc/query.h | 11 +++-- src/inc/taoserror.h | 1 + src/query/inc/qExecutor.h | 14 ++++-- src/query/src/qExecutor.c | 95 ++++++++++++++++++++++++++------------ src/vnode/src/vnodeRead.c | 50 ++++++++++++++------ 6 files changed, 127 insertions(+), 50 deletions(-) diff --git a/src/dnode/src/dnodeVRead.c b/src/dnode/src/dnodeVRead.c index ebbad525dd..40b29cab79 100644 --- a/src/dnode/src/dnodeVRead.c +++ b/src/dnode/src/dnodeVRead.c @@ -209,7 +209,11 @@ static void *dnodeProcessReadQueue(void *param) { if (type == TAOS_QTYPE_RPC && code != TSDB_CODE_QRY_NOT_READY) { dnodeSendRpcReadRsp(pVnode, pReadMsg, code); } else { - dnodeDispatchNonRspMsg(pVnode, pReadMsg, code); + if (code == TSDB_CODE_QRY_HAS_RSP) { + dnodeSendRpcReadRsp(pVnode, pReadMsg, code); + } else { + dnodeDispatchNonRspMsg(pVnode, pReadMsg, code); + } } taosFreeQitem(pReadMsg); diff --git a/src/inc/query.h b/src/inc/query.h index d201b649f9..a9a21bc911 100644 --- a/src/inc/query.h +++ b/src/inc/query.h @@ -28,7 +28,7 @@ typedef void* qinfo_t; * @param qinfo * @return */ -int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryTableMsg, void* param, qinfo_t* qinfo); +int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryTableMsg, qinfo_t* qinfo); /** @@ -38,7 +38,10 @@ int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryTableMs * @param qinfo * @return */ -void qTableQuery(qinfo_t qinfo); +bool qTableQuery(qinfo_t qinfo); + +void* pGetRspMsg(qinfo_t qinfo); + /** * Retrieve the produced results information, if current query is not paused or completed, @@ -48,7 +51,7 @@ void qTableQuery(qinfo_t qinfo); * @param qinfo * @return */ -int32_t qRetrieveQueryResultInfo(qinfo_t qinfo); +int32_t qRetrieveQueryResultInfo(qinfo_t qinfo, bool* buildRes, void* pRspContext); /** * @@ -60,7 +63,7 @@ int32_t qRetrieveQueryResultInfo(qinfo_t qinfo); * @param contLen payload length * @return */ -int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp** pRsp, int32_t* contLen); +int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp** pRsp, int32_t* contLen, bool* continueExec); /** * Decide if more results will be produced or not, NOTE: this function will increase the ref count of QInfo, diff --git a/src/inc/taoserror.h b/src/inc/taoserror.h index 5a49f24804..697ce79715 100644 --- a/src/inc/taoserror.h +++ b/src/inc/taoserror.h @@ -211,6 +211,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_QRY_APP_ERROR, 0, 0x0704, "query app TAOS_DEFINE_ERROR(TSDB_CODE_QRY_DUP_JOIN_KEY, 0, 0x0705, "query duplicated join key") TAOS_DEFINE_ERROR(TSDB_CODE_QRY_EXCEED_TAGS_LIMIT, 0, 0x0706, "query tag conditon too many") TAOS_DEFINE_ERROR(TSDB_CODE_QRY_NOT_READY, 0, 0x0707, "query not ready") +TAOS_DEFINE_ERROR(TSDB_CODE_QRY_HAS_RSP, 0, 0x0708, "query should response") // grant TAOS_DEFINE_ERROR(TSDB_CODE_GRANT_EXPIRED, 0, 0x0800, "grant expired") diff --git a/src/query/inc/qExecutor.h b/src/query/inc/qExecutor.h index 92fe078c3f..b6f7b3600b 100644 --- a/src/query/inc/qExecutor.h +++ b/src/query/inc/qExecutor.h @@ -177,13 +177,18 @@ typedef struct SQueryRuntimeEnv { SDiskbasedResultBuf* pResultBuf; // query result buffer based on blocked-wised disk file } SQueryRuntimeEnv; +enum { + QUERY_RESULT_NOT_READY = 1, + QUERY_RESULT_READY = 2, +}; + typedef struct SQInfo { void* signature; int32_t pointsInterpo; int32_t code; // error code to returned to client - sem_t dataReady; +// sem_t dataReady; + void* tsdb; - void* param; int32_t vgId; STableGroupInfo tableGroupInfo; // table id list < only includes the STable list> STableGroupInfo tableqinfoGroupInfo; // this is a group array list, including SArray structure @@ -200,8 +205,11 @@ typedef struct SQInfo { */ int32_t tableIndex; int32_t numOfGroupResultPages; - void* pBuf; // allocated buffer for STableQueryInfo, sizeof(STableQueryInfo)*numOfTables; + void* pBuf; // allocated buffer for STableQueryInfo, sizeof(STableQueryInfo)*numOfTables; + pthread_mutex_t lock; // used to synchronize the rsp/query threads + int32_t dataReady; // denote if query result is ready or not + void* rspContext; // response context } SQInfo; #endif // TDENGINE_QUERYEXECUTOR_H diff --git a/src/query/src/qExecutor.c b/src/query/src/qExecutor.c index 532bdeb43b..9e8a5839dd 100644 --- a/src/query/src/qExecutor.c +++ b/src/query/src/qExecutor.c @@ -5894,16 +5894,11 @@ static SQInfo *createQInfoImpl(SQueryTableMsg *pQueryMsg, SArray* pTableIdList, } pQInfo->arrTableIdInfo = taosArrayInit(tableIndex, sizeof(STableIdInfo)); + pQInfo->dataReady = QUERY_RESULT_NOT_READY; + pthread_mutex_init(&pQInfo->lock, NULL); pQuery->pos = -1; pQuery->window = pQueryMsg->window; - - if (sem_init(&pQInfo->dataReady, 0, 0) != 0) { - int32_t code = TAOS_SYSTEM_ERROR(errno); - qError("QInfo:%p init dataReady sem failed, reason:%s", pQInfo, tstrerror(code)); - goto _cleanup; - } - colIdCheck(pQuery); qDebug("qmsg:%p QInfo:%p created", pQueryMsg, pQInfo); @@ -5943,7 +5938,7 @@ static bool isValidQInfo(void *param) { return (sig == (uint64_t)pQInfo); } -static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQInfo *pQInfo, bool isSTable, void* param) { +static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQInfo *pQInfo, bool isSTable) { int32_t code = TSDB_CODE_SUCCESS; SQuery *pQuery = pQInfo->runtimeEnv.pQuery; @@ -5966,8 +5961,6 @@ static int32_t initQInfo(SQueryTableMsg *pQueryMsg, void *tsdb, int32_t vgId, SQ return TSDB_CODE_SUCCESS; } - pQInfo->param = param; - if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) { qDebug("QInfo:%p no table qualified for tag filter, abort query", pQInfo); setQueryStatus(pQuery, QUERY_COMPLETED); @@ -6012,7 +6005,6 @@ static void freeQInfo(SQInfo *pQInfo) { tfree(pQuery->sdata[col]); } - sem_destroy(&(pQInfo->dataReady)); teardownQueryRuntimeEnv(&pQInfo->runtimeEnv); for (int32_t i = 0; i < pQuery->numOfFilterCols; ++i) { @@ -6164,7 +6156,7 @@ typedef struct SQueryMgmt { pthread_mutex_t lock; } SQueryMgmt; -int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryMsg, void* param, qinfo_t* pQInfo) { +int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryMsg, qinfo_t* pQInfo) { assert(pQueryMsg != NULL && tsdb != NULL); int32_t code = TSDB_CODE_SUCCESS; @@ -6260,7 +6252,7 @@ int32_t qCreateQueryInfo(void* tsdb, int32_t vgId, SQueryTableMsg* pQueryMsg, vo goto _over; } - code = initQInfo(pQueryMsg, tsdb, vgId, *pQInfo, isSTableQuery, param); + code = initQInfo(pQueryMsg, tsdb, vgId, *pQInfo, isSTableQuery); _over: free(tagCond); @@ -6300,26 +6292,32 @@ void qDestroyQueryInfo(qinfo_t qHandle) { freeQInfo(pQInfo); } -void qTableQuery(qinfo_t qinfo) { +static void setQueryResultReady(SQInfo* pQInfo) { + pthread_mutex_lock(&pQInfo->lock); + pQInfo->dataReady = QUERY_RESULT_READY; + pthread_mutex_unlock(&pQInfo->lock); +} + +bool qTableQuery(qinfo_t qinfo) { SQInfo *pQInfo = (SQInfo *)qinfo; if (pQInfo == NULL || pQInfo->signature != pQInfo) { qDebug("QInfo:%p has been freed, no need to execute", pQInfo); - return; + return false; } if (IS_QUERY_KILLED(pQInfo)) { qDebug("QInfo:%p it is already killed, abort", pQInfo); - sem_post(&pQInfo->dataReady); - return; + setQueryResultReady(pQInfo); + return false; } if (pQInfo->tableqinfoGroupInfo.numOfTables == 0) { setQueryStatus(pQInfo->runtimeEnv.pQuery, QUERY_COMPLETED); + setQueryResultReady(pQInfo); qDebug("QInfo:%p no table exists for query, abort", pQInfo); - sem_post(&pQInfo->dataReady); - return; + return false; } // error occurs, record the error code and return to client @@ -6327,8 +6325,9 @@ void qTableQuery(qinfo_t qinfo) { if (ret != TSDB_CODE_SUCCESS) { pQInfo->code = ret; qDebug("QInfo:%p query abort due to error/cancel occurs, code:%s", pQInfo, tstrerror(pQInfo->code)); - sem_post(&pQInfo->dataReady); - return; + + setQueryResultReady(pQInfo); + return false; } qDebug("QInfo:%p query task is launched", pQInfo); @@ -6353,10 +6352,23 @@ void qTableQuery(qinfo_t qinfo) { pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows); } - sem_post(&pQInfo->dataReady); + taosMsleep(1000); + +// pQInfo->dataReady = QUERY_RESULT_READY; + bool buildRes = false; + pthread_mutex_lock(&pQInfo->lock); + pQInfo->dataReady = QUERY_RESULT_READY; + + if (pQInfo->rspContext != NULL) { + buildRes = true; + } + pthread_mutex_unlock(&pQInfo->lock); + + return buildRes; +// sem_post(&pQInfo->dataReady); } -int32_t qRetrieveQueryResultInfo(qinfo_t qinfo) { +int32_t qRetrieveQueryResultInfo(qinfo_t qinfo, bool* buildRes, void* pRspContext) { SQInfo *pQInfo = (SQInfo *)qinfo; if (pQInfo == NULL || !isValidQInfo(pQInfo)) { @@ -6369,10 +6381,20 @@ int32_t qRetrieveQueryResultInfo(qinfo_t qinfo) { return pQInfo->code; } - sem_wait(&pQInfo->dataReady); - qDebug("QInfo:%p retrieve result info, rowsize:%d, rows:%"PRId64", code:%d", pQInfo, pQuery->rowSize, pQuery->rec.rows, - pQInfo->code); + *buildRes = false; + pthread_mutex_lock(&pQInfo->lock); + if (pQInfo->dataReady == QUERY_RESULT_READY) { + *buildRes = true; + qDebug("QInfo:%p retrieve result info, rowsize:%d, rows:%"PRId64", code:%d", pQInfo, pQuery->rowSize, pQuery->rec.rows, + pQInfo->code); + } else { + pQInfo->rspContext = pRspContext; + } + + pthread_mutex_unlock(&pQInfo->lock); + +// sem_wait(&pQInfo->dataReady); return pQInfo->code; } @@ -6385,6 +6407,7 @@ bool qHasMoreResultsToRetrieve(qinfo_t qinfo) { } SQuery *pQuery = pQInfo->runtimeEnv.pQuery; + bool ret = false; if (Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) { ret = false; @@ -6403,7 +6426,7 @@ bool qHasMoreResultsToRetrieve(qinfo_t qinfo) { return ret; } -int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *contLen) { +int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *contLen, bool* continueExec) { SQInfo *pQInfo = (SQInfo *)qinfo; if (pQInfo == NULL || !isValidQInfo(pQInfo)) { @@ -6413,8 +6436,10 @@ int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *co SQueryRuntimeEnv* pRuntimeEnv = &pQInfo->runtimeEnv; SQuery *pQuery = pQInfo->runtimeEnv.pQuery; size_t size = getResultSize(pQInfo, &pQuery->rec.rows); + size += sizeof(int32_t); size += sizeof(STableIdInfo) * taosArrayGetSize(pQInfo->arrTableIdInfo); + *contLen = size + sizeof(SRetrieveTableRsp); // todo proper handle failed to allocate memory, @@ -6423,6 +6448,7 @@ int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *co if (*pRsp == NULL) { return TSDB_CODE_QRY_OUT_OF_MEMORY; } + (*pRsp)->numOfRows = htonl(pQuery->rec.rows); int32_t code = pQInfo->code; @@ -6430,8 +6456,8 @@ int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *co (*pRsp)->offset = htobe64(pQuery->limit.offset); (*pRsp)->useconds = htobe64(pRuntimeEnv->summary.elapsedTime); } else { - (*pRsp)->offset = 0; (*pRsp)->useconds = 0; + (*pRsp)->offset = 0; } (*pRsp)->precision = htons(pQuery->precision); @@ -6442,10 +6468,21 @@ int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *co code = pQInfo->code; } + pQInfo->rspContext = NULL; + pQInfo->dataReady = QUERY_RESULT_NOT_READY; + if (IS_QUERY_KILLED(pQInfo) || Q_STATUS_EQUAL(pQuery->status, QUERY_OVER)) { (*pRsp)->completed = 1; // notify no more result to client } + if (qHasMoreResultsToRetrieve(pQInfo)) { + *continueExec = true; + } else { // failed to dump result, free qhandle immediately + *continueExec = false; + qKillQuery(pQInfo); + qDestroyQueryInfo(pQInfo); + } + return code; } @@ -6456,7 +6493,7 @@ int32_t qKillQuery(qinfo_t qinfo) { return TSDB_CODE_QRY_INVALID_QHANDLE; } - sem_post(&pQInfo->dataReady); +// sem_post(&pQInfo->dataReady); setQueryKilled(pQInfo); return TSDB_CODE_SUCCESS; } diff --git a/src/vnode/src/vnodeRead.c b/src/vnode/src/vnodeRead.c index 2586a2db99..542c132e03 100644 --- a/src/vnode/src/vnodeRead.c +++ b/src/vnode/src/vnodeRead.c @@ -66,11 +66,12 @@ int32_t vnodeProcessRead(void *param, SReadMsg *pReadMsg) { return (*vnodeProcessReadMsgFp[msgType])(pVnode, pReadMsg); } -static void vnodePutItemIntoReadQueue(SVnodeObj *pVnode, void *qhandle) { +static void vnodePutItemIntoReadQueue(SVnodeObj *pVnode, void *qhandle, void* handle) { SReadMsg *pRead = (SReadMsg *)taosAllocateQitem(sizeof(SReadMsg)); pRead->rpcMsg.msgType = TSDB_MSG_TYPE_QUERY; pRead->pCont = qhandle; pRead->contLen = 0; + pRead->rpcMsg.handle = handle; atomic_add_fetch_32(&pVnode->refCount, 1); taosWriteQitem(pVnode->rqueue, TAOS_QTYPE_QUERY, pRead); @@ -110,7 +111,7 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { if (contLen != 0) { qinfo_t pQInfo = NULL; - code = qCreateQueryInfo(pVnode->tsdb, pVnode->vgId, pQueryTableMsg, pVnode, &pQInfo); + code = qCreateQueryInfo(pVnode->tsdb, pVnode->vgId, pQueryTableMsg, &pQInfo); SQueryTableRsp *pRsp = (SQueryTableRsp *) rpcMallocCont(sizeof(SQueryTableRsp)); pRsp->code = code; @@ -148,7 +149,7 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { if (handle != NULL) { vDebug("vgId:%d, QInfo:%p, dnode query msg disposed, register qhandle and return to app", vgId, *handle); - vnodePutItemIntoReadQueue(pVnode, *handle); + vnodePutItemIntoReadQueue(pVnode, *handle, pReadMsg->rpcMsg.handle); // qReleaseQInfo(pVnode->qMgmt, (void**) &handle, false); } @@ -163,7 +164,23 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { } else { vDebug("vgId:%d, QInfo:%p, dnode continue exec query", pVnode->vgId, (void*) pCont); code = TSDB_CODE_VND_ACTION_IN_PROGRESS; - qTableQuery(*handle); // do execute query + bool buildRes = qTableQuery(*handle); // do execute query + + if (buildRes) { // build result rsp + pRet = &pReadMsg->rspRet; + + bool continueExec = false; + if ((code = qDumpRetrieveResult(*handle, (SRetrieveTableRsp **)&pRet->rsp, &pRet->len, &continueExec)) == TSDB_CODE_SUCCESS) { + if (continueExec) { + vnodePutItemIntoReadQueue(pVnode, *handle, pReadMsg->rpcMsg.handle); + pRet->qhandle = *handle; + + } + } else { // todo handle error + } + + code = TSDB_CODE_QRY_HAS_RSP; + } } // qReleaseQInfo(pVnode->qMgmt, (void**) &handle, false); } @@ -223,22 +240,29 @@ static int32_t vnodeProcessFetchMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { } bool freeHandle = true; - code = qRetrieveQueryResultInfo(*handle); + bool buildRes = false; + + code = qRetrieveQueryResultInfo(*handle, &buildRes, pReadMsg); if (code != TSDB_CODE_SUCCESS) { //TODO handle malloc failure pRet->rsp = (SRetrieveTableRsp *)rpcMallocCont(sizeof(SRetrieveTableRsp)); memset(pRet->rsp, 0, sizeof(SRetrieveTableRsp)); - } else { // if failed to dump result, free qhandle immediately - if ((code = qDumpRetrieveResult(*handle, (SRetrieveTableRsp **)&pRet->rsp, &pRet->len)) == TSDB_CODE_SUCCESS) { - if (qHasMoreResultsToRetrieve(*handle)) { - vnodePutItemIntoReadQueue(pVnode, *handle); + } else { + // result is not ready, return immediately + if (!buildRes) { + return TSDB_CODE_QRY_NOT_READY; + } + + bool continueExec = false; + if ((code = qDumpRetrieveResult(*handle, (SRetrieveTableRsp **)&pRet->rsp, &pRet->len, &continueExec)) == TSDB_CODE_SUCCESS) { + if (continueExec) { + vnodePutItemIntoReadQueue(pVnode, *handle, pReadMsg->rpcMsg.handle); pRet->qhandle = *handle; freeHandle = false; - } else { - qKillQuery(*handle); - qDestroyQueryInfo(*handle); - freeHandle = true; } + } else { + pRet->rsp = (SRetrieveTableRsp *)rpcMallocCont(sizeof(SRetrieveTableRsp)); + memset(pRet->rsp, 0, sizeof(SRetrieveTableRsp)); } } From d9a04b397ffe8eada57a3cc87781b971896a4232 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 24 Jul 2020 15:05:44 +0800 Subject: [PATCH 25/42] [td-225] fix bugs in non-blocking processing. --- src/dnode/src/dnodeVRead.c | 2 +- src/query/src/qExecutor.c | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/dnode/src/dnodeVRead.c b/src/dnode/src/dnodeVRead.c index 40b29cab79..f8a31d568e 100644 --- a/src/dnode/src/dnodeVRead.c +++ b/src/dnode/src/dnodeVRead.c @@ -210,7 +210,7 @@ static void *dnodeProcessReadQueue(void *param) { dnodeSendRpcReadRsp(pVnode, pReadMsg, code); } else { if (code == TSDB_CODE_QRY_HAS_RSP) { - dnodeSendRpcReadRsp(pVnode, pReadMsg, code); + dnodeSendRpcReadRsp(pVnode, pReadMsg, TSDB_CODE_SUCCESS); } else { dnodeDispatchNonRspMsg(pVnode, pReadMsg, code); } diff --git a/src/query/src/qExecutor.c b/src/query/src/qExecutor.c index 9e8a5839dd..c4142f5d93 100644 --- a/src/query/src/qExecutor.c +++ b/src/query/src/qExecutor.c @@ -6352,8 +6352,6 @@ bool qTableQuery(qinfo_t qinfo) { pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows); } - taosMsleep(1000); - // pQInfo->dataReady = QUERY_RESULT_READY; bool buildRes = false; pthread_mutex_lock(&pQInfo->lock); From 699f28284132596e0288ebb2c5d6b0059995e48b Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sat, 25 Jul 2020 00:22:46 +0800 Subject: [PATCH 26/42] [td-225] fix bugs in non-blocking processing. --- src/dnode/src/dnodeVRead.c | 2 +- src/query/src/qExecutor.c | 7 +++---- src/vnode/src/vnodeRead.c | 7 +++++-- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/dnode/src/dnodeVRead.c b/src/dnode/src/dnodeVRead.c index f8a31d568e..85ed194976 100644 --- a/src/dnode/src/dnodeVRead.c +++ b/src/dnode/src/dnodeVRead.c @@ -49,7 +49,7 @@ static taos_qset readQset; int32_t dnodeInitVnodeRead() { readQset = taosOpenQset(); - readPool.min = 2; + readPool.min = 4; readPool.max = tsNumOfCores * tsNumOfThreadsPerCore; if (readPool.max <= readPool.min * 2) readPool.max = 2 * readPool.min; readPool.readWorker = (SReadWorker *)calloc(sizeof(SReadWorker), readPool.max); diff --git a/src/query/src/qExecutor.c b/src/query/src/qExecutor.c index c4142f5d93..6de300400e 100644 --- a/src/query/src/qExecutor.c +++ b/src/query/src/qExecutor.c @@ -6352,7 +6352,6 @@ bool qTableQuery(qinfo_t qinfo) { pQInfo, pQuery->rec.rows, pQuery->rec.total + pQuery->rec.rows); } -// pQInfo->dataReady = QUERY_RESULT_READY; bool buildRes = false; pthread_mutex_lock(&pQInfo->lock); pQInfo->dataReady = QUERY_RESULT_READY; @@ -6360,8 +6359,9 @@ bool qTableQuery(qinfo_t qinfo) { if (pQInfo->rspContext != NULL) { buildRes = true; } - pthread_mutex_unlock(&pQInfo->lock); + + pthread_mutex_unlock(&pQInfo->lock); return buildRes; // sem_post(&pQInfo->dataReady); } @@ -6387,12 +6387,11 @@ int32_t qRetrieveQueryResultInfo(qinfo_t qinfo, bool* buildRes, void* pRspContex qDebug("QInfo:%p retrieve result info, rowsize:%d, rows:%"PRId64", code:%d", pQInfo, pQuery->rowSize, pQuery->rec.rows, pQInfo->code); } else { + qDebug("QInfo:%p retrieve req set query return result after paused", pQInfo); pQInfo->rspContext = pRspContext; } pthread_mutex_unlock(&pQInfo->lock); - -// sem_wait(&pQInfo->dataReady); return pQInfo->code; } diff --git a/src/vnode/src/vnodeRead.c b/src/vnode/src/vnodeRead.c index 542c132e03..5edfcf597c 100644 --- a/src/vnode/src/vnodeRead.c +++ b/src/vnode/src/vnodeRead.c @@ -163,18 +163,21 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { code = TSDB_CODE_QRY_INVALID_QHANDLE; } else { vDebug("vgId:%d, QInfo:%p, dnode continue exec query", pVnode->vgId, (void*) pCont); - code = TSDB_CODE_VND_ACTION_IN_PROGRESS; bool buildRes = qTableQuery(*handle); // do execute query if (buildRes) { // build result rsp + vDebug("vgId:%d, QInfo:%p, start to build result rsp after query paused", pVnode->vgId, *handle); + pRet = &pReadMsg->rspRet; bool continueExec = false; + code = TSDB_CODE_QRY_HAS_RSP; if ((code = qDumpRetrieveResult(*handle, (SRetrieveTableRsp **)&pRet->rsp, &pRet->len, &continueExec)) == TSDB_CODE_SUCCESS) { + if (continueExec) { vnodePutItemIntoReadQueue(pVnode, *handle, pReadMsg->rpcMsg.handle); pRet->qhandle = *handle; - + code = TSDB_CODE_SUCCESS; } } else { // todo handle error } From 4ce8f84e7e9f8a0ed7a1a255df6a24bd67b57118 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Sat, 25 Jul 2020 10:27:27 +0800 Subject: [PATCH 27/42] [td-225] fix bugs in non-blocking processing. --- src/query/src/qExecutor.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/query/src/qExecutor.c b/src/query/src/qExecutor.c index 6de300400e..28d1c138f6 100644 --- a/src/query/src/qExecutor.c +++ b/src/query/src/qExecutor.c @@ -6380,6 +6380,8 @@ int32_t qRetrieveQueryResultInfo(qinfo_t qinfo, bool* buildRes, void* pRspContex } *buildRes = false; + int32_t code = TSDB_CODE_SUCCESS; + pthread_mutex_lock(&pQInfo->lock); if (pQInfo->dataReady == QUERY_RESULT_READY) { *buildRes = true; @@ -6391,8 +6393,9 @@ int32_t qRetrieveQueryResultInfo(qinfo_t qinfo, bool* buildRes, void* pRspContex pQInfo->rspContext = pRspContext; } + code = pQInfo->code; pthread_mutex_unlock(&pQInfo->lock); - return pQInfo->code; + return code; } bool qHasMoreResultsToRetrieve(qinfo_t qinfo) { From 956eafa03165ab59c8f6b84fd416fd981f143320 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Sat, 25 Jul 2020 06:45:58 +0000 Subject: [PATCH 28/42] update cache expire time --- src/mnode/src/mnodeProfile.c | 4 ++-- src/plugins/http/src/httpContext.c | 2 +- src/util/src/tcache.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mnode/src/mnodeProfile.c b/src/mnode/src/mnodeProfile.c index 9121f31131..30a292f522 100644 --- a/src/mnode/src/mnodeProfile.c +++ b/src/mnode/src/mnodeProfile.c @@ -68,7 +68,7 @@ int32_t mnodeInitProfile() { mnodeAddWriteMsgHandle(TSDB_MSG_TYPE_CM_KILL_STREAM, mnodeProcessKillStreamMsg); mnodeAddWriteMsgHandle(TSDB_MSG_TYPE_CM_KILL_CONN, mnodeProcessKillConnectionMsg); - tsMnodeConnCache = taosCacheInit(TSDB_DATA_TYPE_INT, CONN_CHECK_TIME, false, mnodeFreeConn, "conn"); + tsMnodeConnCache = taosCacheInit(TSDB_DATA_TYPE_INT, CONN_CHECK_TIME, true, mnodeFreeConn, "conn"); return 0; } @@ -119,7 +119,7 @@ SConnObj *mnodeAccquireConn(int32_t connId, char *user, uint32_t ip, uint16_t po return NULL; } - if (pConn->ip != ip || pConn->port != port /* || strcmp(pConn->user, user) != 0 */) { + if (/* pConn->ip != ip || */ pConn->port != port /* || strcmp(pConn->user, user) != 0 */) { mError("connId:%d, incoming conn user:%s ip:%s:%u, not match exist conn user:%s ip:%s:%u", connId, user, taosIpStr(ip), port, pConn->user, taosIpStr(pConn->ip), pConn->port); taosCacheRelease(tsMnodeConnCache, (void **)&pConn, false); diff --git a/src/plugins/http/src/httpContext.c b/src/plugins/http/src/httpContext.c index 225977abae..ca65f65608 100644 --- a/src/plugins/http/src/httpContext.c +++ b/src/plugins/http/src/httpContext.c @@ -58,7 +58,7 @@ static void httpDestroyContext(void *data) { } bool httpInitContexts() { - tsHttpServer.contextCache = taosCacheInit(TSDB_DATA_TYPE_BIGINT, 2, false, httpDestroyContext, "restc"); + tsHttpServer.contextCache = taosCacheInit(TSDB_DATA_TYPE_BIGINT, 2, true, httpDestroyContext, "restc"); if (tsHttpServer.contextCache == NULL) { httpError("failed to init context cache"); return false; diff --git a/src/util/src/tcache.c b/src/util/src/tcache.c index 7c2a6b3219..3a3acdc05b 100644 --- a/src/util/src/tcache.c +++ b/src/util/src/tcache.c @@ -343,7 +343,7 @@ void* taosCacheUpdateExpireTimeByName(SCacheObj *pCacheObj, void *key, size_t ke SCacheDataNode **ptNode = (SCacheDataNode **)taosHashGet(pCacheObj->pHashTable, key, keyLen); if (ptNode != NULL) { T_REF_INC(*ptNode); - (*ptNode)->expireTime = taosGetTimestampMs() + (*ptNode)->lifespan; + (*ptNode)->expireTime = expireTime; // taosGetTimestampMs() + (*ptNode)->lifespan; } __cache_unlock(pCacheObj); From b04e3da3d8847229af3fa67f06d21674dd562f4b Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 27 Jul 2020 15:32:53 +0800 Subject: [PATCH 29/42] [td-225] fix bugs in res buf --- src/client/src/tscFunctionImpl.c | 5 + src/query/inc/qExecutor.h | 4 +- src/query/inc/qExtbuffer.h | 2 +- src/query/inc/qResultbuf.h | 26 +- src/query/inc/qUtil.h | 7 +- src/query/src/qExecutor.c | 64 ++--- src/query/src/qResultbuf.c | 302 ++++++++++++---------- src/query/src/qUtil.c | 13 +- src/query/tests/resultBufferTest.cpp | 53 ++++ src/util/inc/tlist.h | 2 + src/util/src/tlist.c | 16 ++ tests/script/general/parser/testSuite.sim | 2 + tests/script/general/parser/topbot.sim | 74 ++++++ 13 files changed, 395 insertions(+), 175 deletions(-) create mode 100644 tests/script/general/parser/topbot.sim diff --git a/src/client/src/tscFunctionImpl.c b/src/client/src/tscFunctionImpl.c index 72ccd5adc6..17f6c97ea1 100644 --- a/src/client/src/tscFunctionImpl.c +++ b/src/client/src/tscFunctionImpl.c @@ -2131,6 +2131,11 @@ static STopBotInfo *getTopBotOutputInfo(SQLFunctionCtx *pCtx) { } bool topbot_datablock_filter(SQLFunctionCtx *pCtx, int32_t functionId, const char *minval, const char *maxval) { + SResultInfo *pResInfo = GET_RES_INFO(pCtx); + if (pResInfo == NULL) { + return true; + } + STopBotInfo *pTopBotInfo = getTopBotOutputInfo(pCtx); // required number of results are not reached, continue load data block diff --git a/src/query/inc/qExecutor.h b/src/query/inc/qExecutor.h index 92fe078c3f..a18f8e2731 100644 --- a/src/query/inc/qExecutor.h +++ b/src/query/inc/qExecutor.h @@ -42,8 +42,8 @@ typedef struct SSqlGroupbyExpr { } SSqlGroupbyExpr; typedef struct SPosInfo { - int16_t pageId; - int16_t rowId; + int32_t pageId; + int32_t rowId; } SPosInfo; typedef struct SWindowStatus { diff --git a/src/query/inc/qExtbuffer.h b/src/query/inc/qExtbuffer.h index b57c48933f..2cbef2b1be 100644 --- a/src/query/inc/qExtbuffer.h +++ b/src/query/inc/qExtbuffer.h @@ -28,7 +28,7 @@ extern "C" { #include "tdataformat.h" #include "talgo.h" -#define DEFAULT_PAGE_SIZE (1024L*4) // 16k larger than the SHistoInfo +#define DEFAULT_PAGE_SIZE (1024L*64) // 16k larger than the SHistoInfo #define MAX_TMPFILE_PATH_LENGTH PATH_MAX #define INITIAL_ALLOCATION_BUFFER_SIZE 64 diff --git a/src/query/inc/qResultbuf.h b/src/query/inc/qResultbuf.h index da5f46b5e9..ba446b4627 100644 --- a/src/query/inc/qResultbuf.h +++ b/src/query/inc/qResultbuf.h @@ -34,10 +34,11 @@ typedef struct SPageDiskInfo { } SPageDiskInfo; typedef struct SPageInfo { + SListNode* pn; // point to list node int32_t pageId; SPageDiskInfo info; void* pData; - T_REF_DECLARE(); + bool used; // set current page is in used } SPageInfo; typedef struct SFreeListItem { @@ -45,6 +46,15 @@ typedef struct SFreeListItem { int32_t len; } SFreeListItem; +typedef struct SResultBufStatis { + int32_t flushBytes; + int32_t loadBytes; + int32_t getPages; + int32_t releasePages; + int32_t flushPages; + int32_t fileSize; +} SResultBufStatis; + typedef struct SDiskbasedResultBuf { int32_t numOfRowsPerPage; int32_t numOfPages; @@ -64,6 +74,8 @@ typedef struct SDiskbasedResultBuf { void* assistBuf; // assistant buffer for compress data SArray* pFree; // free area in file int32_t nextPos; // next page flush position + + SResultBufStatis statis; } SDiskbasedResultBuf; #define DEFAULT_INTERN_BUF_PAGE_SIZE (1024L) @@ -119,6 +131,16 @@ tFilePage* getResBufPage(SDiskbasedResultBuf* pResultBuf, int32_t id); */ void releaseResBufPage(SDiskbasedResultBuf* pResultBuf, void* page); +void releaseResBufPageInfo(SDiskbasedResultBuf* pResultBuf, SPageInfo* pi); + +/** + * + * @param pResultBuf + * @param id + * @return + */ +//tFilePage* getResBufPage(SDiskbasedResultBuf* pResultBuf, int32_t id); + /** * get the total buffer size in the format of disk file * @param pResultBuf @@ -144,7 +166,7 @@ void destroyResultBuf(SDiskbasedResultBuf* pResultBuf, void* handle); * @param pList * @return */ -int32_t getLastPageId(SIDList pList); +SPageInfo* getLastPageInfo(SIDList pList); #ifdef __cplusplus } diff --git a/src/query/inc/qUtil.h b/src/query/inc/qUtil.h index 7119cb75fe..ed7c7e8845 100644 --- a/src/query/inc/qUtil.h +++ b/src/query/inc/qUtil.h @@ -45,13 +45,14 @@ bool isWindowResClosed(SWindowResInfo *pWindowResInfo, int32_t slot); int32_t createQueryResultInfo(SQuery *pQuery, SWindowResult *pResultRow, bool isSTableQuery, size_t interBufSize); -static FORCE_INLINE char *getPosInResultPage(SQueryRuntimeEnv *pRuntimeEnv, int32_t columnIndex, SWindowResult *pResult) { +static FORCE_INLINE char *getPosInResultPage(SQueryRuntimeEnv *pRuntimeEnv, int32_t columnIndex, SWindowResult *pResult, + tFilePage* page) { assert(pResult != NULL && pRuntimeEnv != NULL); SQuery *pQuery = pRuntimeEnv->pQuery; - tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pos.pageId); - int32_t realRowId = pResult->pos.rowId * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, pRuntimeEnv->topBotQuery, pRuntimeEnv->stableQuery); +// tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pos.pageId); + int32_t realRowId = pResult->pos.rowId * GET_ROW_PARAM_FOR_MULTIOUTPUT(pQuery, pRuntimeEnv->topBotQuery, pRuntimeEnv->stableQuery); return ((char *)page->data) + pRuntimeEnv->offset[columnIndex] * pRuntimeEnv->numOfRowsPerPage + pQuery->pSelectExpr[columnIndex].bytes * realRowId; } diff --git a/src/query/src/qExecutor.c b/src/query/src/qExecutor.c index 30d6cc288f..9d873dc95f 100644 --- a/src/query/src/qExecutor.c +++ b/src/query/src/qExecutor.c @@ -221,7 +221,7 @@ void updateNumOfResult(SQueryRuntimeEnv *pRuntimeEnv, int32_t numOfRes) { } static int32_t getGroupResultId(int32_t groupIndex) { - int32_t base = 200000; + int32_t base = 20000000; return base + (groupIndex * 10000); } @@ -478,10 +478,14 @@ static int32_t addNewWindowResultBuf(SWindowResult *pWindowRes, SDiskbasedResult if (taosArrayGetSize(list) == 0) { pData = getNewDataBuf(pResultBuf, sid, &pageId); } else { - pageId = getLastPageId(list); - pData = getResBufPage(pResultBuf, pageId); + SPageInfo* pi = getLastPageInfo(list); + pData = getResBufPage(pResultBuf, pi->pageId); + pageId = pi->pageId; if (pData->num >= numOfRowsPerPage) { + // release current page first, and prepare the next one + releaseResBufPageInfo(pResultBuf, pi); + pData = getNewDataBuf(pResultBuf, sid, &pageId); if (pData != NULL) { assert(pData->num == 0); // number of elements must be 0 for new allocated buffer @@ -497,6 +501,8 @@ static int32_t addNewWindowResultBuf(SWindowResult *pWindowRes, SDiskbasedResult if (pWindowRes->pos.pageId == -1) { // not allocated yet, allocate new buffer pWindowRes->pos.pageId = pageId; pWindowRes->pos.rowId = pData->num++; + + assert(pWindowRes->pos.pageId >= 0); } return 0; @@ -2111,9 +2117,6 @@ int32_t loadDataBlockOnDemand(SQueryRuntimeEnv *pRuntimeEnv, void* pQueryHandle, } if (!needToLoadDataBlock(pRuntimeEnv, *pStatis, pRuntimeEnv->pCtx, pBlockInfo->rows)) { -#if defined(_DEBUG_VIEW) - qDebug("QInfo:%p block discarded by per-filter", GET_QINFO_ADDR(pRuntimeEnv)); -#endif // current block has been discard due to filter applied pRuntimeEnv->summary.discardBlocks += 1; qDebug("QInfo:%p data block discard, brange:%"PRId64 "-%"PRId64", rows:%d", GET_QINFO_ADDR(pRuntimeEnv), @@ -2446,6 +2449,8 @@ static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SWindowRes SQuery * pQuery = pRuntimeEnv->pQuery; SQLFunctionCtx *pCtx = pRuntimeEnv->pCtx; + tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pos.pageId); + for (int32_t i = 0; i < pQuery->numOfOutput; ++i) { int32_t functionId = pQuery->pSelectExpr[i].base.functionId; if (!mergeFlag) { @@ -2458,7 +2463,7 @@ static void doMerge(SQueryRuntimeEnv *pRuntimeEnv, int64_t timestamp, SWindowRes pCtx[i].hasNull = true; pCtx[i].nStartQueryTimestamp = timestamp; - pCtx[i].aInputElemBuf = getPosInResultPage(pRuntimeEnv, i, pWindowRes); + pCtx[i].aInputElemBuf = getPosInResultPage(pRuntimeEnv, i, pWindowRes, page); // in case of tag column, the tag information should be extracted from input buffer if (functionId == TSDB_FUNC_TAG_DUMMY || functionId == TSDB_FUNC_TAG) { @@ -2615,14 +2620,16 @@ int32_t tableResultComparFn(const void *pLeft, const void *pRight, void *param) SWindowResInfo *pWindowResInfo1 = &supporter->pTableQueryInfo[left]->windowResInfo; SWindowResult * pWindowRes1 = getWindowResult(pWindowResInfo1, leftPos); + tFilePage *page1 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes1->pos.pageId); - char *b1 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes1); + char *b1 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes1, page1); TSKEY leftTimestamp = GET_INT64_VAL(b1); SWindowResInfo *pWindowResInfo2 = &supporter->pTableQueryInfo[right]->windowResInfo; SWindowResult * pWindowRes2 = getWindowResult(pWindowResInfo2, rightPos); + tFilePage *page2 = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes2->pos.pageId); - char *b2 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes2); + char *b2 = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes2, page2); TSKEY rightTimestamp = GET_INT64_VAL(b2); if (leftTimestamp == rightTimestamp) { @@ -2685,35 +2692,26 @@ void copyResToQueryResultBuf(SQInfo *pQInfo, SQuery *pQuery) { int32_t id = getGroupResultId(pQInfo->groupIndex - 1); SIDList list = getDataBufPagesIdList(pResultBuf, pQInfo->offset + id); - int32_t total = 0; int32_t size = taosArrayGetSize(list); - for (int32_t i = 0; i < size; ++i) { - int32_t* pgId = taosArrayGet(list, i); - tFilePage *pData = getResBufPage(pResultBuf, *pgId); - total += pData->num; - } - - int32_t rows = total; int32_t offset = 0; for (int32_t j = 0; j < size; ++j) { - int32_t* pgId = taosArrayGet(list, j); - tFilePage *pData = getResBufPage(pResultBuf, *pgId); + SPageInfo* pi = *(SPageInfo**) taosArrayGet(list, j); + tFilePage *pData = getResBufPage(pResultBuf, pi->pageId); for (int32_t i = 0; i < pQuery->numOfOutput; ++i) { int32_t bytes = pRuntimeEnv->pCtx[i].outputBytes; char * pDest = pQuery->sdata[i]->data; - - memcpy(pDest + offset * bytes, pData->data + pRuntimeEnv->offset[i] * pData->num, - bytes * pData->num); + memcpy(pDest + offset * bytes, pData->data + pRuntimeEnv->offset[i] * pData->num, bytes * pData->num); } +// rows += pData->num; offset += pData->num; } assert(pQuery->rec.rows == 0); - pQuery->rec.rows += rows; + pQuery->rec.rows += offset; pQInfo->offset += 1; } @@ -2777,7 +2775,6 @@ int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *pGroup) { assert(pQInfo->numOfGroupResultPages == 0); return 0; } else if (numOfTables == 1) { // no need to merge results since only one table in each group - } SCompSupporter cs = {pTableList, posList, pQInfo}; @@ -2802,8 +2799,9 @@ int32_t mergeIntoGroupResultImpl(SQInfo *pQInfo, SArray *pGroup) { SWindowResInfo *pWindowResInfo = &pTableList[pos]->windowResInfo; SWindowResult * pWindowRes = getWindowResult(pWindowResInfo, cs.position[pos]); + tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pos.pageId); - char *b = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes); + char *b = getPosInResultPage(pRuntimeEnv, PRIMARYKEY_TIMESTAMP_COL_INDEX, pWindowRes, page); TSKEY ts = GET_INT64_VAL(b); assert(ts == pWindowRes->window.skey); @@ -3517,9 +3515,11 @@ void setWindowResOutputBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pResult SQuery *pQuery = pRuntimeEnv->pQuery; // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group + tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pos.pageId); + for (int32_t i = 0; i < pQuery->numOfOutput; ++i) { SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i]; - pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, i, pResult); + pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, i, pResult, page); int32_t functionId = pQuery->pSelectExpr[i].base.functionId; if (functionId == TSDB_FUNC_TOP || functionId == TSDB_FUNC_BOTTOM || functionId == TSDB_FUNC_DIFF) { @@ -3542,6 +3542,8 @@ void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult * SQuery *pQuery = pRuntimeEnv->pQuery; // Note: pResult->pos[i]->num == 0, there is only fixed number of results for each group + tFilePage* bufPage = getResBufPage(pRuntimeEnv->pResultBuf, pResult->pos.pageId); + for (int32_t i = 0; i < pQuery->numOfOutput; ++i) { SQLFunctionCtx *pCtx = &pRuntimeEnv->pCtx[i]; @@ -3550,7 +3552,7 @@ void setWindowResOutputBufInitCtx(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult * continue; } - pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, i, pResult); + pCtx->aOutputBuf = getPosInResultPage(pRuntimeEnv, i, pResult, bufPage); pCtx->currentStage = 0; int32_t functionId = pCtx->functionId; @@ -3713,11 +3715,13 @@ static int32_t doCopyToSData(SQInfo *pQInfo, SWindowResInfo *pResultInfo, int32_ pQInfo->groupIndex += 1; } + tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, result[i].pos.pageId); + for (int32_t j = 0; j < pQuery->numOfOutput; ++j) { int32_t size = pRuntimeEnv->pCtx[j].outputBytes; char *out = pQuery->sdata[j]->data + numOfResult * size; - char *in = getPosInResultPage(pRuntimeEnv, j, &result[i]); + char *in = getPosInResultPage(pRuntimeEnv, j, &result[i], page); memcpy(out, in + oldOffset * size, size * numOfRowsToCopy); } @@ -4240,8 +4244,8 @@ int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bo getIntermediateBufInfo(pRuntimeEnv, &ps, &rowsize); if (isSTableQuery && !onlyQueryTags(pRuntimeEnv->pQuery)) { - int32_t numOfPages = getInitialPageNum(pQInfo); - code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, numOfPages, rowsize, ps, numOfPages, pQInfo); +// int32_t numOfPages = getInitialPageNum(pQInfo); + code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, 2, rowsize, ps, 2, pQInfo); if (code != TSDB_CODE_SUCCESS) { return code; } diff --git a/src/query/src/qResultbuf.c b/src/query/src/qResultbuf.c index fa2bb814f2..fbb5f116e6 100644 --- a/src/query/src/qResultbuf.c +++ b/src/query/src/qResultbuf.c @@ -1,11 +1,13 @@ #include "qResultbuf.h" -#include -#include +#include "stddef.h" +#include "tscompression.h" #include "hash.h" #include "qExtbuffer.h" #include "queryLog.h" #include "taoserror.h" +#define GET_DATA_PAYLOAD(_p) ((_p)->pData + POINTER_BYTES) + int32_t createDiskbasedResultBuffer(SDiskbasedResultBuf** pResultBuf, int32_t numOfPages, int32_t rowSize, int32_t pagesize, int32_t inMemPages, const void* handle) { @@ -15,23 +17,22 @@ int32_t createDiskbasedResultBuffer(SDiskbasedResultBuf** pResultBuf, int32_t nu return TSDB_CODE_COM_OUT_OF_MEMORY; } - pResBuf->pageSize = pagesize; - pResBuf->numOfPages = 0; // all pages are in buffer in the first place - pResBuf->inMemPages = inMemPages; + pResBuf->pageSize = pagesize; + pResBuf->numOfPages = 0; // all pages are in buffer in the first place + pResBuf->inMemPages = inMemPages; + pResBuf->totalBufSize = pResBuf->numOfPages * pagesize; + pResBuf->allocateId = -1; + pResBuf->comp = true; + assert(inMemPages <= numOfPages); pResBuf->numOfRowsPerPage = (pagesize - sizeof(tFilePage)) / rowSize; - - pResBuf->totalBufSize = pResBuf->numOfPages * pagesize; - pResBuf->allocateId = -1; - pResBuf->lruList = tdListNew(POINTER_BYTES); // init id hash table pResBuf->groupSet = taosHashInit(10, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false); pResBuf->all = taosHashInit(10, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false); pResBuf->assistBuf = malloc(pResBuf->pageSize + 2); // EXTRA BYTES - pResBuf->comp = true; char path[PATH_MAX] = {0}; getTmpfilePath("qbuf", path); @@ -49,7 +50,7 @@ int32_t createDiskbasedResultBuffer(SDiskbasedResultBuf** pResultBuf, int32_t nu #define NUM_OF_PAGES_ON_DISK(_r) ((_r)->numOfPages - (_r)->inMemPages) #define FILE_SIZE_ON_DISK(_r) (NUM_OF_PAGES_ON_DISK(_r) * (_r)->pageSize) -static int32_t createDiskResidesBuf(SDiskbasedResultBuf* pResultBuf) { +static int32_t createDiskFile(SDiskbasedResultBuf* pResultBuf) { pResultBuf->file = fopen(pResultBuf->path, "wb+"); if (pResultBuf->file == NULL) { qError("failed to create tmp file: %s on disk. %s", pResultBuf->path, strerror(errno)); @@ -59,15 +60,27 @@ static int32_t createDiskResidesBuf(SDiskbasedResultBuf* pResultBuf) { return TSDB_CODE_SUCCESS; } -static char* doCompressData(void* data, int32_t srcSize, int32_t *dst, bool comp, void* assistBuf) { // do nothing - if (!comp) { +static char* doCompressData(void* data, int32_t srcSize, int32_t *dst, SDiskbasedResultBuf* pResultBuf) { // do nothing + if (!pResultBuf->comp) { *dst = srcSize; return data; } - *dst = tsCompressString(data, srcSize, 1, assistBuf, srcSize, ONE_STAGE_COMP, NULL, 0); + *dst = tsCompressString(data, srcSize, 1, pResultBuf->assistBuf, srcSize, ONE_STAGE_COMP, NULL, 0); - memcpy(data, assistBuf, *dst); + memcpy(data, pResultBuf->assistBuf, *dst); + return data; +} + +static char* doDecompressData(void* data, int32_t srcSize, int32_t *dst, SDiskbasedResultBuf* pResultBuf) { // do nothing + if (!pResultBuf->comp) { + *dst = srcSize; + return data; + } + + *dst = tsDecompressString(data, srcSize, 1, pResultBuf->assistBuf, pResultBuf->pageSize, ONE_STAGE_COMP, NULL, 0); + + memcpy(data, pResultBuf->assistBuf, *dst); return data; } @@ -96,11 +109,10 @@ static int32_t allocatePositionInFile(SDiskbasedResultBuf* pResultBuf, size_t si } static char* doFlushPageToDisk(SDiskbasedResultBuf* pResultBuf, SPageInfo* pg) { - assert(T_REF_VAL_GET(pg) == 0 && pg->pData != NULL); + assert(!pg->used && pg->pData != NULL); int32_t size = -1; - char* t = doCompressData(pg->pData + POINTER_BYTES, pResultBuf->pageSize, &size, pResultBuf->comp, pResultBuf->assistBuf); - pg->info.length = size; + char* t = doCompressData(GET_DATA_PAYLOAD(pg), pResultBuf->pageSize, &size, pResultBuf); // this page is flushed to disk for the first time if (pg->info.offset == -1) { @@ -108,26 +120,30 @@ static char* doFlushPageToDisk(SDiskbasedResultBuf* pResultBuf, SPageInfo* pg) { pResultBuf->nextPos += size; fseek(pResultBuf->file, pg->info.offset, SEEK_SET); - int32_t ret = fwrite(t, 1, size, pResultBuf->file); - - UNUSED(ret); + /*int32_t ret =*/ fwrite(t, 1, size, pResultBuf->file); } else { - if (pg->info.length < size) { // length becomes greater, current space is not enough, allocate new place. - //1. add current space to free list + // length becomes greater, current space is not enough, allocate new place, otherwise, do nothing + if (pg->info.length < size) { + // 1. add current space to free list taosArrayPush(pResultBuf->pFree, &pg->info); - //2. allocate new position, and update the info + // 2. allocate new position, and update the info pg->info.offset = allocatePositionInFile(pResultBuf, size); pResultBuf->nextPos += size; - - //3. write to disk. - fseek(pResultBuf->file, pg->info.offset, SEEK_SET); - fwrite(t, size, 1, pResultBuf->file); } + + //3. write to disk. + fseek(pResultBuf->file, pg->info.offset, SEEK_SET); + fwrite(t, size, 1, pResultBuf->file); } char* ret = pg->pData; + memset(ret, 0, pResultBuf->pageSize); + pg->pData = NULL; + pg->info.length = size; + + pResultBuf->statis.flushBytes += pg->info.length; return ret; } @@ -137,7 +153,7 @@ static char* flushPageToDisk(SDiskbasedResultBuf* pResultBuf, SPageInfo* pg) { assert(pResultBuf->numOfPages * pResultBuf->pageSize == pResultBuf->totalBufSize && pResultBuf->numOfPages >= pResultBuf->inMemPages); if (pResultBuf->file == NULL) { - if ((ret = createDiskResidesBuf(pResultBuf)) != TSDB_CODE_SUCCESS) { + if ((ret = createDiskFile(pResultBuf)) != TSDB_CODE_SUCCESS) { terrno = ret; return NULL; } @@ -146,12 +162,29 @@ static char* flushPageToDisk(SDiskbasedResultBuf* pResultBuf, SPageInfo* pg) { return doFlushPageToDisk(pResultBuf, pg); } +// load file block data in disk +static char* loadPageFromDisk(SDiskbasedResultBuf* pResultBuf, SPageInfo* pg) { + int32_t ret = fseek(pResultBuf->file, pg->info.offset, SEEK_SET); + ret = fread(GET_DATA_PAYLOAD(pg), 1, pg->info.length, pResultBuf->file); + if (ret != pg->info.length) { + terrno = errno; + return NULL; + } + + pResultBuf->statis.loadBytes += pg->info.length; + + int32_t fullSize = 0; + doDecompressData(GET_DATA_PAYLOAD(pg), pg->info.length, &fullSize, pResultBuf); + + return GET_DATA_PAYLOAD(pg); +} + #define NO_AVAILABLE_PAGES(_b) ((_b)->numOfPages >= (_b)->inMemPages) static SIDList addNewGroup(SDiskbasedResultBuf* pResultBuf, int32_t groupId) { assert(taosHashGet(pResultBuf->groupSet, (const char*) &groupId, sizeof(int32_t)) == NULL); - SArray* pa = taosArrayInit(1, sizeof(SPageInfo)); + SArray* pa = taosArrayInit(1, POINTER_BYTES); int32_t ret = taosHashPut(pResultBuf->groupSet, (const char*)&groupId, sizeof(int32_t), &pa, POINTER_BYTES); assert(ret == 0); @@ -170,44 +203,79 @@ static SPageInfo* registerPage(SDiskbasedResultBuf* pResultBuf, int32_t groupId, pResultBuf->numOfPages += 1; - SPageInfo ppi = { .info = PAGE_INFO_INITIALIZER, .pageId = pageId, }; - return taosArrayPush(list, &ppi); + SPageInfo* ppi = malloc(sizeof(SPageInfo));//{ .info = PAGE_INFO_INITIALIZER, .pageId = pageId, .pn = NULL}; + ppi->info = PAGE_INFO_INITIALIZER; + ppi->pageId = pageId; + ppi->pData = NULL; + ppi->pn = NULL; + ppi->used = true; + + return *(SPageInfo**) taosArrayPush(list, &ppi); +} + +static SListNode* getEldestUnrefedPage(SDiskbasedResultBuf* pResultBuf) { + SListIter iter = {0}; + tdListInitIter(pResultBuf->lruList, &iter, TD_LIST_BACKWARD); + + SListNode* pn = NULL; + while((pn = tdListNext(&iter)) != NULL) { + assert(pn != NULL); + + SPageInfo* pageInfo = *(SPageInfo**) pn->data; + assert(pageInfo->pageId >= 0 && pageInfo->pn == pn); + + if (!pageInfo->used) { + break; + } + } + + return pn; +} + +static char* evicOneDataPage(SDiskbasedResultBuf* pResultBuf) { + char* bufPage = NULL; + SListNode* pn = getEldestUnrefedPage(pResultBuf); + + // all pages are referenced by user, try to allocate new space + if (pn == NULL) { + int32_t prev = pResultBuf->inMemPages; + pResultBuf->inMemPages = pResultBuf->inMemPages * 1.5; + + qWarn("%p in memory buf page not sufficient, expand from %d to %d, page size:%d", pResultBuf, prev, + pResultBuf->inMemPages, pResultBuf->pageSize); + } else { + pResultBuf->statis.flushPages += 1; + tdListPopNode(pResultBuf->lruList, pn); + + SPageInfo* d = *(SPageInfo**) pn->data; + assert(d->pn == pn); + + d->pn = NULL; + tfree(pn); + + bufPage = flushPageToDisk(pResultBuf, d); + } + + return bufPage; +} + +static void lruListPushFront(SList *pList, SPageInfo* pi) { + tdListPrepend(pList, &pi); + SListNode* front = tdListGetHead(pList); + pi->pn = front; +} + +static void lruListMoveToFront(SList *pList, SPageInfo* pi) { + tdListPopNode(pList, pi->pn); + tdListPrependNode(pList, pi->pn); } tFilePage* getNewDataBuf(SDiskbasedResultBuf* pResultBuf, int32_t groupId, int32_t* pageId) { - char* allocPg = NULL; + pResultBuf->statis.getPages += 1; + char* availablePage = NULL; if (NO_AVAILABLE_PAGES(pResultBuf)) { - - // get the last page in linked list - SListIter iter = {0}; - tdListInitIter(pResultBuf->lruList, &iter, TD_LIST_BACKWARD); - - SListNode* pn = NULL; - while((pn = tdListNext(&iter)) != NULL) { - assert(pn != NULL); - if (T_REF_VAL_GET(*(SPageInfo**)pn->data) == 0) { - break; - } - } - - // all pages are referenced by user, try to allocate new space - if (pn == NULL) { - int32_t prev = pResultBuf->inMemPages; - pResultBuf->inMemPages = pResultBuf->inMemPages * 1.5; - - qWarn("%p in memory buf page not sufficient, expand from %d to %d, page size:%d", pResultBuf, prev, - pResultBuf->inMemPages, pResultBuf->pageSize); - } else { - tdListPopNode(pResultBuf->lruList, pn); - SPageInfo* d = *(SPageInfo**) pn->data; - tfree(pn); - - allocPg = flushPageToDisk(pResultBuf, d); - if (allocPg == NULL) { - return NULL; - } - } + availablePage = evicOneDataPage(pResultBuf); } // register new id in this group @@ -216,111 +284,72 @@ tFilePage* getNewDataBuf(SDiskbasedResultBuf* pResultBuf, int32_t groupId, int32 // register page id info SPageInfo* pi = registerPage(pResultBuf, groupId, *pageId); + assert(pResultBuf->inMemPages > 0); + // add to LRU list assert(listNEles(pResultBuf->lruList) < pResultBuf->inMemPages); - tdListPrepend(pResultBuf->lruList, &pi); + lruListPushFront(pResultBuf->lruList, pi); // add to hash map taosHashPut(pResultBuf->all, pageId, sizeof(int32_t), &pi, POINTER_BYTES); // allocate buf - if (allocPg == NULL) { + if (availablePage == NULL) { pi->pData = calloc(1, pResultBuf->pageSize + POINTER_BYTES); } else { - pi->pData = allocPg; + pi->pData = availablePage; } pResultBuf->totalBufSize += pResultBuf->pageSize; - T_REF_INC(pi); // add ref count ((void**)pi->pData)[0] = pi; + pi->used = true; - return pi->pData + POINTER_BYTES; + return GET_DATA_PAYLOAD(pi); } tFilePage* getResBufPage(SDiskbasedResultBuf* pResultBuf, int32_t id) { assert(pResultBuf != NULL && id >= 0); + pResultBuf->statis.getPages += 1; SPageInfo** pi = taosHashGet(pResultBuf->all, &id, sizeof(int32_t)); assert(pi != NULL && *pi != NULL); if ((*pi)->pData != NULL) { // it is in memory - // no need to update the LRU list + // no need to update the LRU list if only one page exists if (pResultBuf->numOfPages == 1) { - return (*pi)->pData + POINTER_BYTES; + (*pi)->used = true; + return GET_DATA_PAYLOAD(*pi); } - SListNode* pnode = NULL; // todo speed up + SPageInfo** pInfo = (SPageInfo**) ((*pi)->pn->data); + assert(*pInfo == *pi); - SListIter iter = {0}; - tdListInitIter(pResultBuf->lruList, &iter, TD_LIST_FORWARD); + lruListMoveToFront(pResultBuf->lruList, (*pi)); + (*pi)->used = true; - while((pnode = tdListNext(&iter)) != NULL) { - SPageInfo** pInfo = (SPageInfo**) pnode->data; + return GET_DATA_PAYLOAD(*pi); - // remove it and add it into the front of linked-list - if ((*pInfo)->pageId == id) { - tdListPopNode(pResultBuf->lruList, pnode); - tdListPrependNode(pResultBuf->lruList, pnode); - T_REF_INC(*(SPageInfo**)pnode->data); - - return ((*(SPageInfo**)pnode->data)->pData + POINTER_BYTES); - } - } } else { // not in memory - assert((*pi)->pData == NULL && (*pi)->info.length >= 0 && (*pi)->info.offset >= 0); + assert((*pi)->pData == NULL && (*pi)->pn == NULL && (*pi)->info.length >= 0 && (*pi)->info.offset >= 0); - // choose the be flushed page: get the last page in linked list - SListIter iter1 = {0}; - tdListInitIter(pResultBuf->lruList, &iter1, TD_LIST_BACKWARD); - - SListNode* pn = NULL; - while((pn = tdListNext(&iter1)) != NULL) { - assert(pn != NULL); - if (T_REF_VAL_GET(*(SPageInfo**)(pn->data)) == 0) { - break; - } + char* availablePage = NULL; + if (NO_AVAILABLE_PAGES(pResultBuf)) { + availablePage = evicOneDataPage(pResultBuf); } - // all pages are referenced by user, try to allocate new space - if (pn == NULL) { - int32_t prev = pResultBuf->inMemPages; - pResultBuf->inMemPages = pResultBuf->inMemPages * 1.5; - - qWarn("%p in memory buf page not sufficient, expand from %d to %d, page size:%d", pResultBuf, prev, - pResultBuf->inMemPages, pResultBuf->pageSize); - + if (availablePage == NULL) { (*pi)->pData = calloc(1, pResultBuf->pageSize + POINTER_BYTES); } else { - tdListPopNode(pResultBuf->lruList, pn); - - if (flushPageToDisk(pResultBuf, *(SPageInfo**)pn->data) != TSDB_CODE_SUCCESS) { - return NULL; - } - - char* buf = (*(SPageInfo**)pn->data)->pData; - (*(SPageInfo**)pn->data)->pData = NULL; - - (*pi)->pData = buf; - - ((void**)((*pi)->pData))[0] = (*pi); - tfree(pn); + (*pi)->pData = availablePage; } - // load file in disk - int32_t ret = fseek(pResultBuf->file, (*pi)->info.offset, SEEK_SET); - ret = fread((*pi)->pData + POINTER_BYTES, 1, (*pi)->info.length, pResultBuf->file); - if (ret != (*pi)->info.length) { - terrno = errno; - return NULL; - } + ((void**)((*pi)->pData))[0] = (*pi); - // todo do decomp - - return (*pi)->pData + POINTER_BYTES; + lruListPushFront(pResultBuf->lruList, *pi); + loadPageFromDisk(pResultBuf, *pi); + return GET_DATA_PAYLOAD(*pi); } - - return NULL; } void releaseResBufPage(SDiskbasedResultBuf* pResultBuf, void* page) { @@ -328,9 +357,14 @@ void releaseResBufPage(SDiskbasedResultBuf* pResultBuf, void* page) { char* p = (char*) page - POINTER_BYTES; SPageInfo* ppi = ((SPageInfo**) p)[0]; + releaseResBufPageInfo(pResultBuf, ppi); +} - assert(T_REF_VAL_GET(ppi) > 0); - T_REF_DEC(ppi); +void releaseResBufPageInfo(SDiskbasedResultBuf* pResultBuf, SPageInfo* pi) { + assert(pi->pData != NULL && pi->used); + + pi->used = false; + pResultBuf->statis.releasePages += 1; } size_t getNumOfRowsPerPage(const SDiskbasedResultBuf* pResultBuf) { return pResultBuf->numOfRowsPerPage; } @@ -373,9 +407,11 @@ void destroyResultBuf(SDiskbasedResultBuf* pResultBuf, void* handle) { SArray** p = (SArray**) taosHashIterGet(iter); size_t n = taosArrayGetSize(*p); for(int32_t i = 0; i < n; ++i) { - SPageInfo* pi = taosArrayGet(*p, i); + SPageInfo* pi = taosArrayGetP(*p, i); tfree(pi->pData); + tfree(pi); } + taosArrayDestroy(*p); } @@ -390,8 +426,8 @@ void destroyResultBuf(SDiskbasedResultBuf* pResultBuf, void* handle) { tfree(pResultBuf); } -int32_t getLastPageId(SIDList pList) { +SPageInfo* getLastPageInfo(SIDList pList) { size_t size = taosArrayGetSize(pList); - return *(int32_t*) taosArrayGet(pList, size - 1); + return (SPageInfo*) taosArrayGetP(pList, size - 1); } diff --git a/src/query/src/qUtil.c b/src/query/src/qUtil.c index be84471493..8ddc2d3857 100644 --- a/src/query/src/qUtil.c +++ b/src/query/src/qUtil.c @@ -233,11 +233,13 @@ void clearTimeWindowResBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *pWindow if (pWindowRes == NULL) { return; } - + + tFilePage *page = getResBufPage(pRuntimeEnv->pResultBuf, pWindowRes->pos.pageId); + for (int32_t i = 0; i < pRuntimeEnv->pQuery->numOfOutput; ++i) { SResultInfo *pResultInfo = &pWindowRes->resultInfo[i]; - char * s = getPosInResultPage(pRuntimeEnv, i, pWindowRes); + char * s = getPosInResultPage(pRuntimeEnv, i, pWindowRes, page); size_t size = pRuntimeEnv->pQuery->pSelectExpr[i].bytes; memset(s, 0, size); @@ -274,8 +276,11 @@ void copyTimeWindowResBuf(SQueryRuntimeEnv *pRuntimeEnv, SWindowResult *dst, con memcpy(pDst->interResultBuf, pSrc->interResultBuf, pDst->bufLen); // copy the output buffer data from src to dst, the position info keep unchanged - char * dstBuf = getPosInResultPage(pRuntimeEnv, i, dst); - char * srcBuf = getPosInResultPage(pRuntimeEnv, i, (SWindowResult *)src); + tFilePage *dstpage = getResBufPage(pRuntimeEnv->pResultBuf, dst->pos.pageId); + char * dstBuf = getPosInResultPage(pRuntimeEnv, i, dst, dstpage); + + tFilePage *srcpage = getResBufPage(pRuntimeEnv->pResultBuf, src->pos.pageId); + char * srcBuf = getPosInResultPage(pRuntimeEnv, i, (SWindowResult *)src, srcpage); size_t s = pRuntimeEnv->pQuery->pSelectExpr[i].bytes; memcpy(dstBuf, srcBuf, s); diff --git a/src/query/tests/resultBufferTest.cpp b/src/query/tests/resultBufferTest.cpp index 53a05925c2..3171a7b322 100644 --- a/src/query/tests/resultBufferTest.cpp +++ b/src/query/tests/resultBufferTest.cpp @@ -94,7 +94,59 @@ void writeDownTest() { SArray* pa = getDataBufPagesIdList(pResultBuf, groupId); ASSERT_EQ(taosArrayGetSize(pa), 5); + destroyResultBuf(pResultBuf, NULL); +} +void recyclePageTest() { + SDiskbasedResultBuf* pResultBuf = NULL; + int32_t ret = createDiskbasedResultBuffer(&pResultBuf, 1000, 64, 1024, 4, NULL); + + int32_t pageId = 0; + int32_t writePageId = 0; + int32_t groupId = 0; + int32_t nx = 12345; + + tFilePage* pBufPage = getNewDataBuf(pResultBuf, groupId, &pageId); + ASSERT_TRUE(pBufPage != NULL); + releaseResBufPage(pResultBuf, pBufPage); + + tFilePage* pBufPage1 = getNewDataBuf(pResultBuf, groupId, &pageId); + tFilePage* t1 = getResBufPage(pResultBuf, pageId); + ASSERT_TRUE(t1 == pBufPage1); + ASSERT_TRUE(pageId == 1); + + tFilePage* pBufPage2 = getNewDataBuf(pResultBuf, groupId, &pageId); + tFilePage* t2 = getResBufPage(pResultBuf, pageId); + ASSERT_TRUE(t2 == pBufPage2); + ASSERT_TRUE(pageId == 2); + + tFilePage* pBufPage3 = getNewDataBuf(pResultBuf, groupId, &pageId); + tFilePage* t3 = getResBufPage(pResultBuf, pageId); + ASSERT_TRUE(t3 == pBufPage3); + ASSERT_TRUE(pageId == 3); + + tFilePage* pBufPage4 = getNewDataBuf(pResultBuf, groupId, &pageId); + tFilePage* t4 = getResBufPage(pResultBuf, pageId); + ASSERT_TRUE(t4 == pBufPage4); + ASSERT_TRUE(pageId == 4); + releaseResBufPage(pResultBuf, t4); + releaseResBufPage(pResultBuf, t4); + + tFilePage* pBufPage5 = getNewDataBuf(pResultBuf, groupId, &pageId); + tFilePage* t5 = getResBufPage(pResultBuf, pageId); + ASSERT_TRUE(t5 == pBufPage5); + ASSERT_TRUE(pageId == 5); + + // flush the written page to disk, and read it out again + tFilePage* pBufPagex = getResBufPage(pResultBuf, writePageId); + *(int32_t*)(pBufPagex->data) = nx; + writePageId = pageId; // update the data + releaseResBufPage(pResultBuf, pBufPagex); + + tFilePage* pBufPagex1 = getResBufPage(pResultBuf, 1); + + SArray* pa = getDataBufPagesIdList(pResultBuf, groupId); + ASSERT_EQ(taosArrayGetSize(pa), 6); destroyResultBuf(pResultBuf, NULL); } @@ -105,4 +157,5 @@ TEST(testCase, resultBufferTest) { srand(time(NULL)); simpleTest(); writeDownTest(); + recyclePageTest(); } diff --git a/src/util/inc/tlist.h b/src/util/inc/tlist.h index a4ed9311e2..e8380294da 100644 --- a/src/util/inc/tlist.h +++ b/src/util/inc/tlist.h @@ -55,6 +55,8 @@ int tdListPrepend(SList *list, void *data); int tdListAppend(SList *list, void *data); SListNode *tdListPopHead(SList *list); SListNode *tdListPopTail(SList *list); +SListNode *tdListGetHead(SList *list); +SListNode *tsListGetTail(SList *list); SListNode *tdListPopNode(SList *list, SListNode *node); void tdListMove(SList *src, SList *dst); void tdListDiscard(SList *list); diff --git a/src/util/src/tlist.c b/src/util/src/tlist.c index 93293b8b91..8c2ad83de1 100644 --- a/src/util/src/tlist.c +++ b/src/util/src/tlist.c @@ -122,6 +122,22 @@ SListNode *tdListPopTail(SList *list) { return node; } +SListNode *tdListGetHead(SList *list) { + if (list == NULL || list->numOfEles == 0) { + return NULL; + } + + return list->head; +} + +SListNode *tsListGetTail(SList *list) { + if (list == NULL || list->numOfEles == 0) { + return NULL; + } + + return list->tail; +} + SListNode *tdListPopNode(SList *list, SListNode *node) { if (list->head == node) { list->head = node->next; diff --git a/tests/script/general/parser/testSuite.sim b/tests/script/general/parser/testSuite.sim index c6981d2902..1e73893793 100644 --- a/tests/script/general/parser/testSuite.sim +++ b/tests/script/general/parser/testSuite.sim @@ -93,6 +93,8 @@ run general/parser/groupby.sim sleep 2000 run general/parser/tags_filter.sim sleep 2000 +run general/parser/topbot.sim +sleep 2000 run general/parser/union.sim sleep 2000 run general/parser/sliding.sim diff --git a/tests/script/general/parser/topbot.sim b/tests/script/general/parser/topbot.sim new file mode 100644 index 0000000000..a0c46dbc65 --- /dev/null +++ b/tests/script/general/parser/topbot.sim @@ -0,0 +1,74 @@ +system sh/stop_dnodes.sh + +system sh/deploy.sh -n dnode1 -i 1 +system sh/cfg.sh -n dnode1 -c walLevel -v 0 +system sh/exec.sh -n dnode1 -s start +sleep 3000 +sql connect + +$dbPrefix = tb_db +$tbPrefix = tb_tb +$stbPrefix = tb_stb +$tbNum = 10 +$rowNum = 1000 +$totalNum = $tbNum * $rowNum +$loops = 200000 +$log = 10000 +$ts0 = 1537146000000 +$delta = 600000 +print ========== topbot.sim +$i = 0 +$db = $dbPrefix . $i +$stb = $stbPrefix . $i + +sql drop database $db -x step1 +step1: +sql create database $db cache 16 maxtables 200 +print ====== create tables +sql use $db +sql create table $stb (ts timestamp, c1 int, c2 bigint, c3 float, c4 double, c5 smallint, c6 tinyint, c7 bool, c8 binary(10), c9 nchar(10)) tags(t1 int) + +$i = 0 +$ts = $ts0 +$halfNum = $tbNum / 2 +while $i < $halfNum + $tbId = $i + $halfNum + $tb = $tbPrefix . $i + $tb1 = $tbPrefix . $tbId + sql create table $tb using $stb tags( $i ) + sql create table $tb1 using $stb tags( $tbId ) + + $x = 0 + while $x < $rowNum + $xs = $x * $delta + $ts = $ts0 + $xs + $c = $x / 10 + $c = $c * 10 + $c = $x - $c + $binary = 'binary . $c + $binary = $binary . ' + $nchar = 'nchar . $c + $nchar = $nchar . ' + sql insert into $tb values ( $ts , $c , $c , $c , $c , $c , $c , true, $binary , $nchar ) + sql insert into $tb1 values ( $ts , $c , NULL , $c , NULL , $c , $c , true, $binary , $nchar ) + $x = $x + 1 + endw + + $i = $i + 1 +endw +print ====== tables created + +sql use $db +##### select from table +print ====== select top/bot from table and check num of rows returned +sql select top(c1, 100) from tb_stb0 +if $row != 100 then + return -1 +endi + +sql select last(c2) from tb_tb9 +if $row != 1 then + return -1 +endi + +system sh/exec.sh -n dnode1 -s stop -x SIGINT \ No newline at end of file From d9622d9d8cb52de2343fb0f7137a6ab44d088ea1 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 27 Jul 2020 16:17:34 +0800 Subject: [PATCH 30/42] [td-225] fix bugs in rsp data using wrong connection handle --- src/inc/query.h | 9 +-------- src/query/src/qExecutor.c | 8 ++++++++ src/vnode/src/vnodeRead.c | 8 ++++++-- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/src/inc/query.h b/src/inc/query.h index a9a21bc911..ccff05bc1b 100644 --- a/src/inc/query.h +++ b/src/inc/query.h @@ -65,14 +65,7 @@ int32_t qRetrieveQueryResultInfo(qinfo_t qinfo, bool* buildRes, void* pRspContex */ int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp** pRsp, int32_t* contLen, bool* continueExec); -/** - * Decide if more results will be produced or not, NOTE: this function will increase the ref count of QInfo, - * so it can be only called once for each retrieve - * - * @param qinfo - * @return - */ -bool qHasMoreResultsToRetrieve(qinfo_t qinfo); +void* qGetResultRetrieveMsg(qinfo_t qinfo); /** * kill current ongoing query and free query handle automatically diff --git a/src/query/src/qExecutor.c b/src/query/src/qExecutor.c index 28d1c138f6..5b904936f2 100644 --- a/src/query/src/qExecutor.c +++ b/src/query/src/qExecutor.c @@ -6640,6 +6640,13 @@ static void buildTagQueryResult(SQInfo* pQInfo) { setQueryStatus(pQuery, QUERY_COMPLETED); } +void* qGetResultRetrieveMsg(qinfo_t qinfo) { + SQInfo* pQInfo = (SQInfo*) qinfo; + assert(pQInfo != NULL); + + return pQInfo->rspContext; +} + void freeqinfoFn(void *qhandle) { void** handle = qhandle; if (handle == NULL || *handle == NULL) { @@ -6762,3 +6769,4 @@ void** qReleaseQInfo(void* pMgmt, void* pQInfo, bool needFree) { return 0; } + diff --git a/src/vnode/src/vnodeRead.c b/src/vnode/src/vnodeRead.c index 5edfcf597c..3659ef1430 100644 --- a/src/vnode/src/vnodeRead.c +++ b/src/vnode/src/vnodeRead.c @@ -166,12 +166,16 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { bool buildRes = qTableQuery(*handle); // do execute query if (buildRes) { // build result rsp - vDebug("vgId:%d, QInfo:%p, start to build result rsp after query paused", pVnode->vgId, *handle); + + SReadMsg* pRetrieveMsg = qGetResultRetrieveMsg(*handle); + assert(pRetrieveMsg != NULL); + vDebug("vgId:%d, QInfo:%p, start to build result rsp after query paused, %p", pVnode->vgId, *handle, pRetrieveMsg->rpcMsg.handle); + pReadMsg->rpcMsg.handle = pRetrieveMsg->rpcMsg.handle; // update the connection info according to the retrieve connection pRet = &pReadMsg->rspRet; + code = TSDB_CODE_QRY_HAS_RSP; bool continueExec = false; - code = TSDB_CODE_QRY_HAS_RSP; if ((code = qDumpRetrieveResult(*handle, (SRetrieveTableRsp **)&pRet->rsp, &pRet->len, &continueExec)) == TSDB_CODE_SUCCESS) { if (continueExec) { From f32a87041316315664aa87a07a70bc44c1b23793 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 27 Jul 2020 16:40:34 +0800 Subject: [PATCH 31/42] [td-225] fix bugs in rsp data using wrong connection handle --- src/vnode/src/vnodeRead.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/vnode/src/vnodeRead.c b/src/vnode/src/vnodeRead.c index 3659ef1430..bb7b7c5a80 100644 --- a/src/vnode/src/vnodeRead.c +++ b/src/vnode/src/vnodeRead.c @@ -168,7 +168,8 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { if (buildRes) { // build result rsp SReadMsg* pRetrieveMsg = qGetResultRetrieveMsg(*handle); - assert(pRetrieveMsg != NULL); + assert(pRetrieveMsg != NULL && pRetrieveMsg->rpcMsg.handle != NULL); + vDebug("vgId:%d, QInfo:%p, start to build result rsp after query paused, %p", pVnode->vgId, *handle, pRetrieveMsg->rpcMsg.handle); pReadMsg->rpcMsg.handle = pRetrieveMsg->rpcMsg.handle; // update the connection info according to the retrieve connection @@ -203,7 +204,7 @@ static int32_t vnodeProcessFetchMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { pRetrieve->qhandle = htobe64(pRetrieve->qhandle); pRetrieve->free = htons(pRetrieve->free); - vDebug("vgId:%d, QInfo:%p, retrieve msg is disposed", pVnode->vgId, (void*) pRetrieve->qhandle); + vDebug("vgId:%d, QInfo:%p, retrieve msg is disposed, free:%d, conn:%p", pVnode->vgId, (void*) pRetrieve->qhandle, pRetrieve->free, pReadMsg->rpcMsg.handle); memset(pRet, 0, sizeof(SRspRet)); From d32eb69fdb4f600304555a70fd720519cb2d2656 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 27 Jul 2020 16:45:18 +0800 Subject: [PATCH 32/42] [td-225] fix bugs in rsp data using wrong connection handle --- src/vnode/src/vnodeRead.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/vnode/src/vnodeRead.c b/src/vnode/src/vnodeRead.c index bb7b7c5a80..49c10dca3f 100644 --- a/src/vnode/src/vnodeRead.c +++ b/src/vnode/src/vnodeRead.c @@ -167,11 +167,11 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { if (buildRes) { // build result rsp - SReadMsg* pRetrieveMsg = qGetResultRetrieveMsg(*handle); - assert(pRetrieveMsg != NULL && pRetrieveMsg->rpcMsg.handle != NULL); + void* retrieveHandle = qGetResultRetrieveMsg(*handle); + assert(retrieveHandle != NULL); - vDebug("vgId:%d, QInfo:%p, start to build result rsp after query paused, %p", pVnode->vgId, *handle, pRetrieveMsg->rpcMsg.handle); - pReadMsg->rpcMsg.handle = pRetrieveMsg->rpcMsg.handle; // update the connection info according to the retrieve connection + vDebug("vgId:%d, QInfo:%p, start to build result rsp after query paused, %p", pVnode->vgId, *handle, retrieveHandle); + pReadMsg->rpcMsg.handle = retrieveHandle; // update the connection info according to the retrieve connection pRet = &pReadMsg->rspRet; code = TSDB_CODE_QRY_HAS_RSP; @@ -250,7 +250,7 @@ static int32_t vnodeProcessFetchMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { bool freeHandle = true; bool buildRes = false; - code = qRetrieveQueryResultInfo(*handle, &buildRes, pReadMsg); + code = qRetrieveQueryResultInfo(*handle, &buildRes, pReadMsg->rpcMsg.handle); if (code != TSDB_CODE_SUCCESS) { //TODO handle malloc failure pRet->rsp = (SRetrieveTableRsp *)rpcMallocCont(sizeof(SRetrieveTableRsp)); From ed4fb4ac0822356d601e4c764b4c8ab317333187 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 27 Jul 2020 17:02:21 +0800 Subject: [PATCH 33/42] [td-225] --- src/client/src/tscUtil.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/client/src/tscUtil.c b/src/client/src/tscUtil.c index 1ef5d3feaf..e7fa2a84a9 100644 --- a/src/client/src/tscUtil.c +++ b/src/client/src/tscUtil.c @@ -351,7 +351,7 @@ void tscPartiallyFreeSqlObj(SSqlObj* pSql) { int32_t cmd = pCmd->command; if (cmd < TSDB_SQL_INSERT || cmd == TSDB_SQL_RETRIEVE_LOCALMERGE || cmd == TSDB_SQL_RETRIEVE_EMPTY_RESULT || cmd == TSDB_SQL_TABLE_JOIN_RETRIEVE) { - //tscRemoveFromSqlList(pSql); + tscRemoveFromSqlList(pSql); } // pSql->sqlstr will be used by tscBuildQueryStreamDesc @@ -1885,7 +1885,7 @@ void tscDoQuery(SSqlObj* pSql) { } if (pCmd->command == TSDB_SQL_SELECT) { - //tscAddIntoSqlList(pSql); + tscAddIntoSqlList(pSql); } if (pCmd->dataSourceType == DATA_FROM_DATA_FILE) { From bb6cbf5fef1b1f2d032c21824ea0e284e2cf4036 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Mon, 27 Jul 2020 19:01:36 +0800 Subject: [PATCH 34/42] [td-225] --- src/client/src/tscLocalMerge.c | 8 +++++++- src/query/inc/qExtbuffer.h | 6 +++--- src/query/inc/qResultbuf.h | 27 +++++++++++++------------- src/query/src/qExecutor.c | 9 ++++----- src/query/src/qExtbuffer.c | 4 ++-- src/query/src/qPercentile.c | 2 +- src/query/src/qResultbuf.c | 29 ++++++++++++++-------------- src/query/tests/resultBufferTest.cpp | 6 +++--- src/util/src/tcache.c | 2 +- 9 files changed, 48 insertions(+), 45 deletions(-) diff --git a/src/client/src/tscLocalMerge.c b/src/client/src/tscLocalMerge.c index 80fc82d90b..bf76b8cbe8 100644 --- a/src/client/src/tscLocalMerge.c +++ b/src/client/src/tscLocalMerge.c @@ -691,9 +691,15 @@ int32_t tscLocalReducerEnvCreate(SSqlObj *pSql, tExtMemBuffer ***pMemBuffer, tOr pModel = createColumnModel(pSchema, size, capacity); + int32_t pg = DEFAULT_PAGE_SIZE; + int32_t overhead = sizeof(tFilePage); + while((pg - overhead) < pModel->rowSize * 2) { + pg *= 2; + } + size_t numOfSubs = pTableMetaInfo->vgroupList->numOfVgroups; for (int32_t i = 0; i < numOfSubs; ++i) { - (*pMemBuffer)[i] = createExtMemBuffer(nBufferSizes, rlen, pModel); + (*pMemBuffer)[i] = createExtMemBuffer(nBufferSizes, rlen, pg, pModel); (*pMemBuffer)[i]->flushModel = MULTIPLE_APPEND_MODEL; } diff --git a/src/query/inc/qExtbuffer.h b/src/query/inc/qExtbuffer.h index 2cbef2b1be..9823e7d6ce 100644 --- a/src/query/inc/qExtbuffer.h +++ b/src/query/inc/qExtbuffer.h @@ -28,9 +28,9 @@ extern "C" { #include "tdataformat.h" #include "talgo.h" -#define DEFAULT_PAGE_SIZE (1024L*64) // 16k larger than the SHistoInfo -#define MAX_TMPFILE_PATH_LENGTH PATH_MAX +#define MAX_TMPFILE_PATH_LENGTH PATH_MAX #define INITIAL_ALLOCATION_BUFFER_SIZE 64 +#define DEFAULT_PAGE_SIZE (4096L) // 16k larger than the SHistoInfo typedef enum EXT_BUFFER_FLUSH_MODEL { /* @@ -126,7 +126,7 @@ typedef struct tExtMemBuffer { * @param pModel * @return */ -tExtMemBuffer *createExtMemBuffer(int32_t inMemSize, int32_t elemSize, SColumnModel *pModel); +tExtMemBuffer *createExtMemBuffer(int32_t inMemSize, int32_t elemSize, int32_t pagesize, SColumnModel *pModel); /** * diff --git a/src/query/inc/qResultbuf.h b/src/query/inc/qResultbuf.h index ba446b4627..ac907cfee6 100644 --- a/src/query/inc/qResultbuf.h +++ b/src/query/inc/qResultbuf.h @@ -52,7 +52,6 @@ typedef struct SResultBufStatis { int32_t getPages; int32_t releasePages; int32_t flushPages; - int32_t fileSize; } SResultBufStatis; typedef struct SDiskbasedResultBuf { @@ -68,29 +67,31 @@ typedef struct SDiskbasedResultBuf { SHashObj* groupSet; // id hash table SHashObj* all; SList* lruList; - void* handle; // for debug purpose void* emptyDummyIdList; // dummy id list - bool comp; // compressed before flushed to disk - void* assistBuf; // assistant buffer for compress data + void* assistBuf; // assistant buffer for compress/decompress data SArray* pFree; // free area in file + bool comp; // compressed before flushed to disk int32_t nextPos; // next page flush position + const void* handle; // for debug purpose SResultBufStatis statis; } SDiskbasedResultBuf; -#define DEFAULT_INTERN_BUF_PAGE_SIZE (1024L) +#define DEFAULT_INTERN_BUF_PAGE_SIZE (4096L) #define DEFAULT_INMEM_BUF_PAGES 10 #define PAGE_INFO_INITIALIZER (SPageDiskInfo){-1, -1} /** * create disk-based result buffer * @param pResultBuf - * @param size * @param rowSize + * @param pagesize + * @param inMemPages + * @param handle * @return */ -int32_t createDiskbasedResultBuffer(SDiskbasedResultBuf** pResultBuf, int32_t numOfPages, int32_t rowSize, int32_t pagesize, - int32_t inMemPages, const void* handle); +int32_t createDiskbasedResultBuffer(SDiskbasedResultBuf** pResultBuf, int32_t rowSize, int32_t pagesize, + int32_t inMemBufSize, const void* handle); /** * @@ -131,15 +132,13 @@ tFilePage* getResBufPage(SDiskbasedResultBuf* pResultBuf, int32_t id); */ void releaseResBufPage(SDiskbasedResultBuf* pResultBuf, void* page); -void releaseResBufPageInfo(SDiskbasedResultBuf* pResultBuf, SPageInfo* pi); - /** * * @param pResultBuf - * @param id - * @return + * @param pi */ -//tFilePage* getResBufPage(SDiskbasedResultBuf* pResultBuf, int32_t id); +void releaseResBufPageInfo(SDiskbasedResultBuf* pResultBuf, SPageInfo* pi); + /** * get the total buffer size in the format of disk file @@ -159,7 +158,7 @@ size_t getNumOfResultBufGroupId(const SDiskbasedResultBuf* pResultBuf); * destroy result buffer * @param pResultBuf */ -void destroyResultBuf(SDiskbasedResultBuf* pResultBuf, void* handle); +void destroyResultBuf(SDiskbasedResultBuf* pResultBuf); /** * diff --git a/src/query/src/qExecutor.c b/src/query/src/qExecutor.c index 9d873dc95f..ee24365d87 100644 --- a/src/query/src/qExecutor.c +++ b/src/query/src/qExecutor.c @@ -1621,7 +1621,7 @@ static void teardownQueryRuntimeEnv(SQueryRuntimeEnv *pRuntimeEnv) { pRuntimeEnv->pFillInfo = taosDestoryFillInfo(pRuntimeEnv->pFillInfo); - destroyResultBuf(pRuntimeEnv->pResultBuf, pQInfo); + destroyResultBuf(pRuntimeEnv->pResultBuf); tsdbCleanupQueryHandle(pRuntimeEnv->pQueryHandle); tsdbCleanupQueryHandle(pRuntimeEnv->pSecQueryHandle); @@ -4242,10 +4242,10 @@ int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bo int32_t ps = DEFAULT_PAGE_SIZE; int32_t rowsize = 0; getIntermediateBufInfo(pRuntimeEnv, &ps, &rowsize); + int32_t TWOMB = 1024*1024*2; if (isSTableQuery && !onlyQueryTags(pRuntimeEnv->pQuery)) { -// int32_t numOfPages = getInitialPageNum(pQInfo); - code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, 2, rowsize, ps, 2, pQInfo); + code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowsize, ps, TWOMB, pQInfo); if (code != TSDB_CODE_SUCCESS) { return code; } @@ -4273,8 +4273,7 @@ int32_t doInitQInfo(SQInfo *pQInfo, STSBuf *pTsBuf, void *tsdb, int32_t vgId, bo } else if (pRuntimeEnv->groupbyNormalCol || QUERY_IS_INTERVAL_QUERY(pQuery)) { int32_t numOfResultRows = getInitialPageNum(pQInfo); getIntermediateBufInfo(pRuntimeEnv, &ps, &rowsize); - - code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, numOfResultRows, rowsize, ps, numOfResultRows, pQInfo); + code = createDiskbasedResultBuffer(&pRuntimeEnv->pResultBuf, rowsize, ps, TWOMB, pQInfo); if (code != TSDB_CODE_SUCCESS) { return code; } diff --git a/src/query/src/qExtbuffer.c b/src/query/src/qExtbuffer.c index 69c5f0e24f..fb57f71199 100644 --- a/src/query/src/qExtbuffer.c +++ b/src/query/src/qExtbuffer.c @@ -28,10 +28,10 @@ /* * SColumnModel is deeply copy */ -tExtMemBuffer* createExtMemBuffer(int32_t inMemSize, int32_t elemSize, SColumnModel *pModel) { +tExtMemBuffer* createExtMemBuffer(int32_t inMemSize, int32_t elemSize, int32_t pagesize, SColumnModel *pModel) { tExtMemBuffer* pMemBuffer = (tExtMemBuffer *)calloc(1, sizeof(tExtMemBuffer)); - pMemBuffer->pageSize = DEFAULT_PAGE_SIZE; + pMemBuffer->pageSize = pagesize; pMemBuffer->inMemCapacity = ALIGN8(inMemSize) / pMemBuffer->pageSize; pMemBuffer->nElemSize = elemSize; diff --git a/src/query/src/qPercentile.c b/src/query/src/qPercentile.c index c4490a01e7..85e45e46b3 100644 --- a/src/query/src/qPercentile.c +++ b/src/query/src/qPercentile.c @@ -535,7 +535,7 @@ void tMemBucketPut(tMemBucket *pBucket, void *data, int32_t numOfRows) { if (pSeg->pBuffer[slotIdx] == NULL) { pSeg->pBuffer[slotIdx] = createExtMemBuffer(pBucket->numOfTotalPages * pBucket->pageSize, pBucket->nElemSize, - pBucket->pOrderDesc->pColumnModel); + pBucket->pageSize, pBucket->pOrderDesc->pColumnModel); pSeg->pBuffer[slotIdx]->flushModel = SINGLE_APPEND_MODEL; pBucket->pOrderDesc->pColumnModel->capacity = pSeg->pBuffer[slotIdx]->numOfElemsPerPage; } diff --git a/src/query/src/qResultbuf.c b/src/query/src/qResultbuf.c index fbb5f116e6..33ae93f434 100644 --- a/src/query/src/qResultbuf.c +++ b/src/query/src/qResultbuf.c @@ -8,31 +8,33 @@ #define GET_DATA_PAYLOAD(_p) ((_p)->pData + POINTER_BYTES) -int32_t createDiskbasedResultBuffer(SDiskbasedResultBuf** pResultBuf, int32_t numOfPages, int32_t rowSize, - int32_t pagesize, int32_t inMemPages, const void* handle) { - +int32_t createDiskbasedResultBuffer(SDiskbasedResultBuf** pResultBuf, int32_t rowSize, int32_t pagesize, + int32_t inMemBufSize, const void* handle) { *pResultBuf = calloc(1, sizeof(SDiskbasedResultBuf)); + SDiskbasedResultBuf* pResBuf = *pResultBuf; if (pResBuf == NULL) { return TSDB_CODE_COM_OUT_OF_MEMORY; } pResBuf->pageSize = pagesize; - pResBuf->numOfPages = 0; // all pages are in buffer in the first place - pResBuf->inMemPages = inMemPages; + pResBuf->numOfPages = 0; // all pages are in buffer in the first place + pResBuf->inMemPages = inMemBufSize/pagesize; // maximum allowed pages, it is a soft limit. pResBuf->totalBufSize = pResBuf->numOfPages * pagesize; pResBuf->allocateId = -1; pResBuf->comp = true; + pResBuf->handle = handle; - assert(inMemPages <= numOfPages); + // at least more than 2 pages must be in memory + assert(inMemBufSize >= pagesize * 2); pResBuf->numOfRowsPerPage = (pagesize - sizeof(tFilePage)) / rowSize; pResBuf->lruList = tdListNew(POINTER_BYTES); // init id hash table - pResBuf->groupSet = taosHashInit(10, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false); - pResBuf->all = taosHashInit(10, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false); + pResBuf->groupSet = taosHashInit(10, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false); pResBuf->assistBuf = malloc(pResBuf->pageSize + 2); // EXTRA BYTES + pResBuf->all = taosHashInit(10, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false); char path[PATH_MAX] = {0}; getTmpfilePath("qbuf", path); @@ -47,9 +49,6 @@ int32_t createDiskbasedResultBuffer(SDiskbasedResultBuf** pResultBuf, int32_t nu return TSDB_CODE_SUCCESS; } -#define NUM_OF_PAGES_ON_DISK(_r) ((_r)->numOfPages - (_r)->inMemPages) -#define FILE_SIZE_ON_DISK(_r) (NUM_OF_PAGES_ON_DISK(_r) * (_r)->pageSize) - static int32_t createDiskFile(SDiskbasedResultBuf* pResultBuf) { pResultBuf->file = fopen(pResultBuf->path, "wb+"); if (pResultBuf->file == NULL) { @@ -384,18 +383,18 @@ SIDList getDataBufPagesIdList(SDiskbasedResultBuf* pResultBuf, int32_t groupId) } } -void destroyResultBuf(SDiskbasedResultBuf* pResultBuf, void* handle) { +void destroyResultBuf(SDiskbasedResultBuf* pResultBuf) { if (pResultBuf == NULL) { return; } if (pResultBuf->file != NULL) { - qDebug("QInfo:%p disk-based output buffer closed, total:%" PRId64 " bytes, file created:%s, file size:%d", handle, - pResultBuf->totalBufSize, pResultBuf->path, FILE_SIZE_ON_DISK(pResultBuf)); + qDebug("QInfo:%p disk-based output buffer closed, total:%" PRId64 " bytes, file created:%s, file size:%"PRId64, pResultBuf->handle, + pResultBuf->totalBufSize, pResultBuf->path, pResultBuf->diskFileSize); fclose(pResultBuf->file); } else { - qDebug("QInfo:%p disk-based output buffer closed, total:%" PRId64 " bytes, no file created", handle, + qDebug("QInfo:%p disk-based output buffer closed, total:%" PRId64 " bytes, no file created", pResultBuf->handle, pResultBuf->totalBufSize); } diff --git a/src/query/tests/resultBufferTest.cpp b/src/query/tests/resultBufferTest.cpp index 3171a7b322..3b74bf1b64 100644 --- a/src/query/tests/resultBufferTest.cpp +++ b/src/query/tests/resultBufferTest.cpp @@ -47,7 +47,7 @@ void simpleTest() { tFilePage* t4 = getResBufPage(pResultBuf, pageId); ASSERT_TRUE(t4 == pBufPage5); - destroyResultBuf(pResultBuf, NULL); + destroyResultBuf(pResultBuf); } void writeDownTest() { @@ -94,7 +94,7 @@ void writeDownTest() { SArray* pa = getDataBufPagesIdList(pResultBuf, groupId); ASSERT_EQ(taosArrayGetSize(pa), 5); - destroyResultBuf(pResultBuf, NULL); + destroyResultBuf(pResultBuf); } void recyclePageTest() { @@ -148,7 +148,7 @@ void recyclePageTest() { SArray* pa = getDataBufPagesIdList(pResultBuf, groupId); ASSERT_EQ(taosArrayGetSize(pa), 6); - destroyResultBuf(pResultBuf, NULL); + destroyResultBuf(pResultBuf); } } // namespace diff --git a/src/util/src/tcache.c b/src/util/src/tcache.c index 7c2a6b3219..4058bf1672 100644 --- a/src/util/src/tcache.c +++ b/src/util/src/tcache.c @@ -381,7 +381,7 @@ void *taosCacheAcquireByData(SCacheObj *pCacheObj, void *data) { } void *taosCacheTransfer(SCacheObj *pCacheObj, void **data) { - if (pCacheObj == NULL || data == NULL || *data == NULL) return NULL; + if (pCacheObj == NULL || data == NULL || (*data) == NULL) return NULL; size_t offset = offsetof(SCacheDataNode, data); SCacheDataNode *ptNode = (SCacheDataNode *)((char *)(*data) - offset); From fe05941584cc6c5973a3cb0c55c5f85bd148802c Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 28 Jul 2020 10:51:21 +0800 Subject: [PATCH 35/42] [td-255] --- src/query/src/qExecutor.c | 5 +--- src/util/src/tcache.c | 3 +- src/vnode/src/vnodeRead.c | 58 ++++++++++++++++++--------------------- 3 files changed, 30 insertions(+), 36 deletions(-) diff --git a/src/query/src/qExecutor.c b/src/query/src/qExecutor.c index cc4fab343d..bca07e7150 100644 --- a/src/query/src/qExecutor.c +++ b/src/query/src/qExecutor.c @@ -6372,16 +6372,14 @@ int32_t qRetrieveQueryResultInfo(qinfo_t qinfo, bool* buildRes, void* pRspContex return pQInfo->code; } - *buildRes = false; int32_t code = TSDB_CODE_SUCCESS; - pthread_mutex_lock(&pQInfo->lock); if (pQInfo->dataReady == QUERY_RESULT_READY) { *buildRes = true; - qDebug("QInfo:%p retrieve result info, rowsize:%d, rows:%"PRId64", code:%d", pQInfo, pQuery->rowSize, pQuery->rec.rows, pQInfo->code); } else { + *buildRes = false; qDebug("QInfo:%p retrieve req set query return result after paused", pQInfo); pQInfo->rspContext = pRspContext; } @@ -6473,7 +6471,6 @@ int32_t qDumpRetrieveResult(qinfo_t qinfo, SRetrieveTableRsp **pRsp, int32_t *co } else { // failed to dump result, free qhandle immediately *continueExec = false; qKillQuery(pQInfo); - qDestroyQueryInfo(pQInfo); } return code; diff --git a/src/util/src/tcache.c b/src/util/src/tcache.c index 0d295a0cfa..a086a87b4b 100644 --- a/src/util/src/tcache.c +++ b/src/util/src/tcache.c @@ -419,7 +419,7 @@ void taosCacheRelease(SCacheObj *pCacheObj, void **data, bool _remove) { // note: extend lifespan before dec ref count bool inTrashCan = pNode->inTrashCan; - if (pCacheObj->extendLifespan && (!inTrashCan)) { + if (pCacheObj->extendLifespan && (!inTrashCan) && (!_remove)) { atomic_store_64(&pNode->expireTime, pNode->lifespan + taosGetTimestampMs()); uDebug("cache:%s data:%p extend life time to %"PRId64 " before release", pCacheObj->name, pNode->data, pNode->expireTime); } @@ -643,6 +643,7 @@ static void doCacheRefresh(SCacheObj* pCacheObj, int64_t time, __cache_free_fn_t __cache_wr_lock(pCacheObj); while (taosHashIterNext(pIter)) { SCacheDataNode *pNode = *(SCacheDataNode **)taosHashIterGet(pIter); + if (pNode->expireTime < time && T_REF_VAL_GET(pNode) <= 0) { taosCacheReleaseNode(pCacheObj, pNode); continue; diff --git a/src/vnode/src/vnodeRead.c b/src/vnode/src/vnodeRead.c index 49c10dca3f..2a4ca0e663 100644 --- a/src/vnode/src/vnodeRead.c +++ b/src/vnode/src/vnodeRead.c @@ -66,12 +66,12 @@ int32_t vnodeProcessRead(void *param, SReadMsg *pReadMsg) { return (*vnodeProcessReadMsgFp[msgType])(pVnode, pReadMsg); } -static void vnodePutItemIntoReadQueue(SVnodeObj *pVnode, void *qhandle, void* handle) { +static void vnodePutItemIntoReadQueue(SVnodeObj *pVnode, void *qhandle) { SReadMsg *pRead = (SReadMsg *)taosAllocateQitem(sizeof(SReadMsg)); pRead->rpcMsg.msgType = TSDB_MSG_TYPE_QUERY; pRead->pCont = qhandle; pRead->contLen = 0; - pRead->rpcMsg.handle = handle; + pRead->rpcMsg.handle = NULL; atomic_add_fetch_32(&pVnode->refCount, 1); taosWriteQitem(pVnode->rqueue, TAOS_QTYPE_QUERY, pRead); @@ -99,6 +99,7 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { vWarn("QInfo:%p invalid qhandle, no matched query handle, conn:%p", (void*) killQueryMsg->qhandle, pReadMsg->rpcMsg.handle); } else { assert(*qhandle == (void*) killQueryMsg->qhandle); + qKillQuery(*qhandle); qReleaseQInfo(pVnode->qMgmt, (void**) &qhandle, true); } @@ -123,8 +124,7 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { // current connect is broken if (code == TSDB_CODE_SUCCESS) { -// handle = qRegisterQInfo(pVnode->qMgmt, (uint64_t) pQInfo); - handle = &pQInfo; + handle = qRegisterQInfo(pVnode->qMgmt, (uint64_t) pQInfo); if (handle == NULL) { // failed to register qhandle vError("vgId:%d QInfo:%p register qhandle failed, return to app, code:%s", pVnode->vgId, (void *)pQInfo, tstrerror(pRsp->code)); @@ -135,11 +135,10 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { pRsp->qhandle = htobe64((uint64_t) pQInfo); } -// pQInfo = NULL; if (handle != NULL && vnodeNotifyCurrentQhandle(pReadMsg->rpcMsg.handle, *handle, pVnode->vgId) != TSDB_CODE_SUCCESS) { vError("vgId:%d, QInfo:%p, query discarded since link is broken, %p", pVnode->vgId, *handle, pReadMsg->rpcMsg.handle); pRsp->code = TSDB_CODE_RPC_NETWORK_UNAVAIL; -// qReleaseQInfo(pVnode->qMgmt, (void**) &handle, true); + qReleaseQInfo(pVnode->qMgmt, (void**) &handle, true); return pRsp->code; } } else { @@ -149,15 +148,14 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { if (handle != NULL) { vDebug("vgId:%d, QInfo:%p, dnode query msg disposed, register qhandle and return to app", vgId, *handle); - vnodePutItemIntoReadQueue(pVnode, *handle, pReadMsg->rpcMsg.handle); -// qReleaseQInfo(pVnode->qMgmt, (void**) &handle, false); + vnodePutItemIntoReadQueue(pVnode, *handle); + qReleaseQInfo(pVnode->qMgmt, (void**) &handle, false); } } else { assert(pCont != NULL); - void* p = (void*) pCont; - handle = &p; -// handle = qAcquireQInfo(pVnode->qMgmt, (uint64_t) pCont); + + handle = qAcquireQInfo(pVnode->qMgmt, (uint64_t) pCont); if (handle == NULL) { vWarn("QInfo:%p invalid qhandle in continuing exec query, conn:%p", (void*) pCont, pReadMsg->rpcMsg.handle); code = TSDB_CODE_QRY_INVALID_QHANDLE; @@ -166,23 +164,25 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { bool buildRes = qTableQuery(*handle); // do execute query if (buildRes) { // build result rsp + // update the connection info according to the retrieve connection + pReadMsg->rpcMsg.handle = qGetResultRetrieveMsg(*handle); + assert(pReadMsg->rpcMsg.handle != NULL); - void* retrieveHandle = qGetResultRetrieveMsg(*handle); - assert(retrieveHandle != NULL); - - vDebug("vgId:%d, QInfo:%p, start to build result rsp after query paused, %p", pVnode->vgId, *handle, retrieveHandle); - pReadMsg->rpcMsg.handle = retrieveHandle; // update the connection info according to the retrieve connection + vDebug("vgId:%d, QInfo:%p, start to build result rsp after query paused, %p", pVnode->vgId, *handle, pReadMsg->rpcMsg.handle); pRet = &pReadMsg->rspRet; - code = TSDB_CODE_QRY_HAS_RSP; +// code = TSDB_CODE_QRY_HAS_RSP; bool continueExec = false; if ((code = qDumpRetrieveResult(*handle, (SRetrieveTableRsp **)&pRet->rsp, &pRet->len, &continueExec)) == TSDB_CODE_SUCCESS) { if (continueExec) { - vnodePutItemIntoReadQueue(pVnode, *handle, pReadMsg->rpcMsg.handle); + vTrace("QInfo:%p add to queue for further exec", *handle); + vnodePutItemIntoReadQueue(pVnode, *handle); pRet->qhandle = *handle; - code = TSDB_CODE_SUCCESS; +// code = TSDB_CODE_SUCCESS; + } else { + vDebug("QInfo:%p query completed", *handle); } } else { // todo handle error } @@ -190,7 +190,8 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { code = TSDB_CODE_QRY_HAS_RSP; } } -// qReleaseQInfo(pVnode->qMgmt, (void**) &handle, false); + + qReleaseQInfo(pVnode->qMgmt, (void**) &handle, false); } return code; @@ -209,11 +210,7 @@ static int32_t vnodeProcessFetchMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { memset(pRet, 0, sizeof(SRspRet)); int32_t code = TSDB_CODE_SUCCESS; - void** handle = NULL; - void* p1 = (void*) pRetrieve->qhandle; - handle = &p1; - -// void** handle = qAcquireQInfo(pVnode->qMgmt, pRetrieve->qhandle); + void** handle = qAcquireQInfo(pVnode->qMgmt, pRetrieve->qhandle); if (handle == NULL || (*handle) != (void*) pRetrieve->qhandle) { code = TSDB_CODE_QRY_INVALID_QHANDLE; vDebug("vgId:%d, invalid qhandle in fetch result, QInfo:%p", pVnode->vgId, (void*) pRetrieve->qhandle); @@ -233,7 +230,7 @@ static int32_t vnodeProcessFetchMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { if (pRetrieve->free == 1) { vDebug("vgId:%d, QInfo:%p, retrieve msg received to kill query and free qhandle", pVnode->vgId, *handle); qKillQuery(*handle); -// qReleaseQInfo(pVnode->qMgmt, (void**) &handle, true); + qReleaseQInfo(pVnode->qMgmt, (void**) &handle, true); pRet->rsp = (SRetrieveTableRsp *)rpcMallocCont(sizeof(SRetrieveTableRsp)); pRet->len = sizeof(SRetrieveTableRsp); @@ -255,16 +252,16 @@ static int32_t vnodeProcessFetchMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { //TODO handle malloc failure pRet->rsp = (SRetrieveTableRsp *)rpcMallocCont(sizeof(SRetrieveTableRsp)); memset(pRet->rsp, 0, sizeof(SRetrieveTableRsp)); - } else { - // result is not ready, return immediately + } else { // result is not ready, return immediately if (!buildRes) { + qReleaseQInfo(pVnode->qMgmt, (void**) &handle, false); return TSDB_CODE_QRY_NOT_READY; } bool continueExec = false; if ((code = qDumpRetrieveResult(*handle, (SRetrieveTableRsp **)&pRet->rsp, &pRet->len, &continueExec)) == TSDB_CODE_SUCCESS) { if (continueExec) { - vnodePutItemIntoReadQueue(pVnode, *handle, pReadMsg->rpcMsg.handle); + vnodePutItemIntoReadQueue(pVnode, *handle); pRet->qhandle = *handle; freeHandle = false; } @@ -274,8 +271,7 @@ static int32_t vnodeProcessFetchMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { } } - UNUSED(freeHandle); -// qReleaseQInfo(pVnode->qMgmt, (void**) &handle, freeHandle); + qReleaseQInfo(pVnode->qMgmt, (void**) &handle, freeHandle); return code; } From 11a649240627c59ab8e8e978db55ea74205921c1 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 28 Jul 2020 11:42:52 +0800 Subject: [PATCH 36/42] [td-255] --- src/dnode/src/dnodeVRead.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dnode/src/dnodeVRead.c b/src/dnode/src/dnodeVRead.c index 85ed194976..506cc0895b 100644 --- a/src/dnode/src/dnodeVRead.c +++ b/src/dnode/src/dnodeVRead.c @@ -49,7 +49,7 @@ static taos_qset readQset; int32_t dnodeInitVnodeRead() { readQset = taosOpenQset(); - readPool.min = 4; + readPool.min = 32; readPool.max = tsNumOfCores * tsNumOfThreadsPerCore; if (readPool.max <= readPool.min * 2) readPool.max = 2 * readPool.min; readPool.readWorker = (SReadWorker *)calloc(sizeof(SReadWorker), readPool.max); From 534a0a32cddfb7204dcd8843da5f35783617f790 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 28 Jul 2020 11:49:50 +0800 Subject: [PATCH 37/42] [td-255] change the min worker threads --- src/dnode/src/dnodeVRead.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dnode/src/dnodeVRead.c b/src/dnode/src/dnodeVRead.c index 506cc0895b..8064f4d496 100644 --- a/src/dnode/src/dnodeVRead.c +++ b/src/dnode/src/dnodeVRead.c @@ -49,7 +49,7 @@ static taos_qset readQset; int32_t dnodeInitVnodeRead() { readQset = taosOpenQset(); - readPool.min = 32; + readPool.min = tsNumOfCores / 2; readPool.max = tsNumOfCores * tsNumOfThreadsPerCore; if (readPool.max <= readPool.min * 2) readPool.max = 2 * readPool.min; readPool.readWorker = (SReadWorker *)calloc(sizeof(SReadWorker), readPool.max); From 9d2d061c15b5a0fcb2259a876565c30ad5fa85ab Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 28 Jul 2020 11:53:02 +0800 Subject: [PATCH 38/42] [td-255] change the min worker threads --- src/dnode/src/dnodeVRead.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dnode/src/dnodeVRead.c b/src/dnode/src/dnodeVRead.c index 8064f4d496..75e5ff7b6b 100644 --- a/src/dnode/src/dnodeVRead.c +++ b/src/dnode/src/dnodeVRead.c @@ -49,7 +49,7 @@ static taos_qset readQset; int32_t dnodeInitVnodeRead() { readQset = taosOpenQset(); - readPool.min = tsNumOfCores / 2; + readPool.min = (tsNumOfCores * 2) / 3; readPool.max = tsNumOfCores * tsNumOfThreadsPerCore; if (readPool.max <= readPool.min * 2) readPool.max = 2 * readPool.min; readPool.readWorker = (SReadWorker *)calloc(sizeof(SReadWorker), readPool.max); From 228ea8ae9c7021c5bb110af3ac1d7fa04871d7f6 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 28 Jul 2020 11:54:59 +0800 Subject: [PATCH 39/42] [td-255] change the min worker threads --- src/dnode/src/dnodeVRead.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dnode/src/dnodeVRead.c b/src/dnode/src/dnodeVRead.c index 75e5ff7b6b..cb53bb5e60 100644 --- a/src/dnode/src/dnodeVRead.c +++ b/src/dnode/src/dnodeVRead.c @@ -49,7 +49,7 @@ static taos_qset readQset; int32_t dnodeInitVnodeRead() { readQset = taosOpenQset(); - readPool.min = (tsNumOfCores * 2) / 3; + readPool.min = tsNumOfCores; readPool.max = tsNumOfCores * tsNumOfThreadsPerCore; if (readPool.max <= readPool.min * 2) readPool.max = 2 * readPool.min; readPool.readWorker = (SReadWorker *)calloc(sizeof(SReadWorker), readPool.max); From 5d6079e3ded2f3ed06dec2f4ee9af799d8c34ca6 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 28 Jul 2020 13:50:08 +0800 Subject: [PATCH 40/42] [td-255] refactor codes. --- src/query/src/qExecutor.c | 1 - src/vnode/src/vnodeRead.c | 97 +++++++++++++++++++-------------------- 2 files changed, 47 insertions(+), 51 deletions(-) diff --git a/src/query/src/qExecutor.c b/src/query/src/qExecutor.c index bca07e7150..5aa453f30e 100644 --- a/src/query/src/qExecutor.c +++ b/src/query/src/qExecutor.c @@ -6356,7 +6356,6 @@ bool qTableQuery(qinfo_t qinfo) { pthread_mutex_unlock(&pQInfo->lock); return buildRes; -// sem_post(&pQInfo->dataReady); } int32_t qRetrieveQueryResultInfo(qinfo_t qinfo, bool* buildRes, void* pRspContext) { diff --git a/src/vnode/src/vnodeRead.c b/src/vnode/src/vnodeRead.c index 2a4ca0e663..066770e1bb 100644 --- a/src/vnode/src/vnodeRead.c +++ b/src/vnode/src/vnodeRead.c @@ -77,6 +77,39 @@ static void vnodePutItemIntoReadQueue(SVnodeObj *pVnode, void *qhandle) { taosWriteQitem(pVnode->rqueue, TAOS_QTYPE_QUERY, pRead); } +static int32_t vnodeDumpQueryResult(SRspRet *pRet, void* pVnode, void* handle, bool* freeHandle) { + bool continueExec = false; + + int32_t code = TSDB_CODE_SUCCESS; + if ((code = qDumpRetrieveResult(handle, (SRetrieveTableRsp **)&pRet->rsp, &pRet->len, &continueExec)) == TSDB_CODE_SUCCESS) { + if (continueExec) { + vDebug("QInfo:%p add to query task queue for exec", handle); + vnodePutItemIntoReadQueue(pVnode, handle); + pRet->qhandle = handle; + *freeHandle = false; + } else { + vDebug("QInfo:%p exec completed", handle); + *freeHandle = true; + } + } else { + pRet->rsp = (SRetrieveTableRsp *)rpcMallocCont(sizeof(SRetrieveTableRsp)); + memset(pRet->rsp, 0, sizeof(SRetrieveTableRsp)); + *freeHandle = true; + } + + return code; +} + +static void vnodeBuildNoResultQueryRsp(SRspRet* pRet) { + pRet->rsp = (SRetrieveTableRsp *)rpcMallocCont(sizeof(SRetrieveTableRsp)); + pRet->len = sizeof(SRetrieveTableRsp); + + memset(pRet->rsp, 0, sizeof(SRetrieveTableRsp)); + SRetrieveTableRsp* pRsp = pRet->rsp; + + pRsp->completed = true; +} + static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { void *pCont = pReadMsg->pCont; int32_t contLen = pReadMsg->contLen; @@ -161,37 +194,27 @@ static int32_t vnodeProcessQueryMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { code = TSDB_CODE_QRY_INVALID_QHANDLE; } else { vDebug("vgId:%d, QInfo:%p, dnode continue exec query", pVnode->vgId, (void*) pCont); + + bool freehandle = false; bool buildRes = qTableQuery(*handle); // do execute query - if (buildRes) { // build result rsp + // build query rsp + if (buildRes) { // update the connection info according to the retrieve connection pReadMsg->rpcMsg.handle = qGetResultRetrieveMsg(*handle); assert(pReadMsg->rpcMsg.handle != NULL); vDebug("vgId:%d, QInfo:%p, start to build result rsp after query paused, %p", pVnode->vgId, *handle, pReadMsg->rpcMsg.handle); + code = vnodeDumpQueryResult(&pReadMsg->rspRet, pVnode, *handle, &freehandle); - pRet = &pReadMsg->rspRet; -// code = TSDB_CODE_QRY_HAS_RSP; - - bool continueExec = false; - if ((code = qDumpRetrieveResult(*handle, (SRetrieveTableRsp **)&pRet->rsp, &pRet->len, &continueExec)) == TSDB_CODE_SUCCESS) { - - if (continueExec) { - vTrace("QInfo:%p add to queue for further exec", *handle); - vnodePutItemIntoReadQueue(pVnode, *handle); - pRet->qhandle = *handle; -// code = TSDB_CODE_SUCCESS; - } else { - vDebug("QInfo:%p query completed", *handle); - } - } else { // todo handle error + // todo test the error code case + if (code == TSDB_CODE_SUCCESS) { + code = TSDB_CODE_QRY_HAS_RSP; } - - code = TSDB_CODE_QRY_HAS_RSP; } - } - qReleaseQInfo(pVnode->qMgmt, (void**) &handle, false); + qReleaseQInfo(pVnode->qMgmt, (void**) &handle, freehandle); + } } return code; @@ -214,16 +237,8 @@ static int32_t vnodeProcessFetchMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { if (handle == NULL || (*handle) != (void*) pRetrieve->qhandle) { code = TSDB_CODE_QRY_INVALID_QHANDLE; vDebug("vgId:%d, invalid qhandle in fetch result, QInfo:%p", pVnode->vgId, (void*) pRetrieve->qhandle); - - pRet->rsp = (SRetrieveTableRsp *)rpcMallocCont(sizeof(SRetrieveTableRsp)); - pRet->len = sizeof(SRetrieveTableRsp); - - memset(pRet->rsp, 0, sizeof(SRetrieveTableRsp)); - SRetrieveTableRsp* pRsp = pRet->rsp; - pRsp->numOfRows = 0; - pRsp->useconds = 0; - pRsp->completed = true; - + + vnodeBuildNoResultQueryRsp(pRet); return code; } @@ -232,15 +247,7 @@ static int32_t vnodeProcessFetchMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { qKillQuery(*handle); qReleaseQInfo(pVnode->qMgmt, (void**) &handle, true); - pRet->rsp = (SRetrieveTableRsp *)rpcMallocCont(sizeof(SRetrieveTableRsp)); - pRet->len = sizeof(SRetrieveTableRsp); - - memset(pRet->rsp, 0, sizeof(SRetrieveTableRsp)); - SRetrieveTableRsp* pRsp = pRet->rsp; - pRsp->numOfRows = 0; - pRsp->completed = true; - pRsp->useconds = 0; - + vnodeBuildNoResultQueryRsp(pRet); return code; } @@ -258,17 +265,7 @@ static int32_t vnodeProcessFetchMsg(SVnodeObj *pVnode, SReadMsg *pReadMsg) { return TSDB_CODE_QRY_NOT_READY; } - bool continueExec = false; - if ((code = qDumpRetrieveResult(*handle, (SRetrieveTableRsp **)&pRet->rsp, &pRet->len, &continueExec)) == TSDB_CODE_SUCCESS) { - if (continueExec) { - vnodePutItemIntoReadQueue(pVnode, *handle); - pRet->qhandle = *handle; - freeHandle = false; - } - } else { - pRet->rsp = (SRetrieveTableRsp *)rpcMallocCont(sizeof(SRetrieveTableRsp)); - memset(pRet->rsp, 0, sizeof(SRetrieveTableRsp)); - } + code = vnodeDumpQueryResult(pRet, pVnode, *handle, &freeHandle); } qReleaseQInfo(pVnode->qMgmt, (void**) &handle, freeHandle); From 5b248a9808c618c1ab46453e0208ace4418428ed Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 28 Jul 2020 14:24:12 +0800 Subject: [PATCH 41/42] [td-255] refactor codes. --- src/util/src/tcache.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/util/src/tcache.c b/src/util/src/tcache.c index a086a87b4b..ca0fdc36be 100644 --- a/src/util/src/tcache.c +++ b/src/util/src/tcache.c @@ -457,8 +457,9 @@ void taosCacheRelease(SCacheObj *pCacheObj, void **data, bool _remove) { } else { // NOTE: once refcount is decrease, pNode may be freed by other thread immediately. int32_t ref = T_REF_DEC(pNode); - uDebug("cache:%s, key:%p, %p is released, refcnt:%d, in trashcan:%d", pCacheObj->name, pNode->key, pNode->data, ref, - inTrashCan); + + uDebug("cache:%s, key:%p, %p released, refcnt:%d, data in trancan:%d", pCacheObj->name, pNode->key, pNode->data, + ref, inTrashCan); } } From 71b8850cce28900db8d34de415fa035e1f76bc97 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Tue, 28 Jul 2020 14:36:05 +0800 Subject: [PATCH 42/42] [td-255] refactor codes. --- src/query/src/qExecutor.c | 2 +- src/util/inc/tcache.h | 2 -- src/util/src/tcache.c | 1 + 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/query/src/qExecutor.c b/src/query/src/qExecutor.c index 5aa453f30e..5313e5378a 100644 --- a/src/query/src/qExecutor.c +++ b/src/query/src/qExecutor.c @@ -6647,7 +6647,7 @@ void freeqinfoFn(void *qhandle) { } void* qOpenQueryMgmt(int32_t vgId) { - const int32_t REFRESH_HANDLE_INTERVAL = 2; // every 2 seconds, refresh handle pool + const int32_t REFRESH_HANDLE_INTERVAL = 30; // every 30 seconds, refresh handle pool char cacheName[128] = {0}; sprintf(cacheName, "qhandle_%d", vgId); diff --git a/src/util/inc/tcache.h b/src/util/inc/tcache.h index 5a3545fd8f..1e2aeae394 100644 --- a/src/util/inc/tcache.h +++ b/src/util/inc/tcache.h @@ -68,8 +68,6 @@ typedef struct { int64_t refreshTime; STrashElem * pTrash; char* name; -// void * tmrCtrl; -// void * pTimer; SCacheStatis statistics; SHashObj * pHashTable; __cache_free_fn_t freeFp; diff --git a/src/util/src/tcache.c b/src/util/src/tcache.c index ca0fdc36be..0a9a69737b 100644 --- a/src/util/src/tcache.c +++ b/src/util/src/tcache.c @@ -573,6 +573,7 @@ void taosRemoveFromTrashCan(SCacheObj *pCacheObj, STrashElem *pElem) { free(pElem); } +// TODO add another lock when scanning trashcan void taosTrashCanEmpty(SCacheObj *pCacheObj, bool force) { __cache_wr_lock(pCacheObj);