diff --git a/include/common/tcommon.h b/include/common/tcommon.h index 63167bd834..779453f714 100644 --- a/include/common/tcommon.h +++ b/include/common/tcommon.h @@ -88,8 +88,6 @@ typedef struct { #pragma pack(push, 1) typedef struct SColumnDataAgg { int16_t colId; - int16_t minIndex; - int16_t maxIndex; int16_t numOfNull; int64_t sum; int64_t max; diff --git a/source/client/test/clientTests.cpp b/source/client/test/clientTests.cpp index e8e3237b67..7927c1e008 100644 --- a/source/client/test/clientTests.cpp +++ b/source/client/test/clientTests.cpp @@ -750,7 +750,6 @@ TEST(testCase, projection_query_stables) { taos_close(pConn); } - TEST(testCase, agg_query_tables) { TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0); ASSERT_NE(pConn, nullptr); @@ -763,7 +762,7 @@ TEST(testCase, agg_query_tables) { } taos_free_result(pRes); - pRes = taos_query(pConn, "show table distributed st1"); + pRes = taos_query(pConn, "show table distributed tup"); if (taos_errno(pRes) != 0) { printf("failed to select from table, reason:%s\n", taos_errstr(pRes)); taos_free_result(pRes); @@ -822,13 +821,29 @@ TEST(testCase, async_api_test) { } #endif + TEST(testCase, update_test) { TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0); ASSERT_NE(pConn, nullptr); - taos_query(pConn, "use abc1"); + TAOS_RES* pRes = taos_query(pConn, "create database if not exists abc1"); + if (taos_errno(pRes) != TSDB_CODE_SUCCESS) { + printf("failed to create database, code:%s", taos_errstr(pRes)); + taos_free_result(pRes); + return; + } - TAOS_RES* pRes = taos_query(pConn, "create table tup (ts timestamp, k int);"); + taos_free_result(pRes); + + pRes = taos_query(pConn, "use abc1"); + if (taos_errno(pRes) != TSDB_CODE_SUCCESS) { + printf("failed to use db, code:%s", taos_errstr(pRes)); + taos_free_result(pRes); + return; + } + taos_free_result(pRes); + + pRes = taos_query(pConn, "create table tup (ts timestamp, k int);"); if (taos_errno(pRes) != 0) { printf("failed to create table, reason:%s", taos_errstr(pRes)); } @@ -836,11 +851,10 @@ TEST(testCase, update_test) { taos_free_result(pRes); char s[256] = {0}; - for(int32_t i = 0; i < 7000; ++i) { - sprintf(s, "insert into tup values('2020-1-1 1:1:1', %d)", i); + for(int32_t i = 0; i < 17000; ++i) { + sprintf(s, "insert into tup values(now+%da, %d)", i, i); pRes = taos_query(pConn, s); taos_free_result(pRes); } } - #pragma GCC diagnostic pop diff --git a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h index b381046ecf..11a20285c0 100644 --- a/source/dnode/vnode/inc/vnode.h +++ b/source/dnode/vnode/inc/vnode.h @@ -131,7 +131,7 @@ int32_t tsdbReaderOpen(SVnode *pVnode, SQueryTableDataCond *pCond, SArray *pTabl void tsdbReaderClose(STsdbReader *pReader); bool tsdbNextDataBlock(STsdbReader *pReader); void tsdbRetrieveDataBlockInfo(STsdbReader *pReader, SDataBlockInfo *pDataBlockInfo); -int32_t tsdbRetrieveDataBlockStatisInfo(STsdbReader *pReader, SColumnDataAgg ***pBlockStatis, bool *allHave); +int32_t tsdbRetrieveDatablockSMA(STsdbReader *pReader, SColumnDataAgg ***pBlockStatis, bool *allHave); SArray *tsdbRetrieveDataBlock(STsdbReader *pTsdbReadHandle, SArray *pColumnIdList); int32_t tsdbReaderReset(STsdbReader *pReader, SQueryTableDataCond *pCond, int32_t tWinIdx); int32_t tsdbGetFileBlocksDistInfo(STsdbReader *pReader, STableBlockDistInfo *pTableBlockInfo); diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index 8e5b135858..47aec548a1 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -48,14 +48,15 @@ typedef struct SBlockOrderSupporter { typedef struct SIOCostSummary { int64_t blockLoadTime; - int64_t statisInfoLoadTime; + int64_t smaLoadTime; int64_t checkForNextTime; int64_t headFileLoad; int64_t headFileLoadTime; } SIOCostSummary; typedef struct SBlockLoadSuppInfo { - SColumnDataAgg* pstatis; + SArray* pColAgg; + SColumnDataAgg tsColAgg; SColumnDataAgg** plist; int16_t* colIds; // column ids for loading file block data int32_t* slotIds; // colId to slotId @@ -63,10 +64,10 @@ typedef struct SBlockLoadSuppInfo { } SBlockLoadSuppInfo; typedef struct SFilesetIter { - int32_t numOfFiles; // number of total files - int32_t index; // current accessed index in the list - SArray* pFileList; // data file list - int32_t order; + int32_t numOfFiles; // number of total files + int32_t index; // current accessed index in the list + SArray* pFileList; // data file list + int32_t order; } SFilesetIter; typedef struct SFileDataBlockInfo { @@ -119,24 +120,10 @@ struct STsdbReader { int32_t type; // query type: 1. retrieve all data blocks, 2. retrieve direct prev|next rows SBlockLoadSuppInfo suppInfo; - SIOCostSummary cost; - STSchema* pSchema; - SDataFReader* pFileReader; - SVersionRange verRange; -#if 0 - SArray* prev; // previous row which is before than time window - SArray* next; // next row which is after the query time window - SFileBlockInfo* pDataBlockInfo; - SDataCols* pDataCols; // in order to hold current file data block - int32_t allocSize; // allocated data block size - SDataBlockLoadInfo dataBlockLoadInfo; /* record current block load information */ - SLoadCompBlockInfo compBlockLoadInfo; /* record current compblock information in SQueryAttr */ - // SDFileSet* pFileGroup; - // SFSIter fileIter; - // SReadH rhelper; - // SColumnDataAgg* statis; // query level statistics, only one table block statistics info exists at any time - // SColumnDataAgg** pstatis;// the ptr array list to return to caller -#endif + SIOCostSummary cost; + STSchema* pSchema; + SDataFReader* pFileReader; + SVersionRange verRange; }; static SFileDataBlockInfo* getCurrentBlockInfo(SDataBlockIter* pBlockIter); @@ -150,7 +137,7 @@ static int32_t doMergeRowsInBuf(SIterInfo* pIter, int64_t ts, SArray* pDelList, static int32_t doAppendOneRow(SSDataBlock* pBlock, STsdbReader* pReader, STSRow* pTSRow); static void setComposedBlockFlag(STsdbReader* pReader, bool composed); static void updateSchema(TSDBROW* pRow, uint64_t uid, STsdbReader* pReader); -static bool hasBeenDropped(const SArray* pDelList, int32_t* index, TSDBKEY* pKey); +static bool hasBeenDropped(const SArray* pDelList, int32_t* index, TSDBKEY* pKey, int32_t order); static void doMergeMultiRows(TSDBROW* pRow, uint64_t uid, SIterInfo* pIter, SArray* pDelList, STSRow** pTSRow, STsdbReader* pReader); @@ -252,33 +239,6 @@ static STimeWindow updateQueryTimeWindow(STsdb* pTsdb, STimeWindow* pWindow) { return win; } -// todo remove this -static void setQueryTimewindow(STsdbReader* pReader, SQueryTableDataCond* pCond, int32_t tWinIdx) { - // pReader->window = pCond->twindows[tWinIdx]; - - // bool updateTs = false; - // int64_t startTs = updateQueryTimeWindow(pReader->pTsdb); - // if (ASCENDING_TRAVERSE(pReader->order)) { - // if (startTs > pReader->window.skey) { - // pReader->window.skey = startTs; - // pCond->twindows[tWinIdx].skey = startTs; - // updateTs = true; - // } - // } else { - // if (startTs > pReader->window.ekey) { - // pReader->window.ekey = startTs; - // pCond->twindows[tWinIdx].ekey = startTs; - // updateTs = true; - // } - // } - - // if (updateTs) { - // tsdbDebug("%p update the query time window, old:%" PRId64 " - %" PRId64 ", new:%" PRId64 " - %" PRId64 ", %s", - // pReader, pCond->twindows[tWinIdx].skey, pCond->twindows[tWinIdx].ekey, pReader->window.skey, - // pReader->window.ekey, pReader->idStr); - // } -} - static void limitOutputBufferSize(const SQueryTableDataCond* pCond, int32_t* capacity) { int32_t rowLen = 0; for (int32_t i = 0; i < pCond->numOfCols; ++i) { @@ -405,21 +365,21 @@ static int32_t tsdbReaderCreate(SVnode* pVnode, SQueryTableDataCond* pCond, STsd pReader->type = pCond->type; pReader->window = updateQueryTimeWindow(pVnode->pTsdb, pCond->twindows); - // todo remove this - setQueryTimewindow(pReader, pCond, 0); ASSERT(pCond->numOfCols > 0); limitOutputBufferSize(pCond, &pReader->capacity); // allocate buffer in order to load data blocks from file SBlockLoadSuppInfo* pSup = &pReader->suppInfo; - pSup->pstatis = taosMemoryCalloc(pCond->numOfCols, sizeof(SColumnDataAgg)); + pSup->pColAgg = taosArrayInit(4, sizeof(SColumnDataAgg)); pSup->plist = taosMemoryCalloc(pCond->numOfCols, POINTER_BYTES); - if (pSup->pstatis == NULL || pSup->plist == NULL) { + if (pSup->pColAgg == NULL || pSup->plist == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _end; } + pSup->tsColAgg.colId = PRIMARYKEY_TIMESTAMP_COL_ID; + pReader->pResBlock = createResBlock(pCond, pReader->capacity); if (pReader->pResBlock == NULL) { code = terrno; @@ -920,200 +880,6 @@ _error: // return midPos; // } -// static int32_t mergeTwoRowFromMem(STsdbReader* pTsdbReadHandle, int32_t capacity, int32_t* curRow, STSRow* row1, -// STSRow* row2, int32_t numOfCols, uint64_t uid, STSchema* pSchema1, STSchema* -// pSchema2, bool update, TSKEY* lastRowKey) { -// #if 1 -// STSchema* pSchema; -// STSRow* row; -// int16_t colId; -// int16_t offset; - -// bool isRow1DataRow = TD_IS_TP_ROW(row1); -// bool isRow2DataRow; -// bool isChosenRowDataRow; -// int32_t chosen_itr; -// SCellVal sVal = {0}; -// TSKEY rowKey = TSKEY_INITIAL_VAL; -// int32_t nResult = 0; -// int32_t mergeOption = 0; // 0 discard 1 overwrite 2 merge - -// // the schema version info is embeded in STSRow -// int32_t numOfColsOfRow1 = 0; - -// if (pSchema1 == NULL) { -// pSchema1 = metaGetTbTSchema(REPO_META(pTsdbReadHandle->pTsdb), uid, TD_ROW_SVER(row1)); -// } - -// #ifdef TD_DEBUG_PRINT_ROW -// char flags[70] = {0}; -// STsdb* pTsdb = pTsdbReadHandle->rhelper.pRepo; -// snprintf(flags, 70, "%s:%d vgId:%d dir:%s row1%s=NULL,row2%s=NULL", __func__, __LINE__, TD_VID(pTsdb->pVnode), -// pTsdb->dir, row1 ? "!" : "", row2 ? "!" : ""); -// tdSRowPrint(row1, pSchema1, flags); -// #endif - -// if (isRow1DataRow) { -// numOfColsOfRow1 = schemaNCols(pSchema1); -// } else { -// numOfColsOfRow1 = tdRowGetNCols(row1); -// } - -// int32_t numOfColsOfRow2 = 0; -// if (row2) { -// isRow2DataRow = TD_IS_TP_ROW(row2); -// if (pSchema2 == NULL) { -// pSchema2 = metaGetTbTSchema(REPO_META(pTsdbReadHandle->pTsdb), uid, TD_ROW_SVER(row2)); -// } -// if (isRow2DataRow) { -// numOfColsOfRow2 = schemaNCols(pSchema2); -// } else { -// numOfColsOfRow2 = tdRowGetNCols(row2); -// } -// } - -// int32_t i = 0, j = 0, k = 0; -// while (i < numOfCols && (j < numOfColsOfRow1 || k < numOfColsOfRow2)) { -// SColumnInfoData* pColInfo = taosArrayGet(pTsdbReadHandle->pColumns, i); - -// int32_t colIdOfRow1; -// if (j >= numOfColsOfRow1) { -// colIdOfRow1 = INT32_MAX; -// } else if (isRow1DataRow) { -// colIdOfRow1 = pSchema1->columns[j].colId; -// } else { -// colIdOfRow1 = tdKvRowColIdAt(row1, j); -// } - -// int32_t colIdOfRow2; -// if (k >= numOfColsOfRow2) { -// colIdOfRow2 = INT32_MAX; -// } else if (isRow2DataRow) { -// colIdOfRow2 = pSchema2->columns[k].colId; -// } else { -// colIdOfRow2 = tdKvRowColIdAt(row2, k); -// } - -// if (colIdOfRow1 < colIdOfRow2) { // the most probability -// if (colIdOfRow1 < pColInfo->info.colId) { -// ++j; -// continue; -// } -// row = row1; -// pSchema = pSchema1; -// isChosenRowDataRow = isRow1DataRow; -// chosen_itr = j; -// } else if (colIdOfRow1 == colIdOfRow2) { -// if (colIdOfRow1 < pColInfo->info.colId) { -// ++j; -// ++k; -// continue; -// } -// row = row1; -// pSchema = pSchema1; -// isChosenRowDataRow = isRow1DataRow; -// chosen_itr = j; -// } else { -// if (colIdOfRow2 < pColInfo->info.colId) { -// ++k; -// continue; -// } -// row = row2; -// pSchema = pSchema2; -// chosen_itr = k; -// isChosenRowDataRow = isRow2DataRow; -// } - -// if (isChosenRowDataRow) { -// colId = pSchema->columns[chosen_itr].colId; -// offset = pSchema->columns[chosen_itr].offset; -// // TODO: use STSRowIter -// tdSTpRowGetVal(row, colId, pSchema->columns[chosen_itr].type, pSchema->flen, offset, chosen_itr - 1, &sVal); -// if (colId == PRIMARYKEY_TIMESTAMP_COL_ID) { -// rowKey = *(TSKEY*)sVal.val; -// if (rowKey != *lastRowKey) { -// mergeOption = 1; -// if (*lastRowKey != TSKEY_INITIAL_VAL) { -// ++(*curRow); -// } -// *lastRowKey = rowKey; -// ++nResult; -// } else if (update) { -// mergeOption = 2; -// } else { -// mergeOption = 0; -// break; -// } -// } -// } else { -// // TODO: use STSRowIter -// if (chosen_itr == 0) { -// colId = PRIMARYKEY_TIMESTAMP_COL_ID; -// tdSKvRowGetVal(row, PRIMARYKEY_TIMESTAMP_COL_ID, -1, -1, &sVal); -// rowKey = *(TSKEY*)sVal.val; -// if (rowKey != *lastRowKey) { -// mergeOption = 1; -// if (*lastRowKey != TSKEY_INITIAL_VAL) { -// ++(*curRow); -// } -// *lastRowKey = rowKey; -// ++nResult; -// } else if (update) { -// mergeOption = 2; -// } else { -// mergeOption = 0; -// break; -// } -// } else { -// SKvRowIdx* pColIdx = tdKvRowColIdxAt(row, chosen_itr - 1); -// colId = pColIdx->colId; -// offset = pColIdx->offset; -// tdSKvRowGetVal(row, colId, offset, chosen_itr - 1, &sVal); -// } -// } - -// ASSERT(rowKey != TSKEY_INITIAL_VAL); - -// if (colId == pColInfo->info.colId) { -// if (tdValTypeIsNorm(sVal.valType)) { -// colDataAppend(pColInfo, *curRow, sVal.val, false); -// } else if (tdValTypeIsNull(sVal.valType)) { -// colDataAppend(pColInfo, *curRow, NULL, true); -// } else if (tdValTypeIsNone(sVal.valType)) { -// // TODO: Set null if nothing append for this row -// if (mergeOption == 1) { -// colDataAppend(pColInfo, *curRow, NULL, true); -// } -// } else { -// ASSERT(0); -// } - -// ++i; - -// if (row == row1) { -// ++j; -// } else { -// ++k; -// } -// } else { -// if (mergeOption == 1) { -// colDataAppend(pColInfo, *curRow, NULL, true); -// } -// ++i; -// } -// } - -// if (mergeOption == 1) { -// while (i < numOfCols) { // the remain columns are all null data -// SColumnInfoData* pColInfo = taosArrayGet(pTsdbReadHandle->pColumns, i); -// colDataAppend(pColInfo, *curRow, NULL, true); -// ++i; -// } -// } - -// return nResult; -// #endif -// } // static void doCheckGeneratedBlockRange(STsdbReader* pTsdbReadHandle) { // SQueryFilePos* cur = &pTsdbReadHandle->cur; @@ -1393,66 +1159,6 @@ _error: // pTsdbReadHandle->idStr); // } -// int32_t binarySearchForKey(char* pValue, int num, TSKEY key, int order) { -// int firstPos, lastPos, midPos = -1; -// int numOfRows; -// TSKEY* keyList; - -// if (num <= 0) return -1; - -// keyList = (TSKEY*)pValue; -// firstPos = 0; -// lastPos = num - 1; - -// if (order == TSDB_ORDER_DESC) { -// // find the first position which is smaller than the key -// while (1) { -// if (key >= keyList[lastPos]) return lastPos; -// if (key == keyList[firstPos]) return firstPos; -// if (key < keyList[firstPos]) return firstPos - 1; - -// numOfRows = lastPos - firstPos + 1; -// midPos = (numOfRows >> 1) + firstPos; - -// if (key < keyList[midPos]) { -// lastPos = midPos - 1; -// } else if (key > keyList[midPos]) { -// firstPos = midPos + 1; -// } else { -// break; -// } -// } - -// } else { -// // find the first position which is bigger than the key -// while (1) { -// if (key <= keyList[firstPos]) return firstPos; -// if (key == keyList[lastPos]) return lastPos; - -// if (key > keyList[lastPos]) { -// lastPos = lastPos + 1; -// if (lastPos >= num) -// return -1; -// else -// return lastPos; -// } - -// numOfRows = lastPos - firstPos + 1; -// midPos = (numOfRows >> 1) + firstPos; - -// if (key < keyList[midPos]) { -// lastPos = midPos - 1; -// } else if (key > keyList[midPos]) { -// firstPos = midPos + 1; -// } else { -// break; -// } -// } -// } - -// return midPos; -// } - static void cleanupBlockOrderSupporter(SBlockOrderSupporter* pSup) { taosMemoryFreeClear(pSup->numOfBlocksPerTable); taosMemoryFreeClear(pSup->indexPerTable); @@ -1710,29 +1416,60 @@ static bool keyOverlapFileBlock(TSDBKEY key, SBlock* pBlock, SVersionRange* pVer (pBlock->minVersion <= pVerRange->maxVer); } +static bool overlapWithDelSkyline(STableBlockScanInfo* pBlockScanInfo, const SBlock* pBlock, int32_t order) { + if (pBlockScanInfo->delSkyline == NULL) { + return false; + } + + TSDBKEY* pFirst = taosArrayGet(pBlockScanInfo->delSkyline, 0); + TSDBKEY* pLast = taosArrayGetLast(pBlockScanInfo->delSkyline); + + // ts is not overlap + if (pBlock->minKey.ts > pLast->ts || pBlock->maxKey.ts < pFirst->ts) { + return false; + } + + int32_t step = ASCENDING_TRAVERSE(order)? 1:-1; + + // version is not overlap + size_t num = taosArrayGetSize(pBlockScanInfo->delSkyline); + for(int32_t i = pBlockScanInfo->fileDelIndex; i < num; i += step) { + TSDBKEY* p = taosArrayGet(pBlockScanInfo->delSkyline, i); + if (p->ts >= pBlock->minKey.ts && p->ts <= pBlock->maxKey.ts) { + if (p->version >= pBlock->minVersion) { + return true; + } + } else if (p->ts > pBlock->maxKey.ts) { + return false; + } + } + + ASSERT(0); + return false; +} + // 1. the version of all rows should be less than the endVersion // 2. current block should not overlap with next neighbor block // 3. current timestamp should not be overlap with each other // 4. output buffer should be large enough to hold all rows in current block +// 5. delete info should not overlap with current block data static bool fileBlockShouldLoad(STsdbReader* pReader, SFileDataBlockInfo* pFBlock, SBlock* pBlock, STableBlockScanInfo* pScanInfo, TSDBKEY key) { int32_t neighborIndex = 0; SBlock* pNeighbor = getNeighborBlockOfSameTable(pFBlock, pScanInfo, &neighborIndex, pReader->order); + // overlap with neighbor bool overlapWithNeighbor = false; if (pNeighbor) { overlapWithNeighbor = overlapWithNeighborBlock(pBlock, pNeighbor, pReader->order); } - bool hasDup = false; - if (pBlock->nSubBlock == 1) { - hasDup = pBlock->hasDup; - } else { - hasDup = true; - } + // has duplicated ts of different version in this block + bool hasDup = (pBlock->nSubBlock == 1)? pBlock->hasDup:true; + bool overlapWithDel= overlapWithDelSkyline(pScanInfo, pBlock, pReader->order); return (overlapWithNeighbor || hasDup || dataBlockPartiallyRequired(&pReader->window, &pReader->verRange, pBlock) || - keyOverlapFileBlock(key, pBlock, &pReader->verRange) || (pBlock->nRow > pReader->capacity)); + keyOverlapFileBlock(key, pBlock, &pReader->verRange) || (pBlock->nRow > pReader->capacity) || overlapWithDel); } static int32_t buildDataBlockFromBuf(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo, int64_t endKey) { @@ -1957,7 +1694,7 @@ static bool isValidFileBlockRow(SBlockData* pBlockData, SFileBlockDumpInfo* pDum } TSDBKEY k = {.ts = ts, .version = ver}; - if (hasBeenDropped(pBlockScanInfo->delSkyline, &pBlockScanInfo->fileDelIndex, &k)) { + if (hasBeenDropped(pBlockScanInfo->delSkyline, &pBlockScanInfo->fileDelIndex, &k, pReader->order)) { return false; } @@ -2486,41 +2223,104 @@ static SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond // taosArrayPush(pTsdbReadHandle->pTableCheckInfo, &info); // } -bool hasBeenDropped(const SArray* pDelList, int32_t* index, TSDBKEY* pKey) { +bool hasBeenDropped(const SArray* pDelList, int32_t* index, TSDBKEY* pKey, int32_t order) { ASSERT(pKey != NULL); if (pDelList == NULL) { return false; } + size_t num = taosArrayGetSize(pDelList); + bool asc = ASCENDING_TRAVERSE(order); + int32_t step = asc? 1:-1; - if (*index >= taosArrayGetSize(pDelList) - 1) { - TSDBKEY* last = taosArrayGetLast(pDelList); - if (pKey->ts > last->ts) { - return false; - } else if (pKey->ts == last->ts) { - size_t size = taosArrayGetSize(pDelList); - TSDBKEY* prev = taosArrayGet(pDelList, size - 2); - if (prev->version >= pKey->version) { - return true; - } else { + if (asc) { + if (*index >= num - 1) { + TSDBKEY* last = taosArrayGetLast(pDelList); + ASSERT(pKey->ts >= last->ts); + + if (pKey->ts > last->ts) { return false; + } else if (pKey->ts == last->ts) { + TSDBKEY* prev = taosArrayGet(pDelList, num - 2); + return (prev->version >= pKey->version); } } else { - ASSERT(0); + TSDBKEY* pCurrent = taosArrayGet(pDelList, *index); + TSDBKEY* pNext = taosArrayGet(pDelList, (*index) + 1); + + if (pKey->ts < pCurrent->ts) { + return false; + } + + if (pCurrent->ts <= pKey->ts && pNext->ts >= pKey->ts && pCurrent->version >= pKey->version) { + return true; + } + + while (pNext->ts <= pKey->ts && (*index) < num - 1) { + (*index) += 1; + + if ((*index) < num - 1) { + pCurrent = taosArrayGet(pDelList, *index); + pNext = taosArrayGet(pDelList, (*index) + 1); + + // it is not a consecutive deletion range, ignore it + if (pCurrent->version == 0 && pNext->version > 0) { + continue; + } + + if (pCurrent->ts <= pKey->ts && pNext->ts >= pKey->ts && pCurrent->version >= pKey->version) { + return true; + } + } + } + + return false; } } else { - TSDBKEY* pCurrent = taosArrayGet(pDelList, *index); - TSDBKEY* pNext = taosArrayGet(pDelList, (*index) + 1); + if (*index <= 0) { + TSDBKEY* pFirst = taosArrayGet(pDelList, 0); - if (pCurrent->ts <= pKey->ts && pNext->ts >= pKey->ts && pCurrent->version >= pKey->version) { - return true; + if (pKey->ts < pFirst->ts) { + return false; + } else if (pKey->ts == pFirst->ts) { + return pFirst->version >= pKey->version; + } else { + ASSERT(0); + } } else { - while (pNext->ts < pKey->ts && (*index) < taosArrayGetSize(pDelList) - 1) { - (*index) += 1; + TSDBKEY* pCurrent = taosArrayGet(pDelList, *index); + TSDBKEY* pPrev = taosArrayGet(pDelList, (*index) - 1); + + if (pKey->ts > pCurrent->ts) { + return false; + } + + if (pPrev->ts <= pKey->ts && pCurrent->ts >= pKey->ts && pPrev->version >= pKey->version) { + return true; + } + + while (pPrev->ts >= pKey->ts && (*index) > 1) { + (*index) += step; + + if ((*index) >= 1) { + pCurrent = taosArrayGet(pDelList, *index); + pPrev = taosArrayGet(pDelList, (*index) - 1); + + // it is not a consecutive deletion range, ignore it + if (pCurrent->version > 0 && pPrev->version == 0) { + continue; + } + + if (pPrev->ts <= pKey->ts && pCurrent->ts >= pKey->ts && pPrev->version >= pKey->version) { + return true; + } + } } return false; } } + + return false; } TSDBROW* getValidRow(SIterInfo* pIter, const SArray* pDelList, STsdbReader* pReader) { @@ -2537,7 +2337,7 @@ TSDBROW* getValidRow(SIterInfo* pIter, const SArray* pDelList, STsdbReader* pRea // it is a valid data version if ((key.version <= pReader->verRange.maxVer && key.version >= pReader->verRange.minVer) && - (!hasBeenDropped(pDelList, &pIter->index, &key))) { + (!hasBeenDropped(pDelList, &pIter->index, &key, pReader->order))) { return pRow; } @@ -2556,7 +2356,7 @@ TSDBROW* getValidRow(SIterInfo* pIter, const SArray* pDelList, STsdbReader* pRea } if (key.version <= pReader->verRange.maxVer && key.version >= pReader->verRange.minVer && - (!hasBeenDropped(pDelList, &pIter->index, &key))) { + (!hasBeenDropped(pDelList, &pIter->index, &key, pReader->order))) { return pRow; } } @@ -2908,162 +2708,6 @@ int32_t tsdbGetStbIdList(SMeta* pMeta, int64_t suid, SArray* list) { return TSDB_CODE_SUCCESS; } -// static void destroyHelper(void* param) { -// if (param == NULL) { -// return; -// } - -// // tQueryInfo* pInfo = (tQueryInfo*)param; -// // if (pInfo->optr != TSDB_RELATION_IN) { -// // taosMemoryFreeClear(pInfo->q); -// // } else { -// // taosHashCleanup((SHashObj *)(pInfo->q)); -// // } - -// taosMemoryFree(param); -// } - -// #define TSDB_PREV_ROW 0x1 -// #define TSDB_NEXT_ROW 0x2 - -// static bool loadBlockOfActiveTable(STsdbReader* pTsdbReadHandle) { -// if (pTsdbReadHandle->checkFiles) { -// // check if the query range overlaps with the file data block -// bool exists = true; - -// int32_t code = buildBlockFromFiles(pTsdbReadHandle, &exists); -// if (code != TSDB_CODE_SUCCESS) { -// pTsdbReadHandle->checkFiles = false; -// return false; -// } - -// if (exists) { -// tsdbRetrieveDataBlock((STsdbReader**)pTsdbReadHandle, NULL); -// if (pTsdbReadHandle->currentLoadExternalRows && pTsdbReadHandle->window.skey == pTsdbReadHandle->window.ekey) { -// SColumnInfoData* pColInfo = taosArrayGet(pTsdbReadHandle->pColumns, 0); -// assert(*(int64_t*)pColInfo->pData == pTsdbReadHandle->window.skey); -// } - -// pTsdbReadHandle->currentLoadExternalRows = false; // clear the flag, since the exact matched row is found. -// return exists; -// } - -// pTsdbReadHandle->checkFiles = false; -// } - -// if (hasMoreDataInCache(pTsdbReadHandle)) { -// pTsdbReadHandle->currentLoadExternalRows = false; -// return true; -// } - -// // current result is empty -// if (pTsdbReadHandle->currentLoadExternalRows && pTsdbReadHandle->window.skey == pTsdbReadHandle->window.ekey && -// pTsdbReadHandle->cur.rows == 0) { -// // SMemTable* pMemRef = pTsdbReadHandle->pMemTable; - -// // doGetExternalRow(pTsdbReadHandle, TSDB_PREV_ROW, pMemRef); -// // doGetExternalRow(pTsdbReadHandle, TSDB_NEXT_ROW, pMemRef); - -// bool result = tsdbGetExternalRow(pTsdbReadHandle); - -// // pTsdbReadHandle->prev = doFreeColumnInfoData(pTsdbReadHandle->prev); -// // pTsdbReadHandle->next = doFreeColumnInfoData(pTsdbReadHandle->next); -// pTsdbReadHandle->currentLoadExternalRows = false; - -// return result; -// } - -// return false; -// } - -// static bool loadDataBlockFromTableSeq(STsdbReader* pTsdbReadHandle) { -// size_t numOfTables = taosArrayGetSize(pTsdbReadHandle->pTableCheckInfo); -// assert(numOfTables > 0); - -// int64_t stime = taosGetTimestampUs(); - -// while (pTsdbReadHandle->activeIndex < numOfTables) { -// if (loadBlockOfActiveTable(pTsdbReadHandle)) { -// return true; -// } - -// STableBlockScanInfo* pCheckInfo = taosArrayGet(pTsdbReadHandle->pTableCheckInfo, pTsdbReadHandle->activeIndex); -// pCheckInfo->numOfBlocks = 0; - -// pTsdbReadHandle->activeIndex += 1; -// pTsdbReadHandle->locateStart = false; -// pTsdbReadHandle->checkFiles = true; -// pTsdbReadHandle->cur.rows = 0; -// pTsdbReadHandle->currentLoadExternalRows = pTsdbReadHandle->loadExternalRow; - -// terrno = TSDB_CODE_SUCCESS; - -// int64_t elapsedTime = taosGetTimestampUs() - stime; -// pTsdbReadHandle->cost.checkForNextTime += elapsedTime; -// } - -// return false; -// } - -// bool tsdbGetExternalRow(STsdbReader* pHandle) { -// STsdbReader* pTsdbReadHandle = (STsdbReader*)pHandle; -// SQueryFilePos* cur = &pTsdbReadHandle->cur; - -// cur->fid = INT32_MIN; -// cur->mixBlock = true; -// if (pTsdbReadHandle->prev == NULL || pTsdbReadHandle->next == NULL) { -// cur->rows = 0; -// return false; -// } - -// int32_t numOfCols = (int32_t)QH_GET_NUM_OF_COLS(pTsdbReadHandle); -// for (int32_t i = 0; i < numOfCols; ++i) { -// SColumnInfoData* pColInfoData = taosArrayGet(pTsdbReadHandle->pColumns, i); -// SColumnInfoData* first = taosArrayGet(pTsdbReadHandle->prev, i); - -// memcpy(pColInfoData->pData, first->pData, pColInfoData->info.bytes); - -// SColumnInfoData* sec = taosArrayGet(pTsdbReadHandle->next, i); -// memcpy(((char*)pColInfoData->pData) + pColInfoData->info.bytes, sec->pData, pColInfoData->info.bytes); - -// if (i == 0 && pColInfoData->info.type == TSDB_DATA_TYPE_TIMESTAMP) { -// cur->win.skey = *(TSKEY*)pColInfoData->pData; -// cur->win.ekey = *(TSKEY*)(((char*)pColInfoData->pData) + TSDB_KEYSIZE); -// } -// } - -// cur->rows = 2; -// return true; -// } - -// static void* doFreeColumnInfoData(SArray* pColumnInfoData) { -// if (pColumnInfoData == NULL) { -// return NULL; -// } - -// size_t cols = taosArrayGetSize(pColumnInfoData); -// for (int32_t i = 0; i < cols; ++i) { -// SColumnInfoData* pColInfo = taosArrayGet(pColumnInfoData, i); -// colDataDestroy(pColInfo); -// } - -// taosArrayDestroy(pColumnInfoData); -// return NULL; -// } - -// static void* destroyTableCheckInfo(SArray* pTableCheckInfo) { -// size_t size = taosArrayGetSize(pTableCheckInfo); -// for (int32_t i = 0; i < size; ++i) { -// STableBlockScanInfo* p = taosArrayGet(pTableCheckInfo, i); -// destroyTableMemIterator(p); - -// taosMemoryFreeClear(p->pCompInfo); -// } - -// taosArrayDestroy(pTableCheckInfo); -// return NULL; -// } - // ====================================== EXPOSED APIs ====================================== int32_t tsdbReaderOpen(SVnode* pVnode, SQueryTableDataCond* pCond, SArray* pTableList, STsdbReader** ppReader, const char* idstr) { @@ -3126,16 +2770,11 @@ void tsdbReaderClose(STsdbReader* pReader) { } blockDataDestroy(pReader->pResBlock); - - taosMemoryFreeClear(pReader->suppInfo.pstatis); taosMemoryFreeClear(pReader->suppInfo.plist); + + taosArrayDestroy(pReader->suppInfo.pColAgg); taosMemoryFree(pReader->suppInfo.slotIds); - if (!isEmptyQueryTimeWindow(&pReader->window)) { - // tsdbMayUnTakeMemSnapshot(pTsdbReadHandle); - } else { - ASSERT(pReader->status.pTableMap == NULL); - } #if 0 // if (pReader->status.pTableScanInfo != NULL) { // pReader->status.pTableScanInfo = destroyTableCheckInfo(pReader->status.pTableScanInfo); @@ -3154,7 +2793,7 @@ void tsdbReaderClose(STsdbReader* pReader) { tsdbDebug("%p :io-cost summary: head-file read cnt:%" PRIu64 ", head-file time:%" PRIu64 " us, statis-info:%" PRId64 " us, datablock:%" PRId64 " us, check data:%" PRId64 " us, %s", - pReader, pCost->headFileLoad, pCost->headFileLoadTime, pCost->statisInfoLoadTime, pCost->blockLoadTime, + pReader, pCost->headFileLoad, pCost->headFileLoadTime, pCost->smaLoadTime, pCost->blockLoadTime, pCost->checkForNextTime, pReader->idStr); taosMemoryFree(pReader->idStr); @@ -3207,77 +2846,76 @@ void tsdbRetrieveDataBlockInfo(STsdbReader* pReader, SDataBlockInfo* pDataBlockI pDataBlockInfo->window = pReader->pResBlock->info.window; } -int32_t tsdbRetrieveDataBlockStatisInfo(STsdbReader* pReader, SColumnDataAgg*** pBlockStatis, bool* allHave) { +int32_t tsdbRetrieveDatablockSMA(STsdbReader* pReader, SColumnDataAgg*** pBlockStatis, bool* allHave) { int32_t code = 0; *allHave = false; + // there is no statistics data for composed block if (pReader->status.composedDataBlock) { *pBlockStatis = NULL; return TSDB_CODE_SUCCESS; } - // SFileBlockInfo* pBlockInfo = &pReader->pDataBlockInfo[c->slot]; - // assert((c->slot >= 0 && c->slot < pReader->numOfBlocks) || ((c->slot == pReader->numOfBlocks) && (c->slot == 0))); + SFileDataBlockInfo* pFBlock = getCurrentBlockInfo(&pReader->status.blockIter); + STableBlockScanInfo* pBlockScanInfo = taosHashGet(pReader->status.pTableMap, &pFBlock->uid, sizeof(pFBlock->uid)); + SBlock* pBlock = taosArrayGet(pBlockScanInfo->pBlockList, pFBlock->tbBlockIdx); - // // file block with sub-blocks has no statistics data - // if (pBlockInfo->compBlock->numOfSubBlocks > 1) { - // *pBlockStatis = NULL; - // return TSDB_CODE_SUCCESS; - // } + int64_t stime = taosGetTimestampUs(); - // int64_t stime = taosGetTimestampUs(); - // int statisStatus = tsdbLoadBlockStatis(&pReader->rhelper, pBlockInfo->compBlock); - // if (statisStatus < TSDB_STATIS_OK) { - // return terrno; - // } else if (statisStatus > TSDB_STATIS_OK) { - // *pBlockStatis = NULL; - // return TSDB_CODE_SUCCESS; - // } + SBlockLoadSuppInfo* pSup = &pReader->suppInfo; - // tsdbDebug("vgId:%d, succeed to load block statis part for uid %" PRIu64, REPO_ID(pReader->pTsdb), - // TSDB_READ_TABLE_UID(&pReader->rhelper)); + if (tBlockHasSma(pBlock)) { + code = tsdbReadBlockSma(pReader->pFileReader, pBlock, pSup->pColAgg, NULL); + if (code != TSDB_CODE_SUCCESS) { + tsdbDebug("vgId:%d, failed to load block SMA for uid %" PRIu64 ", code:%s, %s", 0, pFBlock->uid, tstrerror(code), + pReader->idStr); + return code; + } + } else { + *pBlockStatis = NULL; + return TSDB_CODE_SUCCESS; + } - // int16_t* colIds = pReader->suppInfo.defaultLoadColumn->pData; + *allHave = true; - // size_t numOfCols = QH_GET_NUM_OF_COLS(pReader); - // memset(pReader->suppInfo.plist, 0, numOfCols * POINTER_BYTES); - // memset(pReader->suppInfo.pstatis, 0, numOfCols * sizeof(SColumnDataAgg)); + // always load the first primary timestamp column data + SColumnDataAgg* pTsAgg = &pSup->tsColAgg; - // for (int32_t i = 0; i < numOfCols; ++i) { - // pReader->suppInfo.pstatis[i].colId = colIds[i]; - // } + pTsAgg->numOfNull = 0; + pTsAgg->colId = PRIMARYKEY_TIMESTAMP_COL_ID; + pTsAgg->min = pReader->pResBlock->info.window.skey; + pTsAgg->max = pReader->pResBlock->info.window.ekey; + pSup->plist[0] = pTsAgg; - // *allHave = true; - // tsdbGetBlockStatis(&pReader->rhelper, pReader->suppInfo.pstatis, (int)numOfCols, pBlockInfo->compBlock); + // update the number of NULL data rows + size_t numOfCols = blockDataGetNumOfCols(pReader->pResBlock); - // // always load the first primary timestamp column data - // SColumnDataAgg* pPrimaryColStatis = &pReader->suppInfo.pstatis[0]; - // assert(pPrimaryColStatis->colId == PRIMARYKEY_TIMESTAMP_COL_ID); + int32_t i = 0, j = 0; + while (j < numOfCols && i < taosArrayGetSize(pSup->pColAgg)) { + SColumnDataAgg* pAgg = taosArrayGet(pSup->pColAgg, i); + if (pAgg->colId == pSup->colIds[j]) { + if (IS_BSMA_ON(&(pReader->pSchema->columns[i]))) { + pSup->plist[j] = pAgg; + } else { + *allHave = false; + } + i += 1; + j += 1; + } else if (pAgg->colId < pSup->colIds[j]) { + i += 1; + } else if (pSup->colIds[j] < pAgg->colId) { + j += 1; + } + } - // pPrimaryColStatis->numOfNull = 0; - // pPrimaryColStatis->min = pBlockInfo->compBlock->minKey.ts; - // pPrimaryColStatis->max = pBlockInfo->compBlock->maxKey.ts; - // pReader->suppInfo.plist[0] = &pReader->suppInfo.pstatis[0]; + int64_t elapsed = taosGetTimestampUs() - stime; + pReader->cost.smaLoadTime += elapsed; - // // update the number of NULL data rows - // int32_t* slotIds = pReader->suppInfo.slotIds; - // for (int32_t i = 1; i < numOfCols; ++i) { - // ASSERT(colIds[i] == pReader->pSchema->columns[slotIds[i]].colId); - // if (IS_BSMA_ON(&(pReader->pSchema->columns[slotIds[i]]))) { - // if (pReader->suppInfo.pstatis[i].numOfNull == -1) { // set the column data are all NULL - // pReader->suppInfo.pstatis[i].numOfNull = pBlockInfo->compBlock->numOfRows; - // } + *pBlockStatis = pSup->plist; - // pReader->suppInfo.plist[i] = &pReader->suppInfo.pstatis[i]; - // } else { - // *allHave = false; - // } - // } + tsdbDebug("vgId:%d, succeed to load block SMA for uid %" PRIu64 ", elapsed time:%" PRId64 "us, %s", 0, pFBlock->uid, + elapsed, pReader->idStr); - // int64_t elapsed = taosGetTimestampUs() - stime; - // pReader->cost.statisInfoLoadTime += elapsed; - - // *pBlockStatis = pReader->suppInfo.plist; return code; } @@ -3312,19 +2950,19 @@ int32_t tsdbReaderReset(STsdbReader* pReader, SQueryTableDataCond* pCond, int32_ return TSDB_CODE_SUCCESS; } - setQueryTimewindow(pReader, pCond, tWinIdx); - - pReader->order = pCond->order; - pReader->type = BLOCK_LOAD_OFFSET_ORDER; + pReader->order = pCond->order; + pReader->type = BLOCK_LOAD_OFFSET_ORDER; pReader->status.loadFromFile = true; pReader->status.pTableIter = NULL; pReader->window = updateQueryTimeWindow(pReader->pTsdb, &pCond->twindows[tWinIdx]); // allocate buffer in order to load data blocks from file - memset(pReader->suppInfo.pstatis, 0, sizeof(SColumnDataAgg)); + memset(&pReader->suppInfo.tsColAgg, 0, sizeof(SColumnDataAgg)); memset(pReader->suppInfo.plist, 0, POINTER_BYTES); + pReader->suppInfo.tsColAgg.colId = PRIMARYKEY_TIMESTAMP_COL_ID; + // todo set the correct numOfTables int32_t numOfTables = 1; SDataBlockIter* pBlockIter = &pReader->status.blockIter; @@ -3350,114 +2988,80 @@ int32_t tsdbReaderReset(STsdbReader* pReader, SQueryTableDataCond* pCond, int32_ return code; } +static int32_t getBucketIndex(int32_t startRow, int32_t bucketRange, int32_t numOfRows) { + return (numOfRows - startRow) / bucketRange; +} + int32_t tsdbGetFileBlocksDistInfo(STsdbReader* pReader, STableBlockDistInfo* pTableBlockInfo) { - int32_t code = 0; - // pTableBlockInfo->totalSize = 0; - // pTableBlockInfo->totalRows = 0; + int32_t code = TSDB_CODE_SUCCESS; + pTableBlockInfo->totalSize = 0; + pTableBlockInfo->totalRows = 0; - // STsdbFS* pFileHandle = REPO_FS(pReader->pTsdb); + // find the start data block in file + SReaderStatus* pStatus = &pReader->status; - // // find the start data block in file - // pReader->locateStart = true; - // STsdbKeepCfg* pCfg = REPO_KEEP_CFG(pReader->pTsdb); - // int32_t fid = getFileIdFromKey(pReader->window.skey, pCfg->days, pCfg->precision); + STsdbCfg* pc = &pReader->pTsdb->pVnode->config.tsdbCfg; + pTableBlockInfo->defMinRows = pc->minRows; + pTableBlockInfo->defMaxRows = pc->maxRows; - // tsdbRLockFS(pFileHandle); - // tsdbFSIterInit(&pReader->fileIter, pFileHandle, pReader->order); - // tsdbFSIterSeek(&pReader->fileIter, fid); - // tsdbUnLockFS(pFileHandle); + int32_t bucketRange = ceil((pc->maxRows - pc->minRows) / 20.0); - // STsdbCfg* pc = REPO_CFG(pReader->pTsdb); - // pTableBlockInfo->defMinRows = pc->minRows; - // pTableBlockInfo->defMaxRows = pc->maxRows; + pTableBlockInfo->numOfFiles += 1; - // int32_t bucketRange = ceil((pc->maxRows - pc->minRows) / 20.0); + int32_t numOfTables = (int32_t)taosHashGetSize(pStatus->pTableMap); + int defaultRows = 4096; - // pTableBlockInfo->numOfFiles += 1; + SDataBlockIter* pBlockIter = &pStatus->blockIter; + pTableBlockInfo->numOfFiles += pStatus->fileIter.numOfFiles; + pTableBlockInfo->numOfBlocks += pBlockIter->numOfBlocks; - // int32_t code = TSDB_CODE_SUCCESS; - // int32_t numOfBlocks = 0; - // int32_t numOfTables = (int32_t)taosArrayGetSize(pReader->pTableCheckInfo); - // int defaultRows = 4096; - // STimeWindow win = TSWINDOW_INITIALIZER; + pTableBlockInfo->numOfTables = numOfTables; + bool hasNext = true; - // while (true) { - // numOfBlocks = 0; - // tsdbRLockFS(REPO_FS(pReader->pTsdb)); + while (true) { + if (hasNext) { + SFileDataBlockInfo* pFBlock = getCurrentBlockInfo(pBlockIter); + STableBlockScanInfo* pScanInfo = taosHashGet(pStatus->pTableMap, &pFBlock->uid, sizeof(pFBlock->uid)); + SBlock* pBlock = taosArrayGet(pScanInfo->pBlockList, pFBlock->tbBlockIdx); - // if ((pReader->pFileGroup = tsdbFSIterNext(&pReader->fileIter)) == NULL) { - // tsdbUnLockFS(REPO_FS(pReader->pTsdb)); - // break; - // } + int32_t numOfRows = pBlock->nRow; + pTableBlockInfo->totalRows += numOfRows; - // tsdbGetFidKeyRange(pCfg->days, pCfg->precision, pReader->pFileGroup->fid, &win.skey, &win.ekey); + if (numOfRows > pTableBlockInfo->maxRows) { + pTableBlockInfo->maxRows = numOfRows; + } - // // current file are not overlapped with query time window, ignore remain files - // if ((win.skey > pReader->window.ekey) /* || (!ascTraverse && win.ekey < pTsdbReadHandle->window.ekey)*/) { - // tsdbUnLockFS(REPO_FS(pReader->pTsdb)); - // tsdbDebug("%p remain files are not qualified for qrange:%" PRId64 "-%" PRId64 ", ignore, %s", pReader, - // pReader->window.skey, pReader->window.ekey, pReader->idStr); - // pReader->pFileGroup = NULL; - // break; - // } + if (numOfRows < pTableBlockInfo->minRows) { + pTableBlockInfo->minRows = numOfRows; + } - // pTableBlockInfo->numOfFiles += 1; - // if (tsdbSetAndOpenReadFSet(&pReader->rhelper, pReader->pFileGroup) < 0) { - // tsdbUnLockFS(REPO_FS(pReader->pTsdb)); - // code = terrno; - // break; - // } + if (numOfRows < defaultRows) { + pTableBlockInfo->numOfSmallBlocks += 1; + } - // tsdbUnLockFS(REPO_FS(pReader->pTsdb)); + int32_t bucketIndex = getBucketIndex(pTableBlockInfo->defMinRows, bucketRange, numOfRows); + pTableBlockInfo->blockRowsHisto[bucketIndex]++; - // if (tsdbLoadBlockIdx(&pReader->rhelper) < 0) { - // code = terrno; - // break; - // } + hasNext = blockIteratorNext(&pStatus->blockIter); - // if ((code = getFileCompInfo(pReader, &numOfBlocks)) != TSDB_CODE_SUCCESS) { - // break; - // } + } else { + code = initForFirstBlockInFile(pReader, pBlockIter); + if ((code != TSDB_CODE_SUCCESS) || (pReader->status.loadFromFile == false)) { + break; + } - // tsdbDebug("%p %d blocks found in file for %d table(s), fid:%d, %s", pReader, numOfBlocks, numOfTables, - // pReader->pFileGroup->fid, pReader->idStr); + pTableBlockInfo->numOfBlocks += pBlockIter->numOfBlocks; + } - // if (numOfBlocks == 0) { - // continue; - // } +/* + hasNext = blockIteratorNext(&pStatus->blockIter); +*/ - // pTableBlockInfo->numOfBlocks += numOfBlocks; - // for (int32_t i = 0; i < numOfTables; ++i) { - // STableBlockScanInfo* pCheckInfo = taosArrayGet(pReader->pTableCheckInfo, i); +// tsdbDebug("%p %d blocks found in file for %d table(s), fid:%d, %s", pReader, numOfBlocks, numOfTables, +// pReader->pFileGroup->fid, pReader->idStr); + } - // SBlock* pBlock = pCheckInfo->pCompInfo->blocks; - - // for (int32_t j = 0; j < pCheckInfo->numOfBlocks; ++j) { - // pTableBlockInfo->totalSize += pBlock[j].len; - - // int32_t numOfRows = pBlock[j].numOfRows; - // pTableBlockInfo->totalRows += numOfRows; - - // if (numOfRows > pTableBlockInfo->maxRows) { - // pTableBlockInfo->maxRows = numOfRows; - // } - - // if (numOfRows < pTableBlockInfo->minRows) { - // pTableBlockInfo->minRows = numOfRows; - // } - - // if (numOfRows < defaultRows) { - // pTableBlockInfo->numOfSmallBlocks += 1; - // } - - // int32_t bucketIndex = getBucketIndex(pTableBlockInfo->defMinRows, bucketRange, numOfRows); - // pTableBlockInfo->blockRowsHisto[bucketIndex]++; - // } - // } - // } - - // pTableBlockInfo->numOfTables = numOfTables; return code; } diff --git a/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c b/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c index 98decfe002..a331f88e61 100644 --- a/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c +++ b/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c @@ -1156,7 +1156,7 @@ _err: int32_t tsdbReadBlockSma(SDataFReader *pReader, SBlock *pBlock, SArray *aColumnDataAgg, uint8_t **ppBuf) { int32_t code = 0; TdFilePtr pFD = pReader->pSmaFD; - int64_t offset = pBlock->aSubBlock[0].offset; + int64_t offset = pBlock->aSubBlock[0].sOffset; int64_t size = pBlock->aSubBlock[0].nSma * sizeof(SColumnDataAgg) + sizeof(TSCKSUM); uint8_t *pBuf = NULL; int64_t n; @@ -1179,10 +1179,13 @@ int32_t tsdbReadBlockSma(SDataFReader *pReader, SBlock *pBlock, SArray *aColumnD if (n < 0) { code = TAOS_SYSTEM_ERROR(errno); goto _err; + } else if (n < size) { + code = TSDB_CODE_FILE_CORRUPTED; + goto _err; } // check - if (!taosCheckChecksumWhole(NULL, size)) { + if (!taosCheckChecksumWhole(*ppBuf, size)) { code = TSDB_CODE_FILE_CORRUPTED; goto _err; } diff --git a/source/dnode/vnode/src/tsdb/tsdbUtil.c b/source/dnode/vnode/src/tsdb/tsdbUtil.c index 24b7c08476..ed8bdc17a5 100644 --- a/source/dnode/vnode/src/tsdb/tsdbUtil.c +++ b/source/dnode/vnode/src/tsdb/tsdbUtil.c @@ -1234,10 +1234,26 @@ void tsdbCalcColDataSMA(SColData *pColData, SColumnDataAgg *pColAgg) { break; case TSDB_DATA_TYPE_SMALLINT: break; - case TSDB_DATA_TYPE_INT: + case TSDB_DATA_TYPE_INT: { + pColAgg->sum += colVal.value.i32; + if (pColAgg->min > colVal.value.i32) { + pColAgg->min = colVal.value.i32; + } + if (pColAgg->max < colVal.value.i32) { + pColAgg->max = colVal.value.i32; + } break; - case TSDB_DATA_TYPE_BIGINT: + } + case TSDB_DATA_TYPE_BIGINT: { + pColAgg->sum += colVal.value.i64; + if (pColAgg->min > colVal.value.i64) { + pColAgg->min = colVal.value.i64; + } + if (pColAgg->max < colVal.value.i64) { + pColAgg->max = colVal.value.i64; + } break; + } case TSDB_DATA_TYPE_FLOAT: break; case TSDB_DATA_TYPE_DOUBLE: diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index ab08cfd2b3..581c3f179b 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -741,10 +741,10 @@ static int32_t doCreateConstantValColumnAggInfo(SInputColumnInfoData* pInput, SF if (type == TSDB_DATA_TYPE_BIGINT) { int64_t v = pFuncParam->param.i; - *da = (SColumnDataAgg){.numOfNull = 0, .min = v, .max = v, .maxIndex = 0, .minIndex = 0, .sum = v * numOfRows}; + *da = (SColumnDataAgg){.numOfNull = 0, .min = v, .max = v, .sum = v * numOfRows}; } else if (type == TSDB_DATA_TYPE_DOUBLE) { double v = pFuncParam->param.d; - *da = (SColumnDataAgg){.numOfNull = 0, .maxIndex = 0, .minIndex = 0}; + *da = (SColumnDataAgg){.numOfNull = 0}; *(double*)&da->min = v; *(double*)&da->max = v; @@ -752,7 +752,7 @@ static int32_t doCreateConstantValColumnAggInfo(SInputColumnInfoData* pInput, SF } else if (type == TSDB_DATA_TYPE_BOOL) { // todo validate this data type bool v = pFuncParam->param.i; - *da = (SColumnDataAgg){.numOfNull = 0, .maxIndex = 0, .minIndex = 0}; + *da = (SColumnDataAgg){.numOfNull = 0}; *(bool*)&da->min = 0; *(bool*)&da->max = v; *(bool*)&da->sum = v * numOfRows; @@ -1130,7 +1130,7 @@ int32_t loadDataBlockOnDemand(SExecTaskInfo* pTaskInfo, STableScanInfo* pTableSc } else if ((*status) == BLK_DATA_SMA_LOAD) { // this function never returns error? pCost->loadBlockStatis += 1; -// tsdbRetrieveDataBlockStatisInfo(pTableScanInfo->pTsdbReadHandle, &pBlock->pBlockAgg); +// tsdbRetrieveDatablockSMA(pTableScanInfo->pTsdbReadHandle, &pBlock->pBlockAgg); if (pBlock->pBlockAgg == NULL) { // data block statistics does not exist, load data block // pBlock->pDataBlock = tsdbRetrieveDataBlock(pTableScanInfo->pTsdbReadHandle, NULL); @@ -1141,7 +1141,7 @@ int32_t loadDataBlockOnDemand(SExecTaskInfo* pTaskInfo, STableScanInfo* pTableSc // load the data block statistics to perform further filter pCost->loadBlockStatis += 1; -// tsdbRetrieveDataBlockStatisInfo(pTableScanInfo->pTsdbReadHandle, &pBlock->pBlockAgg); +// tsdbRetrieveDatablockSMA(pTableScanInfo->pTsdbReadHandle, &pBlock->pBlockAgg); if (pQueryAttr->topBotQuery && pBlock->pBlockAgg != NULL) { { // set previous window diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index b1bad28294..e016344f2b 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -210,7 +210,10 @@ static int32_t loadDataBlock(SOperatorInfo* pOperator, STableScanInfo* pTableSca bool allColumnsHaveAgg = true; SColumnDataAgg** pColAgg = NULL; - tsdbRetrieveDataBlockStatisInfo(pTableScanInfo->dataReader, &pColAgg, &allColumnsHaveAgg); + int32_t code = tsdbRetrieveDatablockSMA(pTableScanInfo->dataReader, &pColAgg, &allColumnsHaveAgg); + if (code != TSDB_CODE_SUCCESS) { + longjmp(pTaskInfo->env, code); + } if (allColumnsHaveAgg == true) { int32_t numOfCols = taosArrayGetSize(pBlock->pDataBlock); @@ -2349,7 +2352,7 @@ static int32_t loadDataBlockFromOneTable(SOperatorInfo* pOperator, STableMergeSc bool allColumnsHaveAgg = true; SColumnDataAgg** pColAgg = NULL; STsdbReader* reader = taosArrayGetP(pTableScanInfo->dataReaders, readerIdx); - tsdbRetrieveDataBlockStatisInfo(reader, &pColAgg, &allColumnsHaveAgg); + tsdbRetrieveDatablockSMA(reader, &pColAgg, &allColumnsHaveAgg); if (allColumnsHaveAgg == true) { int32_t numOfCols = taosArrayGetSize(pBlock->pDataBlock); diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c index f94522f0d8..d8fcca30c0 100644 --- a/source/libs/function/src/builtinsimpl.c +++ b/source/libs/function/src/builtinsimpl.c @@ -1080,6 +1080,19 @@ bool getMinmaxFuncEnv(SFunctionNode* UNUSED_PARAM(pFunc), SFuncExecEnv* pEnv) { static void saveTupleData(SqlFunctionCtx* pCtx, int32_t rowIndex, const SSDataBlock* pSrcBlock, STuplePos* pPos); static void copyTupleData(SqlFunctionCtx* pCtx, int32_t rowIndex, const SSDataBlock* pSrcBlock, STuplePos* pPos); +static int32_t findRowIndex(int32_t start, int32_t num, SColumnInfoData* pCol, const char* tval) { + // the data is loaded, not only the block SMA value + for(int32_t i = start; i < num + start; ++i) { + char* p = colDataGetData(pCol, i); + if (memcpy((void*)tval, p, pCol->info.bytes) == 0) { + return i; + } + } + + ASSERT(0); +} + + int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) { int32_t numOfElems = 0; @@ -1111,15 +1124,14 @@ int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) { if (isMinFunc) { tval = &pInput->pColumnDataAgg[0]->min; - index = pInput->pColumnDataAgg[0]->minIndex; } else { tval = &pInput->pColumnDataAgg[0]->max; - index = pInput->pColumnDataAgg[0]->maxIndex; } if (!pBuf->assign) { pBuf->v = *(int64_t*)tval; if (pCtx->subsidiaries.num > 0) { + index = findRowIndex(pInput->startRowIndex, pInput->numOfRows, pCol, tval); saveTupleData(pCtx, index, pCtx->pSrcBlock, &pBuf->tuplePos); } } else { @@ -1131,6 +1143,7 @@ int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) { if ((prev < val) ^ isMinFunc) { pBuf->v = val; if (pCtx->subsidiaries.num > 0) { + index = findRowIndex(pInput->startRowIndex, pInput->numOfRows, pCol, tval); saveTupleData(pCtx, index, pCtx->pSrcBlock, &pBuf->tuplePos); } } @@ -1143,6 +1156,7 @@ int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) { if ((prev < val) ^ isMinFunc) { pBuf->v = val; if (pCtx->subsidiaries.num > 0) { + index = findRowIndex(pInput->startRowIndex, pInput->numOfRows, pCol, tval); saveTupleData(pCtx, index, pCtx->pSrcBlock, &pBuf->tuplePos); } } @@ -1154,6 +1168,7 @@ int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) { if ((prev < val) ^ isMinFunc) { pBuf->v = val; if (pCtx->subsidiaries.num > 0) { + index = findRowIndex(pInput->startRowIndex, pInput->numOfRows, pCol, tval); saveTupleData(pCtx, index, pCtx->pSrcBlock, &pBuf->tuplePos); } } @@ -1167,6 +1182,7 @@ int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) { } if (pCtx->subsidiaries.num > 0) { + index = findRowIndex(pInput->startRowIndex, pInput->numOfRows, pCol, tval); saveTupleData(pCtx, index, pCtx->pSrcBlock, &pBuf->tuplePos); } } @@ -5547,30 +5563,18 @@ int32_t blockDistFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) { } } - int32_t delta = maxVal - minVal; - int32_t step = delta / 50; - if (step == 0) { - step = 1; - } + // maximum number of step is 80 + double factor = pData->numOfBlocks / 80.0; int32_t numOfBuckets = sizeof(pData->blockRowsHisto) / sizeof(pData->blockRowsHisto[0]); - int32_t bucketRange = (pData->maxRows - pData->minRows) / numOfBuckets; - - bool singleModel = false; - if (bucketRange == 0) { - singleModel = true; - step = 20; - bucketRange = (pData->defMaxRows - pData->defMinRows) / numOfBuckets; - } + int32_t bucketRange = (pData->defMaxRows - pData->defMinRows) / numOfBuckets; for (int32_t i = 0; i < tListLen(pData->blockRowsHisto); ++i) { - len = sprintf(st + VARSTR_HEADER_SIZE, "%04d |", pData->defMinRows + bucketRange * (i + 1)); + len = sprintf(st + VARSTR_HEADER_SIZE, "%04d |", pData->defMinRows + bucketRange * i); int32_t num = 0; - if (singleModel && pData->blockRowsHisto[i] > 0) { - num = 20; - } else { - num = (pData->blockRowsHisto[i] + step - 1) / step; + if (pData->blockRowsHisto[i] > 0) { + num = (pData->blockRowsHisto[i]) / factor; } for (int32_t j = 0; j < num; ++j) { @@ -5578,9 +5582,10 @@ int32_t blockDistFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) { len += x; } - double v = pData->blockRowsHisto[i] * 100.0 / pData->numOfBlocks; - len += sprintf(st + VARSTR_HEADER_SIZE + len, " %d (%.2f%c)", pData->blockRowsHisto[i], v, '%'); - printf("%s\n", st); + if (num > 0) { + double v = pData->blockRowsHisto[i] * 100.0 / pData->numOfBlocks; + len += sprintf(st + VARSTR_HEADER_SIZE + len, " %d (%.2f%c)", pData->blockRowsHisto[i], v, '%'); + } varDataSetLen(st, len); colDataAppend(pColInfo, row++, st, false); diff --git a/tools/taosadapter b/tools/taosadapter index 389047db71..c885e967e4 160000 --- a/tools/taosadapter +++ b/tools/taosadapter @@ -1 +1 @@ -Subproject commit 389047db713a3dddfbce292c3260b0864b17d936 +Subproject commit c885e967e490105999b84d009a15168728dfafaf