From 4d2bc796e74b2c015f955996383bfdf9a9fdd0e0 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 7 Jul 2022 11:26:58 +0800 Subject: [PATCH 01/12] enh(query): add block sma for int type column data. --- source/client/test/clientTests.cpp | 23 +- source/dnode/vnode/src/tsdb/tsdbRead.c | 689 ++++--------------------- source/dnode/vnode/src/tsdb/tsdbUtil.c | 20 +- 3 files changed, 134 insertions(+), 598 deletions(-) diff --git a/source/client/test/clientTests.cpp b/source/client/test/clientTests.cpp index e8e3237b67..b8e4dcbea0 100644 --- a/source/client/test/clientTests.cpp +++ b/source/client/test/clientTests.cpp @@ -826,9 +826,24 @@ TEST(testCase, update_test) { TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0); ASSERT_NE(pConn, nullptr); - taos_query(pConn, "use abc1"); + TAOS_RES* pRes = taos_query(pConn, "create database if not exists abc1"); + if (taos_errno(pRes) != TSDB_CODE_SUCCESS) { + printf("failed to create database, code:%s", taos_errstr(pRes)); + taos_free_result(pRes); + return; + } - TAOS_RES* pRes = taos_query(pConn, "create table tup (ts timestamp, k int);"); + taos_free_result(pRes); + + pRes = taos_query(pConn, "use abc1"); + if (taos_errno(pRes) != TSDB_CODE_SUCCESS) { + printf("failed to use db, code:%s", taos_errstr(pRes)); + taos_free_result(pRes); + return; + } + taos_free_result(pRes); + + pRes = taos_query(pConn, "create table tup (ts timestamp, k int);"); if (taos_errno(pRes) != 0) { printf("failed to create table, reason:%s", taos_errstr(pRes)); } @@ -836,8 +851,8 @@ TEST(testCase, update_test) { taos_free_result(pRes); char s[256] = {0}; - for(int32_t i = 0; i < 7000; ++i) { - sprintf(s, "insert into tup values('2020-1-1 1:1:1', %d)", i); + for(int32_t i = 0; i < 17000; ++i) { + sprintf(s, "insert into tup values(now+%da, %d)", i, i); pRes = taos_query(pConn, s); taos_free_result(pRes); } diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index ae9caa3444..b7429a5b89 100644 
--- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -48,7 +48,7 @@ typedef struct SBlockOrderSupporter { typedef struct SIOCostSummary { int64_t blockLoadTime; - int64_t statisInfoLoadTime; + int64_t smaLoadTime; int64_t checkForNextTime; int64_t headFileLoad; int64_t headFileLoadTime; @@ -63,10 +63,10 @@ typedef struct SBlockLoadSuppInfo { } SBlockLoadSuppInfo; typedef struct SFilesetIter { - int32_t numOfFiles; // number of total files - int32_t index; // current accessed index in the list - SArray* pFileList; // data file list - int32_t order; + int32_t numOfFiles; // number of total files + int32_t index; // current accessed index in the list + SArray* pFileList; // data file list + int32_t order; } SFilesetIter; typedef struct SFileDataBlockInfo { @@ -122,20 +122,6 @@ struct STsdbReader { STSchema* pSchema; SDataFReader* pFileReader; SVersionRange verRange; -#if 0 - SArray* prev; // previous row which is before than time window - SArray* next; // next row which is after the query time window - SFileBlockInfo* pDataBlockInfo; - SDataCols* pDataCols; // in order to hold current file data block - int32_t allocSize; // allocated data block size - SDataBlockLoadInfo dataBlockLoadInfo; /* record current block load information */ - SLoadCompBlockInfo compBlockLoadInfo; /* record current compblock information in SQueryAttr */ - // SDFileSet* pFileGroup; - // SFSIter fileIter; - // SReadH rhelper; - // SColumnDataAgg* statis; // query level statistics, only one table block statistics info exists at any time - // SColumnDataAgg** pstatis;// the ptr array list to return to caller -#endif }; static SFileDataBlockInfo* getCurrentBlockInfo(SDataBlockIter* pBlockIter); @@ -247,33 +233,6 @@ static STimeWindow updateQueryTimeWindow(STsdb* pTsdb, STimeWindow* pWindow) { return win; } -// todo remove this -static void setQueryTimewindow(STsdbReader* pReader, SQueryTableDataCond* pCond, int32_t tWinIdx) { - // pReader->window = 
pCond->twindows[tWinIdx]; - - // bool updateTs = false; - // int64_t startTs = updateQueryTimeWindow(pReader->pTsdb); - // if (ASCENDING_TRAVERSE(pReader->order)) { - // if (startTs > pReader->window.skey) { - // pReader->window.skey = startTs; - // pCond->twindows[tWinIdx].skey = startTs; - // updateTs = true; - // } - // } else { - // if (startTs > pReader->window.ekey) { - // pReader->window.ekey = startTs; - // pCond->twindows[tWinIdx].ekey = startTs; - // updateTs = true; - // } - // } - - // if (updateTs) { - // tsdbDebug("%p update the query time window, old:%" PRId64 " - %" PRId64 ", new:%" PRId64 " - %" PRId64 ", %s", - // pReader, pCond->twindows[tWinIdx].skey, pCond->twindows[tWinIdx].ekey, pReader->window.skey, - // pReader->window.ekey, pReader->idStr); - // } -} - static void limitOutputBufferSize(const SQueryTableDataCond* pCond, int32_t* capacity) { int32_t rowLen = 0; for (int32_t i = 0; i < pCond->numOfCols; ++i) { @@ -399,8 +358,6 @@ static int32_t tsdbReaderCreate(SVnode* pVnode, SQueryTableDataCond* pCond, STsd pReader->type = pCond->type; pReader->window = updateQueryTimeWindow(pVnode->pTsdb, pCond->twindows); - // todo remove this - setQueryTimewindow(pReader, pCond, 0); ASSERT(pCond->numOfCols > 0); limitOutputBufferSize(pCond, &pReader->capacity); @@ -914,200 +871,6 @@ _error: // return midPos; // } -// static int32_t mergeTwoRowFromMem(STsdbReader* pTsdbReadHandle, int32_t capacity, int32_t* curRow, STSRow* row1, -// STSRow* row2, int32_t numOfCols, uint64_t uid, STSchema* pSchema1, STSchema* -// pSchema2, bool update, TSKEY* lastRowKey) { -// #if 1 -// STSchema* pSchema; -// STSRow* row; -// int16_t colId; -// int16_t offset; - -// bool isRow1DataRow = TD_IS_TP_ROW(row1); -// bool isRow2DataRow; -// bool isChosenRowDataRow; -// int32_t chosen_itr; -// SCellVal sVal = {0}; -// TSKEY rowKey = TSKEY_INITIAL_VAL; -// int32_t nResult = 0; -// int32_t mergeOption = 0; // 0 discard 1 overwrite 2 merge - -// // the schema version info is embeded 
in STSRow -// int32_t numOfColsOfRow1 = 0; - -// if (pSchema1 == NULL) { -// pSchema1 = metaGetTbTSchema(REPO_META(pTsdbReadHandle->pTsdb), uid, TD_ROW_SVER(row1)); -// } - -// #ifdef TD_DEBUG_PRINT_ROW -// char flags[70] = {0}; -// STsdb* pTsdb = pTsdbReadHandle->rhelper.pRepo; -// snprintf(flags, 70, "%s:%d vgId:%d dir:%s row1%s=NULL,row2%s=NULL", __func__, __LINE__, TD_VID(pTsdb->pVnode), -// pTsdb->dir, row1 ? "!" : "", row2 ? "!" : ""); -// tdSRowPrint(row1, pSchema1, flags); -// #endif - -// if (isRow1DataRow) { -// numOfColsOfRow1 = schemaNCols(pSchema1); -// } else { -// numOfColsOfRow1 = tdRowGetNCols(row1); -// } - -// int32_t numOfColsOfRow2 = 0; -// if (row2) { -// isRow2DataRow = TD_IS_TP_ROW(row2); -// if (pSchema2 == NULL) { -// pSchema2 = metaGetTbTSchema(REPO_META(pTsdbReadHandle->pTsdb), uid, TD_ROW_SVER(row2)); -// } -// if (isRow2DataRow) { -// numOfColsOfRow2 = schemaNCols(pSchema2); -// } else { -// numOfColsOfRow2 = tdRowGetNCols(row2); -// } -// } - -// int32_t i = 0, j = 0, k = 0; -// while (i < numOfCols && (j < numOfColsOfRow1 || k < numOfColsOfRow2)) { -// SColumnInfoData* pColInfo = taosArrayGet(pTsdbReadHandle->pColumns, i); - -// int32_t colIdOfRow1; -// if (j >= numOfColsOfRow1) { -// colIdOfRow1 = INT32_MAX; -// } else if (isRow1DataRow) { -// colIdOfRow1 = pSchema1->columns[j].colId; -// } else { -// colIdOfRow1 = tdKvRowColIdAt(row1, j); -// } - -// int32_t colIdOfRow2; -// if (k >= numOfColsOfRow2) { -// colIdOfRow2 = INT32_MAX; -// } else if (isRow2DataRow) { -// colIdOfRow2 = pSchema2->columns[k].colId; -// } else { -// colIdOfRow2 = tdKvRowColIdAt(row2, k); -// } - -// if (colIdOfRow1 < colIdOfRow2) { // the most probability -// if (colIdOfRow1 < pColInfo->info.colId) { -// ++j; -// continue; -// } -// row = row1; -// pSchema = pSchema1; -// isChosenRowDataRow = isRow1DataRow; -// chosen_itr = j; -// } else if (colIdOfRow1 == colIdOfRow2) { -// if (colIdOfRow1 < pColInfo->info.colId) { -// ++j; -// ++k; -// continue; -// } -// 
row = row1; -// pSchema = pSchema1; -// isChosenRowDataRow = isRow1DataRow; -// chosen_itr = j; -// } else { -// if (colIdOfRow2 < pColInfo->info.colId) { -// ++k; -// continue; -// } -// row = row2; -// pSchema = pSchema2; -// chosen_itr = k; -// isChosenRowDataRow = isRow2DataRow; -// } - -// if (isChosenRowDataRow) { -// colId = pSchema->columns[chosen_itr].colId; -// offset = pSchema->columns[chosen_itr].offset; -// // TODO: use STSRowIter -// tdSTpRowGetVal(row, colId, pSchema->columns[chosen_itr].type, pSchema->flen, offset, chosen_itr - 1, &sVal); -// if (colId == PRIMARYKEY_TIMESTAMP_COL_ID) { -// rowKey = *(TSKEY*)sVal.val; -// if (rowKey != *lastRowKey) { -// mergeOption = 1; -// if (*lastRowKey != TSKEY_INITIAL_VAL) { -// ++(*curRow); -// } -// *lastRowKey = rowKey; -// ++nResult; -// } else if (update) { -// mergeOption = 2; -// } else { -// mergeOption = 0; -// break; -// } -// } -// } else { -// // TODO: use STSRowIter -// if (chosen_itr == 0) { -// colId = PRIMARYKEY_TIMESTAMP_COL_ID; -// tdSKvRowGetVal(row, PRIMARYKEY_TIMESTAMP_COL_ID, -1, -1, &sVal); -// rowKey = *(TSKEY*)sVal.val; -// if (rowKey != *lastRowKey) { -// mergeOption = 1; -// if (*lastRowKey != TSKEY_INITIAL_VAL) { -// ++(*curRow); -// } -// *lastRowKey = rowKey; -// ++nResult; -// } else if (update) { -// mergeOption = 2; -// } else { -// mergeOption = 0; -// break; -// } -// } else { -// SKvRowIdx* pColIdx = tdKvRowColIdxAt(row, chosen_itr - 1); -// colId = pColIdx->colId; -// offset = pColIdx->offset; -// tdSKvRowGetVal(row, colId, offset, chosen_itr - 1, &sVal); -// } -// } - -// ASSERT(rowKey != TSKEY_INITIAL_VAL); - -// if (colId == pColInfo->info.colId) { -// if (tdValTypeIsNorm(sVal.valType)) { -// colDataAppend(pColInfo, *curRow, sVal.val, false); -// } else if (tdValTypeIsNull(sVal.valType)) { -// colDataAppend(pColInfo, *curRow, NULL, true); -// } else if (tdValTypeIsNone(sVal.valType)) { -// // TODO: Set null if nothing append for this row -// if (mergeOption == 1) { -// 
colDataAppend(pColInfo, *curRow, NULL, true); -// } -// } else { -// ASSERT(0); -// } - -// ++i; - -// if (row == row1) { -// ++j; -// } else { -// ++k; -// } -// } else { -// if (mergeOption == 1) { -// colDataAppend(pColInfo, *curRow, NULL, true); -// } -// ++i; -// } -// } - -// if (mergeOption == 1) { -// while (i < numOfCols) { // the remain columns are all null data -// SColumnInfoData* pColInfo = taosArrayGet(pTsdbReadHandle->pColumns, i); -// colDataAppend(pColInfo, *curRow, NULL, true); -// ++i; -// } -// } - -// return nResult; -// #endif -// } // static void doCheckGeneratedBlockRange(STsdbReader* pTsdbReadHandle) { // SQueryFilePos* cur = &pTsdbReadHandle->cur; @@ -1387,66 +1150,6 @@ _error: // pTsdbReadHandle->idStr); // } -// int32_t binarySearchForKey(char* pValue, int num, TSKEY key, int order) { -// int firstPos, lastPos, midPos = -1; -// int numOfRows; -// TSKEY* keyList; - -// if (num <= 0) return -1; - -// keyList = (TSKEY*)pValue; -// firstPos = 0; -// lastPos = num - 1; - -// if (order == TSDB_ORDER_DESC) { -// // find the first position which is smaller than the key -// while (1) { -// if (key >= keyList[lastPos]) return lastPos; -// if (key == keyList[firstPos]) return firstPos; -// if (key < keyList[firstPos]) return firstPos - 1; - -// numOfRows = lastPos - firstPos + 1; -// midPos = (numOfRows >> 1) + firstPos; - -// if (key < keyList[midPos]) { -// lastPos = midPos - 1; -// } else if (key > keyList[midPos]) { -// firstPos = midPos + 1; -// } else { -// break; -// } -// } - -// } else { -// // find the first position which is bigger than the key -// while (1) { -// if (key <= keyList[firstPos]) return firstPos; -// if (key == keyList[lastPos]) return lastPos; - -// if (key > keyList[lastPos]) { -// lastPos = lastPos + 1; -// if (lastPos >= num) -// return -1; -// else -// return lastPos; -// } - -// numOfRows = lastPos - firstPos + 1; -// midPos = (numOfRows >> 1) + firstPos; - -// if (key < keyList[midPos]) { -// lastPos = midPos - 1; 
-// } else if (key > keyList[midPos]) { -// firstPos = midPos + 1; -// } else { -// break; -// } -// } -// } - -// return midPos; -// } - static void cleanupBlockOrderSupporter(SBlockOrderSupporter* pSup) { taosMemoryFreeClear(pSup->numOfBlocksPerTable); taosMemoryFreeClear(pSup->indexPerTable); @@ -2882,162 +2585,6 @@ int32_t tsdbGetStbIdList(SMeta* pMeta, int64_t suid, SArray* list) { return TSDB_CODE_SUCCESS; } -// static void destroyHelper(void* param) { -// if (param == NULL) { -// return; -// } - -// // tQueryInfo* pInfo = (tQueryInfo*)param; -// // if (pInfo->optr != TSDB_RELATION_IN) { -// // taosMemoryFreeClear(pInfo->q); -// // } else { -// // taosHashCleanup((SHashObj *)(pInfo->q)); -// // } - -// taosMemoryFree(param); -// } - -// #define TSDB_PREV_ROW 0x1 -// #define TSDB_NEXT_ROW 0x2 - -// static bool loadBlockOfActiveTable(STsdbReader* pTsdbReadHandle) { -// if (pTsdbReadHandle->checkFiles) { -// // check if the query range overlaps with the file data block -// bool exists = true; - -// int32_t code = buildBlockFromFiles(pTsdbReadHandle, &exists); -// if (code != TSDB_CODE_SUCCESS) { -// pTsdbReadHandle->checkFiles = false; -// return false; -// } - -// if (exists) { -// tsdbRetrieveDataBlock((STsdbReader**)pTsdbReadHandle, NULL); -// if (pTsdbReadHandle->currentLoadExternalRows && pTsdbReadHandle->window.skey == pTsdbReadHandle->window.ekey) { -// SColumnInfoData* pColInfo = taosArrayGet(pTsdbReadHandle->pColumns, 0); -// assert(*(int64_t*)pColInfo->pData == pTsdbReadHandle->window.skey); -// } - -// pTsdbReadHandle->currentLoadExternalRows = false; // clear the flag, since the exact matched row is found. 
-// return exists; -// } - -// pTsdbReadHandle->checkFiles = false; -// } - -// if (hasMoreDataInCache(pTsdbReadHandle)) { -// pTsdbReadHandle->currentLoadExternalRows = false; -// return true; -// } - -// // current result is empty -// if (pTsdbReadHandle->currentLoadExternalRows && pTsdbReadHandle->window.skey == pTsdbReadHandle->window.ekey && -// pTsdbReadHandle->cur.rows == 0) { -// // SMemTable* pMemRef = pTsdbReadHandle->pMemTable; - -// // doGetExternalRow(pTsdbReadHandle, TSDB_PREV_ROW, pMemRef); -// // doGetExternalRow(pTsdbReadHandle, TSDB_NEXT_ROW, pMemRef); - -// bool result = tsdbGetExternalRow(pTsdbReadHandle); - -// // pTsdbReadHandle->prev = doFreeColumnInfoData(pTsdbReadHandle->prev); -// // pTsdbReadHandle->next = doFreeColumnInfoData(pTsdbReadHandle->next); -// pTsdbReadHandle->currentLoadExternalRows = false; - -// return result; -// } - -// return false; -// } - -// static bool loadDataBlockFromTableSeq(STsdbReader* pTsdbReadHandle) { -// size_t numOfTables = taosArrayGetSize(pTsdbReadHandle->pTableCheckInfo); -// assert(numOfTables > 0); - -// int64_t stime = taosGetTimestampUs(); - -// while (pTsdbReadHandle->activeIndex < numOfTables) { -// if (loadBlockOfActiveTable(pTsdbReadHandle)) { -// return true; -// } - -// STableBlockScanInfo* pCheckInfo = taosArrayGet(pTsdbReadHandle->pTableCheckInfo, pTsdbReadHandle->activeIndex); -// pCheckInfo->numOfBlocks = 0; - -// pTsdbReadHandle->activeIndex += 1; -// pTsdbReadHandle->locateStart = false; -// pTsdbReadHandle->checkFiles = true; -// pTsdbReadHandle->cur.rows = 0; -// pTsdbReadHandle->currentLoadExternalRows = pTsdbReadHandle->loadExternalRow; - -// terrno = TSDB_CODE_SUCCESS; - -// int64_t elapsedTime = taosGetTimestampUs() - stime; -// pTsdbReadHandle->cost.checkForNextTime += elapsedTime; -// } - -// return false; -// } - -// bool tsdbGetExternalRow(STsdbReader* pHandle) { -// STsdbReader* pTsdbReadHandle = (STsdbReader*)pHandle; -// SQueryFilePos* cur = &pTsdbReadHandle->cur; - -// 
cur->fid = INT32_MIN; -// cur->mixBlock = true; -// if (pTsdbReadHandle->prev == NULL || pTsdbReadHandle->next == NULL) { -// cur->rows = 0; -// return false; -// } - -// int32_t numOfCols = (int32_t)QH_GET_NUM_OF_COLS(pTsdbReadHandle); -// for (int32_t i = 0; i < numOfCols; ++i) { -// SColumnInfoData* pColInfoData = taosArrayGet(pTsdbReadHandle->pColumns, i); -// SColumnInfoData* first = taosArrayGet(pTsdbReadHandle->prev, i); - -// memcpy(pColInfoData->pData, first->pData, pColInfoData->info.bytes); - -// SColumnInfoData* sec = taosArrayGet(pTsdbReadHandle->next, i); -// memcpy(((char*)pColInfoData->pData) + pColInfoData->info.bytes, sec->pData, pColInfoData->info.bytes); - -// if (i == 0 && pColInfoData->info.type == TSDB_DATA_TYPE_TIMESTAMP) { -// cur->win.skey = *(TSKEY*)pColInfoData->pData; -// cur->win.ekey = *(TSKEY*)(((char*)pColInfoData->pData) + TSDB_KEYSIZE); -// } -// } - -// cur->rows = 2; -// return true; -// } - -// static void* doFreeColumnInfoData(SArray* pColumnInfoData) { -// if (pColumnInfoData == NULL) { -// return NULL; -// } - -// size_t cols = taosArrayGetSize(pColumnInfoData); -// for (int32_t i = 0; i < cols; ++i) { -// SColumnInfoData* pColInfo = taosArrayGet(pColumnInfoData, i); -// colDataDestroy(pColInfo); -// } - -// taosArrayDestroy(pColumnInfoData); -// return NULL; -// } - -// static void* destroyTableCheckInfo(SArray* pTableCheckInfo) { -// size_t size = taosArrayGetSize(pTableCheckInfo); -// for (int32_t i = 0; i < size; ++i) { -// STableBlockScanInfo* p = taosArrayGet(pTableCheckInfo, i); -// destroyTableMemIterator(p); - -// taosMemoryFreeClear(p->pCompInfo); -// } - -// taosArrayDestroy(pTableCheckInfo); -// return NULL; -// } - // ====================================== EXPOSED APIs ====================================== int32_t tsdbReaderOpen(SVnode* pVnode, SQueryTableDataCond* pCond, SArray* pTableList, STsdbReader** ppReader, const char* idstr) { @@ -3128,7 +2675,7 @@ void tsdbReaderClose(STsdbReader* pReader) { 
tsdbDebug("%p :io-cost summary: head-file read cnt:%" PRIu64 ", head-file time:%" PRIu64 " us, statis-info:%" PRId64 " us, datablock:%" PRId64 " us, check data:%" PRId64 " us, %s", - pReader, pCost->headFileLoad, pCost->headFileLoadTime, pCost->statisInfoLoadTime, pCost->blockLoadTime, + pReader, pCost->headFileLoad, pCost->headFileLoadTime, pCost->smaLoadTime, pCost->blockLoadTime, pCost->checkForNextTime, pReader->idStr); taosMemoryFree(pReader->idStr); @@ -3185,31 +2732,31 @@ int32_t tsdbRetrieveDataBlockStatisInfo(STsdbReader* pReader, SColumnDataAgg*** int32_t code = 0; *allHave = false; + // there is no statistics data for composed block if (pReader->status.composedDataBlock) { *pBlockStatis = NULL; return TSDB_CODE_SUCCESS; } - // SFileBlockInfo* pBlockInfo = &pReader->pDataBlockInfo[c->slot]; - // assert((c->slot >= 0 && c->slot < pReader->numOfBlocks) || ((c->slot == pReader->numOfBlocks) && (c->slot == 0))); + SFileDataBlockInfo* pFBlock = getCurrentBlockInfo(&pReader->status.blockIter); + STableBlockScanInfo* pBlockScanInfo = taosHashGet(pReader->status.pTableMap, &pFBlock->uid, sizeof(pFBlock->uid)); + SBlock* pBlock = taosArrayGet(pBlockScanInfo->pBlockList, pFBlock->tbBlockIdx); - // // file block with sub-blocks has no statistics data - // if (pBlockInfo->compBlock->numOfSubBlocks > 1) { - // *pBlockStatis = NULL; - // return TSDB_CODE_SUCCESS; - // } + int64_t stime = taosGetTimestampUs(); - // int64_t stime = taosGetTimestampUs(); - // int statisStatus = tsdbLoadBlockStatis(&pReader->rhelper, pBlockInfo->compBlock); - // if (statisStatus < TSDB_STATIS_OK) { - // return terrno; - // } else if (statisStatus > TSDB_STATIS_OK) { - // *pBlockStatis = NULL; - // return TSDB_CODE_SUCCESS; - // } + if (tBlockHasSma(pBlock)) { + SArray* pColAgg = taosArrayInit(4, sizeof(SColumnDataAgg)); + code = tsdbReadBlockSma(pReader->pFileReader, pBlock, pColAgg, NULL); + if (code != TSDB_CODE_SUCCESS) { + tsdbDebug("vgId:%d, failed to load block SMA for uid %" 
PRIu64", code:%s, %s", 0, pFBlock->uid, + tstrerror(code), pReader->idStr); + return code; + } + } - // tsdbDebug("vgId:%d, succeed to load block statis part for uid %" PRIu64, REPO_ID(pReader->pTsdb), - // TSDB_READ_TABLE_UID(&pReader->rhelper)); + int64_t el = taosGetTimestampUs() - stime; + tsdbDebug("vgId:%d, succeed to load block SMA for uid %" PRIu64", elapsed time:%"PRId64"us, %s", 0, pFBlock->uid, + el, pReader->idStr); // int16_t* colIds = pReader->suppInfo.defaultLoadColumn->pData; @@ -3224,34 +2771,36 @@ int32_t tsdbRetrieveDataBlockStatisInfo(STsdbReader* pReader, SColumnDataAgg*** // *allHave = true; // tsdbGetBlockStatis(&pReader->rhelper, pReader->suppInfo.pstatis, (int)numOfCols, pBlockInfo->compBlock); - // // always load the first primary timestamp column data - // SColumnDataAgg* pPrimaryColStatis = &pReader->suppInfo.pstatis[0]; - // assert(pPrimaryColStatis->colId == PRIMARYKEY_TIMESTAMP_COL_ID); + // always load the first primary timestamp column data + SColumnDataAgg* pTsAgg = &pReader->suppInfo.pstatis[0]; + assert(pTsAgg->colId == PRIMARYKEY_TIMESTAMP_COL_ID); - // pPrimaryColStatis->numOfNull = 0; - // pPrimaryColStatis->min = pBlockInfo->compBlock->minKey.ts; - // pPrimaryColStatis->max = pBlockInfo->compBlock->maxKey.ts; - // pReader->suppInfo.plist[0] = &pReader->suppInfo.pstatis[0]; + pTsAgg->numOfNull = 0; + pTsAgg->min = pReader->pResBlock->info.window.skey; + pTsAgg->max = pReader->pResBlock->info.window.ekey; + pReader->suppInfo.plist[0] = &pReader->suppInfo.pstatis[0]; - // // update the number of NULL data rows - // int32_t* slotIds = pReader->suppInfo.slotIds; - // for (int32_t i = 1; i < numOfCols; ++i) { - // ASSERT(colIds[i] == pReader->pSchema->columns[slotIds[i]].colId); - // if (IS_BSMA_ON(&(pReader->pSchema->columns[slotIds[i]]))) { - // if (pReader->suppInfo.pstatis[i].numOfNull == -1) { // set the column data are all NULL - // pReader->suppInfo.pstatis[i].numOfNull = pBlockInfo->compBlock->numOfRows; - // } + // update 
the number of NULL data rows + size_t numOfCols = blockDataGetNumOfCols(pReader->pResBlock); + int32_t* slotIds = pReader->suppInfo.slotIds; - // pReader->suppInfo.plist[i] = &pReader->suppInfo.pstatis[i]; - // } else { - // *allHave = false; - // } - // } + for (int32_t i = 1; i < numOfCols; ++i) { +// ASSERT(colIds[i] == pReader->pSchema->columns[slotIds[i]].colId); + if (IS_BSMA_ON(&(pReader->pSchema->columns[slotIds[i]]))) { + if (pReader->suppInfo.pstatis[i].numOfNull == -1) { // set the column data are all NULL +// pReader->suppInfo.pstatis[i].numOfNull = pBlockInfo->compBlock->numOfRows; + } - // int64_t elapsed = taosGetTimestampUs() - stime; - // pReader->cost.statisInfoLoadTime += elapsed; + pReader->suppInfo.plist[i] = &pReader->suppInfo.pstatis[i]; + } else { + *allHave = false; + } + } - // *pBlockStatis = pReader->suppInfo.plist; + int64_t elapsed = taosGetTimestampUs() - stime; + pReader->cost.smaLoadTime += elapsed; + + *pBlockStatis = pReader->suppInfo.plist; return code; } @@ -3286,8 +2835,6 @@ int32_t tsdbReaderReset(STsdbReader* pReader, SQueryTableDataCond* pCond, int32_ return TSDB_CODE_SUCCESS; } - setQueryTimewindow(pReader, pCond, tWinIdx); - pReader->order = pCond->order; pReader->type = BLOCK_LOAD_OFFSET_ORDER; pReader->status.loadFromFile = true; @@ -3324,114 +2871,72 @@ int32_t tsdbReaderReset(STsdbReader* pReader, SQueryTableDataCond* pCond, int32_ return code; } +static int32_t getBucketIndex(int32_t startRow, int32_t bucketRange, int32_t numOfRows) { + return (numOfRows - startRow) / bucketRange; +} + int32_t tsdbGetFileBlocksDistInfo(STsdbReader* pReader, STableBlockDistInfo* pTableBlockInfo) { - int32_t code = 0; - // pTableBlockInfo->totalSize = 0; - // pTableBlockInfo->totalRows = 0; + int32_t code = TSDB_CODE_SUCCESS; + pTableBlockInfo->totalSize = 0; + pTableBlockInfo->totalRows = 0; - // STsdbFS* pFileHandle = REPO_FS(pReader->pTsdb); + // find the start data block in file + SReaderStatus* pStatus = &pReader->status; - // // 
find the start data block in file - // pReader->locateStart = true; - // STsdbKeepCfg* pCfg = REPO_KEEP_CFG(pReader->pTsdb); - // int32_t fid = getFileIdFromKey(pReader->window.skey, pCfg->days, pCfg->precision); + STsdbCfg* pc = &pReader->pTsdb->pVnode->config.tsdbCfg; + pTableBlockInfo->defMinRows = pc->minRows; + pTableBlockInfo->defMaxRows = pc->maxRows; - // tsdbRLockFS(pFileHandle); - // tsdbFSIterInit(&pReader->fileIter, pFileHandle, pReader->order); - // tsdbFSIterSeek(&pReader->fileIter, fid); - // tsdbUnLockFS(pFileHandle); + int32_t bucketRange = ceil((pc->maxRows - pc->minRows) / 20.0); - // STsdbCfg* pc = REPO_CFG(pReader->pTsdb); - // pTableBlockInfo->defMinRows = pc->minRows; - // pTableBlockInfo->defMaxRows = pc->maxRows; + pTableBlockInfo->numOfFiles += 1; - // int32_t bucketRange = ceil((pc->maxRows - pc->minRows) / 20.0); + int32_t numOfTables = (int32_t)taosHashGetSize(pStatus->pTableMap); + int defaultRows = 4096; - // pTableBlockInfo->numOfFiles += 1; + SDataBlockIter* pBlockIter = &pStatus->blockIter; + pTableBlockInfo->numOfFiles += pStatus->fileIter.numOfFiles; + pTableBlockInfo->numOfBlocks += pBlockIter->numOfBlocks; - // int32_t code = TSDB_CODE_SUCCESS; - // int32_t numOfBlocks = 0; - // int32_t numOfTables = (int32_t)taosArrayGetSize(pReader->pTableCheckInfo); - // int defaultRows = 4096; - // STimeWindow win = TSWINDOW_INITIALIZER; + pTableBlockInfo->numOfTables = numOfTables; - // while (true) { - // numOfBlocks = 0; - // tsdbRLockFS(REPO_FS(pReader->pTsdb)); + while (true) { + bool hasNext = blockIteratorNext(&pStatus->blockIter); + if (hasNext) { + SFileDataBlockInfo* pFBlock = getCurrentBlockInfo(pBlockIter); + STableBlockScanInfo* pScanInfo = taosHashGet(pStatus->pTableMap, &pFBlock->uid, sizeof(pFBlock->uid)); + SBlock* pBlock = taosArrayGet(pScanInfo->pBlockList, pFBlock->tbBlockIdx); - // if ((pReader->pFileGroup = tsdbFSIterNext(&pReader->fileIter)) == NULL) { - // tsdbUnLockFS(REPO_FS(pReader->pTsdb)); - // break; - // } + 
int32_t numOfRows = pBlock->nRow; + pTableBlockInfo->totalRows += numOfRows; - // tsdbGetFidKeyRange(pCfg->days, pCfg->precision, pReader->pFileGroup->fid, &win.skey, &win.ekey); + if (numOfRows > pTableBlockInfo->maxRows) { + pTableBlockInfo->maxRows = numOfRows; + } - // // current file are not overlapped with query time window, ignore remain files - // if ((win.skey > pReader->window.ekey) /* || (!ascTraverse && win.ekey < pTsdbReadHandle->window.ekey)*/) { - // tsdbUnLockFS(REPO_FS(pReader->pTsdb)); - // tsdbDebug("%p remain files are not qualified for qrange:%" PRId64 "-%" PRId64 ", ignore, %s", pReader, - // pReader->window.skey, pReader->window.ekey, pReader->idStr); - // pReader->pFileGroup = NULL; - // break; - // } + if (numOfRows < pTableBlockInfo->minRows) { + pTableBlockInfo->minRows = numOfRows; + } - // pTableBlockInfo->numOfFiles += 1; - // if (tsdbSetAndOpenReadFSet(&pReader->rhelper, pReader->pFileGroup) < 0) { - // tsdbUnLockFS(REPO_FS(pReader->pTsdb)); - // code = terrno; - // break; - // } + if (numOfRows < defaultRows) { + pTableBlockInfo->numOfSmallBlocks += 1; + } - // tsdbUnLockFS(REPO_FS(pReader->pTsdb)); + int32_t bucketIndex = getBucketIndex(pTableBlockInfo->defMinRows, bucketRange, numOfRows); + pTableBlockInfo->blockRowsHisto[bucketIndex]++; + } else { + code = initForFirstBlockInFile(pReader, pBlockIter); + if ((code != TSDB_CODE_SUCCESS) || (pReader->status.loadFromFile == false)) { + break; + } - // if (tsdbLoadBlockIdx(&pReader->rhelper) < 0) { - // code = terrno; - // break; - // } + pTableBlockInfo->numOfBlocks += pBlockIter->numOfBlocks; + } - // if ((code = getFileCompInfo(pReader, &numOfBlocks)) != TSDB_CODE_SUCCESS) { - // break; - // } +// tsdbDebug("%p %d blocks found in file for %d table(s), fid:%d, %s", pReader, numOfBlocks, numOfTables, +// pReader->pFileGroup->fid, pReader->idStr); + } - // tsdbDebug("%p %d blocks found in file for %d table(s), fid:%d, %s", pReader, numOfBlocks, numOfTables, - // 
pReader->pFileGroup->fid, pReader->idStr); - - // if (numOfBlocks == 0) { - // continue; - // } - - // pTableBlockInfo->numOfBlocks += numOfBlocks; - - // for (int32_t i = 0; i < numOfTables; ++i) { - // STableBlockScanInfo* pCheckInfo = taosArrayGet(pReader->pTableCheckInfo, i); - - // SBlock* pBlock = pCheckInfo->pCompInfo->blocks; - - // for (int32_t j = 0; j < pCheckInfo->numOfBlocks; ++j) { - // pTableBlockInfo->totalSize += pBlock[j].len; - - // int32_t numOfRows = pBlock[j].numOfRows; - // pTableBlockInfo->totalRows += numOfRows; - - // if (numOfRows > pTableBlockInfo->maxRows) { - // pTableBlockInfo->maxRows = numOfRows; - // } - - // if (numOfRows < pTableBlockInfo->minRows) { - // pTableBlockInfo->minRows = numOfRows; - // } - - // if (numOfRows < defaultRows) { - // pTableBlockInfo->numOfSmallBlocks += 1; - // } - - // int32_t bucketIndex = getBucketIndex(pTableBlockInfo->defMinRows, bucketRange, numOfRows); - // pTableBlockInfo->blockRowsHisto[bucketIndex]++; - // } - // } - // } - - // pTableBlockInfo->numOfTables = numOfTables; return code; } diff --git a/source/dnode/vnode/src/tsdb/tsdbUtil.c b/source/dnode/vnode/src/tsdb/tsdbUtil.c index f906ef1b54..25c8598696 100644 --- a/source/dnode/vnode/src/tsdb/tsdbUtil.c +++ b/source/dnode/vnode/src/tsdb/tsdbUtil.c @@ -1230,10 +1230,26 @@ void tsdbCalcColDataSMA(SColData *pColData, SColumnDataAgg *pColAgg) { break; case TSDB_DATA_TYPE_SMALLINT: break; - case TSDB_DATA_TYPE_INT: + case TSDB_DATA_TYPE_INT: { + pColAgg->sum += colVal.value.i32; + if (pColAgg->min > colVal.value.i32) { + pColAgg->min = colVal.value.i32; + } + if (pColAgg->max < colVal.value.i32) { + pColAgg->max = colVal.value.i32; + } break; - case TSDB_DATA_TYPE_BIGINT: + } + case TSDB_DATA_TYPE_BIGINT: { + pColAgg->sum += colVal.value.i64; + if (pColAgg->min > colVal.value.i64) { + pColAgg->min = colVal.value.i64; + } + if (pColAgg->max < colVal.value.i64) { + pColAgg->max = colVal.value.i64; + } break; + } case TSDB_DATA_TYPE_FLOAT: break; 
case TSDB_DATA_TYPE_DOUBLE: From 9a0e15f1c182b7526c6a7ad633ae6f862b60ab72 Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Thu, 7 Jul 2022 05:32:24 +0000 Subject: [PATCH 02/12] fix coredump --- source/dnode/vnode/src/tsdb/tsdbReaderWriter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c b/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c index c22d1a4064..38bc5f855e 100644 --- a/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c +++ b/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c @@ -1178,7 +1178,7 @@ int32_t tsdbReadBlockSma(SDataFReader *pReader, SBlock *pBlock, SArray *aColumnD } // check - if (!taosCheckChecksumWhole(NULL, size)) { + if (!taosCheckChecksumWhole(*ppBuf, size)) { code = TSDB_CODE_FILE_CORRUPTED; goto _err; } From d9e5172357f546591d5abb1e5c4958d930d42685 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 7 Jul 2022 15:32:56 +0800 Subject: [PATCH 03/12] fix(query): set the correct sma data --- source/client/test/clientTests.cpp | 9 +-- source/dnode/vnode/src/tsdb/tsdbRead.c | 79 +++++++++++++------------ source/libs/function/src/builtinsimpl.c | 31 ++++------ 3 files changed, 56 insertions(+), 63 deletions(-) diff --git a/source/client/test/clientTests.cpp b/source/client/test/clientTests.cpp index b8e4dcbea0..69cca1441a 100644 --- a/source/client/test/clientTests.cpp +++ b/source/client/test/clientTests.cpp @@ -750,7 +750,7 @@ TEST(testCase, projection_query_stables) { taos_close(pConn); } - +#endif TEST(testCase, agg_query_tables) { TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0); ASSERT_NE(pConn, nullptr); @@ -763,7 +763,7 @@ TEST(testCase, agg_query_tables) { } taos_free_result(pRes); - pRes = taos_query(pConn, "show table distributed st1"); + pRes = taos_query(pConn, "show table distributed tup"); if (taos_errno(pRes) != 0) { printf("failed to select from table, reason:%s\n", taos_errstr(pRes)); taos_free_result(pRes); @@ -775,6 +775,7 @@ TEST(testCase, 
agg_query_tables) { taos_close(pConn); } +#if 0 /* --- copy the following script in the shell to setup the environment --- @@ -820,7 +821,7 @@ TEST(testCase, async_api_test) { getchar(); taos_close(pConn); } -#endif + TEST(testCase, update_test) { TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0); @@ -857,5 +858,5 @@ TEST(testCase, update_test) { taos_free_result(pRes); } } - +#endif #pragma GCC diagnostic pop diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index b7429a5b89..a1ac165797 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -55,7 +55,7 @@ typedef struct SIOCostSummary { } SIOCostSummary; typedef struct SBlockLoadSuppInfo { - SColumnDataAgg* pstatis; + SColumnDataAgg tsColAgg; SColumnDataAgg** plist; int16_t* colIds; // column ids for loading file block data int32_t* slotIds; // colId to slotId @@ -364,13 +364,14 @@ static int32_t tsdbReaderCreate(SVnode* pVnode, SQueryTableDataCond* pCond, STsd // allocate buffer in order to load data blocks from file SBlockLoadSuppInfo* pSup = &pReader->suppInfo; - pSup->pstatis = taosMemoryCalloc(pCond->numOfCols, sizeof(SColumnDataAgg)); pSup->plist = taosMemoryCalloc(pCond->numOfCols, POINTER_BYTES); - if (pSup->pstatis == NULL || pSup->plist == NULL) { + if (pSup->plist == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _end; } + pSup->tsColAgg.colId = PRIMARYKEY_TIMESTAMP_COL_ID; + pReader->pResBlock = createResBlock(pCond, pReader->capacity); if (pReader->pResBlock == NULL) { code = terrno; @@ -2647,8 +2648,6 @@ void tsdbReaderClose(STsdbReader* pReader) { } blockDataDestroy(pReader->pResBlock); - - taosMemoryFreeClear(pReader->suppInfo.pstatis); taosMemoryFreeClear(pReader->suppInfo.plist); taosMemoryFree(pReader->suppInfo.slotIds); @@ -2744,56 +2743,48 @@ int32_t tsdbRetrieveDataBlockStatisInfo(STsdbReader* pReader, SColumnDataAgg*** int64_t stime = taosGetTimestampUs(); + SArray* pColAgg = 
taosArrayInit(4, sizeof(SColumnDataAgg)); if (tBlockHasSma(pBlock)) { - SArray* pColAgg = taosArrayInit(4, sizeof(SColumnDataAgg)); code = tsdbReadBlockSma(pReader->pFileReader, pBlock, pColAgg, NULL); if (code != TSDB_CODE_SUCCESS) { tsdbDebug("vgId:%d, failed to load block SMA for uid %" PRIu64", code:%s, %s", 0, pFBlock->uid, tstrerror(code), pReader->idStr); return code; } + } else { + *pBlockStatis = NULL; + return TSDB_CODE_SUCCESS; } - int64_t el = taosGetTimestampUs() - stime; - tsdbDebug("vgId:%d, succeed to load block SMA for uid %" PRIu64", elapsed time:%"PRId64"us, %s", 0, pFBlock->uid, - el, pReader->idStr); - - // int16_t* colIds = pReader->suppInfo.defaultLoadColumn->pData; - - // size_t numOfCols = QH_GET_NUM_OF_COLS(pReader); - // memset(pReader->suppInfo.plist, 0, numOfCols * POINTER_BYTES); - // memset(pReader->suppInfo.pstatis, 0, numOfCols * sizeof(SColumnDataAgg)); - - // for (int32_t i = 0; i < numOfCols; ++i) { - // pReader->suppInfo.pstatis[i].colId = colIds[i]; - // } - - // *allHave = true; - // tsdbGetBlockStatis(&pReader->rhelper, pReader->suppInfo.pstatis, (int)numOfCols, pBlockInfo->compBlock); + *allHave = true; // always load the first primary timestamp column data - SColumnDataAgg* pTsAgg = &pReader->suppInfo.pstatis[0]; - assert(pTsAgg->colId == PRIMARYKEY_TIMESTAMP_COL_ID); + SColumnDataAgg* pTsAgg = &pReader->suppInfo.tsColAgg; - pTsAgg->numOfNull = 0; + pTsAgg->numOfNull = 0; + pTsAgg->colId = PRIMARYKEY_TIMESTAMP_COL_ID; pTsAgg->min = pReader->pResBlock->info.window.skey; pTsAgg->max = pReader->pResBlock->info.window.ekey; - pReader->suppInfo.plist[0] = &pReader->suppInfo.pstatis[0]; + pReader->suppInfo.plist[0] = pTsAgg; // update the number of NULL data rows size_t numOfCols = blockDataGetNumOfCols(pReader->pResBlock); - int32_t* slotIds = pReader->suppInfo.slotIds; - for (int32_t i = 1; i < numOfCols; ++i) { -// ASSERT(colIds[i] == pReader->pSchema->columns[slotIds[i]].colId); - if 
(IS_BSMA_ON(&(pReader->pSchema->columns[slotIds[i]]))) { - if (pReader->suppInfo.pstatis[i].numOfNull == -1) { // set the column data are all NULL -// pReader->suppInfo.pstatis[i].numOfNull = pBlockInfo->compBlock->numOfRows; + int32_t i = 0, j = 0; + while(j < numOfCols && i < taosArrayGetSize(pColAgg)) { + SColumnDataAgg* pAgg = taosArrayGet(pColAgg, i); + if (pAgg->colId == pReader->suppInfo.colIds[j]) { + if (IS_BSMA_ON(&(pReader->pSchema->columns[i]))) { + pReader->suppInfo.plist[j] = pAgg; + i += 1; + j += 1; + } else { + *allHave = false; } - - pReader->suppInfo.plist[i] = &pReader->suppInfo.pstatis[i]; - } else { - *allHave = false; + } else if (pAgg->colId < pReader->suppInfo.colIds[j]) { + i += 1; + } else if (pReader->suppInfo.colIds[j] < pAgg->colId) { + j += 1; } } @@ -2801,6 +2792,10 @@ int32_t tsdbRetrieveDataBlockStatisInfo(STsdbReader* pReader, SColumnDataAgg*** pReader->cost.smaLoadTime += elapsed; *pBlockStatis = pReader->suppInfo.plist; + + tsdbDebug("vgId:%d, succeed to load block SMA for uid %" PRIu64", elapsed time:%"PRId64"us, %s", 0, pFBlock->uid, + elapsed, pReader->idStr); + return code; } @@ -2843,7 +2838,7 @@ int32_t tsdbReaderReset(STsdbReader* pReader, SQueryTableDataCond* pCond, int32_ pReader->window = updateQueryTimeWindow(pReader->pTsdb, &pCond->twindows[tWinIdx]); // allocate buffer in order to load data blocks from file - memset(pReader->suppInfo.pstatis, 0, sizeof(SColumnDataAgg)); + memset(&pReader->suppInfo.tsColAgg, 0, sizeof(SColumnDataAgg)); memset(pReader->suppInfo.plist, 0, POINTER_BYTES); // todo set the correct numOfTables @@ -2899,9 +2894,9 @@ int32_t tsdbGetFileBlocksDistInfo(STsdbReader* pReader, STableBlockDistInfo* pTa pTableBlockInfo->numOfBlocks += pBlockIter->numOfBlocks; pTableBlockInfo->numOfTables = numOfTables; + bool hasNext = true; while (true) { - bool hasNext = blockIteratorNext(&pStatus->blockIter); if (hasNext) { SFileDataBlockInfo* pFBlock = getCurrentBlockInfo(pBlockIter); STableBlockScanInfo* 
pScanInfo = taosHashGet(pStatus->pTableMap, &pFBlock->uid, sizeof(pFBlock->uid)); @@ -2924,6 +2919,9 @@ int32_t tsdbGetFileBlocksDistInfo(STsdbReader* pReader, STableBlockDistInfo* pTa int32_t bucketIndex = getBucketIndex(pTableBlockInfo->defMinRows, bucketRange, numOfRows); pTableBlockInfo->blockRowsHisto[bucketIndex]++; + + hasNext = blockIteratorNext(&pStatus->blockIter); + } else { code = initForFirstBlockInFile(pReader, pBlockIter); if ((code != TSDB_CODE_SUCCESS) || (pReader->status.loadFromFile == false)) { @@ -2933,6 +2931,11 @@ int32_t tsdbGetFileBlocksDistInfo(STsdbReader* pReader, STableBlockDistInfo* pTa pTableBlockInfo->numOfBlocks += pBlockIter->numOfBlocks; } +/* + hasNext = blockIteratorNext(&pStatus->blockIter); +*/ + + // tsdbDebug("%p %d blocks found in file for %d table(s), fid:%d, %s", pReader, numOfBlocks, numOfTables, // pReader->pFileGroup->fid, pReader->idStr); } diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c index f94522f0d8..03532caac0 100644 --- a/source/libs/function/src/builtinsimpl.c +++ b/source/libs/function/src/builtinsimpl.c @@ -5547,30 +5547,18 @@ int32_t blockDistFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) { } } - int32_t delta = maxVal - minVal; - int32_t step = delta / 50; - if (step == 0) { - step = 1; - } + // maximum number of step is 80 + double factor = pData->numOfBlocks / 80.0; int32_t numOfBuckets = sizeof(pData->blockRowsHisto) / sizeof(pData->blockRowsHisto[0]); - int32_t bucketRange = (pData->maxRows - pData->minRows) / numOfBuckets; - - bool singleModel = false; - if (bucketRange == 0) { - singleModel = true; - step = 20; - bucketRange = (pData->defMaxRows - pData->defMinRows) / numOfBuckets; - } + int32_t bucketRange = (pData->defMaxRows - pData->defMinRows) / numOfBuckets; for (int32_t i = 0; i < tListLen(pData->blockRowsHisto); ++i) { - len = sprintf(st + VARSTR_HEADER_SIZE, "%04d |", pData->defMinRows + bucketRange * (i + 1)); + len = sprintf(st + 
VARSTR_HEADER_SIZE, "%04d |", pData->defMinRows + bucketRange * i); int32_t num = 0; - if (singleModel && pData->blockRowsHisto[i] > 0) { - num = 20; - } else { - num = (pData->blockRowsHisto[i] + step - 1) / step; + if (pData->blockRowsHisto[i] > 0) { + num = (pData->blockRowsHisto[i]) / factor; } for (int32_t j = 0; j < num; ++j) { @@ -5578,9 +5566,10 @@ int32_t blockDistFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) { len += x; } - double v = pData->blockRowsHisto[i] * 100.0 / pData->numOfBlocks; - len += sprintf(st + VARSTR_HEADER_SIZE + len, " %d (%.2f%c)", pData->blockRowsHisto[i], v, '%'); - printf("%s\n", st); + if (num > 0) { + double v = pData->blockRowsHisto[i] * 100.0 / pData->numOfBlocks; + len += sprintf(st + VARSTR_HEADER_SIZE + len, " %d (%.2f%c)", pData->blockRowsHisto[i], v, '%'); + } varDataSetLen(st, len); colDataAppend(pColInfo, row++, st, false); From 44d7397e18a4869bbf043a92a793960f8be8df1a Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 7 Jul 2022 15:33:53 +0800 Subject: [PATCH 04/12] test:update the unit test. 
--- source/client/test/clientTests.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/source/client/test/clientTests.cpp b/source/client/test/clientTests.cpp index 69cca1441a..78a446eaad 100644 --- a/source/client/test/clientTests.cpp +++ b/source/client/test/clientTests.cpp @@ -750,7 +750,6 @@ TEST(testCase, projection_query_stables) { taos_close(pConn); } -#endif TEST(testCase, agg_query_tables) { TAOS* pConn = taos_connect("localhost", "root", "taosdata", NULL, 0); ASSERT_NE(pConn, nullptr); @@ -775,7 +774,6 @@ TEST(testCase, agg_query_tables) { taos_close(pConn); } -#if 0 /* --- copy the following script in the shell to setup the environment --- @@ -821,6 +819,7 @@ TEST(testCase, async_api_test) { getchar(); taos_close(pConn); } +#endif TEST(testCase, update_test) { From 6ac3ce0213fea80e19f11b262b68a48108e40823 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 7 Jul 2022 15:34:59 +0800 Subject: [PATCH 05/12] fix(query): fix syntax error. --- source/client/test/clientTests.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/source/client/test/clientTests.cpp b/source/client/test/clientTests.cpp index 78a446eaad..7927c1e008 100644 --- a/source/client/test/clientTests.cpp +++ b/source/client/test/clientTests.cpp @@ -857,5 +857,4 @@ TEST(testCase, update_test) { taos_free_result(pRes); } } -#endif #pragma GCC diagnostic pop From aab82fc0809d28e38683ef89397a4a99b990480a Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 7 Jul 2022 15:38:17 +0800 Subject: [PATCH 06/12] fix(query): check return code while loading sma --- source/libs/executor/src/scanoperator.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index b3a00cd6f2..8b8d6903c1 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -210,7 +210,10 @@ static int32_t loadDataBlock(SOperatorInfo* pOperator, STableScanInfo* pTableSca bool 
allColumnsHaveAgg = true; SColumnDataAgg** pColAgg = NULL; - tsdbRetrieveDataBlockStatisInfo(pTableScanInfo->dataReader, &pColAgg, &allColumnsHaveAgg); + int32_t code = tsdbRetrieveDataBlockStatisInfo(pTableScanInfo->dataReader, &pColAgg, &allColumnsHaveAgg); + if (code != TSDB_CODE_SUCCESS) { + longjmp(pTaskInfo->env, code); + } if (allColumnsHaveAgg == true) { int32_t numOfCols = taosArrayGetSize(pBlock->pDataBlock); From 57bf509dee14cbe55dd0f3f036f88369c24fde02 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 7 Jul 2022 15:40:04 +0800 Subject: [PATCH 07/12] refactor: do some internal refactor. --- source/dnode/vnode/inc/vnode.h | 2 +- source/dnode/vnode/src/tsdb/tsdbRead.c | 2 +- source/libs/executor/src/executorimpl.c | 4 ++-- source/libs/executor/src/scanoperator.c | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h index 6320f4719d..60a731a9a8 100644 --- a/source/dnode/vnode/inc/vnode.h +++ b/source/dnode/vnode/inc/vnode.h @@ -131,7 +131,7 @@ int32_t tsdbReaderOpen(SVnode *pVnode, SQueryTableDataCond *pCond, SArray *pTabl void tsdbReaderClose(STsdbReader *pReader); bool tsdbNextDataBlock(STsdbReader *pReader); void tsdbRetrieveDataBlockInfo(STsdbReader *pReader, SDataBlockInfo *pDataBlockInfo); -int32_t tsdbRetrieveDataBlockStatisInfo(STsdbReader *pReader, SColumnDataAgg ***pBlockStatis, bool *allHave); +int32_t tsdbRetrieveDatablockSMAInfo(STsdbReader *pReader, SColumnDataAgg ***pBlockStatis, bool *allHave); SArray *tsdbRetrieveDataBlock(STsdbReader *pTsdbReadHandle, SArray *pColumnIdList); int32_t tsdbReaderReset(STsdbReader *pReader, SQueryTableDataCond *pCond, int32_t tWinIdx); int32_t tsdbGetFileBlocksDistInfo(STsdbReader *pReader, STableBlockDistInfo *pTableBlockInfo); diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index a1ac165797..db8c348673 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ 
b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -2727,7 +2727,7 @@ void tsdbRetrieveDataBlockInfo(STsdbReader* pReader, SDataBlockInfo* pDataBlockI pDataBlockInfo->window = pReader->pResBlock->info.window; } -int32_t tsdbRetrieveDataBlockStatisInfo(STsdbReader* pReader, SColumnDataAgg*** pBlockStatis, bool* allHave) { +int32_t tsdbRetrieveDatablockSMAInfo(STsdbReader* pReader, SColumnDataAgg*** pBlockStatis, bool* allHave) { int32_t code = 0; *allHave = false; diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index 52c8960102..1e72aee3e3 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -1130,7 +1130,7 @@ int32_t loadDataBlockOnDemand(SExecTaskInfo* pTaskInfo, STableScanInfo* pTableSc } else if ((*status) == BLK_DATA_SMA_LOAD) { // this function never returns error? pCost->loadBlockStatis += 1; -// tsdbRetrieveDataBlockStatisInfo(pTableScanInfo->pTsdbReadHandle, &pBlock->pBlockAgg); +// tsdbRetrieveDatablockSMAInfo(pTableScanInfo->pTsdbReadHandle, &pBlock->pBlockAgg); if (pBlock->pBlockAgg == NULL) { // data block statistics does not exist, load data block // pBlock->pDataBlock = tsdbRetrieveDataBlock(pTableScanInfo->pTsdbReadHandle, NULL); @@ -1141,7 +1141,7 @@ int32_t loadDataBlockOnDemand(SExecTaskInfo* pTaskInfo, STableScanInfo* pTableSc // load the data block statistics to perform further filter pCost->loadBlockStatis += 1; -// tsdbRetrieveDataBlockStatisInfo(pTableScanInfo->pTsdbReadHandle, &pBlock->pBlockAgg); +// tsdbRetrieveDatablockSMAInfo(pTableScanInfo->pTsdbReadHandle, &pBlock->pBlockAgg); if (pQueryAttr->topBotQuery && pBlock->pBlockAgg != NULL) { { // set previous window diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 8b8d6903c1..eb7bdfebab 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -210,7 +210,7 @@ static int32_t loadDataBlock(SOperatorInfo* 
pOperator, STableScanInfo* pTableSca bool allColumnsHaveAgg = true; SColumnDataAgg** pColAgg = NULL; - int32_t code = tsdbRetrieveDataBlockStatisInfo(pTableScanInfo->dataReader, &pColAgg, &allColumnsHaveAgg); + int32_t code = tsdbRetrieveDatablockSMAInfo(pTableScanInfo->dataReader, &pColAgg, &allColumnsHaveAgg); if (code != TSDB_CODE_SUCCESS) { longjmp(pTaskInfo->env, code); } @@ -2230,7 +2230,7 @@ static int32_t loadDataBlockFromOneTable(SOperatorInfo* pOperator, STableMergeSc bool allColumnsHaveAgg = true; SColumnDataAgg** pColAgg = NULL; STsdbReader* reader = taosArrayGetP(pTableScanInfo->dataReaders, readerIdx); - tsdbRetrieveDataBlockStatisInfo(reader, &pColAgg, &allColumnsHaveAgg); + tsdbRetrieveDatablockSMAInfo(reader, &pColAgg, &allColumnsHaveAgg); if (allColumnsHaveAgg == true) { int32_t numOfCols = taosArrayGetSize(pBlock->pDataBlock); From 41be3e4d0468dbcdcc4c19054664eb589bfd807c Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 7 Jul 2022 15:56:43 +0800 Subject: [PATCH 08/12] refactor: do some internal refactor. 
--- source/dnode/vnode/inc/vnode.h | 2 +- source/dnode/vnode/src/tsdb/tsdbRead.c | 82 +++++++++++++------------ source/libs/executor/src/executorimpl.c | 4 +- source/libs/executor/src/scanoperator.c | 4 +- 4 files changed, 47 insertions(+), 45 deletions(-) diff --git a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h index 60a731a9a8..59ee89762e 100644 --- a/source/dnode/vnode/inc/vnode.h +++ b/source/dnode/vnode/inc/vnode.h @@ -131,7 +131,7 @@ int32_t tsdbReaderOpen(SVnode *pVnode, SQueryTableDataCond *pCond, SArray *pTabl void tsdbReaderClose(STsdbReader *pReader); bool tsdbNextDataBlock(STsdbReader *pReader); void tsdbRetrieveDataBlockInfo(STsdbReader *pReader, SDataBlockInfo *pDataBlockInfo); -int32_t tsdbRetrieveDatablockSMAInfo(STsdbReader *pReader, SColumnDataAgg ***pBlockStatis, bool *allHave); +int32_t tsdbRetrieveDatablockSMA(STsdbReader *pReader, SColumnDataAgg ***pBlockStatis, bool *allHave); SArray *tsdbRetrieveDataBlock(STsdbReader *pTsdbReadHandle, SArray *pColumnIdList); int32_t tsdbReaderReset(STsdbReader *pReader, SQueryTableDataCond *pCond, int32_t tWinIdx); int32_t tsdbGetFileBlocksDistInfo(STsdbReader *pReader, STableBlockDistInfo *pTableBlockInfo); diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index db8c348673..d64fc15143 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -55,6 +55,7 @@ typedef struct SIOCostSummary { } SIOCostSummary; typedef struct SBlockLoadSuppInfo { + SArray* pColAgg; SColumnDataAgg tsColAgg; SColumnDataAgg** plist; int16_t* colIds; // column ids for loading file block data @@ -364,8 +365,9 @@ static int32_t tsdbReaderCreate(SVnode* pVnode, SQueryTableDataCond* pCond, STsd // allocate buffer in order to load data blocks from file SBlockLoadSuppInfo* pSup = &pReader->suppInfo; + pSup->pColAgg = taosArrayInit(4, sizeof(SColumnDataAgg)); pSup->plist = taosMemoryCalloc(pCond->numOfCols, POINTER_BYTES); - if 
(pSup->plist == NULL) { + if (pSup->pColAgg == NULL || pSup->plist == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _end; } @@ -2649,13 +2651,10 @@ void tsdbReaderClose(STsdbReader* pReader) { blockDataDestroy(pReader->pResBlock); taosMemoryFreeClear(pReader->suppInfo.plist); + + taosArrayDestroy(pReader->suppInfo.pColAgg); taosMemoryFree(pReader->suppInfo.slotIds); - if (!isEmptyQueryTimeWindow(&pReader->window)) { - // tsdbMayUnTakeMemSnapshot(pTsdbReadHandle); - } else { - ASSERT(pReader->status.pTableMap == NULL); - } #if 0 // if (pReader->status.pTableScanInfo != NULL) { // pReader->status.pTableScanInfo = destroyTableCheckInfo(pReader->status.pTableScanInfo); @@ -2727,7 +2726,7 @@ void tsdbRetrieveDataBlockInfo(STsdbReader* pReader, SDataBlockInfo* pDataBlockI pDataBlockInfo->window = pReader->pResBlock->info.window; } -int32_t tsdbRetrieveDatablockSMAInfo(STsdbReader* pReader, SColumnDataAgg*** pBlockStatis, bool* allHave) { +int32_t tsdbRetrieveDatablockSMA(STsdbReader* pReader, SColumnDataAgg*** pBlockStatis, bool* allHave) { int32_t code = 0; *allHave = false; @@ -2743,12 +2742,13 @@ int32_t tsdbRetrieveDatablockSMAInfo(STsdbReader* pReader, SColumnDataAgg*** pBl int64_t stime = taosGetTimestampUs(); - SArray* pColAgg = taosArrayInit(4, sizeof(SColumnDataAgg)); + SBlockLoadSuppInfo* pSup = &pReader->suppInfo; + if (tBlockHasSma(pBlock)) { - code = tsdbReadBlockSma(pReader->pFileReader, pBlock, pColAgg, NULL); + code = tsdbReadBlockSma(pReader->pFileReader, pBlock, pSup->pColAgg, NULL); if (code != TSDB_CODE_SUCCESS) { - tsdbDebug("vgId:%d, failed to load block SMA for uid %" PRIu64", code:%s, %s", 0, pFBlock->uid, - tstrerror(code), pReader->idStr); + tsdbDebug("vgId:%d, failed to load block SMA for uid %" PRIu64 ", code:%s, %s", 0, pFBlock->uid, tstrerror(code), + pReader->idStr); return code; } } else { @@ -2756,44 +2756,44 @@ int32_t tsdbRetrieveDatablockSMAInfo(STsdbReader* pReader, SColumnDataAgg*** pBl return TSDB_CODE_SUCCESS; } - *allHave = true; + 
*allHave = true; - // always load the first primary timestamp column data - SColumnDataAgg* pTsAgg = &pReader->suppInfo.tsColAgg; + // always load the first primary timestamp column data + SColumnDataAgg* pTsAgg = &pSup->tsColAgg; pTsAgg->numOfNull = 0; pTsAgg->colId = PRIMARYKEY_TIMESTAMP_COL_ID; - pTsAgg->min = pReader->pResBlock->info.window.skey; - pTsAgg->max = pReader->pResBlock->info.window.ekey; - pReader->suppInfo.plist[0] = pTsAgg; + pTsAgg->min = pReader->pResBlock->info.window.skey; + pTsAgg->max = pReader->pResBlock->info.window.ekey; + pSup->plist[0] = pTsAgg; - // update the number of NULL data rows - size_t numOfCols = blockDataGetNumOfCols(pReader->pResBlock); + // update the number of NULL data rows + size_t numOfCols = blockDataGetNumOfCols(pReader->pResBlock); - int32_t i = 0, j = 0; - while(j < numOfCols && i < taosArrayGetSize(pColAgg)) { - SColumnDataAgg* pAgg = taosArrayGet(pColAgg, i); - if (pAgg->colId == pReader->suppInfo.colIds[j]) { - if (IS_BSMA_ON(&(pReader->pSchema->columns[i]))) { - pReader->suppInfo.plist[j] = pAgg; - i += 1; - j += 1; - } else { - *allHave = false; - } - } else if (pAgg->colId < pReader->suppInfo.colIds[j]) { - i += 1; - } else if (pReader->suppInfo.colIds[j] < pAgg->colId) { - j += 1; - } - } + int32_t i = 0, j = 0; + while (j < numOfCols && i < taosArrayGetSize(pSup->pColAgg)) { + SColumnDataAgg* pAgg = taosArrayGet(pSup->pColAgg, i); + if (pAgg->colId == pSup->colIds[j]) { + if (IS_BSMA_ON(&(pReader->pSchema->columns[i]))) { + pSup->plist[j] = pAgg; + i += 1; + j += 1; + } else { + *allHave = false; + } + } else if (pAgg->colId < pSup->colIds[j]) { + i += 1; + } else if (pSup->colIds[j] < pAgg->colId) { + j += 1; + } + } - int64_t elapsed = taosGetTimestampUs() - stime; - pReader->cost.smaLoadTime += elapsed; + int64_t elapsed = taosGetTimestampUs() - stime; + pReader->cost.smaLoadTime += elapsed; - *pBlockStatis = pReader->suppInfo.plist; + *pBlockStatis = pSup->plist; - tsdbDebug("vgId:%d, succeed to load 
block SMA for uid %" PRIu64", elapsed time:%"PRId64"us, %s", 0, pFBlock->uid, + tsdbDebug("vgId:%d, succeed to load block SMA for uid %" PRIu64 ", elapsed time:%" PRId64 "us, %s", 0, pFBlock->uid, elapsed, pReader->idStr); return code; @@ -2841,6 +2841,8 @@ int32_t tsdbReaderReset(STsdbReader* pReader, SQueryTableDataCond* pCond, int32_ memset(&pReader->suppInfo.tsColAgg, 0, sizeof(SColumnDataAgg)); memset(pReader->suppInfo.plist, 0, POINTER_BYTES); + pReader->suppInfo.tsColAgg.colId = PRIMARYKEY_TIMESTAMP_COL_ID; + // todo set the correct numOfTables int32_t numOfTables = 1; SDataBlockIter* pBlockIter = &pReader->status.blockIter; diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index 1e72aee3e3..ee6b58c3fb 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -1130,7 +1130,7 @@ int32_t loadDataBlockOnDemand(SExecTaskInfo* pTaskInfo, STableScanInfo* pTableSc } else if ((*status) == BLK_DATA_SMA_LOAD) { // this function never returns error? 
pCost->loadBlockStatis += 1; -// tsdbRetrieveDatablockSMAInfo(pTableScanInfo->pTsdbReadHandle, &pBlock->pBlockAgg); +// tsdbRetrieveDatablockSMA(pTableScanInfo->pTsdbReadHandle, &pBlock->pBlockAgg); if (pBlock->pBlockAgg == NULL) { // data block statistics does not exist, load data block // pBlock->pDataBlock = tsdbRetrieveDataBlock(pTableScanInfo->pTsdbReadHandle, NULL); @@ -1141,7 +1141,7 @@ int32_t loadDataBlockOnDemand(SExecTaskInfo* pTaskInfo, STableScanInfo* pTableSc // load the data block statistics to perform further filter pCost->loadBlockStatis += 1; -// tsdbRetrieveDatablockSMAInfo(pTableScanInfo->pTsdbReadHandle, &pBlock->pBlockAgg); +// tsdbRetrieveDatablockSMA(pTableScanInfo->pTsdbReadHandle, &pBlock->pBlockAgg); if (pQueryAttr->topBotQuery && pBlock->pBlockAgg != NULL) { { // set previous window diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index eb7bdfebab..c6b4938ac7 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -210,7 +210,7 @@ static int32_t loadDataBlock(SOperatorInfo* pOperator, STableScanInfo* pTableSca bool allColumnsHaveAgg = true; SColumnDataAgg** pColAgg = NULL; - int32_t code = tsdbRetrieveDatablockSMAInfo(pTableScanInfo->dataReader, &pColAgg, &allColumnsHaveAgg); + int32_t code = tsdbRetrieveDatablockSMA(pTableScanInfo->dataReader, &pColAgg, &allColumnsHaveAgg); if (code != TSDB_CODE_SUCCESS) { longjmp(pTaskInfo->env, code); } @@ -2230,7 +2230,7 @@ static int32_t loadDataBlockFromOneTable(SOperatorInfo* pOperator, STableMergeSc bool allColumnsHaveAgg = true; SColumnDataAgg** pColAgg = NULL; STsdbReader* reader = taosArrayGetP(pTableScanInfo->dataReaders, readerIdx); - tsdbRetrieveDatablockSMAInfo(reader, &pColAgg, &allColumnsHaveAgg); + tsdbRetrieveDatablockSMA(reader, &pColAgg, &allColumnsHaveAgg); if (allColumnsHaveAgg == true) { int32_t numOfCols = taosArrayGetSize(pBlock->pDataBlock); From dd4f67df2fe61ddf9f5c2e86e6df3f70b3487f42 
Mon Sep 17 00:00:00 2001 From: Hongze Cheng Date: Thu, 7 Jul 2022 08:01:01 +0000 Subject: [PATCH 09/12] fix: sma read bug --- source/dnode/vnode/src/tsdb/tsdbReaderWriter.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c b/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c index 38bc5f855e..0cf56621aa 100644 --- a/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c +++ b/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c @@ -1152,7 +1152,7 @@ _err: int32_t tsdbReadBlockSma(SDataFReader *pReader, SBlock *pBlock, SArray *aColumnDataAgg, uint8_t **ppBuf) { int32_t code = 0; TdFilePtr pFD = pReader->pSmaFD; - int64_t offset = pBlock->aSubBlock[0].offset; + int64_t offset = pBlock->aSubBlock[0].sOffset; int64_t size = pBlock->aSubBlock[0].nSma * sizeof(SColumnDataAgg) + sizeof(TSCKSUM); uint8_t *pBuf = NULL; int64_t n; @@ -1175,6 +1175,9 @@ int32_t tsdbReadBlockSma(SDataFReader *pReader, SBlock *pBlock, SArray *aColumnD if (n < 0) { code = TAOS_SYSTEM_ERROR(errno); goto _err; + } else if (n < size) { + code = TSDB_CODE_FILE_CORRUPTED; + goto _err; } // check From 211985f03ea41426f7e88d4cc777d212baef2eea Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 7 Jul 2022 17:23:54 +0800 Subject: [PATCH 10/12] refactor: do some internal refactor. 
--- include/common/tcommon.h | 2 -- source/dnode/vnode/src/tsdb/tsdbRead.c | 4 ++-- source/libs/executor/src/executorimpl.c | 6 +++--- source/libs/function/src/builtinsimpl.c | 20 ++++++++++++++++++-- tests/system-test/failed.txt | 1 + 5 files changed, 24 insertions(+), 9 deletions(-) create mode 100644 tests/system-test/failed.txt diff --git a/include/common/tcommon.h b/include/common/tcommon.h index fd4ed6b180..7eff58d28f 100644 --- a/include/common/tcommon.h +++ b/include/common/tcommon.h @@ -88,8 +88,6 @@ typedef struct { #pragma pack(push, 1) typedef struct SColumnDataAgg { int16_t colId; - int16_t minIndex; - int16_t maxIndex; int16_t numOfNull; int64_t sum; int64_t max; diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index bd6306e48d..377561baa4 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -2801,11 +2801,11 @@ int32_t tsdbRetrieveDatablockSMA(STsdbReader* pReader, SColumnDataAgg*** pBlockS if (pAgg->colId == pSup->colIds[j]) { if (IS_BSMA_ON(&(pReader->pSchema->columns[i]))) { pSup->plist[j] = pAgg; - i += 1; - j += 1; } else { *allHave = false; } + i += 1; + j += 1; } else if (pAgg->colId < pSup->colIds[j]) { i += 1; } else if (pSup->colIds[j] < pAgg->colId) { diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index c3a125da66..f1cca70212 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -741,10 +741,10 @@ static int32_t doCreateConstantValColumnAggInfo(SInputColumnInfoData* pInput, SF if (type == TSDB_DATA_TYPE_BIGINT) { int64_t v = pFuncParam->param.i; - *da = (SColumnDataAgg){.numOfNull = 0, .min = v, .max = v, .maxIndex = 0, .minIndex = 0, .sum = v * numOfRows}; + *da = (SColumnDataAgg){.numOfNull = 0, .min = v, .max = v, .sum = v * numOfRows}; } else if (type == TSDB_DATA_TYPE_DOUBLE) { double v = pFuncParam->param.d; - *da = (SColumnDataAgg){.numOfNull = 0, 
.maxIndex = 0, .minIndex = 0}; + *da = (SColumnDataAgg){.numOfNull = 0}; *(double*)&da->min = v; *(double*)&da->max = v; @@ -752,7 +752,7 @@ static int32_t doCreateConstantValColumnAggInfo(SInputColumnInfoData* pInput, SF } else if (type == TSDB_DATA_TYPE_BOOL) { // todo validate this data type bool v = pFuncParam->param.i; - *da = (SColumnDataAgg){.numOfNull = 0, .maxIndex = 0, .minIndex = 0}; + *da = (SColumnDataAgg){.numOfNull = 0}; *(bool*)&da->min = 0; *(bool*)&da->max = v; *(bool*)&da->sum = v * numOfRows; diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c index 03532caac0..d8fcca30c0 100644 --- a/source/libs/function/src/builtinsimpl.c +++ b/source/libs/function/src/builtinsimpl.c @@ -1080,6 +1080,19 @@ bool getMinmaxFuncEnv(SFunctionNode* UNUSED_PARAM(pFunc), SFuncExecEnv* pEnv) { static void saveTupleData(SqlFunctionCtx* pCtx, int32_t rowIndex, const SSDataBlock* pSrcBlock, STuplePos* pPos); static void copyTupleData(SqlFunctionCtx* pCtx, int32_t rowIndex, const SSDataBlock* pSrcBlock, STuplePos* pPos); +static int32_t findRowIndex(int32_t start, int32_t num, SColumnInfoData* pCol, const char* tval) { + // the data is loaded, not only the block SMA value + for(int32_t i = start; i < num + start; ++i) { + char* p = colDataGetData(pCol, i); + if (memcmp((void*)tval, p, pCol->info.bytes) == 0) { + return i; + } + } + + ASSERT(0); +} + + int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) { int32_t numOfElems = 0; @@ -1111,15 +1124,14 @@ int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) { if (isMinFunc) { tval = &pInput->pColumnDataAgg[0]->min; - index = pInput->pColumnDataAgg[0]->minIndex; } else { tval = &pInput->pColumnDataAgg[0]->max; - index = pInput->pColumnDataAgg[0]->maxIndex; } if (!pBuf->assign) { pBuf->v = *(int64_t*)tval; if (pCtx->subsidiaries.num > 0) { + index = findRowIndex(pInput->startRowIndex, pInput->numOfRows, pCol, tval); saveTupleData(pCtx, index, pCtx->pSrcBlock, 
&pBuf->tuplePos); } } else { @@ -1131,6 +1143,7 @@ int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) { if ((prev < val) ^ isMinFunc) { pBuf->v = val; if (pCtx->subsidiaries.num > 0) { + index = findRowIndex(pInput->startRowIndex, pInput->numOfRows, pCol, tval); saveTupleData(pCtx, index, pCtx->pSrcBlock, &pBuf->tuplePos); } } @@ -1143,6 +1156,7 @@ int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) { if ((prev < val) ^ isMinFunc) { pBuf->v = val; if (pCtx->subsidiaries.num > 0) { + index = findRowIndex(pInput->startRowIndex, pInput->numOfRows, pCol, tval); saveTupleData(pCtx, index, pCtx->pSrcBlock, &pBuf->tuplePos); } } @@ -1154,6 +1168,7 @@ int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) { if ((prev < val) ^ isMinFunc) { pBuf->v = val; if (pCtx->subsidiaries.num > 0) { + index = findRowIndex(pInput->startRowIndex, pInput->numOfRows, pCol, tval); saveTupleData(pCtx, index, pCtx->pSrcBlock, &pBuf->tuplePos); } } @@ -1167,6 +1182,7 @@ int32_t doMinMaxHelper(SqlFunctionCtx* pCtx, int32_t isMinFunc) { } if (pCtx->subsidiaries.num > 0) { + index = findRowIndex(pInput->startRowIndex, pInput->numOfRows, pCol, tval); saveTupleData(pCtx, index, pCtx->pSrcBlock, &pBuf->tuplePos); } } diff --git a/tests/system-test/failed.txt b/tests/system-test/failed.txt new file mode 100644 index 0000000000..d0b66b1769 --- /dev/null +++ b/tests/system-test/failed.txt @@ -0,0 +1 @@ +#python3 ./test.py -f 2-query/last.py -Q 3 From 5c2e98544d3cb15d724ba4031e92af8ee392f642 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Thu, 7 Jul 2022 22:40:20 +0800 Subject: [PATCH 11/12] fix(query): check if the data block overlaps with the delete skyline --- source/dnode/vnode/src/tsdb/tsdbRead.c | 43 +++++++++++++++++++++----- 1 file changed, 36 insertions(+), 7 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index 377561baa4..43315073aa 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ 
b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -1416,29 +1416,58 @@ static bool keyOverlapFileBlock(TSDBKEY key, SBlock* pBlock, SVersionRange* pVer (pBlock->minVersion <= pVerRange->maxVer); } +static bool overlapWithDelSkyline(STableBlockScanInfo* pBlockScanInfo, const SBlock* pBlock) { + if (pBlockScanInfo->delSkyline == NULL) { + return false; + } + + TSDBKEY* pFirst = taosArrayGet(pBlockScanInfo->delSkyline, 0); + TSDBKEY* pLast = taosArrayGetLast(pBlockScanInfo->delSkyline); + + // ts is not overlap + if (pBlock->minKey.ts > pLast->ts || pBlock->maxKey.ts < pFirst->ts) { + return false; + } + + // version is not overlap + size_t num = taosArrayGetSize(pBlockScanInfo->delSkyline); + for(int32_t i = pBlockScanInfo->fileDelIndex; i < num; ++i) { + TSDBKEY* p = taosArrayGet(pBlockScanInfo->delSkyline, i); + if (p->ts >= pBlock->minKey.ts && p->ts <= pBlock->maxKey.ts) { + if (p->version >= pBlock->minVersion) { + return true; + } + } else if (p->ts > pBlock->maxKey.ts) { + return false; + } + } + + ASSERT(0); + return false; +} + // 1. the version of all rows should be less than the endVersion // 2. current block should not overlap with next neighbor block // 3. current timestamp should not be overlap with each other // 4. output buffer should be large enough to hold all rows in current block +// 5. 
delete info should not overlap with current block data static bool fileBlockShouldLoad(STsdbReader* pReader, SFileDataBlockInfo* pFBlock, SBlock* pBlock, STableBlockScanInfo* pScanInfo, TSDBKEY key) { int32_t neighborIndex = 0; SBlock* pNeighbor = getNeighborBlockOfSameTable(pFBlock, pScanInfo, &neighborIndex, pReader->order); + // overlap with neighbor bool overlapWithNeighbor = false; if (pNeighbor) { overlapWithNeighbor = overlapWithNeighborBlock(pBlock, pNeighbor, pReader->order); } - bool hasDup = false; - if (pBlock->nSubBlock == 1) { - hasDup = pBlock->hasDup; - } else { - hasDup = true; - } + // has duplicated ts of different version in this block + bool hasDup = (pBlock->nSubBlock == 1)? pBlock->hasDup:true; + bool overlapWithDel= overlapWithDelSkyline(pScanInfo, pBlock); return (overlapWithNeighbor || hasDup || dataBlockPartiallyRequired(&pReader->window, &pReader->verRange, pBlock) || - keyOverlapFileBlock(key, pBlock, &pReader->verRange) || (pBlock->nRow > pReader->capacity)); + keyOverlapFileBlock(key, pBlock, &pReader->verRange) || (pBlock->nRow > pReader->capacity) || overlapWithDel); } static int32_t buildDataBlockFromBuf(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo, int64_t endKey) { From c3398006117274e509bd3ddf70c2cad19196f920 Mon Sep 17 00:00:00 2001 From: Haojun Liao Date: Fri, 8 Jul 2022 11:02:12 +0800 Subject: [PATCH 12/12] fix(query): handle delete duration generating data block. 
---
 source/dnode/vnode/src/tsdb/tsdbRead.c | 115 +++++++++++++++++++------
 1 file changed, 90 insertions(+), 25 deletions(-)

diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c
index 43315073aa..53579474df 100644
--- a/source/dnode/vnode/src/tsdb/tsdbRead.c
+++ b/source/dnode/vnode/src/tsdb/tsdbRead.c
@@ -137,7 +137,7 @@ static int32_t doMergeRowsInBuf(SIterInfo* pIter, int64_t ts, SArray* pDelList,
 static int32_t doAppendOneRow(SSDataBlock* pBlock, STsdbReader* pReader, STSRow* pTSRow);
 static void setComposedBlockFlag(STsdbReader* pReader, bool composed);
 static void updateSchema(TSDBROW* pRow, uint64_t uid, STsdbReader* pReader);
-static bool hasBeenDropped(const SArray* pDelList, int32_t* index, TSDBKEY* pKey);
+static bool hasBeenDropped(const SArray* pDelList, int32_t* index, TSDBKEY* pKey, int32_t order);
 
 static void doMergeMultiRows(TSDBROW* pRow, uint64_t uid, SIterInfo* pIter, SArray* pDelList, STSRow** pTSRow,
                              STsdbReader* pReader);
@@ -1416,7 +1416,7 @@ static bool keyOverlapFileBlock(TSDBKEY key, SBlock* pBlock, SVersionRange* pVer
          (pBlock->minVersion <= pVerRange->maxVer);
 }
 
-static bool overlapWithDelSkyline(STableBlockScanInfo* pBlockScanInfo, const SBlock* pBlock) {
+static bool overlapWithDelSkyline(STableBlockScanInfo* pBlockScanInfo, const SBlock* pBlock, int32_t order) {
   if (pBlockScanInfo->delSkyline == NULL) {
     return false;
   }
@@ -1429,9 +1429,11 @@ static bool overlapWithDelSkyline(STableBlockScanInfo* pBlockScanInfo, const SBl
     return false;
   }
 
+  // descending readers move fileDelIndex backward, so scan from the head to keep the ts-ordered early exit valid
+  int32_t start = ASCENDING_TRAVERSE(order)? pBlockScanInfo->fileDelIndex:0;
   // version is not overlap
   size_t num = taosArrayGetSize(pBlockScanInfo->delSkyline);
-  for(int32_t i = pBlockScanInfo->fileDelIndex; i < num; ++i) {
+  for(int32_t i = start; i < num; ++i) {
     TSDBKEY* p = taosArrayGet(pBlockScanInfo->delSkyline, i);
     if (p->ts >= pBlock->minKey.ts && p->ts <= pBlock->maxKey.ts) {
       if (p->version >= pBlock->minVersion) {
@@ -1464,7 +1466,7 @@ static bool fileBlockShouldLoad(STsdbReader* pReader, SFileDataBlockInfo* pFBloc
 
   // has duplicated ts of different version in this block
   bool hasDup = (pBlock->nSubBlock == 1)? pBlock->hasDup:true;
-  bool overlapWithDel= overlapWithDelSkyline(pScanInfo, pBlock);
+  bool overlapWithDel= overlapWithDelSkyline(pScanInfo, pBlock, pReader->order);
 
   return (overlapWithNeighbor || hasDup || dataBlockPartiallyRequired(&pReader->window, &pReader->verRange, pBlock) ||
           keyOverlapFileBlock(key, pBlock, &pReader->verRange) || (pBlock->nRow > pReader->capacity) || overlapWithDel);
@@ -1691,7 +1693,7 @@ static bool isValidFileBlockRow(SBlockData* pBlockData, SFileBlockDumpInfo* pDum
   }
 
   TSDBKEY k = {.ts = ts, .version = ver};
-  if (hasBeenDropped(pBlockScanInfo->delSkyline, &pBlockScanInfo->fileDelIndex, &k)) {
+  if (hasBeenDropped(pBlockScanInfo->delSkyline, &pBlockScanInfo->fileDelIndex, &k, pReader->order)) {
     return false;
   }
 
@@ -2220,41 +2222,104 @@ static SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond
 //   taosArrayPush(pTsdbReadHandle->pTableCheckInfo, &info);
 // }
 
-bool hasBeenDropped(const SArray* pDelList, int32_t* index, TSDBKEY* pKey) {
+bool hasBeenDropped(const SArray* pDelList, int32_t* index, TSDBKEY* pKey, int32_t order) {
   ASSERT(pKey != NULL);
   if (pDelList == NULL) {
     return false;
   }
+  size_t num = taosArrayGetSize(pDelList);
+  bool asc = ASCENDING_TRAVERSE(order);
+  int32_t step = asc? 1:-1;
 
-  if (*index >= taosArrayGetSize(pDelList) - 1) {
-    TSDBKEY* last = taosArrayGetLast(pDelList);
-    if (pKey->ts > last->ts) {
-      return false;
-    } else if (pKey->ts == last->ts) {
-      size_t size = taosArrayGetSize(pDelList);
-      TSDBKEY* prev = taosArrayGet(pDelList, size - 2);
-      if (prev->version >= pKey->version) {
-        return true;
-      } else {
+  if (asc) {
+    if (*index >= num - 1) {
+      TSDBKEY* last = taosArrayGetLast(pDelList);
+      ASSERT(pKey->ts >= last->ts);
+
+      if (pKey->ts > last->ts) {
         return false;
+      } else if (pKey->ts == last->ts) {
+        TSDBKEY* prev = taosArrayGet(pDelList, num - 2);
+        return (prev->version >= pKey->version);
       }
     } else {
-      ASSERT(0);
+      TSDBKEY* pCurrent = taosArrayGet(pDelList, *index);
+      TSDBKEY* pNext = taosArrayGet(pDelList, (*index) + 1);
+
+      if (pKey->ts < pCurrent->ts) {
+        return false;
+      }
+
+      if (pCurrent->ts <= pKey->ts && pNext->ts >= pKey->ts && pCurrent->version >= pKey->version) {
+        return true;
+      }
+
+      while (pNext->ts <= pKey->ts && (*index) < num - 1) {
+        (*index) += 1;
+
+        if ((*index) < num - 1) {
+          pCurrent = taosArrayGet(pDelList, *index);
+          pNext = taosArrayGet(pDelList, (*index) + 1);
+
+          // it is not a consecutive deletion range, ignore it
+          if (pCurrent->version == 0 && pNext->version > 0) {
+            continue;
+          }
+
+          if (pCurrent->ts <= pKey->ts && pNext->ts >= pKey->ts && pCurrent->version >= pKey->version) {
+            return true;
+          }
+        }
+      }
+
+      return false;
     }
   } else {
-    TSDBKEY* pCurrent = taosArrayGet(pDelList, *index);
-    TSDBKEY* pNext = taosArrayGet(pDelList, (*index) + 1);
+    if (*index <= 0) {
+      TSDBKEY* pFirst = taosArrayGet(pDelList, 0);
 
-    if (pCurrent->ts <= pKey->ts && pNext->ts >= pKey->ts && pCurrent->version >= pKey->version) {
-      return true;
+      if (pKey->ts < pFirst->ts) {
+        return false;
+      } else if (pKey->ts == pFirst->ts) {
+        return pFirst->version >= pKey->version;
+      } else {
+        ASSERT(0);
+      }
     } else {
-      while (pNext->ts < pKey->ts && (*index) < taosArrayGetSize(pDelList) - 1) {
-        (*index) += 1;
+      TSDBKEY* pCurrent = taosArrayGet(pDelList, *index);
+      TSDBKEY* pPrev = taosArrayGet(pDelList, (*index) - 1);
+
+      if (pKey->ts > pCurrent->ts) {
+        return false;
+      }
+
+      if (pPrev->ts <= pKey->ts && pCurrent->ts >= pKey->ts && pPrev->version >= pKey->version) {
+        return true;
+      }
+
+      while (pPrev->ts >= pKey->ts && (*index) > 1) {
+        (*index) += step;
+
+        if ((*index) >= 1) {
+          pCurrent = taosArrayGet(pDelList, *index);
+          pPrev = taosArrayGet(pDelList, (*index) - 1);
+
+          // it is not a consecutive deletion range, ignore it
+          if (pCurrent->version > 0 && pPrev->version == 0) {
+            continue;
+          }
+
+          if (pPrev->ts <= pKey->ts && pCurrent->ts >= pKey->ts && pPrev->version >= pKey->version) {
+            return true;
+          }
+        }
       }
 
       return false;
     }
   }
+
+  return false;
 }
 
 TSDBROW* getValidRow(SIterInfo* pIter, const SArray* pDelList, STsdbReader* pReader) {
@@ -2271,7 +2336,7 @@ TSDBROW* getValidRow(SIterInfo* pIter, const SArray* pDelList, STsdbReader* pRea
 
   // it is a valid data version
   if ((key.version <= pReader->verRange.maxVer && key.version >= pReader->verRange.minVer) &&
-      (!hasBeenDropped(pDelList, &pIter->index, &key))) {
+      (!hasBeenDropped(pDelList, &pIter->index, &key, pReader->order))) {
     return pRow;
   }
 
@@ -2290,7 +2355,7 @@ TSDBROW* getValidRow(SIterInfo* pIter, const SArray* pDelList, STsdbReader* pRea
     }
 
     if (key.version <= pReader->verRange.maxVer && key.version >= pReader->verRange.minVer &&
-        (!hasBeenDropped(pDelList, &pIter->index, &key))) {
+        (!hasBeenDropped(pDelList, &pIter->index, &key, pReader->order))) {
       return pRow;
     }
   }