diff --git a/include/common/tdataformat.h b/include/common/tdataformat.h index 3679b3773b..af7c88acde 100644 --- a/include/common/tdataformat.h +++ b/include/common/tdataformat.h @@ -38,22 +38,18 @@ typedef struct STagVal STagVal; typedef struct STag STag; // bitmap -#define N1(n) ((1 << (n)) - 1) -#define BIT1_SIZE(n) (((n)-1) / 8 + 1) -#define BIT2_SIZE(n) (((n)-1) / 4 + 1) -#define SET_BIT1(p, i, v) \ - do { \ - (p)[(i) / 8] &= N1((i) % 8); \ - (p)[(i) / 8] |= (((uint8_t)(v)) << (((i) % 8))); \ - } while (0) +const static uint8_t BIT2_MAP[4][4] = {{0b00000000, 0b00000001, 0b00000010, 0}, + {0b00000000, 0b00000100, 0b00001000, 2}, + {0b00000000, 0b00010000, 0b00100000, 4}, + {0b00000000, 0b01000000, 0b10000000, 6}}; -#define GET_BIT1(p, i) (((p)[(i) / 8] >> ((i) % 8)) & ((uint8_t)1)) -#define SET_BIT2(p, i, v) \ - do { \ - p[(i) / 4] &= N1((i) % 4 * 2); \ - (p)[(i) / 4] |= (((uint8_t)(v)) << (((i) % 4) * 2)); \ - } while (0) -#define GET_BIT2(p, i) (((p)[(i) / 4] >> (((i) % 4) * 2)) & ((uint8_t)3)) +#define N1(n) ((((uint8_t)1) << (n)) - 1) +#define BIT1_SIZE(n) ((((n)-1) >> 3) + 1) +#define BIT2_SIZE(n) ((((n)-1) >> 2) + 1) +#define SET_BIT1(p, i, v) ((p)[(i) >> 3] = (p)[(i) >> 3] & N1((i)&7) | (((uint8_t)(v)) << ((i)&7))) +#define GET_BIT1(p, i) (((p)[(i) >> 3] >> ((i)&7)) & ((uint8_t)1)) +#define SET_BIT2(p, i, v) ((p)[(i) >> 2] = (p)[(i) >> 2] & N1(BIT2_MAP[(i)&3][3]) | BIT2_MAP[(i)&3][(v)]) +#define GET_BIT2(p, i) (((p)[(i) >> 2] >> BIT2_MAP[(i)&3][3]) & ((uint8_t)3)) // STSchema int32_t tTSchemaCreate(int32_t sver, SSchema *pSchema, int32_t nCols, STSchema **ppTSchema); @@ -171,7 +167,7 @@ struct SColVal { #pragma pack(push, 1) struct STagVal { -// char colName[TSDB_COL_NAME_LEN]; // only used for tmq_get_meta + // char colName[TSDB_COL_NAME_LEN]; // only used for tmq_get_meta union { int16_t cid; char *pKey; diff --git a/source/client/test/clientTests.cpp b/source/client/test/clientTests.cpp index ec270889e2..4ea5443678 100644 --- 
a/source/client/test/clientTests.cpp +++ b/source/client/test/clientTests.cpp @@ -123,7 +123,7 @@ void createNewTable(TAOS* pConn, int32_t index) { } taos_free_result(pRes); - for(int32_t i = 0; i < 100000; i += 20) { + for(int32_t i = 0; i < 3280; i += 20) { char sql[1024] = {0}; sprintf(sql, "insert into tu%d values(now+%da, %d)(now+%da, %d)(now+%da, %d)(now+%da, %d)" @@ -679,30 +679,28 @@ TEST(testCase, projection_query_tables) { TAOS_RES* pRes = taos_query(pConn, "use abc1"); taos_free_result(pRes); - pRes = taos_query(pConn, "explain verbose true select _wstart,count(*),a from st1 partition by a interval(1s)"); - printResult(pRes); -// pRes = taos_query(pConn, "create stable st1 (ts timestamp, k int) tags(a int)"); -// if (taos_errno(pRes) != 0) { -// printf("failed to create table tu, reason:%s\n", taos_errstr(pRes)); -// } -// taos_free_result(pRes); -// -// pRes = taos_query(pConn, "create stable st2 (ts timestamp, k int) tags(a int)"); -// if (taos_errno(pRes) != 0) { -// printf("failed to create table tu, reason:%s\n", taos_errstr(pRes)); -// } -// taos_free_result(pRes); -// -// pRes = taos_query(pConn, "create table tu using st1 tags(1)"); -// if (taos_errno(pRes) != 0) { -// printf("failed to create table tu, reason:%s\n", taos_errstr(pRes)); -// } -// taos_free_result(pRes); -// -// for(int32_t i = 0; i < 1; ++i) { -// printf("create table :%d\n", i); -// createNewTable(pConn, i); -// } + pRes = taos_query(pConn, "create stable st1 (ts timestamp, k int) tags(a int)"); + if (taos_errno(pRes) != 0) { + printf("failed to create table tu, reason:%s\n", taos_errstr(pRes)); + } + taos_free_result(pRes); + + pRes = taos_query(pConn, "create stable st2 (ts timestamp, k int) tags(a int)"); + if (taos_errno(pRes) != 0) { + printf("failed to create table tu, reason:%s\n", taos_errstr(pRes)); + } + taos_free_result(pRes); + + pRes = taos_query(pConn, "create table tu using st1 tags(1)"); + if (taos_errno(pRes) != 0) { + printf("failed to create table tu, 
reason:%s\n", taos_errstr(pRes)); + } + taos_free_result(pRes); + + for(int32_t i = 0; i < 2; ++i) { + printf("create table :%d\n", i); + createNewTable(pConn, i); + } // // pRes = taos_query(pConn, "select * from tu"); // if (taos_errno(pRes) != 0) { diff --git a/source/common/src/ttypes.c b/source/common/src/ttypes.c index 156b66ae86..fee89e2f37 100644 --- a/source/common/src/ttypes.c +++ b/source/common/src/ttypes.c @@ -392,10 +392,10 @@ tDataTypeDescriptor tDataTypes[TSDB_DATA_TYPE_MAX] = { getStatics_i64}, {TSDB_DATA_TYPE_FLOAT, 5, FLOAT_BYTES, "FLOAT", 0, 0, tsCompressFloat, tsDecompressFloat, getStatics_f}, {TSDB_DATA_TYPE_DOUBLE, 6, DOUBLE_BYTES, "DOUBLE", 0, 0, tsCompressDouble, tsDecompressDouble, getStatics_d}, - {TSDB_DATA_TYPE_VARCHAR, 6, 0, "VARCHAR", 0, 0, tsCompressString, tsDecompressString, getStatics_bin}, + {TSDB_DATA_TYPE_VARCHAR, 6, 1, "VARCHAR", 0, 0, tsCompressString, tsDecompressString, getStatics_bin}, {TSDB_DATA_TYPE_TIMESTAMP, 9, LONG_BYTES, "TIMESTAMP", INT64_MIN, INT64_MAX, tsCompressTimestamp, tsDecompressTimestamp, getStatics_i64}, - {TSDB_DATA_TYPE_NCHAR, 5, 8, "NCHAR", 0, 0, tsCompressString, tsDecompressString, getStatics_nchr}, + {TSDB_DATA_TYPE_NCHAR, 5, 1, "NCHAR", 0, 0, tsCompressString, tsDecompressString, getStatics_nchr}, {TSDB_DATA_TYPE_UTINYINT, 16, CHAR_BYTES, "TINYINT UNSIGNED", 0, UINT8_MAX, tsCompressTinyint, tsDecompressTinyint, getStatics_u8}, {TSDB_DATA_TYPE_USMALLINT, 17, SHORT_BYTES, "SMALLINT UNSIGNED", 0, UINT16_MAX, tsCompressSmallint, diff --git a/source/dnode/vnode/src/inc/meta.h b/source/dnode/vnode/src/inc/meta.h index 2efc33a8ee..adfbb91920 100644 --- a/source/dnode/vnode/src/inc/meta.h +++ b/source/dnode/vnode/src/inc/meta.h @@ -66,7 +66,6 @@ int32_t metaCacheOpen(SMeta* pMeta); void metaCacheClose(SMeta* pMeta); int32_t metaCacheUpsert(SMeta* pMeta, SMetaInfo* pInfo); int32_t metaCacheDrop(SMeta* pMeta, int64_t uid); -int32_t metaCacheGet(SMeta* pMeta, int64_t uid, SMetaInfo* pInfo); struct SMeta { 
TdThreadRwlock lock; diff --git a/source/dnode/vnode/src/inc/tsdb.h b/source/dnode/vnode/src/inc/tsdb.h index a30f308ecd..d1f5cfb122 100644 --- a/source/dnode/vnode/src/inc/tsdb.h +++ b/source/dnode/vnode/src/inc/tsdb.h @@ -45,7 +45,7 @@ typedef struct SBlockIdx SBlockIdx; typedef struct SBlock SBlock; typedef struct SBlockL SBlockL; typedef struct SColData SColData; -typedef struct SBlockDataHdr SBlockDataHdr; +typedef struct SDiskDataHdr SDiskDataHdr; typedef struct SBlockData SBlockData; typedef struct SDelFile SDelFile; typedef struct SHeadFile SHeadFile; @@ -61,7 +61,11 @@ typedef struct SRowIter SRowIter; typedef struct STsdbFS STsdbFS; typedef struct SRowMerger SRowMerger; typedef struct STsdbReadSnap STsdbReadSnap; +typedef struct SBlockInfo SBlockInfo; +typedef struct SSmaInfo SSmaInfo; +typedef struct SBlockCol SBlockCol; +#define TSDB_FILE_DLMT ((uint32_t)0xF00AFA0F) #define TSDB_MAX_SUBBLOCKS 8 #define TSDB_FHDR_SIZE 512 @@ -113,10 +117,14 @@ int32_t tPutBlock(uint8_t *p, void *ph); int32_t tGetBlock(uint8_t *p, void *ph); int32_t tBlockCmprFn(const void *p1, const void *p2); bool tBlockHasSma(SBlock *pBlock); +// SBlockL +int32_t tPutBlockL(uint8_t *p, void *ph); +int32_t tGetBlockL(uint8_t *p, void *ph); // SBlockIdx int32_t tPutBlockIdx(uint8_t *p, void *ph); int32_t tGetBlockIdx(uint8_t *p, void *ph); int32_t tCmprBlockIdx(void const *lhs, void const *rhs); +int32_t tCmprBlockL(void const *lhs, void const *rhs); // SColdata void tColDataInit(SColData *pColData, int16_t cid, int8_t type, int8_t smaOn); void tColDataReset(SColData *pColData); @@ -131,20 +139,25 @@ int32_t tGetColData(uint8_t *p, SColData *pColData); #define tBlockDataLastRow(PBLOCKDATA) tsdbRowFromBlockData(PBLOCKDATA, (PBLOCKDATA)->nRow - 1) #define tBlockDataFirstKey(PBLOCKDATA) TSDBROW_KEY(&tBlockDataFirstRow(PBLOCKDATA)) #define tBlockDataLastKey(PBLOCKDATA) TSDBROW_KEY(&tBlockDataLastRow(PBLOCKDATA)) -int32_t tBlockDataInit(SBlockData *pBlockData); + +int32_t 
tBlockDataCreate(SBlockData *pBlockData); +void tBlockDataDestroy(SBlockData *pBlockData, int8_t deepClear); +int32_t tBlockDataInit(SBlockData *pBlockData, int64_t suid, int64_t uid, STSchema *pTSchema); +int32_t tBlockDataInitEx(SBlockData *pBlockData, SBlockData *pBlockDataFrom); void tBlockDataReset(SBlockData *pBlockData); -int32_t tBlockDataSetSchema(SBlockData *pBlockData, STSchema *pTSchema); -int32_t tBlockDataCorrectSchema(SBlockData *pBlockData, SBlockData *pBlockDataFrom); -void tBlockDataClearData(SBlockData *pBlockData); -void tBlockDataClear(SBlockData *pBlockData, int8_t deepClear); -int32_t tBlockDataAddColData(SBlockData *pBlockData, int32_t iColData, SColData **ppColData); -int32_t tBlockDataAppendRow(SBlockData *pBlockData, TSDBROW *pRow, STSchema *pTSchema); -int32_t tBlockDataMerge(SBlockData *pBlockData1, SBlockData *pBlockData2, SBlockData *pBlockData); -int32_t tBlockDataCopy(SBlockData *pBlockDataSrc, SBlockData *pBlockDataDest); +int32_t tBlockDataAppendRow(SBlockData *pBlockData, TSDBROW *pRow, STSchema *pTSchema, int64_t uid); +void tBlockDataClear(SBlockData *pBlockData); SColData *tBlockDataGetColDataByIdx(SBlockData *pBlockData, int32_t idx); void tBlockDataGetColData(SBlockData *pBlockData, int16_t cid, SColData **ppColData); -int32_t tPutBlockData(uint8_t *p, SBlockData *pBlockData); -int32_t tGetBlockData(uint8_t *p, SBlockData *pBlockData); +int32_t tBlockDataCopy(SBlockData *pBlockDataSrc, SBlockData *pBlockDataDest); +int32_t tBlockDataMerge(SBlockData *pBlockData1, SBlockData *pBlockData2, SBlockData *pBlockData); +int32_t tBlockDataAddColData(SBlockData *pBlockData, int32_t iColData, SColData **ppColData); +int32_t tCmprBlockData(SBlockData *pBlockData, int8_t cmprAlg, uint8_t **ppOut, int32_t *szOut, uint8_t *aBuf[], + int32_t aBufN[]); +int32_t tDecmprBlockData(uint8_t *pIn, int32_t szIn, SBlockData *pBlockData, uint8_t *aBuf[]); +// SDiskDataHdr +int32_t tPutDiskDataHdr(uint8_t *p, void *ph); +int32_t 
tGetDiskDataHdr(uint8_t *p, void *ph); // SDelIdx int32_t tPutDelIdx(uint8_t *p, void *ph); int32_t tGetDelIdx(uint8_t *p, void *ph); @@ -168,13 +181,25 @@ void tsdbFidKeyRange(int32_t fid, int32_t minutes, int8_t precision, TSKEY *m int32_t tsdbFidLevel(int32_t fid, STsdbKeepCfg *pKeepCfg, int64_t now); int32_t tsdbBuildDeleteSkyline(SArray *aDelData, int32_t sidx, int32_t eidx, SArray *aSkyline); void tsdbCalcColDataSMA(SColData *pColData, SColumnDataAgg *pColAgg); +int32_t tPutColumnDataAgg(uint8_t *p, SColumnDataAgg *pColAgg); +int32_t tGetColumnDataAgg(uint8_t *p, SColumnDataAgg *pColAgg); +int32_t tsdbCmprData(uint8_t *pIn, int32_t szIn, int8_t type, int8_t cmprAlg, uint8_t **ppOut, int32_t nOut, + int32_t *szOut, uint8_t **ppBuf); +int32_t tsdbDecmprData(uint8_t *pIn, int32_t szIn, int8_t type, int8_t cmprAlg, uint8_t **ppOut, int32_t szOut, + uint8_t **ppBuf); +int32_t tsdbCmprColData(SColData *pColData, int8_t cmprAlg, SBlockCol *pBlockCol, uint8_t **ppOut, int32_t nOut, + uint8_t **ppBuf); +int32_t tsdbDecmprColData(uint8_t *pIn, SBlockCol *pBlockCol, int8_t cmprAlg, int32_t nVal, SColData *pColData, + uint8_t **ppBuf); +int32_t tsdbReadAndCheck(TdFilePtr pFD, int64_t offset, uint8_t **ppOut, int32_t size, int8_t toCheck); // tsdbMemTable ============================================================================================== // SMemTable -int32_t tsdbMemTableCreate(STsdb *pTsdb, SMemTable **ppMemTable); -void tsdbMemTableDestroy(SMemTable *pMemTable); -void tsdbGetTbDataFromMemTable(SMemTable *pMemTable, tb_uid_t suid, tb_uid_t uid, STbData **ppTbData); -void tsdbRefMemTable(SMemTable *pMemTable); -void tsdbUnrefMemTable(SMemTable *pMemTable); +int32_t tsdbMemTableCreate(STsdb *pTsdb, SMemTable **ppMemTable); +void tsdbMemTableDestroy(SMemTable *pMemTable); +STbData *tsdbGetTbDataFromMemTable(SMemTable *pMemTable, tb_uid_t suid, tb_uid_t uid); +void tsdbRefMemTable(SMemTable *pMemTable); +void tsdbUnrefMemTable(SMemTable *pMemTable); +SArray 
*tsdbMemTableGetTbDataArray(SMemTable *pMemTable); // STbDataIter int32_t tsdbTbDataIterCreate(STbData *pTbData, TSDBKEY *pFrom, int8_t backward, STbDataIter **ppIter); void *tsdbTbDataIterDestroy(STbDataIter *pIter); @@ -223,33 +248,33 @@ int32_t tsdbFSUpsertDelFile(STsdbFS *pFS, SDelFile *pDelFile); int32_t tsdbDataFWriterOpen(SDataFWriter **ppWriter, STsdb *pTsdb, SDFileSet *pSet); int32_t tsdbDataFWriterClose(SDataFWriter **ppWriter, int8_t sync); int32_t tsdbUpdateDFileSetHeader(SDataFWriter *pWriter); -int32_t tsdbWriteBlockIdx(SDataFWriter *pWriter, SArray *aBlockIdx, uint8_t **ppBuf); -int32_t tsdbWriteBlock(SDataFWriter *pWriter, SMapData *pMapData, uint8_t **ppBuf, SBlockIdx *pBlockIdx); -int32_t tsdbWriteBlockData(SDataFWriter *pWriter, SBlockData *pBlockData, uint8_t **ppBuf1, uint8_t **ppBuf2, - SBlockIdx *pBlockIdx, SBlock *pBlock, int8_t cmprAlg); +int32_t tsdbWriteBlockIdx(SDataFWriter *pWriter, SArray *aBlockIdx); +int32_t tsdbWriteBlock(SDataFWriter *pWriter, SMapData *pMapData, SBlockIdx *pBlockIdx); +int32_t tsdbWriteBlockL(SDataFWriter *pWriter, SArray *aBlockL); +int32_t tsdbWriteBlockData(SDataFWriter *pWriter, SBlockData *pBlockData, SBlockInfo *pBlkInfo, SSmaInfo *pSmaInfo, + int8_t cmprAlg, int8_t toLast); int32_t tsdbDFileSetCopy(STsdb *pTsdb, SDFileSet *pSetFrom, SDFileSet *pSetTo); // SDataFReader int32_t tsdbDataFReaderOpen(SDataFReader **ppReader, STsdb *pTsdb, SDFileSet *pSet); int32_t tsdbDataFReaderClose(SDataFReader **ppReader); -int32_t tsdbReadBlockIdx(SDataFReader *pReader, SArray *aBlockIdx, uint8_t **ppBuf); -int32_t tsdbReadBlock(SDataFReader *pReader, SBlockIdx *pBlockIdx, SMapData *pMapData, uint8_t **ppBuf); -int32_t tsdbReadColData(SDataFReader *pReader, SBlockIdx *pBlockIdx, SBlock *pBlock, int16_t *aColId, int32_t nCol, - SBlockData *pBlockData, uint8_t **ppBuf1, uint8_t **ppBuf2); -int32_t tsdbReadBlockData(SDataFReader *pReader, SBlockIdx *pBlockIdx, SBlock *pBlock, SBlockData *pBlockData, - uint8_t **ppBuf1, uint8_t 
**ppBuf2); -int32_t tsdbReadBlockSma(SDataFReader *pReader, SBlock *pBlock, SArray *aColumnDataAgg, uint8_t **ppBuf); +int32_t tsdbReadBlockIdx(SDataFReader *pReader, SArray *aBlockIdx); +int32_t tsdbReadBlock(SDataFReader *pReader, SBlockIdx *pBlockIdx, SMapData *pMapData); +int32_t tsdbReadBlockL(SDataFReader *pReader, SArray *aBlockL); +int32_t tsdbReadBlockSma(SDataFReader *pReader, SBlock *pBlock, SArray *aColumnDataAgg); +int32_t tsdbReadDataBlock(SDataFReader *pReader, SBlock *pBlock, SBlockData *pBlockData); +int32_t tsdbReadLastBlock(SDataFReader *pReader, SBlockL *pBlockL, SBlockData *pBlockData); // SDelFWriter int32_t tsdbDelFWriterOpen(SDelFWriter **ppWriter, SDelFile *pFile, STsdb *pTsdb); int32_t tsdbDelFWriterClose(SDelFWriter **ppWriter, int8_t sync); -int32_t tsdbWriteDelData(SDelFWriter *pWriter, SArray *aDelData, uint8_t **ppBuf, SDelIdx *pDelIdx); -int32_t tsdbWriteDelIdx(SDelFWriter *pWriter, SArray *aDelIdx, uint8_t **ppBuf); +int32_t tsdbWriteDelData(SDelFWriter *pWriter, SArray *aDelData, SDelIdx *pDelIdx); +int32_t tsdbWriteDelIdx(SDelFWriter *pWriter, SArray *aDelIdx); int32_t tsdbUpdateDelFileHdr(SDelFWriter *pWriter); // SDelFReader -int32_t tsdbDelFReaderOpen(SDelFReader **ppReader, SDelFile *pFile, STsdb *pTsdb, uint8_t **ppBuf); +int32_t tsdbDelFReaderOpen(SDelFReader **ppReader, SDelFile *pFile, STsdb *pTsdb); int32_t tsdbDelFReaderClose(SDelFReader **ppReader); -int32_t tsdbReadDelData(SDelFReader *pReader, SDelIdx *pDelIdx, SArray *aDelData, uint8_t **ppBuf); -int32_t tsdbReadDelIdx(SDelFReader *pReader, SArray *aDelIdx, uint8_t **ppBuf); +int32_t tsdbReadDelData(SDelFReader *pReader, SDelIdx *pDelIdx, SArray *aDelData); +int32_t tsdbReadDelIdx(SDelFReader *pReader, SArray *aDelIdx); // tsdbRead.c ============================================================================================== int32_t tsdbTakeReadSnap(STsdb *pTsdb, STsdbReadSnap **ppSnap); void tsdbUntakeReadSnap(STsdb *pTsdb, STsdbReadSnap *pSnap); @@ -277,13 +302,6 
@@ size_t tsdbCacheGetCapacity(SVnode *pVnode); int32_t tsdbCacheLastArray2Row(SArray *pLastArray, STSRow **ppRow, STSchema *pSchema); // structs ======================= -typedef struct { - int minFid; - int midFid; - int maxFid; - TSKEY minKey; -} SRtn; - struct STsdbFS { SDelFile *pDelFile; SArray *aDFileSet; // SArray @@ -312,30 +330,23 @@ struct SMemSkipListNode { SMemSkipListNode *forwards[0]; }; typedef struct SMemSkipList { - uint32_t seed; int64_t size; + uint32_t seed; int8_t maxLevel; int8_t level; SMemSkipListNode *pHead; SMemSkipListNode *pTail; } SMemSkipList; -struct SDelDataInfo { - tb_uid_t suid; - tb_uid_t uid; -}; - struct STbData { tb_uid_t suid; tb_uid_t uid; TSKEY minKey; TSKEY maxKey; - int64_t minVersion; - int64_t maxVersion; - int32_t maxSkmVer; SDelData *pHead; SDelData *pTail; SMemSkipList sl; + STbData *next; }; struct SMemTable { @@ -345,11 +356,13 @@ struct SMemTable { volatile int32_t nRef; TSKEY minKey; TSKEY maxKey; - int64_t minVersion; - int64_t maxVersion; int64_t nRow; int64_t nDel; - SArray *aTbData; // SArray + struct { + int32_t nTbData; + int32_t nBucket; + STbData **aBucket; + }; }; struct TSDBROW { @@ -380,63 +393,51 @@ struct SMapData { uint8_t *pData; }; -typedef struct { +struct SBlockCol { int16_t cid; int8_t type; int8_t smaOn; - int8_t flag; // HAS_NONE|HAS_NULL|HAS_VALUE - int32_t offset; - int32_t szBitmap; // bitmap size - int32_t szOffset; // size of offset, only for variant-length data type - int32_t szValue; // compressed column value size + int8_t flag; // HAS_NONE|HAS_NULL|HAS_VALUE int32_t szOrigin; // original column value size (only save for variant data type) -} SBlockCol; + int32_t szBitmap; // bitmap size, 0 only for flag == HAS_VAL + int32_t szOffset; // offset size, 0 only for non-variant-length type + int32_t szValue; // value size, 0 when flag == (HAS_NULL | HAS_NONE) + int32_t offset; +}; -typedef struct { - int32_t nRow; - int8_t cmprAlg; - int64_t offset; // block data offset - int32_t 
szBlockCol; // SBlockCol size - int32_t szVersion; // VERSION size - int32_t szTSKEY; // TSKEY size - int32_t szBlock; // total block size - int64_t sOffset; // sma offset - int32_t nSma; // sma size -} SSubBlock; +struct SBlockInfo { + int64_t offset; // block data offset + int32_t szBlock; + int32_t szKey; +}; + +struct SSmaInfo { + int64_t offset; + int32_t size; +}; struct SBlock { - TSDBKEY minKey; - TSDBKEY maxKey; - int64_t minVersion; - int64_t maxVersion; - int32_t nRow; - int8_t last; - int8_t hasDup; - int8_t nSubBlock; - SSubBlock aSubBlock[TSDB_MAX_SUBBLOCKS]; + TSDBKEY minKey; + TSDBKEY maxKey; + int64_t minVer; + int64_t maxVer; + int32_t nRow; + int8_t hasDup; + int8_t nSubBlock; + SBlockInfo aSubBlock[TSDB_MAX_SUBBLOCKS]; + SSmaInfo smaInfo; }; struct SBlockL { - struct { - int64_t uid; - int64_t version; - TSKEY ts; - } minKey; - struct { - int64_t uid; - int64_t version; - TSKEY ts; - } maxKey; - int64_t minVer; - int64_t maxVer; - int32_t nRow; - int8_t cmprAlg; - int64_t offset; - int32_t szBlock; - int32_t szBlockCol; - int32_t szUid; - int32_t szVer; - int32_t szTSKEY; + int64_t suid; + int64_t minUid; + int64_t maxUid; + TSKEY minKey; + TSKEY maxKey; + int64_t minVer; + int64_t maxVer; + int32_t nRow; + SBlockInfo bInfo; }; struct SColData { @@ -451,10 +452,17 @@ struct SColData { uint8_t *pData; }; +// (SBlockData){.suid = 0, .uid = 0}: block data not initialized +// (SBlockData){.suid = suid, .uid = uid}: block data for ONE child table int .data file +// (SBlockData){.suid = suid, .uid = 0}: block data for N child tables int .last file +// (SBlockData){.suid = 0, .uid = uid}: block data for 1 normal table int .last/.data file struct SBlockData { - int32_t nRow; - int64_t *aVersion; - TSKEY *aTSKEY; + int64_t suid; // 0 means normal table block data, otherwise child table block data + int64_t uid; // 0 means block data in .last file, otherwise in .data file + int32_t nRow; // number of rows + int64_t *aUid; // uids of each row, only exist 
in block data in .last file (uid == 0) + int64_t *aVersion; // versions of each row + TSKEY *aTSKEY; // timestamp of each row SArray *aIdx; // SArray SArray *aColData; // SArray }; @@ -492,13 +500,18 @@ struct SDelIdx { int64_t size; }; -#pragma pack(push, 1) -struct SBlockDataHdr { +struct SDiskDataHdr { uint32_t delimiter; + uint32_t fmtVer; int64_t suid; int64_t uid; + int32_t szUid; + int32_t szVer; + int32_t szKey; + int32_t szBlkCol; + int32_t nRow; + int8_t cmprAlg; }; -#pragma pack(pop) struct SDelFile { volatile int32_t nRef; @@ -528,6 +541,7 @@ struct SLastFile { int64_t commitID; int64_t size; + int64_t offset; }; struct SSmaFile { @@ -562,6 +576,8 @@ struct SDelFWriter { STsdb *pTsdb; SDelFile fDel; TdFilePtr pWriteH; + + uint8_t *aBuf[1]; }; struct SDataFWriter { @@ -577,6 +593,8 @@ struct SDataFWriter { SDataFile fData; SLastFile fLast; SSmaFile fSma; + + uint8_t *aBuf[4]; }; struct STsdbReadSnap { diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 7ac1cc4f0e..8bc82928ed 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -369,6 +369,7 @@ struct SSma { void smaHandleRes(void* pVnode, int64_t smaId, const SArray* data); enum { + SNAP_DATA_CFG = 0, SNAP_DATA_META = 1, SNAP_DATA_TSDB = 2, SNAP_DATA_DEL = 3, diff --git a/source/dnode/vnode/src/meta/metaQuery.c b/source/dnode/vnode/src/meta/metaQuery.c index 7cf365d372..805bc24d8c 100644 --- a/source/dnode/vnode/src/meta/metaQuery.c +++ b/source/dnode/vnode/src/meta/metaQuery.c @@ -127,7 +127,7 @@ _err: // return 0; // } -bool metaIsTableExist(SMeta *pMeta, tb_uid_t uid) { +bool metaIsTableExist(SMeta *pMeta, tb_uid_t uid) { // query uid.idx if (tdbTbGet(pMeta->pUidIdx, &uid, sizeof(uid), NULL, NULL) < 0) { return false; @@ -512,18 +512,65 @@ STSchema *metaGetTbTSchema(SMeta *pMeta, tb_uid_t uid, int32_t sver) { } int32_t metaGetTbTSchemaEx(SMeta *pMeta, tb_uid_t suid, tb_uid_t uid, int32_t sver, STSchema 
**ppTSchema) { - int32_t code = 0; - STSchema *pTSchema = NULL; - SSkmDbKey skmDbKey = {.uid = suid ? suid : uid, .sver = sver}; + int32_t code = 0; + void *pData = NULL; int nData = 0; + SSkmDbKey skmDbKey; + if (sver <= 0) { + SMetaInfo info; + if (metaGetInfo(pMeta, suid ? suid : uid, &info) == 0) { + sver = info.skmVer; + } else { + TBC *pSkmDbC = NULL; + int c; - // query + skmDbKey.uid = suid ? suid : uid; + skmDbKey.sver = INT32_MAX; + + tdbTbcOpen(pMeta->pSkmDb, &pSkmDbC, NULL); + metaRLock(pMeta); + + if (tdbTbcMoveTo(pSkmDbC, &skmDbKey, sizeof(skmDbKey), &c) < 0) { + metaULock(pMeta); + tdbTbcClose(pSkmDbC); + code = TSDB_CODE_NOT_FOUND; + goto _exit; + } + + ASSERT(c); + + if (c < 0) { + tdbTbcMoveToPrev(pSkmDbC); + } + + const void *pKey = NULL; + int32_t nKey = 0; + tdbTbcGet(pSkmDbC, &pKey, &nKey, NULL, NULL); + + if (((SSkmDbKey *)pKey)->uid != skmDbKey.uid) { + metaULock(pMeta); + tdbTbcClose(pSkmDbC); + code = TSDB_CODE_NOT_FOUND; + goto _exit; + } + + sver = ((SSkmDbKey *)pKey)->sver; + + metaULock(pMeta); + tdbTbcClose(pSkmDbC); + } + } + + ASSERT(sver > 0); + + skmDbKey.uid = suid ? 
suid : uid; + skmDbKey.sver = sver; metaRLock(pMeta); - if (tdbTbGet(pMeta->pSkmDb, &skmDbKey, sizeof(skmDbKey), &pData, &nData) < 0) { - code = TSDB_CODE_NOT_FOUND; + if (tdbTbGet(pMeta->pSkmDb, &skmDbKey, sizeof(SSkmDbKey), &pData, &nData) < 0) { metaULock(pMeta); - goto _err; + code = TSDB_CODE_NOT_FOUND; + goto _exit; } metaULock(pMeta); @@ -545,15 +592,13 @@ int32_t metaGetTbTSchemaEx(SMeta *pMeta, tb_uid_t suid, tb_uid_t uid, int32_t sv SSchema *pSchema = pSchemaWrapper->pSchema + i; tdAddColToSchema(&sb, pSchema->type, pSchema->flags, pSchema->colId, pSchema->bytes); } - pTSchema = tdGetSchemaFromBuilder(&sb); + STSchema *pTSchema = tdGetSchemaFromBuilder(&sb); tdDestroyTSchemaBuilder(&sb); *ppTSchema = pTSchema; taosMemoryFree(pSchemaWrapper->pSchema); - return code; -_err: - *ppTSchema = NULL; +_exit: return code; } @@ -1006,6 +1051,8 @@ int32_t metaGetTableTags(SMeta *pMeta, uint64_t suid, SArray *uidList, SHashObj return TSDB_CODE_SUCCESS; } +int32_t metaCacheGet(SMeta *pMeta, int64_t uid, SMetaInfo *pInfo); + int32_t metaGetInfo(SMeta *pMeta, int64_t uid, SMetaInfo *pInfo) { int32_t code = 0; void *pData = NULL; diff --git a/source/dnode/vnode/src/meta/metaTable.c b/source/dnode/vnode/src/meta/metaTable.c index c1e59adbb0..aa107ab253 100644 --- a/source/dnode/vnode/src/meta/metaTable.c +++ b/source/dnode/vnode/src/meta/metaTable.c @@ -357,10 +357,7 @@ int metaAlterSTable(SMeta *pMeta, int64_t version, SVCreateStbReq *pReq) { metaSaveToTbDb(pMeta, &nStbEntry); // update uid index - SMetaInfo info; - metaGetEntryInfo(&nStbEntry, &info); - tdbTbcUpsert(pUidIdxc, &pReq->suid, sizeof(tb_uid_t), - &(SUidIdxVal){.suid = info.suid, .version = info.version, .skmVer = info.skmVer}, sizeof(SUidIdxVal), 0); + metaUpdateUidIdx(pMeta, &nStbEntry); if (oStbEntry.pBuf) taosMemoryFree(oStbEntry.pBuf); metaULock(pMeta); @@ -884,7 +881,8 @@ static int metaUpdateTableTagVal(SMeta *pMeta, int64_t version, SVAlterTbReq *pA } SCtbIdxKey ctbIdxKey = {.suid = 
ctbEntry.ctbEntry.suid, .uid = uid}; - tdbTbUpsert(pMeta->pCtbIdx, &ctbIdxKey, sizeof(ctbIdxKey), ctbEntry.ctbEntry.pTags, ((STag*)(ctbEntry.ctbEntry.pTags))->len, &pMeta->txn); + tdbTbUpsert(pMeta->pCtbIdx, &ctbIdxKey, sizeof(ctbIdxKey), ctbEntry.ctbEntry.pTags, + ((STag *)(ctbEntry.ctbEntry.pTags))->len, &pMeta->txn); tDecoderClear(&dc1); tDecoderClear(&dc2); @@ -1091,7 +1089,8 @@ static int metaUpdateTtlIdx(SMeta *pMeta, const SMetaEntry *pME) { static int metaUpdateCtbIdx(SMeta *pMeta, const SMetaEntry *pME) { SCtbIdxKey ctbIdxKey = {.suid = pME->ctbEntry.suid, .uid = pME->uid}; - return tdbTbInsert(pMeta->pCtbIdx, &ctbIdxKey, sizeof(ctbIdxKey), pME->ctbEntry.pTags, ((STag*)(pME->ctbEntry.pTags))->len, &pMeta->txn); + return tdbTbInsert(pMeta->pCtbIdx, &ctbIdxKey, sizeof(ctbIdxKey), pME->ctbEntry.pTags, + ((STag *)(pME->ctbEntry.pTags))->len, &pMeta->txn); } int metaCreateTagIdxKey(tb_uid_t suid, int32_t cid, const void *pTagData, int32_t nTagData, int8_t type, tb_uid_t uid, diff --git a/source/dnode/vnode/src/tsdb/tsdbCache.c b/source/dnode/vnode/src/tsdb/tsdbCache.c index bb367ff8b1..ed25783e9f 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCache.c +++ b/source/dnode/vnode/src/tsdb/tsdbCache.c @@ -266,14 +266,14 @@ int32_t tsdbCacheInsertLast(SLRUCache *pCache, tb_uid_t uid, STSRow *row, STsdb } for (++iCol; iCol < nCol; ++iCol) { - SLastCol *tTsVal = (SLastCol *)taosArrayGet(pLast, iCol); - if (keyTs >= tTsVal->ts) { - SColVal *tColVal = &tTsVal->colVal; + SLastCol *tTsVal1 = (SLastCol *)taosArrayGet(pLast, iCol); + if (keyTs >= tTsVal1->ts) { + SColVal *tColVal = &tTsVal1->colVal; SColVal colVal = {0}; tTSRowGetVal(row, pTSchema, iCol, &colVal); if (colVal.isNone || colVal.isNull) { - if (keyTs == tTsVal->ts && !tColVal->isNone && !tColVal->isNull) { + if (keyTs == tTsVal1->ts && !tColVal->isNone && !tColVal->isNull) { invalidate = true; break; @@ -284,6 +284,7 @@ int32_t tsdbCacheInsertLast(SLRUCache *pCache, tb_uid_t uid, STSRow *row, STsdb } } + 
_invalidate: taosMemoryFreeClear(pTSchema); taosLRUCacheRelease(pCache, h, invalidate); @@ -322,7 +323,7 @@ static int32_t getTableDelDataFromDelIdx(SDelFReader *pDelReader, SDelIdx *pDelI int32_t code = 0; if (pDelIdx) { - code = tsdbReadDelData(pDelReader, pDelIdx, aDelData, NULL); + code = tsdbReadDelData(pDelReader, pDelIdx, aDelData); } return code; @@ -393,8 +394,7 @@ static int32_t getTableDelIdx(SDelFReader *pDelFReader, tb_uid_t suid, tb_uid_t SDelIdx idx = {.suid = suid, .uid = uid}; // tMapDataReset(&delIdxMap); - // code = tsdbReadDelIdx(pDelFReader, &delIdxMap, NULL); - code = tsdbReadDelIdx(pDelFReader, pDelIdxArray, NULL); + code = tsdbReadDelIdx(pDelFReader, pDelIdxArray); if (code) goto _err; // code = tMapDataSearch(&delIdxMap, &idx, tGetDelIdx, tCmprDelIdx, pDelIdx); @@ -410,6 +410,178 @@ _err: return code; } +typedef enum { + SFSLASTNEXTROW_FS, + SFSLASTNEXTROW_FILESET, + SFSLASTNEXTROW_BLOCKDATA, + SFSLASTNEXTROW_BLOCKROW +} SFSLASTNEXTROWSTATES; + +typedef struct { + SFSLASTNEXTROWSTATES state; // [input] + STsdb *pTsdb; // [input] + SBlockIdx *pBlockIdxExp; // [input] + STSchema *pTSchema; // [input] + int32_t nFileSet; + int32_t iFileSet; + SArray *aDFileSet; + SDataFReader *pDataFReader; + SArray *aBlockL; + SBlockL *pBlockL; + SBlockData *pBlockDataL; + SBlockData blockDataL; + int32_t nRow; + int32_t iRow; + TSDBROW row; + /* + SArray *aBlockIdx; + SBlockIdx *pBlockIdx; + SMapData blockMap; + int32_t nBlock; + int32_t iBlock; + SBlock block; + */ +} SFSLastNextRowIter; + +static int32_t getNextRowFromFSLast(void *iter, TSDBROW **ppRow) { + SFSLastNextRowIter *state = (SFSLastNextRowIter *)iter; + int32_t code = 0; + + switch (state->state) { + case SFSLASTNEXTROW_FS: + // state->aDFileSet = state->pTsdb->pFS->cState->aDFileSet; + state->nFileSet = taosArrayGetSize(state->aDFileSet); + state->iFileSet = state->nFileSet; + + state->pBlockDataL = NULL; + + case SFSLASTNEXTROW_FILESET: { + SDFileSet *pFileSet = NULL; + _next_fileset: + if 
(--state->iFileSet >= 0) { + pFileSet = (SDFileSet *)taosArrayGet(state->aDFileSet, state->iFileSet); + } else { + if (state->pBlockDataL) { + tBlockDataDestroy(state->pBlockDataL, 1); + state->pBlockDataL = NULL; + } + + *ppRow = NULL; + return code; + } + + code = tsdbDataFReaderOpen(&state->pDataFReader, state->pTsdb, pFileSet); + if (code) goto _err; + + if (!state->aBlockL) { + state->aBlockL = taosArrayInit(0, sizeof(SBlockIdx)); + } else { + taosArrayClear(state->aBlockL); + } + + code = tsdbReadBlockL(state->pDataFReader, state->aBlockL); + if (code) goto _err; + + // SBlockL *pBlockL = (SBlockL *)taosArrayGet(state->aBlockL, state->iBlockL); + + state->pBlockL = taosArraySearch(state->aBlockL, state->pBlockIdxExp, tCmprBlockL, TD_EQ); + if (!state->pBlockL) { + goto _next_fileset; + } + + int64_t suid = state->pBlockL->suid; + int64_t uid = state->pBlockL->maxUid; + + if (!state->pBlockDataL) { + state->pBlockDataL = &state->blockDataL; + } + code = tBlockDataInit(state->pBlockDataL, suid, suid ? 
0 : uid, state->pTSchema); + if (code) goto _err; + } + case SFSLASTNEXTROW_BLOCKDATA: + code = tsdbReadLastBlock(state->pDataFReader, state->pBlockL, state->pBlockDataL); + if (code) goto _err; + + state->nRow = state->blockDataL.nRow; + state->iRow = state->nRow - 1; + + if (!state->pBlockDataL->uid) { + while (state->pBlockIdxExp->uid != state->pBlockDataL->aUid[state->iRow]) { + --state->iRow; + } + } + + state->state = SFSLASTNEXTROW_BLOCKROW; + case SFSLASTNEXTROW_BLOCKROW: + if (state->pBlockDataL->uid) { + if (state->iRow >= 0) { + state->row = tsdbRowFromBlockData(state->pBlockDataL, state->iRow); + *ppRow = &state->row; + + if (--state->iRow < 0) { + state->state = SFSLASTNEXTROW_FILESET; + } + } + } else { + if (state->iRow >= 0 && state->pBlockIdxExp->uid == state->pBlockDataL->aUid[state->iRow]) { + state->row = tsdbRowFromBlockData(state->pBlockDataL, state->iRow); + *ppRow = &state->row; + + if (--state->iRow < 0 || state->pBlockIdxExp->uid != state->pBlockDataL->aUid[state->iRow]) { + state->state = SFSLASTNEXTROW_FILESET; + } + } + } + + return code; + default: + ASSERT(0); + break; + } + +_err: + if (state->pDataFReader) { + tsdbDataFReaderClose(&state->pDataFReader); + state->pDataFReader = NULL; + } + if (state->aBlockL) { + taosArrayDestroy(state->aBlockL); + state->aBlockL = NULL; + } + if (state->pBlockDataL) { + tBlockDataDestroy(state->pBlockDataL, 1); + state->pBlockDataL = NULL; + } + + *ppRow = NULL; + + return code; +} + +int32_t clearNextRowFromFSLast(void *iter) { + SFSLastNextRowIter *state = (SFSLastNextRowIter *)iter; + int32_t code = 0; + + if (!state) { + return code; + } + + if (state->pDataFReader) { + tsdbDataFReaderClose(&state->pDataFReader); + state->pDataFReader = NULL; + } + if (state->aBlockL) { + taosArrayDestroy(state->aBlockL); + state->aBlockL = NULL; + } + if (state->pBlockDataL) { + tBlockDataDestroy(state->pBlockDataL, 1); + state->pBlockDataL = NULL; + } + + return code; +} + typedef enum SFSNEXTROWSTATES { 
SFSNEXTROW_FS, SFSNEXTROW_FILESET, @@ -456,9 +628,9 @@ static int32_t getNextRowFromFS(void *iter, TSDBROW **ppRow) { if (--state->iFileSet >= 0) { pFileSet = (SDFileSet *)taosArrayGet(state->aDFileSet, state->iFileSet); } else { - // tBlockDataClear(&state->blockData, 1); + // tBlockDataDestroy(&state->blockData, 1); if (state->pBlockData) { - tBlockDataClear(state->pBlockData, 1); + tBlockDataDestroy(state->pBlockData, 1); state->pBlockData = NULL; } @@ -470,13 +642,12 @@ static int32_t getNextRowFromFS(void *iter, TSDBROW **ppRow) { if (code) goto _err; // tMapDataReset(&state->blockIdxMap); - // code = tsdbReadBlockIdx(state->pDataFReader, &state->blockIdxMap, NULL); if (!state->aBlockIdx) { state->aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx)); } else { taosArrayClear(state->aBlockIdx); } - code = tsdbReadBlockIdx(state->pDataFReader, state->aBlockIdx, NULL); + code = tsdbReadBlockIdx(state->pDataFReader, state->aBlockIdx); if (code) goto _err; /* if (state->pBlockIdx) { */ @@ -492,8 +663,7 @@ static int32_t getNextRowFromFS(void *iter, TSDBROW **ppRow) { } tMapDataReset(&state->blockMap); - code = tsdbReadBlock(state->pDataFReader, state->pBlockIdx, &state->blockMap, NULL); - /* code = tsdbReadBlock(state->pDataFReader, &state->blockIdx, &state->blockMap, NULL); */ + code = tsdbReadBlock(state->pDataFReader, state->pBlockIdx, &state->blockMap); if (code) goto _err; state->nBlock = state->blockMap.nItem; @@ -502,7 +672,7 @@ static int32_t getNextRowFromFS(void *iter, TSDBROW **ppRow) { if (!state->pBlockData) { state->pBlockData = &state->blockData; - tBlockDataInit(&state->blockData); + tBlockDataCreate(&state->blockData); } } case SFSNEXTROW_BLOCKDATA: @@ -515,7 +685,7 @@ static int32_t getNextRowFromFS(void *iter, TSDBROW **ppRow) { tMapDataGetItemByIdx(&state->blockMap, state->iBlock, &block, tGetBlock); /* code = tsdbReadBlockData(state->pDataFReader, &state->blockIdx, &block, &state->blockData, NULL, NULL); */ - code = 
tsdbReadBlockData(state->pDataFReader, state->pBlockIdx, &block, state->pBlockData, NULL, NULL); + code = tsdbReadDataBlock(state->pDataFReader, &block, state->pBlockData); if (code) goto _err; state->nRow = state->blockData.nRow; @@ -560,8 +730,8 @@ _err: state->aBlockIdx = NULL; } if (state->pBlockData) { - // tBlockDataClear(&state->blockData, 1); - tBlockDataClear(state->pBlockData, 1); + // tBlockDataDestroy(&state->blockData, 1); + tBlockDataDestroy(state->pBlockData, 1); state->pBlockData = NULL; } @@ -587,8 +757,8 @@ int32_t clearNextRowFromFS(void *iter) { state->aBlockIdx = NULL; } if (state->pBlockData) { - // tBlockDataClear(&state->blockData, 1); - tBlockDataClear(state->pBlockData, 1); + // tBlockDataDestroy(&state->blockData, 1); + tBlockDataDestroy(state->pBlockData, 1); state->pBlockData = NULL; } @@ -730,18 +900,19 @@ typedef struct { SArray *pSkyline; int64_t iSkyline; - SBlockIdx idx; - SMemNextRowIter memState; - SMemNextRowIter imemState; - SFSNextRowIter fsState; - TSDBROW memRow, imemRow, fsRow; + SBlockIdx idx; + SMemNextRowIter memState; + SMemNextRowIter imemState; + SFSLastNextRowIter fsLastState; + SFSNextRowIter fsState; + TSDBROW memRow, imemRow, fsLastRow, fsRow; - TsdbNextRowState input[3]; + TsdbNextRowState input[4]; STsdbReadSnap *pReadSnap; STsdb *pTsdb; } CacheNextRowIter; -static int32_t nextRowIterOpen(CacheNextRowIter *pIter, tb_uid_t uid, STsdb *pTsdb) { +static int32_t nextRowIterOpen(CacheNextRowIter *pIter, tb_uid_t uid, STsdb *pTsdb, STSchema *pTSchema) { int code = 0; tb_uid_t suid = getTableSuidByUid(uid, pTsdb); @@ -750,12 +921,12 @@ static int32_t nextRowIterOpen(CacheNextRowIter *pIter, tb_uid_t uid, STsdb *pTs STbData *pMem = NULL; if (pIter->pReadSnap->pMem) { - tsdbGetTbDataFromMemTable(pIter->pReadSnap->pMem, suid, uid, &pMem); + pMem = tsdbGetTbDataFromMemTable(pIter->pReadSnap->pMem, suid, uid); } STbData *pIMem = NULL; if (pIter->pReadSnap->pIMem) { - tsdbGetTbDataFromMemTable(pIter->pReadSnap->pIMem, suid, 
uid, &pIMem); + pIMem = tsdbGetTbDataFromMemTable(pIter->pReadSnap->pIMem, suid, uid); } pIter->pTsdb = pTsdb; @@ -768,7 +939,7 @@ static int32_t nextRowIterOpen(CacheNextRowIter *pIter, tb_uid_t uid, STsdb *pTs if (pDelFile) { SDelFReader *pDelFReader; - code = tsdbDelFReaderOpen(&pDelFReader, pDelFile, pTsdb, NULL); + code = tsdbDelFReaderOpen(&pDelFReader, pDelFile, pTsdb); if (code) goto _err; code = getTableDelIdx(pDelFReader, suid, uid, &delIdx); @@ -787,6 +958,12 @@ static int32_t nextRowIterOpen(CacheNextRowIter *pIter, tb_uid_t uid, STsdb *pTs pIter->idx = (SBlockIdx){.suid = suid, .uid = uid}; + pIter->fsLastState.state = (SFSLASTNEXTROWSTATES) SFSNEXTROW_FS; + pIter->fsLastState.pTsdb = pTsdb; + pIter->fsLastState.aDFileSet = pIter->pReadSnap->fs.aDFileSet; + pIter->fsLastState.pBlockIdxExp = &pIter->idx; + pIter->fsLastState.pTSchema = pTSchema; + pIter->fsState.state = SFSNEXTROW_FS; pIter->fsState.pTsdb = pTsdb; pIter->fsState.aDFileSet = pIter->pReadSnap->fs.aDFileSet; @@ -794,7 +971,9 @@ static int32_t nextRowIterOpen(CacheNextRowIter *pIter, tb_uid_t uid, STsdb *pTs pIter->input[0] = (TsdbNextRowState){&pIter->memRow, true, false, &pIter->memState, getNextRowFromMem, NULL}; pIter->input[1] = (TsdbNextRowState){&pIter->imemRow, true, false, &pIter->imemState, getNextRowFromMem, NULL}; - pIter->input[2] = + pIter->input[2] = (TsdbNextRowState){&pIter->fsLastRow, false, true, &pIter->fsLastState, getNextRowFromFSLast, + clearNextRowFromFSLast}; + pIter->input[3] = (TsdbNextRowState){&pIter->fsRow, false, true, &pIter->fsState, getNextRowFromFS, clearNextRowFromFS}; if (pMem) { @@ -819,7 +998,7 @@ _err: static int32_t nextRowIterClose(CacheNextRowIter *pIter) { int code = 0; - for (int i = 0; i < 3; ++i) { + for (int i = 0; i < 4; ++i) { if (pIter->input[i].nextRowClearFn) { pIter->input[i].nextRowClearFn(pIter->input[i].iter); } @@ -831,7 +1010,6 @@ static int32_t nextRowIterClose(CacheNextRowIter *pIter) { tsdbUntakeReadSnap(pIter->pTsdb, 
pIter->pReadSnap); - return code; _err: return code; } @@ -840,7 +1018,7 @@ _err: static int32_t nextRowIterGet(CacheNextRowIter *pIter, TSDBROW **ppRow) { int code = 0; - for (int i = 0; i < 3; ++i) { + for (int i = 0; i < 4; ++i) { if (pIter->input[i].next && !pIter->input[i].stop) { code = pIter->input[i].nextRowFn(pIter->input[i].iter, &pIter->input[i].pRow); if (code) goto _err; @@ -852,18 +1030,18 @@ static int32_t nextRowIterGet(CacheNextRowIter *pIter, TSDBROW **ppRow) { } } - if (pIter->input[0].stop && pIter->input[1].stop && pIter->input[2].stop) { + if (pIter->input[0].stop && pIter->input[1].stop && pIter->input[2].stop && pIter->input[3].stop) { *ppRow = NULL; return code; } - // select maxpoint(s) from mem, imem, fs - TSDBROW *max[3] = {0}; - int iMax[3] = {-1, -1, -1}; + // select maxpoint(s) from mem, imem, fs and last + TSDBROW *max[4] = {0}; + int iMax[4] = {-1, -1, -1, -1}; int nMax = 0; TSKEY maxKey = TSKEY_MIN; - for (int i = 0; i < 3; ++i) { + for (int i = 0; i < 4; ++i) { if (!pIter->input[i].stop && pIter->input[i].pRow != NULL) { TSDBKEY key = TSDBROW_KEY(pIter->input[i].pRow); @@ -881,13 +1059,13 @@ static int32_t nextRowIterGet(CacheNextRowIter *pIter, TSDBROW **ppRow) { } // delete detection - TSDBROW *merge[3] = {0}; - int iMerge[3] = {-1, -1, -1}; + TSDBROW *merge[4] = {0}; + int iMerge[4] = {-1, -1, -1, -1}; int nMerge = 0; for (int i = 0; i < nMax; ++i) { - TSDBKEY maxKey = TSDBROW_KEY(max[i]); + TSDBKEY maxKey1 = TSDBROW_KEY(max[i]); - bool deleted = tsdbKeyDeleted(&maxKey, pIter->pSkyline, &pIter->iSkyline); + bool deleted = tsdbKeyDeleted(&maxKey1, pIter->pSkyline, &pIter->iSkyline); if (!deleted) { iMerge[nMerge] = iMax[i]; merge[nMerge++] = max[i]; @@ -923,7 +1101,7 @@ static int32_t mergeLastRow(tb_uid_t uid, STsdb *pTsdb, bool *dup, STSRow **ppRo TSKEY lastRowTs = TSKEY_MAX; CacheNextRowIter iter = {0}; - nextRowIterOpen(&iter, uid, pTsdb); + nextRowIterOpen(&iter, uid, pTsdb, pTSchema); do { TSDBROW *pRow = NULL; @@ -1020,7 
+1198,7 @@ static int32_t mergeLast(tb_uid_t uid, STsdb *pTsdb, SArray **ppLastArray) { TSKEY lastRowTs = TSKEY_MAX; CacheNextRowIter iter = {0}; - nextRowIterOpen(&iter, uid, pTsdb); + nextRowIterOpen(&iter, uid, pTsdb, pTSchema); do { TSDBROW *pRow = NULL; diff --git a/source/dnode/vnode/src/tsdb/tsdbCommit.c b/source/dnode/vnode/src/tsdb/tsdbCommit.c index 6e25166203..020f3b0bc6 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCommit.c +++ b/source/dnode/vnode/src/tsdb/tsdbCommit.c @@ -20,6 +20,12 @@ typedef struct { STSchema *pTSchema; } SSkmInfo; +typedef struct { + int64_t suid; + int64_t uid; + TSDBROW row; +} SRowInfo; + typedef struct { STsdb *pTsdb; /* commit data */ @@ -29,6 +35,7 @@ typedef struct { int32_t minRow; int32_t maxRow; int8_t cmprAlg; + SArray *aTbDataP; STsdbFS fs; // -------------- TSKEY nextKey; // reset by each table commit @@ -38,15 +45,27 @@ typedef struct { // commit file data struct { SDataFReader *pReader; - SArray *aBlockIdx; // SArray - SMapData mBlock; // SMapData, read from reader - SBlockData bData; + // data + SArray *aBlockIdx; // SArray + int32_t iBlockIdx; + SBlockIdx *pBlockIdx; + SMapData mBlock; // SMapData + SBlockData bData; + // last + SArray *aBlockL; // SArray + int32_t iBlockL; + SBlockData bDatal; + int32_t iRow; + SRowInfo *pRowInfo; + SRowInfo rowInfo; } dReader; struct { SDataFWriter *pWriter; SArray *aBlockIdx; // SArray + SArray *aBlockL; // SArray SMapData mBlock; // SMapData SBlockData bData; + SBlockData bDatal; } dWriter; SSkmInfo skmTable; SSkmInfo skmRow; @@ -162,10 +181,10 @@ static int32_t tsdbCommitDelStart(SCommitter *pCommitter) { SDelFile *pDelFileR = pCommitter->fs.pDelFile; if (pDelFileR) { - code = tsdbDelFReaderOpen(&pCommitter->pDelFReader, pDelFileR, pTsdb, NULL); + code = tsdbDelFReaderOpen(&pCommitter->pDelFReader, pDelFileR, pTsdb); if (code) goto _err; - code = tsdbReadDelIdx(pCommitter->pDelFReader, pCommitter->aDelIdx, NULL); + code = tsdbReadDelIdx(pCommitter->pDelFReader, 
pCommitter->aDelIdx); if (code) goto _err; } @@ -202,7 +221,7 @@ static int32_t tsdbCommitTableDel(SCommitter *pCommitter, STbData *pTbData, SDel suid = pDelIdx->suid; uid = pDelIdx->uid; - code = tsdbReadDelData(pCommitter->pDelFReader, pDelIdx, pCommitter->aDelData, NULL); + code = tsdbReadDelData(pCommitter->pDelFReader, pDelIdx, pCommitter->aDelData); if (code) goto _err; } else { taosArrayClear(pCommitter->aDelData); @@ -222,7 +241,7 @@ static int32_t tsdbCommitTableDel(SCommitter *pCommitter, STbData *pTbData, SDel } // write - code = tsdbWriteDelData(pCommitter->pDelFWriter, pCommitter->aDelData, NULL, &delIdx); + code = tsdbWriteDelData(pCommitter->pDelFWriter, pCommitter->aDelData, &delIdx); if (code) goto _err; // put delIdx @@ -243,7 +262,7 @@ static int32_t tsdbCommitDelEnd(SCommitter *pCommitter) { int32_t code = 0; STsdb *pTsdb = pCommitter->pTsdb; - code = tsdbWriteDelIdx(pCommitter->pDelFWriter, pCommitter->aDelIdxN, NULL); + code = tsdbWriteDelIdx(pCommitter->pDelFWriter, pCommitter->aDelIdxN); if (code) goto _err; code = tsdbUpdateDelFileHdr(pCommitter->pDelFWriter); @@ -271,87 +290,19 @@ _err: return code; } -static int32_t tsdbCommitFileDataStart(SCommitter *pCommitter) { - int32_t code = 0; - STsdb *pTsdb = pCommitter->pTsdb; - SDFileSet *pRSet = NULL; - - // memory - pCommitter->nextKey = TSKEY_MAX; - - // old - taosArrayClear(pCommitter->dReader.aBlockIdx); - tMapDataReset(&pCommitter->dReader.mBlock); - tBlockDataReset(&pCommitter->dReader.bData); - pRSet = (SDFileSet *)taosArraySearch(pCommitter->fs.aDFileSet, &(SDFileSet){.fid = pCommitter->commitFid}, - tDFileSetCmprFn, TD_EQ); - if (pRSet) { - code = tsdbDataFReaderOpen(&pCommitter->dReader.pReader, pTsdb, pRSet); - if (code) goto _err; - - code = tsdbReadBlockIdx(pCommitter->dReader.pReader, pCommitter->dReader.aBlockIdx, NULL); - if (code) goto _err; - } - - // new - SHeadFile fHead; - SDataFile fData; - SLastFile fLast; - SSmaFile fSma; - SDFileSet wSet = {.pHeadF = &fHead, .pDataF = 
&fData, .pLastF = &fLast, .pSmaF = &fSma}; - - taosArrayClear(pCommitter->dWriter.aBlockIdx); - tMapDataReset(&pCommitter->dWriter.mBlock); - tBlockDataReset(&pCommitter->dWriter.bData); - if (pRSet) { - wSet.diskId = pRSet->diskId; - wSet.fid = pCommitter->commitFid; - fHead = (SHeadFile){.commitID = pCommitter->commitID, .offset = 0, .size = 0}; - fData = *pRSet->pDataF; - fLast = (SLastFile){.commitID = pCommitter->commitID, .size = 0}; - fSma = *pRSet->pSmaF; - } else { - SDiskID did = {0}; - - tfsAllocDisk(pTsdb->pVnode->pTfs, 0, &did); - - tfsMkdirRecurAt(pTsdb->pVnode->pTfs, pTsdb->path, did); - - wSet.diskId = did; - wSet.fid = pCommitter->commitFid; - fHead = (SHeadFile){.commitID = pCommitter->commitID, .offset = 0, .size = 0}; - fData = (SDataFile){.commitID = pCommitter->commitID, .size = 0}; - fLast = (SLastFile){.commitID = pCommitter->commitID, .size = 0}; - fSma = (SSmaFile){.commitID = pCommitter->commitID, .size = 0}; - } - code = tsdbDataFWriterOpen(&pCommitter->dWriter.pWriter, pTsdb, &wSet); - if (code) goto _err; - -_exit: - return code; - -_err: - tsdbError("vgId:%d, commit file data start failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); - return code; -} - -static int32_t tsdbCommitterUpdateTableSchema(SCommitter *pCommitter, int64_t suid, int64_t uid, int32_t sver) { +static int32_t tsdbCommitterUpdateTableSchema(SCommitter *pCommitter, int64_t suid, int64_t uid) { int32_t code = 0; - if (pCommitter->skmTable.pTSchema) { - if (pCommitter->skmTable.suid == suid) { - if (suid == 0) { - if (pCommitter->skmTable.uid == uid && sver == pCommitter->skmTable.pTSchema->version) goto _exit; - } else { - if (sver == pCommitter->skmTable.pTSchema->version) goto _exit; - } - } + if (suid) { + if (pCommitter->skmTable.suid == suid) goto _exit; + } else { + if (pCommitter->skmTable.uid == uid) goto _exit; } pCommitter->skmTable.suid = suid; pCommitter->skmTable.uid = uid; tTSchemaDestroy(pCommitter->skmTable.pTSchema); - code = 
metaGetTbTSchemaEx(pCommitter->pTsdb->pVnode->pMeta, suid, uid, sver, &pCommitter->skmTable.pTSchema); + code = metaGetTbTSchemaEx(pCommitter->pTsdb->pVnode->pMeta, suid, uid, -1, &pCommitter->skmTable.pTSchema); if (code) goto _exit; _exit: @@ -383,152 +334,355 @@ _exit: return code; } -static int32_t tsdbCommitBlockData(SCommitter *pCommitter, SBlockData *pBlockData, SBlock *pBlock, SBlockIdx *pBlockIdx, - int8_t toDataOnly) { +static int32_t tsdbCommitterNextLastRow(SCommitter *pCommitter) { int32_t code = 0; - if (pBlock->nSubBlock == 0) { - if (!toDataOnly && pBlockData->nRow < pCommitter->minRow) { - pBlock->last = 1; + ASSERT(pCommitter->dReader.pReader); + ASSERT(pCommitter->dReader.pRowInfo); + + SBlockData *pBlockDatal = &pCommitter->dReader.bDatal; + pCommitter->dReader.iRow++; + if (pCommitter->dReader.iRow < pBlockDatal->nRow) { + if (pBlockDatal->uid) { + pCommitter->dReader.pRowInfo->uid = pBlockDatal->uid; } else { - pBlock->last = 0; + pCommitter->dReader.pRowInfo->uid = pBlockDatal->aUid[pCommitter->dReader.iRow]; + } + pCommitter->dReader.pRowInfo->row = tsdbRowFromBlockData(pBlockDatal, pCommitter->dReader.iRow); + } else { + pCommitter->dReader.iBlockL++; + if (pCommitter->dReader.iBlockL < taosArrayGetSize(pCommitter->dReader.aBlockL)) { + SBlockL *pBlockL = (SBlockL *)taosArrayGet(pCommitter->dReader.aBlockL, pCommitter->dReader.iBlockL); + int64_t suid = pBlockL->suid; + int64_t uid = pBlockL->maxUid; + + code = tsdbCommitterUpdateTableSchema(pCommitter, suid, uid); + if (code) goto _exit; + + code = tBlockDataInit(pBlockDatal, suid, suid ? 
0 : uid, pCommitter->skmTable.pTSchema); + if (code) goto _exit; + + code = tsdbReadLastBlock(pCommitter->dReader.pReader, pBlockL, pBlockDatal); + if (code) goto _exit; + + pCommitter->dReader.iRow = 0; + pCommitter->dReader.pRowInfo->suid = pBlockDatal->suid; + if (pBlockDatal->uid) { + pCommitter->dReader.pRowInfo->uid = pBlockDatal->uid; + } else { + pCommitter->dReader.pRowInfo->uid = pBlockDatal->aUid[0]; + } + pCommitter->dReader.pRowInfo->row = tsdbRowFromBlockData(pBlockDatal, pCommitter->dReader.iRow); + } else { + pCommitter->dReader.pRowInfo = NULL; } } - code = - tsdbWriteBlockData(pCommitter->dWriter.pWriter, pBlockData, NULL, NULL, pBlockIdx, pBlock, pCommitter->cmprAlg); +_exit: + return code; +} + +static int32_t tsdbCommitterNextTableData(SCommitter *pCommitter) { + int32_t code = 0; + + ASSERT(pCommitter->dReader.pBlockIdx); + + pCommitter->dReader.iBlockIdx++; + if (pCommitter->dReader.iBlockIdx < taosArrayGetSize(pCommitter->dReader.aBlockIdx)) { + pCommitter->dReader.pBlockIdx = + (SBlockIdx *)taosArrayGet(pCommitter->dReader.aBlockIdx, pCommitter->dReader.iBlockIdx); + + code = tsdbReadBlock(pCommitter->dReader.pReader, pCommitter->dReader.pBlockIdx, &pCommitter->dReader.mBlock); + if (code) goto _exit; + + ASSERT(pCommitter->dReader.mBlock.nItem > 0); + } else { + pCommitter->dReader.pBlockIdx = NULL; + } + +_exit: + return code; +} + +static int32_t tsdbCommitFileDataStart(SCommitter *pCommitter) { + int32_t code = 0; + STsdb *pTsdb = pCommitter->pTsdb; + SDFileSet *pRSet = NULL; + + // memory + pCommitter->commitFid = tsdbKeyFid(pCommitter->nextKey, pCommitter->minutes, pCommitter->precision); + tsdbFidKeyRange(pCommitter->commitFid, pCommitter->minutes, pCommitter->precision, &pCommitter->minKey, + &pCommitter->maxKey); + pCommitter->nextKey = TSKEY_MAX; + + // Reader + pRSet = (SDFileSet *)taosArraySearch(pCommitter->fs.aDFileSet, &(SDFileSet){.fid = pCommitter->commitFid}, + tDFileSetCmprFn, TD_EQ); + if (pRSet) { + code = 
tsdbDataFReaderOpen(&pCommitter->dReader.pReader, pTsdb, pRSet); + if (code) goto _err; + + // data + code = tsdbReadBlockIdx(pCommitter->dReader.pReader, pCommitter->dReader.aBlockIdx); + if (code) goto _err; + + pCommitter->dReader.iBlockIdx = 0; + if (pCommitter->dReader.iBlockIdx < taosArrayGetSize(pCommitter->dReader.aBlockIdx)) { + pCommitter->dReader.pBlockIdx = + (SBlockIdx *)taosArrayGet(pCommitter->dReader.aBlockIdx, pCommitter->dReader.iBlockIdx); + + code = tsdbReadBlock(pCommitter->dReader.pReader, pCommitter->dReader.pBlockIdx, &pCommitter->dReader.mBlock); + if (code) goto _err; + } else { + pCommitter->dReader.pBlockIdx = NULL; + } + tBlockDataReset(&pCommitter->dReader.bData); + + // last + code = tsdbReadBlockL(pCommitter->dReader.pReader, pCommitter->dReader.aBlockL); + if (code) goto _err; + + pCommitter->dReader.iBlockL = -1; + pCommitter->dReader.iRow = -1; + pCommitter->dReader.pRowInfo = &pCommitter->dReader.rowInfo; + tBlockDataReset(&pCommitter->dReader.bDatal); + code = tsdbCommitterNextLastRow(pCommitter); + if (code) goto _err; + } else { + pCommitter->dReader.pBlockIdx = NULL; + pCommitter->dReader.pRowInfo = NULL; + } + + // Writer + SHeadFile fHead; + SDataFile fData; + SLastFile fLast; + SSmaFile fSma; + SDFileSet wSet = {.pHeadF = &fHead, .pDataF = &fData, .pLastF = &fLast, .pSmaF = &fSma}; + if (pRSet) { + wSet.diskId = pRSet->diskId; + wSet.fid = pCommitter->commitFid; + fHead = (SHeadFile){.commitID = pCommitter->commitID, .size = 0, .offset = 0}; + fData = *pRSet->pDataF; + fLast = (SLastFile){.commitID = pCommitter->commitID, .size = 0, .offset = 0}; + fSma = *pRSet->pSmaF; + } else { + SDiskID did = {0}; + + tfsAllocDisk(pTsdb->pVnode->pTfs, 0, &did); + + tfsMkdirRecurAt(pTsdb->pVnode->pTfs, pTsdb->path, did); + + wSet.diskId = did; + wSet.fid = pCommitter->commitFid; + fHead = (SHeadFile){.commitID = pCommitter->commitID, .size = 0, .offset = 0}; + fData = (SDataFile){.commitID = pCommitter->commitID, .size = 0}; + fLast = 
(SLastFile){.commitID = pCommitter->commitID, .size = 0, .offset = 0}; + fSma = (SSmaFile){.commitID = pCommitter->commitID, .size = 0}; + } + code = tsdbDataFWriterOpen(&pCommitter->dWriter.pWriter, pTsdb, &wSet); if (code) goto _err; - code = tMapDataPutItem(&pCommitter->dWriter.mBlock, pBlock, tPutBlock); + taosArrayClear(pCommitter->dWriter.aBlockIdx); + taosArrayClear(pCommitter->dWriter.aBlockL); + tMapDataReset(&pCommitter->dWriter.mBlock); + tBlockDataReset(&pCommitter->dWriter.bData); + tBlockDataReset(&pCommitter->dWriter.bDatal); + +_exit: + return code; + +_err: + tsdbError("vgId:%d, commit file data start failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); + return code; +} + +static int32_t tsdbCommitDataBlock(SCommitter *pCommitter, SBlock *pBlock) { + int32_t code = 0; + SBlockData *pBlockData = &pCommitter->dWriter.bData; + SBlock block; + + ASSERT(pBlockData->nRow > 0); + + if (pBlock) { + block = *pBlock; // as a subblock + } else { + tBlockReset(&block); // as a new block + } + + // info + block.nRow += pBlockData->nRow; + for (int32_t iRow = 0; iRow < pBlockData->nRow; iRow++) { + TSDBKEY key = {.ts = pBlockData->aTSKEY[iRow], .version = pBlockData->aVersion[iRow]}; + + if (iRow == 0) { + if (tsdbKeyCmprFn(&block.minKey, &key) > 0) { + block.minKey = key; + } + } else { + if (pBlockData->aTSKEY[iRow] == pBlockData->aTSKEY[iRow - 1]) { + block.hasDup = 1; + } + } + + if (iRow == pBlockData->nRow - 1 && tsdbKeyCmprFn(&block.maxKey, &key) < 0) { + block.maxKey = key; + } + + block.minVer = TMIN(block.minVer, key.version); + block.maxVer = TMAX(block.maxVer, key.version); + } + + // write + block.nSubBlock++; + code = tsdbWriteBlockData(pCommitter->dWriter.pWriter, pBlockData, &block.aSubBlock[block.nSubBlock - 1], + ((block.nSubBlock == 1) && !block.hasDup) ? 
&block.smaInfo : NULL, pCommitter->cmprAlg, 0); if (code) goto _err; + // put SBlock + code = tMapDataPutItem(&pCommitter->dWriter.mBlock, &block, tPutBlock); + if (code) goto _err; + + // clear + tBlockDataClear(pBlockData); + return code; _err: + tsdbError("vgId:%d tsdb commit data block failed since %s", TD_VID(pCommitter->pTsdb->pVnode), tstrerror(code)); return code; } -static int32_t tsdbMergeTableData(SCommitter *pCommitter, STbDataIter *pIter, SBlock *pBlockMerge, TSDBKEY toKey, - int8_t toDataOnly) { +static int32_t tsdbCommitLastBlock(SCommitter *pCommitter) { int32_t code = 0; - SBlockIdx *pBlockIdx = &(SBlockIdx){.suid = pIter->pTbData->suid, .uid = pIter->pTbData->uid}; - SBlockData *pBlockDataMerge = &pCommitter->dReader.bData; - SBlockData *pBlockData = &pCommitter->dWriter.bData; - SBlock block; - SBlock *pBlock = &block; - TSDBROW *pRow1; - TSDBROW row2; - TSDBROW *pRow2 = &row2; + SBlockL blockL; + SBlockData *pBlockData = &pCommitter->dWriter.bDatal; - // read SBlockData - code = tsdbReadBlockData(pCommitter->dReader.pReader, pBlockIdx, pBlockMerge, pBlockDataMerge, NULL, NULL); + ASSERT(pBlockData->nRow > 0); + + // info + blockL.suid = pBlockData->suid; + blockL.nRow = pBlockData->nRow; + blockL.minKey = TSKEY_MAX; + blockL.maxKey = TSKEY_MIN; + blockL.minVer = VERSION_MAX; + blockL.maxVer = VERSION_MIN; + for (int32_t iRow = 0; iRow < pBlockData->nRow; iRow++) { + blockL.minKey = TMIN(blockL.minKey, pBlockData->aTSKEY[iRow]); + blockL.maxKey = TMAX(blockL.maxKey, pBlockData->aTSKEY[iRow]); + blockL.minVer = TMIN(blockL.minVer, pBlockData->aVersion[iRow]); + blockL.maxVer = TMAX(blockL.maxVer, pBlockData->aVersion[iRow]); + } + blockL.minUid = pBlockData->uid ? pBlockData->uid : pBlockData->aUid[0]; + blockL.maxUid = pBlockData->uid ? 
pBlockData->uid : pBlockData->aUid[pBlockData->nRow - 1]; + + // write + code = tsdbWriteBlockData(pCommitter->dWriter.pWriter, pBlockData, &blockL.bInfo, NULL, pCommitter->cmprAlg, 1); if (code) goto _err; - code = tBlockDataSetSchema(pBlockData, pCommitter->skmTable.pTSchema); + // push SBlockL + if (taosArrayPush(pCommitter->dWriter.aBlockL, &blockL) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + + // clear + tBlockDataClear(pBlockData); + + return code; + +_err: + tsdbError("vgId:%d tsdb commit last block failed since %s", TD_VID(pCommitter->pTsdb->pVnode), tstrerror(code)); + return code; +} + +static int32_t tsdbMergeCommitData(SCommitter *pCommitter, STbDataIter *pIter, SBlock *pBlock) { + int32_t code = 0; + STbData *pTbData = pIter->pTbData; + SBlockData *pBlockDataR = &pCommitter->dReader.bData; + SBlockData *pBlockDataW = &pCommitter->dWriter.bData; + + code = tsdbReadDataBlock(pCommitter->dReader.pReader, pBlock, pBlockDataR); if (code) goto _err; - // loop to merge - pRow1 = tsdbTbDataIterGet(pIter); - *pRow2 = tsdbRowFromBlockData(pBlockDataMerge, 0); - ASSERT(pRow1 && tsdbKeyCmprFn(&TSDBROW_KEY(pRow1), &toKey) < 0); - ASSERT(tsdbKeyCmprFn(&TSDBROW_KEY(pRow2), &toKey) < 0); - code = tsdbCommitterUpdateRowSchema(pCommitter, pBlockIdx->suid, pBlockIdx->uid, TSDBROW_SVERSION(pRow1)); - if (code) goto _err; + tBlockDataClear(pBlockDataW); + int32_t iRow = 0; + TSDBROW row; + TSDBROW *pRow1 = tsdbTbDataIterGet(pIter); + TSDBROW *pRow2 = &row; + *pRow2 = tsdbRowFromBlockData(pBlockDataR, iRow); + while (pRow1 && pRow2) { + int32_t c = tsdbRowCmprFn(pRow1, pRow2); - tBlockReset(pBlock); - tBlockDataClearData(pBlockData); - while (true) { - if (pRow1 == NULL && pRow2 == NULL) { - if (pBlockData->nRow == 0) { - break; + if (c < 0) { + code = tsdbCommitterUpdateRowSchema(pCommitter, pTbData->suid, pTbData->uid, TSDBROW_SVERSION(pRow1)); + if (code) goto _err; + + code = tBlockDataAppendRow(pBlockDataW, pRow1, pCommitter->skmRow.pTSchema, 
pTbData->uid); + if (code) goto _err; + + // next + tsdbTbDataIterNext(pIter); + pRow1 = tsdbTbDataIterGet(pIter); + } else if (c > 0) { + code = tBlockDataAppendRow(pBlockDataW, pRow2, NULL, pTbData->uid); + if (code) goto _err; + + iRow++; + if (iRow < pBlockDataR->nRow) { + *pRow2 = tsdbRowFromBlockData(pBlockDataR, iRow); } else { - goto _write_block; + pRow2 = NULL; } - } - - if (pRow1 && pRow2) { - int32_t c = tsdbRowCmprFn(pRow1, pRow2); - if (c < 0) { - goto _append_mem_row; - } else if (c > 0) { - goto _append_block_row; - } else { - ASSERT(0); - } - } else if (pRow1) { - goto _append_mem_row; } else { - goto _append_block_row; + ASSERT(0); } - _append_mem_row: - code = tBlockDataAppendRow(pBlockData, pRow1, pCommitter->skmRow.pTSchema); + // check + if (pBlockDataW->nRow >= pCommitter->maxRow * 4 / 5) { + code = tsdbCommitDataBlock(pCommitter, NULL); + if (code) goto _err; + } + } + + while (pRow2) { + code = tBlockDataAppendRow(pBlockDataW, pRow2, NULL, pTbData->uid); if (code) goto _err; - tsdbTbDataIterNext(pIter); - pRow1 = tsdbTbDataIterGet(pIter); - if (pRow1) { - if (tsdbKeyCmprFn(&TSDBROW_KEY(pRow1), &toKey) < 0) { - code = tsdbCommitterUpdateRowSchema(pCommitter, pBlockIdx->suid, pBlockIdx->uid, TSDBROW_SVERSION(pRow1)); - if (code) goto _err; - } else { - pRow1 = NULL; - } - } - - if (pBlockData->nRow >= pCommitter->maxRow * 4 / 5) { - goto _write_block; - } else { - continue; - } - - _append_block_row: - code = tBlockDataAppendRow(pBlockData, pRow2, NULL); - if (code) goto _err; - - if (pRow2->iRow + 1 < pBlockDataMerge->nRow) { - *pRow2 = tsdbRowFromBlockData(pBlockDataMerge, pRow2->iRow + 1); + iRow++; + if (iRow < pBlockDataR->nRow) { + *pRow2 = tsdbRowFromBlockData(pBlockDataR, iRow); } else { pRow2 = NULL; } - if (pBlockData->nRow >= pCommitter->maxRow * 4 / 5) { - goto _write_block; - } else { - continue; + // check + if (pBlockDataW->nRow >= pCommitter->maxRow * 4 / 5) { + code = tsdbCommitDataBlock(pCommitter, NULL); + if (code) goto 
_err; } + } - _write_block: - code = tsdbCommitBlockData(pCommitter, pBlockData, pBlock, pBlockIdx, toDataOnly); + // check + if (pBlockDataW->nRow > 0) { + code = tsdbCommitDataBlock(pCommitter, NULL); if (code) goto _err; - - tBlockReset(pBlock); - tBlockDataClearData(pBlockData); } return code; _err: - tsdbError("vgId:%d, tsdb merge block and mem failed since %s", TD_VID(pCommitter->pTsdb->pVnode), tstrerror(code)); + tsdbError("vgId:%d, tsdb merge commit data failed since %s", TD_VID(pCommitter->pTsdb->pVnode), tstrerror(code)); return code; } -static int32_t tsdbCommitTableMemData(SCommitter *pCommitter, STbDataIter *pIter, TSDBKEY toKey, int8_t toDataOnly) { +static int32_t tsdbCommitTableMemData(SCommitter *pCommitter, STbDataIter *pIter, TSDBKEY toKey) { int32_t code = 0; - TSDBROW *pRow; - SBlock block; - SBlock *pBlock = &block; + STbData *pTbData = pIter->pTbData; SBlockData *pBlockData = &pCommitter->dWriter.bData; - int64_t suid = pIter->pTbData->suid; - int64_t uid = pIter->pTbData->uid; - code = tBlockDataSetSchema(pBlockData, pCommitter->skmTable.pTSchema); - if (code) goto _err; - - tBlockReset(pBlock); - tBlockDataClearData(pBlockData); - pRow = tsdbTbDataIterGet(pIter); - ASSERT(pRow && tsdbKeyCmprFn(&TSDBROW_KEY(pRow), &toKey) < 0); + tBlockDataClear(pBlockData); + TSDBROW *pRow = tsdbTbDataIterGet(pIter); while (true) { if (pRow == NULL) { if (pBlockData->nRow > 0) { @@ -539,33 +693,27 @@ static int32_t tsdbCommitTableMemData(SCommitter *pCommitter, STbDataIter *pIter } // update schema - code = tsdbCommitterUpdateRowSchema(pCommitter, suid, uid, TSDBROW_SVERSION(pRow)); + code = tsdbCommitterUpdateRowSchema(pCommitter, pTbData->suid, pTbData->uid, TSDBROW_SVERSION(pRow)); if (code) goto _err; // append - code = tBlockDataAppendRow(pBlockData, pRow, pCommitter->skmRow.pTSchema); + code = tBlockDataAppendRow(pBlockData, pRow, pCommitter->skmRow.pTSchema, pTbData->uid); if (code) goto _err; tsdbTbDataIterNext(pIter); pRow = tsdbTbDataIterGet(pIter); - 
// if (pRow && tsdbKeyCmprFn(&TSDBROW_KEY(pRow), &toKey) >= 0) pRow = NULL; - // crash on CI, use the block following if (pRow) { - TSDBKEY tmpKey = TSDBROW_KEY(pRow); - if (tsdbKeyCmprFn(&tmpKey, &toKey) >= 0) { + TSDBKEY rowKey = TSDBROW_KEY(pRow); + if (tsdbKeyCmprFn(&rowKey, &toKey) >= 0) { pRow = NULL; } } - if (pBlockData->nRow >= pCommitter->maxRow * 4 / 5) goto _write_block; - continue; - - _write_block: - code = tsdbCommitBlockData(pCommitter, pBlockData, pBlock, &(SBlockIdx){.suid = suid, .uid = uid}, toDataOnly); - if (code) goto _err; - - tBlockReset(pBlock); - tBlockDataClearData(pBlockData); + if (pBlockData->nRow >= pCommitter->maxRow * 4 / 5) { + _write_block: + code = tsdbCommitDataBlock(pCommitter, NULL); + if (code) goto _err; + } } return code; @@ -575,65 +723,16 @@ _err: return code; } -static int32_t tsdbCommitTableDiskData(SCommitter *pCommitter, SBlock *pBlock, SBlockIdx *pBlockIdx) { - int32_t code = 0; - SBlock block; +static int32_t tsdbGetNumOfRowsLessThan(STbDataIter *pIter, TSDBKEY key) { + int32_t nRow = 0; - if (pBlock->last) { - code = tsdbReadBlockData(pCommitter->dReader.pReader, pBlockIdx, pBlock, &pCommitter->dReader.bData, NULL, NULL); - if (code) goto _err; - - tBlockReset(&block); - code = tsdbCommitBlockData(pCommitter, &pCommitter->dReader.bData, &block, pBlockIdx, 0); - if (code) goto _err; - } else { - code = tMapDataPutItem(&pCommitter->dWriter.mBlock, pBlock, tPutBlock); - if (code) goto _err; - } - - return code; - -_err: - tsdbError("vgId:%d, tsdb commit table disk data failed since %s", TD_VID(pCommitter->pTsdb->pVnode), tstrerror(code)); - return code; -} - -static int32_t tsdbCommitTableDataEnd(SCommitter *pCommitter, int64_t suid, int64_t uid) { - int32_t code = 0; - SBlockIdx blockIdx = {.suid = suid, .uid = uid}; - SBlockIdx *pBlockIdx = &blockIdx; - - code = tsdbWriteBlock(pCommitter->dWriter.pWriter, &pCommitter->dWriter.mBlock, NULL, pBlockIdx); - if (code) goto _err; - - if 
(taosArrayPush(pCommitter->dWriter.aBlockIdx, pBlockIdx) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - - return code; - -_err: - tsdbError("vgId:%d, commit table data end failed since %s", TD_VID(pCommitter->pTsdb->pVnode), tstrerror(code)); - return code; -} - -static int32_t tsdbGetOvlpNRow(STbDataIter *pIter, SBlock *pBlock) { - int32_t nRow = 0; - TSDBROW *pRow; - TSDBKEY key; - int32_t c = 0; STbDataIter iter = *pIter; - - iter.pRow = NULL; while (true) { - pRow = tsdbTbDataIterGet(&iter); - + TSDBROW *pRow = tsdbTbDataIterGet(&iter); if (pRow == NULL) break; - key = TSDBROW_KEY(pRow); - c = tBlockCmprFn(&(SBlock){.maxKey = key, .minKey = key}, pBlock); - if (c == 0) { + int32_t c = tsdbKeyCmprFn(&TSDBROW_KEY(pRow), &key); + if (c < 0) { nRow++; tsdbTbDataIterNext(&iter); } else if (c > 0) { @@ -648,42 +747,33 @@ static int32_t tsdbGetOvlpNRow(STbDataIter *pIter, SBlock *pBlock) { static int32_t tsdbMergeAsSubBlock(SCommitter *pCommitter, STbDataIter *pIter, SBlock *pBlock) { int32_t code = 0; + STbData *pTbData = pIter->pTbData; SBlockData *pBlockData = &pCommitter->dWriter.bData; - SBlockIdx *pBlockIdx = &(SBlockIdx){.suid = pIter->pTbData->suid, .uid = pIter->pTbData->uid}; - SBlock block; - TSDBROW *pRow; - code = tBlockDataSetSchema(pBlockData, pCommitter->skmTable.pTSchema); - if (code) goto _err; - - pRow = tsdbTbDataIterGet(pIter); - code = tsdbCommitterUpdateRowSchema(pCommitter, pBlockIdx->suid, pBlockIdx->uid, TSDBROW_SVERSION(pRow)); - if (code) goto _err; + tBlockDataClear(pBlockData); + TSDBROW *pRow = tsdbTbDataIterGet(pIter); while (true) { if (pRow == NULL) break; - code = tBlockDataAppendRow(pBlockData, pRow, pCommitter->skmRow.pTSchema); + + code = tsdbCommitterUpdateRowSchema(pCommitter, pTbData->suid, pTbData->uid, TSDBROW_SVERSION(pRow)); + if (code) goto _err; + + code = tBlockDataAppendRow(pBlockData, pRow, pCommitter->skmRow.pTSchema, pTbData->uid); if (code) goto _err; tsdbTbDataIterNext(pIter); pRow = 
tsdbTbDataIterGet(pIter); if (pRow) { - TSDBKEY key = TSDBROW_KEY(pRow); - int32_t c = tBlockCmprFn(&(SBlock){.minKey = key, .maxKey = key}, pBlock); - - if (c == 0) { - code = - tsdbCommitterUpdateRowSchema(pCommitter, pIter->pTbData->suid, pIter->pTbData->uid, TSDBROW_SVERSION(pRow)); - if (code) goto _err; - } else if (c > 0) { + TSDBKEY rowKey = TSDBROW_KEY(pRow); + if (tsdbKeyCmprFn(&rowKey, &pBlock->maxKey) > 0) { pRow = NULL; - } else { - ASSERT(0); } } } - block = *pBlock; - code = tsdbCommitBlockData(pCommitter, pBlockData, &block, pBlockIdx, 0); + ASSERT(pBlockData->nRow > 0 && pBlock->nRow + pBlockData->nRow <= pCommitter->maxRow); + + code = tsdbCommitDataBlock(pCommitter, pBlock); if (code) goto _err; return code; @@ -693,176 +783,307 @@ _err: return code; } -static int32_t tsdbCommitTableData(SCommitter *pCommitter, STbData *pTbData, SBlockIdx *pBlockIdx) { - int32_t code = 0; - STbDataIter iter = {0}; - STbDataIter *pIter = &iter; - TSDBROW *pRow; - int32_t iBlock; - int32_t nBlock; - int64_t suid; - int64_t uid; +static int32_t tsdbMergeCommitLast(SCommitter *pCommitter, STbDataIter *pIter) { + int32_t code = 0; + STbData *pTbData = pIter->pTbData; + int32_t nRow = tsdbGetNumOfRowsLessThan(pIter, (TSDBKEY){.ts = pCommitter->maxKey + 1, .version = VERSION_MIN}); - if (pTbData) { - tsdbTbDataIterOpen(pTbData, &(TSDBKEY){.ts = pCommitter->minKey, .version = VERSION_MIN}, 0, pIter); - pRow = tsdbTbDataIterGet(pIter); - if (pRow && TSDBROW_TS(pRow) > pCommitter->maxKey) pRow = NULL; + if (pCommitter->dReader.pRowInfo && tTABLEIDCmprFn(pTbData, pCommitter->dReader.pRowInfo) == 0) { + if (pCommitter->dReader.pRowInfo->suid) { // super table + for (int32_t iRow = pCommitter->dReader.iRow; iRow < pCommitter->dReader.bDatal.nRow; iRow++) { + if (pTbData->uid != pCommitter->dReader.bDatal.aUid[iRow]) break; + nRow++; + } + } else { // normal table + ASSERT(pCommitter->dReader.iRow == 0); + nRow += pCommitter->dReader.bDatal.nRow; + } + } - suid = 
pTbData->suid; - uid = pTbData->uid; - } else { - pIter = NULL; + if (nRow == 0) goto _exit; + + TSDBROW *pRow = tsdbTbDataIterGet(pIter); + if (pRow && TSDBROW_TS(pRow) > pCommitter->maxKey) { pRow = NULL; } - if (pBlockIdx) { - code = tsdbReadBlock(pCommitter->dReader.pReader, pBlockIdx, &pCommitter->dReader.mBlock, NULL); - if (code) goto _err; - - nBlock = pCommitter->dReader.mBlock.nItem; - ASSERT(nBlock > 0); - - suid = pBlockIdx->suid; - uid = pBlockIdx->uid; - } else { - nBlock = 0; + SRowInfo *pRowInfo = pCommitter->dReader.pRowInfo; + if (pRowInfo && pRowInfo->uid != pTbData->uid) { + pRowInfo = NULL; } - if (pRow == NULL && nBlock == 0) goto _exit; + while (nRow) { + SBlockData *pBlockData; + int8_t toData; - // start =========== - tMapDataReset(&pCommitter->dWriter.mBlock); + if (nRow < pCommitter->minRow) { // to .last + toData = 0; + pBlockData = &pCommitter->dWriter.bDatal; + + // commit and reset block data schema if need + // QUESTION: Is there a case that pBlockData->nRow == 0 but need to change schema ? + if (pBlockData->suid || pBlockData->uid) { + if (pBlockData->suid != pTbData->suid || pBlockData->suid == 0) { + if (pBlockData->nRow > 0) { + code = tsdbCommitLastBlock(pCommitter); + if (code) goto _err; + } + + tBlockDataReset(pBlockData); + } + } + + // set block data schema if need + if (pBlockData->suid == 0 && pBlockData->uid == 0) { + code = + tBlockDataInit(pBlockData, pTbData->suid, pTbData->suid ? 
0 : pTbData->uid, pCommitter->skmTable.pTSchema); + if (code) goto _err; + } + + if (pBlockData->nRow + nRow > pCommitter->maxRow) { + code = tsdbCommitLastBlock(pCommitter); + if (code) goto _err; + } + } else { // to .data + toData = 1; + pBlockData = &pCommitter->dWriter.bData; + ASSERT(pBlockData->nRow == 0); + } + + while (pRow && pRowInfo) { + int32_t c = tsdbRowCmprFn(pRow, &pRowInfo->row); + if (c < 0) { + code = tsdbCommitterUpdateRowSchema(pCommitter, pTbData->suid, pTbData->uid, TSDBROW_SVERSION(pRow)); + if (code) goto _err; + + code = tBlockDataAppendRow(pBlockData, pRow, pCommitter->skmRow.pTSchema, pTbData->uid); + if (code) goto _err; + + tsdbTbDataIterNext(pIter); + pRow = tsdbTbDataIterGet(pIter); + if (pRow && TSDBROW_TS(pRow) > pCommitter->maxKey) { + pRow = NULL; + } + } else if (c > 0) { + code = tBlockDataAppendRow(pBlockData, &pRowInfo->row, NULL, pTbData->uid); + if (code) goto _err; + + code = tsdbCommitterNextLastRow(pCommitter); + if (code) goto _err; + + pRowInfo = pCommitter->dReader.pRowInfo; + if (pRowInfo && pRowInfo->uid != pTbData->uid) { + pRowInfo = NULL; + } + } else { + ASSERT(0); + } + + nRow--; + if (toData) { + if (nRow == 0 || pBlockData->nRow >= pCommitter->maxRow * 4 / 5) { + code = tsdbCommitDataBlock(pCommitter, NULL); + if (code) goto _err; + goto _outer_break; + } + } + } + + while (pRow) { + code = tsdbCommitterUpdateRowSchema(pCommitter, pTbData->suid, pTbData->uid, TSDBROW_SVERSION(pRow)); + if (code) goto _err; + + code = tBlockDataAppendRow(pBlockData, pRow, pCommitter->skmRow.pTSchema, pTbData->uid); + if (code) goto _err; + + tsdbTbDataIterNext(pIter); + pRow = tsdbTbDataIterGet(pIter); + if (pRow && TSDBROW_TS(pRow) > pCommitter->maxKey) { + pRow = NULL; + } + + nRow--; + if (toData) { + if (nRow == 0 || pBlockData->nRow >= pCommitter->maxRow * 4 / 5) { + code = tsdbCommitDataBlock(pCommitter, NULL); + if (code) goto _err; + goto _outer_break; + } + } + } + + while (pRowInfo) { + code = 
tBlockDataAppendRow(pBlockData, &pRowInfo->row, NULL, pTbData->uid); + if (code) goto _err; + + code = tsdbCommitterNextLastRow(pCommitter); + if (code) goto _err; + + pRowInfo = pCommitter->dReader.pRowInfo; + if (pRowInfo && pRowInfo->uid != pTbData->uid) { + pRowInfo = NULL; + } + + nRow--; + if (toData) { + if (nRow == 0 || pBlockData->nRow >= pCommitter->maxRow * 4 / 5) { + code = tsdbCommitDataBlock(pCommitter, NULL); + if (code) goto _err; + goto _outer_break; + } + } + } + + _outer_break: + ASSERT(nRow >= 0); + } + +_exit: + return code; + +_err: + tsdbError("vgId:%d tsdb merge commit last failed since %s", TD_VID(pCommitter->pTsdb->pVnode), tstrerror(code)); + return code; +} + +static int32_t tsdbCommitTableData(SCommitter *pCommitter, STbData *pTbData) { + int32_t code = 0; + + ASSERT(pCommitter->dReader.pBlockIdx == NULL || tTABLEIDCmprFn(pCommitter->dReader.pBlockIdx, pTbData) >= 0); + ASSERT(pCommitter->dReader.pRowInfo == NULL || tTABLEIDCmprFn(pCommitter->dReader.pRowInfo, pTbData) >= 0); + + // merge commit table data + STbDataIter iter = {0}; + STbDataIter *pIter = &iter; + TSDBROW *pRow; + + tsdbTbDataIterOpen(pTbData, &(TSDBKEY){.ts = pCommitter->minKey, .version = VERSION_MIN}, 0, pIter); + pRow = tsdbTbDataIterGet(pIter); + if (pRow && TSDBROW_TS(pRow) > pCommitter->maxKey) { + pRow = NULL; + } + + if (pRow == NULL) goto _exit; + + int32_t iBlock = 0; SBlock block; SBlock *pBlock = &block; - - iBlock = 0; - if (iBlock < nBlock) { + if (pCommitter->dReader.pBlockIdx && tTABLEIDCmprFn(pTbData, pCommitter->dReader.pBlockIdx) == 0) { tMapDataGetItemByIdx(&pCommitter->dReader.mBlock, iBlock, pBlock, tGetBlock); } else { pBlock = NULL; } - if (pRow) { - code = tsdbCommitterUpdateTableSchema(pCommitter, pTbData->suid, pTbData->uid, pTbData->maxSkmVer); - if (code) goto _err; - } + code = tsdbCommitterUpdateTableSchema(pCommitter, pTbData->suid, pTbData->uid); + if (code) goto _err; - // merge =========== - while (true) { - if (pRow == NULL && pBlock == 
NULL) break; + tMapDataReset(&pCommitter->dWriter.mBlock); + code = tBlockDataInit(&pCommitter->dReader.bData, pTbData->suid, pTbData->uid, pCommitter->skmTable.pTSchema); + if (code) goto _err; + code = tBlockDataInit(&pCommitter->dWriter.bData, pTbData->suid, pTbData->uid, pCommitter->skmTable.pTSchema); + if (code) goto _err; - if (pRow && pBlock) { - if (pBlock->last) { - code = tsdbMergeTableData(pCommitter, pIter, pBlock, - (TSDBKEY){.ts = pCommitter->maxKey + 1, .version = VERSION_MIN}, 0); - if (code) goto _err; - - pRow = tsdbTbDataIterGet(pIter); - if (pRow && TSDBROW_TS(pRow) > pCommitter->maxKey) pRow = NULL; - iBlock++; - if (iBlock < nBlock) { - tMapDataGetItemByIdx(&pCommitter->dReader.mBlock, iBlock, pBlock, tGetBlock); - } else { - pBlock = NULL; - } - - ASSERT(pRow == NULL && pBlock == NULL); - } else { - int32_t c = tBlockCmprFn(&(SBlock){.maxKey = TSDBROW_KEY(pRow), .minKey = TSDBROW_KEY(pRow)}, pBlock); - if (c > 0) { - // only disk data - code = tsdbCommitTableDiskData(pCommitter, pBlock, pBlockIdx); - if (code) goto _err; - - iBlock++; - if (iBlock < nBlock) { - tMapDataGetItemByIdx(&pCommitter->dReader.mBlock, iBlock, pBlock, tGetBlock); - } else { - pBlock = NULL; - } - } else if (c < 0) { - // only memory data - code = tsdbCommitTableMemData(pCommitter, pIter, pBlock->minKey, 1); - if (code) goto _err; - - pRow = tsdbTbDataIterGet(pIter); - if (pRow && TSDBROW_TS(pRow) > pCommitter->maxKey) pRow = NULL; - } else { - // merge memory and disk - int32_t nOvlp = tsdbGetOvlpNRow(pIter, pBlock); - ASSERT(nOvlp); - if (pBlock->nRow + nOvlp <= pCommitter->maxRow && pBlock->nSubBlock < TSDB_MAX_SUBBLOCKS) { - code = tsdbMergeAsSubBlock(pCommitter, pIter, pBlock); - if (code) goto _err; - } else { - TSDBKEY toKey = {.ts = pCommitter->maxKey + 1, .version = VERSION_MIN}; - int8_t toDataOnly = 0; - - if (iBlock < nBlock - 1) { - toDataOnly = 1; - - SBlock nextBlock = {0}; - tBlockReset(&nextBlock); - tMapDataGetItemByIdx(&pCommitter->dReader.mBlock, 
iBlock + 1, &nextBlock, tGetBlock); - toKey = nextBlock.minKey; - } - - code = tsdbMergeTableData(pCommitter, pIter, pBlock, toKey, toDataOnly); - if (code) goto _err; - } - - pRow = tsdbTbDataIterGet(pIter); - if (pRow && TSDBROW_TS(pRow) > pCommitter->maxKey) pRow = NULL; - iBlock++; - if (iBlock < nBlock) { - tMapDataGetItemByIdx(&pCommitter->dReader.mBlock, iBlock, pBlock, tGetBlock); - } else { - pBlock = NULL; - } - } - } - } else if (pBlock) { - code = tsdbCommitTableDiskData(pCommitter, pBlock, pBlockIdx); + // .data merge + while (pBlock && pRow) { + int32_t c = tBlockCmprFn(pBlock, &(SBlock){.minKey = TSDBROW_KEY(pRow), .maxKey = TSDBROW_KEY(pRow)}); + if (c < 0) { // disk + code = tMapDataPutItem(&pCommitter->dWriter.mBlock, pBlock, tPutBlock); if (code) goto _err; + // next iBlock++; - if (iBlock < nBlock) { + if (iBlock < pCommitter->dReader.mBlock.nItem) { tMapDataGetItemByIdx(&pCommitter->dReader.mBlock, iBlock, pBlock, tGetBlock); } else { pBlock = NULL; } - } else { - code = - tsdbCommitTableMemData(pCommitter, pIter, (TSDBKEY){.ts = pCommitter->maxKey + 1, .version = VERSION_MIN}, 0); + } else if (c > 0) { // memory + code = tsdbCommitTableMemData(pCommitter, pIter, pBlock->minKey); if (code) goto _err; + // next pRow = tsdbTbDataIterGet(pIter); - if (pRow && TSDBROW_TS(pRow) > pCommitter->maxKey) pRow = NULL; - ASSERT(pRow == NULL); + if (pRow && TSDBROW_TS(pRow) > pCommitter->maxKey) { + pRow = NULL; + } + } else { // merge + int32_t nOvlp = tsdbGetNumOfRowsLessThan(pIter, pBlock->maxKey); + + ASSERT(nOvlp > 0); + + if (pBlock->nRow + nOvlp <= pCommitter->maxRow && pBlock->nSubBlock < TSDB_MAX_SUBBLOCKS) { + code = tsdbMergeAsSubBlock(pCommitter, pIter, pBlock); + if (code) goto _err; + } else { + code = tsdbMergeCommitData(pCommitter, pIter, pBlock); + if (code) goto _err; + } + + // next + pRow = tsdbTbDataIterGet(pIter); + if (pRow && TSDBROW_TS(pRow) > pCommitter->maxKey) { + pRow = NULL; + } + iBlock++; + if (iBlock < 
pCommitter->dReader.mBlock.nItem) { + tMapDataGetItemByIdx(&pCommitter->dReader.mBlock, iBlock, pBlock, tGetBlock); + } else { + pBlock = NULL; + } } } - // end ===================== - code = tsdbCommitTableDataEnd(pCommitter, suid, uid); + while (pBlock) { + code = tMapDataPutItem(&pCommitter->dWriter.mBlock, pBlock, tPutBlock); + if (code) goto _err; + + // next + iBlock++; + if (iBlock < pCommitter->dReader.mBlock.nItem) { + tMapDataGetItemByIdx(&pCommitter->dReader.mBlock, iBlock, pBlock, tGetBlock); + } else { + pBlock = NULL; + } + } + + // .data append and .last merge + code = tsdbMergeCommitLast(pCommitter, pIter); if (code) goto _err; -_exit: - if (pIter) { - pRow = tsdbTbDataIterGet(pIter); - if (pRow) pCommitter->nextKey = TMIN(pCommitter->nextKey, TSDBROW_TS(pRow)); + // end + if (pCommitter->dWriter.mBlock.nItem > 0) { + SBlockIdx blockIdx = {.suid = pTbData->suid, .uid = pTbData->uid}; + code = tsdbWriteBlock(pCommitter->dWriter.pWriter, &pCommitter->dWriter.mBlock, &blockIdx); + if (code) goto _err; + + if (taosArrayPush(pCommitter->dWriter.aBlockIdx, &blockIdx) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } } + +_exit: + pRow = tsdbTbDataIterGet(pIter); + if (pRow) { + pCommitter->nextKey = TMIN(pCommitter->nextKey, TSDBROW_TS(pRow)); + } + return code; _err: - tsdbError("vgId:%d, tsdb commit table data failed since %s", TD_VID(pCommitter->pTsdb->pVnode), tstrerror(code)); + tsdbError("vgId:%d tsdb commit table data failed since %s", TD_VID(pCommitter->pTsdb->pVnode), tstrerror(code)); return code; } static int32_t tsdbCommitFileDataEnd(SCommitter *pCommitter) { int32_t code = 0; - // write blockIdx - code = tsdbWriteBlockIdx(pCommitter->dWriter.pWriter, pCommitter->dWriter.aBlockIdx, NULL); + // write aBlockIdx + code = tsdbWriteBlockIdx(pCommitter->dWriter.pWriter, pCommitter->dWriter.aBlockIdx); + if (code) goto _err; + + // write aBlockL + code = tsdbWriteBlockL(pCommitter->dWriter.pWriter, pCommitter->dWriter.aBlockL); if (code) 
goto _err; // update file header @@ -890,6 +1111,98 @@ _err: return code; } +static int32_t tsdbMoveCommitData(SCommitter *pCommitter, TABLEID toTable) { + int32_t code = 0; + + // .data + while (true) { + if (pCommitter->dReader.pBlockIdx == NULL || tTABLEIDCmprFn(pCommitter->dReader.pBlockIdx, &toTable) >= 0) break; + + SBlockIdx blockIdx = *pCommitter->dReader.pBlockIdx; + code = tsdbWriteBlock(pCommitter->dWriter.pWriter, &pCommitter->dReader.mBlock, &blockIdx); + if (code) goto _err; + + if (taosArrayPush(pCommitter->dWriter.aBlockIdx, &blockIdx) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + + code = tsdbCommitterNextTableData(pCommitter); + if (code) goto _err; + } + + // .last + while (true) { + if (pCommitter->dReader.pRowInfo == NULL || tTABLEIDCmprFn(pCommitter->dReader.pRowInfo, &toTable) >= 0) break; + + SBlockData *pBlockDataR = &pCommitter->dReader.bDatal; + SBlockData *pBlockDataW = &pCommitter->dWriter.bDatal; + tb_uid_t suid = pCommitter->dReader.pRowInfo->suid; + tb_uid_t uid = pCommitter->dReader.pRowInfo->uid; + + ASSERT((pBlockDataR->suid && !pBlockDataR->uid) || (!pBlockDataR->suid && pBlockDataR->uid)); + ASSERT(pBlockDataR->nRow > 0); + + // commit and reset block data schema if need + if (pBlockDataW->suid || pBlockDataW->uid) { + if (pBlockDataW->suid != suid || pBlockDataW->suid == 0) { + if (pBlockDataW->nRow > 0) { + code = tsdbCommitLastBlock(pCommitter); + if (code) goto _err; + } + tBlockDataReset(pBlockDataW); + } + } + + // set block data schema if need + if (pBlockDataW->suid == 0 && pBlockDataW->uid == 0) { + code = tsdbCommitterUpdateTableSchema(pCommitter, suid, uid); + if (code) goto _err; + + code = tBlockDataInit(pBlockDataW, suid, suid ? 
0 : uid, pCommitter->skmTable.pTSchema); + if (code) goto _err; + } + + // check if it can make sure that one table data in one block + int32_t nRow = 0; + if (pBlockDataR->suid) { + int32_t iRow = pCommitter->dReader.iRow; + while ((iRow < pBlockDataR->nRow) && (pBlockDataR->aUid[iRow] == uid)) { + nRow++; + iRow++; + } + } else { + ASSERT(pCommitter->dReader.iRow == 0); + nRow = pBlockDataR->nRow; + } + + ASSERT(nRow > 0 && nRow < pCommitter->minRow); + + if (pBlockDataW->nRow + nRow > pCommitter->maxRow) { + ASSERT(pBlockDataW->nRow > 0); + + code = tsdbCommitLastBlock(pCommitter); + if (code) goto _err; + } + + while (nRow > 0) { + code = tBlockDataAppendRow(pBlockDataW, &pCommitter->dReader.pRowInfo->row, NULL, uid); + if (code) goto _err; + + code = tsdbCommitterNextLastRow(pCommitter); + if (code) goto _err; + + nRow--; + } + } + + return code; + +_err: + tsdbError("vgId:%d tsdb move commit data failed since %s", TD_VID(pCommitter->pTsdb->pVnode), tstrerror(code)); + return code; +} + static int32_t tsdbCommitFileData(SCommitter *pCommitter) { int32_t code = 0; STsdb *pTsdb = pCommitter->pTsdb; @@ -900,59 +1213,30 @@ static int32_t tsdbCommitFileData(SCommitter *pCommitter) { if (code) goto _err; // commit file data impl - int32_t iTbData = 0; - int32_t nTbData = taosArrayGetSize(pMemTable->aTbData); - int32_t iBlockIdx = 0; - int32_t nBlockIdx = taosArrayGetSize(pCommitter->dReader.aBlockIdx); - STbData *pTbData; - SBlockIdx *pBlockIdx; + for (int32_t iTbData = 0; iTbData < taosArrayGetSize(pCommitter->aTbDataP); iTbData++) { + STbData *pTbData = (STbData *)taosArrayGetP(pCommitter->aTbDataP, iTbData); - ASSERT(nTbData > 0); + // move commit until current (suid, uid) + code = tsdbMoveCommitData(pCommitter, *(TABLEID *)pTbData); + if (code) goto _err; - pTbData = (STbData *)taosArrayGetP(pMemTable->aTbData, iTbData); - pBlockIdx = (iBlockIdx < nBlockIdx) ? 
(SBlockIdx *)taosArrayGet(pCommitter->dReader.aBlockIdx, iBlockIdx) : NULL; - while (pTbData || pBlockIdx) { - if (pTbData && pBlockIdx) { - int32_t c = tTABLEIDCmprFn(pTbData, pBlockIdx); + // commit current table data + code = tsdbCommitTableData(pCommitter, pTbData); + if (code) goto _err; - if (c == 0) { - goto _commit_table_mem_and_disk; - } else if (c < 0) { - goto _commit_table_mem_data; - } else { - goto _commit_table_disk_data; - } - } else if (pBlockIdx) { - goto _commit_table_disk_data; - } else { - goto _commit_table_mem_data; + // move next reader table data if need + if (pCommitter->dReader.pBlockIdx && tTABLEIDCmprFn(pTbData, pCommitter->dReader.pBlockIdx) == 0) { + code = tsdbCommitterNextTableData(pCommitter); + if (code) goto _err; } + } - _commit_table_mem_data: - code = tsdbCommitTableData(pCommitter, pTbData, NULL); + code = tsdbMoveCommitData(pCommitter, (TABLEID){.suid = INT64_MAX, .uid = INT64_MAX}); + if (code) goto _err; + + if (pCommitter->dWriter.bDatal.nRow > 0) { + code = tsdbCommitLastBlock(pCommitter); if (code) goto _err; - - iTbData++; - pTbData = (iTbData < nTbData) ? (STbData *)taosArrayGetP(pMemTable->aTbData, iTbData) : NULL; - continue; - - _commit_table_disk_data: - code = tsdbCommitTableData(pCommitter, NULL, pBlockIdx); - if (code) goto _err; - - iBlockIdx++; - pBlockIdx = (iBlockIdx < nBlockIdx) ? (SBlockIdx *)taosArrayGet(pCommitter->dReader.aBlockIdx, iBlockIdx) : NULL; - continue; - - _commit_table_mem_and_disk: - code = tsdbCommitTableData(pCommitter, pTbData, pBlockIdx); - if (code) goto _err; - - iBlockIdx++; - pBlockIdx = (iBlockIdx < nBlockIdx) ? (SBlockIdx *)taosArrayGet(pCommitter->dReader.aBlockIdx, iBlockIdx) : NULL; - iTbData++; - pTbData = (iTbData < nTbData) ? 
(STbData *)taosArrayGetP(pMemTable->aTbData, iTbData) : NULL; - continue; } // commit file data end @@ -987,6 +1271,11 @@ static int32_t tsdbStartCommit(STsdb *pTsdb, SCommitter *pCommitter) { pCommitter->minRow = pTsdb->pVnode->config.tsdbCfg.minRows; pCommitter->maxRow = pTsdb->pVnode->config.tsdbCfg.maxRows; pCommitter->cmprAlg = pTsdb->pVnode->config.tsdbCfg.compression; + pCommitter->aTbDataP = tsdbMemTableGetTbDataArray(pTsdb->imem); + if (pCommitter->aTbDataP == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } code = tsdbFSCopy(pTsdb, &pCommitter->fs); if (code) goto _err; @@ -1001,22 +1290,42 @@ _err: static int32_t tsdbCommitDataStart(SCommitter *pCommitter) { int32_t code = 0; + // Reader pCommitter->dReader.aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx)); if (pCommitter->dReader.aBlockIdx == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _exit; } + code = tBlockDataCreate(&pCommitter->dReader.bData); + if (code) goto _exit; + + pCommitter->dReader.aBlockL = taosArrayInit(0, sizeof(SBlockL)); + if (pCommitter->dReader.aBlockL == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + + code = tBlockDataCreate(&pCommitter->dReader.bDatal); + if (code) goto _exit; + + // Writer pCommitter->dWriter.aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx)); if (pCommitter->dWriter.aBlockIdx == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _exit; } - code = tBlockDataInit(&pCommitter->dReader.bData); + pCommitter->dWriter.aBlockL = taosArrayInit(0, sizeof(SBlockL)); + if (pCommitter->dWriter.aBlockL == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + + code = tBlockDataCreate(&pCommitter->dWriter.bData); if (code) goto _exit; - code = tBlockDataInit(&pCommitter->dWriter.bData); + code = tBlockDataCreate(&pCommitter->dWriter.bDatal); if (code) goto _exit; _exit: @@ -1024,12 +1333,19 @@ _exit: } static void tsdbCommitDataEnd(SCommitter *pCommitter) { + // Reader taosArrayDestroy(pCommitter->dReader.aBlockIdx); 
tMapDataClear(&pCommitter->dReader.mBlock); - tBlockDataClear(&pCommitter->dReader.bData, 1); + tBlockDataDestroy(&pCommitter->dReader.bData, 1); + taosArrayDestroy(pCommitter->dReader.aBlockL); + tBlockDataDestroy(&pCommitter->dReader.bDatal, 1); + + // Writer taosArrayDestroy(pCommitter->dWriter.aBlockIdx); + taosArrayDestroy(pCommitter->dWriter.aBlockL); tMapDataClear(&pCommitter->dWriter.mBlock); - tBlockDataClear(&pCommitter->dWriter.bData, 1); + tBlockDataDestroy(&pCommitter->dWriter.bData, 1); + tBlockDataDestroy(&pCommitter->dWriter.bDatal, 1); tTSchemaDestroy(pCommitter->skmTable.pTSchema); tTSchemaDestroy(pCommitter->skmRow.pTSchema); } @@ -1049,9 +1365,6 @@ static int32_t tsdbCommitData(SCommitter *pCommitter) { // impl ==================== pCommitter->nextKey = pMemTable->minKey; while (pCommitter->nextKey < TSKEY_MAX) { - pCommitter->commitFid = tsdbKeyFid(pCommitter->nextKey, pCommitter->minutes, pCommitter->precision); - tsdbFidKeyRange(pCommitter->commitFid, pCommitter->minutes, pCommitter->precision, &pCommitter->minKey, - &pCommitter->maxKey); code = tsdbCommitFileData(pCommitter); if (code) goto _err; } @@ -1088,13 +1401,13 @@ static int32_t tsdbCommitDel(SCommitter *pCommitter) { int32_t iDelIdx = 0; int32_t nDelIdx = taosArrayGetSize(pCommitter->aDelIdx); int32_t iTbData = 0; - int32_t nTbData = taosArrayGetSize(pMemTable->aTbData); + int32_t nTbData = taosArrayGetSize(pCommitter->aTbDataP); STbData *pTbData; SDelIdx *pDelIdx; ASSERT(nTbData > 0); - pTbData = (STbData *)taosArrayGetP(pMemTable->aTbData, iTbData); + pTbData = (STbData *)taosArrayGetP(pCommitter->aTbDataP, iTbData); pDelIdx = (iDelIdx < nDelIdx) ? (SDelIdx *)taosArrayGet(pCommitter->aDelIdx, iDelIdx) : NULL; while (true) { if (pTbData == NULL && pDelIdx == NULL) break; @@ -1120,7 +1433,7 @@ static int32_t tsdbCommitDel(SCommitter *pCommitter) { if (code) goto _err; iTbData++; - pTbData = (iTbData < nTbData) ? 
(STbData *)taosArrayGetP(pMemTable->aTbData, iTbData) : NULL; + pTbData = (iTbData < nTbData) ? (STbData *)taosArrayGetP(pCommitter->aTbDataP, iTbData) : NULL; continue; _commit_disk_del: @@ -1136,7 +1449,7 @@ static int32_t tsdbCommitDel(SCommitter *pCommitter) { if (code) goto _err; iTbData++; - pTbData = (iTbData < nTbData) ? (STbData *)taosArrayGetP(pMemTable->aTbData, iTbData) : NULL; + pTbData = (iTbData < nTbData) ? (STbData *)taosArrayGetP(pCommitter->aTbDataP, iTbData) : NULL; iDelIdx++; pDelIdx = (iDelIdx < nDelIdx) ? (SDelIdx *)taosArrayGet(pCommitter->aDelIdx, iDelIdx) : NULL; continue; @@ -1184,6 +1497,7 @@ static int32_t tsdbEndCommit(SCommitter *pCommitter, int32_t eno) { tsdbUnrefMemTable(pMemTable); tsdbFSDestroy(&pCommitter->fs); + taosArrayDestroy(pCommitter->aTbDataP); tsdbInfo("vgId:%d, tsdb end commit", TD_VID(pTsdb->pVnode)); return code; diff --git a/source/dnode/vnode/src/tsdb/tsdbFS.c b/source/dnode/vnode/src/tsdb/tsdbFS.c index 74f1aef1fc..247de99338 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS.c @@ -576,10 +576,7 @@ int32_t tsdbFSCopy(STsdb *pTsdb, STsdbFS *pFS) { code = TSDB_CODE_OUT_OF_MEMORY; goto _exit; } - fSet.pHeadF->nRef = 0; - fSet.pHeadF->commitID = pSet->pHeadF->commitID; - fSet.pHeadF->size = pSet->pHeadF->size; - fSet.pHeadF->offset = pSet->pHeadF->offset; + *fSet.pHeadF = *pSet->pHeadF; // data fSet.pDataF = (SDataFile *)taosMemoryMalloc(sizeof(SDataFile)); @@ -587,9 +584,7 @@ int32_t tsdbFSCopy(STsdb *pTsdb, STsdbFS *pFS) { code = TSDB_CODE_OUT_OF_MEMORY; goto _exit; } - fSet.pDataF->nRef = 0; - fSet.pDataF->commitID = pSet->pDataF->commitID; - fSet.pDataF->size = pSet->pDataF->size; + *fSet.pDataF = *pSet->pDataF; // data fSet.pLastF = (SLastFile *)taosMemoryMalloc(sizeof(SLastFile)); @@ -597,9 +592,7 @@ int32_t tsdbFSCopy(STsdb *pTsdb, STsdbFS *pFS) { code = TSDB_CODE_OUT_OF_MEMORY; goto _exit; } - fSet.pLastF->nRef = 0; - fSet.pLastF->commitID = pSet->pLastF->commitID; - 
fSet.pLastF->size = pSet->pLastF->size; + *fSet.pLastF = *pSet->pLastF; // last fSet.pSmaF = (SSmaFile *)taosMemoryMalloc(sizeof(SSmaFile)); @@ -607,9 +600,7 @@ int32_t tsdbFSCopy(STsdb *pTsdb, STsdbFS *pFS) { code = TSDB_CODE_OUT_OF_MEMORY; goto _exit; } - fSet.pSmaF->nRef = 0; - fSet.pSmaF->commitID = pSet->pSmaF->commitID; - fSet.pSmaF->size = pSet->pSmaF->size; + *fSet.pSmaF = *pSet->pSmaF; if (taosArrayPush(pFS->aDFileSet, &fSet) == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; diff --git a/source/dnode/vnode/src/tsdb/tsdbFile.c b/source/dnode/vnode/src/tsdb/tsdbFile.c index 52a102f911..00d2ac848f 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFile.c +++ b/source/dnode/vnode/src/tsdb/tsdbFile.c @@ -58,6 +58,7 @@ int32_t tPutLastFile(uint8_t *p, SLastFile *pLastFile) { n += tPutI64v(p ? p + n : p, pLastFile->commitID); n += tPutI64v(p ? p + n : p, pLastFile->size); + n += tPutI64v(p ? p + n : p, pLastFile->offset); return n; } @@ -67,6 +68,7 @@ static int32_t tGetLastFile(uint8_t *p, SLastFile *pLastFile) { n += tGetI64v(p + n, &pLastFile->commitID); n += tGetI64v(p + n, &pLastFile->size); + n += tGetI64v(p + n, &pLastFile->offset); return n; } @@ -186,11 +188,16 @@ int32_t tPutDFileSet(uint8_t *p, SDFileSet *pSet) { n += tPutI32v(p ? p + n : p, pSet->diskId.level); n += tPutI32v(p ? p + n : p, pSet->diskId.id); n += tPutI32v(p ? p + n : p, pSet->fid); + + // data n += tPutHeadFile(p ? p + n : p, pSet->pHeadF); n += tPutDataFile(p ? p + n : p, pSet->pDataF); - n += tPutLastFile(p ? p + n : p, pSet->pLastF); n += tPutSmaFile(p ? p + n : p, pSet->pSmaF); + // last + n += tPutU8(p ? p + n : p, 1); // for future compatibility + n += tPutLastFile(p ? 
p + n : p, pSet->pLastF); + return n; } @@ -200,11 +207,17 @@ int32_t tGetDFileSet(uint8_t *p, SDFileSet *pSet) { n += tGetI32v(p + n, &pSet->diskId.level); n += tGetI32v(p + n, &pSet->diskId.id); n += tGetI32v(p + n, &pSet->fid); + + // data n += tGetHeadFile(p + n, pSet->pHeadF); n += tGetDataFile(p + n, pSet->pDataF); - n += tGetLastFile(p + n, pSet->pLastF); n += tGetSmaFile(p + n, pSet->pSmaF); + // last + uint8_t nLast; + n += tGetU8(p + n, &nLast); + n += tGetLastFile(p + n, pSet->pLastF); + return n; } diff --git a/source/dnode/vnode/src/tsdb/tsdbMemTable.c b/source/dnode/vnode/src/tsdb/tsdbMemTable.c index 34b37ffe9b..a6628463f8 100644 --- a/source/dnode/vnode/src/tsdb/tsdbMemTable.c +++ b/source/dnode/vnode/src/tsdb/tsdbMemTable.c @@ -15,6 +15,7 @@ #include "tsdb.h" +#define MEM_MIN_HASH 1024 #define SL_MAX_LEVEL 5 #define SL_NODE_SIZE(l) (sizeof(SMemSkipListNode) + sizeof(SMemSkipListNode *) * (l)*2) @@ -45,12 +46,12 @@ int32_t tsdbMemTableCreate(STsdb *pTsdb, SMemTable **ppMemTable) { pMemTable->nRef = 1; pMemTable->minKey = TSKEY_MAX; pMemTable->maxKey = TSKEY_MIN; - pMemTable->minVersion = VERSION_MAX; - pMemTable->maxVersion = VERSION_MIN; pMemTable->nRow = 0; pMemTable->nDel = 0; - pMemTable->aTbData = taosArrayInit(128, sizeof(STbData *)); - if (pMemTable->aTbData == NULL) { + pMemTable->nTbData = 0; + pMemTable->nBucket = MEM_MIN_HASH; + pMemTable->aBucket = (STbData **)taosMemoryCalloc(pMemTable->nBucket, sizeof(STbData *)); + if (pMemTable->aBucket == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; taosMemoryFree(pMemTable); goto _err; @@ -68,37 +69,30 @@ _err: void tsdbMemTableDestroy(SMemTable *pMemTable) { if (pMemTable) { vnodeBufPoolUnRef(pMemTable->pPool); - taosArrayDestroy(pMemTable->aTbData); + taosMemoryFree(pMemTable->aBucket); taosMemoryFree(pMemTable); } } -static int32_t tbDataPCmprFn(const void *p1, const void *p2) { - STbData *pTbData1 = *(STbData **)p1; - STbData *pTbData2 = *(STbData **)p2; +static FORCE_INLINE STbData 
*tsdbGetTbDataFromMemTableImpl(SMemTable *pMemTable, tb_uid_t suid, tb_uid_t uid) { + STbData *pTbData = pMemTable->aBucket[TABS(uid) % pMemTable->nBucket]; - if (pTbData1->suid < pTbData2->suid) { - return -1; - } else if (pTbData1->suid > pTbData2->suid) { - return 1; + while (pTbData) { + if (pTbData->uid == uid) break; + pTbData = pTbData->next; } - if (pTbData1->uid < pTbData2->uid) { - return -1; - } else if (pTbData1->uid > pTbData2->uid) { - return 1; - } - - return 0; + return pTbData; } -void tsdbGetTbDataFromMemTable(SMemTable *pMemTable, tb_uid_t suid, tb_uid_t uid, STbData **ppTbData) { - STbData *pTbData = &(STbData){.suid = suid, .uid = uid}; + +STbData *tsdbGetTbDataFromMemTable(SMemTable *pMemTable, tb_uid_t suid, tb_uid_t uid) { + STbData *pTbData; taosRLockLatch(&pMemTable->latch); - void *p = taosArraySearch(pMemTable->aTbData, &pTbData, tbDataPCmprFn, TD_EQ); + pTbData = tsdbGetTbDataFromMemTableImpl(pMemTable, suid, uid); taosRUnLockLatch(&pMemTable->latch); - *ppTbData = p ? 
*(STbData **)p : NULL; + return pTbData; } int32_t tsdbInsertTableData(STsdb *pTsdb, int64_t version, SSubmitMsgIter *pMsgIter, SSubmitBlk *pBlock, @@ -184,10 +178,6 @@ int32_t tsdbDeleteTableData(STsdb *pTsdb, int64_t version, tb_uid_t suid, tb_uid pTbData->pTail = pDelData; } - // update the state of pMemTable and other (todo) - - pMemTable->minVersion = TMIN(pMemTable->minVersion, version); - pMemTable->maxVersion = TMAX(pMemTable->maxVersion, version); pMemTable->nDel++; if (TSDB_CACHE_LAST_ROW(pMemTable->pTsdb->pVnode->config) && tsdbKeyCmprFn(&lastKey, &pTbData->maxKey) >= 0) { @@ -320,18 +310,44 @@ _exit: return pIter->pRow; } +static int32_t tsdbMemTableRehash(SMemTable *pMemTable) { + int32_t code = 0; + + int32_t nBucket = pMemTable->nBucket * 2; + STbData **aBucket = (STbData **)taosMemoryCalloc(nBucket, sizeof(STbData *)); + if (aBucket == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + + for (int32_t iBucket = 0; iBucket < pMemTable->nBucket; iBucket++) { + STbData *pTbData = pMemTable->aBucket[iBucket]; + + while (pTbData) { + STbData *pNext = pTbData->next; + + int32_t idx = TABS(pTbData->uid) % nBucket; + pTbData->next = aBucket[idx]; + aBucket[idx] = pTbData; + + pTbData = pNext; + } + } + + taosMemoryFree(pMemTable->aBucket); + pMemTable->nBucket = nBucket; + pMemTable->aBucket = aBucket; + +_exit: + return code; +} + static int32_t tsdbGetOrCreateTbData(SMemTable *pMemTable, tb_uid_t suid, tb_uid_t uid, STbData **ppTbData) { - int32_t code = 0; - int32_t idx = 0; - STbData *pTbData = NULL; - STbData *pTbDataT = &(STbData){.suid = suid, .uid = uid}; + int32_t code = 0; // get - idx = taosArraySearchIdx(pMemTable->aTbData, &pTbDataT, tbDataPCmprFn, TD_GE); - if (idx >= 0) { - pTbData = (STbData *)taosArrayGetP(pMemTable->aTbData, idx); - if (tbDataPCmprFn(&pTbDataT, &pTbData) == 0) goto _exit; - } + STbData *pTbData = tsdbGetTbDataFromMemTableImpl(pMemTable, suid, uid); + if (pTbData) goto _exit; // create SVBufPool *pPool = 
pMemTable->pTsdb->pVnode->inUse; @@ -346,9 +362,6 @@ static int32_t tsdbGetOrCreateTbData(SMemTable *pMemTable, tb_uid_t suid, tb_uid pTbData->uid = uid; pTbData->minKey = TSKEY_MAX; pTbData->maxKey = TSKEY_MIN; - pTbData->minVersion = VERSION_MAX; - pTbData->maxVersion = VERSION_MIN; - pTbData->maxSkmVer = -1; pTbData->pHead = NULL; pTbData->pTail = NULL; pTbData->sl.seed = taosRand(); @@ -367,21 +380,23 @@ static int32_t tsdbGetOrCreateTbData(SMemTable *pMemTable, tb_uid_t suid, tb_uid SL_NODE_FORWARD(pTbData->sl.pTail, iLevel) = NULL; } - void *p; - if (idx < 0) { - idx = taosArrayGetSize(pMemTable->aTbData); + taosWLockLatch(&pMemTable->latch); + + if (pMemTable->nTbData >= pMemTable->nBucket) { + code = tsdbMemTableRehash(pMemTable); + if (code) { + taosWUnLockLatch(&pMemTable->latch); + goto _err; + } } - taosWLockLatch(&pMemTable->latch); - p = taosArrayInsert(pMemTable->aTbData, idx, &pTbData); + int32_t idx = TABS(uid) % pMemTable->nBucket; + pTbData->next = pMemTable->aBucket[idx]; + pMemTable->aBucket[idx] = pTbData; + pMemTable->nTbData++; + taosWUnLockLatch(&pMemTable->latch); - tsdbDebug("vgId:%d, add table data %p at idx:%d", TD_VID(pMemTable->pTsdb->pVnode), pTbData, idx); - - if (p == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } _exit: *ppTbData = pTbData; return code; @@ -591,15 +606,9 @@ static int32_t tsdbInsertTableDataImpl(SMemTable *pMemTable, STbData *pTbData, i tsdbCacheInsertLast(pMemTable->pTsdb->lruCache, pTbData->uid, pLastRow, pMemTable->pTsdb); } - pTbData->minVersion = TMIN(pTbData->minVersion, version); - pTbData->maxVersion = TMAX(pTbData->maxVersion, version); - pTbData->maxSkmVer = TMAX(pTbData->maxSkmVer, pMsgIter->sversion); - // SMemTable pMemTable->minKey = TMIN(pMemTable->minKey, pTbData->minKey); pMemTable->maxKey = TMAX(pMemTable->maxKey, pTbData->maxKey); - pMemTable->minVersion = TMIN(pMemTable->minVersion, pTbData->minVersion); - pMemTable->maxVersion = TMAX(pMemTable->maxVersion, pTbData->maxVersion); 
pMemTable->nRow += nRow; pRsp->numOfRows = nRow; @@ -624,3 +633,41 @@ void tsdbUnrefMemTable(SMemTable *pMemTable) { tsdbMemTableDestroy(pMemTable); } } + +static FORCE_INLINE int32_t tbDataPCmprFn(const void *p1, const void *p2) { + STbData *pTbData1 = *(STbData **)p1; + STbData *pTbData2 = *(STbData **)p2; + + if (pTbData1->suid < pTbData2->suid) { + return -1; + } else if (pTbData1->suid > pTbData2->suid) { + return 1; + } + + if (pTbData1->uid < pTbData2->uid) { + return -1; + } else if (pTbData1->uid > pTbData2->uid) { + return 1; + } + + return 0; +} + +SArray *tsdbMemTableGetTbDataArray(SMemTable *pMemTable) { + SArray *aTbDataP = taosArrayInit(pMemTable->nTbData, sizeof(STbData *)); + if (aTbDataP == NULL) goto _exit; + + for (int32_t iBucket = 0; iBucket < pMemTable->nBucket; iBucket++) { + STbData *pTbData = pMemTable->aBucket[iBucket]; + + while (pTbData) { + taosArrayPush(aTbDataP, &pTbData); + pTbData = pTbData->next; + } + } + + taosArraySort(aTbDataP, tbDataPCmprFn); + +_exit: + return aTbDataP; +} diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index 335b311d00..fc7595627d 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -15,7 +15,10 @@ #include "osDef.h" #include "tsdb.h" + #define ASCENDING_TRAVERSE(o) (o == TSDB_ORDER_ASC) +#define ALL_ROWS_CHECKED_INDEX (INT16_MIN) +#define DEFAULT_ROW_INDEX_VAL (-1) typedef enum { EXTERNAL_ROWS_PREV = 0x1, @@ -29,16 +32,23 @@ typedef struct { bool hasVal; } SIterInfo; +typedef struct { + int32_t numOfBlocks; + int32_t numOfLastBlocks; +} SBlockNumber; + typedef struct STableBlockScanInfo { uint64_t uid; TSKEY lastKey; - SMapData mapData; // block info (compressed) - SArray* pBlockList; // block data index list - SIterInfo iter; // mem buffer skip list iterator - SIterInfo iiter; // imem buffer skip list iterator - SArray* delSkyline; // delete info for this table - int32_t fileDelIndex; - bool iterInit; // whether to 
initialize the in-memory skip list iterator or not + SMapData mapData; // block info (compressed) + SArray* pBlockList; // block data index list + SIterInfo iter; // mem buffer skip list iterator + SIterInfo iiter; // imem buffer skip list iterator + SArray* delSkyline; // delete info for this table + int32_t fileDelIndex; // file block delete index + int32_t lastBlockDelIndex;// delete index for last block + bool iterInit; // whether to initialize the in-memory skip list iterator or not + int16_t indexInBlockL;// row position in last block } STableBlockScanInfo; typedef struct SBlockOrderWrapper { @@ -71,11 +81,28 @@ typedef struct SBlockLoadSuppInfo { char** buildBuf; // build string tmp buffer, todo remove it later after all string format being updated. } SBlockLoadSuppInfo; +typedef struct SVersionRange { + uint64_t minVer; + uint64_t maxVer; +} SVersionRange; + +typedef struct SLastBlockReader { + SArray* pBlockL; + int32_t currentBlockIndex; + SBlockData lastBlockData; + STimeWindow window; + SVersionRange verRange; + int32_t order; + uint64_t uid; + int16_t* rowIndex; // row index ptr, usually from the STableBlockScanInfo->indexInBlockL +} SLastBlockReader; + typedef struct SFilesetIter { - int32_t numOfFiles; // number of total files - int32_t index; // current accessed index in the list - SArray* pFileList; // data file list + int32_t numOfFiles; // number of total files + int32_t index; // current accessed index in the list + SArray* pFileList; // data file list int32_t order; + SLastBlockReader* pLastBlockReader; // last file block reader } SFilesetIter; typedef struct SFileDataBlockInfo { @@ -87,9 +114,9 @@ typedef struct SFileDataBlockInfo { typedef struct SDataBlockIter { int32_t numOfBlocks; int32_t index; - SArray* blockList; // SArray + SArray* blockList; // SArray int32_t order; - SBlock block; // current SBlock data + SBlock block; // current SBlock data SHashObj* pTableMap; } SDataBlockIter; @@ -100,11 +127,6 @@ typedef struct SFileBlockDumpInfo 
{ bool allDumped; } SFileBlockDumpInfo; -typedef struct SVersionRange { - uint64_t minVer; - uint64_t maxVer; -} SVersionRange; - typedef struct SReaderStatus { bool loadFromFile; // check file stage SHashObj* pTableMap; // SHash @@ -145,10 +167,11 @@ static int buildDataBlockFromBufImpl(STableBlockScanInfo* pBlockScanInfo, i static TSDBROW* getValidRow(SIterInfo* pIter, const SArray* pDelList, STsdbReader* pReader); static int32_t doMergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pScanInfo, STsdbReader* pReader, SRowMerger* pMerger); +static int32_t doMergeRowsInLastBlock(SLastBlockReader* pLastBlockReader, STableBlockScanInfo* pScanInfo, int64_t ts, SRowMerger* pMerger); static int32_t doMergeRowsInBuf(SIterInfo* pIter, uint64_t uid, int64_t ts, SArray* pDelList, SRowMerger* pMerger, STsdbReader* pReader); static int32_t doAppendRowFromTSRow(SSDataBlock* pBlock, STsdbReader* pReader, STSRow* pTSRow, uint64_t uid); -static int32_t doAppendRowFromBlock(SSDataBlock* pResBlock, STsdbReader* pReader, SBlockData* pBlockData, +static int32_t doAppendRowFromFileBlock(SSDataBlock* pResBlock, STsdbReader* pReader, SBlockData* pBlockData, int32_t rowIndex); static void setComposedBlockFlag(STsdbReader* pReader, bool composed); static bool hasBeenDropped(const SArray* pDelList, int32_t* index, TSDBKEY* pKey, int32_t order); @@ -162,6 +185,9 @@ static int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, STsdb static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* retentions, const char* idstr, int8_t* pLevel); static SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, int8_t level); +static int64_t getCurrentKeyInLastBlock(SLastBlockReader* pLastBlockReader); +static bool hasDataInLastBlock(SLastBlockReader* pLastBlockReader); +static int32_t doBuildDataBlock(STsdbReader* pReader); static int32_t setColumnIdSlotList(STsdbReader* pReader, SSDataBlock* pBlock) { SBlockLoadSuppInfo* pSupInfo = 
&pReader->suppInfo; @@ -182,7 +208,6 @@ static int32_t setColumnIdSlotList(STsdbReader* pReader, SSDataBlock* pBlock) { if (IS_VAR_DATA_TYPE(pCol->info.type)) { pSupInfo->buildBuf[i] = taosMemoryMalloc(pCol->info.bytes); - // tsdbInfo("-------------------%d\n", pCol->info.bytes); } } @@ -199,7 +224,7 @@ static SHashObj* createDataBlockScanInfo(STsdbReader* pTsdbReader, const STableK } for (int32_t j = 0; j < numOfTables; ++j) { - STableBlockScanInfo info = {.lastKey = 0, .uid = idList[j].uid}; + STableBlockScanInfo info = {.lastKey = 0, .uid = idList[j].uid, .indexInBlockL = DEFAULT_ROW_INDEX_VAL}; if (ASCENDING_TRAVERSE(pTsdbReader->order)) { if (info.lastKey == INT64_MIN || info.lastKey < pTsdbReader->window.skey) { info.lastKey = pTsdbReader->window.skey; @@ -293,15 +318,35 @@ static void limitOutputBufferSize(const SQueryTableDataCond* pCond, int32_t* cap } // init file iterator -static int32_t initFilesetIterator(SFilesetIter* pIter, SArray* aDFileSet, int32_t order, const char* idstr) { +static int32_t initFilesetIterator(SFilesetIter* pIter, SArray* aDFileSet, STsdbReader* pReader/*int32_t order, const char* idstr*/) { size_t numOfFileset = taosArrayGetSize(aDFileSet); - pIter->index = ASCENDING_TRAVERSE(order) ? -1 : numOfFileset; - pIter->order = order; + pIter->index = ASCENDING_TRAVERSE(pReader->order) ? 
-1 : numOfFileset; + pIter->order = pReader->order; pIter->pFileList = aDFileSet; pIter->numOfFiles = numOfFileset; - tsdbDebug("init fileset iterator, total files:%d %s", pIter->numOfFiles, idstr); + if (pIter->pLastBlockReader == NULL) { + pIter->pLastBlockReader = taosMemoryCalloc(1, sizeof(struct SLastBlockReader)); + if (pIter->pLastBlockReader == NULL) { + int32_t code = TSDB_CODE_OUT_OF_MEMORY; + tsdbError("failed to prepare the last block iterator, code:%d %s", tstrerror(code), pReader->idStr); + return code; + } + + SLastBlockReader* pLReader = pIter->pLastBlockReader; + pLReader->pBlockL = taosArrayInit(4, sizeof(SBlockL)); + pLReader->order = pReader->order; + pLReader->window = pReader->window; + pLReader->verRange = pReader->verRange; + + int32_t code = tBlockDataCreate(&pLReader->lastBlockData); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + + tsdbDebug("init fileset iterator, total files:%d %s", pIter->numOfFiles, pReader->idStr); return TSDB_CODE_SUCCESS; } @@ -361,7 +406,7 @@ _err: static void resetDataBlockIterator(SDataBlockIter* pIter, int32_t order, SHashObj* pTableMap) { pIter->order = order; pIter->index = -1; - pIter->numOfBlocks = -1; + pIter->numOfBlocks = 0; if (pIter->blockList == NULL) { pIter->blockList = taosArrayInit(4, sizeof(SFileDataBlockInfo)); } else { @@ -419,7 +464,7 @@ static int32_t tsdbReaderCreate(SVnode* pVnode, SQueryTableDataCond* pCond, STsd pReader->pTsdb = getTsdbByRetentions(pVnode, pCond->twindows.skey, pVnode->config.tsdbCfg.retentions, idstr, &level); pReader->suid = pCond->suid; pReader->order = pCond->order; - pReader->capacity = capacity; + pReader->capacity = 4096; pReader->idStr = (idstr != NULL) ? 
strdup(idstr) : NULL; pReader->verRange = getQueryVerRange(pVnode, pCond, level); pReader->type = pCond->type; @@ -440,7 +485,7 @@ static int32_t tsdbReaderCreate(SVnode* pVnode, SQueryTableDataCond* pCond, STsd pSup->tsColAgg.colId = PRIMARYKEY_TIMESTAMP_COL_ID; - code = tBlockDataInit(&pReader->status.fileBlockData); + code = tBlockDataCreate(&pReader->status.fileBlockData); if (code != TSDB_CODE_SUCCESS) { terrno = code; goto _end; @@ -547,14 +592,14 @@ static int32_t doLoadBlockIndex(STsdbReader* pReader, SDataFReader* pFileReader, SArray* aBlockIdx = taosArrayInit(8, sizeof(SBlockIdx)); int64_t st = taosGetTimestampUs(); - int32_t code = tsdbReadBlockIdx(pFileReader, aBlockIdx, NULL); + int32_t code = tsdbReadBlockIdx(pFileReader, aBlockIdx); if (code != TSDB_CODE_SUCCESS) { goto _end; } size_t num = taosArrayGetSize(aBlockIdx); if (num == 0) { - taosArrayClear(aBlockIdx); + taosArrayDestroy(aBlockIdx); return TSDB_CODE_SUCCESS; } @@ -594,24 +639,29 @@ _end: return code; } -static int32_t doLoadFileBlock(STsdbReader* pReader, SArray* pIndexList, uint32_t* numOfValidTables, - int32_t* numOfBlocks) { - size_t numOfTables = taosArrayGetSize(pIndexList); - *numOfValidTables = 0; - - int64_t st = taosGetTimestampUs(); - size_t size = 0; - +static void cleanupTableScanInfo(SHashObj* pTableMap) { STableBlockScanInfo* px = NULL; while (1) { - px = taosHashIterate(pReader->status.pTableMap, px); + px = taosHashIterate(pTableMap, px); if (px == NULL) { break; } + // reset the index in last block when handing a new file + px->indexInBlockL = -1; tMapDataClear(&px->mapData); taosArrayClear(px->pBlockList); } +} + +static int32_t doLoadFileBlock(STsdbReader* pReader, SArray* pIndexList, SArray* pLastBlockIndex, + SBlockNumber * pBlockNum, SArray* pQualifiedLastBlock) { + int32_t numOfQTable = 0; + size_t sizeInDisk = 0; + size_t numOfTables = taosArrayGetSize(pIndexList); + + int64_t st = taosGetTimestampUs(); + cleanupTableScanInfo(pReader->status.pTableMap); for (int32_t 
i = 0; i < numOfTables; ++i) { SBlockIdx* pBlockIdx = taosArrayGet(pIndexList, i); @@ -619,9 +669,9 @@ static int32_t doLoadFileBlock(STsdbReader* pReader, SArray* pIndexList, uint32_ STableBlockScanInfo* pScanInfo = taosHashGet(pReader->status.pTableMap, &pBlockIdx->uid, sizeof(int64_t)); tMapDataReset(&pScanInfo->mapData); - tsdbReadBlock(pReader->pFileReader, pBlockIdx, &pScanInfo->mapData, NULL); + tsdbReadBlock(pReader->pFileReader, pBlockIdx, &pScanInfo->mapData); - size += pScanInfo->mapData.nData; + sizeInDisk += pScanInfo->mapData.nData; for (int32_t j = 0; j < pScanInfo->mapData.nItem; ++j) { SBlock block = {0}; tMapDataGetItemByIdx(&pScanInfo->mapData, j, &block, tGetBlock); @@ -632,7 +682,7 @@ static int32_t doLoadFileBlock(STsdbReader* pReader, SArray* pIndexList, uint32_ } // 2. version range check - if (block.minVersion > pReader->verRange.maxVer || block.maxVersion < pReader->verRange.minVer) { + if (block.minVer > pReader->verRange.maxVer || block.maxVer < pReader->verRange.minVer) { continue; } @@ -642,30 +692,54 @@ static int32_t doLoadFileBlock(STsdbReader* pReader, SArray* pIndexList, uint32_ return TSDB_CODE_OUT_OF_MEMORY; } - (*numOfBlocks) += 1; + pBlockNum->numOfBlocks += 1; } if (pScanInfo->pBlockList != NULL && taosArrayGetSize(pScanInfo->pBlockList) > 0) { - (*numOfValidTables) += 1; + numOfQTable += 1; } } - double el = (taosGetTimestampUs() - st) / 1000.0; - tsdbDebug("load block of %d tables completed, blocks:%d in %d tables, size:%.2f Kb, elapsed time:%.2f ms %s", - numOfTables, *numOfBlocks, *numOfValidTables, size / 1000.0, el, pReader->idStr); + size_t numOfLast = taosArrayGetSize(pLastBlockIndex); + for(int32_t i = 0; i < numOfLast; ++i) { + SBlockL* pLastBlock = taosArrayGet(pLastBlockIndex, i); + if (pLastBlock->suid != pReader->suid) { + continue; + } - pReader->cost.numOfBlocks += (*numOfBlocks); + { + // 1. 
time range check + if (pLastBlock->minKey > pReader->window.ekey || pLastBlock->maxKey < pReader->window.skey) { + continue; + } + + // 2. version range check + if (pLastBlock->minVer > pReader->verRange.maxVer || pLastBlock->maxVer < pReader->verRange.minVer) { + continue; + } + + pBlockNum->numOfLastBlocks += 1; + taosArrayPush(pQualifiedLastBlock, pLastBlock); + } + } + + int32_t total = pBlockNum->numOfLastBlocks + pBlockNum->numOfBlocks; + + double el = (taosGetTimestampUs() - st) / 1000.0; + tsdbDebug("load block of %d tables completed, blocks:%d in %d tables, lastBlock:%d, size:%.2f Kb, elapsed time:%.2f ms %s", + numOfTables, total, numOfQTable, pBlockNum->numOfLastBlocks, sizeInDisk + / 1000.0, el, pReader->idStr); + + pReader->cost.numOfBlocks += total; pReader->cost.headFileLoadTime += el; return TSDB_CODE_SUCCESS; } -// todo remove pblock parameter -static void setBlockAllDumped(SFileBlockDumpInfo* pDumpInfo, SBlock* pBlock, int32_t order) { +static void setBlockAllDumped(SFileBlockDumpInfo* pDumpInfo, int64_t maxKey, int32_t order) { int32_t step = ASCENDING_TRAVERSE(order) ? 
1 : -1; - pDumpInfo->allDumped = true; - pDumpInfo->lastKey = pBlock->maxKey.ts + step; + pDumpInfo->lastKey = maxKey + step; } static void doCopyColVal(SColumnInfoData* pColInfoData, int32_t rowIndex, int32_t colIndex, SColVal* pColVal, @@ -685,8 +759,13 @@ static void doCopyColVal(SColumnInfoData* pColInfoData, int32_t rowIndex, int32_ } static SFileDataBlockInfo* getCurrentBlockInfo(SDataBlockIter* pBlockIter) { - SFileDataBlockInfo* pFBlockInfo = taosArrayGet(pBlockIter->blockList, pBlockIter->index); - return pFBlockInfo; + if (taosArrayGetSize(pBlockIter->blockList) == 0) { + ASSERT(pBlockIter->numOfBlocks == taosArrayGetSize(pBlockIter->blockList)); + return NULL; + } + + SFileDataBlockInfo* pBlockInfo = taosArrayGet(pBlockIter->blockList, pBlockIter->index); + return pBlockInfo; } static SBlock* getCurrentBlock(SDataBlockIter* pBlockIter) { return &pBlockIter->block; } @@ -736,19 +815,20 @@ static int32_t copyBlockDataToSDataBlock(STsdbReader* pReader, STableBlockScanIn pColData = taosArrayGet(pResBlock->pDataBlock, i); SColData* pData = tBlockDataGetColDataByIdx(pBlockData, colIndex); - - if (pData->cid == pColData->info.colId) { + if (pData->cid < pColData->info.colId) { + colIndex += 1; + } else if (pData->cid == pColData->info.colId) { for (int32_t j = pDumpInfo->rowIndex; j < endIndex && j >= 0; j += step) { tColDataGetValue(pData, j, &cv); doCopyColVal(pColData, rowIndex++, i, &cv, pSupInfo); } colIndex += 1; + i += 1; ASSERT(rowIndex == remain); } else { // the specified column does not exist in file block, fill with null data colDataAppendNNULL(pColData, 0, remain); + i += 1; } - - i += 1; } while (i < numOfOutputCols) { @@ -760,7 +840,7 @@ static int32_t copyBlockDataToSDataBlock(STsdbReader* pReader, STableBlockScanIn pResBlock->info.rows = remain; pDumpInfo->rowIndex += step * remain; - setBlockAllDumped(pDumpInfo, pBlock, pReader->order); + setBlockAllDumped(pDumpInfo, pBlock->maxKey.ts, pReader->order); double elapsedTime = 
(taosGetTimestampUs() - st) / 1000.0; pReader->cost.blockLoadTime += elapsedTime; @@ -769,47 +849,77 @@ static int32_t copyBlockDataToSDataBlock(STsdbReader* pReader, STableBlockScanIn tsdbDebug("%p copy file block to sdatablock, global index:%d, table index:%d, brange:%" PRId64 "-%" PRId64 ", rows:%d, remain:%d, minVer:%" PRId64 ", maxVer:%" PRId64 ", elapsed time:%.2f ms, %s", pReader, pBlockIter->index, pFBlock->tbBlockIdx, pBlock->minKey.ts, pBlock->maxKey.ts, remain, unDumpedRows, - pBlock->minVersion, pBlock->maxVersion, elapsedTime, pReader->idStr); + pBlock->minVer, pBlock->maxVer, elapsedTime, pReader->idStr); return TSDB_CODE_SUCCESS; } -static int32_t doLoadFileBlockData(STsdbReader* pReader, SDataBlockIter* pBlockIter, - STableBlockScanInfo* pBlockScanInfo, SBlockData* pBlockData) { +static int32_t doLoadFileBlockData(STsdbReader* pReader, SDataBlockIter* pBlockIter, SBlockData* pBlockData) { int64_t st = taosGetTimestampUs(); + double elapsedTime = 0; + int32_t code = 0; - SFileDataBlockInfo* pFBlock = getCurrentBlockInfo(pBlockIter); - SBlock* pBlock = getCurrentBlock(pBlockIter); - - SSDataBlock* pResBlock = pReader->pResBlock; - int32_t numOfCols = blockDataGetNumOfCols(pResBlock); - - SBlockLoadSuppInfo* pSupInfo = &pReader->suppInfo; + SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(pBlockIter); SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; - SBlockIdx blockIdx = {.suid = pReader->suid, .uid = pBlockScanInfo->uid}; - int32_t code = - tsdbReadColData(pReader->pFileReader, &blockIdx, pBlock, pSupInfo->colIds, numOfCols, pBlockData, NULL, NULL); - if (code != TSDB_CODE_SUCCESS) { - goto _error; + if (pBlockInfo != NULL) { + SBlock* pBlock = getCurrentBlock(pBlockIter); + code = tsdbReadDataBlock(pReader->pFileReader, pBlock, pBlockData); + if (code != TSDB_CODE_SUCCESS) { + tsdbError("%p error occurs in loading file block, global index:%d, table index:%d, brange:%" PRId64 "-%" PRId64 + ", rows:%d, code:%s %s", + pReader, 
pBlockIter->index, pBlockInfo->tbBlockIdx, pBlock->minKey.ts, pBlock->maxKey.ts, pBlock->nRow, + tstrerror(code), pReader->idStr); + goto _error; + } + + elapsedTime = (taosGetTimestampUs() - st) / 1000.0; + + tsdbDebug("%p load file block into buffer, global index:%d, table index:%d, brange:%" PRId64 "-%" PRId64 + ", rows:%d, minVer:%" PRId64 ", maxVer:%" PRId64 ", elapsed time:%.2f ms, %s", + pReader, pBlockIter->index, pBlockInfo->tbBlockIdx, pBlock->minKey.ts, pBlock->maxKey.ts, pBlock->nRow, + pBlock->minVer, pBlock->maxVer, elapsedTime, pReader->idStr); + } else { +#if 0 + SLastBlockReader* pLastBlockReader = pReader->status.fileIter.pLastBlockReader; + + uint64_t uid = pBlockInfo->uid; + SArray* pBlocks = pLastBlockReader->pBlockL; + + pLastBlockReader->currentBlockIndex = -1; + + // find the correct SBlockL + for(int32_t i = 0; i < taosArrayGetSize(pBlocks); ++i) { + SBlockL* pBlock = taosArrayGet(pBlocks, i); + if (pBlock->minUid >= uid && pBlock->maxUid <= uid) { + pLastBlockReader->currentBlockIndex = i; + break; + } + } + +// SBlockL* pBlockL = taosArrayGet(pLastBlockReader->pBlockL, *index); + code = tsdbReadLastBlock(pReader->pFileReader, pBlockL, pBlockData); + if (code != TSDB_CODE_SUCCESS) { + tsdbDebug("%p error occurs in loading last block into buffer, last block index:%d, total:%d brange:%" PRId64 "-%" PRId64 + ", rows:%d, minVer:%" PRId64 ", maxVer:%" PRId64 ", code:%s %s", + pReader, *index, pBlockIter->numOfBlocks.numOfLastBlocks, 0, 0, pBlockL->nRow, + pBlockL->minVer, pBlockL->maxVer, tstrerror(code), pReader->idStr); + goto _error; + } + + tsdbDebug("%p load last file block into buffer, last block index:%d, total:%d brange:%" PRId64 "-%" PRId64 + ", rows:%d, minVer:%" PRId64 ", maxVer:%" PRId64 ", elapsed time:%.2f ms, %s", + pReader, *index, pBlockIter->numOfBlocks.numOfLastBlocks, 0, 0, pBlockL->nRow, + pBlockL->minVer, pBlockL->maxVer, elapsedTime, pReader->idStr); +#endif } - double elapsedTime = (taosGetTimestampUs() - st) / 1000.0; 
pReader->cost.blockLoadTime += elapsedTime; - pDumpInfo->allDumped = false; - tsdbDebug("%p load file block into buffer, global index:%d, table index:%d, brange:%" PRId64 "-%" PRId64 - ", rows:%d, minVer:%" PRId64 ", maxVer:%" PRId64 ", elapsed time:%.2f ms, %s", - pReader, pBlockIter->index, pFBlock->tbBlockIdx, pBlock->minKey.ts, pBlock->maxKey.ts, pBlock->nRow, - pBlock->minVersion, pBlock->maxVersion, elapsedTime, pReader->idStr); return TSDB_CODE_SUCCESS; _error: - tsdbError("%p error occurs in loading file block, global index:%d, table index:%d, brange:%" PRId64 "-%" PRId64 - ", rows:%d, %s", - pReader, pBlockIter->index, pFBlock->tbBlockIdx, pBlock->minKey.ts, pBlock->maxKey.ts, pBlock->nRow, - pReader->idStr); return code; } @@ -865,10 +975,11 @@ static int32_t fileDataBlockOrderCompar(const void* pLeft, const void* pRight, v static int32_t doSetCurrentBlock(SDataBlockIter* pBlockIter) { SFileDataBlockInfo* pFBlock = getCurrentBlockInfo(pBlockIter); - STableBlockScanInfo* pScanInfo = taosHashGet(pBlockIter->pTableMap, &pFBlock->uid, sizeof(pFBlock->uid)); - - int32_t* mapDataIndex = taosArrayGet(pScanInfo->pBlockList, pFBlock->tbBlockIdx); - tMapDataGetItemByIdx(&pScanInfo->mapData, *mapDataIndex, &pBlockIter->block, tGetBlock); + if (pFBlock != NULL) { + STableBlockScanInfo* pScanInfo = taosHashGet(pBlockIter->pTableMap, &pFBlock->uid, sizeof(pFBlock->uid)); + int32_t* mapDataIndex = taosArrayGet(pScanInfo->pBlockList, pFBlock->tbBlockIdx); + tMapDataGetItemByIdx(&pScanInfo->mapData, *mapDataIndex, &pBlockIter->block, tGetBlock); + } #if 0 qDebug("check file block, table uid:%"PRIu64" index:%d offset:%"PRId64", ", pScanInfo->uid, *mapDataIndex, pBlockIter->block.aSubBlock[0].offset); @@ -945,7 +1056,7 @@ static int32_t initBlockIterator(STsdbReader* pReader, SDataBlockIter* pBlockIte int64_t et = taosGetTimestampUs(); tsdbDebug("%p create blocks info struct completed for one table, %d blocks not sorted, elapsed time:%.2f ms %s", - pReader, cnt, (et - st) / 
1000.0, pReader->idStr); + pReader, numOfBlocks, (et - st) / 1000.0, pReader->idStr); pBlockIter->index = asc ? 0 : (numOfBlocks - 1); cleanupBlockOrderSupporter(&sup); @@ -956,7 +1067,7 @@ static int32_t initBlockIterator(STsdbReader* pReader, SDataBlockIter* pBlockIte tsdbDebug("%p create data blocks info struct completed, %d blocks in %d tables %s", pReader, cnt, sup.numOfTables, pReader->idStr); - assert(cnt <= numOfBlocks && sup.numOfTables <= numOfTables); + ASSERT(cnt <= numOfBlocks && sup.numOfTables <= numOfTables); SMultiwayMergeTreeInfo* pTree = NULL; uint8_t ret = tMergeTreeCreate(&pTree, sup.numOfTables, &sup, fileDataBlockOrderCompar); @@ -983,7 +1094,7 @@ static int32_t initBlockIterator(STsdbReader* pReader, SDataBlockIter* pBlockIte } int64_t et = taosGetTimestampUs(); - tsdbDebug("%p %d data blocks access order completed, elapsed time:%.2f ms %s", pReader, cnt, (et - st) / 1000.0, + tsdbDebug("%p %d data blocks access order completed, elapsed time:%.2f ms %s", pReader, numOfBlocks, (et - st) / 1000.0, pReader->idStr); cleanupBlockOrderSupporter(&sup); taosMemoryFree(pTree); @@ -1014,8 +1125,8 @@ static bool blockIteratorNext(SDataBlockIter* pBlockIter) { static int32_t dataBlockPartiallyRequired(STimeWindow* pWindow, SVersionRange* pVerRange, SBlock* pBlock) { return (pWindow->ekey < pBlock->maxKey.ts && pWindow->ekey >= pBlock->minKey.ts) || (pWindow->skey > pBlock->minKey.ts && pWindow->skey <= pBlock->maxKey.ts) || - (pVerRange->minVer > pBlock->minVersion && pVerRange->minVer <= pBlock->maxVersion) || - (pVerRange->maxVer < pBlock->maxVersion && pVerRange->maxVer >= pBlock->minVersion); + (pVerRange->minVer > pBlock->minVer && pVerRange->minVer <= pBlock->maxVer) || + (pVerRange->maxVer < pBlock->maxVer && pVerRange->maxVer >= pBlock->minVer); } static SBlock* getNeighborBlockOfSameTable(SFileDataBlockInfo* pFBlockInfo, STableBlockScanInfo* pTableBlockScanInfo, @@ -1095,8 +1206,8 @@ static bool bufferDataInFileBlockGap(int32_t order, TSDBKEY 
key, SBlock* pBlock) } static bool keyOverlapFileBlock(TSDBKEY key, SBlock* pBlock, SVersionRange* pVerRange) { - return (key.ts >= pBlock->minKey.ts && key.ts <= pBlock->maxKey.ts) && (pBlock->maxVersion >= pVerRange->minVer) && - (pBlock->minVersion <= pVerRange->maxVer); + return (key.ts >= pBlock->minKey.ts && key.ts <= pBlock->maxKey.ts) && (pBlock->maxVer >= pVerRange->minVer) && + (pBlock->minVer <= pVerRange->maxVer); } static bool doCheckforDatablockOverlap(STableBlockScanInfo* pBlockScanInfo, const SBlock* pBlock) { @@ -1105,11 +1216,11 @@ static bool doCheckforDatablockOverlap(STableBlockScanInfo* pBlockScanInfo, cons for (int32_t i = pBlockScanInfo->fileDelIndex; i < num; i += 1) { TSDBKEY* p = taosArrayGet(pBlockScanInfo->delSkyline, i); if (p->ts >= pBlock->minKey.ts && p->ts <= pBlock->maxKey.ts) { - if (p->version >= pBlock->minVersion) { + if (p->version >= pBlock->minVer) { return true; } } else if (p->ts < pBlock->minKey.ts) { // p->ts < pBlock->minKey.ts - if (p->version >= pBlock->minVersion) { + if (p->version >= pBlock->minVer) { if (i < num - 1) { TSDBKEY* pnext = taosArrayGet(pBlockScanInfo->delSkyline, i + 1); if (i + 1 == num - 1) { // pnext is the last point @@ -1117,7 +1228,7 @@ static bool doCheckforDatablockOverlap(STableBlockScanInfo* pBlockScanInfo, cons return true; } } else { - if (pnext->ts >= pBlock->minKey.ts && pnext->version >= pBlock->minVersion) { + if (pnext->ts >= pBlock->minKey.ts && pnext->version >= pBlock->minVer) { return true; } } @@ -1169,7 +1280,7 @@ static bool overlapWithDelSkyline(STableBlockScanInfo* pBlockScanInfo, const SBl // 4. output buffer should be large enough to hold all rows in current block // 5. 
delete info should not overlap with current block data static bool fileBlockShouldLoad(STsdbReader* pReader, SFileDataBlockInfo* pFBlock, SBlock* pBlock, - STableBlockScanInfo* pScanInfo, TSDBKEY key) { + STableBlockScanInfo* pScanInfo, TSDBKEY key, SLastBlockReader* pLastBlockReader) { int32_t neighborIndex = 0; SBlock* pNeighbor = getNeighborBlockOfSameTable(pFBlock, pScanInfo, &neighborIndex, pReader->order); @@ -1184,8 +1295,16 @@ static bool fileBlockShouldLoad(STsdbReader* pReader, SFileDataBlockInfo* pFBloc bool hasDup = (pBlock->nSubBlock == 1) ? pBlock->hasDup : true; bool overlapWithDel = overlapWithDelSkyline(pScanInfo, pBlock, pReader->order); + // todo here we need to each key in the last files to identify if it is really overlapped with last block + bool overlapWithlastBlock = false; + if (taosArrayGetSize(pLastBlockReader->pBlockL) > 0 && (pLastBlockReader->currentBlockIndex != -1)) { + SBlockL *pBlockL = taosArrayGet(pLastBlockReader->pBlockL, pLastBlockReader->currentBlockIndex); + overlapWithlastBlock = !(pBlock->maxKey.ts < pBlockL->minKey || pBlock->minKey.ts > pBlockL->maxKey); + } + return (overlapWithNeighbor || hasDup || dataBlockPartiallyRequired(&pReader->window, &pReader->verRange, pBlock) || - keyOverlapFileBlock(key, pBlock, &pReader->verRange) || (pBlock->nRow > pReader->capacity) || overlapWithDel); + keyOverlapFileBlock(key, pBlock, &pReader->verRange) || (pBlock->nRow > pReader->capacity) || + overlapWithDel || overlapWithlastBlock); } static int32_t buildDataBlockFromBuf(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo, int64_t endKey) { @@ -1224,7 +1343,7 @@ static bool tryCopyDistinctRowFromFileBlock(STsdbReader* pReader, SBlockData* pB int64_t nextKey = pBlockData->aTSKEY[pDumpInfo->rowIndex + step]; if (nextKey != key) { // merge is not needed - doAppendRowFromBlock(pReader->pResBlock, pReader, pBlockData, pDumpInfo->rowIndex); + doAppendRowFromFileBlock(pReader->pResBlock, pReader, pBlockData, pDumpInfo->rowIndex); 
pDumpInfo->rowIndex += step; return true; } @@ -1258,8 +1377,124 @@ static FORCE_INLINE STSchema* doGetSchemaForTSRow(int32_t sversion, STsdbReader* return pReader->pMemSchema; } +static int32_t doMergeBufAndFileRows_Rv(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo, TSDBROW* pRow, + SIterInfo* pIter, int64_t key, SLastBlockReader* pLastBlockReader) { + SRowMerger merge = {0}; + STSRow* pTSRow = NULL; + SBlockData* pBlockData = &pReader->status.fileBlockData; + SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; + + int64_t tsLast = INT64_MIN; + if (pLastBlockReader->lastBlockData.nRow > 0) { + tsLast = getCurrentKeyInLastBlock(pLastBlockReader); + } + + TSDBKEY k = TSDBROW_KEY(pRow); + TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex); + + SBlockData* pLastBlockData = &pLastBlockReader->lastBlockData; + + int64_t minKey = 0; + if (pReader->order == TSDB_ORDER_ASC) { + minKey = INT64_MAX; // chosen the minimum value + if (minKey > tsLast && pLastBlockReader->lastBlockData.nRow > 0) { + minKey = tsLast; + } + + if (minKey > k.ts) { + minKey = k.ts; + } + + if (minKey > key && pBlockData->nRow > 0) { + minKey = key; + } + } else { + minKey = INT64_MIN; + if (minKey < tsLast && pLastBlockReader->lastBlockData.nRow > 0) { + minKey = tsLast; + } + + if (minKey < k.ts) { + minKey = k.ts; + } + + if (minKey < key && pBlockData->nRow > 0) { + minKey = key; + } + } + + bool init = false; + + // ASC: file block ---> last block -----> imem -----> mem + //DESC: mem -----> imem -----> last block -----> file block + if (pReader->order == TSDB_ORDER_ASC) { + if (minKey == key) { + init = true; + tRowMergerInit(&merge, &fRow, pReader->pSchema); + doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge); + } + + if (minKey == tsLast) { + TSDBROW fRow1 = tsdbRowFromBlockData(pLastBlockData, *pLastBlockReader->rowIndex); + if (init) { + tRowMerge(&merge, &fRow1); + } else { + init = true; + tRowMergerInit(&merge, &fRow1, 
pReader->pSchema); + } + doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, tsLast, &merge); + } + + if (minKey == k.ts) { + if (init) { + tRowMerge(&merge, pRow); + } else { + init = true; + STSchema* pSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(pRow), pReader, pBlockScanInfo->uid); + tRowMergerInit(&merge, pRow, pSchema); + } + doMergeRowsInBuf(pIter, pBlockScanInfo->uid, k.ts, pBlockScanInfo->delSkyline, &merge, pReader); + } + } else { + if (minKey == k.ts) { + init = true; + STSchema* pSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(pRow), pReader, pBlockScanInfo->uid); + tRowMergerInit(&merge, pRow, pSchema); + doMergeRowsInBuf(pIter, pBlockScanInfo->uid, k.ts, pBlockScanInfo->delSkyline, &merge, pReader); + } + + if (minKey == tsLast) { + TSDBROW fRow1 = tsdbRowFromBlockData(pLastBlockData, *pLastBlockReader->rowIndex); + if (init) { + tRowMerge(&merge, &fRow1); + } else { + init = true; + tRowMergerInit(&merge, &fRow1, pReader->pSchema); + } + doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, tsLast, &merge); + } + + if (minKey == key) { + if (init) { + tRowMerge(&merge, &fRow); + } else { + init = true; + tRowMergerInit(&merge, &fRow, pReader->pSchema); + } + doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge); + } + } + + tRowMergerGetRow(&merge, &pTSRow); + doAppendRowFromTSRow(pReader->pResBlock, pReader, pTSRow, pBlockScanInfo->uid); + + taosMemoryFree(pTSRow); + tRowMergerClear(&merge); + return TSDB_CODE_SUCCESS; +} + static int32_t doMergeBufAndFileRows(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo, TSDBROW* pRow, - SIterInfo* pIter, int64_t key) { + SIterInfo* pIter, int64_t key, SLastBlockReader* pLastBlockReader) { SRowMerger merge = {0}; STSRow* pTSRow = NULL; SBlockData* pBlockData = &pReader->status.fileBlockData; @@ -1331,12 +1566,159 @@ static int32_t doMergeBufAndFileRows(STsdbReader* pReader, STableBlockScanInfo* return TSDB_CODE_SUCCESS; } -static int32_t doMergeThreeLevelRows(STsdbReader* 
pReader, STableBlockScanInfo* pBlockScanInfo) { +static int32_t doMergeMultiLevelRowsRv(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo, SBlockData* pBlockData, SLastBlockReader* pLastBlockReader) { + SRowMerger merge = {0}; + STSRow* pTSRow = NULL; + + SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; + SArray* pDelList = pBlockScanInfo->delSkyline; + + TSDBROW* pRow = getValidRow(&pBlockScanInfo->iter, pDelList, pReader); + TSDBROW* piRow = getValidRow(&pBlockScanInfo->iiter, pDelList, pReader); + ASSERT(pRow != NULL && piRow != NULL); + + SBlockData* pLastBlockData = &pLastBlockReader->lastBlockData; + int64_t tsLast = getCurrentKeyInLastBlock(pLastBlockReader); + + int64_t key = pBlockData->aTSKEY[pDumpInfo->rowIndex]; + + TSDBKEY k = TSDBROW_KEY(pRow); + TSDBKEY ik = TSDBROW_KEY(piRow); + + int64_t minKey = 0;//INT64_MAX; + if (ASCENDING_TRAVERSE(pReader->order)) { + minKey = INT64_MAX; // let's find the minimum + if (minKey > k.ts) { + minKey = k.ts; + } + + if (minKey > ik.ts) { + minKey = ik.ts; + } + + if (minKey > key && pBlockData->nRow > 0) { + minKey = key; + } + + if (minKey > tsLast && pLastBlockData->nRow > 0) { + minKey = tsLast; + } + } else { + minKey = INT64_MIN; // let find the maximum ts value + if (minKey < k.ts) { + minKey = k.ts; + } + + if (minKey < ik.ts) { + minKey = ik.ts; + } + + if (minKey < key && pBlockData->nRow > 0) { + minKey = key; + } + + if (minKey < tsLast && pLastBlockData->nRow > 0) { + minKey = tsLast; + } + } + + bool init = false; + + // ASC: file block -----> last block -----> imem -----> mem + // DESC: mem -----> imem -----> last block -----> file block + if (ASCENDING_TRAVERSE(pReader->order)) { + if (minKey == key) { + init = true; + TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex); + tRowMergerInit(&merge, &fRow, pReader->pSchema); + doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge); + } + + if (minKey == tsLast) { + TSDBROW fRow1 = 
tsdbRowFromBlockData(pLastBlockData, *pLastBlockReader->rowIndex); + if (init) { + tRowMerge(&merge, &fRow1); + } else { + init = true; + tRowMergerInit(&merge, &fRow1, pReader->pSchema); + } + doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, tsLast, &merge); + } + + if (minKey == ik.ts) { + if (init) { + tRowMerge(&merge, piRow); + } else { + init = true; + STSchema* pSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(piRow), pReader, pBlockScanInfo->uid); + tRowMergerInit(&merge, piRow, pSchema); + } + doMergeRowsInBuf(&pBlockScanInfo->iiter, pBlockScanInfo->uid, ik.ts, pBlockScanInfo->delSkyline, &merge, pReader); + } + + if (minKey == k.ts) { + if (init) { + tRowMerge(&merge, pRow); + } else { + STSchema* pSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(pRow), pReader, pBlockScanInfo->uid); + tRowMergerInit(&merge, pRow, pSchema); + } + doMergeRowsInBuf(&pBlockScanInfo->iter, pBlockScanInfo->uid, k.ts, pBlockScanInfo->delSkyline, &merge, pReader); + } + } else { + if (minKey == k.ts) { + init = true; + STSchema* pSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(pRow), pReader, pBlockScanInfo->uid); + tRowMergerInit(&merge, pRow, pSchema); + doMergeRowsInBuf(&pBlockScanInfo->iter, pBlockScanInfo->uid, k.ts, pBlockScanInfo->delSkyline, &merge, pReader); + } + + if (minKey == ik.ts) { + if (init) { + tRowMerge(&merge, piRow); + } else { + init = true; + STSchema* pSchema = doGetSchemaForTSRow(TSDBROW_SVERSION(piRow), pReader, pBlockScanInfo->uid); + tRowMergerInit(&merge, piRow, pSchema); + } + doMergeRowsInBuf(&pBlockScanInfo->iiter, pBlockScanInfo->uid, ik.ts, pBlockScanInfo->delSkyline, &merge, pReader); + } + + if (minKey == tsLast) { + TSDBROW fRow1 = tsdbRowFromBlockData(pLastBlockData, *pLastBlockReader->rowIndex); + if (init) { + tRowMerge(&merge, &fRow1); + } else { + init = true; + tRowMergerInit(&merge, &fRow1, pReader->pSchema); + } + doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, tsLast, &merge); + } + + if (minKey == key) { + TSDBROW fRow = 
tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex); + if (!init) { + tRowMergerInit(&merge, &fRow, pReader->pSchema); + } else { + tRowMerge(&merge, &fRow); + } + doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge); + } + } + + tRowMergerGetRow(&merge, &pTSRow); + doAppendRowFromTSRow(pReader->pResBlock, pReader, pTSRow, pBlockScanInfo->uid); + + taosMemoryFree(pTSRow); + tRowMergerClear(&merge); + return TSDB_CODE_SUCCESS; +} + +static int32_t doMergeThreeLevelRows(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo, SBlockData* pBlockData) { SRowMerger merge = {0}; STSRow* pTSRow = NULL; SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; - SBlockData* pBlockData = &pReader->status.fileBlockData; SArray* pDelList = pBlockScanInfo->delSkyline; TSDBROW* pRow = getValidRow(&pBlockScanInfo->iter, pDelList, pReader); @@ -1477,6 +1859,14 @@ static int32_t doMergeThreeLevelRows(STsdbReader* pReader, STableBlockScanInfo* static bool isValidFileBlockRow(SBlockData* pBlockData, SFileBlockDumpInfo* pDumpInfo, STableBlockScanInfo* pBlockScanInfo, STsdbReader* pReader) { + // it is an multi-table data block + if (pBlockData->aUid != NULL) { + uint64_t uid = pBlockData->aUid[pDumpInfo->rowIndex]; + if (uid != pBlockScanInfo->uid) { // move to next row + return false; + } + } + // check for version and time range int64_t ver = pBlockData->aVersion[pDumpInfo->rowIndex]; if (ver > pReader->verRange.maxVer || ver < pReader->verRange.minVer) { @@ -1498,39 +1888,191 @@ static bool isValidFileBlockRow(SBlockData* pBlockData, SFileBlockDumpInfo* pDum static bool outOfTimeWindow(int64_t ts, STimeWindow* pWindow) { return (ts > pWindow->ekey) || (ts < pWindow->skey); } -static int32_t buildComposedDataBlockImpl(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo) { - SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; - SBlockData* pBlockData = &pReader->status.fileBlockData; +static void initLastBlockReader(SLastBlockReader* 
pLastBlockReader, uint64_t uid, int16_t* startPos) { + pLastBlockReader->uid = uid; + pLastBlockReader->rowIndex = startPos; - int64_t key = pBlockData->aTSKEY[pDumpInfo->rowIndex]; + if (*startPos == -1) { + if (ASCENDING_TRAVERSE(pLastBlockReader->order)) { + // do nothing + } else { + *startPos = pLastBlockReader->lastBlockData.nRow; + } + } +} + +static void setAllRowsChecked(SLastBlockReader *pLastBlockReader) { + *pLastBlockReader->rowIndex = ALL_ROWS_CHECKED_INDEX; +} + +static bool nextRowInLastBlock(SLastBlockReader *pLastBlockReader, STableBlockScanInfo* pBlockScanInfo) { + int32_t step = (pLastBlockReader->order == TSDB_ORDER_ASC) ? 1 : -1; + if (*pLastBlockReader->rowIndex == ALL_ROWS_CHECKED_INDEX) { + return false; + } + + *(pLastBlockReader->rowIndex) += step; + + SBlockData* pBlockData = &pLastBlockReader->lastBlockData; + for(int32_t i = *(pLastBlockReader->rowIndex); i < pBlockData->nRow && i >= 0; i += step) { + if (pBlockData->aUid != NULL && pBlockData->aUid[i] != pLastBlockReader->uid) { + continue; + } + + int64_t ts = pBlockData->aTSKEY[i]; + if (ts < pLastBlockReader->window.skey) { + continue; + } + + int64_t ver = pBlockData->aVersion[i]; + if (ver < pLastBlockReader->verRange.minVer) { + continue; + } + + // no data any more, todo opt handle desc case + if (ts > pLastBlockReader->window.ekey) { + continue; + } + + // todo opt handle desc case + if (ver > pLastBlockReader->verRange.maxVer) { + continue; + } + + TSDBKEY k = {.ts = ts, .version = ver}; + if (hasBeenDropped(pBlockScanInfo->delSkyline, &pBlockScanInfo->lastBlockDelIndex, &k, pLastBlockReader->order)) { + continue; + } + + *(pLastBlockReader->rowIndex) = i; + return true; + } + + // set all data is consumed in last block + setAllRowsChecked(pLastBlockReader); + return false; +} + +static int64_t getCurrentKeyInLastBlock(SLastBlockReader* pLastBlockReader) { + SBlockData* pBlockData = &pLastBlockReader->lastBlockData; + return pBlockData->aTSKEY[*pLastBlockReader->rowIndex]; +} 
+ +static bool hasDataInLastBlock(SLastBlockReader* pLastBlockReader) { + if (*pLastBlockReader->rowIndex == ALL_ROWS_CHECKED_INDEX) { + return false; + } + return true; +} + +// todo refactor +static int32_t buildComposedDataBlockImpl(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo, + SBlockData* pBlockData, SLastBlockReader* pLastBlockReader) { + SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; + + int64_t key = (pBlockData->nRow > 0)? pBlockData->aTSKEY[pDumpInfo->rowIndex]:INT64_MIN; TSDBROW* pRow = getValidRow(&pBlockScanInfo->iter, pBlockScanInfo->delSkyline, pReader); TSDBROW* piRow = getValidRow(&pBlockScanInfo->iiter, pBlockScanInfo->delSkyline, pReader); if (pBlockScanInfo->iter.hasVal && pBlockScanInfo->iiter.hasVal) { - return doMergeThreeLevelRows(pReader, pBlockScanInfo); + return doMergeMultiLevelRowsRv(pReader, pBlockScanInfo, pBlockData, pLastBlockReader); } else { - // imem + file + // imem + file + last block if (pBlockScanInfo->iiter.hasVal) { - return doMergeBufAndFileRows(pReader, pBlockScanInfo, piRow, &pBlockScanInfo->iiter, key); + return doMergeBufAndFileRows_Rv(pReader, pBlockScanInfo, piRow, &pBlockScanInfo->iiter, key, pLastBlockReader); } // mem + file if (pBlockScanInfo->iter.hasVal) { - return doMergeBufAndFileRows(pReader, pBlockScanInfo, pRow, &pBlockScanInfo->iter, key); + return doMergeBufAndFileRows_Rv(pReader, pBlockScanInfo, pRow, &pBlockScanInfo->iter, key, pLastBlockReader); } - // imem & mem are all empty, only file exist - if (tryCopyDistinctRowFromFileBlock(pReader, pBlockData, key, pDumpInfo)) { - return TSDB_CODE_SUCCESS; - } else { + if (pBlockData->nRow > 0) { TSDBROW fRow = tsdbRowFromBlockData(pBlockData, pDumpInfo->rowIndex); + // no last block + if (pLastBlockReader->lastBlockData.nRow == 0) { + if (tryCopyDistinctRowFromFileBlock(pReader, pBlockData, key, pDumpInfo)) { + return TSDB_CODE_SUCCESS; + } else { + STSRow* pTSRow = NULL; + SRowMerger merge = {0}; + + tRowMergerInit(&merge, 
&fRow, pReader->pSchema); + doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge); + tRowMergerGetRow(&merge, &pTSRow); + doAppendRowFromTSRow(pReader->pResBlock, pReader, pTSRow, pBlockScanInfo->uid); + + taosMemoryFree(pTSRow); + tRowMergerClear(&merge); + return TSDB_CODE_SUCCESS; + } + } + + // row in last file block + int64_t ts = getCurrentKeyInLastBlock(pLastBlockReader); + if (ts < key) { // save rows in last block + SBlockData* pLastBlockData = &pLastBlockReader->lastBlockData; + + STSRow* pTSRow = NULL; + SRowMerger merge = {0}; + + TSDBROW fRow1 = tsdbRowFromBlockData(pLastBlockData, *pLastBlockReader->rowIndex); + + tRowMergerInit(&merge, &fRow1, pReader->pSchema); + doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, ts, &merge); + tRowMergerGetRow(&merge, &pTSRow); + + doAppendRowFromTSRow(pReader->pResBlock, pReader, pTSRow, pBlockScanInfo->uid); + + taosMemoryFree(pTSRow); + tRowMergerClear(&merge); + return TSDB_CODE_SUCCESS; + } else if (ts == key) { + STSRow* pTSRow = NULL; + SRowMerger merge = {0}; + + tRowMergerInit(&merge, &fRow, pReader->pSchema); + doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge); + doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, ts, &merge); + + tRowMergerGetRow(&merge, &pTSRow); + doAppendRowFromTSRow(pReader->pResBlock, pReader, pTSRow, pBlockScanInfo->uid); + + taosMemoryFree(pTSRow); + tRowMergerClear(&merge); + return TSDB_CODE_SUCCESS; + } else { // ts > key, asc; todo handle desc + // imem & mem are all empty, only file exist + if (tryCopyDistinctRowFromFileBlock(pReader, pBlockData, key, pDumpInfo)) { + return TSDB_CODE_SUCCESS; + } else { + STSRow* pTSRow = NULL; + SRowMerger merge = {0}; + + tRowMergerInit(&merge, &fRow, pReader->pSchema); + doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge); + tRowMergerGetRow(&merge, &pTSRow); + doAppendRowFromTSRow(pReader->pResBlock, pReader, pTSRow, pBlockScanInfo->uid); + + taosMemoryFree(pTSRow); + 
tRowMergerClear(&merge); + return TSDB_CODE_SUCCESS; + } + } + } else { // only last block exists + SBlockData* pLastBlockData = &pLastBlockReader->lastBlockData; + int64_t tsLastBlock = getCurrentKeyInLastBlock(pLastBlockReader); + STSRow* pTSRow = NULL; SRowMerger merge = {0}; + TSDBROW fRow = tsdbRowFromBlockData(pLastBlockData, *pLastBlockReader->rowIndex); + tRowMergerInit(&merge, &fRow, pReader->pSchema); - doMergeRowsInFileBlocks(pBlockData, pBlockScanInfo, pReader, &merge); + doMergeRowsInLastBlock(pLastBlockReader, pBlockScanInfo, tsLastBlock, &merge); tRowMergerGetRow(&merge, &pTSRow); + doAppendRowFromTSRow(pReader->pResBlock, pReader, pTSRow, pBlockScanInfo->uid); taosMemoryFree(pTSRow); @@ -1540,41 +2082,59 @@ static int32_t buildComposedDataBlockImpl(STsdbReader* pReader, STableBlockScanI } } -static int32_t buildComposedDataBlock(STsdbReader* pReader, STableBlockScanInfo* pBlockScanInfo) { +static int32_t buildComposedDataBlock(STsdbReader* pReader) { SSDataBlock* pResBlock = pReader->pResBlock; + SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(&pReader->status.blockIter); + + STableBlockScanInfo* pBlockScanInfo = NULL; + if (pBlockInfo != NULL) { + pBlockScanInfo = taosHashGet(pReader->status.pTableMap, &pBlockInfo->uid, sizeof(pBlockInfo->uid)); + } else { + pBlockScanInfo = pReader->status.pTableIter; + } + + SLastBlockReader* pLastBlockReader = pReader->status.fileIter.pLastBlockReader; SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; SBlockData* pBlockData = &pReader->status.fileBlockData; int32_t step = ASCENDING_TRAVERSE(pReader->order) ? 
1 : -1; - int32_t numOfSub = 1; - int64_t st = taosGetTimestampUs(); while (1) { // todo check the validate of row in file block { - if (!isValidFileBlockRow(pBlockData, pDumpInfo, pBlockScanInfo, pReader)) { - pDumpInfo->rowIndex += step; + bool hasBlockData = false; - SBlock* pBlock = getCurrentBlock(&pReader->status.blockIter); - numOfSub = pBlock->nSubBlock; - - if (pDumpInfo->rowIndex >= pBlock->nRow || pDumpInfo->rowIndex < 0) { - setBlockAllDumped(pDumpInfo, pBlock, pReader->order); + while (pBlockData->nRow > 0) { // find the first qualified row in data block + if (isValidFileBlockRow(pBlockData, pDumpInfo, pBlockScanInfo, pReader)) { + hasBlockData = true; break; } - continue; + pDumpInfo->rowIndex += step; + + SBlock* pBlock = getCurrentBlock(&pReader->status.blockIter); + if (pDumpInfo->rowIndex >= pBlock->nRow || pDumpInfo->rowIndex < 0) { + setBlockAllDumped(pDumpInfo, pBlock->maxKey.ts, pReader->order); + break; + } + } + + bool hasBlockLData = hasDataInLastBlock(pLastBlockReader); + + // no data in last block and block, no need to proceed. 
+ if ((hasBlockData == false) && (hasBlockLData == false)) { + break; } } - buildComposedDataBlockImpl(pReader, pBlockScanInfo); - SBlock* pBlock = getCurrentBlock(&pReader->status.blockIter); + buildComposedDataBlockImpl(pReader, pBlockScanInfo, pBlockData, pLastBlockReader); // currently loaded file data block is consumed - if (pDumpInfo->rowIndex >= pBlock->nRow || pDumpInfo->rowIndex < 0) { - setBlockAllDumped(pDumpInfo, pBlock, pReader->order); + if ((pBlockData->nRow > 0) && (pDumpInfo->rowIndex >= pBlockData->nRow || pDumpInfo->rowIndex < 0)) { + SBlock* pBlock = getCurrentBlock(&pReader->status.blockIter); + setBlockAllDumped(pDumpInfo, pBlock->maxKey.ts, pReader->order); break; } @@ -1589,9 +2149,8 @@ static int32_t buildComposedDataBlock(STsdbReader* pReader, STableBlockScanInfo* setComposedBlockFlag(pReader, true); int64_t et = taosGetTimestampUs(); - tsdbDebug("%p uid:%" PRIu64 ", composed data block created, subBlock:%d, brange:%" PRIu64 "-%" PRIu64 - " rows:%d, elapsed time:%.2f ms %s", - pReader, pBlockScanInfo->uid, numOfSub, pResBlock->info.window.skey, pResBlock->info.window.ekey, + tsdbDebug("%p uid:%" PRIu64 ", composed data block created, brange:%" PRIu64 "-%" PRIu64 " rows:%d, elapsed time:%.2f ms %s", + pReader, pBlockScanInfo->uid, pResBlock->info.window.skey, pResBlock->info.window.ekey, pResBlock->info.rows, (et - st) / 1000.0, pReader->idStr); return TSDB_CODE_SUCCESS; @@ -1617,7 +2176,7 @@ static int32_t initMemDataIterator(STableBlockScanInfo* pBlockScanInfo, STsdbRea STbData* d = NULL; if (pReader->pReadSnap->pMem != NULL) { - tsdbGetTbDataFromMemTable(pReader->pReadSnap->pMem, pReader->suid, pBlockScanInfo->uid, &d); + d = tsdbGetTbDataFromMemTable(pReader->pReadSnap->pMem, pReader->suid, pBlockScanInfo->uid); if (d != NULL) { code = tsdbTbDataIterCreate(d, &startKey, backward, &pBlockScanInfo->iter.iter); if (code == TSDB_CODE_SUCCESS) { @@ -1638,7 +2197,7 @@ static int32_t initMemDataIterator(STableBlockScanInfo* pBlockScanInfo, 
STsdbRea STbData* di = NULL; if (pReader->pReadSnap->pIMem != NULL) { - tsdbGetTbDataFromMemTable(pReader->pReadSnap->pIMem, pReader->suid, pBlockScanInfo->uid, &di); + di = tsdbGetTbDataFromMemTable(pReader->pReadSnap->pIMem, pReader->suid, pBlockScanInfo->uid); if (di != NULL) { code = tsdbTbDataIterCreate(di, &startKey, backward, &pBlockScanInfo->iiter.iter); if (code == TSDB_CODE_SUCCESS) { @@ -1677,7 +2236,7 @@ int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, STsdbReader* SDelFile* pDelFile = pReader->pReadSnap->fs.pDelFile; if (pDelFile) { SDelFReader* pDelFReader = NULL; - code = tsdbDelFReaderOpen(&pDelFReader, pDelFile, pTsdb, NULL); + code = tsdbDelFReaderOpen(&pDelFReader, pDelFile, pTsdb); if (code != TSDB_CODE_SUCCESS) { goto _err; } @@ -1688,7 +2247,7 @@ int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, STsdbReader* goto _err; } - code = tsdbReadDelIdx(pDelFReader, aDelIdx, NULL); + code = tsdbReadDelIdx(pDelFReader, aDelIdx); if (code != TSDB_CODE_SUCCESS) { taosArrayDestroy(aDelIdx); tsdbDelFReaderClose(&pDelFReader); @@ -1699,7 +2258,7 @@ int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, STsdbReader* SDelIdx* pIdx = taosArraySearch(aDelIdx, &idx, tCmprDelIdx, TD_EQ); if (pIdx != NULL) { - code = tsdbReadDelData(pDelFReader, pIdx, pDelData, NULL); + code = tsdbReadDelData(pDelFReader, pIdx, pDelData); } taosArrayDestroy(aDelIdx); @@ -1737,6 +2296,7 @@ int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, STsdbReader* ASCENDING_TRAVERSE(pReader->order) ? 
0 : taosArrayGetSize(pBlockScanInfo->delSkyline) - 1; pBlockScanInfo->iiter.index = pBlockScanInfo->iter.index; pBlockScanInfo->fileDelIndex = pBlockScanInfo->iter.index; + pBlockScanInfo->lastBlockDelIndex = pBlockScanInfo->iter.index; return code; _err: @@ -1744,12 +2304,9 @@ _err: return code; } -static TSDBKEY getCurrentKeyInBuf(SDataBlockIter* pBlockIter, STsdbReader* pReader) { +static TSDBKEY getCurrentKeyInBuf(STableBlockScanInfo* pScanInfo, STsdbReader* pReader) { TSDBKEY key = {.ts = TSKEY_INITIAL_VAL}; - SFileDataBlockInfo* pFBlock = getCurrentBlockInfo(pBlockIter); - STableBlockScanInfo* pScanInfo = taosHashGet(pReader->status.pTableMap, &pFBlock->uid, sizeof(pFBlock->uid)); - initMemDataIterator(pScanInfo, pReader); TSDBROW* pRow = getValidRow(&pScanInfo->iter, pScanInfo->delSkyline, pReader); if (pRow != NULL) { @@ -1767,15 +2324,17 @@ static TSDBKEY getCurrentKeyInBuf(SDataBlockIter* pBlockIter, STsdbReader* pRead return key; } -static int32_t moveToNextFile(STsdbReader* pReader, int32_t* numOfBlocks) { +static int32_t moveToNextFile(STsdbReader* pReader, SBlockNumber* pBlockNum) { SReaderStatus* pStatus = &pReader->status; size_t numOfTables = taosHashGetSize(pReader->status.pTableMap); SArray* pIndexList = taosArrayInit(numOfTables, sizeof(SBlockIdx)); + SArray* pLastBlocks = pStatus->fileIter.pLastBlockReader->pBlockL; while (1) { bool hasNext = filesetIteratorNext(&pStatus->fileIter, pReader); if (!hasNext) { // no data files on disk + taosArrayClear(pLastBlocks); break; } @@ -1786,18 +2345,34 @@ static int32_t moveToNextFile(STsdbReader* pReader, int32_t* numOfBlocks) { return code; } - if (taosArrayGetSize(pIndexList) > 0) { - uint32_t numOfValidTable = 0; - code = doLoadFileBlock(pReader, pIndexList, &numOfValidTable, numOfBlocks); + code = tsdbReadBlockL(pReader->pFileReader, pLastBlocks); + if (code != TSDB_CODE_SUCCESS) { + taosArrayDestroy(pIndexList); + return code; + } + + if (taosArrayGetSize(pIndexList) > 0 || 
taosArrayGetSize(pLastBlocks) > 0) { + SArray* pQLastBlock = taosArrayInit(4, sizeof(SBlockL)); + + code = doLoadFileBlock(pReader, pIndexList, pLastBlocks, pBlockNum, pQLastBlock); if (code != TSDB_CODE_SUCCESS) { taosArrayDestroy(pIndexList); + taosArrayDestroy(pQLastBlock); return code; } - if (numOfValidTable > 0) { + if (pBlockNum->numOfBlocks + pBlockNum->numOfLastBlocks > 0) { + ASSERT(taosArrayGetSize(pQLastBlock) == pBlockNum->numOfLastBlocks); + taosArrayClear(pLastBlocks); + taosArrayAddAll(pLastBlocks, pQLastBlock); + + taosArrayDestroy(pQLastBlock); break; } + + taosArrayDestroy(pQLastBlock); } + // no blocks in current file, try next files } @@ -1805,28 +2380,160 @@ static int32_t moveToNextFile(STsdbReader* pReader, int32_t* numOfBlocks) { return TSDB_CODE_SUCCESS; } +static int32_t doLoadRelatedLastBlock(SLastBlockReader* pLastBlockReader, STableBlockScanInfo *pBlockScanInfo, STsdbReader* pReader) { + SArray* pBlocks = pLastBlockReader->pBlockL; + SBlockL* pBlock = NULL; + + uint64_t uid = pBlockScanInfo->uid; + initMemDataIterator(pBlockScanInfo, pReader); + pLastBlockReader->currentBlockIndex = -1; + + // find the correct SBlockL + for (int32_t i = 0; i < taosArrayGetSize(pBlocks); ++i) { + SBlockL* p = taosArrayGet(pBlocks, i); + if (p->minUid <= uid && p->maxUid >= uid) { + pLastBlockReader->currentBlockIndex = i; + pBlock = p; + break; + } + } + + if (pLastBlockReader->currentBlockIndex == -1) { + tBlockDataReset(&pLastBlockReader->lastBlockData); + return TSDB_CODE_SUCCESS; + } + + int32_t code = tBlockDataInit(&pLastBlockReader->lastBlockData, pReader->suid, pReader->suid ? 
0 : uid, pReader->pSchema); + if (code != TSDB_CODE_SUCCESS) { + tsdbError("%p init block data failed, code:%s %s", pReader, tstrerror(code), pReader->idStr); + return code; + } + + code = tsdbReadLastBlock(pReader->pFileReader, pBlock, &pLastBlockReader->lastBlockData); + if (code != TSDB_CODE_SUCCESS) { + tsdbError( + "%p error occurs in loading last block into buffer, last block index:%d, total:%d rows:%d, minVer:%" PRId64 + ", maxVer:%" PRId64 ", code:%s %s", + pReader, pLastBlockReader->currentBlockIndex, (int32_t)taosArrayGetSize(pBlocks), pBlock->nRow, pBlock->minVer, + pBlock->maxVer, tstrerror(code), pReader->idStr); + } + + return TSDB_CODE_SUCCESS; +} + +static int32_t doLoadLastBlockSequentially(STsdbReader* pReader) { + SReaderStatus* pStatus = &pReader->status; + SLastBlockReader* pLastBlockReader = pStatus->fileIter.pLastBlockReader; + + while(1) { + if (pStatus->pTableIter == NULL) { + pStatus->pTableIter = taosHashIterate(pStatus->pTableMap, NULL); + if (pStatus->pTableIter == NULL) { + return TSDB_CODE_SUCCESS; + } + } + + // load the last data block of current table + // todo opt perf by avoiding load last block repeatly + STableBlockScanInfo* pScanInfo = pStatus->pTableIter; + int32_t code = doLoadRelatedLastBlock(pLastBlockReader, pScanInfo, pReader); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + if (pLastBlockReader->currentBlockIndex != -1) { + initLastBlockReader(pLastBlockReader, pScanInfo->uid, &pScanInfo->indexInBlockL); + int32_t index = pScanInfo->indexInBlockL; + if (index == DEFAULT_ROW_INDEX_VAL || index == pLastBlockReader->lastBlockData.nRow) { + bool hasData = nextRowInLastBlock(pLastBlockReader, pScanInfo); + if (!hasData) { // current table does not have rows in last block, try next table + pStatus->pTableIter = taosHashIterate(pStatus->pTableMap, pStatus->pTableIter); + if (pStatus->pTableIter == NULL) { + return TSDB_CODE_SUCCESS; + } + continue; + } + } + } else { // no data in last block, try next table + 
pStatus->pTableIter = taosHashIterate(pStatus->pTableMap, pStatus->pTableIter); + if (pStatus->pTableIter == NULL) { + return TSDB_CODE_SUCCESS; + } + continue; + } + + code = doBuildDataBlock(pReader); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + if (pReader->pResBlock->info.rows > 0) { + return TSDB_CODE_SUCCESS; + } + + // current table is exhausted, let's try next table + pStatus->pTableIter = taosHashIterate(pStatus->pTableMap, pStatus->pTableIter); + if (pStatus->pTableIter == NULL) { + return TSDB_CODE_SUCCESS; + } + } +} + static int32_t doBuildDataBlock(STsdbReader* pReader) { int32_t code = TSDB_CODE_SUCCESS; SReaderStatus* pStatus = &pReader->status; SDataBlockIter* pBlockIter = &pStatus->blockIter; - SFileDataBlockInfo* pFBlock = getCurrentBlockInfo(pBlockIter); - STableBlockScanInfo* pScanInfo = taosHashGet(pStatus->pTableMap, &pFBlock->uid, sizeof(pFBlock->uid)); + TSDBKEY key = {0}; + SBlock* pBlock = NULL; + STableBlockScanInfo* pScanInfo = NULL; + SFileDataBlockInfo* pBlockInfo = getCurrentBlockInfo(pBlockIter); + SLastBlockReader* pLastBlockReader = pReader->status.fileIter.pLastBlockReader; - SBlock* pBlock = getCurrentBlock(pBlockIter); + if (pBlockInfo != NULL) { + pScanInfo = taosHashGet(pReader->status.pTableMap, &pBlockInfo->uid, sizeof(pBlockInfo->uid)); + } else { + pScanInfo = pReader->status.pTableIter; + } - TSDBKEY key = getCurrentKeyInBuf(pBlockIter, pReader); - if (fileBlockShouldLoad(pReader, pFBlock, pBlock, pScanInfo, key)) { + if (pBlockInfo != NULL) { + pBlock = getCurrentBlock(pBlockIter); + } + + { + key = getCurrentKeyInBuf(pScanInfo, pReader); + + // load the last data block of current table + code = doLoadRelatedLastBlock(pLastBlockReader, pScanInfo, pReader); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + // note: the lastblock may be null here + initLastBlockReader(pLastBlockReader, pScanInfo->uid, &pScanInfo->indexInBlockL); + if (pScanInfo->indexInBlockL == DEFAULT_ROW_INDEX_VAL || 
pScanInfo->indexInBlockL == pLastBlockReader->lastBlockData.nRow) { + bool hasData = nextRowInLastBlock(pLastBlockReader, pScanInfo); + } + } + + if (pBlockInfo == NULL) { // build data block from last data file + ASSERT(pBlockIter->numOfBlocks == 0); + code = buildComposedDataBlock(pReader); + } else if (fileBlockShouldLoad(pReader, pBlockInfo, pBlock, pScanInfo, key, pLastBlockReader)) { tBlockDataReset(&pStatus->fileBlockData); - tBlockDataClearData(&pStatus->fileBlockData); - code = doLoadFileBlockData(pReader, pBlockIter, pScanInfo, &pStatus->fileBlockData); + code = tBlockDataInit(&pStatus->fileBlockData, pReader->suid, pScanInfo->uid, pReader->pSchema); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + code = doLoadFileBlockData(pReader, pBlockIter, &pStatus->fileBlockData); if (code != TSDB_CODE_SUCCESS) { return code; } // build composed data block - code = buildComposedDataBlock(pReader, pScanInfo); + code = buildComposedDataBlock(pReader); } else if (bufferDataInFileBlockGap(pReader->order, key, pBlock)) { // data in memory that are earlier than current file block // todo rows in buffer should be less than the file block in asc, greater than file block in desc @@ -1838,7 +2545,7 @@ static int32_t doBuildDataBlock(STsdbReader* pReader) { pInfo->uid = pScanInfo->uid; pInfo->window = (STimeWindow){.skey = pBlock->minKey.ts, .ekey = pBlock->maxKey.ts}; setComposedBlockFlag(pReader, false); - setBlockAllDumped(&pStatus->fBlockDumpInfo, pBlock, pReader->order); + setBlockAllDumped(&pStatus->fBlockDumpInfo, pBlock->maxKey.ts, pReader->order); } return code; @@ -1890,20 +2597,26 @@ static void initBlockDumpInfo(STsdbReader* pReader, SDataBlockIter* pBlockIter) } static int32_t initForFirstBlockInFile(STsdbReader* pReader, SDataBlockIter* pBlockIter) { - int32_t numOfBlocks = 0; - int32_t code = moveToNextFile(pReader, &numOfBlocks); + SBlockNumber num = {0}; + + int32_t code = moveToNextFile(pReader, &num); if (code != TSDB_CODE_SUCCESS) { return code; } 
// all data files are consumed, try data in buffer - if (numOfBlocks == 0) { + if (num.numOfBlocks + num.numOfLastBlocks == 0) { pReader->status.loadFromFile = false; return code; } // initialize the block iterator for a new fileset - code = initBlockIterator(pReader, pBlockIter, numOfBlocks); + if (num.numOfBlocks > 0) { + code = initBlockIterator(pReader, pBlockIter, num.numOfBlocks); + } else { + tBlockDataReset(&pReader->status.fileBlockData); + resetDataBlockIterator(pBlockIter, pReader->order, pReader->status.pTableMap); + } // set the correct start position according to the query time window initBlockDumpInfo(pReader, pBlockIter); @@ -1921,14 +2634,47 @@ static int32_t buildBlockFromFiles(STsdbReader* pReader) { SDataBlockIter* pBlockIter = &pReader->status.blockIter; - while (1) { - SFileDataBlockInfo* pFBlock = getCurrentBlockInfo(&pReader->status.blockIter); - STableBlockScanInfo* pScanInfo = taosHashGet(pReader->status.pTableMap, &pFBlock->uid, sizeof(pFBlock->uid)); + if (pBlockIter->numOfBlocks == 0) { + _begin: + code = doLoadLastBlockSequentially(pReader); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + if (pReader->pResBlock->info.rows > 0) { + return TSDB_CODE_SUCCESS; + } + + // all data blocks are checked in this last block file, now let's try the next file + if (pReader->status.pTableIter == NULL) { + code = initForFirstBlockInFile(pReader, pBlockIter); + + // error happens or all the data files are completely checked + if ((code != TSDB_CODE_SUCCESS) || (pReader->status.loadFromFile == false)) { + return code; + } + + // this file does not have data files, let's start check the last block file if exists + if (pBlockIter->numOfBlocks == 0) { + goto _begin; + } + } + + code = doBuildDataBlock(pReader); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + if (pReader->pResBlock->info.rows > 0) { + return TSDB_CODE_SUCCESS; + } + } + + while (1) { SFileBlockDumpInfo* pDumpInfo = &pReader->status.fBlockDumpInfo; if 
(fileBlockPartiallyRead(pDumpInfo, asc)) { // file data block is partially loaded - code = buildComposedDataBlock(pReader, pScanInfo); + code = buildComposedDataBlock(pReader); } else { // current block are exhausted, try the next file block if (pDumpInfo->allDumped) { @@ -1936,17 +2682,26 @@ static int32_t buildBlockFromFiles(STsdbReader* pReader) { bool hasNext = blockIteratorNext(&pReader->status.blockIter); if (hasNext) { // check for the next block in the block accessed order list initBlockDumpInfo(pReader, pBlockIter); - } else { // data blocks in current file are exhausted, let's try the next file now + } else if (taosArrayGetSize(pReader->status.fileIter.pLastBlockReader->pBlockL) > 0) { // data blocks in current file are exhausted, let's try the next file now + // todo dump all data in last block if exists. + tBlockDataReset(&pReader->status.fileBlockData); + resetDataBlockIterator(pBlockIter, pReader->order, pReader->status.pTableMap); + goto _begin; + } else { code = initForFirstBlockInFile(pReader, pBlockIter); // error happens or all the data files are completely checked if ((code != TSDB_CODE_SUCCESS) || (pReader->status.loadFromFile == false)) { return code; } + + // this file does not have blocks, let's start check the last block file + if (pBlockIter->numOfBlocks == 0) { + goto _begin; + } } } - // current block is not loaded yet, or data in buffer may overlap with the file block. 
code = doBuildDataBlock(pReader); } @@ -2014,39 +2769,6 @@ SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, int8_ return (SVersionRange){.minVer = startVer, .maxVer = endVer}; } -// // todo not unref yet, since it is not support multi-group interpolation query -// static UNUSED_FUNC void changeQueryHandleForInterpQuery(STsdbReader* pHandle) { -// // filter the queried time stamp in the first place -// STsdbReader* pTsdbReadHandle = (STsdbReader*)pHandle; - -// // starts from the buffer in case of descending timestamp order check data blocks -// size_t numOfTables = taosArrayGetSize(pTsdbReadHandle->pTableCheckInfo); - -// int32_t i = 0; -// while (i < numOfTables) { -// STableBlockScanInfo* pCheckInfo = taosArrayGet(pTsdbReadHandle->pTableCheckInfo, i); - -// // the first qualified table for interpolation query -// // if ((pTsdbReadHandle->window.skey <= pCheckInfo->pTableObj->lastKey) && -// // (pCheckInfo->pTableObj->lastKey != TSKEY_INITIAL_VAL)) { -// // break; -// // } - -// i++; -// } - -// // there are no data in all the tables -// if (i == numOfTables) { -// return; -// } - -// STableBlockScanInfo info = *(STableBlockScanInfo*)taosArrayGet(pTsdbReadHandle->pTableCheckInfo, i); -// taosArrayClear(pTsdbReadHandle->pTableCheckInfo); - -// info.lastKey = pTsdbReadHandle->window.skey; -// taosArrayPush(pTsdbReadHandle->pTableCheckInfo, &info); -// } - bool hasBeenDropped(const SArray* pDelList, int32_t* index, TSDBKEY* pKey, int32_t order) { ASSERT(pKey != NULL); if (pDelList == NULL) { @@ -2265,8 +2987,7 @@ static int32_t checkForNeighborFileBlock(STsdbReader* pReader, STableBlockScanIn // 3. 
load the neighbor block, and set it to be the currently accessed file data block tBlockDataReset(&pStatus->fileBlockData); - tBlockDataClearData(&pStatus->fileBlockData); - int32_t code = doLoadFileBlockData(pReader, pBlockIter, pScanInfo, &pStatus->fileBlockData); + int32_t code = doLoadFileBlockData(pReader, pBlockIter, &pStatus->fileBlockData); if (code != TSDB_CODE_SUCCESS) { return code; } @@ -2315,6 +3036,21 @@ int32_t doMergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pSc return TSDB_CODE_SUCCESS; } +// todo check if the rows are dropped or not +int32_t doMergeRowsInLastBlock(SLastBlockReader* pLastBlockReader, STableBlockScanInfo* pScanInfo, int64_t ts, SRowMerger* pMerger) { + while(nextRowInLastBlock(pLastBlockReader, pScanInfo)) { + int64_t next1 = getCurrentKeyInLastBlock(pLastBlockReader); + if (next1 == ts) { + TSDBROW fRow1 = tsdbRowFromBlockData(&pLastBlockReader->lastBlockData, *pLastBlockReader->rowIndex); + tRowMerge(pMerger, &fRow1); + } else { + break; + } + } + + return TSDB_CODE_SUCCESS; +} + void doMergeMultiRows(TSDBROW* pRow, uint64_t uid, SIterInfo* pIter, SArray* pDelList, STSRow** pTSRow, STsdbReader* pReader, bool* freeTSRow) { TSDBROW* pNextRow = NULL; @@ -2487,7 +3223,7 @@ int32_t doAppendRowFromTSRow(SSDataBlock* pBlock, STsdbReader* pReader, STSRow* return TSDB_CODE_SUCCESS; } -int32_t doAppendRowFromBlock(SSDataBlock* pResBlock, STsdbReader* pReader, SBlockData* pBlockData, int32_t rowIndex) { +int32_t doAppendRowFromFileBlock(SSDataBlock* pResBlock, STsdbReader* pReader, SBlockData* pBlockData, int32_t rowIndex) { int32_t i = 0, j = 0; int32_t outputRowIndex = pResBlock->info.rows; @@ -2564,7 +3300,7 @@ int32_t tsdbSetTableId(STsdbReader* pReader, int64_t uid) { ASSERT(pReader != NULL); taosHashClear(pReader->status.pTableMap); - STableBlockScanInfo info = {.lastKey = 0, .uid = uid}; + STableBlockScanInfo info = {.lastKey = 0, .uid = uid, .indexInBlockL = DEFAULT_ROW_INDEX_VAL}; 
taosHashPut(pReader->status.pTableMap, &info.uid, sizeof(uint64_t), &info, sizeof(info)); return TDB_CODE_SUCCESS; } @@ -2615,6 +3351,7 @@ int32_t tsdbReaderOpen(SVnode* pVnode, SQueryTableDataCond* pCond, SArray* pTabl pCond->order = TSDB_ORDER_ASC; } + // here we only need one more row, so the capacity is set to be ONE. code = tsdbReaderCreate(pVnode, pCond, &pReader->innerReader[0], 1, idstr); if (code != TSDB_CODE_SUCCESS) { goto _err; @@ -2658,7 +3395,7 @@ int32_t tsdbReaderOpen(SVnode* pVnode, SQueryTableDataCond* pCond, SArray* pTabl if (pReader->type == TIMEWINDOW_RANGE_CONTAINED) { SDataBlockIter* pBlockIter = &pReader->status.blockIter; - initFilesetIterator(&pReader->status.fileIter, pReader->pReadSnap->fs.aDFileSet, pReader->order, pReader->idStr); + initFilesetIterator(&pReader->status.fileIter, pReader->pReadSnap->fs.aDFileSet, pReader); resetDataBlockIterator(&pReader->status.blockIter, pReader->order, pReader->status.pTableMap); // no data in files, let's try buffer in memory @@ -2679,8 +3416,7 @@ int32_t tsdbReaderOpen(SVnode* pVnode, SQueryTableDataCond* pCond, SArray* pTabl goto _err; } - initFilesetIterator(&pPrevReader->status.fileIter, pPrevReader->pReadSnap->fs.aDFileSet, pPrevReader->order, - pPrevReader->idStr); + initFilesetIterator(&pPrevReader->status.fileIter, pPrevReader->pReadSnap->fs.aDFileSet, pPrevReader); resetDataBlockIterator(&pPrevReader->status.blockIter, pPrevReader->order, pReader->status.pTableMap); // no data in files, let's try buffer in memory @@ -2720,7 +3456,7 @@ void tsdbReaderClose(STsdbReader* pReader) { } } taosMemoryFree(pSupInfo->buildBuf); - tBlockDataClear(&pReader->status.fileBlockData, true); + tBlockDataDestroy(&pReader->status.fileBlockData, true); cleanupDataBlockIterator(&pReader->status.blockIter); @@ -2732,6 +3468,13 @@ void tsdbReaderClose(STsdbReader* pReader) { tsdbDataFReaderClose(&pReader->pFileReader); } + SFilesetIter* pFilesetIter = &pReader->status.fileIter; + if (pFilesetIter->pLastBlockReader 
!= NULL) { + tBlockDataDestroy(&pFilesetIter->pLastBlockReader->lastBlockData, true); + taosArrayDestroy(pFilesetIter->pLastBlockReader->pBlockL); + taosMemoryFree(pFilesetIter->pLastBlockReader); + } + SIOCostSummary* pCost = &pReader->cost; tsdbDebug("%p :io-cost summary: head-file:%" PRIu64 ", head-file time:%.2f ms, SMA:%" PRId64 @@ -2857,7 +3600,7 @@ int32_t tsdbRetrieveDatablockSMA(STsdbReader* pReader, SColumnDataAgg*** pBlockS SBlockLoadSuppInfo* pSup = &pReader->suppInfo; if (tBlockHasSma(pBlock)) { - code = tsdbReadBlockSma(pReader->pFileReader, pBlock, pSup->pColAgg, NULL); + code = tsdbReadBlockSma(pReader->pFileReader, pBlock, pSup->pColAgg); if (code != TSDB_CODE_SUCCESS) { tsdbDebug("vgId:%d, failed to load block SMA for uid %" PRIu64 ", code:%s, %s", 0, pFBlock->uid, tstrerror(code), pReader->idStr); @@ -2923,11 +3666,15 @@ static SArray* doRetrieveDataBlock(STsdbReader* pReader) { STableBlockScanInfo* pBlockScanInfo = taosHashGet(pStatus->pTableMap, &pFBlock->uid, sizeof(pFBlock->uid)); tBlockDataReset(&pStatus->fileBlockData); - tBlockDataClearData(&pStatus->fileBlockData); - int32_t code = doLoadFileBlockData(pReader, &pStatus->blockIter, pBlockScanInfo, &pStatus->fileBlockData); + int32_t code = tBlockDataInit(&pStatus->fileBlockData, pReader->suid, pBlockScanInfo->uid, pReader->pSchema); if (code != TSDB_CODE_SUCCESS) { - tBlockDataClear(&pStatus->fileBlockData, 1); + terrno = code; + return NULL; + } + code = doLoadFileBlockData(pReader, &pStatus->blockIter, &pStatus->fileBlockData); + if (code != TSDB_CODE_SUCCESS) { + tBlockDataDestroy(&pStatus->fileBlockData, 1); terrno = code; return NULL; } @@ -2969,7 +3716,7 @@ int32_t tsdbReaderReset(STsdbReader* pReader, SQueryTableDataCond* pCond) { int32_t numOfTables = taosHashGetSize(pReader->status.pTableMap); tsdbDataFReaderClose(&pReader->pFileReader); - initFilesetIterator(&pReader->status.fileIter, pReader->pReadSnap->fs.aDFileSet, pReader->order, pReader->idStr); + 
initFilesetIterator(&pReader->status.fileIter, pReader->pReadSnap->fs.aDFileSet, pReader); resetDataBlockIterator(&pReader->status.blockIter, pReader->order, pReader->status.pTableMap); resetDataBlockScanInfo(pReader->status.pTableMap); @@ -3078,7 +3825,7 @@ int64_t tsdbGetNumOfRowsInMemTable(STsdbReader* pReader) { STbData* d = NULL; if (pReader->pTsdb->mem != NULL) { - tsdbGetTbDataFromMemTable(pReader->pReadSnap->pMem, pReader->suid, pBlockScanInfo->uid, &d); + d = tsdbGetTbDataFromMemTable(pReader->pReadSnap->pMem, pReader->suid, pBlockScanInfo->uid); if (d != NULL) { rows += tsdbGetNRowsInTbData(d); } @@ -3086,7 +3833,7 @@ int64_t tsdbGetNumOfRowsInMemTable(STsdbReader* pReader) { STbData* di = NULL; if (pReader->pTsdb->imem != NULL) { - tsdbGetTbDataFromMemTable(pReader->pReadSnap->pIMem, pReader->suid, pBlockScanInfo->uid, &di); + di = tsdbGetTbDataFromMemTable(pReader->pReadSnap->pIMem, pReader->suid, pBlockScanInfo->uid); if (di != NULL) { rows += tsdbGetNRowsInTbData(di); } diff --git a/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c b/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c index ea9c3e5313..c8f3862071 100644 --- a/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c +++ b/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c @@ -15,8 +15,6 @@ #include "tsdb.h" -#define TSDB_FILE_DLMT ((uint32_t)0xF00AFA0F) - // SDelFWriter ==================================================== int32_t tsdbDelFWriterOpen(SDelFWriter **ppWriter, SDelFile *pFile, STsdb *pTsdb) { int32_t code = 0; @@ -63,6 +61,7 @@ _err: int32_t tsdbDelFWriterClose(SDelFWriter **ppWriter, int8_t sync) { int32_t code = 0; SDelFWriter *pWriter = *ppWriter; + STsdb *pTsdb = pWriter->pTsdb; // sync if (sync && taosFsyncFile(pWriter->pWriteH) < 0) { @@ -76,47 +75,47 @@ int32_t tsdbDelFWriterClose(SDelFWriter **ppWriter, int8_t sync) { goto _err; } + for (int32_t iBuf = 0; iBuf < sizeof(pWriter->aBuf) / sizeof(uint8_t *); iBuf++) { + tFree(pWriter->aBuf[iBuf]); + } + taosMemoryFree(pWriter); + *ppWriter = 
NULL; return code; _err: - tsdbError("vgId:%d, failed to close del file writer since %s", TD_VID(pWriter->pTsdb->pVnode), tstrerror(code)); + tsdbError("vgId:%d, failed to close del file writer since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); return code; } -int32_t tsdbWriteDelData(SDelFWriter *pWriter, SArray *aDelData, uint8_t **ppBuf, SDelIdx *pDelIdx) { - int32_t code = 0; - uint8_t *pBuf = NULL; - int64_t size; - int64_t n; - SBlockDataHdr hdr = {.delimiter = TSDB_FILE_DLMT, .suid = pDelIdx->suid, .uid = pDelIdx->uid}; - - if (!ppBuf) ppBuf = &pBuf; +int32_t tsdbWriteDelData(SDelFWriter *pWriter, SArray *aDelData, SDelIdx *pDelIdx) { + int32_t code = 0; + int64_t size; + int64_t n; // prepare - size = sizeof(hdr); + size = sizeof(uint32_t); for (int32_t iDelData = 0; iDelData < taosArrayGetSize(aDelData); iDelData++) { size += tPutDelData(NULL, taosArrayGet(aDelData, iDelData)); } size += sizeof(TSCKSUM); // alloc - code = tRealloc(ppBuf, size); + code = tRealloc(&pWriter->aBuf[0], size); if (code) goto _err; // build n = 0; - *(SBlockDataHdr *)(*ppBuf) = hdr; - n += sizeof(hdr); + n += tPutU32(pWriter->aBuf[0] + n, TSDB_FILE_DLMT); for (int32_t iDelData = 0; iDelData < taosArrayGetSize(aDelData); iDelData++) { - n += tPutDelData(*ppBuf + n, taosArrayGet(aDelData, iDelData)); + n += tPutDelData(pWriter->aBuf[0] + n, taosArrayGet(aDelData, iDelData)); } - taosCalcChecksumAppend(0, *ppBuf, size); + taosCalcChecksumAppend(0, pWriter->aBuf[0], size); ASSERT(n + sizeof(TSCKSUM) == size); // write - n = taosWriteFile(pWriter->pWriteH, *ppBuf, size); + n = taosWriteFile(pWriter->pWriteH, pWriter->aBuf[0], size); if (n < 0) { code = TAOS_SYSTEM_ERROR(errno); goto _err; @@ -129,48 +128,42 @@ int32_t tsdbWriteDelData(SDelFWriter *pWriter, SArray *aDelData, uint8_t **ppBuf pDelIdx->size = size; pWriter->fDel.size += size; - tFree(pBuf); return code; _err: tsdbError("vgId:%d, failed to write del data since %s", TD_VID(pWriter->pTsdb->pVnode), tstrerror(code)); - 
tFree(pBuf); return code; } -int32_t tsdbWriteDelIdx(SDelFWriter *pWriter, SArray *aDelIdx, uint8_t **ppBuf) { +int32_t tsdbWriteDelIdx(SDelFWriter *pWriter, SArray *aDelIdx) { int32_t code = 0; int64_t size; int64_t n; - uint8_t *pBuf = NULL; SDelIdx *pDelIdx; - if (!ppBuf) ppBuf = &pBuf; - // prepare - size = 0; - size += tPutU32(NULL, TSDB_FILE_DLMT); + size = sizeof(uint32_t); for (int32_t iDelIdx = 0; iDelIdx < taosArrayGetSize(aDelIdx); iDelIdx++) { size += tPutDelIdx(NULL, taosArrayGet(aDelIdx, iDelIdx)); } size += sizeof(TSCKSUM); // alloc - code = tRealloc(ppBuf, size); + code = tRealloc(&pWriter->aBuf[0], size); if (code) goto _err; // build n = 0; - n += tPutU32(*ppBuf + n, TSDB_FILE_DLMT); + n += tPutU32(pWriter->aBuf[0] + n, TSDB_FILE_DLMT); for (int32_t iDelIdx = 0; iDelIdx < taosArrayGetSize(aDelIdx); iDelIdx++) { - n += tPutDelIdx(*ppBuf + n, taosArrayGet(aDelIdx, iDelIdx)); + n += tPutDelIdx(pWriter->aBuf[0] + n, taosArrayGet(aDelIdx, iDelIdx)); } - taosCalcChecksumAppend(0, *ppBuf, size); + taosCalcChecksumAppend(0, pWriter->aBuf[0], size); ASSERT(n + sizeof(TSCKSUM) == size); // write - n = taosWriteFile(pWriter->pWriteH, *ppBuf, size); + n = taosWriteFile(pWriter->pWriteH, pWriter->aBuf[0], size); if (n < 0) { code = TAOS_SYSTEM_ERROR(errno); goto _err; @@ -180,12 +173,10 @@ int32_t tsdbWriteDelIdx(SDelFWriter *pWriter, SArray *aDelIdx, uint8_t **ppBuf) pWriter->fDel.offset = pWriter->fDel.size; pWriter->fDel.size += size; - tFree(pBuf); return code; _err: tsdbError("vgId:%d, write del idx failed since %s", TD_VID(pWriter->pTsdb->pVnode), tstrerror(code)); - tFree(pBuf); return code; } @@ -225,9 +216,11 @@ struct SDelFReader { STsdb *pTsdb; SDelFile fDel; TdFilePtr pReadH; + + uint8_t *aBuf[1]; }; -int32_t tsdbDelFReaderOpen(SDelFReader **ppReader, SDelFile *pFile, STsdb *pTsdb, uint8_t **ppBuf) { +int32_t tsdbDelFReaderOpen(SDelFReader **ppReader, SDelFile *pFile, STsdb *pTsdb) { int32_t code = 0; char fname[TSDB_FILENAME_LEN]; SDelFReader 
*pDelFReader; @@ -252,32 +245,6 @@ int32_t tsdbDelFReaderOpen(SDelFReader **ppReader, SDelFile *pFile, STsdb *pTsdb goto _err; } -#if 0 - // load and check hdr if buffer is given - if (ppBuf) { - code = tRealloc(ppBuf, TSDB_FHDR_SIZE); - if (code) { - goto _err; - } - - n = taosReadFile(pDelFReader->pReadH, *ppBuf, TSDB_FHDR_SIZE); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } else if (n < TSDB_FHDR_SIZE) { - code = TSDB_CODE_FILE_CORRUPTED; - goto _err; - } - - if (!taosCheckChecksumWhole(*ppBuf, TSDB_FHDR_SIZE)) { - code = TSDB_CODE_FILE_CORRUPTED; - goto _err; - } - - // TODO: check the content - } -#endif - _exit: *ppReader = pDelFReader; return code; @@ -297,6 +264,9 @@ int32_t tsdbDelFReaderClose(SDelFReader **ppReader) { code = TAOS_SYSTEM_ERROR(errno); goto _exit; } + for (int32_t iBuf = 0; iBuf < sizeof(pReader->aBuf) / sizeof(uint8_t *); iBuf++) { + tFree(pReader->aBuf[iBuf]); + } taosMemoryFree(pReader); } *ppReader = NULL; @@ -305,16 +275,13 @@ _exit: return code; } -int32_t tsdbReadDelData(SDelFReader *pReader, SDelIdx *pDelIdx, SArray *aDelData, uint8_t **ppBuf) { - int32_t code = 0; - int64_t offset = pDelIdx->offset; - int64_t size = pDelIdx->size; - int64_t n; - uint8_t *pBuf = NULL; - SBlockDataHdr *pHdr; - SDelData *pDelData = &(SDelData){0}; +int32_t tsdbReadDelData(SDelFReader *pReader, SDelIdx *pDelIdx, SArray *aDelData) { + int32_t code = 0; + int64_t offset = pDelIdx->offset; + int64_t size = pDelIdx->size; + int64_t n; - if (!ppBuf) ppBuf = &pBuf; + taosArrayClear(aDelData); // seek if (taosLSeekFile(pReader->pReadH, offset, SEEK_SET) < 0) { @@ -323,11 +290,11 @@ int32_t tsdbReadDelData(SDelFReader *pReader, SDelIdx *pDelIdx, SArray *aDelData } // alloc - code = tRealloc(ppBuf, size); + code = tRealloc(&pReader->aBuf[0], size); if (code) goto _err; // read - n = taosReadFile(pReader->pReadH, *ppBuf, size); + n = taosReadFile(pReader->pReadH, pReader->aBuf[0], size); if (n < 0) { code = TAOS_SYSTEM_ERROR(errno); goto 
_err; @@ -337,23 +304,21 @@ int32_t tsdbReadDelData(SDelFReader *pReader, SDelIdx *pDelIdx, SArray *aDelData } // check - if (!taosCheckChecksumWhole(*ppBuf, size)) { + if (!taosCheckChecksumWhole(pReader->aBuf[0], size)) { code = TSDB_CODE_FILE_CORRUPTED; goto _err; } // // decode n = 0; - pHdr = (SBlockDataHdr *)(*ppBuf + n); - ASSERT(pHdr->delimiter == TSDB_FILE_DLMT); - ASSERT(pHdr->suid == pDelIdx->suid); - ASSERT(pHdr->uid == pDelIdx->uid); - n += sizeof(*pHdr); - taosArrayClear(aDelData); - while (n < size - sizeof(TSCKSUM)) { - n += tGetDelData(*ppBuf + n, pDelData); - if (taosArrayPush(aDelData, pDelData) == NULL) { + uint32_t delimiter; + n += tGetU32(pReader->aBuf[0] + n, &delimiter); + while (n < size - sizeof(TSCKSUM)) { + SDelData delData; + n += tGetDelData(pReader->aBuf[0] + n, &delData); + + if (taosArrayPush(aDelData, &delData) == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _err; } @@ -361,25 +326,20 @@ int32_t tsdbReadDelData(SDelFReader *pReader, SDelIdx *pDelIdx, SArray *aDelData ASSERT(n == size - sizeof(TSCKSUM)); - tFree(pBuf); return code; _err: tsdbError("vgId:%d, read del data failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code)); - tFree(pBuf); return code; } -int32_t tsdbReadDelIdx(SDelFReader *pReader, SArray *aDelIdx, uint8_t **ppBuf) { - int32_t code = 0; - int32_t n; - int64_t offset = pReader->fDel.offset; - int64_t size = pReader->fDel.size - offset; - uint32_t delimiter; - uint8_t *pBuf = NULL; - SDelIdx *pDelIdx = &(SDelIdx){0}; +int32_t tsdbReadDelIdx(SDelFReader *pReader, SArray *aDelIdx) { + int32_t code = 0; + int32_t n; + int64_t offset = pReader->fDel.offset; + int64_t size = pReader->fDel.size - offset; - if (!ppBuf) ppBuf = &pBuf; + taosArrayClear(aDelIdx); // seek if (taosLSeekFile(pReader->pReadH, offset, SEEK_SET) < 0) { @@ -388,11 +348,11 @@ int32_t tsdbReadDelIdx(SDelFReader *pReader, SArray *aDelIdx, uint8_t **ppBuf) { } // alloc - code = tRealloc(ppBuf, size); + code = tRealloc(&pReader->aBuf[0], 
size); if (code) goto _err; // read - n = taosReadFile(pReader->pReadH, *ppBuf, size); + n = taosReadFile(pReader->pReadH, pReader->aBuf[0], size); if (n < 0) { code = TAOS_SYSTEM_ERROR(errno); goto _err; @@ -402,21 +362,23 @@ int32_t tsdbReadDelIdx(SDelFReader *pReader, SArray *aDelIdx, uint8_t **ppBuf) { } // check - if (!taosCheckChecksumWhole(*ppBuf, size)) { + if (!taosCheckChecksumWhole(pReader->aBuf[0], size)) { code = TSDB_CODE_FILE_CORRUPTED; goto _err; } // decode n = 0; - n += tGetU32(*ppBuf + n, &delimiter); + uint32_t delimiter; + n += tGetU32(pReader->aBuf[0] + n, &delimiter); ASSERT(delimiter == TSDB_FILE_DLMT); - taosArrayClear(aDelIdx); while (n < size - sizeof(TSCKSUM)) { - n += tGetDelIdx(*ppBuf + n, pDelIdx); + SDelIdx delIdx; - if (taosArrayPush(aDelIdx, pDelIdx) == NULL) { + n += tGetDelIdx(pReader->aBuf[0] + n, &delIdx); + + if (taosArrayPush(aDelIdx, &delIdx) == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _err; } @@ -424,12 +386,10 @@ int32_t tsdbReadDelIdx(SDelFReader *pReader, SArray *aDelIdx, uint8_t **ppBuf) { ASSERT(n == size - sizeof(TSCKSUM)); - tFree(pBuf); return code; _err: tsdbError("vgId:%d, read del idx failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code)); - tFree(pBuf); return code; } @@ -441,6 +401,8 @@ struct SDataFReader { TdFilePtr pDataFD; TdFilePtr pLastFD; TdFilePtr pSmaFD; + + uint8_t *aBuf[3]; }; int32_t tsdbDataFReaderOpen(SDataFReader **ppReader, STsdb *pTsdb, SDFileSet *pSet) { @@ -523,6 +485,10 @@ int32_t tsdbDataFReaderClose(SDataFReader **ppReader) { goto _err; } + for (int32_t iBuf = 0; iBuf < sizeof((*ppReader)->aBuf) / sizeof(uint8_t *); iBuf++) { + tFree((*ppReader)->aBuf[iBuf]); + } + taosMemoryFree(*ppReader); _exit: @@ -534,19 +500,20 @@ _err: return code; } -int32_t tsdbReadBlockIdx(SDataFReader *pReader, SArray *aBlockIdx, uint8_t **ppBuf) { - int32_t code = 0; - int64_t offset = pReader->pSet->pHeadF->offset; - int64_t size = pReader->pSet->pHeadF->size - offset; - uint8_t *pBuf = NULL; - 
int64_t n; - uint32_t delimiter; - SBlockIdx blockIdx; +int32_t tsdbReadBlockIdx(SDataFReader *pReader, SArray *aBlockIdx) { + int32_t code = 0; + int64_t offset = pReader->pSet->pHeadF->offset; + int64_t size = pReader->pSet->pHeadF->size - offset; + int64_t n; + uint32_t delimiter; - if (!ppBuf) ppBuf = &pBuf; + taosArrayClear(aBlockIdx); + if (size == 0) { + goto _exit; + } // alloc - code = tRealloc(ppBuf, size); + code = tRealloc(&pReader->aBuf[0], size); if (code) goto _err; // seek @@ -556,7 +523,7 @@ int32_t tsdbReadBlockIdx(SDataFReader *pReader, SArray *aBlockIdx, uint8_t **ppB } // read - n = taosReadFile(pReader->pHeadFD, *ppBuf, size); + n = taosReadFile(pReader->pHeadFD, pReader->aBuf[0], size); if (n < 0) { code = TAOS_SYSTEM_ERROR(errno); goto _err; @@ -566,19 +533,19 @@ int32_t tsdbReadBlockIdx(SDataFReader *pReader, SArray *aBlockIdx, uint8_t **ppB } // check - if (!taosCheckChecksumWhole(*ppBuf, size)) { + if (!taosCheckChecksumWhole(pReader->aBuf[0], size)) { code = TSDB_CODE_FILE_CORRUPTED; goto _err; } // decode n = 0; - n = tGetU32(*ppBuf + n, &delimiter); + n = tGetU32(pReader->aBuf[0] + n, &delimiter); ASSERT(delimiter == TSDB_FILE_DLMT); - taosArrayClear(aBlockIdx); while (n < size - sizeof(TSCKSUM)) { - n += tGetBlockIdx(*ppBuf + n, &blockIdx); + SBlockIdx blockIdx; + n += tGetBlockIdx(pReader->aBuf[0] + n, &blockIdx); if (taosArrayPush(aBlockIdx, &blockIdx) == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; @@ -588,28 +555,86 @@ int32_t tsdbReadBlockIdx(SDataFReader *pReader, SArray *aBlockIdx, uint8_t **ppB ASSERT(n + sizeof(TSCKSUM) == size); - tFree(pBuf); +_exit: return code; _err: tsdbError("vgId:%d, read block idx failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code)); - tFree(pBuf); return code; } -int32_t tsdbReadBlock(SDataFReader *pReader, SBlockIdx *pBlockIdx, SMapData *mBlock, uint8_t **ppBuf) { - int32_t code = 0; - int64_t offset = pBlockIdx->offset; - int64_t size = pBlockIdx->size; - uint8_t *pBuf = NULL; - int64_t n; 
- int64_t tn; - SBlockDataHdr hdr; +int32_t tsdbReadBlockL(SDataFReader *pReader, SArray *aBlockL) { + int32_t code = 0; + int64_t offset = pReader->pSet->pLastF->offset; + int64_t size = pReader->pSet->pLastF->size - offset; + int64_t n; + uint32_t delimiter; - if (!ppBuf) ppBuf = &pBuf; + taosArrayClear(aBlockL); + if (size == 0) { + goto _exit; + } // alloc - code = tRealloc(ppBuf, size); + code = tRealloc(&pReader->aBuf[0], size); + if (code) goto _err; + + // seek + if (taosLSeekFile(pReader->pLastFD, offset, SEEK_SET) < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } + + // read + n = taosReadFile(pReader->pLastFD, pReader->aBuf[0], size); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } else if (n < size) { + code = TSDB_CODE_FILE_CORRUPTED; + goto _err; + } + + // check + if (!taosCheckChecksumWhole(pReader->aBuf[0], size)) { + code = TSDB_CODE_FILE_CORRUPTED; + goto _err; + } + + // decode + n = 0; + n = tGetU32(pReader->aBuf[0] + n, &delimiter); + ASSERT(delimiter == TSDB_FILE_DLMT); + + while (n < size - sizeof(TSCKSUM)) { + SBlockL blockl; + n += tGetBlockL(pReader->aBuf[0] + n, &blockl); + + if (taosArrayPush(aBlockL, &blockl) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + } + + ASSERT(n + sizeof(TSCKSUM) == size); + +_exit: + return code; + +_err: + tsdbError("vgId:%d read blockl failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code)); + return code; +} + +int32_t tsdbReadBlock(SDataFReader *pReader, SBlockIdx *pBlockIdx, SMapData *mBlock) { + int32_t code = 0; + int64_t offset = pBlockIdx->offset; + int64_t size = pBlockIdx->size; + int64_t n; + int64_t tn; + + // alloc + code = tRealloc(&pReader->aBuf[0], size); if (code) goto _err; // seek @@ -619,7 +644,7 @@ int32_t tsdbReadBlock(SDataFReader *pReader, SBlockIdx *pBlockIdx, SMapData *mBl } // read - n = taosReadFile(pReader->pHeadFD, *ppBuf, size); + n = taosReadFile(pReader->pHeadFD, pReader->aBuf[0], size); if (n < 0) { code = 
TAOS_SYSTEM_ERROR(errno); goto _err; @@ -629,19 +654,19 @@ int32_t tsdbReadBlock(SDataFReader *pReader, SBlockIdx *pBlockIdx, SMapData *mBl } // check - if (!taosCheckChecksumWhole(*ppBuf, size)) { + if (!taosCheckChecksumWhole(pReader->aBuf[0], size)) { code = TSDB_CODE_FILE_CORRUPTED; goto _err; } // decode - hdr = *(SBlockDataHdr *)(*ppBuf); - ASSERT(hdr.delimiter == TSDB_FILE_DLMT); - ASSERT(hdr.suid == pBlockIdx->suid); - ASSERT(hdr.uid == pBlockIdx->uid); + n = 0; - n = sizeof(hdr); - tn = tGetMapData(*ppBuf + n, mBlock); + uint32_t delimiter; + n += tGetU32(pReader->aBuf[0] + n, &delimiter); + ASSERT(delimiter == TSDB_FILE_DLMT); + + tn = tGetMapData(pReader->aBuf[0] + n, mBlock); if (tn < 0) { code = TSDB_CODE_OUT_OF_MEMORY; goto _err; @@ -649,535 +674,38 @@ int32_t tsdbReadBlock(SDataFReader *pReader, SBlockIdx *pBlockIdx, SMapData *mBl n += tn; ASSERT(n + sizeof(TSCKSUM) == size); - tFree(pBuf); return code; _err: tsdbError("vgId:%d, read block failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code)); - tFree(pBuf); return code; } -static int32_t tsdbReadBlockDataKey(SBlockData *pBlockData, SSubBlock *pSubBlock, uint8_t *pBuf, uint8_t **ppBuf) { - int32_t code = 0; - int64_t size = pSubBlock->szVersion + pSubBlock->szTSKEY + sizeof(TSCKSUM); - int64_t n; +int32_t tsdbReadBlockSma(SDataFReader *pReader, SBlock *pBlock, SArray *aColumnDataAgg) { + int32_t code = 0; + SSmaInfo *pSmaInfo = &pBlock->smaInfo; - if (!taosCheckChecksumWhole(pBuf, size)) { - code = TSDB_CODE_FILE_CORRUPTED; - goto _err; - } + ASSERT(pSmaInfo->size > 0); - code = tRealloc((uint8_t **)&pBlockData->aVersion, sizeof(int64_t) * pSubBlock->nRow); - if (code) goto _err; - code = tRealloc((uint8_t **)&pBlockData->aTSKEY, sizeof(TSKEY) * pSubBlock->nRow); - if (code) goto _err; + taosArrayClear(aColumnDataAgg); - if (pSubBlock->cmprAlg == NO_COMPRESSION) { - ASSERT(pSubBlock->szVersion == sizeof(int64_t) * pSubBlock->nRow); - ASSERT(pSubBlock->szTSKEY == sizeof(TSKEY) * 
pSubBlock->nRow); - - // VERSION - memcpy(pBlockData->aVersion, pBuf, pSubBlock->szVersion); - - // TSKEY - memcpy(pBlockData->aTSKEY, pBuf + pSubBlock->szVersion, pSubBlock->szTSKEY); - } else { - size = sizeof(int64_t) * pSubBlock->nRow + COMP_OVERFLOW_BYTES; - if (pSubBlock->cmprAlg == TWO_STAGE_COMP) { - code = tRealloc(ppBuf, size); - if (code) goto _err; - } - - // VERSION - n = tsDecompressBigint(pBuf, pSubBlock->szVersion, pSubBlock->nRow, (char *)pBlockData->aVersion, - sizeof(int64_t) * pSubBlock->nRow, pSubBlock->cmprAlg, *ppBuf, size); - if (n < 0) { - code = TSDB_CODE_COMPRESS_ERROR; - goto _err; - } - - // TSKEY - n = tsDecompressTimestamp(pBuf + pSubBlock->szVersion, pSubBlock->szTSKEY, pSubBlock->nRow, - (char *)pBlockData->aTSKEY, sizeof(TSKEY) * pSubBlock->nRow, pSubBlock->cmprAlg, *ppBuf, - size); - if (n < 0) { - code = TSDB_CODE_COMPRESS_ERROR; - goto _err; - } - } - - return code; - -_err: - return code; -} - -static int32_t tsdbReadColDataImpl(SSubBlock *pSubBlock, SBlockCol *pBlockCol, SColData *pColData, uint8_t *pBuf, - uint8_t **ppBuf) { - int32_t code = 0; - int64_t size; - int64_t n; - - if (!taosCheckChecksumWhole(pBuf, pBlockCol->szBitmap + pBlockCol->szOffset + pBlockCol->szValue + sizeof(TSCKSUM))) { - code = TSDB_CODE_FILE_CORRUPTED; - goto _err; - } - - pColData->nVal = pSubBlock->nRow; - pColData->flag = pBlockCol->flag; - - // BITMAP - if (pBlockCol->flag != HAS_VALUE) { - ASSERT(pBlockCol->szBitmap); - - size = BIT2_SIZE(pColData->nVal); - code = tRealloc(&pColData->pBitMap, size); - if (code) goto _err; - - code = tRealloc(ppBuf, size + COMP_OVERFLOW_BYTES); - if (code) goto _err; - - n = tsDecompressTinyint(pBuf, pBlockCol->szBitmap, size, pColData->pBitMap, size, TWO_STAGE_COMP, *ppBuf, - size + COMP_OVERFLOW_BYTES); - if (n <= 0) { - code = TSDB_CODE_COMPRESS_ERROR; - goto _err; - } - - ASSERT(n == size); - } else { - ASSERT(pBlockCol->szBitmap == 0); - } - pBuf = pBuf + pBlockCol->szBitmap; - - // OFFSET - if 
(IS_VAR_DATA_TYPE(pColData->type)) { - ASSERT(pBlockCol->szOffset); - - size = sizeof(int32_t) * pColData->nVal; - code = tRealloc((uint8_t **)&pColData->aOffset, size); - if (code) goto _err; - - code = tRealloc(ppBuf, size + COMP_OVERFLOW_BYTES); - if (code) goto _err; - - n = tsDecompressInt(pBuf, pBlockCol->szOffset, pColData->nVal, (char *)pColData->aOffset, size, TWO_STAGE_COMP, - *ppBuf, size + COMP_OVERFLOW_BYTES); - if (n <= 0) { - code = TSDB_CODE_COMPRESS_ERROR; - goto _err; - } - - ASSERT(n == size); - } else { - ASSERT(pBlockCol->szOffset == 0); - } - pBuf = pBuf + pBlockCol->szOffset; - - // VALUE - pColData->nData = pBlockCol->szOrigin; - - code = tRealloc(&pColData->pData, pColData->nData); - if (code) goto _err; - - if (pSubBlock->cmprAlg == NO_COMPRESSION) { - memcpy(pColData->pData, pBuf, pColData->nData); - } else { - if (pSubBlock->cmprAlg == TWO_STAGE_COMP) { - code = tRealloc(ppBuf, pColData->nData + COMP_OVERFLOW_BYTES); - if (code) goto _err; - } - - n = tDataTypes[pBlockCol->type].decompFunc(pBuf, pBlockCol->szValue, pSubBlock->nRow, pColData->pData, - pColData->nData, pSubBlock->cmprAlg, *ppBuf, - pColData->nData + COMP_OVERFLOW_BYTES); - if (n < 0) { - code = TSDB_CODE_COMPRESS_ERROR; - goto _err; - } - - ASSERT(n == pColData->nData); - } - - return code; - -_err: - return code; -} - -static int32_t tsdbReadBlockCol(SSubBlock *pSubBlock, uint8_t *p, SArray *aBlockCol) { - int32_t code = 0; - int32_t n = 0; - SBlockCol blockCol; - SBlockCol *pBlockCol = &blockCol; - - if (!taosCheckChecksumWhole(p, pSubBlock->szBlockCol + sizeof(TSCKSUM))) { - code = TSDB_CODE_FILE_CORRUPTED; - goto _err; - } - - n += sizeof(SBlockDataHdr); - while (n < pSubBlock->szBlockCol) { - n += tGetBlockCol(p + n, pBlockCol); - - if (taosArrayPush(aBlockCol, pBlockCol) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - } - - ASSERT(n == pSubBlock->szBlockCol); - - return code; - -_err: - return code; -} - -static int32_t tsdbReadSubColData(SDataFReader 
*pReader, SBlockIdx *pBlockIdx, SBlock *pBlock, int32_t iSubBlock, - int16_t *aColId, int32_t nCol, SBlockData *pBlockData, uint8_t **ppBuf1, - uint8_t **ppBuf2) { - TdFilePtr pFD = pBlock->last ? pReader->pLastFD : pReader->pDataFD; - SSubBlock *pSubBlock = &pBlock->aSubBlock[iSubBlock]; - SArray *aBlockCol = NULL; - int32_t code = 0; - int64_t offset; - int64_t size; - int64_t n; - - tBlockDataReset(pBlockData); - pBlockData->nRow = pSubBlock->nRow; - - // TSDBKEY and SBlockCol - if (nCol == 1) { - offset = pSubBlock->offset + pSubBlock->szBlockCol + sizeof(TSCKSUM); - size = pSubBlock->szVersion + pSubBlock->szTSKEY + sizeof(TSCKSUM); - } else { - offset = pSubBlock->offset; - size = pSubBlock->szBlockCol + sizeof(TSCKSUM) + pSubBlock->szVersion + pSubBlock->szTSKEY + sizeof(TSCKSUM); - } - - code = tRealloc(ppBuf1, size); - if (code) goto _err; - - n = taosLSeekFile(pFD, offset, SEEK_SET); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - - n = taosReadFile(pFD, *ppBuf1, size); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } else if (n < size) { - code = TSDB_CODE_FILE_CORRUPTED; - goto _err; - } - - if (nCol == 1) { - code = tsdbReadBlockDataKey(pBlockData, pSubBlock, *ppBuf1, ppBuf2); - if (code) goto _err; - - goto _exit; - } else { - aBlockCol = taosArrayInit(0, sizeof(SBlockCol)); - if (aBlockCol == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - - code = tsdbReadBlockCol(pSubBlock, *ppBuf1, aBlockCol); - if (code) goto _err; - - code = tsdbReadBlockDataKey(pBlockData, pSubBlock, *ppBuf1 + pSubBlock->szBlockCol + sizeof(TSCKSUM), ppBuf2); - if (code) goto _err; - } - - for (int32_t iCol = 1; iCol < nCol; iCol++) { - void *p = taosArraySearch(aBlockCol, &(SBlockCol){.cid = aColId[iCol]}, tBlockColCmprFn, TD_EQ); - - if (p) { - SBlockCol *pBlockCol = (SBlockCol *)p; - SColData *pColData; - - ASSERT(pBlockCol->flag && pBlockCol->flag != HAS_NONE); - - code = tBlockDataAddColData(pBlockData, 
taosArrayGetSize(pBlockData->aIdx), &pColData); - if (code) goto _err; - - tColDataInit(pColData, pBlockCol->cid, pBlockCol->type, pBlockCol->smaOn); - if (pBlockCol->flag == HAS_NULL) { - for (int32_t iRow = 0; iRow < pSubBlock->nRow; iRow++) { - code = tColDataAppendValue(pColData, &COL_VAL_NULL(pBlockCol->cid, pBlockCol->type)); - if (code) goto _err; - } - } else { - offset = pSubBlock->offset + pSubBlock->szBlockCol + sizeof(TSCKSUM) + pSubBlock->szVersion + - pSubBlock->szTSKEY + sizeof(TSCKSUM) + pBlockCol->offset; - size = pBlockCol->szBitmap + pBlockCol->szOffset + pBlockCol->szValue + sizeof(TSCKSUM); - - code = tRealloc(ppBuf1, size); - if (code) goto _err; - - // seek - n = taosLSeekFile(pFD, offset, SEEK_SET); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - - // read - n = taosReadFile(pFD, *ppBuf1, size); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } else if (n < size) { - code = TSDB_CODE_FILE_CORRUPTED; - goto _err; - } - - code = tsdbReadColDataImpl(pSubBlock, pBlockCol, pColData, *ppBuf1, ppBuf2); - if (code) goto _err; - } - } - } - -_exit: - taosArrayDestroy(aBlockCol); - return code; - -_err: - taosArrayDestroy(aBlockCol); - return code; -} - -int32_t tsdbReadColData(SDataFReader *pReader, SBlockIdx *pBlockIdx, SBlock *pBlock, int16_t *aColId, int32_t nCol, - SBlockData *pBlockData, uint8_t **ppBuf1, uint8_t **ppBuf2) { - int32_t code = 0; - uint8_t *pBuf1 = NULL; - uint8_t *pBuf2 = NULL; - - ASSERT(aColId[0] == PRIMARYKEY_TIMESTAMP_COL_ID); - - if (!ppBuf1) ppBuf1 = &pBuf1; - if (!ppBuf2) ppBuf2 = &pBuf2; - - code = tsdbReadSubColData(pReader, pBlockIdx, pBlock, 0, aColId, nCol, pBlockData, ppBuf1, ppBuf2); - if (code) goto _err; - - if (pBlock->nSubBlock > 1) { - SBlockData *pBlockData1 = &(SBlockData){0}; - SBlockData *pBlockData2 = &(SBlockData){0}; - - tBlockDataInit(pBlockData1); - tBlockDataInit(pBlockData2); - for (int32_t iSubBlock = 1; iSubBlock < pBlock->nSubBlock; iSubBlock++) { - code = 
tsdbReadSubColData(pReader, pBlockIdx, pBlock, iSubBlock, aColId, nCol, pBlockData1, ppBuf1, ppBuf2); - if (code) goto _err; - - code = tBlockDataCopy(pBlockData, pBlockData2); - if (code) { - tBlockDataClear(pBlockData1, 1); - tBlockDataClear(pBlockData2, 1); - goto _err; - } - - code = tBlockDataMerge(pBlockData1, pBlockData2, pBlockData); - if (code) { - tBlockDataClear(pBlockData1, 1); - tBlockDataClear(pBlockData2, 1); - goto _err; - } - } - - tBlockDataClear(pBlockData1, 1); - tBlockDataClear(pBlockData2, 1); - } - - tFree(pBuf1); - tFree(pBuf2); - return code; - -_err: - tsdbError("vgId:%d, tsdb read col data failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code)); - tFree(pBuf1); - tFree(pBuf2); - return code; -} - -static int32_t tsdbReadSubBlockData(SDataFReader *pReader, SBlockIdx *pBlockIdx, SBlock *pBlock, int32_t iSubBlock, - SBlockData *pBlockData, uint8_t **ppBuf1, uint8_t **ppBuf2) { - int32_t code = 0; - uint8_t *p; - int64_t size; - int64_t n; - TdFilePtr pFD = pBlock->last ? 
pReader->pLastFD : pReader->pDataFD; - SSubBlock *pSubBlock = &pBlock->aSubBlock[iSubBlock]; - SArray *aBlockCol = NULL; - - tBlockDataReset(pBlockData); - - // realloc - code = tRealloc(ppBuf1, pSubBlock->szBlock); + // alloc + int32_t size = pSmaInfo->size + sizeof(TSCKSUM); + code = tRealloc(&pReader->aBuf[0], size); if (code) goto _err; // seek - n = taosLSeekFile(pFD, pSubBlock->offset, SEEK_SET); + int64_t n = taosLSeekFile(pReader->pSmaFD, pSmaInfo->offset, SEEK_SET); if (n < 0) { code = TAOS_SYSTEM_ERROR(errno); goto _err; - } - - // read - n = taosReadFile(pFD, *ppBuf1, pSubBlock->szBlock); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } else if (n < pSubBlock->szBlock) { + } else if (n < pSmaInfo->offset) { code = TSDB_CODE_FILE_CORRUPTED; goto _err; } - pBlockData->nRow = pSubBlock->nRow; - - // TSDBKEY - p = *ppBuf1 + pSubBlock->szBlockCol + sizeof(TSCKSUM); - code = tsdbReadBlockDataKey(pBlockData, pSubBlock, p, ppBuf2); - if (code) goto _err; - - // COLUMNS - aBlockCol = taosArrayInit(0, sizeof(SBlockCol)); - if (aBlockCol == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - - code = tsdbReadBlockCol(pSubBlock, *ppBuf1, aBlockCol); - if (code) goto _err; - - for (int32_t iBlockCol = 0; iBlockCol < taosArrayGetSize(aBlockCol); iBlockCol++) { - SColData *pColData; - SBlockCol *pBlockCol = (SBlockCol *)taosArrayGet(aBlockCol, iBlockCol); - - ASSERT(pBlockCol->flag && pBlockCol->flag != HAS_NONE); - - code = tBlockDataAddColData(pBlockData, iBlockCol, &pColData); - if (code) goto _err; - - tColDataInit(pColData, pBlockCol->cid, pBlockCol->type, pBlockCol->smaOn); - if (pBlockCol->flag == HAS_NULL) { - for (int32_t iRow = 0; iRow < pSubBlock->nRow; iRow++) { - code = tColDataAppendValue(pColData, &COL_VAL_NULL(pBlockCol->cid, pBlockCol->type)); - if (code) goto _err; - } - } else { - p = *ppBuf1 + pSubBlock->szBlockCol + sizeof(TSCKSUM) + pSubBlock->szVersion + pSubBlock->szTSKEY + - sizeof(TSCKSUM) + pBlockCol->offset; - code 
= tsdbReadColDataImpl(pSubBlock, pBlockCol, pColData, p, ppBuf2); - if (code) goto _err; - } - } - - taosArrayDestroy(aBlockCol); - return code; - -_err: - tsdbError("vgId:%d, tsdb read sub block data failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code)); - taosArrayDestroy(aBlockCol); - return code; -} - -int32_t tsdbReadBlockData(SDataFReader *pReader, SBlockIdx *pBlockIdx, SBlock *pBlock, SBlockData *pBlockData, - uint8_t **ppBuf1, uint8_t **ppBuf2) { - int32_t code = 0; - TdFilePtr pFD = pBlock->last ? pReader->pLastFD : pReader->pDataFD; - uint8_t *pBuf1 = NULL; - uint8_t *pBuf2 = NULL; - int32_t iSubBlock; - - if (!ppBuf1) ppBuf1 = &pBuf1; - if (!ppBuf2) ppBuf2 = &pBuf2; - - // read the first sub-block - iSubBlock = 0; - code = tsdbReadSubBlockData(pReader, pBlockIdx, pBlock, iSubBlock, pBlockData, ppBuf1, ppBuf2); - if (code) goto _err; - - // read remain block data and do merg - if (pBlock->nSubBlock > 1) { - SBlockData *pBlockData1 = &(SBlockData){0}; - SBlockData *pBlockData2 = &(SBlockData){0}; - - tBlockDataInit(pBlockData1); - tBlockDataInit(pBlockData2); - for (iSubBlock = 1; iSubBlock < pBlock->nSubBlock; iSubBlock++) { - code = tsdbReadSubBlockData(pReader, pBlockIdx, pBlock, iSubBlock, pBlockData1, ppBuf1, ppBuf2); - if (code) { - tBlockDataClear(pBlockData1, 1); - tBlockDataClear(pBlockData2, 1); - goto _err; - } - - code = tBlockDataCopy(pBlockData, pBlockData2); - if (code) { - tBlockDataClear(pBlockData1, 1); - tBlockDataClear(pBlockData2, 1); - goto _err; - } - - // merge two block data - code = tBlockDataMerge(pBlockData1, pBlockData2, pBlockData); - if (code) { - tBlockDataClear(pBlockData1, 1); - tBlockDataClear(pBlockData2, 1); - goto _err; - } - } - - tBlockDataClear(pBlockData1, 1); - tBlockDataClear(pBlockData2, 1); - } - - ASSERT(pBlock->nRow == pBlockData->nRow); - ASSERT(tsdbKeyCmprFn(&pBlock->minKey, &TSDBROW_KEY(&tBlockDataFirstRow(pBlockData))) == 0); - ASSERT(tsdbKeyCmprFn(&pBlock->maxKey, 
&TSDBROW_KEY(&tBlockDataLastRow(pBlockData))) == 0); - - if (pBuf1) tFree(pBuf1); - if (pBuf2) tFree(pBuf2); - return code; - -_err: - tsdbError("vgId:%d, tsdb read block data failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code)); - if (pBuf1) tFree(pBuf1); - if (pBuf2) tFree(pBuf2); - return code; -} - -int32_t tsdbReadBlockSma(SDataFReader *pReader, SBlock *pBlock, SArray *aColumnDataAgg, uint8_t **ppBuf) { - int32_t code = 0; - TdFilePtr pFD = pReader->pSmaFD; - int64_t offset = pBlock->aSubBlock[0].sOffset; - int64_t size = pBlock->aSubBlock[0].nSma * sizeof(SColumnDataAgg) + sizeof(TSCKSUM); - uint8_t *pBuf = NULL; - int64_t n; - - ASSERT(tBlockHasSma(pBlock)); - - if (!ppBuf) ppBuf = &pBuf; - code = tRealloc(ppBuf, size); - if (code) goto _err; - - // lseek - n = taosLSeekFile(pFD, offset, SEEK_SET); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - // read - n = taosReadFile(pFD, *ppBuf, size); + n = taosReadFile(pReader->pSmaFD, pReader->aBuf[0], size); if (n < 0) { code = TAOS_SYSTEM_ERROR(errno); goto _err; @@ -1187,26 +715,202 @@ int32_t tsdbReadBlockSma(SDataFReader *pReader, SBlock *pBlock, SArray *aColumnD } // check - if (!taosCheckChecksumWhole(*ppBuf, size)) { + if (!taosCheckChecksumWhole(pReader->aBuf[0], size)) { code = TSDB_CODE_FILE_CORRUPTED; goto _err; } // decode - taosArrayClear(aColumnDataAgg); - for (int32_t iSma = 0; iSma < pBlock->aSubBlock[0].nSma; iSma++) { - if (taosArrayPush(aColumnDataAgg, &((SColumnDataAgg *)(*ppBuf))[iSma]) == NULL) { + n = 0; + while (n < pSmaInfo->size) { + SColumnDataAgg sma; + + n += tGetColumnDataAgg(pReader->aBuf[0] + n, &sma); + if (taosArrayPush(aColumnDataAgg, &sma) == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _err; } } - tFree(pBuf); return code; _err: - tsdbError("vgId:%d, read block sma failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code)); - tFree(pBuf); + tsdbError("vgId:%d tsdb read block sma failed since %s", TD_VID(pReader->pTsdb->pVnode), 
tstrerror(code)); + return code; +} + +static int32_t tsdbReadBlockDataImpl(SDataFReader *pReader, SBlockInfo *pBlkInfo, int8_t fromLast, + SBlockData *pBlockData) { + int32_t code = 0; + + tBlockDataClear(pBlockData); + + TdFilePtr pFD = fromLast ? pReader->pLastFD : pReader->pDataFD; + + // uid + version + tskey + code = tsdbReadAndCheck(pFD, pBlkInfo->offset, &pReader->aBuf[0], pBlkInfo->szKey, 1); + if (code) goto _err; + SDiskDataHdr hdr; + uint8_t *p = pReader->aBuf[0] + tGetDiskDataHdr(pReader->aBuf[0], &hdr); + + ASSERT(hdr.delimiter == TSDB_FILE_DLMT); + ASSERT(pBlockData->suid == hdr.suid); + ASSERT(pBlockData->uid == hdr.uid); + + pBlockData->nRow = hdr.nRow; + + // uid + if (hdr.uid == 0) { + ASSERT(hdr.szUid); + code = tsdbDecmprData(p, hdr.szUid, TSDB_DATA_TYPE_BIGINT, hdr.cmprAlg, (uint8_t **)&pBlockData->aUid, + sizeof(int64_t) * hdr.nRow, &pReader->aBuf[1]); + if (code) goto _err; + } else { + ASSERT(!hdr.szUid); + } + p += hdr.szUid; + + // version + code = tsdbDecmprData(p, hdr.szVer, TSDB_DATA_TYPE_BIGINT, hdr.cmprAlg, (uint8_t **)&pBlockData->aVersion, + sizeof(int64_t) * hdr.nRow, &pReader->aBuf[1]); + if (code) goto _err; + p += hdr.szVer; + + // TSKEY + code = tsdbDecmprData(p, hdr.szKey, TSDB_DATA_TYPE_TIMESTAMP, hdr.cmprAlg, (uint8_t **)&pBlockData->aTSKEY, + sizeof(TSKEY) * hdr.nRow, &pReader->aBuf[1]); + if (code) goto _err; + p += hdr.szKey; + + ASSERT(p - pReader->aBuf[0] == pBlkInfo->szKey - sizeof(TSCKSUM)); + + // read and decode columns + if (taosArrayGetSize(pBlockData->aIdx) == 0) goto _exit; + + if (hdr.szBlkCol > 0) { + int64_t offset = pBlkInfo->offset + pBlkInfo->szKey; + code = tsdbReadAndCheck(pFD, offset, &pReader->aBuf[0], hdr.szBlkCol + sizeof(TSCKSUM), 1); + if (code) goto _err; + } + + SBlockCol blockCol = {.cid = 0}; + SBlockCol *pBlockCol = &blockCol; + int32_t n = 0; + + for (int32_t iColData = 0; iColData < taosArrayGetSize(pBlockData->aIdx); iColData++) { + SColData *pColData = 
tBlockDataGetColDataByIdx(pBlockData, iColData); + + while (pBlockCol && pBlockCol->cid < pColData->cid) { + if (n < hdr.szBlkCol) { + n += tGetBlockCol(pReader->aBuf[0] + n, pBlockCol); + } else { + ASSERT(n == hdr.szBlkCol); + pBlockCol = NULL; + } + } + + if (pBlockCol == NULL || pBlockCol->cid > pColData->cid) { + // add a lot of NONE + for (int32_t iRow = 0; iRow < hdr.nRow; iRow++) { + code = tColDataAppendValue(pColData, &COL_VAL_NONE(pColData->cid, pColData->type)); + if (code) goto _err; + } + } else { + ASSERT(pBlockCol->type == pColData->type); + ASSERT(pBlockCol->flag && pBlockCol->flag != HAS_NONE); + + if (pBlockCol->flag == HAS_NULL) { + // add a lot of NULL + for (int32_t iRow = 0; iRow < hdr.nRow; iRow++) { + code = tColDataAppendValue(pColData, &COL_VAL_NULL(pBlockCol->cid, pBlockCol->type)); + if (code) goto _err; + } + } else { + // decode from binary + int64_t offset = pBlkInfo->offset + pBlkInfo->szKey + hdr.szBlkCol + sizeof(TSCKSUM) + pBlockCol->offset; + int32_t size = pBlockCol->szBitmap + pBlockCol->szOffset + pBlockCol->szValue + sizeof(TSCKSUM); + + code = tsdbReadAndCheck(pFD, offset, &pReader->aBuf[1], size, 0); + if (code) goto _err; + + code = tsdbDecmprColData(pReader->aBuf[1], pBlockCol, hdr.cmprAlg, hdr.nRow, pColData, &pReader->aBuf[2]); + if (code) goto _err; + } + } + } + +_exit: + return code; + +_err: + tsdbError("vgId:%d tsdb read block data impl failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code)); + return code; +} + +int32_t tsdbReadDataBlock(SDataFReader *pReader, SBlock *pBlock, SBlockData *pBlockData) { + int32_t code = 0; + + code = tsdbReadBlockDataImpl(pReader, &pBlock->aSubBlock[0], 0, pBlockData); + if (code) goto _err; + + if (pBlock->nSubBlock > 1) { + SBlockData bData1; + SBlockData bData2; + + // create + code = tBlockDataCreate(&bData1); + if (code) goto _err; + code = tBlockDataCreate(&bData2); + if (code) goto _err; + + // init + tBlockDataInitEx(&bData1, pBlockData); + 
tBlockDataInitEx(&bData2, pBlockData); + + for (int32_t iSubBlock = 1; iSubBlock < pBlock->nSubBlock; iSubBlock++) { + code = tsdbReadBlockDataImpl(pReader, &pBlock->aSubBlock[iSubBlock], 0, &bData1); + if (code) { + tBlockDataDestroy(&bData1, 1); + tBlockDataDestroy(&bData2, 1); + goto _err; + } + + code = tBlockDataCopy(pBlockData, &bData2); + if (code) { + tBlockDataDestroy(&bData1, 1); + tBlockDataDestroy(&bData2, 1); + goto _err; + } + + code = tBlockDataMerge(&bData1, &bData2, pBlockData); + if (code) { + tBlockDataDestroy(&bData1, 1); + tBlockDataDestroy(&bData2, 1); + goto _err; + } + } + + tBlockDataDestroy(&bData1, 1); + tBlockDataDestroy(&bData2, 1); + } + + return code; + +_err: + tsdbError("vgId:%d tsdb read data block failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code)); + return code; +} + +int32_t tsdbReadLastBlock(SDataFReader *pReader, SBlockL *pBlockL, SBlockData *pBlockData) { + int32_t code = 0; + + code = tsdbReadBlockDataImpl(pReader, &pBlockL->bInfo, 1, pBlockData); + if (code) goto _err; + + return code; + +_err: + tsdbError("vgId:%d tsdb read last block failed since %s", TD_VID(pReader->pTsdb->pVnode), tstrerror(code)); return code; } @@ -1225,6 +929,7 @@ int32_t tsdbDataFWriterOpen(SDataFWriter **ppWriter, STsdb *pTsdb, SDFileSet *pS code = TSDB_CODE_OUT_OF_MEMORY; goto _err; } + if (code) goto _err; pWriter->pTsdb = pTsdb; pWriter->wSet = (SDFileSet){.diskId = pSet->diskId, .fid = pSet->fid, @@ -1357,10 +1062,11 @@ _err: int32_t tsdbDataFWriterClose(SDataFWriter **ppWriter, int8_t sync) { int32_t code = 0; - STsdb *pTsdb = (*ppWriter)->pTsdb; + STsdb *pTsdb = NULL; if (*ppWriter == NULL) goto _exit; + pTsdb = (*ppWriter)->pTsdb; if (sync) { if (taosFsyncFile((*ppWriter)->pHeadFD) < 0) { code = TAOS_SYSTEM_ERROR(errno); @@ -1403,6 +1109,9 @@ int32_t tsdbDataFWriterClose(SDataFWriter **ppWriter, int8_t sync) { goto _err; } + for (int32_t iBuf = 0; iBuf < sizeof((*ppWriter)->aBuf) / sizeof(uint8_t *); iBuf++) { + 
tFree((*ppWriter)->aBuf[iBuf]); + } taosMemoryFree(*ppWriter); _exit: *ppWriter = NULL; @@ -1493,38 +1202,41 @@ _err: return code; } -int32_t tsdbWriteBlockIdx(SDataFWriter *pWriter, SArray *aBlockIdx, uint8_t **ppBuf) { +int32_t tsdbWriteBlockIdx(SDataFWriter *pWriter, SArray *aBlockIdx) { int32_t code = 0; SHeadFile *pHeadFile = &pWriter->fHead; - uint8_t *pBuf = NULL; - int64_t size; + int64_t size = 0; int64_t n; - if (!ppBuf) ppBuf = &pBuf; + // check + if (taosArrayGetSize(aBlockIdx) == 0) { + pHeadFile->offset = pHeadFile->size; + goto _exit; + } // prepare - size = tPutU32(NULL, TSDB_FILE_DLMT); + size = sizeof(uint32_t); for (int32_t iBlockIdx = 0; iBlockIdx < taosArrayGetSize(aBlockIdx); iBlockIdx++) { size += tPutBlockIdx(NULL, taosArrayGet(aBlockIdx, iBlockIdx)); } size += sizeof(TSCKSUM); // alloc - code = tRealloc(ppBuf, size); + code = tRealloc(&pWriter->aBuf[0], size); if (code) goto _err; // build n = 0; - n = tPutU32(*ppBuf + n, TSDB_FILE_DLMT); + n = tPutU32(pWriter->aBuf[0] + n, TSDB_FILE_DLMT); for (int32_t iBlockIdx = 0; iBlockIdx < taosArrayGetSize(aBlockIdx); iBlockIdx++) { - n += tPutBlockIdx(*ppBuf + n, taosArrayGet(aBlockIdx, iBlockIdx)); + n += tPutBlockIdx(pWriter->aBuf[0] + n, taosArrayGet(aBlockIdx, iBlockIdx)); } - taosCalcChecksumAppend(0, *ppBuf, size); + taosCalcChecksumAppend(0, pWriter->aBuf[0], size); ASSERT(n + sizeof(TSCKSUM) == size); // write - n = taosWriteFile(pWriter->pHeadFD, *ppBuf, size); + n = taosWriteFile(pWriter->pHeadFD, pWriter->aBuf[0], size); if (n < 0) { code = TAOS_SYSTEM_ERROR(errno); goto _err; @@ -1534,44 +1246,39 @@ int32_t tsdbWriteBlockIdx(SDataFWriter *pWriter, SArray *aBlockIdx, uint8_t **pp pHeadFile->offset = pHeadFile->size; pHeadFile->size += size; - tFree(pBuf); +_exit: + tsdbTrace("vgId:%d write block idx, offset:%" PRId64 " size:%" PRId64 " nBlockIdx:%d", TD_VID(pWriter->pTsdb->pVnode), + pHeadFile->offset, size, taosArrayGetSize(aBlockIdx)); return code; _err: tsdbError("vgId:%d, write block 
idx failed since %s", TD_VID(pWriter->pTsdb->pVnode), tstrerror(code)); - tFree(pBuf); return code; } -int32_t tsdbWriteBlock(SDataFWriter *pWriter, SMapData *mBlock, uint8_t **ppBuf, SBlockIdx *pBlockIdx) { - int32_t code = 0; - SHeadFile *pHeadFile = &pWriter->fHead; - SBlockDataHdr hdr = {.delimiter = TSDB_FILE_DLMT, .suid = pBlockIdx->suid, .uid = pBlockIdx->uid}; - uint8_t *pBuf = NULL; - int64_t size; - int64_t n; +int32_t tsdbWriteBlock(SDataFWriter *pWriter, SMapData *mBlock, SBlockIdx *pBlockIdx) { + int32_t code = 0; + SHeadFile *pHeadFile = &pWriter->fHead; + int64_t size; + int64_t n; ASSERT(mBlock->nItem > 0); - // prepare - size = sizeof(SBlockDataHdr) + tPutMapData(NULL, mBlock) + sizeof(TSCKSUM); - // alloc - if (!ppBuf) ppBuf = &pBuf; - code = tRealloc(ppBuf, size); + size = sizeof(uint32_t) + tPutMapData(NULL, mBlock) + sizeof(TSCKSUM); + code = tRealloc(&pWriter->aBuf[0], size); if (code) goto _err; // build n = 0; - *(SBlockDataHdr *)(*ppBuf) = hdr; - n += sizeof(hdr); - n += tPutMapData(*ppBuf + n, mBlock); - taosCalcChecksumAppend(0, *ppBuf, size); + n += tPutU32(pWriter->aBuf[0] + n, TSDB_FILE_DLMT); + n += tPutMapData(pWriter->aBuf[0] + n, mBlock); + taosCalcChecksumAppend(0, pWriter->aBuf[0], size); ASSERT(n + sizeof(TSCKSUM) == size); // write - n = taosWriteFile(pWriter->pHeadFD, *ppBuf, size); + n = taosWriteFile(pWriter->pHeadFD, pWriter->aBuf[0], size); if (n < 0) { code = TAOS_SYSTEM_ERROR(errno); goto _err; @@ -1582,17 +1289,71 @@ int32_t tsdbWriteBlock(SDataFWriter *pWriter, SMapData *mBlock, uint8_t **ppBuf, pBlockIdx->size = size; pHeadFile->size += size; - tFree(pBuf); - tsdbTrace("vgId:%d, write block, offset:%" PRId64 " size:%" PRId64, TD_VID(pWriter->pTsdb->pVnode), pBlockIdx->offset, - pBlockIdx->size); + tsdbTrace("vgId:%d, write block, file ID:%d commit ID:%d suid:%" PRId64 " uid:%" PRId64 " offset:%" PRId64 + " size:%" PRId64 " nItem:%d", + TD_VID(pWriter->pTsdb->pVnode), pWriter->wSet.fid, pHeadFile->commitID, 
pBlockIdx->suid, pBlockIdx->uid, + pBlockIdx->offset, pBlockIdx->size, mBlock->nItem); return code; _err: - tFree(pBuf); tsdbError("vgId:%d, write block failed since %s", TD_VID(pWriter->pTsdb->pVnode), tstrerror(code)); return code; } +int32_t tsdbWriteBlockL(SDataFWriter *pWriter, SArray *aBlockL) { + int32_t code = 0; + SLastFile *pLastFile = &pWriter->fLast; + int64_t size; + int64_t n; + + // check + if (taosArrayGetSize(aBlockL) == 0) { + pLastFile->offset = pLastFile->size; + goto _exit; + } + + // size + size = sizeof(uint32_t); // TSDB_FILE_DLMT + for (int32_t iBlockL = 0; iBlockL < taosArrayGetSize(aBlockL); iBlockL++) { + size += tPutBlockL(NULL, taosArrayGet(aBlockL, iBlockL)); + } + size += sizeof(TSCKSUM); + + // alloc + code = tRealloc(&pWriter->aBuf[0], size); + if (code) goto _err; + + // encode + n = 0; + n += tPutU32(pWriter->aBuf[0] + n, TSDB_FILE_DLMT); + for (int32_t iBlockL = 0; iBlockL < taosArrayGetSize(aBlockL); iBlockL++) { + n += tPutBlockL(pWriter->aBuf[0] + n, taosArrayGet(aBlockL, iBlockL)); + } + taosCalcChecksumAppend(0, pWriter->aBuf[0], size); + + ASSERT(n + sizeof(TSCKSUM) == size); + + // write + n = taosWriteFile(pWriter->pLastFD, pWriter->aBuf[0], size); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } + + // update + pLastFile->offset = pLastFile->size; + pLastFile->size += size; + +_exit: + tsdbTrace("vgId:%d tsdb write blockl, loffset:%" PRId64 " size:%" PRId64, TD_VID(pWriter->pTsdb->pVnode), + pLastFile->offset, size); + return code; + +_err: + tsdbError("vgId:%d tsdb write blockl failed since %s", TD_VID(pWriter->pTsdb->pVnode), tstrerror(code)); + return code; +} + static void tsdbUpdateBlockInfo(SBlockData *pBlockData, SBlock *pBlock) { for (int32_t iRow = 0; iRow < pBlockData->nRow; iRow++) { TSDBKEY key = {.ts = pBlockData->aTSKEY[iRow], .version = pBlockData->aVersion[iRow]}; @@ -1611,357 +1372,127 @@ static void tsdbUpdateBlockInfo(SBlockData *pBlockData, SBlock *pBlock) { pBlock->maxKey = key; } 
- pBlock->minVersion = TMIN(pBlock->minVersion, key.version); - pBlock->maxVersion = TMAX(pBlock->maxVersion, key.version); + pBlock->minVer = TMIN(pBlock->minVer, key.version); + pBlock->maxVer = TMAX(pBlock->maxVer, key.version); } pBlock->nRow += pBlockData->nRow; } -static int32_t tsdbWriteBlockDataKey(SSubBlock *pSubBlock, SBlockData *pBlockData, uint8_t **ppBuf1, int64_t *nDataP, - uint8_t **ppBuf2) { +static int32_t tsdbWriteBlockSma(SDataFWriter *pWriter, SBlockData *pBlockData, SSmaInfo *pSmaInfo) { int32_t code = 0; - int64_t size; - int64_t tsize; - if (pSubBlock->cmprAlg == NO_COMPRESSION) { - pSubBlock->szVersion = sizeof(int64_t) * pSubBlock->nRow; - pSubBlock->szTSKEY = sizeof(TSKEY) * pSubBlock->nRow; + pSmaInfo->offset = 0; + pSmaInfo->size = 0; - code = tRealloc(ppBuf1, *nDataP + pSubBlock->szVersion + pSubBlock->szTSKEY + sizeof(TSCKSUM)); - if (code) goto _err; - - // VERSION - memcpy(*ppBuf1 + *nDataP, pBlockData->aVersion, pSubBlock->szVersion); - - // TSKEY - memcpy(*ppBuf1 + *nDataP + pSubBlock->szVersion, pBlockData->aTSKEY, pSubBlock->szTSKEY); - } else { - size = (sizeof(int64_t) + sizeof(TSKEY)) * pSubBlock->nRow + COMP_OVERFLOW_BYTES * 2; - - code = tRealloc(ppBuf1, *nDataP + size + sizeof(TSCKSUM)); - if (code) goto _err; - - tsize = sizeof(int64_t) * pSubBlock->nRow + COMP_OVERFLOW_BYTES; - if (pSubBlock->cmprAlg == TWO_STAGE_COMP) { - code = tRealloc(ppBuf2, tsize); - if (code) goto _err; - } - - // VERSION - pSubBlock->szVersion = - tsCompressBigint((char *)pBlockData->aVersion, sizeof(int64_t) * pBlockData->nRow, pBlockData->nRow, - *ppBuf1 + *nDataP, size, pSubBlock->cmprAlg, *ppBuf2, tsize); - if (pSubBlock->szVersion <= 0) { - code = TSDB_CODE_COMPRESS_ERROR; - goto _err; - } - - // TSKEY - pSubBlock->szTSKEY = tsCompressTimestamp((char *)pBlockData->aTSKEY, sizeof(TSKEY) * pBlockData->nRow, - pBlockData->nRow, *ppBuf1 + *nDataP + pSubBlock->szVersion, - size - pSubBlock->szVersion, pSubBlock->cmprAlg, *ppBuf2, tsize); - if 
(pSubBlock->szTSKEY <= 0) { - code = TSDB_CODE_COMPRESS_ERROR; - goto _err; - } - - ASSERT(pSubBlock->szVersion + pSubBlock->szTSKEY <= size); - } - - // checksum - size = pSubBlock->szVersion + pSubBlock->szTSKEY + sizeof(TSCKSUM); - taosCalcChecksumAppend(0, *ppBuf1 + *nDataP, size); - - *nDataP += size; - return code; - -_err: - return code; -} - -static int32_t tsdbWriteColData(SColData *pColData, SBlockCol *pBlockCol, SSubBlock *pSubBlock, uint8_t **ppBuf1, - int64_t *nDataP, uint8_t **ppBuf2) { - int32_t code = 0; - int64_t size; - int64_t n = 0; - - // BITMAP - if (pColData->flag != HAS_VALUE) { - size = BIT2_SIZE(pColData->nVal) + COMP_OVERFLOW_BYTES; - - code = tRealloc(ppBuf1, *nDataP + n + size); - if (code) goto _err; - - code = tRealloc(ppBuf2, size); - if (code) goto _err; - - pBlockCol->szBitmap = - tsCompressTinyint((char *)pColData->pBitMap, BIT2_SIZE(pColData->nVal), BIT2_SIZE(pColData->nVal), - *ppBuf1 + *nDataP + n, size, TWO_STAGE_COMP, *ppBuf2, size); - if (pBlockCol->szBitmap <= 0) { - code = TSDB_CODE_COMPRESS_ERROR; - goto _err; - } - } else { - pBlockCol->szBitmap = 0; - } - n += pBlockCol->szBitmap; - - // OFFSET - if (IS_VAR_DATA_TYPE(pColData->type)) { - size = sizeof(int32_t) * pColData->nVal + COMP_OVERFLOW_BYTES; - - code = tRealloc(ppBuf1, *nDataP + n + size); - if (code) goto _err; - - code = tRealloc(ppBuf2, size); - if (code) goto _err; - - pBlockCol->szOffset = tsCompressInt((char *)pColData->aOffset, sizeof(int32_t) * pColData->nVal, pColData->nVal, - *ppBuf1 + *nDataP + n, size, TWO_STAGE_COMP, *ppBuf2, size); - if (pBlockCol->szOffset <= 0) { - code = TSDB_CODE_COMPRESS_ERROR; - goto _err; - } - } else { - pBlockCol->szOffset = 0; - } - n += pBlockCol->szOffset; - - // VALUE - if (pSubBlock->cmprAlg == NO_COMPRESSION) { - pBlockCol->szValue = pColData->nData; - - code = tRealloc(ppBuf1, *nDataP + n + pBlockCol->szValue + sizeof(TSCKSUM)); - if (code) goto _err; - - memcpy(*ppBuf1 + *nDataP + n, pColData->pData, 
pBlockCol->szValue); - } else { - size = pColData->nData + COMP_OVERFLOW_BYTES; - - code = tRealloc(ppBuf1, *nDataP + n + size + sizeof(TSCKSUM)); - if (code) goto _err; - - if (pSubBlock->cmprAlg == TWO_STAGE_COMP) { - code = tRealloc(ppBuf2, size); - if (code) goto _err; - } - - pBlockCol->szValue = - tDataTypes[pColData->type].compFunc((char *)pColData->pData, pColData->nData, pColData->nVal, - *ppBuf1 + *nDataP + n, size, pSubBlock->cmprAlg, *ppBuf2, size); - if (pBlockCol->szValue <= 0) { - code = TSDB_CODE_COMPRESS_ERROR; - goto _err; - } - } - n += pBlockCol->szValue; - pBlockCol->szOrigin = pColData->nData; - - // checksum - n += sizeof(TSCKSUM); - taosCalcChecksumAppend(0, *ppBuf1 + *nDataP, n); - - *nDataP += n; - - return code; - -_err: - return code; -} - -static int32_t tsdbWriteBlockDataImpl(TdFilePtr pFD, SSubBlock *pSubBlock, SBlockDataHdr hdr, SArray *aBlockCol, - uint8_t *pData, int64_t nData, uint8_t **ppBuf) { - int32_t code = 0; - int32_t nBlockCol = taosArrayGetSize(aBlockCol); - int64_t size; - int64_t n; - - // HDR + SArray - pSubBlock->szBlockCol = sizeof(hdr); - for (int32_t iBlockCol = 0; iBlockCol < nBlockCol; iBlockCol++) { - pSubBlock->szBlockCol += tPutBlockCol(NULL, taosArrayGet(aBlockCol, iBlockCol)); - } - - code = tRealloc(ppBuf, pSubBlock->szBlockCol + sizeof(TSCKSUM)); - if (code) goto _err; - - n = 0; - memcpy(*ppBuf, &hdr, sizeof(hdr)); - n += sizeof(hdr); - for (int32_t iBlockCol = 0; iBlockCol < nBlockCol; iBlockCol++) { - n += tPutBlockCol(*ppBuf + n, taosArrayGet(aBlockCol, iBlockCol)); - } - taosCalcChecksumAppend(0, *ppBuf, pSubBlock->szBlockCol + sizeof(TSCKSUM)); - - ASSERT(n == pSubBlock->szBlockCol); - - n = taosWriteFile(pFD, *ppBuf, pSubBlock->szBlockCol + sizeof(TSCKSUM)); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - - // SBlockData - n = taosWriteFile(pFD, pData, nData); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - - return code; - -_err: - return code; -} - -static 
int32_t tsdbWriteBlockSma(TdFilePtr pFD, SBlockData *pBlockData, SSubBlock *pSubBlock, uint8_t **ppBuf) { - int32_t code = 0; - int64_t n; - SColData *pColData; - - // prepare - pSubBlock->nSma = 0; + // encode for (int32_t iColData = 0; iColData < taosArrayGetSize(pBlockData->aIdx); iColData++) { - pColData = tBlockDataGetColDataByIdx(pBlockData, iColData); + SColData *pColData = tBlockDataGetColDataByIdx(pBlockData, iColData); - if (IS_VAR_DATA_TYPE(pColData->type) || (!pColData->smaOn)) continue; + if ((!pColData->smaOn) || IS_VAR_DATA_TYPE(pColData->type)) continue; - pSubBlock->nSma++; - } - if (pSubBlock->nSma == 0) goto _exit; + SColumnDataAgg sma; + tsdbCalcColDataSMA(pColData, &sma); - // calc - code = tRealloc(ppBuf, sizeof(SColumnDataAgg) * pSubBlock->nSma + sizeof(TSCKSUM)); - if (code) goto _err; - n = 0; - for (int32_t iColData = 0; iColData < taosArrayGetSize(pBlockData->aIdx); iColData++) { - pColData = tBlockDataGetColDataByIdx(pBlockData, iColData); - - if (IS_VAR_DATA_TYPE(pColData->type) || (!pColData->smaOn)) continue; - - tsdbCalcColDataSMA(pColData, &((SColumnDataAgg *)(*ppBuf))[n]); - n++; - } - taosCalcChecksumAppend(0, *ppBuf, sizeof(SColumnDataAgg) * pSubBlock->nSma + sizeof(TSCKSUM)); - - // write - n = taosWriteFile(pFD, *ppBuf, sizeof(SColumnDataAgg) * pSubBlock->nSma + sizeof(TSCKSUM)); - if (n < 0) { - code = TAOS_SYSTEM_ERROR(errno); - goto _err; - } - -_exit: - return code; - -_err: - return code; -} - -int32_t tsdbWriteBlockData(SDataFWriter *pWriter, SBlockData *pBlockData, uint8_t **ppBuf1, uint8_t **ppBuf2, - SBlockIdx *pBlockIdx, SBlock *pBlock, int8_t cmprAlg) { - int32_t code = 0; - SSubBlock *pSubBlock = &pBlock->aSubBlock[pBlock->nSubBlock++]; - SBlockCol blockCol; - SBlockCol *pBlockCol = &blockCol; - int64_t n; - TdFilePtr pFileFD = pBlock->last ? 
pWriter->pLastFD : pWriter->pDataFD; - SBlockDataHdr hdr = {.delimiter = TSDB_FILE_DLMT, .suid = pBlockIdx->suid, .uid = pBlockIdx->uid}; - uint8_t *p; - int64_t nData; - uint8_t *pBuf1 = NULL; - uint8_t *pBuf2 = NULL; - SArray *aBlockCol = NULL; - - if (!ppBuf1) ppBuf1 = &pBuf1; - if (!ppBuf2) ppBuf2 = &pBuf2; - - tsdbUpdateBlockInfo(pBlockData, pBlock); - - pSubBlock->nRow = pBlockData->nRow; - pSubBlock->cmprAlg = cmprAlg; - if (pBlock->last) { - pSubBlock->offset = pWriter->fLast.size; - } else { - pSubBlock->offset = pWriter->fData.size; - } - - // ======================= BLOCK DATA ======================= - // TSDBKEY - nData = 0; - code = tsdbWriteBlockDataKey(pSubBlock, pBlockData, ppBuf1, &nData, ppBuf2); - if (code) goto _err; - - // COLUMNS - aBlockCol = taosArrayInit(taosArrayGetSize(pBlockData->aIdx), sizeof(SBlockCol)); - if (aBlockCol == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - int32_t offset = 0; - for (int32_t iCol = 0; iCol < taosArrayGetSize(pBlockData->aIdx); iCol++) { - SColData *pColData = tBlockDataGetColDataByIdx(pBlockData, iCol); - - ASSERT(pColData->flag); - - if (pColData->flag == HAS_NONE) continue; - - pBlockCol->cid = pColData->cid; - pBlockCol->type = pColData->type; - pBlockCol->smaOn = pColData->smaOn; - pBlockCol->flag = pColData->flag; - - if (pColData->flag != HAS_NULL) { - code = tsdbWriteColData(pColData, pBlockCol, pSubBlock, ppBuf1, &nData, ppBuf2); - if (code) goto _err; - - pBlockCol->offset = offset; - offset = offset + pBlockCol->szBitmap + pBlockCol->szOffset + pBlockCol->szValue + sizeof(TSCKSUM); - } - - if (taosArrayPush(aBlockCol, pBlockCol) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } + code = tRealloc(&pWriter->aBuf[0], pSmaInfo->size + tPutColumnDataAgg(NULL, &sma)); + if (code) goto _err; + pSmaInfo->size += tPutColumnDataAgg(pWriter->aBuf[0] + pSmaInfo->size, &sma); } // write - code = tsdbWriteBlockDataImpl(pFileFD, pSubBlock, hdr, aBlockCol, *ppBuf1, nData, ppBuf2); - if 
(code) goto _err; + if (pSmaInfo->size) { + int32_t size = pSmaInfo->size + sizeof(TSCKSUM); - pSubBlock->szBlock = pSubBlock->szBlockCol + sizeof(TSCKSUM) + nData; - if (pBlock->last) { - pWriter->fLast.size += pSubBlock->szBlock; - } else { - pWriter->fData.size += pSubBlock->szBlock; + code = tRealloc(&pWriter->aBuf[0], size); + if (code) goto _err; + + taosCalcChecksumAppend(0, pWriter->aBuf[0], size); + + int64_t n = taosWriteFile(pWriter->pSmaFD, pWriter->aBuf[0], size); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } + + pSmaInfo->offset = pWriter->fSma.size; + pWriter->fSma.size += size; } - // ======================= BLOCK SMA ======================= - pSubBlock->sOffset = 0; - pSubBlock->nSma = 0; - - if (pBlock->nSubBlock > 1 || pBlock->last || pBlock->hasDup) goto _exit; - - code = tsdbWriteBlockSma(pWriter->pSmaFD, pBlockData, pSubBlock, ppBuf1); - if (code) goto _err; - - if (pSubBlock->nSma > 0) { - pSubBlock->sOffset = pWriter->fSma.size; - pWriter->fSma.size += (sizeof(SColumnDataAgg) * pSubBlock->nSma + sizeof(TSCKSUM)); - } - -_exit: - tFree(pBuf1); - tFree(pBuf2); - taosArrayDestroy(aBlockCol); return code; _err: - tsdbError("vgId:%d, write block data failed since %s", TD_VID(pWriter->pTsdb->pVnode), tstrerror(code)); - tFree(pBuf1); - tFree(pBuf2); - taosArrayDestroy(aBlockCol); + tsdbError("vgId:%d tsdb write block sma failed since %s", TD_VID(pWriter->pTsdb->pVnode), tstrerror(code)); + return code; +} + +int32_t tsdbWriteBlockData(SDataFWriter *pWriter, SBlockData *pBlockData, SBlockInfo *pBlkInfo, SSmaInfo *pSmaInfo, + int8_t cmprAlg, int8_t toLast) { + int32_t code = 0; + + ASSERT(pBlockData->nRow > 0); + + pBlkInfo->offset = toLast ? pWriter->fLast.size : pWriter->fData.size; + pBlkInfo->szBlock = 0; + pBlkInfo->szKey = 0; + + int32_t aBufN[4] = {0}; + code = tCmprBlockData(pBlockData, cmprAlg, NULL, NULL, pWriter->aBuf, aBufN); + if (code) goto _err; + + // write ================= + TdFilePtr pFD = toLast ? 
pWriter->pLastFD : pWriter->pDataFD; + + pBlkInfo->szKey = aBufN[3] + aBufN[2]; + pBlkInfo->szBlock = aBufN[0] + aBufN[1] + aBufN[2] + aBufN[3]; + + int64_t n = taosWriteFile(pFD, pWriter->aBuf[3], aBufN[3]); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } + + n = taosWriteFile(pFD, pWriter->aBuf[2], aBufN[2]); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } + + if (aBufN[1]) { + n = taosWriteFile(pFD, pWriter->aBuf[1], aBufN[1]); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } + } + + if (aBufN[0]) { + n = taosWriteFile(pFD, pWriter->aBuf[0], aBufN[0]); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _err; + } + } + + // update info + if (toLast) { + pWriter->fLast.size += pBlkInfo->szBlock; + } else { + pWriter->fData.size += pBlkInfo->szBlock; + } + + // ================= SMA ==================== + if (pSmaInfo) { + code = tsdbWriteBlockSma(pWriter, pBlockData, pSmaInfo); + if (code) goto _err; + } + +_exit: + tsdbTrace("vgId:%d tsdb write block data, suid:%" PRId64 " uid:%" PRId64 " nRow:%d, offset:%" PRId64 " size:%d", + TD_VID(pWriter->pTsdb->pVnode), pBlockData->suid, pBlockData->uid, pBlockData->nRow, pBlkInfo->offset, + pBlkInfo->szBlock); + return code; + +_err: + tsdbError("vgId:%d tsdb write block data failed since %s", TD_VID(pWriter->pTsdb->pVnode), tstrerror(code)); return code; } @@ -2075,4 +1606,4 @@ int32_t tsdbDFileSetCopy(STsdb *pTsdb, SDFileSet *pSetFrom, SDFileSet *pSetTo) { _err: tsdbError("vgId:%d, tsdb DFileSet copy failed since %s", TD_VID(pTsdb->pVnode), tstrerror(code)); return code; -} \ No newline at end of file +} diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index c40fb98d62..ab2b2b617a 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -27,12 +27,16 @@ struct STsdbSnapReader { int32_t fid; SDataFReader* pDataFReader; SArray* aBlockIdx; // SArray - int32_t 
iBlockIdx; + SArray* aBlockL; // SArray SBlockIdx* pBlockIdx; - SMapData mBlock; // SMapData - int32_t iBlock; - SBlockData oBlockData; - SBlockData nBlockData; + SBlockL* pBlockL; + + int32_t iBlockIdx; + int32_t iBlockL; + SMapData mBlock; // SMapData + int32_t iBlock; + SBlockData oBlockData; + SBlockData nBlockData; // for del file int8_t delDone; SDelFReader* pDelFReader; @@ -47,114 +51,116 @@ static int32_t tsdbSnapReadData(STsdbSnapReader* pReader, uint8_t** ppData) { while (true) { if (pReader->pDataFReader == NULL) { - SDFileSet* pSet = - taosArraySearch(pReader->fs.aDFileSet, &(SDFileSet){.fid = pReader->fid}, tDFileSetCmprFn, TD_GT); - + // next + SDFileSet dFileSet = {.fid = pReader->fid}; + SDFileSet* pSet = taosArraySearch(pReader->fs.aDFileSet, &dFileSet, tDFileSetCmprFn, TD_GT); if (pSet == NULL) goto _exit; - pReader->fid = pSet->fid; - code = tsdbDataFReaderOpen(&pReader->pDataFReader, pReader->pTsdb, pSet); + + // load + code = tsdbDataFReaderOpen(&pReader->pDataFReader, pTsdb, pSet); if (code) goto _err; - // SBlockIdx - code = tsdbReadBlockIdx(pReader->pDataFReader, pReader->aBlockIdx, NULL); + code = tsdbReadBlockIdx(pReader->pDataFReader, pReader->aBlockIdx); if (code) goto _err; + code = tsdbReadBlockL(pReader->pDataFReader, pReader->aBlockL); + if (code) goto _err; + + // init pReader->iBlockIdx = 0; - pReader->pBlockIdx = NULL; + if (pReader->iBlockIdx < taosArrayGetSize(pReader->aBlockIdx)) { + pReader->pBlockIdx = (SBlockIdx*)taosArrayGet(pReader->aBlockIdx, pReader->iBlockIdx); + + code = tsdbReadBlock(pReader->pDataFReader, pReader->pBlockIdx, &pReader->mBlock); + if (code) goto _err; + + pReader->iBlock = 0; + } else { + pReader->pBlockIdx = NULL; + } + + pReader->iBlockL = 0; + while (true) { + if (pReader->iBlockL >= taosArrayGetSize(pReader->aBlockL)) { + pReader->pBlockL = NULL; + break; + } + + pReader->pBlockL = (SBlockL*)taosArrayGet(pReader->aBlockL, pReader->iBlockL); + if (pReader->pBlockL->minVer <= pReader->ever && 
pReader->pBlockL->maxVer >= pReader->sver) { + // TODO + break; + } + + pReader->iBlockL++; + } tsdbInfo("vgId:%d, vnode snapshot tsdb open data file to read for %s, fid:%d", TD_VID(pTsdb->pVnode), pTsdb->path, pReader->fid); } while (true) { - if (pReader->pBlockIdx == NULL) { - if (pReader->iBlockIdx >= taosArrayGetSize(pReader->aBlockIdx)) { - tsdbDataFReaderClose(&pReader->pDataFReader); - break; + if (pReader->pBlockIdx && pReader->pBlockL) { + TABLEID id = {.suid = pReader->pBlockL->suid, .uid = pReader->pBlockL->minUid}; + + ASSERT(0); + + // if (tTABLEIDCmprFn(pReader->pBlockIdx, &minId) < 0) { + // // TODO + // } else if (tTABLEIDCmprFn(pReader->pBlockIdx, &maxId) < 0) { + // // TODO + // } else { + // // TODO + // } + } else if (pReader->pBlockIdx) { + while (pReader->iBlock < pReader->mBlock.nItem) { + SBlock block; + tMapDataGetItemByIdx(&pReader->mBlock, pReader->iBlock, &block, tGetBlock); + + if (block.minVer <= pReader->ever && block.maxVer >= pReader->sver) { + // load data (todo) + } + + // next + pReader->iBlock++; + if (*ppData) break; } - pReader->pBlockIdx = (SBlockIdx*)taosArrayGet(pReader->aBlockIdx, pReader->iBlockIdx); - pReader->iBlockIdx++; - - code = tsdbReadBlock(pReader->pDataFReader, pReader->pBlockIdx, &pReader->mBlock, NULL); - if (code) goto _err; - - pReader->iBlock = 0; - } - - SBlock block; - SBlock* pBlock = &block; - while (true) { if (pReader->iBlock >= pReader->mBlock.nItem) { - pReader->pBlockIdx = NULL; - break; + pReader->iBlockIdx++; + if (pReader->iBlockIdx < taosArrayGetSize(pReader->aBlockIdx)) { + pReader->pBlockIdx = (SBlockIdx*)taosArrayGet(pReader->aBlockIdx, pReader->iBlockIdx); + + code = tsdbReadBlock(pReader->pDataFReader, pReader->pBlockIdx, &pReader->mBlock); + if (code) goto _err; + + pReader->iBlock = 0; + } else { + pReader->pBlockIdx = NULL; + } } - tMapDataGetItemByIdx(&pReader->mBlock, pReader->iBlock, pBlock, tGetBlock); - pReader->iBlock++; + if (*ppData) goto _exit; + } else if (pReader->pBlockL) { + 
while (pReader->pBlockL) { + if (pReader->pBlockL->minVer <= pReader->ever && pReader->pBlockL->maxVer >= pReader->sver) { + // load data (todo) + } - if (pBlock->minVersion > pReader->ever || pBlock->maxVersion < pReader->sver) continue; + // next + pReader->iBlockL++; + if (pReader->iBlockL < taosArrayGetSize(pReader->aBlockL)) { + pReader->pBlockL = (SBlockL*)taosArrayGet(pReader->aBlockL, pReader->iBlockL); /* fetch element iBlockL; the array size is not a pointer */ + } else { + pReader->pBlockL = NULL; + } - code = tsdbReadBlockData(pReader->pDataFReader, pReader->pBlockIdx, pBlock, &pReader->oBlockData, NULL, NULL); - if (code) goto _err; - - // filter - tBlockDataReset(&pReader->nBlockData); - for (int32_t iColData = 0; iColData < taosArrayGetSize(pReader->oBlockData.aIdx); iColData++) { - SColData* pColDataO = tBlockDataGetColDataByIdx(&pReader->oBlockData, iColData); - SColData* pColDataN = NULL; - - code = tBlockDataAddColData(&pReader->nBlockData, taosArrayGetSize(pReader->nBlockData.aIdx), &pColDataN); - if (code) goto _err; - - tColDataInit(pColDataN, pColDataO->cid, pColDataO->type, pColDataO->smaOn); + if (*ppData) goto _exit; } - - for (int32_t iRow = 0; iRow < pReader->oBlockData.nRow; iRow++) { - TSDBROW row = tsdbRowFromBlockData(&pReader->oBlockData, iRow); - int64_t version = TSDBROW_VERSION(&row); - - tsdbTrace("vgId:%d, vnode snapshot tsdb read for %s, %" PRId64 "(%" PRId64 " , %" PRId64 ")", - TD_VID(pReader->pTsdb->pVnode), pReader->pTsdb->path, version, pReader->sver, pReader->ever); - - if (version < pReader->sver || version > pReader->ever) continue; - - code = tBlockDataAppendRow(&pReader->nBlockData, &row, NULL); - if (code) goto _err; - } - - if (pReader->nBlockData.nRow <= 0) { - continue; - } - - // org data - // compress data (todo) - int32_t size = sizeof(TABLEID) + tPutBlockData(NULL, &pReader->nBlockData); - - *ppData = taosMemoryMalloc(sizeof(SSnapDataHdr) + size); - if (*ppData == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - - SSnapDataHdr* pHdr = (SSnapDataHdr*)(*ppData); - 
pHdr->type = pReader->type; - pHdr->size = size; - - TABLEID* pId = (TABLEID*)(&pHdr[1]); - pId->suid = pReader->pBlockIdx->suid; - pId->uid = pReader->pBlockIdx->uid; - - tPutBlockData((uint8_t*)(&pId[1]), &pReader->nBlockData); - - tsdbInfo("vgId:%d, vnode snapshot read data for %s, fid:%d suid:%" PRId64 " uid:%" PRId64 - " iBlock:%d minVersion:%d maxVersion:%d nRow:%d out of %d size:%d", - TD_VID(pTsdb->pVnode), pTsdb->path, pReader->fid, pReader->pBlockIdx->suid, pReader->pBlockIdx->uid, - pReader->iBlock - 1, pBlock->minVersion, pBlock->maxVersion, pReader->nBlockData.nRow, pBlock->nRow, - size); - - goto _exit; + } else { + tsdbDataFReaderClose(&pReader->pDataFReader); + break; } } } @@ -179,11 +185,11 @@ static int32_t tsdbSnapReadDel(STsdbSnapReader* pReader, uint8_t** ppData) { } // open - code = tsdbDelFReaderOpen(&pReader->pDelFReader, pDelFile, pTsdb, NULL); + code = tsdbDelFReaderOpen(&pReader->pDelFReader, pDelFile, pTsdb); if (code) goto _err; // read index - code = tsdbReadDelIdx(pReader->pDelFReader, pReader->aDelIdx, NULL); + code = tsdbReadDelIdx(pReader->pDelFReader, pReader->aDelIdx); if (code) goto _err; pReader->iDelIdx = 0; @@ -199,7 +205,7 @@ static int32_t tsdbSnapReadDel(STsdbSnapReader* pReader, uint8_t** ppData) { pReader->iDelIdx++; - code = tsdbReadDelData(pReader->pDelFReader, pDelIdx, pReader->aDelData, NULL); + code = tsdbReadDelData(pReader->pDelFReader, pDelIdx, pReader->aDelData); if (code) goto _err; int32_t size = 0; @@ -292,10 +298,15 @@ int32_t tsdbSnapReaderOpen(STsdb* pTsdb, int64_t sver, int64_t ever, int8_t type code = TSDB_CODE_OUT_OF_MEMORY; goto _err; } + pReader->aBlockL = taosArrayInit(0, sizeof(SBlockL)); + if (pReader->aBlockL == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } pReader->mBlock = tMapDataInit(); - code = tBlockDataInit(&pReader->oBlockData); + code = tBlockDataCreate(&pReader->oBlockData); if (code) goto _err; - code = tBlockDataInit(&pReader->nBlockData); + code = 
tBlockDataCreate(&pReader->nBlockData); if (code) goto _err; pReader->aDelIdx = taosArrayInit(0, sizeof(SDelIdx)); @@ -327,10 +338,11 @@ int32_t tsdbSnapReaderClose(STsdbSnapReader** ppReader) { if (pReader->pDataFReader) { tsdbDataFReaderClose(&pReader->pDataFReader); } + taosArrayDestroy(pReader->aBlockL); taosArrayDestroy(pReader->aBlockIdx); tMapDataClear(&pReader->mBlock); - tBlockDataClear(&pReader->oBlockData, 1); - tBlockDataClear(&pReader->nBlockData, 1); + tBlockDataDestroy(&pReader->oBlockData, 1); + tBlockDataDestroy(&pReader->nBlockData, 1); if (pReader->pDelFReader) { tsdbDelFReaderClose(&pReader->pDelFReader); @@ -405,6 +417,7 @@ struct STsdbSnapWriter { int8_t cmprAlg; int64_t commitID; + uint8_t* aBuf[5]; // for data file SBlockData bData; @@ -418,6 +431,9 @@ struct STsdbSnapWriter { SBlockData* pBlockData; int32_t iRow; SBlockData bDataR; + SArray* aBlockL; // SArray + int32_t iBlockL; + SBlockData lDataR; SDataFWriter* pDataFWriter; SBlockIdx* pBlockIdxW; // NULL when no committing table @@ -427,6 +443,7 @@ struct STsdbSnapWriter { SMapData mBlockW; // SMapData SArray* aBlockIdxW; // SArray + SArray* aBlockLW; // SArray // for del file SDelFReader* pDelFReader; @@ -437,25 +454,6 @@ struct STsdbSnapWriter { SArray* aDelIdxW; }; -static int32_t tsdbSnapWriteAppendData(STsdbSnapWriter* pWriter, uint8_t* pData, uint32_t nData) { - int32_t code = 0; - int32_t iRow = 0; // todo - int32_t nRow = 0; // todo - SBlockData* pBlockData = NULL; // todo - - while (iRow < nRow) { - code = tBlockDataAppendRow(&pWriter->bDataW, &tsdbRowFromBlockData(pBlockData, iRow), NULL); - if (code) goto _err; - } - - return code; - -_err: - tsdbError("vgId:%d, tsdb snapshot write append data for %s failed since %s", TD_VID(pWriter->pTsdb->pVnode), - pWriter->pTsdb->path, tstrerror(code)); - return code; -} - static int32_t tsdbSnapWriteTableDataEnd(STsdbSnapWriter* pWriter) { int32_t code = 0; @@ -467,20 +465,21 @@ static int32_t tsdbSnapWriteTableDataEnd(STsdbSnapWriter* 
pWriter) { if (pWriter->pBlockData) { ASSERT(pWriter->iRow < pWriter->pBlockData->nRow); while (pWriter->iRow < pWriter->pBlockData->nRow) { - code = tBlockDataAppendRow(&pWriter->bDataW, &tsdbRowFromBlockData(pWriter->pBlockData, pWriter->iRow), NULL); + code = tBlockDataAppendRow(&pWriter->bDataW, &tsdbRowFromBlockData(pWriter->pBlockData, pWriter->iRow), NULL, + 0); // todo if (code) goto _err; if (pWriter->bDataW.nRow >= pWriter->maxRow * 4 / 5) { - pWriter->blockW.last = 0; - code = tsdbWriteBlockData(pWriter->pDataFWriter, &pWriter->bDataW, NULL, NULL, pWriter->pBlockIdxW, - &pWriter->blockW, pWriter->cmprAlg); + // pWriter->blockW.last = 0; + // code = tsdbWriteBlockData(pWriter->pDataFWriter, &pWriter->bDataW, NULL, NULL, pWriter->pBlockIdxW, + // &pWriter->blockW, pWriter->cmprAlg); if (code) goto _err; code = tMapDataPutItem(&pWriter->mBlockW, &pWriter->blockW, tPutBlock); if (code) goto _err; tBlockReset(&pWriter->blockW); - tBlockDataClearData(&pWriter->bDataW); + tBlockDataClear(&pWriter->bDataW); } pWriter->iRow++; @@ -489,16 +488,16 @@ static int32_t tsdbSnapWriteTableDataEnd(STsdbSnapWriter* pWriter) { // write remain data if has if (pWriter->bDataW.nRow > 0) { - pWriter->blockW.last = 0; + // pWriter->blockW.last = 0; if (pWriter->bDataW.nRow < pWriter->minRow) { if (pWriter->iBlock > pWriter->mBlock.nItem) { - pWriter->blockW.last = 1; + // pWriter->blockW.last = 1; } } - code = tsdbWriteBlockData(pWriter->pDataFWriter, &pWriter->bDataW, NULL, NULL, pWriter->pBlockIdxW, - &pWriter->blockW, pWriter->cmprAlg); - if (code) goto _err; + // code = tsdbWriteBlockData(pWriter->pDataFWriter, &pWriter->bDataW, NULL, NULL, pWriter->pBlockIdxW, + // &pWriter->blockW, pWriter->cmprAlg); + // if (code) goto _err; code = tMapDataPutItem(&pWriter->mBlockW, &pWriter->blockW, tPutBlock); if (code) goto _err; @@ -510,16 +509,16 @@ static int32_t tsdbSnapWriteTableDataEnd(STsdbSnapWriter* pWriter) { SBlock block; tMapDataGetItemByIdx(&pWriter->mBlock, 
pWriter->iBlock, &block, tGetBlock); - if (block.last) { - code = tsdbReadBlockData(pWriter->pDataFReader, pWriter->pBlockIdx, &block, &pWriter->bDataR, NULL, NULL); - if (code) goto _err; + // if (block.last) { + // code = tsdbReadBlockData(pWriter->pDataFReader, pWriter->pBlockIdx, &block, &pWriter->bDataR, NULL, NULL); + // if (code) goto _err; - tBlockReset(&block); - block.last = 1; - code = tsdbWriteBlockData(pWriter->pDataFWriter, &pWriter->bDataR, NULL, NULL, pWriter->pBlockIdxW, &block, - pWriter->cmprAlg); - if (code) goto _err; - } + // tBlockReset(&block); + // block.last = 1; + // code = tsdbWriteBlockData(pWriter->pDataFWriter, &pWriter->bDataR, NULL, NULL, pWriter->pBlockIdxW, &block, + // pWriter->cmprAlg); + // if (code) goto _err; + // } code = tMapDataPutItem(&pWriter->mBlockW, &block, tPutBlock); if (code) goto _err; @@ -528,8 +527,8 @@ static int32_t tsdbSnapWriteTableDataEnd(STsdbSnapWriter* pWriter) { } // SBlock - code = tsdbWriteBlock(pWriter->pDataFWriter, &pWriter->mBlockW, NULL, pWriter->pBlockIdxW); - if (code) goto _err; + // code = tsdbWriteBlock(pWriter->pDataFWriter, &pWriter->mBlockW, NULL, pWriter->pBlockIdxW); + // if (code) goto _err; // SBlockIdx if (taosArrayPush(pWriter->aBlockIdxW, pWriter->pBlockIdxW) == NULL) { @@ -550,7 +549,7 @@ _err: static int32_t tsdbSnapMoveWriteTableData(STsdbSnapWriter* pWriter, SBlockIdx* pBlockIdx) { int32_t code = 0; - code = tsdbReadBlock(pWriter->pDataFReader, pBlockIdx, &pWriter->mBlock, NULL); + code = tsdbReadBlock(pWriter->pDataFReader, pBlockIdx, &pWriter->mBlock); if (code) goto _err; // SBlockData @@ -559,16 +558,17 @@ static int32_t tsdbSnapMoveWriteTableData(STsdbSnapWriter* pWriter, SBlockIdx* p for (int32_t iBlock = 0; iBlock < pWriter->mBlock.nItem; iBlock++) { tMapDataGetItemByIdx(&pWriter->mBlock, iBlock, &block, tGetBlock); - if (block.last) { - code = tsdbReadBlockData(pWriter->pDataFReader, pBlockIdx, &block, &pWriter->bDataR, NULL, NULL); - if (code) goto _err; + // if 
(block.last) { + // code = tsdbReadBlockData(pWriter->pDataFReader, pBlockIdx, &block, &pWriter->bDataR, NULL, NULL); + // if (code) goto _err; - tBlockReset(&block); - block.last = 1; - code = - tsdbWriteBlockData(pWriter->pDataFWriter, &pWriter->bDataR, NULL, NULL, pBlockIdx, &block, pWriter->cmprAlg); - if (code) goto _err; - } + // tBlockReset(&block); + // block.last = 1; + // code = + // tsdbWriteBlockData(pWriter->pDataFWriter, &pWriter->bDataR, NULL, NULL, pBlockIdx, &block, + // pWriter->cmprAlg); + // if (code) goto _err; + // } code = tMapDataPutItem(&pWriter->mBlockW, &block, tPutBlock); if (code) goto _err; @@ -576,7 +576,7 @@ static int32_t tsdbSnapMoveWriteTableData(STsdbSnapWriter* pWriter, SBlockIdx* p // SBlock SBlockIdx blockIdx = {.suid = pBlockIdx->suid, .uid = pBlockIdx->uid}; - code = tsdbWriteBlock(pWriter->pDataFWriter, &pWriter->mBlockW, NULL, &blockIdx); + code = tsdbWriteBlock(pWriter->pDataFWriter, &pWriter->mBlockW, &blockIdx); if (code) goto _err; // SBlockIdx @@ -601,9 +601,9 @@ static int32_t tsdbSnapWriteTableDataImpl(STsdbSnapWriter* pWriter) { TSDBROW row; TSDBROW* pRow = &row; - // correct schema - code = tBlockDataCorrectSchema(&pWriter->bDataW, pBlockData); - if (code) goto _err; + // // correct schema + // code = tBlockDataCorrectSchema(&pWriter->bDataW, pBlockData); + // if (code) goto _err; // loop to merge *pRow = tsdbRowFromBlockData(pBlockData, iRow); @@ -618,8 +618,8 @@ static int32_t tsdbSnapWriteTableDataImpl(STsdbSnapWriter* pWriter) { ASSERT(c); if (c < 0) { - code = tBlockDataAppendRow(&pWriter->bDataW, pRow, NULL); - if (code) goto _err; + // code = tBlockDataAppendRow(&pWriter->bDataW, pRow, NULL); + // if (code) goto _err; iRow++; if (iRow < pWriter->pBlockData->nRow) { @@ -628,8 +628,8 @@ static int32_t tsdbSnapWriteTableDataImpl(STsdbSnapWriter* pWriter) { pRow = NULL; } } else if (c > 0) { - code = tBlockDataAppendRow(&pWriter->bDataW, &tsdbRowFromBlockData(pWriter->pBlockData, pWriter->iRow), NULL); - if 
(code) goto _err; + // code = tBlockDataAppendRow(&pWriter->bDataW, &tsdbRowFromBlockData(pWriter->pBlockData, pWriter->iRow), + // NULL); if (code) goto _err; pWriter->iRow++; if (pWriter->iRow >= pWriter->pBlockData->nRow) { @@ -647,16 +647,15 @@ static int32_t tsdbSnapWriteTableDataImpl(STsdbSnapWriter* pWriter) { tMapDataGetItemByIdx(&pWriter->mBlock, pWriter->iBlock, &block, tGetBlock); - if (block.last) { - pWriter->pBlockData = &pWriter->bDataR; + // if (block.last) { + // pWriter->pBlockData = &pWriter->bDataR; - code = tsdbReadBlockData(pWriter->pDataFReader, pWriter->pBlockIdx, &block, pWriter->pBlockData, NULL, NULL); - if (code) goto _err; - pWriter->iRow = 0; + // code = tsdbReadBlockData(pWriter->pDataFReader, pWriter->pBlockIdx, &block, pWriter->pBlockData, NULL, + // NULL); if (code) goto _err; pWriter->iRow = 0; - pWriter->iBlock++; - break; - } + // pWriter->iBlock++; + // break; + // } c = tsdbKeyCmprFn(&block.maxKey, &key); @@ -664,16 +663,16 @@ static int32_t tsdbSnapWriteTableDataImpl(STsdbSnapWriter* pWriter) { if (c < 0) { if (pWriter->bDataW.nRow) { - pWriter->blockW.last = 0; - code = tsdbWriteBlockData(pWriter->pDataFWriter, &pWriter->bDataW, NULL, NULL, pWriter->pBlockIdxW, - &pWriter->blockW, pWriter->cmprAlg); - if (code) goto _err; + // pWriter->blockW.last = 0; + // code = tsdbWriteBlockData(pWriter->pDataFWriter, &pWriter->bDataW, NULL, NULL, pWriter->pBlockIdxW, + // &pWriter->blockW, pWriter->cmprAlg); + // if (code) goto _err; code = tMapDataPutItem(&pWriter->mBlockW, &pWriter->blockW, tPutBlock); if (code) goto _err; tBlockReset(&pWriter->blockW); - tBlockDataClearData(&pWriter->bDataW); + tBlockDataClear(&pWriter->bDataW); } code = tMapDataPutItem(&pWriter->mBlockW, &block, tPutBlock); @@ -687,9 +686,10 @@ static int32_t tsdbSnapWriteTableDataImpl(STsdbSnapWriter* pWriter) { if (c > 0) { pWriter->pBlockData = &pWriter->bDataR; - code = - tsdbReadBlockData(pWriter->pDataFReader, pWriter->pBlockIdx, &block, pWriter->pBlockData, 
NULL, NULL); - if (code) goto _err; + // code = + // tsdbReadBlockData(pWriter->pDataFReader, pWriter->pBlockIdx, &block, pWriter->pBlockData, NULL, + // NULL); + // if (code) goto _err; pWriter->iRow = 0; pWriter->iBlock++; @@ -700,8 +700,8 @@ static int32_t tsdbSnapWriteTableDataImpl(STsdbSnapWriter* pWriter) { if (pWriter->pBlockData) continue; - code = tBlockDataAppendRow(&pWriter->bDataW, pRow, NULL); - if (code) goto _err; + // code = tBlockDataAppendRow(&pWriter->bDataW, pRow, NULL); + // if (code) goto _err; iRow++; if (iRow < pBlockData->nRow) { @@ -715,15 +715,15 @@ static int32_t tsdbSnapWriteTableDataImpl(STsdbSnapWriter* pWriter) { if (pWriter->bDataW.nRow < pWriter->maxRow * 4 / 5) continue; _write_block: - code = tsdbWriteBlockData(pWriter->pDataFWriter, &pWriter->bDataW, NULL, NULL, pWriter->pBlockIdxW, - &pWriter->blockW, pWriter->cmprAlg); - if (code) goto _err; + // code = tsdbWriteBlockData(pWriter->pDataFWriter, &pWriter->bDataW, NULL, NULL, pWriter->pBlockIdxW, + // &pWriter->blockW, pWriter->cmprAlg); + // if (code) goto _err; code = tMapDataPutItem(&pWriter->mBlockW, &pWriter->blockW, tPutBlock); if (code) goto _err; tBlockReset(&pWriter->blockW); - tBlockDataClearData(&pWriter->bDataW); + tBlockDataClear(&pWriter->bDataW); } return code; @@ -789,7 +789,7 @@ static int32_t tsdbSnapWriteTableData(STsdbSnapWriter* pWriter, TABLEID id) { } if (pWriter->pBlockIdx) { - code = tsdbReadBlock(pWriter->pDataFReader, pWriter->pBlockIdx, &pWriter->mBlock, NULL); + code = tsdbReadBlock(pWriter->pDataFReader, pWriter->pBlockIdx, &pWriter->mBlock); if (code) goto _err; } else { tMapDataReset(&pWriter->mBlock); @@ -831,9 +831,11 @@ static int32_t tsdbSnapWriteDataEnd(STsdbSnapWriter* pWriter) { if (pWriter->pDataFWriter == NULL) goto _exit; + // finish current table code = tsdbSnapWriteTableDataEnd(pWriter); if (code) goto _err; + // move remain table while (pWriter->iBlockIdx < taosArrayGetSize(pWriter->aBlockIdx)) { code = 
tsdbSnapMoveWriteTableData(pWriter, (SBlockIdx*)taosArrayGet(pWriter->aBlockIdx, pWriter->iBlockIdx)); if (code) goto _err; @@ -841,8 +843,16 @@ static int32_t tsdbSnapWriteDataEnd(STsdbSnapWriter* pWriter) { pWriter->iBlockIdx++; } - code = tsdbWriteBlockIdx(pWriter->pDataFWriter, pWriter->aBlockIdxW, NULL); - if (code) goto _err; + // write remain stuff + if (taosArrayGetSize(pWriter->aBlockLW) > 0) { + code = tsdbWriteBlockL(pWriter->pDataFWriter, pWriter->aBlockLW); /* write the SBlockL array that the guard just checked */ + if (code) goto _err; + } + + if (taosArrayGetSize(pWriter->aBlockIdxW) > 0) { /* guard on the array actually being written */ + code = tsdbWriteBlockIdx(pWriter->pDataFWriter, pWriter->aBlockIdxW); + if (code) goto _err; + } code = tsdbFSUpsertFSet(&pWriter->fs, &pWriter->pDataFWriter->wSet); if (code) goto _err; @@ -866,19 +876,22 @@ _err: } static int32_t tsdbSnapWriteData(STsdbSnapWriter* pWriter, uint8_t* pData, uint32_t nData) { - int32_t code = 0; - STsdb* pTsdb = pWriter->pTsdb; - TABLEID id = *(TABLEID*)(pData + sizeof(SSnapDataHdr)); - int64_t n; + int32_t code = 0; + STsdb* pTsdb = pWriter->pTsdb; + SSnapDataHdr* pHdr = (SSnapDataHdr*)pData; + TABLEID id = *(TABLEID*)(pData + sizeof(SSnapDataHdr)); + int64_t n; // decode SBlockData* pBlockData = &pWriter->bData; - n = tGetBlockData(pData + sizeof(SSnapDataHdr) + sizeof(TABLEID), pBlockData); - ASSERT(n + sizeof(SSnapDataHdr) + sizeof(TABLEID) == nData); + code = tDecmprBlockData(pData + sizeof(SSnapDataHdr) + sizeof(TABLEID), pHdr->size - sizeof(TABLEID), pBlockData, + pWriter->aBuf); + if (code) goto _err; // open file - TSDBKEY keyFirst = tBlockDataFirstKey(pBlockData); - TSDBKEY keyLast = tBlockDataLastKey(pBlockData); + TSDBKEY keyFirst = {.version = pBlockData->aVersion[0], .ts = pBlockData->aTSKEY[0]}; + TSDBKEY keyLast = {.version = pBlockData->aVersion[pBlockData->nRow - 1], + .ts = pBlockData->aTSKEY[pBlockData->nRow - 1]}; int32_t fid = tsdbKeyFid(keyFirst.ts, pWriter->minutes, pWriter->precision); ASSERT(fid == tsdbKeyFid(keyLast.ts, pWriter->minutes, pWriter->precision)); 
@@ -895,11 +908,15 @@ static int32_t tsdbSnapWriteData(STsdbSnapWriter* pWriter, uint8_t* pData, uint3 code = tsdbDataFReaderOpen(&pWriter->pDataFReader, pTsdb, pSet); if (code) goto _err; - code = tsdbReadBlockIdx(pWriter->pDataFReader, pWriter->aBlockIdx, NULL); + code = tsdbReadBlockIdx(pWriter->pDataFReader, pWriter->aBlockIdx); + if (code) goto _err; + + code = tsdbReadBlockL(pWriter->pDataFReader, pWriter->aBlockL); if (code) goto _err; } else { ASSERT(pWriter->pDataFReader == NULL); taosArrayClear(pWriter->aBlockIdx); + taosArrayClear(pWriter->aBlockL); } pWriter->iBlockIdx = 0; pWriter->pBlockIdx = NULL; @@ -907,7 +924,9 @@ static int32_t tsdbSnapWriteData(STsdbSnapWriter* pWriter, uint8_t* pData, uint3 pWriter->iBlock = 0; pWriter->pBlockData = NULL; pWriter->iRow = 0; + pWriter->iBlockL = 0; tBlockDataReset(&pWriter->bDataR); + tBlockDataReset(&pWriter->lDataR); // write SHeadFile fHead; @@ -928,7 +947,7 @@ static int32_t tsdbSnapWriteData(STsdbSnapWriter* pWriter, uint8_t* pData, uint3 wSet.fid = fid; fHead = (SHeadFile){.commitID = pWriter->commitID, .offset = 0, .size = 0}; fData = (SDataFile){.commitID = pWriter->commitID, .size = 0}; - fLast = (SLastFile){.commitID = pWriter->commitID, .size = 0}; + fLast = (SLastFile){.commitID = pWriter->commitID, .size = 0, .offset = 0}; fSma = (SSmaFile){.commitID = pWriter->commitID, .size = 0}; } @@ -936,6 +955,7 @@ static int32_t tsdbSnapWriteData(STsdbSnapWriter* pWriter, uint8_t* pData, uint3 if (code) goto _err; taosArrayClear(pWriter->aBlockIdxW); + taosArrayClear(pWriter->aBlockLW); tMapDataReset(&pWriter->mBlockW); pWriter->pBlockIdxW = NULL; tBlockDataReset(&pWriter->bDataW); @@ -963,10 +983,10 @@ static int32_t tsdbSnapWriteDel(STsdbSnapWriter* pWriter, uint8_t* pData, uint32 // reader if (pDelFile) { - code = tsdbDelFReaderOpen(&pWriter->pDelFReader, pDelFile, pTsdb, NULL); + code = tsdbDelFReaderOpen(&pWriter->pDelFReader, pDelFile, pTsdb); if (code) goto _err; - code = 
tsdbReadDelIdx(pWriter->pDelFReader, pWriter->aDelIdxR, NULL); + code = tsdbReadDelIdx(pWriter->pDelFReader, pWriter->aDelIdxR); if (code) goto _err; } @@ -980,52 +1000,16 @@ static int32_t tsdbSnapWriteDel(STsdbSnapWriter* pWriter, uint8_t* pData, uint32 TABLEID id = *(TABLEID*)(pData + sizeof(SSnapDataHdr)); while (true) { - SDelIdx* pDelIdx = NULL; - int64_t n = sizeof(SSnapDataHdr) + sizeof(TABLEID); - SDelData delData; - SDelIdx delIdx; - int8_t toBreak = 0; + if (pWriter->iDelIdx >= taosArrayGetSize(pWriter->aDelIdxR)) break; + if (tTABLEIDCmprFn(taosArrayGet(pWriter->aDelIdxR, pWriter->iDelIdx), &id) >= 0) break; - if (pWriter->iDelIdx < taosArrayGetSize(pWriter->aDelIdxR)) { - pDelIdx = (SDelIdx*)taosArrayGet(pWriter->aDelIdxR, pWriter->iDelIdx); - } + SDelIdx* pDelIdx = (SDelIdx*)taosArrayGet(pWriter->aDelIdxR, pWriter->iDelIdx); - if (pDelIdx) { - int32_t c = tTABLEIDCmprFn(&id, pDelIdx); - if (c < 0) { - goto _new_del; - } else { - code = tsdbReadDelData(pWriter->pDelFReader, pDelIdx, pWriter->aDelData, NULL); - if (code) goto _err; + code = tsdbReadDelData(pWriter->pDelFReader, pDelIdx, pWriter->aDelData); + if (code) goto _err; - pWriter->iDelIdx++; - if (c == 0) { - toBreak = 1; - delIdx = (SDelIdx){.suid = id.suid, .uid = id.uid}; - goto _merge_del; - } else { - delIdx = (SDelIdx){.suid = pDelIdx->suid, .uid = pDelIdx->uid}; - goto _write_del; - } - } - } - - _new_del: - toBreak = 1; - delIdx = (SDelIdx){.suid = id.suid, .uid = id.uid}; - taosArrayClear(pWriter->aDelData); - - _merge_del: - while (n < nData) { - n += tGetDelData(pData + n, &delData); - if (taosArrayPush(pWriter->aDelData, &delData) == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _err; - } - } - - _write_del: - code = tsdbWriteDelData(pWriter->pDelFWriter, pWriter->aDelData, NULL, &delIdx); + SDelIdx delIdx = *pDelIdx; + code = tsdbWriteDelData(pWriter->pDelFWriter, pWriter->aDelData, &delIdx); if (code) goto _err; if (taosArrayPush(pWriter->aDelIdxW, &delIdx) == NULL) { @@ 
-1033,7 +1017,40 @@ static int32_t tsdbSnapWriteDel(STsdbSnapWriter* pWriter, uint8_t* pData, uint32 goto _err; } - if (toBreak) break; + pWriter->iDelIdx++; + } + + if (pWriter->iDelIdx < taosArrayGetSize(pWriter->aDelIdxR) && + tTABLEIDCmprFn(taosArrayGet(pWriter->aDelIdxR, pWriter->iDelIdx), &id) == 0) { + SDelIdx* pDelIdx = (SDelIdx*)taosArrayGet(pWriter->aDelIdxR, pWriter->iDelIdx); + + code = tsdbReadDelData(pWriter->pDelFReader, pDelIdx, pWriter->aDelData); + if (code) goto _err; + + pWriter->iDelIdx++; + } else { + taosArrayClear(pWriter->aDelData); + } + + int64_t n = sizeof(SSnapDataHdr) + sizeof(TABLEID); + while (n < nData) { + SDelData delData; + + n += tGetDelData(pData + n, &delData); + + if (taosArrayPush(pWriter->aDelData, &delData) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + } + + SDelIdx delIdx = {.suid = id.suid, .uid = id.uid}; + code = tsdbWriteDelData(pWriter->pDelFWriter, pWriter->aDelData, &delIdx); + if (code) goto _err; + + if (taosArrayPush(pWriter->aDelIdxW, &delIdx) == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; } _exit: @@ -1054,11 +1071,11 @@ static int32_t tsdbSnapWriteDelEnd(STsdbSnapWriter* pWriter) { for (; pWriter->iDelIdx < taosArrayGetSize(pWriter->aDelIdxR); pWriter->iDelIdx++) { SDelIdx* pDelIdx = (SDelIdx*)taosArrayGet(pWriter->aDelIdxR, pWriter->iDelIdx); - code = tsdbReadDelData(pWriter->pDelFReader, pDelIdx, pWriter->aDelData, NULL); + code = tsdbReadDelData(pWriter->pDelFReader, pDelIdx, pWriter->aDelData); if (code) goto _err; - SDelIdx delIdx = (SDelIdx){.suid = pDelIdx->suid, .uid = pDelIdx->uid}; - code = tsdbWriteDelData(pWriter->pDelFWriter, pWriter->aDelData, NULL, &delIdx); + SDelIdx delIdx = *pDelIdx; + code = tsdbWriteDelData(pWriter->pDelFWriter, pWriter->aDelData, &delIdx); if (code) goto _err; if (taosArrayPush(pWriter->aDelIdxW, &delIdx) == NULL) { /* append to the write-side index; pushing into aDelIdxR would grow the array this loop iterates */ @@ -1117,7 +1134,7 @@ int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWr pWriter->commitID = 
pTsdb->pVnode->state.commitID; // for data file - code = tBlockDataInit(&pWriter->bData); + code = tBlockDataCreate(&pWriter->bData); if (code) goto _err; pWriter->aBlockIdx = taosArrayInit(0, sizeof(SBlockIdx)); @@ -1125,17 +1142,29 @@ int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWr code = TSDB_CODE_OUT_OF_MEMORY; goto _err; } - code = tBlockDataInit(&pWriter->bDataR); + code = tBlockDataCreate(&pWriter->bDataR); if (code) goto _err; + pWriter->aBlockL = taosArrayInit(0, sizeof(SBlockL)); + if (pWriter->aBlockL == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + pWriter->aBlockIdxW = taosArrayInit(0, sizeof(SBlockIdx)); if (pWriter->aBlockIdxW == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _err; } - code = tBlockDataInit(&pWriter->bDataW); + code = tBlockDataCreate(&pWriter->bDataW); if (code) goto _err; + pWriter->aBlockLW = taosArrayInit(0, sizeof(SBlockL)); + if (pWriter->aBlockLW == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + // for del file pWriter->aDelIdxR = taosArrayInit(0, sizeof(SDelIdx)); if (pWriter->aDelIdxR == NULL) { @@ -1186,6 +1215,10 @@ int32_t tsdbSnapWriterClose(STsdbSnapWriter** ppWriter, int8_t rollback) { if (code) goto _err; } + for (int32_t iBuf = 0; iBuf < sizeof(pWriter->aBuf) / sizeof(uint8_t*); iBuf++) { + tFree(pWriter->aBuf[iBuf]); + } + tsdbInfo("vgId:%d, vnode snapshot tsdb writer close for %s", TD_VID(pWriter->pTsdb->pVnode), pWriter->pTsdb->path); taosMemoryFree(pWriter); *ppWriter = NULL; @@ -1224,6 +1257,7 @@ int32_t tsdbSnapWrite(STsdbSnapWriter* pWriter, uint8_t* pData, uint32_t nData) _exit: tsdbDebug("vgId:%d, tsdb snapshot write for %s succeed", TD_VID(pWriter->pTsdb->pVnode), pWriter->pTsdb->path); + return code; _err: diff --git a/source/dnode/vnode/src/tsdb/tsdbUtil.c b/source/dnode/vnode/src/tsdb/tsdbUtil.c index 76af751196..6db9d5e6f4 100644 --- a/source/dnode/vnode/src/tsdb/tsdbUtil.c +++ b/source/dnode/vnode/src/tsdb/tsdbUtil.c @@ -152,25 +152,6 @@ 
int32_t tTABLEIDCmprFn(const void *p1, const void *p2) { return 0; } -// TSDBKEY ====================================================== -static FORCE_INLINE int32_t tPutTSDBKEY(uint8_t *p, TSDBKEY *pKey) { - int32_t n = 0; - - n += tPutI64v(p ? p + n : p, pKey->version); - n += tPutI64(p ? p + n : p, pKey->ts); - - return n; -} - -static FORCE_INLINE int32_t tGetTSDBKEY(uint8_t *p, TSDBKEY *pKey) { - int32_t n = 0; - - n += tGetI64v(p + n, &pKey->version); - n += tGetI64(p + n, &pKey->ts); - - return n; -} - // SBlockIdx ====================================================== int32_t tPutBlockIdx(uint8_t *p, void *ph) { int32_t n = 0; @@ -215,34 +196,51 @@ int32_t tCmprBlockIdx(void const *lhs, void const *rhs) { return 0; } +int32_t tCmprBlockL(void const *lhs, void const *rhs) { + SBlockIdx *lBlockIdx = (SBlockIdx *)lhs; + SBlockL *rBlockL = (SBlockL *)rhs; + + if (lBlockIdx->suid < rBlockL->suid) { + return -1; + } else if (lBlockIdx->suid > rBlockL->suid) { + return 1; + } + + if (lBlockIdx->uid < rBlockL->minUid) { + return -1; + } else if (lBlockIdx->uid > rBlockL->maxUid) { + return 1; + } + + return 0; +} + // SBlock ====================================================== void tBlockReset(SBlock *pBlock) { - *pBlock = - (SBlock){.minKey = TSDBKEY_MAX, .maxKey = TSDBKEY_MIN, .minVersion = VERSION_MAX, .maxVersion = VERSION_MIN}; + *pBlock = (SBlock){.minKey = TSDBKEY_MAX, .maxKey = TSDBKEY_MIN, .minVer = VERSION_MAX, .maxVer = VERSION_MIN}; } int32_t tPutBlock(uint8_t *p, void *ph) { int32_t n = 0; SBlock *pBlock = (SBlock *)ph; - n += tPutTSDBKEY(p ? p + n : p, &pBlock->minKey); - n += tPutTSDBKEY(p ? p + n : p, &pBlock->maxKey); - n += tPutI64v(p ? p + n : p, pBlock->minVersion); - n += tPutI64v(p ? p + n : p, pBlock->maxVersion); + n += tPutI64v(p ? p + n : p, pBlock->minKey.version); + n += tPutI64v(p ? p + n : p, pBlock->minKey.ts); + n += tPutI64v(p ? p + n : p, pBlock->maxKey.version); + n += tPutI64v(p ? 
p + n : p, pBlock->maxKey.ts); + n += tPutI64v(p ? p + n : p, pBlock->minVer); + n += tPutI64v(p ? p + n : p, pBlock->maxVer); n += tPutI32v(p ? p + n : p, pBlock->nRow); - n += tPutI8(p ? p + n : p, pBlock->last); n += tPutI8(p ? p + n : p, pBlock->hasDup); n += tPutI8(p ? p + n : p, pBlock->nSubBlock); for (int8_t iSubBlock = 0; iSubBlock < pBlock->nSubBlock; iSubBlock++) { - n += tPutI32v(p ? p + n : p, pBlock->aSubBlock[iSubBlock].nRow); - n += tPutI8(p ? p + n : p, pBlock->aSubBlock[iSubBlock].cmprAlg); n += tPutI64v(p ? p + n : p, pBlock->aSubBlock[iSubBlock].offset); - n += tPutI32v(p ? p + n : p, pBlock->aSubBlock[iSubBlock].szBlockCol); - n += tPutI32v(p ? p + n : p, pBlock->aSubBlock[iSubBlock].szVersion); - n += tPutI32v(p ? p + n : p, pBlock->aSubBlock[iSubBlock].szTSKEY); n += tPutI32v(p ? p + n : p, pBlock->aSubBlock[iSubBlock].szBlock); - n += tPutI64v(p ? p + n : p, pBlock->aSubBlock[iSubBlock].sOffset); - n += tPutI32v(p ? p + n : p, pBlock->aSubBlock[iSubBlock].nSma); + n += tPutI32v(p ? p + n : p, pBlock->aSubBlock[iSubBlock].szKey); + } + if (pBlock->nSubBlock == 1 && !pBlock->hasDup) { + n += tPutI64v(p ? p + n : p, pBlock->smaInfo.offset); + n += tPutI32v(p ? 
p + n : p, pBlock->smaInfo.size); } return n; @@ -252,24 +250,26 @@ int32_t tGetBlock(uint8_t *p, void *ph) { int32_t n = 0; SBlock *pBlock = (SBlock *)ph; - n += tGetTSDBKEY(p + n, &pBlock->minKey); - n += tGetTSDBKEY(p + n, &pBlock->maxKey); - n += tGetI64v(p + n, &pBlock->minVersion); - n += tGetI64v(p + n, &pBlock->maxVersion); + n += tGetI64v(p + n, &pBlock->minKey.version); + n += tGetI64v(p + n, &pBlock->minKey.ts); + n += tGetI64v(p + n, &pBlock->maxKey.version); + n += tGetI64v(p + n, &pBlock->maxKey.ts); + n += tGetI64v(p + n, &pBlock->minVer); + n += tGetI64v(p + n, &pBlock->maxVer); n += tGetI32v(p + n, &pBlock->nRow); - n += tGetI8(p + n, &pBlock->last); n += tGetI8(p + n, &pBlock->hasDup); n += tGetI8(p + n, &pBlock->nSubBlock); for (int8_t iSubBlock = 0; iSubBlock < pBlock->nSubBlock; iSubBlock++) { - n += tGetI32v(p + n, &pBlock->aSubBlock[iSubBlock].nRow); - n += tGetI8(p + n, &pBlock->aSubBlock[iSubBlock].cmprAlg); n += tGetI64v(p + n, &pBlock->aSubBlock[iSubBlock].offset); - n += tGetI32v(p + n, &pBlock->aSubBlock[iSubBlock].szBlockCol); - n += tGetI32v(p + n, &pBlock->aSubBlock[iSubBlock].szVersion); - n += tGetI32v(p + n, &pBlock->aSubBlock[iSubBlock].szTSKEY); n += tGetI32v(p + n, &pBlock->aSubBlock[iSubBlock].szBlock); - n += tGetI64v(p + n, &pBlock->aSubBlock[iSubBlock].sOffset); - n += tGetI32v(p + n, &pBlock->aSubBlock[iSubBlock].nSma); + n += tGetI32v(p + n, &pBlock->aSubBlock[iSubBlock].szKey); + } + if (pBlock->nSubBlock == 1 && !pBlock->hasDup) { + n += tGetI64v(p + n, &pBlock->smaInfo.offset); + n += tGetI32v(p + n, &pBlock->smaInfo.size); + } else { + pBlock->smaInfo.offset = 0; + pBlock->smaInfo.size = 0; } return n; @@ -290,10 +290,48 @@ int32_t tBlockCmprFn(const void *p1, const void *p2) { bool tBlockHasSma(SBlock *pBlock) { if (pBlock->nSubBlock > 1) return false; - if (pBlock->last) return false; if (pBlock->hasDup) return false; - return pBlock->aSubBlock[0].nSma > 0; + return pBlock->smaInfo.size > 0; +} + +// SBlockL 
====================================================== +int32_t tPutBlockL(uint8_t *p, void *ph) { + int32_t n = 0; + SBlockL *pBlockL = (SBlockL *)ph; + + n += tPutI64(p ? p + n : p, pBlockL->suid); + n += tPutI64(p ? p + n : p, pBlockL->minUid); + n += tPutI64(p ? p + n : p, pBlockL->maxUid); + n += tPutI64v(p ? p + n : p, pBlockL->minKey); + n += tPutI64v(p ? p + n : p, pBlockL->maxKey); + n += tPutI64v(p ? p + n : p, pBlockL->minVer); + n += tPutI64v(p ? p + n : p, pBlockL->maxVer); + n += tPutI32v(p ? p + n : p, pBlockL->nRow); + n += tPutI64v(p ? p + n : p, pBlockL->bInfo.offset); + n += tPutI32v(p ? p + n : p, pBlockL->bInfo.szBlock); + n += tPutI32v(p ? p + n : p, pBlockL->bInfo.szKey); + + return n; +} + +int32_t tGetBlockL(uint8_t *p, void *ph) { + int32_t n = 0; + SBlockL *pBlockL = (SBlockL *)ph; + + n += tGetI64(p + n, &pBlockL->suid); + n += tGetI64(p + n, &pBlockL->minUid); + n += tGetI64(p + n, &pBlockL->maxUid); + n += tGetI64v(p + n, &pBlockL->minKey); + n += tGetI64v(p + n, &pBlockL->maxKey); + n += tGetI64v(p + n, &pBlockL->minVer); + n += tGetI64v(p + n, &pBlockL->maxVer); + n += tGetI32v(p + n, &pBlockL->nRow); + n += tGetI64v(p + n, &pBlockL->bInfo.offset); + n += tGetI32v(p + n, &pBlockL->bInfo.szBlock); + n += tGetI32v(p + n, &pBlockL->bInfo.szKey); + + return n; } // SBlockCol ====================================================== @@ -307,15 +345,25 @@ int32_t tPutBlockCol(uint8_t *p, void *ph) { n += tPutI8(p ? p + n : p, pBlockCol->type); n += tPutI8(p ? p + n : p, pBlockCol->smaOn); n += tPutI8(p ? p + n : p, pBlockCol->flag); + n += tPutI32v(p ? p + n : p, pBlockCol->szOrigin); if (pBlockCol->flag != HAS_NULL) { + if (pBlockCol->flag != HAS_VALUE) { + n += tPutI32v(p ? p + n : p, pBlockCol->szBitmap); + } + + if (IS_VAR_DATA_TYPE(pBlockCol->type)) { + n += tPutI32v(p ? p + n : p, pBlockCol->szOffset); + } + + if (pBlockCol->flag != (HAS_NULL | HAS_NONE)) { + n += tPutI32v(p ? p + n : p, pBlockCol->szValue); + } + n += tPutI32v(p ? 
p + n : p, pBlockCol->offset); - n += tPutI32v(p ? p + n : p, pBlockCol->szBitmap); - n += tPutI32v(p ? p + n : p, pBlockCol->szOffset); - n += tPutI32v(p ? p + n : p, pBlockCol->szValue); - n += tPutI32v(p ? p + n : p, pBlockCol->szOrigin); } +_exit: return n; } @@ -327,15 +375,29 @@ int32_t tGetBlockCol(uint8_t *p, void *ph) { n += tGetI8(p + n, &pBlockCol->type); n += tGetI8(p + n, &pBlockCol->smaOn); n += tGetI8(p + n, &pBlockCol->flag); + n += tGetI32v(p + n, &pBlockCol->szOrigin); ASSERT(pBlockCol->flag && (pBlockCol->flag != HAS_NONE)); + pBlockCol->szBitmap = 0; + pBlockCol->szOffset = 0; + pBlockCol->szValue = 0; + pBlockCol->offset = 0; + if (pBlockCol->flag != HAS_NULL) { + if (pBlockCol->flag != HAS_VALUE) { + n += tGetI32v(p + n, &pBlockCol->szBitmap); + } + + if (IS_VAR_DATA_TYPE(pBlockCol->type)) { + n += tGetI32v(p + n, &pBlockCol->szOffset); + } + + if (pBlockCol->flag != (HAS_NULL | HAS_NONE)) { + n += tGetI32v(p + n, &pBlockCol->szValue); + } + n += tGetI32v(p + n, &pBlockCol->offset); - n += tGetI32v(p + n, &pBlockCol->szBitmap); - n += tGetI32v(p + n, &pBlockCol->szOffset); - n += tGetI32v(p + n, &pBlockCol->szValue); - n += tGetI32v(p + n, &pBlockCol->szOrigin); } return n; @@ -866,6 +928,9 @@ int32_t tColDataAppendValue(SColData *pColData, SColVal *pColVal) { size = BIT2_SIZE(pColData->nVal + 1); code = tRealloc(&pColData->pBitMap, size); if (code) goto _exit; + if ((pColData->nVal & 3) == 0) { + pColData->pBitMap[pColData->nVal >> 2] = 0; + } // put value if (pColVal->isNone) { @@ -910,13 +975,14 @@ int32_t tColDataCopy(SColData *pColDataSrc, SColData *pColDataDest) { int32_t size; ASSERT(pColDataSrc->nVal > 0); + ASSERT(pColDataDest->cid = pColDataSrc->cid); + ASSERT(pColDataDest->type = pColDataSrc->type); - pColDataDest->cid = pColDataSrc->cid; - pColDataDest->type = pColDataSrc->type; pColDataDest->smaOn = pColDataSrc->smaOn; pColDataDest->nVal = pColDataSrc->nVal; pColDataDest->flag = pColDataSrc->flag; + // bitmap if (pColDataSrc->flag 
!= HAS_NONE && pColDataSrc->flag != HAS_NULL && pColDataSrc->flag != HAS_VALUE) { size = BIT2_SIZE(pColDataSrc->nVal); code = tRealloc(&pColDataDest->pBitMap, size); @@ -924,6 +990,7 @@ int32_t tColDataCopy(SColData *pColDataSrc, SColData *pColDataDest) { memcpy(pColDataDest->pBitMap, pColDataSrc->pBitMap, size); } + // offset if (IS_VAR_DATA_TYPE(pColDataDest->type)) { size = sizeof(int32_t) * pColDataSrc->nVal; @@ -933,9 +1000,10 @@ int32_t tColDataCopy(SColData *pColDataSrc, SColData *pColDataDest) { memcpy(pColDataDest->aOffset, pColDataSrc->aOffset, size); } + // value + pColDataDest->nData = pColDataSrc->nData; code = tRealloc(&pColDataDest->pData, pColDataSrc->nData); if (code) goto _exit; - pColDataDest->nData = pColDataSrc->nData; memcpy(pColDataDest->pData, pColDataSrc->pData, pColDataDest->nData); _exit: @@ -1068,10 +1136,13 @@ static FORCE_INLINE int32_t tColDataCmprFn(const void *p1, const void *p2) { } // SBlockData ====================================================== -int32_t tBlockDataInit(SBlockData *pBlockData) { +int32_t tBlockDataCreate(SBlockData *pBlockData) { int32_t code = 0; + pBlockData->suid = 0; + pBlockData->uid = 0; pBlockData->nRow = 0; + pBlockData->aUid = NULL; pBlockData->aVersion = NULL; pBlockData->aTSKEY = NULL; pBlockData->aIdx = taosArrayInit(0, sizeof(int32_t)); @@ -1090,42 +1161,77 @@ _exit: return code; } -void tBlockDataReset(SBlockData *pBlockData) { - pBlockData->nRow = 0; - taosArrayClear(pBlockData->aIdx); -} - -void tBlockDataClear(SBlockData *pBlockData, int8_t deepClear) { +void tBlockDataDestroy(SBlockData *pBlockData, int8_t deepClear) { + tFree((uint8_t *)pBlockData->aUid); tFree((uint8_t *)pBlockData->aVersion); tFree((uint8_t *)pBlockData->aTSKEY); taosArrayDestroy(pBlockData->aIdx); taosArrayDestroyEx(pBlockData->aColData, deepClear ? 
tColDataClear : NULL); - pBlockData->aColData = NULL; - pBlockData->aIdx = NULL; - pBlockData->aTSKEY = NULL; + pBlockData->aUid = NULL; pBlockData->aVersion = NULL; + pBlockData->aTSKEY = NULL; + pBlockData->aIdx = NULL; + pBlockData->aColData = NULL; } -int32_t tBlockDataSetSchema(SBlockData *pBlockData, STSchema *pTSchema) { - int32_t code = 0; - SColData *pColData; - STColumn *pTColumn; +int32_t tBlockDataInit(SBlockData *pBlockData, int64_t suid, int64_t uid, STSchema *pTSchema) { + int32_t code = 0; - tBlockDataReset(pBlockData); + ASSERT(suid || uid); + + pBlockData->suid = suid; + pBlockData->uid = uid; + pBlockData->nRow = 0; + + taosArrayClear(pBlockData->aIdx); for (int32_t iColumn = 1; iColumn < pTSchema->numOfCols; iColumn++) { - pTColumn = &pTSchema->columns[iColumn]; + STColumn *pTColumn = &pTSchema->columns[iColumn]; + SColData *pColData; code = tBlockDataAddColData(pBlockData, iColumn - 1, &pColData); if (code) goto _exit; - tColDataInit(pColData, pTColumn->colId, pTColumn->type, (pTColumn->flags & COL_SMA_ON) != 0); + tColDataInit(pColData, pTColumn->colId, pTColumn->type, (pTColumn->flags & COL_SMA_ON) ? 
1 : 0); } _exit: return code; } -void tBlockDataClearData(SBlockData *pBlockData) { +int32_t tBlockDataInitEx(SBlockData *pBlockData, SBlockData *pBlockDataFrom) { + int32_t code = 0; + + ASSERT(pBlockDataFrom->suid || pBlockDataFrom->uid); + + pBlockData->suid = pBlockDataFrom->suid; + pBlockData->uid = pBlockDataFrom->uid; + pBlockData->nRow = 0; + + taosArrayClear(pBlockData->aIdx); + for (int32_t iColData = 0; iColData < taosArrayGetSize(pBlockDataFrom->aIdx); iColData++) { + SColData *pColDataFrom = tBlockDataGetColDataByIdx(pBlockDataFrom, iColData); + + SColData *pColData; + code = tBlockDataAddColData(pBlockData, iColData, &pColData); + if (code) goto _exit; + + tColDataInit(pColData, pColDataFrom->cid, pColDataFrom->type, pColDataFrom->smaOn); + } + +_exit: + return code; +} + +void tBlockDataReset(SBlockData *pBlockData) { + pBlockData->suid = 0; + pBlockData->uid = 0; + pBlockData->nRow = 0; + taosArrayClear(pBlockData->aIdx); +} + +void tBlockDataClear(SBlockData *pBlockData) { + ASSERT(pBlockData->suid || pBlockData->uid); + pBlockData->nRow = 0; for (int32_t iColData = 0; iColData < taosArrayGetSize(pBlockData->aIdx); iColData++) { SColData *pColData = tBlockDataGetColDataByIdx(pBlockData, iColData); @@ -1159,52 +1265,47 @@ _err: return code; } -int32_t tBlockDataAppendRow(SBlockData *pBlockData, TSDBROW *pRow, STSchema *pTSchema) { +int32_t tBlockDataAppendRow(SBlockData *pBlockData, TSDBROW *pRow, STSchema *pTSchema, int64_t uid) { int32_t code = 0; - // TSDBKEY + ASSERT(pBlockData->suid || pBlockData->uid); + + // uid + if (pBlockData->uid == 0) { + ASSERT(uid); + code = tRealloc((uint8_t **)&pBlockData->aUid, sizeof(int64_t) * (pBlockData->nRow + 1)); + if (code) goto _err; + pBlockData->aUid[pBlockData->nRow] = uid; + } + // version code = tRealloc((uint8_t **)&pBlockData->aVersion, sizeof(int64_t) * (pBlockData->nRow + 1)); if (code) goto _err; + pBlockData->aVersion[pBlockData->nRow] = TSDBROW_VERSION(pRow); + // timestamp code = 
tRealloc((uint8_t **)&pBlockData->aTSKEY, sizeof(TSKEY) * (pBlockData->nRow + 1)); if (code) goto _err; - pBlockData->aVersion[pBlockData->nRow] = TSDBROW_VERSION(pRow); pBlockData->aTSKEY[pBlockData->nRow] = TSDBROW_TS(pRow); // OTHER - int32_t iColData = 0; - int32_t nColData = taosArrayGetSize(pBlockData->aIdx); - SRowIter iter = {0}; - SRowIter *pIter = &iter; - SColData *pColData; - SColVal *pColVal; + SRowIter rIter = {0}; + SColVal *pColVal; - if (nColData == 0) goto _exit; + tRowIterInit(&rIter, pRow, pTSchema); + pColVal = tRowIterNext(&rIter); + for (int32_t iColData = 0; iColData < taosArrayGetSize(pBlockData->aIdx); iColData++) { + SColData *pColData = tBlockDataGetColDataByIdx(pBlockData, iColData); - tRowIterInit(pIter, pRow, pTSchema); - pColData = tBlockDataGetColDataByIdx(pBlockData, iColData); - pColVal = tRowIterNext(pIter); + while (pColVal && pColVal->cid < pColData->cid) { + pColVal = tRowIterNext(&rIter); + } - while (pColData) { - if (pColVal) { - if (pColData->cid == pColVal->cid) { - code = tColDataAppendValue(pColData, pColVal); - if (code) goto _err; - - pColVal = tRowIterNext(pIter); - pColData = ((++iColData) < nColData) ? tBlockDataGetColDataByIdx(pBlockData, iColData) : NULL; - } else if (pColData->cid < pColVal->cid) { - code = tColDataAppendValue(pColData, &COL_VAL_NONE(pColData->cid, pColData->type)); - if (code) goto _err; - - pColData = ((++iColData) < nColData) ? tBlockDataGetColDataByIdx(pBlockData, iColData) : NULL; - } else { - pColVal = tRowIterNext(pIter); - } - } else { + if (pColVal == NULL || pColVal->cid > pColData->cid) { code = tColDataAppendValue(pColData, &COL_VAL_NONE(pColData->cid, pColData->type)); if (code) goto _err; - - pColData = ((++iColData) < nColData) ? 
tBlockDataGetColDataByIdx(pBlockData, iColData) : NULL; + } else { + code = tColDataAppendValue(pColData, pColVal); + if (code) goto _err; + pColVal = tRowIterNext(&rIter); } } @@ -1259,128 +1360,111 @@ _exit: int32_t tBlockDataMerge(SBlockData *pBlockData1, SBlockData *pBlockData2, SBlockData *pBlockData) { int32_t code = 0; - // set target - int32_t iColData1 = 0; - int32_t nColData1 = taosArrayGetSize(pBlockData1->aIdx); - int32_t iColData2 = 0; - int32_t nColData2 = taosArrayGetSize(pBlockData2->aIdx); - SColData *pColData1; - SColData *pColData2; - SColData *pColData; + ASSERT(pBlockData->suid == pBlockData1->suid); + ASSERT(pBlockData->uid == pBlockData1->uid); + ASSERT(pBlockData1->nRow > 0); + ASSERT(pBlockData2->nRow > 0); - tBlockDataReset(pBlockData); - while (iColData1 < nColData1 && iColData2 < nColData2) { - pColData1 = tBlockDataGetColDataByIdx(pBlockData1, iColData1); - pColData2 = tBlockDataGetColDataByIdx(pBlockData2, iColData2); + tBlockDataClear(pBlockData); - if (pColData1->cid == pColData2->cid) { - code = tBlockDataAddColData(pBlockData, taosArrayGetSize(pBlockData->aIdx), &pColData); - if (code) goto _exit; - tColDataInit(pColData, pColData2->cid, pColData2->type, pColData2->smaOn); + TSDBROW row1 = tsdbRowFromBlockData(pBlockData1, 0); + TSDBROW row2 = tsdbRowFromBlockData(pBlockData2, 0); + TSDBROW *pRow1 = &row1; + TSDBROW *pRow2 = &row2; - iColData1++; - iColData2++; - } else if (pColData1->cid < pColData2->cid) { - code = tBlockDataAddColData(pBlockData, taosArrayGetSize(pBlockData->aIdx), &pColData); - if (code) goto _exit; - tColDataInit(pColData, pColData1->cid, pColData1->type, pColData1->smaOn); + while (pRow1 && pRow2) { + int32_t c = tsdbRowCmprFn(pRow1, pRow2); - iColData1++; - } else { - code = tBlockDataAddColData(pBlockData, taosArrayGetSize(pBlockData->aIdx), &pColData); - if (code) goto _exit; - tColDataInit(pColData, pColData2->cid, pColData2->type, pColData2->smaOn); - - iColData2++; - } - } - - while (iColData1 < 
nColData1) { - code = tBlockDataAddColData(pBlockData, taosArrayGetSize(pBlockData->aIdx), &pColData); - if (code) goto _exit; - tColDataInit(pColData, pColData1->cid, pColData1->type, pColData1->smaOn); - - iColData1++; - } - - while (iColData2 < nColData2) { - code = tBlockDataAddColData(pBlockData, taosArrayGetSize(pBlockData->aIdx), &pColData); - if (code) goto _exit; - tColDataInit(pColData, pColData2->cid, pColData2->type, pColData2->smaOn); - - iColData2++; - } - - // loop to merge - int32_t iRow1 = 0; - int32_t nRow1 = pBlockData1->nRow; - int32_t iRow2 = 0; - int32_t nRow2 = pBlockData2->nRow; - TSDBROW row1; - TSDBROW row2; - int32_t c; - - while (iRow1 < nRow1 && iRow2 < nRow2) { - row1 = tsdbRowFromBlockData(pBlockData1, iRow1); - row2 = tsdbRowFromBlockData(pBlockData2, iRow2); - - c = tsdbKeyCmprFn(&TSDBROW_KEY(&row1), &TSDBROW_KEY(&row2)); if (c < 0) { - code = tBlockDataAppendRow(pBlockData, &row1, NULL); + code = tBlockDataAppendRow(pBlockData, pRow1, NULL, + pBlockData1->uid ? pBlockData1->uid : pBlockData1->aUid[pRow1->iRow]); if (code) goto _exit; - iRow1++; + + pRow1->iRow++; + if (pRow1->iRow < pBlockData1->nRow) { + *pRow1 = tsdbRowFromBlockData(pBlockData1, pRow1->iRow); + } else { + pRow1 = NULL; + } } else if (c > 0) { - code = tBlockDataAppendRow(pBlockData, &row2, NULL); + code = tBlockDataAppendRow(pBlockData, pRow2, NULL, + pBlockData2->uid ? pBlockData2->uid : pBlockData2->aUid[pRow2->iRow]); if (code) goto _exit; - iRow2++; + + pRow2->iRow++; + if (pRow2->iRow < pBlockData2->nRow) { + *pRow2 = tsdbRowFromBlockData(pBlockData2, pRow2->iRow); + } else { + pRow2 = NULL; + } } else { ASSERT(0); } } - while (iRow1 < nRow1) { - row1 = tsdbRowFromBlockData(pBlockData1, iRow1); - code = tBlockDataAppendRow(pBlockData, &row1, NULL); + while (pRow1) { + code = tBlockDataAppendRow(pBlockData, pRow1, NULL, + pBlockData1->uid ? 
pBlockData1->uid : pBlockData1->aUid[pRow1->iRow]); if (code) goto _exit; - iRow1++; + + pRow1->iRow++; + if (pRow1->iRow < pBlockData1->nRow) { + *pRow1 = tsdbRowFromBlockData(pBlockData1, pRow1->iRow); + } else { + pRow1 = NULL; + } } - while (iRow2 < nRow2) { - row2 = tsdbRowFromBlockData(pBlockData2, iRow2); - code = tBlockDataAppendRow(pBlockData, &row2, NULL); + while (pRow2) { + code = tBlockDataAppendRow(pBlockData, pRow2, NULL, + pBlockData2->uid ? pBlockData2->uid : pBlockData2->aUid[pRow2->iRow]); if (code) goto _exit; - iRow2++; + + pRow2->iRow++; + if (pRow2->iRow < pBlockData2->nRow) { + *pRow2 = tsdbRowFromBlockData(pBlockData2, pRow2->iRow); + } else { + pRow2 = NULL; + } } _exit: return code; } -int32_t tBlockDataCopy(SBlockData *pBlockDataSrc, SBlockData *pBlockDataDest) { - int32_t code = 0; - SColData *pColDataSrc; - SColData *pColDataDest; +int32_t tBlockDataCopy(SBlockData *pSrc, SBlockData *pDest) { + int32_t code = 0; - ASSERT(pBlockDataSrc->nRow > 0); + tBlockDataClear(pDest); - tBlockDataReset(pBlockDataDest); + ASSERT(pDest->suid == pSrc->suid); + ASSERT(pDest->uid == pSrc->uid); + ASSERT(taosArrayGetSize(pSrc->aIdx) == taosArrayGetSize(pDest->aIdx)); - pBlockDataDest->nRow = pBlockDataSrc->nRow; - // TSDBKEY - code = tRealloc((uint8_t **)&pBlockDataDest->aVersion, sizeof(int64_t) * pBlockDataSrc->nRow); - if (code) goto _exit; - code = tRealloc((uint8_t **)&pBlockDataDest->aTSKEY, sizeof(TSKEY) * pBlockDataSrc->nRow); - if (code) goto _exit; - memcpy(pBlockDataDest->aVersion, pBlockDataSrc->aVersion, sizeof(int64_t) * pBlockDataSrc->nRow); - memcpy(pBlockDataDest->aTSKEY, pBlockDataSrc->aTSKEY, sizeof(TSKEY) * pBlockDataSrc->nRow); + pDest->nRow = pSrc->nRow; - // other - for (size_t iColData = 0; iColData < taosArrayGetSize(pBlockDataSrc->aIdx); iColData++) { - pColDataSrc = tBlockDataGetColDataByIdx(pBlockDataSrc, iColData); - code = tBlockDataAddColData(pBlockDataDest, iColData, &pColDataDest); + if (pSrc->uid == 0) { + code = 
tRealloc((uint8_t **)&pDest->aUid, sizeof(int64_t) * pDest->nRow); if (code) goto _exit; + memcpy(pDest->aUid, pSrc->aUid, sizeof(int64_t) * pDest->nRow); + } - code = tColDataCopy(pColDataSrc, pColDataDest); + code = tRealloc((uint8_t **)&pDest->aVersion, sizeof(int64_t) * pDest->nRow); + if (code) goto _exit; + memcpy(pDest->aVersion, pSrc->aVersion, sizeof(int64_t) * pDest->nRow); + + code = tRealloc((uint8_t **)&pDest->aTSKEY, sizeof(TSKEY) * pDest->nRow); + if (code) goto _exit; + memcpy(pDest->aTSKEY, pSrc->aTSKEY, sizeof(TSKEY) * pDest->nRow); + + for (int32_t iColData = 0; iColData < taosArrayGetSize(pSrc->aIdx); iColData++) { + SColData *pColSrc = tBlockDataGetColDataByIdx(pSrc, iColData); + SColData *pColDest = tBlockDataGetColDataByIdx(pDest, iColData); + + ASSERT(pColSrc->cid == pColDest->cid); + ASSERT(pColSrc->type == pColDest->type); + + code = tColDataCopy(pColSrc, pColDest); if (code) goto _exit; } @@ -1416,53 +1500,241 @@ void tBlockDataGetColData(SBlockData *pBlockData, int16_t cid, SColData **ppColD *ppColData = NULL; } -int32_t tPutBlockData(uint8_t *p, SBlockData *pBlockData) { - int32_t n = 0; +int32_t tCmprBlockData(SBlockData *pBlockData, int8_t cmprAlg, uint8_t **ppOut, int32_t *szOut, uint8_t *aBuf[], + int32_t aBufN[]) { + int32_t code = 0; - n += tPutI32v(p ? p + n : p, pBlockData->nRow); - if (p) { - memcpy(p + n, pBlockData->aVersion, sizeof(int64_t) * pBlockData->nRow); - } - n = n + sizeof(int64_t) * pBlockData->nRow; - if (p) { - memcpy(p + n, pBlockData->aTSKEY, sizeof(TSKEY) * pBlockData->nRow); - } - n = n + sizeof(TSKEY) * pBlockData->nRow; + SDiskDataHdr hdr = {.delimiter = TSDB_FILE_DLMT, + .fmtVer = 0, + .suid = pBlockData->suid, + .uid = pBlockData->uid, + .nRow = pBlockData->nRow, + .cmprAlg = cmprAlg}; - int32_t nCol = taosArrayGetSize(pBlockData->aIdx); - n += tPutI32v(p ? 
p + n : p, nCol); - for (int32_t iCol = 0; iCol < nCol; iCol++) { - SColData *pColData = tBlockDataGetColDataByIdx(pBlockData, iCol); - n += tPutColData(p ? p + n : p, pColData); + // encode ================= + // columns AND SBlockCol + aBufN[0] = 0; + for (int32_t iColData = 0; iColData < taosArrayGetSize(pBlockData->aIdx); iColData++) { + SColData *pColData = tBlockDataGetColDataByIdx(pBlockData, iColData); + + ASSERT(pColData->flag); + + if (pColData->flag == HAS_NONE) continue; + + SBlockCol blockCol = {.cid = pColData->cid, + .type = pColData->type, + .smaOn = pColData->smaOn, + .flag = pColData->flag, + .szOrigin = pColData->nData}; + + if (pColData->flag != HAS_NULL) { + code = tsdbCmprColData(pColData, cmprAlg, &blockCol, &aBuf[0], aBufN[0], &aBuf[2]); + if (code) goto _exit; + + blockCol.offset = aBufN[0]; + aBufN[0] = aBufN[0] + blockCol.szBitmap + blockCol.szOffset + blockCol.szValue + sizeof(TSCKSUM); + } + + code = tRealloc(&aBuf[1], hdr.szBlkCol + tPutBlockCol(NULL, &blockCol)); + if (code) goto _exit; + hdr.szBlkCol += tPutBlockCol(aBuf[1] + hdr.szBlkCol, &blockCol); } + aBufN[1] = 0; + if (hdr.szBlkCol > 0) { + aBufN[1] = hdr.szBlkCol + sizeof(TSCKSUM); + + code = tRealloc(&aBuf[1], aBufN[1]); + if (code) goto _exit; + + taosCalcChecksumAppend(0, aBuf[1], aBufN[1]); + } + + // uid + version + tskey + aBufN[2] = 0; + if (pBlockData->uid == 0) { + code = tsdbCmprData((uint8_t *)pBlockData->aUid, sizeof(int64_t) * pBlockData->nRow, TSDB_DATA_TYPE_BIGINT, cmprAlg, + &aBuf[2], aBufN[2], &hdr.szUid, &aBuf[3]); + if (code) goto _exit; + } + aBufN[2] += hdr.szUid; + + code = tsdbCmprData((uint8_t *)pBlockData->aVersion, sizeof(int64_t) * pBlockData->nRow, TSDB_DATA_TYPE_BIGINT, + cmprAlg, &aBuf[2], aBufN[2], &hdr.szVer, &aBuf[3]); + if (code) goto _exit; + aBufN[2] += hdr.szVer; + + code = tsdbCmprData((uint8_t *)pBlockData->aTSKEY, sizeof(TSKEY) * pBlockData->nRow, TSDB_DATA_TYPE_TIMESTAMP, + cmprAlg, &aBuf[2], aBufN[2], &hdr.szKey, &aBuf[3]); + if (code) 
goto _exit; + aBufN[2] += hdr.szKey; + + aBufN[2] += sizeof(TSCKSUM); + code = tRealloc(&aBuf[2], aBufN[2]); + if (code) goto _exit; + + // hdr + aBufN[3] = tPutDiskDataHdr(NULL, &hdr); + code = tRealloc(&aBuf[3], aBufN[3]); + if (code) goto _exit; + tPutDiskDataHdr(aBuf[3], &hdr); + taosCalcChecksumAppend(taosCalcChecksum(0, aBuf[3], aBufN[3]), aBuf[2], aBufN[2]); + + // aggragate + if (ppOut) { + *szOut = aBufN[0] + aBufN[1] + aBufN[2] + aBufN[3]; + code = tRealloc(ppOut, *szOut); + if (code) goto _exit; + + memcpy(*ppOut, aBuf[3], aBufN[3]); + memcpy(*ppOut + aBufN[3], aBuf[2], aBufN[2]); + if (aBufN[1]) { + memcpy(*ppOut + aBufN[3] + aBufN[2], aBuf[1], aBufN[1]); + } + if (aBufN[0]) { + memcpy(*ppOut + aBufN[3] + aBufN[2] + aBufN[1], aBuf[0], aBufN[0]); + } + } + +_exit: + return code; +} + +int32_t tDecmprBlockData(uint8_t *pIn, int32_t szIn, SBlockData *pBlockData, uint8_t *aBuf[]) { + int32_t code = 0; + + tBlockDataClear(pBlockData); + + int32_t n = 0; + SDiskDataHdr hdr = {0}; + + // SDiskDataHdr + n += tGetDiskDataHdr(pIn + n, &hdr); + if (!taosCheckChecksumWhole(pIn, n + hdr.szUid + hdr.szVer + hdr.szKey + sizeof(TSCKSUM))) { + code = TSDB_CODE_FILE_CORRUPTED; + goto _exit; + } + ASSERT(hdr.delimiter == TSDB_FILE_DLMT); + + pBlockData->suid = hdr.suid; + pBlockData->uid = hdr.uid; + pBlockData->nRow = hdr.nRow; + + // uid + if (hdr.uid == 0) { + ASSERT(hdr.szUid); + code = tsdbDecmprData(pIn + n, hdr.szUid, TSDB_DATA_TYPE_BIGINT, hdr.cmprAlg, (uint8_t **)&pBlockData->aUid, + sizeof(int64_t) * hdr.nRow, &aBuf[0]); + if (code) goto _exit; + } else { + ASSERT(!hdr.szUid); + } + n += hdr.szUid; + + // version + code = tsdbDecmprData(pIn + n, hdr.szVer, TSDB_DATA_TYPE_BIGINT, hdr.cmprAlg, (uint8_t **)&pBlockData->aVersion, + sizeof(int64_t) * hdr.nRow, &aBuf[0]); + if (code) goto _exit; + n += hdr.szVer; + + // TSKEY + code = tsdbDecmprData(pIn + n, hdr.szKey, TSDB_DATA_TYPE_TIMESTAMP, hdr.cmprAlg, (uint8_t **)&pBlockData->aTSKEY, + sizeof(TSKEY) * hdr.nRow, 
&aBuf[0]); + if (code) goto _exit; + n = n + hdr.szKey + sizeof(TSCKSUM); + + // loop to decode each column data + if (hdr.szBlkCol == 0) goto _exit; + + int32_t nt = 0; + while (nt < hdr.szBlkCol) { + SBlockCol blockCol = {0}; + nt += tGetBlockCol(pIn + n + nt, &blockCol); + ASSERT(nt <= hdr.szBlkCol); + + SColData *pColData; + code = tBlockDataAddColData(pBlockData, taosArrayGetSize(pBlockData->aIdx), &pColData); + if (code) goto _exit; + + tColDataInit(pColData, blockCol.cid, blockCol.type, blockCol.smaOn); + if (blockCol.flag == HAS_NULL) { + for (int32_t iRow = 0; iRow < hdr.nRow; iRow++) { + code = tColDataAppendValue(pColData, &COL_VAL_NULL(blockCol.cid, blockCol.type)); + if (code) goto _exit; + } + } else { + code = tsdbDecmprColData(pIn + n + hdr.szBlkCol + sizeof(TSCKSUM) + blockCol.offset, &blockCol, hdr.cmprAlg, + hdr.nRow, pColData, &aBuf[0]); + if (code) goto _exit; + } + } + +_exit: + return code; +} + +// SDiskDataHdr ============================== +int32_t tPutDiskDataHdr(uint8_t *p, void *ph) { + int32_t n = 0; + SDiskDataHdr *pHdr = (SDiskDataHdr *)ph; + + n += tPutU32(p ? p + n : p, pHdr->delimiter); + n += tPutU32v(p ? p + n : p, pHdr->fmtVer); + n += tPutI64(p ? p + n : p, pHdr->suid); + n += tPutI64(p ? p + n : p, pHdr->uid); + n += tPutI32v(p ? p + n : p, pHdr->szUid); + n += tPutI32v(p ? p + n : p, pHdr->szVer); + n += tPutI32v(p ? p + n : p, pHdr->szKey); + n += tPutI32v(p ? p + n : p, pHdr->szBlkCol); + n += tPutI32v(p ? p + n : p, pHdr->nRow); + n += tPutI8(p ? 
p + n : p, pHdr->cmprAlg); + return n; } -int32_t tGetBlockData(uint8_t *p, SBlockData *pBlockData) { - int32_t n = 0; +int32_t tGetDiskDataHdr(uint8_t *p, void *ph) { + int32_t n = 0; + SDiskDataHdr *pHdr = (SDiskDataHdr *)ph; - tBlockDataReset(pBlockData); - - n += tGetI32v(p + n, &pBlockData->nRow); - pBlockData->aVersion = (int64_t *)(p + n); - n = n + sizeof(int64_t) * pBlockData->nRow; - pBlockData->aTSKEY = (TSKEY *)(p + n); - n = n + sizeof(TSKEY) * pBlockData->nRow; - - int32_t nCol; - n += tGetI32v(p + n, &nCol); - for (int32_t iCol = 0; iCol < nCol; iCol++) { - SColData *pColData; - - if (tBlockDataAddColData(pBlockData, iCol, &pColData)) return -1; - n += tGetColData(p + n, pColData); - } + n += tGetU32(p + n, &pHdr->delimiter); + n += tGetU32v(p + n, &pHdr->fmtVer); + n += tGetI64(p + n, &pHdr->suid); + n += tGetI64(p + n, &pHdr->uid); + n += tGetI32v(p + n, &pHdr->szUid); + n += tGetI32v(p + n, &pHdr->szVer); + n += tGetI32v(p + n, &pHdr->szKey); + n += tGetI32v(p + n, &pHdr->szBlkCol); + n += tGetI32v(p + n, &pHdr->nRow); + n += tGetI8(p + n, &pHdr->cmprAlg); return n; } // ALGORITHM ============================== +int32_t tPutColumnDataAgg(uint8_t *p, SColumnDataAgg *pColAgg) { + int32_t n = 0; + + n += tPutI16v(p ? p + n : p, pColAgg->colId); + n += tPutI16v(p ? p + n : p, pColAgg->numOfNull); + n += tPutI64(p ? p + n : p, pColAgg->sum); + n += tPutI64(p ? p + n : p, pColAgg->max); + n += tPutI64(p ? 
p + n : p, pColAgg->min); + + return n; +} + +int32_t tGetColumnDataAgg(uint8_t *p, SColumnDataAgg *pColAgg) { + int32_t n = 0; + + n += tGetI16v(p + n, &pColAgg->colId); + n += tGetI16v(p + n, &pColAgg->numOfNull); + n += tGetI64(p + n, &pColAgg->sum); + n += tGetI64(p + n, &pColAgg->max); + n += tGetI64(p + n, &pColAgg->min); + + return n; +} + void tsdbCalcColDataSMA(SColData *pColData, SColumnDataAgg *pColAgg) { SColVal colVal; SColVal *pColVal = &colVal; @@ -1532,25 +1804,25 @@ void tsdbCalcColDataSMA(SColData *pColData, SColumnDataAgg *pColAgg) { break; } case TSDB_DATA_TYPE_FLOAT: { - *(double*)(&pColAgg->sum) += colVal.value.f; - if (!minAssigned || *(double*)(&pColAgg->min) > colVal.value.f) { - *(double*)(&pColAgg->min) = colVal.value.f; + *(double *)(&pColAgg->sum) += colVal.value.f; + if (!minAssigned || *(double *)(&pColAgg->min) > colVal.value.f) { + *(double *)(&pColAgg->min) = colVal.value.f; minAssigned = true; } - if (!maxAssigned || *(double*)(&pColAgg->max) < colVal.value.f) { - *(double*)(&pColAgg->max) = colVal.value.f; + if (!maxAssigned || *(double *)(&pColAgg->max) < colVal.value.f) { + *(double *)(&pColAgg->max) = colVal.value.f; maxAssigned = true; } break; } case TSDB_DATA_TYPE_DOUBLE: { - *(double*)(&pColAgg->sum) += colVal.value.d; - if (!minAssigned || *(double*)(&pColAgg->min) > colVal.value.d) { - *(double*)(&pColAgg->min) = colVal.value.d; + *(double *)(&pColAgg->sum) += colVal.value.d; + if (!minAssigned || *(double *)(&pColAgg->min) > colVal.value.d) { + *(double *)(&pColAgg->min) = colVal.value.d; minAssigned = true; } - if (!maxAssigned || *(double*)(&pColAgg->max) < colVal.value.d) { - *(double*)(&pColAgg->max) = colVal.value.d; + if (!maxAssigned || *(double *)(&pColAgg->max) < colVal.value.d) { + *(double *)(&pColAgg->max) = colVal.value.d; maxAssigned = true; } break; @@ -1634,3 +1906,268 @@ void tsdbCalcColDataSMA(SColData *pColData, SColumnDataAgg *pColAgg) { } } } + +int32_t tsdbCmprData(uint8_t *pIn, int32_t szIn, 
int8_t type, int8_t cmprAlg, uint8_t **ppOut, int32_t nOut, + int32_t *szOut, uint8_t **ppBuf) { + int32_t code = 0; + + ASSERT(szIn > 0 && ppOut); + + if (cmprAlg == NO_COMPRESSION) { + code = tRealloc(ppOut, nOut + szIn); + if (code) goto _exit; + + memcpy(*ppOut + nOut, pIn, szIn); + *szOut = szIn; + } else { + int32_t size = szIn + COMP_OVERFLOW_BYTES; + + code = tRealloc(ppOut, nOut + size); + if (code) goto _exit; + + if (cmprAlg == TWO_STAGE_COMP) { + ASSERT(ppBuf); + code = tRealloc(ppBuf, size); + if (code) goto _exit; + } + + *szOut = + tDataTypes[type].compFunc(pIn, szIn, szIn / tDataTypes[type].bytes, *ppOut + nOut, size, cmprAlg, *ppBuf, size); + if (*szOut <= 0) { + code = TSDB_CODE_COMPRESS_ERROR; + goto _exit; + } + } + +_exit: + return code; +} + +int32_t tsdbDecmprData(uint8_t *pIn, int32_t szIn, int8_t type, int8_t cmprAlg, uint8_t **ppOut, int32_t szOut, + uint8_t **ppBuf) { + int32_t code = 0; + + code = tRealloc(ppOut, szOut); + if (code) goto _exit; + + if (cmprAlg == NO_COMPRESSION) { + ASSERT(szIn == szOut); + memcpy(*ppOut, pIn, szOut); + } else { + if (cmprAlg == TWO_STAGE_COMP) { + code = tRealloc(ppBuf, szOut + COMP_OVERFLOW_BYTES); + if (code) goto _exit; + } + + int32_t size = tDataTypes[type].decompFunc(pIn, szIn, szOut / tDataTypes[type].bytes, *ppOut, szOut, cmprAlg, + *ppBuf, szOut + COMP_OVERFLOW_BYTES); + if (size <= 0) { + code = TSDB_CODE_COMPRESS_ERROR; + goto _exit; + } + + ASSERT(size == szOut); + } + +_exit: + return code; +} + +int32_t tsdbCmprColData(SColData *pColData, int8_t cmprAlg, SBlockCol *pBlockCol, uint8_t **ppOut, int32_t nOut, + uint8_t **ppBuf) { + int32_t code = 0; + + ASSERT(pColData->flag && (pColData->flag != HAS_NONE) && (pColData->flag != HAS_NULL)); + + pBlockCol->szBitmap = 0; + pBlockCol->szOffset = 0; + pBlockCol->szValue = 0; + + int32_t size = 0; + // bitmap + if (pColData->flag != HAS_VALUE) { + uint8_t *pBitMap = pColData->pBitMap; + int32_t szBitMap = BIT2_SIZE(pColData->nVal); + + // BIT2 to 
BIT1 + if (pColData->flag != (HAS_VALUE | HAS_NULL | HAS_NONE)) { + szBitMap = BIT1_SIZE(pColData->nVal); + pBitMap = taosMemoryCalloc(1, szBitMap); + if (pBitMap == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _exit; + } + + for (int32_t iVal = 0; iVal < pColData->nVal; iVal++) { + uint8_t v = GET_BIT2(pColData->pBitMap, iVal); + switch (pColData->flag) { + case (HAS_NULL | HAS_NONE): + SET_BIT1(pBitMap, iVal, v); + break; + case (HAS_VALUE | HAS_NONE): + if (v) { + SET_BIT1(pBitMap, iVal, 1); + } else { + SET_BIT1(pBitMap, iVal, 0); + } + break; + case (HAS_VALUE | HAS_NULL): + SET_BIT1(pBitMap, iVal, v - 1); + break; + default: + ASSERT(0); + } + } + } + + code = tsdbCmprData(pBitMap, szBitMap, TSDB_DATA_TYPE_TINYINT, cmprAlg, ppOut, nOut + size, &pBlockCol->szBitmap, + ppBuf); + if (code) goto _exit; + + if (pColData->flag != (HAS_VALUE | HAS_NULL | HAS_NONE)) { + taosMemoryFree(pBitMap); + } + } + size += pBlockCol->szBitmap; + + // offset + if (IS_VAR_DATA_TYPE(pColData->type)) { + code = tsdbCmprData((uint8_t *)pColData->aOffset, sizeof(int32_t) * pColData->nVal, TSDB_DATA_TYPE_INT, cmprAlg, + ppOut, nOut + size, &pBlockCol->szOffset, ppBuf); + if (code) goto _exit; + } + size += pBlockCol->szOffset; + + // value + if (pColData->flag != (HAS_NULL | HAS_NONE)) { + code = tsdbCmprData((uint8_t *)pColData->pData, pColData->nData, pColData->type, cmprAlg, ppOut, nOut + size, + &pBlockCol->szValue, ppBuf); + if (code) goto _exit; + } + size += pBlockCol->szValue; + + // checksum + size += sizeof(TSCKSUM); + code = tRealloc(ppOut, nOut + size); + if (code) goto _exit; + taosCalcChecksumAppend(0, *ppOut + nOut, size); + +_exit: + return code; +} + +int32_t tsdbDecmprColData(uint8_t *pIn, SBlockCol *pBlockCol, int8_t cmprAlg, int32_t nVal, SColData *pColData, + uint8_t **ppBuf) { + int32_t code = 0; + + int32_t size = pBlockCol->szBitmap + pBlockCol->szOffset + pBlockCol->szValue + sizeof(TSCKSUM); + if (!taosCheckChecksumWhole(pIn, size)) { + code = 
TSDB_CODE_FILE_CORRUPTED; + goto _exit; + } + + ASSERT(pColData->cid == pBlockCol->cid); + ASSERT(pColData->type == pBlockCol->type); + pColData->smaOn = pBlockCol->smaOn; + pColData->flag = pBlockCol->flag; + pColData->nVal = nVal; + pColData->nData = pBlockCol->szOrigin; + + uint8_t *p = pIn; + // bitmap + if (pBlockCol->szBitmap) { + if (pBlockCol->flag != (HAS_VALUE | HAS_NULL | HAS_NONE)) { + uint8_t *pBitMap = NULL; + code = tsdbDecmprData(p, pBlockCol->szBitmap, TSDB_DATA_TYPE_TINYINT, cmprAlg, &pBitMap, + BIT1_SIZE(pColData->nVal), ppBuf); + if (code) goto _exit; + + code = tRealloc(&pColData->pBitMap, BIT2_SIZE(pColData->nVal)); + if (code) { + tFree(pBitMap); + goto _exit; + } + + // BIT1 to BIT2 + for (int32_t iVal = 0; iVal < nVal; iVal++) { + uint8_t v = GET_BIT1(pBitMap, iVal); + switch (pBlockCol->flag) { + case (HAS_NULL | HAS_NONE): + SET_BIT2(pColData->pBitMap, iVal, v); + break; + case (HAS_VALUE | HAS_NONE): + if (v) { + SET_BIT2(pColData->pBitMap, iVal, 2); + } else { + SET_BIT2(pColData->pBitMap, iVal, 0); + } + break; + case (HAS_VALUE | HAS_NULL): + SET_BIT2(pColData->pBitMap, iVal, v + 1); + break; + default: + ASSERT(0); + } + } + + tFree(pBitMap); + } else { + code = tsdbDecmprData(p, pBlockCol->szBitmap, TSDB_DATA_TYPE_TINYINT, cmprAlg, &pColData->pBitMap, + BIT2_SIZE(pColData->nVal), ppBuf); + if (code) goto _exit; + } + } + p += pBlockCol->szBitmap; + + // offset + if (pBlockCol->szOffset) { + code = tsdbDecmprData(p, pBlockCol->szOffset, TSDB_DATA_TYPE_INT, cmprAlg, (uint8_t **)&pColData->aOffset, + sizeof(int32_t) * pColData->nVal, ppBuf); + if (code) goto _exit; + } + p += pBlockCol->szOffset; + + // value + if (pBlockCol->szValue) { + code = tsdbDecmprData(p, pBlockCol->szValue, pColData->type, cmprAlg, &pColData->pData, pColData->nData, ppBuf); + if (code) goto _exit; + } + p += pBlockCol->szValue; + +_exit: + return code; +} + +int32_t tsdbReadAndCheck(TdFilePtr pFD, int64_t offset, uint8_t **ppOut, int32_t size, int8_t toCheck) 
{ + int32_t code = 0; + + // alloc + code = tRealloc(ppOut, size); + if (code) goto _exit; + + // seek + int64_t n = taosLSeekFile(pFD, offset, SEEK_SET); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _exit; + } + + // read + n = taosReadFile(pFD, *ppOut, size); + if (n < 0) { + code = TAOS_SYSTEM_ERROR(errno); + goto _exit; + } else if (n < size) { + code = TSDB_CODE_FILE_CORRUPTED; + goto _exit; + } + + // check + if (toCheck && !taosCheckChecksumWhole(*ppOut, size)) { + code = TSDB_CODE_FILE_CORRUPTED; + goto _exit; + } + +_exit: + return code; +} diff --git a/source/libs/executor/src/executorimpl.c b/source/libs/executor/src/executorimpl.c index 9ff5b5d759..9366f014c1 100644 --- a/source/libs/executor/src/executorimpl.c +++ b/source/libs/executor/src/executorimpl.c @@ -76,6 +76,12 @@ static UNUSED_FUNC void* u_realloc(void* p, size_t __size) { #define realloc u_realloc #endif +#define T_LONG_JMP(_obj, _c) \ + do { \ + assert((_c) != -1); \ + longjmp((_obj), (_c)); \ + } while (0); + #define CLEAR_QUERY_STATUS(q, st) ((q)->status &= (~(st))) #define QUERY_IS_INTERVAL_QUERY(_q) ((_q)->interval.interval > 0) diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c index bf4a07f8e2..5051dcd65c 100644 --- a/source/libs/function/src/builtinsimpl.c +++ b/source/libs/function/src/builtinsimpl.c @@ -5574,6 +5574,7 @@ int32_t twaFunction(SqlFunctionCtx* pCtx) { if (pCtx->end.key != INT64_MIN) { pInfo->dOutput += twa_get_area(pInfo->p, pCtx->end); pInfo->p = pCtx->end; + numOfElems += 1; } pInfo->win.ekey = pInfo->p.key; diff --git a/source/util/src/trbtree.c b/source/util/src/trbtree.c new file mode 100644 index 0000000000..0970485dad --- /dev/null +++ b/source/util/src/trbtree.c @@ -0,0 +1,210 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. 
+ * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "os.h" + +typedef int32_t (*tRBTreeCmprFn)(void *, void *); + +typedef struct SRBTree SRBTree; +typedef struct SRBTreeNode SRBTreeNode; +typedef struct SRBTreeIter SRBTreeIter; + +struct SRBTreeNode { + enum { RED, BLACK } color; + SRBTreeNode *parent; + SRBTreeNode *left; + SRBTreeNode *right; + uint8_t payload[]; +}; + +struct SRBTree { + tRBTreeCmprFn cmprFn; + SRBTreeNode *root; +}; + +struct SRBTreeIter { + SRBTree *pTree; +}; + +#define RBTREE_NODE_COLOR(N) ((N) ? 
(N)->color : BLACK) + +// APIs ================================================ +static void tRBTreeRotateLeft(SRBTree *pTree, SRBTreeNode *pNode) { + SRBTreeNode *right = pNode->right; + + pNode->right = right->left; + if (pNode->right) { + pNode->right->parent = pNode; + } + + right->parent = pNode->parent; + if (pNode->parent == NULL) { + pTree->root = right; + } else if (pNode == pNode->parent->left) { + pNode->parent->left = right; + } else { + pNode->parent->right = right; + } + + right->left = pNode; + pNode->parent = right; +} + +static void tRBTreeRotateRight(SRBTree *pTree, SRBTreeNode *pNode) { + SRBTreeNode *left = pNode->left; + + pNode->left = left->right; + if (pNode->left) { + pNode->left->parent = pNode; + } + + left->parent = pNode->parent; + if (pNode->parent == NULL) { + pTree->root = left; + } else if (pNode == pNode->parent->left) { + pNode->parent->left = left; + } else { + pNode->parent->right = left; + } + + left->right = pNode; + pNode->parent = left; +} + +#define tRBTreeCreate(compare) \ + (SRBTree) { .cmprFn = (compare), .root = NULL } + +SRBTreeNode *tRBTreePut(SRBTree *pTree, SRBTreeNode *pNew) { + pNew->left = NULL; + pNew->right = NULL; + pNew->color = RED; + + // insert + if (pTree->root == NULL) { + pNew->parent = NULL; + pTree->root = pNew; + } else { + SRBTreeNode *pNode = pTree->root; + while (true) { + ASSERT(pNode); + + int32_t c = pTree->cmprFn(pNew->payload, pNode->payload); + if (c < 0) { + if (pNode->left) { + pNode = pNode->left; + } else { + pNew->parent = pNode; + pNode->left = pNew; + break; + } + } else if (c > 0) { + if (pNode->right) { + pNode = pNode->right; + } else { + pNew->parent = pNode; + pNode->right = pNew; + break; + } + } else { + return NULL; + } + } + } + + // fix + SRBTreeNode *pNode = pNew; + while (pNode->parent && pNode->parent->color == RED) { + SRBTreeNode *p = pNode->parent; + SRBTreeNode *g = p->parent; + + if (p == g->left) { + SRBTreeNode *u = g->right; + + if (RBTREE_NODE_COLOR(u) == RED) { 
+ p->color = BLACK; + u->color = BLACK; + g->color = RED; + pNode = g; + } else { + if (pNode == p->right) { + pNode = p; + tRBTreeRotateLeft(pTree, pNode); + } + pNode->parent->color = BLACK; + pNode->parent->parent->color = RED; + tRBTreeRotateRight(pTree, pNode->parent->parent); + } + } else { + SRBTreeNode *u = g->left; + + if (RBTREE_NODE_COLOR(u) == RED) { + p->color = BLACK; + u->color = BLACK; + g->color = RED; + } else { + if (pNode == p->left) { + pNode = p; + tRBTreeRotateRight(pTree, pNode); + } + pNode->parent->color = BLACK; + pNode->parent->parent->color = RED; + tRBTreeRotateLeft(pTree, pNode->parent->parent); + } + } + } + + pTree->root->color = BLACK; + return pNew; +} + +SRBTreeNode *tRBTreeDrop(SRBTree *pTree, void *pKey) { + SRBTreeNode *pNode = pTree->root; + + while (pNode) { + int32_t c = pTree->cmprFn(pKey, pNode->payload); + + if (c < 0) { + pNode = pNode->left; + } else if (c > 0) { + pNode = pNode->right; + } else { + break; + } + } + + if (pNode) { + // TODO + } + + return pNode; +} + +SRBTreeNode *tRBTreeGet(SRBTree *pTree, void *pKey) { + SRBTreeNode *pNode = pTree->root; + + while (pNode) { + int32_t c = pTree->cmprFn(pKey, pNode->payload); + + if (c < 0) { + pNode = pNode->left; + } else if (c > 0) { + pNode = pNode->right; + } else { + break; + } + } + + return pNode; +} diff --git a/tests/script/tsim/parser/function.sim b/tests/script/tsim/parser/function.sim index 0219a84c64..0002a5d095 100644 --- a/tests/script/tsim/parser/function.sim +++ b/tests/script/tsim/parser/function.sim @@ -70,6 +70,7 @@ if $data00 != @15-08-18 00:00:00.000@ then return -1 endi if $data01 != 2.068333156 then + print expect 2.068333156, actual: $data01 return -1 endi if $data02 != 2.063999891 then @@ -128,6 +129,7 @@ if $data03 != 2 then return -1 endi if $data11 != 2.077099980 then + print expect 2.077099980, actual: $data11 return -1 endi if $data12 != 2.077000022 then