From 573c28fd067364d1db2b3ff36bad58735d88caf5 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Wed, 23 Aug 2023 18:37:29 +0800 Subject: [PATCH 01/61] enh: iterate over a list of snap ranges for STsdbSnapReader --- source/dnode/vnode/src/inc/tsdb.h | 3 ++ source/dnode/vnode/src/tsdb/tsdbFS2.c | 34 ++++++++++++++++++ source/dnode/vnode/src/tsdb/tsdbFS2.h | 5 ++- source/dnode/vnode/src/tsdb/tsdbFSet2.c | 21 ++++++++++- source/dnode/vnode/src/tsdb/tsdbFSet2.h | 13 ++++++- source/dnode/vnode/src/tsdb/tsdbFile2.h | 4 ++- source/dnode/vnode/src/tsdb/tsdbSnapshot.c | 41 ++++++++++------------ 7 files changed, 95 insertions(+), 26 deletions(-) diff --git a/source/dnode/vnode/src/inc/tsdb.h b/source/dnode/vnode/src/inc/tsdb.h index e83f47f7b6..83c14a50d0 100644 --- a/source/dnode/vnode/src/inc/tsdb.h +++ b/source/dnode/vnode/src/inc/tsdb.h @@ -672,6 +672,9 @@ struct SDelFWriter { typedef struct STFileSet STFileSet; typedef TARRAY2(STFileSet *) TFileSetArray; +typedef struct STSnapRange STSnapRange; +typedef TARRAY2(STSnapRange *) TSnapRangeArray; // disjoint snap ranges + struct STsdbReadSnap { SMemTable *pMem; SQueryNode *pNode; diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.c b/source/dnode/vnode/src/tsdb/tsdbFS2.c index afe6ef6e1a..cae6f06636 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.c @@ -991,6 +991,40 @@ int32_t tsdbFSDestroyRefSnapshot(TFileSetArray **fsetArr) { return 0; } +int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, TSnapRangeArray **fsrArr) { + int32_t code = 0; + STFileSet *fset; + STSnapRange *fsr1; + + fsrArr[0] = taosMemoryCalloc(1, sizeof(*fsrArr[0])); + if (fsrArr[0] == NULL) return TSDB_CODE_OUT_OF_MEMORY; + + taosThreadRwlockRdlock(&fs->tsdb->rwLock); + TARRAY2_FOREACH(fs->fSetArr, fset) { + code = tsdbTSnapRangeInitRef(fs->tsdb, fset, &fsr1); + if (code) break; + + code = TARRAY2_APPEND(fsrArr[0], fsr1); + if (code) break; + } + taosThreadRwlockUnlock(&fs->tsdb->rwLock); + + if (code) { + 
TARRAY2_DESTROY(fsrArr[0], tsdbTSnapRangeClear); + fsrArr[0] = NULL; + } + return code; +} + +int32_t tsdbFSDestroyRefRangedSnapshot(TSnapRangeArray **fsrArr) { + if (fsrArr[0]) { + TARRAY2_DESTROY(fsrArr[0], tsdbTSnapRangeClear); + taosMemoryFreeClear(fsrArr[0]); + fsrArr[0] = NULL; + } + return 0; +} + const char *gFSBgTaskName[] = {NULL, "MERGE", "RETENTION", "COMPACT"}; static int32_t tsdbFSRunBgTask(void *arg) { diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.h b/source/dnode/vnode/src/tsdb/tsdbFS2.h index b0f42a0c48..4b7e0bba2c 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS2.h +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.h @@ -52,7 +52,10 @@ int32_t tsdbCloseFS(STFileSystem **fs); int32_t tsdbFSCreateCopySnapshot(STFileSystem *fs, TFileSetArray **fsetArr); int32_t tsdbFSDestroyCopySnapshot(TFileSetArray **fsetArr); int32_t tsdbFSCreateRefSnapshot(STFileSystem *fs, TFileSetArray **fsetArr); -int32_t tsdbFSDestroyRefSnapshot(TFileSetArray **fsetArr); +int32_t tsdbFSDestroyRefSnapshot(TFileSetArray **fsrArr); + +int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, TSnapRangeArray **fsrArr); +int32_t tsdbFSDestroyRefRangedSnapshot(TSnapRangeArray **fsrArr); // txn int64_t tsdbFSAllocEid(STFileSystem *fs); int32_t tsdbFSEditBegin(STFileSystem *fs, const TFileOpArray *opArray, EFEditT etype); diff --git a/source/dnode/vnode/src/tsdb/tsdbFSet2.c b/source/dnode/vnode/src/tsdb/tsdbFSet2.c index cd47a54973..67b6d8f003 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFSet2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFSet2.c @@ -458,6 +458,16 @@ int32_t tsdbTFileSetInitDup(STsdb *pTsdb, const STFileSet *fset1, STFileSet **fs return 0; } +int32_t tsdbTSnapRangeInitRef(STsdb *pTsdb, const STFileSet *fset1, STSnapRange **fsr) { + fsr[0] = taosMemoryCalloc(1, sizeof(STSnapRange)); + if (fsr[0] == NULL) return TSDB_CODE_OUT_OF_MEMORY; + + fsr[0]->fid = fset1->fid; + // fsr[0]->sver = sver; + // fsr[0]->ever = ever; + return tsdbTFileSetInitRef(pTsdb, fset1, &fsr[0]->fset); +} + 
int32_t tsdbTFileSetInitRef(STsdb *pTsdb, const STFileSet *fset1, STFileSet **fset) { int32_t code = tsdbTFileSetInit(fset1->fid, fset); if (code) return code; @@ -485,6 +495,15 @@ int32_t tsdbTFileSetInitRef(STsdb *pTsdb, const STFileSet *fset1, STFileSet **fs return 0; } +int32_t tsdbTSnapRangeClear(STSnapRange **fsr) { + if (!fsr[0]) return 0; + + tsdbTFileSetClear(&fsr[0]->fset); + taosMemoryFree(fsr[0]); + fsr[0] = NULL; + return 0; +} + int32_t tsdbTFileSetClear(STFileSet **fset) { if (!fset[0]) return 0; @@ -545,4 +564,4 @@ bool tsdbTFileSetIsEmpty(const STFileSet *fset) { if (fset->farr[ftype] != NULL) return false; } return TARRAY2_SIZE(fset->lvlArr) == 0; -} \ No newline at end of file +} diff --git a/source/dnode/vnode/src/tsdb/tsdbFSet2.h b/source/dnode/vnode/src/tsdb/tsdbFSet2.h index d7b3c1fc8c..da5c4cbcc3 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFSet2.h +++ b/source/dnode/vnode/src/tsdb/tsdbFSet2.h @@ -45,6 +45,10 @@ int32_t tsdbTFileSetInitDup(STsdb *pTsdb, const STFileSet *fset1, STFileSet **fs int32_t tsdbTFileSetInitRef(STsdb *pTsdb, const STFileSet *fset1, STFileSet **fset); int32_t tsdbTFileSetClear(STFileSet **fset); int32_t tsdbTFileSetRemove(STFileSet **fset); + +int32_t tsdbTSnapRangeInitRef(STsdb *pTsdb, const STFileSet *fset1, STSnapRange **fsr); +int32_t tsdbTSnapRangeClear(STSnapRange **fsr); + // to/from json int32_t tsdbTFileSetToJson(const STFileSet *fset, cJSON *json); int32_t tsdbJsonToTFileSet(STsdb *pTsdb, const cJSON *json, STFileSet **fset); @@ -78,8 +82,15 @@ struct STFileSet { TSttLvlArray lvlArr[1]; // level array }; +struct STSnapRange { + int32_t fid; + int64_t sver; + int64_t ever; + STFileSet *fset; +}; + #ifdef __cplusplus } #endif -#endif /*_TSDB_FILE_SET2_H*/ \ No newline at end of file +#endif /*_TSDB_FILE_SET2_H*/ diff --git a/source/dnode/vnode/src/tsdb/tsdbFile2.h b/source/dnode/vnode/src/tsdb/tsdbFile2.h index 11d08e45e6..33d8ac5478 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFile2.h +++ 
b/source/dnode/vnode/src/tsdb/tsdbFile2.h @@ -61,6 +61,8 @@ struct STFile { int32_t fid; // file id int64_t cid; // commit id int64_t size; + int64_t minVer; + int64_t maxVer; union { struct { int32_t level; @@ -80,4 +82,4 @@ struct STFileObj { } #endif -#endif /*_TSDB_FILE_H*/ \ No newline at end of file +#endif /*_TSDB_FILE_H*/ diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index e4011ca400..e016654b4b 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -32,12 +32,12 @@ struct STsdbSnapReader { uint8_t* aBuf[5]; SSkmInfo skmTb[1]; - TFileSetArray* fsetArr; + TSnapRangeArray* fsrArr; // context struct { - int32_t fsetArrIdx; - STFileSet* fset; + int32_t fsrArrIdx; + STSnapRange* fsr; bool isDataDone; bool isTombDone; } ctx[1]; @@ -72,10 +72,10 @@ static int32_t tsdbSnapReadFileSetOpenReader(STsdbSnapReader* reader) { }; bool hasDataFile = false; for (int32_t ftype = 0; ftype < TSDB_FTYPE_MAX; ftype++) { - if (reader->ctx->fset->farr[ftype] != NULL) { + if (reader->ctx->fsr->fset->farr[ftype] != NULL) { hasDataFile = true; config.files[ftype].exist = true; - config.files[ftype].file = reader->ctx->fset->farr[ftype]->f[0]; + config.files[ftype].file = reader->ctx->fsr->fset->farr[ftype]->f[0]; } } @@ -86,7 +86,7 @@ static int32_t tsdbSnapReadFileSetOpenReader(STsdbSnapReader* reader) { // stt SSttLvl* lvl; - TARRAY2_FOREACH(reader->ctx->fset->lvlArr, lvl) { + TARRAY2_FOREACH(reader->ctx->fsr->fset->lvlArr, lvl) { STFileObj* fobj; TARRAY2_FOREACH(lvl->fobjArr, fobj) { SSttFileReader* sttReader; @@ -211,14 +211,14 @@ static int32_t tsdbSnapReadFileSetCloseIter(STsdbSnapReader* reader) { return 0; } -static int32_t tsdbSnapReadFileSetBegin(STsdbSnapReader* reader) { +static int32_t tsdbSnapReadRangeBegin(STsdbSnapReader* reader) { int32_t code = 0; int32_t lino = 0; - ASSERT(reader->ctx->fset == NULL); + ASSERT(reader->ctx->fsr == NULL); - if 
(reader->ctx->fsetArrIdx < TARRAY2_SIZE(reader->fsetArr)) { - reader->ctx->fset = TARRAY2_GET(reader->fsetArr, reader->ctx->fsetArrIdx++); + if (reader->ctx->fsrArrIdx < TARRAY2_SIZE(reader->fsrArr)) { + reader->ctx->fsr = TARRAY2_GET(reader->fsrArr, reader->ctx->fsrArrIdx++); reader->ctx->isDataDone = false; reader->ctx->isTombDone = false; @@ -236,10 +236,10 @@ _exit: return code; } -static int32_t tsdbSnapReadFileSetEnd(STsdbSnapReader* reader) { +static int32_t tsdbSnapReadRangeEnd(STsdbSnapReader* reader) { tsdbSnapReadFileSetCloseIter(reader); tsdbSnapReadFileSetCloseReader(reader); - reader->ctx->fset = NULL; + reader->ctx->fsr = NULL; return 0; } @@ -424,17 +424,14 @@ int32_t tsdbSnapReaderOpen(STsdb* tsdb, int64_t sver, int64_t ever, int8_t type, reader[0]->ever = ever; reader[0]->type = type; - taosThreadRwlockRdlock(&tsdb->rwLock); - code = tsdbFSCreateRefSnapshot(tsdb->pFS, &reader[0]->fsetArr); - taosThreadRwlockUnlock(&tsdb->rwLock); - + code = tsdbFSCreateRefRangedSnapshot(tsdb->pFS, &reader[0]->fsrArr); TSDB_CHECK_CODE(code, lino, _exit); _exit: if (code) { tsdbError("vgId:%d %s failed at line %d since %s, sver:%" PRId64 " ever:%" PRId64 " type:%d", TD_VID(tsdb->pVnode), __func__, lino, tstrerror(code), sver, ever, type); - tsdbFSDestroyRefSnapshot(&reader[0]->fsetArr); + tsdbFSDestroyRefRangedSnapshot(&reader[0]->fsrArr); taosMemoryFree(reader[0]); reader[0] = NULL; } else { @@ -462,7 +459,7 @@ int32_t tsdbSnapReaderClose(STsdbSnapReader** reader) { TARRAY2_DESTROY(reader[0]->sttReaderArr, tsdbSttFileReaderClose); tsdbDataFileReaderClose(&reader[0]->dataReader); - tsdbFSDestroyRefSnapshot(&reader[0]->fsetArr); + tsdbFSDestroyRefRangedSnapshot(&reader[0]->fsrArr); tDestroyTSchema(reader[0]->skmTb->pTSchema); for (int32_t i = 0; i < ARRAY_SIZE(reader[0]->aBuf); ++i) { @@ -488,11 +485,11 @@ int32_t tsdbSnapRead(STsdbSnapReader* reader, uint8_t** data) { data[0] = NULL; for (;;) { - if (reader->ctx->fset == NULL) { - code = 
tsdbSnapReadFileSetBegin(reader); + if (reader->ctx->fsr == NULL) { + code = tsdbSnapReadRangeBegin(reader); TSDB_CHECK_CODE(code, lino, _exit); - if (reader->ctx->fset == NULL) { + if (reader->ctx->fsr == NULL) { break; } } @@ -517,7 +514,7 @@ int32_t tsdbSnapRead(STsdbSnapReader* reader, uint8_t** data) { } } - code = tsdbSnapReadFileSetEnd(reader); + code = tsdbSnapReadRangeEnd(reader); TSDB_CHECK_CODE(code, lino, _exit); } From 1be2835fc1c3e0e71d226829b21c58697a79a2c8 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Thu, 24 Aug 2023 16:17:50 +0800 Subject: [PATCH 02/61] enh: exclude a list of snap ranges in tsdbSnapReaderOpen --- source/dnode/vnode/src/inc/vnodeInt.h | 3 ++- source/dnode/vnode/src/sma/smaSnapshot.c | 2 +- source/dnode/vnode/src/tsdb/tsdbFS2.c | 10 +++++++--- source/dnode/vnode/src/tsdb/tsdbFS2.h | 3 ++- source/dnode/vnode/src/tsdb/tsdbFSet2.c | 17 +++++++++++------ source/dnode/vnode/src/tsdb/tsdbFSet2.h | 2 +- source/dnode/vnode/src/tsdb/tsdbSnapshot.c | 4 ++-- source/dnode/vnode/src/vnd/vnodeSnapshot.c | 6 +++++- 8 files changed, 31 insertions(+), 16 deletions(-) diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 823e9d57f6..cc355c5f32 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -295,7 +295,8 @@ int32_t metaSnapWriterOpen(SMeta* pMeta, int64_t sver, int64_t ever, SMetaSnapWr int32_t metaSnapWrite(SMetaSnapWriter* pWriter, uint8_t* pData, uint32_t nData); int32_t metaSnapWriterClose(SMetaSnapWriter** ppWriter, int8_t rollback); // STsdbSnapReader ======================================== -int32_t tsdbSnapReaderOpen(STsdb* pTsdb, int64_t sver, int64_t ever, int8_t type, STsdbSnapReader** ppReader); +int32_t tsdbSnapReaderOpen(STsdb* pTsdb, int64_t sver, int64_t ever, int8_t type, void* pEx, + STsdbSnapReader** ppReader); int32_t tsdbSnapReaderClose(STsdbSnapReader** ppReader); int32_t tsdbSnapRead(STsdbSnapReader* pReader, uint8_t** ppData); // 
STsdbSnapWriter ======================================== diff --git a/source/dnode/vnode/src/sma/smaSnapshot.c b/source/dnode/vnode/src/sma/smaSnapshot.c index e01a33936b..2a010f5a84 100644 --- a/source/dnode/vnode/src/sma/smaSnapshot.c +++ b/source/dnode/vnode/src/sma/smaSnapshot.c @@ -48,7 +48,7 @@ int32_t rsmaSnapReaderOpen(SSma* pSma, int64_t sver, int64_t ever, SRSmaSnapRead // open rsma1/rsma2 for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { if (pSma->pRSmaTsdb[i]) { - code = tsdbSnapReaderOpen(pSma->pRSmaTsdb[i], sver, ever, i == 0 ? SNAP_DATA_RSMA1 : SNAP_DATA_RSMA2, + code = tsdbSnapReaderOpen(pSma->pRSmaTsdb[i], sver, ever, (i == 0 ? SNAP_DATA_RSMA1 : SNAP_DATA_RSMA2), NULL, &pReader->pDataReader[i]); TSDB_CHECK_CODE(code, lino, _exit); } diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.c b/source/dnode/vnode/src/tsdb/tsdbFS2.c index cae6f06636..0a8bb4c37a 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.c @@ -991,25 +991,29 @@ int32_t tsdbFSDestroyRefSnapshot(TFileSetArray **fsetArr) { return 0; } -int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, TSnapRangeArray **fsrArr) { +int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, int64_t sver, int64_t ever, TSnapRangeArray *pEx, + TSnapRangeArray **fsrArr) { int32_t code = 0; STFileSet *fset; - STSnapRange *fsr1; + STSnapRange *fsr1 = NULL; fsrArr[0] = taosMemoryCalloc(1, sizeof(*fsrArr[0])); if (fsrArr[0] == NULL) return TSDB_CODE_OUT_OF_MEMORY; taosThreadRwlockRdlock(&fs->tsdb->rwLock); TARRAY2_FOREACH(fs->fSetArr, fset) { - code = tsdbTSnapRangeInitRef(fs->tsdb, fset, &fsr1); + code = tsdbTSnapRangeInitRef(fs->tsdb, fset, sver, ever, &fsr1); if (code) break; code = TARRAY2_APPEND(fsrArr[0], fsr1); if (code) break; + + fsr1 = NULL; } taosThreadRwlockUnlock(&fs->tsdb->rwLock); if (code) { + tsdbTSnapRangeClear(&fsr1); TARRAY2_DESTROY(fsrArr[0], tsdbTSnapRangeClear); fsrArr[0] = NULL; } diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.h 
b/source/dnode/vnode/src/tsdb/tsdbFS2.h index 4b7e0bba2c..8dfc86ee83 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS2.h +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.h @@ -54,7 +54,8 @@ int32_t tsdbFSDestroyCopySnapshot(TFileSetArray **fsetArr); int32_t tsdbFSCreateRefSnapshot(STFileSystem *fs, TFileSetArray **fsetArr); int32_t tsdbFSDestroyRefSnapshot(TFileSetArray **fsrArr); -int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, TSnapRangeArray **fsrArr); +int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, int64_t sver, int64_t ever, TSnapRangeArray *pEx, + TSnapRangeArray **fsrArr); int32_t tsdbFSDestroyRefRangedSnapshot(TSnapRangeArray **fsrArr); // txn int64_t tsdbFSAllocEid(STFileSystem *fs); diff --git a/source/dnode/vnode/src/tsdb/tsdbFSet2.c b/source/dnode/vnode/src/tsdb/tsdbFSet2.c index 67b6d8f003..e0434b7da6 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFSet2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFSet2.c @@ -458,14 +458,19 @@ int32_t tsdbTFileSetInitDup(STsdb *pTsdb, const STFileSet *fset1, STFileSet **fs return 0; } -int32_t tsdbTSnapRangeInitRef(STsdb *pTsdb, const STFileSet *fset1, STSnapRange **fsr) { - fsr[0] = taosMemoryCalloc(1, sizeof(STSnapRange)); +int32_t tsdbTSnapRangeInitRef(STsdb *pTsdb, const STFileSet *fset1, int64_t sver, int64_t ever, STSnapRange **fsr) { + fsr[0] = taosMemoryCalloc(1, sizeof(*fsr[0])); if (fsr[0] == NULL) return TSDB_CODE_OUT_OF_MEMORY; - fsr[0]->fid = fset1->fid; - // fsr[0]->sver = sver; - // fsr[0]->ever = ever; - return tsdbTFileSetInitRef(pTsdb, fset1, &fsr[0]->fset); + fsr[0]->sver = sver; + fsr[0]->ever = ever; + + int32_t code = tsdbTFileSetInitRef(pTsdb, fset1, &fsr[0]->fset); + if (code) { + taosMemoryFree(fsr[0]); + fsr[0] = NULL; + } + return code; } int32_t tsdbTFileSetInitRef(STsdb *pTsdb, const STFileSet *fset1, STFileSet **fset) { diff --git a/source/dnode/vnode/src/tsdb/tsdbFSet2.h b/source/dnode/vnode/src/tsdb/tsdbFSet2.h index da5c4cbcc3..8155328e70 100644 --- 
a/source/dnode/vnode/src/tsdb/tsdbFSet2.h +++ b/source/dnode/vnode/src/tsdb/tsdbFSet2.h @@ -46,7 +46,7 @@ int32_t tsdbTFileSetInitRef(STsdb *pTsdb, const STFileSet *fset1, STFileSet **fs int32_t tsdbTFileSetClear(STFileSet **fset); int32_t tsdbTFileSetRemove(STFileSet **fset); -int32_t tsdbTSnapRangeInitRef(STsdb *pTsdb, const STFileSet *fset1, STSnapRange **fsr); +int32_t tsdbTSnapRangeInitRef(STsdb *pTsdb, const STFileSet *fset1, int64_t sver, int64_t ever, STSnapRange **fsr); int32_t tsdbTSnapRangeClear(STSnapRange **fsr); // to/from json diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index e016654b4b..618e10195b 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -412,7 +412,7 @@ _exit: return code; } -int32_t tsdbSnapReaderOpen(STsdb* tsdb, int64_t sver, int64_t ever, int8_t type, STsdbSnapReader** reader) { +int32_t tsdbSnapReaderOpen(STsdb* tsdb, int64_t sver, int64_t ever, int8_t type, void* pEx, STsdbSnapReader** reader) { int32_t code = 0; int32_t lino = 0; @@ -424,7 +424,7 @@ int32_t tsdbSnapReaderOpen(STsdb* tsdb, int64_t sver, int64_t ever, int8_t type, reader[0]->ever = ever; reader[0]->type = type; - code = tsdbFSCreateRefRangedSnapshot(tsdb->pFS, &reader[0]->fsrArr); + code = tsdbFSCreateRefRangedSnapshot(tsdb->pFS, sver, ever, (TSnapRangeArray*)pEx, &reader[0]->fsrArr); TSDB_CHECK_CODE(code, lino, _exit); _exit: diff --git a/source/dnode/vnode/src/vnd/vnodeSnapshot.c b/source/dnode/vnode/src/vnd/vnodeSnapshot.c index 3abcf79839..b89fed73f8 100644 --- a/source/dnode/vnode/src/vnd/vnodeSnapshot.c +++ b/source/dnode/vnode/src/vnd/vnodeSnapshot.c @@ -14,6 +14,7 @@ */ #include "vnd.h" +#include "tsdb.h" // SVSnapReader ======================================================== struct SVSnapReader { @@ -28,6 +29,7 @@ struct SVSnapReader { SMetaSnapReader *pMetaReader; // tsdb int8_t tsdbDone; + TSnapRangeArray *pEx; STsdbSnapReader *pTsdbReader; // 
tq int8_t tqHandleDone; @@ -59,6 +61,8 @@ int32_t vnodeSnapReaderOpen(SVnode *pVnode, int64_t sver, int64_t ever, SVSnapRe pReader->sver = sver; pReader->ever = ever; + // TODO: pReader->pEx + vInfo("vgId:%d, vnode snapshot reader opened, sver:%" PRId64 " ever:%" PRId64, TD_VID(pVnode), sver, ever); *ppReader = pReader; return code; @@ -175,7 +179,7 @@ int32_t vnodeSnapRead(SVSnapReader *pReader, uint8_t **ppData, uint32_t *nData) if (!pReader->tsdbDone) { // open if not if (pReader->pTsdbReader == NULL) { - code = tsdbSnapReaderOpen(pReader->pVnode->pTsdb, pReader->sver, pReader->ever, SNAP_DATA_TSDB, + code = tsdbSnapReaderOpen(pReader->pVnode->pTsdb, pReader->sver, pReader->ever, SNAP_DATA_TSDB, pReader->pEx, &pReader->pTsdbReader); if (code) goto _err; } From 7a13e4b2cf0c454cd92c635b96ecdafb67771ddd Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Fri, 25 Aug 2023 11:50:36 +0800 Subject: [PATCH 03/61] enh: add minVer and maxVer for entries in current.json --- source/dnode/vnode/src/tsdb/tsdbFile2.c | 26 ++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbFile2.c b/source/dnode/vnode/src/tsdb/tsdbFile2.c index 3d8964d41b..963c5bad34 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFile2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFile2.c @@ -76,6 +76,17 @@ static int32_t tfile_to_json(const STFile *file, cJSON *json) { return TSDB_CODE_OUT_OF_MEMORY; } + if (file->minVer <= file->maxVer) { + /* minVer */ + if (cJSON_AddNumberToObject(json, "minVer", file->minVer) == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + /* maxVer */ + if (cJSON_AddNumberToObject(json, "maxVer", file->maxVer) == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + } return 0; } @@ -122,6 +133,19 @@ static int32_t tfile_from_json(const cJSON *json, STFile *file) { return TSDB_CODE_FILE_CORRUPTED; } + /* minVer */ + file->minVer = VERSION_MAX; + item = cJSON_GetObjectItem(json, "minVer"); + if (cJSON_IsNumber(item)) { + 
file->minVer = item->valuedouble; + } + + /* maxVer */ + file->maxVer = VERSION_MIN; + item = cJSON_GetObjectItem(json, "maxVer"); + if (cJSON_IsNumber(item)) { + file->maxVer = item->valuedouble; + } return 0; } @@ -296,4 +320,4 @@ int32_t tsdbTFileObjCmpr(const STFileObj **fobj1, const STFileObj **fobj2) { } else { return 0; } -} \ No newline at end of file +} From da28d490aab664086feaafcf88b2e0428f21b6ca Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Fri, 25 Aug 2023 11:59:59 +0800 Subject: [PATCH 04/61] enh: filterByVersion with snap ranges --- source/dnode/vnode/src/tsdb/tsdbSnapshot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index 618e10195b..b604256e89 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -138,8 +138,8 @@ static int32_t tsdbSnapReadFileSetOpenIter(STsdbSnapReader* reader) { STsdbIter* iter; STsdbIterConfig config = { .filterByVersion = true, - .verRange[0] = reader->sver, - .verRange[1] = reader->ever, + .verRange[0] = reader->ctx->fsr->sver, + .verRange[1] = reader->ctx->fsr->ever, }; // data file From ab7a20c1171a847da18c4e9bef8b030c83bfa9ae Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Fri, 25 Aug 2023 19:04:10 +0800 Subject: [PATCH 05/61] enh: write stt and data files with minVer and maxVer --- source/dnode/vnode/src/inc/vnodeInt.h | 6 ++++++ source/dnode/vnode/src/tsdb/tsdbCommit2.c | 8 ++++++++ source/dnode/vnode/src/tsdb/tsdbDataFileRW.c | 8 ++++++++ source/dnode/vnode/src/tsdb/tsdbDataFileRW.h | 4 +++- source/dnode/vnode/src/tsdb/tsdbFSetRW.c | 6 +++++- source/dnode/vnode/src/tsdb/tsdbFSetRW.h | 4 +++- source/dnode/vnode/src/tsdb/tsdbMerge.c | 1 + source/dnode/vnode/src/tsdb/tsdbSttFileRW.c | 2 ++ source/dnode/vnode/src/tsdb/tsdbSttFileRW.h | 4 +++- source/dnode/vnode/src/vnd/vnodeCommit.c | 3 +++ 10 files changed, 42 insertions(+), 4 deletions(-) diff 
--git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index cc355c5f32..7a242b55cf 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -527,10 +527,16 @@ struct SSnapDataHdr { uint8_t data[]; }; +typedef struct SRange { + int64_t start; + int64_t end; +} SRange; + struct SCommitInfo { SVnodeInfo info; SVnode* pVnode; TXN* txn; + SRange vers; }; struct SCompactInfo { diff --git a/source/dnode/vnode/src/tsdb/tsdbCommit2.c b/source/dnode/vnode/src/tsdb/tsdbCommit2.c index 79964c5636..4e096a7f17 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCommit2.c +++ b/source/dnode/vnode/src/tsdb/tsdbCommit2.c @@ -43,6 +43,8 @@ typedef struct { SDiskID did; TSKEY minKey; TSKEY maxKey; + int64_t minVer; + int64_t maxVer; STFileSet *fset; TABLEID tbid[1]; bool hasTSData; @@ -74,6 +76,8 @@ static int32_t tsdbCommitOpenWriter(SCommitter2 *committer) { .szPage = committer->szPage, .cmprAlg = committer->cmprAlg, .fid = committer->ctx->fid, + .minVer = committer->ctx->minVer, + .maxVer = committer->ctx->maxVer, .cid = committer->ctx->cid, .did = committer->ctx->did, .level = 0, @@ -87,6 +91,8 @@ static int32_t tsdbCommitOpenWriter(SCommitter2 *committer) { if (committer->ctx->fset->farr[ftype] != NULL) { config.files[ftype].exist = true; config.files[ftype].file = committer->ctx->fset->farr[ftype]->f[0]; + config.files[ftype].file.minVer = TMIN(config.files[ftype].file.minVer, config.minVer); + config.files[ftype].file.maxVer = TMAX(config.files[ftype].file.maxVer, config.maxVer); } } } @@ -463,6 +469,8 @@ static int32_t tsdbOpenCommitter(STsdb *tsdb, SCommitInfo *info, SCommitter2 *co committer->compactVersion = INT64_MAX; committer->ctx->cid = tsdbFSAllocEid(tsdb->pFS); committer->ctx->now = taosGetTimestampSec(); + committer->ctx->minVer = info->vers.start; + committer->ctx->maxVer = info->vers.end; committer->ctx->nextKey = tsdb->imem->minKey; if (tsdb->imem->nDel > 0) { diff --git 
a/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c b/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c index 6e4cb517ff..3265bb7cc7 100644 --- a/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c +++ b/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c @@ -589,6 +589,8 @@ static int32_t tsdbDataFileWriterDoOpen(SDataFileWriter *writer) { .fid = writer->config->fid, .cid = writer->config->cid, .size = 0, + .minVer = writer->config->minVer, + .maxVer = writer->config->maxVer, }; // .data @@ -602,6 +604,8 @@ static int32_t tsdbDataFileWriterDoOpen(SDataFileWriter *writer) { .fid = writer->config->fid, .cid = writer->config->cid, .size = 0, + .minVer = writer->config->minVer, + .maxVer = writer->config->maxVer, }; } @@ -616,6 +620,8 @@ static int32_t tsdbDataFileWriterDoOpen(SDataFileWriter *writer) { .fid = writer->config->fid, .cid = writer->config->cid, .size = 0, + .minVer = writer->config->minVer, + .maxVer = writer->config->maxVer, }; } @@ -627,6 +633,8 @@ static int32_t tsdbDataFileWriterDoOpen(SDataFileWriter *writer) { .fid = writer->config->fid, .cid = writer->config->cid, .size = 0, + .minVer = writer->config->minVer, + .maxVer = writer->config->maxVer, }; writer->ctx->opened = true; diff --git a/source/dnode/vnode/src/tsdb/tsdbDataFileRW.h b/source/dnode/vnode/src/tsdb/tsdbDataFileRW.h index 827b58fb4a..e87c00d382 100644 --- a/source/dnode/vnode/src/tsdb/tsdbDataFileRW.h +++ b/source/dnode/vnode/src/tsdb/tsdbDataFileRW.h @@ -76,6 +76,8 @@ typedef struct SDataFileWriterConfig { int32_t maxRow; int32_t szPage; int32_t fid; + int64_t minVer; + int64_t maxVer; int64_t cid; SDiskID did; int64_t compactVersion; @@ -101,4 +103,4 @@ int32_t tsdbDataFileWriteTombRecord(SDataFileWriter *writer, const STombRecord * } #endif -#endif /*_TSDB_DATA_FILE_RW_H*/ \ No newline at end of file +#endif /*_TSDB_DATA_FILE_RW_H*/ diff --git a/source/dnode/vnode/src/tsdb/tsdbFSetRW.c b/source/dnode/vnode/src/tsdb/tsdbFSetRW.c index 83ae8c2429..801ae59838 100644 --- 
a/source/dnode/vnode/src/tsdb/tsdbFSetRW.c +++ b/source/dnode/vnode/src/tsdb/tsdbFSetRW.c @@ -143,6 +143,8 @@ int32_t tsdbFSetWriterOpen(SFSetWriterConfig *config, SFSetWriter **writer) { .maxRow = config->maxRow, .szPage = config->szPage, .fid = config->fid, + .minVer = config->minVer, + .maxVer = config->maxVer, .cid = config->cid, .did = config->did, .compactVersion = config->compactVersion, @@ -168,6 +170,8 @@ int32_t tsdbFSetWriterOpen(SFSetWriterConfig *config, SFSetWriter **writer) { .compactVersion = config->compactVersion, .did = config->did, .fid = config->fid, + .minVer = config->minVer, + .maxVer = config->maxVer, .cid = config->cid, .level = config->level, .skmTb = writer[0]->skmTb, @@ -292,4 +296,4 @@ _exit: TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); } return code; -} \ No newline at end of file +} diff --git a/source/dnode/vnode/src/tsdb/tsdbFSetRW.h b/source/dnode/vnode/src/tsdb/tsdbFSetRW.h index b5710407cf..a733bb3c44 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFSetRW.h +++ b/source/dnode/vnode/src/tsdb/tsdbFSetRW.h @@ -34,6 +34,8 @@ typedef struct { int32_t szPage; int8_t cmprAlg; int32_t fid; + int64_t minVer; + int64_t maxVer; int64_t cid; SDiskID did; int32_t level; @@ -52,4 +54,4 @@ int32_t tsdbFSetWriteTombRecord(SFSetWriter *writer, const STombRecord *tombReco } #endif -#endif /*_TSDB_FSET_RW_H*/ \ No newline at end of file +#endif /*_TSDB_FSET_RW_H*/ diff --git a/source/dnode/vnode/src/tsdb/tsdbMerge.c b/source/dnode/vnode/src/tsdb/tsdbMerge.c index 42a8b5bb3f..e659cedba3 100644 --- a/source/dnode/vnode/src/tsdb/tsdbMerge.c +++ b/source/dnode/vnode/src/tsdb/tsdbMerge.c @@ -313,6 +313,7 @@ static int32_t tsdbMergeFileSetBeginOpenWriter(SMerger *merger) { if (merger->ctx->fset->farr[ftype]) { config.files[ftype].exist = true; config.files[ftype].file = merger->ctx->fset->farr[ftype]->f[0]; + } else { config.files[ftype].exist = false; } diff --git a/source/dnode/vnode/src/tsdb/tsdbSttFileRW.c 
b/source/dnode/vnode/src/tsdb/tsdbSttFileRW.c index fa8d2d5ba4..4f1eb49959 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSttFileRW.c +++ b/source/dnode/vnode/src/tsdb/tsdbSttFileRW.c @@ -694,6 +694,8 @@ static int32_t tsdbSttFWriterDoOpen(SSttFileWriter *writer) { .fid = writer->config->fid, .cid = writer->config->cid, .size = 0, + .minVer = writer->config->minVer, + .maxVer = writer->config->maxVer, .stt[0] = { .level = writer->config->level, diff --git a/source/dnode/vnode/src/tsdb/tsdbSttFileRW.h b/source/dnode/vnode/src/tsdb/tsdbSttFileRW.h index 242b55795c..d0481d5ec3 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSttFileRW.h +++ b/source/dnode/vnode/src/tsdb/tsdbSttFileRW.h @@ -79,6 +79,8 @@ struct SSttFileWriterConfig { int64_t compactVersion; SDiskID did; int32_t fid; + int64_t minVer; + int64_t maxVer; int64_t cid; int32_t level; SSkmInfo *skmTb; @@ -90,4 +92,4 @@ struct SSttFileWriterConfig { } #endif -#endif /*_TSDB_STT_FILE_RW_H*/ \ No newline at end of file +#endif /*_TSDB_STT_FILE_RW_H*/ diff --git a/source/dnode/vnode/src/vnd/vnodeCommit.c b/source/dnode/vnode/src/vnd/vnodeCommit.c index 136168c5cc..775b298268 100644 --- a/source/dnode/vnode/src/vnd/vnodeCommit.c +++ b/source/dnode/vnode/src/vnd/vnodeCommit.c @@ -285,6 +285,7 @@ static int32_t vnodePrepareCommit(SVnode *pVnode, SCommitInfo *pInfo) { int32_t code = 0; int32_t lino = 0; char dir[TSDB_FILENAME_LEN] = {0}; + int64_t lastCommitted = pInfo->info.state.committed; tsem_wait(&pVnode->canCommit); @@ -296,6 +297,8 @@ static int32_t vnodePrepareCommit(SVnode *pVnode, SCommitInfo *pInfo) { pInfo->info.state.committed = pVnode->state.applied; pInfo->info.state.commitTerm = pVnode->state.applyTerm; pInfo->info.state.commitID = ++pVnode->state.commitID; + pInfo->vers.start = lastCommitted + 1; + pInfo->vers.end = pInfo->info.state.committed; pInfo->pVnode = pVnode; pInfo->txn = metaGetTxn(pVnode->pMeta); From c8a46394f1774da2ba857d46279ade0f62924798 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: 
Fri, 25 Aug 2023 19:05:03 +0800 Subject: [PATCH 06/61] enh: set default values of minVer and maxVer while upgrading from CURRENT --- source/dnode/vnode/src/tsdb/tsdbUpgrade.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/source/dnode/vnode/src/tsdb/tsdbUpgrade.c b/source/dnode/vnode/src/tsdb/tsdbUpgrade.c index 0884c32385..225822ed97 100644 --- a/source/dnode/vnode/src/tsdb/tsdbUpgrade.c +++ b/source/dnode/vnode/src/tsdb/tsdbUpgrade.c @@ -78,6 +78,8 @@ static int32_t tsdbUpgradeHead(STsdb *tsdb, SDFileSet *pDFileSet, SDataFReader * .fid = fset->fid, .cid = pDFileSet->pHeadF->commitID, .size = pDFileSet->pHeadF->size, + .minVer = VERSION_MAX, + .maxVer = VERSION_MIN, }; code = tsdbTFileObjInit(tsdb, &file, &fset->farr[TSDB_FTYPE_HEAD]); @@ -182,6 +184,8 @@ static int32_t tsdbUpgradeData(STsdb *tsdb, SDFileSet *pDFileSet, SDataFReader * .fid = fset->fid, .cid = pDFileSet->pDataF->commitID, .size = pDFileSet->pDataF->size, + .minVer = VERSION_MAX, + .maxVer = VERSION_MIN, }; code = tsdbTFileObjInit(tsdb, &file, &fset->farr[TSDB_FTYPE_DATA]); @@ -208,6 +212,8 @@ static int32_t tsdbUpgradeSma(STsdb *tsdb, SDFileSet *pDFileSet, SDataFReader *r .fid = fset->fid, .cid = pDFileSet->pSmaF->commitID, .size = pDFileSet->pSmaF->size, + .minVer = VERSION_MAX, + .maxVer = VERSION_MIN, }; code = tsdbTFileObjInit(tsdb, &file, &fset->farr[TSDB_FTYPE_SMA]); @@ -253,6 +259,8 @@ static int32_t tsdbUpgradeSttFile(STsdb *tsdb, SDFileSet *pDFileSet, SDataFReade .fid = fset->fid, .cid = pSttF->commitID, .size = pSttF->size, + .minVer = VERSION_MAX, + .maxVer = VERSION_MIN, }; code = tsdbTFileObjInit(tsdb, &file, &fobj); TSDB_CHECK_CODE(code, lino, _exit1); @@ -382,6 +390,8 @@ static int32_t tsdbUpgradeOpenTombFile(STsdb *tsdb, STFileSet *fset, STsdbFD **f .fid = fset->fid, .cid = 0, .size = 0, + .minVer = VERSION_MAX, + .maxVer = VERSION_MIN, }; code = tsdbTFileObjInit(tsdb, &file, fobj); @@ -398,6 +408,8 @@ static int32_t tsdbUpgradeOpenTombFile(STsdb *tsdb, STFileSet 
*fset, STsdbFD **f .fid = fset->fid, .cid = 0, .size = 0, + .minVer = VERSION_MAX, + .maxVer = VERSION_MIN, }; code = tsdbTFileObjInit(tsdb, &file, fobj); From 1446b4473328934d2f5967a03328c6c4fa573a0e Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Thu, 31 Aug 2023 10:25:53 +0800 Subject: [PATCH 07/61] fixup: remove minVer and maxVer in FSetWriterConfig --- source/dnode/vnode/src/inc/vnodeInt.h | 6 ------ source/dnode/vnode/src/tsdb/tsdbCommit2.c | 6 ------ source/dnode/vnode/src/tsdb/tsdbFSetRW.c | 8 ++++---- source/dnode/vnode/src/tsdb/tsdbFSetRW.h | 2 -- source/dnode/vnode/src/vnd/vnodeCommit.c | 2 -- 5 files changed, 4 insertions(+), 20 deletions(-) diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 7a242b55cf..cc355c5f32 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -527,16 +527,10 @@ struct SSnapDataHdr { uint8_t data[]; }; -typedef struct SRange { - int64_t start; - int64_t end; -} SRange; - struct SCommitInfo { SVnodeInfo info; SVnode* pVnode; TXN* txn; - SRange vers; }; struct SCompactInfo { diff --git a/source/dnode/vnode/src/tsdb/tsdbCommit2.c b/source/dnode/vnode/src/tsdb/tsdbCommit2.c index 4e096a7f17..302991fb0f 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCommit2.c +++ b/source/dnode/vnode/src/tsdb/tsdbCommit2.c @@ -76,8 +76,6 @@ static int32_t tsdbCommitOpenWriter(SCommitter2 *committer) { .szPage = committer->szPage, .cmprAlg = committer->cmprAlg, .fid = committer->ctx->fid, - .minVer = committer->ctx->minVer, - .maxVer = committer->ctx->maxVer, .cid = committer->ctx->cid, .did = committer->ctx->did, .level = 0, @@ -91,8 +89,6 @@ static int32_t tsdbCommitOpenWriter(SCommitter2 *committer) { if (committer->ctx->fset->farr[ftype] != NULL) { config.files[ftype].exist = true; config.files[ftype].file = committer->ctx->fset->farr[ftype]->f[0]; - config.files[ftype].file.minVer = TMIN(config.files[ftype].file.minVer, config.minVer); - 
config.files[ftype].file.maxVer = TMAX(config.files[ftype].file.maxVer, config.maxVer); } } } @@ -469,8 +465,6 @@ static int32_t tsdbOpenCommitter(STsdb *tsdb, SCommitInfo *info, SCommitter2 *co committer->compactVersion = INT64_MAX; committer->ctx->cid = tsdbFSAllocEid(tsdb->pFS); committer->ctx->now = taosGetTimestampSec(); - committer->ctx->minVer = info->vers.start; - committer->ctx->maxVer = info->vers.end; committer->ctx->nextKey = tsdb->imem->minKey; if (tsdb->imem->nDel > 0) { diff --git a/source/dnode/vnode/src/tsdb/tsdbFSetRW.c b/source/dnode/vnode/src/tsdb/tsdbFSetRW.c index 801ae59838..4397d1cb5c 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFSetRW.c +++ b/source/dnode/vnode/src/tsdb/tsdbFSetRW.c @@ -143,8 +143,8 @@ int32_t tsdbFSetWriterOpen(SFSetWriterConfig *config, SFSetWriter **writer) { .maxRow = config->maxRow, .szPage = config->szPage, .fid = config->fid, - .minVer = config->minVer, - .maxVer = config->maxVer, + .minVer = VERSION_MAX, + .maxVer = VERSION_MIN, .cid = config->cid, .did = config->did, .compactVersion = config->compactVersion, @@ -170,8 +170,8 @@ int32_t tsdbFSetWriterOpen(SFSetWriterConfig *config, SFSetWriter **writer) { .compactVersion = config->compactVersion, .did = config->did, .fid = config->fid, - .minVer = config->minVer, - .maxVer = config->maxVer, + .minVer = VERSION_MAX, + .maxVer = VERSION_MIN, .cid = config->cid, .level = config->level, .skmTb = writer[0]->skmTb, diff --git a/source/dnode/vnode/src/tsdb/tsdbFSetRW.h b/source/dnode/vnode/src/tsdb/tsdbFSetRW.h index a733bb3c44..0a8049cded 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFSetRW.h +++ b/source/dnode/vnode/src/tsdb/tsdbFSetRW.h @@ -34,8 +34,6 @@ typedef struct { int32_t szPage; int8_t cmprAlg; int32_t fid; - int64_t minVer; - int64_t maxVer; int64_t cid; SDiskID did; int32_t level; diff --git a/source/dnode/vnode/src/vnd/vnodeCommit.c b/source/dnode/vnode/src/vnd/vnodeCommit.c index 775b298268..50ca2f5d03 100644 --- a/source/dnode/vnode/src/vnd/vnodeCommit.c +++ 
b/source/dnode/vnode/src/vnd/vnodeCommit.c @@ -297,8 +297,6 @@ static int32_t vnodePrepareCommit(SVnode *pVnode, SCommitInfo *pInfo) { pInfo->info.state.committed = pVnode->state.applied; pInfo->info.state.commitTerm = pVnode->state.applyTerm; pInfo->info.state.commitID = ++pVnode->state.commitID; - pInfo->vers.start = lastCommitted + 1; - pInfo->vers.end = pInfo->info.state.committed; pInfo->pVnode = pVnode; pInfo->txn = metaGetTxn(pVnode->pMeta); From 3cd458f5c98a53e1616b4bba78210f6bfeab5425 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Thu, 31 Aug 2023 11:13:03 +0800 Subject: [PATCH 08/61] enh: fill minVer and maxVer of nf in STFileObj during migration --- source/dnode/vnode/src/tsdb/tsdbRetention.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/source/dnode/vnode/src/tsdb/tsdbRetention.c b/source/dnode/vnode/src/tsdb/tsdbRetention.c index c3b1a18fd8..f2665dcf26 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRetention.c +++ b/source/dnode/vnode/src/tsdb/tsdbRetention.c @@ -151,6 +151,8 @@ static int32_t tsdbDoMigrateFileObj(SRTNer *rtner, const STFileObj *fobj, const .type = fobj->f->type, .did = did[0], .fid = fobj->f->fid, + .minVer = fobj->f->minVer, + .maxVer = fobj->f->maxVer, .cid = fobj->f->cid, .size = fobj->f->size, .stt[0] = @@ -198,6 +200,8 @@ static int32_t tsdbMigrateDataFileS3(SRTNer *rtner, const STFileObj *fobj, const .type = fobj->f->type, .did = did[0], .fid = fobj->f->fid, + .minVer = fobj->f->minVer, + .maxVer = fobj->f->maxVer, .cid = fobj->f->cid, .size = fobj->f->size, .stt[0] = From 823aad4a5ec831854acf930017bc647a703bc819 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Thu, 31 Aug 2023 14:06:09 +0800 Subject: [PATCH 09/61] enh: record version range of STFile for data and stt files --- source/dnode/vnode/src/tsdb/tsdbDataFileRW.c | 62 ++++++++++++++++---- source/dnode/vnode/src/tsdb/tsdbSttFileRW.c | 32 ++++++++-- source/dnode/vnode/src/tsdb/tsdbUpgrade.c | 16 +++-- 3 files changed, 85 insertions(+), 25 deletions(-) diff 
--git a/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c b/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c index 3265bb7cc7..64c9e9d517 100644 --- a/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c +++ b/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c @@ -16,7 +16,7 @@ #include "tsdbDataFileRW.h" extern int32_t tsdbFileWriteTombBlock(STsdbFD *fd, STombBlock *tombBlock, int8_t cmprAlg, int64_t *fileSize, - TTombBlkArray *tombBlkArray, uint8_t **bufArr); + TTombBlkArray *tombBlkArray, uint8_t **bufArr, SVersionRange *range); extern int32_t tsdbFileWriteTombBlk(STsdbFD *fd, const TTombBlkArray *tombBlkArray, SFDataPtr *ptr, int64_t *fileSize); // SDataFileReader ============================================= @@ -589,8 +589,8 @@ static int32_t tsdbDataFileWriterDoOpen(SDataFileWriter *writer) { .fid = writer->config->fid, .cid = writer->config->cid, .size = 0, - .minVer = writer->config->minVer, - .maxVer = writer->config->maxVer, + .minVer = VERSION_MAX, + .maxVer = VERSION_MIN, }; // .data @@ -604,8 +604,8 @@ static int32_t tsdbDataFileWriterDoOpen(SDataFileWriter *writer) { .fid = writer->config->fid, .cid = writer->config->cid, .size = 0, - .minVer = writer->config->minVer, - .maxVer = writer->config->maxVer, + .minVer = VERSION_MAX, + .maxVer = VERSION_MIN, }; } @@ -620,8 +620,8 @@ static int32_t tsdbDataFileWriterDoOpen(SDataFileWriter *writer) { .fid = writer->config->fid, .cid = writer->config->cid, .size = 0, - .minVer = writer->config->minVer, - .maxVer = writer->config->maxVer, + .minVer = VERSION_MAX, + .maxVer = VERSION_MIN, }; } @@ -633,8 +633,8 @@ static int32_t tsdbDataFileWriterDoOpen(SDataFileWriter *writer) { .fid = writer->config->fid, .cid = writer->config->cid, .size = 0, - .minVer = writer->config->minVer, - .maxVer = writer->config->maxVer, + .minVer = VERSION_MAX, + .maxVer = VERSION_MIN, }; writer->ctx->opened = true; @@ -646,8 +646,14 @@ _exit: return code; } +static int32_t tsdbSDataUpdVerRange(SDataFileWriterConfig *config, SVersionRange *range) { + 
config->minVer = TMIN(config->minVer, range->minVer); + config->maxVer = TMAX(config->maxVer, range->maxVer); + return 0; +} + int32_t tsdbFileWriteBrinBlock(STsdbFD *fd, SBrinBlock *brinBlock, int8_t cmprAlg, int64_t *fileSize, - TBrinBlkArray *brinBlkArray, uint8_t **bufArr) { + TBrinBlkArray *brinBlkArray, uint8_t **bufArr, SVersionRange *range) { if (BRIN_BLOCK_SIZE(brinBlock) == 0) return 0; int32_t code; @@ -686,6 +692,9 @@ int32_t tsdbFileWriteBrinBlock(STsdbFD *fd, SBrinBlock *brinBlock, int8_t cmprAl } } + range->minVer = brinBlk->minVer; + range->maxVer = brinBlk->maxVer; + // write to file for (int32_t i = 0; i < ARRAY_SIZE(brinBlock->dataArr1); i++) { code = tsdbCmprData((uint8_t *)TARRAY2_DATA(brinBlock->dataArr1 + i), TARRAY2_DATA_LEN(brinBlock->dataArr1 + i), @@ -734,11 +743,15 @@ static int32_t tsdbDataFileWriteBrinBlock(SDataFileWriter *writer) { int32_t code = 0; int32_t lino = 0; + SVersionRange range = {.minVer = VERSION_MAX, .maxVer = VERSION_MIN}; code = tsdbFileWriteBrinBlock(writer->fd[TSDB_FTYPE_HEAD], writer->brinBlock, writer->config->cmprAlg, - &writer->files[TSDB_FTYPE_HEAD].size, writer->brinBlkArray, writer->config->bufArr); + &writer->files[TSDB_FTYPE_HEAD].size, writer->brinBlkArray, writer->config->bufArr, + &range); TSDB_CHECK_CODE(code, lino, _exit); + tsdbSDataUpdVerRange(writer->config, &range); + _exit: if (code) { TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); @@ -803,6 +816,9 @@ static int32_t tsdbDataFileDoWriteBlockData(SDataFileWriter *writer, SBlockData } } + SVersionRange range = {.minVer = record->minVer, .maxVer = record->maxVer}; + tsdbSDataUpdVerRange(writer->config, &range); + // to .data file int32_t sizeArr[5] = {0}; @@ -1170,10 +1186,12 @@ static int32_t tsdbDataFileDoWriteTombBlock(SDataFileWriter *writer) { int32_t code = 0; int32_t lino = 0; - + + SVersionRange range = {.minVer = VERSION_MAX, .maxVer = VERSION_MIN}; code = tsdbFileWriteTombBlock(writer->fd[TSDB_FTYPE_TOMB], 
writer->tombBlock, writer->config->cmprAlg, - &writer->files[TSDB_FTYPE_TOMB].size, writer->tombBlkArray, writer->config->bufArr); + &writer->files[TSDB_FTYPE_TOMB].size, writer->tombBlkArray, writer->config->bufArr, &range); TSDB_CHECK_CODE(code, lino, _exit); + tsdbSDataUpdVerRange(writer->config, &range); _exit: if (code) { @@ -1358,6 +1376,8 @@ static int32_t tsdbDataFileWriterCloseCommit(SDataFileWriter *writer, TFileOpArr .fid = writer->config->fid, .nf = writer->files[ftype], }; + op.nf.minVer = TMIN(op.nf.minVer, writer->config->minVer); + op.nf.maxVer = TMAX(op.nf.maxVer, writer->config->maxVer); code = TARRAY2_APPEND(opArr, op); TSDB_CHECK_CODE(code, lino, _exit); @@ -1369,6 +1389,8 @@ static int32_t tsdbDataFileWriterCloseCommit(SDataFileWriter *writer, TFileOpArr .fid = writer->config->fid, .nf = writer->files[ftype], }; + op.nf.minVer = TMIN(op.nf.minVer, writer->config->minVer); + op.nf.maxVer = TMAX(op.nf.maxVer, writer->config->maxVer); code = TARRAY2_APPEND(opArr, op); TSDB_CHECK_CODE(code, lino, _exit); } else if (writer->config->files[ftype].file.size != writer->files[ftype].size) { @@ -1378,6 +1400,8 @@ static int32_t tsdbDataFileWriterCloseCommit(SDataFileWriter *writer, TFileOpArr .of = writer->config->files[ftype].file, .nf = writer->files[ftype], }; + op.nf.minVer = TMIN(op.nf.minVer, writer->config->minVer); + op.nf.maxVer = TMAX(op.nf.maxVer, writer->config->maxVer); code = TARRAY2_APPEND(opArr, op); TSDB_CHECK_CODE(code, lino, _exit); } @@ -1390,6 +1414,8 @@ static int32_t tsdbDataFileWriterCloseCommit(SDataFileWriter *writer, TFileOpArr .fid = writer->config->fid, .nf = writer->files[ftype], }; + op.nf.minVer = TMIN(op.nf.minVer, writer->config->minVer); + op.nf.maxVer = TMAX(op.nf.maxVer, writer->config->maxVer); code = TARRAY2_APPEND(opArr, op); TSDB_CHECK_CODE(code, lino, _exit); } else if (writer->config->files[ftype].file.size != writer->files[ftype].size) { @@ -1399,6 +1425,8 @@ static int32_t 
tsdbDataFileWriterCloseCommit(SDataFileWriter *writer, TFileOpArr .of = writer->config->files[ftype].file, .nf = writer->files[ftype], }; + op.nf.minVer = TMIN(op.nf.minVer, writer->config->minVer); + op.nf.maxVer = TMAX(op.nf.maxVer, writer->config->maxVer); code = TARRAY2_APPEND(opArr, op); TSDB_CHECK_CODE(code, lino, _exit); } @@ -1438,6 +1466,8 @@ static int32_t tsdbDataFileWriterCloseCommit(SDataFileWriter *writer, TFileOpArr .fid = writer->config->fid, .nf = writer->files[ftype], }; + op.nf.minVer = TMIN(op.nf.minVer, writer->config->minVer); + op.nf.maxVer = TMAX(op.nf.maxVer, writer->config->maxVer); code = TARRAY2_APPEND(opArr, op); TSDB_CHECK_CODE(code, lino, _exit); } @@ -1447,9 +1477,14 @@ static int32_t tsdbDataFileWriterCloseCommit(SDataFileWriter *writer, TFileOpArr code = tsdbFsyncFile(writer->fd[i]); TSDB_CHECK_CODE(code, lino, _exit); tsdbCloseFile(&writer->fd[i]); + writer->files[i].minVer = TMIN(writer->files[i].minVer, writer->config->minVer); + writer->files[i].maxVer = TMAX(writer->files[i].maxVer, writer->config->maxVer); } } + writer->config->minVer = VERSION_MAX; + writer->config->maxVer = VERSION_MIN; + _exit: if (code) { TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); @@ -1606,6 +1641,7 @@ int32_t tsdbDataFileWriteBlockData(SDataFileWriter *writer, SBlockData *bData) { ) { code = tsdbDataFileDoWriteBlockData(writer, bData); TSDB_CHECK_CODE(code, lino, _exit); + } else { for (int32_t i = 0; i < bData->nRow; ++i) { TSDBROW row[1] = {tsdbRowFromBlockData(bData, i)}; diff --git a/source/dnode/vnode/src/tsdb/tsdbSttFileRW.c b/source/dnode/vnode/src/tsdb/tsdbSttFileRW.c index 4f1eb49959..eaa5e40726 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSttFileRW.c +++ b/source/dnode/vnode/src/tsdb/tsdbSttFileRW.c @@ -401,8 +401,14 @@ struct SSttFileWriter { uint8_t *bufArr[5]; }; -int32_t tsdbFileDoWriteBlockData(STsdbFD *fd, SBlockData *blockData, int8_t cmprAlg, int64_t *fileSize, - TSttBlkArray *sttBlkArray, uint8_t **bufArr) { 
+static int32_t tsdbSSttUpdVerRange(SSttFileWriterConfig *config, SVersionRange *range) { + config->minVer = TMIN(config->minVer, range->minVer); + config->maxVer = TMAX(config->maxVer, range->maxVer); + return 0; +} + +static int32_t tsdbFileDoWriteSttBlockData(STsdbFD *fd, SBlockData *blockData, int8_t cmprAlg, int64_t *fileSize, + TSttBlkArray *sttBlkArray, uint8_t **bufArr, SVersionRange *range) { if (blockData->nRow == 0) return 0; int32_t code = 0; @@ -425,6 +431,9 @@ int32_t tsdbFileDoWriteBlockData(STsdbFD *fd, SBlockData *blockData, int8_t cmpr if (sttBlk->maxVer < blockData->aVersion[iRow]) sttBlk->maxVer = blockData->aVersion[iRow]; } + range->minVer = sttBlk->minVer; + range->maxVer = sttBlk->maxVer; + int32_t sizeArr[5] = {0}; code = tCmprBlockData(blockData, cmprAlg, NULL, NULL, bufArr, sizeArr); if (code) return code; @@ -455,9 +464,11 @@ static int32_t tsdbSttFileDoWriteBlockData(SSttFileWriter *writer) { int32_t code = 0; int32_t lino = 0; - code = tsdbFileDoWriteBlockData(writer->fd, writer->blockData, writer->config->cmprAlg, &writer->file->size, - writer->sttBlkArray, writer->config->bufArr); + SVersionRange range = {.minVer = VERSION_MAX, .maxVer = VERSION_MIN}; + code = tsdbFileDoWriteSttBlockData(writer->fd, writer->blockData, writer->config->cmprAlg, &writer->file->size, + writer->sttBlkArray, writer->config->bufArr, &range); TSDB_CHECK_CODE(code, lino, _exit); + tsdbSSttUpdVerRange(writer->config, &range); _exit: if (code) { @@ -518,7 +529,7 @@ _exit: } int32_t tsdbFileWriteTombBlock(STsdbFD *fd, STombBlock *tombBlock, int8_t cmprAlg, int64_t *fileSize, - TTombBlkArray *tombBlkArray, uint8_t **bufArr) { + TTombBlkArray *tombBlkArray, uint8_t **bufArr, SVersionRange *range) { int32_t code; if (TOMB_BLOCK_SIZE(tombBlock) == 0) return 0; @@ -554,6 +565,9 @@ int32_t tsdbFileWriteTombBlock(STsdbFD *fd, STombBlock *tombBlock, int8_t cmprAl } } + range->minVer = tombBlk->minVer; + range->maxVer = tombBlk->maxVer; + for (int32_t i = 0; i < 
ARRAY_SIZE(tombBlock->dataArr); i++) { code = tsdbCmprData((uint8_t *)TARRAY2_DATA(&tombBlock->dataArr[i]), TARRAY2_DATA_LEN(&tombBlock->dataArr[i]), TSDB_DATA_TYPE_BIGINT, tombBlk->cmprAlg, &bufArr[0], 0, &tombBlk->size[i], &bufArr[1]); @@ -579,9 +593,11 @@ static int32_t tsdbSttFileDoWriteTombBlock(SSttFileWriter *writer) { int32_t code = 0; int32_t lino = 0; + SVersionRange range = {.minVer = VERSION_MAX, .maxVer = VERSION_MIN}; code = tsdbFileWriteTombBlock(writer->fd, writer->tombBlock, writer->config->cmprAlg, &writer->file->size, - writer->tombBlkArray, writer->config->bufArr); + writer->tombBlkArray, writer->config->bufArr, &range); TSDB_CHECK_CODE(code, lino, _exit); + tsdbSSttUpdVerRange(writer->config, &range); _exit: if (code) { @@ -784,6 +800,10 @@ static int32_t tsdbSttFWriterCloseCommit(SSttFileWriter *writer, TFileOpArray *o .fid = writer->config->fid, .nf = writer->file[0], }; + op.nf.minVer = TMIN(op.nf.minVer, writer->config->minVer); + op.nf.maxVer = TMAX(op.nf.maxVer, writer->config->maxVer); + writer->config->minVer = VERSION_MAX; + writer->config->maxVer = VERSION_MIN; code = TARRAY2_APPEND(opArray, op); TSDB_CHECK_CODE(code, lino, _exit); diff --git a/source/dnode/vnode/src/tsdb/tsdbUpgrade.c b/source/dnode/vnode/src/tsdb/tsdbUpgrade.c index 225822ed97..16f649cd9d 100644 --- a/source/dnode/vnode/src/tsdb/tsdbUpgrade.c +++ b/source/dnode/vnode/src/tsdb/tsdbUpgrade.c @@ -23,7 +23,7 @@ extern int32_t tsdbReadDataBlockEx(SDataFReader *pReader, SDataBlk *pDataBlk, SB extern int32_t save_fs(const TFileSetArray *arr, const char *fname); extern int32_t current_fname(STsdb *pTsdb, char *fname, EFCurrentT ftype); extern int32_t tsdbFileWriteBrinBlock(STsdbFD *fd, SBrinBlock *brinBlock, int8_t cmprAlg, int64_t *fileSize, - TBrinBlkArray *brinBlkArray, uint8_t **bufArr); + TBrinBlkArray *brinBlkArray, uint8_t **bufArr, SVersionRange *range); extern int32_t tsdbFileWriteBrinBlk(STsdbFD *fd, TBrinBlkArray *brinBlkArray, SFDataPtr *ptr, int64_t *fileSize); 
extern int32_t tsdbFileWriteHeadFooter(STsdbFD *fd, int64_t *fileSize, const SHeadFooter *footer); extern int32_t tsdbSttLvlInit(int32_t level, SSttLvl **lvl); @@ -31,7 +31,7 @@ extern int32_t tsdbSttLvlClear(SSttLvl **lvl); extern int32_t tsdbFileWriteSttBlk(STsdbFD *fd, const TSttBlkArray *sttBlkArray, SFDataPtr *ptr, int64_t *fileSize); extern int32_t tsdbFileWriteSttFooter(STsdbFD *fd, const SSttFooter *footer, int64_t *fileSize); extern int32_t tsdbFileWriteTombBlock(STsdbFD *fd, STombBlock *tombBlock, int8_t cmprAlg, int64_t *fileSize, - TTombBlkArray *tombBlkArray, uint8_t **bufArr); + TTombBlkArray *tombBlkArray, uint8_t **bufArr, SVersionRange *range); extern int32_t tsdbFileWriteTombBlk(STsdbFD *fd, const TTombBlkArray *tombBlkArray, SFDataPtr *ptr, int64_t *fileSize); extern int32_t tsdbFileWriteTombFooter(STsdbFD *fd, const STombFooter *footer, int64_t *fileSize); @@ -129,16 +129,18 @@ static int32_t tsdbUpgradeHead(STsdb *tsdb, SDFileSet *pDFileSet, SDataFReader * TSDB_CHECK_CODE(code, lino, _exit); if (BRIN_BLOCK_SIZE(ctx->brinBlock) >= ctx->maxRow) { + SVersionRange range = {.minVer = VERSION_MAX, .maxVer = VERSION_MIN}; code = tsdbFileWriteBrinBlock(ctx->fd, ctx->brinBlock, ctx->cmprAlg, &fset->farr[TSDB_FTYPE_HEAD]->f->size, - ctx->brinBlkArray, ctx->bufArr); + ctx->brinBlkArray, ctx->bufArr, &range); TSDB_CHECK_CODE(code, lino, _exit); } } } if (BRIN_BLOCK_SIZE(ctx->brinBlock) > 0) { + SVersionRange range = {.minVer = VERSION_MAX, .maxVer = VERSION_MIN}; code = tsdbFileWriteBrinBlock(ctx->fd, ctx->brinBlock, ctx->cmprAlg, &fset->farr[TSDB_FTYPE_HEAD]->f->size, - ctx->brinBlkArray, ctx->bufArr); + ctx->brinBlkArray, ctx->bufArr, &range); TSDB_CHECK_CODE(code, lino, _exit); } @@ -493,8 +495,9 @@ static int32_t tsdbDumpTombDataToFSet(STsdb *tsdb, SDelFReader *reader, SArray * code = tsdbUpgradeOpenTombFile(tsdb, fset, &ctx->fd, &ctx->fobj, &ctx->toStt); TSDB_CHECK_CODE(code, lino, _exit); } + SVersionRange range = {.minVer = VERSION_MAX, .maxVer = 
VERSION_MIN}; code = tsdbFileWriteTombBlock(ctx->fd, ctx->tombBlock, ctx->cmprAlg, &ctx->fobj->f->size, ctx->tombBlkArray, - ctx->bufArr); + ctx->bufArr, &range); TSDB_CHECK_CODE(code, lino, _exit); } } @@ -505,8 +508,9 @@ static int32_t tsdbDumpTombDataToFSet(STsdb *tsdb, SDelFReader *reader, SArray * code = tsdbUpgradeOpenTombFile(tsdb, fset, &ctx->fd, &ctx->fobj, &ctx->toStt); TSDB_CHECK_CODE(code, lino, _exit); } + SVersionRange range = {.minVer = VERSION_MAX, .maxVer = VERSION_MIN}; code = tsdbFileWriteTombBlock(ctx->fd, ctx->tombBlock, ctx->cmprAlg, &ctx->fobj->f->size, ctx->tombBlkArray, - ctx->bufArr); + ctx->bufArr, &range); TSDB_CHECK_CODE(code, lino, _exit); } From ed6d7c4c4d52f15262ec96cea820c01ed8c8fc48 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Thu, 31 Aug 2023 14:59:07 +0800 Subject: [PATCH 10/61] refact: relocate declarations and defs of tsdbFileWriteTombBlock and tsdbFileWriteTombBlk --- source/dnode/vnode/src/tsdb/tsdbDataFileRW.c | 78 +++++++++++++++++++- source/dnode/vnode/src/tsdb/tsdbDataFileRW.h | 4 + source/dnode/vnode/src/tsdb/tsdbSttFileRW.c | 75 +------------------ source/dnode/vnode/src/tsdb/tsdbUpgrade.c | 5 +- 4 files changed, 81 insertions(+), 81 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c b/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c index 64c9e9d517..2798eb0291 100644 --- a/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c +++ b/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c @@ -15,10 +15,6 @@ #include "tsdbDataFileRW.h" -extern int32_t tsdbFileWriteTombBlock(STsdbFD *fd, STombBlock *tombBlock, int8_t cmprAlg, int64_t *fileSize, - TTombBlkArray *tombBlkArray, uint8_t **bufArr, SVersionRange *range); -extern int32_t tsdbFileWriteTombBlk(STsdbFD *fd, const TTombBlkArray *tombBlkArray, SFDataPtr *ptr, int64_t *fileSize); - // SDataFileReader ============================================= struct SDataFileReader { SDataFileReaderConfig config[1]; @@ -1167,6 +1163,65 @@ int32_t tsdbFileWriteHeadFooter(STsdbFD 
*fd, int64_t *fileSize, const SHeadFoote return 0; } +int32_t tsdbFileWriteTombBlock(STsdbFD *fd, STombBlock *tombBlock, int8_t cmprAlg, int64_t *fileSize, + TTombBlkArray *tombBlkArray, uint8_t **bufArr, SVersionRange *range) { + int32_t code; + + if (TOMB_BLOCK_SIZE(tombBlock) == 0) return 0; + + STombBlk tombBlk[1] = {{ + .dp[0] = + { + .offset = *fileSize, + .size = 0, + }, + .minTbid = + { + .suid = TARRAY2_FIRST(tombBlock->suid), + .uid = TARRAY2_FIRST(tombBlock->uid), + }, + .maxTbid = + { + .suid = TARRAY2_LAST(tombBlock->suid), + .uid = TARRAY2_LAST(tombBlock->uid), + }, + .minVer = TARRAY2_FIRST(tombBlock->version), + .maxVer = TARRAY2_FIRST(tombBlock->version), + .numRec = TOMB_BLOCK_SIZE(tombBlock), + .cmprAlg = cmprAlg, + }}; + + for (int32_t i = 1; i < TOMB_BLOCK_SIZE(tombBlock); i++) { + if (tombBlk->minVer > TARRAY2_GET(tombBlock->version, i)) { + tombBlk->minVer = TARRAY2_GET(tombBlock->version, i); + } + if (tombBlk->maxVer < TARRAY2_GET(tombBlock->version, i)) { + tombBlk->maxVer = TARRAY2_GET(tombBlock->version, i); + } + } + + range->minVer = tombBlk->minVer; + range->maxVer = tombBlk->maxVer; + + for (int32_t i = 0; i < ARRAY_SIZE(tombBlock->dataArr); i++) { + code = tsdbCmprData((uint8_t *)TARRAY2_DATA(&tombBlock->dataArr[i]), TARRAY2_DATA_LEN(&tombBlock->dataArr[i]), + TSDB_DATA_TYPE_BIGINT, tombBlk->cmprAlg, &bufArr[0], 0, &tombBlk->size[i], &bufArr[1]); + if (code) return code; + + code = tsdbWriteFile(fd, *fileSize, bufArr[0], tombBlk->size[i]); + if (code) return code; + + tombBlk->dp->size += tombBlk->size[i]; + *fileSize += tombBlk->size[i]; + } + + code = TARRAY2_APPEND_PTR(tombBlkArray, tombBlk); + if (code) return code; + + tTombBlockClear(tombBlock); + return 0; +} + static int32_t tsdbDataFileWriteHeadFooter(SDataFileWriter *writer) { int32_t code = 0; int32_t lino = 0; @@ -1200,6 +1255,21 @@ _exit: return code; } +int32_t tsdbFileWriteTombBlk(STsdbFD *fd, const TTombBlkArray *tombBlkArray, SFDataPtr *ptr, int64_t *fileSize) { + 
ptr->size = TARRAY2_DATA_LEN(tombBlkArray); + if (ptr->size > 0) { + ptr->offset = *fileSize; + + int32_t code = tsdbWriteFile(fd, *fileSize, (const uint8_t *)TARRAY2_DATA(tombBlkArray), ptr->size); + if (code) { + return code; + } + + *fileSize += ptr->size; + } + return 0; +} + static int32_t tsdbDataFileDoWriteTombBlk(SDataFileWriter *writer) { ASSERT(TARRAY2_SIZE(writer->tombBlkArray) > 0); diff --git a/source/dnode/vnode/src/tsdb/tsdbDataFileRW.h b/source/dnode/vnode/src/tsdb/tsdbDataFileRW.h index e87c00d382..b084ed13ae 100644 --- a/source/dnode/vnode/src/tsdb/tsdbDataFileRW.h +++ b/source/dnode/vnode/src/tsdb/tsdbDataFileRW.h @@ -97,7 +97,11 @@ int32_t tsdbDataFileWriteRow(SDataFileWriter *writer, SRowInfo *row); int32_t tsdbDataFileWriteBlockData(SDataFileWriter *writer, SBlockData *bData); int32_t tsdbDataFileFlush(SDataFileWriter *writer); +// tomb int32_t tsdbDataFileWriteTombRecord(SDataFileWriter *writer, const STombRecord *record); +int32_t tsdbFileWriteTombBlock(STsdbFD *fd, STombBlock *tombBlock, int8_t cmprAlg, int64_t *fileSize, + TTombBlkArray *tombBlkArray, uint8_t **bufArr, SVersionRange *range); +int32_t tsdbFileWriteTombBlk(STsdbFD *fd, const TTombBlkArray *tombBlkArray, SFDataPtr *ptr, int64_t *fileSize); #ifdef __cplusplus } diff --git a/source/dnode/vnode/src/tsdb/tsdbSttFileRW.c b/source/dnode/vnode/src/tsdb/tsdbSttFileRW.c index eaa5e40726..10fa1e2b0b 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSttFileRW.c +++ b/source/dnode/vnode/src/tsdb/tsdbSttFileRW.c @@ -14,6 +14,7 @@ */ #include "tsdbSttFileRW.h" +#include "tsdbDataFileRW.h" // SSttFReader ============================================================ struct SSttFileReader { @@ -528,65 +529,6 @@ _exit: return code; } -int32_t tsdbFileWriteTombBlock(STsdbFD *fd, STombBlock *tombBlock, int8_t cmprAlg, int64_t *fileSize, - TTombBlkArray *tombBlkArray, uint8_t **bufArr, SVersionRange *range) { - int32_t code; - - if (TOMB_BLOCK_SIZE(tombBlock) == 0) return 0; - - STombBlk tombBlk[1] = 
{{ - .dp[0] = - { - .offset = *fileSize, - .size = 0, - }, - .minTbid = - { - .suid = TARRAY2_FIRST(tombBlock->suid), - .uid = TARRAY2_FIRST(tombBlock->uid), - }, - .maxTbid = - { - .suid = TARRAY2_LAST(tombBlock->suid), - .uid = TARRAY2_LAST(tombBlock->uid), - }, - .minVer = TARRAY2_FIRST(tombBlock->version), - .maxVer = TARRAY2_FIRST(tombBlock->version), - .numRec = TOMB_BLOCK_SIZE(tombBlock), - .cmprAlg = cmprAlg, - }}; - - for (int32_t i = 1; i < TOMB_BLOCK_SIZE(tombBlock); i++) { - if (tombBlk->minVer > TARRAY2_GET(tombBlock->version, i)) { - tombBlk->minVer = TARRAY2_GET(tombBlock->version, i); - } - if (tombBlk->maxVer < TARRAY2_GET(tombBlock->version, i)) { - tombBlk->maxVer = TARRAY2_GET(tombBlock->version, i); - } - } - - range->minVer = tombBlk->minVer; - range->maxVer = tombBlk->maxVer; - - for (int32_t i = 0; i < ARRAY_SIZE(tombBlock->dataArr); i++) { - code = tsdbCmprData((uint8_t *)TARRAY2_DATA(&tombBlock->dataArr[i]), TARRAY2_DATA_LEN(&tombBlock->dataArr[i]), - TSDB_DATA_TYPE_BIGINT, tombBlk->cmprAlg, &bufArr[0], 0, &tombBlk->size[i], &bufArr[1]); - if (code) return code; - - code = tsdbWriteFile(fd, *fileSize, bufArr[0], tombBlk->size[i]); - if (code) return code; - - tombBlk->dp->size += tombBlk->size[i]; - *fileSize += tombBlk->size[i]; - } - - code = TARRAY2_APPEND_PTR(tombBlkArray, tombBlk); - if (code) return code; - - tTombBlockClear(tombBlock); - return 0; -} - static int32_t tsdbSttFileDoWriteTombBlock(SSttFileWriter *writer) { if (TOMB_BLOCK_SIZE(writer->tombBlock) == 0) return 0; @@ -655,21 +597,6 @@ _exit: return code; } -int32_t tsdbFileWriteTombBlk(STsdbFD *fd, const TTombBlkArray *tombBlkArray, SFDataPtr *ptr, int64_t *fileSize) { - ptr->size = TARRAY2_DATA_LEN(tombBlkArray); - if (ptr->size > 0) { - ptr->offset = *fileSize; - - int32_t code = tsdbWriteFile(fd, *fileSize, (const uint8_t *)TARRAY2_DATA(tombBlkArray), ptr->size); - if (code) { - return code; - } - - *fileSize += ptr->size; - } - return 0; -} - static int32_t 
tsdbSttFileDoWriteTombBlk(SSttFileWriter *writer) { int32_t code = 0; int32_t lino = 0; diff --git a/source/dnode/vnode/src/tsdb/tsdbUpgrade.c b/source/dnode/vnode/src/tsdb/tsdbUpgrade.c index 16f649cd9d..2a5b7f1080 100644 --- a/source/dnode/vnode/src/tsdb/tsdbUpgrade.c +++ b/source/dnode/vnode/src/tsdb/tsdbUpgrade.c @@ -20,6 +20,8 @@ extern void tsdbGetCurrentFName(STsdb *pTsdb, char *current, char *current_t) extern int32_t tsdbReadDataBlockEx(SDataFReader *pReader, SDataBlk *pDataBlk, SBlockData *pBlockData); // new +#include "tsdbDataFileRW.h" +#include "tsdbSttFileRW.h" extern int32_t save_fs(const TFileSetArray *arr, const char *fname); extern int32_t current_fname(STsdb *pTsdb, char *fname, EFCurrentT ftype); extern int32_t tsdbFileWriteBrinBlock(STsdbFD *fd, SBrinBlock *brinBlock, int8_t cmprAlg, int64_t *fileSize, @@ -30,9 +32,6 @@ extern int32_t tsdbSttLvlInit(int32_t level, SSttLvl **lvl); extern int32_t tsdbSttLvlClear(SSttLvl **lvl); extern int32_t tsdbFileWriteSttBlk(STsdbFD *fd, const TSttBlkArray *sttBlkArray, SFDataPtr *ptr, int64_t *fileSize); extern int32_t tsdbFileWriteSttFooter(STsdbFD *fd, const SSttFooter *footer, int64_t *fileSize); -extern int32_t tsdbFileWriteTombBlock(STsdbFD *fd, STombBlock *tombBlock, int8_t cmprAlg, int64_t *fileSize, - TTombBlkArray *tombBlkArray, uint8_t **bufArr, SVersionRange *range); -extern int32_t tsdbFileWriteTombBlk(STsdbFD *fd, const TTombBlkArray *tombBlkArray, SFDataPtr *ptr, int64_t *fileSize); extern int32_t tsdbFileWriteTombFooter(STsdbFD *fd, const STombFooter *footer, int64_t *fileSize); static int32_t tsdbUpgradeHead(STsdb *tsdb, SDFileSet *pDFileSet, SDataFReader *reader, STFileSet *fset) { From 399afe1094d5ed1605270c36b8e6d81b70a43e75 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Thu, 31 Aug 2023 15:45:19 +0800 Subject: [PATCH 11/61] refact: tidy up declarations of funcs in tsdbUpgrade.c --- source/dnode/vnode/src/inc/tsdb.h | 1 + source/dnode/vnode/src/tsdb/tsdbDataFileRW.h | 7 +++++++ 
source/dnode/vnode/src/tsdb/tsdbFS2.h | 3 +++ source/dnode/vnode/src/tsdb/tsdbFSet2.h | 3 +++ source/dnode/vnode/src/tsdb/tsdbSttFileRW.h | 3 +++ source/dnode/vnode/src/tsdb/tsdbUpgrade.c | 18 +++++------------- 6 files changed, 22 insertions(+), 13 deletions(-) diff --git a/source/dnode/vnode/src/inc/tsdb.h b/source/dnode/vnode/src/inc/tsdb.h index 83c14a50d0..4996c6c484 100644 --- a/source/dnode/vnode/src/inc/tsdb.h +++ b/source/dnode/vnode/src/inc/tsdb.h @@ -263,6 +263,7 @@ int32_t tsdbFSRollback(STsdb *pTsdb); int32_t tsdbFSPrepareCommit(STsdb *pTsdb, STsdbFS *pFS); int32_t tsdbFSRef(STsdb *pTsdb, STsdbFS *pFS); void tsdbFSUnref(STsdb *pTsdb, STsdbFS *pFS); +void tsdbGetCurrentFName(STsdb *pTsdb, char *current, char *current_t); int32_t tsdbFSUpsertFSet(STsdbFS *pFS, SDFileSet *pSet); int32_t tsdbFSUpsertDelFile(STsdbFS *pFS, SDelFile *pDelFile); diff --git a/source/dnode/vnode/src/tsdb/tsdbDataFileRW.h b/source/dnode/vnode/src/tsdb/tsdbDataFileRW.h index b084ed13ae..a91852575d 100644 --- a/source/dnode/vnode/src/tsdb/tsdbDataFileRW.h +++ b/source/dnode/vnode/src/tsdb/tsdbDataFileRW.h @@ -97,11 +97,18 @@ int32_t tsdbDataFileWriteRow(SDataFileWriter *writer, SRowInfo *row); int32_t tsdbDataFileWriteBlockData(SDataFileWriter *writer, SBlockData *bData); int32_t tsdbDataFileFlush(SDataFileWriter *writer); +// head +int32_t tsdbFileWriteBrinBlock(STsdbFD *fd, SBrinBlock *brinBlock, int8_t cmprAlg, int64_t *fileSize, + TBrinBlkArray *brinBlkArray, uint8_t **bufArr, SVersionRange *range); +int32_t tsdbFileWriteBrinBlk(STsdbFD *fd, TBrinBlkArray *brinBlkArray, SFDataPtr *ptr, int64_t *fileSize); +int32_t tsdbFileWriteHeadFooter(STsdbFD *fd, int64_t *fileSize, const SHeadFooter *footer); + // tomb int32_t tsdbDataFileWriteTombRecord(SDataFileWriter *writer, const STombRecord *record); int32_t tsdbFileWriteTombBlock(STsdbFD *fd, STombBlock *tombBlock, int8_t cmprAlg, int64_t *fileSize, TTombBlkArray *tombBlkArray, uint8_t **bufArr, SVersionRange *range); int32_t 
tsdbFileWriteTombBlk(STsdbFD *fd, const TTombBlkArray *tombBlkArray, SFDataPtr *ptr, int64_t *fileSize); +int32_t tsdbFileWriteTombFooter(STsdbFD *fd, const STombFooter *footer, int64_t *fileSize); #ifdef __cplusplus } diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.h b/source/dnode/vnode/src/tsdb/tsdbFS2.h index 8dfc86ee83..f8b87b8a84 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS2.h +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.h @@ -72,6 +72,9 @@ int32_t tsdbFSEnableBgTask(STFileSystem *fs); // other int32_t tsdbFSGetFSet(STFileSystem *fs, int32_t fid, STFileSet **fset); int32_t tsdbFSCheckCommit(STFileSystem *fs); +// utils +int32_t save_fs(const TFileSetArray *arr, const char *fname); +int32_t current_fname(STsdb *pTsdb, char *fname, EFCurrentT ftype); struct STFSBgTask { EFSBgTaskT type; diff --git a/source/dnode/vnode/src/tsdb/tsdbFSet2.h b/source/dnode/vnode/src/tsdb/tsdbFSet2.h index 8155328e70..756250157b 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFSet2.h +++ b/source/dnode/vnode/src/tsdb/tsdbFSet2.h @@ -63,6 +63,9 @@ int64_t tsdbTFileSetMaxCid(const STFileSet *fset); SSttLvl *tsdbTFileSetGetSttLvl(STFileSet *fset, int32_t level); // is empty bool tsdbTFileSetIsEmpty(const STFileSet *fset); +// stt +int32_t tsdbSttLvlInit(int32_t level, SSttLvl **lvl); +int32_t tsdbSttLvlClear(SSttLvl **lvl); struct STFileOp { tsdb_fop_t optype; diff --git a/source/dnode/vnode/src/tsdb/tsdbSttFileRW.h b/source/dnode/vnode/src/tsdb/tsdbSttFileRW.h index d0481d5ec3..5b225da4e3 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSttFileRW.h +++ b/source/dnode/vnode/src/tsdb/tsdbSttFileRW.h @@ -71,6 +71,9 @@ int32_t tsdbSttFileWriteBlockData(SSttFileWriter *writer, SBlockData *pBlockData int32_t tsdbSttFileWriteTombRecord(SSttFileWriter *writer, const STombRecord *record); bool tsdbSttFileWriterIsOpened(SSttFileWriter *writer); +int32_t tsdbFileWriteSttBlk(STsdbFD *fd, const TSttBlkArray *sttBlkArray, SFDataPtr *ptr, int64_t *fileSize); +int32_t tsdbFileWriteSttFooter(STsdbFD *fd, 
const SSttFooter *footer, int64_t *fileSize); + struct SSttFileWriterConfig { STsdb *tsdb; int32_t maxRow; diff --git a/source/dnode/vnode/src/tsdb/tsdbUpgrade.c b/source/dnode/vnode/src/tsdb/tsdbUpgrade.c index 2a5b7f1080..24dfbf7450 100644 --- a/source/dnode/vnode/src/tsdb/tsdbUpgrade.c +++ b/source/dnode/vnode/src/tsdb/tsdbUpgrade.c @@ -16,23 +16,15 @@ #include "tsdbUpgrade.h" // old -extern void tsdbGetCurrentFName(STsdb *pTsdb, char *current, char *current_t); -extern int32_t tsdbReadDataBlockEx(SDataFReader *pReader, SDataBlk *pDataBlk, SBlockData *pBlockData); +#include "tsdb.h" +// extern void tsdbGetCurrentFName(STsdb *pTsdb, char *current, char *current_t); // new #include "tsdbDataFileRW.h" +#include "tsdbFS2.h" #include "tsdbSttFileRW.h" -extern int32_t save_fs(const TFileSetArray *arr, const char *fname); -extern int32_t current_fname(STsdb *pTsdb, char *fname, EFCurrentT ftype); -extern int32_t tsdbFileWriteBrinBlock(STsdbFD *fd, SBrinBlock *brinBlock, int8_t cmprAlg, int64_t *fileSize, - TBrinBlkArray *brinBlkArray, uint8_t **bufArr, SVersionRange *range); -extern int32_t tsdbFileWriteBrinBlk(STsdbFD *fd, TBrinBlkArray *brinBlkArray, SFDataPtr *ptr, int64_t *fileSize); -extern int32_t tsdbFileWriteHeadFooter(STsdbFD *fd, int64_t *fileSize, const SHeadFooter *footer); -extern int32_t tsdbSttLvlInit(int32_t level, SSttLvl **lvl); -extern int32_t tsdbSttLvlClear(SSttLvl **lvl); -extern int32_t tsdbFileWriteSttBlk(STsdbFD *fd, const TSttBlkArray *sttBlkArray, SFDataPtr *ptr, int64_t *fileSize); -extern int32_t tsdbFileWriteSttFooter(STsdbFD *fd, const SSttFooter *footer, int64_t *fileSize); -extern int32_t tsdbFileWriteTombFooter(STsdbFD *fd, const STombFooter *footer, int64_t *fileSize); +// extern int32_t save_fs(const TFileSetArray *arr, const char *fname); +// extern int32_t current_fname(STsdb *pTsdb, char *fname, EFCurrentT ftype); static int32_t tsdbUpgradeHead(STsdb *tsdb, SDFileSet *pDFileSet, SDataFReader *reader, STFileSet *fset) { int32_t 
code = 0; From f207b3ddd529cfc5c7bc2eb8b6402eb58775bd6b Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Thu, 31 Aug 2023 15:53:51 +0800 Subject: [PATCH 12/61] refact: rename func names of tsdbDataWriterUpdVerRange and tsdbSttWriterUpdVerRange --- source/dnode/vnode/src/tsdb/tsdbDataFileRW.c | 8 ++++---- source/dnode/vnode/src/tsdb/tsdbSttFileRW.c | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c b/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c index 2798eb0291..6029ddcc29 100644 --- a/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c +++ b/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c @@ -642,7 +642,7 @@ _exit: return code; } -static int32_t tsdbSDataUpdVerRange(SDataFileWriterConfig *config, SVersionRange *range) { +static int32_t tsdbDataWriterUpdVerRange(SDataFileWriterConfig *config, SVersionRange *range) { config->minVer = TMIN(config->minVer, range->minVer); config->maxVer = TMAX(config->maxVer, range->maxVer); return 0; @@ -746,7 +746,7 @@ static int32_t tsdbDataFileWriteBrinBlock(SDataFileWriter *writer) { &range); TSDB_CHECK_CODE(code, lino, _exit); - tsdbSDataUpdVerRange(writer->config, &range); + tsdbDataWriterUpdVerRange(writer->config, &range); _exit: if (code) { @@ -813,7 +813,7 @@ static int32_t tsdbDataFileDoWriteBlockData(SDataFileWriter *writer, SBlockData } SVersionRange range = {.minVer = record->minVer, .maxVer = record->maxVer}; - tsdbSDataUpdVerRange(writer->config, &range); + tsdbDataWriterUpdVerRange(writer->config, &range); // to .data file int32_t sizeArr[5] = {0}; @@ -1246,7 +1246,7 @@ static int32_t tsdbDataFileDoWriteTombBlock(SDataFileWriter *writer) { code = tsdbFileWriteTombBlock(writer->fd[TSDB_FTYPE_TOMB], writer->tombBlock, writer->config->cmprAlg, &writer->files[TSDB_FTYPE_TOMB].size, writer->tombBlkArray, writer->config->bufArr, &range); TSDB_CHECK_CODE(code, lino, _exit); - tsdbSDataUpdVerRange(writer->config, &range); + tsdbDataWriterUpdVerRange(writer->config, 
&range); _exit: if (code) { diff --git a/source/dnode/vnode/src/tsdb/tsdbSttFileRW.c b/source/dnode/vnode/src/tsdb/tsdbSttFileRW.c index 10fa1e2b0b..1b1438168d 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSttFileRW.c +++ b/source/dnode/vnode/src/tsdb/tsdbSttFileRW.c @@ -402,7 +402,7 @@ struct SSttFileWriter { uint8_t *bufArr[5]; }; -static int32_t tsdbSSttUpdVerRange(SSttFileWriterConfig *config, SVersionRange *range) { +static int32_t tsdbSttWriterUpdVerRange(SSttFileWriterConfig *config, SVersionRange *range) { config->minVer = TMIN(config->minVer, range->minVer); config->maxVer = TMAX(config->maxVer, range->maxVer); return 0; @@ -469,7 +469,7 @@ static int32_t tsdbSttFileDoWriteBlockData(SSttFileWriter *writer) { code = tsdbFileDoWriteSttBlockData(writer->fd, writer->blockData, writer->config->cmprAlg, &writer->file->size, writer->sttBlkArray, writer->config->bufArr, &range); TSDB_CHECK_CODE(code, lino, _exit); - tsdbSSttUpdVerRange(writer->config, &range); + tsdbSttWriterUpdVerRange(writer->config, &range); _exit: if (code) { @@ -539,7 +539,7 @@ static int32_t tsdbSttFileDoWriteTombBlock(SSttFileWriter *writer) { code = tsdbFileWriteTombBlock(writer->fd, writer->tombBlock, writer->config->cmprAlg, &writer->file->size, writer->tombBlkArray, writer->config->bufArr, &range); TSDB_CHECK_CODE(code, lino, _exit); - tsdbSSttUpdVerRange(writer->config, &range); + tsdbSttWriterUpdVerRange(writer->config, &range); _exit: if (code) { From 01d0c1247d2de0500a4366e712ac7e9ad9564756 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Thu, 31 Aug 2023 19:13:58 +0800 Subject: [PATCH 13/61] enh: record ver range in writer ctx --- source/dnode/vnode/src/tsdb/tsdbDataFileRW.c | 61 +++++++++----------- source/dnode/vnode/src/tsdb/tsdbDataFileRW.h | 5 +- source/dnode/vnode/src/tsdb/tsdbFSetRW.c | 4 -- source/dnode/vnode/src/tsdb/tsdbSttFileRW.c | 32 ++++------ source/dnode/vnode/src/tsdb/tsdbSttFileRW.h | 2 - 5 files changed, 42 insertions(+), 62 deletions(-) diff --git 
a/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c b/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c index 6029ddcc29..08771efb04 100644 --- a/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c +++ b/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c @@ -487,6 +487,8 @@ struct SDataFileWriter { int32_t tombBlkArrayIdx; STombBlock tombBlock[1]; int32_t tombBlockIdx; + // range + SVersionRange range; } ctx[1]; STFile files[TSDB_FTYPE_MAX]; @@ -633,6 +635,9 @@ static int32_t tsdbDataFileWriterDoOpen(SDataFileWriter *writer) { .maxVer = VERSION_MIN, }; + // range + writer->ctx->range = (SVersionRange){.minVer = VERSION_MAX, .maxVer = VERSION_MIN}; + writer->ctx->opened = true; _exit: @@ -642,9 +647,9 @@ _exit: return code; } -static int32_t tsdbDataWriterUpdVerRange(SDataFileWriterConfig *config, SVersionRange *range) { - config->minVer = TMIN(config->minVer, range->minVer); - config->maxVer = TMAX(config->maxVer, range->maxVer); +int32_t tsdbWriterUpdVerRange(SVersionRange *range, int64_t minVer, int64_t maxVer) { + range->minVer = TMIN(range->minVer, minVer); + range->maxVer = TMAX(range->maxVer, maxVer); return 0; } @@ -688,8 +693,7 @@ int32_t tsdbFileWriteBrinBlock(STsdbFD *fd, SBrinBlock *brinBlock, int8_t cmprAl } } - range->minVer = brinBlk->minVer; - range->maxVer = brinBlk->maxVer; + tsdbWriterUpdVerRange(range, brinBlk->minVer, brinBlk->maxVer); // write to file for (int32_t i = 0; i < ARRAY_SIZE(brinBlock->dataArr1); i++) { @@ -739,15 +743,12 @@ static int32_t tsdbDataFileWriteBrinBlock(SDataFileWriter *writer) { int32_t code = 0; int32_t lino = 0; - SVersionRange range = {.minVer = VERSION_MAX, .maxVer = VERSION_MIN}; code = tsdbFileWriteBrinBlock(writer->fd[TSDB_FTYPE_HEAD], writer->brinBlock, writer->config->cmprAlg, &writer->files[TSDB_FTYPE_HEAD].size, writer->brinBlkArray, writer->config->bufArr, - &range); + &writer->ctx->range); TSDB_CHECK_CODE(code, lino, _exit); - tsdbDataWriterUpdVerRange(writer->config, &range); - _exit: if (code) { 
TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); @@ -812,8 +813,7 @@ static int32_t tsdbDataFileDoWriteBlockData(SDataFileWriter *writer, SBlockData } } - SVersionRange range = {.minVer = record->minVer, .maxVer = record->maxVer}; - tsdbDataWriterUpdVerRange(writer->config, &range); + tsdbWriterUpdVerRange(&writer->ctx->range, record->minVer, record->maxVer); // to .data file int32_t sizeArr[5] = {0}; @@ -1200,8 +1200,7 @@ int32_t tsdbFileWriteTombBlock(STsdbFD *fd, STombBlock *tombBlock, int8_t cmprAl } } - range->minVer = tombBlk->minVer; - range->maxVer = tombBlk->maxVer; + tsdbWriterUpdVerRange(range, tombBlk->minVer, tombBlk->maxVer); for (int32_t i = 0; i < ARRAY_SIZE(tombBlock->dataArr); i++) { code = tsdbCmprData((uint8_t *)TARRAY2_DATA(&tombBlock->dataArr[i]), TARRAY2_DATA_LEN(&tombBlock->dataArr[i]), @@ -1241,12 +1240,11 @@ static int32_t tsdbDataFileDoWriteTombBlock(SDataFileWriter *writer) { int32_t code = 0; int32_t lino = 0; - - SVersionRange range = {.minVer = VERSION_MAX, .maxVer = VERSION_MIN}; + code = tsdbFileWriteTombBlock(writer->fd[TSDB_FTYPE_TOMB], writer->tombBlock, writer->config->cmprAlg, - &writer->files[TSDB_FTYPE_TOMB].size, writer->tombBlkArray, writer->config->bufArr, &range); + &writer->files[TSDB_FTYPE_TOMB].size, writer->tombBlkArray, writer->config->bufArr, + &writer->ctx->range); TSDB_CHECK_CODE(code, lino, _exit); - tsdbDataWriterUpdVerRange(writer->config, &range); _exit: if (code) { @@ -1402,6 +1400,12 @@ _exit: return code; } +int32_t tsdbTFileUpdVerRange(STFile *f, SVersionRange range) { + f->minVer = TMIN(f->minVer, range.minVer); + f->maxVer = TMAX(f->maxVer, range.maxVer); + return 0; +} + static int32_t tsdbDataFileWriterCloseCommit(SDataFileWriter *writer, TFileOpArray *opArr) { int32_t code = 0; int32_t lino = 0; @@ -1446,8 +1450,7 @@ static int32_t tsdbDataFileWriterCloseCommit(SDataFileWriter *writer, TFileOpArr .fid = writer->config->fid, .nf = writer->files[ftype], }; - op.nf.minVer = 
TMIN(op.nf.minVer, writer->config->minVer); - op.nf.maxVer = TMAX(op.nf.maxVer, writer->config->maxVer); + tsdbTFileUpdVerRange(&op.nf, writer->ctx->range); code = TARRAY2_APPEND(opArr, op); TSDB_CHECK_CODE(code, lino, _exit); @@ -1459,8 +1462,7 @@ static int32_t tsdbDataFileWriterCloseCommit(SDataFileWriter *writer, TFileOpArr .fid = writer->config->fid, .nf = writer->files[ftype], }; - op.nf.minVer = TMIN(op.nf.minVer, writer->config->minVer); - op.nf.maxVer = TMAX(op.nf.maxVer, writer->config->maxVer); + tsdbTFileUpdVerRange(&op.nf, writer->ctx->range); code = TARRAY2_APPEND(opArr, op); TSDB_CHECK_CODE(code, lino, _exit); } else if (writer->config->files[ftype].file.size != writer->files[ftype].size) { @@ -1470,8 +1472,7 @@ static int32_t tsdbDataFileWriterCloseCommit(SDataFileWriter *writer, TFileOpArr .of = writer->config->files[ftype].file, .nf = writer->files[ftype], }; - op.nf.minVer = TMIN(op.nf.minVer, writer->config->minVer); - op.nf.maxVer = TMAX(op.nf.maxVer, writer->config->maxVer); + tsdbTFileUpdVerRange(&op.nf, writer->ctx->range); code = TARRAY2_APPEND(opArr, op); TSDB_CHECK_CODE(code, lino, _exit); } @@ -1484,8 +1485,7 @@ static int32_t tsdbDataFileWriterCloseCommit(SDataFileWriter *writer, TFileOpArr .fid = writer->config->fid, .nf = writer->files[ftype], }; - op.nf.minVer = TMIN(op.nf.minVer, writer->config->minVer); - op.nf.maxVer = TMAX(op.nf.maxVer, writer->config->maxVer); + tsdbTFileUpdVerRange(&op.nf, writer->ctx->range); code = TARRAY2_APPEND(opArr, op); TSDB_CHECK_CODE(code, lino, _exit); } else if (writer->config->files[ftype].file.size != writer->files[ftype].size) { @@ -1495,8 +1495,7 @@ static int32_t tsdbDataFileWriterCloseCommit(SDataFileWriter *writer, TFileOpArr .of = writer->config->files[ftype].file, .nf = writer->files[ftype], }; - op.nf.minVer = TMIN(op.nf.minVer, writer->config->minVer); - op.nf.maxVer = TMAX(op.nf.maxVer, writer->config->maxVer); + tsdbTFileUpdVerRange(&op.nf, writer->ctx->range); code = 
TARRAY2_APPEND(opArr, op); TSDB_CHECK_CODE(code, lino, _exit); } @@ -1536,8 +1535,7 @@ static int32_t tsdbDataFileWriterCloseCommit(SDataFileWriter *writer, TFileOpArr .fid = writer->config->fid, .nf = writer->files[ftype], }; - op.nf.minVer = TMIN(op.nf.minVer, writer->config->minVer); - op.nf.maxVer = TMAX(op.nf.maxVer, writer->config->maxVer); + tsdbTFileUpdVerRange(&op.nf, writer->ctx->range); code = TARRAY2_APPEND(opArr, op); TSDB_CHECK_CODE(code, lino, _exit); } @@ -1547,14 +1545,9 @@ static int32_t tsdbDataFileWriterCloseCommit(SDataFileWriter *writer, TFileOpArr code = tsdbFsyncFile(writer->fd[i]); TSDB_CHECK_CODE(code, lino, _exit); tsdbCloseFile(&writer->fd[i]); - writer->files[i].minVer = TMIN(writer->files[i].minVer, writer->config->minVer); - writer->files[i].maxVer = TMAX(writer->files[i].maxVer, writer->config->maxVer); } } - writer->config->minVer = VERSION_MAX; - writer->config->maxVer = VERSION_MIN; - _exit: if (code) { TSDB_ERROR_LOG(TD_VID(writer->config->tsdb->pVnode), lino, code); diff --git a/source/dnode/vnode/src/tsdb/tsdbDataFileRW.h b/source/dnode/vnode/src/tsdb/tsdbDataFileRW.h index a91852575d..ca55a5420a 100644 --- a/source/dnode/vnode/src/tsdb/tsdbDataFileRW.h +++ b/source/dnode/vnode/src/tsdb/tsdbDataFileRW.h @@ -76,8 +76,6 @@ typedef struct SDataFileWriterConfig { int32_t maxRow; int32_t szPage; int32_t fid; - int64_t minVer; - int64_t maxVer; int64_t cid; SDiskID did; int64_t compactVersion; @@ -110,6 +108,9 @@ int32_t tsdbFileWriteTombBlock(STsdbFD *fd, STombBlock *tombBlock, int8_t cmprAl int32_t tsdbFileWriteTombBlk(STsdbFD *fd, const TTombBlkArray *tombBlkArray, SFDataPtr *ptr, int64_t *fileSize); int32_t tsdbFileWriteTombFooter(STsdbFD *fd, const STombFooter *footer, int64_t *fileSize); +// utils +int32_t tsdbWriterUpdVerRange(SVersionRange *range, int64_t minVer, int64_t maxVer); + #ifdef __cplusplus } #endif diff --git a/source/dnode/vnode/src/tsdb/tsdbFSetRW.c b/source/dnode/vnode/src/tsdb/tsdbFSetRW.c index 
4397d1cb5c..e6b3cf8f54 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFSetRW.c +++ b/source/dnode/vnode/src/tsdb/tsdbFSetRW.c @@ -143,8 +143,6 @@ int32_t tsdbFSetWriterOpen(SFSetWriterConfig *config, SFSetWriter **writer) { .maxRow = config->maxRow, .szPage = config->szPage, .fid = config->fid, - .minVer = VERSION_MAX, - .maxVer = VERSION_MIN, .cid = config->cid, .did = config->did, .compactVersion = config->compactVersion, @@ -170,8 +168,6 @@ int32_t tsdbFSetWriterOpen(SFSetWriterConfig *config, SFSetWriter **writer) { .compactVersion = config->compactVersion, .did = config->did, .fid = config->fid, - .minVer = VERSION_MAX, - .maxVer = VERSION_MIN, .cid = config->cid, .level = config->level, .skmTb = writer[0]->skmTb, diff --git a/source/dnode/vnode/src/tsdb/tsdbSttFileRW.c b/source/dnode/vnode/src/tsdb/tsdbSttFileRW.c index 1b1438168d..c1bf8bb027 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSttFileRW.c +++ b/source/dnode/vnode/src/tsdb/tsdbSttFileRW.c @@ -384,6 +384,8 @@ struct SSttFileWriter { struct { bool opened; TABLEID tbid[1]; + // range + SVersionRange range; } ctx[1]; // file STsdbFD *fd; @@ -402,12 +404,6 @@ struct SSttFileWriter { uint8_t *bufArr[5]; }; -static int32_t tsdbSttWriterUpdVerRange(SSttFileWriterConfig *config, SVersionRange *range) { - config->minVer = TMIN(config->minVer, range->minVer); - config->maxVer = TMAX(config->maxVer, range->maxVer); - return 0; -} - static int32_t tsdbFileDoWriteSttBlockData(STsdbFD *fd, SBlockData *blockData, int8_t cmprAlg, int64_t *fileSize, TSttBlkArray *sttBlkArray, uint8_t **bufArr, SVersionRange *range) { if (blockData->nRow == 0) return 0; @@ -432,8 +428,7 @@ static int32_t tsdbFileDoWriteSttBlockData(STsdbFD *fd, SBlockData *blockData, i if (sttBlk->maxVer < blockData->aVersion[iRow]) sttBlk->maxVer = blockData->aVersion[iRow]; } - range->minVer = sttBlk->minVer; - range->maxVer = sttBlk->maxVer; + tsdbWriterUpdVerRange(range, sttBlk->minVer, sttBlk->maxVer); int32_t sizeArr[5] = {0}; code = 
tCmprBlockData(blockData, cmprAlg, NULL, NULL, bufArr, sizeArr); @@ -465,11 +460,9 @@ static int32_t tsdbSttFileDoWriteBlockData(SSttFileWriter *writer) { int32_t code = 0; int32_t lino = 0; - SVersionRange range = {.minVer = VERSION_MAX, .maxVer = VERSION_MIN}; code = tsdbFileDoWriteSttBlockData(writer->fd, writer->blockData, writer->config->cmprAlg, &writer->file->size, - writer->sttBlkArray, writer->config->bufArr, &range); + writer->sttBlkArray, writer->config->bufArr, &writer->ctx->range); TSDB_CHECK_CODE(code, lino, _exit); - tsdbSttWriterUpdVerRange(writer->config, &range); _exit: if (code) { @@ -535,11 +528,9 @@ static int32_t tsdbSttFileDoWriteTombBlock(SSttFileWriter *writer) { int32_t code = 0; int32_t lino = 0; - SVersionRange range = {.minVer = VERSION_MAX, .maxVer = VERSION_MIN}; code = tsdbFileWriteTombBlock(writer->fd, writer->tombBlock, writer->config->cmprAlg, &writer->file->size, - writer->tombBlkArray, writer->config->bufArr, &range); + writer->tombBlkArray, writer->config->bufArr, &writer->ctx->range); TSDB_CHECK_CODE(code, lino, _exit); - tsdbSttWriterUpdVerRange(writer->config, &range); _exit: if (code) { @@ -637,8 +628,8 @@ static int32_t tsdbSttFWriterDoOpen(SSttFileWriter *writer) { .fid = writer->config->fid, .cid = writer->config->cid, .size = 0, - .minVer = writer->config->minVer, - .maxVer = writer->config->maxVer, + .minVer = VERSION_MAX, + .maxVer = VERSION_MIN, .stt[0] = { .level = writer->config->level, @@ -658,6 +649,9 @@ static int32_t tsdbSttFWriterDoOpen(SSttFileWriter *writer) { TSDB_CHECK_CODE(code, lino, _exit); writer->file->size += sizeof(hdr); + // range + writer->ctx->range = (SVersionRange){.minVer = VERSION_MAX, .maxVer = VERSION_MIN}; + writer->ctx->opened = true; _exit: @@ -727,10 +721,8 @@ static int32_t tsdbSttFWriterCloseCommit(SSttFileWriter *writer, TFileOpArray *o .fid = writer->config->fid, .nf = writer->file[0], }; - op.nf.minVer = TMIN(op.nf.minVer, writer->config->minVer); - op.nf.maxVer = 
TMAX(op.nf.maxVer, writer->config->maxVer); - writer->config->minVer = VERSION_MAX; - writer->config->maxVer = VERSION_MIN; + op.nf.minVer = TMIN(op.nf.minVer, writer->ctx->range.minVer); + op.nf.maxVer = TMAX(op.nf.maxVer, writer->ctx->range.maxVer); code = TARRAY2_APPEND(opArray, op); TSDB_CHECK_CODE(code, lino, _exit); diff --git a/source/dnode/vnode/src/tsdb/tsdbSttFileRW.h b/source/dnode/vnode/src/tsdb/tsdbSttFileRW.h index 5b225da4e3..0051a6cd92 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSttFileRW.h +++ b/source/dnode/vnode/src/tsdb/tsdbSttFileRW.h @@ -82,8 +82,6 @@ struct SSttFileWriterConfig { int64_t compactVersion; SDiskID did; int32_t fid; - int64_t minVer; - int64_t maxVer; int64_t cid; int32_t level; SSkmInfo *skmTb; From f8c6e8744a5595c84028667e484e820d0485e68a Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Thu, 31 Aug 2023 19:39:47 +0800 Subject: [PATCH 14/61] refact: change field name of pExclude in SVSnapReader --- source/dnode/vnode/src/inc/vnodeInt.h | 2 +- source/dnode/vnode/src/tsdb/tsdbFS2.c | 2 +- source/dnode/vnode/src/tsdb/tsdbSnapshot.c | 5 +++-- source/dnode/vnode/src/vnd/vnodeSnapshot.c | 4 ++-- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index cc355c5f32..902cc782ab 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -295,7 +295,7 @@ int32_t metaSnapWriterOpen(SMeta* pMeta, int64_t sver, int64_t ever, SMetaSnapWr int32_t metaSnapWrite(SMetaSnapWriter* pWriter, uint8_t* pData, uint32_t nData); int32_t metaSnapWriterClose(SMetaSnapWriter** ppWriter, int8_t rollback); // STsdbSnapReader ======================================== -int32_t tsdbSnapReaderOpen(STsdb* pTsdb, int64_t sver, int64_t ever, int8_t type, void* pEx, +int32_t tsdbSnapReaderOpen(STsdb* pTsdb, int64_t sver, int64_t ever, int8_t type, void* pExclude, STsdbSnapReader** ppReader); int32_t tsdbSnapReaderClose(STsdbSnapReader** 
ppReader); int32_t tsdbSnapRead(STsdbSnapReader* pReader, uint8_t** ppData); diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.c b/source/dnode/vnode/src/tsdb/tsdbFS2.c index 0a8bb4c37a..0c9c47fb98 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.c @@ -991,7 +991,7 @@ int32_t tsdbFSDestroyRefSnapshot(TFileSetArray **fsetArr) { return 0; } -int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, int64_t sver, int64_t ever, TSnapRangeArray *pEx, +int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, int64_t sver, int64_t ever, TSnapRangeArray *pExclude, TSnapRangeArray **fsrArr) { int32_t code = 0; STFileSet *fset; diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index b604256e89..2fdad2d662 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -412,7 +412,8 @@ _exit: return code; } -int32_t tsdbSnapReaderOpen(STsdb* tsdb, int64_t sver, int64_t ever, int8_t type, void* pEx, STsdbSnapReader** reader) { +int32_t tsdbSnapReaderOpen(STsdb* tsdb, int64_t sver, int64_t ever, int8_t type, void* pExclude, + STsdbSnapReader** reader) { int32_t code = 0; int32_t lino = 0; @@ -424,7 +425,7 @@ int32_t tsdbSnapReaderOpen(STsdb* tsdb, int64_t sver, int64_t ever, int8_t type, reader[0]->ever = ever; reader[0]->type = type; - code = tsdbFSCreateRefRangedSnapshot(tsdb->pFS, sver, ever, (TSnapRangeArray*)pEx, &reader[0]->fsrArr); + code = tsdbFSCreateRefRangedSnapshot(tsdb->pFS, sver, ever, (TSnapRangeArray*)pExclude, &reader[0]->fsrArr); TSDB_CHECK_CODE(code, lino, _exit); _exit: diff --git a/source/dnode/vnode/src/vnd/vnodeSnapshot.c b/source/dnode/vnode/src/vnd/vnodeSnapshot.c index b89fed73f8..6beb8e8dbf 100644 --- a/source/dnode/vnode/src/vnd/vnodeSnapshot.c +++ b/source/dnode/vnode/src/vnd/vnodeSnapshot.c @@ -29,7 +29,7 @@ struct SVSnapReader { SMetaSnapReader *pMetaReader; // tsdb int8_t tsdbDone; - TSnapRangeArray *pEx; + 
TSnapRangeArray *pExclude; STsdbSnapReader *pTsdbReader; // tq int8_t tqHandleDone; @@ -179,7 +179,7 @@ int32_t vnodeSnapRead(SVSnapReader *pReader, uint8_t **ppData, uint32_t *nData) if (!pReader->tsdbDone) { // open if not if (pReader->pTsdbReader == NULL) { - code = tsdbSnapReaderOpen(pReader->pVnode->pTsdb, pReader->sver, pReader->ever, SNAP_DATA_TSDB, pReader->pEx, + code = tsdbSnapReaderOpen(pReader->pVnode->pTsdb, pReader->sver, pReader->ever, SNAP_DATA_TSDB, pReader->pExclude, &pReader->pTsdbReader); if (code) goto _err; } From dc402e21e36c05427e6be245827d218ec3670ffd Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Fri, 1 Sep 2023 17:32:59 +0800 Subject: [PATCH 15/61] fix: merge old version range of head and tomb files in tsdbDataFileWriterCloseCommit --- source/dnode/vnode/src/tsdb/tsdbDataFileRW.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c b/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c index 08771efb04..e6f1d29b22 100644 --- a/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c +++ b/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c @@ -1434,6 +1434,8 @@ static int32_t tsdbDataFileWriterCloseCommit(SDataFileWriter *writer, TFileOpArr code = tsdbDataFileWriteHeadFooter(writer); TSDB_CHECK_CODE(code, lino, _exit); + SVersionRange ofRange = {.minVer = VERSION_MAX, .maxVer = VERSION_MIN}; + // .head ftype = TSDB_FTYPE_HEAD; if (writer->config->files[ftype].exist) { @@ -1442,6 +1444,7 @@ static int32_t tsdbDataFileWriterCloseCommit(SDataFileWriter *writer, TFileOpArr .fid = writer->config->fid, .of = writer->config->files[ftype].file, }; + ofRange = (SVersionRange){.minVer = op.of.minVer, .maxVer = op.of.maxVer}; code = TARRAY2_APPEND(opArr, op); TSDB_CHECK_CODE(code, lino, _exit); } @@ -1450,6 +1453,7 @@ static int32_t tsdbDataFileWriterCloseCommit(SDataFileWriter *writer, TFileOpArr .fid = writer->config->fid, .nf = writer->files[ftype], }; + tsdbTFileUpdVerRange(&op.nf, ofRange); 
tsdbTFileUpdVerRange(&op.nf, writer->ctx->range); code = TARRAY2_APPEND(opArr, op); TSDB_CHECK_CODE(code, lino, _exit); @@ -1520,6 +1524,8 @@ static int32_t tsdbDataFileWriterCloseCommit(SDataFileWriter *writer, TFileOpArr code = tsdbDataFileWriteTombFooter(writer); TSDB_CHECK_CODE(code, lino, _exit); + SVersionRange ofRange = (SVersionRange){.minVer = VERSION_MAX, .maxVer = VERSION_MIN}; + ftype = TSDB_FTYPE_TOMB; if (writer->config->files[ftype].exist) { op = (STFileOp){ @@ -1527,6 +1533,7 @@ static int32_t tsdbDataFileWriterCloseCommit(SDataFileWriter *writer, TFileOpArr .fid = writer->config->fid, .of = writer->config->files[ftype].file, }; + ofRange = (SVersionRange){.minVer = op.of.minVer, .maxVer = op.of.maxVer}; code = TARRAY2_APPEND(opArr, op); TSDB_CHECK_CODE(code, lino, _exit); } @@ -1535,6 +1542,7 @@ static int32_t tsdbDataFileWriterCloseCommit(SDataFileWriter *writer, TFileOpArr .fid = writer->config->fid, .nf = writer->files[ftype], }; + tsdbTFileUpdVerRange(&op.nf, ofRange); tsdbTFileUpdVerRange(&op.nf, writer->ctx->range); code = TARRAY2_APPEND(opArr, op); TSDB_CHECK_CODE(code, lino, _exit); From 6b753cc0cdda5547337b6c778b810fa58673823e Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Fri, 1 Sep 2023 17:39:55 +0800 Subject: [PATCH 16/61] refact: remove unused code is_same_file --- source/dnode/vnode/src/tsdb/tsdbFS2.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.c b/source/dnode/vnode/src/tsdb/tsdbFS2.c index 0c9c47fb98..d6c81b1fe5 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.c @@ -258,14 +258,6 @@ _exit: return code; } -static bool is_same_file(const STFile *f1, const STFile f2) { - if (f1->type != f2.type) return false; - if (f1->did.level != f2.did.level) return false; - if (f1->did.id != f2.did.id) return false; - if (f1->cid != f2.cid) return false; - return true; -} - static int32_t apply_commit(STFileSystem *fs) { int32_t code = 0; TFileSetArray 
*fsetArray1 = fs->fSetArr; From 54a10154a4459af056b3f63b0d4557d40f14c472 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Fri, 1 Sep 2023 18:12:45 +0800 Subject: [PATCH 17/61] fixup: remove unused minVer and maxVer in ctx of SCommitter2 --- source/dnode/vnode/src/tsdb/tsdbCommit2.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbCommit2.c b/source/dnode/vnode/src/tsdb/tsdbCommit2.c index 302991fb0f..79964c5636 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCommit2.c +++ b/source/dnode/vnode/src/tsdb/tsdbCommit2.c @@ -43,8 +43,6 @@ typedef struct { SDiskID did; TSKEY minKey; TSKEY maxKey; - int64_t minVer; - int64_t maxVer; STFileSet *fset; TABLEID tbid[1]; bool hasTSData; From 0a67cc84778b1fd07a1345d85c43315f15ab14e2 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Fri, 1 Sep 2023 19:06:48 +0800 Subject: [PATCH 18/61] refact: use tsdbTFileUpdVerRange in tsdbSttFWriterCloseCommit --- source/dnode/vnode/src/tsdb/tsdbDataFileRW.h | 1 + source/dnode/vnode/src/tsdb/tsdbSttFileRW.c | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbDataFileRW.h b/source/dnode/vnode/src/tsdb/tsdbDataFileRW.h index ca55a5420a..c4aed6e787 100644 --- a/source/dnode/vnode/src/tsdb/tsdbDataFileRW.h +++ b/source/dnode/vnode/src/tsdb/tsdbDataFileRW.h @@ -110,6 +110,7 @@ int32_t tsdbFileWriteTombFooter(STsdbFD *fd, const STombFooter *footer, int64_t // utils int32_t tsdbWriterUpdVerRange(SVersionRange *range, int64_t minVer, int64_t maxVer); +int32_t tsdbTFileUpdVerRange(STFile *f, SVersionRange range); #ifdef __cplusplus } diff --git a/source/dnode/vnode/src/tsdb/tsdbSttFileRW.c b/source/dnode/vnode/src/tsdb/tsdbSttFileRW.c index c1bf8bb027..7c3b185e20 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSttFileRW.c +++ b/source/dnode/vnode/src/tsdb/tsdbSttFileRW.c @@ -721,8 +721,7 @@ static int32_t tsdbSttFWriterCloseCommit(SSttFileWriter *writer, TFileOpArray *o .fid = writer->config->fid, .nf = writer->file[0], }; 
- op.nf.minVer = TMIN(op.nf.minVer, writer->ctx->range.minVer); - op.nf.maxVer = TMAX(op.nf.maxVer, writer->ctx->range.maxVer); + tsdbTFileUpdVerRange(&op.nf, writer->ctx->range); code = TARRAY2_APPEND(opArray, op); TSDB_CHECK_CODE(code, lino, _exit); From 6ae9bd0e9a09a1115396c882e648d45d1b5f2240 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Tue, 5 Sep 2023 10:20:37 +0800 Subject: [PATCH 19/61] feat: add tsdbFSToSnapRangeArray --- source/dnode/vnode/src/tsdb/tsdbFS2.c | 75 +++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.c b/source/dnode/vnode/src/tsdb/tsdbFS2.c index d6c81b1fe5..fed43a16c1 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.c @@ -983,6 +983,81 @@ int32_t tsdbFSDestroyRefSnapshot(TFileSetArray **fsetArr) { return 0; } +int32_t tsdbTSnapRangeCmprFn(STSnapRange *fsr1, STSnapRange *fsr2) { + if (fsr1->fid < fsr2->fid) return -1; + if (fsr1->fid > fsr2->fid) return 1; + if (fsr1->sver < fsr2->sver) return -1; + if (fsr1->sver > fsr2->sver) return 1; + if (fsr1->ever < fsr2->ever) return -1; + if (fsr1->ever < fsr2->ever) return 1; + return 0; +} + +int32_t tsdbTFileInsertSnapRange(STFile *f, TSnapRangeArray *snapR) { + STSnapRange *fsr = taosMemoryCalloc(1, sizeof(*fsr)); + if (fsr == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + fsr->fid = f->fid; + fsr->sver = f->minVer; + fsr->ever = f->maxVer; + + int32_t code = TARRAY2_SORT_INSERT(snapR, fsr, tsdbTSnapRangeCmprFn); + if (code) { + taosMemoryFree(fsr); + fsr = NULL; + } + return code; +} + +int32_t tsdbTFSetInsertSnapRange(STFileSet *fset, TSnapRangeArray *snapR) { + STFile tf = {.fid = fset->fid, .minVer = VERSION_MAX, .maxVer = VERSION_MIN}; + for (int32_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ++ftype) { + if (fset->farr[ftype] == NULL) continue; + STFile *f = fset->farr[ftype]->f; + tsdbTFileUpdVerRange(&tf, (SVersionRange){.minVer = f->minVer, .maxVer = 
f->maxVer}); + } + + int32_t code = tsdbTFileInsertSnapRange(&tf, snapR); + if (code) return code; + + const SSttLvl *lvl; + TARRAY2_FOREACH(fset->lvlArr, lvl) { + STFileObj *fobj; + TARRAY2_FOREACH(lvl->fobjArr, fobj) { + code = tsdbTFileInsertSnapRange(fobj->f, snapR); + if (code) return code; + } + } + return code; +} + +TSnapRangeArray *tsdbFSToSnapRangeArray(STFileSystem *fs) { + int32_t code = 0; + TSnapRangeArray *snapR = taosMemoryCalloc(1, sizeof(*snapR)); + if (snapR == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return NULL; + } + TARRAY2_INIT(snapR); + + taosThreadRwlockRdlock(&fs->tsdb->rwLock); + STFileSet *fset; + TARRAY2_FOREACH(fs->fSetArr, fset) { + code = tsdbTFSetInsertSnapRange(fset, snapR); + if (code) break; + } + taosThreadRwlockUnlock(&fs->tsdb->rwLock); + + if (code) { + TARRAY2_DESTROY(snapR, tsdbTSnapRangeClear); + taosMemoryFree(snapR); + snapR = NULL; + } + return snapR; +} + int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, int64_t sver, int64_t ever, TSnapRangeArray *pExclude, TSnapRangeArray **fsrArr) { int32_t code = 0; From 44a7f5df4799cf9802cfd2bb1421691229964627 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Tue, 5 Sep 2023 15:14:43 +0800 Subject: [PATCH 20/61] feat: add tsdbSnapDiff --- source/dnode/vnode/src/tsdb/tsdbFS2.c | 254 ++++++++++++++++++++++++-- 1 file changed, 243 insertions(+), 11 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.c b/source/dnode/vnode/src/tsdb/tsdbFS2.c index fed43a16c1..231cd95658 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.c @@ -983,7 +983,7 @@ int32_t tsdbFSDestroyRefSnapshot(TFileSetArray **fsetArr) { return 0; } -int32_t tsdbTSnapRangeCmprFn(STSnapRange *fsr1, STSnapRange *fsr2) { +static int32_t tsdbTSnapRangeCmprFn(STSnapRange *fsr1, STSnapRange *fsr2) { if (fsr1->fid < fsr2->fid) return -1; if (fsr1->fid > fsr2->fid) return 1; if (fsr1->sver < fsr2->sver) return -1; @@ -993,7 +993,7 @@ int32_t 
tsdbTSnapRangeCmprFn(STSnapRange *fsr1, STSnapRange *fsr2) { return 0; } -int32_t tsdbTFileInsertSnapRange(STFile *f, TSnapRangeArray *snapR) { +static int32_t tsdbTFileInsertSnapRange(STFile *f, TSnapRangeArray *snapR) { STSnapRange *fsr = taosMemoryCalloc(1, sizeof(*fsr)); if (fsr == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; @@ -1011,7 +1011,7 @@ int32_t tsdbTFileInsertSnapRange(STFile *f, TSnapRangeArray *snapR) { return code; } -int32_t tsdbTFSetInsertSnapRange(STFileSet *fset, TSnapRangeArray *snapR) { +static int32_t tsdbTFSetInsertSnapRange(STFileSet *fset, TSnapRangeArray *snapR) { STFile tf = {.fid = fset->fid, .minVer = VERSION_MAX, .maxVer = VERSION_MIN}; for (int32_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ++ftype) { if (fset->farr[ftype] == NULL) continue; @@ -1058,24 +1058,246 @@ TSnapRangeArray *tsdbFSToSnapRangeArray(STFileSystem *fs) { return snapR; } +static STSnapRange *taosDupSnapRange(STSnapRange *x) { + STSnapRange *y = taosMemoryCalloc(1, sizeof(STSnapRange)); + if (y == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + ASSERTS(terrno == 0, "Out of memory"); + return NULL; + } + y->fid = x->fid; + y->sver = x->sver; + y->ever = x->ever; + return y; +} + +static TSnapRangeArray *taosDupSnapRangeArray(const TSnapRangeArray *X) { + TSnapRangeArray *Y = taosMemoryCalloc(1, sizeof(*Y)); + if (Y == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return NULL; + } + TARRAY2_INIT(Y); + + if (X) { + STSnapRange *x; + TARRAY2_FOREACH(X, x) { + STSnapRange *tp = taosDupSnapRange(x); + TARRAY2_APPEND(Y, tp); + } + } + return Y; +} + +static TSnapRangeArray *tsdbSnapDiff(const TSnapRangeArray *snapR, const TSnapRangeArray *pExclude) { + TSnapRangeArray *Z = NULL; + TSnapRangeArray *U = NULL; + TSnapRangeArray *V = NULL; + TSnapRangeArray *X = taosDupSnapRangeArray(snapR); + TSnapRangeArray *Y = taosDupSnapRangeArray(pExclude); + int32_t code = -1; + + // separate intersections of snap ranges + U = taosMemoryCalloc(1, sizeof(TSnapRangeArray)); + if (U == 
NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _out; + } + TARRAY2_INIT(U); + + V = taosMemoryCalloc(1, sizeof(TSnapRangeArray)); + if (V == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _out; + } + TARRAY2_INIT(V); + + int32_t i = 0; + int32_t j = 0; + while (i < TARRAY2_SIZE(X) && j < TARRAY2_SIZE(Y)) { + STSnapRange *x = TARRAY2_GET(X, i); + STSnapRange *y = TARRAY2_GET(Y, j); + + if (x->fid < y->fid) { + STSnapRange *tmp = taosDupSnapRange(x); + TARRAY2_APPEND(U, tmp); + i++; + } else if (x->fid > y->fid) { + STSnapRange *tmp = taosDupSnapRange(y); + TARRAY2_APPEND(V, tmp); + j++; + } else { + if (x->sver < y->sver) { + if (x->ever < y->ever) { + STSnapRange *tmp = taosDupSnapRange(x); + TARRAY2_APPEND(U, tmp); + i++; + } else { + STSnapRange *tmp = taosDupSnapRange(x); + tmp->ever = y->sver - 1; + TARRAY2_APPEND(U, tmp); + x->sver = y->sver; + } + } else if (x->sver > y->sver) { + if (y->ever < x->ever) { + STSnapRange *tmp = taosDupSnapRange(y); + TARRAY2_APPEND(V, tmp); + j++; + } else { + STSnapRange *tmp = taosDupSnapRange(y); + tmp->ever = x->sver - 1; + TARRAY2_APPEND(V, tmp); + y->sver = x->sver; + } + } else { + if (x->ever < y->ever) { + STSnapRange *tmp = taosDupSnapRange(x); + TARRAY2_APPEND(U, tmp); + i++; + tmp = taosDupSnapRange(y); + tmp->ever = x->ever; + TARRAY2_APPEND(V, tmp); + y->sver = x->ever + 1; + } else if (x->ever > y->ever) { + STSnapRange *tmp = taosDupSnapRange(y); + TARRAY2_APPEND(V, tmp); + j++; + tmp = taosDupSnapRange(x); + tmp->ever = y->ever; + TARRAY2_APPEND(U, tmp); + x->sver = y->ever + 1; + } else { + STSnapRange *tmp = taosDupSnapRange(x); + TARRAY2_APPEND(U, tmp); + i++; + tmp = taosDupSnapRange(y); + TARRAY2_APPEND(V, tmp); + j++; + } + } + } + } + while (i < TARRAY2_SIZE(X)) { + STSnapRange *x = TARRAY2_GET(X, i); + STSnapRange *tmp = taosDupSnapRange(x); + TARRAY2_APPEND(U, tmp); + i++; + } + while (j < TARRAY2_SIZE(Y)) { + STSnapRange *y = TARRAY2_GET(Y, j); + STSnapRange *tmp = taosDupSnapRange(y); + 
TARRAY2_APPEND(V, tmp); + j++; + } + + // difference of snap ranges + Z = taosMemoryCalloc(1, sizeof(*Z)); + if (Z == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _out; + } + TARRAY2_INIT(Z); + + i = 0; + j = 0; + while (i < TARRAY2_SIZE(U) && j < TARRAY2_SIZE(V)) { + STSnapRange *u = TARRAY2_GET(U, i); + STSnapRange *v = TARRAY2_GET(V, j); + + if (u->fid < v->fid) { + STSnapRange *tmp = taosDupSnapRange(u); + TARRAY2_APPEND(Z, tmp); + i++; + } else if (u->fid == v->fid) { + if (u->sver < v->sver) { + STSnapRange *tmp = taosDupSnapRange(u); + TARRAY2_APPEND(Z, tmp); + i++; + } else if (u->sver > v->sver) { + ASSERT(u->ever > v->ever); + j++; + } else { + ASSERT(u->ever == v->ever); + i++; + j++; + } + } + } + while (i < TARRAY2_SIZE(U)) { + STSnapRange *u = TARRAY2_GET(U, i); + STSnapRange *tmp = taosDupSnapRange(u); + TARRAY2_APPEND(Z, tmp); + i++; + } + + code = 0; +_out: + TSnapRangeArray **ppArrs[4] = {&X, &Y, &U, &V}; + int len = sizeof(ppArrs) / sizeof(ppArrs[0]); + for (int i = 0; i < len; i++) { + if (ppArrs[i][0] == NULL) continue; + TARRAY2_DESTROY(ppArrs[i][0], tsdbTSnapRangeClear); + taosMemoryFree(ppArrs[i][0]); + ppArrs[i][0] = NULL; + } + if (code != 0 && Z) { + TARRAY2_DESTROY(Z, tsdbTSnapRangeClear); + taosMemoryFree(Z); + Z = NULL; + } + return Z; +} + int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, int64_t sver, int64_t ever, TSnapRangeArray *pExclude, TSnapRangeArray **fsrArr) { - int32_t code = 0; + int32_t code = -1; STFileSet *fset; STSnapRange *fsr1 = NULL; - fsrArr[0] = taosMemoryCalloc(1, sizeof(*fsrArr[0])); - if (fsrArr[0] == NULL) return TSDB_CODE_OUT_OF_MEMORY; + TSnapRangeArray *snapF = tsdbFSToSnapRangeArray(fs); + if (snapF == NULL) { + tsdbError("failed to generate snap ranges from fs since %s.", terrstr()); + goto _out; + } + TSnapRangeArray *snapD = tsdbSnapDiff(snapF, pExclude); + if (snapD == NULL) { + tsdbError("failed to get diff of snap ranges since %s.", terrstr()); + goto _out; + } + fsrArr[0] = 
taosMemoryCalloc(1, sizeof(*fsrArr[0])); + if (fsrArr[0] == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _out; + } + + int32_t i = 0; taosThreadRwlockRdlock(&fs->tsdb->rwLock); TARRAY2_FOREACH(fs->fSetArr, fset) { - code = tsdbTSnapRangeInitRef(fs->tsdb, fset, sver, ever, &fsr1); - if (code) break; + while (i < TARRAY2_SIZE(snapD)) { + STSnapRange *u = TARRAY2_GET(snapD, i); + if (fset->fid < u->fid) { + break; + } else if (fset->fid > u->fid) { + i++; + continue; + } else { + i++; + } + int64_t sver1 = TMAX(sver, u->sver); + int64_t ever1 = TMIN(ever, u->ever); + if (sver1 > ever1) { + continue; + } + code = tsdbTSnapRangeInitRef(fs->tsdb, fset, sver1, ever1, &fsr1); + if (code) break; - code = TARRAY2_APPEND(fsrArr[0], fsr1); - if (code) break; + code = TARRAY2_APPEND(fsrArr[0], fsr1); + if (code) break; - fsr1 = NULL; + fsr1 = NULL; + } + if (code) break; } taosThreadRwlockUnlock(&fs->tsdb->rwLock); @@ -1084,6 +1306,16 @@ int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, int64_t sver, int64_t ev TARRAY2_DESTROY(fsrArr[0], tsdbTSnapRangeClear); fsrArr[0] = NULL; } + +_out: + TSnapRangeArray **ppArrs[2] = {&snapF, &snapD}; + int len = sizeof(ppArrs) / sizeof(ppArrs[0]); + for (int i = 0; i < len; i++) { + if (ppArrs[i][0] == NULL) continue; + TARRAY2_DESTROY(ppArrs[i][0], tsdbTSnapRangeClear); + taosMemoryFree(ppArrs[i][0]); + ppArrs[i][0] = NULL; + } return code; } From 4540bcb170d209c8f755ab7b5f95061fb96e85de Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Tue, 5 Sep 2023 15:33:35 +0800 Subject: [PATCH 21/61] fixup: work-around a compiler bug --- source/dnode/vnode/src/tsdb/tsdbFS2.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.c b/source/dnode/vnode/src/tsdb/tsdbFS2.c index 231cd95658..78480988cf 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.c @@ -1234,11 +1234,11 @@ static TSnapRangeArray *tsdbSnapDiff(const 
TSnapRangeArray *snapR, const TSnapRa _out: TSnapRangeArray **ppArrs[4] = {&X, &Y, &U, &V}; int len = sizeof(ppArrs) / sizeof(ppArrs[0]); - for (int i = 0; i < len; i++) { - if (ppArrs[i][0] == NULL) continue; - TARRAY2_DESTROY(ppArrs[i][0], tsdbTSnapRangeClear); - taosMemoryFree(ppArrs[i][0]); - ppArrs[i][0] = NULL; + for (int k = 0; k < len; k++) { + if (ppArrs[k][0] == NULL) continue; + TARRAY2_DESTROY(ppArrs[k][0], tsdbTSnapRangeClear); + taosMemoryFree(ppArrs[k][0]); + ppArrs[k][0] = NULL; } if (code != 0 && Z) { TARRAY2_DESTROY(Z, tsdbTSnapRangeClear); @@ -1310,11 +1310,11 @@ int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, int64_t sver, int64_t ev _out: TSnapRangeArray **ppArrs[2] = {&snapF, &snapD}; int len = sizeof(ppArrs) / sizeof(ppArrs[0]); - for (int i = 0; i < len; i++) { - if (ppArrs[i][0] == NULL) continue; - TARRAY2_DESTROY(ppArrs[i][0], tsdbTSnapRangeClear); - taosMemoryFree(ppArrs[i][0]); - ppArrs[i][0] = NULL; + for (int k = 0; k < len; k++) { + if (ppArrs[k][0] == NULL) continue; + TARRAY2_DESTROY(ppArrs[k][0], tsdbTSnapRangeClear); + taosMemoryFree(ppArrs[k][0]); + ppArrs[k][0] = NULL; } return code; } From 0ddbcd50d0e5cd6f17954f9313bce283c41a582c Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Wed, 6 Sep 2023 11:43:13 +0800 Subject: [PATCH 22/61] enh: maintain independent version range of tombs for SDataFileWriter --- source/dnode/vnode/src/tsdb/tsdbDataFileRW.c | 8 +++++--- source/dnode/vnode/src/tsdb/tsdbUpgrade.c | 8 ++++---- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c b/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c index e6f1d29b22..df6b85a889 100644 --- a/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c +++ b/source/dnode/vnode/src/tsdb/tsdbDataFileRW.c @@ -488,7 +488,8 @@ struct SDataFileWriter { STombBlock tombBlock[1]; int32_t tombBlockIdx; // range - SVersionRange range; + SVersionRange range; + SVersionRange tombRange; } ctx[1]; STFile files[TSDB_FTYPE_MAX]; @@ 
-637,6 +638,7 @@ static int32_t tsdbDataFileWriterDoOpen(SDataFileWriter *writer) { // range writer->ctx->range = (SVersionRange){.minVer = VERSION_MAX, .maxVer = VERSION_MIN}; + writer->ctx->tombRange = (SVersionRange){.minVer = VERSION_MAX, .maxVer = VERSION_MIN}; writer->ctx->opened = true; @@ -1243,7 +1245,7 @@ static int32_t tsdbDataFileDoWriteTombBlock(SDataFileWriter *writer) { code = tsdbFileWriteTombBlock(writer->fd[TSDB_FTYPE_TOMB], writer->tombBlock, writer->config->cmprAlg, &writer->files[TSDB_FTYPE_TOMB].size, writer->tombBlkArray, writer->config->bufArr, - &writer->ctx->range); + &writer->ctx->tombRange); TSDB_CHECK_CODE(code, lino, _exit); _exit: @@ -1543,7 +1545,7 @@ static int32_t tsdbDataFileWriterCloseCommit(SDataFileWriter *writer, TFileOpArr .nf = writer->files[ftype], }; tsdbTFileUpdVerRange(&op.nf, ofRange); - tsdbTFileUpdVerRange(&op.nf, writer->ctx->range); + tsdbTFileUpdVerRange(&op.nf, writer->ctx->tombRange); code = TARRAY2_APPEND(opArr, op); TSDB_CHECK_CODE(code, lino, _exit); } diff --git a/source/dnode/vnode/src/tsdb/tsdbUpgrade.c b/source/dnode/vnode/src/tsdb/tsdbUpgrade.c index 24dfbf7450..876c0df4a0 100644 --- a/source/dnode/vnode/src/tsdb/tsdbUpgrade.c +++ b/source/dnode/vnode/src/tsdb/tsdbUpgrade.c @@ -486,9 +486,9 @@ static int32_t tsdbDumpTombDataToFSet(STsdb *tsdb, SDelFReader *reader, SArray * code = tsdbUpgradeOpenTombFile(tsdb, fset, &ctx->fd, &ctx->fobj, &ctx->toStt); TSDB_CHECK_CODE(code, lino, _exit); } - SVersionRange range = {.minVer = VERSION_MAX, .maxVer = VERSION_MIN}; + SVersionRange tombRange = {.minVer = VERSION_MAX, .maxVer = VERSION_MIN}; code = tsdbFileWriteTombBlock(ctx->fd, ctx->tombBlock, ctx->cmprAlg, &ctx->fobj->f->size, ctx->tombBlkArray, - ctx->bufArr, &range); + ctx->bufArr, &tombRange); TSDB_CHECK_CODE(code, lino, _exit); } } @@ -499,9 +499,9 @@ static int32_t tsdbDumpTombDataToFSet(STsdb *tsdb, SDelFReader *reader, SArray * code = tsdbUpgradeOpenTombFile(tsdb, fset, &ctx->fd, &ctx->fobj, 
&ctx->toStt); TSDB_CHECK_CODE(code, lino, _exit); } - SVersionRange range = {.minVer = VERSION_MAX, .maxVer = VERSION_MIN}; + SVersionRange tombRange = {.minVer = VERSION_MAX, .maxVer = VERSION_MIN}; code = tsdbFileWriteTombBlock(ctx->fd, ctx->tombBlock, ctx->cmprAlg, &ctx->fobj->f->size, ctx->tombBlkArray, - ctx->bufArr, &range); + ctx->bufArr, &tombRange); TSDB_CHECK_CODE(code, lino, _exit); } From b23bcee690b0b4d5f8aed86d9a6a5828b13381ae Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Mon, 11 Sep 2023 10:54:28 +0800 Subject: [PATCH 23/61] fixup: about to revert this commit --- source/dnode/vnode/src/tsdb/tsdbFS2.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.c b/source/dnode/vnode/src/tsdb/tsdbFS2.c index 78480988cf..b082c450f5 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.c @@ -1026,10 +1026,14 @@ static int32_t tsdbTFSetInsertSnapRange(STFileSet *fset, TSnapRangeArray *snapR) TARRAY2_FOREACH(fset->lvlArr, lvl) { STFileObj *fobj; TARRAY2_FOREACH(lvl->fobjArr, fobj) { + // tsdbTFileUpdVerRange(&tf, (SVersionRange){.minVer = fobj->f->minVer, .maxVer = fobj->f->maxVer}); code = tsdbTFileInsertSnapRange(fobj->f, snapR); if (code) return code; } } + + // int32_t code = tsdbTFileInsertSnapRange(&tf, snapR); + // if (code) return code; return code; } @@ -1272,6 +1276,10 @@ int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, int64_t sver, int64_t ev } int32_t i = 0; + code = 0; + + // TODO: use the same fs fSetArr as get snapDiff. The following treatment is potentially wrong + // if the fSetArr are changed. 
taosThreadRwlockRdlock(&fs->tsdb->rwLock); TARRAY2_FOREACH(fs->fSetArr, fset) { while (i < TARRAY2_SIZE(snapD)) { @@ -1279,6 +1287,7 @@ int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, int64_t sver, int64_t ev if (fset->fid < u->fid) { break; } else if (fset->fid > u->fid) { + ASSERT(false); i++; continue; } else { @@ -1289,6 +1298,8 @@ int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, int64_t sver, int64_t ev if (sver1 > ever1) { continue; } + tsdbInfo("fsrArr:%p, fid:%d, sver:%" PRId64 ", ever:%" PRId64, fsrArr, fset->fid, sver1, ever1); + code = tsdbTSnapRangeInitRef(fs->tsdb, fset, sver1, ever1, &fsr1); if (code) break; From 083dd148be6e7fe9569fbb9ae82f3d24375b1ef6 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Mon, 11 Sep 2023 19:05:40 +0800 Subject: [PATCH 24/61] feat: exchange difference of snapshot info for replication --- include/libs/sync/sync.h | 12 +++- source/dnode/mnode/impl/src/mndSync.c | 3 +- source/dnode/vnode/inc/vnode.h | 2 +- source/dnode/vnode/src/vnd/vnodeOpen.c | 8 ++- source/dnode/vnode/src/vnd/vnodeSync.c | 5 +- source/libs/sync/inc/syncMessage.h | 7 ++- source/libs/sync/src/syncMessage.c | 4 +- source/libs/sync/src/syncSnapshot.c | 82 ++++++++++++++++++++++---- 8 files changed, 100 insertions(+), 23 deletions(-) diff --git a/include/libs/sync/sync.h b/include/libs/sync/sync.h index f69afbd71b..53e6ec0d71 100644 --- a/include/libs/sync/sync.h +++ b/include/libs/sync/sync.h @@ -87,6 +87,12 @@ typedef enum { TAOS_SYNC_ROLE_ERROR = 2, } ESyncRole; +typedef enum { + TAOS_SYNC_SNAP_INFO_BRIEF = 0, + TAOS_SYNC_SNAP_INFO_FULL = 1, + TAOS_SYNC_SNAP_INFO_DIFF = 2, +} ESyncSnapInfoTyp; + typedef struct SNodeInfo { int64_t clusterId; int32_t nodeId; @@ -139,10 +145,12 @@ typedef struct SReConfigCbMeta { typedef struct SSnapshotParam { SyncIndex start; SyncIndex end; + void* data; // with SMsgHead } SSnapshotParam; typedef struct SSnapshot { - void* data; + ESyncSnapInfoTyp typ; + void* data; // with SMsgHead SyncIndex lastApplyIndex; 
SyncTerm lastApplyTerm; SyncIndex lastConfigIndex; @@ -171,7 +179,7 @@ typedef struct SSyncFSM { void (*FpBecomeLearnerCb)(const struct SSyncFSM* pFsm); int32_t (*FpGetSnapshot)(const struct SSyncFSM* pFsm, SSnapshot* pSnapshot, void* pReaderParam, void** ppReader); - void (*FpGetSnapshotInfo)(const struct SSyncFSM* pFsm, SSnapshot* pSnapshot); + int32_t (*FpGetSnapshotInfo)(const struct SSyncFSM* pFsm, SSnapshot* pSnapshot); int32_t (*FpSnapshotStartRead)(const struct SSyncFSM* pFsm, void* pReaderParam, void** ppReader); void (*FpSnapshotStopRead)(const struct SSyncFSM* pFsm, void* pReader); diff --git a/source/dnode/mnode/impl/src/mndSync.c b/source/dnode/mnode/impl/src/mndSync.c index 5759737a6a..7f6a0397ad 100644 --- a/source/dnode/mnode/impl/src/mndSync.c +++ b/source/dnode/mnode/impl/src/mndSync.c @@ -286,9 +286,10 @@ int32_t mndSyncGetSnapshot(const SSyncFSM *pFsm, SSnapshot *pSnapshot, void *pRe return 0; } -static void mndSyncGetSnapshotInfo(const SSyncFSM *pFsm, SSnapshot *pSnapshot) { +static int32_t mndSyncGetSnapshotInfo(const SSyncFSM *pFsm, SSnapshot *pSnapshot) { SMnode *pMnode = pFsm->data; sdbGetCommitInfo(pMnode->pSdb, &pSnapshot->lastApplyIndex, &pSnapshot->lastApplyTerm, &pSnapshot->lastConfigIndex); + return 0; } void mndRestoreFinish(const SSyncFSM *pFsm, const SyncIndex commitIdx) { diff --git a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h index c40e2657f9..ba7bad67e4 100644 --- a/source/dnode/vnode/inc/vnode.h +++ b/source/dnode/vnode/inc/vnode.h @@ -69,7 +69,7 @@ int32_t vnodeBegin(SVnode *pVnode); int32_t vnodeStart(SVnode *pVnode); void vnodeStop(SVnode *pVnode); int64_t vnodeGetSyncHandle(SVnode *pVnode); -void vnodeGetSnapshot(SVnode *pVnode, SSnapshot *pSnapshot); +int32_t vnodeGetSnapshot(SVnode *pVnode, SSnapshot *pSnapshot); void vnodeGetInfo(void *pVnode, const char **dbname, int32_t *vgId, int64_t *numOfTables, int64_t *numOfNormalTables); int32_t vnodeProcessCreateTSma(SVnode *pVnode, void *pCont, uint32_t 
contLen); int32_t vnodeGetTableList(void *pVnode, int8_t type, SArray *pList); diff --git a/source/dnode/vnode/src/vnd/vnodeOpen.c b/source/dnode/vnode/src/vnd/vnodeOpen.c index db94f32459..9228269992 100644 --- a/source/dnode/vnode/src/vnd/vnodeOpen.c +++ b/source/dnode/vnode/src/vnd/vnodeOpen.c @@ -518,9 +518,13 @@ void vnodeStop(SVnode *pVnode) {} int64_t vnodeGetSyncHandle(SVnode *pVnode) { return pVnode->sync; } -void vnodeGetSnapshot(SVnode *pVnode, SSnapshot *pSnapshot) { - pSnapshot->data = NULL; +int32_t vnodeGetSnapshot(SVnode *pVnode, SSnapshot *pSnapshot) { pSnapshot->lastApplyIndex = pVnode->state.committed; pSnapshot->lastApplyTerm = pVnode->state.commitTerm; pSnapshot->lastConfigIndex = -1; + + if (pSnapshot->typ == TAOS_SYNC_SNAP_INFO_FULL) { + // TODO: get full info of snapshots + } + return 0; } diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index a6c743c87d..b9f2d23c7b 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -416,8 +416,8 @@ static int32_t vnodeSyncSendMsg(const SEpSet *pEpSet, SRpcMsg *pMsg) { return code; } -static void vnodeSyncGetSnapshotInfo(const SSyncFSM *pFsm, SSnapshot *pSnapshot) { - vnodeGetSnapshot(pFsm->data, pSnapshot); +static int32_t vnodeSyncGetSnapshotInfo(const SSyncFSM *pFsm, SSnapshot *pSnapshot) { + return vnodeGetSnapshot(pFsm->data, pSnapshot); } static int32_t vnodeSyncApplyMsg(const SSyncFSM *pFsm, SRpcMsg *pMsg, const SFsmCbMeta *pMeta) { @@ -642,6 +642,7 @@ static SSyncFSM *vnodeSyncMakeFsm(SVnode *pVnode) { pFsm->FpAppliedIndexCb = vnodeSyncAppliedIndex; pFsm->FpPreCommitCb = vnodeSyncPreCommitMsg; pFsm->FpRollBackCb = vnodeSyncRollBackMsg; + pFsm->FpGetSnapshot = NULL; pFsm->FpGetSnapshotInfo = vnodeSyncGetSnapshotInfo; pFsm->FpRestoreFinishCb = vnodeRestoreFinish; pFsm->FpLeaderTransferCb = NULL; diff --git a/source/libs/sync/inc/syncMessage.h b/source/libs/sync/inc/syncMessage.h index f8c96d8be2..c0d3663a8f 
100644 --- a/source/libs/sync/inc/syncMessage.h +++ b/source/libs/sync/inc/syncMessage.h @@ -200,7 +200,7 @@ typedef struct SyncSnapshotSend { SSyncCfg lastConfig; int64_t startTime; int32_t seq; - int16_t reserved; + int16_t payloadType; uint32_t dataLen; char data[]; } SyncSnapshotSend; @@ -219,7 +219,8 @@ typedef struct SyncSnapshotRsp { int32_t ack; int32_t code; SyncIndex snapBeginIndex; // when ack = SYNC_SNAPSHOT_SEQ_BEGIN, it's valid - int16_t reserved; + int16_t payloadType; + char data[]; } SyncSnapshotRsp; typedef struct SyncLeaderTransfer { @@ -267,7 +268,7 @@ int32_t syncBuildPreSnapshot(SRpcMsg* pMsg, int32_t vgId); int32_t syncBuildPreSnapshotReply(SRpcMsg* pMsg, int32_t vgId); int32_t syncBuildApplyMsg(SRpcMsg* pMsg, const SRpcMsg* pOriginal, int32_t vgId, SFsmCbMeta* pMeta); int32_t syncBuildSnapshotSend(SRpcMsg* pMsg, int32_t dataLen, int32_t vgId); -int32_t syncBuildSnapshotSendRsp(SRpcMsg* pMsg, int32_t vgId); +int32_t syncBuildSnapshotSendRsp(SRpcMsg* pMsg, int32_t dataLen, int32_t vgId); int32_t syncBuildLeaderTransfer(SRpcMsg* pMsg, int32_t vgId); int32_t syncBuildLocalCmd(SRpcMsg* pMsg, int32_t vgId); diff --git a/source/libs/sync/src/syncMessage.c b/source/libs/sync/src/syncMessage.c index 72c8887803..00ca6d8f90 100644 --- a/source/libs/sync/src/syncMessage.c +++ b/source/libs/sync/src/syncMessage.c @@ -270,8 +270,8 @@ int32_t syncBuildSnapshotSend(SRpcMsg* pMsg, int32_t dataLen, int32_t vgId) { return 0; } -int32_t syncBuildSnapshotSendRsp(SRpcMsg* pMsg, int32_t vgId) { - int32_t bytes = sizeof(SyncSnapshotRsp); +int32_t syncBuildSnapshotSendRsp(SRpcMsg* pMsg, int32_t dataLen, int32_t vgId) { + int32_t bytes = sizeof(SyncSnapshotRsp) + dataLen; pMsg->pCont = rpcMallocCont(bytes); pMsg->msgType = TDMT_SYNC_SNAPSHOT_RSP; pMsg->contLen = bytes; diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index 763d4ec5d6..00dcd7e949 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ 
b/source/libs/sync/src/syncSnapshot.c @@ -74,6 +74,7 @@ void snapshotSenderDestroy(SSyncSnapshotSender *pSender) { bool snapshotSenderIsStart(SSyncSnapshotSender *pSender) { return pSender->start; } int32_t snapshotSenderStart(SSyncSnapshotSender *pSender) { + int32_t code = -1; pSender->start = true; pSender->seq = SYNC_SNAPSHOT_SEQ_BEGIN; pSender->ack = SYNC_SNAPSHOT_SEQ_INVALID; @@ -95,11 +96,26 @@ int32_t snapshotSenderStart(SSyncSnapshotSender *pSender) { pSender->lastSendTime = pSender->startTime; pSender->finish = false; + // Get full snapshot info + SSyncNode *pSyncNode = pSender->pSyncNode; + SSnapshot snapInfo = {.typ = TAOS_SYNC_SNAP_INFO_FULL}; + if (pSyncNode->pFsm->FpGetSnapshotInfo(pSyncNode->pFsm, &snapInfo) != 0) { + sSError(pSender, "snapshot get info failure since %s", terrstr()); + goto _out; + } + + int dataLen = 0; + if (snapInfo.data) { + SMsgHead *msgHead = snapInfo.data; + ASSERT(msgHead->vgId == pSyncNode->vgId); + dataLen = sizeof(SMsgHead) + msgHead->contLen; + } + // build begin msg SRpcMsg rpcMsg = {0}; - if (syncBuildSnapshotSend(&rpcMsg, 0, pSender->pSyncNode->vgId) != 0) { + if (syncBuildSnapshotSend(&rpcMsg, dataLen, pSender->pSyncNode->vgId) != 0) { sSError(pSender, "snapshot sender build msg failed since %s", terrstr()); - return -1; + goto _out; } SyncSnapshotSend *pMsg = rpcMsg.pCont; @@ -114,16 +130,27 @@ int32_t snapshotSenderStart(SSyncSnapshotSender *pSender) { pMsg->startTime = pSender->startTime; pMsg->seq = SYNC_SNAPSHOT_SEQ_PREP_SNAPSHOT; + if (dataLen > 0) { + pMsg->payloadType = snapInfo.typ; + memcpy(pMsg->data, snapInfo.data, dataLen); + } + // event log syncLogSendSyncSnapshotSend(pSender->pSyncNode, pMsg, "snapshot sender start"); // send msg if (syncNodeSendMsgById(&pMsg->destId, pSender->pSyncNode, &rpcMsg) != 0) { sSError(pSender, "snapshot sender send msg failed since %s", terrstr()); - return -1; + goto _out; } - return 0; + code = 0; +_out: + if (snapInfo.data) { + taosMemoryFree(snapInfo.data); + 
snapInfo.data = NULL; + } + return code; } void snapshotSenderStop(SSyncSnapshotSender *pSender, bool finish) { @@ -578,10 +605,29 @@ _SEND_REPLY: // build msg ; // make complier happy + code = -1; + SSnapshot snapInfo = {.typ = TAOS_SYNC_SNAP_INFO_DIFF}; + int32_t dataLen = 0; + if (pMsg->dataLen > 0) { + void *data = taosMemoryCalloc(1, pMsg->dataLen); + if (data == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _out; + } + memcpy(data, pMsg->data, dataLen); + snapInfo.data = data; + data = NULL; + pSyncNode->pFsm->FpGetSnapshotInfo(pSyncNode->pFsm, &snapInfo); + + SMsgHead *msgHead = snapInfo.data; + ASSERT(msgHead->vgId == pSyncNode->vgId); + dataLen = msgHead->contLen; + } + SRpcMsg rpcMsg = {0}; - if (syncBuildSnapshotSendRsp(&rpcMsg, pSyncNode->vgId) != 0) { + if (syncBuildSnapshotSendRsp(&rpcMsg, dataLen, pSyncNode->vgId) != 0) { sRError(pReceiver, "snapshot receiver failed to build resp since %s", terrstr()); - return -1; + goto _out; } SyncSnapshotRsp *pRspMsg = rpcMsg.pCont; @@ -595,13 +641,24 @@ _SEND_REPLY: pRspMsg->code = code; pRspMsg->snapBeginIndex = syncNodeGetSnapBeginIndex(pSyncNode); + if (snapInfo.data) { + pRspMsg->payloadType = snapInfo.typ; + memcpy(pRspMsg->data, snapInfo.data, dataLen); + } + // send msg syncLogSendSyncSnapshotRsp(pSyncNode, pRspMsg, "snapshot receiver pre-snapshot"); if (syncNodeSendMsgById(&pRspMsg->destId, pSyncNode, &rpcMsg) != 0) { sRError(pReceiver, "snapshot receiver failed to build resp since %s", terrstr()); - return -1; + goto _out; } + code = 0; +_out: + if (snapInfo.data) { + taosMemoryFree(snapInfo.data); + snapInfo.data = NULL; + } return code; } @@ -635,7 +692,7 @@ _SEND_REPLY: // build msg SRpcMsg rpcMsg = {0}; - if (syncBuildSnapshotSendRsp(&rpcMsg, pSyncNode->vgId) != 0) { + if (syncBuildSnapshotSendRsp(&rpcMsg, 0, pSyncNode->vgId) != 0) { sRError(pReceiver, "snapshot receiver build resp failed since %s", terrstr()); return -1; } @@ -685,7 +742,7 @@ static int32_t syncNodeOnSnapshotReceive(SSyncNode 
*pSyncNode, SyncSnapshotSend // build msg SRpcMsg rpcMsg = {0}; - if (syncBuildSnapshotSendRsp(&rpcMsg, pSyncNode->vgId)) { + if (syncBuildSnapshotSendRsp(&rpcMsg, 0, pSyncNode->vgId)) { sRError(pReceiver, "snapshot receiver build resp failed since %s", terrstr()); return -1; } @@ -732,7 +789,7 @@ static int32_t syncNodeOnSnapshotEnd(SSyncNode *pSyncNode, SyncSnapshotSend *pMs // build msg SRpcMsg rpcMsg = {0}; - if (syncBuildSnapshotSendRsp(&rpcMsg, pSyncNode->vgId) != 0) { + if (syncBuildSnapshotSendRsp(&rpcMsg, 0, pSyncNode->vgId) != 0) { sRError(pReceiver, "snapshot receiver build rsp failed since %s", terrstr()); return -1; } @@ -869,6 +926,11 @@ static int32_t syncNodeOnSnapshotPrepRsp(SSyncNode *pSyncNode, SSyncSnapshotSend // update sender pSender->snapshot = snapshot; + if (pMsg->payloadType == TAOS_SYNC_SNAP_INFO_DIFF) { + SMsgHead *msgHead = (void *)pMsg->data; + ASSERT(msgHead->vgId == pSyncNode->vgId); + pSender->snapshotParam.data = pMsg->data; + } // start reader int32_t code = pSyncNode->pFsm->FpSnapshotStartRead(pSyncNode->pFsm, &pSender->snapshotParam, &pSender->pReader); if (code != 0) { From 4d554884950565f45490ce8c2aacc47aac4f5187 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Tue, 12 Sep 2023 14:34:06 +0800 Subject: [PATCH 25/61] feat: use SSnapshotParam as a parameter to vnodeSnapReaderOpen --- source/dnode/vnode/inc/vnode.h | 2 +- source/dnode/vnode/src/inc/vnodeInt.h | 3 +- source/dnode/vnode/src/tsdb/tsdbFS2.c | 62 +++++++--------------- source/dnode/vnode/src/tsdb/tsdbSnapshot.c | 9 +++- source/dnode/vnode/src/vnd/vnodeOpen.c | 14 ++--- source/dnode/vnode/src/vnd/vnodeSnapshot.c | 10 ++-- source/dnode/vnode/src/vnd/vnodeSync.c | 3 +- 7 files changed, 41 insertions(+), 62 deletions(-) diff --git a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h index ba7bad67e4..baff19e3d4 100644 --- a/source/dnode/vnode/inc/vnode.h +++ b/source/dnode/vnode/inc/vnode.h @@ -259,7 +259,7 @@ int32_t vnodeEnqueueStreamMsg(SVnode *pVnode, 
SRpcMsg *pMsg); int32_t smaGetTSmaDays(SVnodeCfg *pCfg, void *pCont, uint32_t contLen, int32_t *days); // SVSnapReader -int32_t vnodeSnapReaderOpen(SVnode *pVnode, int64_t sver, int64_t ever, SVSnapReader **ppReader); +int32_t vnodeSnapReaderOpen(SVnode *pVnode, SSnapshotParam *pParam, SVSnapReader **ppReader); void vnodeSnapReaderClose(SVSnapReader *pReader); int32_t vnodeSnapRead(SVSnapReader *pReader, uint8_t **ppData, uint32_t *nData); // SVSnapWriter diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 902cc782ab..27a393abf4 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -295,7 +295,8 @@ int32_t metaSnapWriterOpen(SMeta* pMeta, int64_t sver, int64_t ever, SMetaSnapWr int32_t metaSnapWrite(SMetaSnapWriter* pWriter, uint8_t* pData, uint32_t nData); int32_t metaSnapWriterClose(SMetaSnapWriter** ppWriter, int8_t rollback); // STsdbSnapReader ======================================== -int32_t tsdbSnapReaderOpen(STsdb* pTsdb, int64_t sver, int64_t ever, int8_t type, void* pExclude, +int32_t tsdbSnapGetInfo(STsdb* pTsdb, SSnapshot* pSnap); +int32_t tsdbSnapReaderOpen(STsdb* pTsdb, int64_t sver, int64_t ever, int8_t type, void* pRanges, STsdbSnapReader** ppReader); int32_t tsdbSnapReaderClose(STsdbSnapReader** ppReader); int32_t tsdbSnapRead(STsdbSnapReader* pReader, uint8_t** ppData); diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.c b/source/dnode/vnode/src/tsdb/tsdbFS2.c index b082c450f5..be0271135d 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.c @@ -1252,23 +1252,12 @@ _out: return Z; } -int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, int64_t sver, int64_t ever, TSnapRangeArray *pExclude, +int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, int64_t sver, int64_t ever, TSnapRangeArray *pRanges, TSnapRangeArray **fsrArr) { int32_t code = -1; STFileSet *fset; STSnapRange *fsr1 = NULL; - TSnapRangeArray *snapF = 
tsdbFSToSnapRangeArray(fs); - if (snapF == NULL) { - tsdbError("failed to generate snap ranges from fs since %s.", terrstr()); - goto _out; - } - TSnapRangeArray *snapD = tsdbSnapDiff(snapF, pExclude); - if (snapD == NULL) { - tsdbError("failed to get diff of snap ranges since %s.", terrstr()); - goto _out; - } - fsrArr[0] = taosMemoryCalloc(1, sizeof(*fsrArr[0])); if (fsrArr[0] == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; @@ -1278,37 +1267,32 @@ int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, int64_t sver, int64_t ev int32_t i = 0; code = 0; - // TODO: use the same fs fSetArr as get snapDiff. The following treatment is potentially wrong - // if the fSetArr are changed. taosThreadRwlockRdlock(&fs->tsdb->rwLock); TARRAY2_FOREACH(fs->fSetArr, fset) { - while (i < TARRAY2_SIZE(snapD)) { - STSnapRange *u = TARRAY2_GET(snapD, i); - if (fset->fid < u->fid) { - break; - } else if (fset->fid > u->fid) { - ASSERT(false); + int64_t sver1 = sver; + int64_t ever1 = ever; + + while (pRanges && i < TARRAY2_SIZE(pRanges)) { + STSnapRange *u = TARRAY2_GET(pRanges, i); + if (fset->fid > u->fid) { i++; continue; - } else { + } + + if (fset->fid == u->fid) { + sver1 = u->sver; i++; } - int64_t sver1 = TMAX(sver, u->sver); - int64_t ever1 = TMIN(ever, u->ever); - if (sver1 > ever1) { - continue; - } - tsdbInfo("fsrArr:%p, fid:%d, sver:%" PRId64 ", ever:%" PRId64, fsrArr, fset->fid, sver1, ever1); - - code = tsdbTSnapRangeInitRef(fs->tsdb, fset, sver1, ever1, &fsr1); - if (code) break; - - code = TARRAY2_APPEND(fsrArr[0], fsr1); - if (code) break; - - fsr1 = NULL; } + tsdbInfo("fsrArr:%p, fid:%d, sver:%" PRId64 ", ever:%" PRId64, fsrArr, fset->fid, sver1, ever1); + + code = tsdbTSnapRangeInitRef(fs->tsdb, fset, sver1, ever1, &fsr1); if (code) break; + + code = TARRAY2_APPEND(fsrArr[0], fsr1); + if (code) break; + + fsr1 = NULL; } taosThreadRwlockUnlock(&fs->tsdb->rwLock); @@ -1319,14 +1303,6 @@ int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, int64_t sver, int64_t ev } 
_out: - TSnapRangeArray **ppArrs[2] = {&snapF, &snapD}; - int len = sizeof(ppArrs) / sizeof(ppArrs[0]); - for (int k = 0; k < len; k++) { - if (ppArrs[k][0] == NULL) continue; - TARRAY2_DESTROY(ppArrs[k][0], tsdbTSnapRangeClear); - taosMemoryFree(ppArrs[k][0]); - ppArrs[k][0] = NULL; - } return code; } diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index 2fdad2d662..be88f4e671 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -412,7 +412,7 @@ _exit: return code; } -int32_t tsdbSnapReaderOpen(STsdb* tsdb, int64_t sver, int64_t ever, int8_t type, void* pExclude, +int32_t tsdbSnapReaderOpen(STsdb* tsdb, int64_t sver, int64_t ever, int8_t type, void* pRanges, STsdbSnapReader** reader) { int32_t code = 0; int32_t lino = 0; @@ -425,7 +425,7 @@ int32_t tsdbSnapReaderOpen(STsdb* tsdb, int64_t sver, int64_t ever, int8_t type, reader[0]->ever = ever; reader[0]->type = type; - code = tsdbFSCreateRefRangedSnapshot(tsdb->pFS, sver, ever, (TSnapRangeArray*)pExclude, &reader[0]->fsrArr); + code = tsdbFSCreateRefRangedSnapshot(tsdb->pFS, sver, ever, (TSnapRangeArray*)pRanges, &reader[0]->fsrArr); TSDB_CHECK_CODE(code, lino, _exit); _exit: @@ -1157,3 +1157,8 @@ _exit: } return code; } + +int32_t tsdbSnapGetInfo(STsdb* pTsdb, SSnapshot* pSnap) { + // TODO: get the full and diff info of tsdb Snap + return 0; +} diff --git a/source/dnode/vnode/src/vnd/vnodeOpen.c b/source/dnode/vnode/src/vnd/vnodeOpen.c index 9228269992..fada83a7f1 100644 --- a/source/dnode/vnode/src/vnd/vnodeOpen.c +++ b/source/dnode/vnode/src/vnd/vnodeOpen.c @@ -518,13 +518,9 @@ void vnodeStop(SVnode *pVnode) {} int64_t vnodeGetSyncHandle(SVnode *pVnode) { return pVnode->sync; } -int32_t vnodeGetSnapshot(SVnode *pVnode, SSnapshot *pSnapshot) { - pSnapshot->lastApplyIndex = pVnode->state.committed; - pSnapshot->lastApplyTerm = pVnode->state.commitTerm; - pSnapshot->lastConfigIndex = -1; - - if 
(pSnapshot->typ == TAOS_SYNC_SNAP_INFO_FULL) { - // TODO: get full info of snapshots - } - return 0; +int32_t vnodeGetSnapshot(SVnode *pVnode, SSnapshot *pSnap) { + pSnap->lastApplyIndex = pVnode->state.committed; + pSnap->lastApplyTerm = pVnode->state.commitTerm; + pSnap->lastConfigIndex = -1; + return tsdbSnapGetInfo(pVnode->pTsdb, pSnap); } diff --git a/source/dnode/vnode/src/vnd/vnodeSnapshot.c b/source/dnode/vnode/src/vnd/vnodeSnapshot.c index 6beb8e8dbf..5934826ea4 100644 --- a/source/dnode/vnode/src/vnd/vnodeSnapshot.c +++ b/source/dnode/vnode/src/vnd/vnodeSnapshot.c @@ -29,7 +29,7 @@ struct SVSnapReader { SMetaSnapReader *pMetaReader; // tsdb int8_t tsdbDone; - TSnapRangeArray *pExclude; + TSnapRangeArray *pRanges; STsdbSnapReader *pTsdbReader; // tq int8_t tqHandleDone; @@ -48,8 +48,10 @@ struct SVSnapReader { SRSmaSnapReader *pRsmaReader; }; -int32_t vnodeSnapReaderOpen(SVnode *pVnode, int64_t sver, int64_t ever, SVSnapReader **ppReader) { +int32_t vnodeSnapReaderOpen(SVnode *pVnode, SSnapshotParam *pParam, SVSnapReader **ppReader) { int32_t code = 0; + int64_t sver = pParam->start; + int64_t ever = pParam->end; SVSnapReader *pReader = NULL; pReader = (SVSnapReader *)taosMemoryCalloc(1, sizeof(*pReader)); @@ -61,7 +63,7 @@ int32_t vnodeSnapReaderOpen(SVnode *pVnode, int64_t sver, int64_t ever, SVSnapRe pReader->sver = sver; pReader->ever = ever; - // TODO: pReader->pEx + // TODO: decode pParam->data and store the result in pReader->pRanges vInfo("vgId:%d, vnode snapshot reader opened, sver:%" PRId64 " ever:%" PRId64, TD_VID(pVnode), sver, ever); *ppReader = pReader; @@ -179,7 +181,7 @@ int32_t vnodeSnapRead(SVSnapReader *pReader, uint8_t **ppData, uint32_t *nData) if (!pReader->tsdbDone) { // open if not if (pReader->pTsdbReader == NULL) { - code = tsdbSnapReaderOpen(pReader->pVnode->pTsdb, pReader->sver, pReader->ever, SNAP_DATA_TSDB, pReader->pExclude, + code = tsdbSnapReaderOpen(pReader->pVnode->pTsdb, pReader->sver, pReader->ever, SNAP_DATA_TSDB, 
pReader->pRanges, &pReader->pTsdbReader); if (code) goto _err; } diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index b9f2d23c7b..e676219b11 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -475,8 +475,7 @@ static void vnodeSyncRollBackMsg(const SSyncFSM *pFsm, SRpcMsg *pMsg, SFsmCbMeta static int32_t vnodeSnapshotStartRead(const SSyncFSM *pFsm, void *pParam, void **ppReader) { SVnode *pVnode = pFsm->data; - SSnapshotParam *pSnapshotParam = pParam; - int32_t code = vnodeSnapReaderOpen(pVnode, pSnapshotParam->start, pSnapshotParam->end, (SVSnapReader **)ppReader); + int32_t code = vnodeSnapReaderOpen(pVnode, (SSnapshotParam *)pParam, (SVSnapReader **)ppReader); return code; } From 517f1f7e40b1ecb5ef086c0463e58a8a06f7a5ff Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Tue, 12 Sep 2023 19:27:54 +0800 Subject: [PATCH 26/61] fixup: fix syncNodeOnSnapshotPrep --- source/libs/sync/src/syncSnapshot.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index 00dcd7e949..73b6940628 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -605,15 +605,16 @@ _SEND_REPLY: // build msg ; // make complier happy - code = -1; SSnapshot snapInfo = {.typ = TAOS_SYNC_SNAP_INFO_DIFF}; int32_t dataLen = 0; if (pMsg->dataLen > 0) { void *data = taosMemoryCalloc(1, pMsg->dataLen); if (data == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; + code = terrno; goto _out; } + dataLen = pMsg->dataLen; memcpy(data, pMsg->data, dataLen); snapInfo.data = data; data = NULL; @@ -627,6 +628,7 @@ _SEND_REPLY: SRpcMsg rpcMsg = {0}; if (syncBuildSnapshotSendRsp(&rpcMsg, dataLen, pSyncNode->vgId) != 0) { sRError(pReceiver, "snapshot receiver failed to build resp since %s", terrstr()); + code = terrno; goto _out; } @@ -650,10 +652,9 @@ _SEND_REPLY: 
syncLogSendSyncSnapshotRsp(pSyncNode, pRspMsg, "snapshot receiver pre-snapshot"); if (syncNodeSendMsgById(&pRspMsg->destId, pSyncNode, &rpcMsg) != 0) { sRError(pReceiver, "snapshot receiver failed to build resp since %s", terrstr()); - goto _out; + code = terrno; } - code = 0; _out: if (snapInfo.data) { taosMemoryFree(snapInfo.data); From c1b2eedd3cdf2994a1e6f96ece68285ea20e8113 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Tue, 12 Sep 2023 19:29:22 +0800 Subject: [PATCH 27/61] feat: impl tsdbSnapGetInfo --- source/dnode/vnode/src/inc/tsdb.h | 5 + source/dnode/vnode/src/tsdb/tsdbFS2.c | 269 --------------------- source/dnode/vnode/src/tsdb/tsdbSnapshot.c | 198 ++++++++++++++- source/dnode/vnode/src/vnd/vnodeSnapshot.c | 20 +- 4 files changed, 220 insertions(+), 272 deletions(-) diff --git a/source/dnode/vnode/src/inc/tsdb.h b/source/dnode/vnode/src/inc/tsdb.h index 4996c6c484..ffd74dc3d1 100644 --- a/source/dnode/vnode/src/inc/tsdb.h +++ b/source/dnode/vnode/src/inc/tsdb.h @@ -676,6 +676,11 @@ typedef TARRAY2(STFileSet *) TFileSetArray; typedef struct STSnapRange STSnapRange; typedef TARRAY2(STSnapRange *) TSnapRangeArray; // disjoint snap ranges +// util +int32_t tSerializeSnapRangeArray(void *buf, int32_t bufLen, TSnapRangeArray *pSnapR); +int32_t tDeserializeSnapRangeArray(void *buf, int32_t bufLen, TSnapRangeArray *pSnapR); +void tsdbSnapRangeArrayDestroy(TSnapRangeArray **ppSnap); + struct STsdbReadSnap { SMemTable *pMem; SQueryNode *pNode; diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.c b/source/dnode/vnode/src/tsdb/tsdbFS2.c index be0271135d..01d423da2c 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.c @@ -983,275 +983,6 @@ int32_t tsdbFSDestroyRefSnapshot(TFileSetArray **fsetArr) { return 0; } -static int32_t tsdbTSnapRangeCmprFn(STSnapRange *fsr1, STSnapRange *fsr2) { - if (fsr1->fid < fsr2->fid) return -1; - if (fsr1->fid > fsr2->fid) return 1; - if (fsr1->sver < fsr2->sver) return -1; - if (fsr1->sver 
> fsr2->sver) return 1; - if (fsr1->ever < fsr2->ever) return -1; - if (fsr1->ever < fsr2->ever) return 1; - return 0; -} - -static int32_t tsdbTFileInsertSnapRange(STFile *f, TSnapRangeArray *snapR) { - STSnapRange *fsr = taosMemoryCalloc(1, sizeof(*fsr)); - if (fsr == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - fsr->fid = f->fid; - fsr->sver = f->minVer; - fsr->ever = f->maxVer; - - int32_t code = TARRAY2_SORT_INSERT(snapR, fsr, tsdbTSnapRangeCmprFn); - if (code) { - taosMemoryFree(fsr); - fsr = NULL; - } - return code; -} - -static int32_t tsdbTFSetInsertSnapRange(STFileSet *fset, TSnapRangeArray *snapR) { - STFile tf = {.fid = fset->fid, .minVer = VERSION_MAX, .maxVer = VERSION_MIN}; - for (int32_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ++ftype) { - if (fset->farr[ftype] == NULL) continue; - STFile *f = fset->farr[ftype]->f; - tsdbTFileUpdVerRange(&tf, (SVersionRange){.minVer = f->minVer, .maxVer = f->maxVer}); - } - - int32_t code = tsdbTFileInsertSnapRange(&tf, snapR); - if (code) return code; - - const SSttLvl *lvl; - TARRAY2_FOREACH(fset->lvlArr, lvl) { - STFileObj *fobj; - TARRAY2_FOREACH(lvl->fobjArr, fobj) { - // tsdbTFileUpdVerRange(&tf, (SVersionRange){.minVer = fobj->f->minVer, .maxVer = fobj->f->maxVer}); - code = tsdbTFileInsertSnapRange(fobj->f, snapR); - if (code) return code; - } - } - - // int32_t code = tsdbTFileInsertSnapRange(&tf, snapR); - // if (code) return code; - return code; -} - -TSnapRangeArray *tsdbFSToSnapRangeArray(STFileSystem *fs) { - int32_t code = 0; - TSnapRangeArray *snapR = taosMemoryCalloc(1, sizeof(*snapR)); - if (snapR == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return NULL; - } - TARRAY2_INIT(snapR); - - taosThreadRwlockRdlock(&fs->tsdb->rwLock); - STFileSet *fset; - TARRAY2_FOREACH(fs->fSetArr, fset) { - code = tsdbTFSetInsertSnapRange(fset, snapR); - if (code) break; - } - taosThreadRwlockUnlock(&fs->tsdb->rwLock); - - if (code) { - TARRAY2_DESTROY(snapR, tsdbTSnapRangeClear); - 
taosMemoryFree(snapR); - snapR = NULL; - } - return snapR; -} - -static STSnapRange *taosDupSnapRange(STSnapRange *x) { - STSnapRange *y = taosMemoryCalloc(1, sizeof(STSnapRange)); - if (y == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - ASSERTS(terrno == 0, "Out of memory"); - return NULL; - } - y->fid = x->fid; - y->sver = x->sver; - y->ever = x->ever; - return y; -} - -static TSnapRangeArray *taosDupSnapRangeArray(const TSnapRangeArray *X) { - TSnapRangeArray *Y = taosMemoryCalloc(1, sizeof(*Y)); - if (Y == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return NULL; - } - TARRAY2_INIT(Y); - - if (X) { - STSnapRange *x; - TARRAY2_FOREACH(X, x) { - STSnapRange *tp = taosDupSnapRange(x); - TARRAY2_APPEND(Y, tp); - } - } - return Y; -} - -static TSnapRangeArray *tsdbSnapDiff(const TSnapRangeArray *snapR, const TSnapRangeArray *pExclude) { - TSnapRangeArray *Z = NULL; - TSnapRangeArray *U = NULL; - TSnapRangeArray *V = NULL; - TSnapRangeArray *X = taosDupSnapRangeArray(snapR); - TSnapRangeArray *Y = taosDupSnapRangeArray(pExclude); - int32_t code = -1; - - // separate intersections of snap ranges - U = taosMemoryCalloc(1, sizeof(TSnapRangeArray)); - if (U == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - goto _out; - } - TARRAY2_INIT(U); - - V = taosMemoryCalloc(1, sizeof(TSnapRangeArray)); - if (V == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - goto _out; - } - TARRAY2_INIT(V); - - int32_t i = 0; - int32_t j = 0; - while (i < TARRAY2_SIZE(X) && j < TARRAY2_SIZE(Y)) { - STSnapRange *x = TARRAY2_GET(X, i); - STSnapRange *y = TARRAY2_GET(Y, j); - - if (x->fid < y->fid) { - STSnapRange *tmp = taosDupSnapRange(x); - TARRAY2_APPEND(U, tmp); - i++; - } else if (x->fid > y->fid) { - STSnapRange *tmp = taosDupSnapRange(y); - TARRAY2_APPEND(V, tmp); - j++; - } else { - if (x->sver < y->sver) { - if (x->ever < y->ever) { - STSnapRange *tmp = taosDupSnapRange(x); - TARRAY2_APPEND(U, tmp); - i++; - } else { - STSnapRange *tmp = taosDupSnapRange(x); - tmp->ever = y->sver - 1; - 
TARRAY2_APPEND(U, tmp); - x->sver = y->sver; - } - } else if (x->sver > y->sver) { - if (y->ever < x->ever) { - STSnapRange *tmp = taosDupSnapRange(y); - TARRAY2_APPEND(V, tmp); - j++; - } else { - STSnapRange *tmp = taosDupSnapRange(y); - tmp->ever = x->sver - 1; - TARRAY2_APPEND(V, tmp); - y->sver = x->sver; - } - } else { - if (x->ever < y->ever) { - STSnapRange *tmp = taosDupSnapRange(x); - TARRAY2_APPEND(U, tmp); - i++; - tmp = taosDupSnapRange(y); - tmp->ever = x->ever; - TARRAY2_APPEND(V, tmp); - y->sver = x->ever + 1; - } else if (x->ever > y->ever) { - STSnapRange *tmp = taosDupSnapRange(y); - TARRAY2_APPEND(V, tmp); - j++; - tmp = taosDupSnapRange(x); - tmp->ever = y->ever; - TARRAY2_APPEND(U, tmp); - x->sver = y->ever + 1; - } else { - STSnapRange *tmp = taosDupSnapRange(x); - TARRAY2_APPEND(U, tmp); - i++; - tmp = taosDupSnapRange(y); - TARRAY2_APPEND(V, tmp); - j++; - } - } - } - } - while (i < TARRAY2_SIZE(X)) { - STSnapRange *x = TARRAY2_GET(X, i); - STSnapRange *tmp = taosDupSnapRange(x); - TARRAY2_APPEND(U, tmp); - i++; - } - while (j < TARRAY2_SIZE(Y)) { - STSnapRange *y = TARRAY2_GET(Y, j); - STSnapRange *tmp = taosDupSnapRange(y); - TARRAY2_APPEND(V, tmp); - j++; - } - - // difference of snap ranges - Z = taosMemoryCalloc(1, sizeof(*Z)); - if (Z == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - goto _out; - } - TARRAY2_INIT(Z); - - i = 0; - j = 0; - while (i < TARRAY2_SIZE(U) && j < TARRAY2_SIZE(V)) { - STSnapRange *u = TARRAY2_GET(U, i); - STSnapRange *v = TARRAY2_GET(V, j); - - if (u->fid < v->fid) { - STSnapRange *tmp = taosDupSnapRange(u); - TARRAY2_APPEND(Z, tmp); - i++; - } else if (u->fid == v->fid) { - if (u->sver < v->sver) { - STSnapRange *tmp = taosDupSnapRange(u); - TARRAY2_APPEND(Z, tmp); - i++; - } else if (u->sver > v->sver) { - ASSERT(u->ever > v->ever); - j++; - } else { - ASSERT(u->ever == v->ever); - i++; - j++; - } - } - } - while (i < TARRAY2_SIZE(U)) { - STSnapRange *u = TARRAY2_GET(U, i); - STSnapRange *tmp = 
taosDupSnapRange(u); - TARRAY2_APPEND(Z, tmp); - i++; - } - - code = 0; -_out: - TSnapRangeArray **ppArrs[4] = {&X, &Y, &U, &V}; - int len = sizeof(ppArrs) / sizeof(ppArrs[0]); - for (int k = 0; k < len; k++) { - if (ppArrs[k][0] == NULL) continue; - TARRAY2_DESTROY(ppArrs[k][0], tsdbTSnapRangeClear); - taosMemoryFree(ppArrs[k][0]); - ppArrs[k][0] = NULL; - } - if (code != 0 && Z) { - TARRAY2_DESTROY(Z, tsdbTSnapRangeClear); - taosMemoryFree(Z); - Z = NULL; - } - return Z; -} - int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, int64_t sver, int64_t ever, TSnapRangeArray *pRanges, TSnapRangeArray **fsrArr) { int32_t code = -1; diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index be88f4e671..de1ea27c4d 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -1158,7 +1158,201 @@ _exit: return code; } -int32_t tsdbSnapGetInfo(STsdb* pTsdb, SSnapshot* pSnap) { - // TODO: get the full and diff info of tsdb Snap +static int32_t tsdbTSnapRangeCmprFn(STSnapRange* fsr1, STSnapRange* fsr2) { + if (fsr1->fid < fsr2->fid) return -1; + if (fsr1->fid > fsr2->fid) return 1; return 0; } + +static int32_t tsdbTFileInsertSnapRange(STFile* f, TSnapRangeArray* snapR) { + STSnapRange* fsr = taosMemoryCalloc(1, sizeof(*fsr)); + if (fsr == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + fsr->fid = f->fid; + fsr->sver = f->minVer; + fsr->ever = f->maxVer; + + int32_t code = TARRAY2_SORT_INSERT(snapR, fsr, tsdbTSnapRangeCmprFn); + if (code) { + taosMemoryFree(fsr); + fsr = NULL; + } + return code; +} + +static int32_t tsdbTFSetInsertSnapRange(STFileSet* fset, TSnapRangeArray* snapR) { + STFile tf = {.fid = fset->fid, .minVer = VERSION_MAX, .maxVer = VERSION_MIN}; + for (int32_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ++ftype) { + if (fset->farr[ftype] == NULL) continue; + STFile* f = fset->farr[ftype]->f; + tsdbTFileUpdVerRange(&tf, (SVersionRange){.minVer = 
f->minVer, .maxVer = f->maxVer}); + } + + const SSttLvl* lvl; + TARRAY2_FOREACH(fset->lvlArr, lvl) { + STFileObj* fobj; + TARRAY2_FOREACH(lvl->fobjArr, fobj) { + tsdbTFileUpdVerRange(&tf, (SVersionRange){.minVer = fobj->f->minVer, .maxVer = fobj->f->maxVer}); + } + } + + int32_t code = tsdbTFileInsertSnapRange(&tf, snapR); + if (code) return code; + return code; +} + +static TSnapRangeArray* tsdbGetSnapRangeArray(STFileSystem* fs) { + int32_t code = 0; + TSnapRangeArray* snapR = taosMemoryCalloc(1, sizeof(*snapR)); + if (snapR == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return NULL; + } + TARRAY2_INIT(snapR); + + taosThreadRwlockRdlock(&fs->tsdb->rwLock); + STFileSet* fset; + TARRAY2_FOREACH(fs->fSetArr, fset) { + code = tsdbTFSetInsertSnapRange(fset, snapR); + if (code) break; + } + taosThreadRwlockUnlock(&fs->tsdb->rwLock); + + if (code) { + TARRAY2_DESTROY(snapR, tsdbTSnapRangeClear); + taosMemoryFree(snapR); + snapR = NULL; + } + return snapR; +} + +int32_t tSerializeSnapRangeArray(void* buf, int32_t bufLen, TSnapRangeArray* pSnapR) { + SEncoder encoder = {0}; + tEncoderInit(&encoder, buf, bufLen); + + int8_t msgVer = 1; + int32_t arrLen = TARRAY2_SIZE(pSnapR); + int8_t reserved8 = 0; + if (tStartEncode(&encoder) < 0) goto _err; + if (tEncodeI8(&encoder, msgVer) < 0) goto _err; + if (tEncodeI8(&encoder, reserved8) < 0) goto _err; + if (tEncodeI32(&encoder, arrLen) < 0) goto _err; + + int64_t reserved64 = 0; + for (int32_t i = 0; i < arrLen; i++) { + STSnapRange* u = TARRAY2_GET(pSnapR, i); + int64_t fid = u->fid; + if (tEncodeI64(&encoder, fid) < 0) goto _err; + if (tEncodeI64(&encoder, u->sver) < 0) goto _err; + if (tEncodeI64(&encoder, u->ever) < 0) goto _err; + if (tEncodeI64(&encoder, reserved64) < 0) goto _err; + } + + tEndEncode(&encoder); + int32_t tlen = encoder.pos; + tEncoderClear(&encoder); + return tlen; + +_err: + tEncoderClear(&encoder); + return -1; +} + +int32_t tDeserializeSnapRangeArray(void* buf, int32_t bufLen, TSnapRangeArray* pSnapR) 
{ + SDecoder decoder = {0}; + tDecoderInit(&decoder, buf, bufLen); + + int8_t msgVer = 0; + int32_t arrLen = 0; + int8_t reserved8 = 0; + if (tStartDecode(&decoder) < 0) goto _err; + if (tDecodeI8(&decoder, &msgVer) < 0) goto _err; + if (tDecodeI8(&decoder, &reserved8) < 0) goto _err; + if (tDecodeI32(&decoder, &arrLen) < 0) goto _err; + + int64_t fid = 0; + int64_t reserved64 = 0; + STSnapRange* pRange = NULL; + for (int32_t i = 0; i < arrLen; i++) { + pRange = taosMemoryCalloc(1, sizeof(STSnapRange)); + if (tDecodeI64(&decoder, &fid) < 0) goto _err; + pRange->fid = fid; + if (tDecodeI64(&decoder, &pRange->sver) < 0) goto _err; + if (tDecodeI64(&decoder, &pRange->ever) < 0) goto _err; + if (tDecodeI64(&decoder, &reserved64) < 0) goto _err; + TARRAY2_APPEND(pSnapR, pRange); + pRange = NULL; + } + + tEndDecode(&decoder); + tDecoderClear(&decoder); + return 0; + +_err: + if (pRange) { + taosMemoryFree(pRange); + pRange = NULL; + } + tDecoderClear(&decoder); + return -1; +} + +void tsdbSnapRangeArrayDestroy(TSnapRangeArray** ppSnap) { + TARRAY2_DESTROY(ppSnap[0], tsdbTSnapRangeClear); + taosMemoryFree(ppSnap[0]); + ppSnap[0] = NULL; +} + +static int32_t tsdbSnapInfoDataLenCalc(TSnapRangeArray* pSnap) { + int32_t headerLen = 8; + int32_t itemLen = sizeof(STSnapRange) + 8; + int32_t size = TARRAY2_SIZE(pSnap); + return headerLen + itemLen * size; +} + +int32_t tsdbSnapGetInfo(STsdb* pTsdb, SSnapshot* pSnap) { + int32_t code = 0; + if (pSnap->typ == TAOS_SYNC_SNAP_INFO_BRIEF) { + return 0; + } + code = -1; + TSnapRangeArray* snapR = tsdbGetSnapRangeArray(pTsdb->pFS); + if (snapR == NULL) { + goto _out; + } + if (pSnap->typ == TAOS_SYNC_SNAP_INFO_DIFF) { + for (int32_t i = 0; i < TARRAY2_SIZE(snapR); i++) { + STSnapRange* u = TARRAY2_GET(snapR, i); + u->sver = u->ever + 1; + u->ever = VERSION_MAX; + } + } + + int32_t bufLen = sizeof(SMsgHead) + tsdbSnapInfoDataLenCalc(snapR); + void* data = taosMemoryRealloc(pSnap->data, bufLen); + if (data == NULL) { + terrno = 
TSDB_CODE_OUT_OF_MEMORY; + goto _out; + } + pSnap->data = data; + void* buf = ((char*)data) + sizeof(SMsgHead); + int32_t tlen = 0; + + if ((tlen = tSerializeSnapRangeArray(buf, bufLen, snapR)) < 0) { + tsdbError("vgId:%d, failed to serialize snap range since %s", TD_VID(pTsdb->pVnode), terrstr()); + goto _out; + } + SMsgHead* msgHead = pSnap->data; + msgHead->contLen = tlen; + msgHead->vgId = TD_VID(pTsdb->pVnode); + + code = 0; +_out: + if (snapR) { + tsdbSnapRangeArrayDestroy(&snapR); + } + + return code; +} diff --git a/source/dnode/vnode/src/vnd/vnodeSnapshot.c b/source/dnode/vnode/src/vnd/vnodeSnapshot.c index 5934826ea4..418d8632c9 100644 --- a/source/dnode/vnode/src/vnd/vnodeSnapshot.c +++ b/source/dnode/vnode/src/vnd/vnodeSnapshot.c @@ -63,7 +63,22 @@ int32_t vnodeSnapReaderOpen(SVnode *pVnode, SSnapshotParam *pParam, SVSnapReader pReader->sver = sver; pReader->ever = ever; - // TODO: decode pParam->data and store the result in pReader->pRanges + if (pParam->data) { + pReader->pRanges = taosMemoryCalloc(1, sizeof(*pReader->pRanges)); + if (pReader->pRanges == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + TARRAY2_INIT(pReader->pRanges); + SMsgHead *msgHead = pParam->data; + ASSERT(msgHead->vgId == TD_VID(pVnode)); + void *buf = (char *)pParam->data + sizeof(SMsgHead); + + if (tDeserializeSnapRangeArray(buf, msgHead->contLen, pReader->pRanges) < 0) { + vError("vgId:%d, failed to deserialize snap range.", TD_VID(pVnode)); + goto _err; + } + } vInfo("vgId:%d, vnode snapshot reader opened, sver:%" PRId64 " ever:%" PRId64, TD_VID(pVnode), sver, ever); *ppReader = pReader; @@ -101,6 +116,9 @@ void vnodeSnapReaderClose(SVSnapReader *pReader) { tqCheckInfoReaderClose(&pReader->pTqCheckInfoReader); } + if (pReader->pRanges) { + tsdbSnapRangeArrayDestroy(&pReader->pRanges); + } taosMemoryFree(pReader); } From f99795d027f8a1ed95abed40787735bb321cbfe5 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Wed, 13 Sep 2023 09:16:08 +0800 Subject: [PATCH 
28/61] enh: use tsdbSnapRangeArrayDestroy --- source/dnode/vnode/src/tsdb/tsdbFS2.c | 9 --------- source/dnode/vnode/src/tsdb/tsdbSnapshot.c | 12 +++++++----- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.c b/source/dnode/vnode/src/tsdb/tsdbFS2.c index 01d423da2c..d71473b079 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.c @@ -1037,15 +1037,6 @@ _out: return code; } -int32_t tsdbFSDestroyRefRangedSnapshot(TSnapRangeArray **fsrArr) { - if (fsrArr[0]) { - TARRAY2_DESTROY(fsrArr[0], tsdbTSnapRangeClear); - taosMemoryFreeClear(fsrArr[0]); - fsrArr[0] = NULL; - } - return 0; -} - const char *gFSBgTaskName[] = {NULL, "MERGE", "RETENTION", "COMPACT"}; static int32_t tsdbFSRunBgTask(void *arg) { diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index de1ea27c4d..3e18f01f04 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -432,7 +432,7 @@ _exit: if (code) { tsdbError("vgId:%d %s failed at line %d since %s, sver:%" PRId64 " ever:%" PRId64 " type:%d", TD_VID(tsdb->pVnode), __func__, lino, tstrerror(code), sver, ever, type); - tsdbFSDestroyRefRangedSnapshot(&reader[0]->fsrArr); + tsdbSnapRangeArrayDestroy(&reader[0]->fsrArr); taosMemoryFree(reader[0]); reader[0] = NULL; } else { @@ -460,7 +460,7 @@ int32_t tsdbSnapReaderClose(STsdbSnapReader** reader) { TARRAY2_DESTROY(reader[0]->sttReaderArr, tsdbSttFileReaderClose); tsdbDataFileReaderClose(&reader[0]->dataReader); - tsdbFSDestroyRefRangedSnapshot(&reader[0]->fsrArr); + tsdbSnapRangeArrayDestroy(&reader[0]->fsrArr); tDestroyTSchema(reader[0]->skmTb->pTSchema); for (int32_t i = 0; i < ARRAY_SIZE(reader[0]->aBuf); ++i) { @@ -1300,9 +1300,11 @@ _err: } void tsdbSnapRangeArrayDestroy(TSnapRangeArray** ppSnap) { - TARRAY2_DESTROY(ppSnap[0], tsdbTSnapRangeClear); - taosMemoryFree(ppSnap[0]); - ppSnap[0] = NULL; + if (ppSnap && 
ppSnap[0]) { + TARRAY2_DESTROY(ppSnap[0], tsdbTSnapRangeClear); + taosMemoryFree(ppSnap[0]); + ppSnap[0] = NULL; + } } static int32_t tsdbSnapInfoDataLenCalc(TSnapRangeArray* pSnap) { From 410ced8320c554fffec525c78bb35ffb05968a65 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Thu, 14 Sep 2023 18:22:19 +0800 Subject: [PATCH 29/61] feat: use TLV format to encode data of snapshot info --- include/common/tmsgdef.h | 4 +- include/libs/sync/sync.h | 6 + source/dnode/mgmt/mgmt_mnode/src/mmHandle.c | 4 +- source/dnode/mgmt/mgmt_vnode/src/vmHandle.c | 4 +- source/dnode/vnode/src/inc/tsdb.h | 27 ++ source/dnode/vnode/src/tsdb/tsdbFSet2.h | 1 + source/dnode/vnode/src/tsdb/tsdbSnapshot.c | 370 ++++++++++++++------ source/dnode/vnode/src/vnd/vnodeSnapshot.c | 46 ++- source/libs/sync/src/syncMessage.c | 36 -- source/libs/sync/src/syncSnapshot.c | 41 ++- 10 files changed, 371 insertions(+), 168 deletions(-) diff --git a/include/common/tmsgdef.h b/include/common/tmsgdef.h index 4a2ae18765..b92bba831c 100644 --- a/include/common/tmsgdef.h +++ b/include/common/tmsgdef.h @@ -299,8 +299,8 @@ enum { // WARN: new msg should be appended to segment tail TD_DEF_MSG_TYPE(TDMT_SYNC_HEARTBEAT, "sync-heartbeat", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_SYNC_HEARTBEAT_REPLY, "sync-heartbeat-reply", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_SYNC_LOCAL_CMD, "sync-local-cmd", NULL, NULL) - TD_DEF_MSG_TYPE(TDMT_SYNC_PRE_SNAPSHOT, "sync-pre-snapshot", NULL, NULL) // no longer used - TD_DEF_MSG_TYPE(TDMT_SYNC_PRE_SNAPSHOT_REPLY, "sync-pre-snapshot-reply", NULL, NULL) // no longer used + TD_DEF_MSG_TYPE(TDMT_SYNC_PREP_SNAPSHOT, "sync-prep-snapshot", NULL, NULL) + TD_DEF_MSG_TYPE(TDMT_SYNC_PREP_SNAPSHOT_REPLY, "sync-prep-snapshot-reply", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_SYNC_MAX_MSG, "sync-max", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_SYNC_FORCE_FOLLOWER, "sync-force-become-follower", NULL, NULL) diff --git a/include/libs/sync/sync.h b/include/libs/sync/sync.h index 53e6ec0d71..cc381bb54e 100644 --- 
a/include/libs/sync/sync.h +++ b/include/libs/sync/sync.h @@ -101,6 +101,12 @@ typedef struct SNodeInfo { ESyncRole nodeRole; } SNodeInfo; +typedef struct SSyncTLV { + int32_t typ; + int32_t len; + char val[]; +} SSyncTLV; + typedef struct SSyncCfg { int32_t totalReplicaNum; int32_t replicaNum; diff --git a/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c b/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c index 24b5b2566c..d5488da770 100644 --- a/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c +++ b/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c @@ -238,7 +238,7 @@ SArray *mmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_SYNC_APPEND_ENTRIES_BATCH, mmPutMsgToSyncQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SYNC_APPEND_ENTRIES_REPLY, mmPutMsgToSyncQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SYNC_SNAPSHOT_SEND, mmPutMsgToSyncQueue, 1) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_SYNC_PRE_SNAPSHOT, mmPutMsgToSyncQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_SYNC_PREP_SNAPSHOT, mmPutMsgToSyncQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SYNC_FORCE_FOLLOWER_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER; @@ -246,7 +246,7 @@ SArray *mmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_SYNC_HEARTBEAT, mmPutMsgToSyncRdQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SYNC_HEARTBEAT_REPLY, mmPutMsgToSyncRdQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SYNC_SNAPSHOT_RSP, mmPutMsgToSyncRdQueue, 1) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_SYNC_PRE_SNAPSHOT_REPLY, mmPutMsgToSyncRdQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_SYNC_PREP_SNAPSHOT_REPLY, mmPutMsgToSyncRdQueue, 1) == NULL) goto _OVER; code = 0; diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c index 0e17d2b75f..b4fe824466 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c +++ 
b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c @@ -848,14 +848,14 @@ SArray *vmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_SYNC_APPEND_ENTRIES_BATCH, vmPutMsgToSyncQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SYNC_APPEND_ENTRIES_REPLY, vmPutMsgToSyncQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SYNC_SNAPSHOT_SEND, vmPutMsgToSyncQueue, 0) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_SYNC_PRE_SNAPSHOT, vmPutMsgToSyncQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_SYNC_PREP_SNAPSHOT, vmPutMsgToSyncQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SYNC_FORCE_FOLLOWER, vmPutMsgToSyncQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SYNC_TIMEOUT, vmPutMsgToSyncRdQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SYNC_HEARTBEAT, vmPutMsgToSyncRdQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SYNC_HEARTBEAT_REPLY, vmPutMsgToSyncRdQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SYNC_SNAPSHOT_RSP, vmPutMsgToSyncRdQueue, 0) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_SYNC_PRE_SNAPSHOT_REPLY, vmPutMsgToSyncRdQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_SYNC_PREP_SNAPSHOT_REPLY, vmPutMsgToSyncRdQueue, 0) == NULL) goto _OVER; code = 0; diff --git a/source/dnode/vnode/src/inc/tsdb.h b/source/dnode/vnode/src/inc/tsdb.h index ffd74dc3d1..4cea7c5e85 100644 --- a/source/dnode/vnode/src/inc/tsdb.h +++ b/source/dnode/vnode/src/inc/tsdb.h @@ -681,6 +681,33 @@ int32_t tSerializeSnapRangeArray(void *buf, int32_t bufLen, TSnapRangeArray *pSn int32_t tDeserializeSnapRangeArray(void *buf, int32_t bufLen, TSnapRangeArray *pSnapR); void tsdbSnapRangeArrayDestroy(TSnapRangeArray **ppSnap); +// snap partition list +typedef TARRAY2(SVersionRange) SVerRangeList; +typedef struct STsdbSnapPartition STsdbSnapPartition; +typedef TARRAY2(STsdbSnapPartition *) STsdbSnapPartList; +// util +STsdbSnapPartList 
*tsdbSnapPartListCreate(); +void tsdbSnapPartListDestroy(STsdbSnapPartList **ppList); +int32_t tSerializeTsdbSnapPartList(void *buf, int32_t bufLen, STsdbSnapPartList *pList); +int32_t tDeserializeTsdbSnapPartList(void *buf, int32_t bufLen, STsdbSnapPartList *pList); +int32_t tsdbSnapPartListToRangeDiff(STsdbSnapPartList *pList, TSnapRangeArray **ppRanges); + +enum { + TSDB_SNAP_RANGE_TYP_HEAD = 0, + TSDB_SNAP_RANGE_TYP_DATA, + TSDB_SNAP_RANGE_TYP_SMA, + TSDB_SNAP_RANGE_TYP_TOMB, + TSDB_SNAP_RANGE_TYP_STT, + TSDB_SNAP_RANGE_TYP_MAX, +}; + +struct STsdbSnapPartition { + int64_t fid; + int8_t stat; + SVerRangeList verRanges[TSDB_SNAP_RANGE_TYP_MAX]; +}; + +// snap read struct STsdbReadSnap { SMemTable *pMem; SQueryNode *pNode; diff --git a/source/dnode/vnode/src/tsdb/tsdbFSet2.h b/source/dnode/vnode/src/tsdb/tsdbFSet2.h index 756250157b..e6d78a8cfe 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFSet2.h +++ b/source/dnode/vnode/src/tsdb/tsdbFSet2.h @@ -81,6 +81,7 @@ struct SSttLvl { struct STFileSet { int32_t fid; + int8_t stat; STFileObj *farr[TSDB_FTYPE_MAX]; // file array TSttLvlArray lvlArr[1]; // level array }; diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index 3e18f01f04..9710eee6c4 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -1158,96 +1158,198 @@ _exit: return code; } -static int32_t tsdbTSnapRangeCmprFn(STSnapRange* fsr1, STSnapRange* fsr2) { - if (fsr1->fid < fsr2->fid) return -1; - if (fsr1->fid > fsr2->fid) return 1; +// snap part +static int32_t tsdbSnapPartCmprFn(STsdbSnapPartition* x, STsdbSnapPartition* y) { + if (x->fid < y->fid) return -1; + if (x->fid > y->fid) return 1; return 0; } -static int32_t tsdbTFileInsertSnapRange(STFile* f, TSnapRangeArray* snapR) { - STSnapRange* fsr = taosMemoryCalloc(1, sizeof(*fsr)); - if (fsr == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - fsr->fid = f->fid; - fsr->sver = f->minVer; - 
fsr->ever = f->maxVer; - - int32_t code = TARRAY2_SORT_INSERT(snapR, fsr, tsdbTSnapRangeCmprFn); - if (code) { - taosMemoryFree(fsr); - fsr = NULL; - } - return code; +static int32_t tVersionRangeCmprFn(SVersionRange* x, SVersionRange* y) { + if (x->minVer < y->minVer) return -1; + if (x->minVer > y->minVer) return 1; + if (x->maxVer < y->maxVer) return -1; + if (x->maxVer > y->maxVer) return 1; + return 0; } -static int32_t tsdbTFSetInsertSnapRange(STFileSet* fset, TSnapRangeArray* snapR) { - STFile tf = {.fid = fset->fid, .minVer = VERSION_MAX, .maxVer = VERSION_MIN}; - for (int32_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ++ftype) { - if (fset->farr[ftype] == NULL) continue; - STFile* f = fset->farr[ftype]->f; - tsdbTFileUpdVerRange(&tf, (SVersionRange){.minVer = f->minVer, .maxVer = f->maxVer}); +STsdbSnapPartition* tsdbSnapPartitionCreate() { + STsdbSnapPartition* pSP = taosMemoryCalloc(1, sizeof(STsdbSnapPartition)); + if (pSP == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return NULL; + } + for (int32_t i = 0; i < TSDB_SNAP_RANGE_TYP_MAX; i++) { + TARRAY2_INIT(&pSP->verRanges[i]); + } + return pSP; +} + +void tsdbSnapPartitionClear(STsdbSnapPartition** ppSP) { + if (ppSP == NULL || ppSP[0] == NULL) { + return; + } + for (int32_t i = 0; i < TSDB_SNAP_RANGE_TYP_MAX; i++) { + TARRAY2_DESTROY(&ppSP[0]->verRanges[i], NULL); + } + taosMemoryFree(ppSP[0]); + ppSP[0] = NULL; +} + +static int32_t tsdbFTypeToSRangeTyp(tsdb_ftype_t ftype) { + switch (ftype) { + case TSDB_FTYPE_HEAD: + return TSDB_SNAP_RANGE_TYP_HEAD; + case TSDB_FTYPE_DATA: + return TSDB_SNAP_RANGE_TYP_DATA; + case TSDB_FTYPE_SMA: + return TSDB_SNAP_RANGE_TYP_SMA; + case TSDB_FTYPE_TOMB: + return TSDB_SNAP_RANGE_TYP_TOMB; + case TSDB_FTYPE_STT: + return TSDB_SNAP_RANGE_TYP_STT; + } + return TSDB_SNAP_RANGE_TYP_MAX; +} + +static int32_t tsdbTFileSetToSnapPart(STFileSet* fset, STsdbSnapPartition** ppSP) { + STsdbSnapPartition* p = tsdbSnapPartitionCreate(); + if (p == NULL) { + goto _err; } + 
int32_t typ = 0; + for (int32_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ++ftype) { + if (fset->farr[ftype] == NULL) continue; + typ = tsdbFTypeToSRangeTyp(ftype); + ASSERT(typ < TSDB_SNAP_RANGE_TYP_MAX); + STFile* f = fset->farr[ftype]->f; + SVersionRange vr = {.minVer = f->minVer, .maxVer = f->maxVer}; + TARRAY2_SORT_INSERT(&p->verRanges[typ], vr, tVersionRangeCmprFn); + } + + typ = TSDB_SNAP_RANGE_TYP_STT; const SSttLvl* lvl; TARRAY2_FOREACH(fset->lvlArr, lvl) { STFileObj* fobj; TARRAY2_FOREACH(lvl->fobjArr, fobj) { - tsdbTFileUpdVerRange(&tf, (SVersionRange){.minVer = fobj->f->minVer, .maxVer = fobj->f->maxVer}); + STFile* f = fobj->f; + SVersionRange vr = {.minVer = f->minVer, .maxVer = f->maxVer}; + TARRAY2_SORT_INSERT(&p->verRanges[typ], vr, tVersionRangeCmprFn); } } + ppSP[0] = p; + return 0; - int32_t code = tsdbTFileInsertSnapRange(&tf, snapR); - if (code) return code; - return code; +_err: + tsdbSnapPartitionClear(&p); + return -1; } -static TSnapRangeArray* tsdbGetSnapRangeArray(STFileSystem* fs) { - int32_t code = 0; - TSnapRangeArray* snapR = taosMemoryCalloc(1, sizeof(*snapR)); - if (snapR == NULL) { +STsdbSnapPartList* tsdbSnapPartListCreate() { + STsdbSnapPartList* pList = taosMemoryCalloc(1, sizeof(STsdbSnapPartList)); + if (pList == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; return NULL; } - TARRAY2_INIT(snapR); + TARRAY2_INIT(pList); + return pList; +} +static STsdbSnapPartList* tsdbGetSnapPartList(STFileSystem* fs) { + STsdbSnapPartList* pList = tsdbSnapPartListCreate(); + if (pList == NULL) { + return NULL; + } + + int32_t code = 0; taosThreadRwlockRdlock(&fs->tsdb->rwLock); STFileSet* fset; TARRAY2_FOREACH(fs->fSetArr, fset) { - code = tsdbTFSetInsertSnapRange(fset, snapR); - if (code) break; + STsdbSnapPartition* pItem = NULL; + if (tsdbTFileSetToSnapPart(fset, &pItem) < 0) { + code = -1; + break; + } + ASSERT(pItem != NULL); + TARRAY2_SORT_INSERT(pList, pItem, tsdbSnapPartCmprFn); } taosThreadRwlockUnlock(&fs->tsdb->rwLock); if (code) 
{ - TARRAY2_DESTROY(snapR, tsdbTSnapRangeClear); - taosMemoryFree(snapR); - snapR = NULL; + TARRAY2_DESTROY(pList, tsdbSnapPartitionClear); + taosMemoryFree(pList); + pList = NULL; } - return snapR; + return pList; } -int32_t tSerializeSnapRangeArray(void* buf, int32_t bufLen, TSnapRangeArray* pSnapR) { +int32_t tTsdbSnapPartListDataLenCalc(STsdbSnapPartList* pList) { + int32_t hdrLen = sizeof(int32_t); + int32_t datLen = 0; + + int8_t msgVer = 1; + int32_t len = TARRAY2_SIZE(pList); + hdrLen += sizeof(msgVer); + hdrLen += sizeof(len); + datLen += hdrLen; + + for (int32_t u = 0; u < len; u++) { + STsdbSnapPartition* p = TARRAY2_GET(pList, u); + int32_t typMax = TSDB_SNAP_RANGE_TYP_MAX; + int32_t uItem = 0; + uItem += sizeof(STsdbSnapPartition); + uItem += sizeof(typMax); + + for (int32_t i = 0; i < typMax; i++) { + int32_t iLen = TARRAY2_SIZE(&p->verRanges[i]); + int32_t jItem = 0; + jItem += sizeof(SVersionRange); + jItem += sizeof(int64_t); + uItem += sizeof(iLen) + jItem * iLen; + } + datLen += uItem; + } + return datLen; +} + +int32_t tSerializeTsdbSnapPartList(void* buf, int32_t bufLen, STsdbSnapPartList* pList) { SEncoder encoder = {0}; tEncoderInit(&encoder, buf, bufLen); - int8_t msgVer = 1; - int32_t arrLen = TARRAY2_SIZE(pSnapR); int8_t reserved8 = 0; + int16_t reserved16 = 0; + int64_t reserved64 = 0; + + int8_t msgVer = 1; + int32_t len = TARRAY2_SIZE(pList); + if (tStartEncode(&encoder) < 0) goto _err; if (tEncodeI8(&encoder, msgVer) < 0) goto _err; - if (tEncodeI8(&encoder, reserved8) < 0) goto _err; - if (tEncodeI32(&encoder, arrLen) < 0) goto _err; + if (tEncodeI32(&encoder, len) < 0) goto _err; - int64_t reserved64 = 0; - for (int32_t i = 0; i < arrLen; i++) { - STSnapRange* u = TARRAY2_GET(pSnapR, i); - int64_t fid = u->fid; - if (tEncodeI64(&encoder, fid) < 0) goto _err; - if (tEncodeI64(&encoder, u->sver) < 0) goto _err; - if (tEncodeI64(&encoder, u->ever) < 0) goto _err; - if (tEncodeI64(&encoder, reserved64) < 0) goto _err; + for (int32_t u = 
0; u < len; u++) { + STsdbSnapPartition* p = TARRAY2_GET(pList, u); + if (tEncodeI64(&encoder, p->fid) < 0) goto _err; + if (tEncodeI8(&encoder, p->stat) < 0) goto _err; + if (tEncodeI8(&encoder, reserved8) < 0) goto _err; + if (tEncodeI16(&encoder, reserved16) < 0) goto _err; + + int32_t typMax = TSDB_SNAP_RANGE_TYP_MAX; + if (tEncodeI32(&encoder, typMax) < 0) goto _err; + + for (int32_t i = 0; i < typMax; i++) { + SVerRangeList* iList = &p->verRanges[i]; + int32_t iLen = TARRAY2_SIZE(iList); + + if (tEncodeI32(&encoder, iLen) < 0) goto _err; + for (int32_t j = 0; j < iLen; j++) { + SVersionRange r = TARRAY2_GET(iList, j); + if (tEncodeI64(&encoder, r.minVer) < 0) goto _err; + if (tEncodeI64(&encoder, r.maxVer) < 0) goto _err; + if (tEncodeI64(&encoder, reserved64) < 0) goto _err; + } + } } tEndEncode(&encoder); @@ -1260,30 +1362,47 @@ _err: return -1; } -int32_t tDeserializeSnapRangeArray(void* buf, int32_t bufLen, TSnapRangeArray* pSnapR) { +int32_t tDeserializeTsdbSnapPartList(void* buf, int32_t bufLen, STsdbSnapPartList* pList) { SDecoder decoder = {0}; tDecoderInit(&decoder, buf, bufLen); - int8_t msgVer = 0; - int32_t arrLen = 0; int8_t reserved8 = 0; + int16_t reserved16 = 0; + int64_t reserved64 = 0; + + STsdbSnapPartition* p = NULL; + + int8_t msgVer = 0; + int32_t len = 0; if (tStartDecode(&decoder) < 0) goto _err; if (tDecodeI8(&decoder, &msgVer) < 0) goto _err; - if (tDecodeI8(&decoder, &reserved8) < 0) goto _err; - if (tDecodeI32(&decoder, &arrLen) < 0) goto _err; + if (tDecodeI32(&decoder, &len) < 0) goto _err; - int64_t fid = 0; - int64_t reserved64 = 0; - STSnapRange* pRange = NULL; - for (int32_t i = 0; i < arrLen; i++) { - pRange = taosMemoryCalloc(1, sizeof(STSnapRange)); - if (tDecodeI64(&decoder, &fid) < 0) goto _err; - pRange->fid = fid; - if (tDecodeI64(&decoder, &pRange->sver) < 0) goto _err; - if (tDecodeI64(&decoder, &pRange->ever) < 0) goto _err; - if (tDecodeI64(&decoder, &reserved64) < 0) goto _err; - TARRAY2_APPEND(pSnapR, pRange); - 
pRange = NULL; + for (int32_t u = 0; u < len; u++) { + p = tsdbSnapPartitionCreate(); + if (p == NULL) goto _err; + if (tDecodeI64(&decoder, &p->fid) < 0) goto _err; + if (tDecodeI8(&decoder, &p->stat) < 0) goto _err; + if (tDecodeI8(&decoder, &reserved8) < 0) goto _err; + if (tDecodeI16(&decoder, &reserved16) < 0) goto _err; + + int32_t typMax = 0; + if (tDecodeI32(&decoder, &typMax) < 0) goto _err; + + for (int32_t i = 0; i < typMax; i++) { + SVerRangeList* iList = &p->verRanges[i]; + int32_t iLen = 0; + if (tDecodeI32(&decoder, &iLen) < 0) goto _err; + for (int32_t j = 0; j < iLen; j++) { + SVersionRange r = {0}; + if (tDecodeI64(&decoder, &r.minVer) < 0) goto _err; + if (tDecodeI64(&decoder, &r.maxVer) < 0) goto _err; + if (tDecodeI64(&decoder, &reserved64) < 0) goto _err; + TARRAY2_APPEND(iList, r); + } + } + TARRAY2_APPEND(pList, p); + p = NULL; } tEndDecode(&decoder); @@ -1291,14 +1410,52 @@ int32_t tDeserializeSnapRangeArray(void* buf, int32_t bufLen, TSnapRangeArray* p return 0; _err: - if (pRange) { - taosMemoryFree(pRange); - pRange = NULL; + if (p) { + tsdbSnapPartitionClear(&p); } tDecoderClear(&decoder); return -1; } +int32_t tsdbSnapPartListToRangeDiff(STsdbSnapPartList* pList, TSnapRangeArray** ppRanges) { + TSnapRangeArray* pDiff = taosMemoryCalloc(1, sizeof(TSnapRangeArray)); + if (pDiff == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + TARRAY2_INIT(pDiff); + + STsdbSnapPartition* part; + TARRAY2_FOREACH(pList, part) { + STSnapRange* r = taosMemoryCalloc(1, sizeof(STSnapRange)); + if (r == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + int64_t ever = -1; + int32_t typMax = TSDB_SNAP_RANGE_TYP_MAX; + for (int32_t i = 0; i < typMax; i++) { + SVerRangeList* iList = &part->verRanges[i]; + SVersionRange r = {0}; + TARRAY2_FOREACH(iList, r) { + if (r.maxVer < r.minVer) continue; + ever = TMAX(ever, r.maxVer); + } + } + r->sver = ever + 1; + r->ever = VERSION_MAX; + TARRAY2_APPEND(pDiff, r); + } + ppRanges[0] = pDiff; + 
return 0; + +_err: + if (pDiff) { + tsdbSnapRangeArrayDestroy(&pDiff); + } + return -1; +} + void tsdbSnapRangeArrayDestroy(TSnapRangeArray** ppSnap) { if (ppSnap && ppSnap[0]) { TARRAY2_DESTROY(ppSnap[0], tsdbTSnapRangeClear); @@ -1307,53 +1464,64 @@ void tsdbSnapRangeArrayDestroy(TSnapRangeArray** ppSnap) { } } -static int32_t tsdbSnapInfoDataLenCalc(TSnapRangeArray* pSnap) { - int32_t headerLen = 8; - int32_t itemLen = sizeof(STSnapRange) + 8; - int32_t size = TARRAY2_SIZE(pSnap); - return headerLen + itemLen * size; +void tsdbSnapPartListDestroy(STsdbSnapPartList** ppList) { + if (ppList == NULL || ppList[0] == NULL) return; + + TARRAY2_DESTROY(ppList[0], tsdbSnapPartitionClear); + taosMemoryFree(ppList[0]); + ppList[0] = NULL; } int32_t tsdbSnapGetInfo(STsdb* pTsdb, SSnapshot* pSnap) { - int32_t code = 0; - if (pSnap->typ == TAOS_SYNC_SNAP_INFO_BRIEF) { + if (pSnap->typ != TDMT_SYNC_PREP_SNAPSHOT && pSnap->typ != TDMT_SYNC_PREP_SNAPSHOT_REPLY) { return 0; } - code = -1; - TSnapRangeArray* snapR = tsdbGetSnapRangeArray(pTsdb->pFS); - if (snapR == NULL) { - goto _out; - } - if (pSnap->typ == TAOS_SYNC_SNAP_INFO_DIFF) { - for (int32_t i = 0; i < TARRAY2_SIZE(snapR); i++) { - STSnapRange* u = TARRAY2_GET(snapR, i); - u->sver = u->ever + 1; - u->ever = VERSION_MAX; - } + int code = -1; + STsdbSnapPartList* pList = tsdbGetSnapPartList(pTsdb->pFS); + if (pList == NULL) goto _out; + + if (pSnap->typ == TDMT_SYNC_PREP_SNAPSHOT_REPLY) { } - int32_t bufLen = sizeof(SMsgHead) + tsdbSnapInfoDataLenCalc(snapR); - void* data = taosMemoryRealloc(pSnap->data, bufLen); + void* buf = NULL; + int32_t tlen = 0; + // estimate data length encode + int32_t bufLen = sizeof(SSyncTLV); // typ: TDMT_SYNC_PREP_SNAPSHOT or TDMT_SYNC_PREP_SNAPSOT_REPLY + bufLen += sizeof(SSyncTLV); // subtyp: SNAP_DATA_TSDB + bufLen += tTsdbSnapPartListDataLenCalc(pList); + + void* data = taosMemoryRealloc(pSnap->data, bufLen); if (data == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; goto _out; } pSnap->data = 
data; - void* buf = ((char*)data) + sizeof(SMsgHead); - int32_t tlen = 0; - if ((tlen = tSerializeSnapRangeArray(buf, bufLen, snapR)) < 0) { + // header + SSyncTLV* datHead = (void*)pSnap->data; + datHead->typ = pSnap->typ; + datHead->len = 0; + + // tsdb + SSyncTLV* tsdbHead = (void*)datHead->val; + tsdbHead->typ = SNAP_DATA_TSDB; + + buf = tsdbHead->val; + tlen = 0; + if ((tlen = tSerializeTsdbSnapPartList(buf, bufLen, pList)) < 0) { tsdbError("vgId:%d, failed to serialize snap range since %s", TD_VID(pTsdb->pVnode), terrstr()); goto _out; } - SMsgHead* msgHead = pSnap->data; - msgHead->contLen = tlen; - msgHead->vgId = TD_VID(pTsdb->pVnode); + tsdbHead->len = tlen; + datHead->len += sizeof(SSyncTLV) + tsdbHead->len; + + // rsma code = 0; + _out: - if (snapR) { - tsdbSnapRangeArrayDestroy(&snapR); + if (pList) { + tsdbSnapPartListDestroy(&pList); } return code; diff --git a/source/dnode/vnode/src/vnd/vnodeSnapshot.c b/source/dnode/vnode/src/vnd/vnodeSnapshot.c index 418d8632c9..c56644ca2c 100644 --- a/source/dnode/vnode/src/vnd/vnodeSnapshot.c +++ b/source/dnode/vnode/src/vnd/vnodeSnapshot.c @@ -45,6 +45,7 @@ struct SVSnapReader { SStreamStateReader *pStreamStateReader; // rsma int8_t rsmaDone; + TSnapRangeArray *pRsmaRanges; SRSmaSnapReader *pRsmaReader; }; @@ -64,19 +65,44 @@ int32_t vnodeSnapReaderOpen(SVnode *pVnode, SSnapshotParam *pParam, SVSnapReader pReader->ever = ever; if (pParam->data) { - pReader->pRanges = taosMemoryCalloc(1, sizeof(*pReader->pRanges)); - if (pReader->pRanges == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; + SSyncTLV *datHead = (void *)pParam->data; + if (datHead->typ != TDMT_SYNC_PREP_SNAPSHOT_REPLY) { + terrno = TSDB_CODE_INVALID_DATA_FMT; goto _err; } - TARRAY2_INIT(pReader->pRanges); - SMsgHead *msgHead = pParam->data; - ASSERT(msgHead->vgId == TD_VID(pVnode)); - void *buf = (char *)pParam->data + sizeof(SMsgHead); - if (tDeserializeSnapRangeArray(buf, msgHead->contLen, pReader->pRanges) < 0) { - vError("vgId:%d, failed to 
deserialize snap range.", TD_VID(pVnode)); - goto _err; + int32_t offset = 0; + while (offset + sizeof(SSyncTLV) < datHead->len) { + SSyncTLV *sectHead = (void *)(datHead->val + offset); + offset += sizeof(SSyncTLV) + sectHead->len; + void *buf = sectHead->val; + int32_t bufLen = sectHead->len; + ASSERT(sectHead->typ == SNAP_DATA_TSDB || sectHead->typ == SNAP_DATA_RSMA1); + STsdbSnapPartList *pList = tsdbSnapPartListCreate(); + if (pList == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _err; + } + if (tDeserializeTsdbSnapPartList(buf, bufLen, pList) < 0) { + terrno = TSDB_CODE_INVALID_DATA_FMT; + goto _err; + } + TSnapRangeArray **ppRanges = NULL; + if (sectHead->typ == SNAP_DATA_TSDB) { + ppRanges = &pReader->pRanges; + } else if (sectHead->typ == SNAP_DATA_RSMA1) { + ppRanges = &pReader->pRsmaRanges; + } + if (ppRanges == NULL) { + tsdbSnapPartListDestroy(&pList); + continue; + } + if (tsdbSnapPartListToRangeDiff(pList, ppRanges) < 0) { + vError("vgId:%d, failed to get range diff since %s", TD_VID(pVnode), terrstr()); + tsdbSnapPartListDestroy(&pList); + goto _err; + } + tsdbSnapPartListDestroy(&pList); } } diff --git a/source/libs/sync/src/syncMessage.c b/source/libs/sync/src/syncMessage.c index 00ca6d8f90..9e035f60c2 100644 --- a/source/libs/sync/src/syncMessage.c +++ b/source/libs/sync/src/syncMessage.c @@ -216,42 +216,6 @@ int32_t syncBuildHeartbeatReply(SRpcMsg* pMsg, int32_t vgId) { return 0; } -#if 0 -int32_t syncBuildPreSnapshot(SRpcMsg* pMsg, int32_t vgId) { - int32_t bytes = sizeof(SyncPreSnapshot); - pMsg->pCont = rpcMallocCont(bytes); - pMsg->msgType = TDMT_SYNC_PRE_SNAPSHOT; - pMsg->contLen = bytes; - if (pMsg->pCont == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - - SyncPreSnapshot* pPreSnapshot = pMsg->pCont; - pPreSnapshot->bytes = bytes; - pPreSnapshot->msgType = TDMT_SYNC_PRE_SNAPSHOT; - pPreSnapshot->vgId = vgId; - return 0; -} - -int32_t syncBuildPreSnapshotReply(SRpcMsg* pMsg, int32_t vgId) { - int32_t bytes = 
sizeof(SyncPreSnapshotReply); - pMsg->pCont = rpcMallocCont(bytes); - pMsg->msgType = TDMT_SYNC_PRE_SNAPSHOT_REPLY; - pMsg->contLen = bytes; - if (pMsg->pCont == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - - SyncPreSnapshotReply* pPreSnapshotReply = pMsg->pCont; - pPreSnapshotReply->bytes = bytes; - pPreSnapshotReply->msgType = TDMT_SYNC_PRE_SNAPSHOT_REPLY; - pPreSnapshotReply->vgId = vgId; - return 0; -} -#endif - int32_t syncBuildSnapshotSend(SRpcMsg* pMsg, int32_t dataLen, int32_t vgId) { int32_t bytes = sizeof(SyncSnapshotSend) + dataLen; pMsg->pCont = rpcMallocCont(bytes); diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index 73b6940628..681d256ec9 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -98,7 +98,7 @@ int32_t snapshotSenderStart(SSyncSnapshotSender *pSender) { // Get full snapshot info SSyncNode *pSyncNode = pSender->pSyncNode; - SSnapshot snapInfo = {.typ = TAOS_SYNC_SNAP_INFO_FULL}; + SSnapshot snapInfo = {.typ = TDMT_SYNC_PREP_SNAPSHOT}; if (pSyncNode->pFsm->FpGetSnapshotInfo(pSyncNode->pFsm, &snapInfo) != 0) { sSError(pSender, "snapshot get info failure since %s", terrstr()); goto _out; @@ -106,12 +106,15 @@ int32_t snapshotSenderStart(SSyncSnapshotSender *pSender) { int dataLen = 0; if (snapInfo.data) { - SMsgHead *msgHead = snapInfo.data; - ASSERT(msgHead->vgId == pSyncNode->vgId); - dataLen = sizeof(SMsgHead) + msgHead->contLen; + SSyncTLV *datHead = snapInfo.data; + if (datHead->typ != TDMT_SYNC_PREP_SNAPSHOT) { + sSError(pSender, "unexpected data typ in data of snapshot info. 
typ: %d", datHead->typ); + terrno = TSDB_CODE_INVALID_DATA_FMT; + goto _out; + } + dataLen = sizeof(SSyncTLV) + datHead->len; } - // build begin msg SRpcMsg rpcMsg = {0}; if (syncBuildSnapshotSend(&rpcMsg, dataLen, pSender->pSyncNode->vgId) != 0) { sSError(pSender, "snapshot sender build msg failed since %s", terrstr()); @@ -605,7 +608,7 @@ _SEND_REPLY: // build msg ; // make complier happy - SSnapshot snapInfo = {.typ = TAOS_SYNC_SNAP_INFO_DIFF}; + SSnapshot snapInfo = {.typ = TDMT_SYNC_PREP_SNAPSHOT_REPLY}; int32_t dataLen = 0; if (pMsg->dataLen > 0) { void *data = taosMemoryCalloc(1, pMsg->dataLen); @@ -614,15 +617,18 @@ _SEND_REPLY: code = terrno; goto _out; } - dataLen = pMsg->dataLen; - memcpy(data, pMsg->data, dataLen); + memcpy(data, pMsg->data, pMsg->dataLen); snapInfo.data = data; data = NULL; pSyncNode->pFsm->FpGetSnapshotInfo(pSyncNode->pFsm, &snapInfo); - SMsgHead *msgHead = snapInfo.data; - ASSERT(msgHead->vgId == pSyncNode->vgId); - dataLen = msgHead->contLen; + SSyncTLV *datHead = snapInfo.data; + if (datHead->typ != TDMT_SYNC_PREP_SNAPSHOT_REPLY) { + sRError(pReceiver, "unexpected data typ in data of snapshot info. typ: %d", datHead->typ); + code = TSDB_CODE_INVALID_DATA_FMT; + goto _out; + } + dataLen = sizeof(SSyncTLV) + datHead->len; } SRpcMsg rpcMsg = {0}; @@ -927,12 +933,17 @@ static int32_t syncNodeOnSnapshotPrepRsp(SSyncNode *pSyncNode, SSyncSnapshotSend // update sender pSender->snapshot = snapshot; - if (pMsg->payloadType == TAOS_SYNC_SNAP_INFO_DIFF) { - SMsgHead *msgHead = (void *)pMsg->data; - ASSERT(msgHead->vgId == pSyncNode->vgId); + // start reader + if (pMsg->payloadType == TDMT_SYNC_PREP_SNAPSHOT_REPLY) { + SSyncTLV *datHead = (void *)pMsg->data; + if (datHead->typ != pMsg->payloadType) { + sSError(pSender, "unexpected data type in data of SyncSnapshotRsp. 
typ: %d", datHead->typ); + terrno = TSDB_CODE_INVALID_DATA_FMT; + return -1; + } pSender->snapshotParam.data = pMsg->data; } - // start reader + int32_t code = pSyncNode->pFsm->FpSnapshotStartRead(pSyncNode->pFsm, &pSender->snapshotParam, &pSender->pReader); if (code != 0) { sSError(pSender, "prepare snapshot failed since %s", terrstr()); From e1c03118abe8142ab48b15d8ddc8cfd20c4aa477 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Fri, 15 Sep 2023 19:58:48 +0800 Subject: [PATCH 30/61] enh: extract snapshot info for both of snap reader and writer --- include/libs/sync/sync.h | 6 +- source/dnode/vnode/inc/vnode.h | 2 +- source/dnode/vnode/src/vnd/vnodeSnapshot.c | 106 +++++++++++++++------ source/dnode/vnode/src/vnd/vnodeSync.c | 5 +- source/libs/sync/src/syncSnapshot.c | 24 ++++- 5 files changed, 108 insertions(+), 35 deletions(-) diff --git a/include/libs/sync/sync.h b/include/libs/sync/sync.h index cc381bb54e..50e60d2ef4 100644 --- a/include/libs/sync/sync.h +++ b/include/libs/sync/sync.h @@ -151,12 +151,12 @@ typedef struct SReConfigCbMeta { typedef struct SSnapshotParam { SyncIndex start; SyncIndex end; - void* data; // with SMsgHead + SSyncTLV* data; } SSnapshotParam; typedef struct SSnapshot { - ESyncSnapInfoTyp typ; - void* data; // with SMsgHead + int32_t typ; + SSyncTLV* data; SyncIndex lastApplyIndex; SyncTerm lastApplyTerm; SyncIndex lastConfigIndex; diff --git a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h index baff19e3d4..a120ecf9db 100644 --- a/source/dnode/vnode/inc/vnode.h +++ b/source/dnode/vnode/inc/vnode.h @@ -263,7 +263,7 @@ int32_t vnodeSnapReaderOpen(SVnode *pVnode, SSnapshotParam *pParam, SVSnapReader void vnodeSnapReaderClose(SVSnapReader *pReader); int32_t vnodeSnapRead(SVSnapReader *pReader, uint8_t **ppData, uint32_t *nData); // SVSnapWriter -int32_t vnodeSnapWriterOpen(SVnode *pVnode, int64_t sver, int64_t ever, SVSnapWriter **ppWriter); +int32_t vnodeSnapWriterOpen(SVnode *pVnode, SSnapshotParam *pParam, 
SVSnapWriter **ppWriter); int32_t vnodeSnapWriterClose(SVSnapWriter *pWriter, int8_t rollback, SSnapshot *pSnapshot); int32_t vnodeSnapWrite(SVSnapWriter *pWriter, uint8_t *pData, uint32_t nData); diff --git a/source/dnode/vnode/src/vnd/vnodeSnapshot.c b/source/dnode/vnode/src/vnd/vnodeSnapshot.c index c56644ca2c..cf04471be0 100644 --- a/source/dnode/vnode/src/vnd/vnodeSnapshot.c +++ b/source/dnode/vnode/src/vnd/vnodeSnapshot.c @@ -49,6 +49,26 @@ struct SVSnapReader { SRSmaSnapReader *pRsmaReader; }; +static int32_t vnodeExtractSnapInfoDiff(void *buf, int32_t bufLen, TSnapRangeArray **ppRanges) { + int32_t code = -1; + STsdbSnapPartList *pList = tsdbSnapPartListCreate(); + if (pList == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + goto _out; + } + if (tDeserializeTsdbSnapPartList(buf, bufLen, pList) < 0) { + terrno = TSDB_CODE_INVALID_DATA_FMT; + goto _out; + } + if (tsdbSnapPartListToRangeDiff(pList, ppRanges) < 0) { + goto _out; + } + code = 0; +_out: + tsdbSnapPartListDestroy(&pList); + return code; +} + int32_t vnodeSnapReaderOpen(SVnode *pVnode, SSnapshotParam *pParam, SVSnapReader **ppReader) { int32_t code = 0; int64_t sver = pParam->start; @@ -64,6 +84,7 @@ int32_t vnodeSnapReaderOpen(SVnode *pVnode, SSnapshotParam *pParam, SVSnapReader pReader->sver = sver; pReader->ever = ever; + // snapshot info if (pParam->data) { SSyncTLV *datHead = (void *)pParam->data; if (datHead->typ != TDMT_SYNC_PREP_SNAPSHOT_REPLY) { @@ -72,37 +93,29 @@ int32_t vnodeSnapReaderOpen(SVnode *pVnode, SSnapshotParam *pParam, SVSnapReader } int32_t offset = 0; + TSnapRangeArray **ppRanges = NULL; + while (offset + sizeof(SSyncTLV) < datHead->len) { - SSyncTLV *sectHead = (void *)(datHead->val + offset); - offset += sizeof(SSyncTLV) + sectHead->len; - void *buf = sectHead->val; - int32_t bufLen = sectHead->len; - ASSERT(sectHead->typ == SNAP_DATA_TSDB || sectHead->typ == SNAP_DATA_RSMA1); - STsdbSnapPartList *pList = tsdbSnapPartListCreate(); - if (pList == NULL) { - terrno = 
TSDB_CODE_OUT_OF_MEMORY; - goto _err; + SSyncTLV *subField = (void *)(datHead->val + offset); + offset += sizeof(SSyncTLV) + subField->len; + void *buf = subField->val; + int32_t bufLen = subField->len; + switch (subField->typ) { + case SNAP_DATA_TSDB: + ppRanges = &pReader->pRanges; + break; + case SNAP_DATA_RSMA1: + ppRanges = &pReader->pRsmaRanges; + break; + default: + vError("vgId:%d, unexpected subfield type in data of snapshot param. subtyp:%d", TD_VID(pVnode), + subField->typ); + goto _err; } - if (tDeserializeTsdbSnapPartList(buf, bufLen, pList) < 0) { - terrno = TSDB_CODE_INVALID_DATA_FMT; - goto _err; - } - TSnapRangeArray **ppRanges = NULL; - if (sectHead->typ == SNAP_DATA_TSDB) { - ppRanges = &pReader->pRanges; - } else if (sectHead->typ == SNAP_DATA_RSMA1) { - ppRanges = &pReader->pRsmaRanges; - } - if (ppRanges == NULL) { - tsdbSnapPartListDestroy(&pList); - continue; - } - if (tsdbSnapPartListToRangeDiff(pList, ppRanges) < 0) { + if (vnodeExtractSnapInfoDiff(buf, bufLen, ppRanges) < 0) { vError("vgId:%d, failed to get range diff since %s", TD_VID(pVnode), terrstr()); - tsdbSnapPartListDestroy(&pList); goto _err; } - tsdbSnapPartListDestroy(&pList); } } @@ -414,6 +427,7 @@ struct SVSnapWriter { // meta SMetaSnapWriter *pMetaSnapWriter; // tsdb + TSnapRangeArray *pRanges; STsdbSnapWriter *pTsdbSnapWriter; // tq STqSnapWriter *pTqSnapWriter; @@ -423,12 +437,15 @@ struct SVSnapWriter { SStreamTaskWriter *pStreamTaskWriter; SStreamStateWriter *pStreamStateWriter; // rsma + TSnapRangeArray *pRsmaRanges; SRSmaSnapWriter *pRsmaSnapWriter; }; -int32_t vnodeSnapWriterOpen(SVnode *pVnode, int64_t sver, int64_t ever, SVSnapWriter **ppWriter) { +int32_t vnodeSnapWriterOpen(SVnode *pVnode, SSnapshotParam *pParam, SVSnapWriter **ppWriter) { int32_t code = 0; SVSnapWriter *pWriter = NULL; + int64_t sver = pParam->start; + int64_t ever = pParam->end; // commit memory data vnodeAsyncCommit(pVnode); @@ -447,6 +464,41 @@ int32_t vnodeSnapWriterOpen(SVnode *pVnode, 
int64_t sver, int64_t ever, SVSnapWr // inc commit ID pWriter->commitID = ++pVnode->state.commitID; + // snapshot info + if (pParam->data) { + SSyncTLV *datHead = (void *)pParam->data; + if (datHead->typ != TDMT_SYNC_PREP_SNAPSHOT_REPLY) { + terrno = TSDB_CODE_INVALID_DATA_FMT; + goto _err; + } + + int32_t offset = 0; + TSnapRangeArray **ppRanges = NULL; + + while (offset + sizeof(SSyncTLV) < datHead->len) { + SSyncTLV *subField = (void *)(datHead->val + offset); + offset += sizeof(SSyncTLV) + subField->len; + void *buf = subField->val; + int32_t bufLen = subField->len; + switch (subField->typ) { + case SNAP_DATA_TSDB: + ppRanges = &pWriter->pRanges; + break; + case SNAP_DATA_RSMA1: + ppRanges = &pWriter->pRsmaRanges; + break; + default: + vError("vgId:%d, unexpected subfield type in data of snapshot param. subtyp:%d", TD_VID(pVnode), + subField->typ); + goto _err; + } + if (vnodeExtractSnapInfoDiff(buf, bufLen, ppRanges) < 0) { + vError("vgId:%d, failed to get range diff since %s", TD_VID(pVnode), terrstr()); + goto _err; + } + } + } + vInfo("vgId:%d, vnode snapshot writer opened, sver:%" PRId64 " ever:%" PRId64 " commit id:%" PRId64, TD_VID(pVnode), sver, ever, pWriter->commitID); *ppWriter = pWriter; diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index e676219b11..b73c9b8c65 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -491,8 +491,7 @@ static int32_t vnodeSnapshotDoRead(const SSyncFSM *pFsm, void *pReader, void **p } static int32_t vnodeSnapshotStartWrite(const SSyncFSM *pFsm, void *pParam, void **ppWriter) { - SVnode *pVnode = pFsm->data; - SSnapshotParam *pSnapshotParam = pParam; + SVnode *pVnode = pFsm->data; do { int32_t itemSize = tmsgGetQueueSize(&pVnode->msgCb, pVnode->config.vgId, APPLY_QUEUE); @@ -505,7 +504,7 @@ static int32_t vnodeSnapshotStartWrite(const SSyncFSM *pFsm, void *pParam, void } } while (true); - int32_t code = vnodeSnapWriterOpen(pVnode, 
pSnapshotParam->start, pSnapshotParam->end, (SVSnapWriter **)ppWriter); + int32_t code = vnodeSnapWriterOpen(pVnode, (SSnapshotParam *)pParam, (SVSnapWriter **)ppWriter); return code; } diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index 681d256ec9..99e8fd55a2 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -368,6 +368,17 @@ void snapshotReceiverDestroy(SSyncSnapshotReceiver *pReceiver) { pReceiver->pWriter = NULL; } + // free data of snapshot info + if (pReceiver->snapshotParam.data) { + taosMemoryFree(pReceiver->snapshotParam.data); + pReceiver->snapshotParam.data = NULL; + } + + if (pReceiver->snapshot.data) { + taosMemoryFree(pReceiver->snapshot.data); + pReceiver->snapshot.data = NULL; + } + // free receiver taosMemoryFree(pReceiver); } @@ -652,6 +663,17 @@ _SEND_REPLY: if (snapInfo.data) { pRspMsg->payloadType = snapInfo.typ; memcpy(pRspMsg->data, snapInfo.data, dataLen); + + // save snapshot info + SSnapshotParam *pParam = &pReceiver->snapshotParam; + void *data = taosMemoryRealloc(pParam->data, dataLen); + if (data == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + code = terrno; + goto _out; + } + pParam->data = data; + memcpy(pParam->data, snapInfo.data, dataLen); } // send msg @@ -941,7 +963,7 @@ static int32_t syncNodeOnSnapshotPrepRsp(SSyncNode *pSyncNode, SSyncSnapshotSend terrno = TSDB_CODE_INVALID_DATA_FMT; return -1; } - pSender->snapshotParam.data = pMsg->data; + pSender->snapshotParam.data = (void *)pMsg->data; } int32_t code = pSyncNode->pFsm->FpSnapshotStartRead(pSyncNode->pFsm, &pSender->snapshotParam, &pSender->pReader); From 0c41fa56dc113ed5294eb2d27b1fc4a332631710 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Mon, 18 Sep 2023 14:46:26 +0800 Subject: [PATCH 31/61] feat: filter pExclude in tsdbFSCreateCopyRangedSnapshot --- source/dnode/vnode/src/inc/vnodeInt.h | 4 +- source/dnode/vnode/src/sma/smaSnapshot.c | 4 +- source/dnode/vnode/src/tsdb/tsdbFS2.c | 
46 ++++++++++++++- source/dnode/vnode/src/tsdb/tsdbFS2.h | 7 ++- source/dnode/vnode/src/tsdb/tsdbFSet2.c | 68 ++++++++++++++++++++++ source/dnode/vnode/src/tsdb/tsdbFSet2.h | 3 + source/dnode/vnode/src/tsdb/tsdbSnapshot.c | 4 +- source/dnode/vnode/src/vnd/vnodeSnapshot.c | 19 +++++- 8 files changed, 144 insertions(+), 11 deletions(-) diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 27a393abf4..d5e1585af4 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -301,7 +301,7 @@ int32_t tsdbSnapReaderOpen(STsdb* pTsdb, int64_t sver, int64_t ever, int8_t type int32_t tsdbSnapReaderClose(STsdbSnapReader** ppReader); int32_t tsdbSnapRead(STsdbSnapReader* pReader, uint8_t** ppData); // STsdbSnapWriter ======================================== -int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWriter** ppWriter); +int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, void* pRanges, STsdbSnapWriter** ppWriter); int32_t tsdbSnapWrite(STsdbSnapWriter* pWriter, SSnapDataHdr* pHdr); int32_t tsdbSnapWriterPrepareClose(STsdbSnapWriter* pWriter); int32_t tsdbSnapWriterClose(STsdbSnapWriter** ppWriter, int8_t rollback); @@ -358,7 +358,7 @@ int32_t rsmaSnapReaderOpen(SSma* pSma, int64_t sver, int64_t ever, SRSmaSnapRead int32_t rsmaSnapReaderClose(SRSmaSnapReader** ppReader); int32_t rsmaSnapRead(SRSmaSnapReader* pReader, uint8_t** ppData); // SRSmaSnapWriter ======================================== -int32_t rsmaSnapWriterOpen(SSma* pSma, int64_t sver, int64_t ever, SRSmaSnapWriter** ppWriter); +int32_t rsmaSnapWriterOpen(SSma* pSma, int64_t sver, int64_t ever, void* pRanges, SRSmaSnapWriter** ppWriter); int32_t rsmaSnapWrite(SRSmaSnapWriter* pWriter, uint8_t* pData, uint32_t nData); int32_t rsmaSnapWriterClose(SRSmaSnapWriter** ppWriter, int8_t rollback); diff --git a/source/dnode/vnode/src/sma/smaSnapshot.c b/source/dnode/vnode/src/sma/smaSnapshot.c 
index 2a010f5a84..ca46b4728f 100644 --- a/source/dnode/vnode/src/sma/smaSnapshot.c +++ b/source/dnode/vnode/src/sma/smaSnapshot.c @@ -128,7 +128,7 @@ struct SRSmaSnapWriter { STsdbSnapWriter* pDataWriter[TSDB_RETENTION_L2]; }; -int32_t rsmaSnapWriterOpen(SSma* pSma, int64_t sver, int64_t ever, SRSmaSnapWriter** ppWriter) { +int32_t rsmaSnapWriterOpen(SSma* pSma, int64_t sver, int64_t ever, void* pRanges, SRSmaSnapWriter** ppWriter) { int32_t code = 0; int32_t lino = 0; SVnode* pVnode = pSma->pVnode; @@ -147,7 +147,7 @@ int32_t rsmaSnapWriterOpen(SSma* pSma, int64_t sver, int64_t ever, SRSmaSnapWrit // rsma1/rsma2 for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { if (pSma->pRSmaTsdb[i]) { - code = tsdbSnapWriterOpen(pSma->pRSmaTsdb[i], sver, ever, &pWriter->pDataWriter[i]); + code = tsdbSnapWriterOpen(pSma->pRSmaTsdb[i], sver, ever, pRanges, &pWriter->pDataWriter[i]); TSDB_CHECK_CODE(code, lino, _exit); } } diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.c b/source/dnode/vnode/src/tsdb/tsdbFS2.c index d71473b079..759fded522 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.c @@ -983,6 +983,50 @@ int32_t tsdbFSDestroyRefSnapshot(TFileSetArray **fsetArr) { return 0; } +int32_t tsdbFSCreateCopyRangedSnapshot(STFileSystem *fs, TSnapRangeArray *pExclude, TFileSetArray **fsetArr, + TFileOpArray *fopArr) { + int32_t code = 0; + STFileSet *fset; + STFileSet *fset1; + + fsetArr[0] = taosMemoryMalloc(sizeof(TFileSetArray)); + if (fsetArr == NULL) return TSDB_CODE_OUT_OF_MEMORY; + + TARRAY2_INIT(fsetArr[0]); + + int32_t i = 0; + + taosThreadRwlockRdlock(&fs->tsdb->rwLock); + TARRAY2_FOREACH(fs->fSetArr, fset) { + int64_t ever = VERSION_MAX; + while (pExclude && i < TARRAY2_SIZE(pExclude)) { + STSnapRange *u = TARRAY2_GET(pExclude, i); + if (fset->fid > u->fid) { + i++; + continue; + } + if (fset->fid == u->fid) { + ever = u->sver - 1; + i++; + } + } + + code = tsdbTFileSetFilteredInitDup(fs->tsdb, fset, ever, &fset1, fopArr); + if 
(code) break; + + code = TARRAY2_APPEND(fsetArr[0], fset1); + if (code) break; + } + taosThreadRwlockUnlock(&fs->tsdb->rwLock); + + if (code) { + TARRAY2_DESTROY(fsetArr[0], tsdbTFileSetClear); + taosMemoryFree(fsetArr[0]); + fsetArr[0] = NULL; + } + return code; +} + int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, int64_t sver, int64_t ever, TSnapRangeArray *pRanges, TSnapRangeArray **fsrArr) { int32_t code = -1; @@ -1009,12 +1053,12 @@ int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, int64_t sver, int64_t ev i++; continue; } - if (fset->fid == u->fid) { sver1 = u->sver; i++; } } + tsdbInfo("fsrArr:%p, fid:%d, sver:%" PRId64 ", ever:%" PRId64, fsrArr, fset->fid, sver1, ever1); code = tsdbTSnapRangeInitRef(fs->tsdb, fset, sver1, ever1, &fsr1); diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.h b/source/dnode/vnode/src/tsdb/tsdbFS2.h index f8b87b8a84..73b27a8fbd 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS2.h +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.h @@ -52,9 +52,12 @@ int32_t tsdbCloseFS(STFileSystem **fs); int32_t tsdbFSCreateCopySnapshot(STFileSystem *fs, TFileSetArray **fsetArr); int32_t tsdbFSDestroyCopySnapshot(TFileSetArray **fsetArr); int32_t tsdbFSCreateRefSnapshot(STFileSystem *fs, TFileSetArray **fsetArr); -int32_t tsdbFSDestroyRefSnapshot(TFileSetArray **fsrArr); +int32_t tsdbFSDestroyRefSnapshot(TFileSetArray **fsetArr); -int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, int64_t sver, int64_t ever, TSnapRangeArray *pEx, +int32_t tsdbFSCreateCopyRangedSnapshot(STFileSystem *fs, TSnapRangeArray *pExclude, TFileSetArray **fsetArr, + TFileOpArray *fopArr); +int32_t tsdbFSDestroyCopyRangedSnapshot(TFileSetArray **fsetArr, TFileOpArray *fopArr); +int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, int64_t sver, int64_t ever, TSnapRangeArray *pRanges, TSnapRangeArray **fsrArr); int32_t tsdbFSDestroyRefRangedSnapshot(TSnapRangeArray **fsrArr); // txn diff --git a/source/dnode/vnode/src/tsdb/tsdbFSet2.c 
b/source/dnode/vnode/src/tsdb/tsdbFSet2.c index e0434b7da6..dd86d01598 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFSet2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFSet2.c @@ -65,6 +65,34 @@ static int32_t tsdbSttLvlInitRef(STsdb *pTsdb, const SSttLvl *lvl1, SSttLvl **lv return 0; } +static int32_t tsdbSttLvlFilteredInitEx(STsdb *pTsdb, const SSttLvl *lvl1, int64_t ever, SSttLvl **lvl, + TFileOpArray *fopArr) { + int32_t code = tsdbSttLvlInit(lvl1->level, lvl); + if (code) return code; + + const STFileObj *fobj1; + TARRAY2_FOREACH(lvl1->fobjArr, fobj1) { + if (fobj1->f->maxVer <= ever) { + STFileObj *fobj; + code = tsdbTFileObjInit(pTsdb, fobj1->f, &fobj); + if (code) { + tsdbSttLvlClear(lvl); + return code; + } + + TARRAY2_APPEND(lvl[0]->fobjArr, fobj); + } else { + STFileOp op = { + .optype = TSDB_FOP_REMOVE, + .fid = fobj1->f->fid, + .of = fobj1->f[0], + }; + TARRAY2_APPEND(fopArr, op); + } + } + return 0; +} + static void tsdbSttLvlRemoveFObj(void *data) { tsdbTFileObjRemove(*(STFileObj **)data); } static void tsdbSttLvlRemove(SSttLvl **lvl) { TARRAY2_DESTROY(lvl[0]->fobjArr, tsdbSttLvlRemoveFObj); @@ -458,6 +486,46 @@ int32_t tsdbTFileSetInitDup(STsdb *pTsdb, const STFileSet *fset1, STFileSet **fs return 0; } +int32_t tsdbTFileSetFilteredInitDup(STsdb *pTsdb, const STFileSet *fset1, int64_t ever, STFileSet **fset, + TFileOpArray *fopArr) { + int32_t code = tsdbTFileSetInit(fset1->fid, fset); + if (code) return code; + + for (int32_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ++ftype) { + if (fset1->farr[ftype] == NULL) continue; + STFileObj *fobj = fset1->farr[ftype]; + if (fobj->f->maxVer <= ever) { + code = tsdbTFileObjInit(pTsdb, fobj->f, &fset[0]->farr[ftype]); + if (code) { + tsdbTFileSetClear(fset); + return code; + } + } else { + STFileOp op = { + .optype = TSDB_FOP_REMOVE, + .fid = fobj->f->fid, + .of = fobj->f[0], + }; + TARRAY2_APPEND(fopArr, op); + } + } + + const SSttLvl *lvl1; + TARRAY2_FOREACH(fset1->lvlArr, lvl1) { + SSttLvl *lvl; + code = 
tsdbSttLvlFilteredInitEx(pTsdb, lvl1, ever, &lvl, fopArr); + if (code) { + tsdbTFileSetClear(fset); + return code; + } + + code = TARRAY2_APPEND(fset[0]->lvlArr, lvl); + if (code) return code; + } + + return 0; +} + int32_t tsdbTSnapRangeInitRef(STsdb *pTsdb, const STFileSet *fset1, int64_t sver, int64_t ever, STSnapRange **fsr) { fsr[0] = taosMemoryCalloc(1, sizeof(*fsr[0])); if (fsr[0] == NULL) return TSDB_CODE_OUT_OF_MEMORY; diff --git a/source/dnode/vnode/src/tsdb/tsdbFSet2.h b/source/dnode/vnode/src/tsdb/tsdbFSet2.h index e6d78a8cfe..c78cc179df 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFSet2.h +++ b/source/dnode/vnode/src/tsdb/tsdbFSet2.h @@ -46,6 +46,9 @@ int32_t tsdbTFileSetInitRef(STsdb *pTsdb, const STFileSet *fset1, STFileSet **fs int32_t tsdbTFileSetClear(STFileSet **fset); int32_t tsdbTFileSetRemove(STFileSet **fset); +int32_t tsdbTFileSetFilteredInitDup(STsdb *pTsdb, const STFileSet *fset1, int64_t ever, STFileSet **fset, + TFileOpArray *fopArr); + int32_t tsdbTSnapRangeInitRef(STsdb *pTsdb, const STFileSet *fset1, int64_t sver, int64_t ever, STSnapRange **fsr); int32_t tsdbTSnapRangeClear(STSnapRange **fsr); diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index 9710eee6c4..3b74475870 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -1028,7 +1028,7 @@ _exit: return code; } -int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWriter** writer) { +int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, void* pRanges, STsdbSnapWriter** writer) { int32_t code = 0; int32_t lino = 0; @@ -1052,7 +1052,7 @@ int32_t tsdbSnapWriterOpen(STsdb* pTsdb, int64_t sver, int64_t ever, STsdbSnapWr writer[0]->compactVersion = INT64_MAX; writer[0]->now = taosGetTimestampMs(); - code = tsdbFSCreateCopySnapshot(pTsdb->pFS, &writer[0]->fsetArr); + code = tsdbFSCreateCopyRangedSnapshot(pTsdb->pFS, (TSnapRangeArray*)pRanges, 
&writer[0]->fsetArr, writer[0]->fopArr); TSDB_CHECK_CODE(code, lino, _exit); _exit: diff --git a/source/dnode/vnode/src/vnd/vnodeSnapshot.c b/source/dnode/vnode/src/vnd/vnodeSnapshot.c index cf04471be0..b2fbdc07e9 100644 --- a/source/dnode/vnode/src/vnd/vnodeSnapshot.c +++ b/source/dnode/vnode/src/vnd/vnodeSnapshot.c @@ -158,6 +158,11 @@ void vnodeSnapReaderClose(SVSnapReader *pReader) { if (pReader->pRanges) { tsdbSnapRangeArrayDestroy(&pReader->pRanges); } + + if (pReader->pRsmaRanges) { + tsdbSnapRangeArrayDestroy(&pReader->pRsmaRanges); + } + taosMemoryFree(pReader); } @@ -543,6 +548,10 @@ int32_t vnodeSnapWriterClose(SVSnapWriter *pWriter, int8_t rollback, SSnapshot * if (code) goto _exit; } + if (pWriter->pRanges) { + tsdbSnapRangeArrayDestroy(&pWriter->pRanges); + } + if (pWriter->pTsdbSnapWriter) { code = tsdbSnapWriterClose(&pWriter->pTsdbSnapWriter, rollback); if (code) goto _exit; @@ -577,6 +586,10 @@ int32_t vnodeSnapWriterClose(SVSnapWriter *pWriter, int8_t rollback, SSnapshot * if (code) goto _exit; } + if (pWriter->pRsmaRanges) { + tsdbSnapRangeArrayDestroy(&pWriter->pRsmaRanges); + } + if (pWriter->pRsmaSnapWriter) { code = rsmaSnapWriterClose(&pWriter->pRsmaSnapWriter, rollback); if (code) goto _exit; @@ -663,7 +676,8 @@ int32_t vnodeSnapWrite(SVSnapWriter *pWriter, uint8_t *pData, uint32_t nData) { case SNAP_DATA_DEL: { // tsdb if (pWriter->pTsdbSnapWriter == NULL) { - code = tsdbSnapWriterOpen(pVnode->pTsdb, pWriter->sver, pWriter->ever, &pWriter->pTsdbSnapWriter); + code = tsdbSnapWriterOpen(pVnode->pTsdb, pWriter->sver, pWriter->ever, pWriter->pRanges, + &pWriter->pTsdbSnapWriter); if (code) goto _err; } @@ -723,7 +737,8 @@ int32_t vnodeSnapWrite(SVSnapWriter *pWriter, uint8_t *pData, uint32_t nData) { case SNAP_DATA_QTASK: { // rsma1/rsma2/qtask for rsma if (pWriter->pRsmaSnapWriter == NULL) { - code = rsmaSnapWriterOpen(pVnode->pSma, pWriter->sver, pWriter->ever, &pWriter->pRsmaSnapWriter); + code = rsmaSnapWriterOpen(pVnode->pSma, 
pWriter->sver, pWriter->ever, pWriter->pRsmaRanges, + &pWriter->pRsmaSnapWriter); if (code) goto _err; } From eb4e2aa58fbae2a33ee6fe2a8fe02d3d9ba880ac Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Wed, 20 Sep 2023 09:54:28 +0800 Subject: [PATCH 32/61] feat: restore incomplete fsm state with maxVerValid via snapshot replication --- include/common/tcommon.h | 4 +- include/libs/sync/sync.h | 13 +- include/util/taoserror.h | 2 +- source/dnode/vnode/src/inc/tsdb.h | 5 + source/dnode/vnode/src/tsdb/tsdbFS2.c | 33 ++-- source/dnode/vnode/src/tsdb/tsdbFS2.h | 2 +- source/dnode/vnode/src/tsdb/tsdbFSet2.c | 1 + source/dnode/vnode/src/tsdb/tsdbFSet2.h | 2 +- source/dnode/vnode/src/tsdb/tsdbSnapshot.c | 65 ++++++-- source/dnode/vnode/src/vnd/vnodeOpen.c | 9 +- source/dnode/vnode/src/vnd/vnodeSnapshot.c | 4 +- source/dnode/vnode/src/vnd/vnodeSync.c | 13 ++ source/libs/sync/inc/syncInt.h | 1 + source/libs/sync/inc/syncMessage.h | 2 +- source/libs/sync/inc/syncSnapshot.h | 6 +- source/libs/sync/src/syncAppendEntries.c | 9 +- source/libs/sync/src/syncElection.c | 5 + source/libs/sync/src/syncMain.c | 16 +- source/libs/sync/src/syncPipeline.c | 15 +- source/libs/sync/src/syncSnapshot.c | 175 ++++++++++++--------- source/util/src/terror.c | 1 + 21 files changed, 242 insertions(+), 141 deletions(-) diff --git a/include/common/tcommon.h b/include/common/tcommon.h index 6f4f15d1e8..72aab9adf0 100644 --- a/include/common/tcommon.h +++ b/include/common/tcommon.h @@ -55,8 +55,8 @@ typedef struct SSessionKey { } SSessionKey; typedef struct SVersionRange { - uint64_t minVer; - uint64_t maxVer; + int64_t minVer; + int64_t maxVer; } SVersionRange; static inline int winKeyCmprImpl(const void* pKey1, const void* pKey2) { diff --git a/include/libs/sync/sync.h b/include/libs/sync/sync.h index 50e60d2ef4..71c56e8c86 100644 --- a/include/libs/sync/sync.h +++ b/include/libs/sync/sync.h @@ -36,8 +36,7 @@ extern "C" { #define SYNC_DEL_WAL_MS (1000 * 60) #define SYNC_ADD_QUORUM_COUNT 3 #define 
SYNC_VNODE_LOG_RETENTION (TSDB_SYNC_LOG_BUFFER_RETENTION + 1) -#define SNAPSHOT_MAX_CLOCK_SKEW_MS 1000 * 10 -#define SNAPSHOT_WAIT_MS 1000 * 30 +#define SNAPSHOT_WAIT_MS 1000 * 5 #define SYNC_MAX_RETRY_BACKOFF 5 #define SYNC_LOG_REPL_RETRY_WAIT_MS 100 @@ -88,10 +87,9 @@ typedef enum { } ESyncRole; typedef enum { - TAOS_SYNC_SNAP_INFO_BRIEF = 0, - TAOS_SYNC_SNAP_INFO_FULL = 1, - TAOS_SYNC_SNAP_INFO_DIFF = 2, -} ESyncSnapInfoTyp; + SYNC_FSM_STATE_NORMAL = 0, + SYNC_FSM_STATE_INCOMPLETE, +} ESyncFsmState; typedef struct SNodeInfo { int64_t clusterId; @@ -155,8 +153,9 @@ typedef struct SSnapshotParam { } SSnapshotParam; typedef struct SSnapshot { - int32_t typ; + int32_t type; SSyncTLV* data; + ESyncFsmState state; SyncIndex lastApplyIndex; SyncTerm lastApplyTerm; SyncIndex lastConfigIndex; diff --git a/include/util/taoserror.h b/include/util/taoserror.h index 39ae3fb97a..6fbe4422ac 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -557,7 +557,7 @@ int32_t* taosGetErrno(); // #define TSDB_CODE_SYN_TOO_MANY_FWDINFO TAOS_DEF_ERROR_CODE(0, 0x0904) // 2.x // #define TSDB_CODE_SYN_MISMATCHED_PROTOCOL TAOS_DEF_ERROR_CODE(0, 0x0905) // 2.x // #define TSDB_CODE_SYN_MISMATCHED_CLUSTERID TAOS_DEF_ERROR_CODE(0, 0x0906) // 2.x -// #define TSDB_CODE_SYN_MISMATCHED_SIGNATURE TAOS_DEF_ERROR_CODE(0, 0x0907) // 2.x +#define TSDB_CODE_SYN_MISMATCHED_SIGNATURE TAOS_DEF_ERROR_CODE(0, 0x0907) // #define TSDB_CODE_SYN_INVALID_CHECKSUM TAOS_DEF_ERROR_CODE(0, 0x0908) // 2.x // #define TSDB_CODE_SYN_INVALID_MSGLEN TAOS_DEF_ERROR_CODE(0, 0x0909) // 2.x // #define TSDB_CODE_SYN_INVALID_MSGTYPE TAOS_DEF_ERROR_CODE(0, 0x090A) // 2.x diff --git a/source/dnode/vnode/src/inc/tsdb.h b/source/dnode/vnode/src/inc/tsdb.h index 4cea7c5e85..1d17d616ab 100644 --- a/source/dnode/vnode/src/inc/tsdb.h +++ b/source/dnode/vnode/src/inc/tsdb.h @@ -1025,6 +1025,11 @@ struct STsdbFilterInfo { TABLEID tbid; }; +enum { + TSDB_FS_STATE_NORMAL = 0, + TSDB_FS_STATE_INCOMPLETE, +}; + #ifdef 
__cplusplus } #endif diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.c b/source/dnode/vnode/src/tsdb/tsdbFS2.c index 759fded522..ef2f81fa02 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.c @@ -38,13 +38,6 @@ typedef struct { STFileHashEntry **buckets; } STFileHash; -enum { - TSDB_FS_STATE_NONE = 0, - TSDB_FS_STATE_OPEN, - TSDB_FS_STATE_EDIT, - TSDB_FS_STATE_CLOSE, -}; - static const char *gCurrentFname[] = { [TSDB_FCURRENT] = "current.json", [TSDB_FCURRENT_C] = "current.c.json", @@ -57,7 +50,7 @@ static int32_t create_fs(STsdb *pTsdb, STFileSystem **fs) { fs[0]->tsdb = pTsdb; tsem_init(&fs[0]->canEdit, 0, 1); - fs[0]->state = TSDB_FS_STATE_NONE; + fs[0]->fsstate = TSDB_FS_STATE_NORMAL; fs[0]->neid = 0; TARRAY2_INIT(fs[0]->fSetArr); TARRAY2_INIT(fs[0]->fSetArrTmp); @@ -496,6 +489,7 @@ static void tsdbFSDestroyFileObjHash(STFileHash *hash) { static int32_t tsdbFSDoSanAndFix(STFileSystem *fs) { int32_t code = 0; int32_t lino = 0; + int32_t corrupt = false; { // scan each file STFileSet *fset = NULL; @@ -503,8 +497,12 @@ static int32_t tsdbFSDoSanAndFix(STFileSystem *fs) { // data file for (int32_t ftype = 0; ftype < TSDB_FTYPE_MAX; ftype++) { if (fset->farr[ftype] == NULL) continue; - code = tsdbFSDoScanAndFixFile(fs, fset->farr[ftype]); - TSDB_CHECK_CODE(code, lino, _exit); + STFileObj *fobj = fset->farr[ftype]; + code = tsdbFSDoScanAndFixFile(fs, fobj); + if (code) { + fset->maxVerValid = TMIN(fset->maxVerValid, fobj->f->minVer - 1); + corrupt = true; + } } // stt file @@ -513,12 +511,21 @@ static int32_t tsdbFSDoSanAndFix(STFileSystem *fs) { STFileObj *fobj; TARRAY2_FOREACH(lvl->fobjArr, fobj) { code = tsdbFSDoScanAndFixFile(fs, fobj); - TSDB_CHECK_CODE(code, lino, _exit); + if (code) { + fset->maxVerValid = TMIN(fset->maxVerValid, fobj->f->minVer - 1); + corrupt = true; + } } } } } + if (corrupt) { + tsdbError("vgId:%d, not to clear unreferenced files since some fset corrupted", TD_VID(fs->tsdb->pVnode)); + 
fs->fsstate = TSDB_FS_STATE_INCOMPLETE; + goto _exit; + } + { // clear unreferenced files STfsDir *dir = tfsOpendir(fs->tsdb->pVnode->pTfs, fs->tsdb->path); if (dir == NULL) { @@ -1009,6 +1016,7 @@ int32_t tsdbFSCreateCopyRangedSnapshot(STFileSystem *fs, TSnapRangeArray *pExclu ever = u->sver - 1; i++; } + break; } code = tsdbTFileSetFilteredInitDup(fs->tsdb, fset, ever, &fset1, fopArr); @@ -1057,8 +1065,11 @@ int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, int64_t sver, int64_t ev sver1 = u->sver; i++; } + break; } + if (sver1 > ever1) continue; + tsdbInfo("fsrArr:%p, fid:%d, sver:%" PRId64 ", ever:%" PRId64, fsrArr, fset->fid, sver1, ever1); code = tsdbTSnapRangeInitRef(fs->tsdb, fset, sver1, ever1, &fsr1); diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.h b/source/dnode/vnode/src/tsdb/tsdbFS2.h index 73b27a8fbd..851459df53 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS2.h +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.h @@ -101,7 +101,7 @@ struct STFSBgTask { struct STFileSystem { STsdb *tsdb; tsem_t canEdit; - int32_t state; + int32_t fsstate; int64_t neid; EFEditT etype; TFileSetArray fSetArr[1]; diff --git a/source/dnode/vnode/src/tsdb/tsdbFSet2.c b/source/dnode/vnode/src/tsdb/tsdbFSet2.c index dd86d01598..620fcb3a47 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFSet2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFSet2.c @@ -452,6 +452,7 @@ int32_t tsdbTFileSetInit(int32_t fid, STFileSet **fset) { if (fset[0] == NULL) return TSDB_CODE_OUT_OF_MEMORY; fset[0]->fid = fid; + fset[0]->maxVerValid = VERSION_MAX; TARRAY2_INIT(fset[0]->lvlArr); return 0; } diff --git a/source/dnode/vnode/src/tsdb/tsdbFSet2.h b/source/dnode/vnode/src/tsdb/tsdbFSet2.h index c78cc179df..ea0f99f68e 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFSet2.h +++ b/source/dnode/vnode/src/tsdb/tsdbFSet2.h @@ -84,7 +84,7 @@ struct SSttLvl { struct STFileSet { int32_t fid; - int8_t stat; + int64_t maxVerValid; STFileObj *farr[TSDB_FTYPE_MAX]; // file array TSttLvlArray lvlArr[1]; // level array }; 
diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index 3b74475870..6ee7112906 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -436,8 +436,8 @@ _exit: taosMemoryFree(reader[0]); reader[0] = NULL; } else { - tsdbInfo("vgId:%d %s done, sver:%" PRId64 " ever:%" PRId64 " type:%d", TD_VID(tsdb->pVnode), __func__, sver, ever, - type); + tsdbInfo("vgId:%d tsdb snapshot reader opened. sver:%" PRId64 " ever:%" PRId64 " type:%d", TD_VID(tsdb->pVnode), + sver, ever, type); } return code; } @@ -1103,6 +1103,8 @@ int32_t tsdbSnapWriterClose(STsdbSnapWriter** writer, int8_t rollback) { TSDB_CHECK_CODE(code, lino, _exit); } + writer[0]->tsdb->pFS->fsstate = TSDB_FS_STATE_NORMAL; + taosThreadRwlockUnlock(&writer[0]->tsdb->rwLock); } tsdbFSEnableBgTask(tsdb->pFS); @@ -1218,14 +1220,28 @@ static int32_t tsdbTFileSetToSnapPart(STFileSet* fset, STsdbSnapPartition** ppSP goto _err; } + p->fid = fset->fid; + + int32_t code = 0; int32_t typ = 0; + int32_t corrupt = false; + int32_t count = 0; for (int32_t ftype = TSDB_FTYPE_MIN; ftype < TSDB_FTYPE_MAX; ++ftype) { if (fset->farr[ftype] == NULL) continue; typ = tsdbFTypeToSRangeTyp(ftype); ASSERT(typ < TSDB_SNAP_RANGE_TYP_MAX); STFile* f = fset->farr[ftype]->f; + if (f->maxVer > fset->maxVerValid) { + corrupt = true; + tsdbError("skip incomplete data file: fid:%d, maxVerValid:%" PRId64 ", minVer:%" PRId64 ", maxVer:%" PRId64 + ", ftype: %d", + fset->fid, fset->maxVerValid, f->minVer, f->maxVer, ftype); + continue; + } + count++; SVersionRange vr = {.minVer = f->minVer, .maxVer = f->maxVer}; - TARRAY2_SORT_INSERT(&p->verRanges[typ], vr, tVersionRangeCmprFn); + code = TARRAY2_SORT_INSERT(&p->verRanges[typ], vr, tVersionRangeCmprFn); + ASSERT(code == 0); } typ = TSDB_SNAP_RANGE_TYP_STT; @@ -1234,10 +1250,24 @@ static int32_t tsdbTFileSetToSnapPart(STFileSet* fset, STsdbSnapPartition** ppSP STFileObj* fobj; 
TARRAY2_FOREACH(lvl->fobjArr, fobj) { STFile* f = fobj->f; + if (f->maxVer > fset->maxVerValid) { + corrupt = true; + tsdbError("skip incomplete stt file.fid:%d, maxVerValid:%" PRId64 ", minVer:%" PRId64 ", maxVer:%" PRId64 + ", ftype: %d", + fset->fid, fset->maxVerValid, f->minVer, f->maxVer, typ); + continue; + } + count++; SVersionRange vr = {.minVer = f->minVer, .maxVer = f->maxVer}; - TARRAY2_SORT_INSERT(&p->verRanges[typ], vr, tVersionRangeCmprFn); + code = TARRAY2_SORT_INSERT(&p->verRanges[typ], vr, tVersionRangeCmprFn); + ASSERT(code == 0); } } + if (corrupt && count == 0) { + SVersionRange vr = {.minVer = VERSION_MIN, .maxVer = fset->maxVerValid}; + code = TARRAY2_SORT_INSERT(&p->verRanges[typ], vr, tVersionRangeCmprFn); + ASSERT(code == 0); + } ppSP[0] = p; return 0; @@ -1272,7 +1302,8 @@ static STsdbSnapPartList* tsdbGetSnapPartList(STFileSystem* fs) { break; } ASSERT(pItem != NULL); - TARRAY2_SORT_INSERT(pList, pItem, tsdbSnapPartCmprFn); + code = TARRAY2_SORT_INSERT(pList, pItem, tsdbSnapPartCmprFn); + ASSERT(code == 0); } taosThreadRwlockUnlock(&fs->tsdb->rwLock); @@ -1432,18 +1463,22 @@ int32_t tsdbSnapPartListToRangeDiff(STsdbSnapPartList* pList, TSnapRangeArray** terrno = TSDB_CODE_OUT_OF_MEMORY; goto _err; } - int64_t ever = -1; + int64_t maxVerValid = -1; int32_t typMax = TSDB_SNAP_RANGE_TYP_MAX; for (int32_t i = 0; i < typMax; i++) { SVerRangeList* iList = &part->verRanges[i]; - SVersionRange r = {0}; - TARRAY2_FOREACH(iList, r) { - if (r.maxVer < r.minVer) continue; - ever = TMAX(ever, r.maxVer); + SVersionRange vr = {0}; + TARRAY2_FOREACH(iList, vr) { + if (vr.maxVer < vr.minVer) { + continue; + } + maxVerValid = TMAX(maxVerValid, vr.maxVer); } } - r->sver = ever + 1; + r->fid = part->fid; + r->sver = maxVerValid + 1; r->ever = VERSION_MAX; + tsdbInfo("range diff fid:%" PRId64 ", sver:%" PRId64 ", ever:%" PRId64, part->fid, r->sver, r->ever); TARRAY2_APPEND(pDiff, r); } ppRanges[0] = pDiff; @@ -1473,14 +1508,16 @@ void 
tsdbSnapPartListDestroy(STsdbSnapPartList** ppList) { } int32_t tsdbSnapGetInfo(STsdb* pTsdb, SSnapshot* pSnap) { - if (pSnap->typ != TDMT_SYNC_PREP_SNAPSHOT && pSnap->typ != TDMT_SYNC_PREP_SNAPSHOT_REPLY) { + pSnap->state = pTsdb->pFS->fsstate; + if (pSnap->type != TDMT_SYNC_PREP_SNAPSHOT && pSnap->type != TDMT_SYNC_PREP_SNAPSHOT_REPLY) { return 0; } + int code = -1; STsdbSnapPartList* pList = tsdbGetSnapPartList(pTsdb->pFS); if (pList == NULL) goto _out; - if (pSnap->typ == TDMT_SYNC_PREP_SNAPSHOT_REPLY) { + if (pSnap->type == TDMT_SYNC_PREP_SNAPSHOT_REPLY) { } void* buf = NULL; @@ -1499,7 +1536,7 @@ int32_t tsdbSnapGetInfo(STsdb* pTsdb, SSnapshot* pSnap) { // header SSyncTLV* datHead = (void*)pSnap->data; - datHead->typ = pSnap->typ; + datHead->typ = pSnap->type; datHead->len = 0; // tsdb diff --git a/source/dnode/vnode/src/vnd/vnodeOpen.c b/source/dnode/vnode/src/vnd/vnodeOpen.c index fada83a7f1..5084cc2ff5 100644 --- a/source/dnode/vnode/src/vnd/vnodeOpen.c +++ b/source/dnode/vnode/src/vnd/vnodeOpen.c @@ -13,6 +13,8 @@ * along with this program. If not, see . 
*/ +#include "sync.h" +#include "tsdb.h" #include "vnd.h" #include "vndCos.h" @@ -517,10 +519,3 @@ ESyncRole vnodeGetRole(SVnode *pVnode) { return syncGetRole(pVnode->sync); } void vnodeStop(SVnode *pVnode) {} int64_t vnodeGetSyncHandle(SVnode *pVnode) { return pVnode->sync; } - -int32_t vnodeGetSnapshot(SVnode *pVnode, SSnapshot *pSnap) { - pSnap->lastApplyIndex = pVnode->state.committed; - pSnap->lastApplyTerm = pVnode->state.commitTerm; - pSnap->lastConfigIndex = -1; - return tsdbSnapGetInfo(pVnode->pTsdb, pSnap); -} diff --git a/source/dnode/vnode/src/vnd/vnodeSnapshot.c b/source/dnode/vnode/src/vnd/vnodeSnapshot.c index b2fbdc07e9..0874e5e0d8 100644 --- a/source/dnode/vnode/src/vnd/vnodeSnapshot.c +++ b/source/dnode/vnode/src/vnd/vnodeSnapshot.c @@ -537,7 +537,9 @@ int32_t vnodeSnapWriterClose(SVSnapWriter *pWriter, int8_t rollback, SSnapshot * char dir[TSDB_FILENAME_LEN] = {0}; vnodeGetPrimaryDir(pVnode->path, pVnode->diskPrimary, pVnode->pTfs, dir, TSDB_FILENAME_LEN); - vnodeCommitInfo(dir); + code = vnodeCommitInfo(dir); + if (code) goto _exit; + } else { vnodeRollback(pWriter->pVnode); } diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index b73c9b8c65..ba142ddb6d 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -15,6 +15,8 @@ #define _DEFAULT_SOURCE #include "tq.h" +#include "sync.h" +#include "tsdb.h" #include "vnd.h" #define BATCH_ENABLE 0 @@ -783,3 +785,14 @@ bool vnodeIsLeader(SVnode *pVnode) { return true; } + +int32_t vnodeGetSnapshot(SVnode *pVnode, SSnapshot *pSnap) { + pSnap->lastApplyIndex = pVnode->state.committed; + pSnap->lastApplyTerm = pVnode->state.commitTerm; + pSnap->lastConfigIndex = -1; + + int32_t code = tsdbSnapGetInfo(pVnode->pTsdb, pSnap); + + pSnap->state = (pSnap->state == TSDB_FS_STATE_INCOMPLETE) ? 
SYNC_FSM_STATE_INCOMPLETE : SYNC_FSM_STATE_NORMAL; + return code; +} diff --git a/source/libs/sync/inc/syncInt.h b/source/libs/sync/inc/syncInt.h index 870cdd6a72..cec1a12024 100644 --- a/source/libs/sync/inc/syncInt.h +++ b/source/libs/sync/inc/syncInt.h @@ -139,6 +139,7 @@ typedef struct SSyncNode { SSyncFSM* pFsm; int32_t quorum; SRaftId leaderCache; + ESyncFsmState fsmState; // life cycle int64_t rid; diff --git a/source/libs/sync/inc/syncMessage.h b/source/libs/sync/inc/syncMessage.h index c0d3663a8f..9054f47d37 100644 --- a/source/libs/sync/inc/syncMessage.h +++ b/source/libs/sync/inc/syncMessage.h @@ -116,7 +116,7 @@ typedef struct SyncAppendEntriesReply { SyncIndex matchIndex; SyncIndex lastSendIndex; int64_t startTime; - int16_t reserved; + int16_t fsmState; } SyncAppendEntriesReply; typedef struct SyncHeartbeat { diff --git a/source/libs/sync/inc/syncSnapshot.h b/source/libs/sync/inc/syncSnapshot.h index 063b4f51f5..2a19945c5a 100644 --- a/source/libs/sync/inc/syncSnapshot.h +++ b/source/libs/sync/inc/syncSnapshot.h @@ -31,7 +31,7 @@ extern "C" { #define SYNC_SNAPSHOT_RETRY_MS 5000 typedef struct SSyncSnapshotSender { - bool start; + int8_t start; int32_t seq; int32_t ack; void *pReader; @@ -60,8 +60,8 @@ void snapshotSenderStop(SSyncSnapshotSender *pSender, bool finis int32_t snapshotReSend(SSyncSnapshotSender *pSender); typedef struct SSyncSnapshotReceiver { - // update when pre snapshot - bool start; + // update when prep snapshot + int8_t start; int32_t ack; SyncTerm term; SRaftId fromId; diff --git a/source/libs/sync/src/syncAppendEntries.c b/source/libs/sync/src/syncAppendEntries.c index 925988f43a..8ae1dd2a54 100644 --- a/source/libs/sync/src/syncAppendEntries.c +++ b/source/libs/sync/src/syncAppendEntries.c @@ -155,6 +155,13 @@ int32_t syncNodeOnAppendEntries(SSyncNode* ths, const SRpcMsg* pRpcMsg) { pMsg->vgId, pMsg->prevLogIndex + 1, pMsg->term, pMsg->prevLogIndex, pMsg->prevLogTerm, pMsg->commitIndex, pEntry->term); + if (ths->fsmState == 
SYNC_FSM_STATE_INCOMPLETE) { + pReply->fsmState = ths->fsmState; + sError("vgId:%d, not allow to accept sync log msg due to incomplete fsm state", ths->vgId); + syncEntryDestroy(pEntry); + goto _SEND_RESPONSE; + } + // accept if (syncLogBufferAccept(ths->pLogBuf, ths, pEntry, pMsg->prevLogTerm) < 0) { goto _SEND_RESPONSE; @@ -175,7 +182,7 @@ _SEND_RESPONSE: (void)syncNodeSendMsgById(&pReply->destId, ths, &rpcRsp); // commit index, i.e. leader notice me - if (syncLogBufferCommit(ths->pLogBuf, ths, ths->commitIndex) < 0) { + if (ths->fsmState != SYNC_FSM_STATE_INCOMPLETE && syncLogBufferCommit(ths->pLogBuf, ths, ths->commitIndex) < 0) { sError("vgId:%d, failed to commit raft fsm log since %s.", ths->vgId, terrstr()); } diff --git a/source/libs/sync/src/syncElection.c b/source/libs/sync/src/syncElection.c index 86e28db90c..b4e2049a64 100644 --- a/source/libs/sync/src/syncElection.c +++ b/source/libs/sync/src/syncElection.c @@ -71,6 +71,11 @@ static int32_t syncNodeRequestVotePeers(SSyncNode* pNode) { } int32_t syncNodeElect(SSyncNode* pSyncNode) { + if (pSyncNode->fsmState == SYNC_FSM_STATE_INCOMPLETE) { + sNError(pSyncNode, "ignore leader hb timeout due to incomplete fsm state"); + return -1; + } + sNInfo(pSyncNode, "begin election"); pSyncNode->electNum++; diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index eca499cf28..8ddd55d906 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -1009,6 +1009,10 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo, int32_t vnodeVersion) { commitIndex = snapshot.lastApplyIndex; sNTrace(pSyncNode, "reset commit index by snapshot"); } + pSyncNode->fsmState = snapshot.state; + if (pSyncNode->fsmState) { + sError("vgId:%d, fsm state incomplete.", pSyncNode->vgId); + } } pSyncNode->commitIndex = commitIndex; sInfo("vgId:%d, sync node commitIndex initialized as %" PRId64, pSyncNode->vgId, pSyncNode->commitIndex); @@ -1163,7 +1167,8 @@ int32_t syncNodeRestore(SSyncNode* 
pSyncNode) { pSyncNode->commitIndex = TMAX(pSyncNode->commitIndex, commitIndex); sInfo("vgId:%d, restore sync until commitIndex:%" PRId64, pSyncNode->vgId, pSyncNode->commitIndex); - if (syncLogBufferCommit(pSyncNode->pLogBuf, pSyncNode, pSyncNode->commitIndex) < 0) { + if (pSyncNode->fsmState != SYNC_FSM_STATE_INCOMPLETE && + syncLogBufferCommit(pSyncNode->pLogBuf, pSyncNode, pSyncNode->commitIndex) < 0) { return -1; } @@ -1455,10 +1460,9 @@ int32_t syncNodeSendMsgById(const SRaftId* destRaftId, SSyncNode* pNode, SRpcMsg } if (code < 0) { - sError("vgId:%d, sync send msg by id error, epset:%p dnode:%d addr:%" PRId64 " err:0x%x", pNode->vgId, epSet, - DID(destRaftId), destRaftId->addr, terrno); + sError("vgId:%d, failed to send sync msg since %s. epset:%p dnode:%d addr:%" PRId64, pNode->vgId, terrstr(), epSet, + DID(destRaftId), destRaftId->addr); rpcFreeCont(pMsg->pCont); - terrno = TSDB_CODE_SYN_INTERNAL_ERROR; } return code; @@ -2895,7 +2899,7 @@ _out:; // single replica (void)syncNodeUpdateCommitIndex(ths, matchIndex); - if (syncLogBufferCommit(ths->pLogBuf, ths, ths->commitIndex) < 0) { + if (ths->fsmState != SYNC_FSM_STATE_INCOMPLETE && syncLogBufferCommit(ths->pLogBuf, ths, ths->commitIndex) < 0) { sError("vgId:%d, failed to commit until commitIndex:%" PRId64 "", ths->vgId, ths->commitIndex); code = -1; } @@ -3139,7 +3143,7 @@ int32_t syncNodeOnLocalCmd(SSyncNode* ths, const SRpcMsg* pRpcMsg) { if (pMsg->currentTerm == matchTerm) { (void)syncNodeUpdateCommitIndex(ths, pMsg->commitIndex); } - if (syncLogBufferCommit(ths->pLogBuf, ths, ths->commitIndex) < 0) { + if (ths->fsmState != SYNC_FSM_STATE_INCOMPLETE && syncLogBufferCommit(ths->pLogBuf, ths, ths->commitIndex) < 0) { sError("vgId:%d, failed to commit raft log since %s. 
commit index:%" PRId64 "", ths->vgId, terrstr(), ths->commitIndex); } diff --git a/source/libs/sync/src/syncPipeline.c b/source/libs/sync/src/syncPipeline.c index 019f8f7e62..a38d67a388 100644 --- a/source/libs/sync/src/syncPipeline.c +++ b/source/libs/sync/src/syncPipeline.c @@ -839,14 +839,16 @@ int32_t syncLogReplRecover(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncAppendEn return 0; } - if (pMsg->success == false && pMsg->matchIndex >= pMsg->lastSendIndex) { - sWarn("vgId:%d, failed to rollback match index. peer: dnode:%d, match index:%" PRId64 ", last sent:%" PRId64, - pNode->vgId, DID(&destId), pMsg->matchIndex, pMsg->lastSendIndex); + if (pMsg->fsmState == SYNC_FSM_STATE_INCOMPLETE || (!pMsg->success && pMsg->matchIndex >= pMsg->lastSendIndex)) { + char* msg1 = "rollback match index failure"; + char* msg2 = "incomplete fsm state"; + sInfo("vgId:%d, snapshot replication to dnode:%d. reason:%s, match index:%" PRId64 ", last sent:%" PRId64, + pNode->vgId, DID(&destId), (pMsg->fsmState == SYNC_FSM_STATE_INCOMPLETE ? msg2 : msg1), pMsg->matchIndex, + pMsg->lastSendIndex); if (syncNodeStartSnapshot(pNode, &destId) < 0) { sError("vgId:%d, failed to start snapshot for peer dnode:%d", pNode->vgId, DID(&destId)); return -1; } - sInfo("vgId:%d, snapshot replication to peer dnode:%d", pNode->vgId, DID(&destId)); return 0; } } @@ -1000,10 +1002,9 @@ int32_t syncLogReplAttempt(SSyncLogReplMgr* pMgr, SSyncNode* pNode) { pMgr->endIndex = index + 1; if (barrier) { - sInfo("vgId:%d, replicated sync barrier to dest:%" PRIx64 ". index:%" PRId64 ", term:%" PRId64 + sInfo("vgId:%d, replicated sync barrier to dnode:%d. 
index:%" PRId64 ", term:%" PRId64 ", repl mgr: rs(%d) [%" PRId64 " %" PRId64 ", %" PRId64 ")", - pNode->vgId, pDestId->addr, index, term, pMgr->restored, pMgr->startIndex, pMgr->matchIndex, - pMgr->endIndex); + pNode->vgId, DID(pDestId), index, term, pMgr->restored, pMgr->startIndex, pMgr->matchIndex, pMgr->endIndex); break; } } diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index 99e8fd55a2..383fda89b0 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -44,8 +44,8 @@ SSyncSnapshotSender *snapshotSenderCreate(SSyncNode *pSyncNode, int32_t replicaI pSender->pSyncNode = pSyncNode; pSender->replicaIndex = replicaIndex; pSender->term = raftStoreGetTerm(pSyncNode); - pSender->startTime = 0; - pSender->endTime = 0; + pSender->startTime = -1; + pSender->endTime = -1; pSender->pSyncNode->pFsm->FpGetSnapshotInfo(pSender->pSyncNode->pFsm, &pSender->snapshot); pSender->finish = false; @@ -71,11 +71,16 @@ void snapshotSenderDestroy(SSyncSnapshotSender *pSender) { taosMemoryFree(pSender); } -bool snapshotSenderIsStart(SSyncSnapshotSender *pSender) { return pSender->start; } +bool snapshotSenderIsStart(SSyncSnapshotSender *pSender) { return atomic_load_8(&pSender->start); } int32_t snapshotSenderStart(SSyncSnapshotSender *pSender) { int32_t code = -1; - pSender->start = true; + + int8_t started = atomic_val_compare_exchange_8(&pSender->start, false, true); + if (started) return 0; + + taosMsleep(1); + pSender->seq = SYNC_SNAPSHOT_SEQ_BEGIN; pSender->ack = SYNC_SNAPSHOT_SEQ_INVALID; pSender->pReader = NULL; @@ -92,13 +97,13 @@ int32_t snapshotSenderStart(SSyncSnapshotSender *pSender) { memset(&pSender->lastConfig, 0, sizeof(pSender->lastConfig)); pSender->sendingMS = 0; pSender->term = raftStoreGetTerm(pSender->pSyncNode); - pSender->startTime = taosGetTimestampMs(); + pSender->startTime = taosGetMonoTimestampMs(); pSender->lastSendTime = pSender->startTime; pSender->finish = false; // Get full 
snapshot info SSyncNode *pSyncNode = pSender->pSyncNode; - SSnapshot snapInfo = {.typ = TDMT_SYNC_PREP_SNAPSHOT}; + SSnapshot snapInfo = {.type = TDMT_SYNC_PREP_SNAPSHOT}; if (pSyncNode->pFsm->FpGetSnapshotInfo(pSyncNode->pFsm, &snapInfo) != 0) { sSError(pSender, "snapshot get info failure since %s", terrstr()); goto _out; @@ -130,11 +135,11 @@ int32_t snapshotSenderStart(SSyncSnapshotSender *pSender) { pMsg->lastTerm = pSender->snapshot.lastApplyTerm; pMsg->lastConfigIndex = pSender->snapshot.lastConfigIndex; pMsg->lastConfig = pSender->lastConfig; - pMsg->startTime = pSender->startTime; + pMsg->startTime = atomic_load_64(&pSender->startTime); pMsg->seq = SYNC_SNAPSHOT_SEQ_PREP_SNAPSHOT; if (dataLen > 0) { - pMsg->payloadType = snapInfo.typ; + pMsg->payloadType = snapInfo.type; memcpy(pMsg->data, snapInfo.data, dataLen); } @@ -160,7 +165,9 @@ void snapshotSenderStop(SSyncSnapshotSender *pSender, bool finish) { sSDebug(pSender, "snapshot sender stop, finish:%d reader:%p", finish, pSender->pReader); // update flag - pSender->start = false; + int8_t stopped = !atomic_val_compare_exchange_8(&pSender->start, true, false); + if (stopped) return; + pSender->finish = finish; pSender->endTime = taosGetTimestampMs(); @@ -223,6 +230,7 @@ static int32_t snapshotSend(SSyncSnapshotSender *pSender) { pMsg->lastTerm = pSender->snapshot.lastApplyTerm; pMsg->lastConfigIndex = pSender->snapshot.lastConfigIndex; pMsg->lastConfig = pSender->lastConfig; + pMsg->startTime = pSender->startTime; pMsg->seq = pSender->seq; if (pSender->pCurrentBlock != NULL) { @@ -286,7 +294,7 @@ int32_t snapshotReSend(SSyncSnapshotSender *pSender) { static int32_t snapshotSenderUpdateProgress(SSyncSnapshotSender *pSender, SyncSnapshotRsp *pMsg) { if (pMsg->ack != pSender->seq) { sSError(pSender, "snapshot sender update seq failed, ack:%d seq:%d", pMsg->ack, pSender->seq); - terrno = TSDB_CODE_SYN_INTERNAL_ERROR; + terrno = TSDB_CODE_SYN_MISMATCHED_SIGNATURE; return -1; } @@ -301,8 +309,6 @@ static int32_t 
snapshotSenderUpdateProgress(SSyncSnapshotSender *pSender, SyncSn // return 1, last snapshot finish ok // return -1, error int32_t syncNodeStartSnapshot(SSyncNode *pSyncNode, SRaftId *pDestId) { - sNInfo(pSyncNode, "snapshot sender starting ..."); - SSyncSnapshotSender *pSender = syncNodeGetSnapshotSender(pSyncNode, pDestId); if (pSender == NULL) { sNError(pSyncNode, "snapshot sender start error since get failed"); @@ -310,12 +316,12 @@ int32_t syncNodeStartSnapshot(SSyncNode *pSyncNode, SRaftId *pDestId) { } if (snapshotSenderIsStart(pSender)) { - sSInfo(pSender, "snapshot sender already start, ignore"); + sSDebug(pSender, "snapshot sender already start, ignore"); return 0; } if (pSender->finish && taosGetTimestampMs() - pSender->endTime < SNAPSHOT_WAIT_MS) { - sSInfo(pSender, "snapshot sender start too frequently, ignore"); + sSDebug(pSender, "snapshot sender start too frequently, ignore"); return 0; } @@ -342,6 +348,7 @@ SSyncSnapshotReceiver *snapshotReceiverCreate(SSyncNode *pSyncNode, SRaftId from } pReceiver->start = false; + pReceiver->startTime = 0; pReceiver->ack = SYNC_SNAPSHOT_SEQ_BEGIN; pReceiver->pWriter = NULL; pReceiver->pSyncNode = pSyncNode; @@ -384,7 +391,7 @@ void snapshotReceiverDestroy(SSyncSnapshotReceiver *pReceiver) { } bool snapshotReceiverIsStart(SSyncSnapshotReceiver *pReceiver) { - return (pReceiver != NULL ? pReceiver->start : false); + return (pReceiver != NULL ? 
atomic_load_8(&pReceiver->start) : false); } static int32_t snapshotReceiverStartWriter(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend *pBeginMsg) { @@ -423,11 +430,14 @@ void snapshotReceiverStart(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend *p return; } - pReceiver->start = true; + int8_t started = atomic_val_compare_exchange_8(&pReceiver->start, false, true); + if (started) return; + pReceiver->ack = SYNC_SNAPSHOT_SEQ_PREP_SNAPSHOT; pReceiver->term = raftStoreGetTerm(pReceiver->pSyncNode); pReceiver->fromId = pPreMsg->srcId; pReceiver->startTime = pPreMsg->startTime; + ASSERT(pReceiver->startTime); // event log sRInfo(pReceiver, "snapshot receiver is start"); @@ -438,6 +448,9 @@ void snapshotReceiverStart(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend *p void snapshotReceiverStop(SSyncSnapshotReceiver *pReceiver) { sRInfo(pReceiver, "snapshot receiver stop, not apply, writer:%p", pReceiver->pWriter); + int8_t stopped = !atomic_val_compare_exchange_8(&pReceiver->start, true, false); + if (stopped) return; + if (pReceiver->pWriter != NULL) { int32_t ret = pReceiver->pSyncNode->pFsm->FpSnapshotStopWrite(pReceiver->pSyncNode->pFsm, pReceiver->pWriter, false, &pReceiver->snapshot); @@ -448,8 +461,6 @@ void snapshotReceiverStop(SSyncSnapshotReceiver *pReceiver) { } else { sRInfo(pReceiver, "snapshot receiver stop, writer is null"); } - - pReceiver->start = false; } // when recv last snapshot block, apply data into snapshot @@ -499,6 +510,10 @@ static int32_t snapshotReceiverFinish(SSyncSnapshotReceiver *pReceiver, SyncSnap // update progress pReceiver->ack = SYNC_SNAPSHOT_SEQ_END; + SSnapshot snapshot = {0}; + pReceiver->pSyncNode->pFsm->FpGetSnapshotInfo(pReceiver->pSyncNode->pFsm, &snapshot); + pReceiver->pSyncNode->fsmState = snapshot.state; + } else { sRError(pReceiver, "snapshot receiver finish error since writer is null"); return -1; @@ -582,7 +597,7 @@ static int32_t syncNodeOnSnapshotPrep(SSyncNode *pSyncNode, SyncSnapshotSend *pM // ignore 
sRError(pReceiver, "snapshot receiver startTime:%" PRId64 " < msg startTime:%" PRId64 " ignore", pReceiver->startTime, pMsg->startTime); - terrno = TSDB_CODE_SYN_INTERNAL_ERROR; + terrno = TSDB_CODE_SYN_MISMATCHED_SIGNATURE; code = terrno; goto _SEND_REPLY; } @@ -593,33 +608,18 @@ static int32_t syncNodeOnSnapshotPrep(SSyncNode *pSyncNode, SyncSnapshotSend *pM } _START_RECEIVER: - if (timeNow - pMsg->startTime > SNAPSHOT_MAX_CLOCK_SKEW_MS) { - sRError(pReceiver, "snapshot receiver time skew too much, now:%" PRId64 " msg startTime:%" PRId64, timeNow, - pMsg->startTime); - terrno = TSDB_CODE_SYN_INTERNAL_ERROR; - code = terrno; - } else { - // waiting for clock match - while (timeNow < pMsg->startTime) { - sRInfo(pReceiver, "snapshot receiver pre waitting for true time, now:%" PRId64 ", startTime:%" PRId64, timeNow, - pMsg->startTime); - taosMsleep(10); - timeNow = taosGetTimestampMs(); - } - - if (snapshotReceiverIsStart(pReceiver)) { - sRInfo(pReceiver, "snapshot receiver already start and force stop pre one"); - snapshotReceiverStop(pReceiver); - } - - snapshotReceiverStart(pReceiver, pMsg); // set start-time same with sender + if (snapshotReceiverIsStart(pReceiver)) { + sRInfo(pReceiver, "snapshot receiver already start and force stop pre one"); + snapshotReceiverStop(pReceiver); } + snapshotReceiverStart(pReceiver, pMsg); // set start-time same with sender + _SEND_REPLY: // build msg ; // make complier happy - SSnapshot snapInfo = {.typ = TDMT_SYNC_PREP_SNAPSHOT_REPLY}; + SSnapshot snapInfo = {.type = TDMT_SYNC_PREP_SNAPSHOT_REPLY}; int32_t dataLen = 0; if (pMsg->dataLen > 0) { void *data = taosMemoryCalloc(1, pMsg->dataLen); @@ -655,13 +655,15 @@ _SEND_REPLY: pRspMsg->term = raftStoreGetTerm(pSyncNode); pRspMsg->lastIndex = pMsg->lastIndex; pRspMsg->lastTerm = pMsg->lastTerm; - pRspMsg->startTime = pReceiver->startTime; + pRspMsg->startTime = pMsg->startTime; pRspMsg->ack = pMsg->seq; // receiver maybe already closed pRspMsg->code = code; 
pRspMsg->snapBeginIndex = syncNodeGetSnapBeginIndex(pSyncNode); + ASSERT(pRspMsg->startTime); + if (snapInfo.data) { - pRspMsg->payloadType = snapInfo.typ; + pRspMsg->payloadType = snapInfo.type; memcpy(pRspMsg->data, snapInfo.data, dataLen); // save snapshot info @@ -704,6 +706,7 @@ static int32_t syncNodeOnSnapshotBegin(SSyncNode *pSyncNode, SyncSnapshotSend *p if (pReceiver->startTime != pMsg->startTime) { sRError(pReceiver, "snapshot receiver begin failed since startTime:%" PRId64 " not equal to msg startTime:%" PRId64, pReceiver->startTime, pMsg->startTime); + terrno = TSDB_CODE_SYN_MISMATCHED_SIGNATURE; goto _SEND_REPLY; } @@ -732,11 +735,13 @@ _SEND_REPLY: pRspMsg->term = raftStoreGetTerm(pSyncNode); pRspMsg->lastIndex = pMsg->lastIndex; pRspMsg->lastTerm = pMsg->lastTerm; - pRspMsg->startTime = pReceiver->startTime; + pRspMsg->startTime = pMsg->startTime; pRspMsg->ack = pReceiver->ack; // receiver maybe already closed pRspMsg->code = code; pRspMsg->snapBeginIndex = pReceiver->snapshotParam.start; + ASSERT(pRspMsg->startTime); + // send msg syncLogSendSyncSnapshotRsp(pSyncNode, pRspMsg, "snapshot receiver begin"); if (syncNodeSendMsgById(&pRspMsg->destId, pSyncNode, &rpcMsg) != 0) { @@ -751,17 +756,17 @@ static int32_t syncNodeOnSnapshotReceive(SSyncNode *pSyncNode, SyncSnapshotSend // condition 4 // transfering SSyncSnapshotReceiver *pReceiver = pSyncNode->pNewNodeReceiver; - - // waiting for clock match int64_t timeNow = taosGetTimestampMs(); - while (timeNow < pMsg->startTime) { - sRInfo(pReceiver, "snapshot receiver receiving waitting for true time, now:%" PRId64 ", stime:%" PRId64, timeNow, - pMsg->startTime); - taosMsleep(10); - timeNow = taosGetTimestampMs(); + int32_t code = 0; + + if (pReceiver->startTime != pMsg->startTime) { + sRError(pReceiver, "snapshot receive failed since startTime:%" PRId64 " not equal to msg startTime:%" PRId64, + pReceiver->startTime, pMsg->startTime); + terrno = TSDB_CODE_SYN_MISMATCHED_SIGNATURE; + code = terrno; + goto 
_SEND_REPLY; } - int32_t code = 0; if (snapshotReceiverGotData(pReceiver, pMsg) != 0) { code = terrno; if (code >= SYNC_SNAPSHOT_SEQ_INVALID) { @@ -769,6 +774,7 @@ static int32_t syncNodeOnSnapshotReceive(SSyncNode *pSyncNode, SyncSnapshotSend } } +_SEND_REPLY: // build msg SRpcMsg rpcMsg = {0}; if (syncBuildSnapshotSendRsp(&rpcMsg, 0, pSyncNode->vgId)) { @@ -782,11 +788,12 @@ static int32_t syncNodeOnSnapshotReceive(SSyncNode *pSyncNode, SyncSnapshotSend pRspMsg->term = raftStoreGetTerm(pSyncNode); pRspMsg->lastIndex = pMsg->lastIndex; pRspMsg->lastTerm = pMsg->lastTerm; - pRspMsg->startTime = pReceiver->startTime; + pRspMsg->startTime = pMsg->startTime; pRspMsg->ack = pReceiver->ack; // receiver maybe already closed pRspMsg->code = code; pRspMsg->snapBeginIndex = pReceiver->snapshotParam.start; + ASSERT(pRspMsg->startTime); // send msg syncLogSendSyncSnapshotRsp(pSyncNode, pRspMsg, "snapshot receiver received"); if (syncNodeSendMsgById(&pRspMsg->destId, pSyncNode, &rpcMsg) != 0) { @@ -801,21 +808,23 @@ static int32_t syncNodeOnSnapshotEnd(SSyncNode *pSyncNode, SyncSnapshotSend *pMs // condition 2 // end, finish FSM SSyncSnapshotReceiver *pReceiver = pSyncNode->pNewNodeReceiver; - - // waiting for clock match int64_t timeNow = taosGetTimestampMs(); - while (timeNow < pMsg->startTime) { - sRInfo(pReceiver, "snapshot receiver finish waitting for true time, now:%" PRId64 ", stime:%" PRId64, timeNow, - pMsg->startTime); - taosMsleep(10); - timeNow = taosGetTimestampMs(); + int32_t code = 0; + + if (pReceiver->startTime != pMsg->startTime) { + sRError(pReceiver, "snapshot end failed since startTime:%" PRId64 " not equal to msg startTime:%" PRId64, + pReceiver->startTime, pMsg->startTime); + terrno = TSDB_CODE_SYN_MISMATCHED_SIGNATURE; + code = terrno; + goto _SEND_REPLY; } - int32_t code = snapshotReceiverFinish(pReceiver, pMsg); + code = snapshotReceiverFinish(pReceiver, pMsg); if (code == 0) { snapshotReceiverStop(pReceiver); } +_SEND_REPLY: // build msg SRpcMsg 
rpcMsg = {0}; if (syncBuildSnapshotSendRsp(&rpcMsg, 0, pSyncNode->vgId) != 0) { @@ -829,7 +838,7 @@ static int32_t syncNodeOnSnapshotEnd(SSyncNode *pSyncNode, SyncSnapshotSend *pMs pRspMsg->term = raftStoreGetTerm(pSyncNode); pRspMsg->lastIndex = pMsg->lastIndex; pRspMsg->lastTerm = pMsg->lastTerm; - pRspMsg->startTime = pReceiver->startTime; + pRspMsg->startTime = pMsg->startTime; pRspMsg->ack = pReceiver->ack; // receiver maybe already closed pRspMsg->code = code; pRspMsg->snapBeginIndex = pReceiver->snapshotParam.start; @@ -945,13 +954,6 @@ static int32_t syncNodeOnSnapshotPrepRsp(SSyncNode *pSyncNode, SSyncSnapshotSend sSInfo(pSender, "prepare snapshot, recv-begin:%" PRId64 ", snapshot.last:%" PRId64 ", snapshot.term:%" PRId64, pMsg->snapBeginIndex, snapshot.lastApplyIndex, snapshot.lastApplyTerm); - if (pMsg->snapBeginIndex > snapshot.lastApplyIndex) { - sSError(pSender, "prepare snapshot failed since beginIndex:%" PRId64 " larger than applyIndex:%" PRId64, - pMsg->snapBeginIndex, snapshot.lastApplyIndex); - terrno = TSDB_CODE_SYN_INTERNAL_ERROR; - return -1; - } - // update sender pSender->snapshot = snapshot; @@ -964,6 +966,7 @@ static int32_t syncNodeOnSnapshotPrepRsp(SSyncNode *pSyncNode, SSyncSnapshotSend return -1; } pSender->snapshotParam.data = (void *)pMsg->data; + sSInfo(pSender, "data of snapshot param. len: %d", datHead->len); } int32_t code = pSyncNode->pFsm->FpSnapshotStartRead(pSyncNode->pFsm, &pSender->snapshotParam, &pSender->pReader); @@ -997,6 +1000,11 @@ static int32_t syncNodeOnSnapshotPrepRsp(SSyncNode *pSyncNode, SSyncSnapshotSend pSendMsg->startTime = pSender->startTime; pSendMsg->seq = SYNC_SNAPSHOT_SEQ_BEGIN; + ASSERT(pSendMsg->startTime); + + sSInfo(pSender, "begin snapshot replication to dnode %d. 
startTime:%" PRId64, DID(&pSendMsg->destId), + pSendMsg->startTime); + // send msg syncLogSendSyncSnapshotSend(pSyncNode, pSendMsg, "snapshot sender reply pre"); if (syncNodeSendMsgById(&pSendMsg->destId, pSender->pSyncNode, &rpcMsg) != 0) { @@ -1019,7 +1027,7 @@ int32_t syncNodeOnSnapshotRsp(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) { // if already drop replica, do not process if (!syncNodeInRaftGroup(pSyncNode, &pMsg->srcId)) { syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "maybe replica already dropped"); - terrno = TSDB_CODE_SYN_INTERNAL_ERROR; + terrno = TSDB_CODE_SYN_MISMATCHED_SIGNATURE; return -1; } @@ -1031,6 +1039,25 @@ int32_t syncNodeOnSnapshotRsp(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) { return -1; } + if (!snapshotSenderIsStart(pSender)) { + sSError(pSender, "snapshot sender not started yet. sender startTime:%" PRId64 ", msg startTime:%" PRId64, + pSender->startTime, pMsg->startTime); + return -1; + } + + if (pMsg->startTime < pSender->startTime) { + sSError(pSender, "ignore stale rsp received. sender startTime:%" PRId64 ", msg startTime:%" PRId64, + pSender->startTime, pMsg->startTime); + terrno = pMsg->code; + return -1; + } else if (pMsg->startTime > pSender->startTime) { + sSError(pSender, "unexpected start time in msg. 
sender startTime:%" PRId64 ", msg startTime:%" PRId64, + pSender->startTime, pMsg->startTime); + goto _ERROR; + } + + ASSERT(pMsg->startTime == pSender->startTime); + // state, term, seq/ack if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) { syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "snapshot sender not leader"); @@ -1039,20 +1066,12 @@ int32_t syncNodeOnSnapshotRsp(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) { goto _ERROR; } - if (pMsg->startTime != pSender->startTime) { - syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "snapshot sender and receiver time not match"); - sSError(pSender, "sender:%" PRId64 " receiver:%" PRId64 " time not match, error:%s 0x%x", pMsg->startTime, - pSender->startTime, tstrerror(pMsg->code), pMsg->code); - terrno = TSDB_CODE_SYN_INTERNAL_ERROR; - goto _ERROR; - } - SyncTerm currentTerm = raftStoreGetTerm(pSyncNode); if (pMsg->term != currentTerm) { syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "snapshot sender and receiver term not match"); sSError(pSender, "snapshot sender term not equal, msg term:%" PRId64 " currentTerm:%" PRId64, pMsg->term, currentTerm); - terrno = TSDB_CODE_SYN_INTERNAL_ERROR; + terrno = TSDB_CODE_SYN_MISMATCHED_SIGNATURE; goto _ERROR; } diff --git a/source/util/src/terror.c b/source/util/src/terror.c index 383e4e9d8a..4cc86d51b7 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -440,6 +440,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_GRANT_GEN_ENC_IVLD_KLEN, "Invalid klen to encod // sync TAOS_DEFINE_ERROR(TSDB_CODE_SYN_TIMEOUT, "Sync timeout") +TAOS_DEFINE_ERROR(TSDB_CODE_SYN_MISMATCHED_SIGNATURE, "Sync signature mismatch") TAOS_DEFINE_ERROR(TSDB_CODE_SYN_NOT_LEADER, "Sync leader is unreachable") TAOS_DEFINE_ERROR(TSDB_CODE_SYN_NEW_CONFIG_ERROR, "Sync new config error") TAOS_DEFINE_ERROR(TSDB_CODE_SYN_PROPOSE_NOT_READY, "Sync not ready to propose") From e901adfdf3653c224ffc24da86bd4bafc560ebff Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Wed, 20 Sep 2023 14:27:05 +0800 Subject: [PATCH 33/61] enh: 
check snapshot receiver and sender by signature --- source/libs/sync/src/syncSnapshot.c | 79 ++++++++++++++++++----------- 1 file changed, 50 insertions(+), 29 deletions(-) diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index 383fda89b0..59b33d20cc 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -79,8 +79,6 @@ int32_t snapshotSenderStart(SSyncSnapshotSender *pSender) { int8_t started = atomic_val_compare_exchange_8(&pSender->start, false, true); if (started) return 0; - taosMsleep(1); - pSender->seq = SYNC_SNAPSHOT_SEQ_BEGIN; pSender->ack = SYNC_SNAPSHOT_SEQ_INVALID; pSender->pReader = NULL; @@ -394,6 +392,14 @@ bool snapshotReceiverIsStart(SSyncSnapshotReceiver *pReceiver) { return (pReceiver != NULL ? atomic_load_8(&pReceiver->start) : false); } +static int32_t snapshotReceiverSignatureCmp(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend *pMsg) { + if (pReceiver->term < pMsg->term) return -1; + if (pReceiver->term > pMsg->term) return 1; + if (pReceiver->startTime < pMsg->startTime) return -1; + if (pReceiver->startTime > pMsg->startTime) return 1; + return 0; +} + static int32_t snapshotReceiverStartWriter(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend *pBeginMsg) { if (pReceiver->pWriter != NULL) { sRError(pReceiver, "vgId:%d, snapshot receiver writer is not null", pReceiver->pSyncNode->vgId); @@ -434,7 +440,7 @@ void snapshotReceiverStart(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend *p if (started) return; pReceiver->ack = SYNC_SNAPSHOT_SEQ_PREP_SNAPSHOT; - pReceiver->term = raftStoreGetTerm(pReceiver->pSyncNode); + pReceiver->term = pPreMsg->term; pReceiver->fromId = pPreMsg->srcId; pReceiver->startTime = pPreMsg->startTime; ASSERT(pReceiver->startTime); @@ -585,18 +591,25 @@ static int32_t syncNodeOnSnapshotPrep(SSyncNode *pSyncNode, SyncSnapshotSend *pM if (snapshotReceiverIsStart(pReceiver)) { // already start - if (pMsg->startTime > pReceiver->startTime) { - 
sRInfo(pReceiver, "snapshot receiver startTime:%" PRId64 " > msg startTime:%" PRId64 " start receiver", - pReceiver->startTime, pMsg->startTime); + int32_t order = 0; + if ((order = snapshotReceiverSignatureCmp(pReceiver, pMsg)) < 0) { + sRInfo(pReceiver, + "received a new snapshot preparation. restart receiver" + "receiver signature: (%" PRId64 ", %" PRId64 "), msg signature:(%" PRId64 ", %" PRId64 ")", + pReceiver->term, pReceiver->startTime, pMsg->term, pMsg->startTime); goto _START_RECEIVER; - } else if (pMsg->startTime == pReceiver->startTime) { - sRInfo(pReceiver, "snapshot receiver startTime:%" PRId64 " == msg startTime:%" PRId64 " send reply", - pReceiver->startTime, pMsg->startTime); + } else if (order == 0) { + sRInfo(pReceiver, + "received a duplicate snapshot preparation. send reply" + "receiver signature: (%" PRId64 ", %" PRId64 "), msg signature:(%" PRId64 ", %" PRId64 ")", + pReceiver->term, pReceiver->startTime, pMsg->term, pMsg->startTime); goto _SEND_REPLY; } else { // ignore - sRError(pReceiver, "snapshot receiver startTime:%" PRId64 " < msg startTime:%" PRId64 " ignore", - pReceiver->startTime, pMsg->startTime); + sRError(pReceiver, + "received a stale snapshot preparation. 
ignore" + "receiver signature: (%" PRId64 ", %" PRId64 "), msg signature:(%" PRId64 ", %" PRId64 ")", + pReceiver->term, pReceiver->startTime, pMsg->term, pMsg->startTime); terrno = TSDB_CODE_SYN_MISMATCHED_SIGNATURE; code = terrno; goto _SEND_REPLY; @@ -703,10 +716,9 @@ static int32_t syncNodeOnSnapshotBegin(SSyncNode *pSyncNode, SyncSnapshotSend *p goto _SEND_REPLY; } - if (pReceiver->startTime != pMsg->startTime) { - sRError(pReceiver, "snapshot receiver begin failed since startTime:%" PRId64 " not equal to msg startTime:%" PRId64, - pReceiver->startTime, pMsg->startTime); + if (snapshotReceiverSignatureCmp(pReceiver, pMsg) != 0) { terrno = TSDB_CODE_SYN_MISMATCHED_SIGNATURE; + sRError(pReceiver, "snapshot receiver begin failed since %s", terrstr()); goto _SEND_REPLY; } @@ -759,10 +771,9 @@ static int32_t syncNodeOnSnapshotReceive(SSyncNode *pSyncNode, SyncSnapshotSend int64_t timeNow = taosGetTimestampMs(); int32_t code = 0; - if (pReceiver->startTime != pMsg->startTime) { - sRError(pReceiver, "snapshot receive failed since startTime:%" PRId64 " not equal to msg startTime:%" PRId64, - pReceiver->startTime, pMsg->startTime); + if (snapshotReceiverSignatureCmp(pReceiver, pMsg) != 0) { terrno = TSDB_CODE_SYN_MISMATCHED_SIGNATURE; + sRError(pReceiver, "snapshot receive failed since %s.", terrstr()); code = terrno; goto _SEND_REPLY; } @@ -811,7 +822,7 @@ static int32_t syncNodeOnSnapshotEnd(SSyncNode *pSyncNode, SyncSnapshotSend *pMs int64_t timeNow = taosGetTimestampMs(); int32_t code = 0; - if (pReceiver->startTime != pMsg->startTime) { + if (snapshotReceiverSignatureCmp(pReceiver, pMsg) != 0) { sRError(pReceiver, "snapshot end failed since startTime:%" PRId64 " not equal to msg startTime:%" PRId64, pReceiver->startTime, pMsg->startTime); terrno = TSDB_CODE_SYN_MISMATCHED_SIGNATURE; @@ -880,13 +891,13 @@ int32_t syncNodeOnSnapshot(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) { // if already drop replica, do not process if (!syncNodeInRaftGroup(pSyncNode, 
&pMsg->srcId)) { syncLogRecvSyncSnapshotSend(pSyncNode, pMsg, "not in my config"); - terrno = TSDB_CODE_SYN_INTERNAL_ERROR; + terrno = TSDB_CODE_SYN_MISMATCHED_SIGNATURE; return -1; } if (pMsg->term < raftStoreGetTerm(pSyncNode)) { syncLogRecvSyncSnapshotSend(pSyncNode, pMsg, "reject since small term"); - terrno = TSDB_CODE_SYN_INTERNAL_ERROR; + terrno = TSDB_CODE_SYN_MISMATCHED_SIGNATURE; return -1; } @@ -1015,6 +1026,14 @@ static int32_t syncNodeOnSnapshotPrepRsp(SSyncNode *pSyncNode, SSyncSnapshotSend return 0; } +static int32_t snapshotSenderSignatureCmp(SSyncSnapshotSender *pSender, SyncSnapshotRsp *pMsg) { + if (pSender->term < pMsg->term) return -1; + if (pSender->term > pMsg->term) return 1; + if (pSender->startTime < pMsg->startTime) return -1; + if (pSender->startTime > pMsg->startTime) return 1; + return 0; +} + // sender on message // // condition 1 sender receives SYNC_SNAPSHOT_SEQ_END, close sender @@ -1045,19 +1064,21 @@ int32_t syncNodeOnSnapshotRsp(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) { return -1; } - if (pMsg->startTime < pSender->startTime) { - sSError(pSender, "ignore stale rsp received. sender startTime:%" PRId64 ", msg startTime:%" PRId64, - pSender->startTime, pMsg->startTime); - terrno = pMsg->code; + // check signature + int32_t order = 0; + if ((order = snapshotSenderSignatureCmp(pSender, pMsg)) > 0) { + sSError(pSender, + "received a stale snapshot rsp. ignore it" + "sender signature: (%" PRId64 ", %" PRId64 "), msg signature:(%" PRId64 ", %" PRId64 ")", + pSender->term, pSender->startTime, pMsg->term, pMsg->startTime); + terrno = TSDB_CODE_SYN_MISMATCHED_SIGNATURE; return -1; - } else if (pMsg->startTime > pSender->startTime) { - sSError(pSender, "unexpected start time in msg. sender startTime:%" PRId64 ", msg startTime:%" PRId64, - pSender->startTime, pMsg->startTime); + } else if (order < 0) { + sSError(pSender, "snapshot sender is stale. 
stop"); + terrno = TSDB_CODE_SYN_MISMATCHED_SIGNATURE; goto _ERROR; } - ASSERT(pMsg->startTime == pSender->startTime); - // state, term, seq/ack if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) { syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "snapshot sender not leader"); From a6d5deb5d14c34d2d77341c9268be137c6bf5a86 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Wed, 20 Sep 2023 15:38:34 +0800 Subject: [PATCH 34/61] fixup: fix tsdbFSDoSanAndFix --- source/dnode/vnode/src/tsdb/tsdbFS2.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.c b/source/dnode/vnode/src/tsdb/tsdbFS2.c index ef2f81fa02..f307411520 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.c @@ -521,8 +521,9 @@ static int32_t tsdbFSDoSanAndFix(STFileSystem *fs) { } if (corrupt) { - tsdbError("vgId:%d, not to clear unreferenced files since some fset corrupted", TD_VID(fs->tsdb->pVnode)); + tsdbError("vgId:%d, not to clear unreferenced files due to fset incompleteness", TD_VID(fs->tsdb->pVnode)); fs->fsstate = TSDB_FS_STATE_INCOMPLETE; + code = 0; goto _exit; } From a53ba24118ba94b3fd3d212a6948b3aa69290829 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Wed, 20 Sep 2023 15:49:34 +0800 Subject: [PATCH 35/61] fix: set startTime in snapshotReSend --- source/libs/sync/src/syncSnapshot.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index 59b33d20cc..1ca93d3844 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -133,7 +133,7 @@ int32_t snapshotSenderStart(SSyncSnapshotSender *pSender) { pMsg->lastTerm = pSender->snapshot.lastApplyTerm; pMsg->lastConfigIndex = pSender->snapshot.lastConfigIndex; pMsg->lastConfig = pSender->lastConfig; - pMsg->startTime = atomic_load_64(&pSender->startTime); + pMsg->startTime = pSender->startTime; pMsg->seq = SYNC_SNAPSHOT_SEQ_PREP_SNAPSHOT; 
if (dataLen > 0) { @@ -270,6 +270,7 @@ int32_t snapshotReSend(SSyncSnapshotSender *pSender) { pMsg->lastTerm = pSender->snapshot.lastApplyTerm; pMsg->lastConfigIndex = pSender->snapshot.lastConfigIndex; pMsg->lastConfig = pSender->lastConfig; + pMsg->startTime = pSender->startTime; pMsg->seq = pSender->seq; if (pSender->pCurrentBlock != NULL && pSender->blockLen > 0) { From 00aeb031d4bdd87ba46bcab4a891e9358f36b9da Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Thu, 21 Sep 2023 09:43:57 +0800 Subject: [PATCH 36/61] enh: replicate a fset if it is corrupted without valid minVer --- source/dnode/vnode/src/tsdb/tsdbFS2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.c b/source/dnode/vnode/src/tsdb/tsdbFS2.c index f307411520..cf1158fb01 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.c @@ -500,7 +500,7 @@ static int32_t tsdbFSDoSanAndFix(STFileSystem *fs) { STFileObj *fobj = fset->farr[ftype]; code = tsdbFSDoScanAndFixFile(fs, fobj); if (code) { - fset->maxVerValid = TMIN(fset->maxVerValid, fobj->f->minVer - 1); + fset->maxVerValid = (fobj->f->minVer <= fobj->f->maxVer) ? TMIN(fset->maxVerValid, fobj->f->minVer - 1) : -1; corrupt = true; } } @@ -512,7 +512,7 @@ static int32_t tsdbFSDoSanAndFix(STFileSystem *fs) { TARRAY2_FOREACH(lvl->fobjArr, fobj) { code = tsdbFSDoScanAndFixFile(fs, fobj); if (code) { - fset->maxVerValid = TMIN(fset->maxVerValid, fobj->f->minVer - 1); + fset->maxVerValid = (fobj->f->minVer <= fobj->f->maxVer) ? 
TMIN(fset->maxVerValid, fobj->f->minVer - 1) : -1; corrupt = true; } } From be6411ebbe88dd4bb81f4e9cb084a30ac8aa0e95 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Thu, 21 Sep 2023 16:11:06 +0800 Subject: [PATCH 37/61] fixup: set lastSendTime with taosGetTimestampMs --- source/libs/sync/src/syncSnapshot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index 1ca93d3844..2948dbf3d2 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -96,7 +96,7 @@ int32_t snapshotSenderStart(SSyncSnapshotSender *pSender) { pSender->sendingMS = 0; pSender->term = raftStoreGetTerm(pSender->pSyncNode); pSender->startTime = taosGetMonoTimestampMs(); - pSender->lastSendTime = pSender->startTime; + pSender->lastSendTime = taosGetTimestampMs(); pSender->finish = false; // Get full snapshot info From c89c69f951351dcf73dff363563293e74a5237fe Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Thu, 21 Sep 2023 18:38:10 +0800 Subject: [PATCH 38/61] enh: use waitTime to prevent from starting snapshot too frequently --- source/libs/sync/inc/syncSnapshot.h | 2 +- source/libs/sync/src/syncAppendEntries.c | 2 +- source/libs/sync/src/syncElection.c | 2 +- source/libs/sync/src/syncSnapshot.c | 27 +++++++++++++++--------- 4 files changed, 20 insertions(+), 13 deletions(-) diff --git a/source/libs/sync/inc/syncSnapshot.h b/source/libs/sync/inc/syncSnapshot.h index 2a19945c5a..95382132b5 100644 --- a/source/libs/sync/inc/syncSnapshot.h +++ b/source/libs/sync/inc/syncSnapshot.h @@ -43,7 +43,7 @@ typedef struct SSyncSnapshotSender { int64_t sendingMS; SyncTerm term; int64_t startTime; - int64_t endTime; + int64_t waitTime; int64_t lastSendTime; bool finish; diff --git a/source/libs/sync/src/syncAppendEntries.c b/source/libs/sync/src/syncAppendEntries.c index 8ae1dd2a54..4776b2bb1b 100644 --- a/source/libs/sync/src/syncAppendEntries.c +++ 
b/source/libs/sync/src/syncAppendEntries.c @@ -157,7 +157,7 @@ int32_t syncNodeOnAppendEntries(SSyncNode* ths, const SRpcMsg* pRpcMsg) { if (ths->fsmState == SYNC_FSM_STATE_INCOMPLETE) { pReply->fsmState = ths->fsmState; - sError("vgId:%d, not allow to accept sync log msg due to incomplete fsm state", ths->vgId); + sError("vgId:%d, not to accept sync log msg due to incomplete fsm state", ths->vgId); syncEntryDestroy(pEntry); goto _SEND_RESPONSE; } diff --git a/source/libs/sync/src/syncElection.c b/source/libs/sync/src/syncElection.c index b4e2049a64..c57e7e273f 100644 --- a/source/libs/sync/src/syncElection.c +++ b/source/libs/sync/src/syncElection.c @@ -72,7 +72,7 @@ static int32_t syncNodeRequestVotePeers(SSyncNode* pNode) { int32_t syncNodeElect(SSyncNode* pSyncNode) { if (pSyncNode->fsmState == SYNC_FSM_STATE_INCOMPLETE) { - sNError(pSyncNode, "ignore leader hb timeout due to incomplete fsm state"); + sNError(pSyncNode, "skip leader election due to incomplete fsm state"); return -1; } diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index 2948dbf3d2..f9bde9517e 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -45,7 +45,7 @@ SSyncSnapshotSender *snapshotSenderCreate(SSyncNode *pSyncNode, int32_t replicaI pSender->replicaIndex = replicaIndex; pSender->term = raftStoreGetTerm(pSyncNode); pSender->startTime = -1; - pSender->endTime = -1; + pSender->waitTime = -1; pSender->pSyncNode->pFsm->FpGetSnapshotInfo(pSender->pSyncNode->pFsm, &pSender->snapshot); pSender->finish = false; @@ -167,7 +167,7 @@ void snapshotSenderStop(SSyncSnapshotSender *pSender, bool finish) { if (stopped) return; pSender->finish = finish; - pSender->endTime = taosGetTimestampMs(); + pSender->waitTime = -1; // close reader if (pSender->pReader != NULL) { @@ -319,8 +319,12 @@ int32_t syncNodeStartSnapshot(SSyncNode *pSyncNode, SRaftId *pDestId) { return 0; } - if (pSender->finish && taosGetTimestampMs() - 
pSender->endTime < SNAPSHOT_WAIT_MS) { - sSDebug(pSender, "snapshot sender start too frequently, ignore"); + int64_t timeNow = taosGetTimestampMs(); + if (pSender->waitTime <= 0) { + pSender->waitTime = timeNow + SNAPSHOT_WAIT_MS; + } + if (timeNow < pSender->waitTime) { + sSDebug(pSender, "snapshot sender waitTime not expired yet, ignore"); return 0; } @@ -674,8 +678,6 @@ _SEND_REPLY: pRspMsg->code = code; pRspMsg->snapBeginIndex = syncNodeGetSnapBeginIndex(pSyncNode); - ASSERT(pRspMsg->startTime); - if (snapInfo.data) { pRspMsg->payloadType = snapInfo.type; memcpy(pRspMsg->data, snapInfo.data, dataLen); @@ -684,6 +686,8 @@ _SEND_REPLY: SSnapshotParam *pParam = &pReceiver->snapshotParam; void *data = taosMemoryRealloc(pParam->data, dataLen); if (data == NULL) { + sError("vgId:%d, failed to realloc memory for snapshot prep due to %s. dataLen:%d", pSyncNode->vgId, + strerror(errno), dataLen); terrno = TSDB_CODE_OUT_OF_MEMORY; code = terrno; goto _out; @@ -695,7 +699,7 @@ _SEND_REPLY: // send msg syncLogSendSyncSnapshotRsp(pSyncNode, pRspMsg, "snapshot receiver pre-snapshot"); if (syncNodeSendMsgById(&pRspMsg->destId, pSyncNode, &rpcMsg) != 0) { - sRError(pReceiver, "snapshot receiver failed to build resp since %s", terrstr()); + sRError(pReceiver, "failed to send resp since %s", terrstr()); code = terrno; } @@ -916,13 +920,16 @@ int32_t syncNodeOnSnapshot(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) { if (pSyncNode->state == TAOS_SYNC_STATE_FOLLOWER || pSyncNode->state == TAOS_SYNC_STATE_LEARNER) { if (pMsg->term == raftStoreGetTerm(pSyncNode)) { if (pMsg->seq == SYNC_SNAPSHOT_SEQ_PREP_SNAPSHOT) { - syncLogRecvSyncSnapshotSend(pSyncNode, pMsg, "process seq pre-snapshot"); + sInfo("vgId:%d, receive pre-snapshot msg of snapshot replication. 
signature:(%" PRId64 ", %" PRId64 ")", + pSyncNode->vgId, pMsg->term, pMsg->startTime); code = syncNodeOnSnapshotPrep(pSyncNode, pMsg); } else if (pMsg->seq == SYNC_SNAPSHOT_SEQ_BEGIN) { - syncLogRecvSyncSnapshotSend(pSyncNode, pMsg, "process seq begin"); + sInfo("vgId:%d, receive begin msg of snapshot replication. signature:(%" PRId64 ", %" PRId64 ")", + pSyncNode->vgId, pMsg->term, pMsg->startTime); code = syncNodeOnSnapshotBegin(pSyncNode, pMsg); } else if (pMsg->seq == SYNC_SNAPSHOT_SEQ_END) { - syncLogRecvSyncSnapshotSend(pSyncNode, pMsg, "process seq end"); + sInfo("vgId:%d, receive end msg of snapshot replication. signature: (%" PRId64 ", %" PRId64 ")", + pSyncNode->vgId, pMsg->term, pMsg->startTime); code = syncNodeOnSnapshotEnd(pSyncNode, pMsg); if (syncLogBufferReInit(pSyncNode->pLogBuf, pSyncNode) != 0) { sRError(pReceiver, "failed to reinit log buffer since %s", terrstr()); From 810678ebcb59ba8980a5ba9c8c1be22f965e2807 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Thu, 21 Sep 2023 19:02:12 +0800 Subject: [PATCH 39/61] enh: tidy up debugging msgs of snapshot replication --- source/libs/sync/src/syncSnapshot.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index f9bde9517e..a90877ddfc 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -697,7 +697,6 @@ _SEND_REPLY: } // send msg - syncLogSendSyncSnapshotRsp(pSyncNode, pRspMsg, "snapshot receiver pre-snapshot"); if (syncNodeSendMsgById(&pRspMsg->destId, pSyncNode, &rpcMsg) != 0) { sRError(pReceiver, "failed to send resp since %s", terrstr()); code = terrno; @@ -717,19 +716,19 @@ static int32_t syncNodeOnSnapshotBegin(SSyncNode *pSyncNode, SyncSnapshotSend *p int32_t code = TSDB_CODE_SYN_INTERNAL_ERROR; if (!snapshotReceiverIsStart(pReceiver)) { - sRError(pReceiver, "snapshot receiver begin failed since not start"); + sRError(pReceiver, "failed 
to begin snapshot receiver since not started"); goto _SEND_REPLY; } if (snapshotReceiverSignatureCmp(pReceiver, pMsg) != 0) { terrno = TSDB_CODE_SYN_MISMATCHED_SIGNATURE; - sRError(pReceiver, "snapshot receiver begin failed since %s", terrstr()); + sRError(pReceiver, "failed to begin snapshot receiver since %s", terrstr()); goto _SEND_REPLY; } // start writer if (snapshotReceiverStartWriter(pReceiver, pMsg) != 0) { - sRError(pReceiver, "snapshot receiver begin failed since start writer failed"); + sRError(pReceiver, "failed to start snapshot writer since %s", terrstr()); goto _SEND_REPLY; } @@ -742,7 +741,7 @@ _SEND_REPLY: // build msg SRpcMsg rpcMsg = {0}; if (syncBuildSnapshotSendRsp(&rpcMsg, 0, pSyncNode->vgId) != 0) { - sRError(pReceiver, "snapshot receiver build resp failed since %s", terrstr()); + sRError(pReceiver, "failed to build snapshot receiver resp since %s", terrstr()); return -1; } @@ -757,12 +756,9 @@ _SEND_REPLY: pRspMsg->code = code; pRspMsg->snapBeginIndex = pReceiver->snapshotParam.start; - ASSERT(pRspMsg->startTime); - // send msg - syncLogSendSyncSnapshotRsp(pSyncNode, pRspMsg, "snapshot receiver begin"); if (syncNodeSendMsgById(&pRspMsg->destId, pSyncNode, &rpcMsg) != 0) { - sRError(pReceiver, "snapshot receiver send resp failed since %s", terrstr()); + sRError(pReceiver, "failed to send snapshot receiver resp since %s", terrstr()); return -1; } @@ -778,7 +774,7 @@ static int32_t syncNodeOnSnapshotReceive(SSyncNode *pSyncNode, SyncSnapshotSend if (snapshotReceiverSignatureCmp(pReceiver, pMsg) != 0) { terrno = TSDB_CODE_SYN_MISMATCHED_SIGNATURE; - sRError(pReceiver, "snapshot receive failed since %s.", terrstr()); + sRError(pReceiver, "failed to receive snapshot data since %s.", terrstr()); code = terrno; goto _SEND_REPLY; } @@ -794,7 +790,7 @@ _SEND_REPLY: // build msg SRpcMsg rpcMsg = {0}; if (syncBuildSnapshotSendRsp(&rpcMsg, 0, pSyncNode->vgId)) { - sRError(pReceiver, "snapshot receiver build resp failed since %s", terrstr()); + 
sRError(pReceiver, "failed to build snapshot receiver resp since %s", terrstr()); return -1; } @@ -809,11 +805,9 @@ _SEND_REPLY: pRspMsg->code = code; pRspMsg->snapBeginIndex = pReceiver->snapshotParam.start; - ASSERT(pRspMsg->startTime); // send msg - syncLogSendSyncSnapshotRsp(pSyncNode, pRspMsg, "snapshot receiver received"); if (syncNodeSendMsgById(&pRspMsg->destId, pSyncNode, &rpcMsg) != 0) { - sRError(pReceiver, "snapshot receiver send resp failed since %s", terrstr()); + sRError(pReceiver, "failed to send snapshot receiver resp since %s", terrstr()); return -1; } From 233a3c403444ba77727d41b996f35601295e936d Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Thu, 21 Sep 2023 20:06:30 +0800 Subject: [PATCH 40/61] enh: terminate on incompleteness of fsm state for dbs of single replica --- source/libs/sync/src/syncMain.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index 8ddd55d906..eaaccecf90 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -1010,8 +1010,11 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo, int32_t vnodeVersion) { sNTrace(pSyncNode, "reset commit index by snapshot"); } pSyncNode->fsmState = snapshot.state; - if (pSyncNode->fsmState) { - sError("vgId:%d, fsm state incomplete.", pSyncNode->vgId); + if (pSyncNode->fsmState != SYNC_FSM_STATE_NORMAL) { + sError("vgId:%d, fsm state is incomplete.", pSyncNode->vgId); + if (pSyncNode->replicaNum == 1) { + goto _error; + } } } pSyncNode->commitIndex = commitIndex; From 9643799dc5712aa335fc752b5095df4e3edf746b Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Fri, 22 Sep 2023 08:53:14 +0800 Subject: [PATCH 41/61] fixup: compiler error at labels since allowed only at statements --- source/libs/sync/src/syncSnapshot.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index
a90877ddfc..924813eb98 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -786,7 +786,8 @@ static int32_t syncNodeOnSnapshotReceive(SSyncNode *pSyncNode, SyncSnapshotSend } } -_SEND_REPLY: +_SEND_REPLY:; + // build msg SRpcMsg rpcMsg = {0}; if (syncBuildSnapshotSendRsp(&rpcMsg, 0, pSyncNode->vgId)) { @@ -834,7 +835,8 @@ static int32_t syncNodeOnSnapshotEnd(SSyncNode *pSyncNode, SyncSnapshotSend *pMs snapshotReceiverStop(pReceiver); } -_SEND_REPLY: +_SEND_REPLY:; + // build msg SRpcMsg rpcMsg = {0}; if (syncBuildSnapshotSendRsp(&rpcMsg, 0, pSyncNode->vgId) != 0) { From 29bbebc3238fbf420bc119d6f6d8690097a62553 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Wed, 27 Sep 2023 09:29:38 +0800 Subject: [PATCH 42/61] enh: refactor a func name as tsdbSnapGetDetails --- source/dnode/vnode/src/inc/vnodeInt.h | 2 +- source/dnode/vnode/src/tsdb/tsdbSnapshot.c | 13 ++++++++----- source/dnode/vnode/src/vnd/vnodeSync.c | 7 ++++--- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index d5e1585af4..eaffa2500d 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -295,7 +295,7 @@ int32_t metaSnapWriterOpen(SMeta* pMeta, int64_t sver, int64_t ever, SMetaSnapWr int32_t metaSnapWrite(SMetaSnapWriter* pWriter, uint8_t* pData, uint32_t nData); int32_t metaSnapWriterClose(SMetaSnapWriter** ppWriter, int8_t rollback); // STsdbSnapReader ======================================== -int32_t tsdbSnapGetInfo(STsdb* pTsdb, SSnapshot* pSnap); +int32_t tsdbSnapGetDetails(SVnode* pVnode, SSnapshot* pSnap); int32_t tsdbSnapReaderOpen(STsdb* pTsdb, int64_t sver, int64_t ever, int8_t type, void* pRanges, STsdbSnapReader** ppReader); int32_t tsdbSnapReaderClose(STsdbSnapReader** ppReader); diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index 6ee7112906..5fcf384bfb 100644 
--- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -1507,13 +1507,15 @@ void tsdbSnapPartListDestroy(STsdbSnapPartList** ppList) { ppList[0] = NULL; } -int32_t tsdbSnapGetInfo(STsdb* pTsdb, SSnapshot* pSnap) { - pSnap->state = pTsdb->pFS->fsstate; - if (pSnap->type != TDMT_SYNC_PREP_SNAPSHOT && pSnap->type != TDMT_SYNC_PREP_SNAPSHOT_REPLY) { - return 0; +int32_t tsdbSnapGetDetails(SVnode* pVnode, SSnapshot* pSnap) { + int code = -1; + if (pVnode->pTsdb->pFS->fsstate == TSDB_FS_STATE_NORMAL) { + pSnap->state = SYNC_FSM_STATE_NORMAL; + } else { + pSnap->state = SYNC_FSM_STATE_INCOMPLETE; } - int code = -1; + STsdb* pTsdb = pVnode->pTsdb; STsdbSnapPartList* pList = tsdbGetSnapPartList(pTsdb->pFS); if (pList == NULL) goto _out; @@ -1522,6 +1524,7 @@ int32_t tsdbSnapGetInfo(STsdb* pTsdb, SSnapshot* pSnap) { void* buf = NULL; int32_t tlen = 0; + // estimate data length encode int32_t bufLen = sizeof(SSyncTLV); // typ: TDMT_SYNC_PREP_SNAPSHOT or TDMT_SYNC_PREP_SNAPSOT_REPLY bufLen += sizeof(SSyncTLV); // subtyp: SNAP_DATA_TSDB diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index ba142ddb6d..b424e28f72 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -787,12 +787,13 @@ bool vnodeIsLeader(SVnode *pVnode) { } int32_t vnodeGetSnapshot(SVnode *pVnode, SSnapshot *pSnap) { + int code = 0; pSnap->lastApplyIndex = pVnode->state.committed; pSnap->lastApplyTerm = pVnode->state.commitTerm; pSnap->lastConfigIndex = -1; - int32_t code = tsdbSnapGetInfo(pVnode->pTsdb, pSnap); - - pSnap->state = (pSnap->state == TSDB_FS_STATE_INCOMPLETE) ? 
SYNC_FSM_STATE_INCOMPLETE : SYNC_FSM_STATE_NORMAL; + if (pSnap->type == TDMT_SYNC_PREP_SNAPSHOT || pSnap->type == TDMT_SYNC_PREP_SNAPSHOT_REPLY) { + code = tsdbSnapGetDetails(pVnode, pSnap); + } return code; } From b80f8201cea02020d3b5c071b11fe3bb0f9a07c8 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Wed, 27 Sep 2023 15:22:24 +0800 Subject: [PATCH 43/61] feat: detect fs incompleteness of RSMA tsdbs --- source/dnode/vnode/src/inc/tsdb.h | 8 ++++++-- source/dnode/vnode/src/inc/vnodeInt.h | 2 +- source/dnode/vnode/src/tsdb/tsdbSnapshot.c | 18 ++++++++++++------ source/dnode/vnode/src/vnd/vnodeSync.c | 5 +++++ 4 files changed, 24 insertions(+), 9 deletions(-) diff --git a/source/dnode/vnode/src/inc/tsdb.h b/source/dnode/vnode/src/inc/tsdb.h index 1d17d616ab..ec721308b2 100644 --- a/source/dnode/vnode/src/inc/tsdb.h +++ b/source/dnode/vnode/src/inc/tsdb.h @@ -1025,10 +1025,14 @@ struct STsdbFilterInfo { TABLEID tbid; }; -enum { +typedef enum { TSDB_FS_STATE_NORMAL = 0, TSDB_FS_STATE_INCOMPLETE, -}; +} ETsdbFsState; + +// utils +ETsdbFsState tsdbSnapGetFsState(SVnode *pVnode); +int32_t tsdbSnapGetDetails(SVnode *pVnode, SSnapshot *pSnap); #ifdef __cplusplus } diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index eaffa2500d..6682ff0133 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -295,7 +295,6 @@ int32_t metaSnapWriterOpen(SMeta* pMeta, int64_t sver, int64_t ever, SMetaSnapWr int32_t metaSnapWrite(SMetaSnapWriter* pWriter, uint8_t* pData, uint32_t nData); int32_t metaSnapWriterClose(SMetaSnapWriter** ppWriter, int8_t rollback); // STsdbSnapReader ======================================== -int32_t tsdbSnapGetDetails(SVnode* pVnode, SSnapshot* pSnap); int32_t tsdbSnapReaderOpen(STsdb* pTsdb, int64_t sver, int64_t ever, int8_t type, void* pRanges, STsdbSnapReader** ppReader); int32_t tsdbSnapReaderClose(STsdbSnapReader** ppReader); @@ -499,6 +498,7 @@ struct SSma { #define 
SMA_RSMA_TSDB0(s) ((s)->pVnode->pTsdb) #define SMA_RSMA_TSDB1(s) ((s)->pRSmaTsdb[TSDB_RETENTION_L0]) #define SMA_RSMA_TSDB2(s) ((s)->pRSmaTsdb[TSDB_RETENTION_L1]) +#define SMA_RSMA_GET_TSDB(pVnode, level) ((level == 0) ? pVnode->pTsdb : pVnode->pSma->pRSmaTsdb[level - 1]) // sma void smaHandleRes(void* pVnode, int64_t smaId, const SArray* data); diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index 5fcf384bfb..69ead6feef 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -1507,14 +1507,20 @@ void tsdbSnapPartListDestroy(STsdbSnapPartList** ppList) { ppList[0] = NULL; } -int32_t tsdbSnapGetDetails(SVnode* pVnode, SSnapshot* pSnap) { - int code = -1; - if (pVnode->pTsdb->pFS->fsstate == TSDB_FS_STATE_NORMAL) { - pSnap->state = SYNC_FSM_STATE_NORMAL; - } else { - pSnap->state = SYNC_FSM_STATE_INCOMPLETE; +ETsdbFsState tsdbSnapGetFsState(SVnode* pVnode) { + if (!VND_IS_RSMA(pVnode)) { + return pVnode->pTsdb->pFS->fsstate; } + for (int32_t lvl = 0; lvl < TSDB_RETENTION_MAX; ++lvl) { + if (SMA_RSMA_GET_TSDB(pVnode, lvl)->pFS->fsstate != TSDB_FS_STATE_NORMAL) { + return TSDB_FS_STATE_INCOMPLETE; + } + } + return TSDB_FS_STATE_NORMAL; +} +int32_t tsdbSnapGetDetails(SVnode* pVnode, SSnapshot* pSnap) { + int code = -1; STsdb* pTsdb = pVnode->pTsdb; STsdbSnapPartList* pList = tsdbGetSnapPartList(pTsdb->pFS); if (pList == NULL) goto _out; diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index b424e28f72..86fb7cb55b 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -791,6 +791,11 @@ int32_t vnodeGetSnapshot(SVnode *pVnode, SSnapshot *pSnap) { pSnap->lastApplyIndex = pVnode->state.committed; pSnap->lastApplyTerm = pVnode->state.commitTerm; pSnap->lastConfigIndex = -1; + pSnap->state = SYNC_FSM_STATE_NORMAL; + + if (tsdbSnapGetFsState(pVnode) != TSDB_FS_STATE_NORMAL) { + 
pSnap->state = SYNC_FSM_STATE_INCOMPLETE; + } if (pSnap->type == TDMT_SYNC_PREP_SNAPSHOT || pSnap->type == TDMT_SYNC_PREP_SNAPSHOT_REPLY) { code = tsdbSnapGetDetails(pVnode, pSnap); From d5d713b1b40f0d6300a5de850b83209ebacf163f Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Wed, 27 Sep 2023 17:25:56 +0800 Subject: [PATCH 44/61] enh: fill snapshot info for rsma tsdbs --- source/dnode/vnode/src/tsdb/tsdbFS2.c | 2 +- source/dnode/vnode/src/tsdb/tsdbSnapshot.c | 66 +++++++++++++--------- 2 files changed, 40 insertions(+), 28 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.c b/source/dnode/vnode/src/tsdb/tsdbFS2.c index cf1158fb01..813123ae5c 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.c @@ -521,7 +521,7 @@ static int32_t tsdbFSDoSanAndFix(STFileSystem *fs) { } if (corrupt) { - tsdbError("vgId:%d, not to clear unreferenced files due to fset incompleteness", TD_VID(fs->tsdb->pVnode)); + tsdbError("vgId:%d, not to clear dangling files due to fset incompleteness", TD_VID(fs->tsdb->pVnode)); fs->fsstate = TSDB_FS_STATE_INCOMPLETE; code = 0; goto _exit; diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index 69ead6feef..2cf4521eff 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -1521,53 +1521,65 @@ ETsdbFsState tsdbSnapGetFsState(SVnode* pVnode) { int32_t tsdbSnapGetDetails(SVnode* pVnode, SSnapshot* pSnap) { int code = -1; - STsdb* pTsdb = pVnode->pTsdb; - STsdbSnapPartList* pList = tsdbGetSnapPartList(pTsdb->pFS); - if (pList == NULL) goto _out; + int32_t tsdbMaxCnt = (!VND_IS_RSMA(pVnode) ? 
1 : TSDB_RETENTION_MAX); + int32_t subTyps[TSDB_RETENTION_MAX] = {SNAP_DATA_TSDB, SNAP_DATA_RSMA1, SNAP_DATA_RSMA2}; + STsdbSnapPartList* pLists[TSDB_RETENTION_MAX] = {0}; - if (pSnap->type == TDMT_SYNC_PREP_SNAPSHOT_REPLY) { + for (int32_t j = 0; j < tsdbMaxCnt; ++j) { + STsdb* pTsdb = SMA_RSMA_GET_TSDB(pVnode, j); + pLists[j] = tsdbGetSnapPartList(pTsdb->pFS); + if (pLists[j] == NULL) goto _out; } - void* buf = NULL; - int32_t tlen = 0; - - // estimate data length encode + // estimate bufLen and prepare int32_t bufLen = sizeof(SSyncTLV); // typ: TDMT_SYNC_PREP_SNAPSHOT or TDMT_SYNC_PREP_SNAPSOT_REPLY - bufLen += sizeof(SSyncTLV); // subtyp: SNAP_DATA_TSDB - bufLen += tTsdbSnapPartListDataLenCalc(pList); + for (int32_t j = 0; j < tsdbMaxCnt; ++j) { + bufLen += sizeof(SSyncTLV); // subTyps[j] + bufLen += tTsdbSnapPartListDataLenCalc(pLists[j]); + } + + tsdbInfo("vgId:%d, allocate %d bytes for data of snapshot info.", TD_VID(pVnode), bufLen); void* data = taosMemoryRealloc(pSnap->data, bufLen); if (data == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; + tsdbError("vgId:%d, failed to realloc memory for data of snapshot info. 
bytes:%d", TD_VID(pVnode), bufLen); goto _out; } pSnap->data = data; // header - SSyncTLV* datHead = (void*)pSnap->data; - datHead->typ = pSnap->type; - datHead->len = 0; + SSyncTLV* head = data; + head->len = 0; + head->typ = pSnap->type; + int32_t offset = sizeof(SSyncTLV); - // tsdb - SSyncTLV* tsdbHead = (void*)datHead->val; - tsdbHead->typ = SNAP_DATA_TSDB; + // fill snapshot info + for (int32_t j = 0; j < tsdbMaxCnt; ++j) { + if (pSnap->type == TDMT_SYNC_PREP_SNAPSHOT_REPLY) { + } - buf = tsdbHead->val; - tlen = 0; - if ((tlen = tSerializeTsdbSnapPartList(buf, bufLen, pList)) < 0) { - tsdbError("vgId:%d, failed to serialize snap range since %s", TD_VID(pTsdb->pVnode), terrstr()); - goto _out; + // subHead + SSyncTLV* subHead = (void*)((char*)data + offset); + subHead->typ = subTyps[j]; + ASSERT(subHead->val == (char*)data + offset + sizeof(SSyncTLV)); + + int32_t tlen = 0; + if ((tlen = tSerializeTsdbSnapPartList(subHead->val, bufLen - offset - sizeof(SSyncTLV), pLists[j])) < 0) { + tsdbError("vgId:%d, failed to serialize snap partition list of tsdb %d since %s", TD_VID(pVnode), j, terrstr()); + goto _out; + } + subHead->len = tlen; + offset += sizeof(SSyncTLV) + tlen; } - tsdbHead->len = tlen; - datHead->len += sizeof(SSyncTLV) + tsdbHead->len; - - // rsma + head->len = offset; code = 0; _out: - if (pList) { - tsdbSnapPartListDestroy(&pList); + for (int32_t j = 0; j < tsdbMaxCnt; ++j) { + if (pLists[j] == NULL) continue; + tsdbSnapPartListDestroy(&pLists[j]); } return code; From 74185b8b9c393c79bc8a26a78268fcca9a20b0ac Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Sat, 7 Oct 2023 16:07:21 +0800 Subject: [PATCH 45/61] feat: exchange snapshot info for rsma tsdbs --- source/dnode/vnode/src/inc/vnodeInt.h | 2 +- source/dnode/vnode/src/sma/smaSnapshot.c | 4 +- source/dnode/vnode/src/vnd/vnodeSnapshot.c | 106 ++++++++++++--------- 3 files changed, 66 insertions(+), 46 deletions(-) diff --git a/source/dnode/vnode/src/inc/vnodeInt.h 
b/source/dnode/vnode/src/inc/vnodeInt.h index 6682ff0133..5015202865 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -357,7 +357,7 @@ int32_t rsmaSnapReaderOpen(SSma* pSma, int64_t sver, int64_t ever, SRSmaSnapRead int32_t rsmaSnapReaderClose(SRSmaSnapReader** ppReader); int32_t rsmaSnapRead(SRSmaSnapReader* pReader, uint8_t** ppData); // SRSmaSnapWriter ======================================== -int32_t rsmaSnapWriterOpen(SSma* pSma, int64_t sver, int64_t ever, void* pRanges, SRSmaSnapWriter** ppWriter); +int32_t rsmaSnapWriterOpen(SSma* pSma, int64_t sver, int64_t ever, void** ppRanges, SRSmaSnapWriter** ppWriter); int32_t rsmaSnapWrite(SRSmaSnapWriter* pWriter, uint8_t* pData, uint32_t nData); int32_t rsmaSnapWriterClose(SRSmaSnapWriter** ppWriter, int8_t rollback); diff --git a/source/dnode/vnode/src/sma/smaSnapshot.c b/source/dnode/vnode/src/sma/smaSnapshot.c index ca46b4728f..1e921b23d1 100644 --- a/source/dnode/vnode/src/sma/smaSnapshot.c +++ b/source/dnode/vnode/src/sma/smaSnapshot.c @@ -128,7 +128,7 @@ struct SRSmaSnapWriter { STsdbSnapWriter* pDataWriter[TSDB_RETENTION_L2]; }; -int32_t rsmaSnapWriterOpen(SSma* pSma, int64_t sver, int64_t ever, void* pRanges, SRSmaSnapWriter** ppWriter) { +int32_t rsmaSnapWriterOpen(SSma* pSma, int64_t sver, int64_t ever, void** ppRanges, SRSmaSnapWriter** ppWriter) { int32_t code = 0; int32_t lino = 0; SVnode* pVnode = pSma->pVnode; @@ -147,7 +147,7 @@ int32_t rsmaSnapWriterOpen(SSma* pSma, int64_t sver, int64_t ever, void* pRanges // rsma1/rsma2 for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { if (pSma->pRSmaTsdb[i]) { - code = tsdbSnapWriterOpen(pSma->pRSmaTsdb[i], sver, ever, pRanges, &pWriter->pDataWriter[i]); + code = tsdbSnapWriterOpen(pSma->pRSmaTsdb[i], sver, ever, ((void**)ppRanges)[i], &pWriter->pDataWriter[i]); TSDB_CHECK_CODE(code, lino, _exit); } } diff --git a/source/dnode/vnode/src/vnd/vnodeSnapshot.c b/source/dnode/vnode/src/vnd/vnodeSnapshot.c index 
0874e5e0d8..f5a5249d1f 100644 --- a/source/dnode/vnode/src/vnd/vnodeSnapshot.c +++ b/source/dnode/vnode/src/vnd/vnodeSnapshot.c @@ -45,7 +45,7 @@ struct SVSnapReader { SStreamStateReader *pStreamStateReader; // rsma int8_t rsmaDone; - TSnapRangeArray *pRsmaRanges; + TSnapRangeArray *pRsmaRanges[TSDB_RETENTION_L2]; SRSmaSnapReader *pRsmaReader; }; @@ -69,6 +69,20 @@ _out: return code; } +static TSnapRangeArray **vnodeSnapReaderGetTsdbRanges(SVSnapReader *pReader, int32_t tsdbTyp) { + _Static_assert(sizeof(pReader->pRsmaRanges) / sizeof(pReader->pRsmaRanges[0]) == 2, "Unexpected array size"); + switch (tsdbTyp) { + case SNAP_DATA_TSDB: + return &pReader->pRanges; + case SNAP_DATA_RSMA1: + return &pReader->pRsmaRanges[0]; + case SNAP_DATA_RSMA2: + return &pReader->pRsmaRanges[1]; + default: + return NULL; + } +} + int32_t vnodeSnapReaderOpen(SVnode *pVnode, SSnapshotParam *pParam, SVSnapReader **ppReader) { int32_t code = 0; int64_t sver = pParam->start; @@ -92,25 +106,18 @@ int32_t vnodeSnapReaderOpen(SVnode *pVnode, SSnapshotParam *pParam, SVSnapReader goto _err; } - int32_t offset = 0; TSnapRangeArray **ppRanges = NULL; + int32_t offset = 0; while (offset + sizeof(SSyncTLV) < datHead->len) { SSyncTLV *subField = (void *)(datHead->val + offset); offset += sizeof(SSyncTLV) + subField->len; void *buf = subField->val; int32_t bufLen = subField->len; - switch (subField->typ) { - case SNAP_DATA_TSDB: - ppRanges = &pReader->pRanges; - break; - case SNAP_DATA_RSMA1: - ppRanges = &pReader->pRsmaRanges; - break; - default: - vError("vgId:%d, unexpected subfield type in data of snapshot param. subtyp:%d", TD_VID(pVnode), - subField->typ); - goto _err; + ppRanges = vnodeSnapReaderGetTsdbRanges(pReader, subField->typ); + if (ppRanges == NULL) { + vError("vgId:%d, unexpected subfield type in data of snapshot param. 
subtyp:%d", TD_VID(pVnode), subField->typ); + goto _err; } if (vnodeExtractSnapInfoDiff(buf, bufLen, ppRanges) < 0) { vError("vgId:%d, failed to get range diff since %s", TD_VID(pVnode), terrstr()); @@ -129,8 +136,19 @@ _err: return code; } +static void vnodeSnapReaderDestroyTsdbRanges(SVSnapReader *pReader) { + int32_t tsdbTyps[TSDB_RETENTION_MAX] = {SNAP_DATA_TSDB, SNAP_DATA_RSMA1, SNAP_DATA_RSMA2}; + for (int32_t j = 0; j < TSDB_RETENTION_MAX; ++j) { + TSnapRangeArray **ppRanges = vnodeSnapReaderGetTsdbRanges(pReader, tsdbTyps[j]); + if (ppRanges == NULL) continue; + tsdbSnapRangeArrayDestroy(ppRanges); + } +} + void vnodeSnapReaderClose(SVSnapReader *pReader) { vInfo("vgId:%d, close vnode snapshot reader", TD_VID(pReader->pVnode)); + vnodeSnapReaderDestroyTsdbRanges(pReader); + if (pReader->pRsmaReader) { rsmaSnapReaderClose(&pReader->pRsmaReader); } @@ -155,14 +173,6 @@ void vnodeSnapReaderClose(SVSnapReader *pReader) { tqCheckInfoReaderClose(&pReader->pTqCheckInfoReader); } - if (pReader->pRanges) { - tsdbSnapRangeArrayDestroy(&pReader->pRanges); - } - - if (pReader->pRsmaRanges) { - tsdbSnapRangeArrayDestroy(&pReader->pRsmaRanges); - } - taosMemoryFree(pReader); } @@ -442,10 +452,24 @@ struct SVSnapWriter { SStreamTaskWriter *pStreamTaskWriter; SStreamStateWriter *pStreamStateWriter; // rsma - TSnapRangeArray *pRsmaRanges; + TSnapRangeArray *pRsmaRanges[TSDB_RETENTION_L2]; SRSmaSnapWriter *pRsmaSnapWriter; }; +TSnapRangeArray **vnodeSnapWriterGetTsdbRanges(SVSnapWriter *pWriter, int32_t tsdbTyp) { + _Static_assert(sizeof(pWriter->pRsmaRanges) / sizeof(pWriter->pRsmaRanges[0]) == 2, "Unexpected array size"); + switch (tsdbTyp) { + case SNAP_DATA_TSDB: + return &pWriter->pRanges; + case SNAP_DATA_RSMA1: + return &pWriter->pRsmaRanges[0]; + case SNAP_DATA_RSMA2: + return &pWriter->pRsmaRanges[1]; + default: + return NULL; + } +} + int32_t vnodeSnapWriterOpen(SVnode *pVnode, SSnapshotParam *pParam, SVSnapWriter **ppWriter) { int32_t code = 0; SVSnapWriter 
*pWriter = NULL; @@ -477,25 +501,18 @@ int32_t vnodeSnapWriterOpen(SVnode *pVnode, SSnapshotParam *pParam, SVSnapWriter goto _err; } - int32_t offset = 0; TSnapRangeArray **ppRanges = NULL; + int32_t offset = 0; while (offset + sizeof(SSyncTLV) < datHead->len) { SSyncTLV *subField = (void *)(datHead->val + offset); offset += sizeof(SSyncTLV) + subField->len; void *buf = subField->val; int32_t bufLen = subField->len; - switch (subField->typ) { - case SNAP_DATA_TSDB: - ppRanges = &pWriter->pRanges; - break; - case SNAP_DATA_RSMA1: - ppRanges = &pWriter->pRsmaRanges; - break; - default: - vError("vgId:%d, unexpected subfield type in data of snapshot param. subtyp:%d", TD_VID(pVnode), - subField->typ); - goto _err; + ppRanges = vnodeSnapWriterGetTsdbRanges(pWriter, subField->typ); + if (ppRanges == NULL) { + vError("vgId:%d, unexpected subfield type in data of snapshot param. subtyp:%d", TD_VID(pVnode), subField->typ); + goto _err; } if (vnodeExtractSnapInfoDiff(buf, bufLen, ppRanges) < 0) { vError("vgId:%d, failed to get range diff since %s", TD_VID(pVnode), terrstr()); @@ -515,10 +532,21 @@ _err: return code; } +static void vnodeSnapWriterDestroyTsdbRanges(SVSnapWriter *pWriter) { + int32_t tsdbTyps[TSDB_RETENTION_MAX] = {SNAP_DATA_TSDB, SNAP_DATA_RSMA1, SNAP_DATA_RSMA2}; + for (int32_t j = 0; j < TSDB_RETENTION_MAX; ++j) { + TSnapRangeArray **ppRanges = vnodeSnapWriterGetTsdbRanges(pWriter, tsdbTyps[j]); + if (ppRanges == NULL) continue; + tsdbSnapRangeArrayDestroy(ppRanges); + } +} + int32_t vnodeSnapWriterClose(SVSnapWriter *pWriter, int8_t rollback, SSnapshot *pSnapshot) { int32_t code = 0; SVnode *pVnode = pWriter->pVnode; + vnodeSnapWriterDestroyTsdbRanges(pWriter); + // prepare if (pWriter->pTsdbSnapWriter) { tsdbSnapWriterPrepareClose(pWriter->pTsdbSnapWriter); @@ -550,10 +578,6 @@ int32_t vnodeSnapWriterClose(SVSnapWriter *pWriter, int8_t rollback, SSnapshot * if (code) goto _exit; } - if (pWriter->pRanges) { - tsdbSnapRangeArrayDestroy(&pWriter->pRanges); - 
} - if (pWriter->pTsdbSnapWriter) { code = tsdbSnapWriterClose(&pWriter->pTsdbSnapWriter, rollback); if (code) goto _exit; @@ -588,10 +612,6 @@ int32_t vnodeSnapWriterClose(SVSnapWriter *pWriter, int8_t rollback, SSnapshot * if (code) goto _exit; } - if (pWriter->pRsmaRanges) { - tsdbSnapRangeArrayDestroy(&pWriter->pRsmaRanges); - } - if (pWriter->pRsmaSnapWriter) { code = rsmaSnapWriterClose(&pWriter->pRsmaSnapWriter, rollback); if (code) goto _exit; @@ -739,7 +759,7 @@ int32_t vnodeSnapWrite(SVSnapWriter *pWriter, uint8_t *pData, uint32_t nData) { case SNAP_DATA_QTASK: { // rsma1/rsma2/qtask for rsma if (pWriter->pRsmaSnapWriter == NULL) { - code = rsmaSnapWriterOpen(pVnode->pSma, pWriter->sver, pWriter->ever, pWriter->pRsmaRanges, + code = rsmaSnapWriterOpen(pVnode->pSma, pWriter->sver, pWriter->ever, (void **)pWriter->pRsmaRanges, &pWriter->pRsmaSnapWriter); if (code) goto _err; } From c1f1709d59c6b279c6d4f52bde05c16b4924e927 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Sun, 8 Oct 2023 17:03:07 +0800 Subject: [PATCH 46/61] fix: add rsmaSnapWriterPrepareClose --- source/dnode/vnode/src/inc/vnodeInt.h | 1 + source/dnode/vnode/src/sma/smaSnapshot.c | 15 +++++++++++++++ source/dnode/vnode/src/vnd/vnodeSnapshot.c | 4 ++++ 3 files changed, 20 insertions(+) diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 5015202865..68334b3b63 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -359,6 +359,7 @@ int32_t rsmaSnapRead(SRSmaSnapReader* pReader, uint8_t** ppData); // SRSmaSnapWriter ======================================== int32_t rsmaSnapWriterOpen(SSma* pSma, int64_t sver, int64_t ever, void** ppRanges, SRSmaSnapWriter** ppWriter); int32_t rsmaSnapWrite(SRSmaSnapWriter* pWriter, uint8_t* pData, uint32_t nData); +int32_t rsmaSnapWriterPrepareClose(SRSmaSnapWriter* pWriter); int32_t rsmaSnapWriterClose(SRSmaSnapWriter** ppWriter, int8_t rollback); typedef struct { diff 
--git a/source/dnode/vnode/src/sma/smaSnapshot.c b/source/dnode/vnode/src/sma/smaSnapshot.c index 1e921b23d1..c93d9a7de6 100644 --- a/source/dnode/vnode/src/sma/smaSnapshot.c +++ b/source/dnode/vnode/src/sma/smaSnapshot.c @@ -165,6 +165,21 @@ _exit: return code; } +int32_t rsmaSnapWriterPrepareClose(SRSmaSnapWriter* pWriter) { + int32_t code = 0; + for (int32_t i = 0; i < TSDB_RETENTION_L2; ++i) { + if (pWriter->pDataWriter[i]) { + code = tsdbSnapWriterPrepareClose(pWriter->pDataWriter[i]); + if (code) { + smaError("vgId:%d, failed to prepare close tsdbSnapWriter since %s. i: %d", SMA_VID(pWriter->pSma), terrstr(), + i); + return -1; + } + } + } + return code; +} + int32_t rsmaSnapWriterClose(SRSmaSnapWriter** ppWriter, int8_t rollback) { int32_t code = 0; int32_t lino = 0; diff --git a/source/dnode/vnode/src/vnd/vnodeSnapshot.c b/source/dnode/vnode/src/vnd/vnodeSnapshot.c index f5a5249d1f..f5d0bf6d0f 100644 --- a/source/dnode/vnode/src/vnd/vnodeSnapshot.c +++ b/source/dnode/vnode/src/vnd/vnodeSnapshot.c @@ -552,6 +552,10 @@ int32_t vnodeSnapWriterClose(SVSnapWriter *pWriter, int8_t rollback, SSnapshot * tsdbSnapWriterPrepareClose(pWriter->pTsdbSnapWriter); } + if (pWriter->pRsmaSnapWriter) { + rsmaSnapWriterPrepareClose(pWriter->pRsmaSnapWriter); + } + // commit json if (!rollback) { pWriter->info.state.committed = pWriter->ever; From d671283b8be9412f76027d2913f51d7387cd1aab Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Sun, 8 Oct 2023 17:41:33 +0800 Subject: [PATCH 47/61] refact: adjust logging msg for incomplete fsm state --- source/libs/sync/src/syncAppendEntries.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/libs/sync/src/syncAppendEntries.c b/source/libs/sync/src/syncAppendEntries.c index 4776b2bb1b..647b86bae9 100644 --- a/source/libs/sync/src/syncAppendEntries.c +++ b/source/libs/sync/src/syncAppendEntries.c @@ -155,9 +155,9 @@ int32_t syncNodeOnAppendEntries(SSyncNode* ths, const SRpcMsg* pRpcMsg) { pMsg->vgId, 
pMsg->prevLogIndex + 1, pMsg->term, pMsg->prevLogIndex, pMsg->prevLogTerm, pMsg->commitIndex, pEntry->term); - if (ths->fsmState == SYNC_FSM_STATE_INCOMPLETE) { + if (ths->fsmState != SYNC_FSM_STATE_NORMAL) { pReply->fsmState = ths->fsmState; - sError("vgId:%d, not to accept sync log msg due to incomplete fsm state", ths->vgId); + sWarn("vgId:%d, unable to accept, due to incomplete fsm state. index:%" PRId64, ths->vgId, pEntry->index); syncEntryDestroy(pEntry); goto _SEND_RESPONSE; } From 05ba5e1ed06cfa23d10adbeffed5737e042866e7 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Sun, 8 Oct 2023 17:42:20 +0800 Subject: [PATCH 48/61] refact: adjust logging msg in walLogEntriesComplete --- source/libs/wal/src/walMeta.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/source/libs/wal/src/walMeta.c b/source/libs/wal/src/walMeta.c index e700ef3d0a..f5e5427c68 100644 --- a/source/libs/wal/src/walMeta.c +++ b/source/libs/wal/src/walMeta.c @@ -339,8 +339,9 @@ bool walLogEntriesComplete(const SWal* pWal) { } if (!complete) { - wError("vgId:%d, WAL log entries incomplete in range [%" PRId64 ", %" PRId64 "], aligned with snaphotVer:%" PRId64, - pWal->cfg.vgId, pWal->vers.firstVer, pWal->vers.lastVer, pWal->vers.snapshotVer); + wError("vgId:%d, WAL log entries incomplete in range [%" PRId64 ", %" PRId64 "], index:%" PRId64 + ", snaphotVer:%" PRId64, + pWal->cfg.vgId, pWal->vers.firstVer, pWal->vers.lastVer, index, pWal->vers.snapshotVer); terrno = TSDB_CODE_WAL_LOG_INCOMPLETE; } From 094cf408dfdf72ffcda5cc092e0f3a9c6e836f00 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Sun, 8 Oct 2023 18:28:47 +0800 Subject: [PATCH 49/61] enh: prepare vnode dir again in vnodeOpen --- source/dnode/vnode/src/vnd/vnodeOpen.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/source/dnode/vnode/src/vnd/vnodeOpen.c b/source/dnode/vnode/src/vnd/vnodeOpen.c index 5084cc2ff5..28d1e171d8 100644 --- a/source/dnode/vnode/src/vnd/vnodeOpen.c +++ 
b/source/dnode/vnode/src/vnd/vnodeOpen.c @@ -345,6 +345,10 @@ SVnode *vnodeOpen(const char *path, int32_t diskPrimary, STfs *pTfs, SMsgCb msgC return NULL; } + if (vnodeMkDir(pTfs, path)) { + vError("vgId:%d, failed to prepare vnode dir since %s, path: %s", info.config.vgId, strerror(errno), path); + return NULL; + } // save vnode info on dnode ep changed bool updated = false; SSyncCfg *pCfg = &info.config.syncCfg; From 4e74533878bb2a8d6c1b08dc02940ed2052fe04f Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Mon, 9 Oct 2023 17:19:47 +0800 Subject: [PATCH 50/61] enh: avoid _Static_assert --- source/dnode/vnode/src/vnd/vnodeSnapshot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/dnode/vnode/src/vnd/vnodeSnapshot.c b/source/dnode/vnode/src/vnd/vnodeSnapshot.c index f5d0bf6d0f..cdf3fff47f 100644 --- a/source/dnode/vnode/src/vnd/vnodeSnapshot.c +++ b/source/dnode/vnode/src/vnd/vnodeSnapshot.c @@ -70,7 +70,7 @@ _out: } static TSnapRangeArray **vnodeSnapReaderGetTsdbRanges(SVSnapReader *pReader, int32_t tsdbTyp) { - _Static_assert(sizeof(pReader->pRsmaRanges) / sizeof(pReader->pRsmaRanges[0]) == 2, "Unexpected array size"); + ASSERTS(sizeof(pReader->pRsmaRanges) / sizeof(pReader->pRsmaRanges[0]) == 2, "Unexpected array size"); switch (tsdbTyp) { case SNAP_DATA_TSDB: return &pReader->pRanges; @@ -457,7 +457,7 @@ struct SVSnapWriter { }; TSnapRangeArray **vnodeSnapWriterGetTsdbRanges(SVSnapWriter *pWriter, int32_t tsdbTyp) { - _Static_assert(sizeof(pWriter->pRsmaRanges) / sizeof(pWriter->pRsmaRanges[0]) == 2, "Unexpected array size"); + ASSERTS(sizeof(pWriter->pRsmaRanges) / sizeof(pWriter->pRsmaRanges[0]) == 2, "Unexpected array size"); switch (tsdbTyp) { case SNAP_DATA_TSDB: return &pWriter->pRanges; From eb8c4d3e8c1d2dad9d23a5d50ad5b85424c86b89 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Tue, 10 Oct 2023 11:08:12 +0800 Subject: [PATCH 51/61] enh: use hash table for filtering fset in tsdbSnapReaderOpen and tsdbSnapWriterOpen --- 
source/dnode/vnode/src/inc/tsdb.h | 1 + source/dnode/vnode/src/tsdb/tsdbFS2.c | 72 +++++++++++++++------- source/dnode/vnode/src/tsdb/tsdbSnapshot.c | 13 +++- 3 files changed, 62 insertions(+), 24 deletions(-) diff --git a/source/dnode/vnode/src/inc/tsdb.h b/source/dnode/vnode/src/inc/tsdb.h index ec721308b2..79112babc3 100644 --- a/source/dnode/vnode/src/inc/tsdb.h +++ b/source/dnode/vnode/src/inc/tsdb.h @@ -680,6 +680,7 @@ typedef TARRAY2(STSnapRange *) TSnapRangeArray; // disjoint snap ranges int32_t tSerializeSnapRangeArray(void *buf, int32_t bufLen, TSnapRangeArray *pSnapR); int32_t tDeserializeSnapRangeArray(void *buf, int32_t bufLen, TSnapRangeArray *pSnapR); void tsdbSnapRangeArrayDestroy(TSnapRangeArray **ppSnap); +SHashObj *tsdbGetSnapRangeHash(TSnapRangeArray *pRanges); // snap partition list typedef TARRAY2(SVersionRange) SVerRangeList; diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.c b/source/dnode/vnode/src/tsdb/tsdbFS2.c index 813123ae5c..7b8c8696ea 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.c @@ -991,11 +991,12 @@ int32_t tsdbFSDestroyRefSnapshot(TFileSetArray **fsetArr) { return 0; } -int32_t tsdbFSCreateCopyRangedSnapshot(STFileSystem *fs, TSnapRangeArray *pExclude, TFileSetArray **fsetArr, +int32_t tsdbFSCreateCopyRangedSnapshot(STFileSystem *fs, TSnapRangeArray *pRanges, TFileSetArray **fsetArr, TFileOpArray *fopArr) { int32_t code = 0; STFileSet *fset; STFileSet *fset1; + SHashObj *pHash = NULL; fsetArr[0] = taosMemoryMalloc(sizeof(TFileSetArray)); if (fsetArr == NULL) return TSDB_CODE_OUT_OF_MEMORY; @@ -1003,21 +1004,19 @@ int32_t tsdbFSCreateCopyRangedSnapshot(STFileSystem *fs, TSnapRangeArray *pExclu TARRAY2_INIT(fsetArr[0]); int32_t i = 0; + if (pRanges) { + pHash = tsdbGetSnapRangeHash(pRanges); + } taosThreadRwlockRdlock(&fs->tsdb->rwLock); TARRAY2_FOREACH(fs->fSetArr, fset) { int64_t ever = VERSION_MAX; - while (pExclude && i < TARRAY2_SIZE(pExclude)) { - STSnapRange *u = 
TARRAY2_GET(pExclude, i); - if (fset->fid > u->fid) { - i++; - continue; - } - if (fset->fid == u->fid) { + if (pHash) { + int32_t fid = fset->fid; + STSnapRange *u = taosHashGet(pHash, &fid, sizeof(fid)); + if (u) { ever = u->sver - 1; - i++; } - break; } code = tsdbTFileSetFilteredInitDup(fs->tsdb, fset, ever, &fset1, fopArr); @@ -1033,14 +1032,37 @@ int32_t tsdbFSCreateCopyRangedSnapshot(STFileSystem *fs, TSnapRangeArray *pExclu taosMemoryFree(fsetArr[0]); fsetArr[0] = NULL; } + if (pHash) { + taosHashCleanup(pHash); + pHash = NULL; + } return code; } +SHashObj *tsdbGetSnapRangeHash(TSnapRangeArray *pRanges) { + int32_t capacity = TARRAY2_SIZE(pRanges) * 2; + SHashObj *pHash = taosHashInit(capacity, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false, HASH_ENTRY_LOCK); + if (pHash == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + return NULL; + } + + for (int32_t i = 0; i < TARRAY2_SIZE(pRanges); i++) { + STSnapRange *u = TARRAY2_GET(pRanges, i); + int32_t fid = u->fid; + int32_t code = taosHashPut(pHash, &fid, sizeof(fid), u, sizeof(*u)); + ASSERT(code == 0); + tsdbDebug("range diff hash fid:%d, sver:%" PRId64 ", ever:%" PRId64, u->fid, u->sver, u->ever); + } + return pHash; +} + int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, int64_t sver, int64_t ever, TSnapRangeArray *pRanges, TSnapRangeArray **fsrArr) { int32_t code = -1; STFileSet *fset; STSnapRange *fsr1 = NULL; + SHashObj *pHash = NULL; fsrArr[0] = taosMemoryCalloc(1, sizeof(*fsrArr[0])); if (fsrArr[0] == NULL) { @@ -1048,30 +1070,32 @@ int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, int64_t sver, int64_t ev goto _out; } - int32_t i = 0; + tsdbInfo("pRanges size:%d", (pRanges == NULL ? 
0 : TARRAY2_SIZE(pRanges))); code = 0; + if (pRanges) { + pHash = tsdbGetSnapRangeHash(pRanges); + } taosThreadRwlockRdlock(&fs->tsdb->rwLock); TARRAY2_FOREACH(fs->fSetArr, fset) { int64_t sver1 = sver; int64_t ever1 = ever; - while (pRanges && i < TARRAY2_SIZE(pRanges)) { - STSnapRange *u = TARRAY2_GET(pRanges, i); - if (fset->fid > u->fid) { - i++; - continue; - } - if (fset->fid == u->fid) { + if (pHash) { + int32_t fid = fset->fid; + STSnapRange *u = taosHashGet(pHash, &fid, sizeof(fid)); + if (u) { sver1 = u->sver; - i++; + tsdbDebug("range hash get fid:%d, sver:%" PRId64 ", ever:%" PRId64, u->fid, u->sver, u->ever); } - break; } - if (sver1 > ever1) continue; + if (sver1 > ever1) { + tsdbDebug("skip fid:%d, sver:%" PRId64 ", ever:%" PRId64, fset->fid, sver1, ever1); + continue; + } - tsdbInfo("fsrArr:%p, fid:%d, sver:%" PRId64 ", ever:%" PRId64, fsrArr, fset->fid, sver1, ever1); + tsdbDebug("fsrArr:%p, fid:%d, sver:%" PRId64 ", ever:%" PRId64, fsrArr, fset->fid, sver1, ever1); code = tsdbTSnapRangeInitRef(fs->tsdb, fset, sver1, ever1, &fsr1); if (code) break; @@ -1090,6 +1114,10 @@ int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, int64_t sver, int64_t ev } _out: + if (pHash) { + taosHashCleanup(pHash); + pHash = NULL; + } return code; } diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index 2cf4521eff..d348c318b7 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -1175,6 +1175,12 @@ static int32_t tVersionRangeCmprFn(SVersionRange* x, SVersionRange* y) { return 0; } +static int32_t tsdbSnapRangeCmprFn(STSnapRange* x, STSnapRange* y) { + if (x->fid < y->fid) return -1; + if (x->fid > y->fid) return 1; + return 0; +} + STsdbSnapPartition* tsdbSnapPartitionCreate() { STsdbSnapPartition* pSP = taosMemoryCalloc(1, sizeof(STsdbSnapPartition)); if (pSP == NULL) { @@ -1478,10 +1484,13 @@ int32_t tsdbSnapPartListToRangeDiff(STsdbSnapPartList* pList, 
TSnapRangeArray** r->fid = part->fid; r->sver = maxVerValid + 1; r->ever = VERSION_MAX; - tsdbInfo("range diff fid:%" PRId64 ", sver:%" PRId64 ", ever:%" PRId64, part->fid, r->sver, r->ever); - TARRAY2_APPEND(pDiff, r); + tsdbDebug("range diff fid:%" PRId64 ", sver:%" PRId64 ", ever:%" PRId64, part->fid, r->sver, r->ever); + int32_t code = TARRAY2_SORT_INSERT(pDiff, r, tsdbSnapRangeCmprFn); + ASSERT(code == 0); } ppRanges[0] = pDiff; + + tsdbInfo("pDiff size:%d", TARRAY2_SIZE(pDiff)); return 0; _err: From 59e8a2104c226884ea8e62b27794ad3a428de595 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Tue, 10 Oct 2023 20:19:07 +0800 Subject: [PATCH 52/61] refact: wrap funcs to handle snapshot info for vnodeSnapReaderOpen and vnodeSnapWriterOpen --- source/dnode/vnode/src/vnd/vnodeSnapshot.c | 126 +++++++++++++-------- 1 file changed, 76 insertions(+), 50 deletions(-) diff --git a/source/dnode/vnode/src/vnd/vnodeSnapshot.c b/source/dnode/vnode/src/vnd/vnodeSnapshot.c index cdf3fff47f..87b407efcb 100644 --- a/source/dnode/vnode/src/vnd/vnodeSnapshot.c +++ b/source/dnode/vnode/src/vnd/vnodeSnapshot.c @@ -83,6 +83,42 @@ static TSnapRangeArray **vnodeSnapReaderGetTsdbRanges(SVSnapReader *pReader, int } } +static int32_t vnodeSnapReaderDoSnapInfo(SVSnapReader *pReader, SSnapshotParam *pParam) { + SVnode *pVnode = pReader->pVnode; + int32_t code = -1; + + if (pParam->data) { + SSyncTLV *datHead = (void *)pParam->data; + if (datHead->typ != TDMT_SYNC_PREP_SNAPSHOT_REPLY) { + terrno = TSDB_CODE_INVALID_DATA_FMT; + goto _out; + } + + TSnapRangeArray **ppRanges = NULL; + int32_t offset = 0; + + while (offset + sizeof(SSyncTLV) < datHead->len) { + SSyncTLV *subField = (void *)(datHead->val + offset); + offset += sizeof(SSyncTLV) + subField->len; + void *buf = subField->val; + int32_t bufLen = subField->len; + ppRanges = vnodeSnapReaderGetTsdbRanges(pReader, subField->typ); + if (ppRanges == NULL) { + vError("vgId:%d, unexpected subfield type in data of snapshot param. 
subtyp:%d", TD_VID(pVnode), subField->typ); + goto _out; + } + if (vnodeExtractSnapInfoDiff(buf, bufLen, ppRanges) < 0) { + vError("vgId:%d, failed to get range diff since %s", TD_VID(pVnode), terrstr()); + goto _out; + } + } + } + + code = 0; +_out: + return code; +} + int32_t vnodeSnapReaderOpen(SVnode *pVnode, SSnapshotParam *pParam, SVSnapReader **ppReader) { int32_t code = 0; int64_t sver = pParam->start; @@ -99,31 +135,8 @@ int32_t vnodeSnapReaderOpen(SVnode *pVnode, SSnapshotParam *pParam, SVSnapReader pReader->ever = ever; // snapshot info - if (pParam->data) { - SSyncTLV *datHead = (void *)pParam->data; - if (datHead->typ != TDMT_SYNC_PREP_SNAPSHOT_REPLY) { - terrno = TSDB_CODE_INVALID_DATA_FMT; - goto _err; - } - - TSnapRangeArray **ppRanges = NULL; - int32_t offset = 0; - - while (offset + sizeof(SSyncTLV) < datHead->len) { - SSyncTLV *subField = (void *)(datHead->val + offset); - offset += sizeof(SSyncTLV) + subField->len; - void *buf = subField->val; - int32_t bufLen = subField->len; - ppRanges = vnodeSnapReaderGetTsdbRanges(pReader, subField->typ); - if (ppRanges == NULL) { - vError("vgId:%d, unexpected subfield type in data of snapshot param. 
subtyp:%d", TD_VID(pVnode), subField->typ); - goto _err; - } - if (vnodeExtractSnapInfoDiff(buf, bufLen, ppRanges) < 0) { - vError("vgId:%d, failed to get range diff since %s", TD_VID(pVnode), terrstr()); - goto _err; - } - } + if (vnodeSnapReaderDoSnapInfo(pReader, pParam) < 0) { + goto _err; } vInfo("vgId:%d, vnode snapshot reader opened, sver:%" PRId64 " ever:%" PRId64, TD_VID(pVnode), sver, ever); @@ -470,6 +483,42 @@ TSnapRangeArray **vnodeSnapWriterGetTsdbRanges(SVSnapWriter *pWriter, int32_t ts } } +static int32_t vnodeSnapWriterDoSnapInfo(SVSnapWriter *pWriter, SSnapshotParam *pParam) { + SVnode *pVnode = pWriter->pVnode; + int32_t code = -1; + + if (pParam->data) { + SSyncTLV *datHead = (void *)pParam->data; + if (datHead->typ != TDMT_SYNC_PREP_SNAPSHOT_REPLY) { + terrno = TSDB_CODE_INVALID_DATA_FMT; + goto _out; + } + + TSnapRangeArray **ppRanges = NULL; + int32_t offset = 0; + + while (offset + sizeof(SSyncTLV) < datHead->len) { + SSyncTLV *subField = (void *)(datHead->val + offset); + offset += sizeof(SSyncTLV) + subField->len; + void *buf = subField->val; + int32_t bufLen = subField->len; + ppRanges = vnodeSnapWriterGetTsdbRanges(pWriter, subField->typ); + if (ppRanges == NULL) { + vError("vgId:%d, unexpected subfield type in data of snapshot param. 
subtyp:%d", TD_VID(pVnode), subField->typ); + goto _out; + } + if (vnodeExtractSnapInfoDiff(buf, bufLen, ppRanges) < 0) { + vError("vgId:%d, failed to get range diff since %s", TD_VID(pVnode), terrstr()); + goto _out; + } + } + } + + code = 0; +_out: + return code; +} + int32_t vnodeSnapWriterOpen(SVnode *pVnode, SSnapshotParam *pParam, SVSnapWriter **ppWriter) { int32_t code = 0; SVSnapWriter *pWriter = NULL; @@ -494,31 +543,8 @@ int32_t vnodeSnapWriterOpen(SVnode *pVnode, SSnapshotParam *pParam, SVSnapWriter pWriter->commitID = ++pVnode->state.commitID; // snapshot info - if (pParam->data) { - SSyncTLV *datHead = (void *)pParam->data; - if (datHead->typ != TDMT_SYNC_PREP_SNAPSHOT_REPLY) { - terrno = TSDB_CODE_INVALID_DATA_FMT; - goto _err; - } - - TSnapRangeArray **ppRanges = NULL; - int32_t offset = 0; - - while (offset + sizeof(SSyncTLV) < datHead->len) { - SSyncTLV *subField = (void *)(datHead->val + offset); - offset += sizeof(SSyncTLV) + subField->len; - void *buf = subField->val; - int32_t bufLen = subField->len; - ppRanges = vnodeSnapWriterGetTsdbRanges(pWriter, subField->typ); - if (ppRanges == NULL) { - vError("vgId:%d, unexpected subfield type in data of snapshot param. 
subtyp:%d", TD_VID(pVnode), subField->typ); - goto _err; - } - if (vnodeExtractSnapInfoDiff(buf, bufLen, ppRanges) < 0) { - vError("vgId:%d, failed to get range diff since %s", TD_VID(pVnode), terrstr()); - goto _err; - } - } + if (vnodeSnapWriterDoSnapInfo(pWriter, pParam) < 0) { + goto _err; } vInfo("vgId:%d, vnode snapshot writer opened, sver:%" PRId64 " ever:%" PRId64 " commit id:%" PRId64, TD_VID(pVnode), From 83013e0fe518827ccbfed805abce27d73d59b3f9 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Wed, 11 Oct 2023 15:17:32 +0800 Subject: [PATCH 53/61] enh: return error in case of out of memory for tsdbGetSnapRangeHash --- source/dnode/vnode/src/tsdb/tsdbFS2.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.c b/source/dnode/vnode/src/tsdb/tsdbFS2.c index 7b8c8696ea..4df5a1eeec 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.c @@ -1000,12 +1000,14 @@ int32_t tsdbFSCreateCopyRangedSnapshot(STFileSystem *fs, TSnapRangeArray *pRange fsetArr[0] = taosMemoryMalloc(sizeof(TFileSetArray)); if (fsetArr == NULL) return TSDB_CODE_OUT_OF_MEMORY; - TARRAY2_INIT(fsetArr[0]); - int32_t i = 0; if (pRanges) { pHash = tsdbGetSnapRangeHash(pRanges); + if (pHash == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _out; + } } taosThreadRwlockRdlock(&fs->tsdb->rwLock); @@ -1027,6 +1029,7 @@ int32_t tsdbFSCreateCopyRangedSnapshot(STFileSystem *fs, TSnapRangeArray *pRange } taosThreadRwlockUnlock(&fs->tsdb->rwLock); +_out: if (code) { TARRAY2_DESTROY(fsetArr[0], tsdbTFileSetClear); taosMemoryFree(fsetArr[0]); @@ -1059,7 +1062,7 @@ SHashObj *tsdbGetSnapRangeHash(TSnapRangeArray *pRanges) { int32_t tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, int64_t sver, int64_t ever, TSnapRangeArray *pRanges, TSnapRangeArray **fsrArr) { - int32_t code = -1; + int32_t code = 0; STFileSet *fset; STSnapRange *fsr1 = NULL; SHashObj *pHash = NULL; @@ -1071,9 +1074,12 @@ int32_t 
tsdbFSCreateRefRangedSnapshot(STFileSystem *fs, int64_t sver, int64_t ev } tsdbInfo("pRanges size:%d", (pRanges == NULL ? 0 : TARRAY2_SIZE(pRanges))); - code = 0; if (pRanges) { pHash = tsdbGetSnapRangeHash(pRanges); + if (pHash == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _out; + } } taosThreadRwlockRdlock(&fs->tsdb->rwLock); From f2bd43c07e9f1302ccfe3fb29c580ddbda0b196f Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Fri, 13 Oct 2023 20:46:53 +0800 Subject: [PATCH 54/61] fix: set field length properly in tsdbGetSnapDetails --- include/common/tgrant.h | 6 +++--- source/dnode/vnode/src/tsdb/tsdbSnapshot.c | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/common/tgrant.h b/include/common/tgrant.h index edbc74bf18..cfc6c13c48 100644 --- a/include/common/tgrant.h +++ b/include/common/tgrant.h @@ -52,9 +52,9 @@ typedef enum { int32_t grantCheck(EGrantType grant); #ifndef TD_GRANT_OPTIMIZE -int32_t grantAlterActiveCode(const char* old, const char* new, char* out, int8_t type); +int32_t grantAlterActiveCode(const char* old, const char* newer, char* out, int8_t type); #else -int32_t grantAlterActiveCode(int32_t did, const char* old, const char* new, char* out, int8_t type); +int32_t grantAlterActiveCode(int32_t did, const char* old, const char* newer, char* out, int8_t type); #endif #ifndef GRANTS_CFG @@ -114,4 +114,4 @@ int32_t grantAlterActiveCode(int32_t did, const char* old, const char* new, char } #endif -#endif /*_TD_COMMON_GRANT_H_*/ \ No newline at end of file +#endif /*_TD_COMMON_GRANT_H_*/ diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index d348c318b7..3b4827a6be 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -1562,6 +1562,7 @@ int32_t tsdbSnapGetDetails(SVnode* pVnode, SSnapshot* pSnap) { head->len = 0; head->typ = pSnap->type; int32_t offset = sizeof(SSyncTLV); + int32_t tlen = 0; // fill snapshot info for 
(int32_t j = 0; j < tsdbMaxCnt; ++j) { @@ -1573,7 +1574,6 @@ int32_t tsdbSnapGetDetails(SVnode* pVnode, SSnapshot* pSnap) { subHead->typ = subTyps[j]; ASSERT(subHead->val == (char*)data + offset + sizeof(SSyncTLV)); - int32_t tlen = 0; if ((tlen = tSerializeTsdbSnapPartList(subHead->val, bufLen - offset - sizeof(SSyncTLV), pLists[j])) < 0) { tsdbError("vgId:%d, failed to serialize snap partition list of tsdb %d since %s", TD_VID(pVnode), j, terrstr()); goto _out; @@ -1582,7 +1582,8 @@ int32_t tsdbSnapGetDetails(SVnode* pVnode, SSnapshot* pSnap) { offset += sizeof(SSyncTLV) + tlen; } - head->len = offset; + head->len = offset - sizeof(SSyncTLV); + ASSERT(offset <= bufLen); code = 0; _out: From 5c85525fd07f9cd8b6a805928ea848d80bc6bd18 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Mon, 16 Oct 2023 11:21:30 +0800 Subject: [PATCH 55/61] enh: add tsdbFSCreateRefSnapshotWithoutLock --- source/dnode/vnode/src/tsdb/tsdbFS2.c | 9 ++++++++- source/dnode/vnode/src/tsdb/tsdbFS2.h | 1 + source/dnode/vnode/src/tsdb/tsdbRead2.c | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.c b/source/dnode/vnode/src/tsdb/tsdbFS2.c index 4df5a1eeec..93a16b5502 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.c @@ -961,6 +961,13 @@ int32_t tsdbFSDestroyCopySnapshot(TFileSetArray **fsetArr) { } int32_t tsdbFSCreateRefSnapshot(STFileSystem *fs, TFileSetArray **fsetArr) { + taosThreadRwlockRdlock(&fs->tsdb->rwLock); + int32_t code = tsdbFSCreateRefSnapshotWithoutLock(fs, fsetArr); + taosThreadRwlockUnlock(&fs->tsdb->rwLock); + return code; +} + +int32_t tsdbFSCreateRefSnapshotWithoutLock(STFileSystem *fs, TFileSetArray **fsetArr) { int32_t code = 0; STFileSet *fset, *fset1; @@ -1284,4 +1291,4 @@ int32_t tsdbFSEnableBgTask(STFileSystem *fs) { fs->stop = false; taosThreadMutexUnlock(fs->mutex); return 0; -} \ No newline at end of file +} diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.h 
b/source/dnode/vnode/src/tsdb/tsdbFS2.h index 851459df53..31b98e5656 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS2.h +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.h @@ -52,6 +52,7 @@ int32_t tsdbCloseFS(STFileSystem **fs); int32_t tsdbFSCreateCopySnapshot(STFileSystem *fs, TFileSetArray **fsetArr); int32_t tsdbFSDestroyCopySnapshot(TFileSetArray **fsetArr); int32_t tsdbFSCreateRefSnapshot(STFileSystem *fs, TFileSetArray **fsetArr); +int32_t tsdbFSCreateRefSnapshotWithoutLock(STFileSystem *fs, TFileSetArray **fsetArr); int32_t tsdbFSDestroyRefSnapshot(TFileSetArray **fsetArr); int32_t tsdbFSCreateCopyRangedSnapshot(STFileSystem *fs, TSnapRangeArray *pExclude, TFileSetArray **fsetArr, diff --git a/source/dnode/vnode/src/tsdb/tsdbRead2.c b/source/dnode/vnode/src/tsdb/tsdbRead2.c index 1139524cb3..22d2a2098c 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead2.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead2.c @@ -4947,7 +4947,7 @@ int32_t tsdbTakeReadSnap2(STsdbReader* pReader, _query_reseek_func_t reseek, STs } // fs - code = tsdbFSCreateRefSnapshot(pTsdb->pFS, &pSnap->pfSetArray); + code = tsdbFSCreateRefSnapshotWithoutLock(pTsdb->pFS, &pSnap->pfSetArray); // unlock taosThreadRwlockUnlock(&pTsdb->rwLock); From c4e9069a664f53d282e165866582c3c43b893524 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Mon, 16 Oct 2023 16:36:38 +0800 Subject: [PATCH 56/61] fix: set nextProcessedVer properly in tqProcessTaskScanHistory --- source/dnode/vnode/src/tq/tq.c | 5 ++++- source/libs/stream/src/streamExec.c | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 98464d082c..a5832d3c66 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -1200,6 +1200,9 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { pStreamTask->status.taskStatus = TASK_STATUS__HALT; nextProcessedVer = walReaderGetCurrentVer(pStreamTask->exec.pWalReader); + if (nextProcessedVer == -1) { + 
nextProcessedVer = pStreamTask->dataRange.range.maxVer + 1; + } tqDebug("s-task:%s level:%d nextProcessedVer:%" PRId64 ", sched-status:%d is halt by fill-history task:%s", pStreamTask->id.idStr, pStreamTask->info.taskLevel, nextProcessedVer, pStreamTask->status.schedStatus, @@ -1975,4 +1978,4 @@ int32_t tqProcessTaskResetReq(STQ* pTq, SRpcMsg* pMsg) { streamMetaReleaseTask(pMeta, pTask); return TSDB_CODE_SUCCESS; -} \ No newline at end of file +} diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index 12b51e6c93..c49c647906 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -309,7 +309,9 @@ int32_t streamDoTransferStateToStreamTask(SStreamTask* pTask) { pStreamTask->id.idStr); } - ASSERT(pStreamTask->hTaskInfo.id.taskId == pTask->id.taskId && pTask->status.appendTranstateBlock == true); + ASSERT(((pStreamTask->status.taskStatus == TASK_STATUS__STOP) || + (pStreamTask->hTaskInfo.id.taskId == pTask->id.taskId)) && + pTask->status.appendTranstateBlock == true); STimeWindow* pTimeWindow = &pStreamTask->dataRange.window; From e463e0690d23debc2d98fe105b487b51d1fac758 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Tue, 17 Oct 2023 16:02:22 +0800 Subject: [PATCH 57/61] enh: check existence of files properly in multilevel.py --- tests/system-test/0-others/multilevel.py | 43 ++++++++++++++++++------ 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/tests/system-test/0-others/multilevel.py b/tests/system-test/0-others/multilevel.py index 66434fff67..def2c3152b 100644 --- a/tests/system-test/0-others/multilevel.py +++ b/tests/system-test/0-others/multilevel.py @@ -17,6 +17,28 @@ from util.cases import * from util.sql import * from util.common import * from util.sqlset import * +import glob + +def scanFiles(pattern): + res = [] + for f in glob.iglob(pattern): + res += [f] + return res + +def checkFiles(pattern, state): + res = scanFiles(pattern) + tdLog.info(res) + num = len(res) + 
if num: + if state: + tdLog.info("%s: %d files exist. expect: files exist" % (pattern, num)) + else: + tdLog.exit("%s: %d files exist. expect: files not exist." % (pattern, num)) + else: + if state: + tdLog.exit("%s: %d files exist. expect: files exist" % (pattern, num)) + else: + tdLog.info("%s: %d files exist. expect: files not exist." % (pattern, num)) class TDTestCase: def init(self, conn, logSql, replicaVar=1): @@ -41,8 +63,8 @@ class TDTestCase: tdDnodes.start(1) tdLog.info("================= step2") - tdSql.haveFile('/mnt/data1/',1) - tdSql.haveFile('/mnt/data2/',0) + checkFiles(r'/mnt/data1/*/*',1) + checkFiles(r'/mnt/data2/*/*',0) tdDnodes.stop(1) def dir_not_exist(self): tdLog.info("============== dir_not_exist test ===============") @@ -156,9 +178,9 @@ class TDTestCase: tdDnodes.start(1) for i in dir_list: if i == '/mnt/data000': - tdSql.haveFile(i,1) + checkFiles("%s/*/*" % i, 1) else: - tdSql.haveFile(i,0) + checkFiles("%s/*/*" % i, 0) def more_than_16_disks(self): tdLog.info("============== more_than_16_disks test ===============") @@ -223,7 +245,8 @@ class TDTestCase: for i in range(10,30): tdSql.execute(f'insert into tb1 values(now-{i}d,10)') tdSql.execute('flush database dbtest') - tdSql.haveFile('/mnt/data1/',1) + time.sleep(3) + checkFiles('/mnt/data1/vnode/*/tsdb/v*',1) tdDnodes.stop(1) cfg={ '/mnt/data1 0 1' : 'dataDir', @@ -234,14 +257,14 @@ class TDTestCase: tdSql.createDir('/mnt/data3') tdDnodes.deploy(1,cfg) tdDnodes.start(1) - tdSql.haveFile('/mnt/data1/',1) - tdSql.haveFile('/mnt/data2/',0) - tdSql.haveFile('/mnt/data3/',0) + checkFiles('/mnt/data1/vnode/*/tsdb/v*',1) + checkFiles('/mnt/data2/vnode/*/tsdb/v*',0) + checkFiles('/mnt/data3/vnode/*/tsdb/v*',0) tdSql.execute('alter database dbtest keep 10d,365d,3650d') tdSql.execute('trim database dbtest') time.sleep(3) - tdSql.haveFile('/mnt/data1/',1) - tdSql.haveFile('/mnt/data2/',1) + checkFiles('/mnt/data1/vnode/*/tsdb/v*',1) + checkFiles('/mnt/data2/vnode/*/tsdb/v*',1) def run(self): 
self.basic() From a95f6e686287eea552faccae5d4ef216b29c1864 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Wed, 11 Oct 2023 20:49:43 +0800 Subject: [PATCH 58/61] feat: support restore dnode with vnodes of replaced disks for primary dirs --- source/dnode/mgmt/mgmt_vnode/inc/vmInt.h | 1 + source/dnode/mgmt/mgmt_vnode/src/vmHandle.c | 37 +++++++++++++-------- source/dnode/mgmt/mgmt_vnode/src/vmInt.c | 32 +++++++++++++----- source/dnode/mgmt/mgmt_vnode/src/vmWorker.c | 10 +++--- source/dnode/vnode/src/vnd/vnodeOpen.c | 7 +++- 5 files changed, 60 insertions(+), 27 deletions(-) diff --git a/source/dnode/mgmt/mgmt_vnode/inc/vmInt.h b/source/dnode/mgmt/mgmt_vnode/inc/vmInt.h index cddf132bce..34f2b5c446 100644 --- a/source/dnode/mgmt/mgmt_vnode/inc/vmInt.h +++ b/source/dnode/mgmt/mgmt_vnode/inc/vmInt.h @@ -56,6 +56,7 @@ typedef struct { int32_t vgVersion; int32_t refCount; int8_t dropped; + int8_t failed; int8_t disable; int32_t diskPrimary; int32_t toVgId; diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c index b4fe824466..3d7f2b9e9e 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c @@ -30,9 +30,11 @@ void vmGetVnodeLoads(SVnodeMgmt *pMgmt, SMonVloadInfo *pInfo, bool isReset) { if (ppVnode == NULL || *ppVnode == NULL) continue; SVnodeObj *pVnode = *ppVnode; - SVnodeLoad vload = {0}; - vnodeGetLoad(pVnode->pImpl, &vload); - if (isReset) vnodeResetLoad(pVnode->pImpl, &vload); + SVnodeLoad vload = {.vgId = pVnode->vgId}; + if (!pVnode->failed) { + vnodeGetLoad(pVnode->pImpl, &vload); + if (isReset) vnodeResetLoad(pVnode->pImpl, &vload); + } taosArrayPush(pInfo->pVloads, &vload); pIter = taosHashIterate(pMgmt->hash, pIter); } @@ -52,9 +54,11 @@ void vmGetVnodeLoadsLite(SVnodeMgmt *pMgmt, SMonVloadInfo *pInfo) { if (ppVnode == NULL || *ppVnode == NULL) continue; SVnodeObj *pVnode = *ppVnode; - SVnodeLoadLite vload = {0}; - if (vnodeGetLoadLite(pVnode->pImpl, &vload) 
== 0) { - taosArrayPush(pInfo->pVloads, &vload); + if (!pVnode->failed) { + SVnodeLoadLite vload = {0}; + if (vnodeGetLoadLite(pVnode->pImpl, &vload) == 0) { + taosArrayPush(pInfo->pVloads, &vload); + } } pIter = taosHashIterate(pMgmt->hash, pIter); } @@ -278,7 +282,7 @@ int32_t vmProcessCreateVnodeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { vmGenerateWrapperCfg(pMgmt, &req, &wrapperCfg); SVnodeObj *pVnode = vmAcquireVnode(pMgmt, req.vgId); - if (pVnode != NULL) { + if (pVnode != NULL && !pVnode->failed) { dError("vgId:%d, already exist", req.vgId); tFreeSCreateVnodeReq(&req); vmReleaseVnode(pMgmt, pVnode); @@ -287,7 +291,9 @@ int32_t vmProcessCreateVnodeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { return 0; } - wrapperCfg.diskPrimary = vmAllocPrimaryDisk(pMgmt, vnodeCfg.vgId); + ASSERT(pVnode == NULL || pVnode->failed); + + wrapperCfg.diskPrimary = pVnode ? pVnode->diskPrimary : vmAllocPrimaryDisk(pMgmt, vnodeCfg.vgId); int32_t diskPrimary = wrapperCfg.diskPrimary; snprintf(path, TSDB_FILENAME_LEN, "vnode%svnode%d", TD_DIRSEP, vnodeCfg.vgId); @@ -364,9 +370,10 @@ int32_t vmProcessAlterVnodeTypeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { TMSG_INFO(pMsg->msgType)); SVnodeObj *pVnode = vmAcquireVnode(pMgmt, req.vgId); - if (pVnode == NULL) { + if (pVnode == NULL || pVnode->failed) { dError("vgId:%d, failed to alter vnode type since %s", req.vgId, terrstr()); terrno = TSDB_CODE_VND_NOT_EXIST; + if (pVnode) vmReleaseVnode(pMgmt, pVnode); return -1; } @@ -481,9 +488,10 @@ int32_t vmProcessCheckLearnCatchupReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { req.vgId, TMSG_INFO(pMsg->msgType)); SVnodeObj *pVnode = vmAcquireVnode(pMgmt, req.vgId); - if (pVnode == NULL) { + if (pVnode == NULL || pVnode->failed) { dError("vgId:%d, failed to alter vnode type since %s", req.vgId, terrstr()); terrno = TSDB_CODE_VND_NOT_EXIST; + if (pVnode) vmReleaseVnode(pMgmt, pVnode); return -1; } @@ -523,9 +531,10 @@ int32_t vmProcessDisableVnodeWriteReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { dInfo("vgId:%d, vnode 
write disable:%d", req.vgId, req.disable); SVnodeObj *pVnode = vmAcquireVnode(pMgmt, req.vgId); - if (pVnode == NULL) { + if (pVnode == NULL || pVnode->failed) { dError("vgId:%d, failed to disable write since %s", req.vgId, terrstr()); terrno = TSDB_CODE_VND_NOT_EXIST; + if (pVnode) vmReleaseVnode(pMgmt, pVnode); return -1; } @@ -555,9 +564,10 @@ int32_t vmProcessAlterHashRangeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { dInfo("vgId:%d, start to alter vnode hashrange:[%u, %u], dstVgId:%d", req.srcVgId, req.hashBegin, req.hashEnd, req.dstVgId); pVnode = vmAcquireVnode(pMgmt, srcVgId); - if (pVnode == NULL) { + if (pVnode == NULL || pVnode->failed) { dError("vgId:%d, failed to alter hashrange since %s", srcVgId, terrstr()); terrno = TSDB_CODE_VND_NOT_EXIST; + if (pVnode) vmReleaseVnode(pMgmt, pVnode); return -1; } @@ -669,9 +679,10 @@ int32_t vmProcessAlterVnodeReplicaReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { } SVnodeObj *pVnode = vmAcquireVnode(pMgmt, vgId); - if (pVnode == NULL) { + if (pVnode == NULL || pVnode->failed) { dError("vgId:%d, failed to alter replica since %s", vgId, terrstr()); terrno = TSDB_CODE_VND_NOT_EXIST; + if (pVnode) vmReleaseVnode(pMgmt, pVnode); return -1; } diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmInt.c b/source/dnode/mgmt/mgmt_vnode/src/vmInt.c index 963bfa3197..973c45eda7 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmInt.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmInt.c @@ -112,6 +112,7 @@ int32_t vmOpenVnode(SVnodeMgmt *pMgmt, SWrapperCfg *pCfg, SVnode *pImpl) { pVnode->diskPrimary = pCfg->diskPrimary; pVnode->refCount = 0; pVnode->dropped = 0; + pVnode->failed = 0; pVnode->path = taosStrdup(pCfg->path); pVnode->pImpl = pImpl; @@ -121,11 +122,15 @@ int32_t vmOpenVnode(SVnodeMgmt *pMgmt, SWrapperCfg *pCfg, SVnode *pImpl) { return -1; } - if (vmAllocQueue(pMgmt, pVnode) != 0) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - taosMemoryFree(pVnode->path); - taosMemoryFree(pVnode); - return -1; + if (pImpl) { + if (vmAllocQueue(pMgmt, pVnode) != 0) { 
+ terrno = TSDB_CODE_OUT_OF_MEMORY; + taosMemoryFree(pVnode->path); + taosMemoryFree(pVnode); + return -1; + } + } else { + pVnode->failed = 1; } taosThreadRwlockWrlock(&pMgmt->lock); @@ -271,8 +276,10 @@ static void *vmOpenVnodeInThread(void *param) { if (pImpl == NULL) { dError("vgId:%d, failed to open vnode by thread:%d since %s", pCfg->vgId, pThread->threadIndex, terrstr()); - pThread->failed++; - continue; + if (terrno != TSDB_CODE_VND_NOT_EXIST) { + pThread->failed++; + continue; + } } if (vmOpenVnode(pMgmt, pCfg, pImpl) != 0) { @@ -379,6 +386,7 @@ static void *vmCloseVnodeInThread(void *param) { for (int32_t v = 0; v < pThread->vnodeNum; ++v) { SVnodeObj *pVnode = pThread->ppVnodes[v]; + if (pVnode->failed) continue; char stepDesc[TSDB_STEP_DESC_LEN] = {0}; snprintf(stepDesc, TSDB_STEP_DESC_LEN, "vgId:%d, start to close, %d of %d have been closed", pVnode->vgId, @@ -473,7 +481,9 @@ static void vmCheckSyncTimeout(SVnodeMgmt *pMgmt) { if (ppVnodes != NULL) { for (int32_t i = 0; i < numOfVnodes; ++i) { SVnodeObj *pVnode = ppVnodes[i]; - vnodeSyncCheckTimeout(pVnode->pImpl); + if (!pVnode->failed) { + vnodeSyncCheckTimeout(pVnode->pImpl); + } vmReleaseVnode(pMgmt, pVnode); } taosMemoryFree(ppVnodes); @@ -605,6 +615,12 @@ static void *vmRestoreVnodeInThread(void *param) { for (int32_t v = 0; v < pThread->vnodeNum; ++v) { SVnodeObj *pVnode = pThread->ppVnodes[v]; + if (pVnode->failed) { + dError("vgId:%d, skip restoring vnode in failure mode.", pVnode->vgId); + continue; + } + + ASSERT(pVnode->pImpl); char stepDesc[TSDB_STEP_DESC_LEN] = {0}; snprintf(stepDesc, TSDB_STEP_DESC_LEN, "vgId:%d, start to restore, %d of %d have been restored", pVnode->vgId, diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c b/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c index 696107ca90..4b18ec4fb0 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmWorker.c @@ -187,9 +187,9 @@ static int32_t vmPutMsgToQueue(SVnodeMgmt *pMgmt, SRpcMsg 
*pMsg, EQueueType qtyp pHead->vgId = ntohl(pHead->vgId); SVnodeObj *pVnode = vmAcquireVnode(pMgmt, pHead->vgId); - if (pVnode == NULL) { - dGWarn("vgId:%d, msg:%p failed to put into vnode queue since %s, type:%s qtype:%d contLen:%d", pHead->vgId, pMsg, - terrstr(), TMSG_INFO(pMsg->msgType), qtype, pHead->contLen); + if (pVnode == NULL || pVnode->failed) { + dGDebug("vgId:%d, msg:%p failed to put into vnode queue since %s, type:%s qtype:%d contLen:%d", pHead->vgId, pMsg, + terrstr(), TMSG_INFO(pMsg->msgType), qtype, pHead->contLen); terrno = (terrno != 0) ? terrno : -1; return terrno; } @@ -316,7 +316,7 @@ int32_t vmPutRpcMsgToQueue(SVnodeMgmt *pMgmt, EQueueType qtype, SRpcMsg *pRpc) { int32_t vmGetQueueSize(SVnodeMgmt *pMgmt, int32_t vgId, EQueueType qtype) { int32_t size = -1; SVnodeObj *pVnode = vmAcquireVnode(pMgmt, vgId); - if (pVnode != NULL) { + if (pVnode != NULL && !pVnode->failed) { switch (qtype) { case WRITE_QUEUE: size = taosQueueItemSize(pVnode->pWriteW.queue); @@ -339,8 +339,8 @@ int32_t vmGetQueueSize(SVnodeMgmt *pMgmt, int32_t vgId, EQueueType qtype) { default: break; } - vmReleaseVnode(pMgmt, pVnode); } + if (pVnode) vmReleaseVnode(pMgmt, pVnode); if (size < 0) { dTrace("vgId:%d, can't get size from queue since %s, qtype:%d", vgId, terrstr(), qtype); size = 0; diff --git a/source/dnode/vnode/src/vnd/vnodeOpen.c b/source/dnode/vnode/src/vnd/vnodeOpen.c index 28d1e171d8..f9499cda6d 100644 --- a/source/dnode/vnode/src/vnd/vnodeOpen.c +++ b/source/dnode/vnode/src/vnd/vnodeOpen.c @@ -329,6 +329,7 @@ SVnode *vnodeOpen(const char *path, int32_t diskPrimary, STfs *pTfs, SMsgCb msgC char dir[TSDB_FILENAME_LEN] = {0}; char tdir[TSDB_FILENAME_LEN * 2] = {0}; int32_t ret = 0; + terrno = TSDB_CODE_SUCCESS; if (vnodeCheckDisk(diskPrimary, pTfs)) { vError("failed to open vnode from %s since %s. 
diskPrimary:%d", path, terrstr(), diskPrimary); @@ -342,6 +343,7 @@ SVnode *vnodeOpen(const char *path, int32_t diskPrimary, STfs *pTfs, SMsgCb msgC ret = vnodeLoadInfo(dir, &info); if (ret < 0) { vError("failed to open vnode from %s since %s", path, tstrerror(terrno)); + terrno = TSDB_CODE_VND_NOT_EXIST; return NULL; } @@ -514,7 +516,10 @@ void vnodeClose(SVnode *pVnode) { } // start the sync timer after the queue is ready -int32_t vnodeStart(SVnode *pVnode) { return vnodeSyncStart(pVnode); } +int32_t vnodeStart(SVnode *pVnode) { + ASSERT(pVnode); + return vnodeSyncStart(pVnode); +} int32_t vnodeIsCatchUp(SVnode *pVnode) { return syncIsCatchUp(pVnode->sync); } From a515f8a94f576c2b1ec7f85df2c39e137417ba43 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Wed, 18 Oct 2023 14:40:00 +0800 Subject: [PATCH 59/61] feat: use vnode config info if existing during vnodeCreate --- source/dnode/vnode/src/vnd/vnodeOpen.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/source/dnode/vnode/src/vnd/vnodeOpen.c b/source/dnode/vnode/src/vnd/vnodeOpen.c index f9499cda6d..7ba542cbf1 100644 --- a/source/dnode/vnode/src/vnd/vnodeOpen.c +++ b/source/dnode/vnode/src/vnd/vnodeOpen.c @@ -64,6 +64,13 @@ int32_t vnodeCreate(const char *path, SVnodeCfg *pCfg, int32_t diskPrimary, STfs info.state.applied = -1; info.state.commitID = 0; + SVnodeInfo oldInfo = {0}; + oldInfo.config = vnodeCfgDefault; + if (vnodeLoadInfo(dir, &oldInfo) == 0) { + vWarn("vgId:%d, vnode config info already exists at %s.", oldInfo.config.vgId, dir); + return (oldInfo.config.dbId == info.config.dbId) ? 0 : -1; + } + vInfo("vgId:%d, save config while create", info.config.vgId); if (vnodeSaveInfo(dir, &info) < 0 || vnodeCommitInfo(dir) < 0) { vError("vgId:%d, failed to save vnode config since %s", pCfg ? 
pCfg->vgId : 0, tstrerror(terrno)); From a2e0480839c932ae45d12509c4519575c224255f Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Wed, 18 Oct 2023 16:09:51 +0800 Subject: [PATCH 60/61] refact: rename as SYNC_FSM_STATE_COMPLETE --- include/libs/sync/sync.h | 2 +- source/dnode/vnode/src/vnd/vnodeSync.c | 2 +- source/libs/sync/src/syncAppendEntries.c | 2 +- source/libs/sync/src/syncMain.c | 2 +- source/libs/sync/src/syncPipeline.c | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/libs/sync/sync.h b/include/libs/sync/sync.h index 71c56e8c86..ad525a2aa7 100644 --- a/include/libs/sync/sync.h +++ b/include/libs/sync/sync.h @@ -87,7 +87,7 @@ typedef enum { } ESyncRole; typedef enum { - SYNC_FSM_STATE_NORMAL = 0, + SYNC_FSM_STATE_COMPLETE = 0, SYNC_FSM_STATE_INCOMPLETE, } ESyncFsmState; diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index 86fb7cb55b..6c03ed68e9 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -791,7 +791,7 @@ int32_t vnodeGetSnapshot(SVnode *pVnode, SSnapshot *pSnap) { pSnap->lastApplyIndex = pVnode->state.committed; pSnap->lastApplyTerm = pVnode->state.commitTerm; pSnap->lastConfigIndex = -1; - pSnap->state = SYNC_FSM_STATE_NORMAL; + pSnap->state = SYNC_FSM_STATE_COMPLETE; if (tsdbSnapGetFsState(pVnode) != TSDB_FS_STATE_NORMAL) { pSnap->state = SYNC_FSM_STATE_INCOMPLETE; diff --git a/source/libs/sync/src/syncAppendEntries.c b/source/libs/sync/src/syncAppendEntries.c index 647b86bae9..51a0679889 100644 --- a/source/libs/sync/src/syncAppendEntries.c +++ b/source/libs/sync/src/syncAppendEntries.c @@ -155,7 +155,7 @@ int32_t syncNodeOnAppendEntries(SSyncNode* ths, const SRpcMsg* pRpcMsg) { pMsg->vgId, pMsg->prevLogIndex + 1, pMsg->term, pMsg->prevLogIndex, pMsg->prevLogTerm, pMsg->commitIndex, pEntry->term); - if (ths->fsmState != SYNC_FSM_STATE_NORMAL) { + if (ths->fsmState == SYNC_FSM_STATE_INCOMPLETE) { pReply->fsmState = 
ths->fsmState; sWarn("vgId:%d, unable to accept, due to incomplete fsm state. index:%" PRId64, ths->vgId, pEntry->index); syncEntryDestroy(pEntry); diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index eaaccecf90..f9dc10da02 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -1010,7 +1010,7 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo, int32_t vnodeVersion) { sNTrace(pSyncNode, "reset commit index by snapshot"); } pSyncNode->fsmState = snapshot.state; - if (pSyncNode->fsmState != SYNC_FSM_STATE_NORMAL) { + if (pSyncNode->fsmState == SYNC_FSM_STATE_INCOMPLETE) { sError("vgId:%d, fsm state is incomplete.", pSyncNode->vgId); if (pSyncNode->replicaNum == 1) { goto _error; diff --git a/source/libs/sync/src/syncPipeline.c b/source/libs/sync/src/syncPipeline.c index a38d67a388..a7ee37cc3b 100644 --- a/source/libs/sync/src/syncPipeline.c +++ b/source/libs/sync/src/syncPipeline.c @@ -840,8 +840,8 @@ int32_t syncLogReplRecover(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncAppendEn } if (pMsg->fsmState == SYNC_FSM_STATE_INCOMPLETE || (!pMsg->success && pMsg->matchIndex >= pMsg->lastSendIndex)) { - char* msg1 = "rollback match index failure"; - char* msg2 = "incomplete fsm state"; + char* msg1 = " rollback match index failure"; + char* msg2 = " incomplete fsm state"; sInfo("vgId:%d, snapshot replication to dnode:%d. reason:%s, match index:%" PRId64 ", last sent:%" PRId64, pNode->vgId, DID(&destId), (pMsg->fsmState == SYNC_FSM_STATE_INCOMPLETE ? 
msg2 : msg1), pMsg->matchIndex, pMsg->lastSendIndex); From 959f8105ee7a641dc9ede2d9487b73f45b5614a0 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Thu, 19 Oct 2023 18:55:12 +0800 Subject: [PATCH 61/61] enh: require command to trigger repairing vnodes on replaced disks --- source/dnode/mgmt/mgmt_vnode/src/vmHandle.c | 8 ++++---- source/dnode/mgmt/mgmt_vnode/src/vmInt.c | 4 ++-- source/dnode/vnode/inc/vnode.h | 2 +- source/dnode/vnode/src/inc/vnodeInt.h | 4 ++-- source/dnode/vnode/src/sma/smaOpen.c | 12 ++++++------ source/dnode/vnode/src/tsdb/tsdbOpen.c | 7 ++++++- source/dnode/vnode/src/vnd/vnodeOpen.c | 8 ++++---- 7 files changed, 25 insertions(+), 20 deletions(-) diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c index 3d7f2b9e9e..cc542f51ce 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c @@ -305,7 +305,7 @@ int32_t vmProcessCreateVnodeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { goto _OVER; } - SVnode *pImpl = vnodeOpen(path, diskPrimary, pMgmt->pTfs, pMgmt->msgCb); + SVnode *pImpl = vnodeOpen(path, diskPrimary, pMgmt->pTfs, pMgmt->msgCb, true); if (pImpl == NULL) { dError("vgId:%d, failed to open vnode since %s", req.vgId, terrstr()); code = terrno; @@ -452,7 +452,7 @@ int32_t vmProcessAlterVnodeTypeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { } dInfo("vgId:%d, begin to open vnode", vgId); - SVnode *pImpl = vnodeOpen(path, diskPrimary, pMgmt->pTfs, pMgmt->msgCb); + SVnode *pImpl = vnodeOpen(path, diskPrimary, pMgmt->pTfs, pMgmt->msgCb, false); if (pImpl == NULL) { dError("vgId:%d, failed to open vnode at %s since %s", vgId, path, terrstr()); return -1; @@ -602,7 +602,7 @@ int32_t vmProcessAlterHashRangeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { } dInfo("vgId:%d, open vnode", dstVgId); - SVnode *pImpl = vnodeOpen(dstPath, diskPrimary, pMgmt->pTfs, pMgmt->msgCb); + SVnode *pImpl = vnodeOpen(dstPath, diskPrimary, pMgmt->pTfs, pMgmt->msgCb, false); if (pImpl == 
NULL) { dError("vgId:%d, failed to open vnode at %s since %s", dstVgId, dstPath, terrstr()); @@ -707,7 +707,7 @@ int32_t vmProcessAlterVnodeReplicaReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { } dInfo("vgId:%d, begin to open vnode", vgId); - SVnode *pImpl = vnodeOpen(path, diskPrimary, pMgmt->pTfs, pMgmt->msgCb); + SVnode *pImpl = vnodeOpen(path, diskPrimary, pMgmt->pTfs, pMgmt->msgCb, false); if (pImpl == NULL) { dError("vgId:%d, failed to open vnode at %s since %s", vgId, path, terrstr()); return -1; diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmInt.c b/source/dnode/mgmt/mgmt_vnode/src/vmInt.c index 973c45eda7..d2093ff77c 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmInt.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmInt.c @@ -272,11 +272,11 @@ static void *vmOpenVnodeInThread(void *param) { int32_t diskPrimary = pCfg->diskPrimary; snprintf(path, TSDB_FILENAME_LEN, "vnode%svnode%d", TD_DIRSEP, pCfg->vgId); - SVnode *pImpl = vnodeOpen(path, diskPrimary, pMgmt->pTfs, pMgmt->msgCb); + SVnode *pImpl = vnodeOpen(path, diskPrimary, pMgmt->pTfs, pMgmt->msgCb, false); if (pImpl == NULL) { dError("vgId:%d, failed to open vnode by thread:%d since %s", pCfg->vgId, pThread->threadIndex, terrstr()); - if (terrno != TSDB_CODE_VND_NOT_EXIST) { + if (terrno != TSDB_CODE_NEED_RETRY) { pThread->failed++; continue; } diff --git a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h index a120ecf9db..6a0c991be4 100644 --- a/source/dnode/vnode/inc/vnode.h +++ b/source/dnode/vnode/inc/vnode.h @@ -58,7 +58,7 @@ int32_t vnodeAlterHashRange(const char *srcPath, const char *dstPath, SAlterVnod int32_t vnodeRestoreVgroupId(const char *srcPath, const char *dstPath, int32_t srcVgId, int32_t dstVgId, int32_t diskPrimary, STfs *pTfs); void vnodeDestroy(int32_t vgId, const char *path, STfs *pTfs); -SVnode *vnodeOpen(const char *path, int32_t diskPrimary, STfs *pTfs, SMsgCb msgCb); +SVnode *vnodeOpen(const char *path, int32_t diskPrimary, STfs *pTfs, SMsgCb msgCb, bool force); void 
vnodePreClose(SVnode *pVnode); void vnodePostClose(SVnode *pVnode); void vnodeSyncCheckTimeout(SVnode *pVnode); diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index 68334b3b63..12e273c32d 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -202,7 +202,7 @@ typedef struct SMetaInfo { int32_t metaGetInfo(SMeta* pMeta, int64_t uid, SMetaInfo* pInfo, SMetaReader* pReader); // tsdb -int tsdbOpen(SVnode* pVnode, STsdb** ppTsdb, const char* dir, STsdbKeepCfg* pKeepCfg, int8_t rollback); +int tsdbOpen(SVnode* pVnode, STsdb** ppTsdb, const char* dir, STsdbKeepCfg* pKeepCfg, int8_t rollback, bool force); int tsdbClose(STsdb** pTsdb); int32_t tsdbBegin(STsdb* pTsdb); // int32_t tsdbPrepareCommit(STsdb* pTsdb); @@ -267,7 +267,7 @@ int32_t tqProcessTaskScanHistoryFinishRsp(STQ* pTq, SRpcMsg* pMsg); // sma int32_t smaInit(); void smaCleanUp(); -int32_t smaOpen(SVnode* pVnode, int8_t rollback); +int32_t smaOpen(SVnode* pVnode, int8_t rollback, bool force); int32_t smaClose(SSma* pSma); int32_t smaBegin(SSma* pSma); int32_t smaPrepareAsyncCommit(SSma* pSma); diff --git a/source/dnode/vnode/src/sma/smaOpen.c b/source/dnode/vnode/src/sma/smaOpen.c index 09929d138e..49f25c0b0a 100644 --- a/source/dnode/vnode/src/sma/smaOpen.c +++ b/source/dnode/vnode/src/sma/smaOpen.c @@ -30,7 +30,7 @@ static int32_t rsmaRestore(SSma *pSma); pKeepCfg->keepTimeOffset = 0; \ } while (0) -#define SMA_OPEN_RSMA_IMPL(v, l) \ +#define SMA_OPEN_RSMA_IMPL(v, l, force) \ do { \ SRetention *r = (SRetention *)VND_RETENTIONS(v) + l; \ if (!RETENTION_VALID(r)) { \ @@ -42,7 +42,7 @@ static int32_t rsmaRestore(SSma *pSma); } \ code = smaSetKeepCfg(v, &keepCfg, pCfg, TSDB_TYPE_RSMA_L##l); \ TSDB_CHECK_CODE(code, lino, _exit); \ - if (tsdbOpen(v, &SMA_RSMA_TSDB##l(pSma), VNODE_RSMA##l##_DIR, &keepCfg, rollback) < 0) { \ + if (tsdbOpen(v, &SMA_RSMA_TSDB##l(pSma), VNODE_RSMA##l##_DIR, &keepCfg, rollback, force) < 0) { \ code = 
terrno; \ TSDB_CHECK_CODE(code, lino, _exit); \ } \ @@ -118,7 +118,7 @@ int smaSetKeepCfg(SVnode *pVnode, STsdbKeepCfg *pKeepCfg, STsdbCfg *pCfg, int ty return terrno; } -int32_t smaOpen(SVnode *pVnode, int8_t rollback) { +int32_t smaOpen(SVnode *pVnode, int8_t rollback, bool force) { int32_t code = 0; int32_t lino = 0; STsdbCfg *pCfg = &pVnode->config.tsdbCfg; @@ -139,11 +139,11 @@ int32_t smaOpen(SVnode *pVnode, int8_t rollback) { STsdbKeepCfg keepCfg = {0}; for (int32_t i = 0; i < TSDB_RETENTION_MAX; ++i) { if (i == TSDB_RETENTION_L0) { - SMA_OPEN_RSMA_IMPL(pVnode, 0); + SMA_OPEN_RSMA_IMPL(pVnode, 0, force); } else if (i == TSDB_RETENTION_L1) { - SMA_OPEN_RSMA_IMPL(pVnode, 1); + SMA_OPEN_RSMA_IMPL(pVnode, 1, force); } else if (i == TSDB_RETENTION_L2) { - SMA_OPEN_RSMA_IMPL(pVnode, 2); + SMA_OPEN_RSMA_IMPL(pVnode, 2, force); } } diff --git a/source/dnode/vnode/src/tsdb/tsdbOpen.c b/source/dnode/vnode/src/tsdb/tsdbOpen.c index b060edbd91..6dd66c7a40 100644 --- a/source/dnode/vnode/src/tsdb/tsdbOpen.c +++ b/source/dnode/vnode/src/tsdb/tsdbOpen.c @@ -35,7 +35,7 @@ int32_t tsdbSetKeepCfg(STsdb *pTsdb, STsdbCfg *pCfg) { * @param dir * @return int */ -int tsdbOpen(SVnode *pVnode, STsdb **ppTsdb, const char *dir, STsdbKeepCfg *pKeepCfg, int8_t rollback) { +int tsdbOpen(SVnode *pVnode, STsdb **ppTsdb, const char *dir, STsdbKeepCfg *pKeepCfg, int8_t rollback, bool force) { STsdb *pTsdb = NULL; int slen = 0; @@ -72,6 +72,11 @@ int tsdbOpen(SVnode *pVnode, STsdb **ppTsdb, const char *dir, STsdbKeepCfg *pKee goto _err; } + if (pTsdb->pFS->fsstate == TSDB_FS_STATE_INCOMPLETE && force == false) { + terrno = TSDB_CODE_NEED_RETRY; + goto _err; + } + if (tsdbOpenCache(pTsdb) < 0) { goto _err; } diff --git a/source/dnode/vnode/src/vnd/vnodeOpen.c b/source/dnode/vnode/src/vnd/vnodeOpen.c index 7ba542cbf1..3bdecee79b 100644 --- a/source/dnode/vnode/src/vnd/vnodeOpen.c +++ b/source/dnode/vnode/src/vnd/vnodeOpen.c @@ -330,7 +330,7 @@ static int32_t vnodeCheckDisk(int32_t diskPrimary, 
STfs *pTfs) { return 0; } -SVnode *vnodeOpen(const char *path, int32_t diskPrimary, STfs *pTfs, SMsgCb msgCb) { +SVnode *vnodeOpen(const char *path, int32_t diskPrimary, STfs *pTfs, SMsgCb msgCb, bool force) { SVnode *pVnode = NULL; SVnodeInfo info = {0}; char dir[TSDB_FILENAME_LEN] = {0}; @@ -350,7 +350,7 @@ SVnode *vnodeOpen(const char *path, int32_t diskPrimary, STfs *pTfs, SMsgCb msgC ret = vnodeLoadInfo(dir, &info); if (ret < 0) { vError("failed to open vnode from %s since %s", path, tstrerror(terrno)); - terrno = TSDB_CODE_VND_NOT_EXIST; + terrno = TSDB_CODE_NEED_RETRY; return NULL; } @@ -419,7 +419,7 @@ SVnode *vnodeOpen(const char *path, int32_t diskPrimary, STfs *pTfs, SMsgCb msgC } // open tsdb - if (!VND_IS_RSMA(pVnode) && tsdbOpen(pVnode, &VND_TSDB(pVnode), VNODE_TSDB_DIR, NULL, rollback) < 0) { + if (!VND_IS_RSMA(pVnode) && tsdbOpen(pVnode, &VND_TSDB(pVnode), VNODE_TSDB_DIR, NULL, rollback, force) < 0) { vError("vgId:%d, failed to open vnode tsdb since %s", TD_VID(pVnode), tstrerror(terrno)); goto _err; } @@ -453,7 +453,7 @@ SVnode *vnodeOpen(const char *path, int32_t diskPrimary, STfs *pTfs, SMsgCb msgC } // open sma - if (smaOpen(pVnode, rollback)) { + if (smaOpen(pVnode, rollback, force)) { vError("vgId:%d, failed to open vnode sma since %s", TD_VID(pVnode), tstrerror(terrno)); goto _err; }