From ca7f490e6d6fbf07aa400c1c37d292180985924c Mon Sep 17 00:00:00 2001 From: xiao-77 Date: Tue, 15 Oct 2024 16:53:15 +0800 Subject: [PATCH 1/5] fix invalid snapshotVer while repairing wal meta file --- include/libs/wal/wal.h | 1 + source/dnode/mnode/impl/src/mndMain.c | 1 + source/dnode/vnode/src/vnd/vnodeCfg.c | 1 + source/dnode/vnode/src/vnd/vnodeCommit.c | 1 + source/libs/wal/src/walMeta.c | 6 ++++++ 5 files changed, 10 insertions(+) diff --git a/include/libs/wal/wal.h b/include/libs/wal/wal.h index 74ab0bf484..f95b3f20ca 100644 --- a/include/libs/wal/wal.h +++ b/include/libs/wal/wal.h @@ -50,6 +50,7 @@ typedef struct { int32_t rollPeriod; // secs int64_t retentionSize; int64_t segSize; + int64_t committed; EWalType level; // wal level int32_t encryptAlgorithm; char encryptKey[ENCRYPT_KEY_LEN + 1]; diff --git a/source/dnode/mnode/impl/src/mndMain.c b/source/dnode/mnode/impl/src/mndMain.c index bee971b966..685ad2b7a5 100644 --- a/source/dnode/mnode/impl/src/mndMain.c +++ b/source/dnode/mnode/impl/src/mndMain.c @@ -515,6 +515,7 @@ static int32_t mndInitWal(SMnode *pMnode) { .fsyncPeriod = 0, .rollPeriod = -1, .segSize = -1, + .committed = -1, .retentionPeriod = 0, .retentionSize = 0, .level = TAOS_WAL_FSYNC, diff --git a/source/dnode/vnode/src/vnd/vnodeCfg.c b/source/dnode/vnode/src/vnd/vnodeCfg.c index d3acea4766..7c789e84ae 100644 --- a/source/dnode/vnode/src/vnd/vnodeCfg.c +++ b/source/dnode/vnode/src/vnd/vnodeCfg.c @@ -45,6 +45,7 @@ const SVnodeCfg vnodeCfgDefault = {.vgId = -1, .retentionPeriod = -1, .rollPeriod = 0, .segSize = 0, + .committed = 0, .retentionSize = -1, .level = TAOS_WAL_WRITE, .clearFiles = 0, diff --git a/source/dnode/vnode/src/vnd/vnodeCommit.c b/source/dnode/vnode/src/vnd/vnodeCommit.c index 4a4d305f25..dae2b3a5ec 100644 --- a/source/dnode/vnode/src/vnd/vnodeCommit.c +++ b/source/dnode/vnode/src/vnd/vnodeCommit.c @@ -257,6 +257,7 @@ int vnodeLoadInfo(const char *dir, SVnodeInfo *pInfo) { code = vnodeDecodeInfo(pData, pInfo); 
TSDB_CHECK_CODE(code, lino, _exit); + pInfo->config.walCfg.committed = pInfo->state.committed; _exit: if (code) { if (pFile) { diff --git a/source/libs/wal/src/walMeta.c b/source/libs/wal/src/walMeta.c index 9ade5e5638..8649581d5d 100644 --- a/source/libs/wal/src/walMeta.c +++ b/source/libs/wal/src/walMeta.c @@ -282,6 +282,12 @@ static int32_t walRebuildFileInfoSet(SArray* metaLogList, SArray* actualLogList) } static void walAlignVersions(SWal* pWal) { + if (pWal->cfg.committed > 0 && pWal->cfg.committed != pWal->vers.snapshotVer) { + wWarn("vgId:%d, snapshotVer:%" PRId64 " in wal is different from commited:%" PRId64 + ". in vnode/mnode. align with it.", + pWal->cfg.vgId, pWal->vers.snapshotVer, pWal->cfg.committed); + pWal->vers.snapshotVer = pWal->cfg.committed; + } if (pWal->vers.firstVer > pWal->vers.snapshotVer + 1) { wWarn("vgId:%d, firstVer:%" PRId64 " is larger than snapshotVer:%" PRId64 " + 1. align with it.", pWal->cfg.vgId, pWal->vers.firstVer, pWal->vers.snapshotVer); From 47d39c4ab8d5295750992b7373f57bb4943ec4fd Mon Sep 17 00:00:00 2001 From: xiao-77 Date: Tue, 15 Oct 2024 17:23:28 +0800 Subject: [PATCH 2/5] fix wal test in ci --- source/libs/wal/test/walMetaTest.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/source/libs/wal/test/walMetaTest.cpp b/source/libs/wal/test/walMetaTest.cpp index 8bd4de0a89..a0285f1363 100644 --- a/source/libs/wal/test/walMetaTest.cpp +++ b/source/libs/wal/test/walMetaTest.cpp @@ -127,6 +127,7 @@ class WalRetentionEnv : public ::testing::Test { SWalCfg cfg; cfg.rollPeriod = -1; cfg.segSize = -1; + cfg.committed =-1; cfg.retentionPeriod = -1; cfg.retentionSize = 0; cfg.rollPeriod = 0; From 3310e8145620ff06332d262ad2411ada14bb36ab Mon Sep 17 00:00:00 2001 From: xiao-77 Date: Wed, 16 Oct 2024 10:15:12 +0800 Subject: [PATCH 3/5] make sure mnode can be started --- source/libs/wal/src/walMeta.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/source/libs/wal/src/walMeta.c b/source/libs/wal/src/walMeta.c index 
8649581d5d..042024284c 100644 --- a/source/libs/wal/src/walMeta.c +++ b/source/libs/wal/src/walMeta.c @@ -288,6 +288,11 @@ static void walAlignVersions(SWal* pWal) { pWal->cfg.vgId, pWal->vers.snapshotVer, pWal->cfg.committed); pWal->vers.snapshotVer = pWal->cfg.committed; } + if (pWal->vers.snapshotVer < 0) { + wWarn("vgId:%d, snapshotVer:%" PRId64 " in wal is an invalid value. align it with firstVer:%" PRId64 ".", + pWal->cfg.vgId, pWal->vers.snapshotVer, pWal->vers.firstVer); + pWal->vers.snapshotVer = pWal->vers.firstVer; + } if (pWal->vers.firstVer > pWal->vers.snapshotVer + 1) { wWarn("vgId:%d, firstVer:%" PRId64 " is larger than snapshotVer:%" PRId64 " + 1. align with it.", pWal->cfg.vgId, pWal->vers.firstVer, pWal->vers.snapshotVer); From f206837d48f86fb3102c2e40626a7cbe5cdc6c5f Mon Sep 17 00:00:00 2001 From: xiao-77 Date: Wed, 16 Oct 2024 10:30:27 +0800 Subject: [PATCH 4/5] modify log level while walLoadMeta failed --- source/libs/wal/src/walMgmt.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/source/libs/wal/src/walMgmt.c b/source/libs/wal/src/walMgmt.c index 3b23a2db80..d8a58efe4e 100644 --- a/source/libs/wal/src/walMgmt.c +++ b/source/libs/wal/src/walMgmt.c @@ -91,7 +91,8 @@ static int32_t walInitLock(SWal *pWal) { } SWal *walOpen(const char *path, SWalCfg *pCfg) { - SWal *pWal = taosMemoryCalloc(1, sizeof(SWal)); + int32_t code = 0; + SWal *pWal = taosMemoryCalloc(1, sizeof(SWal)); if (pWal == NULL) { terrno = TAOS_SYSTEM_ERROR(errno); return NULL; @@ -160,17 +161,20 @@ SWal *walOpen(const char *path, SWalCfg *pCfg) { pWal->writeHead.magic = WAL_MAGIC; // load meta - if (walLoadMeta(pWal) < 0) { - wInfo("vgId:%d, failed to load meta since %s", pWal->cfg.vgId, tstrerror(terrno)); + code = walLoadMeta(pWal); + if (code < 0) { + wWarn("vgId:%d, failed to load meta since %s", pWal->cfg.vgId, tstrerror(code)); } - if (walCheckAndRepairMeta(pWal) < 0) { - wError("vgId:%d, cannot open wal since repair meta file failed", 
pWal->cfg.vgId); + code = walCheckAndRepairMeta(pWal); + if (code < 0) { + wError("vgId:%d, cannot open wal since repair meta file failed since %s", pWal->cfg.vgId, tstrerror(code)); goto _err; } - if (walCheckAndRepairIdx(pWal) < 0) { - wError("vgId:%d, cannot open wal since repair idx file failed", pWal->cfg.vgId); + code = walCheckAndRepairIdx(pWal); + if (code < 0) { + wError("vgId:%d, cannot open wal since repair idx file failed since %s", pWal->cfg.vgId, tstrerror(code)); goto _err; } From 48d9f2da65e8aea47a03e44f30084bbac478a06a Mon Sep 17 00:00:00 2001 From: xiao-77 Date: Wed, 16 Oct 2024 13:38:45 +0800 Subject: [PATCH 5/5] fix ci walTest --- source/libs/wal/src/walMeta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/libs/wal/src/walMeta.c b/source/libs/wal/src/walMeta.c index 042024284c..92ad760a20 100644 --- a/source/libs/wal/src/walMeta.c +++ b/source/libs/wal/src/walMeta.c @@ -288,7 +288,7 @@ static void walAlignVersions(SWal* pWal) { pWal->cfg.vgId, pWal->vers.snapshotVer, pWal->cfg.committed); pWal->vers.snapshotVer = pWal->cfg.committed; } - if (pWal->vers.snapshotVer < 0) { + if (pWal->vers.snapshotVer < 0 && pWal->vers.firstVer > 0) { wWarn("vgId:%d, snapshotVer:%" PRId64 " in wal is an invalid value. align it with firstVer:%" PRId64 ".", pWal->cfg.vgId, pWal->vers.snapshotVer, pWal->vers.firstVer); pWal->vers.snapshotVer = pWal->vers.firstVer;