Merge pull request #28372 from taosdata/fix/3.0/TD-32551

Fix invalid snapshotVer while repairing the WAL meta file
This commit is contained in:
Hongze Cheng 2024-10-16 16:49:49 +08:00 committed by GitHub
commit 0c305a0678
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 27 additions and 7 deletions

View File

@ -50,6 +50,7 @@ typedef struct {
int32_t rollPeriod; // secs
int64_t retentionSize;
int64_t segSize;
int64_t committed;
EWalType level; // wal level
int32_t encryptAlgorithm;
char encryptKey[ENCRYPT_KEY_LEN + 1];

View File

@ -515,6 +515,7 @@ static int32_t mndInitWal(SMnode *pMnode) {
.fsyncPeriod = 0,
.rollPeriod = -1,
.segSize = -1,
.committed = -1,
.retentionPeriod = 0,
.retentionSize = 0,
.level = TAOS_WAL_FSYNC,

View File

@ -45,6 +45,7 @@ const SVnodeCfg vnodeCfgDefault = {.vgId = -1,
.retentionPeriod = -1,
.rollPeriod = 0,
.segSize = 0,
.committed = 0,
.retentionSize = -1,
.level = TAOS_WAL_WRITE,
.clearFiles = 0,

View File

@ -257,6 +257,7 @@ int vnodeLoadInfo(const char *dir, SVnodeInfo *pInfo) {
code = vnodeDecodeInfo(pData, pInfo);
TSDB_CHECK_CODE(code, lino, _exit);
pInfo->config.walCfg.committed = pInfo->state.committed;
_exit:
if (code) {
if (pFile) {

View File

@ -282,6 +282,17 @@ static int32_t walRebuildFileInfoSet(SArray* metaLogList, SArray* actualLogList)
}
static void walAlignVersions(SWal* pWal) {
if (pWal->cfg.committed > 0 && pWal->cfg.committed != pWal->vers.snapshotVer) {
wWarn("vgId:%d, snapshotVer:%" PRId64 " in wal is different from commited:%" PRId64
". in vnode/mnode. align with it.",
pWal->cfg.vgId, pWal->vers.snapshotVer, pWal->cfg.committed);
pWal->vers.snapshotVer = pWal->cfg.committed;
}
if (pWal->vers.snapshotVer < 0 && pWal->vers.firstVer > 0) {
wWarn("vgId:%d, snapshotVer:%" PRId64 " in wal is an invalid value. align it with firstVer:%" PRId64 ".",
pWal->cfg.vgId, pWal->vers.snapshotVer, pWal->vers.firstVer);
pWal->vers.snapshotVer = pWal->vers.firstVer;
}
if (pWal->vers.firstVer > pWal->vers.snapshotVer + 1) {
wWarn("vgId:%d, firstVer:%" PRId64 " is larger than snapshotVer:%" PRId64 " + 1. align with it.", pWal->cfg.vgId,
pWal->vers.firstVer, pWal->vers.snapshotVer);

View File

@ -91,7 +91,8 @@ static int32_t walInitLock(SWal *pWal) {
}
SWal *walOpen(const char *path, SWalCfg *pCfg) {
SWal *pWal = taosMemoryCalloc(1, sizeof(SWal));
int32_t code = 0;
SWal *pWal = taosMemoryCalloc(1, sizeof(SWal));
if (pWal == NULL) {
terrno = TAOS_SYSTEM_ERROR(errno);
return NULL;
@ -160,17 +161,20 @@ SWal *walOpen(const char *path, SWalCfg *pCfg) {
pWal->writeHead.magic = WAL_MAGIC;
// load meta
if (walLoadMeta(pWal) < 0) {
wInfo("vgId:%d, failed to load meta since %s", pWal->cfg.vgId, tstrerror(terrno));
code = walLoadMeta(pWal);
if (code < 0) {
wWarn("vgId:%d, failed to load meta since %s", pWal->cfg.vgId, tstrerror(code));
}
if (walCheckAndRepairMeta(pWal) < 0) {
wError("vgId:%d, cannot open wal since repair meta file failed", pWal->cfg.vgId);
code = walCheckAndRepairMeta(pWal);
if (code < 0) {
wError("vgId:%d, cannot open wal since repair meta file failed since %s", pWal->cfg.vgId, tstrerror(code));
goto _err;
}
if (walCheckAndRepairIdx(pWal) < 0) {
wError("vgId:%d, cannot open wal since repair idx file failed", pWal->cfg.vgId);
code = walCheckAndRepairIdx(pWal);
if (code < 0) {
wError("vgId:%d, cannot open wal since repair idx file failed since %s", pWal->cfg.vgId, tstrerror(code));
goto _err;
}

View File

@ -127,6 +127,7 @@ class WalRetentionEnv : public ::testing::Test {
SWalCfg cfg;
cfg.rollPeriod = -1;
cfg.segSize = -1;
cfg.committed =-1;
cfg.retentionPeriod = -1;
cfg.retentionSize = 0;
cfg.rollPeriod = 0;