Merge pull request #16993 from taosdata/feature/wal
feat(wal): auto fix corrupt file
This commit is contained in:
commit
14e1e47062
|
@ -116,7 +116,6 @@ static FORCE_INLINE int64_t walScanLogGetLastVer(SWal* pWal) {
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
// TODO truncate file
|
|
||||||
|
|
||||||
if (found == NULL) {
|
if (found == NULL) {
|
||||||
// file corrupted, no complete log
|
// file corrupted, no complete log
|
||||||
|
@ -125,8 +124,20 @@ static FORCE_INLINE int64_t walScanLogGetLastVer(SWal* pWal) {
|
||||||
terrno = TSDB_CODE_WAL_FILE_CORRUPTED;
|
terrno = TSDB_CODE_WAL_FILE_CORRUPTED;
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// truncate file
|
||||||
SWalCkHead* lastEntry = (SWalCkHead*)found;
|
SWalCkHead* lastEntry = (SWalCkHead*)found;
|
||||||
int64_t retVer = lastEntry->head.version;
|
int64_t retVer = lastEntry->head.version;
|
||||||
|
int64_t lastEntryBeginOffset = offset + (int64_t)((char*)found - (char*)buf);
|
||||||
|
int64_t lastEntryEndOffset = lastEntryBeginOffset + sizeof(SWalCkHead) + lastEntry->head.bodyLen;
|
||||||
|
if (lastEntryEndOffset != fileSize) {
|
||||||
|
wWarn("vgId:%d repair meta truncate file %s to %ld, orig size %ld", pWal->cfg.vgId, fnameStr, lastEntryEndOffset,
|
||||||
|
fileSize);
|
||||||
|
taosFtruncateFile(pFile, lastEntryEndOffset);
|
||||||
|
((SWalFileInfo*)taosArrayGetLast(pWal->fileInfoSet))->fileSize = lastEntryEndOffset;
|
||||||
|
pWal->totSize -= (fileSize - lastEntryEndOffset);
|
||||||
|
}
|
||||||
|
|
||||||
taosCloseFile(&pFile);
|
taosCloseFile(&pFile);
|
||||||
taosMemoryFree(buf);
|
taosMemoryFree(buf);
|
||||||
|
|
||||||
|
@ -226,16 +237,92 @@ int walCheckAndRepairMeta(SWal* pWal) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: set fileSize and lastVer if necessary
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int walCheckAndRepairIdx(SWal* pWal) {
|
int walCheckAndRepairIdx(SWal* pWal) {
|
||||||
// TODO: iterate all log files
|
int32_t sz = taosArrayGetSize(pWal->fileInfoSet);
|
||||||
// if idx not found, scan log and write idx
|
for (int32_t i = 0; i < sz; i++) {
|
||||||
// if found, check complete by first and last entry of each idx file
|
SWalFileInfo* pFileInfo = taosArrayGet(pWal->fileInfoSet, i);
|
||||||
// if idx incomplete, binary search last valid entry, and then build other part
|
|
||||||
|
char fnameStr[WAL_FILE_LEN];
|
||||||
|
walBuildIdxName(pWal, pFileInfo->firstVer, fnameStr);
|
||||||
|
int64_t fsize;
|
||||||
|
TdFilePtr pIdxFile = taosOpenFile(fnameStr, TD_FILE_READ | TD_FILE_WRITE | TD_FILE_CREATE);
|
||||||
|
if (pIdxFile == NULL) {
|
||||||
|
ASSERT(0);
|
||||||
|
terrno = TAOS_SYSTEM_ERROR(errno);
|
||||||
|
wError("vgId:%d, cannot open file %s, since %s", pWal->cfg.vgId, fnameStr, terrstr());
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
taosFStatFile(pIdxFile, &fsize, NULL);
|
||||||
|
if (fsize == (pFileInfo->lastVer - pFileInfo->firstVer + 1) * sizeof(SWalIdxEntry)) {
|
||||||
|
taosCloseFile(&pIdxFile);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
int32_t left = fsize % sizeof(SWalIdxEntry);
|
||||||
|
int64_t offset = taosLSeekFile(pIdxFile, -left, SEEK_END);
|
||||||
|
if (left != 0) {
|
||||||
|
taosFtruncateFile(pIdxFile, offset);
|
||||||
|
wWarn("vgId:%d wal truncate file %s to offset %ld since size invalid, file size %ld", pWal->cfg.vgId, fnameStr,
|
||||||
|
offset, fsize);
|
||||||
|
}
|
||||||
|
offset -= sizeof(SWalIdxEntry);
|
||||||
|
|
||||||
|
SWalIdxEntry idxEntry = {.ver = pFileInfo->firstVer};
|
||||||
|
while (1) {
|
||||||
|
if (offset < 0) {
|
||||||
|
taosLSeekFile(pIdxFile, 0, SEEK_SET);
|
||||||
|
taosWriteFile(pIdxFile, &idxEntry, sizeof(SWalIdxEntry));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
taosLSeekFile(pIdxFile, offset, SEEK_SET);
|
||||||
|
int64_t contLen = taosReadFile(pIdxFile, &idxEntry, sizeof(SWalIdxEntry));
|
||||||
|
if (contLen < 0 || contLen != sizeof(SWalIdxEntry)) {
|
||||||
|
terrno = TAOS_SYSTEM_ERROR(errno);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if ((idxEntry.ver - pFileInfo->firstVer) * sizeof(SWalIdxEntry) != offset) {
|
||||||
|
taosFtruncateFile(pIdxFile, offset);
|
||||||
|
wWarn("vgId:%d wal truncate file %s to offset %ld since entry invalid, entry ver %ld, entry offset %ld",
|
||||||
|
pWal->cfg.vgId, fnameStr, offset, idxEntry.ver, idxEntry.offset);
|
||||||
|
offset -= sizeof(SWalIdxEntry);
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (idxEntry.ver < pFileInfo->lastVer) {
|
||||||
|
char fLogNameStr[WAL_FILE_LEN];
|
||||||
|
walBuildLogName(pWal, pFileInfo->firstVer, fLogNameStr);
|
||||||
|
TdFilePtr pLogFile = taosOpenFile(fLogNameStr, TD_FILE_READ);
|
||||||
|
if (pLogFile == NULL) {
|
||||||
|
terrno = TAOS_SYSTEM_ERROR(errno);
|
||||||
|
wError("vgId:%d, cannot open file %s, since %s", pWal->cfg.vgId, fLogNameStr, terrstr());
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
while (idxEntry.ver < pFileInfo->lastVer) {
|
||||||
|
taosLSeekFile(pLogFile, idxEntry.offset, SEEK_SET);
|
||||||
|
SWalCkHead ckHead;
|
||||||
|
taosReadFile(pLogFile, &ckHead, sizeof(SWalCkHead));
|
||||||
|
if (idxEntry.ver != ckHead.head.version) {
|
||||||
|
// todo truncate this idx also
|
||||||
|
taosCloseFile(&pLogFile);
|
||||||
|
wError("vgId:%d, invalid repair case, log seek to %ld to find ver %ld, actual ver %ld", pWal->cfg.vgId,
|
||||||
|
idxEntry.offset, idxEntry.ver, ckHead.head.version);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
idxEntry.ver = ckHead.head.version + 1;
|
||||||
|
idxEntry.offset = idxEntry.offset + sizeof(SWalCkHead) + ckHead.head.bodyLen;
|
||||||
|
wWarn("vgId:%d wal idx append new entry %ld %ld", pWal->cfg.vgId, idxEntry.ver, idxEntry.offset);
|
||||||
|
taosWriteFile(pIdxFile, &idxEntry, sizeof(SWalIdxEntry));
|
||||||
|
}
|
||||||
|
taosCloseFile(&pLogFile);
|
||||||
|
}
|
||||||
|
taosCloseFile(&pIdxFile);
|
||||||
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -149,15 +149,21 @@ SWal *walOpen(const char *path, SWalCfg *pCfg) {
|
||||||
walLoadMeta(pWal);
|
walLoadMeta(pWal);
|
||||||
|
|
||||||
if (walCheckAndRepairMeta(pWal) < 0) {
|
if (walCheckAndRepairMeta(pWal) < 0) {
|
||||||
|
wError("vgId:%d cannot open wal since repair meta file failed", pWal->cfg.vgId);
|
||||||
taosHashCleanup(pWal->pRefHash);
|
taosHashCleanup(pWal->pRefHash);
|
||||||
taosRemoveRef(tsWal.refSetId, pWal->refId);
|
taosRemoveRef(tsWal.refSetId, pWal->refId);
|
||||||
taosThreadMutexDestroy(&pWal->mutex);
|
taosThreadMutexDestroy(&pWal->mutex);
|
||||||
taosArrayDestroy(pWal->fileInfoSet);
|
taosArrayDestroy(pWal->fileInfoSet);
|
||||||
taosMemoryFree(pWal);
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (walCheckAndRepairIdx(pWal) < 0) {
|
if (walCheckAndRepairIdx(pWal) < 0) {
|
||||||
|
wError("vgId:%d cannot open wal since repair idx file failed", pWal->cfg.vgId);
|
||||||
|
taosHashCleanup(pWal->pRefHash);
|
||||||
|
taosRemoveRef(tsWal.refSetId, pWal->refId);
|
||||||
|
taosThreadMutexDestroy(&pWal->mutex);
|
||||||
|
taosArrayDestroy(pWal->fileInfoSet);
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
wDebug("vgId:%d, wal:%p is opened, level:%d fsyncPeriod:%d", pWal->cfg.vgId, pWal, pWal->cfg.level,
|
wDebug("vgId:%d, wal:%p is opened, level:%d fsyncPeriod:%d", pWal->cfg.vgId, pWal, pWal->cfg.level,
|
||||||
|
|
Loading…
Reference in New Issue