Merge pull request #17392 from taosdata/FIX/TD-19183-3.0
enh: not allowed to start on disk space unavailable
This commit is contained in:
commit
3cbfd2d160
|
@ -51,6 +51,7 @@ static int32_t dmInitMonitor() {
|
|||
|
||||
static bool dmCheckDiskSpace() {
|
||||
osUpdate();
|
||||
// sufficiency
|
||||
if (!osDataSpaceSufficient()) {
|
||||
dWarn("free data disk size: %f GB, not sufficient, expected %f GB at least", (double)tsDataSpace.size.avail / 1024.0 / 1024.0 / 1024.0, (double)tsDataSpace.reserved / 1024.0 / 1024.0 / 1024.0);
|
||||
}
|
||||
|
@ -60,7 +61,24 @@ static bool dmCheckDiskSpace() {
|
|||
if (!osTempSpaceSufficient()) {
|
||||
dWarn("free temp disk size: %f GB, not sufficient, expected %f GB at least", (double)tsTempSpace.size.avail / 1024.0 / 1024.0 / 1024.0, (double)tsTempSpace.reserved / 1024.0 / 1024.0 / 1024.0);
|
||||
}
|
||||
return true;
|
||||
// availability
|
||||
bool ret = true;
|
||||
if (!osDataSpaceAvailable()) {
|
||||
dError("data disk space unavailable, i.e. %s", tsDataDir);
|
||||
terrno = TSDB_CODE_VND_NO_DISKSPACE;
|
||||
ret = false;
|
||||
}
|
||||
if (!osLogSpaceAvailable()) {
|
||||
dError("log disk space unavailable, i.e. %s", tsLogDir);
|
||||
terrno = TSDB_CODE_VND_NO_DISKSPACE;
|
||||
ret = false;
|
||||
}
|
||||
if (!osTempSpaceAvailable()) {
|
||||
dError("temp disk space unavailable, i.e. %s", tsTempDir);
|
||||
terrno = TSDB_CODE_VND_NO_DISKSPACE;
|
||||
ret = false;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool dmCheckDataDirVersion() {
|
||||
|
|
|
@ -43,9 +43,7 @@ void Testbase::InitLog(const char* path) {
|
|||
}
|
||||
|
||||
void Testbase::Init(const char* path, int16_t port) {
|
||||
#ifdef _TD_DARWIN_64
|
||||
osDefaultInit();
|
||||
#endif
|
||||
tsServerPort = port;
|
||||
strcpy(tsLocalFqdn, "localhost");
|
||||
snprintf(tsLocalEp, TSDB_EP_LEN, "%s:%u", tsLocalFqdn, tsServerPort);
|
||||
|
|
|
@ -123,8 +123,8 @@ static FORCE_INLINE int64_t walScanLogGetLastVer(SWal* pWal, int32_t fileIdx) {
|
|||
}
|
||||
SWalCkHead* logContent = (SWalCkHead*)candidate;
|
||||
if (walValidHeadCksum(logContent) != 0) {
|
||||
wError("vgId:%d, failed to validate checksum of wal entry header. offset:% %" PRId64 ", file:%s",
|
||||
((char*)(logContent)-buf), fnameStr);
|
||||
wWarn("vgId:%d, failed to validate checksum of wal entry header. offset:%" PRId64 ", file:%s", pWal->cfg.vgId,
|
||||
offset + ((char*)(logContent)-buf), fnameStr);
|
||||
haystack = candidate + 1;
|
||||
if (firstTrial) {
|
||||
break;
|
||||
|
@ -162,8 +162,8 @@ static FORCE_INLINE int64_t walScanLogGetLastVer(SWal* pWal, int32_t fileIdx) {
|
|||
}
|
||||
if (walValidBodyCksum(logContent) != 0) {
|
||||
terrno = TSDB_CODE_WAL_CHKSUM_MISMATCH;
|
||||
wError("vgId:%d, failed to validate checksum of wal entry body. offset:% %" PRId64 ", file:%s",
|
||||
((char*)(logContent)-buf), fnameStr);
|
||||
wWarn("vgId:%d, failed to validate checksum of wal entry body. offset:%" PRId64 ", file:%s", pWal->cfg.vgId,
|
||||
offset + ((char*)(logContent)-buf), fnameStr);
|
||||
haystack = candidate + 1;
|
||||
if (firstTrial) {
|
||||
break;
|
||||
|
@ -481,6 +481,10 @@ int walCheckAndRepairIdxFile(SWal* pWal, int32_t fileIdx) {
|
|||
continue;
|
||||
}
|
||||
|
||||
if (offset != (idxEntry.ver - pFileInfo->firstVer) * sizeof(SWalIdxEntry)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (walReadLogHead(pLogFile, idxEntry.offset, &ckHead) < 0) {
|
||||
wWarn("vgId:%d, failed to read log file since %s. file:%s, offset:%" PRId64 ", idx entry ver:%" PRId64 "",
|
||||
pWal->cfg.vgId, terrstr(), fLogNameStr, idxEntry.offset, idxEntry.ver);
|
||||
|
@ -493,6 +497,8 @@ int walCheckAndRepairIdxFile(SWal* pWal, int32_t fileIdx) {
|
|||
}
|
||||
offset += sizeof(SWalIdxEntry);
|
||||
|
||||
ASSERT(offset == (idxEntry.ver - pFileInfo->firstVer + 1) * sizeof(SWalIdxEntry));
|
||||
|
||||
// ftruncate idx file
|
||||
if (offset < fileSize) {
|
||||
if (taosFtruncateFile(pIdxFile, offset) < 0) {
|
||||
|
|
|
@ -410,25 +410,35 @@ END:
|
|||
|
||||
static int32_t walWriteIndex(SWal *pWal, int64_t ver, int64_t offset) {
|
||||
SWalIdxEntry entry = {.ver = ver, .offset = offset};
|
||||
int64_t idxOffset = taosLSeekFile(pWal->pIdxFile, 0, SEEK_END);
|
||||
SWalFileInfo *pFileInfo = walGetCurFileInfo(pWal);
|
||||
ASSERT(pFileInfo != NULL);
|
||||
ASSERT(pFileInfo->firstVer >= 0);
|
||||
int64_t idxOffset = (entry.ver - pFileInfo->firstVer) * sizeof(SWalIdxEntry);
|
||||
wDebug("vgId:%d, write index, index:%" PRId64 ", offset:%" PRId64 ", at %" PRId64, pWal->cfg.vgId, ver, offset,
|
||||
idxOffset);
|
||||
|
||||
int64_t size = taosWriteFile(pWal->pIdxFile, &entry, sizeof(SWalIdxEntry));
|
||||
if (size != sizeof(SWalIdxEntry)) {
|
||||
wError("vgId:%d, failed to write idx entry due to %s. ver:%lld", pWal->cfg.vgId, strerror(errno), ver);
|
||||
terrno = TAOS_SYSTEM_ERROR(errno);
|
||||
// TODO truncate
|
||||
return -1;
|
||||
}
|
||||
|
||||
ASSERT(taosLSeekFile(pWal->pIdxFile, 0, SEEK_END) == idxOffset + sizeof(SWalIdxEntry) && "Offset of idx entries misaligned");
|
||||
return 0;
|
||||
}
|
||||
|
||||
// TODO guarantee atomicity by truncating failed writes
|
||||
static FORCE_INLINE int32_t walWriteImpl(SWal *pWal, int64_t index, tmsg_t msgType, SWalSyncInfo syncMeta,
|
||||
const void *body, int32_t bodyLen) {
|
||||
int64_t code = 0;
|
||||
|
||||
int64_t offset = walGetCurFileOffset(pWal);
|
||||
SWalFileInfo *pFileInfo = walGetCurFileInfo(pWal);
|
||||
ASSERT(pFileInfo != NULL);
|
||||
|
||||
if (pFileInfo->firstVer == -1) {
|
||||
pFileInfo->firstVer = index;
|
||||
}
|
||||
pWal->writeHead.head.version = index;
|
||||
pWal->writeHead.head.bodyLen = bodyLen;
|
||||
pWal->writeHead.head.msgType = msgType;
|
||||
|
@ -439,11 +449,14 @@ static FORCE_INLINE int32_t walWriteImpl(SWal *pWal, int64_t index, tmsg_t msgTy
|
|||
|
||||
pWal->writeHead.cksumHead = walCalcHeadCksum(&pWal->writeHead);
|
||||
pWal->writeHead.cksumBody = walCalcBodyCksum(body, bodyLen);
|
||||
|
||||
wDebug("vgId:%d, wal write log %ld, msgType: %s", pWal->cfg.vgId, index, TMSG_INFO(msgType));
|
||||
|
||||
code = walWriteIndex(pWal, index, offset);
|
||||
if (code < 0) {
|
||||
goto END;
|
||||
}
|
||||
|
||||
if (taosWriteFile(pWal->pLogFile, &pWal->writeHead, sizeof(SWalCkHead)) != sizeof(SWalCkHead)) {
|
||||
// TODO ftruncate
|
||||
terrno = TAOS_SYSTEM_ERROR(errno);
|
||||
wError("vgId:%d, file:%" PRId64 ".log, failed to write since %s", pWal->cfg.vgId, walGetLastFileFirstVer(pWal),
|
||||
strerror(errno));
|
||||
|
@ -452,7 +465,6 @@ static FORCE_INLINE int32_t walWriteImpl(SWal *pWal, int64_t index, tmsg_t msgTy
|
|||
}
|
||||
|
||||
if (taosWriteFile(pWal->pLogFile, (char *)body, bodyLen) != bodyLen) {
|
||||
// TODO ftruncate
|
||||
terrno = TAOS_SYSTEM_ERROR(errno);
|
||||
wError("vgId:%d, file:%" PRId64 ".log, failed to write since %s", pWal->cfg.vgId, walGetLastFileFirstVer(pWal),
|
||||
strerror(errno));
|
||||
|
@ -460,24 +472,31 @@ static FORCE_INLINE int32_t walWriteImpl(SWal *pWal, int64_t index, tmsg_t msgTy
|
|||
goto END;
|
||||
}
|
||||
|
||||
code = walWriteIndex(pWal, index, offset);
|
||||
if (code < 0) {
|
||||
// TODO ftruncate
|
||||
goto END;
|
||||
}
|
||||
|
||||
// set status
|
||||
if (pWal->vers.firstVer == -1) pWal->vers.firstVer = index;
|
||||
pWal->vers.lastVer = index;
|
||||
pWal->totSize += sizeof(SWalCkHead) + bodyLen;
|
||||
if (walGetCurFileInfo(pWal)->firstVer == -1) {
|
||||
walGetCurFileInfo(pWal)->firstVer = index;
|
||||
}
|
||||
walGetCurFileInfo(pWal)->lastVer = index;
|
||||
walGetCurFileInfo(pWal)->fileSize += sizeof(SWalCkHead) + bodyLen;
|
||||
pFileInfo->lastVer = index;
|
||||
pFileInfo->fileSize += sizeof(SWalCkHead) + bodyLen;
|
||||
|
||||
return 0;
|
||||
|
||||
END:
|
||||
// recover in a reverse order
|
||||
if (taosFtruncateFile(pWal->pLogFile, offset) < 0) {
|
||||
wFatal("vgId:%d, failed to ftruncate logfile to offset:%lld during recovery due to %s", pWal->cfg.vgId, offset,
|
||||
strerror(errno));
|
||||
terrno = TAOS_SYSTEM_ERROR(errno);
|
||||
ASSERT(0 && "failed to recover from error");
|
||||
}
|
||||
|
||||
int64_t idxOffset = (index - pFileInfo->firstVer) * sizeof(SWalIdxEntry);
|
||||
if (taosFtruncateFile(pWal->pIdxFile, idxOffset) < 0) {
|
||||
wFatal("vgId:%d, failed to ftruncate idxfile to offset:%lld during recovery due to %s", pWal->cfg.vgId, idxOffset,
|
||||
strerror(errno));
|
||||
terrno = TAOS_SYSTEM_ERROR(errno);
|
||||
ASSERT(0 && "failed to recover from error");
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
|
|
@ -443,10 +443,13 @@ static inline int32_t taosBuildLogHead(char *buffer, const char *flags) {
|
|||
static inline void taosPrintLogImp(ELogLevel level, int32_t dflag, const char *buffer, int32_t len) {
|
||||
if ((dflag & DEBUG_FILE) && tsLogObj.logHandle && tsLogObj.logHandle->pFile != NULL && osLogSpaceAvailable()) {
|
||||
taosUpdateLogNums(level);
|
||||
if (tsAsyncLog) {
|
||||
if (tsAsyncLog && level != DEBUG_FATAL) {
|
||||
taosPushLogBuffer(tsLogObj.logHandle, buffer, len);
|
||||
} else {
|
||||
taosWriteFile(tsLogObj.logHandle->pFile, buffer, len);
|
||||
if (level == DEBUG_FATAL) {
|
||||
taosFsyncFile(tsLogObj.logHandle->pFile);
|
||||
}
|
||||
}
|
||||
|
||||
if (tsLogObj.maxLines > 0) {
|
||||
|
|
Loading…
Reference in New Issue