Merge pull request #17392 from taosdata/FIX/TD-19183-3.0
enh: refuse to start when disk space is unavailable
This commit is contained in:
commit
3cbfd2d160
|
@ -51,6 +51,7 @@ static int32_t dmInitMonitor() {
|
||||||
|
|
||||||
static bool dmCheckDiskSpace() {
|
static bool dmCheckDiskSpace() {
|
||||||
osUpdate();
|
osUpdate();
|
||||||
|
// sufficiency
|
||||||
if (!osDataSpaceSufficient()) {
|
if (!osDataSpaceSufficient()) {
|
||||||
dWarn("free data disk size: %f GB, not sufficient, expected %f GB at least", (double)tsDataSpace.size.avail / 1024.0 / 1024.0 / 1024.0, (double)tsDataSpace.reserved / 1024.0 / 1024.0 / 1024.0);
|
dWarn("free data disk size: %f GB, not sufficient, expected %f GB at least", (double)tsDataSpace.size.avail / 1024.0 / 1024.0 / 1024.0, (double)tsDataSpace.reserved / 1024.0 / 1024.0 / 1024.0);
|
||||||
}
|
}
|
||||||
|
@ -60,7 +61,24 @@ static bool dmCheckDiskSpace() {
|
||||||
if (!osTempSpaceSufficient()) {
|
if (!osTempSpaceSufficient()) {
|
||||||
dWarn("free temp disk size: %f GB, not sufficient, expected %f GB at least", (double)tsTempSpace.size.avail / 1024.0 / 1024.0 / 1024.0, (double)tsTempSpace.reserved / 1024.0 / 1024.0 / 1024.0);
|
dWarn("free temp disk size: %f GB, not sufficient, expected %f GB at least", (double)tsTempSpace.size.avail / 1024.0 / 1024.0 / 1024.0, (double)tsTempSpace.reserved / 1024.0 / 1024.0 / 1024.0);
|
||||||
}
|
}
|
||||||
return true;
|
// availability
|
||||||
|
bool ret = true;
|
||||||
|
if (!osDataSpaceAvailable()) {
|
||||||
|
dError("data disk space unavailable, i.e. %s", tsDataDir);
|
||||||
|
terrno = TSDB_CODE_VND_NO_DISKSPACE;
|
||||||
|
ret = false;
|
||||||
|
}
|
||||||
|
if (!osLogSpaceAvailable()) {
|
||||||
|
dError("log disk space unavailable, i.e. %s", tsLogDir);
|
||||||
|
terrno = TSDB_CODE_VND_NO_DISKSPACE;
|
||||||
|
ret = false;
|
||||||
|
}
|
||||||
|
if (!osTempSpaceAvailable()) {
|
||||||
|
dError("temp disk space unavailable, i.e. %s", tsTempDir);
|
||||||
|
terrno = TSDB_CODE_VND_NO_DISKSPACE;
|
||||||
|
ret = false;
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool dmCheckDataDirVersion() {
|
static bool dmCheckDataDirVersion() {
|
||||||
|
|
|
@ -43,9 +43,7 @@ void Testbase::InitLog(const char* path) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void Testbase::Init(const char* path, int16_t port) {
|
void Testbase::Init(const char* path, int16_t port) {
|
||||||
#ifdef _TD_DARWIN_64
|
|
||||||
osDefaultInit();
|
osDefaultInit();
|
||||||
#endif
|
|
||||||
tsServerPort = port;
|
tsServerPort = port;
|
||||||
strcpy(tsLocalFqdn, "localhost");
|
strcpy(tsLocalFqdn, "localhost");
|
||||||
snprintf(tsLocalEp, TSDB_EP_LEN, "%s:%u", tsLocalFqdn, tsServerPort);
|
snprintf(tsLocalEp, TSDB_EP_LEN, "%s:%u", tsLocalFqdn, tsServerPort);
|
||||||
|
|
|
@ -123,8 +123,8 @@ static FORCE_INLINE int64_t walScanLogGetLastVer(SWal* pWal, int32_t fileIdx) {
|
||||||
}
|
}
|
||||||
SWalCkHead* logContent = (SWalCkHead*)candidate;
|
SWalCkHead* logContent = (SWalCkHead*)candidate;
|
||||||
if (walValidHeadCksum(logContent) != 0) {
|
if (walValidHeadCksum(logContent) != 0) {
|
||||||
wError("vgId:%d, failed to validate checksum of wal entry header. offset:% %" PRId64 ", file:%s",
|
wWarn("vgId:%d, failed to validate checksum of wal entry header. offset:%" PRId64 ", file:%s", pWal->cfg.vgId,
|
||||||
((char*)(logContent)-buf), fnameStr);
|
offset + ((char*)(logContent)-buf), fnameStr);
|
||||||
haystack = candidate + 1;
|
haystack = candidate + 1;
|
||||||
if (firstTrial) {
|
if (firstTrial) {
|
||||||
break;
|
break;
|
||||||
|
@ -162,8 +162,8 @@ static FORCE_INLINE int64_t walScanLogGetLastVer(SWal* pWal, int32_t fileIdx) {
|
||||||
}
|
}
|
||||||
if (walValidBodyCksum(logContent) != 0) {
|
if (walValidBodyCksum(logContent) != 0) {
|
||||||
terrno = TSDB_CODE_WAL_CHKSUM_MISMATCH;
|
terrno = TSDB_CODE_WAL_CHKSUM_MISMATCH;
|
||||||
wError("vgId:%d, failed to validate checksum of wal entry body. offset:% %" PRId64 ", file:%s",
|
wWarn("vgId:%d, failed to validate checksum of wal entry body. offset:%" PRId64 ", file:%s", pWal->cfg.vgId,
|
||||||
((char*)(logContent)-buf), fnameStr);
|
offset + ((char*)(logContent)-buf), fnameStr);
|
||||||
haystack = candidate + 1;
|
haystack = candidate + 1;
|
||||||
if (firstTrial) {
|
if (firstTrial) {
|
||||||
break;
|
break;
|
||||||
|
@ -481,6 +481,10 @@ int walCheckAndRepairIdxFile(SWal* pWal, int32_t fileIdx) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (offset != (idxEntry.ver - pFileInfo->firstVer) * sizeof(SWalIdxEntry)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
if (walReadLogHead(pLogFile, idxEntry.offset, &ckHead) < 0) {
|
if (walReadLogHead(pLogFile, idxEntry.offset, &ckHead) < 0) {
|
||||||
wWarn("vgId:%d, failed to read log file since %s. file:%s, offset:%" PRId64 ", idx entry ver:%" PRId64 "",
|
wWarn("vgId:%d, failed to read log file since %s. file:%s, offset:%" PRId64 ", idx entry ver:%" PRId64 "",
|
||||||
pWal->cfg.vgId, terrstr(), fLogNameStr, idxEntry.offset, idxEntry.ver);
|
pWal->cfg.vgId, terrstr(), fLogNameStr, idxEntry.offset, idxEntry.ver);
|
||||||
|
@ -493,6 +497,8 @@ int walCheckAndRepairIdxFile(SWal* pWal, int32_t fileIdx) {
|
||||||
}
|
}
|
||||||
offset += sizeof(SWalIdxEntry);
|
offset += sizeof(SWalIdxEntry);
|
||||||
|
|
||||||
|
ASSERT(offset == (idxEntry.ver - pFileInfo->firstVer + 1) * sizeof(SWalIdxEntry));
|
||||||
|
|
||||||
// ftruncate idx file
|
// ftruncate idx file
|
||||||
if (offset < fileSize) {
|
if (offset < fileSize) {
|
||||||
if (taosFtruncateFile(pIdxFile, offset) < 0) {
|
if (taosFtruncateFile(pIdxFile, offset) < 0) {
|
||||||
|
|
|
@ -410,25 +410,35 @@ END:
|
||||||
|
|
||||||
static int32_t walWriteIndex(SWal *pWal, int64_t ver, int64_t offset) {
|
static int32_t walWriteIndex(SWal *pWal, int64_t ver, int64_t offset) {
|
||||||
SWalIdxEntry entry = {.ver = ver, .offset = offset};
|
SWalIdxEntry entry = {.ver = ver, .offset = offset};
|
||||||
int64_t idxOffset = taosLSeekFile(pWal->pIdxFile, 0, SEEK_END);
|
SWalFileInfo *pFileInfo = walGetCurFileInfo(pWal);
|
||||||
|
ASSERT(pFileInfo != NULL);
|
||||||
|
ASSERT(pFileInfo->firstVer >= 0);
|
||||||
|
int64_t idxOffset = (entry.ver - pFileInfo->firstVer) * sizeof(SWalIdxEntry);
|
||||||
wDebug("vgId:%d, write index, index:%" PRId64 ", offset:%" PRId64 ", at %" PRId64, pWal->cfg.vgId, ver, offset,
|
wDebug("vgId:%d, write index, index:%" PRId64 ", offset:%" PRId64 ", at %" PRId64, pWal->cfg.vgId, ver, offset,
|
||||||
idxOffset);
|
idxOffset);
|
||||||
|
|
||||||
int64_t size = taosWriteFile(pWal->pIdxFile, &entry, sizeof(SWalIdxEntry));
|
int64_t size = taosWriteFile(pWal->pIdxFile, &entry, sizeof(SWalIdxEntry));
|
||||||
if (size != sizeof(SWalIdxEntry)) {
|
if (size != sizeof(SWalIdxEntry)) {
|
||||||
|
wError("vgId:%d, failed to write idx entry due to %s. ver:%lld", pWal->cfg.vgId, strerror(errno), ver);
|
||||||
terrno = TAOS_SYSTEM_ERROR(errno);
|
terrno = TAOS_SYSTEM_ERROR(errno);
|
||||||
// TODO truncate
|
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ASSERT(taosLSeekFile(pWal->pIdxFile, 0, SEEK_END) == idxOffset + sizeof(SWalIdxEntry) && "Offset of idx entries misaligned");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO guarantee atomicity by truncating failed writes
|
|
||||||
static FORCE_INLINE int32_t walWriteImpl(SWal *pWal, int64_t index, tmsg_t msgType, SWalSyncInfo syncMeta,
|
static FORCE_INLINE int32_t walWriteImpl(SWal *pWal, int64_t index, tmsg_t msgType, SWalSyncInfo syncMeta,
|
||||||
const void *body, int32_t bodyLen) {
|
const void *body, int32_t bodyLen) {
|
||||||
int64_t code = 0;
|
int64_t code = 0;
|
||||||
|
|
||||||
int64_t offset = walGetCurFileOffset(pWal);
|
int64_t offset = walGetCurFileOffset(pWal);
|
||||||
|
SWalFileInfo *pFileInfo = walGetCurFileInfo(pWal);
|
||||||
|
ASSERT(pFileInfo != NULL);
|
||||||
|
|
||||||
|
if (pFileInfo->firstVer == -1) {
|
||||||
|
pFileInfo->firstVer = index;
|
||||||
|
}
|
||||||
pWal->writeHead.head.version = index;
|
pWal->writeHead.head.version = index;
|
||||||
pWal->writeHead.head.bodyLen = bodyLen;
|
pWal->writeHead.head.bodyLen = bodyLen;
|
||||||
pWal->writeHead.head.msgType = msgType;
|
pWal->writeHead.head.msgType = msgType;
|
||||||
|
@ -439,11 +449,14 @@ static FORCE_INLINE int32_t walWriteImpl(SWal *pWal, int64_t index, tmsg_t msgTy
|
||||||
|
|
||||||
pWal->writeHead.cksumHead = walCalcHeadCksum(&pWal->writeHead);
|
pWal->writeHead.cksumHead = walCalcHeadCksum(&pWal->writeHead);
|
||||||
pWal->writeHead.cksumBody = walCalcBodyCksum(body, bodyLen);
|
pWal->writeHead.cksumBody = walCalcBodyCksum(body, bodyLen);
|
||||||
|
|
||||||
wDebug("vgId:%d, wal write log %ld, msgType: %s", pWal->cfg.vgId, index, TMSG_INFO(msgType));
|
wDebug("vgId:%d, wal write log %ld, msgType: %s", pWal->cfg.vgId, index, TMSG_INFO(msgType));
|
||||||
|
|
||||||
|
code = walWriteIndex(pWal, index, offset);
|
||||||
|
if (code < 0) {
|
||||||
|
goto END;
|
||||||
|
}
|
||||||
|
|
||||||
if (taosWriteFile(pWal->pLogFile, &pWal->writeHead, sizeof(SWalCkHead)) != sizeof(SWalCkHead)) {
|
if (taosWriteFile(pWal->pLogFile, &pWal->writeHead, sizeof(SWalCkHead)) != sizeof(SWalCkHead)) {
|
||||||
// TODO ftruncate
|
|
||||||
terrno = TAOS_SYSTEM_ERROR(errno);
|
terrno = TAOS_SYSTEM_ERROR(errno);
|
||||||
wError("vgId:%d, file:%" PRId64 ".log, failed to write since %s", pWal->cfg.vgId, walGetLastFileFirstVer(pWal),
|
wError("vgId:%d, file:%" PRId64 ".log, failed to write since %s", pWal->cfg.vgId, walGetLastFileFirstVer(pWal),
|
||||||
strerror(errno));
|
strerror(errno));
|
||||||
|
@ -452,7 +465,6 @@ static FORCE_INLINE int32_t walWriteImpl(SWal *pWal, int64_t index, tmsg_t msgTy
|
||||||
}
|
}
|
||||||
|
|
||||||
if (taosWriteFile(pWal->pLogFile, (char *)body, bodyLen) != bodyLen) {
|
if (taosWriteFile(pWal->pLogFile, (char *)body, bodyLen) != bodyLen) {
|
||||||
// TODO ftruncate
|
|
||||||
terrno = TAOS_SYSTEM_ERROR(errno);
|
terrno = TAOS_SYSTEM_ERROR(errno);
|
||||||
wError("vgId:%d, file:%" PRId64 ".log, failed to write since %s", pWal->cfg.vgId, walGetLastFileFirstVer(pWal),
|
wError("vgId:%d, file:%" PRId64 ".log, failed to write since %s", pWal->cfg.vgId, walGetLastFileFirstVer(pWal),
|
||||||
strerror(errno));
|
strerror(errno));
|
||||||
|
@ -460,24 +472,31 @@ static FORCE_INLINE int32_t walWriteImpl(SWal *pWal, int64_t index, tmsg_t msgTy
|
||||||
goto END;
|
goto END;
|
||||||
}
|
}
|
||||||
|
|
||||||
code = walWriteIndex(pWal, index, offset);
|
|
||||||
if (code < 0) {
|
|
||||||
// TODO ftruncate
|
|
||||||
goto END;
|
|
||||||
}
|
|
||||||
|
|
||||||
// set status
|
// set status
|
||||||
if (pWal->vers.firstVer == -1) pWal->vers.firstVer = index;
|
if (pWal->vers.firstVer == -1) pWal->vers.firstVer = index;
|
||||||
pWal->vers.lastVer = index;
|
pWal->vers.lastVer = index;
|
||||||
pWal->totSize += sizeof(SWalCkHead) + bodyLen;
|
pWal->totSize += sizeof(SWalCkHead) + bodyLen;
|
||||||
if (walGetCurFileInfo(pWal)->firstVer == -1) {
|
pFileInfo->lastVer = index;
|
||||||
walGetCurFileInfo(pWal)->firstVer = index;
|
pFileInfo->fileSize += sizeof(SWalCkHead) + bodyLen;
|
||||||
}
|
|
||||||
walGetCurFileInfo(pWal)->lastVer = index;
|
|
||||||
walGetCurFileInfo(pWal)->fileSize += sizeof(SWalCkHead) + bodyLen;
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
END:
|
END:
|
||||||
|
// recover in a reverse order
|
||||||
|
if (taosFtruncateFile(pWal->pLogFile, offset) < 0) {
|
||||||
|
wFatal("vgId:%d, failed to ftruncate logfile to offset:%lld during recovery due to %s", pWal->cfg.vgId, offset,
|
||||||
|
strerror(errno));
|
||||||
|
terrno = TAOS_SYSTEM_ERROR(errno);
|
||||||
|
ASSERT(0 && "failed to recover from error");
|
||||||
|
}
|
||||||
|
|
||||||
|
int64_t idxOffset = (index - pFileInfo->firstVer) * sizeof(SWalIdxEntry);
|
||||||
|
if (taosFtruncateFile(pWal->pIdxFile, idxOffset) < 0) {
|
||||||
|
wFatal("vgId:%d, failed to ftruncate idxfile to offset:%lld during recovery due to %s", pWal->cfg.vgId, idxOffset,
|
||||||
|
strerror(errno));
|
||||||
|
terrno = TAOS_SYSTEM_ERROR(errno);
|
||||||
|
ASSERT(0 && "failed to recover from error");
|
||||||
|
}
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -443,10 +443,13 @@ static inline int32_t taosBuildLogHead(char *buffer, const char *flags) {
|
||||||
static inline void taosPrintLogImp(ELogLevel level, int32_t dflag, const char *buffer, int32_t len) {
|
static inline void taosPrintLogImp(ELogLevel level, int32_t dflag, const char *buffer, int32_t len) {
|
||||||
if ((dflag & DEBUG_FILE) && tsLogObj.logHandle && tsLogObj.logHandle->pFile != NULL && osLogSpaceAvailable()) {
|
if ((dflag & DEBUG_FILE) && tsLogObj.logHandle && tsLogObj.logHandle->pFile != NULL && osLogSpaceAvailable()) {
|
||||||
taosUpdateLogNums(level);
|
taosUpdateLogNums(level);
|
||||||
if (tsAsyncLog) {
|
if (tsAsyncLog && level != DEBUG_FATAL) {
|
||||||
taosPushLogBuffer(tsLogObj.logHandle, buffer, len);
|
taosPushLogBuffer(tsLogObj.logHandle, buffer, len);
|
||||||
} else {
|
} else {
|
||||||
taosWriteFile(tsLogObj.logHandle->pFile, buffer, len);
|
taosWriteFile(tsLogObj.logHandle->pFile, buffer, len);
|
||||||
|
if (level == DEBUG_FATAL) {
|
||||||
|
taosFsyncFile(tsLogObj.logHandle->pFile);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (tsLogObj.maxLines > 0) {
|
if (tsLogObj.maxLines > 0) {
|
||||||
|
|
Loading…
Reference in New Issue