Fix TD-1635
This commit is contained in:
parent
38c6df299f
commit
2f346de95e
|
@ -246,6 +246,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_SYN_NOT_ENABLED, 0, 0x0901, "Sync modul
|
||||||
|
|
||||||
// wal
|
// wal
|
||||||
TAOS_DEFINE_ERROR(TSDB_CODE_WAL_APP_ERROR, 0, 0x1000, "Unexpected generic error in wal")
|
TAOS_DEFINE_ERROR(TSDB_CODE_WAL_APP_ERROR, 0, 0x1000, "Unexpected generic error in wal")
|
||||||
|
TAOS_DEFINE_ERROR(TSDB_CODE_WAL_FILE_CORRUPTED, 0, 0x1001, "WAL file is corrupted")
|
||||||
|
|
||||||
// http
|
// http
|
||||||
TAOS_DEFINE_ERROR(TSDB_CODE_HTTP_SERVER_OFFLINE, 0, 0x1100, "http server is not onlin")
|
TAOS_DEFINE_ERROR(TSDB_CODE_HTTP_SERVER_OFFLINE, 0, 0x1100, "http server is not onlin")
|
||||||
|
|
|
@ -185,7 +185,11 @@ static int32_t sdbInitWal() {
|
||||||
}
|
}
|
||||||
|
|
||||||
sdbInfo("open sdb wal for restore");
|
sdbInfo("open sdb wal for restore");
|
||||||
walRestore(tsSdbObj.wal, NULL, sdbWrite);
|
int code = walRestore(tsSdbObj.wal, NULL, sdbWrite);
|
||||||
|
if (code != TSDB_CODE_SUCCESS) {
|
||||||
|
sdbError("failed to open wal for restore, reason:%s", tstrerror(code));
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -347,9 +347,10 @@ static void walRelease(SWal *pWal) {
|
||||||
|
|
||||||
static int walRestoreWalFile(SWal *pWal, void *pVnode, FWalWrite writeFp) {
|
static int walRestoreWalFile(SWal *pWal, void *pVnode, FWalWrite writeFp) {
|
||||||
char *name = pWal->name;
|
char *name = pWal->name;
|
||||||
|
int size = 1024 * 1024; // default 1M buffer size
|
||||||
|
|
||||||
terrno = 0;
|
terrno = 0;
|
||||||
char *buffer = malloc(1024000); // size for one record
|
char *buffer = malloc(size);
|
||||||
if (buffer == NULL) {
|
if (buffer == NULL) {
|
||||||
terrno = TAOS_SYSTEM_ERROR(errno);
|
terrno = TAOS_SYSTEM_ERROR(errno);
|
||||||
return terrno;
|
return terrno;
|
||||||
|
@ -357,7 +358,7 @@ static int walRestoreWalFile(SWal *pWal, void *pVnode, FWalWrite writeFp) {
|
||||||
|
|
||||||
SWalHead *pHead = (SWalHead *)buffer;
|
SWalHead *pHead = (SWalHead *)buffer;
|
||||||
|
|
||||||
int fd = open(name, O_RDONLY);
|
int fd = open(name, O_RDWR);
|
||||||
if (fd < 0) {
|
if (fd < 0) {
|
||||||
wError("wal:%s, failed to open for restore(%s)", name, strerror(errno));
|
wError("wal:%s, failed to open for restore(%s)", name, strerror(errno));
|
||||||
terrno = TAOS_SYSTEM_ERROR(errno);
|
terrno = TAOS_SYSTEM_ERROR(errno);
|
||||||
|
@ -367,29 +368,58 @@ static int walRestoreWalFile(SWal *pWal, void *pVnode, FWalWrite writeFp) {
|
||||||
|
|
||||||
wDebug("wal:%s, start to restore", name);
|
wDebug("wal:%s, start to restore", name);
|
||||||
|
|
||||||
|
size_t offset = 0;
|
||||||
while (1) {
|
while (1) {
|
||||||
int ret = taosTRead(fd, pHead, sizeof(SWalHead));
|
int ret = taosTRead(fd, pHead, sizeof(SWalHead));
|
||||||
if ( ret == 0) break;
|
if (ret == 0) break;
|
||||||
|
|
||||||
if (ret != sizeof(SWalHead)) {
|
if (ret < 0) {
|
||||||
wWarn("wal:%s, failed to read head, skip, ret:%d(%s)", name, ret, strerror(errno));
|
wError("wal:%s, failed to read wal head part since %s", name, strerror(errno));
|
||||||
terrno = TAOS_SYSTEM_ERROR(errno);
|
terrno = TAOS_SYSTEM_ERROR(errno);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (ret < sizeof(SWalHead)) {
|
||||||
|
wError("wal:%s, failed to read head, ret:%d, skip the rest of file", name, ret);
|
||||||
|
taosFtruncate(fd, offset);
|
||||||
|
fsync(fd);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
if (!taosCheckChecksumWhole((uint8_t *)pHead, sizeof(SWalHead))) {
|
if (!taosCheckChecksumWhole((uint8_t *)pHead, sizeof(SWalHead))) {
|
||||||
wWarn("wal:%s, cksum is messed up, skip the rest of file", name);
|
wWarn("wal:%s, cksum is messed up, skip the rest of file", name);
|
||||||
terrno = TAOS_SYSTEM_ERROR(errno);
|
terrno = TSDB_CODE_WAL_FILE_CORRUPTED;
|
||||||
|
ASSERT(false);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (pHead->len > size - sizeof(SWalHead)) {
|
||||||
|
size = sizeof(SWalHead) + pHead->len;
|
||||||
|
buffer = realloc(buffer, size);
|
||||||
|
if (buffer == NULL) {
|
||||||
|
terrno = TAOS_SYSTEM_ERROR(errno);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
pHead = (SWalHead *)buffer;
|
||||||
|
}
|
||||||
|
|
||||||
ret = taosTRead(fd, pHead->cont, pHead->len);
|
ret = taosTRead(fd, pHead->cont, pHead->len);
|
||||||
if ( ret != pHead->len) {
|
if (ret < 0) {
|
||||||
wWarn("wal:%s, failed to read body, skip, len:%d ret:%d", name, pHead->len, ret);
|
wError("wal:%s failed to read wal body part since %s", name, strerror(errno));
|
||||||
terrno = TAOS_SYSTEM_ERROR(errno);
|
terrno = TAOS_SYSTEM_ERROR(errno);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (ret < pHead->len) {
|
||||||
|
wError("wal:%s, failed to read body, len:%d ret:%d, skip the rest of file", name, pHead->len, ret);
|
||||||
|
taosFtruncate(fd, offset);
|
||||||
|
fsync(fd);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
offset = offset + sizeof(SWalHead) + pHead->len;
|
||||||
|
|
||||||
if (pWal->keep) pWal->version = pHead->version;
|
if (pWal->keep) pWal->version = pHead->version;
|
||||||
(*writeFp)(pVnode, pHead, TAOS_QTYPE_WAL);
|
(*writeFp)(pVnode, pHead, TAOS_QTYPE_WAL);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue