From a593ade3341b5ba3da36aef2e88274142bdc3b3f Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Fri, 12 Feb 2021 21:06:36 +0800 Subject: [PATCH 1/5] gitignore: ignore emacs temporary files --- .gitignore | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.gitignore b/.gitignore index e6e327327c..b400d719cc 100644 --- a/.gitignore +++ b/.gitignore @@ -79,3 +79,15 @@ tests/comparisonTest/opentsdb/opentsdbtest/.settings/ tests/examples/JDBC/JDBCDemo/.classpath tests/examples/JDBC/JDBCDemo/.project tests/examples/JDBC/JDBCDemo/.settings/ + +# Emacs +# -*- mode: gitignore; -*- +*~ +\#*\# +/.emacs.desktop +/.emacs.desktop.lock +*.elc +auto-save-list +tramp +.\#* +TAGS From 76d7b954cf916f6c286a8423115966c6cc70802b Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Fri, 12 Feb 2021 21:07:31 +0800 Subject: [PATCH 2/5] [TD-2955]: [wal][v2] checksum whole wal record instead of just head --- src/wal/CMakeLists.txt | 2 + src/wal/src/walWrite.c | 112 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+) diff --git a/src/wal/CMakeLists.txt b/src/wal/CMakeLists.txt index 42a764fce2..a89024dab5 100644 --- a/src/wal/CMakeLists.txt +++ b/src/wal/CMakeLists.txt @@ -1,6 +1,8 @@ CMAKE_MINIMUM_REQUIRED(VERSION 2.8) PROJECT(TDengine) +ADD_DEFINITIONS(-DWAL_CHECKSUM_WHOLE) + INCLUDE_DIRECTORIES(inc) AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR}/src SRC) diff --git a/src/wal/src/walWrite.c b/src/wal/src/walWrite.c index ea1eaa4fee..cf3d9fd00a 100644 --- a/src/wal/src/walWrite.c +++ b/src/wal/src/walWrite.c @@ -111,6 +111,28 @@ void walRemoveAllOldFiles(void *handle) { pthread_mutex_unlock(&pWal->mutex); } +#if defined(WAL_CHECKSUM_WHOLE) + +static void walUpdateChecksum(SWalHead *pHead) { + pHead->sver = 1; + pHead->cksum = 0; + pHead->cksum = taosCalcChecksum(0, (uint8_t *)pHead, sizeof(*pHead) + pHead->len); +} + +static int walValidateChecksum(SWalHead *pHead) { + if (pHead->sver == 0) { // for compatible with wal before sver 1 + return taosCheckChecksumWhole((uint8_t *)pHead, sizeof(*pHead)); + } else if (pHead->sver == 1) { + uint32_t cksum = pHead->cksum; + pHead->cksum = 0; + return taosCheckChecksum((uint8_t *)pHead, sizeof(*pHead) + pHead->len, cksum); + } + + return 0; +} + +#endif + int32_t walWrite(void *handle, SWalHead *pHead) { if (handle == NULL) return -1; @@ -123,7 +145,12 @@ int32_t walWrite(void *handle, SWalHead *pHead) { if (pHead->version <= pWal->version) return 0; pHead->signature = WAL_SIGNATURE; +#if defined(WAL_CHECKSUM_WHOLE) + walUpdateChecksum(pHead); +#else taosCalcChecksumAppend(0, (uint8_t *)pHead, sizeof(SWalHead)); +#endif + int32_t contLen = pHead->len + sizeof(SWalHead); pthread_mutex_lock(&pWal->mutex); @@ -246,16 +273,40 @@ static int32_t walSkipCorruptedRecord(SWal *pWal, SWalHead *pHead, int64_t tfd, continue; } +#if defined(WAL_CHECKSUM_WHOLE) + if (pHead->sver == 0 && walValidateChecksum(pHead)) { + wInfo("vgId:%d, wal head cksum check passed, offset:%" PRId64, pWal->vgId, pos); + *offset = pos; + return TSDB_CODE_SUCCESS; + } + + if (pHead->sver == 1) { + if (tfRead(tfd, pHead->cont, pHead->len) < pHead->len) { + wError("vgId:%d, read to end of corrupted wal file, offset:%" PRId64, pWal->vgId, pos); + return TSDB_CODE_WAL_FILE_CORRUPTED; + } + + if (walValidateChecksum(pHead)) { + wInfo("vgId:%d, wal whole cksum check passed, offset:%" PRId64, pWal->vgId, pos); + *offset = pos; + return TSDB_CODE_SUCCESS; + } + } + +#else if (taosCheckChecksumWhole((uint8_t *)pHead, sizeof(SWalHead))) { wInfo("vgId:%d, wal head cksum check passed, offset:%" PRId64, pWal->vgId, pos); *offset = pos; return TSDB_CODE_SUCCESS; } + +#endif } return TSDB_CODE_WAL_FILE_CORRUPTED; } + static int32_t walRestoreWalFile(SWal *pWal, void *pVnode, FWalWrite writeFp, char *name, int64_t fileId) { int32_t size = WAL_MAX_SIZE; void * buffer = tmalloc(size); @@ -278,6 +329,66 @@ static int32_t walRestoreWalFile(SWal *pWal, void *pVnode, FWalWrite writeFp, ch SWalHead *pHead = buffer; while (1) { +#if defined(WAL_CHECKSUM_WHOLE) + int32_t ret = (int32_t)tfRead(tfd, pHead, sizeof(SWalHead)); + if (ret == 0) break; + + if (ret < 0) { + wError("vgId:%d, file:%s, failed to read wal head since %s", pWal->vgId, name, strerror(errno)); + code = TAOS_SYSTEM_ERROR(errno); + break; + } + + if (ret < sizeof(SWalHead)) { + wError("vgId:%d, file:%s, failed to read wal head, ret is %d", pWal->vgId, name, ret); + walFtruncate(pWal, tfd, offset); + break; + } + + if (pHead->sver == 0 && !walValidateChecksum(pHead)) { + wError("vgId:%d, file:%s, wal head cksum is messed up, hver:%" PRIu64 " len:%d offset:%" PRId64, pWal->vgId, name, + pHead->version, pHead->len, offset); + code = walSkipCorruptedRecord(pWal, pHead, tfd, &offset); + if (code != TSDB_CODE_SUCCESS) { + walFtruncate(pWal, tfd, offset); + break; + } + } + + if (pHead->len < 0 || pHead->len > size - sizeof(SWalHead)) { + wError("vgId:%d, file:%s, wal head len out of range, hver:%" PRIu64 " len:%d offset:%" PRId64, pWal->vgId, name, + pHead->version, pHead->len, offset); + code = walSkipCorruptedRecord(pWal, pHead, tfd, &offset); + if (code != TSDB_CODE_SUCCESS) { + walFtruncate(pWal, tfd, offset); + break; + } + } + + ret = (int32_t)tfRead(tfd, pHead->cont, pHead->len); + if (ret < 0) { + wError("vgId:%d, file:%s, failed to read wal body since %s", pWal->vgId, name, strerror(errno)); + code = TAOS_SYSTEM_ERROR(errno); + break; + } + + if (ret < pHead->len) { + wError("vgId:%d, file:%s, failed to read wal body, ret:%d len:%d", pWal->vgId, name, ret, pHead->len); + offset += sizeof(SWalHead); + continue; + } + + if (pHead->sver == 1 && !walValidateChecksum(pHead)) { + wError("vgId:%d, file:%s, wal whole cksum is messed up, hver:%" PRIu64 " len:%d offset:%" PRId64, pWal->vgId, name, + pHead->version, pHead->len, offset); + code = walSkipCorruptedRecord(pWal, pHead, tfd, &offset); + if (code != TSDB_CODE_SUCCESS) { + walFtruncate(pWal, tfd, offset); + break; + } + } + +#else int32_t ret = (int32_t)tfRead(tfd, pHead, sizeof(SWalHead)); if (ret == 0) break; @@ -326,6 +437,7 @@ static int32_t walRestoreWalFile(SWal *pWal, void *pVnode, FWalWrite writeFp, ch continue; } +#endif offset = offset + sizeof(SWalHead) + pHead->len; wTrace("vgId:%d, restore wal, fileId:%" PRId64 " hver:%" PRIu64 " wver:%" PRIu64 " len:%d", pWal->vgId, From 6e95d874f0356da1743a1bc60bc9017e31358375 Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Sun, 14 Feb 2021 10:44:54 +0800 Subject: [PATCH 3/5] walWrite: make sure sver initialized to 0 in old wal files --- src/wal/src/walWrite.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/src/wal/src/walWrite.c b/src/wal/src/walWrite.c index cf3d9fd00a..aeb4983029 100644 --- a/src/wal/src/walWrite.c +++ b/src/wal/src/walWrite.c @@ -148,6 +148,7 @@ int32_t walWrite(void *handle, SWalHead *pHead) { #if defined(WAL_CHECKSUM_WHOLE) walUpdateChecksum(pHead); #else + pHead->sver = 0; taosCalcChecksumAppend(0, (uint8_t *)pHead, sizeof(SWalHead)); #endif @@ -329,7 +330,6 @@ static int32_t walRestoreWalFile(SWal *pWal, void *pVnode, FWalWrite writeFp, ch SWalHead *pHead = buffer; while (1) { -#if defined(WAL_CHECKSUM_WHOLE) int32_t ret = (int32_t)tfRead(tfd, pHead, sizeof(SWalHead)); if (ret == 0) break; @@ -345,6 +345,7 @@ static int32_t walRestoreWalFile(SWal *pWal, void *pVnode, FWalWrite writeFp, ch break; } +#if defined(WAL_CHECKSUM_WHOLE) if (pHead->sver == 0 && !walValidateChecksum(pHead)) { wError("vgId:%d, file:%s, wal head cksum is messed up, hver:%" PRIu64 " len:%d offset:%" PRId64, pWal->vgId, name, pHead->version, pHead->len, offset); @@ -389,21 +390,6 @@ static int32_t walRestoreWalFile(SWal *pWal, void *pVnode, FWalWrite writeFp, ch } #else - int32_t ret = (int32_t)tfRead(tfd, pHead, sizeof(SWalHead)); - if (ret == 0) break; - - if (ret < 0) { - wError("vgId:%d, file:%s, failed to read wal head since %s", pWal->vgId, name, strerror(errno)); - code = TAOS_SYSTEM_ERROR(errno); - break; - } - - if (ret < sizeof(SWalHead)) { - wError("vgId:%d, file:%s, failed to read wal head, ret is %d", pWal->vgId, name, ret); - walFtruncate(pWal, tfd, offset); - break; - } - if (!taosCheckChecksumWhole((uint8_t *)pHead, sizeof(SWalHead))) { wError("vgId:%d, file:%s, wal head cksum is messed up, hver:%" PRIu64 " len:%d offset:%" PRId64, pWal->vgId, name, pHead->version, pHead->len, offset); From 24ab0bdfbda21194248d361f20f5a2180a565bec Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Sun, 14 Feb 2021 11:40:52 +0800 Subject: [PATCH 4/5] WAL_CHECKSUM_WHOLE: turn off to make sure sver 0 in wal files --- src/wal/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wal/CMakeLists.txt b/src/wal/CMakeLists.txt index a89024dab5..096516918e 100644 --- a/src/wal/CMakeLists.txt +++ b/src/wal/CMakeLists.txt @@ -1,7 +1,7 @@ CMAKE_MINIMUM_REQUIRED(VERSION 2.8) PROJECT(TDengine) -ADD_DEFINITIONS(-DWAL_CHECKSUM_WHOLE) +#ADD_DEFINITIONS(-DWAL_CHECKSUM_WHOLE) INCLUDE_DIRECTORIES(inc) AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR}/src SRC) From 910f2507b36e9e7089fbf5f009560e23b4d805a6 Mon Sep 17 00:00:00 2001 From: Minglei Jin Date: Thu, 18 Feb 2021 19:09:42 +0800 Subject: [PATCH 5/5] Revert "WAL_CHECKSUM_WHOLE: turn off to make sure sver 0 in wal files" 1, sdb designated initialization makes walHead all fields 0; 2, dnodeVWrite: user "t" in SRpcHead makes sver 0; This reverts commit 24ab0bdfbda21194248d361f20f5a2180a565bec. --- src/wal/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wal/CMakeLists.txt b/src/wal/CMakeLists.txt index 096516918e..a89024dab5 100644 --- a/src/wal/CMakeLists.txt +++ b/src/wal/CMakeLists.txt @@ -1,7 +1,7 @@ CMAKE_MINIMUM_REQUIRED(VERSION 2.8) PROJECT(TDengine) -#ADD_DEFINITIONS(-DWAL_CHECKSUM_WHOLE) +ADD_DEFINITIONS(-DWAL_CHECKSUM_WHOLE) INCLUDE_DIRECTORIES(inc) AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR}/src SRC)