From 823e0d2614c9e8f7de05c0c0f19cff1d6c954eec Mon Sep 17 00:00:00 2001 From: Cary Xu Date: Fri, 13 Aug 2021 22:46:47 +0800 Subject: [PATCH 1/5] [TD-6044]: WAL compatibility since v2.1.5.0 --- src/inc/twal.h | 2 +- src/wal/src/walWrite.c | 99 +++++++++++++++++++++++++++++++++++++++--- 2 files changed, 94 insertions(+), 7 deletions(-) diff --git a/src/inc/twal.h b/src/inc/twal.h index bce398d6f9..868a1fbd78 100644 --- a/src/inc/twal.h +++ b/src/inc/twal.h @@ -32,7 +32,7 @@ typedef enum { typedef struct { int8_t msgType; - int8_t sver; + int8_t sver; // sver 2 for WAL SDataRow/SMemRow compatibility int8_t reserved[2]; int32_t len; uint64_t version; diff --git a/src/wal/src/walWrite.c b/src/wal/src/walWrite.c index 4774998799..2dfdb84818 100644 --- a/src/wal/src/walWrite.c +++ b/src/wal/src/walWrite.c @@ -17,6 +17,7 @@ #define TAOS_RANDOM_FILE_FAIL_TEST #include "os.h" #include "taoserror.h" +#include "taosmsg.h" #include "tchecksum.h" #include "tfile.h" #include "twal.h" @@ -114,7 +115,7 @@ void walRemoveAllOldFiles(void *handle) { #if defined(WAL_CHECKSUM_WHOLE) static void walUpdateChecksum(SWalHead *pHead) { - pHead->sver = 1; + pHead->sver = 2; pHead->cksum = 0; pHead->cksum = taosCalcChecksum(0, (uint8_t *)pHead, sizeof(*pHead) + pHead->len); } @@ -122,7 +123,7 @@ static void walUpdateChecksum(SWalHead *pHead) { static int walValidateChecksum(SWalHead *pHead) { if (pHead->sver == 0) { // for compatible with wal before sver 1 return taosCheckChecksumWhole((uint8_t *)pHead, sizeof(*pHead)); - } else if (pHead->sver == 1) { + } else if (pHead->sver >= 1) { uint32_t cksum = pHead->cksum; pHead->cksum = 0; return taosCheckChecksum((uint8_t *)pHead, sizeof(*pHead) + pHead->len, cksum); @@ -281,7 +282,7 @@ static int32_t walSkipCorruptedRecord(SWal *pWal, SWalHead *pHead, int64_t tfd, return TSDB_CODE_SUCCESS; } - if (pHead->sver == 1) { + if (pHead->sver >= 1) { if (tfRead(tfd, pHead->cont, pHead->len) < pHead->len) { wError("vgId:%d, read to end of corrupted wal file, offset:%" PRId64, pWal->vgId, pos); return TSDB_CODE_WAL_FILE_CORRUPTED; @@ -306,7 +307,88 @@ static int32_t walSkipCorruptedRecord(SWal *pWal, SWalHead *pHead, int64_t tfd, return TSDB_CODE_WAL_FILE_CORRUPTED; } +// Add SMemRowType ahead of SDataRow +static void expandSubmitBlk(SSubmitBlk *pDest, SSubmitBlk *pSrc, int32_t *lenExpand) { + memcpy(pDest, pSrc, sizeof(SSubmitBlk)); + int nRows = htons(pSrc->numOfRows); + if (nRows <= 0) { + return; + } + char *pDestData = pDest->data; + char *pSrcData = pSrc->data; + for (int i = 0; i < nRows; ++i) { + memRowSetType(pDestData, SMEM_ROW_DATA); + memcpy(memRowDataBody(pDestData), pSrcData, dataRowLen(pSrcData)); + pDestData = POINTER_SHIFT(pDestData, memRowTLen(pDestData)); + pSrcData = POINTER_SHIFT(pSrcData, dataRowLen(pSrcData)); + ++(*lenExpand); + } + int32_t dataLen = htonl(pDest->dataLen); + pDest->dataLen = htonl(dataLen + nRows * sizeof(uint8_t)); +} +static bool walIsSDataRow(void *pBlkData, int nRows, int32_t dataLen) { + int32_t len = 0; + for (int i = 0; i < nRows; ++i) { + len += dataRowLen(pBlkData); + if (len > dataLen) { + return false; + } + pBlkData = POINTER_SHIFT(pBlkData, dataRowLen(pBlkData)); + } + if (len != dataLen) { + return false; + } + return true; +} +// for WAL SMemRow/SDataRow compatibility +static int walSMemRowCheck(SWalHead *pHead) { + if ((pHead->sver < 2) && (pHead->msgType == TSDB_MSG_TYPE_SUBMIT)) { + SSubmitMsg *pMsg = (SSubmitMsg *)pHead->cont; + int32_t numOfBlocks = htonl(pMsg->numOfBlocks); + if (numOfBlocks <= 0) { + return 0; + } + + int32_t nTotalRows = 0; + SSubmitBlk *pBlk = (SSubmitBlk *)pMsg->blocks; + for (int32_t i = 0; i < numOfBlocks; ++i) { + int32_t dataLen = htonl(pBlk->dataLen); + int32_t nRows = htons(pBlk->numOfRows); + nTotalRows += nRows; + if (!walIsSDataRow(pBlk->data, nRows, dataLen)) { + return 0; + } + pBlk = (SSubmitBlk *)POINTER_SHIFT(pBlk, sizeof(SSubmitBlk) + dataLen); + } + + SWalHead *pWalHead = (SWalHead *)calloc(sizeof(SWalHead) + pHead->len + nTotalRows * sizeof(uint8_t), 1); + if (pWalHead == NULL) { + return -1; + } + + memcpy(pWalHead, pHead, sizeof(SWalHead) + sizeof(SSubmitMsg)); + + SSubmitMsg *pDestMsg = (SSubmitMsg *)pWalHead->cont; + SSubmitBlk *pDestBlks = (SSubmitBlk *)pDestMsg->blocks; + SSubmitBlk *pSrcBlks = (SSubmitBlk *)pMsg->blocks; + int32_t lenExpand = 0; + for (int32_t i = 0; i < numOfBlocks; ++i) { + expandSubmitBlk(pDestBlks, pSrcBlks, &lenExpand); + pDestBlks = POINTER_SHIFT(pDestBlks, htonl(pDestBlks->dataLen) + sizeof(SSubmitBlk)); + pSrcBlks = POINTER_SHIFT(pSrcBlks, htonl(pSrcBlks->dataLen) + sizeof(SSubmitBlk)); + } + if (lenExpand > 0) { + pDestMsg->header.contLen = htonl(pDestMsg->length) + lenExpand; + pDestMsg->length = htonl(pDestMsg->header.contLen); + pWalHead->len = pWalHead->len + lenExpand; + } + + memcpy(pHead, pWalHead, sizeof(SWalHead) + pWalHead->len); + tfree(pWalHead); + } + return 0; +} static int32_t walRestoreWalFile(SWal *pWal, void *pVnode, FWalWrite writeFp, char *name, int64_t fileId) { int32_t size = WAL_MAX_SIZE; @@ -346,7 +428,7 @@ static int32_t walRestoreWalFile(SWal *pWal, void *pVnode, FWalWrite writeFp, ch } #if defined(WAL_CHECKSUM_WHOLE) - if ((pHead->sver == 0 && !walValidateChecksum(pHead)) || pHead->sver < 0 || pHead->sver > 1) { + if ((pHead->sver == 0 && !walValidateChecksum(pHead)) || pHead->sver < 0 || pHead->sver > 2) { wError("vgId:%d, file:%s, wal head cksum is messed up, hver:%" PRIu64 " len:%d offset:%" PRId64, pWal->vgId, name, pHead->version, pHead->len, offset); code = walSkipCorruptedRecord(pWal, pHead, tfd, &offset); @@ -379,7 +461,7 @@ static int32_t walRestoreWalFile(SWal *pWal, void *pVnode, FWalWrite writeFp, ch continue; } - if (pHead->sver == 1 && !walValidateChecksum(pHead)) { + if ((pHead->sver >= 1) && !walValidateChecksum(pHead)) { wError("vgId:%d, file:%s, wal whole cksum is messed up, hver:%" PRIu64 " len:%d offset:%" PRId64, pWal->vgId, name, pHead->version, pHead->len, offset); code = walSkipCorruptedRecord(pWal, pHead, tfd, &offset); @@ -431,7 +513,12 @@ static int32_t walRestoreWalFile(SWal *pWal, void *pVnode, FWalWrite writeFp, ch pWal->version = pHead->version; - //wInfo("writeFp: %ld", offset); + // wInfo("writeFp: %ld", offset); + if (0 != walSMemRowCheck(pHead)) { + wError("vgId:%d, restore wal, fileId:%" PRId64 " hver:%" PRIu64 " wver:%" PRIu64 " len:%d offset:%" PRId64, + pWal->vgId, fileId, pHead->version, pWal->version, pHead->len, offset); + return TAOS_SYSTEM_ERROR(errno); + } (*writeFp)(pVnode, pHead, TAOS_QTYPE_WAL, NULL); } From 11b0755ef56006fd4256316ed2f1b7e7b4817b93 Mon Sep 17 00:00:00 2001 From: Cary Xu Date: Sat, 14 Aug 2021 08:48:49 +0800 Subject: [PATCH 2/5] do further check for blk with len 0 and SKVRow --- src/wal/src/walWrite.c | 37 ++++++++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/src/wal/src/walWrite.c b/src/wal/src/walWrite.c index 2dfdb84818..7523369dc2 100644 --- a/src/wal/src/walWrite.c +++ b/src/wal/src/walWrite.c @@ -309,36 +309,63 @@ static int32_t walSkipCorruptedRecord(SWal *pWal, SWalHead *pHead, int64_t tfd, } // Add SMemRowType ahead of SDataRow static void expandSubmitBlk(SSubmitBlk *pDest, SSubmitBlk *pSrc, int32_t *lenExpand) { + // copy the header firstly memcpy(pDest, pSrc, sizeof(SSubmitBlk)); - int nRows = htons(pSrc->numOfRows); - if (nRows <= 0) { + + int32_t nRows = htons(pDest->numOfRows); + int32_t dataLen = htonl(pDest->dataLen); + + if ((nRows <= 0) || (dataLen <= 0)) { return; } + char *pDestData = pDest->data; char *pSrcData = pSrc->data; - for (int i = 0; i < nRows; ++i) { + for (int32_t i = 0; i < nRows; ++i) { memRowSetType(pDestData, SMEM_ROW_DATA); memcpy(memRowDataBody(pDestData), pSrcData, dataRowLen(pSrcData)); pDestData = POINTER_SHIFT(pDestData, memRowTLen(pDestData)); pSrcData = POINTER_SHIFT(pSrcData, dataRowLen(pSrcData)); ++(*lenExpand); } - int32_t dataLen = htonl(pDest->dataLen); pDest->dataLen = htonl(dataLen + nRows * sizeof(uint8_t)); } +// Check SDataRow by comparing the SDataRow len and SSubmitBlk dataLen static bool walIsSDataRow(void *pBlkData, int nRows, int32_t dataLen) { - int32_t len = 0; + if ((nRows <= 0) || (dataLen <= 0)) { + return true; + } + int32_t len = 0, kvLen = 0; for (int i = 0; i < nRows; ++i) { len += dataRowLen(pBlkData); if (len > dataLen) { return false; } + + /** + * For SDataRow between version [2.1.5.0 and 2.1.6.X], it would never conflict. + * For SKVRow between version [2.1.5.0 and 2.1.6.X], it may conflict in below scenario + * - with 1st type byte 0x01 and sversion 0x0101(257), thus do further check + */ + if (dataRowLen(pBlkData) == 257) { + SMemRow memRow = pBlkData; + SKVRow kvRow = memRowKvBody(memRow); + int nCols = kvRowNCols(kvRow); + uint16_t calcTsOffset = (uint16_t)(TD_MEM_ROW_KV_HEAD_SIZE + sizeof(SColIdx) * nCols); + uint16_t realTsOffset = (kvRowColIdx(kvRow))->offset; + if (calcTsOffset == realTsOffset) { + kvLen += memRowKvTLen(memRow); + } + } pBlkData = POINTER_SHIFT(pBlkData, dataRowLen(pBlkData)); } if (len != dataLen) { return false; } + if (kvLen == dataLen) { + return false; + } return true; } // for WAL SMemRow/SDataRow compatibility From 4fcb2b07eff360a368db4f469c37e3972696b06a Mon Sep 17 00:00:00 2001 From: Cary Xu Date: Sat, 14 Aug 2021 09:42:35 +0800 Subject: [PATCH 3/5] update header --- src/wal/src/walWrite.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wal/src/walWrite.c b/src/wal/src/walWrite.c index 7523369dc2..9590aba224 100644 --- a/src/wal/src/walWrite.c +++ b/src/wal/src/walWrite.c @@ -352,7 +352,7 @@ static bool walIsSDataRow(void *pBlkData, int nRows, int32_t dataLen) { SMemRow memRow = pBlkData; SKVRow kvRow = memRowKvBody(memRow); int nCols = kvRowNCols(kvRow); - uint16_t calcTsOffset = (uint16_t)(TD_MEM_ROW_KV_HEAD_SIZE + sizeof(SColIdx) * nCols); + uint16_t calcTsOffset = (uint16_t)(TD_KV_ROW_HEAD_SIZE + sizeof(SColIdx) * nCols); uint16_t realTsOffset = (kvRowColIdx(kvRow))->offset; if (calcTsOffset == realTsOffset) { kvLen += memRowKvTLen(memRow); From b5c505a6b821092b6062a0e60b4fcb223d6b8b02 Mon Sep 17 00:00:00 2001 From: Cary Xu Date: Sat, 14 Aug 2021 11:11:20 +0800 Subject: [PATCH 4/5] code optimization --- src/wal/src/walWrite.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/wal/src/walWrite.c b/src/wal/src/walWrite.c index 9590aba224..e991bf02aa 100644 --- a/src/wal/src/walWrite.c +++ b/src/wal/src/walWrite.c @@ -388,7 +388,7 @@ static int walSMemRowCheck(SWalHead *pHead) { } pBlk = (SSubmitBlk *)POINTER_SHIFT(pBlk, sizeof(SSubmitBlk) + dataLen); } - + ASSERT(nTotalRows >= 0); SWalHead *pWalHead = (SWalHead *)calloc(sizeof(SWalHead) + pHead->len + nTotalRows * sizeof(uint8_t), 1); if (pWalHead == NULL) { return -1; @@ -544,6 +544,8 @@ static int32_t walRestoreWalFile(SWal *pWal, void *pVnode, FWalWrite writeFp, ch if (0 != walSMemRowCheck(pHead)) { wError("vgId:%d, restore wal, fileId:%" PRId64 " hver:%" PRIu64 " wver:%" PRIu64 " len:%d offset:%" PRId64, pWal->vgId, fileId, pHead->version, pWal->version, pHead->len, offset); + tfClose(tfd); + tfree(buffer); return TAOS_SYSTEM_ERROR(errno); } (*writeFp)(pVnode, pHead, TAOS_QTYPE_WAL, NULL); From e11f30466a456a1b8b526248eb7995155b6ce0d3 Mon Sep 17 00:00:00 2001 From: Cary Xu Date: Mon, 16 Aug 2021 11:38:16 +0800 Subject: [PATCH 5/5] [ci skip] --- src/wal/src/walWrite.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wal/src/walWrite.c b/src/wal/src/walWrite.c index e991bf02aa..cfadafebdd 100644 --- a/src/wal/src/walWrite.c +++ b/src/wal/src/walWrite.c @@ -540,7 +540,7 @@ static int32_t walRestoreWalFile(SWal *pWal, void *pVnode, FWalWrite writeFp, ch pWal->version = pHead->version; - // wInfo("writeFp: %ld", offset); + //wInfo("writeFp: %ld", offset); if (0 != walSMemRowCheck(pHead)) { wError("vgId:%d, restore wal, fileId:%" PRId64 " hver:%" PRIu64 " wver:%" PRIu64 " len:%d offset:%" PRId64, pWal->vgId, fileId, pHead->version, pWal->version, pHead->len, offset);