From 1c3c270307cc9df5abbc9ed68f27605c6de3596f Mon Sep 17 00:00:00 2001 From: Minghao Li Date: Thu, 7 Jul 2022 17:00:01 +0800 Subject: [PATCH] fix(sync): snapshot strategy wal first --- source/dnode/mgmt/mgmt_mnode/src/mmHandle.c | 2 + source/dnode/mgmt/mgmt_vnode/src/vmHandle.c | 2 + source/libs/sync/inc/syncInt.h | 1 - source/libs/sync/src/syncAppendEntries.c | 17 +++---- source/libs/sync/src/syncAppendEntriesReply.c | 48 +++++++++++++++++-- source/libs/sync/src/syncMain.c | 19 ++++---- source/libs/sync/src/syncMessage.c | 2 +- source/libs/sync/src/syncRaftCfg.c | 8 ++-- source/libs/sync/src/syncReplication.c | 19 +++++++- tests/script/tsim/sync/vnodesnapshot.sim | 4 +- 10 files changed, 91 insertions(+), 31 deletions(-) diff --git a/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c b/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c index 59d68b2110..f4ff64520e 100644 --- a/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c +++ b/source/dnode/mgmt/mgmt_mnode/src/mmHandle.c @@ -237,10 +237,12 @@ SArray *mmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_SYNC_PING, mmPutMsgToSyncQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SYNC_PING_REPLY, mmPutMsgToSyncQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SYNC_CLIENT_REQUEST, mmPutMsgToSyncQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_SYNC_CLIENT_REQUEST_BATCH, mmPutMsgToSyncQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SYNC_CLIENT_REQUEST_REPLY, mmPutMsgToSyncQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SYNC_REQUEST_VOTE, mmPutMsgToSyncQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SYNC_REQUEST_VOTE_REPLY, mmPutMsgToSyncQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SYNC_APPEND_ENTRIES, mmPutMsgToSyncQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_SYNC_APPEND_ENTRIES_BATCH, mmPutMsgToSyncQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SYNC_APPEND_ENTRIES_REPLY, mmPutMsgToSyncQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SYNC_SNAPSHOT_SEND, mmPutMsgToSyncQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SYNC_SNAPSHOT_RSP, mmPutMsgToSyncQueue, 1) == NULL) goto _OVER; diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c index 1f7347203d..c81f77fa56 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c @@ -376,10 +376,12 @@ SArray *vmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_SYNC_PING, vmPutMsgToSyncQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SYNC_PING_REPLY, vmPutMsgToSyncQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SYNC_CLIENT_REQUEST, vmPutMsgToSyncQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_SYNC_CLIENT_REQUEST_BATCH, vmPutMsgToSyncQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SYNC_CLIENT_REQUEST_REPLY, vmPutMsgToSyncQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SYNC_REQUEST_VOTE, vmPutMsgToSyncQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SYNC_REQUEST_VOTE_REPLY, vmPutMsgToSyncQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SYNC_APPEND_ENTRIES, vmPutMsgToSyncQueue, 0) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_SYNC_APPEND_ENTRIES_BATCH, vmPutMsgToSyncQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SYNC_APPEND_ENTRIES_REPLY, vmPutMsgToSyncQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_SYNC_SET_VNODE_STANDBY, vmPutMsgToSyncQueue, 0) == NULL) goto _OVER; diff --git a/source/libs/sync/inc/syncInt.h b/source/libs/sync/inc/syncInt.h index 8936cd6ed9..9742f93824 100644 --- a/source/libs/sync/inc/syncInt.h +++ b/source/libs/sync/inc/syncInt.h @@ -67,7 +67,6 @@ typedef struct SSyncNode { char path[TSDB_FILENAME_LEN]; char raftStorePath[TSDB_FILENAME_LEN * 2]; char configPath[TSDB_FILENAME_LEN * 2]; - int32_t batchSize; // sync io SWal* pWal; diff --git a/source/libs/sync/src/syncAppendEntries.c b/source/libs/sync/src/syncAppendEntries.c index 885ab1acae..2d511b6003 100644 --- a/source/libs/sync/src/syncAppendEntries.c +++ b/source/libs/sync/src/syncAppendEntries.c @@ -842,8 +842,8 @@ int32_t syncNodeOnAppendEntriesSnapshot2Cb(SSyncNode* ths, SyncAppendEntriesBatc do { char logBuf[128]; snprintf(logBuf, sizeof(logBuf), - "recv sync-append-entries-batch, fake match2, pre-index:%ld, pre-term:%lu, datalen:%d", - pMsg->prevLogIndex, pMsg->prevLogTerm, pMsg->dataLen); + "recv sync-append-entries-batch, fake match2, {pre-index:%ld, pre-term:%lu, datalen:%d, datacount:%d}", + pMsg->prevLogIndex, pMsg->prevLogTerm, pMsg->dataLen, pMsg->dataCount); syncNodeEventLog(ths, logBuf); } while (0); @@ -876,7 +876,7 @@ int32_t syncNodeOnAppendEntriesSnapshot2Cb(SSyncNode* ths, SyncAppendEntriesBatc code = syncNodePreCommit(ths, pAppendEntry); ASSERT(code == 0); - syncEntryDestory(pAppendEntry); + // syncEntryDestory(pAppendEntry); } // fsync once @@ -931,8 +931,8 @@ int32_t syncNodeOnAppendEntriesSnapshot2Cb(SSyncNode* ths, SyncAppendEntriesBatc do { char logBuf[128]; snprintf(logBuf, sizeof(logBuf), - "recv sync-append-entries-batch, not match, pre-index:%ld, pre-term:%lu, datalen:%d", - pMsg->prevLogIndex, pMsg->prevLogTerm, pMsg->dataLen); + "recv sync-append-entries-batch, not match, {pre-index:%ld, pre-term:%lu, datalen:%d, datacount:%d}", + pMsg->prevLogIndex, pMsg->prevLogTerm, pMsg->dataLen, pMsg->dataCount); syncNodeEventLog(ths, logBuf); } while (0); @@ -976,8 +976,9 @@ int32_t syncNodeOnAppendEntriesSnapshot2Cb(SSyncNode* ths, SyncAppendEntriesBatc do { char logBuf[128]; - snprintf(logBuf, sizeof(logBuf), "recv sync-append-entries, match, pre-index:%ld, pre-term:%lu, datalen:%d", - pMsg->prevLogIndex, pMsg->prevLogTerm, pMsg->dataLen); + snprintf(logBuf, sizeof(logBuf), + "recv sync-append-entries, match, {pre-index:%ld, pre-term:%lu, datalen:%d, datacount:%d}", + pMsg->prevLogIndex, pMsg->prevLogTerm, pMsg->dataLen, pMsg->dataCount); syncNodeEventLog(ths, logBuf); } while (0); @@ -999,7 +1000,7 @@ int32_t syncNodeOnAppendEntriesSnapshot2Cb(SSyncNode* ths, SyncAppendEntriesBatc code = syncNodePreCommit(ths, pAppendEntry); ASSERT(code == 0); - syncEntryDestory(pAppendEntry); + // syncEntryDestory(pAppendEntry); } // fsync once diff --git a/source/libs/sync/src/syncAppendEntriesReply.c b/source/libs/sync/src/syncAppendEntriesReply.c index e4724fc999..3f19b6759e 100644 --- a/source/libs/sync/src/syncAppendEntriesReply.c +++ b/source/libs/sync/src/syncAppendEntriesReply.c @@ -174,8 +174,12 @@ int32_t syncNodeOnAppendEntriesReplySnapshot2Cb(SSyncNode* ths, SyncAppendEntrie SyncIndex newNextIndex = pMsg->matchIndex + 1; SyncIndex newMatchIndex = pMsg->matchIndex; - if (ths->pLogStore->syncLogExist(ths->pLogStore, newNextIndex) && - ths->pLogStore->syncLogExist(ths->pLogStore, newNextIndex - 1)) { + bool needStartSnapshot = false; + if (newMatchIndex >= SYNC_INDEX_BEGIN && !ths->pLogStore->syncLogExist(ths->pLogStore, newMatchIndex)) { + needStartSnapshot = true; + } + + if (!needStartSnapshot) { // update next-index, match-index syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), newNextIndex); syncIndexMgrSetIndex(ths->pMatchIndex, &(pMsg->srcId), newMatchIndex); @@ -197,15 +201,36 @@ int32_t syncNodeOnAppendEntriesReplySnapshot2Cb(SSyncNode* ths, SyncAppendEntrie syncIndexMgrSetIndex(ths->pMatchIndex, &(pMsg->srcId), newMatchIndex); } + // event log, update next-index + do { + char host[64]; + int16_t port; + syncUtilU642Addr(pMsg->srcId.addr, host, sizeof(host), &port); + + char logBuf[256]; + snprintf(logBuf, sizeof(logBuf), "reset next-index:%ld, match-index:%ld for %s:%d", newNextIndex, newMatchIndex, + host, port); + syncNodeEventLog(ths, logBuf); + + } while (0); + } else { SyncIndex nextIndex = syncIndexMgrGetIndex(ths->pNextIndex, &(pMsg->srcId)); if (nextIndex > SYNC_INDEX_BEGIN) { --nextIndex; - if (ths->pLogStore->syncLogExist(ths->pLogStore, nextIndex) && - ths->pLogStore->syncLogExist(ths->pLogStore, nextIndex - 1)) { + bool needStartSnapshot = false; + if (nextIndex >= SYNC_INDEX_BEGIN && !ths->pLogStore->syncLogExist(ths->pLogStore, nextIndex)) { + needStartSnapshot = true; + } + if (nextIndex - 1 >= SYNC_INDEX_BEGIN && !ths->pLogStore->syncLogExist(ths->pLogStore, nextIndex - 1)) { + needStartSnapshot = true; + } + + if (!needStartSnapshot) { // do nothing + } else { SSyncRaftEntry* pEntry; int32_t code = ths->pLogStore->syncLogGetEntry(ths->pLogStore, nextIndex, &pEntry); @@ -227,6 +252,21 @@ int32_t syncNodeOnAppendEntriesReplySnapshot2Cb(SSyncNode* ths, SyncAppendEntrie nextIndex = SYNC_INDEX_BEGIN; } syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), nextIndex); + + // event log, update next-index + do { + char host[64]; + int16_t port; + syncUtilU642Addr(pMsg->srcId.addr, host, sizeof(host), &port); + + SyncIndex newNextIndex = nextIndex; + SyncIndex newMatchIndex = syncIndexMgrGetIndex(ths->pMatchIndex, &(pMsg->srcId)); + char logBuf[256]; + snprintf(logBuf, sizeof(logBuf), "reset2 next-index:%ld, match-index:%ld for %s:%d", newNextIndex, newMatchIndex, + host, port); + syncNodeEventLog(ths, logBuf); + + } while (0); } return 0; diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index e207d89212..5951078822 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -648,8 +648,6 @@ void setHeartbeatTimerMS(int64_t rid, int32_t hbTimerMS) { } int32_t syncPropose(int64_t rid, SRpcMsg* pMsg, bool isWeak) { - int32_t ret = 0; - SSyncNode* pSyncNode = taosAcquireRef(tsNodeRefId, rid); if (pSyncNode == NULL) { taosReleaseRef(tsNodeRefId, rid); @@ -657,8 +655,8 @@ int32_t syncPropose(int64_t rid, SRpcMsg* pMsg, bool isWeak) { return -1; } ASSERT(rid == pSyncNode->rid); - ret = syncNodePropose(pSyncNode, pMsg, isWeak); + int32_t ret = syncNodePropose(pSyncNode, pMsg, isWeak); taosReleaseRef(tsNodeRefId, pSyncNode->rid); return ret; } @@ -669,15 +667,14 @@ int32_t syncProposeBatch(int64_t rid, SRpcMsg* pMsgArr, bool* pIsWeakArr, int32_ return -1; } - int32_t ret = 0; SSyncNode* pSyncNode = taosAcquireRef(tsNodeRefId, rid); if (pSyncNode == NULL) { terrno = TSDB_CODE_SYN_INTERNAL_ERROR; return -1; } ASSERT(rid == pSyncNode->rid); - ret = syncNodeProposeBatch(pSyncNode, pMsgArr, pIsWeakArr, arrSize); + int32_t ret = syncNodeProposeBatch(pSyncNode, pMsgArr, pIsWeakArr, arrSize); taosReleaseRef(tsNodeRefId, pSyncNode->rid); return ret; } @@ -1529,12 +1526,14 @@ void syncNodeEventLog(const SSyncNode* pSyncNode, char* str) { if (pSyncNode != NULL && pSyncNode->pRaftCfg != NULL && pSyncNode->pRaftStore != NULL) { snprintf(logBuf, sizeof(logBuf), "vgId:%d, sync %s %s, term:%lu, commit:%ld, beginlog:%ld, lastlog:%ld, lastsnapshot:%ld, standby:%d, " + "strategy:%d, batch:%d, " "replica-num:%d, " "lconfig:%ld, changing:%d, restore:%d, %s", pSyncNode->vgId, syncUtilState2String(pSyncNode->state), str, pSyncNode->pRaftStore->currentTerm, pSyncNode->commitIndex, logBeginIndex, logLastIndex, snapshot.lastApplyIndex, - pSyncNode->pRaftCfg->isStandBy, pSyncNode->replicaNum, pSyncNode->pRaftCfg->lastConfigIndex, - pSyncNode->changing, pSyncNode->restoreFinish, printStr); + pSyncNode->pRaftCfg->isStandBy, pSyncNode->pRaftCfg->snapshotStrategy, pSyncNode->pRaftCfg->batchSize, + pSyncNode->replicaNum, pSyncNode->pRaftCfg->lastConfigIndex, pSyncNode->changing, + pSyncNode->restoreFinish, printStr); } else { snprintf(logBuf, sizeof(logBuf), "%s", str); } @@ -1546,12 +1545,14 @@ void syncNodeEventLog(const SSyncNode* pSyncNode, char* str) { if (pSyncNode != NULL && pSyncNode->pRaftCfg != NULL && pSyncNode->pRaftStore != NULL) { snprintf(s, len, "vgId:%d, sync %s %s, term:%lu, commit:%ld, beginlog:%ld, lastlog:%ld, lastsnapshot:%ld, standby:%d, " + "strategy:%d, batch:%d, " "replica-num:%d, " "lconfig:%ld, changing:%d, restore:%d, %s", pSyncNode->vgId, syncUtilState2String(pSyncNode->state), str, pSyncNode->pRaftStore->currentTerm, pSyncNode->commitIndex, logBeginIndex, logLastIndex, snapshot.lastApplyIndex, - pSyncNode->pRaftCfg->isStandBy, pSyncNode->replicaNum, pSyncNode->pRaftCfg->lastConfigIndex, - pSyncNode->changing, pSyncNode->restoreFinish, printStr); + pSyncNode->pRaftCfg->isStandBy, pSyncNode->pRaftCfg->snapshotStrategy, pSyncNode->pRaftCfg->batchSize, + pSyncNode->replicaNum, pSyncNode->pRaftCfg->lastConfigIndex, pSyncNode->changing, + pSyncNode->restoreFinish, printStr); } else { snprintf(s, len, "%s", str); } diff --git a/source/libs/sync/src/syncMessage.c b/source/libs/sync/src/syncMessage.c index e53b9d5e36..697c25f304 100644 --- a/source/libs/sync/src/syncMessage.c +++ b/source/libs/sync/src/syncMessage.c @@ -1605,7 +1605,7 @@ void syncAppendEntriesLog2(char* s, const SyncAppendEntries* pMsg) { SyncAppendEntriesBatch* syncAppendEntriesBatchBuild(SSyncRaftEntry** entryPArr, int32_t arrSize, int32_t vgId) { ASSERT(entryPArr != NULL); - ASSERT(arrSize > 0); + ASSERT(arrSize >= 0); int32_t dataLen = 0; int32_t metaArrayLen = sizeof(SOffsetAndContLen) * arrSize; // diff --git a/source/libs/sync/src/syncRaftCfg.c b/source/libs/sync/src/syncRaftCfg.c index ead1168632..6c381f6e7d 100644 --- a/source/libs/sync/src/syncRaftCfg.c +++ b/source/libs/sync/src/syncRaftCfg.c @@ -101,7 +101,7 @@ cJSON *syncCfg2Json(SSyncCfg *pSyncCfg) { char *syncCfg2Str(SSyncCfg *pSyncCfg) { cJSON *pJson = syncCfg2Json(pSyncCfg); - char * serialized = cJSON_Print(pJson); + char *serialized = cJSON_Print(pJson); cJSON_Delete(pJson); return serialized; } @@ -109,7 +109,7 @@ char *syncCfg2Str(SSyncCfg *pSyncCfg) { char *syncCfg2SimpleStr(SSyncCfg *pSyncCfg) { if (pSyncCfg != NULL) { int32_t len = 512; - char * s = taosMemoryMalloc(len); + char *s = taosMemoryMalloc(len); memset(s, 0, len); snprintf(s, len, "{replica-num:%d, my-index:%d, ", pSyncCfg->replicaNum, pSyncCfg->myIndex); @@ -206,7 +206,7 @@ cJSON *raftCfg2Json(SRaftCfg *pRaftCfg) { char *raftCfg2Str(SRaftCfg *pRaftCfg) { cJSON *pJson = raftCfg2Json(pRaftCfg); - char * serialized = cJSON_Print(pJson); + char *serialized = cJSON_Print(pJson); cJSON_Delete(pJson); return serialized; } @@ -285,7 +285,7 @@ int32_t raftCfgFromJson(const cJSON *pRoot, SRaftCfg *pRaftCfg) { (pRaftCfg->configIndexArr)[i] = atoll(pIndex->valuestring); } - cJSON * pJsonSyncCfg = cJSON_GetObjectItem(pJson, "SSyncCfg"); + cJSON *pJsonSyncCfg = cJSON_GetObjectItem(pJson, "SSyncCfg"); int32_t code = syncCfgFromJson(pJsonSyncCfg, &(pRaftCfg->cfg)); ASSERT(code == 0); diff --git a/source/libs/sync/src/syncReplication.c b/source/libs/sync/src/syncReplication.c index da31e9c4c4..17d9aeaea0 100644 --- a/source/libs/sync/src/syncReplication.c +++ b/source/libs/sync/src/syncReplication.c @@ -148,9 +148,17 @@ int32_t syncNodeAppendEntriesPeersSnapshot2(SSyncNode* pSyncNode) { // get entry batch int32_t getCount = 0; SyncIndex getEntryIndex = nextIndex; - for (int32_t i = 0; i < pSyncNode->batchSize; ++i) { + for (int32_t i = 0; i < pSyncNode->pRaftCfg->batchSize; ++i) { SSyncRaftEntry* pEntry = NULL; int32_t code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, getEntryIndex, &pEntry); + + // event log + do { + char logBuf[128]; + snprintf(logBuf, sizeof(logBuf), "get index:%d, code:%d, %s", getEntryIndex, code, tstrerror(terrno)); + syncNodeEventLog(pSyncNode, logBuf); + } while (0); + if (code == 0) { ASSERT(pEntry != NULL); entryPArr[i] = pEntry; @@ -162,12 +170,19 @@ int32_t syncNodeAppendEntriesPeersSnapshot2(SSyncNode* pSyncNode) { } } + // event log + do { + char logBuf[128]; + snprintf(logBuf, sizeof(logBuf), "build batch:%d", getCount); + syncNodeEventLog(pSyncNode, logBuf); + } while (0); + // build msg SyncAppendEntriesBatch* pMsg = syncAppendEntriesBatchBuild(entryPArr, getCount, pSyncNode->vgId); ASSERT(pMsg != NULL); // free entries - for (int32_t i = 0; i < pSyncNode->batchSize; ++i) { + for (int32_t i = 0; i < pSyncNode->pRaftCfg->batchSize; ++i) { SSyncRaftEntry* pEntry = entryPArr[i]; if (pEntry != NULL) { syncEntryDestory(pEntry); diff --git a/tests/script/tsim/sync/vnodesnapshot.sim b/tests/script/tsim/sync/vnodesnapshot.sim index 7ad3afe309..da97f40f1a 100644 --- a/tests/script/tsim/sync/vnodesnapshot.sim +++ b/tests/script/tsim/sync/vnodesnapshot.sim @@ -145,9 +145,9 @@ system sh/exec.sh -n dnode4 -s stop -x SIGINT sql insert into ct1 values(now+0s, 10, 2.0, 3.0) sql insert into ct1 values(now+1s, 11, 2.1, 3.1)(now+2s, -12, -2.2, -3.2)(now+3s, -13, -2.3, -3.3) -sql flush database db; +#sql flush database db; -system sh/exec.sh -n dnode4 -s start +#system sh/exec.sh -n dnode4 -s start sql insert into ct1 values(now+1s, 81, 8.1, 8.1)(now+2s, -92, -9.2, -9.2)(now+3s, -73, -7.3, -7.3)