diff --git a/source/dnode/vnode/src/vnd/vnodeOpen.c b/source/dnode/vnode/src/vnd/vnodeOpen.c index 0914827950..1645aa37f9 100644 --- a/source/dnode/vnode/src/vnd/vnodeOpen.c +++ b/source/dnode/vnode/src/vnd/vnodeOpen.c @@ -119,7 +119,7 @@ SVnode *vnodeOpen(const char *path, STfs *pTfs, SMsgCb msgCb) { taosRealPath(tdir, NULL, sizeof(tdir)); // for test tsdb snapshot -#if 0 +#if 1 pVnode->config.walCfg.segSize = 200; pVnode->config.walCfg.retentionSize = 2000; #endif diff --git a/source/libs/sync/inc/syncInt.h b/source/libs/sync/inc/syncInt.h index 1f26033f63..0cbd7d36b2 100644 --- a/source/libs/sync/inc/syncInt.h +++ b/source/libs/sync/inc/syncInt.h @@ -223,6 +223,7 @@ void syncNodeVoteForSelf(SSyncNode* pSyncNode); // snapshot -------------- bool syncNodeHasSnapshot(SSyncNode* pSyncNode); +void syncNodeMaybeUpdateCommitBySnapshot(SSyncNode* pSyncNode); SyncIndex syncNodeGetLastIndex(SSyncNode* pSyncNode); SyncTerm syncNodeGetLastTerm(SSyncNode* pSyncNode); diff --git a/source/libs/sync/src/syncAppendEntries.c b/source/libs/sync/src/syncAppendEntries.c index dca80f6826..9678b335fd 100644 --- a/source/libs/sync/src/syncAppendEntries.c +++ b/source/libs/sync/src/syncAppendEntries.c @@ -711,6 +711,9 @@ int32_t syncNodeOnAppendEntriesSnapshot2Cb(SSyncNode* ths, SyncAppendEntriesBatc syncNodeEventLog(ths, logBuf); } while (0); + // maybe update commit index by snapshot + syncNodeMaybeUpdateCommitBySnapshot(ths); + // prepare response msg SyncAppendEntriesReply* pReply = syncAppendEntriesReplyBuild(ths->vgId); pReply->srcId = ths->myRaftId; @@ -718,7 +721,7 @@ int32_t syncNodeOnAppendEntriesSnapshot2Cb(SSyncNode* ths, SyncAppendEntriesBatc pReply->term = ths->pRaftStore->currentTerm; pReply->privateTerm = ths->pNewNodeReceiver->privateTerm; pReply->success = false; - pReply->matchIndex = SYNC_INDEX_INVALID; + pReply->matchIndex = ths->commitIndex; // msg event log do { diff --git a/source/libs/sync/src/syncAppendEntriesReply.c b/source/libs/sync/src/syncAppendEntriesReply.c index c18c2b4d38..bb55e3d9d6 100644 --- a/source/libs/sync/src/syncAppendEntriesReply.c +++ b/source/libs/sync/src/syncAppendEntriesReply.c @@ -147,6 +147,15 @@ static void syncNodeStartSnapshotOnce(SSyncNode* ths, SyncIndex beginIndex, Sync int32_t syncNodeOnAppendEntriesReplySnapshot2Cb(SSyncNode* ths, SyncAppendEntriesReply* pMsg) { int32_t ret = 0; + // print log + do { + char logBuf[256]; + snprintf(logBuf, sizeof(logBuf), "recv sync-append-entries-reply, term:%lu, match:%ld, success:%d", pMsg->term, + pMsg->matchIndex, pMsg->success); + syncNodeEventLog(ths, logBuf); + + } while (0); + // if already drop replica, do not process if (!syncNodeInRaftGroup(ths, &(pMsg->srcId)) && !ths->pRaftCfg->isStandBy) { syncNodeEventLog(ths, "recv sync-append-entries-reply, maybe replica already dropped"); @@ -238,7 +247,7 @@ int32_t syncNodeOnAppendEntriesReplySnapshot2Cb(SSyncNode* ths, SyncAppendEntrie SSnapshot oldSnapshot; ths->pFsm->FpGetSnapshotInfo(ths->pFsm, &oldSnapshot); SyncTerm newSnapshotTerm = oldSnapshot.lastApplyTerm; - syncNodeStartSnapshotOnce(ths, SYNC_INDEX_BEGIN, nextIndex, newSnapshotTerm, pMsg); + syncNodeStartSnapshotOnce(ths, pMsg->matchIndex + 1, nextIndex, newSnapshotTerm, pMsg); // get sender SSyncSnapshotSender* pSender = syncNodeGetSnapshotSender(ths, &(pMsg->srcId)); @@ -256,6 +265,11 @@ int32_t syncNodeOnAppendEntriesReplySnapshot2Cb(SSyncNode* ths, SyncAppendEntrie } syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), nextIndex); + SyncIndex oldMatchIndex = syncIndexMgrGetIndex(ths->pMatchIndex, &(pMsg->srcId)); + if (pMsg->matchIndex > oldMatchIndex) { + syncIndexMgrSetIndex(ths->pMatchIndex, &(pMsg->srcId), pMsg->matchIndex); + } + // event log, update next-index do { char host[64]; diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index b6c01f2923..bb7454ea6f 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -1083,6 +1083,17 @@ SSyncNode* syncNodeOpen(const SSyncInfo* pOldSyncInfo) { return pSyncNode; } +void syncNodeMaybeUpdateCommitBySnapshot(SSyncNode* pSyncNode) { + if (pSyncNode->pFsm != NULL && pSyncNode->pFsm->FpGetSnapshotInfo != NULL) { + SSnapshot snapshot; + int32_t code = pSyncNode->pFsm->FpGetSnapshotInfo(pSyncNode->pFsm, &snapshot); + ASSERT(code == 0); + if (snapshot.lastApplyIndex > pSyncNode->commitIndex) { + pSyncNode->commitIndex = snapshot.lastApplyIndex; + } + } +} + void syncNodeStart(SSyncNode* pSyncNode) { // start raft if (pSyncNode->replicaNum == 1) { diff --git a/tests/script/tsim/sync/vnodesnapshot-test.sim b/tests/script/tsim/sync/vnodesnapshot-test.sim index 8589078b50..2f0eccf02e 100644 --- a/tests/script/tsim/sync/vnodesnapshot-test.sim +++ b/tests/script/tsim/sync/vnodesnapshot-test.sim @@ -170,84 +170,3 @@ if $rows != 100 then return -1 endi -system sh/exec.sh -n dnode1 -s stop -x SIGINT -system sh/exec.sh -n dnode2 -s stop -x SIGINT -system sh/exec.sh -n dnode3 -s stop -x SIGINT -system sh/exec.sh -n dnode4 -s stop -x SIGINT -######################################################## - - -######################################################## -print ===> start dnode1 dnode3 dnode4 -system sh/exec.sh -n dnode1 -s start -#system sh/exec.sh -n dnode2 -s start -system sh/exec.sh -n dnode3 -s start -system sh/exec.sh -n dnode4 -s start - -sleep 7000 - -print =============== query data -sql connect -sql use db -sql select * from ct1 -print rows: $rows -print $data00 $data01 $data02 -if $rows != 100 then - return -1 -endi - -system sh/exec.sh -n dnode1 -s stop -x SIGINT -#system sh/exec.sh -n dnode2 -s stop -x SIGINT -system sh/exec.sh -n dnode3 -s stop -x SIGINT -system sh/exec.sh -n dnode4 -s stop -x SIGINT -######################################################## - - -######################################################## -print ===> start dnode1 dnode2 dnode4 -system sh/exec.sh -n dnode1 -s start -system sh/exec.sh -n dnode2 -s start -#system sh/exec.sh -n dnode3 -s start -system sh/exec.sh -n dnode4 -s start - -sleep 3000 - -print =============== query data -sql select * from ct1 -print rows: $rows -print $data00 $data01 $data02 -if $rows != 100 then - return -1 -endi - -system sh/exec.sh -n dnode1 -s stop -x SIGINT -system sh/exec.sh -n dnode2 -s stop -x SIGINT -#system sh/exec.sh -n dnode3 -s stop -x SIGINT -system sh/exec.sh -n dnode4 -s stop -x SIGINT -######################################################## - - -######################################################## -print ===> start dnode1 dnode2 dnode3 -system sh/exec.sh -n dnode1 -s start -system sh/exec.sh -n dnode2 -s start -system sh/exec.sh -n dnode3 -s start -#system sh/exec.sh -n dnode4 -s start - -sleep 3000 - -print =============== query data -sql select * from ct1 -print rows: $rows -print $data00 $data01 $data02 -if $rows != 100 then - return -1 -endi - -system sh/exec.sh -n dnode1 -s stop -x SIGINT -system sh/exec.sh -n dnode2 -s stop -x SIGINT -system sh/exec.sh -n dnode3 -s stop -x SIGINT -#system sh/exec.sh -n dnode4 -s stop -x SIGINT -######################################################## - -