fix: update sync node commitIndex only if matchTerm equals currentTerm upon heartbeat

This commit is contained in:
Benguang Zhao 2022-12-28 20:20:41 +08:00
parent c4fad84c7c
commit 0c4ade9373
8 changed files with 34 additions and 15 deletions

View File

@ -247,8 +247,8 @@ typedef struct SyncLocalCmd {
SRaftId destId; SRaftId destId;
int32_t cmd; int32_t cmd;
SyncTerm sdNewTerm; // step down new term SyncTerm currentTerm; // step down new term
SyncIndex fcIndex; // follower commit index SyncIndex commitIndex; // follower commit index
} SyncLocalCmd; } SyncLocalCmd;
int32_t syncBuildTimeout(SRpcMsg* pMsg, ESyncTimeoutType ttype, uint64_t logicClock, int32_t ms, SSyncNode* pNode); int32_t syncBuildTimeout(SRpcMsg* pMsg, ESyncTimeoutType ttype, uint64_t logicClock, int32_t ms, SSyncNode* pNode);

View File

@ -98,6 +98,7 @@ int32_t syncLogBufferReInit(SSyncLogBuffer* pBuf, SSyncNode* pNode);
// access // access
int64_t syncLogBufferGetEndIndex(SSyncLogBuffer* pBuf); int64_t syncLogBufferGetEndIndex(SSyncLogBuffer* pBuf);
SyncTerm syncLogBufferGetLastMatchTerm(SSyncLogBuffer* pBuf);
int32_t syncLogBufferAppend(SSyncLogBuffer* pBuf, SSyncNode* pNode, SSyncRaftEntry* pEntry); int32_t syncLogBufferAppend(SSyncLogBuffer* pBuf, SSyncNode* pNode, SSyncRaftEntry* pEntry);
int32_t syncLogBufferAccept(SSyncLogBuffer* pBuf, SSyncNode* pNode, SSyncRaftEntry* pEntry, SyncTerm prevTerm); int32_t syncLogBufferAccept(SSyncLogBuffer* pBuf, SSyncNode* pNode, SSyncRaftEntry* pEntry, SyncTerm prevTerm);
int64_t syncLogBufferProceed(SSyncLogBuffer* pBuf, SSyncNode* pNode, SyncTerm* pMatchTerm); int64_t syncLogBufferProceed(SSyncLogBuffer* pBuf, SSyncNode* pNode, SyncTerm* pMatchTerm);

View File

@ -90,6 +90,7 @@
// //
int32_t syncNodeFollowerCommit(SSyncNode* ths, SyncIndex newCommitIndex) { int32_t syncNodeFollowerCommit(SSyncNode* ths, SyncIndex newCommitIndex) {
ASSERT(false && "deprecated");
if (ths->state != TAOS_SYNC_STATE_FOLLOWER) { if (ths->state != TAOS_SYNC_STATE_FOLLOWER) {
sNTrace(ths, "can not do follower commit"); sNTrace(ths, "can not do follower commit");
return -1; return -1;

View File

@ -44,6 +44,7 @@
// /\ UNCHANGED <<messages, serverVars, candidateVars, leaderVars, log>> // /\ UNCHANGED <<messages, serverVars, candidateVars, leaderVars, log>>
// //
void syncOneReplicaAdvance(SSyncNode* pSyncNode) { void syncOneReplicaAdvance(SSyncNode* pSyncNode) {
ASSERT(false && "deprecated");
if (pSyncNode == NULL) { if (pSyncNode == NULL) {
sError("pSyncNode is NULL"); sError("pSyncNode is NULL");
return; return;

View File

@ -1036,6 +1036,7 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) {
} }
} }
pSyncNode->commitIndex = commitIndex; pSyncNode->commitIndex = commitIndex;
sInfo("vgId:%d, sync node commitIndex initialized as %" PRId64, pSyncNode->vgId, pSyncNode->commitIndex);
if (syncNodeLogStoreRestoreOnNeed(pSyncNode) < 0) { if (syncNodeLogStoreRestoreOnNeed(pSyncNode) < 0) {
goto _error; goto _error;
@ -1176,9 +1177,10 @@ int32_t syncNodeRestore(SSyncNode* pSyncNode) {
} }
ASSERT(endIndex == lastVer + 1); ASSERT(endIndex == lastVer + 1);
commitIndex = TMAX(pSyncNode->commitIndex, commitIndex); pSyncNode->commitIndex = TMAX(pSyncNode->commitIndex, commitIndex);
sInfo("vgId:%d, restore sync until commitIndex:%" PRId64, pSyncNode->vgId, pSyncNode->commitIndex);
if (syncLogBufferCommit(pSyncNode->pLogBuf, pSyncNode, commitIndex) < 0) { if (syncLogBufferCommit(pSyncNode->pLogBuf, pSyncNode, pSyncNode->commitIndex) < 0) {
return -1; return -1;
} }
@ -2545,8 +2547,9 @@ int32_t syncNodeOnHeartbeat(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
SyncLocalCmd* pSyncMsg = rpcMsgLocalCmd.pCont; SyncLocalCmd* pSyncMsg = rpcMsgLocalCmd.pCont;
pSyncMsg->cmd = SYNC_LOCAL_CMD_FOLLOWER_CMT; pSyncMsg->cmd = SYNC_LOCAL_CMD_FOLLOWER_CMT;
pSyncMsg->fcIndex = pMsg->commitIndex; pSyncMsg->commitIndex = pMsg->commitIndex;
SyncIndex fcIndex = pSyncMsg->fcIndex; pSyncMsg->currentTerm = pMsg->term;
SyncIndex fcIndex = pSyncMsg->commitIndex;
if (ths->syncEqMsg != NULL && ths->msgcb != NULL) { if (ths->syncEqMsg != NULL && ths->msgcb != NULL) {
int32_t code = ths->syncEqMsg(ths->msgcb, &rpcMsgLocalCmd); int32_t code = ths->syncEqMsg(ths->msgcb, &rpcMsgLocalCmd);
@ -2567,7 +2570,8 @@ int32_t syncNodeOnHeartbeat(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
SyncLocalCmd* pSyncMsg = rpcMsgLocalCmd.pCont; SyncLocalCmd* pSyncMsg = rpcMsgLocalCmd.pCont;
pSyncMsg->cmd = SYNC_LOCAL_CMD_STEP_DOWN; pSyncMsg->cmd = SYNC_LOCAL_CMD_STEP_DOWN;
pSyncMsg->sdNewTerm = pMsg->term; pSyncMsg->currentTerm = pMsg->term;
pSyncMsg->commitIndex = pMsg->commitIndex;
if (ths->syncEqMsg != NULL && ths->msgcb != NULL) { if (ths->syncEqMsg != NULL && ths->msgcb != NULL) {
int32_t code = ths->syncEqMsg(ths->msgcb, &rpcMsgLocalCmd); int32_t code = ths->syncEqMsg(ths->msgcb, &rpcMsgLocalCmd);
@ -2575,7 +2579,7 @@ int32_t syncNodeOnHeartbeat(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
sError("vgId:%d, sync enqueue step-down msg error, code:%d", ths->vgId, code); sError("vgId:%d, sync enqueue step-down msg error, code:%d", ths->vgId, code);
rpcFreeCont(rpcMsgLocalCmd.pCont); rpcFreeCont(rpcMsgLocalCmd.pCont);
} else { } else {
sTrace("vgId:%d, sync enqueue step-down msg, new-term: %" PRId64, ths->vgId, pSyncMsg->sdNewTerm); sTrace("vgId:%d, sync enqueue step-down msg, new-term: %" PRId64, ths->vgId, pSyncMsg->currentTerm);
} }
} }
} }
@ -2633,10 +2637,13 @@ int32_t syncNodeOnLocalCmd(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
syncLogRecvLocalCmd(ths, pMsg, ""); syncLogRecvLocalCmd(ths, pMsg, "");
if (pMsg->cmd == SYNC_LOCAL_CMD_STEP_DOWN) { if (pMsg->cmd == SYNC_LOCAL_CMD_STEP_DOWN) {
syncNodeStepDown(ths, pMsg->sdNewTerm); syncNodeStepDown(ths, pMsg->currentTerm);
} else if (pMsg->cmd == SYNC_LOCAL_CMD_FOLLOWER_CMT) { } else if (pMsg->cmd == SYNC_LOCAL_CMD_FOLLOWER_CMT) {
(void)syncNodeUpdateCommitIndex(ths, pMsg->fcIndex); SyncTerm matchTerm = syncLogBufferGetLastMatchTerm(ths->pLogBuf);
if (pMsg->currentTerm == matchTerm) {
(void)syncNodeUpdateCommitIndex(ths, pMsg->commitIndex);
}
if (syncLogBufferCommit(ths->pLogBuf, ths, ths->commitIndex) < 0) { if (syncLogBufferCommit(ths->pLogBuf, ths, ths->commitIndex) < 0) {
sError("vgId:%d, failed to commit raft log since %s. commit index: %" PRId64 "", ths->vgId, terrstr(), sError("vgId:%d, failed to commit raft log since %s. commit index: %" PRId64 "", ths->vgId, terrstr(),
ths->commitIndex); ths->commitIndex);
@ -2649,14 +2656,15 @@ int32_t syncNodeOnLocalCmd(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
} }
int32_t syncNodeOnLocalCmdOld(SSyncNode* ths, const SRpcMsg* pRpcMsg) { int32_t syncNodeOnLocalCmdOld(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
ASSERT(false && "deprecated");
SyncLocalCmd* pMsg = pRpcMsg->pCont; SyncLocalCmd* pMsg = pRpcMsg->pCont;
syncLogRecvLocalCmd(ths, pMsg, ""); syncLogRecvLocalCmd(ths, pMsg, "");
if (pMsg->cmd == SYNC_LOCAL_CMD_STEP_DOWN) { if (pMsg->cmd == SYNC_LOCAL_CMD_STEP_DOWN) {
syncNodeStepDown(ths, pMsg->sdNewTerm); syncNodeStepDown(ths, pMsg->currentTerm);
} else if (pMsg->cmd == SYNC_LOCAL_CMD_FOLLOWER_CMT) { } else if (pMsg->cmd == SYNC_LOCAL_CMD_FOLLOWER_CMT) {
syncNodeFollowerCommit(ths, pMsg->fcIndex); syncNodeFollowerCommit(ths, pMsg->commitIndex);
} else { } else {
sError("error local cmd"); sError("error local cmd");

View File

@ -265,20 +265,27 @@ int32_t syncLogBufferReInit(SSyncLogBuffer* pBuf, SSyncNode* pNode) {
return ret; return ret;
} }
FORCE_INLINE SyncTerm syncLogBufferGetLastMatchTerm(SSyncLogBuffer* pBuf) { FORCE_INLINE SyncTerm syncLogBufferGetLastMatchTermWithoutLock(SSyncLogBuffer* pBuf) {
SyncIndex index = pBuf->matchIndex; SyncIndex index = pBuf->matchIndex;
SSyncRaftEntry* pEntry = pBuf->entries[(index + pBuf->size) % pBuf->size].pItem; SSyncRaftEntry* pEntry = pBuf->entries[(index + pBuf->size) % pBuf->size].pItem;
ASSERT(pEntry != NULL); ASSERT(pEntry != NULL);
return pEntry->term; return pEntry->term;
} }
SyncTerm syncLogBufferGetLastMatchTerm(SSyncLogBuffer* pBuf) {
taosThreadMutexLock(&pBuf->mutex);
SyncTerm term = syncLogBufferGetLastMatchTermWithoutLock(pBuf);
taosThreadMutexUnlock(&pBuf->mutex);
return term;
}
int32_t syncLogBufferAccept(SSyncLogBuffer* pBuf, SSyncNode* pNode, SSyncRaftEntry* pEntry, SyncTerm prevTerm) { int32_t syncLogBufferAccept(SSyncLogBuffer* pBuf, SSyncNode* pNode, SSyncRaftEntry* pEntry, SyncTerm prevTerm) {
taosThreadMutexLock(&pBuf->mutex); taosThreadMutexLock(&pBuf->mutex);
syncLogBufferValidate(pBuf); syncLogBufferValidate(pBuf);
int32_t ret = -1; int32_t ret = -1;
SyncIndex index = pEntry->index; SyncIndex index = pEntry->index;
SyncIndex prevIndex = pEntry->index - 1; SyncIndex prevIndex = pEntry->index - 1;
SyncTerm lastMatchTerm = syncLogBufferGetLastMatchTerm(pBuf); SyncTerm lastMatchTerm = syncLogBufferGetLastMatchTermWithoutLock(pBuf);
SSyncRaftEntry* pExist = NULL; SSyncRaftEntry* pExist = NULL;
bool inBuf = true; bool inBuf = true;

View File

@ -411,7 +411,7 @@ void syncLogRecvLocalCmd(SSyncNode* pSyncNode, const SyncLocalCmd* pMsg, const c
if (!(sDebugFlag & DEBUG_TRACE)) return; if (!(sDebugFlag & DEBUG_TRACE)) return;
sNTrace(pSyncNode, "recv sync-local-cmd {cmd:%d-%s, sd-new-term:%" PRId64 ", fc-index:%" PRId64 "}, %s", pMsg->cmd, sNTrace(pSyncNode, "recv sync-local-cmd {cmd:%d-%s, sd-new-term:%" PRId64 ", fc-index:%" PRId64 "}, %s", pMsg->cmd,
syncLocalCmdGetStr(pMsg->cmd), pMsg->sdNewTerm, pMsg->fcIndex, s); syncLocalCmdGetStr(pMsg->cmd), pMsg->currentTerm, pMsg->commitIndex, s);
} }
void syncLogSendAppendEntriesReply(SSyncNode* pSyncNode, const SyncAppendEntriesReply* pMsg, const char* s) { void syncLogSendAppendEntriesReply(SSyncNode* pSyncNode, const SyncAppendEntriesReply* pMsg, const char* s) {

View File

@ -295,6 +295,7 @@ void walAlignVersions(SWal* pWal) {
// reset commitVer and appliedVer // reset commitVer and appliedVer
pWal->vers.commitVer = pWal->vers.snapshotVer; pWal->vers.commitVer = pWal->vers.snapshotVer;
pWal->vers.appliedVer = pWal->vers.snapshotVer; pWal->vers.appliedVer = pWal->vers.snapshotVer;
wInfo("vgId:%d, reset commitVer to %" PRId64, pWal->cfg.vgId, pWal->vers.commitVer);
} }
bool walLogEntriesComplete(const SWal* pWal) { bool walLogEntriesComplete(const SWal* pWal) {