diff --git a/source/libs/sync/src/syncAppendEntries.c b/source/libs/sync/src/syncAppendEntries.c index 7013d281e3..c490b82d68 100644 --- a/source/libs/sync/src/syncAppendEntries.c +++ b/source/libs/sync/src/syncAppendEntries.c @@ -94,6 +94,7 @@ int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg) { } assert(pMsg->term <= ths->pRaftStore->currentTerm); + // reset elect timer if (pMsg->term == ths->pRaftStore->currentTerm) { ths->leaderCache = pMsg->srcId; syncNodeResetElectTimer(ths); @@ -135,38 +136,117 @@ int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg) { if (pMsg->term == ths->pRaftStore->currentTerm && ths->state == TAOS_SYNC_STATE_CANDIDATE) { syncNodeBecomeFollower(ths); - // need ret? + // ret or reply? return ret; } // accept request if (pMsg->term == ths->pRaftStore->currentTerm && ths->state == TAOS_SYNC_STATE_FOLLOWER && logOK) { - bool matchSuccess = false; + bool preMatch = false; if (pMsg->prevLogIndex == SYNC_INDEX_INVALID && ths->pLogStore->getLastIndex(ths->pLogStore) == SYNC_INDEX_INVALID) { - matchSuccess = true; + preMatch = true; } if (pMsg->prevLogIndex >= SYNC_INDEX_BEGIN && pMsg->prevLogIndex <= ths->pLogStore->getLastIndex(ths->pLogStore)) { SSyncRaftEntry* pPreEntry = logStoreGetEntry(ths->pLogStore, pMsg->prevLogIndex); assert(pPreEntry != NULL); if (pMsg->prevLogTerm == pPreEntry->term) { - matchSuccess = true; + preMatch = true; } syncEntryDestory(pPreEntry); } - if (matchSuccess) { - // delete conflict entries - if (pMsg->prevLogIndex < ths->pLogStore->getLastIndex(ths->pLogStore)) { - SyncIndex fromIndex = pMsg->prevLogIndex + 1; - ths->pLogStore->truncate(ths->pLogStore, fromIndex); - } + if (preMatch) { + // must has preIndex in local log + assert(pMsg->prevLogIndex <= ths->pLogStore->getLastIndex(ths->pLogStore)); - // append one entry - if (pMsg->dataLen > 0) { - SSyncRaftEntry* pEntry = syncEntryDeserialize(pMsg->data, pMsg->dataLen); - ths->pLogStore->appendEntry(ths->pLogStore, pEntry); - syncEntryDestory(pEntry); + bool hasExtraEntries = pMsg->prevLogIndex < ths->pLogStore->getLastIndex(ths->pLogStore); + bool hasAppendEntries = pMsg->dataLen > 0; + + if (hasExtraEntries && hasAppendEntries) { + // conflict + bool conflict = false; + + SyncIndex extraIndex = pMsg->prevLogIndex + 1; + SSyncRaftEntry* pExtraEntry = logStoreGetEntry(ths->pLogStore, extraIndex); + assert(pExtraEntry != NULL); + + SSyncRaftEntry* pAppendEntry = syncEntryDeserialize(pMsg->data, pMsg->dataLen); + assert(pAppendEntry != NULL); + + assert(extraIndex == pAppendEntry->index); + if (pExtraEntry->term == pAppendEntry->term) { + conflict = true; + } + + if (conflict) { + // roll back + SyncIndex delBegin = ths->pLogStore->getLastIndex(ths->pLogStore); + SyncIndex delEnd = extraIndex; + + // notice! reverse roll back! + for (SyncIndex index = delEnd; index >= delBegin; --index) { + if (ths->pFsm->FpRollBackCb != NULL) { + SSyncRaftEntry* pRollBackEntry = logStoreGetEntry(ths->pLogStore, index); + assert(pRollBackEntry != NULL); + + SRpcMsg rpcMsg; + syncEntry2OriginalRpc(pRollBackEntry, &rpcMsg); + ths->pFsm->FpRollBackCb(ths->pFsm, &rpcMsg, pRollBackEntry->index, pRollBackEntry->isWeak, 0); + rpcFreeCont(rpcMsg.pCont); + syncEntryDestory(pRollBackEntry); + } + } + + // delete confict entries + ths->pLogStore->truncate(ths->pLogStore, extraIndex); + + // append new entries + ths->pLogStore->appendEntry(ths->pLogStore, pAppendEntry); + + // pre commit + SRpcMsg rpcMsg; + syncEntry2OriginalRpc(pAppendEntry, &rpcMsg); + if (ths->pFsm != NULL) { + if (ths->pFsm->FpPreCommitCb != NULL) { + ths->pFsm->FpPreCommitCb(ths->pFsm, &rpcMsg, pAppendEntry->index, pAppendEntry->isWeak, 0); + } + } + rpcFreeCont(rpcMsg.pCont); + } + + // free memory + syncEntryDestory(pExtraEntry); + syncEntryDestory(pAppendEntry); + + } else if (hasExtraEntries && !hasAppendEntries) { + // do nothing + + } else if (!hasExtraEntries && hasAppendEntries) { + SSyncRaftEntry* pAppendEntry = syncEntryDeserialize(pMsg->data, pMsg->dataLen); + assert(pAppendEntry != NULL); + + // append new entries + ths->pLogStore->appendEntry(ths->pLogStore, pAppendEntry); + + // pre commit + SRpcMsg rpcMsg; + syncEntry2OriginalRpc(pAppendEntry, &rpcMsg); + if (ths->pFsm != NULL) { + if (ths->pFsm->FpPreCommitCb != NULL) { + ths->pFsm->FpPreCommitCb(ths->pFsm, &rpcMsg, pAppendEntry->index, pAppendEntry->isWeak, 0); + } + } + rpcFreeCont(rpcMsg.pCont); + + // free memory + syncEntryDestory(pAppendEntry); + + } else if (!hasExtraEntries && !hasAppendEntries) { + // do nothing + + } else { + assert(0); } SyncAppendEntriesReply* pReply = syncAppendEntriesReplyBuild(); @@ -175,7 +255,7 @@ int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg) { pReply->term = ths->pRaftStore->currentTerm; pReply->success = true; - if (pMsg->dataLen > 0) { + if (hasAppendEntries > 0) { pReply->matchIndex = pMsg->prevLogIndex + 1; } else { pReply->matchIndex = pMsg->prevLogIndex; @@ -201,11 +281,38 @@ int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg) { syncAppendEntriesReplyDestroy(pReply); } + // maybe update commit index from leader if (pMsg->commitIndex > ths->commitIndex) { + // has commit entry in local if (pMsg->commitIndex <= ths->pLogStore->getLastIndex(ths->pLogStore)) { - // commit + SyncIndex beginIndex = ths->commitIndex + 1; + SyncIndex endIndex = pMsg->commitIndex; + + // update commit index ths->commitIndex = pMsg->commitIndex; + + // call back Wal ths->pLogStore->updateCommitIndex(ths->pLogStore, ths->commitIndex); + + // execute fsm + if (ths->pFsm != NULL) { + for (SyncIndex i = beginIndex; i <= endIndex; ++i) { + if (i != SYNC_INDEX_INVALID) { + SSyncRaftEntry* pEntry = ths->pLogStore->getEntry(ths->pLogStore, i); + assert(pEntry != NULL); + + SRpcMsg rpcMsg; + syncEntry2OriginalRpc(pEntry, &rpcMsg); + + if (ths->pFsm->FpCommitCb != NULL) { + ths->pFsm->FpCommitCb(ths->pFsm, &rpcMsg, pEntry->index, pEntry->isWeak, 0); + } + + rpcFreeCont(rpcMsg.pCont); + syncEntryDestory(pEntry); + } + } + } } } } diff --git a/source/libs/sync/src/syncCommit.c b/source/libs/sync/src/syncCommit.c index 539bb21b71..c75d23d96d 100644 --- a/source/libs/sync/src/syncCommit.c +++ b/source/libs/sync/src/syncCommit.c @@ -17,6 +17,7 @@ #include "syncIndexMgr.h" #include "syncInt.h" #include "syncRaftLog.h" +#include "syncRaftStore.h" // \* Leader i advances its commitIndex. // \* This is done as a separate step from handling AppendEntries responses, @@ -50,16 +51,30 @@ void syncMaybeAdvanceCommitIndex(SSyncNode* pSyncNode) { for (SyncIndex index = pSyncNode->pLogStore->getLastIndex(pSyncNode->pLogStore); index > pSyncNode->commitIndex; ++index) { if (syncAgree(pSyncNode, index)) { - newCommitIndex = index; - break; + // term + SSyncRaftEntry* pEntry = pSyncNode->pLogStore->getEntry(pSyncNode->pLogStore, index); + assert(pEntry != NULL); + + // cannot commit, even if quorum agree. need check term! + if (pEntry->term == pSyncNode->pRaftStore->currentTerm) { + // update commit index + newCommitIndex = index; + break; + } } } if (newCommitIndex > pSyncNode->commitIndex) { SyncIndex beginIndex = pSyncNode->commitIndex + 1; SyncIndex endIndex = newCommitIndex; + + // update commit index pSyncNode->commitIndex = newCommitIndex; + // call back Wal + pSyncNode->pLogStore->updateCommitIndex(pSyncNode->pLogStore, pSyncNode->commitIndex); + + // execute fsm if (pSyncNode->pFsm != NULL) { for (SyncIndex i = beginIndex; i <= endIndex; ++i) { if (i != SYNC_INDEX_INVALID) {