Merge pull request #14438 from taosdata/feature/3.0_mhli
refactor(sync): add snapshot2 interface
This commit is contained in:
commit
3d03d326a7
|
@ -362,6 +362,11 @@ typedef struct SOffsetAndContLen {
|
|||
} SOffsetAndContLen;
|
||||
*/
|
||||
|
||||
// block1: SOffsetAndContLen
|
||||
// block2: SOffsetAndContLen Array
|
||||
// block3: SRpcMsg Array
|
||||
// block4: SRpcMsg pCont Array
|
||||
|
||||
typedef struct SyncAppendEntriesBatch {
|
||||
uint32_t bytes;
|
||||
int32_t vgId;
|
||||
|
|
|
@ -56,8 +56,8 @@ typedef struct SSyncSnapshotSender {
|
|||
SSyncSnapshotSender *snapshotSenderCreate(SSyncNode *pSyncNode, int32_t replicaIndex);
|
||||
void snapshotSenderDestroy(SSyncSnapshotSender *pSender);
|
||||
bool snapshotSenderIsStart(SSyncSnapshotSender *pSender);
|
||||
void snapshotSenderStart(SSyncSnapshotSender *pSender, SSnapshot snapshot, void *pReader);
|
||||
void snapshotSenderStop(SSyncSnapshotSender *pSender, bool finish);
|
||||
int32_t snapshotSenderStart(SSyncSnapshotSender *pSender, SSnapshot snapshot, void *pReader);
|
||||
int32_t snapshotSenderStop(SSyncSnapshotSender *pSender, bool finish);
|
||||
int32_t snapshotSend(SSyncSnapshotSender *pSender);
|
||||
int32_t snapshotReSend(SSyncSnapshotSender *pSender);
|
||||
|
||||
|
@ -80,9 +80,9 @@ typedef struct SSyncSnapshotReceiver {
|
|||
|
||||
SSyncSnapshotReceiver *snapshotReceiverCreate(SSyncNode *pSyncNode, SRaftId fromId);
|
||||
void snapshotReceiverDestroy(SSyncSnapshotReceiver *pReceiver);
|
||||
void snapshotReceiverStart(SSyncSnapshotReceiver *pReceiver, SyncTerm privateTerm, SyncSnapshotSend *pBeginMsg);
|
||||
int32_t snapshotReceiverStart(SSyncSnapshotReceiver *pReceiver, SyncTerm privateTerm, SyncSnapshotSend *pBeginMsg);
|
||||
int32_t snapshotReceiverStop(SSyncSnapshotReceiver *pReceiver);
|
||||
bool snapshotReceiverIsStart(SSyncSnapshotReceiver *pReceiver);
|
||||
void snapshotReceiverStop(SSyncSnapshotReceiver *pReceiver);
|
||||
|
||||
cJSON *snapshotReceiver2Json(SSyncSnapshotReceiver *pReceiver);
|
||||
char *snapshotReceiver2Str(SSyncSnapshotReceiver *pReceiver);
|
||||
|
|
|
@ -628,6 +628,8 @@ int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg) {
|
|||
|
||||
#endif
|
||||
|
||||
static int32_t syncNodeMakeLogSame2(SSyncNode* ths, SyncAppendEntriesBatch* pMsg) { return 0; }
|
||||
|
||||
static int32_t syncNodeMakeLogSame(SSyncNode* ths, SyncAppendEntries* pMsg) {
|
||||
int32_t code;
|
||||
|
||||
|
@ -719,7 +721,282 @@ static bool syncNodeOnAppendEntriesLogOK(SSyncNode* pSyncNode, SyncAppendEntries
|
|||
return false;
|
||||
}
|
||||
|
||||
int32_t syncNodeOnAppendEntriesSnapshot2Cb(SSyncNode* ths, SyncAppendEntriesBatch* pMsg) { return 0; }
|
||||
int32_t syncNodeOnAppendEntriesSnapshot2Cb(SSyncNode* ths, SyncAppendEntriesBatch* pMsg) {
|
||||
int32_t ret = 0;
|
||||
int32_t code = 0;
|
||||
|
||||
// if already drop replica, do not process
|
||||
if (!syncNodeInRaftGroup(ths, &(pMsg->srcId)) && !ths->pRaftCfg->isStandBy) {
|
||||
syncNodeEventLog(ths, "recv sync-append-entries-batch, maybe replica already dropped");
|
||||
return ret;
|
||||
}
|
||||
|
||||
// maybe update term
|
||||
if (pMsg->term > ths->pRaftStore->currentTerm) {
|
||||
syncNodeUpdateTerm(ths, pMsg->term);
|
||||
}
|
||||
ASSERT(pMsg->term <= ths->pRaftStore->currentTerm);
|
||||
|
||||
// reset elect timer
|
||||
if (pMsg->term == ths->pRaftStore->currentTerm) {
|
||||
ths->leaderCache = pMsg->srcId;
|
||||
syncNodeResetElectTimer(ths);
|
||||
}
|
||||
ASSERT(pMsg->dataLen >= 0);
|
||||
|
||||
// candidate to follower
|
||||
//
|
||||
// operation:
|
||||
// to follower
|
||||
do {
|
||||
bool condition = pMsg->term == ths->pRaftStore->currentTerm && ths->state == TAOS_SYNC_STATE_CANDIDATE;
|
||||
if (condition) {
|
||||
syncNodeEventLog(ths, "recv sync-append-entries-batch, candidate to follower");
|
||||
|
||||
syncNodeBecomeFollower(ths, "from candidate by append entries");
|
||||
// do not reply?
|
||||
return ret;
|
||||
}
|
||||
} while (0);
|
||||
|
||||
// fake match2
|
||||
//
|
||||
// condition1:
|
||||
// preIndex <= my commit index
|
||||
//
|
||||
// operation:
|
||||
// if hasAppendEntries && pMsg->prevLogIndex == ths->commitIndex, append entry
|
||||
// match my-commit-index or my-commit-index + 1
|
||||
// no operation on log
|
||||
do {
|
||||
bool condition = (pMsg->term == ths->pRaftStore->currentTerm) && (ths->state == TAOS_SYNC_STATE_FOLLOWER) &&
|
||||
(pMsg->prevLogIndex <= ths->commitIndex);
|
||||
if (condition) {
|
||||
do {
|
||||
char logBuf[128];
|
||||
snprintf(logBuf, sizeof(logBuf),
|
||||
"recv sync-append-entries-batch, fake match2, pre-index:%ld, pre-term:%lu, datalen:%d",
|
||||
pMsg->prevLogIndex, pMsg->prevLogTerm, pMsg->dataLen);
|
||||
syncNodeEventLog(ths, logBuf);
|
||||
} while (0);
|
||||
|
||||
SyncIndex matchIndex = ths->commitIndex;
|
||||
bool hasAppendEntries = pMsg->dataLen > 0;
|
||||
if (hasAppendEntries && pMsg->prevLogIndex == ths->commitIndex) {
|
||||
SRpcMsg rpcMsgArr[SYNC_MAX_BATCH_SIZE];
|
||||
memset(rpcMsgArr, 0, sizeof(rpcMsgArr));
|
||||
int32_t retArrSize = 0;
|
||||
syncAppendEntriesBatch2RpcMsgArray(pMsg, rpcMsgArr, SYNC_MAX_BATCH_SIZE, &retArrSize);
|
||||
|
||||
// make log same
|
||||
do {
|
||||
SyncIndex logLastIndex = ths->pLogStore->syncLogLastIndex(ths->pLogStore);
|
||||
bool hasExtraEntries = logLastIndex > pMsg->prevLogIndex;
|
||||
|
||||
if (hasExtraEntries) {
|
||||
// make log same, rollback deleted entries
|
||||
code = syncNodeMakeLogSame2(ths, pMsg);
|
||||
ASSERT(code == 0);
|
||||
}
|
||||
|
||||
} while (0);
|
||||
|
||||
// append entry batch
|
||||
for (int32_t i = 0; i < retArrSize; ++i) {
|
||||
SSyncRaftEntry* pAppendEntry = syncEntryBuild(1234);
|
||||
code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pAppendEntry);
|
||||
if (code != 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
code = syncNodePreCommit(ths, pAppendEntry);
|
||||
ASSERT(code == 0);
|
||||
|
||||
syncEntryDestory(pAppendEntry);
|
||||
}
|
||||
|
||||
// fsync once
|
||||
SSyncLogStoreData* pData = ths->pLogStore->data;
|
||||
SWal* pWal = pData->pWal;
|
||||
walFsync(pWal, true);
|
||||
|
||||
// update match index
|
||||
matchIndex = pMsg->prevLogIndex + retArrSize;
|
||||
}
|
||||
|
||||
// prepare response msg
|
||||
SyncAppendEntriesReply* pReply = syncAppendEntriesReplyBuild(ths->vgId);
|
||||
pReply->srcId = ths->myRaftId;
|
||||
pReply->destId = pMsg->srcId;
|
||||
pReply->term = ths->pRaftStore->currentTerm;
|
||||
pReply->privateTerm = ths->pNewNodeReceiver->privateTerm;
|
||||
pReply->success = true;
|
||||
pReply->matchIndex = matchIndex;
|
||||
|
||||
// send response
|
||||
SRpcMsg rpcMsg;
|
||||
syncAppendEntriesReply2RpcMsg(pReply, &rpcMsg);
|
||||
syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg);
|
||||
syncAppendEntriesReplyDestroy(pReply);
|
||||
|
||||
return ret;
|
||||
}
|
||||
} while (0);
|
||||
|
||||
// calculate logOK here, before will coredump, due to fake match
|
||||
// bool logOK = syncNodeOnAppendEntriesLogOK(ths, pMsg);
|
||||
bool logOK = true;
|
||||
|
||||
// not match
|
||||
//
|
||||
// condition1:
|
||||
// term < myTerm
|
||||
//
|
||||
// condition2:
|
||||
// !logOK
|
||||
//
|
||||
// operation:
|
||||
// not match
|
||||
// no operation on log
|
||||
do {
|
||||
bool condition1 = pMsg->term < ths->pRaftStore->currentTerm;
|
||||
bool condition2 =
|
||||
(pMsg->term == ths->pRaftStore->currentTerm) && (ths->state == TAOS_SYNC_STATE_FOLLOWER) && !logOK;
|
||||
bool condition = condition1 || condition2;
|
||||
|
||||
if (condition) {
|
||||
char logBuf[128];
|
||||
snprintf(logBuf, sizeof(logBuf), "recv sync-append-entries, not match, pre-index:%ld, pre-term:%lu, datalen:%d",
|
||||
pMsg->prevLogIndex, pMsg->prevLogTerm, pMsg->dataLen);
|
||||
syncNodeEventLog(ths, logBuf);
|
||||
|
||||
// prepare response msg
|
||||
SyncAppendEntriesReply* pReply = syncAppendEntriesReplyBuild(ths->vgId);
|
||||
pReply->srcId = ths->myRaftId;
|
||||
pReply->destId = pMsg->srcId;
|
||||
pReply->term = ths->pRaftStore->currentTerm;
|
||||
pReply->privateTerm = ths->pNewNodeReceiver->privateTerm;
|
||||
pReply->success = false;
|
||||
pReply->matchIndex = SYNC_INDEX_INVALID;
|
||||
|
||||
// send response
|
||||
SRpcMsg rpcMsg;
|
||||
syncAppendEntriesReply2RpcMsg(pReply, &rpcMsg);
|
||||
syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg);
|
||||
syncAppendEntriesReplyDestroy(pReply);
|
||||
|
||||
return ret;
|
||||
}
|
||||
} while (0);
|
||||
|
||||
// really match
|
||||
//
|
||||
// condition:
|
||||
// logOK
|
||||
//
|
||||
// operation:
|
||||
// match
|
||||
// make log same
|
||||
do {
|
||||
bool condition = (pMsg->term == ths->pRaftStore->currentTerm) && (ths->state == TAOS_SYNC_STATE_FOLLOWER) && logOK;
|
||||
if (condition) {
|
||||
// has extra entries (> preIndex) in local log
|
||||
SyncIndex myLastIndex = syncNodeGetLastIndex(ths);
|
||||
bool hasExtraEntries = myLastIndex > pMsg->prevLogIndex;
|
||||
|
||||
// has entries in SyncAppendEntries msg
|
||||
bool hasAppendEntries = pMsg->dataLen > 0;
|
||||
|
||||
char logBuf[128];
|
||||
snprintf(logBuf, sizeof(logBuf), "recv sync-append-entries, match, pre-index:%ld, pre-term:%lu, datalen:%d",
|
||||
pMsg->prevLogIndex, pMsg->prevLogTerm, pMsg->dataLen);
|
||||
syncNodeEventLog(ths, logBuf);
|
||||
|
||||
if (hasExtraEntries) {
|
||||
// make log same, rollback deleted entries
|
||||
// code = syncNodeMakeLogSame(ths, pMsg);
|
||||
ASSERT(code == 0);
|
||||
}
|
||||
|
||||
int32_t retArrSize = 0;
|
||||
if (hasAppendEntries) {
|
||||
SRpcMsg rpcMsgArr[SYNC_MAX_BATCH_SIZE];
|
||||
memset(rpcMsgArr, 0, sizeof(rpcMsgArr));
|
||||
syncAppendEntriesBatch2RpcMsgArray(pMsg, rpcMsgArr, SYNC_MAX_BATCH_SIZE, &retArrSize);
|
||||
|
||||
// append entry batch
|
||||
for (int32_t i = 0; i < retArrSize; ++i) {
|
||||
SSyncRaftEntry* pAppendEntry = syncEntryBuild(1234);
|
||||
code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pAppendEntry);
|
||||
if (code != 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
code = syncNodePreCommit(ths, pAppendEntry);
|
||||
ASSERT(code == 0);
|
||||
|
||||
syncEntryDestory(pAppendEntry);
|
||||
}
|
||||
|
||||
// fsync once
|
||||
SSyncLogStoreData* pData = ths->pLogStore->data;
|
||||
SWal* pWal = pData->pWal;
|
||||
walFsync(pWal, true);
|
||||
}
|
||||
|
||||
// prepare response msg
|
||||
SyncAppendEntriesReply* pReply = syncAppendEntriesReplyBuild(ths->vgId);
|
||||
pReply->srcId = ths->myRaftId;
|
||||
pReply->destId = pMsg->srcId;
|
||||
pReply->term = ths->pRaftStore->currentTerm;
|
||||
pReply->privateTerm = ths->pNewNodeReceiver->privateTerm;
|
||||
pReply->success = true;
|
||||
pReply->matchIndex = hasAppendEntries ? pMsg->prevLogIndex + retArrSize : pMsg->prevLogIndex;
|
||||
|
||||
// send response
|
||||
SRpcMsg rpcMsg;
|
||||
syncAppendEntriesReply2RpcMsg(pReply, &rpcMsg);
|
||||
syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg);
|
||||
syncAppendEntriesReplyDestroy(pReply);
|
||||
|
||||
// maybe update commit index, leader notice me
|
||||
if (pMsg->commitIndex > ths->commitIndex) {
|
||||
// has commit entry in local
|
||||
if (pMsg->commitIndex <= ths->pLogStore->syncLogLastIndex(ths->pLogStore)) {
|
||||
// advance commit index to sanpshot first
|
||||
SSnapshot snapshot;
|
||||
ths->pFsm->FpGetSnapshotInfo(ths->pFsm, &snapshot);
|
||||
if (snapshot.lastApplyIndex >= 0 && snapshot.lastApplyIndex > ths->commitIndex) {
|
||||
SyncIndex commitBegin = ths->commitIndex;
|
||||
SyncIndex commitEnd = snapshot.lastApplyIndex;
|
||||
ths->commitIndex = snapshot.lastApplyIndex;
|
||||
|
||||
char eventLog[128];
|
||||
snprintf(eventLog, sizeof(eventLog), "commit by snapshot from index:%ld to index:%ld", commitBegin,
|
||||
commitEnd);
|
||||
syncNodeEventLog(ths, eventLog);
|
||||
}
|
||||
|
||||
SyncIndex beginIndex = ths->commitIndex + 1;
|
||||
SyncIndex endIndex = pMsg->commitIndex;
|
||||
|
||||
// update commit index
|
||||
ths->commitIndex = pMsg->commitIndex;
|
||||
|
||||
// call back Wal
|
||||
code = ths->pLogStore->updateCommitIndex(ths->pLogStore, ths->commitIndex);
|
||||
ASSERT(code == 0);
|
||||
|
||||
code = syncNodeCommit(ths, beginIndex, endIndex, ths->state);
|
||||
ASSERT(code == 0);
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
} while (0);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int32_t syncNodeOnAppendEntriesSnapshotCb(SSyncNode* ths, SyncAppendEntries* pMsg) {
|
||||
int32_t ret = 0;
|
||||
|
|
|
@ -80,7 +80,7 @@ void snapshotSenderDestroy(SSyncSnapshotSender *pSender) {
|
|||
bool snapshotSenderIsStart(SSyncSnapshotSender *pSender) { return pSender->start; }
|
||||
|
||||
// begin send snapshot by snapshot, pReader
|
||||
void snapshotSenderStart(SSyncSnapshotSender *pSender, SSnapshot snapshot, void *pReader) {
|
||||
int32_t snapshotSenderStart(SSyncSnapshotSender *pSender, SSnapshot snapshot, void *pReader) {
|
||||
ASSERT(!snapshotSenderIsStart(pSender));
|
||||
|
||||
// init snapshot and reader
|
||||
|
@ -181,9 +181,11 @@ void snapshotSenderStart(SSyncSnapshotSender *pSender, SSnapshot snapshot, void
|
|||
syncNodeEventLog(pSender->pSyncNode, eventLog);
|
||||
taosMemoryFree(eventLog);
|
||||
} while (0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void snapshotSenderStop(SSyncSnapshotSender *pSender, bool finish) {
|
||||
int32_t snapshotSenderStop(SSyncSnapshotSender *pSender, bool finish) {
|
||||
// close reader
|
||||
if (pSender->pReader != NULL) {
|
||||
int32_t ret = pSender->pSyncNode->pFsm->FpSnapshotStopRead(pSender->pSyncNode->pFsm, pSender->pReader);
|
||||
|
@ -208,6 +210,8 @@ void snapshotSenderStop(SSyncSnapshotSender *pSender, bool finish) {
|
|||
syncNodeEventLog(pSender->pSyncNode, eventLog);
|
||||
taosMemoryFree(eventLog);
|
||||
} while (0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// when sender receive ack, call this function to send msg from seq
|
||||
|
@ -471,7 +475,7 @@ static void snapshotReceiverForceStop(SSyncSnapshotReceiver *pReceiver) {
|
|||
|
||||
// if receiver receive msg from seq = SYNC_SNAPSHOT_SEQ_BEGIN, start receiver
|
||||
// if already start, force close, start again
|
||||
void snapshotReceiverStart(SSyncSnapshotReceiver *pReceiver, SyncTerm privateTerm, SyncSnapshotSend *pBeginMsg) {
|
||||
int32_t snapshotReceiverStart(SSyncSnapshotReceiver *pReceiver, SyncTerm privateTerm, SyncSnapshotSend *pBeginMsg) {
|
||||
if (!snapshotReceiverIsStart(pReceiver)) {
|
||||
// first start
|
||||
snapshotReceiverDoStart(pReceiver, privateTerm, pBeginMsg);
|
||||
|
@ -486,9 +490,11 @@ void snapshotReceiverStart(SSyncSnapshotReceiver *pReceiver, SyncTerm privateTer
|
|||
// start again
|
||||
snapshotReceiverDoStart(pReceiver, privateTerm, pBeginMsg);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void snapshotReceiverStop(SSyncSnapshotReceiver *pReceiver) {
|
||||
int32_t snapshotReceiverStop(SSyncSnapshotReceiver *pReceiver) {
|
||||
if (pReceiver->pWriter != NULL) {
|
||||
int32_t ret =
|
||||
pReceiver->pSyncNode->pFsm->FpSnapshotStopWrite(pReceiver->pSyncNode->pFsm, pReceiver->pWriter, false);
|
||||
|
@ -506,6 +512,8 @@ void snapshotReceiverStop(SSyncSnapshotReceiver *pReceiver) {
|
|||
syncNodeEventLog(pReceiver->pSyncNode, eventLog);
|
||||
taosMemoryFree(eventLog);
|
||||
} while (0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int32_t snapshotReceiverFinish(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend *pMsg) {
|
||||
|
|
Loading…
Reference in New Issue