From f7191d2ffdc03a0d292072df94030c6cdfa65479 Mon Sep 17 00:00:00 2001 From: Minghao Li Date: Wed, 2 Nov 2022 15:25:14 +0800 Subject: [PATCH] refactor(sync): add pre snapshot --- source/libs/sync/inc/syncInt.h | 9 +- source/libs/sync/inc/syncSnapshot.h | 25 +- source/libs/sync/src/syncAppendEntries.c | 1 - source/libs/sync/src/syncAppendEntriesReply.c | 2 + source/libs/sync/src/syncMain.c | 10 + source/libs/sync/src/syncMessage.c | 11 +- source/libs/sync/src/syncReplication.c | 4 +- source/libs/sync/src/syncSnapshot.c | 488 +++++++++++------- .../sync/test/syncSnapshotReceiverTest.cpp | 1 - source/libs/sync/test/syncSnapshotRspTest.cpp | 2 +- .../libs/sync/test/syncSnapshotSendTest.cpp | 1 - .../libs/sync/test/syncSnapshotSenderTest.cpp | 3 +- 12 files changed, 359 insertions(+), 198 deletions(-) diff --git a/source/libs/sync/inc/syncInt.h b/source/libs/sync/inc/syncInt.h index f1bdcf83f2..b5c01bf388 100644 --- a/source/libs/sync/inc/syncInt.h +++ b/source/libs/sync/inc/syncInt.h @@ -22,9 +22,8 @@ extern "C" { #include "sync.h" #include "syncTools.h" -#include "tlog.h" -#include "ttimer.h" #include "taosdef.h" +#include "tlog.h" #include "ttimer.h" // clang-format off @@ -344,6 +343,12 @@ void syncLogRecvSyncPreSnapshot(SSyncNode* pSyncNode, const SyncPreSnapshot* pMs void syncLogSendSyncPreSnapshotReply(SSyncNode* pSyncNode, const SyncPreSnapshotReply* pMsg, const char* s); void syncLogRecvSyncPreSnapshotReply(SSyncNode* pSyncNode, const SyncPreSnapshotReply* pMsg, const char* s); +void syncLogSendSyncSnapshotSend(SSyncNode* pSyncNode, const SyncSnapshotSend* pMsg, const char* s); +void syncLogRecvSyncSnapshotSend(SSyncNode* pSyncNode, const SyncSnapshotSend* pMsg, const char* s); + +void syncLogSendSyncSnapshotRsp(SSyncNode* pSyncNode, const SyncSnapshotRsp* pMsg, const char* s); +void syncLogRecvSyncSnapshotRsp(SSyncNode* pSyncNode, const SyncSnapshotRsp* pMsg, const char* s); + // for debug -------------- void syncNodePrint(SSyncNode* pObj); void syncNodePrint2(char* s, SSyncNode* pObj); diff --git a/source/libs/sync/inc/syncSnapshot.h b/source/libs/sync/inc/syncSnapshot.h index b8b7af2dda..760fc8ac73 100644 --- a/source/libs/sync/inc/syncSnapshot.h +++ b/source/libs/sync/inc/syncSnapshot.h @@ -28,10 +28,11 @@ extern "C" { #include "syncMessage.h" #include "taosdef.h" -#define SYNC_SNAPSHOT_SEQ_INVALID -1 -#define SYNC_SNAPSHOT_SEQ_FORCE_CLOSE -2 -#define SYNC_SNAPSHOT_SEQ_BEGIN 0 -#define SYNC_SNAPSHOT_SEQ_END 0x7FFFFFFF +#define SYNC_SNAPSHOT_SEQ_INVALID -2 +#define SYNC_SNAPSHOT_SEQ_FORCE_CLOSE -3 +#define SYNC_SNAPSHOT_SEQ_PRE_SNAPSHOT -1 +#define SYNC_SNAPSHOT_SEQ_BEGIN 0 +#define SYNC_SNAPSHOT_SEQ_END 0x7FFFFFFF #define SYNC_SNAPSHOT_RETRY_MS 5000 @@ -47,19 +48,19 @@ typedef struct SSyncSnapshotSender { SSnapshot snapshot; SSyncCfg lastConfig; int64_t sendingMS; - SSyncNode *pSyncNode; - int32_t replicaIndex; SyncTerm term; - SyncTerm privateTerm; int64_t startTime; bool finish; + + // init when create + SSyncNode *pSyncNode; + int32_t replicaIndex; } SSyncSnapshotSender; SSyncSnapshotSender *snapshotSenderCreate(SSyncNode *pSyncNode, int32_t replicaIndex); void snapshotSenderDestroy(SSyncSnapshotSender *pSender); bool snapshotSenderIsStart(SSyncSnapshotSender *pSender); -int32_t snapshotSenderStart(SSyncSnapshotSender *pSender, SSnapshotParam snapshotParam, SSnapshot snapshot, - void *pReader); +int32_t snapshotSenderStart(SSyncSnapshotSender *pSender); int32_t snapshotSenderStop(SSyncSnapshotSender *pSender, bool finish); int32_t snapshotSend(SSyncSnapshotSender *pSender); int32_t snapshotReSend(SSyncSnapshotSender *pSender); @@ -76,11 +77,13 @@ typedef struct SSyncSnapshotReceiver { int32_t ack; void *pWriter; SyncTerm term; - SyncTerm privateTerm; SSnapshotParam snapshotParam; SSnapshot snapshot; SRaftId fromId; - SSyncNode *pSyncNode; + int64_t startTime; + + // init when create + SSyncNode *pSyncNode; } SSyncSnapshotReceiver; diff --git a/source/libs/sync/src/syncAppendEntries.c b/source/libs/sync/src/syncAppendEntries.c index 7dab112a51..4638475e71 100644 --- a/source/libs/sync/src/syncAppendEntries.c +++ b/source/libs/sync/src/syncAppendEntries.c @@ -142,7 +142,6 @@ int32_t syncNodeOnAppendEntries(SSyncNode* ths, SyncAppendEntries* pMsg) { // pReply->matchIndex = ths->pLogStore->syncLogLastIndex(ths->pLogStore); pReply->matchIndex = SYNC_INDEX_INVALID; pReply->lastSendIndex = pMsg->prevLogIndex + 1; - pReply->privateTerm = ths->pNewNodeReceiver->privateTerm; pReply->startTime = ths->startTime; if (pMsg->term < ths->pRaftStore->currentTerm) { diff --git a/source/libs/sync/src/syncAppendEntriesReply.c b/source/libs/sync/src/syncAppendEntriesReply.c index 5e6c9f1534..5d54db5b07 100644 --- a/source/libs/sync/src/syncAppendEntriesReply.c +++ b/source/libs/sync/src/syncAppendEntriesReply.c @@ -73,6 +73,7 @@ static void syncNodeStartSnapshotOnce(SSyncNode* ths, SyncIndex beginIndex, Sync int32_t code = ths->pFsm->FpSnapshotStartRead(ths->pFsm, &readerParam, &pReader); ASSERT(code == 0); +#if 0 if (pMsg->privateTerm < pSender->privateTerm) { ASSERT(pReader != NULL); snapshotSenderStart(pSender, readerParam, snapshot, pReader); @@ -82,6 +83,7 @@ static void syncNodeStartSnapshotOnce(SSyncNode* ths, SyncIndex beginIndex, Sync ths->pFsm->FpSnapshotStopRead(ths->pFsm, pReader); } } +#endif } int32_t syncNodeOnAppendEntriesReply(SSyncNode* ths, SyncAppendEntriesReply* pMsg) { diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index b70d6d5f09..d53cdc99f8 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -2355,6 +2355,7 @@ void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr) { // init peer mgr syncNodePeerStateInit(pSyncNode); +#if 0 // update sender private term SSyncSnapshotSender* pMySender = syncNodeGetSnapshotSender(pSyncNode, &(pSyncNode->myRaftId)); if (pMySender != NULL) { @@ -2365,6 +2366,7 @@ void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr) { } (pMySender->privateTerm) += 100; } +#endif // close receiver if (snapshotReceiverIsStart(pSyncNode->pNewNodeReceiver)) { @@ -3718,3 +3720,11 @@ void syncLogRecvSyncPreSnapshotReply(SSyncNode* pSyncNode, const SyncPreSnapshot pMsg->term, pMsg->snapStart, s); syncNodeEventLog(pSyncNode, logBuf); } + +void syncLogSendSyncSnapshotSend(SSyncNode* pSyncNode, const SyncSnapshotSend* pMsg, const char* s) {} + +void syncLogRecvSyncSnapshotSend(SSyncNode* pSyncNode, const SyncSnapshotSend* pMsg, const char* s) {} + +void syncLogSendSyncSnapshotRsp(SSyncNode* pSyncNode, const SyncSnapshotRsp* pMsg, const char* s) {} + +void syncLogRecvSyncSnapshotRsp(SSyncNode* pSyncNode, const SyncSnapshotRsp* pMsg, const char* s) {} diff --git a/source/libs/sync/src/syncMessage.c b/source/libs/sync/src/syncMessage.c index d070d3e744..ba81e61cb2 100644 --- a/source/libs/sync/src/syncMessage.c +++ b/source/libs/sync/src/syncMessage.c @@ -2873,8 +2873,8 @@ cJSON* syncSnapshotSend2Json(const SyncSnapshotSend* pMsg) { snprintf(u64buf, sizeof(u64buf), "%" PRIu64, pMsg->term); cJSON_AddStringToObject(pRoot, "term", u64buf); - snprintf(u64buf, sizeof(u64buf), "%" PRIu64, pMsg->privateTerm); - cJSON_AddStringToObject(pRoot, "privateTerm", u64buf); + snprintf(u64buf, sizeof(u64buf), "%" PRId64, pMsg->startTime); + cJSON_AddStringToObject(pRoot, "startTime", u64buf); snprintf(u64buf, sizeof(u64buf), "%" PRId64, pMsg->beginIndex); cJSON_AddStringToObject(pRoot, "beginIndex", u64buf); @@ -3048,8 +3048,8 @@ cJSON* syncSnapshotRsp2Json(const SyncSnapshotRsp* pMsg) { snprintf(u64buf, sizeof(u64buf), "%" PRIu64, pMsg->term); cJSON_AddStringToObject(pRoot, "term", u64buf); - snprintf(u64buf, sizeof(u64buf), "%" PRIu64, pMsg->privateTerm); - cJSON_AddStringToObject(pRoot, "privateTerm", u64buf); + snprintf(u64buf, sizeof(u64buf), "%" PRId64, pMsg->startTime); + cJSON_AddStringToObject(pRoot, "startTime", u64buf); snprintf(u64buf, sizeof(u64buf), "%" PRId64, pMsg->lastIndex); cJSON_AddStringToObject(pRoot, "lastIndex", u64buf); @@ -3059,6 +3059,9 @@ cJSON* syncSnapshotRsp2Json(const SyncSnapshotRsp* pMsg) { cJSON_AddNumberToObject(pRoot, "ack", pMsg->ack); cJSON_AddNumberToObject(pRoot, "code", pMsg->code); + + snprintf(u64buf, sizeof(u64buf), "%" PRId64, pMsg->snapBeginIndex); + cJSON_AddStringToObject(pRoot, "snap-begin", u64buf); } cJSON* pJson = cJSON_CreateObject(); diff --git a/source/libs/sync/src/syncReplication.c b/source/libs/sync/src/syncReplication.c index af53123421..4aa8b0bc34 100644 --- a/source/libs/sync/src/syncReplication.c +++ b/source/libs/sync/src/syncReplication.c @@ -62,8 +62,8 @@ int32_t syncNodeReplicateOne(SSyncNode* pSyncNode, SRaftId* pDestId) { syncNodeEventLog(pSyncNode, logBuf); // start snapshot - int32_t code = syncNodeStartSnapshot(pSyncNode, pDestId); - ASSERT(code == 0); + // int32_t code = syncNodeStartSnapshot(pSyncNode, pDestId); + return 0; } diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index 6706e2f213..b728315746 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -41,8 +41,6 @@ SSyncSnapshotSender *snapshotSenderCreate(SSyncNode *pSyncNode, int32_t replicaI } memset(pSender, 0, sizeof(*pSender)); - int64_t timeNow = taosGetTimestampMs(); - pSender->start = false; pSender->seq = SYNC_SNAPSHOT_SEQ_INVALID; pSender->ack = SYNC_SNAPSHOT_SEQ_INVALID; @@ -53,8 +51,7 @@ SSyncSnapshotSender *snapshotSenderCreate(SSyncNode *pSyncNode, int32_t replicaI pSender->pSyncNode = pSyncNode; pSender->replicaIndex = replicaIndex; pSender->term = pSyncNode->pRaftStore->currentTerm; - pSender->privateTerm = timeNow + 100; - pSender->startTime = timeNow; + pSender->startTime = 0; pSender->pSyncNode->pFsm->FpGetSnapshotInfo(pSender->pSyncNode->pFsm, &(pSender->snapshot)); pSender->finish = false; } else { @@ -88,88 +85,30 @@ void snapshotSenderDestroy(SSyncSnapshotSender *pSender) { bool snapshotSenderIsStart(SSyncSnapshotSender *pSender) { return pSender->start; } -// begin send snapshot by param, snapshot, pReader -// -// action: -// 1. assert reader not start -// 2. update state -// 3. send first snapshot block -int32_t snapshotSenderStart(SSyncSnapshotSender *pSender, SSnapshotParam snapshotParam, SSnapshot snapshot, - void *pReader) { +int32_t snapshotSenderStart(SSyncSnapshotSender *pSender) { ASSERT(!snapshotSenderIsStart(pSender)); - // init snapshot, parm, reader - ASSERT(pSender->pReader == NULL); - pSender->pReader = pReader; - pSender->snapshot = snapshot; - pSender->snapshotParam = snapshotParam; - - // init current block - if (pSender->pCurrentBlock != NULL) { - taosMemoryFree(pSender->pCurrentBlock); - } - pSender->blockLen = 0; - - // update term - pSender->term = pSender->pSyncNode->pRaftStore->currentTerm; - ++(pSender->privateTerm); // increase private term - - // update state - pSender->finish = false; pSender->start = true; pSender->seq = SYNC_SNAPSHOT_SEQ_BEGIN; pSender->ack = SYNC_SNAPSHOT_SEQ_INVALID; + pSender->pReader = NULL; + pSender->pCurrentBlock = NULL; + pSender->blockLen = 0; - // init last config - if (pSender->snapshot.lastConfigIndex != SYNC_INDEX_INVALID) { - int32_t code = 0; - SSyncRaftEntry *pEntry = NULL; - bool getLastConfig = false; + pSender->snapshotParam.start = SYNC_INDEX_INVALID; + pSender->snapshotParam.end = SYNC_INDEX_INVALID; - code = pSender->pSyncNode->pLogStore->syncLogGetEntry(pSender->pSyncNode->pLogStore, - pSender->snapshot.lastConfigIndex, &pEntry); - if (code == 0 && pEntry != NULL) { - SRpcMsg rpcMsg; - syncEntry2OriginalRpc(pEntry, &rpcMsg); + pSender->snapshot.data = NULL; + pSender->snapshotParam.end = SYNC_INDEX_INVALID; + pSender->snapshot.lastApplyIndex = SYNC_INDEX_INVALID; + pSender->snapshot.lastApplyTerm = SYNC_TERM_INVALID; + pSender->snapshot.lastConfigIndex = SYNC_INDEX_INVALID; - SSyncCfg lastConfig; - int32_t ret = syncCfgFromStr(rpcMsg.pCont, &lastConfig); - ASSERT(ret == 0); - pSender->lastConfig = lastConfig; - getLastConfig = true; - - rpcFreeCont(rpcMsg.pCont); - syncEntryDestory(pEntry); - } else { - if (pSender->snapshot.lastConfigIndex == pSender->pSyncNode->pRaftCfg->lastConfigIndex) { - sTrace("vgId:%d, sync sender get cfg from local", pSender->pSyncNode->vgId); - pSender->lastConfig = pSender->pSyncNode->pRaftCfg->cfg; - getLastConfig = true; - } - } - - // last config not found in wal, update to -1 - if (!getLastConfig) { - SyncIndex oldLastConfigIndex = pSender->snapshot.lastConfigIndex; - SyncIndex newLastConfigIndex = SYNC_INDEX_INVALID; - pSender->snapshot.lastConfigIndex = SYNC_INDEX_INVALID; - memset(&(pSender->lastConfig), 0, sizeof(SSyncCfg)); - - // event log - do { - char logBuf[128]; - snprintf(logBuf, sizeof(logBuf), "snapshot sender update lcindex from %" PRId64 " to %" PRId64, - oldLastConfigIndex, newLastConfigIndex); - char *eventLog = snapshotSender2SimpleStr(pSender, logBuf); - syncNodeEventLog(pSender->pSyncNode, eventLog); - taosMemoryFree(eventLog); - } while (0); - } - - } else { - // no last config - memset(&(pSender->lastConfig), 0, sizeof(SSyncCfg)); - } + memset(&(pSender->lastConfig), 0, sizeof(pSender->lastConfig)); + pSender->sendingMS = 0; + pSender->term = pSender->pSyncNode->pRaftStore->currentTerm; + pSender->startTime = taosGetTimestampMs(); + pSender->finish = false; // build begin msg SyncSnapshotSend *pMsg = syncSnapshotSendBuild(0, pSender->pSyncNode->vgId); @@ -181,8 +120,8 @@ int32_t snapshotSenderStart(SSyncSnapshotSender *pSender, SSnapshotParam snapsho pMsg->lastTerm = pSender->snapshot.lastApplyTerm; pMsg->lastConfigIndex = pSender->snapshot.lastConfigIndex; pMsg->lastConfig = pSender->lastConfig; - pMsg->seq = pSender->seq; // SYNC_SNAPSHOT_SEQ_BEGIN - pMsg->privateTerm = pSender->privateTerm; + pMsg->startTime = pSender->startTime; + pMsg->seq = SYNC_SNAPSHOT_SEQ_PRE_SNAPSHOT; // send msg SRpcMsg rpcMsg; @@ -201,6 +140,10 @@ int32_t snapshotSenderStart(SSyncSnapshotSender *pSender, SSnapshotParam snapsho } int32_t snapshotSenderStop(SSyncSnapshotSender *pSender, bool finish) { + // update flag + pSender->start = false; + pSender->finish = finish; + // close reader if (pSender->pReader != NULL) { int32_t ret = pSender->pSyncNode->pFsm->FpSnapshotStopRead(pSender->pSyncNode->pFsm, pSender->pReader); @@ -215,12 +158,6 @@ int32_t snapshotSenderStop(SSyncSnapshotSender *pSender, bool finish) { pSender->blockLen = 0; } - // update flag - pSender->start = false; - pSender->finish = finish; - - // do not update term, maybe print - // event log do { char *eventLog = snapshotSender2SimpleStr(pSender, "snapshot sender stop"); @@ -263,7 +200,9 @@ int32_t snapshotSend(SSyncSnapshotSender *pSender) { pMsg->lastConfigIndex = pSender->snapshot.lastConfigIndex; pMsg->lastConfig = pSender->lastConfig; pMsg->seq = pSender->seq; - pMsg->privateTerm = pSender->privateTerm; + + // pMsg->privateTerm = pSender->privateTerm; + memcpy(pMsg->data, pSender->pCurrentBlock, pSender->blockLen); // send msg @@ -302,7 +241,9 @@ int32_t snapshotReSend(SSyncSnapshotSender *pSender) { pMsg->lastConfigIndex = pSender->snapshot.lastConfigIndex; pMsg->lastConfig = pSender->lastConfig; pMsg->seq = pSender->seq; - pMsg->privateTerm = pSender->privateTerm; + + // pMsg->privateTerm = pSender->privateTerm; + memcpy(pMsg->data, pSender->pCurrentBlock, pSender->blockLen); // send msg @@ -367,8 +308,10 @@ cJSON *snapshotSender2Json(SSyncSnapshotSender *pSender) { cJSON_AddNumberToObject(pRoot, "replicaIndex", pSender->replicaIndex); snprintf(u64buf, sizeof(u64buf), "%" PRIu64, pSender->term); cJSON_AddStringToObject(pRoot, "term", u64buf); - snprintf(u64buf, sizeof(u64buf), "%" PRIu64, pSender->privateTerm); - cJSON_AddStringToObject(pRoot, "privateTerm", u64buf); + + // snprintf(u64buf, sizeof(u64buf), "%" PRIu64, pSender->privateTerm); + // cJSON_AddStringToObject(pRoot, "privateTerm", u64buf); + cJSON_AddNumberToObject(pRoot, "finish", pSender->finish); } @@ -395,30 +338,38 @@ char *snapshotSender2SimpleStr(SSyncSnapshotSender *pSender, char *event) { snprintf(s, len, "%s {%p s-param:%" PRId64 " e-param:%" PRId64 " laindex:%" PRId64 " laterm:%" PRIu64 " lcindex:%" PRId64 - " seq:%d ack:%d finish:%d pterm:%" PRIu64 - " " - "replica-index:%d %s:%d}", + " seq:%d ack:%d finish:%d replica-index:%d %s:%d}", event, pSender, pSender->snapshotParam.start, pSender->snapshotParam.end, pSender->snapshot.lastApplyIndex, pSender->snapshot.lastApplyTerm, pSender->snapshot.lastConfigIndex, pSender->seq, pSender->ack, - pSender->finish, pSender->privateTerm, pSender->replicaIndex, host, port); + pSender->finish, pSender->replicaIndex, host, port); return s; } int32_t syncNodeStartSnapshot(SSyncNode *pSyncNode, SRaftId *pDestId) { - // calculate index - - syncNodeEventLog(pSyncNode, "start snapshot ..."); + syncNodeEventLog(pSyncNode, "starting snapshot ..."); SSyncSnapshotSender *pSender = syncNodeGetSnapshotSender(pSyncNode, pDestId); if (pSender == NULL) { - // create sender - } else { - // if is same - // return 0; + syncNodeErrorLog(pSyncNode, "start snapshot error, sender is null"); + return -1; } - // send begin msg + int32_t code = 0; + + if (snapshotSenderIsStart(pSender)) { + code = snapshotSenderStop(pSender, false); + if (code != 0) { + syncNodeErrorLog(pSyncNode, "snapshot sender stop error"); + return -1; + } + } + + code = snapshotSenderStart(pSender); + if (code != 0) { + syncNodeErrorLog(pSyncNode, "snapshot sender start error"); + return -1; + } return 0; } @@ -440,7 +391,6 @@ SSyncSnapshotReceiver *snapshotReceiverCreate(SSyncNode *pSyncNode, SRaftId from pReceiver->pSyncNode = pSyncNode; pReceiver->fromId = fromId; pReceiver->term = pSyncNode->pRaftStore->currentTerm; - pReceiver->privateTerm = 0; pReceiver->snapshot.data = NULL; pReceiver->snapshot.lastApplyIndex = SYNC_INDEX_INVALID; pReceiver->snapshot.lastApplyTerm = 0; @@ -474,19 +424,8 @@ bool snapshotReceiverIsStart(SSyncSnapshotReceiver *pReceiver) { return pReceive // receive first snapshot data // write first block data static void snapshotReceiverDoStart(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend *pBeginMsg) { - // update state - pReceiver->term = pReceiver->pSyncNode->pRaftStore->currentTerm; - pReceiver->privateTerm = pBeginMsg->privateTerm; - pReceiver->ack = SYNC_SNAPSHOT_SEQ_BEGIN; - pReceiver->fromId = pBeginMsg->srcId; pReceiver->start = true; - - // update snapshot - pReceiver->snapshot.lastApplyIndex = pBeginMsg->lastIndex; - pReceiver->snapshot.lastApplyTerm = pBeginMsg->lastTerm; - pReceiver->snapshot.lastConfigIndex = pBeginMsg->lastConfigIndex; - pReceiver->snapshotParam.start = pBeginMsg->beginIndex; - pReceiver->snapshotParam.end = pBeginMsg->lastIndex; + pReceiver->ack = SYNC_SNAPSHOT_SEQ_BEGIN; // start writer ASSERT(pReceiver->pWriter == NULL); @@ -494,6 +433,19 @@ static void snapshotReceiverDoStart(SSyncSnapshotReceiver *pReceiver, SyncSnapsh &(pReceiver->snapshotParam), &(pReceiver->pWriter)); ASSERT(ret == 0); + pReceiver->term = pReceiver->pSyncNode->pRaftStore->currentTerm; + pReceiver->snapshotParam.start = pBeginMsg->beginIndex; + pReceiver->snapshotParam.end = pBeginMsg->lastIndex; + + pReceiver->fromId = pBeginMsg->srcId; + + // update snapshot + pReceiver->snapshot.lastApplyIndex = pBeginMsg->lastIndex; + pReceiver->snapshot.lastApplyTerm = pBeginMsg->lastTerm; + pReceiver->snapshot.lastConfigIndex = pBeginMsg->lastConfigIndex; + + pReceiver->startTime = pBeginMsg->startTime; + // event log do { char *eventLog = snapshotReceiver2SimpleStr(pReceiver, "snapshot receiver start"); @@ -523,22 +475,9 @@ void snapshotReceiverForceStop(SSyncSnapshotReceiver *pReceiver) { } // if receiver receive msg from seq = SYNC_SNAPSHOT_SEQ_BEGIN, start receiver -// if already start, force close, start again int32_t snapshotReceiverStart(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend *pBeginMsg) { - if (!snapshotReceiverIsStart(pReceiver)) { - // first start - snapshotReceiverDoStart(pReceiver, pBeginMsg); - - } else { - // already start - sInfo("vgId:%d, snapshot recv, receiver already start", pReceiver->pSyncNode->vgId); - - // force close, abandon incomplete data - snapshotReceiverForceStop(pReceiver); - - // start again - snapshotReceiverDoStart(pReceiver, pBeginMsg); - } + ASSERT(!snapshotReceiverIsStart(pReceiver)); + snapshotReceiverDoStart(pReceiver, pBeginMsg); return 0; } @@ -698,8 +637,8 @@ cJSON *snapshotReceiver2Json(SSyncSnapshotReceiver *pReceiver) { snprintf(u64buf, sizeof(u64buf), "%" PRIu64, pReceiver->term); cJSON_AddStringToObject(pRoot, "term", u64buf); - snprintf(u64buf, sizeof(u64buf), "%" PRIu64, pReceiver->privateTerm); - cJSON_AddStringToObject(pRoot, "privateTerm", u64buf); + snprintf(u64buf, sizeof(u64buf), "%" PRId64, pReceiver->startTime); + cJSON_AddStringToObject(pRoot, "startTime", u64buf); } cJSON *pJson = cJSON_CreateObject(); @@ -724,38 +663,204 @@ char *snapshotReceiver2SimpleStr(SSyncSnapshotReceiver *pReceiver, char *event) syncUtilU642Addr(fromId.addr, host, sizeof(host), &port); snprintf(s, len, - "%s {%p start:%d ack:%d term:%" PRIu64 " pterm:%" PRIu64 " from:%s:%d s-param:%" PRId64 " e-param:%" PRId64 - " laindex:%" PRId64 " laterm:%" PRIu64 + "%s {%p start:%d ack:%d term:%" PRIu64 " start-time:%" PRId64 " from:%s:%d s-param:%" PRId64 + " e-param:%" PRId64 " laindex:%" PRId64 " laterm:%" PRIu64 " " "lcindex:%" PRId64 "}", - event, pReceiver, pReceiver->start, pReceiver->ack, pReceiver->term, pReceiver->privateTerm, host, port, + event, pReceiver, pReceiver->start, pReceiver->ack, pReceiver->term, pReceiver->startTime, host, port, pReceiver->snapshotParam.start, pReceiver->snapshotParam.end, pReceiver->snapshot.lastApplyIndex, pReceiver->snapshot.lastApplyTerm, pReceiver->snapshot.lastConfigIndex); return s; } +SyncIndex syncNodeGetSnapBeginIndex(SSyncNode *ths) { + SyncIndex snapStart = SYNC_INDEX_INVALID; + + if (syncNodeIsMnode(ths)) { + snapStart = SYNC_INDEX_BEGIN; + + } else { + SSyncLogStoreData *pData = ths->pLogStore->data; + SWal *pWal = pData->pWal; + + bool isEmpty = ths->pLogStore->syncLogIsEmpty(ths->pLogStore); + int64_t walCommitVer = walGetCommittedVer(pWal); + + if (!isEmpty && ths->commitIndex != walCommitVer) { + char logBuf[128]; + snprintf(logBuf, sizeof(logBuf), "commit not same, wal-commit:%" PRId64 ", commit:%" PRId64 ", ignore", + walCommitVer, ths->commitIndex); + syncNodeErrorLog(ths, logBuf); + + snapStart = walCommitVer + 1; + } else { + snapStart = ths->commitIndex + 1; + } + } + + return snapStart; +} + +static int32_t syncNodeOnSnapshotPre(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { + SSyncSnapshotReceiver *pReceiver = pSyncNode->pNewNodeReceiver; + + if (snapshotReceiverIsStart(pReceiver)) { + // already start + + if (pMsg->startTime > pReceiver->startTime) { + goto _START_RECEIVER; + + } else if (pMsg->startTime == pReceiver->startTime) { + goto _SEND_REPLY; + + } else { + // ignore + return 0; + } + + } else { + // start new + goto _START_RECEIVER; + } + +_START_RECEIVER: + if (taosGetTimestampMs() - pMsg->startTime > SNAPSHOT_MAX_CLOCK_SKEW_MS) { + syncNodeErrorLog(pSyncNode, "snapshot receiver time skew too much"); + return -1; + } else { + // waiting for clock match + while (taosGetTimestampMs() > pMsg->startTime) { + taosMsleep(10); + } + + snapshotReceiverStart(pReceiver, pMsg); // set start-time same with sender + } + +_SEND_REPLY: + // build msg + ; // make complier happy + SyncSnapshotRsp *pRspMsg = syncSnapshotRspBuild(pSyncNode->vgId); + pRspMsg->srcId = pSyncNode->myRaftId; + pRspMsg->destId = pMsg->srcId; + pRspMsg->term = pSyncNode->pRaftStore->currentTerm; + pRspMsg->lastIndex = pMsg->lastIndex; + pRspMsg->lastTerm = pMsg->lastTerm; + pRspMsg->startTime = pReceiver->startTime; + pRspMsg->ack = pMsg->seq; // receiver maybe already closed + pRspMsg->code = 0; + pRspMsg->snapBeginIndex = syncNodeGetSnapBeginIndex(pSyncNode); + + // send msg + SRpcMsg rpcMsg; + syncSnapshotRsp2RpcMsg(pRspMsg, &rpcMsg); + syncNodeSendMsgById(&(pRspMsg->destId), pSyncNode, &rpcMsg); + syncSnapshotRspDestroy(pRspMsg); + + return 0; +} + +static int32_t syncNodeOnSnapshotBegin(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { + // condition 1 + SSyncSnapshotReceiver *pReceiver = pSyncNode->pNewNodeReceiver; + + if (snapshotReceiverIsStart(pReceiver)) { + if (pMsg->startTime > pReceiver->startTime) { + snapshotReceiverStop(pReceiver); + + } else if (pMsg->startTime == pReceiver->startTime) { + return 0; + } else { + // ignore + syncNodeEventLog(pSyncNode, "msg ignore"); + return 0; + } + } + +_START_RECEIVER: + if (taosGetTimestampMs() - pMsg->startTime > SNAPSHOT_MAX_CLOCK_SKEW_MS) { + syncNodeErrorLog(pSyncNode, "snapshot receiver time skew too much"); + return -1; + } else { + // waiting for clock match + while (taosGetTimestampMs() > pMsg->startTime) { + taosMsleep(10); + } + + snapshotReceiverStart(pReceiver, pMsg); + + // build msg + SyncSnapshotRsp *pRspMsg = syncSnapshotRspBuild(pSyncNode->vgId); + pRspMsg->srcId = pSyncNode->myRaftId; + pRspMsg->destId = pMsg->srcId; + pRspMsg->term = pSyncNode->pRaftStore->currentTerm; + pRspMsg->lastIndex = pMsg->lastIndex; + pRspMsg->lastTerm = pMsg->lastTerm; + pRspMsg->ack = pReceiver->ack; // receiver maybe already closed + pRspMsg->code = 0; + + // send msg + SRpcMsg rpcMsg; + syncSnapshotRsp2RpcMsg(pRspMsg, &rpcMsg); + syncNodeSendMsgById(&(pRspMsg->destId), pSyncNode, &rpcMsg); + syncSnapshotRspDestroy(pRspMsg); + } + + return 0; +} + +static int32_t syncNodeOnSnapshotTransfer(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { return 0; } + +static int32_t syncNodeOnSnapshotEnd(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { return 0; } + // receiver on message // -// condition 1, recv SYNC_SNAPSHOT_SEQ_BEGIN, start receiver, update privateTerm -// condition 2, recv SYNC_SNAPSHOT_SEQ_END, finish receiver(apply snapshot data, update commit index, maybe reconfig) -// condition 3, recv SYNC_SNAPSHOT_SEQ_FORCE_CLOSE, force close -// condition 4, got data, update ack +// condition 1, recv SYNC_SNAPSHOT_SEQ_PRE_SNAPSHOT +// if receiver already start +// if sender.start-time > receiver.start-time, restart receiver(reply snapshot start) +// if sender.start-time = receiver.start-time, maybe duplicate msg +// if sender.start-time < receiver.start-time, ignore +// else +// waiting for clock match +// start receiver(reply snapshot start) +// +// condition 2, recv SYNC_SNAPSHOT_SEQ_BEGIN +// a. create writer with +// +// condition 3, recv SYNC_SNAPSHOT_SEQ_END, finish receiver(apply snapshot data, update commit index, maybe reconfig) +// +// condition 4, recv SYNC_SNAPSHOT_SEQ_FORCE_CLOSE, force close +// +// condition 5, got data, update ack // int32_t syncNodeOnSnapshot(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { - // get receiver - SSyncSnapshotReceiver *pReceiver = pSyncNode->pNewNodeReceiver; - bool needRsp = false; + // if already drop replica, do not process + if (!syncNodeInRaftGroup(pSyncNode, &(pMsg->srcId))) { + syncLogRecvSyncSnapshotSend(pSyncNode, pMsg, "not in my config"); + return 0; + } + + if (pMsg->term < pSyncNode->pRaftStore->currentTerm) { + syncLogRecvSyncSnapshotSend(pSyncNode, pMsg, "reject, small term"); + return 0; + } + + if (pMsg->term > pSyncNode->pRaftStore->currentTerm) { + syncNodeStepDown(pSyncNode, pMsg->term); + } + syncNodeResetElectTimer(pSyncNode); + int32_t code = 0; + SSyncSnapshotReceiver *pReceiver = pSyncNode->pNewNodeReceiver; // state, term, seq/ack if (pSyncNode->state == TAOS_SYNC_STATE_FOLLOWER) { if (pMsg->term == pSyncNode->pRaftStore->currentTerm) { - if (pMsg->seq == SYNC_SNAPSHOT_SEQ_BEGIN) { - // condition 1 - // begin, no data - snapshotReceiverStart(pReceiver, pMsg); - needRsp = true; + if (pMsg->seq == SYNC_SNAPSHOT_SEQ_PRE_SNAPSHOT) { + syncNodeOnSnapshotPre(pSyncNode, pMsg); + + } else if (pMsg->seq == SYNC_SNAPSHOT_SEQ_BEGIN) { + syncNodeOnSnapshotBegin(pSyncNode, pMsg); } else if (pMsg->seq == SYNC_SNAPSHOT_SEQ_END) { // condition 2 @@ -764,7 +869,7 @@ int32_t syncNodeOnSnapshot(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { if (code == 0) { snapshotReceiverStop(pReceiver); } - needRsp = true; + bool needRsp = true; // maybe update lastconfig if (pMsg->lastConfigIndex >= SYNC_INDEX_BEGIN) { @@ -782,7 +887,7 @@ int32_t syncNodeOnSnapshot(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { // condition 3 // force close snapshotReceiverForceStop(pReceiver); - needRsp = false; + bool needRsp = false; } else if (pMsg->seq > SYNC_SNAPSHOT_SEQ_BEGIN && pMsg->seq < SYNC_SNAPSHOT_SEQ_END) { // condition 4 @@ -790,7 +895,7 @@ int32_t syncNodeOnSnapshot(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { if (pMsg->seq == pReceiver->ack + 1) { snapshotReceiverGotData(pReceiver, pMsg); } - needRsp = true; + bool needRsp = true; } else { // error log @@ -805,26 +910,6 @@ int32_t syncNodeOnSnapshot(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { return -1; } - // send ack - if (needRsp) { - // build msg - SyncSnapshotRsp *pRspMsg = syncSnapshotRspBuild(pSyncNode->vgId); - pRspMsg->srcId = pSyncNode->myRaftId; - pRspMsg->destId = pMsg->srcId; - pRspMsg->term = pSyncNode->pRaftStore->currentTerm; - pRspMsg->lastIndex = pMsg->lastIndex; - pRspMsg->lastTerm = pMsg->lastTerm; - pRspMsg->ack = pReceiver->ack; // receiver maybe already closed - pRspMsg->code = 0; - pRspMsg->privateTerm = pReceiver->privateTerm; // receiver maybe already closed - - // send msg - SRpcMsg rpcMsg; - syncSnapshotRsp2RpcMsg(pRspMsg, &rpcMsg); - syncNodeSendMsgById(&(pRspMsg->destId), pSyncNode, &rpcMsg); - syncSnapshotRspDestroy(pRspMsg); - } - } else { // error log do { @@ -849,6 +934,52 @@ int32_t syncNodeOnSnapshot(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { return 0; } +int32_t syncNodeOnSnapshotReplyPre(SSyncNode *pSyncNode, SyncSnapshotRsp *pMsg) { + // get sender + SSyncSnapshotSender *pSender = syncNodeGetSnapshotSender(pSyncNode, &(pMsg->srcId)); + ASSERT(pSender != NULL); + + SSnapshot snapshot; + pSyncNode->pFsm->FpGetSnapshotInfo(pSyncNode->pFsm, &snapshot); + + // prepare + pSender->snapshotParam.start = pMsg->snapBeginIndex; + pSender->snapshotParam.end = snapshot.lastApplyIndex; + + if (pMsg->snapBeginIndex > snapshot.lastApplyIndex) { + syncNodeErrorLog(pSyncNode, "snapshot last index too small"); + return -1; + } + + // start reader + int32_t code = pSyncNode->pFsm->FpSnapshotStartRead(pSyncNode->pFsm, &(pSender->snapshotParam), &(pSender->pReader)); + if (code != 0) { + syncNodeErrorLog(pSyncNode, "create snapshot reader error"); + return -1; + } + + // build begin msg + SyncSnapshotSend *pSendMsg = syncSnapshotSendBuild(0, pSender->pSyncNode->vgId); + pSendMsg->srcId = pSender->pSyncNode->myRaftId; + pSendMsg->destId = (pSender->pSyncNode->replicasId)[pSender->replicaIndex]; + pSendMsg->term = pSender->pSyncNode->pRaftStore->currentTerm; + pSendMsg->beginIndex = pSender->snapshotParam.start; + pSendMsg->lastIndex = pSender->snapshot.lastApplyIndex; + pSendMsg->lastTerm = pSender->snapshot.lastApplyTerm; + pSendMsg->lastConfigIndex = pSender->snapshot.lastConfigIndex; + pSendMsg->lastConfig = pSender->lastConfig; + pSendMsg->startTime = pSender->startTime; + pSendMsg->seq = SYNC_SNAPSHOT_SEQ_BEGIN; + + // send msg + SRpcMsg rpcMsg; + syncSnapshotSend2RpcMsg(pSendMsg, &rpcMsg); + syncNodeSendMsgById(&(pSendMsg->destId), pSender->pSyncNode, &rpcMsg); + syncSnapshotSendDestroy(pSendMsg); + + return 0; +} + // sender on message // // condition 1 sender receives SYNC_SNAPSHOT_SEQ_END, close sender @@ -857,8 +988,8 @@ int32_t syncNodeOnSnapshot(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) { // int32_t syncNodeOnSnapshotReply(SSyncNode *pSyncNode, SyncSnapshotRsp *pMsg) { // if already drop replica, do not process - if (!syncNodeInRaftGroup(pSyncNode, &(pMsg->srcId)) && pSyncNode->state == TAOS_SYNC_STATE_LEADER) { - sError("vgId:%d, recv sync-snapshot-rsp, maybe replica already dropped", pSyncNode->vgId); + if (!syncNodeInRaftGroup(pSyncNode, &(pMsg->srcId))) { + syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "maybe replica already dropped"); return -1; } @@ -866,17 +997,26 @@ int32_t syncNodeOnSnapshotReply(SSyncNode *pSyncNode, SyncSnapshotRsp *pMsg) { SSyncSnapshotSender *pSender = syncNodeGetSnapshotSender(pSyncNode, &(pMsg->srcId)); ASSERT(pSender != NULL); + if (pMsg->startTime != pSender->startTime) { + syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "sender/receiver start time not match"); + return -1; + } + // state, term, seq/ack if (pSyncNode->state == TAOS_SYNC_STATE_LEADER) { if (pMsg->term == pSyncNode->pRaftStore->currentTerm) { - // condition 1 + // prepare , send begin msg + if (pMsg->ack == SYNC_SNAPSHOT_SEQ_PRE_SNAPSHOT) { + syncNodeOnSnapshotReplyPre(pSyncNode, pMsg); + return 0; + } + // receive ack is finish, close sender if (pMsg->ack == SYNC_SNAPSHOT_SEQ_END) { snapshotSenderStop(pSender, true); return 0; } - // condition 2 // send next msg if (pMsg->ack == pSender->seq) { // update sender ack diff --git a/source/libs/sync/test/syncSnapshotReceiverTest.cpp b/source/libs/sync/test/syncSnapshotReceiverTest.cpp index 0f8e76f121..0a96c7dadc 100644 --- a/source/libs/sync/test/syncSnapshotReceiverTest.cpp +++ b/source/libs/sync/test/syncSnapshotReceiverTest.cpp @@ -50,7 +50,6 @@ SSyncSnapshotReceiver* createReceiver() { pReceiver->ack = 20; pReceiver->pWriter = (void*)0x11; pReceiver->term = 66; - pReceiver->privateTerm = 99; return pReceiver; } diff --git a/source/libs/sync/test/syncSnapshotRspTest.cpp b/source/libs/sync/test/syncSnapshotRspTest.cpp index 89348da617..63905c2182 100644 --- a/source/libs/sync/test/syncSnapshotRspTest.cpp +++ b/source/libs/sync/test/syncSnapshotRspTest.cpp @@ -21,7 +21,7 @@ SyncSnapshotRsp *createMsg() { pMsg->destId.addr = syncUtilAddr2U64("127.0.0.1", 5678); pMsg->destId.vgId = 100; pMsg->term = 11; - pMsg->privateTerm = 99; + pMsg->startTime = 99; pMsg->lastIndex = 22; pMsg->lastTerm = 33; pMsg->ack = 44; diff --git a/source/libs/sync/test/syncSnapshotSendTest.cpp b/source/libs/sync/test/syncSnapshotSendTest.cpp index 6fcfa6f6c4..83f1dfebb3 100644 --- a/source/libs/sync/test/syncSnapshotSendTest.cpp +++ b/source/libs/sync/test/syncSnapshotSendTest.cpp @@ -21,7 +21,6 @@ SyncSnapshotSend *createMsg() { pMsg->destId.addr = syncUtilAddr2U64("127.0.0.1", 5678); pMsg->destId.vgId = 100; pMsg->term = 11; - pMsg->privateTerm = 99; pMsg->lastIndex = 22; pMsg->lastTerm = 33; diff --git a/source/libs/sync/test/syncSnapshotSenderTest.cpp b/source/libs/sync/test/syncSnapshotSenderTest.cpp index 8d1f83b3b1..010757e724 100644 --- a/source/libs/sync/test/syncSnapshotSenderTest.cpp +++ b/source/libs/sync/test/syncSnapshotSenderTest.cpp @@ -55,7 +55,8 @@ SSyncSnapshotSender* createSender() { pSender->snapshot.lastApplyTerm = 88; pSender->sendingMS = 77; pSender->term = 66; - pSender->privateTerm = 99; + + //pSender->privateTerm = 99; return pSender; }