refactor(sync): refacotr sync
This commit is contained in:
commit
5965629b2e
|
@ -99,6 +99,7 @@ int32_t mndGetLoad(SMnode *pMnode, SMnodeLoad *pLoad);
|
|||
*/
|
||||
int32_t mndProcessRpcMsg(SRpcMsg *pMsg);
|
||||
int32_t mndProcessSyncMsg(SRpcMsg *pMsg);
|
||||
int32_t mndProcessSyncCtrlMsg(SRpcMsg *pMsg);
|
||||
int32_t mndPreProcessQueryMsg(SRpcMsg *pMsg);
|
||||
void mndPostProcessQueryMsg(SRpcMsg *pMsg);
|
||||
|
||||
|
|
|
@ -35,7 +35,12 @@ extern bool gRaftDetailLog;
|
|||
#define SYNC_MAX_PROGRESS_WAIT_MS 4000
|
||||
#define SYNC_MAX_START_TIME_RANGE_MS (1000 * 20)
|
||||
#define SYNC_MAX_RECV_TIME_RANGE_MS 1200
|
||||
#define SYNC_DEL_WAL_MS (1000 * 60)
|
||||
#define SYNC_ADD_QUORUM_COUNT 3
|
||||
#define SYNC_MNODE_LOG_RETENTION 10000
|
||||
#define SYNC_VNODE_LOG_RETENTION 500
|
||||
|
||||
#define SYNC_APPEND_ENTRIES_TIMEOUT_MS 10000
|
||||
|
||||
#define SYNC_MAX_BATCH_SIZE 1
|
||||
#define SYNC_INDEX_BEGIN 0
|
||||
|
@ -157,32 +162,15 @@ typedef struct SSyncLogStore {
|
|||
SLRUCache* pCache;
|
||||
void* data;
|
||||
|
||||
// append one log entry
|
||||
int32_t (*appendEntry)(struct SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry);
|
||||
|
||||
// get one log entry, user need to free pEntry->pCont
|
||||
SSyncRaftEntry* (*getEntry)(struct SSyncLogStore* pLogStore, SyncIndex index);
|
||||
|
||||
// truncate log with index, entries after the given index (>=index) will be deleted
|
||||
int32_t (*truncate)(struct SSyncLogStore* pLogStore, SyncIndex fromIndex);
|
||||
|
||||
// return index of last entry
|
||||
SyncIndex (*getLastIndex)(struct SSyncLogStore* pLogStore);
|
||||
|
||||
// return term of last entry
|
||||
SyncTerm (*getLastTerm)(struct SSyncLogStore* pLogStore);
|
||||
|
||||
// update log store commit index with "index"
|
||||
int32_t (*updateCommitIndex)(struct SSyncLogStore* pLogStore, SyncIndex index);
|
||||
|
||||
// return commit index of log
|
||||
SyncIndex (*getCommitIndex)(struct SSyncLogStore* pLogStore);
|
||||
int32_t (*syncLogUpdateCommitIndex)(struct SSyncLogStore* pLogStore, SyncIndex index);
|
||||
SyncIndex (*syncLogCommitIndex)(struct SSyncLogStore* pLogStore);
|
||||
|
||||
SyncIndex (*syncLogBeginIndex)(struct SSyncLogStore* pLogStore);
|
||||
SyncIndex (*syncLogEndIndex)(struct SSyncLogStore* pLogStore);
|
||||
bool (*syncLogIsEmpty)(struct SSyncLogStore* pLogStore);
|
||||
|
||||
int32_t (*syncLogEntryCount)(struct SSyncLogStore* pLogStore);
|
||||
int32_t (*syncLogRestoreFromSnapshot)(struct SSyncLogStore* pLogStore, SyncIndex index);
|
||||
bool (*syncLogIsEmpty)(struct SSyncLogStore* pLogStore);
|
||||
bool (*syncLogExist)(struct SSyncLogStore* pLogStore, SyncIndex index);
|
||||
|
||||
SyncIndex (*syncLogWriteIndex)(struct SSyncLogStore* pLogStore);
|
||||
|
@ -207,6 +195,7 @@ typedef struct SSyncInfo {
|
|||
SMsgCb* msgcb;
|
||||
int32_t (*FpSendMsg)(const SEpSet* pEpSet, SRpcMsg* pMsg);
|
||||
int32_t (*FpEqMsg)(const SMsgCb* msgcb, SRpcMsg* pMsg);
|
||||
int32_t (*FpEqCtrlMsg)(const SMsgCb* msgcb, SRpcMsg* pMsg);
|
||||
} SSyncInfo;
|
||||
|
||||
int32_t syncInit();
|
||||
|
@ -217,7 +206,6 @@ void syncStop(int64_t rid);
|
|||
int32_t syncSetStandby(int64_t rid);
|
||||
ESyncState syncGetMyRole(int64_t rid);
|
||||
bool syncIsReady(int64_t rid);
|
||||
bool syncIsReadyForRead(int64_t rid);
|
||||
const char* syncGetMyRoleStr(int64_t rid);
|
||||
bool syncRestoreFinish(int64_t rid);
|
||||
SyncTerm syncGetMyTerm(int64_t rid);
|
||||
|
@ -227,13 +215,13 @@ SyncGroupId syncGetVgId(int64_t rid);
|
|||
void syncGetEpSet(int64_t rid, SEpSet* pEpSet);
|
||||
void syncGetRetryEpSet(int64_t rid, SEpSet* pEpSet);
|
||||
int32_t syncPropose(int64_t rid, SRpcMsg* pMsg, bool isWeak);
|
||||
int32_t syncProposeBatch(int64_t rid, SRpcMsg** pMsgPArr, bool* pIsWeakArr, int32_t arrSize);
|
||||
// int32_t syncProposeBatch(int64_t rid, SRpcMsg** pMsgPArr, bool* pIsWeakArr, int32_t arrSize);
|
||||
bool syncEnvIsStart();
|
||||
const char* syncStr(ESyncState state);
|
||||
bool syncIsRestoreFinish(int64_t rid);
|
||||
int32_t syncGetSnapshotByIndex(int64_t rid, SyncIndex index, SSnapshot* pSnapshot);
|
||||
|
||||
int32_t syncReconfig(int64_t rid, SSyncCfg* pNewCfg);
|
||||
int32_t syncReconfig(int64_t rid, const SSyncCfg* pNewCfg);
|
||||
|
||||
// build SRpcMsg, need to call syncPropose with SRpcMsg
|
||||
int32_t syncReconfigBuild(int64_t rid, const SSyncCfg* pNewCfg, SRpcMsg* pRpcMsg);
|
||||
|
@ -241,6 +229,9 @@ int32_t syncReconfigBuild(int64_t rid, const SSyncCfg* pNewCfg, SRpcMsg* pRpcMsg
|
|||
int32_t syncLeaderTransfer(int64_t rid);
|
||||
int32_t syncLeaderTransferTo(int64_t rid, SNodeInfo newLeader);
|
||||
|
||||
int32_t syncBeginSnapshot(int64_t rid, int64_t lastApplyIndex);
|
||||
int32_t syncEndSnapshot(int64_t rid);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -157,6 +157,8 @@ typedef enum ESyncTimeoutType {
|
|||
SYNC_TIMEOUT_HEARTBEAT,
|
||||
} ESyncTimeoutType;
|
||||
|
||||
const char* syncTimerTypeStr(enum ESyncTimeoutType timerType);
|
||||
|
||||
typedef struct SyncTimeout {
|
||||
uint32_t bytes;
|
||||
int32_t vgId;
|
||||
|
@ -423,6 +425,7 @@ typedef struct SyncAppendEntriesReply {
|
|||
SyncTerm privateTerm;
|
||||
bool success;
|
||||
SyncIndex matchIndex;
|
||||
SyncIndex lastSendIndex;
|
||||
int64_t startTime;
|
||||
} SyncAppendEntriesReply;
|
||||
|
||||
|
@ -456,6 +459,8 @@ typedef struct SyncHeartbeat {
|
|||
SyncTerm term;
|
||||
SyncIndex commitIndex;
|
||||
SyncTerm privateTerm;
|
||||
SyncTerm minMatchIndex;
|
||||
|
||||
} SyncHeartbeat;
|
||||
|
||||
SyncHeartbeat* syncHeartbeatBuild(int32_t vgId);
|
||||
|
@ -676,24 +681,17 @@ void syncReconfigFinishLog2(char* s, const SyncReconfigFinish* pMsg);
|
|||
// on message ----------------------
|
||||
int32_t syncNodeOnPingCb(SSyncNode* ths, SyncPing* pMsg);
|
||||
int32_t syncNodeOnPingReplyCb(SSyncNode* ths, SyncPingReply* pMsg);
|
||||
int32_t syncNodeOnTimeoutCb(SSyncNode* ths, SyncTimeout* pMsg);
|
||||
int32_t syncNodeOnClientRequestCb(SSyncNode* ths, SyncClientRequest* pMsg, SyncIndex* pRetIndex);
|
||||
int32_t syncNodeOnClientRequestBatchCb(SSyncNode* ths, SyncClientRequestBatch* pMsg);
|
||||
int32_t syncNodeOnRequestVoteCb(SSyncNode* ths, SyncRequestVote* pMsg);
|
||||
int32_t syncNodeOnRequestVoteReplyCb(SSyncNode* ths, SyncRequestVoteReply* pMsg);
|
||||
int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg);
|
||||
int32_t syncNodeOnAppendEntriesReplyCb(SSyncNode* ths, SyncAppendEntriesReply* pMsg);
|
||||
int32_t syncNodeOnTimer(SSyncNode* ths, SyncTimeout* pMsg);
|
||||
|
||||
int32_t syncNodeOnRequestVoteSnapshotCb(SSyncNode* ths, SyncRequestVote* pMsg);
|
||||
int32_t syncNodeOnRequestVoteReplySnapshotCb(SSyncNode* ths, SyncRequestVoteReply* pMsg);
|
||||
int32_t syncNodeOnAppendEntriesSnapshotCb(SSyncNode* ths, SyncAppendEntries* pMsg);
|
||||
int32_t syncNodeOnAppendEntriesReplySnapshotCb(SSyncNode* ths, SyncAppendEntriesReply* pMsg);
|
||||
|
||||
int32_t syncNodeOnAppendEntriesSnapshot2Cb(SSyncNode* ths, SyncAppendEntriesBatch* pMsg);
|
||||
int32_t syncNodeOnAppendEntriesReplySnapshot2Cb(SSyncNode* ths, SyncAppendEntriesReply* pMsg);
|
||||
|
||||
int32_t syncNodeOnSnapshotSendCb(SSyncNode* ths, SyncSnapshotSend* pMsg);
|
||||
int32_t syncNodeOnSnapshotRspCb(SSyncNode* ths, SyncSnapshotRsp* pMsg);
|
||||
int32_t syncNodeOnHeartbeat(SSyncNode* ths, SyncHeartbeat* pMsg);
|
||||
int32_t syncNodeOnHeartbeatReply(SSyncNode* ths, SyncHeartbeatReply* pMsg);
|
||||
int32_t syncNodeOnClientRequest(SSyncNode* ths, SyncClientRequest* pMsg, SyncIndex* pRetIndex);
|
||||
int32_t syncNodeOnRequestVote(SSyncNode* ths, SyncRequestVote* pMsg);
|
||||
int32_t syncNodeOnRequestVoteReply(SSyncNode* ths, SyncRequestVoteReply* pMsg);
|
||||
int32_t syncNodeOnAppendEntries(SSyncNode* ths, SyncAppendEntries* pMsg);
|
||||
int32_t syncNodeOnAppendEntriesReply(SSyncNode* ths, SyncAppendEntriesReply* pMsg);
|
||||
int32_t syncNodeOnSnapshot(SSyncNode* ths, SyncSnapshotSend* pMsg);
|
||||
int32_t syncNodeOnSnapshotReply(SSyncNode* ths, SyncSnapshotRsp* pMsg);
|
||||
|
||||
int32_t syncNodeOnHeartbeat(SSyncNode* ths, SyncHeartbeat* pMsg);
|
||||
int32_t syncNodeOnHeartbeatReply(SSyncNode* ths, SyncHeartbeatReply* pMsg);
|
||||
|
@ -707,8 +705,8 @@ typedef int32_t (*FpOnRequestVoteReplyCb)(SSyncNode* ths, SyncRequestVoteReply*
|
|||
typedef int32_t (*FpOnAppendEntriesCb)(SSyncNode* ths, SyncAppendEntries* pMsg);
|
||||
typedef int32_t (*FpOnAppendEntriesReplyCb)(SSyncNode* ths, SyncAppendEntriesReply* pMsg);
|
||||
typedef int32_t (*FpOnTimeoutCb)(SSyncNode* pSyncNode, SyncTimeout* pMsg);
|
||||
typedef int32_t (*FpOnSnapshotSendCb)(SSyncNode* ths, SyncSnapshotSend* pMsg);
|
||||
typedef int32_t (*FpOnSnapshotRspCb)(SSyncNode* ths, SyncSnapshotRsp* pMsg);
|
||||
typedef int32_t (*FpOnSnapshotCb)(SSyncNode* ths, SyncSnapshotSend* pMsg);
|
||||
typedef int32_t (*FpOnSnapshotReplyCb)(SSyncNode* ths, SyncSnapshotRsp* pMsg);
|
||||
|
||||
// option ----------------------------------
|
||||
bool syncNodeSnapshotEnable(SSyncNode* pSyncNode);
|
||||
|
|
|
@ -34,6 +34,7 @@ typedef struct SMnodeMgmt {
|
|||
SSingleWorker readWorker;
|
||||
SSingleWorker writeWorker;
|
||||
SSingleWorker syncWorker;
|
||||
SSingleWorker syncCtrlWorker;
|
||||
bool stopped;
|
||||
int32_t refCount;
|
||||
TdThreadRwlock lock;
|
||||
|
@ -53,6 +54,7 @@ int32_t mmStartWorker(SMnodeMgmt *pMgmt);
|
|||
void mmStopWorker(SMnodeMgmt *pMgmt);
|
||||
int32_t mmPutMsgToWriteQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg);
|
||||
int32_t mmPutMsgToSyncQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg);
|
||||
int32_t mmPutMsgToSyncCtrlQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg);
|
||||
int32_t mmPutMsgToReadQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg);
|
||||
int32_t mmPutMsgToQueryQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg);
|
||||
int32_t mmPutMsgToFetchQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg);
|
||||
|
|
|
@ -201,6 +201,9 @@ SArray *mmGetMsgHandles() {
|
|||
if (dmSetMgmtHandle(pArray, TDMT_SYNC_SET_MNODE_STANDBY_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
|
||||
if (dmSetMgmtHandle(pArray, TDMT_SYNC_SET_VNODE_STANDBY_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
|
||||
|
||||
if (dmSetMgmtHandle(pArray, TDMT_SYNC_HEARTBEAT, mmPutMsgToSyncCtrlQueue, 1) == NULL) goto _OVER;
|
||||
if (dmSetMgmtHandle(pArray, TDMT_SYNC_HEARTBEAT_REPLY, mmPutMsgToSyncCtrlQueue, 1) == NULL) goto _OVER;
|
||||
|
||||
code = 0;
|
||||
|
||||
_OVER:
|
||||
|
|
|
@ -67,6 +67,24 @@ static void mmProcessRpcMsg(SQueueInfo *pInfo, SRpcMsg *pMsg) {
|
|||
taosFreeQitem(pMsg);
|
||||
}
|
||||
|
||||
static void mmProcessSyncCtrlMsg(SQueueInfo *pInfo, SRpcMsg *pMsg) {
|
||||
SMnodeMgmt *pMgmt = pInfo->ahandle;
|
||||
pMsg->info.node = pMgmt->pMnode;
|
||||
|
||||
const STraceId *trace = &pMsg->info.traceId;
|
||||
dGTrace("msg:%p, get from mnode-sync-ctrl queue", pMsg);
|
||||
|
||||
SMsgHead *pHead = pMsg->pCont;
|
||||
pHead->contLen = ntohl(pHead->contLen);
|
||||
pHead->vgId = ntohl(pHead->vgId);
|
||||
|
||||
int32_t code = mndProcessSyncCtrlMsg(pMsg);
|
||||
|
||||
dGTrace("msg:%p, is freed, code:0x%x", pMsg, code);
|
||||
rpcFreeCont(pMsg->pCont);
|
||||
taosFreeQitem(pMsg);
|
||||
}
|
||||
|
||||
static void mmProcessSyncMsg(SQueueInfo *pInfo, SRpcMsg *pMsg) {
|
||||
SMnodeMgmt *pMgmt = pInfo->ahandle;
|
||||
pMsg->info.node = pMgmt->pMnode;
|
||||
|
@ -108,6 +126,10 @@ int32_t mmPutMsgToSyncQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg) {
|
|||
return mmPutMsgToWorker(pMgmt, &pMgmt->syncWorker, pMsg);
|
||||
}
|
||||
|
||||
int32_t mmPutMsgToSyncCtrlQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg) {
|
||||
return mmPutMsgToWorker(pMgmt, &pMgmt->syncCtrlWorker, pMsg);
|
||||
}
|
||||
|
||||
int32_t mmPutMsgToReadQueue(SMnodeMgmt *pMgmt, SRpcMsg *pMsg) {
|
||||
return mmPutMsgToWorker(pMgmt, &pMgmt->readWorker, pMsg);
|
||||
}
|
||||
|
@ -144,6 +166,9 @@ int32_t mmPutMsgToQueue(SMnodeMgmt *pMgmt, EQueueType qtype, SRpcMsg *pRpc) {
|
|||
case SYNC_QUEUE:
|
||||
pWorker = &pMgmt->syncWorker;
|
||||
break;
|
||||
case SYNC_CTRL_QUEUE:
|
||||
pWorker = &pMgmt->syncCtrlWorker;
|
||||
break;
|
||||
default:
|
||||
terrno = TSDB_CODE_INVALID_PARA;
|
||||
}
|
||||
|
@ -223,6 +248,18 @@ int32_t mmStartWorker(SMnodeMgmt *pMgmt) {
|
|||
return -1;
|
||||
}
|
||||
|
||||
SSingleWorkerCfg scCfg = {
|
||||
.min = 1,
|
||||
.max = 1,
|
||||
.name = "mnode-sync-ctrl",
|
||||
.fp = (FItem)mmProcessSyncCtrlMsg,
|
||||
.param = pMgmt,
|
||||
};
|
||||
if (tSingleWorkerInit(&pMgmt->syncCtrlWorker, &scCfg) != 0) {
|
||||
dError("failed to start mnode mnode-sync-ctrl worker since %s", terrstr());
|
||||
return -1;
|
||||
}
|
||||
|
||||
dDebug("mnode workers are initialized");
|
||||
return 0;
|
||||
}
|
||||
|
@ -235,5 +272,6 @@ void mmStopWorker(SMnodeMgmt *pMgmt) {
|
|||
tSingleWorkerCleanup(&pMgmt->readWorker);
|
||||
tSingleWorkerCleanup(&pMgmt->writeWorker);
|
||||
tSingleWorkerCleanup(&pMgmt->syncWorker);
|
||||
tSingleWorkerCleanup(&pMgmt->syncCtrlWorker);
|
||||
dDebug("mnode workers are closed");
|
||||
}
|
||||
|
|
|
@ -231,11 +231,9 @@ static int32_t vmPutMsgToQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg, EQueueType qtyp
|
|||
return code;
|
||||
}
|
||||
|
||||
int32_t vmPutMsgToSyncQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { return vmPutMsgToQueue(pMgmt, pMsg, SYNC_QUEUE); }
|
||||
int32_t vmPutMsgToSyncCtrlQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { return vmPutMsgToQueue(pMgmt, pMsg, SYNC_CTRL_QUEUE); }
|
||||
|
||||
int32_t vmPutMsgToSyncCtrlQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) {
|
||||
return vmPutMsgToQueue(pMgmt, pMsg, SYNC_CTRL_QUEUE);
|
||||
}
|
||||
int32_t vmPutMsgToSyncQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { return vmPutMsgToQueue(pMgmt, pMsg, SYNC_QUEUE); }
|
||||
|
||||
int32_t vmPutMsgToWriteQueue(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) { return vmPutMsgToQueue(pMgmt, pMsg, WRITE_QUEUE); }
|
||||
|
||||
|
|
|
@ -474,6 +474,45 @@ void mndStop(SMnode *pMnode) {
|
|||
mndCleanupTimer(pMnode);
|
||||
}
|
||||
|
||||
int32_t mndProcessSyncCtrlMsg(SRpcMsg *pMsg) {
|
||||
SMnode *pMnode = pMsg->info.node;
|
||||
SSyncMgmt *pMgmt = &pMnode->syncMgmt;
|
||||
int32_t code = 0;
|
||||
|
||||
mInfo("vgId:%d, process sync ctrl msg", 1);
|
||||
|
||||
if (!syncEnvIsStart()) {
|
||||
mError("failed to process sync msg:%p type:%s since syncEnv stop", pMsg, TMSG_INFO(pMsg->msgType));
|
||||
terrno = TSDB_CODE_SYN_INTERNAL_ERROR;
|
||||
return -1;
|
||||
}
|
||||
|
||||
SSyncNode *pSyncNode = syncNodeAcquire(pMgmt->sync);
|
||||
if (pSyncNode == NULL) {
|
||||
mError("failed to process sync msg:%p type:%s since syncNode is null", pMsg, TMSG_INFO(pMsg->msgType));
|
||||
terrno = TSDB_CODE_SYN_INTERNAL_ERROR;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (pMsg->msgType == TDMT_SYNC_HEARTBEAT) {
|
||||
SyncHeartbeat *pSyncMsg = syncHeartbeatFromRpcMsg2(pMsg);
|
||||
code = syncNodeOnHeartbeat(pSyncNode, pSyncMsg);
|
||||
syncHeartbeatDestroy(pSyncMsg);
|
||||
|
||||
} else if (pMsg->msgType == TDMT_SYNC_HEARTBEAT_REPLY) {
|
||||
SyncHeartbeatReply *pSyncMsg = syncHeartbeatReplyFromRpcMsg2(pMsg);
|
||||
code = syncNodeOnHeartbeatReply(pSyncNode, pSyncMsg);
|
||||
syncHeartbeatReplyDestroy(pSyncMsg);
|
||||
}
|
||||
|
||||
syncNodeRelease(pSyncNode);
|
||||
|
||||
if (code != 0) {
|
||||
terrno = TSDB_CODE_SYN_INTERNAL_ERROR;
|
||||
}
|
||||
return code;
|
||||
}
|
||||
|
||||
int32_t mndProcessSyncMsg(SRpcMsg *pMsg) {
|
||||
SMnode *pMnode = pMsg->info.node;
|
||||
SSyncMgmt *pMgmt = &pMnode->syncMgmt;
|
||||
|
@ -492,98 +531,65 @@ int32_t mndProcessSyncMsg(SRpcMsg *pMsg) {
|
|||
return -1;
|
||||
}
|
||||
|
||||
// ToDo: ugly! use function pointer
|
||||
if (syncNodeStrategy(pSyncNode) == SYNC_STRATEGY_STANDARD_SNAPSHOT) {
|
||||
if (pMsg->msgType == TDMT_SYNC_TIMEOUT) {
|
||||
SyncTimeout *pSyncMsg = syncTimeoutFromRpcMsg2(pMsg);
|
||||
code = syncNodeOnTimeoutCb(pSyncNode, pSyncMsg);
|
||||
code = syncNodeOnTimer(pSyncNode, pSyncMsg);
|
||||
syncTimeoutDestroy(pSyncMsg);
|
||||
|
||||
} else if (pMsg->msgType == TDMT_SYNC_PING) {
|
||||
SyncPing *pSyncMsg = syncPingFromRpcMsg2(pMsg);
|
||||
code = syncNodeOnPingCb(pSyncNode, pSyncMsg);
|
||||
syncPingDestroy(pSyncMsg);
|
||||
|
||||
} else if (pMsg->msgType == TDMT_SYNC_PING_REPLY) {
|
||||
SyncPingReply *pSyncMsg = syncPingReplyFromRpcMsg2(pMsg);
|
||||
code = syncNodeOnPingReplyCb(pSyncNode, pSyncMsg);
|
||||
syncPingReplyDestroy(pSyncMsg);
|
||||
|
||||
} else if (pMsg->msgType == TDMT_SYNC_CLIENT_REQUEST) {
|
||||
SyncClientRequest *pSyncMsg = syncClientRequestFromRpcMsg2(pMsg);
|
||||
code = syncNodeOnClientRequestCb(pSyncNode, pSyncMsg, NULL);
|
||||
code = syncNodeOnClientRequest(pSyncNode, pSyncMsg, NULL);
|
||||
syncClientRequestDestroy(pSyncMsg);
|
||||
|
||||
} else if (pMsg->msgType == TDMT_SYNC_REQUEST_VOTE) {
|
||||
SyncRequestVote *pSyncMsg = syncRequestVoteFromRpcMsg2(pMsg);
|
||||
code = syncNodeOnRequestVoteSnapshotCb(pSyncNode, pSyncMsg);
|
||||
code = syncNodeOnRequestVote(pSyncNode, pSyncMsg);
|
||||
syncRequestVoteDestroy(pSyncMsg);
|
||||
|
||||
} else if (pMsg->msgType == TDMT_SYNC_REQUEST_VOTE_REPLY) {
|
||||
SyncRequestVoteReply *pSyncMsg = syncRequestVoteReplyFromRpcMsg2(pMsg);
|
||||
code = syncNodeOnRequestVoteReplySnapshotCb(pSyncNode, pSyncMsg);
|
||||
code = syncNodeOnRequestVoteReply(pSyncNode, pSyncMsg);
|
||||
syncRequestVoteReplyDestroy(pSyncMsg);
|
||||
|
||||
} else if (pMsg->msgType == TDMT_SYNC_APPEND_ENTRIES) {
|
||||
SyncAppendEntries *pSyncMsg = syncAppendEntriesFromRpcMsg2(pMsg);
|
||||
code = syncNodeOnAppendEntriesSnapshotCb(pSyncNode, pSyncMsg);
|
||||
code = syncNodeOnAppendEntries(pSyncNode, pSyncMsg);
|
||||
syncAppendEntriesDestroy(pSyncMsg);
|
||||
|
||||
} else if (pMsg->msgType == TDMT_SYNC_APPEND_ENTRIES_REPLY) {
|
||||
SyncAppendEntriesReply *pSyncMsg = syncAppendEntriesReplyFromRpcMsg2(pMsg);
|
||||
code = syncNodeOnAppendEntriesReplySnapshotCb(pSyncNode, pSyncMsg);
|
||||
code = syncNodeOnAppendEntriesReply(pSyncNode, pSyncMsg);
|
||||
syncAppendEntriesReplyDestroy(pSyncMsg);
|
||||
|
||||
} else if (pMsg->msgType == TDMT_SYNC_SNAPSHOT_SEND) {
|
||||
SyncSnapshotSend *pSyncMsg = syncSnapshotSendFromRpcMsg2(pMsg);
|
||||
code = syncNodeOnSnapshotSendCb(pSyncNode, pSyncMsg);
|
||||
code = syncNodeOnSnapshot(pSyncNode, pSyncMsg);
|
||||
syncSnapshotSendDestroy(pSyncMsg);
|
||||
|
||||
} else if (pMsg->msgType == TDMT_SYNC_SNAPSHOT_RSP) {
|
||||
SyncSnapshotRsp *pSyncMsg = syncSnapshotRspFromRpcMsg2(pMsg);
|
||||
code = syncNodeOnSnapshotRspCb(pSyncNode, pSyncMsg);
|
||||
code = syncNodeOnSnapshotReply(pSyncNode, pSyncMsg);
|
||||
syncSnapshotRspDestroy(pSyncMsg);
|
||||
|
||||
} else if (pMsg->msgType == TDMT_SYNC_SET_MNODE_STANDBY) {
|
||||
code = syncSetStandby(pMgmt->sync);
|
||||
SRpcMsg rsp = {.code = code, .info = pMsg->info};
|
||||
tmsgSendRsp(&rsp);
|
||||
|
||||
} else {
|
||||
mError("failed to process msg:%p since invalid type:%s", pMsg, TMSG_INFO(pMsg->msgType));
|
||||
code = -1;
|
||||
}
|
||||
} else {
|
||||
if (pMsg->msgType == TDMT_SYNC_TIMEOUT) {
|
||||
SyncTimeout *pSyncMsg = syncTimeoutFromRpcMsg2(pMsg);
|
||||
code = syncNodeOnTimeoutCb(pSyncNode, pSyncMsg);
|
||||
syncTimeoutDestroy(pSyncMsg);
|
||||
} else if (pMsg->msgType == TDMT_SYNC_PING) {
|
||||
SyncPing *pSyncMsg = syncPingFromRpcMsg2(pMsg);
|
||||
code = syncNodeOnPingCb(pSyncNode, pSyncMsg);
|
||||
syncPingDestroy(pSyncMsg);
|
||||
} else if (pMsg->msgType == TDMT_SYNC_PING_REPLY) {
|
||||
SyncPingReply *pSyncMsg = syncPingReplyFromRpcMsg2(pMsg);
|
||||
code = syncNodeOnPingReplyCb(pSyncNode, pSyncMsg);
|
||||
syncPingReplyDestroy(pSyncMsg);
|
||||
} else if (pMsg->msgType == TDMT_SYNC_CLIENT_REQUEST) {
|
||||
SyncClientRequest *pSyncMsg = syncClientRequestFromRpcMsg2(pMsg);
|
||||
code = syncNodeOnClientRequestCb(pSyncNode, pSyncMsg, NULL);
|
||||
syncClientRequestDestroy(pSyncMsg);
|
||||
} else if (pMsg->msgType == TDMT_SYNC_REQUEST_VOTE) {
|
||||
SyncRequestVote *pSyncMsg = syncRequestVoteFromRpcMsg2(pMsg);
|
||||
code = syncNodeOnRequestVoteCb(pSyncNode, pSyncMsg);
|
||||
syncRequestVoteDestroy(pSyncMsg);
|
||||
} else if (pMsg->msgType == TDMT_SYNC_REQUEST_VOTE_REPLY) {
|
||||
SyncRequestVoteReply *pSyncMsg = syncRequestVoteReplyFromRpcMsg2(pMsg);
|
||||
code = syncNodeOnRequestVoteReplyCb(pSyncNode, pSyncMsg);
|
||||
syncRequestVoteReplyDestroy(pSyncMsg);
|
||||
} else if (pMsg->msgType == TDMT_SYNC_APPEND_ENTRIES) {
|
||||
SyncAppendEntries *pSyncMsg = syncAppendEntriesFromRpcMsg2(pMsg);
|
||||
code = syncNodeOnAppendEntriesCb(pSyncNode, pSyncMsg);
|
||||
syncAppendEntriesDestroy(pSyncMsg);
|
||||
} else if (pMsg->msgType == TDMT_SYNC_APPEND_ENTRIES_REPLY) {
|
||||
SyncAppendEntriesReply *pSyncMsg = syncAppendEntriesReplyFromRpcMsg2(pMsg);
|
||||
code = syncNodeOnAppendEntriesReplyCb(pSyncNode, pSyncMsg);
|
||||
syncAppendEntriesReplyDestroy(pSyncMsg);
|
||||
} else if (pMsg->msgType == TDMT_SYNC_SET_MNODE_STANDBY) {
|
||||
code = syncSetStandby(pMgmt->sync);
|
||||
SRpcMsg rsp = {.code = code, .info = pMsg->info};
|
||||
tmsgSendRsp(&rsp);
|
||||
} else {
|
||||
mError("failed to process msg:%p since invalid type:%s", pMsg, TMSG_INFO(pMsg->msgType));
|
||||
code = -1;
|
||||
}
|
||||
}
|
||||
|
||||
syncNodeRelease(pSyncNode);
|
||||
|
||||
|
|
|
@ -237,7 +237,9 @@ int vnodeCommit(SVnode *pVnode) {
|
|||
code = terrno;
|
||||
TSDB_CHECK_CODE(code, lino, _exit);
|
||||
}
|
||||
walBeginSnapshot(pVnode->pWal, pVnode->state.applied);
|
||||
|
||||
// walBeginSnapshot(pVnode->pWal, pVnode->state.applied);
|
||||
syncBeginSnapshot(pVnode->sync, pVnode->state.applied);
|
||||
|
||||
if (smaPreCommit(pVnode->pSma) < 0) {
|
||||
vError("vgId:%d, failed to pre-commit sma since %s", TD_VID(pVnode), tstrerror(terrno));
|
||||
|
@ -289,7 +291,8 @@ int vnodeCommit(SVnode *pVnode) {
|
|||
}
|
||||
|
||||
// apply the commit (TODO)
|
||||
walEndSnapshot(pVnode->pWal);
|
||||
// walEndSnapshot(pVnode->pWal);
|
||||
syncEndSnapshot(pVnode->sync);
|
||||
|
||||
_exit:
|
||||
if (code) {
|
||||
|
|
|
@ -385,127 +385,74 @@ int32_t vnodeProcessSyncMsg(SVnode *pVnode, SRpcMsg *pMsg, SRpcMsg **pRsp) {
|
|||
|
||||
vGTrace("vgId:%d, sync msg:%p will be processed, type:%s", pVnode->config.vgId, pMsg, TMSG_INFO(pMsg->msgType));
|
||||
|
||||
if (syncNodeStrategy(pSyncNode) == SYNC_STRATEGY_NO_SNAPSHOT) {
|
||||
if (pMsg->msgType == TDMT_SYNC_TIMEOUT) {
|
||||
SyncTimeout *pSyncMsg = syncTimeoutFromRpcMsg2(pMsg);
|
||||
ASSERT(pSyncMsg != NULL);
|
||||
code = syncNodeOnTimeoutCb(pSyncNode, pSyncMsg);
|
||||
code = syncNodeOnTimer(pSyncNode, pSyncMsg);
|
||||
syncTimeoutDestroy(pSyncMsg);
|
||||
|
||||
} else if (pMsg->msgType == TDMT_SYNC_PING) {
|
||||
SyncPing *pSyncMsg = syncPingFromRpcMsg2(pMsg);
|
||||
ASSERT(pSyncMsg != NULL);
|
||||
code = syncNodeOnPingCb(pSyncNode, pSyncMsg);
|
||||
syncPingDestroy(pSyncMsg);
|
||||
|
||||
} else if (pMsg->msgType == TDMT_SYNC_PING_REPLY) {
|
||||
SyncPingReply *pSyncMsg = syncPingReplyFromRpcMsg2(pMsg);
|
||||
ASSERT(pSyncMsg != NULL);
|
||||
code = syncNodeOnPingReplyCb(pSyncNode, pSyncMsg);
|
||||
syncPingReplyDestroy(pSyncMsg);
|
||||
|
||||
} else if (pMsg->msgType == TDMT_SYNC_CLIENT_REQUEST) {
|
||||
SyncClientRequest *pSyncMsg = syncClientRequestFromRpcMsg2(pMsg);
|
||||
ASSERT(pSyncMsg != NULL);
|
||||
code = syncNodeOnClientRequestCb(pSyncNode, pSyncMsg, NULL);
|
||||
code = syncNodeOnClientRequest(pSyncNode, pSyncMsg, NULL);
|
||||
syncClientRequestDestroy(pSyncMsg);
|
||||
} else if (pMsg->msgType == TDMT_SYNC_CLIENT_REQUEST_BATCH) {
|
||||
SyncClientRequestBatch *pSyncMsg = syncClientRequestBatchFromRpcMsg(pMsg);
|
||||
ASSERT(pSyncMsg != NULL);
|
||||
code = syncNodeOnClientRequestBatchCb(pSyncNode, pSyncMsg);
|
||||
syncClientRequestBatchDestroyDeep(pSyncMsg);
|
||||
|
||||
} else if (pMsg->msgType == TDMT_SYNC_REQUEST_VOTE) {
|
||||
SyncRequestVote *pSyncMsg = syncRequestVoteFromRpcMsg2(pMsg);
|
||||
ASSERT(pSyncMsg != NULL);
|
||||
code = syncNodeOnRequestVoteCb(pSyncNode, pSyncMsg);
|
||||
code = syncNodeOnRequestVote(pSyncNode, pSyncMsg);
|
||||
syncRequestVoteDestroy(pSyncMsg);
|
||||
|
||||
} else if (pMsg->msgType == TDMT_SYNC_REQUEST_VOTE_REPLY) {
|
||||
SyncRequestVoteReply *pSyncMsg = syncRequestVoteReplyFromRpcMsg2(pMsg);
|
||||
ASSERT(pSyncMsg != NULL);
|
||||
code = syncNodeOnRequestVoteReplyCb(pSyncNode, pSyncMsg);
|
||||
code = syncNodeOnRequestVoteReply(pSyncNode, pSyncMsg);
|
||||
syncRequestVoteReplyDestroy(pSyncMsg);
|
||||
|
||||
} else if (pMsg->msgType == TDMT_SYNC_APPEND_ENTRIES) {
|
||||
SyncAppendEntries *pSyncMsg = syncAppendEntriesFromRpcMsg2(pMsg);
|
||||
ASSERT(pSyncMsg != NULL);
|
||||
code = syncNodeOnAppendEntriesCb(pSyncNode, pSyncMsg);
|
||||
code = syncNodeOnAppendEntries(pSyncNode, pSyncMsg);
|
||||
syncAppendEntriesDestroy(pSyncMsg);
|
||||
} else if (pMsg->msgType == TDMT_SYNC_APPEND_ENTRIES_REPLY) {
|
||||
SyncAppendEntriesReply *pSyncMsg = syncAppendEntriesReplyFromRpcMsg2(pMsg);
|
||||
ASSERT(pSyncMsg != NULL);
|
||||
code = syncNodeOnAppendEntriesReplyCb(pSyncNode, pSyncMsg);
|
||||
syncAppendEntriesReplyDestroy(pSyncMsg);
|
||||
} else if (pMsg->msgType == TDMT_SYNC_SET_VNODE_STANDBY) {
|
||||
code = vnodeSetStandBy(pVnode);
|
||||
if (code != 0 && terrno != 0) code = terrno;
|
||||
SRpcMsg rsp = {.code = code, .info = pMsg->info};
|
||||
tmsgSendRsp(&rsp);
|
||||
} else {
|
||||
vGError("vgId:%d, msg:%p failed to process since error msg type:%d", pVnode->config.vgId, pMsg, pMsg->msgType);
|
||||
code = -1;
|
||||
}
|
||||
|
||||
} else if (syncNodeStrategy(pSyncNode) == SYNC_STRATEGY_WAL_FIRST) {
|
||||
// use wal first strategy
|
||||
if (pMsg->msgType == TDMT_SYNC_TIMEOUT) {
|
||||
SyncTimeout *pSyncMsg = syncTimeoutFromRpcMsg2(pMsg);
|
||||
ASSERT(pSyncMsg != NULL);
|
||||
code = syncNodeOnTimeoutCb(pSyncNode, pSyncMsg);
|
||||
syncTimeoutDestroy(pSyncMsg);
|
||||
} else if (pMsg->msgType == TDMT_SYNC_PING) {
|
||||
SyncPing *pSyncMsg = syncPingFromRpcMsg2(pMsg);
|
||||
ASSERT(pSyncMsg != NULL);
|
||||
code = syncNodeOnPingCb(pSyncNode, pSyncMsg);
|
||||
syncPingDestroy(pSyncMsg);
|
||||
} else if (pMsg->msgType == TDMT_SYNC_PING_REPLY) {
|
||||
SyncPingReply *pSyncMsg = syncPingReplyFromRpcMsg2(pMsg);
|
||||
ASSERT(pSyncMsg != NULL);
|
||||
code = syncNodeOnPingReplyCb(pSyncNode, pSyncMsg);
|
||||
syncPingReplyDestroy(pSyncMsg);
|
||||
} else if (pMsg->msgType == TDMT_SYNC_CLIENT_REQUEST) {
|
||||
SyncClientRequest *pSyncMsg = syncClientRequestFromRpcMsg2(pMsg);
|
||||
ASSERT(pSyncMsg != NULL);
|
||||
code = syncNodeOnClientRequestCb(pSyncNode, pSyncMsg, NULL);
|
||||
syncClientRequestDestroy(pSyncMsg);
|
||||
} else if (pMsg->msgType == TDMT_SYNC_CLIENT_REQUEST_BATCH) {
|
||||
SyncClientRequestBatch *pSyncMsg = syncClientRequestBatchFromRpcMsg(pMsg);
|
||||
ASSERT(pSyncMsg != NULL);
|
||||
code = syncNodeOnClientRequestBatchCb(pSyncNode, pSyncMsg);
|
||||
syncClientRequestBatchDestroy(pSyncMsg);
|
||||
} else if (pMsg->msgType == TDMT_SYNC_REQUEST_VOTE) {
|
||||
SyncRequestVote *pSyncMsg = syncRequestVoteFromRpcMsg2(pMsg);
|
||||
ASSERT(pSyncMsg != NULL);
|
||||
code = syncNodeOnRequestVoteSnapshotCb(pSyncNode, pSyncMsg);
|
||||
syncRequestVoteDestroy(pSyncMsg);
|
||||
} else if (pMsg->msgType == TDMT_SYNC_REQUEST_VOTE_REPLY) {
|
||||
SyncRequestVoteReply *pSyncMsg = syncRequestVoteReplyFromRpcMsg2(pMsg);
|
||||
ASSERT(pSyncMsg != NULL);
|
||||
code = syncNodeOnRequestVoteReplySnapshotCb(pSyncNode, pSyncMsg);
|
||||
syncRequestVoteReplyDestroy(pSyncMsg);
|
||||
} else if (pMsg->msgType == TDMT_SYNC_APPEND_ENTRIES_BATCH) {
|
||||
SyncAppendEntriesBatch *pSyncMsg = syncAppendEntriesBatchFromRpcMsg2(pMsg);
|
||||
ASSERT(pSyncMsg != NULL);
|
||||
code = syncNodeOnAppendEntriesSnapshot2Cb(pSyncNode, pSyncMsg);
|
||||
syncAppendEntriesBatchDestroy(pSyncMsg);
|
||||
} else if (pMsg->msgType == TDMT_SYNC_APPEND_ENTRIES_REPLY) {
|
||||
SyncAppendEntriesReply *pSyncMsg = syncAppendEntriesReplyFromRpcMsg2(pMsg);
|
||||
ASSERT(pSyncMsg != NULL);
|
||||
code = syncNodeOnAppendEntriesReplySnapshot2Cb(pSyncNode, pSyncMsg);
|
||||
code = syncNodeOnAppendEntriesReply(pSyncNode, pSyncMsg);
|
||||
syncAppendEntriesReplyDestroy(pSyncMsg);
|
||||
|
||||
} else if (pMsg->msgType == TDMT_SYNC_SNAPSHOT_SEND) {
|
||||
SyncSnapshotSend *pSyncMsg = syncSnapshotSendFromRpcMsg2(pMsg);
|
||||
code = syncNodeOnSnapshotSendCb(pSyncNode, pSyncMsg);
|
||||
code = syncNodeOnSnapshot(pSyncNode, pSyncMsg);
|
||||
syncSnapshotSendDestroy(pSyncMsg);
|
||||
|
||||
} else if (pMsg->msgType == TDMT_SYNC_SNAPSHOT_RSP) {
|
||||
SyncSnapshotRsp *pSyncMsg = syncSnapshotRspFromRpcMsg2(pMsg);
|
||||
code = syncNodeOnSnapshotRspCb(pSyncNode, pSyncMsg);
|
||||
code = syncNodeOnSnapshotReply(pSyncNode, pSyncMsg);
|
||||
syncSnapshotRspDestroy(pSyncMsg);
|
||||
|
||||
} else if (pMsg->msgType == TDMT_SYNC_SET_VNODE_STANDBY) {
|
||||
code = vnodeSetStandBy(pVnode);
|
||||
if (code != 0 && terrno != 0) code = terrno;
|
||||
SRpcMsg rsp = {.code = code, .info = pMsg->info};
|
||||
tmsgSendRsp(&rsp);
|
||||
|
||||
} else {
|
||||
vGError("vgId:%d, msg:%p failed to process since error msg type:%d", pVnode->config.vgId, pMsg, pMsg->msgType);
|
||||
code = -1;
|
||||
}
|
||||
}
|
||||
|
||||
vTrace("vgId:%d, sync msg:%p is processed, type:%s code:0x%x", pVnode->config.vgId, pMsg, TMSG_INFO(pMsg->msgType),
|
||||
code);
|
||||
|
@ -516,6 +463,19 @@ int32_t vnodeProcessSyncMsg(SVnode *pVnode, SRpcMsg *pMsg, SRpcMsg **pRsp) {
|
|||
return code;
|
||||
}
|
||||
|
||||
static int32_t vnodeSyncEqCtrlMsg(const SMsgCb *msgcb, SRpcMsg *pMsg) {
|
||||
if (msgcb == NULL) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
int32_t code = tmsgPutToQueue(msgcb, SYNC_CTRL_QUEUE, pMsg);
|
||||
if (code != 0) {
|
||||
rpcFreeCont(pMsg->pCont);
|
||||
pMsg->pCont = NULL;
|
||||
}
|
||||
return code;
|
||||
}
|
||||
|
||||
static int32_t vnodeSyncEqMsg(const SMsgCb *msgcb, SRpcMsg *pMsg) {
|
||||
if (msgcb == NULL) {
|
||||
return -1;
|
||||
|
@ -801,6 +761,7 @@ int32_t vnodeSyncOpen(SVnode *pVnode, char *path) {
|
|||
.msgcb = NULL,
|
||||
.FpSendMsg = vnodeSyncSendMsg,
|
||||
.FpEqMsg = vnodeSyncEqMsg,
|
||||
.FpEqCtrlMsg = vnodeSyncEqCtrlMsg,
|
||||
};
|
||||
|
||||
snprintf(syncInfo.path, sizeof(syncInfo.path), "%s%ssync", path, TD_DIRSEP);
|
||||
|
|
|
@ -92,9 +92,8 @@ extern "C" {
|
|||
// /\ UNCHANGED <<serverVars, commitIndex, messages>>
|
||||
// /\ UNCHANGED <<candidateVars, leaderVars>>
|
||||
//
|
||||
int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg);
|
||||
int32_t syncNodeOnAppendEntriesSnapshotCb(SSyncNode* ths, SyncAppendEntries* pMsg);
|
||||
int32_t syncNodeOnAppendEntriesSnapshot2Cb(SSyncNode* ths, SyncAppendEntriesBatch* pMsg);
|
||||
|
||||
int32_t syncNodeOnAppendEntries(SSyncNode* ths, SyncAppendEntries* pMsg);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -40,9 +40,7 @@ extern "C" {
|
|||
// /\ Discard(m)
|
||||
// /\ UNCHANGED <<serverVars, candidateVars, logVars, elections>>
|
||||
//
|
||||
int32_t syncNodeOnAppendEntriesReplyCb(SSyncNode* ths, SyncAppendEntriesReply* pMsg);
|
||||
int32_t syncNodeOnAppendEntriesReplySnapshotCb(SSyncNode* ths, SyncAppendEntriesReply* pMsg);
|
||||
int32_t syncNodeOnAppendEntriesReplySnapshot2Cb(SSyncNode* ths, SyncAppendEntriesReply* pMsg);
|
||||
int32_t syncNodeOnAppendEntriesReply(SSyncNode* ths, SyncAppendEntriesReply* pMsg);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -37,12 +37,10 @@ extern "C" {
|
|||
// msource |-> i,
|
||||
// mdest |-> j])
|
||||
// /\ UNCHANGED <<serverVars, candidateVars, leaderVars, logVars>>
|
||||
//
|
||||
int32_t syncNodeRequestVotePeers(SSyncNode* pSyncNode);
|
||||
int32_t syncNodeRequestVotePeersSnapshot(SSyncNode* pSyncNode);
|
||||
|
||||
int32_t syncNodeElect(SSyncNode* pSyncNode);
|
||||
int32_t syncNodeRequestVote(SSyncNode* pSyncNode, const SRaftId* destRaftId, const SyncRequestVote* pMsg);
|
||||
int32_t syncNodeRequestVotePeers(SSyncNode* pSyncNode);
|
||||
int32_t syncNodeSendRequestVote(SSyncNode* pSyncNode, const SRaftId* destRaftId, const SyncRequestVote* pMsg);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -56,9 +56,8 @@ typedef struct SSyncIO {
|
|||
int32_t (*FpOnSyncAppendEntries)(SSyncNode *pSyncNode, SyncAppendEntries *pMsg);
|
||||
int32_t (*FpOnSyncAppendEntriesReply)(SSyncNode *pSyncNode, SyncAppendEntriesReply *pMsg);
|
||||
int32_t (*FpOnSyncTimeout)(SSyncNode *pSyncNode, SyncTimeout *pMsg);
|
||||
|
||||
int32_t (*FpOnSyncSnapshotSend)(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg);
|
||||
int32_t (*FpOnSyncSnapshotRsp)(SSyncNode *pSyncNode, SyncSnapshotRsp *pMsg);
|
||||
int32_t (*FpOnSyncSnapshot)(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg);
|
||||
int32_t (*FpOnSyncSnapshotReply)(SSyncNode *pSyncNode, SyncSnapshotRsp *pMsg);
|
||||
|
||||
int8_t isStart;
|
||||
|
||||
|
|
|
@ -45,8 +45,8 @@ void syncIndexMgrDestroy(SSyncIndexMgr *pSyncIndexMgr);
|
|||
void syncIndexMgrClear(SSyncIndexMgr *pSyncIndexMgr);
|
||||
void syncIndexMgrSetIndex(SSyncIndexMgr *pSyncIndexMgr, const SRaftId *pRaftId, SyncIndex index);
|
||||
SyncIndex syncIndexMgrGetIndex(SSyncIndexMgr *pSyncIndexMgr, const SRaftId *pRaftId);
|
||||
cJSON *syncIndexMgr2Json(SSyncIndexMgr *pSyncIndexMgr);
|
||||
char *syncIndexMgr2Str(SSyncIndexMgr *pSyncIndexMgr);
|
||||
cJSON * syncIndexMgr2Json(SSyncIndexMgr *pSyncIndexMgr);
|
||||
char * syncIndexMgr2Str(SSyncIndexMgr *pSyncIndexMgr);
|
||||
|
||||
void syncIndexMgrSetStartTime(SSyncIndexMgr *pSyncIndexMgr, const SRaftId *pRaftId, int64_t startTime);
|
||||
int64_t syncIndexMgrGetStartTime(SSyncIndexMgr *pSyncIndexMgr, const SRaftId *pRaftId);
|
||||
|
|
|
@ -57,9 +57,37 @@ typedef struct SRaftCfg SRaftCfg;
|
|||
typedef struct SSyncRespMgr SSyncRespMgr;
|
||||
typedef struct SSyncSnapshotSender SSyncSnapshotSender;
|
||||
typedef struct SSyncSnapshotReceiver SSyncSnapshotReceiver;
|
||||
typedef struct SSyncTimer SSyncTimer;
|
||||
typedef struct SSyncHbTimerData SSyncHbTimerData;
|
||||
|
||||
extern bool gRaftDetailLog;
|
||||
|
||||
typedef struct SSyncHbTimerData {
|
||||
SSyncNode* pSyncNode;
|
||||
SSyncTimer* pTimer;
|
||||
SRaftId destId;
|
||||
uint64_t logicClock;
|
||||
} SSyncHbTimerData;
|
||||
|
||||
typedef struct SSyncTimer {
|
||||
void* pTimer;
|
||||
TAOS_TMR_CALLBACK timerCb;
|
||||
uint64_t logicClock;
|
||||
uint64_t counter;
|
||||
int32_t timerMS;
|
||||
SRaftId destId;
|
||||
void* pData;
|
||||
} SSyncTimer;
|
||||
|
||||
int32_t syncHbTimerInit(SSyncNode* pSyncNode, SSyncTimer* pSyncTimer, SRaftId destId);
|
||||
int32_t syncHbTimerStart(SSyncNode* pSyncNode, SSyncTimer* pSyncTimer);
|
||||
int32_t syncHbTimerStop(SSyncNode* pSyncNode, SSyncTimer* pSyncTimer);
|
||||
|
||||
typedef struct SPeerState {
|
||||
SyncIndex lastSendIndex;
|
||||
int64_t lastSendTime;
|
||||
} SPeerState;
|
||||
|
||||
typedef struct SSyncNode {
|
||||
// init by SSyncInfo
|
||||
SyncGroupId vgId;
|
||||
|
@ -73,6 +101,7 @@ typedef struct SSyncNode {
|
|||
const SMsgCb* msgcb;
|
||||
int32_t (*FpSendMsg)(const SEpSet* pEpSet, SRpcMsg* pMsg);
|
||||
int32_t (*FpEqMsg)(const SMsgCb* msgcb, SRpcMsg* pMsg);
|
||||
int32_t (*FpEqCtrlMsg)(const SMsgCb* msgcb, SRpcMsg* pMsg);
|
||||
|
||||
// init internal
|
||||
SNodeInfo myNodeInfo;
|
||||
|
@ -138,6 +167,9 @@ typedef struct SSyncNode {
|
|||
TAOS_TMR_CALLBACK FpHeartbeatTimerCB; // Timer Fp
|
||||
uint64_t heartbeatTimerCounter;
|
||||
|
||||
// peer heartbeat timer
|
||||
SSyncTimer peerHeartbeatTimerArr[TSDB_MAX_REPLICA];
|
||||
|
||||
// callback
|
||||
FpOnPingCb FpOnPing;
|
||||
FpOnPingReplyCb FpOnPingReply;
|
||||
|
@ -147,8 +179,8 @@ typedef struct SSyncNode {
|
|||
FpOnRequestVoteReplyCb FpOnRequestVoteReply;
|
||||
FpOnAppendEntriesCb FpOnAppendEntries;
|
||||
FpOnAppendEntriesReplyCb FpOnAppendEntriesReply;
|
||||
FpOnSnapshotSendCb FpOnSnapshotSend;
|
||||
FpOnSnapshotRspCb FpOnSnapshotRsp;
|
||||
FpOnSnapshotCb FpOnSnapshot;
|
||||
FpOnSnapshotReplyCb FpOnSnapshotReply;
|
||||
|
||||
// tools
|
||||
SSyncRespMgr* pSyncRespMgr;
|
||||
|
@ -159,9 +191,15 @@ typedef struct SSyncNode {
|
|||
SSyncSnapshotSender* senders[TSDB_MAX_REPLICA];
|
||||
SSyncSnapshotReceiver* pNewNodeReceiver;
|
||||
|
||||
SPeerState peerStates[TSDB_MAX_REPLICA];
|
||||
|
||||
// is config changing
|
||||
bool changing;
|
||||
|
||||
int64_t snapshottingIndex;
|
||||
int64_t snapshottingTime;
|
||||
int64_t minMatchIndex;
|
||||
|
||||
int64_t startTime;
|
||||
int64_t leaderTime;
|
||||
int64_t lastReplicateTime;
|
||||
|
@ -174,7 +212,6 @@ void syncNodeStart(SSyncNode* pSyncNode);
|
|||
void syncNodeStartStandBy(SSyncNode* pSyncNode);
|
||||
void syncNodeClose(SSyncNode* pSyncNode);
|
||||
int32_t syncNodePropose(SSyncNode* pSyncNode, SRpcMsg* pMsg, bool isWeak);
|
||||
int32_t syncNodeProposeBatch(SSyncNode* pSyncNode, SRpcMsg** pMsgPArr, bool* pIsWeakArr, int32_t arrSize);
|
||||
|
||||
// option
|
||||
bool syncNodeSnapshotEnable(SSyncNode* pSyncNode);
|
||||
|
@ -197,12 +234,8 @@ int32_t syncNodeRestartElectTimer(SSyncNode* pSyncNode, int32_t ms);
|
|||
int32_t syncNodeResetElectTimer(SSyncNode* pSyncNode);
|
||||
|
||||
int32_t syncNodeStartHeartbeatTimer(SSyncNode* pSyncNode);
|
||||
int32_t syncNodeStartHeartbeatTimerNow(SSyncNode* pSyncNode);
|
||||
int32_t syncNodeStartHeartbeatTimerMS(SSyncNode* pSyncNode, int32_t ms);
|
||||
int32_t syncNodeStopHeartbeatTimer(SSyncNode* pSyncNode);
|
||||
int32_t syncNodeRestartHeartbeatTimer(SSyncNode* pSyncNode);
|
||||
int32_t syncNodeRestartHeartbeatTimerNow(SSyncNode* pSyncNode);
|
||||
int32_t syncNodeRestartNowHeartbeatTimerMS(SSyncNode* pSyncNode, int32_t ms);
|
||||
|
||||
// utils --------------
|
||||
int32_t syncNodeSendMsgById(const SRaftId* destRaftId, SSyncNode* pSyncNode, SRpcMsg* pMsg);
|
||||
|
@ -214,6 +247,7 @@ void syncNodeErrorLog(const SSyncNode* pSyncNode, char* str);
|
|||
char* syncNode2SimpleStr(const SSyncNode* pSyncNode);
|
||||
bool syncNodeInConfig(SSyncNode* pSyncNode, const SSyncCfg* config);
|
||||
void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* newConfig, SyncIndex lastConfigChangeIndex);
|
||||
SyncIndex syncMinMatchIndex(SSyncNode* pSyncNode);
|
||||
|
||||
SSyncNode* syncNodeAcquire(int64_t rid);
|
||||
void syncNodeRelease(SSyncNode* pNode);
|
||||
|
@ -221,6 +255,7 @@ void syncNodeRelease(SSyncNode* pNode);
|
|||
// raft state change --------------
|
||||
void syncNodeUpdateTerm(SSyncNode* pSyncNode, SyncTerm term);
|
||||
void syncNodeUpdateTermWithoutStepDown(SSyncNode* pSyncNode, SyncTerm term);
|
||||
void syncNodeStepDown(SSyncNode* pSyncNode, SyncTerm newTerm);
|
||||
void syncNodeBecomeFollower(SSyncNode* pSyncNode, const char* debugStr);
|
||||
void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr);
|
||||
|
||||
|
@ -240,21 +275,23 @@ void syncNodeMaybeUpdateCommitBySnapshot(SSyncNode* pSyncNode);
|
|||
SyncIndex syncNodeGetLastIndex(const SSyncNode* pSyncNode);
|
||||
SyncTerm syncNodeGetLastTerm(SSyncNode* pSyncNode);
|
||||
int32_t syncNodeGetLastIndexTerm(SSyncNode* pSyncNode, SyncIndex* pLastIndex, SyncTerm* pLastTerm);
|
||||
|
||||
SyncIndex syncNodeSyncStartIndex(SSyncNode* pSyncNode);
|
||||
|
||||
SyncIndex syncNodeGetPreIndex(SSyncNode* pSyncNode, SyncIndex index);
|
||||
SyncTerm syncNodeGetPreTerm(SSyncNode* pSyncNode, SyncIndex index);
|
||||
int32_t syncNodeGetPreIndexTerm(SSyncNode* pSyncNode, SyncIndex index, SyncIndex* pPreIndex, SyncTerm* pPreTerm);
|
||||
|
||||
bool syncNodeIsOptimizedOneReplica(SSyncNode* ths, SRpcMsg* pMsg);
|
||||
int32_t syncNodeCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex, uint64_t flag);
|
||||
int32_t syncNodeDoCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex, uint64_t flag);
|
||||
int32_t syncNodeFollowerCommit(SSyncNode* ths, SyncIndex newCommitIndex);
|
||||
int32_t syncNodePreCommit(SSyncNode* ths, SSyncRaftEntry* pEntry, int32_t code);
|
||||
|
||||
int32_t syncNodeUpdateNewConfigIndex(SSyncNode* ths, SSyncCfg* pNewCfg);
|
||||
|
||||
bool syncNodeInRaftGroup(SSyncNode* ths, SRaftId* pRaftId);
|
||||
SSyncSnapshotSender* syncNodeGetSnapshotSender(SSyncNode* ths, SRaftId* pDestId);
|
||||
SSyncTimer* syncNodeGetHbTimer(SSyncNode* ths, SRaftId* pDestId);
|
||||
SPeerState* syncNodeGetPeerState(SSyncNode* ths, const SRaftId* pDestId);
|
||||
bool syncNodeNeedSendAppendEntries(SSyncNode* ths, const SRaftId* pDestId, const SyncAppendEntries* pMsg);
|
||||
|
||||
int32_t syncGetSnapshotMeta(int64_t rid, struct SSnapshotMeta* sMeta);
|
||||
int32_t syncGetSnapshotMetaByIndex(int64_t rid, SyncIndex snapshotIndex, struct SSnapshotMeta* sMeta);
|
||||
|
@ -271,7 +308,12 @@ int32_t syncDoLeaderTransfer(SSyncNode* ths, SRpcMsg* pRpcMsg, SSyncRaftEntry* p
|
|||
|
||||
int32_t syncNodeDynamicQuorum(const SSyncNode* pSyncNode);
|
||||
|
||||
bool syncNodeIsMnode(SSyncNode* pSyncNode);
|
||||
int32_t syncNodePeerStateInit(SSyncNode* pSyncNode);
|
||||
|
||||
// trace log
|
||||
void syncLogRecvTimer(SSyncNode* pSyncNode, const SyncTimeout* pMsg, const char* s);
|
||||
|
||||
void syncLogSendRequestVote(SSyncNode* pSyncNode, const SyncRequestVote* pMsg, const char* s);
|
||||
void syncLogRecvRequestVote(SSyncNode* pSyncNode, const SyncRequestVote* pMsg, const char* s);
|
||||
|
||||
|
|
|
@ -45,8 +45,8 @@ int32_t raftCfgIndexClose(SRaftCfgIndex *pRaftCfgIndex);
|
|||
int32_t raftCfgIndexPersist(SRaftCfgIndex *pRaftCfgIndex);
|
||||
int32_t raftCfgIndexAddConfigIndex(SRaftCfgIndex *pRaftCfgIndex, SyncIndex configIndex);
|
||||
|
||||
cJSON *raftCfgIndex2Json(SRaftCfgIndex *pRaftCfgIndex);
|
||||
char *raftCfgIndex2Str(SRaftCfgIndex *pRaftCfgIndex);
|
||||
cJSON * raftCfgIndex2Json(SRaftCfgIndex *pRaftCfgIndex);
|
||||
char * raftCfgIndex2Str(SRaftCfgIndex *pRaftCfgIndex);
|
||||
int32_t raftCfgIndexFromJson(const cJSON *pRoot, SRaftCfgIndex *pRaftCfgIndex);
|
||||
int32_t raftCfgIndexFromStr(const char *s, SRaftCfgIndex *pRaftCfgIndex);
|
||||
|
||||
|
@ -73,14 +73,14 @@ int32_t raftCfgClose(SRaftCfg *pRaftCfg);
|
|||
int32_t raftCfgPersist(SRaftCfg *pRaftCfg);
|
||||
int32_t raftCfgAddConfigIndex(SRaftCfg *pRaftCfg, SyncIndex configIndex);
|
||||
|
||||
cJSON *syncCfg2Json(SSyncCfg *pSyncCfg);
|
||||
char *syncCfg2Str(SSyncCfg *pSyncCfg);
|
||||
char *syncCfg2SimpleStr(SSyncCfg *pSyncCfg);
|
||||
cJSON * syncCfg2Json(SSyncCfg *pSyncCfg);
|
||||
char * syncCfg2Str(SSyncCfg *pSyncCfg);
|
||||
char * syncCfg2SimpleStr(SSyncCfg *pSyncCfg);
|
||||
int32_t syncCfgFromJson(const cJSON *pRoot, SSyncCfg *pSyncCfg);
|
||||
int32_t syncCfgFromStr(const char *s, SSyncCfg *pSyncCfg);
|
||||
|
||||
cJSON *raftCfg2Json(SRaftCfg *pRaftCfg);
|
||||
char *raftCfg2Str(SRaftCfg *pRaftCfg);
|
||||
cJSON * raftCfg2Json(SRaftCfg *pRaftCfg);
|
||||
char * raftCfg2Str(SRaftCfg *pRaftCfg);
|
||||
int32_t raftCfgFromJson(const cJSON *pRoot, SRaftCfg *pRaftCfg);
|
||||
int32_t raftCfgFromStr(const char *s, SRaftCfg *pRaftCfg);
|
||||
|
||||
|
|
|
@ -50,16 +50,15 @@ extern "C" {
|
|||
// msource |-> i,
|
||||
// mdest |-> j])
|
||||
// /\ UNCHANGED <<serverVars, candidateVars, leaderVars, logVars>>
|
||||
//
|
||||
int32_t syncNodeAppendEntriesPeers(SSyncNode* pSyncNode);
|
||||
int32_t syncNodeAppendEntriesPeersSnapshot(SSyncNode* pSyncNode);
|
||||
int32_t syncNodeAppendEntriesPeersSnapshot2(SSyncNode* pSyncNode);
|
||||
|
||||
int32_t syncNodeAppendEntriesOnePeer(SSyncNode* pSyncNode, SRaftId* pDestId, SyncIndex nextIndex);
|
||||
int32_t syncNodeHeartbeatPeers(SSyncNode* pSyncNode);
|
||||
int32_t syncNodeSendHeartbeat(SSyncNode* pSyncNode, const SRaftId* pDestId, const SyncHeartbeat* pMsg);
|
||||
|
||||
int32_t syncNodeReplicate(SSyncNode* pSyncNode, bool isTimer);
|
||||
int32_t syncNodeAppendEntries(SSyncNode* pSyncNode, const SRaftId* destRaftId, const SyncAppendEntries* pMsg);
|
||||
int32_t syncNodeAppendEntriesBatch(SSyncNode* pSyncNode, const SRaftId* destRaftId, const SyncAppendEntriesBatch* pMsg);
|
||||
int32_t syncNodeReplicate(SSyncNode* pSyncNode);
|
||||
int32_t syncNodeReplicateOne(SSyncNode* pSyncNode, SRaftId* pDestId);
|
||||
|
||||
int32_t syncNodeSendAppendEntries(SSyncNode* pSyncNode, const SRaftId* pDestId, const SyncAppendEntries* pMsg);
|
||||
int32_t syncNodeMaybeSendAppendEntries(SSyncNode* pSyncNode, const SRaftId* pDestId, const SyncAppendEntries* pMsg);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -49,8 +49,7 @@ extern "C" {
|
|||
// m)
|
||||
// /\ UNCHANGED <<state, currentTerm, candidateVars, leaderVars, logVars>>
|
||||
//
|
||||
int32_t syncNodeOnRequestVoteCb(SSyncNode* ths, SyncRequestVote* pMsg);
|
||||
int32_t syncNodeOnRequestVoteSnapshotCb(SSyncNode* ths, SyncRequestVote* pMsg);
|
||||
int32_t syncNodeOnRequestVote(SSyncNode* ths, SyncRequestVote* pMsg);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -44,8 +44,7 @@ extern "C" {
|
|||
// /\ Discard(m)
|
||||
// /\ UNCHANGED <<serverVars, votedFor, leaderVars, logVars>>
|
||||
//
|
||||
int32_t syncNodeOnRequestVoteReplyCb(SSyncNode* ths, SyncRequestVoteReply* pMsg);
|
||||
int32_t syncNodeOnRequestVoteReplySnapshotCb(SSyncNode* ths, SyncRequestVoteReply* pMsg);
|
||||
int32_t syncNodeOnRequestVoteReply(SSyncNode* ths, SyncRequestVoteReply* pMsg);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -32,9 +32,9 @@ typedef struct SRespStub {
|
|||
} SRespStub;
|
||||
|
||||
typedef struct SSyncRespMgr {
|
||||
SHashObj *pRespHash;
|
||||
SHashObj * pRespHash;
|
||||
int64_t ttl;
|
||||
void *data;
|
||||
void * data;
|
||||
TdThreadMutex mutex;
|
||||
uint64_t seqNum;
|
||||
} SSyncRespMgr;
|
||||
|
|
|
@ -51,6 +51,7 @@ typedef struct SSyncSnapshotSender {
|
|||
int32_t replicaIndex;
|
||||
SyncTerm term;
|
||||
SyncTerm privateTerm;
|
||||
int64_t startTime;
|
||||
bool finish;
|
||||
} SSyncSnapshotSender;
|
||||
|
||||
|
@ -67,6 +68,8 @@ cJSON *snapshotSender2Json(SSyncSnapshotSender *pSender);
|
|||
char *snapshotSender2Str(SSyncSnapshotSender *pSender);
|
||||
char *snapshotSender2SimpleStr(SSyncSnapshotSender *pSender, char *event);
|
||||
|
||||
int32_t syncNodeStartSnapshot(SSyncNode *pSyncNode, SRaftId *pDestId);
|
||||
|
||||
//---------------------------------------------------
|
||||
typedef struct SSyncSnapshotReceiver {
|
||||
bool start;
|
||||
|
@ -94,8 +97,8 @@ char *snapshotReceiver2SimpleStr(SSyncSnapshotReceiver *pReceiver, char *event)
|
|||
|
||||
//---------------------------------------------------
|
||||
// on message
|
||||
int32_t syncNodeOnSnapshotSendCb(SSyncNode *ths, SyncSnapshotSend *pMsg);
|
||||
int32_t syncNodeOnSnapshotRspCb(SSyncNode *ths, SyncSnapshotRsp *pMsg);
|
||||
int32_t syncNodeOnSnapshot(SSyncNode *ths, SyncSnapshotSend *pMsg);
|
||||
int32_t syncNodeOnSnapshotReply(SSyncNode *ths, SyncSnapshotRsp *pMsg);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -39,7 +39,7 @@ extern "C" {
|
|||
// /\ voterLog' = [voterLog EXCEPT ![i] = [j \in {} |-> <<>>]]
|
||||
// /\ UNCHANGED <<messages, leaderVars, logVars>>
|
||||
//
|
||||
int32_t syncNodeOnTimeoutCb(SSyncNode* ths, SyncTimeout* pMsg);
|
||||
int32_t syncNodeOnTimer(SSyncNode* ths, SyncTimeout* pMsg);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -89,240 +89,6 @@
|
|||
// /\ UNCHANGED <<candidateVars, leaderVars>>
|
||||
//
|
||||
|
||||
int32_t syncNodeOnAppendEntriesCb(SSyncNode* ths, SyncAppendEntries* pMsg) {
|
||||
int32_t ret = 0;
|
||||
|
||||
// if already drop replica, do not process
|
||||
if (!syncNodeInRaftGroup(ths, &(pMsg->srcId)) && !ths->pRaftCfg->isStandBy) {
|
||||
syncLogRecvAppendEntries(ths, pMsg, "maybe replica already dropped");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// maybe update term
|
||||
if (pMsg->term > ths->pRaftStore->currentTerm) {
|
||||
syncNodeUpdateTerm(ths, pMsg->term);
|
||||
}
|
||||
ASSERT(pMsg->term <= ths->pRaftStore->currentTerm);
|
||||
|
||||
// reset elect timer
|
||||
if (pMsg->term == ths->pRaftStore->currentTerm) {
|
||||
ths->leaderCache = pMsg->srcId;
|
||||
syncNodeResetElectTimer(ths);
|
||||
}
|
||||
ASSERT(pMsg->dataLen >= 0);
|
||||
|
||||
// return to follower state
|
||||
if (pMsg->term == ths->pRaftStore->currentTerm && ths->state == TAOS_SYNC_STATE_CANDIDATE) {
|
||||
syncLogRecvAppendEntries(ths, pMsg, "candidate to follower");
|
||||
syncNodeBecomeFollower(ths, "from candidate by append entries");
|
||||
return -1; // ret or reply?
|
||||
}
|
||||
|
||||
SyncTerm localPreLogTerm = 0;
|
||||
if (pMsg->prevLogIndex >= SYNC_INDEX_BEGIN && pMsg->prevLogIndex <= ths->pLogStore->getLastIndex(ths->pLogStore)) {
|
||||
SSyncRaftEntry* pEntry = ths->pLogStore->getEntry(ths->pLogStore, pMsg->prevLogIndex);
|
||||
if (pEntry == NULL) {
|
||||
char logBuf[128];
|
||||
snprintf(logBuf, sizeof(logBuf), "getEntry error, index:%" PRId64 ", since %s", pMsg->prevLogIndex, terrstr());
|
||||
syncNodeErrorLog(ths, logBuf);
|
||||
return -1;
|
||||
}
|
||||
|
||||
localPreLogTerm = pEntry->term;
|
||||
syncEntryDestory(pEntry);
|
||||
}
|
||||
|
||||
bool logOK =
|
||||
(pMsg->prevLogIndex == SYNC_INDEX_INVALID) ||
|
||||
((pMsg->prevLogIndex >= SYNC_INDEX_BEGIN) &&
|
||||
(pMsg->prevLogIndex <= ths->pLogStore->getLastIndex(ths->pLogStore)) && (pMsg->prevLogTerm == localPreLogTerm));
|
||||
|
||||
// reject request
|
||||
if ((pMsg->term < ths->pRaftStore->currentTerm) ||
|
||||
((pMsg->term == ths->pRaftStore->currentTerm) && (ths->state == TAOS_SYNC_STATE_FOLLOWER) && !logOK)) {
|
||||
syncLogRecvAppendEntries(ths, pMsg, "reject");
|
||||
|
||||
SyncAppendEntriesReply* pReply = syncAppendEntriesReplyBuild(ths->vgId);
|
||||
pReply->srcId = ths->myRaftId;
|
||||
pReply->destId = pMsg->srcId;
|
||||
pReply->term = ths->pRaftStore->currentTerm;
|
||||
pReply->success = false;
|
||||
pReply->matchIndex = SYNC_INDEX_INVALID;
|
||||
pReply->startTime = ths->startTime;
|
||||
|
||||
// msg event log
|
||||
syncLogSendAppendEntriesReply(ths, pReply, "");
|
||||
|
||||
SRpcMsg rpcMsg;
|
||||
syncAppendEntriesReply2RpcMsg(pReply, &rpcMsg);
|
||||
syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg);
|
||||
syncAppendEntriesReplyDestroy(pReply);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
// accept request
|
||||
if (pMsg->term == ths->pRaftStore->currentTerm && ths->state == TAOS_SYNC_STATE_FOLLOWER && logOK) {
|
||||
// preIndex = -1, or has preIndex entry in local log
|
||||
ASSERT(pMsg->prevLogIndex <= ths->pLogStore->getLastIndex(ths->pLogStore));
|
||||
|
||||
// has extra entries (> preIndex) in local log
|
||||
bool hasExtraEntries = pMsg->prevLogIndex < ths->pLogStore->getLastIndex(ths->pLogStore);
|
||||
|
||||
// has entries in SyncAppendEntries msg
|
||||
bool hasAppendEntries = pMsg->dataLen > 0;
|
||||
|
||||
syncLogRecvAppendEntries(ths, pMsg, "accept");
|
||||
|
||||
if (hasExtraEntries && hasAppendEntries) {
|
||||
// not conflict by default
|
||||
bool conflict = false;
|
||||
|
||||
SyncIndex extraIndex = pMsg->prevLogIndex + 1;
|
||||
SSyncRaftEntry* pExtraEntry = ths->pLogStore->getEntry(ths->pLogStore, extraIndex);
|
||||
if (pExtraEntry == NULL) {
|
||||
char logBuf[128];
|
||||
snprintf(logBuf, sizeof(logBuf), "getEntry error2, index:%" PRId64 ", since %s", extraIndex, terrstr());
|
||||
syncNodeErrorLog(ths, logBuf);
|
||||
return -1;
|
||||
}
|
||||
|
||||
SSyncRaftEntry* pAppendEntry = syncEntryDeserialize(pMsg->data, pMsg->dataLen);
|
||||
if (pAppendEntry == NULL) {
|
||||
syncNodeErrorLog(ths, "syncEntryDeserialize pAppendEntry error");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// log not match, conflict
|
||||
ASSERT(extraIndex == pAppendEntry->index);
|
||||
if (pExtraEntry->term != pAppendEntry->term) {
|
||||
conflict = true;
|
||||
}
|
||||
|
||||
if (conflict) {
|
||||
// roll back
|
||||
SyncIndex delBegin = ths->pLogStore->getLastIndex(ths->pLogStore);
|
||||
SyncIndex delEnd = extraIndex;
|
||||
|
||||
sTrace("syncNodeOnAppendEntriesCb --> conflict:%d, delBegin:%" PRId64 ", delEnd:%" PRId64, conflict, delBegin,
|
||||
delEnd);
|
||||
|
||||
// notice! reverse roll back!
|
||||
for (SyncIndex index = delEnd; index >= delBegin; --index) {
|
||||
if (ths->pFsm->FpRollBackCb != NULL) {
|
||||
SSyncRaftEntry* pRollBackEntry = ths->pLogStore->getEntry(ths->pLogStore, index);
|
||||
if (pRollBackEntry == NULL) {
|
||||
char logBuf[128];
|
||||
snprintf(logBuf, sizeof(logBuf), "getEntry error3, index:%" PRId64 ", since %s", index, terrstr());
|
||||
syncNodeErrorLog(ths, logBuf);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// if (pRollBackEntry->msgType != TDMT_SYNC_NOOP) {
|
||||
if (syncUtilUserRollback(pRollBackEntry->msgType)) {
|
||||
SRpcMsg rpcMsg;
|
||||
syncEntry2OriginalRpc(pRollBackEntry, &rpcMsg);
|
||||
|
||||
SFsmCbMeta cbMeta = {0};
|
||||
cbMeta.index = pRollBackEntry->index;
|
||||
cbMeta.lastConfigIndex = syncNodeGetSnapshotConfigIndex(ths, cbMeta.index);
|
||||
cbMeta.isWeak = pRollBackEntry->isWeak;
|
||||
cbMeta.code = 0;
|
||||
cbMeta.state = ths->state;
|
||||
cbMeta.seqNum = pRollBackEntry->seqNum;
|
||||
ths->pFsm->FpRollBackCb(ths->pFsm, &rpcMsg, cbMeta);
|
||||
rpcFreeCont(rpcMsg.pCont);
|
||||
}
|
||||
|
||||
syncEntryDestory(pRollBackEntry);
|
||||
}
|
||||
}
|
||||
|
||||
// delete confict entries
|
||||
ths->pLogStore->truncate(ths->pLogStore, extraIndex);
|
||||
|
||||
// append new entries
|
||||
ths->pLogStore->appendEntry(ths->pLogStore, pAppendEntry);
|
||||
|
||||
// pre commit
|
||||
syncNodePreCommit(ths, pAppendEntry, 0);
|
||||
}
|
||||
|
||||
// free memory
|
||||
syncEntryDestory(pExtraEntry);
|
||||
syncEntryDestory(pAppendEntry);
|
||||
|
||||
} else if (hasExtraEntries && !hasAppendEntries) {
|
||||
// do nothing
|
||||
|
||||
} else if (!hasExtraEntries && hasAppendEntries) {
|
||||
SSyncRaftEntry* pAppendEntry = syncEntryDeserialize(pMsg->data, pMsg->dataLen);
|
||||
if (pAppendEntry == NULL) {
|
||||
syncNodeErrorLog(ths, "syncEntryDeserialize pAppendEntry2 error");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// append new entries
|
||||
ths->pLogStore->appendEntry(ths->pLogStore, pAppendEntry);
|
||||
|
||||
// pre commit
|
||||
syncNodePreCommit(ths, pAppendEntry, 0);
|
||||
|
||||
// free memory
|
||||
syncEntryDestory(pAppendEntry);
|
||||
|
||||
} else if (!hasExtraEntries && !hasAppendEntries) {
|
||||
// do nothing
|
||||
|
||||
} else {
|
||||
syncNodeLog3("", ths);
|
||||
ASSERT(0);
|
||||
}
|
||||
|
||||
SyncAppendEntriesReply* pReply = syncAppendEntriesReplyBuild(ths->vgId);
|
||||
pReply->srcId = ths->myRaftId;
|
||||
pReply->destId = pMsg->srcId;
|
||||
pReply->term = ths->pRaftStore->currentTerm;
|
||||
pReply->success = true;
|
||||
|
||||
if (hasAppendEntries) {
|
||||
pReply->matchIndex = pMsg->prevLogIndex + 1;
|
||||
} else {
|
||||
pReply->matchIndex = pMsg->prevLogIndex;
|
||||
}
|
||||
|
||||
pReply->startTime = ths->startTime;
|
||||
|
||||
// msg event log
|
||||
syncLogSendAppendEntriesReply(ths, pReply, "");
|
||||
|
||||
SRpcMsg rpcMsg;
|
||||
syncAppendEntriesReply2RpcMsg(pReply, &rpcMsg);
|
||||
syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg);
|
||||
syncAppendEntriesReplyDestroy(pReply);
|
||||
|
||||
// maybe update commit index from leader
|
||||
if (pMsg->commitIndex > ths->commitIndex) {
|
||||
// has commit entry in local
|
||||
if (pMsg->commitIndex <= ths->pLogStore->getLastIndex(ths->pLogStore)) {
|
||||
SyncIndex beginIndex = ths->commitIndex + 1;
|
||||
SyncIndex endIndex = pMsg->commitIndex;
|
||||
|
||||
// update commit index
|
||||
ths->commitIndex = pMsg->commitIndex;
|
||||
|
||||
// call back Wal
|
||||
ths->pLogStore->updateCommitIndex(ths->pLogStore, ths->commitIndex);
|
||||
|
||||
int32_t code = syncNodeCommit(ths, beginIndex, endIndex, ths->state);
|
||||
ASSERT(code == 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int32_t syncNodeMakeLogSame(SSyncNode* ths, SyncAppendEntries* pMsg) {
|
||||
int32_t code;
|
||||
|
||||
|
@ -408,7 +174,7 @@ static int32_t syncNodeDoMakeLogSame(SSyncNode* ths, SyncIndex FromIndex) {
|
|||
|
||||
do {
|
||||
char logBuf[128];
|
||||
snprintf(logBuf, sizeof(logBuf), "update delete begin to %" PRId64, delBegin);
|
||||
snprintf(logBuf, sizeof(logBuf), "update delete begin to %ld", delBegin);
|
||||
syncNodeEventLog(ths, logBuf);
|
||||
} while (0);
|
||||
}
|
||||
|
@ -419,8 +185,7 @@ static int32_t syncNodeDoMakeLogSame(SSyncNode* ths, SyncIndex FromIndex) {
|
|||
|
||||
do {
|
||||
char logBuf[128];
|
||||
snprintf(logBuf, sizeof(logBuf), "make log same from:%" PRId64 ", delbegin:%" PRId64 ", pass:%d", FromIndex,
|
||||
delBegin, pass);
|
||||
snprintf(logBuf, sizeof(logBuf), "make log same from:%ld, delbegin:%ld, pass:%d", FromIndex, delBegin, pass);
|
||||
syncNodeEventLog(ths, logBuf);
|
||||
} while (0);
|
||||
|
||||
|
@ -505,513 +270,11 @@ static bool syncNodeOnAppendEntriesLogOK(SSyncNode* pSyncNode, SyncAppendEntries
|
|||
return false;
|
||||
}
|
||||
|
||||
int32_t syncNodeOnAppendEntriesSnapshot2Cb(SSyncNode* ths, SyncAppendEntriesBatch* pMsg) {
|
||||
int32_t ret = 0;
|
||||
int32_t code = 0;
|
||||
|
||||
// if already drop replica, do not process
|
||||
if (!syncNodeInRaftGroup(ths, &(pMsg->srcId)) && !ths->pRaftCfg->isStandBy) {
|
||||
syncLogRecvAppendEntriesBatch(ths, pMsg, "maybe replica already dropped");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// maybe update term
|
||||
if (pMsg->term > ths->pRaftStore->currentTerm) {
|
||||
syncNodeUpdateTerm(ths, pMsg->term);
|
||||
}
|
||||
ASSERT(pMsg->term <= ths->pRaftStore->currentTerm);
|
||||
|
||||
// reset elect timer
|
||||
if (pMsg->term == ths->pRaftStore->currentTerm) {
|
||||
ths->leaderCache = pMsg->srcId;
|
||||
syncNodeResetElectTimer(ths);
|
||||
}
|
||||
ASSERT(pMsg->dataLen >= 0);
|
||||
|
||||
// candidate to follower
|
||||
//
|
||||
// operation:
|
||||
// to follower
|
||||
do {
|
||||
bool condition = pMsg->term == ths->pRaftStore->currentTerm && ths->state == TAOS_SYNC_STATE_CANDIDATE;
|
||||
if (condition) {
|
||||
syncLogRecvAppendEntriesBatch(ths, pMsg, "candidate to follower");
|
||||
syncNodeBecomeFollower(ths, "from candidate by append entries");
|
||||
return 0; // do not reply?
|
||||
}
|
||||
} while (0);
|
||||
|
||||
// fake match
|
||||
//
|
||||
// condition1:
|
||||
// preIndex <= my commit index
|
||||
//
|
||||
// operation:
|
||||
// if hasAppendEntries && pMsg->prevLogIndex == ths->commitIndex, append entry
|
||||
// match my-commit-index or my-commit-index + batchSize
|
||||
do {
|
||||
bool condition = (pMsg->term == ths->pRaftStore->currentTerm) && (ths->state == TAOS_SYNC_STATE_FOLLOWER) &&
|
||||
(pMsg->prevLogIndex <= ths->commitIndex);
|
||||
if (condition) {
|
||||
syncLogRecvAppendEntriesBatch(ths, pMsg, "fake match");
|
||||
|
||||
SyncIndex matchIndex = ths->commitIndex;
|
||||
bool hasAppendEntries = pMsg->dataLen > 0;
|
||||
SOffsetAndContLen* metaTableArr = syncAppendEntriesBatchMetaTableArray(pMsg);
|
||||
|
||||
if (hasAppendEntries && pMsg->prevLogIndex == ths->commitIndex) {
|
||||
int32_t pass = 0;
|
||||
SyncIndex logLastIndex = ths->pLogStore->syncLogLastIndex(ths->pLogStore);
|
||||
bool hasExtraEntries = logLastIndex > pMsg->prevLogIndex;
|
||||
|
||||
// make log same
|
||||
if (hasExtraEntries) {
|
||||
// make log same, rollback deleted entries
|
||||
pass = syncNodeDoMakeLogSame(ths, pMsg->prevLogIndex + 1);
|
||||
ASSERT(pass >= 0);
|
||||
}
|
||||
|
||||
// append entry batch
|
||||
if (pass == 0) {
|
||||
// assert! no batch
|
||||
ASSERT(pMsg->dataCount <= 1);
|
||||
|
||||
for (int32_t i = 0; i < pMsg->dataCount; ++i) {
|
||||
SSyncRaftEntry* pAppendEntry = (SSyncRaftEntry*)(pMsg->data + metaTableArr[i].offset);
|
||||
code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pAppendEntry);
|
||||
if (code != 0) {
|
||||
sError("vgId:%d, failed to append log entry since %s", ths->vgId, tstrerror(terrno));
|
||||
return -1;
|
||||
}
|
||||
|
||||
code = syncNodePreCommit(ths, pAppendEntry, 0);
|
||||
ASSERT(code == 0);
|
||||
|
||||
// syncEntryDestory(pAppendEntry);
|
||||
}
|
||||
}
|
||||
|
||||
// fsync once
|
||||
SSyncLogStoreData* pData = ths->pLogStore->data;
|
||||
SWal* pWal = pData->pWal;
|
||||
walFsync(pWal, false);
|
||||
|
||||
// update match index
|
||||
matchIndex = pMsg->prevLogIndex + pMsg->dataCount;
|
||||
}
|
||||
|
||||
// prepare response msg
|
||||
SyncAppendEntriesReply* pReply = syncAppendEntriesReplyBuild(ths->vgId);
|
||||
pReply->srcId = ths->myRaftId;
|
||||
pReply->destId = pMsg->srcId;
|
||||
pReply->term = ths->pRaftStore->currentTerm;
|
||||
pReply->privateTerm = ths->pNewNodeReceiver->privateTerm;
|
||||
pReply->success = true;
|
||||
pReply->matchIndex = matchIndex;
|
||||
pReply->startTime = ths->startTime;
|
||||
|
||||
// msg event log
|
||||
syncLogSendAppendEntriesReply(ths, pReply, "");
|
||||
|
||||
// send response
|
||||
SRpcMsg rpcMsg;
|
||||
syncAppendEntriesReply2RpcMsg(pReply, &rpcMsg);
|
||||
syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg);
|
||||
syncAppendEntriesReplyDestroy(pReply);
|
||||
|
||||
return 0;
|
||||
}
|
||||
} while (0);
|
||||
|
||||
// calculate logOK here, before will coredump, due to fake match
|
||||
bool logOK = syncNodeOnAppendEntriesBatchLogOK(ths, pMsg);
|
||||
|
||||
// not match
|
||||
//
|
||||
// condition1:
|
||||
// term < myTerm
|
||||
//
|
||||
// condition2:
|
||||
// !logOK
|
||||
//
|
||||
// operation:
|
||||
// not match
|
||||
// no operation on log
|
||||
do {
|
||||
bool condition1 = pMsg->term < ths->pRaftStore->currentTerm;
|
||||
bool condition2 =
|
||||
(pMsg->term == ths->pRaftStore->currentTerm) && (ths->state == TAOS_SYNC_STATE_FOLLOWER) && !logOK;
|
||||
bool condition = condition1 || condition2;
|
||||
|
||||
if (condition) {
|
||||
syncLogRecvAppendEntriesBatch(ths, pMsg, "not match");
|
||||
|
||||
// maybe update commit index by snapshot
|
||||
syncNodeMaybeUpdateCommitBySnapshot(ths);
|
||||
|
||||
// prepare response msg
|
||||
SyncAppendEntriesReply* pReply = syncAppendEntriesReplyBuild(ths->vgId);
|
||||
pReply->srcId = ths->myRaftId;
|
||||
pReply->destId = pMsg->srcId;
|
||||
pReply->term = ths->pRaftStore->currentTerm;
|
||||
pReply->privateTerm = ths->pNewNodeReceiver->privateTerm;
|
||||
pReply->success = false;
|
||||
pReply->matchIndex = ths->commitIndex;
|
||||
pReply->startTime = ths->startTime;
|
||||
|
||||
// msg event log
|
||||
syncLogSendAppendEntriesReply(ths, pReply, "");
|
||||
|
||||
// send response
|
||||
SRpcMsg rpcMsg;
|
||||
syncAppendEntriesReply2RpcMsg(pReply, &rpcMsg);
|
||||
syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg);
|
||||
syncAppendEntriesReplyDestroy(pReply);
|
||||
|
||||
return 0;
|
||||
}
|
||||
} while (0);
|
||||
|
||||
// really match
|
||||
//
|
||||
// condition:
|
||||
// logOK
|
||||
//
|
||||
// operation:
|
||||
// match
|
||||
// make log same
|
||||
do {
|
||||
bool condition = (pMsg->term == ths->pRaftStore->currentTerm) && (ths->state == TAOS_SYNC_STATE_FOLLOWER) && logOK;
|
||||
if (condition) {
|
||||
// has extra entries (> preIndex) in local log
|
||||
SyncIndex myLastIndex = syncNodeGetLastIndex(ths);
|
||||
bool hasExtraEntries = myLastIndex > pMsg->prevLogIndex;
|
||||
|
||||
// has entries in SyncAppendEntries msg
|
||||
bool hasAppendEntries = pMsg->dataLen > 0;
|
||||
SOffsetAndContLen* metaTableArr = syncAppendEntriesBatchMetaTableArray(pMsg);
|
||||
|
||||
syncLogRecvAppendEntriesBatch(ths, pMsg, "really match");
|
||||
|
||||
int32_t pass = 0;
|
||||
|
||||
if (hasExtraEntries) {
|
||||
// make log same, rollback deleted entries
|
||||
pass = syncNodeDoMakeLogSame(ths, pMsg->prevLogIndex + 1);
|
||||
ASSERT(pass >= 0);
|
||||
}
|
||||
|
||||
if (hasAppendEntries) {
|
||||
// append entry batch
|
||||
if (pass == 0) {
|
||||
// assert! no batch
|
||||
ASSERT(pMsg->dataCount <= 1);
|
||||
|
||||
// append entry batch
|
||||
for (int32_t i = 0; i < pMsg->dataCount; ++i) {
|
||||
SSyncRaftEntry* pAppendEntry = (SSyncRaftEntry*)(pMsg->data + metaTableArr[i].offset);
|
||||
code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pAppendEntry);
|
||||
if (code != 0) {
|
||||
sError("vgId:%d, failed to append log entry since %s", ths->vgId, tstrerror(terrno));
|
||||
return -1;
|
||||
}
|
||||
|
||||
code = syncNodePreCommit(ths, pAppendEntry, 0);
|
||||
ASSERT(code == 0);
|
||||
|
||||
// syncEntryDestory(pAppendEntry);
|
||||
}
|
||||
}
|
||||
|
||||
// fsync once
|
||||
SSyncLogStoreData* pData = ths->pLogStore->data;
|
||||
SWal* pWal = pData->pWal;
|
||||
walFsync(pWal, false);
|
||||
}
|
||||
|
||||
// prepare response msg
|
||||
SyncAppendEntriesReply* pReply = syncAppendEntriesReplyBuild(ths->vgId);
|
||||
pReply->srcId = ths->myRaftId;
|
||||
pReply->destId = pMsg->srcId;
|
||||
pReply->term = ths->pRaftStore->currentTerm;
|
||||
pReply->privateTerm = ths->pNewNodeReceiver->privateTerm;
|
||||
pReply->success = true;
|
||||
pReply->matchIndex = hasAppendEntries ? pMsg->prevLogIndex + pMsg->dataCount : pMsg->prevLogIndex;
|
||||
pReply->startTime = ths->startTime;
|
||||
|
||||
// msg event log
|
||||
syncLogSendAppendEntriesReply(ths, pReply, "");
|
||||
|
||||
// send response
|
||||
SRpcMsg rpcMsg;
|
||||
syncAppendEntriesReply2RpcMsg(pReply, &rpcMsg);
|
||||
syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg);
|
||||
syncAppendEntriesReplyDestroy(pReply);
|
||||
|
||||
int32_t syncNodeFollowerCommit(SSyncNode* ths, SyncIndex newCommitIndex) {
|
||||
// maybe update commit index, leader notice me
|
||||
if (pMsg->commitIndex > ths->commitIndex) {
|
||||
SyncIndex lastIndex = ths->pLogStore->syncLogLastIndex(ths->pLogStore);
|
||||
|
||||
SyncIndex beginIndex = 0;
|
||||
SyncIndex endIndex = -1;
|
||||
|
||||
if (newCommitIndex > ths->commitIndex) {
|
||||
// has commit entry in local
|
||||
if (pMsg->commitIndex <= lastIndex) {
|
||||
beginIndex = ths->commitIndex + 1;
|
||||
endIndex = pMsg->commitIndex;
|
||||
|
||||
// update commit index
|
||||
ths->commitIndex = pMsg->commitIndex;
|
||||
|
||||
// call back Wal
|
||||
code = ths->pLogStore->updateCommitIndex(ths->pLogStore, ths->commitIndex);
|
||||
ASSERT(code == 0);
|
||||
|
||||
} else if (pMsg->commitIndex > lastIndex && ths->commitIndex < lastIndex) {
|
||||
beginIndex = ths->commitIndex + 1;
|
||||
endIndex = lastIndex;
|
||||
|
||||
// update commit index, speed up
|
||||
ths->commitIndex = lastIndex;
|
||||
|
||||
// call back Wal
|
||||
code = ths->pLogStore->updateCommitIndex(ths->pLogStore, ths->commitIndex);
|
||||
ASSERT(code == 0);
|
||||
}
|
||||
|
||||
code = syncNodeCommit(ths, beginIndex, endIndex, ths->state);
|
||||
ASSERT(code == 0);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
} while (0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int32_t syncNodeOnAppendEntriesSnapshotCb(SSyncNode* ths, SyncAppendEntries* pMsg) {
|
||||
int32_t ret = 0;
|
||||
int32_t code = 0;
|
||||
|
||||
// if already drop replica, do not process
|
||||
if (!syncNodeInRaftGroup(ths, &(pMsg->srcId)) && !ths->pRaftCfg->isStandBy) {
|
||||
syncLogRecvAppendEntries(ths, pMsg, "maybe replica already dropped");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// maybe update term
|
||||
if (pMsg->term > ths->pRaftStore->currentTerm) {
|
||||
syncNodeUpdateTerm(ths, pMsg->term);
|
||||
}
|
||||
ASSERT(pMsg->term <= ths->pRaftStore->currentTerm);
|
||||
|
||||
// reset elect timer
|
||||
if (pMsg->term == ths->pRaftStore->currentTerm) {
|
||||
ths->leaderCache = pMsg->srcId;
|
||||
syncNodeResetElectTimer(ths);
|
||||
}
|
||||
ASSERT(pMsg->dataLen >= 0);
|
||||
|
||||
// candidate to follower
|
||||
//
|
||||
// operation:
|
||||
// to follower
|
||||
do {
|
||||
bool condition = pMsg->term == ths->pRaftStore->currentTerm && ths->state == TAOS_SYNC_STATE_CANDIDATE;
|
||||
if (condition) {
|
||||
syncLogRecvAppendEntries(ths, pMsg, "candidate to follower");
|
||||
syncNodeBecomeFollower(ths, "from candidate by append entries");
|
||||
return 0; // do not reply?
|
||||
}
|
||||
} while (0);
|
||||
|
||||
// fake match
|
||||
//
|
||||
// condition1:
|
||||
// preIndex <= my commit index
|
||||
//
|
||||
// operation:
|
||||
// if hasAppendEntries && pMsg->prevLogIndex == ths->commitIndex, append entry
|
||||
// match my-commit-index or my-commit-index + 1
|
||||
// no operation on log
|
||||
do {
|
||||
bool condition = (pMsg->term == ths->pRaftStore->currentTerm) && (ths->state == TAOS_SYNC_STATE_FOLLOWER) &&
|
||||
(pMsg->prevLogIndex <= ths->commitIndex);
|
||||
if (condition) {
|
||||
syncLogRecvAppendEntries(ths, pMsg, "fake match");
|
||||
|
||||
SyncIndex matchIndex = ths->commitIndex;
|
||||
bool hasAppendEntries = pMsg->dataLen > 0;
|
||||
if (hasAppendEntries && pMsg->prevLogIndex == ths->commitIndex) {
|
||||
// append entry
|
||||
SSyncRaftEntry* pAppendEntry = syncEntryDeserialize(pMsg->data, pMsg->dataLen);
|
||||
ASSERT(pAppendEntry != NULL);
|
||||
|
||||
{
|
||||
// has extra entries (> preIndex) in local log
|
||||
SyncIndex logLastIndex = ths->pLogStore->syncLogLastIndex(ths->pLogStore);
|
||||
bool hasExtraEntries = logLastIndex > pMsg->prevLogIndex;
|
||||
|
||||
if (hasExtraEntries) {
|
||||
// make log same, rollback deleted entries
|
||||
code = syncNodeMakeLogSame(ths, pMsg);
|
||||
ASSERT(code == 0);
|
||||
}
|
||||
}
|
||||
|
||||
code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pAppendEntry);
|
||||
if (code != 0) {
|
||||
sError("vgId:%d, failed to append log entry since %s", ths->vgId, tstrerror(terrno));
|
||||
return -1;
|
||||
}
|
||||
|
||||
// pre commit
|
||||
code = syncNodePreCommit(ths, pAppendEntry, 0);
|
||||
ASSERT(code == 0);
|
||||
|
||||
// update match index
|
||||
matchIndex = pMsg->prevLogIndex + 1;
|
||||
|
||||
syncEntryDestory(pAppendEntry);
|
||||
}
|
||||
|
||||
// prepare response msg
|
||||
SyncAppendEntriesReply* pReply = syncAppendEntriesReplyBuild(ths->vgId);
|
||||
pReply->srcId = ths->myRaftId;
|
||||
pReply->destId = pMsg->srcId;
|
||||
pReply->term = ths->pRaftStore->currentTerm;
|
||||
pReply->privateTerm = ths->pNewNodeReceiver->privateTerm;
|
||||
pReply->success = true;
|
||||
pReply->matchIndex = matchIndex;
|
||||
pReply->startTime = ths->startTime;
|
||||
|
||||
// msg event log
|
||||
syncLogSendAppendEntriesReply(ths, pReply, "");
|
||||
|
||||
// send response
|
||||
SRpcMsg rpcMsg;
|
||||
syncAppendEntriesReply2RpcMsg(pReply, &rpcMsg);
|
||||
syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg);
|
||||
syncAppendEntriesReplyDestroy(pReply);
|
||||
|
||||
return ret;
|
||||
}
|
||||
} while (0);
|
||||
|
||||
// calculate logOK here, before will coredump, due to fake match
|
||||
bool logOK = syncNodeOnAppendEntriesLogOK(ths, pMsg);
|
||||
|
||||
// not match
|
||||
//
|
||||
// condition1:
|
||||
// term < myTerm
|
||||
//
|
||||
// condition2:
|
||||
// !logOK
|
||||
//
|
||||
// operation:
|
||||
// not match
|
||||
// no operation on log
|
||||
do {
|
||||
bool condition1 = pMsg->term < ths->pRaftStore->currentTerm;
|
||||
bool condition2 =
|
||||
(pMsg->term == ths->pRaftStore->currentTerm) && (ths->state == TAOS_SYNC_STATE_FOLLOWER) && !logOK;
|
||||
bool condition = condition1 || condition2;
|
||||
|
||||
if (condition) {
|
||||
syncLogRecvAppendEntries(ths, pMsg, "not match");
|
||||
|
||||
// prepare response msg
|
||||
SyncAppendEntriesReply* pReply = syncAppendEntriesReplyBuild(ths->vgId);
|
||||
pReply->srcId = ths->myRaftId;
|
||||
pReply->destId = pMsg->srcId;
|
||||
pReply->term = ths->pRaftStore->currentTerm;
|
||||
pReply->privateTerm = ths->pNewNodeReceiver->privateTerm;
|
||||
pReply->success = false;
|
||||
pReply->matchIndex = SYNC_INDEX_INVALID;
|
||||
pReply->startTime = ths->startTime;
|
||||
|
||||
// msg event log
|
||||
syncLogSendAppendEntriesReply(ths, pReply, "");
|
||||
|
||||
// send response
|
||||
SRpcMsg rpcMsg;
|
||||
syncAppendEntriesReply2RpcMsg(pReply, &rpcMsg);
|
||||
syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg);
|
||||
syncAppendEntriesReplyDestroy(pReply);
|
||||
|
||||
return ret;
|
||||
}
|
||||
} while (0);
|
||||
|
||||
// really match
|
||||
//
|
||||
// condition:
|
||||
// logOK
|
||||
//
|
||||
// operation:
|
||||
// match
|
||||
// make log same
|
||||
do {
|
||||
bool condition = (pMsg->term == ths->pRaftStore->currentTerm) && (ths->state == TAOS_SYNC_STATE_FOLLOWER) && logOK;
|
||||
if (condition) {
|
||||
// has extra entries (> preIndex) in local log
|
||||
SyncIndex myLastIndex = syncNodeGetLastIndex(ths);
|
||||
bool hasExtraEntries = myLastIndex > pMsg->prevLogIndex;
|
||||
|
||||
// has entries in SyncAppendEntries msg
|
||||
bool hasAppendEntries = pMsg->dataLen > 0;
|
||||
|
||||
syncLogRecvAppendEntries(ths, pMsg, "really match");
|
||||
|
||||
if (hasExtraEntries) {
|
||||
// make log same, rollback deleted entries
|
||||
code = syncNodeMakeLogSame(ths, pMsg);
|
||||
ASSERT(code == 0);
|
||||
}
|
||||
|
||||
if (hasAppendEntries) {
|
||||
// append entry
|
||||
SSyncRaftEntry* pAppendEntry = syncEntryDeserialize(pMsg->data, pMsg->dataLen);
|
||||
ASSERT(pAppendEntry != NULL);
|
||||
|
||||
code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pAppendEntry);
|
||||
if (code != 0) {
|
||||
sError("vgId:%d, failed to append log entry since %s", ths->vgId, tstrerror(terrno));
|
||||
return -1;
|
||||
}
|
||||
|
||||
// pre commit
|
||||
code = syncNodePreCommit(ths, pAppendEntry, 0);
|
||||
ASSERT(code == 0);
|
||||
|
||||
syncEntryDestory(pAppendEntry);
|
||||
}
|
||||
|
||||
// prepare response msg
|
||||
SyncAppendEntriesReply* pReply = syncAppendEntriesReplyBuild(ths->vgId);
|
||||
pReply->srcId = ths->myRaftId;
|
||||
pReply->destId = pMsg->srcId;
|
||||
pReply->term = ths->pRaftStore->currentTerm;
|
||||
pReply->privateTerm = ths->pNewNodeReceiver->privateTerm;
|
||||
pReply->success = true;
|
||||
pReply->matchIndex = hasAppendEntries ? pMsg->prevLogIndex + 1 : pMsg->prevLogIndex;
|
||||
pReply->startTime = ths->startTime;
|
||||
|
||||
// msg event log
|
||||
syncLogSendAppendEntriesReply(ths, pReply, "");
|
||||
|
||||
// send response
|
||||
SRpcMsg rpcMsg;
|
||||
syncAppendEntriesReply2RpcMsg(pReply, &rpcMsg);
|
||||
syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg);
|
||||
syncAppendEntriesReplyDestroy(pReply);
|
||||
|
||||
// maybe update commit index, leader notice me
|
||||
if (pMsg->commitIndex > ths->commitIndex) {
|
||||
// has commit entry in local
|
||||
if (pMsg->commitIndex <= ths->pLogStore->syncLogLastIndex(ths->pLogStore)) {
|
||||
if (newCommitIndex <= ths->pLogStore->syncLogLastIndex(ths->pLogStore)) {
|
||||
// advance commit index to sanpshot first
|
||||
SSnapshot snapshot;
|
||||
ths->pFsm->FpGetSnapshotInfo(ths->pFsm, &snapshot);
|
||||
|
@ -1021,28 +284,208 @@ int32_t syncNodeOnAppendEntriesSnapshotCb(SSyncNode* ths, SyncAppendEntries* pMs
|
|||
ths->commitIndex = snapshot.lastApplyIndex;
|
||||
|
||||
char eventLog[128];
|
||||
snprintf(eventLog, sizeof(eventLog), "commit by snapshot from index:%" PRId64 " to index:%" PRId64,
|
||||
commitBegin, commitEnd);
|
||||
snprintf(eventLog, sizeof(eventLog), "commit by snapshot from index:%" PRId64 " to index:%" PRId64, commitBegin,
|
||||
commitEnd);
|
||||
syncNodeEventLog(ths, eventLog);
|
||||
}
|
||||
|
||||
SyncIndex beginIndex = ths->commitIndex + 1;
|
||||
SyncIndex endIndex = pMsg->commitIndex;
|
||||
SyncIndex endIndex = newCommitIndex;
|
||||
|
||||
// update commit index
|
||||
ths->commitIndex = pMsg->commitIndex;
|
||||
ths->commitIndex = newCommitIndex;
|
||||
|
||||
// call back Wal
|
||||
code = ths->pLogStore->updateCommitIndex(ths->pLogStore, ths->commitIndex);
|
||||
int32_t code = ths->pLogStore->syncLogUpdateCommitIndex(ths->pLogStore, ths->commitIndex);
|
||||
ASSERT(code == 0);
|
||||
|
||||
code = syncNodeCommit(ths, beginIndex, endIndex, ths->state);
|
||||
code = syncNodeDoCommit(ths, beginIndex, endIndex, ths->state);
|
||||
ASSERT(code == 0);
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
} while (0);
|
||||
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int32_t syncNodeOnAppendEntries(SSyncNode* ths, SyncAppendEntries* pMsg) {
|
||||
// if already drop replica, do not process
|
||||
if (!syncNodeInRaftGroup(ths, &(pMsg->srcId)) && !ths->pRaftCfg->isStandBy) {
|
||||
syncLogRecvAppendEntries(ths, pMsg, "ignore, maybe replica already dropped");
|
||||
goto _IGNORE;
|
||||
}
|
||||
|
||||
// prepare response msg
|
||||
SyncAppendEntriesReply* pReply = syncAppendEntriesReplyBuild(ths->vgId);
|
||||
pReply->srcId = ths->myRaftId;
|
||||
pReply->destId = pMsg->srcId;
|
||||
pReply->term = ths->pRaftStore->currentTerm;
|
||||
pReply->success = false;
|
||||
// pReply->matchIndex = ths->pLogStore->syncLogLastIndex(ths->pLogStore);
|
||||
pReply->matchIndex = SYNC_INDEX_INVALID;
|
||||
pReply->lastSendIndex = pMsg->prevLogIndex + 1;
|
||||
pReply->privateTerm = ths->pNewNodeReceiver->privateTerm;
|
||||
pReply->startTime = ths->startTime;
|
||||
|
||||
if (pMsg->term < ths->pRaftStore->currentTerm) {
|
||||
syncLogRecvAppendEntries(ths, pMsg, "reject, small term");
|
||||
goto _SEND_RESPONSE;
|
||||
}
|
||||
|
||||
if (pMsg->term > ths->pRaftStore->currentTerm) {
|
||||
pReply->term = pMsg->term;
|
||||
}
|
||||
|
||||
syncNodeStepDown(ths, pMsg->term);
|
||||
syncNodeResetElectTimer(ths);
|
||||
|
||||
SyncIndex startIndex = ths->pLogStore->syncLogBeginIndex(ths->pLogStore);
|
||||
SyncIndex lastIndex = ths->pLogStore->syncLogLastIndex(ths->pLogStore);
|
||||
|
||||
if (pMsg->prevLogIndex > lastIndex) {
|
||||
syncLogRecvAppendEntries(ths, pMsg, "reject, index not match");
|
||||
goto _SEND_RESPONSE;
|
||||
}
|
||||
|
||||
if (pMsg->prevLogIndex >= startIndex) {
|
||||
SyncTerm myPreLogTerm = syncNodeGetPreTerm(ths, pMsg->prevLogIndex + 1);
|
||||
ASSERT(myPreLogTerm != SYNC_TERM_INVALID);
|
||||
|
||||
if (myPreLogTerm != pMsg->prevLogTerm) {
|
||||
syncLogRecvAppendEntries(ths, pMsg, "reject, pre-term not match");
|
||||
goto _SEND_RESPONSE;
|
||||
}
|
||||
}
|
||||
|
||||
// accept
|
||||
pReply->success = true;
|
||||
bool hasAppendEntries = pMsg->dataLen > 0;
|
||||
if (hasAppendEntries) {
|
||||
SSyncRaftEntry* pAppendEntry = syncEntryDeserialize(pMsg->data, pMsg->dataLen);
|
||||
ASSERT(pAppendEntry != NULL);
|
||||
|
||||
SyncIndex appendIndex = pMsg->prevLogIndex + 1;
|
||||
SSyncRaftEntry* pLocalEntry = NULL;
|
||||
int32_t code = ths->pLogStore->syncLogGetEntry(ths->pLogStore, appendIndex, &pLocalEntry);
|
||||
if (code == 0) {
|
||||
if (pLocalEntry->term == pAppendEntry->term) {
|
||||
// do nothing
|
||||
|
||||
char logBuf[128];
|
||||
snprintf(logBuf, sizeof(logBuf), "log match, do nothing, index:%ld", appendIndex);
|
||||
syncNodeEventLog(ths, logBuf);
|
||||
|
||||
} else {
|
||||
// truncate
|
||||
code = ths->pLogStore->syncLogTruncate(ths->pLogStore, appendIndex);
|
||||
if (code != 0) {
|
||||
char logBuf[128];
|
||||
snprintf(logBuf, sizeof(logBuf), "ignore, truncate error, append-index:%ld", appendIndex);
|
||||
syncLogRecvAppendEntries(ths, pMsg, logBuf);
|
||||
|
||||
goto _IGNORE;
|
||||
}
|
||||
|
||||
// append
|
||||
code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pAppendEntry);
|
||||
if (code != 0) {
|
||||
char logBuf[128];
|
||||
snprintf(logBuf, sizeof(logBuf), "ignore, append error, append-index:%ld", appendIndex);
|
||||
syncLogRecvAppendEntries(ths, pMsg, logBuf);
|
||||
|
||||
goto _IGNORE;
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
if (terrno == TSDB_CODE_WAL_LOG_NOT_EXIST) {
|
||||
// log not exist
|
||||
|
||||
// truncate
|
||||
code = ths->pLogStore->syncLogTruncate(ths->pLogStore, appendIndex);
|
||||
if (code != 0) {
|
||||
char logBuf[128];
|
||||
snprintf(logBuf, sizeof(logBuf), "ignore, log not exist, truncate error, append-index:%ld", appendIndex);
|
||||
syncLogRecvAppendEntries(ths, pMsg, logBuf);
|
||||
|
||||
goto _IGNORE;
|
||||
}
|
||||
|
||||
// append
|
||||
code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pAppendEntry);
|
||||
if (code != 0) {
|
||||
char logBuf[128];
|
||||
snprintf(logBuf, sizeof(logBuf), "ignore, log not exist, append error, append-index:%ld", appendIndex);
|
||||
syncLogRecvAppendEntries(ths, pMsg, logBuf);
|
||||
|
||||
goto _IGNORE;
|
||||
}
|
||||
|
||||
} else {
|
||||
// error
|
||||
char logBuf[128];
|
||||
snprintf(logBuf, sizeof(logBuf), "ignore, get local entry error, append-index:%ld", appendIndex);
|
||||
syncLogRecvAppendEntries(ths, pMsg, logBuf);
|
||||
|
||||
goto _IGNORE;
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
if (code != 0 && terrno == TSDB_CODE_WAL_LOG_NOT_EXIST) {
|
||||
code = ths->pLogStore->syncLogTruncate(ths->pLogStore, appendIndex);
|
||||
ASSERT(code == 0);
|
||||
|
||||
code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pAppendEntry);
|
||||
ASSERT(code == 0);
|
||||
|
||||
} else {
|
||||
ASSERT(code == 0);
|
||||
|
||||
if (pLocalEntry->term == pAppendEntry->term) {
|
||||
// do nothing
|
||||
} else {
|
||||
code = ths->pLogStore->syncLogTruncate(ths->pLogStore, appendIndex);
|
||||
ASSERT(code == 0);
|
||||
|
||||
code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pAppendEntry);
|
||||
ASSERT(code == 0);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// update match index
|
||||
pReply->matchIndex = pAppendEntry->index;
|
||||
|
||||
syncEntryDestory(pLocalEntry);
|
||||
syncEntryDestory(pAppendEntry);
|
||||
|
||||
} else {
|
||||
// no append entries, do nothing
|
||||
// maybe has extra entries, no harm
|
||||
|
||||
// update match index
|
||||
pReply->matchIndex = pMsg->prevLogIndex;
|
||||
}
|
||||
|
||||
// maybe update commit index, leader notice me
|
||||
syncNodeFollowerCommit(ths, pMsg->commitIndex);
|
||||
|
||||
syncLogRecvAppendEntries(ths, pMsg, "accept");
|
||||
goto _SEND_RESPONSE;
|
||||
|
||||
_IGNORE:
|
||||
syncAppendEntriesReplyDestroy(pReply);
|
||||
return 0;
|
||||
|
||||
_SEND_RESPONSE:
|
||||
// msg event log
|
||||
syncLogSendAppendEntriesReply(ths, pReply, "");
|
||||
|
||||
// send response
|
||||
SRpcMsg rpcMsg;
|
||||
syncAppendEntriesReply2RpcMsg(pReply, &rpcMsg);
|
||||
syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg);
|
||||
syncAppendEntriesReplyDestroy(pReply);
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -20,6 +20,7 @@
|
|||
#include "syncRaftCfg.h"
|
||||
#include "syncRaftLog.h"
|
||||
#include "syncRaftStore.h"
|
||||
#include "syncReplication.h"
|
||||
#include "syncSnapshot.h"
|
||||
#include "syncUtil.h"
|
||||
#include "syncVoteMgr.h"
|
||||
|
@ -37,74 +38,6 @@
|
|||
// /\ Discard(m)
|
||||
// /\ UNCHANGED <<serverVars, candidateVars, logVars, elections>>
|
||||
//
|
||||
int32_t syncNodeOnAppendEntriesReplyCb(SSyncNode* ths, SyncAppendEntriesReply* pMsg) {
|
||||
int32_t ret = 0;
|
||||
|
||||
// if already drop replica, do not process
|
||||
if (!syncNodeInRaftGroup(ths, &(pMsg->srcId)) && !ths->pRaftCfg->isStandBy) {
|
||||
syncLogRecvAppendEntriesReply(ths, pMsg, "maybe replica already dropped");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// drop stale response
|
||||
if (pMsg->term < ths->pRaftStore->currentTerm) {
|
||||
syncLogRecvAppendEntriesReply(ths, pMsg, "drop stale response");
|
||||
return 0;
|
||||
}
|
||||
|
||||
// no need this code, because if I receive reply.term, then I must have sent for that term.
|
||||
// if (pMsg->term > ths->pRaftStore->currentTerm) {
|
||||
// syncNodeUpdateTerm(ths, pMsg->term);
|
||||
// }
|
||||
|
||||
if (pMsg->term > ths->pRaftStore->currentTerm) {
|
||||
syncLogRecvAppendEntriesReply(ths, pMsg, "error term");
|
||||
return -1;
|
||||
}
|
||||
|
||||
ASSERT(pMsg->term == ths->pRaftStore->currentTerm);
|
||||
|
||||
// update time
|
||||
syncIndexMgrSetStartTime(ths->pNextIndex, &(pMsg->srcId), pMsg->startTime);
|
||||
syncIndexMgrSetRecvTime(ths->pNextIndex, &(pMsg->srcId), taosGetTimestampMs());
|
||||
|
||||
SyncIndex beforeNextIndex = syncIndexMgrGetIndex(ths->pNextIndex, &(pMsg->srcId));
|
||||
SyncIndex beforeMatchIndex = syncIndexMgrGetIndex(ths->pMatchIndex, &(pMsg->srcId));
|
||||
|
||||
if (pMsg->success) {
|
||||
// nextIndex' = [nextIndex EXCEPT ![i][j] = m.mmatchIndex + 1]
|
||||
syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), pMsg->matchIndex + 1);
|
||||
|
||||
// matchIndex' = [matchIndex EXCEPT ![i][j] = m.mmatchIndex]
|
||||
syncIndexMgrSetIndex(ths->pMatchIndex, &(pMsg->srcId), pMsg->matchIndex);
|
||||
|
||||
// maybe commit
|
||||
syncMaybeAdvanceCommitIndex(ths);
|
||||
|
||||
} else {
|
||||
SyncIndex nextIndex = syncIndexMgrGetIndex(ths->pNextIndex, &(pMsg->srcId));
|
||||
|
||||
// notice! int64, uint64
|
||||
if (nextIndex > SYNC_INDEX_BEGIN) {
|
||||
--nextIndex;
|
||||
} else {
|
||||
nextIndex = SYNC_INDEX_BEGIN;
|
||||
}
|
||||
syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), nextIndex);
|
||||
}
|
||||
|
||||
SyncIndex afterNextIndex = syncIndexMgrGetIndex(ths->pNextIndex, &(pMsg->srcId));
|
||||
SyncIndex afterMatchIndex = syncIndexMgrGetIndex(ths->pMatchIndex, &(pMsg->srcId));
|
||||
do {
|
||||
char logBuf[256];
|
||||
snprintf(logBuf, sizeof(logBuf),
|
||||
"before next:%" PRId64 ", match:%" PRId64 ", after next:%" PRId64 ", match:%" PRId64, beforeNextIndex,
|
||||
beforeMatchIndex, afterNextIndex, afterMatchIndex);
|
||||
syncLogRecvAppendEntriesReply(ths, pMsg, logBuf);
|
||||
} while (0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// only start once
|
||||
static void syncNodeStartSnapshotOnce(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endIndex, SyncTerm lastApplyTerm,
|
||||
|
@ -151,7 +84,7 @@ static void syncNodeStartSnapshotOnce(SSyncNode* ths, SyncIndex beginIndex, Sync
|
|||
}
|
||||
}
|
||||
|
||||
int32_t syncNodeOnAppendEntriesReplySnapshot2Cb(SSyncNode* ths, SyncAppendEntriesReply* pMsg) {
|
||||
int32_t syncNodeOnAppendEntriesReply(SSyncNode* ths, SyncAppendEntriesReply* pMsg) {
|
||||
int32_t ret = 0;
|
||||
|
||||
// if already drop replica, do not process
|
||||
|
@ -166,251 +99,40 @@ int32_t syncNodeOnAppendEntriesReplySnapshot2Cb(SSyncNode* ths, SyncAppendEntrie
|
|||
return 0;
|
||||
}
|
||||
|
||||
// error term
|
||||
if (ths->state == TAOS_SYNC_STATE_LEADER) {
|
||||
if (pMsg->term > ths->pRaftStore->currentTerm) {
|
||||
syncLogRecvAppendEntriesReply(ths, pMsg, "error term");
|
||||
syncNodeStepDown(ths, pMsg->term);
|
||||
return -1;
|
||||
}
|
||||
|
||||
ASSERT(pMsg->term == ths->pRaftStore->currentTerm);
|
||||
|
||||
// update time
|
||||
syncIndexMgrSetStartTime(ths->pNextIndex, &(pMsg->srcId), pMsg->startTime);
|
||||
syncIndexMgrSetRecvTime(ths->pNextIndex, &(pMsg->srcId), taosGetTimestampMs());
|
||||
|
||||
SyncIndex beforeNextIndex = syncIndexMgrGetIndex(ths->pNextIndex, &(pMsg->srcId));
|
||||
SyncIndex beforeMatchIndex = syncIndexMgrGetIndex(ths->pMatchIndex, &(pMsg->srcId));
|
||||
|
||||
if (pMsg->success) {
|
||||
SyncIndex newNextIndex = pMsg->matchIndex + 1;
|
||||
SyncIndex newMatchIndex = pMsg->matchIndex;
|
||||
|
||||
bool needStartSnapshot = false;
|
||||
if (newMatchIndex >= SYNC_INDEX_BEGIN && !ths->pLogStore->syncLogExist(ths->pLogStore, newMatchIndex)) {
|
||||
needStartSnapshot = true;
|
||||
}
|
||||
|
||||
if (!needStartSnapshot) {
|
||||
// update next-index, match-index
|
||||
syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), newNextIndex);
|
||||
syncIndexMgrSetIndex(ths->pMatchIndex, &(pMsg->srcId), newMatchIndex);
|
||||
|
||||
// maybe commit
|
||||
if (ths->state == TAOS_SYNC_STATE_LEADER) {
|
||||
syncMaybeAdvanceCommitIndex(ths);
|
||||
}
|
||||
|
||||
} else {
|
||||
// start snapshot <match+1, old snapshot.end>
|
||||
SSnapshot oldSnapshot;
|
||||
ths->pFsm->FpGetSnapshotInfo(ths->pFsm, &oldSnapshot);
|
||||
if (oldSnapshot.lastApplyIndex > newMatchIndex) {
|
||||
syncNodeStartSnapshotOnce(ths, newMatchIndex + 1, oldSnapshot.lastApplyIndex, oldSnapshot.lastApplyTerm,
|
||||
pMsg); // term maybe not ok?
|
||||
}
|
||||
|
||||
syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), oldSnapshot.lastApplyIndex + 1);
|
||||
syncIndexMgrSetIndex(ths->pMatchIndex, &(pMsg->srcId), newMatchIndex);
|
||||
}
|
||||
|
||||
// event log, update next-index
|
||||
do {
|
||||
char host[64];
|
||||
int16_t port;
|
||||
syncUtilU642Addr(pMsg->srcId.addr, host, sizeof(host), &port);
|
||||
|
||||
char logBuf[256];
|
||||
snprintf(logBuf, sizeof(logBuf), "reset next-index:%" PRId64 ", match-index:%" PRId64 " for %s:%d", newNextIndex,
|
||||
newMatchIndex, host, port);
|
||||
syncNodeEventLog(ths, logBuf);
|
||||
|
||||
} while (0);
|
||||
|
||||
} else {
|
||||
SyncIndex nextIndex = syncIndexMgrGetIndex(ths->pNextIndex, &(pMsg->srcId));
|
||||
|
||||
if (nextIndex > SYNC_INDEX_BEGIN) {
|
||||
--nextIndex;
|
||||
|
||||
// speed up
|
||||
if (nextIndex > pMsg->matchIndex + 1) {
|
||||
nextIndex = pMsg->matchIndex + 1;
|
||||
}
|
||||
|
||||
bool needStartSnapshot = false;
|
||||
if (nextIndex >= SYNC_INDEX_BEGIN && !ths->pLogStore->syncLogExist(ths->pLogStore, nextIndex)) {
|
||||
needStartSnapshot = true;
|
||||
}
|
||||
if (nextIndex - 1 >= SYNC_INDEX_BEGIN && !ths->pLogStore->syncLogExist(ths->pLogStore, nextIndex - 1)) {
|
||||
needStartSnapshot = true;
|
||||
}
|
||||
|
||||
if (!needStartSnapshot) {
|
||||
// do nothing
|
||||
|
||||
} else {
|
||||
SSnapshot oldSnapshot;
|
||||
ths->pFsm->FpGetSnapshotInfo(ths->pFsm, &oldSnapshot);
|
||||
SyncTerm newSnapshotTerm = oldSnapshot.lastApplyTerm;
|
||||
|
||||
SyncIndex endIndex;
|
||||
if (ths->pLogStore->syncLogExist(ths->pLogStore, nextIndex + 1)) {
|
||||
endIndex = nextIndex;
|
||||
} else {
|
||||
endIndex = oldSnapshot.lastApplyIndex;
|
||||
}
|
||||
syncNodeStartSnapshotOnce(ths, pMsg->matchIndex + 1, endIndex, newSnapshotTerm, pMsg);
|
||||
|
||||
// get sender
|
||||
SSyncSnapshotSender* pSender = syncNodeGetSnapshotSender(ths, &(pMsg->srcId));
|
||||
ASSERT(pSender != NULL);
|
||||
SyncIndex sentryIndex = pSender->snapshot.lastApplyIndex + 1;
|
||||
|
||||
// update nextIndex to sentryIndex
|
||||
if (nextIndex <= sentryIndex) {
|
||||
nextIndex = sentryIndex;
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
nextIndex = SYNC_INDEX_BEGIN;
|
||||
}
|
||||
syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), nextIndex);
|
||||
|
||||
SyncIndex oldMatchIndex = syncIndexMgrGetIndex(ths->pMatchIndex, &(pMsg->srcId));
|
||||
if (pMsg->matchIndex > oldMatchIndex) {
|
||||
syncIndexMgrSetIndex(ths->pMatchIndex, &(pMsg->srcId), pMsg->matchIndex);
|
||||
}
|
||||
|
||||
// event log, update next-index
|
||||
do {
|
||||
char host[64];
|
||||
int16_t port;
|
||||
syncUtilU642Addr(pMsg->srcId.addr, host, sizeof(host), &port);
|
||||
|
||||
SyncIndex newNextIndex = nextIndex;
|
||||
SyncIndex newMatchIndex = syncIndexMgrGetIndex(ths->pMatchIndex, &(pMsg->srcId));
|
||||
char logBuf[256];
|
||||
snprintf(logBuf, sizeof(logBuf), "reset2 next-index:%" PRId64 ", match-index:%" PRId64 " for %s:%d", newNextIndex,
|
||||
newMatchIndex, host, port);
|
||||
syncNodeEventLog(ths, logBuf);
|
||||
|
||||
} while (0);
|
||||
}
|
||||
|
||||
SyncIndex afterNextIndex = syncIndexMgrGetIndex(ths->pNextIndex, &(pMsg->srcId));
|
||||
SyncIndex afterMatchIndex = syncIndexMgrGetIndex(ths->pMatchIndex, &(pMsg->srcId));
|
||||
do {
|
||||
char logBuf[256];
|
||||
snprintf(logBuf, sizeof(logBuf),
|
||||
"before next:%" PRId64 ", match:%" PRId64 ", after next:%" PRId64 ", match:%" PRId64, beforeNextIndex,
|
||||
beforeMatchIndex, afterNextIndex, afterMatchIndex);
|
||||
syncLogRecvAppendEntriesReply(ths, pMsg, logBuf);
|
||||
} while (0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int32_t syncNodeOnAppendEntriesReplySnapshotCb(SSyncNode* ths, SyncAppendEntriesReply* pMsg) {
|
||||
int32_t ret = 0;
|
||||
|
||||
// if already drop replica, do not process
|
||||
if (!syncNodeInRaftGroup(ths, &(pMsg->srcId)) && !ths->pRaftCfg->isStandBy) {
|
||||
syncLogRecvAppendEntriesReply(ths, pMsg, "maybe replica already dropped");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// drop stale response
|
||||
if (pMsg->term < ths->pRaftStore->currentTerm) {
|
||||
syncLogRecvAppendEntriesReply(ths, pMsg, "drop stale response");
|
||||
return 0;
|
||||
}
|
||||
|
||||
// no need this code, because if I receive reply.term, then I must have sent for that term.
|
||||
// if (pMsg->term > ths->pRaftStore->currentTerm) {
|
||||
// syncNodeUpdateTerm(ths, pMsg->term);
|
||||
// }
|
||||
|
||||
if (pMsg->term > ths->pRaftStore->currentTerm) {
|
||||
syncLogRecvAppendEntriesReply(ths, pMsg, "error term");
|
||||
return -1;
|
||||
}
|
||||
|
||||
ASSERT(pMsg->term == ths->pRaftStore->currentTerm);
|
||||
|
||||
// update time
|
||||
syncIndexMgrSetStartTime(ths->pNextIndex, &(pMsg->srcId), pMsg->startTime);
|
||||
syncIndexMgrSetRecvTime(ths->pNextIndex, &(pMsg->srcId), taosGetTimestampMs());
|
||||
|
||||
SyncIndex beforeNextIndex = syncIndexMgrGetIndex(ths->pNextIndex, &(pMsg->srcId));
|
||||
SyncIndex beforeMatchIndex = syncIndexMgrGetIndex(ths->pMatchIndex, &(pMsg->srcId));
|
||||
|
||||
if (pMsg->success) {
|
||||
// nextIndex' = [nextIndex EXCEPT ![i][j] = m.mmatchIndex + 1]
|
||||
syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), pMsg->matchIndex + 1);
|
||||
|
||||
// matchIndex' = [matchIndex EXCEPT ![i][j] = m.mmatchIndex]
|
||||
syncIndexMgrSetIndex(ths->pMatchIndex, &(pMsg->srcId), pMsg->matchIndex);
|
||||
|
||||
// maybe commit
|
||||
if (ths->state == TAOS_SYNC_STATE_LEADER) {
|
||||
syncMaybeAdvanceCommitIndex(ths);
|
||||
}
|
||||
syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), pMsg->matchIndex + 1);
|
||||
|
||||
} else {
|
||||
SyncIndex nextIndex = syncIndexMgrGetIndex(ths->pNextIndex, &(pMsg->srcId));
|
||||
|
||||
// notice! int64, uint64
|
||||
if (nextIndex > SYNC_INDEX_BEGIN) {
|
||||
--nextIndex;
|
||||
|
||||
// get sender
|
||||
SSyncSnapshotSender* pSender = syncNodeGetSnapshotSender(ths, &(pMsg->srcId));
|
||||
ASSERT(pSender != NULL);
|
||||
|
||||
SSnapshot snapshot = {.data = NULL,
|
||||
.lastApplyIndex = SYNC_INDEX_INVALID,
|
||||
.lastApplyTerm = 0,
|
||||
.lastConfigIndex = SYNC_INDEX_INVALID};
|
||||
void* pReader = NULL;
|
||||
ths->pFsm->FpGetSnapshot(ths->pFsm, &snapshot, NULL, &pReader);
|
||||
if (snapshot.lastApplyIndex >= SYNC_INDEX_BEGIN && nextIndex <= snapshot.lastApplyIndex + 1 &&
|
||||
!snapshotSenderIsStart(pSender) && pMsg->privateTerm < pSender->privateTerm) {
|
||||
// has snapshot
|
||||
ASSERT(pReader != NULL);
|
||||
SSnapshotParam readerParam = {.start = 0, .end = snapshot.lastApplyIndex};
|
||||
snapshotSenderStart(pSender, readerParam, snapshot, pReader);
|
||||
|
||||
} else {
|
||||
// no snapshot
|
||||
if (pReader != NULL) {
|
||||
ths->pFsm->FpSnapshotStopRead(ths->pFsm, pReader);
|
||||
}
|
||||
}
|
||||
|
||||
SyncIndex sentryIndex = pSender->snapshot.lastApplyIndex + 1;
|
||||
|
||||
// update nextIndex to sentryIndex
|
||||
if (nextIndex <= sentryIndex) {
|
||||
nextIndex = sentryIndex;
|
||||
}
|
||||
|
||||
} else {
|
||||
nextIndex = SYNC_INDEX_BEGIN;
|
||||
}
|
||||
|
||||
syncIndexMgrSetIndex(ths->pNextIndex, &(pMsg->srcId), nextIndex);
|
||||
}
|
||||
|
||||
SyncIndex afterNextIndex = syncIndexMgrGetIndex(ths->pNextIndex, &(pMsg->srcId));
|
||||
SyncIndex afterMatchIndex = syncIndexMgrGetIndex(ths->pMatchIndex, &(pMsg->srcId));
|
||||
do {
|
||||
char logBuf[256];
|
||||
snprintf(logBuf, sizeof(logBuf),
|
||||
"before next:%" PRId64 ", match:%" PRId64 ", after next:%" PRId64 ", match:%" PRId64, beforeNextIndex,
|
||||
beforeMatchIndex, afterNextIndex, afterMatchIndex);
|
||||
syncLogRecvAppendEntriesReply(ths, pMsg, logBuf);
|
||||
} while (0);
|
||||
// send next append entries
|
||||
SPeerState* pState = syncNodeGetPeerState(ths, &(pMsg->srcId));
|
||||
ASSERT(pState != NULL);
|
||||
|
||||
if (pMsg->lastSendIndex == pState->lastSendIndex) {
|
||||
syncNodeReplicateOne(ths, &(pMsg->srcId));
|
||||
}
|
||||
}
|
||||
|
||||
syncLogRecvAppendEntriesReply(ths, pMsg, "process");
|
||||
return 0;
|
||||
}
|
|
@ -45,8 +45,10 @@
|
|||
// /\ UNCHANGED <<messages, serverVars, candidateVars, leaderVars, log>>
|
||||
//
|
||||
void syncMaybeAdvanceCommitIndex(SSyncNode* pSyncNode) {
|
||||
syncIndexMgrLog2("==syncNodeMaybeAdvanceCommitIndex== pNextIndex", pSyncNode->pNextIndex);
|
||||
syncIndexMgrLog2("==syncNodeMaybeAdvanceCommitIndex== pMatchIndex", pSyncNode->pMatchIndex);
|
||||
if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) {
|
||||
syncNodeErrorLog(pSyncNode, "not leader, can not advance commit index");
|
||||
return;
|
||||
}
|
||||
|
||||
// advance commit index to sanpshot first
|
||||
SSnapshot snapshot;
|
||||
|
@ -75,9 +77,11 @@ void syncMaybeAdvanceCommitIndex(SSyncNode* pSyncNode) {
|
|||
if (h) {
|
||||
pEntry = (SSyncRaftEntry*)taosLRUCacheValue(pCache, h);
|
||||
} else {
|
||||
pEntry = pSyncNode->pLogStore->getEntry(pSyncNode->pLogStore, index);
|
||||
if (pEntry == NULL) {
|
||||
sError("failed to get entry since %s. index:%" PRId64, tstrerror(terrno), index);
|
||||
int32_t code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, index, &pEntry);
|
||||
if (code != 0) {
|
||||
char logBuf[128];
|
||||
snprintf(logBuf, sizeof(logBuf), "advance commit index error, read wal index:%ld", index);
|
||||
syncNodeErrorLog(pSyncNode, logBuf);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -125,13 +129,17 @@ void syncMaybeAdvanceCommitIndex(SSyncNode* pSyncNode) {
|
|||
pSyncNode->commitIndex = newCommitIndex;
|
||||
|
||||
// call back Wal
|
||||
pSyncNode->pLogStore->updateCommitIndex(pSyncNode->pLogStore, pSyncNode->commitIndex);
|
||||
pSyncNode->pLogStore->syncLogUpdateCommitIndex(pSyncNode->pLogStore, pSyncNode->commitIndex);
|
||||
|
||||
// execute fsm
|
||||
if (pSyncNode->pFsm != NULL) {
|
||||
int32_t code = syncNodeCommit(pSyncNode, beginIndex, endIndex, pSyncNode->state);
|
||||
int32_t code = syncNodeDoCommit(pSyncNode, beginIndex, endIndex, pSyncNode->state);
|
||||
if (code != 0) {
|
||||
wError("failed to commit sync node since %s", tstrerror(terrno));
|
||||
char logBuf[128];
|
||||
snprintf(logBuf, sizeof(logBuf), "advance commit index error, do commit begin:%ld, end:%ld", beginIndex,
|
||||
endIndex);
|
||||
syncNodeErrorLog(pSyncNode, logBuf);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -220,6 +228,7 @@ int32_t syncNodeDynamicQuorum(const SSyncNode* pSyncNode) {
|
|||
return quorum;
|
||||
}
|
||||
|
||||
/*
|
||||
bool syncAgree(SSyncNode* pSyncNode, SyncIndex index) {
|
||||
int agreeCount = 0;
|
||||
for (int i = 0; i < pSyncNode->replicaNum; ++i) {
|
||||
|
@ -232,8 +241,8 @@ bool syncAgree(SSyncNode* pSyncNode, SyncIndex index) {
|
|||
}
|
||||
return false;
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
bool syncAgree(SSyncNode* pSyncNode, SyncIndex index) {
|
||||
int agreeCount = 0;
|
||||
for (int i = 0; i < pSyncNode->replicaNum; ++i) {
|
||||
|
@ -246,4 +255,3 @@ bool syncAgree(SSyncNode* pSyncNode, SyncIndex index) {
|
|||
}
|
||||
return false;
|
||||
}
|
||||
*/
|
||||
|
|
|
@ -30,45 +30,6 @@
|
|||
// msource |-> i,
|
||||
// mdest |-> j])
|
||||
// /\ UNCHANGED <<serverVars, candidateVars, leaderVars, logVars>>
|
||||
//
|
||||
int32_t syncNodeRequestVotePeers(SSyncNode* pSyncNode) {
|
||||
ASSERT(pSyncNode->state == TAOS_SYNC_STATE_CANDIDATE);
|
||||
|
||||
int32_t ret = 0;
|
||||
for (int i = 0; i < pSyncNode->peersNum; ++i) {
|
||||
SyncRequestVote* pMsg = syncRequestVoteBuild(pSyncNode->vgId);
|
||||
pMsg->srcId = pSyncNode->myRaftId;
|
||||
pMsg->destId = pSyncNode->peersId[i];
|
||||
pMsg->term = pSyncNode->pRaftStore->currentTerm;
|
||||
pMsg->lastLogIndex = pSyncNode->pLogStore->getLastIndex(pSyncNode->pLogStore);
|
||||
pMsg->lastLogTerm = pSyncNode->pLogStore->getLastTerm(pSyncNode->pLogStore);
|
||||
|
||||
ret = syncNodeRequestVote(pSyncNode, &pSyncNode->peersId[i], pMsg);
|
||||
ASSERT(ret == 0);
|
||||
syncRequestVoteDestroy(pMsg);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int32_t syncNodeRequestVotePeersSnapshot(SSyncNode* pSyncNode) {
|
||||
ASSERT(pSyncNode->state == TAOS_SYNC_STATE_CANDIDATE);
|
||||
|
||||
int32_t ret = 0;
|
||||
for (int i = 0; i < pSyncNode->peersNum; ++i) {
|
||||
SyncRequestVote* pMsg = syncRequestVoteBuild(pSyncNode->vgId);
|
||||
pMsg->srcId = pSyncNode->myRaftId;
|
||||
pMsg->destId = pSyncNode->peersId[i];
|
||||
pMsg->term = pSyncNode->pRaftStore->currentTerm;
|
||||
|
||||
ret = syncNodeGetLastIndexTerm(pSyncNode, &(pMsg->lastLogIndex), &(pMsg->lastLogTerm));
|
||||
ASSERT(ret == 0);
|
||||
|
||||
ret = syncNodeRequestVote(pSyncNode, &pSyncNode->peersId[i], pMsg);
|
||||
ASSERT(ret == 0);
|
||||
syncRequestVoteDestroy(pMsg);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int32_t syncNodeElect(SSyncNode* pSyncNode) {
|
||||
syncNodeEventLog(pSyncNode, "begin election");
|
||||
|
@ -111,27 +72,38 @@ int32_t syncNodeElect(SSyncNode* pSyncNode) {
|
|||
|
||||
}
|
||||
|
||||
switch (pSyncNode->pRaftCfg->snapshotStrategy) {
|
||||
case SYNC_STRATEGY_NO_SNAPSHOT:
|
||||
ret = syncNodeRequestVotePeers(pSyncNode);
|
||||
break;
|
||||
|
||||
case SYNC_STRATEGY_STANDARD_SNAPSHOT:
|
||||
case SYNC_STRATEGY_WAL_FIRST:
|
||||
ret = syncNodeRequestVotePeersSnapshot(pSyncNode);
|
||||
break;
|
||||
|
||||
default:
|
||||
ret = syncNodeRequestVotePeers(pSyncNode);
|
||||
break;
|
||||
}
|
||||
ASSERT(ret == 0);
|
||||
|
||||
syncNodeResetElectTimer(pSyncNode);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int32_t syncNodeRequestVote(SSyncNode* pSyncNode, const SRaftId* destRaftId, const SyncRequestVote* pMsg) {
|
||||
int32_t syncNodeRequestVotePeers(SSyncNode* pSyncNode) {
|
||||
if (pSyncNode->state != TAOS_SYNC_STATE_CANDIDATE) {
|
||||
syncNodeEventLog(pSyncNode, "not candidate, stop elect");
|
||||
return 0;
|
||||
}
|
||||
|
||||
int32_t ret = 0;
|
||||
for (int i = 0; i < pSyncNode->peersNum; ++i) {
|
||||
SyncRequestVote* pMsg = syncRequestVoteBuild(pSyncNode->vgId);
|
||||
pMsg->srcId = pSyncNode->myRaftId;
|
||||
pMsg->destId = pSyncNode->peersId[i];
|
||||
pMsg->term = pSyncNode->pRaftStore->currentTerm;
|
||||
|
||||
ret = syncNodeGetLastIndexTerm(pSyncNode, &(pMsg->lastLogIndex), &(pMsg->lastLogTerm));
|
||||
ASSERT(ret == 0);
|
||||
|
||||
ret = syncNodeSendRequestVote(pSyncNode, &pSyncNode->peersId[i], pMsg);
|
||||
ASSERT(ret == 0);
|
||||
syncRequestVoteDestroy(pMsg);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int32_t syncNodeSendRequestVote(SSyncNode* pSyncNode, const SRaftId* destRaftId, const SyncRequestVote* pMsg) {
|
||||
int32_t ret = 0;
|
||||
syncLogSendRequestVote(pSyncNode, pMsg, "");
|
||||
|
||||
|
|
|
@ -326,18 +326,18 @@ static void *syncIOConsumerFunc(void *param) {
|
|||
}
|
||||
|
||||
} else if (pRpcMsg->msgType == TDMT_SYNC_SNAPSHOT_SEND) {
|
||||
if (io->FpOnSyncSnapshotSend != NULL) {
|
||||
if (io->FpOnSyncSnapshot != NULL) {
|
||||
SyncSnapshotSend *pSyncMsg = syncSnapshotSendFromRpcMsg2(pRpcMsg);
|
||||
ASSERT(pSyncMsg != NULL);
|
||||
io->FpOnSyncSnapshotSend(io->pSyncNode, pSyncMsg);
|
||||
io->FpOnSyncSnapshot(io->pSyncNode, pSyncMsg);
|
||||
syncSnapshotSendDestroy(pSyncMsg);
|
||||
}
|
||||
|
||||
} else if (pRpcMsg->msgType == TDMT_SYNC_SNAPSHOT_RSP) {
|
||||
if (io->FpOnSyncSnapshotRsp != NULL) {
|
||||
if (io->FpOnSyncSnapshotReply != NULL) {
|
||||
SyncSnapshotRsp *pSyncMsg = syncSnapshotRspFromRpcMsg2(pRpcMsg);
|
||||
ASSERT(pSyncMsg != NULL);
|
||||
io->FpOnSyncSnapshotRsp(io->pSyncNode, pSyncMsg);
|
||||
io->FpOnSyncSnapshotReply(io->pSyncNode, pSyncMsg);
|
||||
syncSnapshotRspDestroy(pSyncMsg);
|
||||
}
|
||||
|
||||
|
|
|
@ -136,7 +136,7 @@ cJSON *syncIndexMgr2Json(SSyncIndexMgr *pSyncIndexMgr) {
|
|||
|
||||
char *syncIndexMgr2Str(SSyncIndexMgr *pSyncIndexMgr) {
|
||||
cJSON *pJson = syncIndexMgr2Json(pSyncIndexMgr);
|
||||
char *serialized = cJSON_Print(pJson);
|
||||
char * serialized = cJSON_Print(pJson);
|
||||
cJSON_Delete(pJson);
|
||||
return serialized;
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -29,7 +29,7 @@ SRaftCfgIndex *raftCfgIndexOpen(const char *path) {
|
|||
taosLSeekFile(pRaftCfgIndex->pFile, 0, SEEK_SET);
|
||||
|
||||
int32_t bufLen = MAX_CONFIG_INDEX_COUNT * 16;
|
||||
char *pBuf = taosMemoryMalloc(bufLen);
|
||||
char * pBuf = taosMemoryMalloc(bufLen);
|
||||
memset(pBuf, 0, bufLen);
|
||||
int64_t len = taosReadFile(pRaftCfgIndex->pFile, pBuf, bufLen);
|
||||
ASSERT(len > 0);
|
||||
|
@ -93,7 +93,7 @@ cJSON *raftCfgIndex2Json(SRaftCfgIndex *pRaftCfgIndex) {
|
|||
|
||||
char *raftCfgIndex2Str(SRaftCfgIndex *pRaftCfgIndex) {
|
||||
cJSON *pJson = raftCfgIndex2Json(pRaftCfgIndex);
|
||||
char *serialized = cJSON_Print(pJson);
|
||||
char * serialized = cJSON_Print(pJson);
|
||||
cJSON_Delete(pJson);
|
||||
return serialized;
|
||||
}
|
||||
|
@ -151,7 +151,7 @@ int32_t raftCfgIndexCreateFile(const char *path) {
|
|||
raftCfgIndex.configIndexCount = 1;
|
||||
raftCfgIndex.configIndexArr[0] = -1;
|
||||
|
||||
char *s = raftCfgIndex2Str(&raftCfgIndex);
|
||||
char * s = raftCfgIndex2Str(&raftCfgIndex);
|
||||
int64_t ret = taosWriteFile(pFile, s, strlen(s) + 1);
|
||||
ASSERT(ret == strlen(s) + 1);
|
||||
|
||||
|
@ -244,7 +244,7 @@ cJSON *syncCfg2Json(SSyncCfg *pSyncCfg) {
|
|||
|
||||
char *syncCfg2Str(SSyncCfg *pSyncCfg) {
|
||||
cJSON *pJson = syncCfg2Json(pSyncCfg);
|
||||
char *serialized = cJSON_Print(pJson);
|
||||
char * serialized = cJSON_Print(pJson);
|
||||
cJSON_Delete(pJson);
|
||||
return serialized;
|
||||
}
|
||||
|
@ -252,7 +252,7 @@ char *syncCfg2Str(SSyncCfg *pSyncCfg) {
|
|||
char *syncCfg2SimpleStr(SSyncCfg *pSyncCfg) {
|
||||
if (pSyncCfg != NULL) {
|
||||
int32_t len = 512;
|
||||
char *s = taosMemoryMalloc(len);
|
||||
char * s = taosMemoryMalloc(len);
|
||||
memset(s, 0, len);
|
||||
|
||||
snprintf(s, len, "{r-num:%d, my:%d, ", pSyncCfg->replicaNum, pSyncCfg->myIndex);
|
||||
|
@ -349,7 +349,7 @@ cJSON *raftCfg2Json(SRaftCfg *pRaftCfg) {
|
|||
|
||||
char *raftCfg2Str(SRaftCfg *pRaftCfg) {
|
||||
cJSON *pJson = raftCfg2Json(pRaftCfg);
|
||||
char *serialized = cJSON_Print(pJson);
|
||||
char * serialized = cJSON_Print(pJson);
|
||||
cJSON_Delete(pJson);
|
||||
return serialized;
|
||||
}
|
||||
|
@ -426,7 +426,7 @@ int32_t raftCfgFromJson(const cJSON *pRoot, SRaftCfg *pRaftCfg) {
|
|||
(pRaftCfg->configIndexArr)[i] = atoll(pIndex->valuestring);
|
||||
}
|
||||
|
||||
cJSON *pJsonSyncCfg = cJSON_GetObjectItem(pJson, "SSyncCfg");
|
||||
cJSON * pJsonSyncCfg = cJSON_GetObjectItem(pJson, "SSyncCfg");
|
||||
int32_t code = syncCfgFromJson(pJsonSyncCfg, &(pRaftCfg->cfg));
|
||||
ASSERT(code == 0);
|
||||
|
||||
|
|
|
@ -33,21 +33,11 @@ static int32_t raftLogAppendEntry(struct SSyncLogStore* pLogStore, SSyncRaftEn
|
|||
static int32_t raftLogGetEntry(struct SSyncLogStore* pLogStore, SyncIndex index, SSyncRaftEntry** ppEntry);
|
||||
static int32_t raftLogTruncate(struct SSyncLogStore* pLogStore, SyncIndex fromIndex);
|
||||
static bool raftLogExist(struct SSyncLogStore* pLogStore, SyncIndex index);
|
||||
static int32_t raftLogUpdateCommitIndex(SSyncLogStore* pLogStore, SyncIndex index);
|
||||
static SyncIndex raftlogCommitIndex(SSyncLogStore* pLogStore);
|
||||
|
||||
// private function
|
||||
static int32_t raftLogGetLastEntry(SSyncLogStore* pLogStore, SSyncRaftEntry** ppLastEntry);
|
||||
|
||||
//-------------------------------
|
||||
// log[0 .. n]
|
||||
static SSyncRaftEntry* logStoreGetLastEntry(SSyncLogStore* pLogStore);
|
||||
static SyncIndex logStoreLastIndex(SSyncLogStore* pLogStore);
|
||||
static SyncTerm logStoreLastTerm(SSyncLogStore* pLogStore);
|
||||
static SSyncRaftEntry* logStoreGetEntry(SSyncLogStore* pLogStore, SyncIndex index);
|
||||
static int32_t logStoreAppendEntry(SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry);
|
||||
static int32_t logStoreTruncate(SSyncLogStore* pLogStore, SyncIndex fromIndex);
|
||||
static int32_t logStoreUpdateCommitIndex(SSyncLogStore* pLogStore, SyncIndex index);
|
||||
static SyncIndex logStoreGetCommitIndex(SSyncLogStore* pLogStore);
|
||||
|
||||
//-------------------------------
|
||||
SSyncLogStore* logStoreCreate(SSyncNode* pSyncNode) {
|
||||
SSyncLogStore* pLogStore = taosMemoryMalloc(sizeof(SSyncLogStore));
|
||||
|
@ -74,14 +64,8 @@ SSyncLogStore* logStoreCreate(SSyncNode* pSyncNode) {
|
|||
pData->pWalHandle = walOpenReader(pData->pWal, NULL);
|
||||
ASSERT(pData->pWalHandle != NULL);
|
||||
|
||||
pLogStore->appendEntry = logStoreAppendEntry;
|
||||
pLogStore->getEntry = logStoreGetEntry;
|
||||
pLogStore->truncate = logStoreTruncate;
|
||||
pLogStore->getLastIndex = logStoreLastIndex;
|
||||
pLogStore->getLastTerm = logStoreLastTerm;
|
||||
pLogStore->updateCommitIndex = logStoreUpdateCommitIndex;
|
||||
pLogStore->getCommitIndex = logStoreGetCommitIndex;
|
||||
|
||||
pLogStore->syncLogUpdateCommitIndex = raftLogUpdateCommitIndex;
|
||||
pLogStore->syncLogCommitIndex = raftlogCommitIndex;
|
||||
pLogStore->syncLogRestoreFromSnapshot = raftLogRestoreFromSnapshot;
|
||||
pLogStore->syncLogBeginIndex = raftLogBeginIndex;
|
||||
pLogStore->syncLogEndIndex = raftLogEndIndex;
|
||||
|
@ -234,6 +218,8 @@ static int32_t raftLogAppendEntry(struct SSyncLogStore* pLogStore, SSyncRaftEntr
|
|||
snprintf(logBuf, sizeof(logBuf), "wal write error, index:%" PRId64 ", err:%d %X, msg:%s, syserr:%d, sysmsg:%s",
|
||||
pEntry->index, err, err, errStr, sysErr, sysErrStr);
|
||||
syncNodeErrorLog(pData->pSyncNode, logBuf);
|
||||
|
||||
ASSERT(0);
|
||||
return -1;
|
||||
}
|
||||
pEntry->index = index;
|
||||
|
@ -277,11 +263,15 @@ static int32_t raftLogGetEntry(struct SSyncLogStore* pLogStore, SyncIndex index,
|
|||
|
||||
do {
|
||||
char logBuf[128];
|
||||
if (terrno == TSDB_CODE_WAL_LOG_NOT_EXIST) {
|
||||
snprintf(logBuf, sizeof(logBuf),
|
||||
"wal read not exist, index:%" PRId64 ", err:%d %X, msg:%s, syserr:%d, sysmsg:%s", index, err, err,
|
||||
errStr, sysErr, sysErrStr);
|
||||
syncNodeEventLog(pData->pSyncNode, logBuf);
|
||||
|
||||
} else {
|
||||
snprintf(logBuf, sizeof(logBuf), "wal read error, index:%" PRId64 ", err:%d %X, msg:%s, syserr:%d, sysmsg:%s",
|
||||
index, err, err, errStr, sysErr, sysErrStr);
|
||||
if (terrno == TSDB_CODE_WAL_LOG_NOT_EXIST) {
|
||||
// syncNodeEventLog(pData->pSyncNode, logBuf);
|
||||
} else {
|
||||
syncNodeErrorLog(pData->pSyncNode, logBuf);
|
||||
}
|
||||
} while (0);
|
||||
|
@ -372,157 +362,7 @@ static int32_t raftLogGetLastEntry(SSyncLogStore* pLogStore, SSyncRaftEntry** pp
|
|||
return -1;
|
||||
}
|
||||
|
||||
//-------------------------------
|
||||
// log[0 .. n]
|
||||
|
||||
int32_t logStoreAppendEntry(SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry) {
|
||||
SSyncLogStoreData* pData = pLogStore->data;
|
||||
SWal* pWal = pData->pWal;
|
||||
|
||||
SyncIndex index = 0;
|
||||
SWalSyncInfo syncMeta = {0};
|
||||
syncMeta.isWeek = pEntry->isWeak;
|
||||
syncMeta.seqNum = pEntry->seqNum;
|
||||
syncMeta.term = pEntry->term;
|
||||
|
||||
index = walAppendLog(pWal, pEntry->originalRpcType, syncMeta, pEntry->data, pEntry->dataLen);
|
||||
if (index < 0) {
|
||||
int32_t err = terrno;
|
||||
const char* errStr = tstrerror(err);
|
||||
int32_t sysErr = errno;
|
||||
const char* sysErrStr = strerror(errno);
|
||||
|
||||
char logBuf[128];
|
||||
snprintf(logBuf, sizeof(logBuf), "wal write error, index:%" PRId64 ", err:%d %X, msg:%s, syserr:%d, sysmsg:%s",
|
||||
pEntry->index, err, err, errStr, sysErr, sysErrStr);
|
||||
syncNodeErrorLog(pData->pSyncNode, logBuf);
|
||||
|
||||
ASSERT(0);
|
||||
return -1;
|
||||
}
|
||||
pEntry->index = index;
|
||||
|
||||
do {
|
||||
char eventLog[128];
|
||||
snprintf(eventLog, sizeof(eventLog), "write2 index:%" PRId64 ", type:%s, origin type:%s", pEntry->index,
|
||||
TMSG_INFO(pEntry->msgType), TMSG_INFO(pEntry->originalRpcType));
|
||||
syncNodeEventLog(pData->pSyncNode, eventLog);
|
||||
} while (0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
SSyncRaftEntry* logStoreGetEntryWithoutLock(SSyncLogStore* pLogStore, SyncIndex index) {
|
||||
SSyncLogStoreData* pData = pLogStore->data;
|
||||
SWal* pWal = pData->pWal;
|
||||
|
||||
if (index >= SYNC_INDEX_BEGIN && index <= logStoreLastIndex(pLogStore)) {
|
||||
// SWalReadHandle* pWalHandle = walOpenReadHandle(pWal);
|
||||
SWalReader* pWalHandle = pData->pWalHandle;
|
||||
ASSERT(pWalHandle != NULL);
|
||||
|
||||
int32_t code = walReadVer(pWalHandle, index);
|
||||
// int32_t code = walReadVerCached(pWalHandle, index);
|
||||
if (code != 0) {
|
||||
int32_t err = terrno;
|
||||
const char* errStr = tstrerror(err);
|
||||
int32_t sysErr = errno;
|
||||
const char* sysErrStr = strerror(errno);
|
||||
|
||||
do {
|
||||
char logBuf[128];
|
||||
snprintf(logBuf, sizeof(logBuf), "wal read error, index:%" PRId64 ", err:%d %X, msg:%s, syserr:%d, sysmsg:%s",
|
||||
index, err, err, errStr, sysErr, sysErrStr);
|
||||
if (terrno == TSDB_CODE_WAL_LOG_NOT_EXIST) {
|
||||
// syncNodeEventLog(pData->pSyncNode, logBuf);
|
||||
} else {
|
||||
syncNodeErrorLog(pData->pSyncNode, logBuf);
|
||||
}
|
||||
} while (0);
|
||||
|
||||
sError("failed to read ver since %s. index:%" PRId64 "", tstrerror(terrno), index);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
SSyncRaftEntry* pEntry = syncEntryBuild(pWalHandle->pHead->head.bodyLen);
|
||||
ASSERT(pEntry != NULL);
|
||||
|
||||
pEntry->msgType = TDMT_SYNC_CLIENT_REQUEST;
|
||||
pEntry->originalRpcType = pWalHandle->pHead->head.msgType;
|
||||
pEntry->seqNum = pWalHandle->pHead->head.syncMeta.seqNum;
|
||||
pEntry->isWeak = pWalHandle->pHead->head.syncMeta.isWeek;
|
||||
pEntry->term = pWalHandle->pHead->head.syncMeta.term;
|
||||
pEntry->index = index;
|
||||
ASSERT(pEntry->dataLen == pWalHandle->pHead->head.bodyLen);
|
||||
memcpy(pEntry->data, pWalHandle->pHead->head.body, pWalHandle->pHead->head.bodyLen);
|
||||
|
||||
/*
|
||||
int32_t saveErr = terrno;
|
||||
walCloseReadHandle(pWalHandle);
|
||||
terrno = saveErr;
|
||||
*/
|
||||
|
||||
return pEntry;
|
||||
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
SSyncRaftEntry* logStoreGetEntry(SSyncLogStore* pLogStore, SyncIndex index) {
|
||||
SSyncLogStoreData* pData = pLogStore->data;
|
||||
SSyncRaftEntry *pEntry = NULL;
|
||||
|
||||
taosThreadMutexLock(&pData->mutex);
|
||||
pEntry = logStoreGetEntryWithoutLock(pLogStore, index);
|
||||
taosThreadMutexUnlock(&pData->mutex);
|
||||
return pEntry;
|
||||
}
|
||||
|
||||
int32_t logStoreTruncate(SSyncLogStore* pLogStore, SyncIndex fromIndex) {
|
||||
SSyncLogStoreData* pData = pLogStore->data;
|
||||
SWal* pWal = pData->pWal;
|
||||
// ASSERT(walRollback(pWal, fromIndex) == 0);
|
||||
int32_t code = walRollback(pWal, fromIndex);
|
||||
if (code != 0) {
|
||||
int32_t err = terrno;
|
||||
const char* errStr = tstrerror(err);
|
||||
int32_t sysErr = errno;
|
||||
const char* sysErrStr = strerror(errno);
|
||||
sError("vgId:%d, wal truncate error, from-index:%" PRId64 ", err:%d %X, msg:%s, syserr:%d, sysmsg:%s",
|
||||
pData->pSyncNode->vgId, fromIndex, err, err, errStr, sysErr, sysErrStr);
|
||||
|
||||
ASSERT(0);
|
||||
}
|
||||
|
||||
// event log
|
||||
do {
|
||||
char logBuf[128];
|
||||
snprintf(logBuf, sizeof(logBuf), "wal truncate, from-index:%" PRId64, fromIndex);
|
||||
syncNodeEventLog(pData->pSyncNode, logBuf);
|
||||
} while (0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
SyncIndex logStoreLastIndex(SSyncLogStore* pLogStore) {
|
||||
SSyncLogStoreData* pData = pLogStore->data;
|
||||
SWal* pWal = pData->pWal;
|
||||
SyncIndex lastIndex = walGetLastVer(pWal);
|
||||
return lastIndex;
|
||||
}
|
||||
|
||||
SyncTerm logStoreLastTerm(SSyncLogStore* pLogStore) {
|
||||
SyncTerm lastTerm = 0;
|
||||
SSyncRaftEntry* pLastEntry = logStoreGetLastEntry(pLogStore);
|
||||
if (pLastEntry != NULL) {
|
||||
lastTerm = pLastEntry->term;
|
||||
taosMemoryFree(pLastEntry);
|
||||
}
|
||||
return lastTerm;
|
||||
}
|
||||
|
||||
int32_t logStoreUpdateCommitIndex(SSyncLogStore* pLogStore, SyncIndex index) {
|
||||
int32_t raftLogUpdateCommitIndex(SSyncLogStore* pLogStore, SyncIndex index) {
|
||||
SSyncLogStoreData* pData = pLogStore->data;
|
||||
SWal* pWal = pData->pWal;
|
||||
// ASSERT(walCommit(pWal, index) == 0);
|
||||
|
@ -540,23 +380,11 @@ int32_t logStoreUpdateCommitIndex(SSyncLogStore* pLogStore, SyncIndex index) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
SyncIndex logStoreGetCommitIndex(SSyncLogStore* pLogStore) {
|
||||
SyncIndex raftlogCommitIndex(SSyncLogStore* pLogStore) {
|
||||
SSyncLogStoreData* pData = pLogStore->data;
|
||||
return pData->pSyncNode->commitIndex;
|
||||
}
|
||||
|
||||
SSyncRaftEntry* logStoreGetLastEntry(SSyncLogStore* pLogStore) {
|
||||
SSyncLogStoreData* pData = pLogStore->data;
|
||||
SWal* pWal = pData->pWal;
|
||||
SyncIndex lastIndex = walGetLastVer(pWal);
|
||||
|
||||
SSyncRaftEntry* pEntry = NULL;
|
||||
if (lastIndex > 0) {
|
||||
pEntry = logStoreGetEntry(pLogStore, lastIndex);
|
||||
}
|
||||
return pEntry;
|
||||
}
|
||||
|
||||
cJSON* logStore2Json(SSyncLogStore* pLogStore) {
|
||||
char u64buf[128] = {0};
|
||||
SSyncLogStoreData* pData = (SSyncLogStoreData*)pLogStore->data;
|
||||
|
@ -595,7 +423,9 @@ cJSON* logStore2Json(SSyncLogStore* pLogStore) {
|
|||
|
||||
if (!raftLogIsEmpty(pLogStore)) {
|
||||
for (SyncIndex i = beginIndex; i <= endIndex; ++i) {
|
||||
SSyncRaftEntry* pEntry = logStoreGetEntry(pLogStore, i);
|
||||
SSyncRaftEntry* pEntry = NULL;
|
||||
raftLogGetEntry(pLogStore, i, &pEntry);
|
||||
|
||||
cJSON_AddItemToArray(pEntries, syncEntry2Json(pEntry));
|
||||
syncEntryDestory(pEntry);
|
||||
}
|
||||
|
@ -675,14 +505,14 @@ SyncIndex logStoreWalCommitVer(SSyncLogStore* pLogStore) {
|
|||
// for debug -----------------
|
||||
void logStorePrint(SSyncLogStore* pLogStore) {
|
||||
char* serialized = logStore2Str(pLogStore);
|
||||
printf("logStorePrint | len:%lu | %s \n", strlen(serialized), serialized);
|
||||
printf("logStorePrint | len:%" PRIu64 " | %s \n", strlen(serialized), serialized);
|
||||
fflush(NULL);
|
||||
taosMemoryFree(serialized);
|
||||
}
|
||||
|
||||
void logStorePrint2(char* s, SSyncLogStore* pLogStore) {
|
||||
char* serialized = logStore2Str(pLogStore);
|
||||
printf("logStorePrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized);
|
||||
printf("logStorePrint2 | len:%" PRIu64 " | %s | %s \n", strlen(serialized), s, serialized);
|
||||
fflush(NULL);
|
||||
taosMemoryFree(serialized);
|
||||
}
|
||||
|
@ -690,7 +520,7 @@ void logStorePrint2(char* s, SSyncLogStore* pLogStore) {
|
|||
void logStoreLog(SSyncLogStore* pLogStore) {
|
||||
if (gRaftDetailLog) {
|
||||
char* serialized = logStore2Str(pLogStore);
|
||||
sTraceLong("logStoreLog | len:%lu | %s", strlen(serialized), serialized);
|
||||
sTraceLong("logStoreLog | len:%" PRIu64 " | %s", strlen(serialized), serialized);
|
||||
taosMemoryFree(serialized);
|
||||
}
|
||||
}
|
||||
|
@ -698,7 +528,7 @@ void logStoreLog(SSyncLogStore* pLogStore) {
|
|||
void logStoreLog2(char* s, SSyncLogStore* pLogStore) {
|
||||
if (gRaftDetailLog) {
|
||||
char* serialized = logStore2Str(pLogStore);
|
||||
sTraceLong("logStoreLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized);
|
||||
sTraceLong("logStoreLog2 | len:%" PRIu64 " | %s | %s", strlen(serialized), s, serialized);
|
||||
taosMemoryFree(serialized);
|
||||
}
|
||||
}
|
||||
|
@ -706,28 +536,28 @@ void logStoreLog2(char* s, SSyncLogStore* pLogStore) {
|
|||
// for debug -----------------
|
||||
void logStoreSimplePrint(SSyncLogStore* pLogStore) {
|
||||
char* serialized = logStoreSimple2Str(pLogStore);
|
||||
printf("logStoreSimplePrint | len:%lu | %s \n", strlen(serialized), serialized);
|
||||
printf("logStoreSimplePrint | len:%" PRIu64 " | %s \n", strlen(serialized), serialized);
|
||||
fflush(NULL);
|
||||
taosMemoryFree(serialized);
|
||||
}
|
||||
|
||||
void logStoreSimplePrint2(char* s, SSyncLogStore* pLogStore) {
|
||||
char* serialized = logStoreSimple2Str(pLogStore);
|
||||
printf("logStoreSimplePrint2 | len:%lu | %s | %s \n", strlen(serialized), s, serialized);
|
||||
printf("logStoreSimplePrint2 | len:%" PRIu64 " | %s | %s \n", strlen(serialized), s, serialized);
|
||||
fflush(NULL);
|
||||
taosMemoryFree(serialized);
|
||||
}
|
||||
|
||||
void logStoreSimpleLog(SSyncLogStore* pLogStore) {
|
||||
char* serialized = logStoreSimple2Str(pLogStore);
|
||||
sTrace("logStoreSimpleLog | len:%lu | %s", strlen(serialized), serialized);
|
||||
sTrace("logStoreSimpleLog | len:%" PRIu64 " | %s", strlen(serialized), serialized);
|
||||
taosMemoryFree(serialized);
|
||||
}
|
||||
|
||||
void logStoreSimpleLog2(char* s, SSyncLogStore* pLogStore) {
|
||||
if (gRaftDetailLog) {
|
||||
char* serialized = logStoreSimple2Str(pLogStore);
|
||||
sTrace("logStoreSimpleLog2 | len:%lu | %s | %s", strlen(serialized), s, serialized);
|
||||
sTrace("logStoreSimpleLog2 | len:%" PRIu64 " | %s | %s", strlen(serialized), s, serialized);
|
||||
taosMemoryFree(serialized);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -47,325 +47,29 @@
|
|||
// msource |-> i,
|
||||
// mdest |-> j])
|
||||
// /\ UNCHANGED <<serverVars, candidateVars, leaderVars, logVars>>
|
||||
//
|
||||
int32_t syncNodeAppendEntriesPeers(SSyncNode* pSyncNode) {
|
||||
ASSERT(pSyncNode->state == TAOS_SYNC_STATE_LEADER);
|
||||
|
||||
syncIndexMgrLog2("==syncNodeAppendEntriesPeers== pNextIndex", pSyncNode->pNextIndex);
|
||||
syncIndexMgrLog2("==syncNodeAppendEntriesPeers== pMatchIndex", pSyncNode->pMatchIndex);
|
||||
logStoreSimpleLog2("==syncNodeAppendEntriesPeers==", pSyncNode->pLogStore);
|
||||
|
||||
int32_t ret = 0;
|
||||
for (int i = 0; i < pSyncNode->peersNum; ++i) {
|
||||
SRaftId* pDestId = &(pSyncNode->peersId[i]);
|
||||
|
||||
// set prevLogIndex
|
||||
int32_t syncNodeReplicateOne(SSyncNode* pSyncNode, SRaftId* pDestId) {
|
||||
// next index
|
||||
SyncIndex nextIndex = syncIndexMgrGetIndex(pSyncNode->pNextIndex, pDestId);
|
||||
|
||||
SyncIndex preLogIndex = nextIndex - 1;
|
||||
// maybe start snapshot
|
||||
SyncIndex logStartIndex = pSyncNode->pLogStore->syncLogBeginIndex(pSyncNode->pLogStore);
|
||||
SyncIndex logEndIndex = pSyncNode->pLogStore->syncLogEndIndex(pSyncNode->pLogStore);
|
||||
if (nextIndex < logStartIndex || nextIndex - 1 > logEndIndex) {
|
||||
char logBuf[128];
|
||||
snprintf(logBuf, sizeof(logBuf), "start snapshot for next-index:%ld, start:%ld, end:%ld", nextIndex, logStartIndex,
|
||||
logEndIndex);
|
||||
syncNodeEventLog(pSyncNode, logBuf);
|
||||
|
||||
// set preLogTerm
|
||||
SyncTerm preLogTerm = 0;
|
||||
if (preLogIndex >= SYNC_INDEX_BEGIN) {
|
||||
SSyncRaftEntry* pPreEntry = pSyncNode->pLogStore->getEntry(pSyncNode->pLogStore, preLogIndex);
|
||||
ASSERT(pPreEntry != NULL);
|
||||
|
||||
preLogTerm = pPreEntry->term;
|
||||
syncEntryDestory(pPreEntry);
|
||||
// start snapshot
|
||||
int32_t code = syncNodeStartSnapshot(pSyncNode, pDestId);
|
||||
ASSERT(code == 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// batch optimized
|
||||
// SyncIndex lastIndex = syncUtilMinIndex(pSyncNode->pLogStore->getLastIndex(pSyncNode->pLogStore), nextIndex);
|
||||
|
||||
SyncAppendEntries* pMsg = NULL;
|
||||
SSyncRaftEntry* pEntry = pSyncNode->pLogStore->getEntry(pSyncNode->pLogStore, nextIndex);
|
||||
if (pEntry != NULL) {
|
||||
pMsg = syncAppendEntriesBuild(pEntry->bytes, pSyncNode->vgId);
|
||||
ASSERT(pMsg != NULL);
|
||||
|
||||
// add pEntry into msg
|
||||
uint32_t len;
|
||||
char* serialized = syncEntrySerialize(pEntry, &len);
|
||||
ASSERT(len == pEntry->bytes);
|
||||
memcpy(pMsg->data, serialized, len);
|
||||
|
||||
taosMemoryFree(serialized);
|
||||
syncEntryDestory(pEntry);
|
||||
|
||||
} else {
|
||||
// maybe overflow, send empty record
|
||||
pMsg = syncAppendEntriesBuild(0, pSyncNode->vgId);
|
||||
ASSERT(pMsg != NULL);
|
||||
}
|
||||
|
||||
ASSERT(pMsg != NULL);
|
||||
pMsg->srcId = pSyncNode->myRaftId;
|
||||
pMsg->destId = *pDestId;
|
||||
pMsg->term = pSyncNode->pRaftStore->currentTerm;
|
||||
pMsg->prevLogIndex = preLogIndex;
|
||||
pMsg->prevLogTerm = preLogTerm;
|
||||
pMsg->commitIndex = pSyncNode->commitIndex;
|
||||
|
||||
syncAppendEntriesLog2("==syncNodeAppendEntriesPeers==", pMsg);
|
||||
|
||||
// send AppendEntries
|
||||
syncNodeAppendEntries(pSyncNode, pDestId, pMsg);
|
||||
syncAppendEntriesDestroy(pMsg);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int32_t syncNodeAppendEntriesOnePeer(SSyncNode* pSyncNode, SRaftId* pDestId, SyncIndex nextIndex) {
|
||||
int32_t ret = 0;
|
||||
|
||||
// pre index, pre term
|
||||
SyncIndex preLogIndex = syncNodeGetPreIndex(pSyncNode, nextIndex);
|
||||
SyncTerm preLogTerm = syncNodeGetPreTerm(pSyncNode, nextIndex);
|
||||
if (preLogTerm == SYNC_TERM_INVALID) {
|
||||
SyncIndex newNextIndex = syncNodeGetLastIndex(pSyncNode) + 1;
|
||||
// SyncIndex newNextIndex = nextIndex + 1;
|
||||
|
||||
syncIndexMgrSetIndex(pSyncNode->pNextIndex, pDestId, newNextIndex);
|
||||
syncIndexMgrSetIndex(pSyncNode->pMatchIndex, pDestId, SYNC_INDEX_INVALID);
|
||||
sError("vgId:%d, sync get pre term error, nextIndex:%" PRId64 ", update next-index:%" PRId64
|
||||
", match-index:%d, raftid:%" PRId64,
|
||||
pSyncNode->vgId, nextIndex, newNextIndex, SYNC_INDEX_INVALID, pDestId->addr);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// entry pointer array
|
||||
SSyncRaftEntry* entryPArr[SYNC_MAX_BATCH_SIZE];
|
||||
memset(entryPArr, 0, sizeof(entryPArr));
|
||||
|
||||
// get entry batch
|
||||
int32_t getCount = 0;
|
||||
SyncIndex getEntryIndex = nextIndex;
|
||||
for (int32_t i = 0; i < pSyncNode->pRaftCfg->batchSize; ++i) {
|
||||
SSyncRaftEntry* pEntry = NULL;
|
||||
int32_t code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, getEntryIndex, &pEntry);
|
||||
if (code == 0) {
|
||||
ASSERT(pEntry != NULL);
|
||||
entryPArr[i] = pEntry;
|
||||
getCount++;
|
||||
getEntryIndex++;
|
||||
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// event log
|
||||
do {
|
||||
char logBuf[128];
|
||||
char host[64];
|
||||
uint16_t port;
|
||||
syncUtilU642Addr(pDestId->addr, host, sizeof(host), &port);
|
||||
snprintf(logBuf, sizeof(logBuf), "build batch:%d for %s:%d", getCount, host, port);
|
||||
syncNodeEventLog(pSyncNode, logBuf);
|
||||
} while (0);
|
||||
|
||||
// build msg
|
||||
SyncAppendEntriesBatch* pMsg = syncAppendEntriesBatchBuild(entryPArr, getCount, pSyncNode->vgId);
|
||||
ASSERT(pMsg != NULL);
|
||||
|
||||
// free entries
|
||||
for (int32_t i = 0; i < pSyncNode->pRaftCfg->batchSize; ++i) {
|
||||
SSyncRaftEntry* pEntry = entryPArr[i];
|
||||
if (pEntry != NULL) {
|
||||
syncEntryDestory(pEntry);
|
||||
entryPArr[i] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// prepare msg
|
||||
pMsg->srcId = pSyncNode->myRaftId;
|
||||
pMsg->destId = *pDestId;
|
||||
pMsg->term = pSyncNode->pRaftStore->currentTerm;
|
||||
pMsg->prevLogIndex = preLogIndex;
|
||||
pMsg->prevLogTerm = preLogTerm;
|
||||
pMsg->commitIndex = pSyncNode->commitIndex;
|
||||
pMsg->privateTerm = 0;
|
||||
pMsg->dataCount = getCount;
|
||||
|
||||
// send msg
|
||||
syncNodeAppendEntriesBatch(pSyncNode, pDestId, pMsg);
|
||||
|
||||
// speed up
|
||||
if (pMsg->dataCount > 0 && pSyncNode->commitIndex - pMsg->prevLogIndex > SYNC_SLOW_DOWN_RANGE) {
|
||||
ret = 1;
|
||||
|
||||
#if 0
|
||||
do {
|
||||
char logBuf[128];
|
||||
char host[64];
|
||||
uint16_t port;
|
||||
syncUtilU642Addr(pDestId->addr, host, sizeof(host), &port);
|
||||
snprintf(logBuf, sizeof(logBuf), "maybe speed up for %s:%d, pre-index:%ld", host, port, pMsg->prevLogIndex);
|
||||
syncNodeEventLog(pSyncNode, logBuf);
|
||||
} while (0);
|
||||
#endif
|
||||
}
|
||||
|
||||
syncAppendEntriesBatchDestroy(pMsg);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int32_t syncNodeAppendEntriesPeersSnapshot2(SSyncNode* pSyncNode) {
|
||||
if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
int32_t ret = 0;
|
||||
for (int i = 0; i < pSyncNode->peersNum; ++i) {
|
||||
SRaftId* pDestId = &(pSyncNode->peersId[i]);
|
||||
|
||||
// next index
|
||||
SyncIndex nextIndex = syncIndexMgrGetIndex(pSyncNode->pNextIndex, pDestId);
|
||||
ret = syncNodeAppendEntriesOnePeer(pSyncNode, pDestId, nextIndex);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#if 0
|
||||
int32_t syncNodeAppendEntriesPeersSnapshot2(SSyncNode* pSyncNode) {
|
||||
if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
int32_t ret = 0;
|
||||
for (int i = 0; i < pSyncNode->peersNum; ++i) {
|
||||
SRaftId* pDestId = &(pSyncNode->peersId[i]);
|
||||
|
||||
// next index
|
||||
SyncIndex nextIndex = syncIndexMgrGetIndex(pSyncNode->pNextIndex, pDestId);
|
||||
|
||||
// pre index, pre term
|
||||
SyncIndex preLogIndex = syncNodeGetPreIndex(pSyncNode, nextIndex);
|
||||
SyncTerm preLogTerm = syncNodeGetPreTerm(pSyncNode, nextIndex);
|
||||
if (preLogTerm == SYNC_TERM_INVALID) {
|
||||
SyncIndex newNextIndex = syncNodeGetLastIndex(pSyncNode) + 1;
|
||||
// SyncIndex newNextIndex = nextIndex + 1;
|
||||
|
||||
syncIndexMgrSetIndex(pSyncNode->pNextIndex, pDestId, newNextIndex);
|
||||
syncIndexMgrSetIndex(pSyncNode->pMatchIndex, pDestId, SYNC_INDEX_INVALID);
|
||||
sError("vgId:%d, sync get pre term error, nextIndex:%" PRId64 ", update next-index:%" PRId64
|
||||
", match-index:%d, raftid:%" PRId64,
|
||||
pSyncNode->vgId, nextIndex, newNextIndex, SYNC_INDEX_INVALID, pDestId->addr);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// entry pointer array
|
||||
SSyncRaftEntry* entryPArr[SYNC_MAX_BATCH_SIZE];
|
||||
memset(entryPArr, 0, sizeof(entryPArr));
|
||||
|
||||
// get entry batch
|
||||
int32_t getCount = 0;
|
||||
SyncIndex getEntryIndex = nextIndex;
|
||||
for (int32_t i = 0; i < pSyncNode->pRaftCfg->batchSize; ++i) {
|
||||
SSyncRaftEntry* pEntry = NULL;
|
||||
int32_t code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, getEntryIndex, &pEntry);
|
||||
if (code == 0) {
|
||||
ASSERT(pEntry != NULL);
|
||||
entryPArr[i] = pEntry;
|
||||
getCount++;
|
||||
getEntryIndex++;
|
||||
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// event log
|
||||
do {
|
||||
char logBuf[128];
|
||||
char host[64];
|
||||
uint16_t port;
|
||||
syncUtilU642Addr(pDestId->addr, host, sizeof(host), &port);
|
||||
snprintf(logBuf, sizeof(logBuf), "build batch:%d for %s:%d", getCount, host, port);
|
||||
syncNodeEventLog(pSyncNode, logBuf);
|
||||
} while (0);
|
||||
|
||||
// build msg
|
||||
SyncAppendEntriesBatch* pMsg = syncAppendEntriesBatchBuild(entryPArr, getCount, pSyncNode->vgId);
|
||||
ASSERT(pMsg != NULL);
|
||||
|
||||
// free entries
|
||||
for (int32_t i = 0; i < pSyncNode->pRaftCfg->batchSize; ++i) {
|
||||
SSyncRaftEntry* pEntry = entryPArr[i];
|
||||
if (pEntry != NULL) {
|
||||
syncEntryDestory(pEntry);
|
||||
entryPArr[i] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// prepare msg
|
||||
pMsg->srcId = pSyncNode->myRaftId;
|
||||
pMsg->destId = *pDestId;
|
||||
pMsg->term = pSyncNode->pRaftStore->currentTerm;
|
||||
pMsg->prevLogIndex = preLogIndex;
|
||||
pMsg->prevLogTerm = preLogTerm;
|
||||
pMsg->commitIndex = pSyncNode->commitIndex;
|
||||
pMsg->privateTerm = 0;
|
||||
pMsg->dataCount = getCount;
|
||||
|
||||
// send msg
|
||||
syncNodeAppendEntriesBatch(pSyncNode, pDestId, pMsg);
|
||||
|
||||
// speed up
|
||||
if (pMsg->dataCount > 0 && pSyncNode->commitIndex - pMsg->prevLogIndex > SYNC_SLOW_DOWN_RANGE) {
|
||||
ret = 1;
|
||||
|
||||
#if 0
|
||||
do {
|
||||
char logBuf[128];
|
||||
char host[64];
|
||||
uint16_t port;
|
||||
syncUtilU642Addr(pDestId->addr, host, sizeof(host), &port);
|
||||
snprintf(logBuf, sizeof(logBuf), "maybe speed up for %s:%d, pre-index:%ld", host, port, pMsg->prevLogIndex);
|
||||
syncNodeEventLog(pSyncNode, logBuf);
|
||||
} while (0);
|
||||
#endif
|
||||
}
|
||||
|
||||
syncAppendEntriesBatchDestroy(pMsg);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
int32_t syncNodeAppendEntriesPeersSnapshot(SSyncNode* pSyncNode) {
|
||||
ASSERT(pSyncNode->state == TAOS_SYNC_STATE_LEADER);
|
||||
|
||||
syncIndexMgrLog2("begin append entries peers pNextIndex:", pSyncNode->pNextIndex);
|
||||
syncIndexMgrLog2("begin append entries peers pMatchIndex:", pSyncNode->pMatchIndex);
|
||||
logStoreSimpleLog2("begin append entries peers LogStore:", pSyncNode->pLogStore);
|
||||
|
||||
int32_t ret = 0;
|
||||
for (int i = 0; i < pSyncNode->peersNum; ++i) {
|
||||
SRaftId* pDestId = &(pSyncNode->peersId[i]);
|
||||
|
||||
// next index
|
||||
SyncIndex nextIndex = syncIndexMgrGetIndex(pSyncNode->pNextIndex, pDestId);
|
||||
|
||||
// pre index, pre term
|
||||
SyncIndex preLogIndex = syncNodeGetPreIndex(pSyncNode, nextIndex);
|
||||
SyncTerm preLogTerm = syncNodeGetPreTerm(pSyncNode, nextIndex);
|
||||
if (preLogTerm == SYNC_TERM_INVALID) {
|
||||
SyncIndex newNextIndex = syncNodeGetLastIndex(pSyncNode) + 1;
|
||||
// SyncIndex newNextIndex = nextIndex + 1;
|
||||
|
||||
syncIndexMgrSetIndex(pSyncNode->pNextIndex, pDestId, newNextIndex);
|
||||
syncIndexMgrSetIndex(pSyncNode->pMatchIndex, pDestId, SYNC_INDEX_INVALID);
|
||||
sError("vgId:%d, sync get pre term error, nextIndex:%" PRId64 ", update next-index:%" PRId64
|
||||
", match-index:%d, raftid:%" PRId64,
|
||||
pSyncNode->vgId, nextIndex, newNextIndex, SYNC_INDEX_INVALID, pDestId->addr);
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
// prepare entry
|
||||
SyncAppendEntries* pMsg = NULL;
|
||||
|
@ -395,8 +99,18 @@ int32_t syncNodeAppendEntriesPeersSnapshot(SSyncNode* pSyncNode) {
|
|||
ASSERT(pMsg != NULL);
|
||||
|
||||
} else {
|
||||
syncNodeLog3("", pSyncNode);
|
||||
ASSERT(0);
|
||||
do {
|
||||
char host[64];
|
||||
uint16_t port;
|
||||
syncUtilU642Addr(pDestId->addr, host, sizeof(host), &port);
|
||||
|
||||
char logBuf[128];
|
||||
snprintf(logBuf, sizeof(logBuf), "replicate to %s:%d error, next-index:%ld", host, port, nextIndex);
|
||||
syncNodeErrorLog(pSyncNode, logBuf);
|
||||
} while (0);
|
||||
|
||||
syncAppendEntriesDestroy(pMsg);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -412,67 +126,64 @@ int32_t syncNodeAppendEntriesPeersSnapshot(SSyncNode* pSyncNode) {
|
|||
// pMsg->privateTerm = syncIndexMgrGetTerm(pSyncNode->pNextIndex, pDestId);
|
||||
|
||||
// send msg
|
||||
syncNodeAppendEntries(pSyncNode, pDestId, pMsg);
|
||||
syncNodeMaybeSendAppendEntries(pSyncNode, pDestId, pMsg);
|
||||
syncAppendEntriesDestroy(pMsg);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int32_t syncNodeReplicate(SSyncNode* pSyncNode) {
|
||||
if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
syncNodeEventLog(pSyncNode, "do replicate");
|
||||
|
||||
int32_t ret = 0;
|
||||
for (int i = 0; i < pSyncNode->peersNum; ++i) {
|
||||
SRaftId* pDestId = &(pSyncNode->peersId[i]);
|
||||
ret = syncNodeReplicateOne(pSyncNode, pDestId);
|
||||
if (ret != 0) {
|
||||
char host[64];
|
||||
int16_t port;
|
||||
syncUtilU642Addr(pDestId->addr, host, sizeof(host), &port);
|
||||
sError("vgId:%d, do append entries error for %s:%d", pSyncNode->vgId, host, port);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int32_t syncNodeSendAppendEntries(SSyncNode* pSyncNode, const SRaftId* destRaftId, const SyncAppendEntries* pMsg) {
|
||||
int32_t ret = 0;
|
||||
syncLogSendAppendEntries(pSyncNode, pMsg, "");
|
||||
|
||||
SRpcMsg rpcMsg;
|
||||
syncAppendEntries2RpcMsg(pMsg, &rpcMsg);
|
||||
syncNodeSendMsgById(destRaftId, pSyncNode, &rpcMsg);
|
||||
|
||||
SPeerState* pState = syncNodeGetPeerState(pSyncNode, destRaftId);
|
||||
ASSERT(pState != NULL);
|
||||
|
||||
pState->lastSendIndex = pMsg->prevLogIndex + 1;
|
||||
pState->lastSendTime = taosGetTimestampMs();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int32_t syncNodeReplicate(SSyncNode* pSyncNode, bool isTimer) {
|
||||
// start replicate
|
||||
int32_t syncNodeMaybeSendAppendEntries(SSyncNode* pSyncNode, const SRaftId* destRaftId, const SyncAppendEntries* pMsg) {
|
||||
int32_t ret = 0;
|
||||
|
||||
switch (pSyncNode->pRaftCfg->snapshotStrategy) {
|
||||
case SYNC_STRATEGY_NO_SNAPSHOT:
|
||||
ret = syncNodeAppendEntriesPeers(pSyncNode);
|
||||
break;
|
||||
|
||||
case SYNC_STRATEGY_STANDARD_SNAPSHOT:
|
||||
ret = syncNodeAppendEntriesPeersSnapshot(pSyncNode);
|
||||
break;
|
||||
|
||||
case SYNC_STRATEGY_WAL_FIRST:
|
||||
ret = syncNodeAppendEntriesPeersSnapshot2(pSyncNode);
|
||||
break;
|
||||
|
||||
default:
|
||||
ret = syncNodeAppendEntriesPeers(pSyncNode);
|
||||
break;
|
||||
}
|
||||
|
||||
// start delay
|
||||
int64_t timeNow = taosGetTimestampMs();
|
||||
int64_t startDelay = timeNow - pSyncNode->startTime;
|
||||
|
||||
// replicate delay
|
||||
int64_t replicateDelay = timeNow - pSyncNode->lastReplicateTime;
|
||||
pSyncNode->lastReplicateTime = timeNow;
|
||||
|
||||
if (ret > 0 && isTimer && startDelay > SYNC_SPEED_UP_AFTER_MS) {
|
||||
// speed up replicate
|
||||
int32_t ms =
|
||||
pSyncNode->heartbeatTimerMS < SYNC_SPEED_UP_HB_TIMER ? pSyncNode->heartbeatTimerMS : SYNC_SPEED_UP_HB_TIMER;
|
||||
syncNodeRestartNowHeartbeatTimerMS(pSyncNode, ms);
|
||||
|
||||
#if 0
|
||||
do {
|
||||
char logBuf[128];
|
||||
snprintf(logBuf, sizeof(logBuf), "replicate speed up");
|
||||
syncNodeEventLog(pSyncNode, logBuf);
|
||||
} while (0);
|
||||
#endif
|
||||
if (syncNodeNeedSendAppendEntries(pSyncNode, destRaftId, pMsg)) {
|
||||
ret = syncNodeSendAppendEntries(pSyncNode, destRaftId, pMsg);
|
||||
|
||||
} else {
|
||||
syncNodeRestartHeartbeatTimer(pSyncNode);
|
||||
|
||||
#if 0
|
||||
do {
|
||||
char logBuf[128];
|
||||
snprintf(logBuf, sizeof(logBuf), "replicate slow down");
|
||||
char host[64];
|
||||
int16_t port;
|
||||
syncUtilU642Addr(destRaftId->addr, host, sizeof(host), &port);
|
||||
|
||||
snprintf(logBuf, sizeof(logBuf), "do not repcate to %s:%d for index:%ld", host, port, pMsg->prevLogIndex + 1);
|
||||
syncNodeEventLog(pSyncNode, logBuf);
|
||||
} while (0);
|
||||
#endif
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
@ -488,12 +199,34 @@ int32_t syncNodeAppendEntries(SSyncNode* pSyncNode, const SRaftId* destRaftId, c
|
|||
return ret;
|
||||
}
|
||||
|
||||
int32_t syncNodeAppendEntriesBatch(SSyncNode* pSyncNode, const SRaftId* destRaftId,
|
||||
const SyncAppendEntriesBatch* pMsg) {
|
||||
syncLogSendAppendEntriesBatch(pSyncNode, pMsg, "");
|
||||
int32_t syncNodeSendHeartbeat(SSyncNode* pSyncNode, const SRaftId* destRaftId, const SyncHeartbeat* pMsg) {
|
||||
int32_t ret = 0;
|
||||
syncLogSendHeartbeat(pSyncNode, pMsg, "");
|
||||
|
||||
SRpcMsg rpcMsg;
|
||||
syncAppendEntriesBatch2RpcMsg(pMsg, &rpcMsg);
|
||||
syncNodeSendMsgById(destRaftId, pSyncNode, &rpcMsg);
|
||||
syncHeartbeat2RpcMsg(pMsg, &rpcMsg);
|
||||
syncNodeSendMsgById(&(pMsg->destId), pSyncNode, &rpcMsg);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int32_t syncNodeHeartbeatPeers(SSyncNode* pSyncNode) {
|
||||
for (int32_t i = 0; i < pSyncNode->peersNum; ++i) {
|
||||
SyncHeartbeat* pSyncMsg = syncHeartbeatBuild(pSyncNode->vgId);
|
||||
pSyncMsg->srcId = pSyncNode->myRaftId;
|
||||
pSyncMsg->destId = pSyncNode->peersId[i];
|
||||
pSyncMsg->term = pSyncNode->pRaftStore->currentTerm;
|
||||
pSyncMsg->commitIndex = pSyncNode->commitIndex;
|
||||
pSyncMsg->minMatchIndex = syncMinMatchIndex(pSyncNode);
|
||||
pSyncMsg->privateTerm = 0;
|
||||
|
||||
SRpcMsg rpcMsg;
|
||||
syncHeartbeat2RpcMsg(pSyncMsg, &rpcMsg);
|
||||
|
||||
// send msg
|
||||
syncNodeSendHeartbeat(pSyncNode, &(pSyncMsg->destId), pSyncMsg);
|
||||
|
||||
syncHeartbeatDestroy(pSyncMsg);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -42,65 +42,6 @@
|
|||
// m)
|
||||
// /\ UNCHANGED <<state, currentTerm, candidateVars, leaderVars, logVars>>
|
||||
//
|
||||
int32_t syncNodeOnRequestVoteCb(SSyncNode* ths, SyncRequestVote* pMsg) {
|
||||
int32_t ret = 0;
|
||||
|
||||
// if already drop replica, do not process
|
||||
if (!syncNodeInRaftGroup(ths, &(pMsg->srcId)) && !ths->pRaftCfg->isStandBy) {
|
||||
syncLogRecvRequestVote(ths, pMsg, "maybe replica already dropped");
|
||||
return -1;
|
||||
}
|
||||
|
||||
bool logOK = (pMsg->lastLogTerm > ths->pLogStore->getLastTerm(ths->pLogStore)) ||
|
||||
((pMsg->lastLogTerm == ths->pLogStore->getLastTerm(ths->pLogStore)) &&
|
||||
(pMsg->lastLogIndex >= ths->pLogStore->getLastIndex(ths->pLogStore)));
|
||||
|
||||
// maybe update term
|
||||
if (pMsg->term > ths->pRaftStore->currentTerm) {
|
||||
syncNodeUpdateTerm(ths, pMsg->term);
|
||||
#if 0
|
||||
if (logOK) {
|
||||
syncNodeUpdateTerm(ths, pMsg->term);
|
||||
} else {
|
||||
syncNodeUpdateTermWithoutStepDown(ths, pMsg->term);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
ASSERT(pMsg->term <= ths->pRaftStore->currentTerm);
|
||||
|
||||
bool grant = (pMsg->term == ths->pRaftStore->currentTerm) && logOK &&
|
||||
((!raftStoreHasVoted(ths->pRaftStore)) || (syncUtilSameId(&(ths->pRaftStore->voteFor), &(pMsg->srcId))));
|
||||
if (grant) {
|
||||
// maybe has already voted for pMsg->srcId
|
||||
// vote again, no harm
|
||||
raftStoreVote(ths->pRaftStore, &(pMsg->srcId));
|
||||
|
||||
// forbid elect for this round
|
||||
syncNodeResetElectTimer(ths);
|
||||
}
|
||||
|
||||
// send msg
|
||||
SyncRequestVoteReply* pReply = syncRequestVoteReplyBuild(ths->vgId);
|
||||
pReply->srcId = ths->myRaftId;
|
||||
pReply->destId = pMsg->srcId;
|
||||
pReply->term = ths->pRaftStore->currentTerm;
|
||||
pReply->voteGranted = grant;
|
||||
|
||||
// trace log
|
||||
do {
|
||||
char logBuf[32];
|
||||
snprintf(logBuf, sizeof(logBuf), "grant:%d", pReply->voteGranted);
|
||||
syncLogRecvRequestVote(ths, pMsg, logBuf);
|
||||
syncLogSendRequestVoteReply(ths, pReply, "");
|
||||
} while (0);
|
||||
|
||||
SRpcMsg rpcMsg;
|
||||
syncRequestVoteReply2RpcMsg(pReply, &rpcMsg);
|
||||
syncNodeSendMsgById(&pReply->destId, ths, &rpcMsg);
|
||||
syncRequestVoteReplyDestroy(pReply);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool syncNodeOnRequestVoteLogOK(SSyncNode* pSyncNode, SyncRequestVote* pMsg) {
|
||||
SyncTerm myLastTerm = syncNodeGetLastTerm(pSyncNode);
|
||||
|
@ -157,7 +98,7 @@ static bool syncNodeOnRequestVoteLogOK(SSyncNode* pSyncNode, SyncRequestVote* pM
|
|||
return false;
|
||||
}
|
||||
|
||||
int32_t syncNodeOnRequestVoteSnapshotCb(SSyncNode* ths, SyncRequestVote* pMsg) {
|
||||
int32_t syncNodeOnRequestVote(SSyncNode* ths, SyncRequestVote* pMsg) {
|
||||
int32_t ret = 0;
|
||||
|
||||
// if already drop replica, do not process
|
||||
|
@ -170,14 +111,8 @@ int32_t syncNodeOnRequestVoteSnapshotCb(SSyncNode* ths, SyncRequestVote* pMsg) {
|
|||
|
||||
// maybe update term
|
||||
if (pMsg->term > ths->pRaftStore->currentTerm) {
|
||||
syncNodeUpdateTerm(ths, pMsg->term);
|
||||
#if 0
|
||||
if (logOK) {
|
||||
syncNodeUpdateTerm(ths, pMsg->term);
|
||||
} else {
|
||||
syncNodeUpdateTermWithoutStepDown(ths, pMsg->term);
|
||||
}
|
||||
#endif
|
||||
syncNodeStepDown(ths, pMsg->term);
|
||||
// syncNodeUpdateTerm(ths, pMsg->term);
|
||||
}
|
||||
ASSERT(pMsg->term <= ths->pRaftStore->currentTerm);
|
||||
|
||||
|
@ -188,6 +123,9 @@ int32_t syncNodeOnRequestVoteSnapshotCb(SSyncNode* ths, SyncRequestVote* pMsg) {
|
|||
// vote again, no harm
|
||||
raftStoreVote(ths->pRaftStore, &(pMsg->srcId));
|
||||
|
||||
// candidate ?
|
||||
syncNodeStepDown(ths, ths->pRaftStore->currentTerm);
|
||||
|
||||
// forbid elect for this round
|
||||
syncNodeResetElectTimer(ths);
|
||||
}
|
||||
|
|
|
@ -37,63 +37,7 @@
|
|||
// /\ Discard(m)
|
||||
// /\ UNCHANGED <<serverVars, votedFor, leaderVars, logVars>>
|
||||
//
|
||||
int32_t syncNodeOnRequestVoteReplyCb(SSyncNode* ths, SyncRequestVoteReply* pMsg) {
|
||||
int32_t ret = 0;
|
||||
|
||||
// if already drop replica, do not process
|
||||
if (!syncNodeInRaftGroup(ths, &(pMsg->srcId)) && !ths->pRaftCfg->isStandBy) {
|
||||
syncLogRecvRequestVoteReply(ths, pMsg, "maybe replica already dropped");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// drop stale response
|
||||
if (pMsg->term < ths->pRaftStore->currentTerm) {
|
||||
syncLogRecvRequestVoteReply(ths, pMsg, "drop stale response");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// ASSERT(!(pMsg->term > ths->pRaftStore->currentTerm));
|
||||
// no need this code, because if I receive reply.term, then I must have sent for that term.
|
||||
// if (pMsg->term > ths->pRaftStore->currentTerm) {
|
||||
// syncNodeUpdateTerm(ths, pMsg->term);
|
||||
// }
|
||||
|
||||
if (pMsg->term > ths->pRaftStore->currentTerm) {
|
||||
syncLogRecvRequestVoteReply(ths, pMsg, "error term");
|
||||
return -1;
|
||||
}
|
||||
|
||||
syncLogRecvRequestVoteReply(ths, pMsg, "");
|
||||
ASSERT(pMsg->term == ths->pRaftStore->currentTerm);
|
||||
|
||||
// This tallies votes even when the current state is not Candidate,
|
||||
// but they won't be looked at, so it doesn't matter.
|
||||
if (ths->state == TAOS_SYNC_STATE_CANDIDATE) {
|
||||
votesRespondAdd(ths->pVotesRespond, pMsg);
|
||||
if (pMsg->voteGranted) {
|
||||
// add vote
|
||||
voteGrantedVote(ths->pVotesGranted, pMsg);
|
||||
|
||||
// maybe to leader
|
||||
if (voteGrantedMajority(ths->pVotesGranted)) {
|
||||
if (!ths->pVotesGranted->toLeader) {
|
||||
syncNodeCandidate2Leader(ths);
|
||||
|
||||
// prevent to leader again!
|
||||
ths->pVotesGranted->toLeader = true;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
;
|
||||
// do nothing
|
||||
// UNCHANGED <<votesGranted, voterLog>>
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int32_t syncNodeOnRequestVoteReplySnapshotCb(SSyncNode* ths, SyncRequestVoteReply* pMsg) {
|
||||
int32_t syncNodeOnRequestVoteReply(SSyncNode* ths, SyncRequestVoteReply* pMsg) {
|
||||
int32_t ret = 0;
|
||||
|
||||
// if already drop replica, do not process
|
||||
|
@ -116,6 +60,7 @@ int32_t syncNodeOnRequestVoteReplySnapshotCb(SSyncNode* ths, SyncRequestVoteRepl
|
|||
|
||||
if (pMsg->term > ths->pRaftStore->currentTerm) {
|
||||
syncLogRecvRequestVoteReply(ths, pMsg, "error term");
|
||||
syncNodeStepDown(ths, pMsg->term);
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
|
|
@ -136,7 +136,7 @@ void syncRespCleanByTTL(SSyncRespMgr *pObj, int64_t ttl, bool rsp) {
|
|||
|
||||
while (pStub) {
|
||||
size_t len;
|
||||
void *key = taosHashGetKey(pStub, &len);
|
||||
void * key = taosHashGetKey(pStub, &len);
|
||||
uint64_t *pSeqNum = (uint64_t *)key;
|
||||
sum++;
|
||||
|
||||
|
|
|
@ -41,6 +41,8 @@ SSyncSnapshotSender *snapshotSenderCreate(SSyncNode *pSyncNode, int32_t replicaI
|
|||
}
|
||||
memset(pSender, 0, sizeof(*pSender));
|
||||
|
||||
int64_t timeNow = taosGetTimestampMs();
|
||||
|
||||
pSender->start = false;
|
||||
pSender->seq = SYNC_SNAPSHOT_SEQ_INVALID;
|
||||
pSender->ack = SYNC_SNAPSHOT_SEQ_INVALID;
|
||||
|
@ -51,7 +53,8 @@ SSyncSnapshotSender *snapshotSenderCreate(SSyncNode *pSyncNode, int32_t replicaI
|
|||
pSender->pSyncNode = pSyncNode;
|
||||
pSender->replicaIndex = replicaIndex;
|
||||
pSender->term = pSyncNode->pRaftStore->currentTerm;
|
||||
pSender->privateTerm = taosGetTimestampMs() + 100;
|
||||
pSender->privateTerm = timeNow + 100;
|
||||
pSender->startTime = timeNow;
|
||||
pSender->pSyncNode->pFsm->FpGetSnapshotInfo(pSender->pSyncNode->pFsm, &(pSender->snapshot));
|
||||
pSender->finish = false;
|
||||
} else {
|
||||
|
@ -402,6 +405,24 @@ char *snapshotSender2SimpleStr(SSyncSnapshotSender *pSender, char *event) {
|
|||
return s;
|
||||
}
|
||||
|
||||
int32_t syncNodeStartSnapshot(SSyncNode *pSyncNode, SRaftId *pDestId) {
|
||||
// calculate <start, end> index
|
||||
|
||||
syncNodeEventLog(pSyncNode, "start snapshot ...");
|
||||
|
||||
SSyncSnapshotSender *pSender = syncNodeGetSnapshotSender(pSyncNode, pDestId);
|
||||
if (pSender == NULL) {
|
||||
// create sender
|
||||
} else {
|
||||
// if <start, end> is same
|
||||
// return 0;
|
||||
}
|
||||
|
||||
// send begin msg
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// -------------------------------------
|
||||
SSyncSnapshotReceiver *snapshotReceiverCreate(SSyncNode *pSyncNode, SRaftId fromId) {
|
||||
bool condition = (pSyncNode->pFsm->FpSnapshotStartWrite != NULL) && (pSyncNode->pFsm->FpSnapshotStopWrite != NULL) &&
|
||||
|
@ -721,7 +742,7 @@ char *snapshotReceiver2SimpleStr(SSyncSnapshotReceiver *pReceiver, char *event)
|
|||
// condition 3, recv SYNC_SNAPSHOT_SEQ_FORCE_CLOSE, force close
|
||||
// condition 4, got data, update ack
|
||||
//
|
||||
int32_t syncNodeOnSnapshotSendCb(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) {
|
||||
int32_t syncNodeOnSnapshot(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) {
|
||||
// get receiver
|
||||
SSyncSnapshotReceiver *pReceiver = pSyncNode->pNewNodeReceiver;
|
||||
bool needRsp = false;
|
||||
|
@ -834,7 +855,7 @@ int32_t syncNodeOnSnapshotSendCb(SSyncNode *pSyncNode, SyncSnapshotSend *pMsg) {
|
|||
// condition 2 sender receives ack, set seq = ack + 1, send msg from seq
|
||||
// condition 3 sender receives error msg, just print error log
|
||||
//
|
||||
int32_t syncNodeOnSnapshotRspCb(SSyncNode *pSyncNode, SyncSnapshotRsp *pMsg) {
|
||||
int32_t syncNodeOnSnapshotReply(SSyncNode *pSyncNode, SyncSnapshotRsp *pMsg) {
|
||||
// if already drop replica, do not process
|
||||
if (!syncNodeInRaftGroup(pSyncNode, &(pMsg->srcId)) && pSyncNode->state == TAOS_SYNC_STATE_LEADER) {
|
||||
sError("vgId:%d, recv sync-snapshot-rsp, maybe replica already dropped", pSyncNode->vgId);
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#include "syncTimeout.h"
|
||||
#include "syncElection.h"
|
||||
#include "syncRaftCfg.h"
|
||||
#include "syncRaftLog.h"
|
||||
#include "syncReplication.h"
|
||||
#include "syncRespMgr.h"
|
||||
|
||||
|
@ -60,12 +61,36 @@ static void syncNodeCleanConfigIndex(SSyncNode* ths) {
|
|||
int32_t syncNodeTimerRoutine(SSyncNode* ths) {
|
||||
syncNodeEventLog(ths, "timer routines");
|
||||
|
||||
if (ths->vgId == 1) {
|
||||
// timer replicate
|
||||
syncNodeReplicate(ths);
|
||||
|
||||
// clean mnode index
|
||||
if (syncNodeIsMnode(ths)) {
|
||||
syncNodeCleanConfigIndex(ths);
|
||||
}
|
||||
|
||||
// end timeout wal snapshot
|
||||
int64_t timeNow = taosGetTimestampMs();
|
||||
if (timeNow - ths->snapshottingIndex > SYNC_DEL_WAL_MS &&
|
||||
atomic_load_64(&ths->snapshottingIndex) != SYNC_INDEX_INVALID) {
|
||||
SSyncLogStoreData* pData = ths->pLogStore->data;
|
||||
int32_t code = walEndSnapshot(pData->pWal);
|
||||
if (code != 0) {
|
||||
sError("vgId:%d, wal snapshot end error since:%s", ths->vgId, terrstr(terrno));
|
||||
return -1;
|
||||
} else {
|
||||
do {
|
||||
char logBuf[256];
|
||||
snprintf(logBuf, sizeof(logBuf), "wal snapshot end, index:%ld", atomic_load_64(&ths->snapshottingIndex));
|
||||
syncNodeEventLog(ths, logBuf);
|
||||
} while (0);
|
||||
|
||||
atomic_store_64(&ths->snapshottingIndex, SYNC_INDEX_INVALID);
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
if (ths->vgId != 1) {
|
||||
if (!syncNodeIsMnode(ths)) {
|
||||
syncRespClean(ths->pSyncRespMgr);
|
||||
}
|
||||
#endif
|
||||
|
@ -73,9 +98,9 @@ int32_t syncNodeTimerRoutine(SSyncNode* ths) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
int32_t syncNodeOnTimeoutCb(SSyncNode* ths, SyncTimeout* pMsg) {
|
||||
int32_t syncNodeOnTimer(SSyncNode* ths, SyncTimeout* pMsg) {
|
||||
int32_t ret = 0;
|
||||
syncTimeoutLog2("==syncNodeOnTimeoutCb==", pMsg);
|
||||
syncLogRecvTimer(ths, pMsg, "");
|
||||
|
||||
if (pMsg->timeoutType == SYNC_TIMEOUT_PING) {
|
||||
if (atomic_load_64(&ths->pingTimerLogicClockUser) <= pMsg->logicClock) {
|
||||
|
@ -84,27 +109,29 @@ int32_t syncNodeOnTimeoutCb(SSyncNode* ths, SyncTimeout* pMsg) {
|
|||
// syncNodePingAll(ths);
|
||||
// syncNodePingPeers(ths);
|
||||
|
||||
// sTrace("vgId:%d, sync timeout, type:ping count:%d", ths->vgId, ths->pingTimerCounter);
|
||||
syncNodeTimerRoutine(ths);
|
||||
}
|
||||
|
||||
} else if (pMsg->timeoutType == SYNC_TIMEOUT_ELECTION) {
|
||||
if (atomic_load_64(&ths->electTimerLogicClockUser) <= pMsg->logicClock) {
|
||||
++(ths->electTimerCounter);
|
||||
sTrace("vgId:%d, sync timer, type:election count:%" PRId64 ", electTimerLogicClockUser:%" PRId64 "", ths->vgId,
|
||||
ths->electTimerCounter, ths->electTimerLogicClockUser);
|
||||
sTrace("vgId:%d, sync timer, type:election count:%lu, lc-user:%ld", ths->vgId, ths->electTimerCounter,
|
||||
ths->electTimerLogicClockUser);
|
||||
|
||||
syncNodeElect(ths);
|
||||
}
|
||||
|
||||
} else if (pMsg->timeoutType == SYNC_TIMEOUT_HEARTBEAT) {
|
||||
if (atomic_load_64(&ths->heartbeatTimerLogicClockUser) <= pMsg->logicClock) {
|
||||
++(ths->heartbeatTimerCounter);
|
||||
sTrace("vgId:%d, sync timer, type:replicate count:%" PRId64 ", heartbeatTimerLogicClockUser:%" PRId64 "",
|
||||
ths->vgId, ths->heartbeatTimerCounter, ths->heartbeatTimerLogicClockUser);
|
||||
syncNodeReplicate(ths, true);
|
||||
sTrace("vgId:%d, sync timer, type:replicate count:%lu, lc-user:%ld", ths->vgId, ths->heartbeatTimerCounter,
|
||||
ths->heartbeatTimerLogicClockUser);
|
||||
|
||||
// syncNodeReplicate(ths, true);
|
||||
}
|
||||
|
||||
} else {
|
||||
sError("vgId:%d, unknown timeout-type:%d", ths->vgId, pMsg->timeoutType);
|
||||
sError("vgId:%d, recv unknown timer-type:%d", ths->vgId, pMsg->timeoutType);
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
|
|
@ -237,8 +237,8 @@ int64_t createSyncNode(int32_t replicaNum, int32_t myIndex, int32_t vgId, SWal*
|
|||
gSyncIO->FpOnSyncAppendEntries = pSyncNode->FpOnAppendEntries;
|
||||
gSyncIO->FpOnSyncAppendEntriesReply = pSyncNode->FpOnAppendEntriesReply;
|
||||
|
||||
gSyncIO->FpOnSyncSnapshotSend = pSyncNode->FpOnSnapshotSend;
|
||||
gSyncIO->FpOnSyncSnapshotRsp = pSyncNode->FpOnSnapshotRsp;
|
||||
gSyncIO->FpOnSyncSnapshot = pSyncNode->FpOnSnapshot;
|
||||
gSyncIO->FpOnSyncSnapshotReply = pSyncNode->FpOnSnapshotReply;
|
||||
|
||||
gSyncIO->pSyncNode = pSyncNode;
|
||||
syncNodeRelease(pSyncNode);
|
||||
|
|
|
@ -181,8 +181,11 @@ int main(int argc, char **argv) {
|
|||
SSyncNode *pSyncNode = syncNodeInit();
|
||||
assert(pSyncNode != NULL);
|
||||
SSyncRaftEntry *pEntry = pMsg4;
|
||||
pSyncNode->pLogStore->appendEntry(pSyncNode->pLogStore, pEntry);
|
||||
SSyncRaftEntry *pEntry2 = pSyncNode->pLogStore->getEntry(pSyncNode->pLogStore, pEntry->index);
|
||||
pSyncNode->pLogStore->syncLogAppendEntry(pSyncNode->pLogStore, pEntry);
|
||||
|
||||
int32_t code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, pEntry->index, &pEntry);
|
||||
ASSERT(code == 0);
|
||||
|
||||
syncEntryLog2((char *)"==pEntry2==", pEntry2);
|
||||
|
||||
// step5
|
||||
|
|
|
@ -36,7 +36,7 @@ void test1() {
|
|||
void test2() {
|
||||
SyncHeartbeatReply *pMsg = createMsg();
|
||||
uint32_t len = pMsg->bytes;
|
||||
char *serialized = (char *)taosMemoryMalloc(len);
|
||||
char * serialized = (char *)taosMemoryMalloc(len);
|
||||
syncHeartbeatReplySerialize(pMsg, serialized, len);
|
||||
SyncHeartbeatReply *pMsg2 = syncHeartbeatReplyBuild(1000);
|
||||
syncHeartbeatReplyDeserialize(serialized, len, pMsg2);
|
||||
|
@ -50,7 +50,7 @@ void test2() {
|
|||
void test3() {
|
||||
SyncHeartbeatReply *pMsg = createMsg();
|
||||
uint32_t len;
|
||||
char *serialized = syncHeartbeatReplySerialize2(pMsg, &len);
|
||||
char * serialized = syncHeartbeatReplySerialize2(pMsg, &len);
|
||||
SyncHeartbeatReply *pMsg2 = syncHeartbeatReplyDeserialize2(serialized, len);
|
||||
syncHeartbeatReplyLog2((char *)"test3: syncHeartbeatReplySerialize3 -> syncHeartbeatReplyDeserialize2 ", pMsg2);
|
||||
|
||||
|
|
|
@ -35,7 +35,7 @@ void test1() {
|
|||
void test2() {
|
||||
SyncHeartbeat *pMsg = createMsg();
|
||||
uint32_t len = pMsg->bytes;
|
||||
char *serialized = (char *)taosMemoryMalloc(len);
|
||||
char * serialized = (char *)taosMemoryMalloc(len);
|
||||
syncHeartbeatSerialize(pMsg, serialized, len);
|
||||
SyncHeartbeat *pMsg2 = syncHeartbeatBuild(789);
|
||||
syncHeartbeatDeserialize(serialized, len, pMsg2);
|
||||
|
@ -49,7 +49,7 @@ void test2() {
|
|||
void test3() {
|
||||
SyncHeartbeat *pMsg = createMsg();
|
||||
uint32_t len;
|
||||
char *serialized = syncHeartbeatSerialize2(pMsg, &len);
|
||||
char * serialized = syncHeartbeatSerialize2(pMsg, &len);
|
||||
SyncHeartbeat *pMsg2 = syncHeartbeatDeserialize2(serialized, len);
|
||||
syncHeartbeatLog2((char *)"test3: syncHeartbeatSerialize2 -> syncHeartbeatDeserialize2 ", pMsg2);
|
||||
|
||||
|
|
|
@ -52,7 +52,7 @@ void cleanup() {
|
|||
void logStoreTest() {
|
||||
pLogStore = logStoreCreate(pSyncNode);
|
||||
assert(pLogStore);
|
||||
assert(pLogStore->getLastIndex(pLogStore) == SYNC_INDEX_INVALID);
|
||||
assert(pLogStore->syncLogLastIndex(pLogStore) == SYNC_INDEX_INVALID);
|
||||
|
||||
logStoreLog2((char*)"logStoreTest", pLogStore);
|
||||
|
||||
|
@ -65,22 +65,20 @@ void logStoreTest() {
|
|||
pEntry->seqNum = 3;
|
||||
pEntry->isWeak = true;
|
||||
pEntry->term = 100 + i;
|
||||
pEntry->index = pLogStore->getLastIndex(pLogStore) + 1;
|
||||
pEntry->index = pLogStore->syncLogLastIndex(pLogStore) + 1;
|
||||
snprintf(pEntry->data, dataLen, "value%d", i);
|
||||
|
||||
syncEntryLog2((char*)"==write entry== :", pEntry);
|
||||
pLogStore->appendEntry(pLogStore, pEntry);
|
||||
pLogStore->syncLogAppendEntry(pLogStore, pEntry);
|
||||
syncEntryDestory(pEntry);
|
||||
|
||||
if (i == 0) {
|
||||
assert(pLogStore->getLastIndex(pLogStore) == SYNC_INDEX_BEGIN);
|
||||
assert(pLogStore->syncLogLastIndex(pLogStore) == SYNC_INDEX_BEGIN);
|
||||
}
|
||||
}
|
||||
logStoreLog2((char*)"after appendEntry", pLogStore);
|
||||
|
||||
pLogStore->truncate(pLogStore, 3);
|
||||
pLogStore->syncLogTruncate(pLogStore, 3);
|
||||
logStoreLog2((char*)"after truncate 3", pLogStore);
|
||||
|
||||
logStoreDestory(pLogStore);
|
||||
}
|
||||
|
||||
|
|
|
@ -266,14 +266,12 @@ int64_t createSyncNode(int32_t replicaNum, int32_t myIndex, int32_t vgId, SWal*
|
|||
gSyncIO->FpOnSyncPingReply = pSyncNode->FpOnPingReply;
|
||||
gSyncIO->FpOnSyncTimeout = pSyncNode->FpOnTimeout;
|
||||
gSyncIO->FpOnSyncClientRequest = pSyncNode->FpOnClientRequest;
|
||||
|
||||
gSyncIO->FpOnSyncRequestVote = pSyncNode->FpOnRequestVote;
|
||||
gSyncIO->FpOnSyncRequestVoteReply = pSyncNode->FpOnRequestVoteReply;
|
||||
gSyncIO->FpOnSyncAppendEntries = pSyncNode->FpOnAppendEntries;
|
||||
gSyncIO->FpOnSyncAppendEntriesReply = pSyncNode->FpOnAppendEntriesReply;
|
||||
|
||||
gSyncIO->FpOnSyncSnapshotSend = pSyncNode->FpOnSnapshotSend;
|
||||
gSyncIO->FpOnSyncSnapshotRsp = pSyncNode->FpOnSnapshotRsp;
|
||||
gSyncIO->FpOnSyncSnapshot = pSyncNode->FpOnSnapshot;
|
||||
gSyncIO->FpOnSyncSnapshotReply = pSyncNode->FpOnSnapshotReply;
|
||||
|
||||
gSyncIO->pSyncNode = pSyncNode;
|
||||
syncNodeRelease(pSyncNode);
|
||||
|
|
|
@ -0,0 +1,175 @@
|
|||
system sh/stop_dnodes.sh
|
||||
system sh/deploy.sh -n dnode1 -i 1
|
||||
system sh/deploy.sh -n dnode2 -i 2
|
||||
system sh/deploy.sh -n dnode3 -i 3
|
||||
system sh/deploy.sh -n dnode4 -i 4
|
||||
|
||||
system sh/cfg.sh -n dnode1 -c supportVnodes -v 0
|
||||
|
||||
system sh/exec.sh -n dnode1 -s start
|
||||
system sh/exec.sh -n dnode2 -s start
|
||||
system sh/exec.sh -n dnode3 -s start
|
||||
system sh/exec.sh -n dnode4 -s start
|
||||
|
||||
sql connect
|
||||
sql create dnode $hostname port 7200
|
||||
sql create dnode $hostname port 7300
|
||||
sql create dnode $hostname port 7400
|
||||
|
||||
$x = 0
|
||||
step1:
|
||||
$x = $x + 1
|
||||
sleep 1000
|
||||
if $x == 10 then
|
||||
print ====> dnode not ready!
|
||||
return -1
|
||||
endi
|
||||
sql select * from information_schema.ins_dnodes
|
||||
print ===> $data00 $data01 $data02 $data03 $data04 $data05
|
||||
print ===> $data10 $data11 $data12 $data13 $data14 $data15
|
||||
print ===> $data20 $data21 $data22 $data23 $data24 $data25
|
||||
print ===> $data30 $data31 $data32 $data33 $data34 $data35
|
||||
if $rows != 4 then
|
||||
return -1
|
||||
endi
|
||||
if $data(1)[4] != ready then
|
||||
goto step1
|
||||
endi
|
||||
if $data(2)[4] != ready then
|
||||
goto step1
|
||||
endi
|
||||
if $data(3)[4] != ready then
|
||||
goto step1
|
||||
endi
|
||||
if $data(4)[4] != ready then
|
||||
goto step1
|
||||
endi
|
||||
|
||||
$replica = 3
|
||||
$vgroups = 1
|
||||
|
||||
print ============= create database
|
||||
sql create database db replica $replica vgroups $vgroups
|
||||
|
||||
$loop_cnt = 0
|
||||
check_db_ready:
|
||||
$loop_cnt = $loop_cnt + 1
|
||||
sleep 200
|
||||
if $loop_cnt == 100 then
|
||||
print ====> db not ready!
|
||||
return -1
|
||||
endi
|
||||
sql select * from information_schema.ins_databases
|
||||
print ===> rows: $rows
|
||||
print $data[2][0] $data[2][1] $data[2][2] $data[2][3] $data[2][4] $data[2][5] $data[2][6] $data[2][7] $data[2][8] $data[2][9] $data[2][6] $data[2][11] $data[2][12] $data[2][13] $data[2][14] $data[2][15] $data[2][16] $data[2][17] $data[2][18] $data[2][19]
|
||||
if $rows != 3 then
|
||||
return -1
|
||||
endi
|
||||
if $data[2][15] != ready then
|
||||
goto check_db_ready
|
||||
endi
|
||||
|
||||
sql use db
|
||||
|
||||
$loop_cnt = 0
|
||||
check_vg_ready:
|
||||
$loop_cnt = $loop_cnt + 1
|
||||
sleep 200
|
||||
if $loop_cnt == 300 then
|
||||
print ====> vgroups not ready!
|
||||
return -1
|
||||
endi
|
||||
|
||||
sql show vgroups
|
||||
print ===> rows: $rows
|
||||
print $data[0][0] $data[0][1] $data[0][2] $data[0][3] $data[0][4] $data[0][5] $data[0][6] $data[0][7] $data[0][8] $data[0][9] $data[0][10] $data[0][11]
|
||||
|
||||
if $rows != $vgroups then
|
||||
return -1
|
||||
endi
|
||||
|
||||
if $data[0][4] == leader then
|
||||
if $data[0][6] == follower then
|
||||
if $data[0][8] == follower then
|
||||
print ---- vgroup $data[0][0] leader locate on dnode $data[0][3]
|
||||
endi
|
||||
endi
|
||||
elif $data[0][6] == leader then
|
||||
if $data[0][4] == follower then
|
||||
if $data[0][8] == follower then
|
||||
print ---- vgroup $data[0][0] leader locate on dnode $data[0][5]
|
||||
endi
|
||||
endi
|
||||
elif $data[0][8] == leader then
|
||||
if $data[0][4] == follower then
|
||||
if $data[0][6] == follower then
|
||||
print ---- vgroup $data[0][0] leader locate on dnode $data[0][7]
|
||||
endi
|
||||
endi
|
||||
else
|
||||
goto check_vg_ready
|
||||
endi
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#return 0
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
vg_ready:
|
||||
print ====> create stable/child table
|
||||
sql create table stb (ts timestamp, c1 int, c2 float, c3 double) tags (t1 int)
|
||||
|
||||
|
||||
|
||||
|
||||
#return 0
|
||||
|
||||
|
||||
|
||||
sql show stables
|
||||
if $rows != 1 then
|
||||
return -1
|
||||
endi
|
||||
|
||||
sql create table ct1 using stb tags(1000)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
print ====> step1 insert 1000 records
|
||||
$N = 1000
|
||||
$count = 0
|
||||
while $count < $N
|
||||
$ms = 1591200000000 + $count
|
||||
sql insert into ct1 values( $ms , $count , 2.1, 3.1)
|
||||
$count = $count + 1
|
||||
endw
|
||||
|
||||
print ====> step2 sleep 20s, checking data
|
||||
sleep 20000
|
||||
|
||||
|
||||
print ====> step3 sleep 30s, kill leader
|
||||
sleep 30000
|
||||
|
||||
print ====> step4 insert 1000 records
|
||||
$N = 1000
|
||||
$count = 0
|
||||
while $count < $N
|
||||
$ms = 1591201000000 + $count
|
||||
sql insert into ct1 values( $ms , $count , 2.1, 3.1)
|
||||
$count = $count + 1
|
||||
endw
|
||||
|
||||
print ====> step5 sleep 20s, checking data
|
||||
sleep 20000
|
||||
|
Loading…
Reference in New Issue