homework-jianmu/source/libs/sync/inc/raft_progress.h

213 lines
6.8 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TD_SYNC_RAFT_PROGRESS_H
#define TD_SYNC_RAFT_PROGRESS_H
#include "sync_type.h"
/**
* SSyncRaftInflights is a sliding window for the inflight messages.
* Thus inflight effectively limits both the number of inflight messages
* and the bandwidth each Progress can use.
* When inflights is full, no more message should be sent.
* When a leader sends out a message, the index of the last
* entry should be added to inflights. The index MUST be added
* into inflights in order.
* When a leader receives a reply, the previous inflights should
* be freed by calling syncRaftInflightFreeTo with the index of the last
* received entry.
**/
typedef struct SSyncRaftInflights {
/* the starting index in the buffer */
int start;
/* number of inflights in the buffer */
int count;
/* the size of the buffer */
int size;
/**
* buffer contains the index of the last entry
* inside one message.
**/
SyncIndex* buffer;
} SSyncRaftInflights;
/**
* State defines how the leader should interact with the follower.
*
* When in PROGRESS_PROBE, leader sends at most one replication message
* per heartbeat interval. It also probes actual progress of the follower.
*
* When in PROGRESS_REPLICATE, leader optimistically increases next
* to the latest entry sent after sending replication message. This is
* an optimized state for fast replicating log entries to the follower.
*
* When in PROGRESS_SNAPSHOT, leader should have sent out snapshot
* before and stops sending any replication message.
*
* PROGRESS_PROBE is the initial state.
**/
typedef enum RaftProgressState {
PROGRESS_PROBE = 0,
PROGRESS_REPLICATE,
PROGRESS_SNAPSHOT,
} RaftProgressState;
/**
* Progress represents a followers progress in the view of the leader. Leader maintains
* progresses of all followers, and sends entries to the follower based on its progress.
**/
struct SSyncRaftProgress {
SyncIndex nextIndex;
SyncIndex matchIndex;
RaftProgressState state;
/**
* paused is used in PROGRESS_PROBE.
* When paused is true, raft should pause sending replication message to this peer.
**/
bool paused;
// last send append message tick
uint32_t lastSendTick;
/**
* pendingSnapshotIndex is used in PROGRESS_SNAPSHOT.
* If there is a pending snapshot, the pendingSnapshotIndex will be set to the
* index of the snapshot. If pendingSnapshotIndex is set, the replication process of
* this Progress will be paused. raft will not resend snapshot until the pending one
* is reported to be failed.
**/
SyncIndex pendingSnapshotIndex;
/**
* recentActive is true if the progress is recently active. Receiving any messages
* from the corresponding follower indicates the progress is active.
* RecentActive can be reset to false after an election timeout.
**/
bool recentActive;
/**
* flow control sliding window
**/
SSyncRaftInflights inflights;
};
int syncRaftProgressCreate(SSyncRaft* pRaft);
//int syncRaftProgressRecreate(SSyncRaft* pRaft, const RaftConfiguration* configuration);
/**
* syncRaftProgressMaybeUpdate returns false if the given lastIndex index comes from i-th node's log.
* Otherwise it updates the progress and returns true.
**/
bool syncRaftProgressMaybeUpdate(SSyncRaft* pRaft, int i, SyncIndex lastIndex);
static FORCE_INLINE void syncRaftProgressOptimisticNextIndex(SSyncRaftProgress* progress, SyncIndex nextIndex) {
progress->nextIndex = nextIndex + 1;
}
/**
* syncRaftProgressMaybeDecrTo returns false if the given to index comes from an out of order message.
* Otherwise it decreases the progress next index to min(rejected, last) and returns true.
**/
bool syncRaftProgressMaybeDecrTo(SSyncRaft* pRaft, int i,
SyncIndex rejected, SyncIndex lastIndex);
/**
* syncRaftProgressIsPaused returns whether sending log entries to this node has been
* paused. A node may be paused because it has rejected recent
* MsgApps, is currently waiting for a snapshot, or has reached the
* MaxInflightMsgs limit.
**/
bool syncRaftProgressIsPaused(SSyncRaftProgress* progress);
static FORCE_INLINE void syncRaftProgressPause(SSyncRaftProgress* progress) {
progress->paused = true;
}
static FORCE_INLINE SyncIndex syncRaftProgressNextIndex(SSyncRaftProgress* progress) {
return progress->nextIndex;
}
static FORCE_INLINE RaftProgressState syncRaftProgressInReplicate(SSyncRaftProgress* progress) {
return progress->state == PROGRESS_REPLICATE;
}
static FORCE_INLINE RaftProgressState syncRaftProgressInSnapshot(SSyncRaftProgress* progress) {
return progress->state == PROGRESS_SNAPSHOT;
}
static FORCE_INLINE RaftProgressState syncRaftProgressInProbe(SSyncRaftProgress* progress) {
return progress->state == PROGRESS_PROBE;
}
static FORCE_INLINE bool syncRaftProgressRecentActive(SSyncRaftProgress* progress) {
return progress->recentActive;
}
static FORCE_INLINE bool syncRaftProgressUpdateSendTick(SSyncRaftProgress* progress, SyncTick current) {
return progress->lastSendTick = current;
}
void syncRaftProgressFailure(SSyncRaft* pRaft, int i);
bool syncRaftProgressNeedAbortSnapshot(SSyncRaft* pRaft, int i);
/**
* return true if i-th node's log is up-todate
**/
bool syncRaftProgressIsUptodate(SSyncRaft* pRaft, int i);
void syncRaftProgressBecomeProbe(SSyncRaft* pRaft, int i);
void syncRaftProgressBecomeReplicate(SSyncRaft* pRaft, int i);
void syncRaftProgressBecomeSnapshot(SSyncRaft* pRaft, int i, SyncIndex snapshotIndex);
/* inflights APIs */
int syncRaftInflightReset(SSyncRaftInflights* inflights);
bool syncRaftInflightFull(SSyncRaftInflights* inflights);
void syncRaftInflightAdd(SSyncRaftInflights* inflights, SyncIndex inflightIndex);
void syncRaftInflightFreeTo(SSyncRaftInflights* inflights, SyncIndex toIndex);
void syncRaftInflightFreeFirstOne(SSyncRaftInflights* inflights);
#if 0
void syncRaftProgressAbortSnapshot(SSyncRaft* pRaft, int i);
SyncIndex syncRaftProgressMatchIndex(SSyncRaft* pRaft, int i);
void syncRaftProgressUpdateLastSend(SSyncRaft* pRaft, int i);
void syncRaftProgressUpdateSnapshotLastSend(SSyncRaft* pRaft, int i);
bool syncRaftProgressResetRecentRecv(SSyncRaft* pRaft, int i);
void syncRaftProgressMarkRecentRecv(SSyncRaft* pRaft, int i);
void syncRaftProgressAbortSnapshot(SSyncRaft* pRaft, int i);
#endif
#endif /* TD_SYNC_RAFT_PROGRESS_H */