[TD-10645][raft]<feature>add restore process
This commit is contained in:
parent
8eeaa2712d
commit
fb01dd5628
|
@ -0,0 +1,42 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef TD_SYNC_RAFT_CONFIG_CHANGE_H
|
||||
#define TD_SYNC_RAFT_CONFIG_CHANGE_H
|
||||
|
||||
#include "sync_type.h"
|
||||
#include "sync_raft_proto.h"
|
||||
|
||||
/**
|
||||
* Changer facilitates configuration changes. It exposes methods to handle
|
||||
* simple and joint consensus while performing the proper validation that allows
|
||||
* refusing invalid configuration changes before they affect the active
|
||||
* configuration.
|
||||
**/
|
||||
struct SSyncRaftChanger {
|
||||
SSyncRaftProgressTracker* tracker;
|
||||
SyncIndex lastIndex;
|
||||
};
|
||||
|
||||
typedef int (*configChangeFp)(SSyncRaftChanger* changer, const SSyncConfChangeSingleArray* css,
|
||||
SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap);
|
||||
|
||||
int syncRaftChangerSimpleConfig(SSyncRaftChanger* changer, const SSyncConfChangeSingleArray* css,
|
||||
SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap);
|
||||
|
||||
int syncRaftChangerEnterJoint(SSyncRaftChanger* changer, const SSyncConfChangeSingleArray* css,
|
||||
SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap);
|
||||
|
||||
#endif /* TD_SYNC_RAFT_CONFIG_CHANGE_H */
|
|
@ -129,6 +129,10 @@ struct SSyncRaftProgress {
|
|||
bool isLearner;
|
||||
};
|
||||
|
||||
struct SSyncRaftProgressMap {
|
||||
SSyncRaftProgress progress[TSDB_MAX_REPLICA];
|
||||
};
|
||||
|
||||
void syncRaftInitProgress(int i, SSyncRaft* pRaft, SSyncRaftProgress* progress);
|
||||
|
||||
/**
|
||||
|
@ -210,7 +214,9 @@ bool syncRaftProgressIsUptodate(SSyncRaft* pRaft, SSyncRaftProgress* progress);
|
|||
|
||||
void syncRaftProgressBecomeSnapshot(SSyncRaftProgress* progress, SyncIndex snapshotIndex);
|
||||
|
||||
void syncRaftProgressCopy(const SSyncRaftProgress* from, SSyncRaftProgress* to);
|
||||
|
||||
void syncRaftProgressMapCopy(const SSyncRaftProgressMap* from, SSyncRaftProgressMap* to);
|
||||
|
||||
#if 0
|
||||
|
||||
|
|
|
@ -17,77 +17,72 @@
|
|||
#define _TD_LIBS_SYNC_RAFT_PROGRESS_TRACKER_H
|
||||
|
||||
#include "sync_type.h"
|
||||
#include "sync_raft_quorum.h"
|
||||
#include "sync_raft_quorum_joint.h"
|
||||
#include "sync_raft_progress.h"
|
||||
|
||||
struct SSyncRaftProgressTrackerConfig {
|
||||
SSyncRaftQuorumJointConfig voters;
|
||||
|
||||
/**
|
||||
* autoLeave is true if the configuration is joint and a transition to the
|
||||
* incoming configuration should be carried out automatically by Raft when
|
||||
* this is possible. If false, the configuration will be joint until the
|
||||
* application initiates the transition manually.
|
||||
**/
|
||||
// autoLeave is true if the configuration is joint and a transition to the
|
||||
// incoming configuration should be carried out automatically by Raft when
|
||||
// this is possible. If false, the configuration will be joint until the
|
||||
// application initiates the transition manually.
|
||||
bool autoLeave;
|
||||
|
||||
/**
|
||||
* Learners is a set of IDs corresponding to the learners active in the
|
||||
* current configuration.
|
||||
*
|
||||
* Invariant: Learners and Voters does not intersect, i.e. if a peer is in
|
||||
* either half of the joint config, it can't be a learner; if it is a
|
||||
* learner it can't be in either half of the joint config. This invariant
|
||||
* simplifies the implementation since it allows peers to have clarity about
|
||||
* its current role without taking into account joint consensus.
|
||||
**/
|
||||
SyncNodeId learners[TSDB_MAX_REPLICA];
|
||||
// Learners is a set of IDs corresponding to the learners active in the
|
||||
// current configuration.
|
||||
//
|
||||
// Invariant: Learners and Voters does not intersect, i.e. if a peer is in
|
||||
// either half of the joint config, it can't be a learner; if it is a
|
||||
// learner it can't be in either half of the joint config. This invariant
|
||||
// simplifies the implementation since it allows peers to have clarity about
|
||||
// its current role without taking into account joint consensus.
|
||||
SSyncRaftNodeMap learners;
|
||||
|
||||
/**
|
||||
* When we turn a voter into a learner during a joint consensus transition,
|
||||
* we cannot add the learner directly when entering the joint state. This is
|
||||
* because this would violate the invariant that the intersection of
|
||||
* voters and learners is empty. For example, assume a Voter is removed and
|
||||
* immediately re-added as a learner (or in other words, it is demoted):
|
||||
*
|
||||
* Initially, the configuration will be
|
||||
*
|
||||
* voters: {1 2 3}
|
||||
* learners: {}
|
||||
*
|
||||
* and we want to demote 3. Entering the joint configuration, we naively get
|
||||
*
|
||||
* voters: {1 2} & {1 2 3}
|
||||
* learners: {3}
|
||||
*
|
||||
* but this violates the invariant (3 is both voter and learner). Instead,
|
||||
* we get
|
||||
*
|
||||
* voters: {1 2} & {1 2 3}
|
||||
* learners: {}
|
||||
* next_learners: {3}
|
||||
*
|
||||
* Where 3 is now still purely a voter, but we are remembering the intention
|
||||
* to make it a learner upon transitioning into the final configuration:
|
||||
*
|
||||
* voters: {1 2}
|
||||
* learners: {3}
|
||||
* next_learners: {}
|
||||
*
|
||||
* Note that next_learners is not used while adding a learner that is not
|
||||
* also a voter in the joint config. In this case, the learner is added
|
||||
* right away when entering the joint configuration, so that it is caught up
|
||||
* as soon as possible.
|
||||
**/
|
||||
SyncNodeId learnersNext[TSDB_MAX_REPLICA];
|
||||
// When we turn a voter into a learner during a joint consensus transition,
|
||||
// we cannot add the learner directly when entering the joint state. This is
|
||||
// because this would violate the invariant that the intersection of
|
||||
// voters and learners is empty. For example, assume a Voter is removed and
|
||||
// immediately re-added as a learner (or in other words, it is demoted):
|
||||
//
|
||||
// Initially, the configuration will be
|
||||
//
|
||||
// voters: {1 2 3}
|
||||
// learners: {}
|
||||
//
|
||||
// and we want to demote 3. Entering the joint configuration, we naively get
|
||||
//
|
||||
// voters: {1 2} & {1 2 3}
|
||||
// learners: {3}
|
||||
//
|
||||
// but this violates the invariant (3 is both voter and learner). Instead,
|
||||
// we get
|
||||
//
|
||||
// voters: {1 2} & {1 2 3}
|
||||
// learners: {}
|
||||
// next_learners: {3}
|
||||
//
|
||||
// Where 3 is now still purely a voter, but we are remembering the intention
|
||||
// to make it a learner upon transitioning into the final configuration:
|
||||
//
|
||||
// voters: {1 2}
|
||||
// learners: {3}
|
||||
// next_learners: {}
|
||||
//
|
||||
// Note that next_learners is not used while adding a learner that is not
|
||||
// also a voter in the joint config. In this case, the learner is added
|
||||
// right away when entering the joint configuration, so that it is caught up
|
||||
// as soon as possible.
|
||||
SSyncRaftNodeMap learnersNext;
|
||||
};
|
||||
|
||||
struct SSyncRaftProgressTracker {
|
||||
SSyncRaftProgressTrackerConfig config;
|
||||
|
||||
SSyncRaftProgress progressMap[TSDB_MAX_REPLICA];
|
||||
SSyncRaftProgressMap progressMap;
|
||||
|
||||
ESyncRaftVoteResult votes[TSDB_MAX_REPLICA];
|
||||
ESyncRaftVoteType votes[TSDB_MAX_REPLICA];
|
||||
int maxInflightMsgs;
|
||||
};
|
||||
|
||||
|
@ -104,6 +99,10 @@ void syncRaftProgressVisit(SSyncRaftProgressTracker*, visitProgressFp visit, voi
|
|||
**/
|
||||
void syncRaftRecordVote(SSyncRaftProgressTracker* tracker, int i, bool grant);
|
||||
|
||||
void syncRaftCloneTrackerConfig(const SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressTrackerConfig* result);
|
||||
|
||||
int syncRaftCheckProgress(const SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap);
|
||||
|
||||
/**
|
||||
* syncRaftTallyVotes returns the number of granted and rejected Votes, and whether the
|
||||
* election outcome is known.
|
||||
|
|
|
@ -0,0 +1,61 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef TD_SYNC_RAFT_PROTO_H
|
||||
#define TD_SYNC_RAFT_PROTO_H
|
||||
|
||||
#include "sync_type.h"
|
||||
|
||||
typedef enum ESyncRaftConfChangeType {
|
||||
SYNC_RAFT_Conf_AddNode = 0,
|
||||
SYNC_RAFT_Conf_RemoveNode = 1,
|
||||
SYNC_RAFT_Conf_UpdateNode = 2,
|
||||
SYNC_RAFT_Conf_AddLearnerNode = 2,
|
||||
} ESyncRaftConfChangeType;
|
||||
|
||||
// ConfChangeSingle is an individual configuration change operation. Multiple
|
||||
// such operations can be carried out atomically via a ConfChangeV2.
|
||||
typedef struct SSyncConfChangeSingle {
|
||||
ESyncRaftConfChangeType type;
|
||||
SyncNodeId nodeId;
|
||||
} SSyncConfChangeSingle;
|
||||
|
||||
typedef struct SSyncConfChangeSingleArray {
|
||||
int n;
|
||||
SSyncConfChangeSingle* changes;
|
||||
} SSyncConfChangeSingleArray;
|
||||
|
||||
typedef struct SSyncConfigState {
|
||||
// The voters in the incoming config. (If the configuration is not joint,
|
||||
// then the outgoing config is empty).
|
||||
SSyncRaftNodeMap voters;
|
||||
|
||||
// The learners in the incoming config.
|
||||
SSyncRaftNodeMap learners;
|
||||
|
||||
// The voters in the outgoing config.
|
||||
SSyncRaftNodeMap votersOutgoing;
|
||||
|
||||
// The nodes that will become learners when the outgoing config is removed.
|
||||
// These nodes are necessarily currently in nodes_joint (or they would have
|
||||
// been added to the incoming config right away).
|
||||
SSyncRaftNodeMap learnersNext;
|
||||
|
||||
// If set, the config is joint and Raft will automatically transition into
|
||||
// the final config (i.e. remove the outgoing config) when this is safe.
|
||||
bool autoLeave;
|
||||
} SSyncConfigState;
|
||||
|
||||
#endif /* TD_SYNC_RAFT_PROTO_H */
|
|
@ -25,7 +25,8 @@
|
|||
* majority configurations. Decisions require the support of both majorities.
|
||||
**/
|
||||
typedef struct SSyncRaftQuorumJointConfig {
|
||||
SSyncCluster majorityConfig[2];
|
||||
SSyncCluster outgoing;
|
||||
SSyncCluster incoming;
|
||||
} SSyncRaftQuorumJointConfig;
|
||||
|
||||
/**
|
||||
|
@ -33,6 +34,25 @@ typedef struct SSyncRaftQuorumJointConfig {
|
|||
* a result indicating whether the vote is pending, lost, or won. A joint quorum
|
||||
* requires both majority quorums to vote in favor.
|
||||
**/
|
||||
ESyncRaftVoteResult syncRaftVoteResult(SSyncRaftQuorumJointConfig* config, const ESyncRaftVoteResult* votes);
|
||||
ESyncRaftVoteType syncRaftVoteResult(SSyncRaftQuorumJointConfig* config, const ESyncRaftVoteType* votes);
|
||||
|
||||
static FORCE_INLINE bool syncRaftJointConfigInCluster(const SSyncCluster* cluster, SyncNodeId id) {
|
||||
int i;
|
||||
for (i = 0; i < cluster->replica; ++i) {
|
||||
if (cluster->nodeInfo[i].nodeId == id) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static FORCE_INLINE bool syncRaftJointConfigInOutgoing(const SSyncRaftQuorumJointConfig* config, SyncNodeId id) {
|
||||
return syncRaftJointConfigInCluster(&config->outgoing, id);
|
||||
}
|
||||
|
||||
static FORCE_INLINE bool syncRaftJointConfigInIncoming(const SSyncRaftQuorumJointConfig* config, SyncNodeId id) {
|
||||
return syncRaftJointConfigInCluster(&config->incoming, id);
|
||||
}
|
||||
|
||||
#endif /* _TD_LIBS_SYNC_RAFT_QUORUM_JOINT_H */
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
|
||||
#include "sync.h"
|
||||
#include "sync_type.h"
|
||||
#include "sync_raft_quorum.h"
|
||||
|
||||
/**
|
||||
* syncRaftMajorityVoteResult takes a mapping of voters to yes/no (true/false) votes and returns
|
||||
|
@ -25,6 +26,6 @@
|
|||
* yes/no has been reached), won (a quorum of yes has been reached), or lost (a
|
||||
* quorum of no has been reached).
|
||||
**/
|
||||
ESyncRaftVoteResult syncRaftMajorityVoteResult(SSyncCluster* config, const ESyncRaftVoteResult* votes);
|
||||
ESyncRaftVoteResult syncRaftMajorityVoteResult(SSyncCluster* config, const ESyncRaftVoteType* votes);
|
||||
|
||||
#endif /* _TD_LIBS_SYNC_RAFT_QUORUM_MAJORITY_H */
|
||||
|
|
|
@ -0,0 +1,25 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef TD_SYNC_RAFT_RESTORE_H
|
||||
#define TD_SYNC_RAFT_RESTORE_H
|
||||
|
||||
#include "sync_type.h"
|
||||
#include "sync_raft_proto.h"
|
||||
|
||||
int syncRaftRestoreConfig(SSyncRaftChanger* changer, const SSyncConfigState* cs,
|
||||
SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap);
|
||||
|
||||
#endif /* TD_SYNC_RAFT_RESTORE_H */
|
|
@ -17,6 +17,7 @@
|
|||
#define _TD_LIBS_SYNC_TYPE_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "sync.h"
|
||||
#include "osMath.h"
|
||||
|
||||
#define SYNC_NON_NODE_ID -1
|
||||
|
@ -28,10 +29,13 @@ typedef uint32_t SyncTick;
|
|||
typedef struct SSyncRaft SSyncRaft;
|
||||
|
||||
typedef struct SSyncRaftProgress SSyncRaftProgress;
|
||||
typedef struct SSyncRaftProgressMap SSyncRaftProgressMap;
|
||||
typedef struct SSyncRaftProgressTrackerConfig SSyncRaftProgressTrackerConfig;
|
||||
|
||||
typedef struct SSyncRaftProgressTracker SSyncRaftProgressTracker;
|
||||
|
||||
typedef struct SSyncRaftChanger SSyncRaftChanger;
|
||||
|
||||
typedef struct SSyncRaftLog SSyncRaftLog;
|
||||
|
||||
typedef struct SSyncRaftEntry SSyncRaftEntry;
|
||||
|
@ -46,6 +50,11 @@ typedef struct SSyncRaftEntry SSyncRaftEntry;
|
|||
#endif
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
int32_t replica;
|
||||
SyncNodeId nodeId[TSDB_MAX_REPLICA];
|
||||
} SSyncRaftNodeMap;
|
||||
|
||||
typedef enum {
|
||||
SYNC_RAFT_CAMPAIGN_PRE_ELECTION = 0,
|
||||
SYNC_RAFT_CAMPAIGN_ELECTION = 1,
|
||||
|
@ -61,6 +70,6 @@ typedef enum {
|
|||
|
||||
//reject the vote request
|
||||
SYNC_RAFT_VOTE_RESP_REJECT = 2,
|
||||
} ESyncRaftVoteResult;
|
||||
} ESyncRaftVoteType;
|
||||
|
||||
#endif /* _TD_LIBS_SYNC_TYPE_H */
|
||||
|
|
|
@ -0,0 +1,154 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "syncInt.h"
|
||||
#include "sync_raft_config_change.h"
|
||||
#include "sync_raft_progress.h"
|
||||
#include "sync_raft_progress_tracker.h"
|
||||
|
||||
static int checkAndCopy(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap);
|
||||
static int checkAndReturn(SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap);
|
||||
static int checkInvariants(SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap);
|
||||
static int checkInvariants(SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap);
|
||||
static bool hasJointConfig(const SSyncRaftProgressTrackerConfig* config);
|
||||
static int applyConfig(SSyncRaftChanger* changer, const SSyncRaftProgressTrackerConfig* config,
|
||||
const SSyncRaftProgressMap* progressMap, const SSyncConfChangeSingleArray* css);
|
||||
|
||||
// Simple carries out a series of configuration changes that (in aggregate)
|
||||
// mutates the incoming majority config Voters[0] by at most one. This method
|
||||
// will return an error if that is not the case, if the resulting quorum is
|
||||
// zero, or if the configuration is in a joint state (i.e. if there is an
|
||||
// outgoing configuration).
|
||||
int syncRaftChangerSimpleConfig(SSyncRaftChanger* changer, const SSyncConfChangeSingleArray* css,
|
||||
SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap) {
|
||||
int ret;
|
||||
|
||||
ret = checkAndCopy(changer, config, progressMap);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (hasJointConfig(config)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
ret = applyConfig(changer, config, progressMap, css);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
return checkAndReturn(config, progressMap);
|
||||
}
|
||||
|
||||
// checkAndCopy copies the tracker's config and progress map (deeply enough for
|
||||
// the purposes of the Changer) and returns those copies. It returns an error
|
||||
// if checkInvariants does.
|
||||
static int checkAndCopy(SSyncRaftChanger* changer, SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap) {
|
||||
syncRaftCloneTrackerConfig(&changer->tracker->config, config);
|
||||
int i;
|
||||
for (i = 0; i < TSDB_MAX_REPLICA; ++i) {
|
||||
SSyncRaftProgress* progress = &(changer->tracker->progressMap.progress[i]);
|
||||
if (progress->id == SYNC_NON_NODE_ID) {
|
||||
continue;
|
||||
}
|
||||
syncRaftProgressCopy(progress, &(progressMap->progress[i]));
|
||||
}
|
||||
return checkAndReturn(config, progressMap);
|
||||
}
|
||||
|
||||
// checkAndReturn calls checkInvariants on the input and returns either the
|
||||
// resulting error or the input.
|
||||
static int checkAndReturn(SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap) {
|
||||
if (checkInvariants(config, progressMap) != 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// checkInvariants makes sure that the config and progress are compatible with
|
||||
// each other. This is used to check both what the Changer is initialized with,
|
||||
// as well as what it returns.
|
||||
static int checkInvariants(SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap) {
|
||||
int ret = syncRaftCheckProgress(config, progressMap);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
int i;
|
||||
// Any staged learner was staged because it could not be directly added due
|
||||
// to a conflicting voter in the outgoing config.
|
||||
for (i = 0; i < TSDB_MAX_REPLICA; ++i) {
|
||||
if (!syncRaftJointConfigInOutgoing(&config->voters, config->learnersNext.nodeId[i])) {
|
||||
return -1;
|
||||
}
|
||||
if (progressMap->progress[i].id != SYNC_NON_NODE_ID && progressMap->progress[i].isLearner) {
|
||||
syncError("%d is in LearnersNext, but is already marked as learner", progressMap->progress[i].id);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
// Conversely Learners and Voters doesn't intersect at all.
|
||||
for (i = 0; i < TSDB_MAX_REPLICA; ++i) {
|
||||
if (syncRaftJointConfigInIncoming(&config->voters, config->learners.nodeId[i])) {
|
||||
syncError("%d is in Learners and voter.incoming", progressMap->progress[i].id);
|
||||
return -1;
|
||||
}
|
||||
if (progressMap->progress[i].id != SYNC_NON_NODE_ID && !progressMap->progress[i].isLearner) {
|
||||
syncError("%d is in Learners, but is not marked as learner", progressMap->progress[i].id);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (!hasJointConfig(config)) {
|
||||
// We enforce that empty maps are nil instead of zero.
|
||||
if (config->learnersNext.replica > 0) {
|
||||
syncError("cfg.LearnersNext must be nil when not joint");
|
||||
return -1;
|
||||
}
|
||||
if (config->autoLeave) {
|
||||
syncError("AutoLeave must be false when not joint");
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool hasJointConfig(const SSyncRaftProgressTrackerConfig* config) {
|
||||
return config->voters.outgoing.replica > 0;
|
||||
}
|
||||
|
||||
static int applyConfig(SSyncRaftChanger* changer, const SSyncRaftProgressTrackerConfig* config,
|
||||
const SSyncRaftProgressMap* progressMap, const SSyncConfChangeSingleArray* css) {
|
||||
int i;
|
||||
|
||||
for (i = 0; i < css->n; ++i) {
|
||||
const SSyncConfChangeSingle* cs = &(css->changes[i]);
|
||||
if (cs->nodeId == SYNC_NON_NODE_ID) {
|
||||
continue;
|
||||
}
|
||||
|
||||
ESyncRaftConfChangeType type = cs->type;
|
||||
switch (type) {
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
if (config->voters.incoming.replica == 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -243,7 +243,7 @@ static void appendEntries(SSyncRaft* pRaft, SSyncRaftEntry* entries, int n) {
|
|||
|
||||
syncRaftLogAppend(pRaft->log, entries, n);
|
||||
|
||||
SSyncRaftProgress* progress = &(pRaft->tracker->progressMap[pRaft->cluster.selfIndex]);
|
||||
SSyncRaftProgress* progress = &(pRaft->tracker->progressMap.progress[pRaft->cluster.selfIndex]);
|
||||
syncRaftProgressMaybeUpdate(progress, lastIndex);
|
||||
// Regardless of maybeCommit's return, our caller will call bcastAppend.
|
||||
maybeCommit(pRaft);
|
||||
|
|
|
@ -149,6 +149,10 @@ void syncRaftProgressBecomeSnapshot(SSyncRaftProgress* progress, SyncIndex snaps
|
|||
progress->pendingSnapshotIndex = snapshotIndex;
|
||||
}
|
||||
|
||||
void syncRaftProgressCopy(const SSyncRaftProgress* progress, SSyncRaftProgress* out) {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* ResetState moves the Progress into the specified State, resetting ProbeSent,
|
||||
* PendingSnapshot, and Inflights.
|
||||
|
|
|
@ -25,13 +25,13 @@ SSyncRaftProgressTracker* syncRaftOpenProgressTracker() {
|
|||
}
|
||||
|
||||
void syncRaftResetVotes(SSyncRaftProgressTracker* tracker) {
|
||||
memset(tracker->votes, SYNC_RAFT_VOTE_RESP_UNKNOWN, sizeof(ESyncRaftVoteResult) * TSDB_MAX_REPLICA);
|
||||
memset(tracker->votes, SYNC_RAFT_VOTE_RESP_UNKNOWN, sizeof(ESyncRaftVoteType) * TSDB_MAX_REPLICA);
|
||||
}
|
||||
|
||||
void syncRaftProgressVisit(SSyncRaftProgressTracker* tracker, visitProgressFp visit, void* arg) {
|
||||
int i;
|
||||
for (i = 0; i < TSDB_MAX_REPLICA; ++i) {
|
||||
SSyncRaftProgress* progress = &(tracker->progressMap[i]);
|
||||
SSyncRaftProgress* progress = &(tracker->progressMap.progress[i]);
|
||||
visit(i, progress, arg);
|
||||
}
|
||||
}
|
||||
|
@ -44,6 +44,10 @@ void syncRaftRecordVote(SSyncRaftProgressTracker* tracker, int i, bool grant) {
|
|||
tracker->votes[i] = grant ? SYNC_RAFT_VOTE_RESP_GRANT : SYNC_RAFT_VOTE_RESP_REJECT;
|
||||
}
|
||||
|
||||
void syncRaftCloneTrackerConfig(const SSyncRaftProgressTrackerConfig* from, SSyncRaftProgressTrackerConfig* to) {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* syncRaftTallyVotes returns the number of granted and rejected Votes, and whether the
|
||||
* election outcome is known.
|
||||
|
@ -54,7 +58,7 @@ ESyncRaftVoteResult syncRaftTallyVotes(SSyncRaftProgressTracker* tracker, int* r
|
|||
int r, g;
|
||||
|
||||
for (i = 0, r = 0, g = 0; i < TSDB_MAX_REPLICA; ++i) {
|
||||
progress = &(tracker->progressMap[i]);
|
||||
progress = &(tracker->progressMap.progress[i]);
|
||||
if (progress->id == SYNC_NON_NODE_ID) {
|
||||
continue;
|
||||
}
|
||||
|
|
|
@ -22,9 +22,9 @@
|
|||
* a result indicating whether the vote is pending, lost, or won. A joint quorum
|
||||
* requires both majority quorums to vote in favor.
|
||||
**/
|
||||
ESyncRaftVoteResult syncRaftVoteResult(SSyncRaftQuorumJointConfig* config, const ESyncRaftVoteResult* votes) {
|
||||
ESyncRaftVoteResult r1 = syncRaftMajorityVoteResult(&(config->majorityConfig[0]), votes);
|
||||
ESyncRaftVoteResult r2 = syncRaftMajorityVoteResult(&(config->majorityConfig[1]), votes);
|
||||
ESyncRaftVoteType syncRaftVoteResult(SSyncRaftQuorumJointConfig* config, const ESyncRaftVoteType* votes) {
|
||||
ESyncRaftVoteResult r1 = syncRaftMajorityVoteResult(&(config->incoming), votes);
|
||||
ESyncRaftVoteResult r2 = syncRaftMajorityVoteResult(&(config->outgoing), votes);
|
||||
|
||||
if (r1 == r2) {
|
||||
// If they agree, return the agreed state.
|
||||
|
|
|
@ -22,7 +22,7 @@
|
|||
* yes/no has been reached), won (a quorum of yes has been reached), or lost (a
|
||||
* quorum of no has been reached).
|
||||
**/
|
||||
ESyncRaftVoteResult syncRaftMajorityVoteResult(SSyncCluster* config, const ESyncRaftVoteResult* votes) {
|
||||
ESyncRaftVoteResult syncRaftMajorityVoteResult(SSyncCluster* config, const ESyncRaftVoteType* votes) {
|
||||
if (config->replica == 0) {
|
||||
return SYNC_RAFT_VOTE_WON;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,177 @@
|
|||
/*
|
||||
* Copyright (c) 2019 TAOS Data, Inc. <cli@taosdata.com>
|
||||
*
|
||||
* This program is free software: you can use, redistribute, and/or modify
|
||||
* it under the terms of the GNU Affero General Public License, version 3
|
||||
* or later ("AGPL"), as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "sync_raft_config_change.h"
|
||||
#include "sync_raft_restore.h"
|
||||
#include "sync_raft_progress_tracker.h"
|
||||
|
||||
static int toConfChangeSingle(const SSyncConfigState* cs, SSyncConfChangeSingleArray* out, SSyncConfChangeSingleArray* in);
|
||||
|
||||
int syncRaftRestoreConfig(SSyncRaftChanger* changer, const SSyncConfigState* cs,
|
||||
SSyncRaftProgressTrackerConfig* config, SSyncRaftProgressMap* progressMap) {
|
||||
SSyncConfChangeSingleArray outgoing;
|
||||
SSyncConfChangeSingleArray incoming;
|
||||
SSyncConfChangeSingleArray css;
|
||||
int i, ret;
|
||||
|
||||
ret = toConfChangeSingle(cs, &outgoing, &incoming);
|
||||
if (ret != 0) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (outgoing.n == 0) {
|
||||
// No outgoing config, so just apply the incoming changes one by one.
|
||||
for (i = 0; i < incoming.n; ++i) {
|
||||
css = (SSyncConfChangeSingleArray) {
|
||||
.n = 1,
|
||||
.changes = &incoming.changes[i],
|
||||
};
|
||||
ret = syncRaftChangerSimpleConfig(changer, &css, config, progressMap);
|
||||
if (ret != 0) {
|
||||
goto out;
|
||||
}
|
||||
syncRaftCloneTrackerConfig(config, &changer->tracker->config);
|
||||
syncRaftProgressMapCopy(progressMap, &changer->tracker->progressMap);
|
||||
}
|
||||
} else {
|
||||
// The ConfState describes a joint configuration.
|
||||
//
|
||||
// First, apply all of the changes of the outgoing config one by one, so
|
||||
// that it temporarily becomes the incoming active config. For example,
|
||||
// if the config is (1 2 3)&(2 3 4), this will establish (2 3 4)&().
|
||||
for (i = 0; i < outgoing.n; ++i) {
|
||||
css = (SSyncConfChangeSingleArray) {
|
||||
.n = 1,
|
||||
.changes = &outgoing.changes[i],
|
||||
};
|
||||
ret = syncRaftChangerSimpleConfig(changer, &css, config, progressMap);
|
||||
if (ret != 0) {
|
||||
goto out;
|
||||
}
|
||||
syncRaftCloneTrackerConfig(config, &changer->tracker->config);
|
||||
syncRaftProgressMapCopy(progressMap, &changer->tracker->progressMap);
|
||||
}
|
||||
|
||||
ret = syncRaftChangerEnterJoint(changer, &incoming, config, progressMap);
|
||||
if (ret != 0) {
|
||||
goto out;
|
||||
}
|
||||
syncRaftCloneTrackerConfig(config, &changer->tracker->config);
|
||||
syncRaftProgressMapCopy(progressMap, &changer->tracker->progressMap);
|
||||
}
|
||||
|
||||
out:
|
||||
if (incoming.n != 0) free(incoming.changes);
|
||||
if (outgoing.n != 0) free(outgoing.changes);
|
||||
return ret;
|
||||
}
|
||||
|
||||
// toConfChangeSingle translates a conf state into 1) a slice of operations creating
|
||||
// first the config that will become the outgoing one, and then the incoming one, and
|
||||
// b) another slice that, when applied to the config resulted from 1), represents the
|
||||
// ConfState.
|
||||
static int toConfChangeSingle(const SSyncConfigState* cs, SSyncConfChangeSingleArray* out, SSyncConfChangeSingleArray* in) {
|
||||
int i;
|
||||
|
||||
out->n = in->n = 0;
|
||||
|
||||
out->n = cs->votersOutgoing.replica;
|
||||
out->changes = (SSyncConfChangeSingle*)malloc(sizeof(SSyncConfChangeSingle) * out->n);
|
||||
if (out->changes == NULL) {
|
||||
out->n = 0;
|
||||
return -1;
|
||||
}
|
||||
in->n = cs->votersOutgoing.replica + cs->voters.replica + cs->learners.replica + cs->learnersNext.replica;
|
||||
out->changes = (SSyncConfChangeSingle*)malloc(sizeof(SSyncConfChangeSingle) * in->n);
|
||||
if (in->changes == NULL) {
|
||||
in->n = 0;
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Example to follow along this code:
|
||||
// voters=(1 2 3) learners=(5) outgoing=(1 2 4 6) learners_next=(4)
|
||||
//
|
||||
// This means that before entering the joint config, the configuration
|
||||
// had voters (1 2 4 6) and perhaps some learners that are already gone.
|
||||
// The new set of voters is (1 2 3), i.e. (1 2) were kept around, and (4 6)
|
||||
// are no longer voters; however 4 is poised to become a learner upon leaving
|
||||
// the joint state.
|
||||
// We can't tell whether 5 was a learner before entering the joint config,
|
||||
// but it doesn't matter (we'll pretend that it wasn't).
|
||||
//
|
||||
// The code below will construct
|
||||
// outgoing = add 1; add 2; add 4; add 6
|
||||
// incoming = remove 1; remove 2; remove 4; remove 6
|
||||
// add 1; add 2; add 3;
|
||||
// add-learner 5;
|
||||
// add-learner 4;
|
||||
//
|
||||
// So, when starting with an empty config, after applying 'outgoing' we have
|
||||
//
|
||||
// quorum=(1 2 4 6)
|
||||
//
|
||||
// From which we enter a joint state via 'incoming'
|
||||
//
|
||||
// quorum=(1 2 3)&&(1 2 4 6) learners=(5) learners_next=(4)
|
||||
//
|
||||
// as desired.
|
||||
|
||||
for (i = 0; i < cs->votersOutgoing.replica; ++i) {
|
||||
// If there are outgoing voters, first add them one by one so that the
|
||||
// (non-joint) config has them all.
|
||||
out->changes[i] = (SSyncConfChangeSingle) {
|
||||
.type = SYNC_RAFT_Conf_AddNode,
|
||||
.nodeId = cs->votersOutgoing.nodeId[i],
|
||||
};
|
||||
}
|
||||
|
||||
// We're done constructing the outgoing slice, now on to the incoming one
|
||||
// (which will apply on top of the config created by the outgoing slice).
|
||||
|
||||
// First, we'll remove all of the outgoing voters.
|
||||
int j = 0;
|
||||
for (i = 0; i < cs->votersOutgoing.replica; ++i) {
|
||||
in->changes[j] = (SSyncConfChangeSingle) {
|
||||
.type = SYNC_RAFT_Conf_RemoveNode,
|
||||
.nodeId = cs->votersOutgoing.nodeId[i],
|
||||
};
|
||||
j += 1;
|
||||
}
|
||||
// Then we'll add the incoming voters and learners.
|
||||
for (i = 0; i < cs->voters.replica; ++i) {
|
||||
in->changes[j] = (SSyncConfChangeSingle) {
|
||||
.type = SYNC_RAFT_Conf_AddNode,
|
||||
.nodeId = cs->voters.nodeId[i],
|
||||
};
|
||||
j += 1;
|
||||
}
|
||||
for (i = 0; i < cs->learners.replica; ++i) {
|
||||
in->changes[j] = (SSyncConfChangeSingle) {
|
||||
.type = SYNC_RAFT_Conf_AddLearnerNode,
|
||||
.nodeId = cs->learners.nodeId[i],
|
||||
};
|
||||
j += 1;
|
||||
}
|
||||
// Same for LearnersNext; these are nodes we want to be learners but which
|
||||
// are currently voters in the outgoing config.
|
||||
for (i = 0; i < cs->learnersNext.replica; ++i) {
|
||||
in->changes[j] = (SSyncConfChangeSingle) {
|
||||
.type = SYNC_RAFT_Conf_AddLearnerNode,
|
||||
.nodeId = cs->learnersNext.nodeId[i],
|
||||
};
|
||||
j += 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue