add learner
This commit is contained in:
parent
230adc5433
commit
3b78d4382b
|
@ -1286,6 +1286,9 @@ typedef struct {
|
|||
int16_t hashSuffix;
|
||||
int32_t tsdbPageSize;
|
||||
int64_t reserved[8];
|
||||
int8_t learnerReplica;
|
||||
int8_t learnerSelfIndex;
|
||||
SReplica learnerReplicas[TSDB_MAX_LEARNER_REPLICA];
|
||||
} SCreateVnodeReq;
|
||||
|
||||
int32_t tSerializeSCreateVnodeReq(void* buf, int32_t bufLen, SCreateVnodeReq* pReq);
|
||||
|
@ -1357,7 +1360,10 @@ typedef struct {
|
|||
int8_t replica;
|
||||
SReplica replicas[TSDB_MAX_REPLICA];
|
||||
int64_t reserved[8];
|
||||
} SAlterVnodeReplicaReq;
|
||||
int8_t learnerSelfIndex;
|
||||
int8_t learnerReplica;
|
||||
SReplica learnerReplicas[TSDB_MAX_LEARNER_REPLICA];
|
||||
} SAlterVnodeReplicaReq, SAlterVnodeTypeReq;
|
||||
|
||||
int32_t tSerializeSAlterVnodeReplicaReq(void* buf, int32_t bufLen, SAlterVnodeReplicaReq* pReq);
|
||||
int32_t tDeserializeSAlterVnodeReplicaReq(void* buf, int32_t bufLen, SAlterVnodeReplicaReq* pReq);
|
||||
|
@ -1629,7 +1635,10 @@ int32_t tDeserializeSCreateDropMQSNodeReq(void* buf, int32_t bufLen, SMCreateQno
|
|||
typedef struct {
|
||||
int8_t replica;
|
||||
SReplica replicas[TSDB_MAX_REPLICA];
|
||||
} SDCreateMnodeReq, SDAlterMnodeReq;
|
||||
int8_t learnerReplica;
|
||||
SReplica learnerReplicas[TSDB_MAX_LEARNER_REPLICA];
|
||||
int64_t lastIndex;
|
||||
} SDCreateMnodeReq, SDAlterMnodeReq, SDAlterMnodeTypeReq;
|
||||
|
||||
int32_t tSerializeSDCreateMnodeReq(void* buf, int32_t bufLen, SDCreateMnodeReq* pReq);
|
||||
int32_t tDeserializeSDCreateMnodeReq(void* buf, int32_t bufLen, SDCreateMnodeReq* pReq);
|
||||
|
|
|
@ -83,6 +83,8 @@ enum {
|
|||
TD_DEF_MSG_TYPE(TDMT_DND_CONFIG_DNODE, "config-dnode", NULL, NULL)
|
||||
TD_DEF_MSG_TYPE(TDMT_DND_SYSTABLE_RETRIEVE, "dnode-retrieve", NULL, NULL)
|
||||
TD_DEF_MSG_TYPE(TDMT_DND_MAX_MSG, "dnd-max", NULL, NULL)
|
||||
TD_DEF_MSG_TYPE(TDMT_DND_ALTER_MNODE_TYPE, "dnode-alter-mnode-type", NULL, NULL)
|
||||
TD_DEF_MSG_TYPE(TDMT_DND_ALTER_VNODE_TYPE, "dnode-alter-vnode-type", NULL, NULL)
|
||||
|
||||
TD_NEW_MSG_SEG(TDMT_MND_MSG)
|
||||
TD_DEF_MSG_TYPE(TDMT_MND_CONNECT, "connect", NULL, NULL)
|
||||
|
|
|
@ -33,8 +33,11 @@ typedef struct {
|
|||
bool deploy;
|
||||
int8_t selfIndex;
|
||||
int8_t numOfReplicas;
|
||||
SReplica replicas[TSDB_MAX_REPLICA];
|
||||
int8_t numOfTotalReplicas;
|
||||
SReplica replicas[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
|
||||
int32_t nodeRoles[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
|
||||
SMsgCb msgCb;
|
||||
int64_t lastIndex;
|
||||
} SMnodeOpt;
|
||||
|
||||
/* ------------------------ SMnode ------------------------ */
|
||||
|
@ -69,6 +72,8 @@ int32_t mndStart(SMnode *pMnode);
|
|||
*/
|
||||
void mndStop(SMnode *pMnode);
|
||||
|
||||
int32_t mndIsCatchUp(SMnode *pMnode);
|
||||
|
||||
/**
|
||||
* @brief Get mnode monitor info.
|
||||
*
|
||||
|
|
|
@ -55,6 +55,8 @@ extern "C" {
|
|||
#define SYNC_INDEX_INVALID -1
|
||||
#define SYNC_TERM_INVALID -1
|
||||
|
||||
#define SYNC_LEARNER_CATCHUP 10
|
||||
|
||||
typedef enum {
|
||||
SYNC_STRATEGY_NO_SNAPSHOT = 0,
|
||||
SYNC_STRATEGY_STANDARD_SNAPSHOT = 1,
|
||||
|
@ -76,19 +78,29 @@ typedef enum {
|
|||
TAOS_SYNC_STATE_CANDIDATE = 101,
|
||||
TAOS_SYNC_STATE_LEADER = 102,
|
||||
TAOS_SYNC_STATE_ERROR = 103,
|
||||
TAOS_SYNC_STATE_LEARNER = 104,
|
||||
} ESyncState;
|
||||
|
||||
typedef enum {
|
||||
TAOS_SYNC_ROLE_VOTER = 0,
|
||||
TAOS_SYNC_ROLE_LEARNER = 1,
|
||||
TAOS_SYNC_ROLE_ERROR = 2,
|
||||
} ESyncRole;
|
||||
|
||||
typedef struct SNodeInfo {
|
||||
int64_t clusterId;
|
||||
int32_t nodeId;
|
||||
uint16_t nodePort;
|
||||
char nodeFqdn[TSDB_FQDN_LEN];
|
||||
int64_t clusterId;
|
||||
int32_t nodeId;
|
||||
uint16_t nodePort;
|
||||
char nodeFqdn[TSDB_FQDN_LEN];
|
||||
ESyncRole nodeRole;
|
||||
} SNodeInfo;
|
||||
|
||||
typedef struct SSyncCfg {
|
||||
int32_t totalReplicaNum;
|
||||
int32_t replicaNum;
|
||||
int32_t myIndex;
|
||||
SNodeInfo nodeInfo[TSDB_MAX_REPLICA];
|
||||
SNodeInfo nodeInfo[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
|
||||
SyncIndex lastIndex;
|
||||
} SSyncCfg;
|
||||
|
||||
typedef struct SFsmCbMeta {
|
||||
|
@ -155,6 +167,7 @@ typedef struct SSyncFSM {
|
|||
|
||||
void (*FpBecomeLeaderCb)(const struct SSyncFSM* pFsm);
|
||||
void (*FpBecomeFollowerCb)(const struct SSyncFSM* pFsm);
|
||||
void (*FpBecomeLearnerCb)(const struct SSyncFSM* pFsm);
|
||||
|
||||
int32_t (*FpGetSnapshot)(const struct SSyncFSM* pFsm, SSnapshot* pSnapshot, void* pReaderParam, void** ppReader);
|
||||
void (*FpGetSnapshotInfo)(const struct SSyncFSM* pFsm, SSnapshot* pSnapshot);
|
||||
|
@ -236,6 +249,7 @@ void syncStop(int64_t rid);
|
|||
void syncPreStop(int64_t rid);
|
||||
void syncPostStop(int64_t rid);
|
||||
int32_t syncPropose(int64_t rid, SRpcMsg* pMsg, bool isWeak, int64_t* seq);
|
||||
int32_t syncIsCatchUp(int64_t rid);
|
||||
int32_t syncProcessMsg(int64_t rid, SRpcMsg* pMsg);
|
||||
int32_t syncReconfig(int64_t rid, SSyncCfg* pCfg);
|
||||
int32_t syncBeginSnapshot(int64_t rid, int64_t lastApplyIndex);
|
||||
|
|
|
@ -285,6 +285,7 @@ typedef enum ELogicConditionType {
|
|||
#define TSDB_DNODE_ROLE_VNODE 2
|
||||
|
||||
#define TSDB_MAX_REPLICA 5
|
||||
#define TSDB_MAX_LEARNER_REPLICA 10
|
||||
#define TSDB_SYNC_LOG_BUFFER_SIZE 4096
|
||||
#define TSDB_SYNC_LOG_BUFFER_RETENTION (TSDB_SYNC_LOG_BUFFER_SIZE >> 4)
|
||||
|
||||
|
|
|
@ -4129,6 +4129,12 @@ int32_t tSerializeSCreateVnodeReq(void *buf, int32_t bufLen, SCreateVnodeReq *pR
|
|||
for (int32_t i = 0; i < 8; ++i) {
|
||||
if (tEncodeI64(&encoder, pReq->reserved[i]) < 0) return -1;
|
||||
}
|
||||
if (tEncodeI8(&encoder, pReq->learnerReplica) < 0) return -1;
|
||||
if (tEncodeI8(&encoder, pReq->learnerSelfIndex) < 0) return -1;
|
||||
for (int32_t i = 0; i < TSDB_MAX_LEARNER_REPLICA; ++i) {
|
||||
SReplica *pReplica = &pReq->learnerReplicas[i];
|
||||
if (tEncodeSReplica(&encoder, pReplica) < 0) return -1;
|
||||
}
|
||||
|
||||
tEndEncode(&encoder);
|
||||
|
||||
|
@ -4207,6 +4213,12 @@ int32_t tDeserializeSCreateVnodeReq(void *buf, int32_t bufLen, SCreateVnodeReq *
|
|||
for (int32_t i = 0; i < 8; ++i) {
|
||||
if (tDecodeI64(&decoder, &pReq->reserved[i]) < 0) return -1;
|
||||
}
|
||||
if (tDecodeI8(&decoder, &pReq->learnerReplica) < 0) return -1;
|
||||
if (tDecodeI8(&decoder, &pReq->learnerSelfIndex) < 0) return -1;
|
||||
for (int32_t i = 0; i < TSDB_MAX_LEARNER_REPLICA; ++i) {
|
||||
SReplica *pReplica = &pReq->learnerReplicas[i];
|
||||
if (tDecodeSReplica(&decoder, pReplica) < 0) return -1;
|
||||
}
|
||||
|
||||
tEndDecode(&decoder);
|
||||
tDecoderClear(&decoder);
|
||||
|
@ -4433,6 +4445,12 @@ int32_t tSerializeSAlterVnodeReplicaReq(void *buf, int32_t bufLen, SAlterVnodeRe
|
|||
for (int32_t i = 0; i < 8; ++i) {
|
||||
if (tEncodeI64(&encoder, pReq->reserved[i]) < 0) return -1;
|
||||
}
|
||||
if (tEncodeI8(&encoder, pReq->learnerSelfIndex) < 0) return -1;
|
||||
if (tEncodeI8(&encoder, pReq->learnerReplica) < 0) return -1;
|
||||
for (int32_t i = 0; i < TSDB_MAX_LEARNER_REPLICA; ++i) {
|
||||
SReplica *pReplica = &pReq->learnerReplicas[i];
|
||||
if (tEncodeSReplica(&encoder, pReplica) < 0) return -1;
|
||||
}
|
||||
tEndEncode(&encoder);
|
||||
|
||||
int32_t tlen = encoder.pos;
|
||||
|
@ -4456,6 +4474,12 @@ int32_t tDeserializeSAlterVnodeReplicaReq(void *buf, int32_t bufLen, SAlterVnode
|
|||
for (int32_t i = 0; i < 8; ++i) {
|
||||
if (tDecodeI64(&decoder, &pReq->reserved[i]) < 0) return -1;
|
||||
}
|
||||
if (tDecodeI8(&decoder, &pReq->learnerSelfIndex) < 0) return -1;
|
||||
if (tDecodeI8(&decoder, &pReq->learnerReplica) < 0) return -1;
|
||||
for (int32_t i = 0; i < TSDB_MAX_LEARNER_REPLICA; ++i) {
|
||||
SReplica *pReplica = &pReq->learnerReplicas[i];
|
||||
if (tDecodeSReplica(&decoder, pReplica) < 0) return -1;
|
||||
}
|
||||
|
||||
tEndDecode(&decoder);
|
||||
tDecoderClear(&decoder);
|
||||
|
@ -4767,6 +4791,12 @@ int32_t tSerializeSDCreateMnodeReq(void *buf, int32_t bufLen, SDCreateMnodeReq *
|
|||
SReplica *pReplica = &pReq->replicas[i];
|
||||
if (tEncodeSReplica(&encoder, pReplica) < 0) return -1;
|
||||
}
|
||||
if (tEncodeI8(&encoder, pReq->learnerReplica) < 0) return -1;
|
||||
for (int32_t i = 0; i < TSDB_MAX_LEARNER_REPLICA; ++i) {
|
||||
SReplica *pReplica = &pReq->learnerReplicas[i];
|
||||
if (tEncodeSReplica(&encoder, pReplica) < 0) return -1;
|
||||
}
|
||||
if (tEncodeI64(&encoder, pReq->lastIndex) < 0) return -1;
|
||||
tEndEncode(&encoder);
|
||||
|
||||
int32_t tlen = encoder.pos;
|
||||
|
@ -4784,6 +4814,12 @@ int32_t tDeserializeSDCreateMnodeReq(void *buf, int32_t bufLen, SDCreateMnodeReq
|
|||
SReplica *pReplica = &pReq->replicas[i];
|
||||
if (tDecodeSReplica(&decoder, pReplica) < 0) return -1;
|
||||
}
|
||||
if (tDecodeI8(&decoder, &pReq->learnerReplica) < 0) return -1;
|
||||
for (int32_t i = 0; i < TSDB_MAX_LEARNER_REPLICA; ++i) {
|
||||
SReplica *pReplica = &pReq->learnerReplicas[i];
|
||||
if (tDecodeSReplica(&decoder, pReplica) < 0) return -1;
|
||||
}
|
||||
if (tDecodeI64(&decoder, &pReq->lastIndex) < 0) return -1;
|
||||
tEndDecode(&decoder);
|
||||
|
||||
tDecoderClear(&decoder);
|
||||
|
|
|
@ -32,6 +32,7 @@ typedef struct SDnodeMgmt {
|
|||
TdThread crashReportThread;
|
||||
SSingleWorker mgmtWorker;
|
||||
ProcessCreateNodeFp processCreateNodeFp;
|
||||
ProcessAlterNodeTypeFp processAlterNodeTypeFp;
|
||||
ProcessDropNodeFp processDropNodeFp;
|
||||
SendMonitorReportFp sendMonitorReportFp;
|
||||
GetVnodeLoadsFp getVnodeLoadsFp;
|
||||
|
|
|
@ -352,6 +352,7 @@ SArray *dmGetMsgHandles() {
|
|||
if (dmSetMgmtHandle(pArray, TDMT_DND_CONFIG_DNODE, dmPutNodeMsgToMgmtQueue, 0) == NULL) goto _OVER;
|
||||
if (dmSetMgmtHandle(pArray, TDMT_DND_SERVER_STATUS, dmPutNodeMsgToMgmtQueue, 0) == NULL) goto _OVER;
|
||||
if (dmSetMgmtHandle(pArray, TDMT_DND_SYSTABLE_RETRIEVE, dmPutNodeMsgToMgmtQueue, 0) == NULL) goto _OVER;
|
||||
if (dmSetMgmtHandle(pArray, TDMT_DND_ALTER_MNODE_TYPE, dmPutNodeMsgToMgmtQueue, 0) == NULL) goto _OVER;
|
||||
|
||||
// Requests handled by MNODE
|
||||
if (dmSetMgmtHandle(pArray, TDMT_MND_GRANT, dmPutNodeMsgToMgmtQueue, 0) == NULL) goto _OVER;
|
||||
|
|
|
@ -48,6 +48,7 @@ static int32_t dmOpenMgmt(SMgmtInputOpt *pInput, SMgmtOutputOpt *pOutput) {
|
|||
pMgmt->path = pInput->path;
|
||||
pMgmt->name = pInput->name;
|
||||
pMgmt->processCreateNodeFp = pInput->processCreateNodeFp;
|
||||
pMgmt->processAlterNodeTypeFp = pInput->processAlterNodeTypeFp;
|
||||
pMgmt->processDropNodeFp = pInput->processDropNodeFp;
|
||||
pMgmt->sendMonitorReportFp = pInput->sendMonitorReportFp;
|
||||
pMgmt->getVnodeLoadsFp = pInput->getVnodeLoadsFp;
|
||||
|
|
|
@ -227,6 +227,9 @@ static void dmProcessMgmtQueue(SQueueInfo *pInfo, SRpcMsg *pMsg) {
|
|||
case TDMT_DND_DROP_SNODE:
|
||||
code = (*pMgmt->processDropNodeFp)(SNODE, pMsg);
|
||||
break;
|
||||
case TDMT_DND_ALTER_MNODE_TYPE:
|
||||
code = (*pMgmt->processAlterNodeTypeFp)(MNODE, pMsg);
|
||||
break;
|
||||
case TDMT_DND_SERVER_STATUS:
|
||||
code = dmProcessServerRunStatus(pMgmt, pMsg);
|
||||
break;
|
||||
|
|
|
@ -24,12 +24,16 @@ static int32_t mmDecodeOption(SJson *pJson, SMnodeOpt *pOption) {
|
|||
if (code < 0) return -1;
|
||||
tjsonGetInt32ValueFromDouble(pJson, "selfIndex", pOption->selfIndex, code);
|
||||
if (code < 0) return 0;
|
||||
tjsonGetInt32ValueFromDouble(pJson, "lastIndex", pOption->lastIndex, code);
|
||||
if (code < 0) return 0;
|
||||
|
||||
SJson *replicas = tjsonGetObjectItem(pJson, "replicas");
|
||||
if (replicas == NULL) return 0;
|
||||
pOption->numOfReplicas = tjsonGetArraySize(replicas);
|
||||
pOption->numOfTotalReplicas = tjsonGetArraySize(replicas);
|
||||
|
||||
for (int32_t i = 0; i < pOption->numOfReplicas; ++i) {
|
||||
pOption->numOfReplicas = 0;
|
||||
|
||||
for (int32_t i = 0; i < pOption->numOfTotalReplicas; ++i) {
|
||||
SJson *replica = tjsonGetArrayItem(replicas, i);
|
||||
if (replica == NULL) return -1;
|
||||
|
||||
|
@ -40,6 +44,14 @@ static int32_t mmDecodeOption(SJson *pJson, SMnodeOpt *pOption) {
|
|||
if (code < 0) return -1;
|
||||
tjsonGetUInt16ValueFromDouble(replica, "port", pReplica->port, code);
|
||||
if (code < 0) return -1;
|
||||
tjsonGetInt32ValueFromDouble(replica, "role", pOption->nodeRoles[i], code);
|
||||
if (code < 0) return -1;
|
||||
if(pOption->nodeRoles[i] == TAOS_SYNC_ROLE_VOTER){
|
||||
pOption->numOfReplicas++;
|
||||
}
|
||||
}
|
||||
|
||||
for (int32_t i = 0; i < pOption->numOfTotalReplicas; ++i) {
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -112,14 +124,14 @@ _OVER:
|
|||
}
|
||||
|
||||
static int32_t mmEncodeOption(SJson *pJson, const SMnodeOpt *pOption) {
|
||||
if (pOption->deploy && pOption->numOfReplicas > 0) {
|
||||
if (pOption->deploy && pOption->numOfTotalReplicas > 0) {
|
||||
if (tjsonAddDoubleToObject(pJson, "selfIndex", pOption->selfIndex) < 0) return -1;
|
||||
|
||||
SJson *replicas = tjsonCreateArray();
|
||||
if (replicas == NULL) return -1;
|
||||
if (tjsonAddItemToObject(pJson, "replicas", replicas) < 0) return -1;
|
||||
|
||||
for (int32_t i = 0; i < pOption->numOfReplicas; ++i) {
|
||||
for (int32_t i = 0; i < pOption->numOfTotalReplicas; ++i) {
|
||||
SJson *replica = tjsonCreateObject();
|
||||
if (replica == NULL) return -1;
|
||||
|
||||
|
@ -127,10 +139,13 @@ static int32_t mmEncodeOption(SJson *pJson, const SMnodeOpt *pOption) {
|
|||
if (tjsonAddDoubleToObject(replica, "id", pReplica->id) < 0) return -1;
|
||||
if (tjsonAddStringToObject(replica, "fqdn", pReplica->fqdn) < 0) return -1;
|
||||
if (tjsonAddDoubleToObject(replica, "port", pReplica->port) < 0) return -1;
|
||||
if (tjsonAddDoubleToObject(replica, "role", pOption->nodeRoles[i]) < 0) return -1;
|
||||
if (tjsonAddItemToArray(replicas, replica) < 0) return -1;
|
||||
}
|
||||
}
|
||||
|
||||
if (tjsonAddDoubleToObject(pJson, "lastIndex", pOption->lastIndex) < 0) return -1;
|
||||
|
||||
if (tjsonAddDoubleToObject(pJson, "deployed", pOption->deploy) < 0) return -1;
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -33,12 +33,24 @@ int32_t mmProcessCreateReq(const SMgmtInputOpt *pInput, SRpcMsg *pMsg) {
|
|||
return -1;
|
||||
}
|
||||
|
||||
SMnodeOpt option = {.deploy = true, .numOfReplicas = createReq.replica, .selfIndex = -1};
|
||||
SMnodeOpt option = {.deploy = true, .numOfReplicas = createReq.replica,
|
||||
.numOfTotalReplicas = createReq.replica + createReq.learnerReplica,
|
||||
.selfIndex = -1, .lastIndex = createReq.lastIndex};
|
||||
|
||||
memcpy(option.replicas, createReq.replicas, sizeof(createReq.replicas));
|
||||
for (int32_t i = 0; i < option.numOfReplicas; ++i) {
|
||||
for (int32_t i = 0; i < createReq.replica; ++i) {
|
||||
if (createReq.replicas[i].id == pInput->pData->dnodeId) {
|
||||
option.selfIndex = i;
|
||||
}
|
||||
option.nodeRoles[i] = TAOS_SYNC_ROLE_VOTER;
|
||||
}
|
||||
|
||||
memcpy(&(option.replicas[createReq.replica]), createReq.learnerReplicas, sizeof(createReq.learnerReplicas));
|
||||
for (int32_t i = 0; i < createReq.learnerReplica; ++i) {
|
||||
if (createReq.learnerReplicas[i].id == pInput->pData->dnodeId) {
|
||||
option.selfIndex = createReq.replica + i;
|
||||
}
|
||||
option.nodeRoles[createReq.replica + i] = TAOS_SYNC_ROLE_LEARNER;
|
||||
}
|
||||
|
||||
if (option.selfIndex == -1) {
|
||||
|
@ -92,6 +104,8 @@ SArray *mmGetMsgHandles() {
|
|||
if (dmSetMgmtHandle(pArray, TDMT_DND_CREATE_VNODE_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
|
||||
if (dmSetMgmtHandle(pArray, TDMT_DND_DROP_VNODE_RSP, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
|
||||
if (dmSetMgmtHandle(pArray, TDMT_DND_CONFIG_DNODE_RSP, mmPutMsgToReadQueue, 0) == NULL) goto _OVER;
|
||||
if (dmSetMgmtHandle(pArray, TDMT_DND_ALTER_MNODE_TYPE_RSP, mmPutMsgToReadQueue, 0) == NULL) goto _OVER;
|
||||
if (dmSetMgmtHandle(pArray, TDMT_DND_ALTER_VNODE_TYPE_RSP, mmPutMsgToReadQueue, 0) == NULL) goto _OVER;
|
||||
|
||||
if (dmSetMgmtHandle(pArray, TDMT_MND_CONNECT, mmPutMsgToReadQueue, 0) == NULL) goto _OVER;
|
||||
if (dmSetMgmtHandle(pArray, TDMT_MND_CREATE_ACCT, mmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
|
||||
|
|
|
@ -45,9 +45,11 @@ static void mmBuildOptionForDeploy(SMnodeMgmt *pMgmt, const SMgmtInputOpt *pInpu
|
|||
pOption->dnodeId = pMgmt->pData->dnodeId;
|
||||
pOption->selfIndex = 0;
|
||||
pOption->numOfReplicas = 1;
|
||||
pOption->numOfTotalReplicas = 1;
|
||||
pOption->replicas[0].id = 1;
|
||||
pOption->replicas[0].port = tsServerPort;
|
||||
tstrncpy(pOption->replicas[0].fqdn, tsLocalFqdn, TSDB_FQDN_LEN);
|
||||
pOption->lastIndex = SYNC_INDEX_INVALID;
|
||||
}
|
||||
|
||||
static void mmBuildOptionForOpen(SMnodeMgmt *pMgmt, SMnodeOpt *pOption) {
|
||||
|
@ -123,9 +125,10 @@ static int32_t mmOpen(SMgmtInputOpt *pInput, SMgmtOutputOpt *pOutput) {
|
|||
}
|
||||
tmsgReportStartup("mnode-worker", "initialized");
|
||||
|
||||
if (option.numOfReplicas > 0) {
|
||||
if (option.numOfTotalReplicas > 0) {
|
||||
option.deploy = true;
|
||||
option.numOfReplicas = 0;
|
||||
option.numOfTotalReplicas = 0;
|
||||
if (mmWriteFile(pMgmt->path, &option) != 0) {
|
||||
dError("failed to write mnode file since %s", terrstr());
|
||||
return -1;
|
||||
|
@ -152,6 +155,10 @@ static void mmStop(SMnodeMgmt *pMgmt) {
|
|||
mndStop(pMgmt->pMnode);
|
||||
}
|
||||
|
||||
static int32_t mmSyncIsCatchUp(SMnodeMgmt *pMgmt) {
|
||||
return mndIsCatchUp(pMgmt->pMnode);
|
||||
}
|
||||
|
||||
SMgmtFunc mmGetMgmtFunc() {
|
||||
SMgmtFunc mgmtFunc = {0};
|
||||
mgmtFunc.openFp = mmOpen;
|
||||
|
@ -162,6 +169,7 @@ SMgmtFunc mmGetMgmtFunc() {
|
|||
mgmtFunc.dropFp = (NodeDropFp)mmProcessDropReq;
|
||||
mgmtFunc.requiredFp = mmRequire;
|
||||
mgmtFunc.getHandlesFp = mmGetMsgHandles;
|
||||
mgmtFunc.isCatchUpFp = (NodeIsCatchUpFp)mmSyncIsCatchUp;
|
||||
|
||||
return mgmtFunc;
|
||||
}
|
||||
|
|
|
@ -90,6 +90,7 @@ int32_t vmProcessDropVnodeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg);
|
|||
int32_t vmProcessAlterVnodeReplicaReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg);
|
||||
int32_t vmProcessDisableVnodeWriteReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg);
|
||||
int32_t vmProcessAlterHashRangeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg);
|
||||
int32_t vmProcessAlterVnodeTypeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg);
|
||||
|
||||
// vmFile.c
|
||||
int32_t vmGetVnodeListFromFile(SVnodeMgmt *pMgmt, SWrapperCfg **ppCfgs, int32_t *numOfVnodes);
|
||||
|
|
|
@ -131,15 +131,34 @@ static void vmGenerateVnodeCfg(SCreateVnodeReq *pCreate, SVnodeCfg *pCfg) {
|
|||
pCfg->tsdbPageSize = pCreate->tsdbPageSize * 1024;
|
||||
|
||||
pCfg->standby = 0;
|
||||
pCfg->syncCfg.myIndex = pCreate->selfIndex;
|
||||
pCfg->syncCfg.replicaNum = pCreate->replica;
|
||||
pCfg->syncCfg.replicaNum = 0;
|
||||
pCfg->syncCfg.totalReplicaNum = 0;
|
||||
|
||||
memset(&pCfg->syncCfg.nodeInfo, 0, sizeof(pCfg->syncCfg.nodeInfo));
|
||||
for (int32_t i = 0; i < pCreate->replica; ++i) {
|
||||
SNodeInfo *pNode = &pCfg->syncCfg.nodeInfo[i];
|
||||
pNode->nodeId = pCreate->replicas[i].id;
|
||||
pNode->nodePort = pCreate->replicas[i].port;
|
||||
tstrncpy(pNode->nodeFqdn, pCreate->replicas[i].fqdn, TSDB_FQDN_LEN);
|
||||
pNode->nodeId = pCreate->replicas[pCfg->syncCfg.replicaNum].id;
|
||||
pNode->nodePort = pCreate->replicas[pCfg->syncCfg.replicaNum].port;
|
||||
pNode->nodeRole = TAOS_SYNC_ROLE_VOTER;
|
||||
tstrncpy(pNode->nodeFqdn, pCreate->replicas[pCfg->syncCfg.replicaNum].fqdn, TSDB_FQDN_LEN);
|
||||
tmsgUpdateDnodeInfo(&pNode->nodeId, &pNode->clusterId, pNode->nodeFqdn, &pNode->nodePort);
|
||||
pCfg->syncCfg.replicaNum++;
|
||||
}
|
||||
if(pCreate->selfIndex != -1){
|
||||
pCfg->syncCfg.myIndex = pCreate->selfIndex;
|
||||
}
|
||||
for (int32_t i = pCfg->syncCfg.replicaNum; i < pCreate->replica + pCreate->learnerReplica; ++i) {
|
||||
SNodeInfo *pNode = &pCfg->syncCfg.nodeInfo[i];
|
||||
pNode->nodeId = pCreate->learnerReplicas[pCfg->syncCfg.totalReplicaNum].id;
|
||||
pNode->nodePort = pCreate->learnerReplicas[pCfg->syncCfg.totalReplicaNum].port;
|
||||
pNode->nodeRole = TAOS_SYNC_ROLE_LEARNER;
|
||||
tstrncpy(pNode->nodeFqdn, pCreate->learnerReplicas[pCfg->syncCfg.totalReplicaNum].fqdn, TSDB_FQDN_LEN);
|
||||
tmsgUpdateDnodeInfo(&pNode->nodeId, &pNode->clusterId, pNode->nodeFqdn, &pNode->nodePort);
|
||||
pCfg->syncCfg.totalReplicaNum++;
|
||||
}
|
||||
pCfg->syncCfg.totalReplicaNum += pCfg->syncCfg.replicaNum;
|
||||
if(pCreate->learnerSelfIndex != -1){
|
||||
pCfg->syncCfg.myIndex = pCreate->replica + pCreate->learnerSelfIndex;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -182,23 +201,35 @@ int32_t vmProcessCreateVnodeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) {
|
|||
return -1;
|
||||
}
|
||||
|
||||
dInfo("vgId:%d, start to create vnode, page:%d pageSize:%d buffer:%d szPage:%d szBuf:%" PRIu64
|
||||
dInfo("vgId:%d, vnode management handle msgType:%s, start to create vnode, page:%d pageSize:%d buffer:%d szPage:%d szBuf:%" PRIu64
|
||||
", cacheLast:%d cacheLastSize:%d sstTrigger:%d tsdbPageSize:%d %d dbname:%s dbId:%" PRId64
|
||||
", days:%d keep0:%d keep1:%d keep2:%d tsma:%d precision:%d compression:%d minRows:%d maxRows:%d"
|
||||
", wal fsync:%d level:%d retentionPeriod:%d retentionSize:%" PRId64 " rollPeriod:%d segSize:%" PRId64
|
||||
", hash method:%d begin:%u end:%u prefix:%d surfix:%d replica:%d selfIndex:%d strict:%d",
|
||||
req.vgId, req.pages, req.pageSize, req.buffer, req.pageSize * 1024, (uint64_t)req.buffer * 1024 * 1024,
|
||||
", hash method:%d begin:%u end:%u prefix:%d surfix:%d replica:%d selfIndex:%d "
|
||||
"learnerReplica:%d learnerSelfIndex:%d strict:%d",
|
||||
req.vgId, TMSG_INFO(pMsg->msgType), req.pages, req.pageSize, req.buffer, req.pageSize * 1024,
|
||||
(uint64_t)req.buffer * 1024 * 1024,
|
||||
req.cacheLast, req.cacheLastSize, req.sstTrigger, req.tsdbPageSize, req.tsdbPageSize * 1024, req.db, req.dbUid,
|
||||
req.daysPerFile, req.daysToKeep0, req.daysToKeep1, req.daysToKeep2, req.isTsma, req.precision, req.compression,
|
||||
req.minRows, req.maxRows, req.walFsyncPeriod, req.walLevel, req.walRetentionPeriod, req.walRetentionSize,
|
||||
req.walRollPeriod, req.walSegmentSize, req.hashMethod, req.hashBegin, req.hashEnd, req.hashPrefix,
|
||||
req.hashSuffix, req.replica, req.selfIndex, req.strict);
|
||||
req.hashSuffix, req.replica, req.selfIndex, req.learnerReplica, req.learnerSelfIndex, req.strict);
|
||||
for (int32_t i = 0; i < req.replica; ++i) {
|
||||
dInfo("vgId:%d, replica:%d ep:%s:%u dnode:%d", req.vgId, i, req.replicas[i].fqdn, req.replicas[i].port,
|
||||
req.replicas[i].id);
|
||||
}
|
||||
for (int32_t i = 0; i < req.learnerReplica; ++i) {
|
||||
dInfo("vgId:%d, learnerReplica:%d ep:%s:%u dnode:%d", req.vgId, i, req.learnerReplicas[i].fqdn,
|
||||
req.learnerReplicas[i].port, req.replicas[i].id);
|
||||
}
|
||||
|
||||
SReplica *pReplica = &req.replicas[req.selfIndex];
|
||||
SReplica *pReplica = NULL;
|
||||
if(req.selfIndex != -1){
|
||||
pReplica = &req.replicas[req.selfIndex];
|
||||
}
|
||||
else{
|
||||
pReplica = &req.learnerReplicas[req.learnerSelfIndex];
|
||||
}
|
||||
if (pReplica->id != pMgmt->pData->dnodeId || pReplica->port != tsServerPort ||
|
||||
strcmp(pReplica->fqdn, tsLocalFqdn) != 0) {
|
||||
terrno = TSDB_CODE_INVALID_MSG;
|
||||
|
@ -275,7 +306,8 @@ _OVER:
|
|||
vnodeClose(pImpl);
|
||||
vnodeDestroy(path, pMgmt->pTfs);
|
||||
} else {
|
||||
dInfo("vgId:%d, vnode is created", req.vgId);
|
||||
dInfo("vgId:%d, vnode management handle msgType:%s, end to create vnode, vnode is created",
|
||||
req.vgId, TMSG_INFO(pMsg->msgType));
|
||||
}
|
||||
|
||||
tFreeSCreateVnodeReq(&req);
|
||||
|
@ -283,6 +315,106 @@ _OVER:
|
|||
return code;
|
||||
}
|
||||
|
||||
int32_t vmProcessAlterVnodeTypeReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) {
|
||||
SAlterVnodeTypeReq req = {0};
|
||||
if (tDeserializeSAlterVnodeReplicaReq(pMsg->pCont, pMsg->contLen, &req) != 0) {
|
||||
terrno = TSDB_CODE_INVALID_MSG;
|
||||
return -1;
|
||||
}
|
||||
|
||||
dInfo("vgId:%d, vnode management handle msgType:%s, start to process alter-node-type-request",
|
||||
req.vgId, TMSG_INFO(pMsg->msgType));
|
||||
|
||||
SVnodeObj *pVnode = vmAcquireVnode(pMgmt, req.vgId);
|
||||
if (pVnode == NULL) {
|
||||
dError("vgId:%d, failed to alter hashrange since %s", req.vgId, terrstr());
|
||||
terrno = TSDB_CODE_VND_NOT_EXIST;
|
||||
return -1;
|
||||
}
|
||||
|
||||
dInfo("vgId:%d, checking node catch up", req.vgId);
|
||||
if(!vnodeIsCatchUp(pVnode->pImpl) == 0){
|
||||
return -1;
|
||||
}
|
||||
|
||||
dInfo("node:%s, catched up leader, continue to process alter-node-type-request", pMgmt->name);
|
||||
|
||||
int32_t vgId = req.vgId;
|
||||
dInfo("vgId:%d, start to alter vnode type replica:%d selfIndex:%d strict:%d", vgId, req.replica, req.selfIndex,
|
||||
req.strict);
|
||||
for (int32_t i = 0; i < req.replica; ++i) {
|
||||
SReplica *pReplica = &req.replicas[i];
|
||||
dInfo("vgId:%d, replica:%d ep:%s:%u dnode:%d", vgId, i, pReplica->fqdn, pReplica->port, pReplica->id);
|
||||
}
|
||||
for (int32_t i = 0; i < req.learnerReplica; ++i) {
|
||||
SReplica *pReplica = &req.learnerReplicas[i];
|
||||
dInfo("vgId:%d, learnerReplicas:%d ep:%s:%u dnode:%d", vgId, i, pReplica->fqdn, pReplica->port, pReplica->id);
|
||||
}
|
||||
|
||||
if (req.replica <= 0 ||
|
||||
(req.selfIndex < 0 && req.learnerSelfIndex <0)||
|
||||
req.selfIndex >= req.replica || req.learnerSelfIndex >= req.learnerReplica) {
|
||||
terrno = TSDB_CODE_INVALID_MSG;
|
||||
dError("vgId:%d, failed to alter replica since invalid msg", vgId);
|
||||
return -1;
|
||||
}
|
||||
|
||||
SReplica *pReplica = NULL;
|
||||
if(req.selfIndex > 0){
|
||||
pReplica = &req.replicas[req.selfIndex];
|
||||
}
|
||||
else{
|
||||
pReplica = &req.learnerReplicas[req.learnerSelfIndex];
|
||||
}
|
||||
|
||||
if (pReplica->id != pMgmt->pData->dnodeId || pReplica->port != tsServerPort ||
|
||||
strcmp(pReplica->fqdn, tsLocalFqdn) != 0) {
|
||||
terrno = TSDB_CODE_INVALID_MSG;
|
||||
dError("vgId:%d, dnodeId:%d ep:%s:%u not matched with local dnode", vgId, pReplica->id, pReplica->fqdn,
|
||||
pReplica->port);
|
||||
return -1;
|
||||
}
|
||||
|
||||
dInfo("vgId:%d, start to close vnode", vgId);
|
||||
SWrapperCfg wrapperCfg = {
|
||||
.dropped = pVnode->dropped,
|
||||
.vgId = pVnode->vgId,
|
||||
.vgVersion = pVnode->vgVersion,
|
||||
};
|
||||
tstrncpy(wrapperCfg.path, pVnode->path, sizeof(wrapperCfg.path));
|
||||
vmCloseVnode(pMgmt, pVnode, false);
|
||||
|
||||
char path[TSDB_FILENAME_LEN] = {0};
|
||||
snprintf(path, TSDB_FILENAME_LEN, "vnode%svnode%d", TD_DIRSEP, vgId);
|
||||
|
||||
dInfo("vgId:%d, start to alter vnode replica at %s", vgId, path);
|
||||
if (vnodeAlterReplica(path, &req, pMgmt->pTfs) < 0) {
|
||||
dError("vgId:%d, failed to alter vnode at %s since %s", vgId, path, terrstr());
|
||||
return -1;
|
||||
}
|
||||
|
||||
dInfo("vgId:%d, begin to open vnode", vgId);
|
||||
SVnode *pImpl = vnodeOpen(path, pMgmt->pTfs, pMgmt->msgCb);
|
||||
if (pImpl == NULL) {
|
||||
dError("vgId:%d, failed to open vnode at %s since %s", vgId, path, terrstr());
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (vmOpenVnode(pMgmt, &wrapperCfg, pImpl) != 0) {
|
||||
dError("vgId:%d, failed to open vnode mgmt since %s", vgId, terrstr());
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (vnodeStart(pImpl) != 0) {
|
||||
dError("vgId:%d, failed to start sync since %s", vgId, terrstr());
|
||||
return -1;
|
||||
}
|
||||
|
||||
dInfo("vgId:%d, vnode management handle msgType:%s, end to process alter-node-type-request, vnode config is altered",
|
||||
req.vgId, TMSG_INFO(pMsg->msgType));
|
||||
return 0;
|
||||
}
|
||||
|
||||
int32_t vmProcessDisableVnodeWriteReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) {
|
||||
SDisableVnodeWriteReq req = {0};
|
||||
if (tDeserializeSDisableVnodeWriteReq(pMsg->pCont, pMsg->contLen, &req) != 0) {
|
||||
|
@ -378,20 +510,35 @@ int32_t vmProcessAlterVnodeReplicaReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) {
|
|||
}
|
||||
|
||||
int32_t vgId = alterReq.vgId;
|
||||
dInfo("vgId:%d, start to alter vnode replica:%d selfIndex:%d strict:%d", vgId, alterReq.replica, alterReq.selfIndex,
|
||||
alterReq.strict);
|
||||
dInfo("vgId:%d,vnode management handle msgType:%s, start to alter vnode replica:%d selfIndex:%d leanerReplica:%d "
|
||||
"learnerSelfIndex:%d strict:%d",
|
||||
vgId, TMSG_INFO(pMsg->msgType), alterReq.replica, alterReq.selfIndex, alterReq.learnerReplica,
|
||||
alterReq.learnerSelfIndex, alterReq.strict);
|
||||
for (int32_t i = 0; i < alterReq.replica; ++i) {
|
||||
SReplica *pReplica = &alterReq.replicas[i];
|
||||
dInfo("vgId:%d, replica:%d ep:%s:%u dnode:%d", vgId, i, pReplica->fqdn, pReplica->port, pReplica->port);
|
||||
}
|
||||
|
||||
if (alterReq.replica <= 0 || alterReq.selfIndex < 0 || alterReq.selfIndex >= alterReq.replica) {
|
||||
for (int32_t i = 0; i < alterReq.learnerReplica; ++i) {
|
||||
SReplica *pReplica = &alterReq.learnerReplicas[i];
|
||||
dInfo("vgId:%d, learnerReplicas:%d ep:%s:%u dnode:%d", vgId, i, pReplica->fqdn, pReplica->port, pReplica->port);
|
||||
}
|
||||
|
||||
if (alterReq.replica <= 0 ||
|
||||
(alterReq.selfIndex < 0 && alterReq.learnerSelfIndex <0)||
|
||||
alterReq.selfIndex >= alterReq.replica || alterReq.learnerSelfIndex >= alterReq.learnerReplica) {
|
||||
terrno = TSDB_CODE_INVALID_MSG;
|
||||
dError("vgId:%d, failed to alter replica since invalid msg", vgId);
|
||||
return -1;
|
||||
}
|
||||
|
||||
SReplica *pReplica = &alterReq.replicas[alterReq.selfIndex];
|
||||
SReplica *pReplica = NULL;
|
||||
if(alterReq.selfIndex != -1){
|
||||
pReplica = &alterReq.replicas[alterReq.selfIndex];
|
||||
}
|
||||
else{
|
||||
pReplica = &alterReq.learnerReplicas[alterReq.learnerSelfIndex];
|
||||
}
|
||||
|
||||
if (pReplica->id != pMgmt->pData->dnodeId || pReplica->port != tsServerPort ||
|
||||
strcmp(pReplica->fqdn, tsLocalFqdn) != 0) {
|
||||
terrno = TSDB_CODE_INVALID_MSG;
|
||||
|
@ -425,7 +572,7 @@ int32_t vmProcessAlterVnodeReplicaReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) {
|
|||
return -1;
|
||||
}
|
||||
|
||||
dInfo("vgId:%d, close vnode", vgId);
|
||||
dInfo("vgId:%d, begin to open vnode", vgId);
|
||||
SVnode *pImpl = vnodeOpen(path, pMgmt->pTfs, pMgmt->msgCb);
|
||||
if (pImpl == NULL) {
|
||||
dError("vgId:%d, failed to open vnode at %s since %s", vgId, path, terrstr());
|
||||
|
@ -442,7 +589,10 @@ int32_t vmProcessAlterVnodeReplicaReq(SVnodeMgmt *pMgmt, SRpcMsg *pMsg) {
|
|||
return -1;
|
||||
}
|
||||
|
||||
dInfo("vgId:%d, vnode config is altered", vgId);
|
||||
dInfo("vgId:%d, vnode management handle msgType:%s, end to alter vnode replica:%d selfIndex:%d leanerReplica:%d "
|
||||
"learnerSelfIndex:%d strict:%d",
|
||||
vgId, TMSG_INFO(pMsg->msgType), alterReq.replica, alterReq.selfIndex, alterReq.learnerReplica,
|
||||
alterReq.learnerSelfIndex, alterReq.strict);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -548,6 +698,7 @@ SArray *vmGetMsgHandles() {
|
|||
if (dmSetMgmtHandle(pArray, TDMT_VND_TRIM, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
|
||||
if (dmSetMgmtHandle(pArray, TDMT_DND_CREATE_VNODE, vmPutMsgToMgmtQueue, 0) == NULL) goto _OVER;
|
||||
if (dmSetMgmtHandle(pArray, TDMT_DND_DROP_VNODE, vmPutMsgToMgmtQueue, 0) == NULL) goto _OVER;
|
||||
if (dmSetMgmtHandle(pArray, TDMT_DND_ALTER_VNODE_TYPE, vmPutMsgToMgmtQueue, 0) == NULL) goto _OVER;
|
||||
|
||||
if (dmSetMgmtHandle(pArray, TDMT_SYNC_TIMEOUT_ELECTION, vmPutMsgToSyncQueue, 0) == NULL) goto _OVER;
|
||||
if (dmSetMgmtHandle(pArray, TDMT_SYNC_CLIENT_REQUEST, vmPutMsgToSyncQueue, 0) == NULL) goto _OVER;
|
||||
|
|
|
@ -49,6 +49,9 @@ static void vmProcessMgmtQueue(SQueueInfo *pInfo, SRpcMsg *pMsg) {
|
|||
case TDMT_VND_ALTER_HASHRANGE:
|
||||
code = vmProcessAlterHashRangeReq(pMgmt, pMsg);
|
||||
break;
|
||||
case TDMT_DND_ALTER_VNODE_TYPE:
|
||||
code = vmProcessAlterVnodeTypeReq(pMgmt, pMsg);
|
||||
break;
|
||||
default:
|
||||
terrno = TSDB_CODE_MSG_NOT_PROCESSED;
|
||||
dGError("msg:%p, not processed in vnode-mgmt queue", pMsg);
|
||||
|
|
|
@ -169,6 +169,8 @@ static int32_t dmProcessCreateNodeReq(EDndNodeType ntype, SRpcMsg *pMsg) {
|
|||
return -1;
|
||||
}
|
||||
|
||||
dInfo("start to process create-node-request");
|
||||
|
||||
pWrapper = &pDnode->wrappers[ntype];
|
||||
if (taosMkDir(pWrapper->path) != 0) {
|
||||
dmReleaseWrapper(pWrapper);
|
||||
|
@ -198,6 +200,64 @@ static int32_t dmProcessCreateNodeReq(EDndNodeType ntype, SRpcMsg *pMsg) {
|
|||
return code;
|
||||
}
|
||||
|
||||
static int32_t dmProcessAlterNodeTypeReq(EDndNodeType ntype, SRpcMsg *pMsg) {
|
||||
SDnode *pDnode = dmInstance();
|
||||
|
||||
SMgmtWrapper *pWrapper = dmAcquireWrapper(pDnode, ntype);
|
||||
if (pWrapper == NULL) {
|
||||
dError("fail to process alter node type since node not exist");
|
||||
return -1;
|
||||
}
|
||||
dmReleaseWrapper(pWrapper);
|
||||
|
||||
dInfo("node:%s, start to process alter-node-type-request", pWrapper->name);
|
||||
|
||||
pWrapper = &pDnode->wrappers[ntype];
|
||||
|
||||
if(pWrapper->func.isCatchUpFp != NULL){
|
||||
dInfo("node:%s, checking node catch up", pWrapper->name);
|
||||
if(!(*pWrapper->func.isCatchUpFp)(pWrapper->pMgmt) == 0){
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
dInfo("node:%s, catched up leader, continue to process alter-node-type-request", pWrapper->name);
|
||||
|
||||
taosThreadMutexLock(&pDnode->mutex);
|
||||
|
||||
dInfo("node:%s, stopping node", pWrapper->name);
|
||||
dmStopNode(pWrapper);
|
||||
dInfo("node:%s, closing node", pWrapper->name);
|
||||
dmCloseNode(pWrapper);
|
||||
|
||||
pWrapper = &pDnode->wrappers[ntype];
|
||||
if (taosMkDir(pWrapper->path) != 0) {
|
||||
dmReleaseWrapper(pWrapper);
|
||||
terrno = TAOS_SYSTEM_ERROR(errno);
|
||||
dError("failed to create dir:%s since %s", pWrapper->path, terrstr());
|
||||
return -1;
|
||||
}
|
||||
|
||||
SMgmtInputOpt input = dmBuildMgmtInputOpt(pWrapper);
|
||||
|
||||
dInfo("node:%s, start to create", pWrapper->name);
|
||||
int32_t code = (*pWrapper->func.createFp)(&input, pMsg);
|
||||
if (code != 0) {
|
||||
dError("node:%s, failed to create since %s", pWrapper->name, terrstr());
|
||||
} else {
|
||||
dInfo("node:%s, has been created", pWrapper->name);
|
||||
code = dmOpenNode(pWrapper);
|
||||
if (code == 0) {
|
||||
code = dmStartNode(pWrapper);
|
||||
}
|
||||
pWrapper->deployed = true;
|
||||
pWrapper->required = true;
|
||||
}
|
||||
|
||||
taosThreadMutexUnlock(&pDnode->mutex);
|
||||
return code;
|
||||
}
|
||||
|
||||
static int32_t dmProcessDropNodeReq(EDndNodeType ntype, SRpcMsg *pMsg) {
|
||||
SDnode *pDnode = dmInstance();
|
||||
|
||||
|
@ -251,6 +311,7 @@ SMgmtInputOpt dmBuildMgmtInputOpt(SMgmtWrapper *pWrapper) {
|
|||
.name = pWrapper->name,
|
||||
.pData = &pWrapper->pDnode->data,
|
||||
.processCreateNodeFp = dmProcessCreateNodeReq,
|
||||
.processAlterNodeTypeFp = dmProcessAlterNodeTypeReq,
|
||||
.processDropNodeFp = dmProcessDropNodeReq,
|
||||
.sendMonitorReportFp = dmSendMonitorReport,
|
||||
.getVnodeLoadsFp = dmGetVnodeLoads,
|
||||
|
|
|
@ -89,6 +89,7 @@ typedef void (*SendMonitorReportFp)();
|
|||
typedef void (*GetVnodeLoadsFp)(SMonVloadInfo *pInfo);
|
||||
typedef void (*GetMnodeLoadsFp)(SMonMloadInfo *pInfo);
|
||||
typedef void (*GetQnodeLoadsFp)(SQnodeLoad *pInfo);
|
||||
typedef int32_t (*ProcessAlterNodeTypeFp)(EDndNodeType ntype, SRpcMsg *pMsg);
|
||||
|
||||
typedef struct {
|
||||
int32_t dnodeId;
|
||||
|
@ -112,6 +113,7 @@ typedef struct {
|
|||
SDnodeData *pData;
|
||||
SMsgCb msgCb;
|
||||
ProcessCreateNodeFp processCreateNodeFp;
|
||||
ProcessAlterNodeTypeFp processAlterNodeTypeFp;
|
||||
ProcessDropNodeFp processDropNodeFp;
|
||||
SendMonitorReportFp sendMonitorReportFp;
|
||||
GetVnodeLoadsFp getVnodeLoadsFp;
|
||||
|
@ -132,6 +134,7 @@ typedef int32_t (*NodeCreateFp)(const SMgmtInputOpt *pInput, SRpcMsg *pMsg);
|
|||
typedef int32_t (*NodeDropFp)(const SMgmtInputOpt *pInput, SRpcMsg *pMsg);
|
||||
typedef int32_t (*NodeRequireFp)(const SMgmtInputOpt *pInput, bool *required);
|
||||
typedef SArray *(*NodeGetHandlesFp)(); // array of SMgmtHandle
|
||||
typedef bool (*NodeIsCatchUpFp)(void *pMgmt);
|
||||
|
||||
typedef struct {
|
||||
NodeOpenFp openFp;
|
||||
|
@ -142,6 +145,7 @@ typedef struct {
|
|||
NodeDropFp dropFp;
|
||||
NodeRequireFp requiredFp;
|
||||
NodeGetHandlesFp getHandlesFp;
|
||||
NodeIsCatchUpFp isCatchUpFp;
|
||||
} SMgmtFunc;
|
||||
|
||||
typedef struct {
|
||||
|
|
|
@ -215,6 +215,8 @@ typedef struct {
|
|||
bool syncRestore;
|
||||
int64_t stateStartTime;
|
||||
SDnodeObj* pDnode;
|
||||
int32_t role;
|
||||
SyncIndex lastIndex;
|
||||
} SMnodeObj;
|
||||
|
||||
typedef struct {
|
||||
|
@ -341,6 +343,7 @@ typedef struct {
|
|||
ESyncState syncState;
|
||||
bool syncRestore;
|
||||
bool syncCanRead;
|
||||
ESyncRole nodeRole;
|
||||
} SVnodeGid;
|
||||
|
||||
typedef struct {
|
||||
|
@ -361,7 +364,7 @@ typedef struct {
|
|||
int8_t compact;
|
||||
int8_t isTsma;
|
||||
int8_t replica;
|
||||
SVnodeGid vnodeGid[TSDB_MAX_REPLICA];
|
||||
SVnodeGid vnodeGid[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
|
||||
void* pTsma;
|
||||
int32_t numOfCachedTables;
|
||||
} SVgObj;
|
||||
|
|
|
@ -92,8 +92,11 @@ typedef struct {
|
|||
int64_t transSeq;
|
||||
TdThreadMutex lock;
|
||||
int8_t selfIndex;
|
||||
int8_t numOfTotalReplicas;
|
||||
int8_t numOfReplicas;
|
||||
SReplica replicas[TSDB_MAX_REPLICA];
|
||||
SReplica replicas[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
|
||||
ESyncRole nodeRoles[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
|
||||
SyncIndex lastIndex;
|
||||
} SSyncMgmt;
|
||||
|
||||
typedef struct {
|
||||
|
|
|
@ -491,7 +491,10 @@ static void mndSetOptions(SMnode *pMnode, const SMnodeOpt *pOption) {
|
|||
pMnode->selfDnodeId = pOption->dnodeId;
|
||||
pMnode->syncMgmt.selfIndex = pOption->selfIndex;
|
||||
pMnode->syncMgmt.numOfReplicas = pOption->numOfReplicas;
|
||||
pMnode->syncMgmt.numOfTotalReplicas = pOption->numOfTotalReplicas;
|
||||
pMnode->syncMgmt.lastIndex = pOption->lastIndex;
|
||||
memcpy(pMnode->syncMgmt.replicas, pOption->replicas, sizeof(pOption->replicas));
|
||||
memcpy(pMnode->syncMgmt.nodeRoles, pOption->nodeRoles, sizeof(pOption->nodeRoles));
|
||||
}
|
||||
|
||||
SMnode *mndOpen(const char *path, const SMnodeOpt *pOption) {
|
||||
|
@ -582,6 +585,11 @@ int32_t mndStart(SMnode *pMnode) {
|
|||
return mndInitTimer(pMnode);
|
||||
}
|
||||
|
||||
int32_t mndIsCatchUp(SMnode *pMnode) {
|
||||
int64_t rid = pMnode->syncMgmt.sync;
|
||||
return syncIsCatchUp(rid);
|
||||
}
|
||||
|
||||
void mndStop(SMnode *pMnode) {
|
||||
mndSetStop(pMnode);
|
||||
mndSyncStop(pMnode);
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
#include "mndTrans.h"
|
||||
#include "tmisce.h"
|
||||
|
||||
#define MNODE_VER_NUMBER 1
|
||||
#define MNODE_VER_NUMBER 2
|
||||
#define MNODE_RESERVE_SIZE 64
|
||||
|
||||
static int32_t mndCreateDefaultMnode(SMnode *pMnode);
|
||||
|
@ -53,6 +53,7 @@ int32_t mndInitMnode(SMnode *pMnode) {
|
|||
|
||||
mndSetMsgHandle(pMnode, TDMT_MND_CREATE_MNODE, mndProcessCreateMnodeReq);
|
||||
mndSetMsgHandle(pMnode, TDMT_DND_CREATE_MNODE_RSP, mndTransProcessRsp);
|
||||
mndSetMsgHandle(pMnode, TDMT_DND_ALTER_MNODE_TYPE_RSP, mndTransProcessRsp);
|
||||
mndSetMsgHandle(pMnode, TDMT_MND_ALTER_MNODE, mndProcessAlterMnodeReq);
|
||||
mndSetMsgHandle(pMnode, TDMT_MND_ALTER_MNODE_RSP, mndTransProcessRsp);
|
||||
mndSetMsgHandle(pMnode, TDMT_MND_DROP_MNODE, mndProcessDropMnodeReq);
|
||||
|
@ -126,6 +127,8 @@ static SSdbRaw *mndMnodeActionEncode(SMnodeObj *pObj) {
|
|||
SDB_SET_INT32(pRaw, dataPos, pObj->id, _OVER)
|
||||
SDB_SET_INT64(pRaw, dataPos, pObj->createdTime, _OVER)
|
||||
SDB_SET_INT64(pRaw, dataPos, pObj->updateTime, _OVER)
|
||||
SDB_SET_INT32(pRaw, dataPos, pObj->role, _OVER)
|
||||
SDB_SET_INT64(pRaw, dataPos, pObj->lastIndex, _OVER)
|
||||
SDB_SET_RESERVE(pRaw, dataPos, MNODE_RESERVE_SIZE, _OVER)
|
||||
|
||||
terrno = 0;
|
||||
|
@ -149,7 +152,7 @@ static SSdbRow *mndMnodeActionDecode(SSdbRaw *pRaw) {
|
|||
int8_t sver = 0;
|
||||
if (sdbGetRawSoftVer(pRaw, &sver) != 0) return NULL;
|
||||
|
||||
if (sver != MNODE_VER_NUMBER) {
|
||||
if (sver != 1 && sver != 2) {
|
||||
terrno = TSDB_CODE_SDB_INVALID_DATA_VER;
|
||||
goto _OVER;
|
||||
}
|
||||
|
@ -164,6 +167,10 @@ static SSdbRow *mndMnodeActionDecode(SSdbRaw *pRaw) {
|
|||
SDB_GET_INT32(pRaw, dataPos, &pObj->id, _OVER)
|
||||
SDB_GET_INT64(pRaw, dataPos, &pObj->createdTime, _OVER)
|
||||
SDB_GET_INT64(pRaw, dataPos, &pObj->updateTime, _OVER)
|
||||
if(sver >=2){
|
||||
SDB_GET_INT32(pRaw, dataPos, &pObj->role, _OVER)
|
||||
SDB_GET_INT64(pRaw, dataPos, &pObj->lastIndex, _OVER)
|
||||
}
|
||||
SDB_GET_RESERVE(pRaw, dataPos, MNODE_RESERVE_SIZE, _OVER)
|
||||
|
||||
terrno = 0;
|
||||
|
@ -204,7 +211,9 @@ static int32_t mndMnodeActionDelete(SSdb *pSdb, SMnodeObj *pObj) {
|
|||
|
||||
static int32_t mndMnodeActionUpdate(SSdb *pSdb, SMnodeObj *pOld, SMnodeObj *pNew) {
|
||||
mTrace("mnode:%d, perform update action, old row:%p new row:%p", pOld->id, pOld, pNew);
|
||||
pOld->role = pNew->role;
|
||||
pOld->updateTime = pNew->updateTime;
|
||||
pOld->lastIndex = pNew->lastIndex;
|
||||
mndReloadSyncConfig(pSdb->pMnode);
|
||||
|
||||
return 0;
|
||||
|
@ -302,6 +311,27 @@ static int32_t mndBuildCreateMnodeRedoAction(STrans *pTrans, SDCreateMnodeReq *p
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int32_t mndBuildAlterMnodeTypeRedoAction(STrans *pTrans,
|
||||
SDAlterMnodeTypeReq *pAlterMnodeTypeReq, SEpSet *pAlterMnodeTypeEpSet) {
|
||||
int32_t contLen = tSerializeSDCreateMnodeReq(NULL, 0, pAlterMnodeTypeReq);
|
||||
void *pReq = taosMemoryMalloc(contLen);
|
||||
tSerializeSDCreateMnodeReq(pReq, contLen, pAlterMnodeTypeReq);
|
||||
|
||||
STransAction action = {
|
||||
.epSet = *pAlterMnodeTypeEpSet,
|
||||
.pCont = pReq,
|
||||
.contLen = contLen,
|
||||
.msgType = TDMT_DND_ALTER_MNODE_TYPE,
|
||||
.acceptableCode = TSDB_CODE_MNODE_ALREADY_DEPLOYED,
|
||||
};
|
||||
|
||||
if (mndTransAppendRedoAction(pTrans, &action) != 0) {
|
||||
taosMemoryFree(pReq);
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int32_t mndBuildAlterMnodeRedoAction(STrans *pTrans, SDCreateMnodeReq *pAlterReq, SEpSet *pAlterEpSet) {
|
||||
int32_t contLen = tSerializeSDCreateMnodeReq(NULL, 0, pAlterReq);
|
||||
void *pReq = taosMemoryMalloc(contLen);
|
||||
|
@ -347,6 +377,7 @@ static int32_t mndSetCreateMnodeRedoActions(SMnode *pMnode, STrans *pTrans, SDno
|
|||
SSdb *pSdb = pMnode->pSdb;
|
||||
void *pIter = NULL;
|
||||
int32_t numOfReplicas = 0;
|
||||
int32_t numOfLearnerReplicas = 0;
|
||||
SDCreateMnodeReq createReq = {0};
|
||||
SEpSet createEpset = {0};
|
||||
|
||||
|
@ -355,18 +386,29 @@ static int32_t mndSetCreateMnodeRedoActions(SMnode *pMnode, STrans *pTrans, SDno
|
|||
pIter = sdbFetch(pSdb, SDB_MNODE, pIter, (void **)&pMObj);
|
||||
if (pIter == NULL) break;
|
||||
|
||||
createReq.replicas[numOfReplicas].id = pMObj->id;
|
||||
createReq.replicas[numOfReplicas].port = pMObj->pDnode->port;
|
||||
memcpy(createReq.replicas[numOfReplicas].fqdn, pMObj->pDnode->fqdn, TSDB_FQDN_LEN);
|
||||
if(pMObj->role == TAOS_SYNC_ROLE_VOTER){
|
||||
createReq.replicas[numOfReplicas].id = pMObj->id;
|
||||
createReq.replicas[numOfReplicas].port = pMObj->pDnode->port;
|
||||
memcpy(createReq.replicas[numOfReplicas].fqdn, pMObj->pDnode->fqdn, TSDB_FQDN_LEN);
|
||||
numOfReplicas++;
|
||||
}
|
||||
else{
|
||||
createReq.learnerReplicas[numOfLearnerReplicas].id = pMObj->id;
|
||||
createReq.learnerReplicas[numOfLearnerReplicas].port = pMObj->pDnode->port;
|
||||
memcpy(createReq.learnerReplicas[numOfLearnerReplicas].fqdn, pMObj->pDnode->fqdn, TSDB_FQDN_LEN);
|
||||
numOfLearnerReplicas++;
|
||||
}
|
||||
|
||||
numOfReplicas++;
|
||||
sdbRelease(pSdb, pMObj);
|
||||
}
|
||||
|
||||
createReq.replica = numOfReplicas + 1;
|
||||
createReq.replicas[numOfReplicas].id = pDnode->id;
|
||||
createReq.replicas[numOfReplicas].port = pDnode->port;
|
||||
memcpy(createReq.replicas[numOfReplicas].fqdn, pDnode->fqdn, TSDB_FQDN_LEN);
|
||||
createReq.replica = numOfReplicas;
|
||||
createReq.learnerReplica = numOfLearnerReplicas + 1;
|
||||
createReq.learnerReplicas[numOfLearnerReplicas].id = pDnode->id;
|
||||
createReq.learnerReplicas[numOfLearnerReplicas].port = pDnode->port;
|
||||
memcpy(createReq.learnerReplicas[numOfLearnerReplicas].fqdn, pDnode->fqdn, TSDB_FQDN_LEN);
|
||||
|
||||
createReq.lastIndex = pObj->lastIndex;
|
||||
|
||||
createEpset.inUse = 0;
|
||||
createEpset.numOfEps = 1;
|
||||
|
@ -378,23 +420,78 @@ static int32_t mndSetCreateMnodeRedoActions(SMnode *pMnode, STrans *pTrans, SDno
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int32_t mndSetAlterMnodeTypeRedoActions(SMnode *pMnode, STrans *pTrans, SDnodeObj *pDnode, SMnodeObj *pObj) {
|
||||
SSdb *pSdb = pMnode->pSdb;
|
||||
void *pIter = NULL;
|
||||
SDAlterMnodeTypeReq alterReq = {0};
|
||||
SEpSet createEpset = {0};
|
||||
|
||||
while (1) {
|
||||
SMnodeObj *pMObj = NULL;
|
||||
pIter = sdbFetch(pSdb, SDB_MNODE, pIter, (void **)&pMObj);
|
||||
if (pIter == NULL) break;
|
||||
|
||||
if(pMObj->role == TAOS_SYNC_ROLE_VOTER){
|
||||
alterReq.replicas[alterReq.replica].id = pMObj->id;
|
||||
alterReq.replicas[alterReq.replica].port = pMObj->pDnode->port;
|
||||
memcpy(alterReq.replicas[alterReq.replica].fqdn, pMObj->pDnode->fqdn, TSDB_FQDN_LEN);
|
||||
alterReq.replica++;
|
||||
}
|
||||
else{
|
||||
alterReq.learnerReplicas[alterReq.learnerReplica].id = pMObj->id;
|
||||
alterReq.learnerReplicas[alterReq.learnerReplica].port = pMObj->pDnode->port;
|
||||
memcpy(alterReq.learnerReplicas[alterReq.learnerReplica].fqdn, pMObj->pDnode->fqdn, TSDB_FQDN_LEN);
|
||||
alterReq.learnerReplica++;
|
||||
}
|
||||
|
||||
sdbRelease(pSdb, pMObj);
|
||||
}
|
||||
|
||||
alterReq.replicas[alterReq.replica].id = pDnode->id;
|
||||
alterReq.replicas[alterReq.replica].port = pDnode->port;
|
||||
memcpy(alterReq.replicas[alterReq.replica].fqdn, pDnode->fqdn, TSDB_FQDN_LEN);
|
||||
alterReq.replica++;
|
||||
|
||||
alterReq.lastIndex = pObj->lastIndex;
|
||||
|
||||
createEpset.inUse = 0;
|
||||
createEpset.numOfEps = 1;
|
||||
createEpset.eps[0].port = pDnode->port;
|
||||
memcpy(createEpset.eps[0].fqdn, pDnode->fqdn, TSDB_FQDN_LEN);
|
||||
|
||||
if (mndBuildAlterMnodeTypeRedoAction(pTrans, &alterReq, &createEpset) != 0) return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int32_t mndCreateMnode(SMnode *pMnode, SRpcMsg *pReq, SDnodeObj *pDnode, SMCreateMnodeReq *pCreate) {
|
||||
int32_t code = -1;
|
||||
|
||||
SMnodeObj mnodeObj = {0};
|
||||
mnodeObj.id = pDnode->id;
|
||||
mnodeObj.createdTime = taosGetTimestampMs();
|
||||
mnodeObj.updateTime = mnodeObj.createdTime;
|
||||
|
||||
STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_GLOBAL, pReq, "create-mnode");
|
||||
if (pTrans == NULL) goto _OVER;
|
||||
mndTransSetSerial(pTrans);
|
||||
mInfo("trans:%d, used to create mnode:%d", pTrans->id, pCreate->dnodeId);
|
||||
if (mndTrancCheckConflict(pMnode, pTrans) != 0) goto _OVER;
|
||||
|
||||
SMnodeObj mnodeObj = {0};
|
||||
mnodeObj.id = pDnode->id;
|
||||
mnodeObj.createdTime = taosGetTimestampMs();
|
||||
mnodeObj.updateTime = mnodeObj.createdTime;
|
||||
mnodeObj.role = TAOS_SYNC_ROLE_LEARNER;
|
||||
mnodeObj.lastIndex = pMnode->applied;
|
||||
|
||||
if (mndSetCreateMnodeRedoActions(pMnode, pTrans, pDnode, &mnodeObj) != 0) goto _OVER;
|
||||
if (mndSetCreateMnodeRedoLogs(pMnode, pTrans, &mnodeObj) != 0) goto _OVER;
|
||||
if (mndSetCreateMnodeCommitLogs(pMnode, pTrans, &mnodeObj) != 0) goto _OVER;
|
||||
|
||||
SMnodeObj mnodeLeaderObj = {0};
|
||||
mnodeLeaderObj.id = pDnode->id;
|
||||
mnodeLeaderObj.createdTime = taosGetTimestampMs();
|
||||
mnodeLeaderObj.updateTime = mnodeLeaderObj.createdTime;
|
||||
mnodeLeaderObj.role = TAOS_SYNC_ROLE_VOTER;
|
||||
mnodeLeaderObj.lastIndex = pMnode->applied + 1;
|
||||
|
||||
if (mndSetAlterMnodeTypeRedoActions(pMnode, pTrans, pDnode, &mnodeLeaderObj) != 0) goto _OVER;
|
||||
if (mndSetCreateMnodeCommitLogs(pMnode, pTrans, &mnodeLeaderObj) != 0) goto _OVER;
|
||||
if (mndTransPrepare(pMnode, pTrans) != 0) goto _OVER;
|
||||
|
||||
code = 0;
|
||||
|
@ -514,6 +611,7 @@ static int32_t mndSetDropMnodeRedoActions(SMnode *pMnode, STrans *pTrans, SDnode
|
|||
|
||||
int32_t mndSetDropMnodeInfoToTrans(SMnode *pMnode, STrans *pTrans, SMnodeObj *pObj, bool force) {
|
||||
if (pObj == NULL) return 0;
|
||||
pObj->lastIndex = pMnode->applied;
|
||||
if (mndSetDropMnodeRedoActions(pMnode, pTrans, pObj->pDnode, pObj, force) != 0) return -1;
|
||||
if (mndSetDropMnodeCommitLogs(pMnode, pTrans, pObj) != 0) return -1;
|
||||
return 0;
|
||||
|
@ -730,7 +828,8 @@ static void mndReloadSyncConfig(SMnode *pMnode) {
|
|||
void *pIter = NULL;
|
||||
int32_t updatingMnodes = 0;
|
||||
int32_t readyMnodes = 0;
|
||||
SSyncCfg cfg = {.myIndex = -1};
|
||||
SSyncCfg cfg = {.myIndex = -1, .lastIndex = 0,};
|
||||
SyncIndex maxIndex = 0;
|
||||
|
||||
while (1) {
|
||||
pIter = sdbFetchAll(pSdb, SDB_MNODE, pIter, (void **)&pObj, &objStatus, false);
|
||||
|
@ -745,26 +844,41 @@ static void mndReloadSyncConfig(SMnode *pMnode) {
|
|||
}
|
||||
|
||||
if (objStatus == SDB_STATUS_READY || objStatus == SDB_STATUS_CREATING) {
|
||||
SNodeInfo *pNode = &cfg.nodeInfo[cfg.replicaNum];
|
||||
SNodeInfo *pNode = &cfg.nodeInfo[cfg.totalReplicaNum];
|
||||
pNode->nodeId = pObj->pDnode->id;
|
||||
pNode->clusterId = mndGetClusterId(pMnode);
|
||||
pNode->nodePort = pObj->pDnode->port;
|
||||
pNode->nodeRole = pObj->role;
|
||||
tstrncpy(pNode->nodeFqdn, pObj->pDnode->fqdn, TSDB_FQDN_LEN);
|
||||
(void)tmsgUpdateDnodeInfo(&pNode->nodeId, &pNode->clusterId, pNode->nodeFqdn, &pNode->nodePort);
|
||||
mInfo("vgId:1, ep:%s:%u dnode:%d", pNode->nodeFqdn, pNode->nodePort, pNode->nodeId);
|
||||
if (pObj->pDnode->id == pMnode->selfDnodeId) {
|
||||
cfg.myIndex = cfg.replicaNum;
|
||||
cfg.myIndex = cfg.totalReplicaNum;
|
||||
}
|
||||
if(pNode->nodeRole == TAOS_SYNC_ROLE_VOTER){
|
||||
cfg.replicaNum++;
|
||||
}
|
||||
cfg.totalReplicaNum++;
|
||||
if(pObj->lastIndex > cfg.lastIndex){
|
||||
cfg.lastIndex = pObj->lastIndex;
|
||||
}
|
||||
cfg.replicaNum++;
|
||||
}
|
||||
|
||||
if (objStatus == SDB_STATUS_DROPPING) {
|
||||
if(pObj->lastIndex > cfg.lastIndex){
|
||||
cfg.lastIndex = pObj->lastIndex;
|
||||
}
|
||||
}
|
||||
|
||||
mInfo("vgId:1, mnode:%d, role:%d, lastIndex:%" PRId64, pObj->id, pObj->role, pObj->lastIndex);
|
||||
|
||||
sdbReleaseLock(pSdb, pObj, false);
|
||||
}
|
||||
|
||||
if (readyMnodes <= 0 || updatingMnodes <= 0) {
|
||||
mInfo("vgId:1, mnode sync not reconfig since readyMnodes:%d updatingMnodes:%d", readyMnodes, updatingMnodes);
|
||||
return;
|
||||
}
|
||||
//if (readyMnodes <= 0 || updatingMnodes <= 0) {
|
||||
// mInfo("vgId:1, mnode sync not reconfig since readyMnodes:%d updatingMnodes:%d", readyMnodes, updatingMnodes);
|
||||
// return;
|
||||
//}
|
||||
|
||||
if (cfg.myIndex == -1) {
|
||||
#if 1
|
||||
|
@ -777,12 +891,14 @@ static void mndReloadSyncConfig(SMnode *pMnode) {
|
|||
return;
|
||||
}
|
||||
|
||||
if (updatingMnodes > 0) {
|
||||
mInfo("vgId:1, mnode sync reconfig, replica:%d myIndex:%d", cfg.replicaNum, cfg.myIndex);
|
||||
for (int32_t i = 0; i < cfg.replicaNum; ++i) {
|
||||
if (pMnode->syncMgmt.sync > 0) {
|
||||
mInfo("vgId:1, mnode sync reconfig, totalReplica:%d replica:%d myIndex:%d",
|
||||
cfg.totalReplicaNum, cfg.replicaNum, cfg.myIndex);
|
||||
|
||||
for (int32_t i = 0; i < cfg.totalReplicaNum; ++i) {
|
||||
SNodeInfo *pNode = &cfg.nodeInfo[i];
|
||||
mInfo("vgId:1, index:%d, ep:%s:%u dnode:%d cluster:%" PRId64, i, pNode->nodeFqdn, pNode->nodePort, pNode->nodeId,
|
||||
pNode->clusterId);
|
||||
mInfo("vgId:1, index:%d, ep:%s:%u dnode:%d cluster:%" PRId64 " role:%d", i, pNode->nodeFqdn, pNode->nodePort,
|
||||
pNode->nodeId, pNode->clusterId, pNode->nodeRole);
|
||||
}
|
||||
|
||||
int32_t code = syncReconfig(pMnode->syncMgmt.sync, &cfg);
|
||||
|
|
|
@ -237,6 +237,23 @@ static void mndBecomeFollower(const SSyncFSM *pFsm) {
|
|||
taosThreadMutexUnlock(&pMgmt->lock);
|
||||
}
|
||||
|
||||
static void mndBecomeLearner(const SSyncFSM *pFsm) {
|
||||
SMnode *pMnode = pFsm->data;
|
||||
SSyncMgmt *pMgmt = &pMnode->syncMgmt;
|
||||
mInfo("vgId:1, become learner");
|
||||
|
||||
taosThreadMutexLock(&pMgmt->lock);
|
||||
if (pMgmt->transId != 0) {
|
||||
mInfo("vgId:1, become learner and post sem, trans:%d, failed to propose since not leader", pMgmt->transId);
|
||||
pMgmt->transId = 0;
|
||||
pMgmt->transSec = 0;
|
||||
pMgmt->transSeq = 0;
|
||||
pMgmt->errCode = TSDB_CODE_SYN_NOT_LEADER;
|
||||
tsem_post(&pMgmt->syncSem);
|
||||
}
|
||||
taosThreadMutexUnlock(&pMgmt->lock);
|
||||
}
|
||||
|
||||
static void mndBecomeLeader(const SSyncFSM *pFsm) {
|
||||
mInfo("vgId:1, become leader");
|
||||
SMnode *pMnode = pFsm->data;
|
||||
|
@ -278,6 +295,7 @@ SSyncFSM *mndSyncMakeFsm(SMnode *pMnode) {
|
|||
pFsm->FpReConfigCb = NULL;
|
||||
pFsm->FpBecomeLeaderCb = mndBecomeLeader;
|
||||
pFsm->FpBecomeFollowerCb = mndBecomeFollower;
|
||||
pFsm->FpBecomeLearnerCb = mndBecomeLearner;
|
||||
pFsm->FpGetSnapshot = mndSyncGetSnapshot;
|
||||
pFsm->FpGetSnapshotInfo = mndSyncGetSnapshotInfo;
|
||||
pFsm->FpSnapshotStartRead = mndSnapshotStartRead;
|
||||
|
@ -317,13 +335,16 @@ int32_t mndInitSync(SMnode *pMnode) {
|
|||
|
||||
mInfo("vgId:1, start to open sync, replica:%d selfIndex:%d", pMgmt->numOfReplicas, pMgmt->selfIndex);
|
||||
SSyncCfg *pCfg = &syncInfo.syncCfg;
|
||||
pCfg->totalReplicaNum = pMgmt->numOfTotalReplicas;
|
||||
pCfg->replicaNum = pMgmt->numOfReplicas;
|
||||
pCfg->myIndex = pMgmt->selfIndex;
|
||||
for (int32_t i = 0; i < pMgmt->numOfReplicas; ++i) {
|
||||
pCfg->lastIndex = pMgmt->lastIndex;
|
||||
for (int32_t i = 0; i < pMgmt->numOfTotalReplicas; ++i) {
|
||||
SNodeInfo *pNode = &pCfg->nodeInfo[i];
|
||||
pNode->nodeId = pMgmt->replicas[i].id;
|
||||
pNode->nodePort = pMgmt->replicas[i].port;
|
||||
tstrncpy(pNode->nodeFqdn, pMgmt->replicas[i].fqdn, sizeof(pNode->nodeFqdn));
|
||||
pNode->nodeRole = pMgmt->nodeRoles[i];
|
||||
(void)tmsgUpdateDnodeInfo(&pNode->nodeId, &pNode->clusterId, pNode->nodeFqdn, &pNode->nodePort);
|
||||
mInfo("vgId:1, index:%d ep:%s:%u dnode:%d cluster:%" PRId64, i, pNode->nodeFqdn, pNode->nodePort, pNode->nodeId,
|
||||
pNode->clusterId);
|
||||
|
|
|
@ -62,6 +62,7 @@ int32_t mndInitVgroup(SMnode *pMnode) {
|
|||
mndSetMsgHandle(pMnode, TDMT_VND_COMPACT_RSP, mndTransProcessRsp);
|
||||
mndSetMsgHandle(pMnode, TDMT_VND_DISABLE_WRITE_RSP, mndTransProcessRsp);
|
||||
mndSetMsgHandle(pMnode, TDMT_SYNC_FORCE_FOLLOWER_RSP, mndTransProcessRsp);
|
||||
mndSetMsgHandle(pMnode, TDMT_DND_ALTER_VNODE_TYPE_RSP, mndTransProcessRsp);
|
||||
|
||||
mndSetMsgHandle(pMnode, TDMT_MND_REDISTRIBUTE_VGROUP, mndProcessRedistributeVgroupMsg);
|
||||
mndSetMsgHandle(pMnode, TDMT_MND_SPLIT_VGROUP, mndProcessSplitVgroupMsg);
|
||||
|
@ -203,7 +204,7 @@ static int32_t mndVgroupActionUpdate(SSdb *pSdb, SVgObj *pOld, SVgObj *pNew) {
|
|||
pNew->compStorage = pOld->compStorage;
|
||||
pNew->pointsWritten = pOld->pointsWritten;
|
||||
pNew->compact = pOld->compact;
|
||||
memcpy(pOld->vnodeGid, pNew->vnodeGid, TSDB_MAX_REPLICA * sizeof(SVnodeGid));
|
||||
memcpy(pOld->vnodeGid, pNew->vnodeGid, (TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA) * sizeof(SVnodeGid));
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -244,8 +245,10 @@ void *mndBuildCreateVnodeReq(SMnode *pMnode, SDnodeObj *pDnode, SDbObj *pDb, SVg
|
|||
createReq.compression = pDb->cfg.compression;
|
||||
createReq.strict = pDb->cfg.strict;
|
||||
createReq.cacheLast = pDb->cfg.cacheLast;
|
||||
createReq.replica = pVgroup->replica;
|
||||
createReq.replica = 0;
|
||||
createReq.learnerReplica = 0;
|
||||
createReq.selfIndex = -1;
|
||||
createReq.learnerSelfIndex = -1;
|
||||
createReq.hashBegin = pVgroup->hashBegin;
|
||||
createReq.hashEnd = pVgroup->hashEnd;
|
||||
createReq.hashMethod = pDb->cfg.hashMethod;
|
||||
|
@ -263,7 +266,15 @@ void *mndBuildCreateVnodeReq(SMnode *pMnode, SDnodeObj *pDnode, SDbObj *pDb, SVg
|
|||
createReq.tsdbPageSize = pDb->cfg.tsdbPageSize;
|
||||
|
||||
for (int32_t v = 0; v < pVgroup->replica; ++v) {
|
||||
SReplica *pReplica = &createReq.replicas[v];
|
||||
SReplica *pReplica = NULL;
|
||||
|
||||
if(pVgroup->vnodeGid[v].nodeRole == TAOS_SYNC_ROLE_VOTER){
|
||||
pReplica = &createReq.replicas[createReq.replica];
|
||||
}
|
||||
else{
|
||||
pReplica = &createReq.learnerReplicas[createReq.learnerReplica];
|
||||
}
|
||||
|
||||
SVnodeGid *pVgid = &pVgroup->vnodeGid[v];
|
||||
SDnodeObj *pVgidDnode = mndAcquireDnode(pMnode, pVgid->dnodeId);
|
||||
if (pVgidDnode == NULL) {
|
||||
|
@ -275,21 +286,40 @@ void *mndBuildCreateVnodeReq(SMnode *pMnode, SDnodeObj *pDnode, SDbObj *pDb, SVg
|
|||
memcpy(pReplica->fqdn, pVgidDnode->fqdn, TSDB_FQDN_LEN);
|
||||
mndReleaseDnode(pMnode, pVgidDnode);
|
||||
|
||||
if (pDnode->id == pVgid->dnodeId) {
|
||||
createReq.selfIndex = v;
|
||||
if(pVgroup->vnodeGid[v].nodeRole == TAOS_SYNC_ROLE_VOTER){
|
||||
if (pDnode->id == pVgid->dnodeId) {
|
||||
createReq.selfIndex = createReq.replica;
|
||||
}
|
||||
}
|
||||
else{
|
||||
if (pDnode->id == pVgid->dnodeId) {
|
||||
createReq.learnerSelfIndex = createReq.learnerReplica;
|
||||
}
|
||||
}
|
||||
|
||||
if(pVgroup->vnodeGid[v].nodeRole == TAOS_SYNC_ROLE_VOTER){
|
||||
createReq.replica++;
|
||||
}
|
||||
else{
|
||||
createReq.learnerReplica++;
|
||||
}
|
||||
}
|
||||
|
||||
if (createReq.selfIndex == -1) {
|
||||
if (createReq.selfIndex == -1 && createReq.learnerSelfIndex == -1) {
|
||||
terrno = TSDB_CODE_APP_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
mInfo("vgId:%d, build create vnode req, replica:%d selfIndex:%d strict:%d", createReq.vgId, createReq.replica,
|
||||
createReq.selfIndex, createReq.strict);
|
||||
mInfo("vgId:%d, build create vnode req, replica:%d selfIndex:%d learnerReplica:%d learnerSelfIndex:%d strict:%d",
|
||||
createReq.vgId, createReq.replica, createReq.selfIndex, createReq.learnerReplica,
|
||||
createReq.learnerReplica, createReq.strict);
|
||||
for (int32_t i = 0; i < createReq.replica; ++i) {
|
||||
mInfo("vgId:%d, replica:%d ep:%s:%u", createReq.vgId, i, createReq.replicas[i].fqdn, createReq.replicas[i].port);
|
||||
}
|
||||
for (int32_t i = 0; i < createReq.learnerReplica; ++i) {
|
||||
mInfo("vgId:%d, replica:%d ep:%s:%u", createReq.vgId, i, createReq.learnerReplicas[i].fqdn,
|
||||
createReq.learnerReplicas[i].port);
|
||||
}
|
||||
|
||||
int32_t contLen = tSerializeSCreateVnodeReq(NULL, 0, &createReq);
|
||||
if (contLen < 0) {
|
||||
|
@ -356,12 +386,24 @@ static void *mndBuildAlterVnodeReplicaReq(SMnode *pMnode, SDbObj *pDb, SVgObj *p
|
|||
SAlterVnodeReplicaReq alterReq = {
|
||||
.vgId = pVgroup->vgId,
|
||||
.strict = pDb->cfg.strict,
|
||||
.replica = pVgroup->replica,
|
||||
.replica = 0,
|
||||
.learnerReplica = 0,
|
||||
.selfIndex = -1,
|
||||
.learnerSelfIndex = -1,
|
||||
};
|
||||
|
||||
for (int32_t v = 0; v < pVgroup->replica; ++v) {
|
||||
SReplica *pReplica = &alterReq.replicas[v];
|
||||
SReplica *pReplica = NULL;
|
||||
|
||||
if(pVgroup->vnodeGid[v].nodeRole == TAOS_SYNC_ROLE_VOTER){
|
||||
pReplica = &alterReq.replicas[alterReq.replica];
|
||||
alterReq.replica++;
|
||||
}
|
||||
else{
|
||||
pReplica = &alterReq.learnerReplicas[alterReq.learnerReplica];
|
||||
alterReq.learnerReplica++;
|
||||
}
|
||||
|
||||
SVnodeGid *pVgid = &pVgroup->vnodeGid[v];
|
||||
SDnodeObj *pVgidDnode = mndAcquireDnode(pMnode, pVgid->dnodeId);
|
||||
if (pVgidDnode == NULL) return NULL;
|
||||
|
@ -371,18 +413,30 @@ static void *mndBuildAlterVnodeReplicaReq(SMnode *pMnode, SDbObj *pDb, SVgObj *p
|
|||
memcpy(pReplica->fqdn, pVgidDnode->fqdn, TSDB_FQDN_LEN);
|
||||
mndReleaseDnode(pMnode, pVgidDnode);
|
||||
|
||||
if (dnodeId == pVgid->dnodeId) {
|
||||
alterReq.selfIndex = v;
|
||||
if(pVgroup->vnodeGid[v].nodeRole == TAOS_SYNC_ROLE_VOTER){
|
||||
if (dnodeId == pVgid->dnodeId) {
|
||||
alterReq.selfIndex = v;
|
||||
}
|
||||
}
|
||||
else{
|
||||
if (dnodeId == pVgid->dnodeId) {
|
||||
alterReq.learnerSelfIndex = v;
|
||||
}
|
||||
}
|
||||
}
|
||||
alterReq.replica = pVgroup->replica;
|
||||
mInfo("vgId:%d, build alter vnode req, replica:%d selfIndex:%d strict:%d", alterReq.vgId, alterReq.replica,
|
||||
alterReq.selfIndex, alterReq.strict);
|
||||
|
||||
mInfo("vgId:%d, build alter vnode req, replica:%d selfIndex:%d learnerReplica:%d learnerSelfIndex:%d strict:%d",
|
||||
alterReq.vgId, alterReq.replica, alterReq.selfIndex, alterReq.learnerReplica,
|
||||
alterReq.learnerSelfIndex, alterReq.strict);
|
||||
for (int32_t i = 0; i < alterReq.replica; ++i) {
|
||||
mInfo("vgId:%d, replica:%d ep:%s:%u", alterReq.vgId, i, alterReq.replicas[i].fqdn, alterReq.replicas[i].port);
|
||||
}
|
||||
for (int32_t i = 0; i < alterReq.learnerReplica; ++i) {
|
||||
mInfo("vgId:%d, learnerReplica:%d ep:%s:%u", alterReq.vgId, i,
|
||||
alterReq.learnerReplicas[i].fqdn, alterReq.learnerReplicas[i].port);
|
||||
}
|
||||
|
||||
if (alterReq.selfIndex == -1) {
|
||||
if (alterReq.selfIndex == -1 && alterReq.learnerSelfIndex == -1) {
|
||||
terrno = TSDB_CODE_APP_ERROR;
|
||||
return NULL;
|
||||
}
|
||||
|
@ -1194,6 +1248,30 @@ int32_t mndAddAlterVnodeReplicaAction(SMnode *pMnode, STrans *pTrans, SDbObj *pD
|
|||
return 0;
|
||||
}
|
||||
|
||||
int32_t mndAddAlterVnodeTypeAction(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup, int32_t dnodeId) {
|
||||
SDnodeObj *pDnode = mndAcquireDnode(pMnode, dnodeId);
|
||||
if (pDnode == NULL) return -1;
|
||||
|
||||
STransAction action = {0};
|
||||
action.epSet = mndGetDnodeEpset(pDnode);
|
||||
mndReleaseDnode(pMnode, pDnode);
|
||||
|
||||
int32_t contLen = 0;
|
||||
void *pReq = mndBuildAlterVnodeReplicaReq(pMnode, pDb, pVgroup, dnodeId, &contLen);
|
||||
if (pReq == NULL) return -1;
|
||||
|
||||
action.pCont = pReq;
|
||||
action.contLen = contLen;
|
||||
action.msgType = TDMT_DND_ALTER_VNODE_TYPE;
|
||||
|
||||
if (mndTransAppendRedoAction(pTrans, &action) != 0) {
|
||||
taosMemoryFree(pReq);
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int32_t mndAddDisableVnodeWriteAction(SMnode *pMnode, STrans *pTrans, SDbObj *pDb, SVgObj *pVgroup,
|
||||
int32_t dnodeId) {
|
||||
SDnodeObj *pDnode = mndAcquireDnode(pMnode, dnodeId);
|
||||
|
@ -1953,18 +2031,33 @@ int32_t mndBuildAlterVgroupAction(SMnode *pMnode, STrans *pTrans, SDbObj *pOldDb
|
|||
mInfo("db:%s, vgId:%d, will add 2 vnodes, vn:0 dnode:%d", pVgroup->dbName, pVgroup->vgId,
|
||||
pVgroup->vnodeGid[0].dnodeId);
|
||||
|
||||
//add first
|
||||
if (mndAddVnodeToVgroup(pMnode, pTrans, &newVgroup, pArray) != 0) return -1;
|
||||
|
||||
newVgroup.vnodeGid[0].nodeRole = TAOS_SYNC_ROLE_VOTER;
|
||||
newVgroup.vnodeGid[1].nodeRole = TAOS_SYNC_ROLE_LEARNER;
|
||||
if (mndAddAlterVnodeReplicaAction(pMnode, pTrans, pNewDb, &newVgroup, newVgroup.vnodeGid[0].dnodeId) != 0)
|
||||
return -1;
|
||||
|
||||
if (mndAddCreateVnodeAction(pMnode, pTrans, pNewDb, &newVgroup, &newVgroup.vnodeGid[1]) != 0) return -1;
|
||||
newVgroup.vnodeGid[1].nodeRole = TAOS_SYNC_ROLE_VOTER;
|
||||
if (mndAddAlterVnodeTypeAction(pMnode, pTrans, pNewDb, &newVgroup, newVgroup.vnodeGid[1].dnodeId) != 0)
|
||||
return -1;
|
||||
|
||||
if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pNewDb, &newVgroup) != 0) return -1;
|
||||
|
||||
//add second
|
||||
if (mndAddVnodeToVgroup(pMnode, pTrans, &newVgroup, pArray) != 0) return -1;
|
||||
newVgroup.vnodeGid[0].nodeRole = TAOS_SYNC_ROLE_VOTER;
|
||||
newVgroup.vnodeGid[1].nodeRole = TAOS_SYNC_ROLE_VOTER;
|
||||
newVgroup.vnodeGid[2].nodeRole = TAOS_SYNC_ROLE_VOTER;
|
||||
|
||||
if (mndAddAlterVnodeReplicaAction(pMnode, pTrans, pNewDb, &newVgroup, newVgroup.vnodeGid[0].dnodeId) != 0)
|
||||
return -1;
|
||||
if (mndAddAlterVnodeReplicaAction(pMnode, pTrans, pNewDb, &newVgroup, newVgroup.vnodeGid[1].dnodeId) != 0)
|
||||
return -1;
|
||||
if (mndAddCreateVnodeAction(pMnode, pTrans, pNewDb, &newVgroup, &newVgroup.vnodeGid[2]) != 0) return -1;
|
||||
|
||||
if (mndAddAlterVnodeConfirmAction(pMnode, pTrans, pNewDb, &newVgroup) != 0) return -1;
|
||||
} else if (newVgroup.replica == 3 && pNewDb->cfg.replications == 1) {
|
||||
mInfo("db:%s, vgId:%d, will remove 2 vnodes, vn:0 dnode:%d vn:1 dnode:%d vn:2 dnode:%d", pVgroup->dbName,
|
||||
|
|
|
@ -68,6 +68,7 @@ void vnodeGetSnapshot(SVnode *pVnode, SSnapshot *pSnapshot);
|
|||
void vnodeGetInfo(SVnode *pVnode, const char **dbname, int32_t *vgId);
|
||||
int32_t vnodeProcessCreateTSma(SVnode *pVnode, void *pCont, uint32_t contLen);
|
||||
int32_t vnodeGetAllTableList(SVnode *pVnode, uint64_t uid, SArray *list);
|
||||
int32_t vnodeIsCatchUp(SVnode *pVnode);
|
||||
|
||||
int32_t vnodeGetCtbIdList(SVnode *pVnode, int64_t suid, SArray *list);
|
||||
int32_t vnodeGetCtbIdListByFilter(SVnode *pVnode, int64_t suid, SArray *list, bool (*filter)(void *arg), void *arg);
|
||||
|
|
|
@ -57,6 +57,28 @@ int vnodeCheckCfg(const SVnodeCfg *pCfg) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
const char* vnodeRoleToStr(ESyncRole role) {
|
||||
switch (role) {
|
||||
case TAOS_SYNC_ROLE_VOTER:
|
||||
return "voter";
|
||||
case TAOS_SYNC_ROLE_LEARNER:
|
||||
return "learner";
|
||||
default:
|
||||
return "unknown";
|
||||
}
|
||||
}
|
||||
|
||||
const ESyncRole vnodeStrToRole(char* str) {
|
||||
if(strcmp(str, "voter") == 0){
|
||||
return TAOS_SYNC_ROLE_VOTER;
|
||||
}
|
||||
if(strcmp(str, "learner") == 0){
|
||||
return TAOS_SYNC_ROLE_LEARNER;
|
||||
}
|
||||
|
||||
return TAOS_SYNC_ROLE_ERROR;
|
||||
}
|
||||
|
||||
int vnodeEncodeConfig(const void *pObj, SJson *pJson) {
|
||||
const SVnodeCfg *pCfg = (SVnodeCfg *)pObj;
|
||||
|
||||
|
@ -117,6 +139,7 @@ int vnodeEncodeConfig(const void *pObj, SJson *pJson) {
|
|||
if (tjsonAddIntegerToObject(pJson, "hashSuffix", pCfg->hashSuffix) < 0) return -1;
|
||||
|
||||
if (tjsonAddIntegerToObject(pJson, "syncCfg.replicaNum", pCfg->syncCfg.replicaNum) < 0) return -1;
|
||||
if (tjsonAddIntegerToObject(pJson, "syncCfg.totalReplicaNum", pCfg->syncCfg.totalReplicaNum) < 0) return -1;
|
||||
if (tjsonAddIntegerToObject(pJson, "syncCfg.myIndex", pCfg->syncCfg.myIndex) < 0) return -1;
|
||||
|
||||
if (tjsonAddIntegerToObject(pJson, "vndStats.stables", pCfg->vndStats.numOfSTables) < 0) return -1;
|
||||
|
@ -128,9 +151,9 @@ int vnodeEncodeConfig(const void *pObj, SJson *pJson) {
|
|||
SJson *nodeInfo = tjsonCreateArray();
|
||||
if (nodeInfo == NULL) return -1;
|
||||
if (tjsonAddItemToObject(pJson, "syncCfg.nodeInfo", nodeInfo) < 0) return -1;
|
||||
vDebug("vgId:%d, encode config, replicas:%d selfIndex:%d", pCfg->vgId, pCfg->syncCfg.replicaNum,
|
||||
pCfg->syncCfg.myIndex);
|
||||
for (int i = 0; i < pCfg->syncCfg.replicaNum; ++i) {
|
||||
vDebug("vgId:%d, encode config, replicas:%d totalReplicas:%d selfIndex:%d", pCfg->vgId, pCfg->syncCfg.replicaNum,
|
||||
pCfg->syncCfg.totalReplicaNum, pCfg->syncCfg.myIndex);
|
||||
for (int i = 0; i < pCfg->syncCfg.totalReplicaNum; ++i) {
|
||||
SJson *info = tjsonCreateObject();
|
||||
SNodeInfo *pNode = (SNodeInfo *)&pCfg->syncCfg.nodeInfo[i];
|
||||
if (info == NULL) return -1;
|
||||
|
@ -138,6 +161,7 @@ int vnodeEncodeConfig(const void *pObj, SJson *pJson) {
|
|||
if (tjsonAddStringToObject(info, "nodeFqdn", pNode->nodeFqdn) < 0) return -1;
|
||||
if (tjsonAddIntegerToObject(info, "nodeId", pNode->nodeId) < 0) return -1;
|
||||
if (tjsonAddIntegerToObject(info, "clusterId", pNode->clusterId) < 0) return -1;
|
||||
if (tjsonAddStringToObject(info, "nodeRole", vnodeRoleToStr(pNode->nodeRole)) < 0) return -1;
|
||||
if (tjsonAddItemToArray(nodeInfo, info) < 0) return -1;
|
||||
vDebug("vgId:%d, encode config, replica:%d ep:%s:%u dnode:%d", pCfg->vgId, i, pNode->nodeFqdn, pNode->nodePort,
|
||||
pNode->nodeId);
|
||||
|
@ -235,6 +259,8 @@ int vnodeDecodeConfig(const SJson *pJson, void *pObj) {
|
|||
|
||||
tjsonGetNumberValue(pJson, "syncCfg.replicaNum", pCfg->syncCfg.replicaNum, code);
|
||||
if (code < 0) return -1;
|
||||
tjsonGetNumberValue(pJson, "syncCfg.totalReplicaNum", pCfg->syncCfg.totalReplicaNum, code);
|
||||
if (code < 0) return -1;
|
||||
tjsonGetNumberValue(pJson, "syncCfg.myIndex", pCfg->syncCfg.myIndex, code);
|
||||
if (code < 0) return -1;
|
||||
|
||||
|
@ -251,10 +277,10 @@ int vnodeDecodeConfig(const SJson *pJson, void *pObj) {
|
|||
|
||||
SJson *nodeInfo = tjsonGetObjectItem(pJson, "syncCfg.nodeInfo");
|
||||
int arraySize = tjsonGetArraySize(nodeInfo);
|
||||
if (arraySize != pCfg->syncCfg.replicaNum) return -1;
|
||||
if (arraySize != pCfg->syncCfg.totalReplicaNum) return -1;
|
||||
|
||||
vDebug("vgId:%d, decode config, replicas:%d selfIndex:%d", pCfg->vgId, pCfg->syncCfg.replicaNum,
|
||||
pCfg->syncCfg.myIndex);
|
||||
vDebug("vgId:%d, decode config, replicas:%d totalReplicas:%d selfIndex:%d", pCfg->vgId, pCfg->syncCfg.replicaNum,
|
||||
pCfg->syncCfg.totalReplicaNum, pCfg->syncCfg.myIndex);
|
||||
for (int i = 0; i < arraySize; ++i) {
|
||||
SJson *info = tjsonGetArrayItem(nodeInfo, i);
|
||||
SNodeInfo *pNode = &pCfg->syncCfg.nodeInfo[i];
|
||||
|
@ -266,6 +292,9 @@ int vnodeDecodeConfig(const SJson *pJson, void *pObj) {
|
|||
if (code < 0) return -1;
|
||||
tjsonGetNumberValue(info, "clusterId", pNode->clusterId, code);
|
||||
if (code < 0) return -1;
|
||||
char role[10];
|
||||
tjsonGetStringValue(info, "nodeRole", role);
|
||||
pNode->nodeRole = vnodeStrToRole(role);
|
||||
vDebug("vgId:%d, decode config, replica:%d ep:%s:%u dnode:%d", pCfg->vgId, i, pNode->nodeFqdn, pNode->nodePort,
|
||||
pNode->nodeId);
|
||||
}
|
||||
|
|
|
@ -76,19 +76,41 @@ int32_t vnodeAlterReplica(const char *path, SAlterVnodeReplicaReq *pReq, STfs *p
|
|||
}
|
||||
|
||||
SSyncCfg *pCfg = &info.config.syncCfg;
|
||||
pCfg->myIndex = pReq->selfIndex;
|
||||
pCfg->replicaNum = pReq->replica;
|
||||
|
||||
pCfg->replicaNum = 0;
|
||||
pCfg->totalReplicaNum = 0;
|
||||
memset(&pCfg->nodeInfo, 0, sizeof(pCfg->nodeInfo));
|
||||
|
||||
vInfo("vgId:%d, save config while alter, replicas:%d selfIndex:%d", pReq->vgId, pCfg->replicaNum, pCfg->myIndex);
|
||||
for (int i = 0; i < pReq->replica; ++i) {
|
||||
SNodeInfo *pNode = &pCfg->nodeInfo[i];
|
||||
pNode->nodeId = pReq->replicas[i].id;
|
||||
pNode->nodePort = pReq->replicas[i].port;
|
||||
tstrncpy(pNode->nodeFqdn, pReq->replicas[i].fqdn, sizeof(pNode->nodeFqdn));
|
||||
pNode->nodeRole = TAOS_SYNC_ROLE_VOTER;
|
||||
(void)tmsgUpdateDnodeInfo(&pNode->nodeId, &pNode->clusterId, pNode->nodeFqdn, &pNode->nodePort);
|
||||
vInfo("vgId:%d, replica:%d ep:%s:%u dnode:%d", pReq->vgId, i, pNode->nodeFqdn, pNode->nodePort, pNode->nodeId);
|
||||
pCfg->replicaNum++;
|
||||
}
|
||||
if(pReq->selfIndex != -1){
|
||||
pCfg->myIndex = pReq->selfIndex;
|
||||
}
|
||||
for (int i = pCfg->replicaNum; i < pReq->replica + pReq->learnerReplica; ++i) {
|
||||
SNodeInfo *pNode = &pCfg->nodeInfo[i];
|
||||
pNode->nodeId = pReq->learnerReplicas[pCfg->totalReplicaNum].id;
|
||||
pNode->nodePort = pReq->learnerReplicas[pCfg->totalReplicaNum].port;
|
||||
pNode->nodeRole = TAOS_SYNC_ROLE_LEARNER;
|
||||
tstrncpy(pNode->nodeFqdn, pReq->learnerReplicas[pCfg->totalReplicaNum].fqdn, sizeof(pNode->nodeFqdn));
|
||||
(void)tmsgUpdateDnodeInfo(&pNode->nodeId, &pNode->clusterId, pNode->nodeFqdn, &pNode->nodePort);
|
||||
vInfo("vgId:%d, replica:%d ep:%s:%u dnode:%d", pReq->vgId, i, pNode->nodeFqdn, pNode->nodePort, pNode->nodeId);
|
||||
pCfg->totalReplicaNum++;
|
||||
}
|
||||
pCfg->totalReplicaNum += pReq->replica;
|
||||
if(pReq->learnerSelfIndex != -1){
|
||||
pCfg->myIndex = pReq->replica + pReq->learnerSelfIndex;
|
||||
}
|
||||
|
||||
vInfo("vgId:%d, save config while alter, replicas:%d totalReplicas:%d selfIndex:%d",
|
||||
pReq->vgId, pCfg->replicaNum, pCfg->totalReplicaNum, pCfg->myIndex);
|
||||
|
||||
info.config.syncCfg = *pCfg;
|
||||
ret = vnodeSaveInfo(dir, &info);
|
||||
|
@ -181,6 +203,7 @@ int32_t vnodeAlterHashRange(const char *srcPath, const char *dstPath, SAlterVnod
|
|||
SSyncCfg *pCfg = &info.config.syncCfg;
|
||||
pCfg->myIndex = 0;
|
||||
pCfg->replicaNum = 1;
|
||||
pCfg->totalReplicaNum = 1;
|
||||
memset(&pCfg->nodeInfo, 0, sizeof(pCfg->nodeInfo));
|
||||
|
||||
vInfo("vgId:%d, alter vnode replicas to 1", pReq->srcVgId);
|
||||
|
@ -248,7 +271,7 @@ SVnode *vnodeOpen(const char *path, STfs *pTfs, SMsgCb msgCb) {
|
|||
// save vnode info on dnode ep changed
|
||||
bool updated = false;
|
||||
SSyncCfg *pCfg = &info.config.syncCfg;
|
||||
for (int32_t i = 0; i < pCfg->replicaNum; ++i) {
|
||||
for (int32_t i = 0; i < pCfg->totalReplicaNum; ++i) {
|
||||
SNodeInfo *pNode = &pCfg->nodeInfo[i];
|
||||
if (tmsgUpdateDnodeInfo(&pNode->nodeId, &pNode->clusterId, pNode->nodeFqdn, &pNode->nodePort)) {
|
||||
updated = true;
|
||||
|
@ -406,6 +429,10 @@ void vnodeClose(SVnode *pVnode) {
|
|||
// start the sync timer after the queue is ready
|
||||
int32_t vnodeStart(SVnode *pVnode) { return vnodeSyncStart(pVnode); }
|
||||
|
||||
int32_t vnodeIsCatchUp(SVnode *pVnode){
|
||||
return syncIsCatchUp(pVnode->sync);
|
||||
}
|
||||
|
||||
void vnodeStop(SVnode *pVnode) {}
|
||||
|
||||
int64_t vnodeGetSyncHandle(SVnode *pVnode) { return pVnode->sync; }
|
||||
|
|
|
@ -1447,7 +1447,8 @@ int32_t vnodeProcessCreateTSma(SVnode *pVnode, void *pCont, uint32_t contLen) {
|
|||
}
|
||||
|
||||
static int32_t vnodeProcessAlterConfirmReq(SVnode *pVnode, int64_t version, void *pReq, int32_t len, SRpcMsg *pRsp) {
|
||||
vInfo("vgId:%d, alter replica confim msg is processed", TD_VID(pVnode));
|
||||
vInfo("vgId:%d, vnode management handle msgType:alter-confirm, alter replica confim msg is processed",
|
||||
TD_VID(pVnode));
|
||||
pRsp->msgType = TDMT_VND_ALTER_CONFIRM_RSP;
|
||||
pRsp->code = TSDB_CODE_SUCCESS;
|
||||
pRsp->pCont = NULL;
|
||||
|
|
|
@ -571,6 +571,19 @@ static void vnodeBecomeFollower(const SSyncFSM *pFsm) {
|
|||
taosThreadMutexUnlock(&pVnode->lock);
|
||||
}
|
||||
|
||||
static void vnodeBecomeLearner(const SSyncFSM *pFsm) {
|
||||
SVnode *pVnode = pFsm->data;
|
||||
vInfo("vgId:%d, become learner", pVnode->config.vgId);
|
||||
|
||||
taosThreadMutexLock(&pVnode->lock);
|
||||
if (pVnode->blocked) {
|
||||
pVnode->blocked = false;
|
||||
vDebug("vgId:%d, become learner and post block", pVnode->config.vgId);
|
||||
tsem_post(&pVnode->syncSem);
|
||||
}
|
||||
taosThreadMutexUnlock(&pVnode->lock);
|
||||
}
|
||||
|
||||
static void vnodeBecomeLeader(const SSyncFSM *pFsm) {
|
||||
SVnode *pVnode = pFsm->data;
|
||||
vDebug("vgId:%d, become leader", pVnode->config.vgId);
|
||||
|
@ -612,6 +625,7 @@ static SSyncFSM *vnodeSyncMakeFsm(SVnode *pVnode) {
|
|||
pFsm->FpApplyQueueItems = vnodeApplyQueueItems;
|
||||
pFsm->FpBecomeLeaderCb = vnodeBecomeLeader;
|
||||
pFsm->FpBecomeFollowerCb = vnodeBecomeFollower;
|
||||
pFsm->FpBecomeLearnerCb = vnodeBecomeLearner;
|
||||
pFsm->FpReConfigCb = NULL;
|
||||
pFsm->FpSnapshotStartRead = vnodeSnapshotStartRead;
|
||||
pFsm->FpSnapshotStopRead = vnodeSnapshotStopRead;
|
||||
|
@ -644,7 +658,7 @@ int32_t vnodeSyncOpen(SVnode *pVnode, char *path) {
|
|||
|
||||
SSyncCfg *pCfg = &syncInfo.syncCfg;
|
||||
vInfo("vgId:%d, start to open sync, replica:%d selfIndex:%d", pVnode->config.vgId, pCfg->replicaNum, pCfg->myIndex);
|
||||
for (int32_t i = 0; i < pCfg->replicaNum; ++i) {
|
||||
for (int32_t i = 0; i < pCfg->totalReplicaNum; ++i) {
|
||||
SNodeInfo *pNode = &pCfg->nodeInfo[i];
|
||||
vInfo("vgId:%d, index:%d ep:%s:%u dnode:%d cluster:%" PRId64, pVnode->config.vgId, i, pNode->nodeFqdn, pNode->nodePort,
|
||||
pNode->nodeId, pNode->clusterId);
|
||||
|
|
|
@ -24,12 +24,13 @@ extern "C" {
|
|||
|
||||
// SIndexMgr -----------------------------
|
||||
typedef struct SSyncIndexMgr {
|
||||
SRaftId (*replicas)[TSDB_MAX_REPLICA];
|
||||
SyncIndex index[TSDB_MAX_REPLICA];
|
||||
SyncTerm privateTerm[TSDB_MAX_REPLICA]; // for advanced function
|
||||
int64_t startTimeArr[TSDB_MAX_REPLICA];
|
||||
int64_t recvTimeArr[TSDB_MAX_REPLICA];
|
||||
SRaftId (*replicas)[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
|
||||
SyncIndex index[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
|
||||
SyncTerm privateTerm[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA]; // for advanced function
|
||||
int64_t startTimeArr[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
|
||||
int64_t recvTimeArr[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
|
||||
int32_t replicaNum;
|
||||
int32_t totalReplicaNum;
|
||||
SSyncNode *pNode;
|
||||
} SSyncIndexMgr;
|
||||
|
||||
|
|
|
@ -54,13 +54,13 @@ typedef struct SSyncLogReplMgr SSyncLogReplMgr;
|
|||
#define MAX_CONFIG_INDEX_COUNT 256
|
||||
|
||||
typedef struct SRaftCfg {
|
||||
SSyncCfg cfg;
|
||||
int32_t batchSize;
|
||||
int8_t isStandBy;
|
||||
int8_t snapshotStrategy;
|
||||
SyncIndex lastConfigIndex;
|
||||
int32_t configIndexCount;
|
||||
SyncIndex configIndexArr[MAX_CONFIG_INDEX_COUNT];
|
||||
SSyncCfg cfg;
|
||||
int32_t batchSize;
|
||||
int8_t isStandBy;
|
||||
int8_t snapshotStrategy;
|
||||
SyncIndex lastConfigIndex;
|
||||
int32_t configIndexCount;
|
||||
SyncIndex configIndexArr[MAX_CONFIG_INDEX_COUNT];
|
||||
} SRaftCfg;
|
||||
|
||||
typedef struct SRaftId {
|
||||
|
@ -127,12 +127,13 @@ typedef struct SSyncNode {
|
|||
SRaftId myRaftId;
|
||||
|
||||
int32_t peersNum;
|
||||
SNodeInfo peersNodeInfo[TSDB_MAX_REPLICA];
|
||||
SEpSet peersEpset[TSDB_MAX_REPLICA];
|
||||
SRaftId peersId[TSDB_MAX_REPLICA];
|
||||
SNodeInfo peersNodeInfo[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
|
||||
SEpSet peersEpset[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
|
||||
SRaftId peersId[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
|
||||
|
||||
int32_t replicaNum;
|
||||
SRaftId replicasId[TSDB_MAX_REPLICA];
|
||||
int32_t totalReplicaNum;
|
||||
SRaftId replicasId[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
|
||||
|
||||
// raft algorithm
|
||||
SSyncFSM* pFsm;
|
||||
|
@ -188,7 +189,7 @@ typedef struct SSyncNode {
|
|||
uint64_t heartbeatTimerCounter;
|
||||
|
||||
// peer heartbeat timer
|
||||
SSyncTimer peerHeartbeatTimerArr[TSDB_MAX_REPLICA];
|
||||
SSyncTimer peerHeartbeatTimerArr[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
|
||||
|
||||
// tools
|
||||
SSyncRespMgr* pSyncRespMgr;
|
||||
|
@ -196,13 +197,13 @@ typedef struct SSyncNode {
|
|||
// restore state
|
||||
bool restoreFinish;
|
||||
// SSnapshot* pSnapshot;
|
||||
SSyncSnapshotSender* senders[TSDB_MAX_REPLICA];
|
||||
SSyncSnapshotSender* senders[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
|
||||
SSyncSnapshotReceiver* pNewNodeReceiver;
|
||||
|
||||
// log replication mgr
|
||||
SSyncLogReplMgr* logReplMgrs[TSDB_MAX_REPLICA];
|
||||
SSyncLogReplMgr* logReplMgrs[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
|
||||
|
||||
SPeerState peerStates[TSDB_MAX_REPLICA];
|
||||
SPeerState peerStates[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
|
||||
|
||||
// is config changing
|
||||
bool changing;
|
||||
|
@ -275,6 +276,7 @@ void syncNodeUpdateTerm(SSyncNode* pSyncNode, SyncTerm term);
|
|||
void syncNodeUpdateTermWithoutStepDown(SSyncNode* pSyncNode, SyncTerm term);
|
||||
void syncNodeStepDown(SSyncNode* pSyncNode, SyncTerm newTerm);
|
||||
void syncNodeBecomeFollower(SSyncNode* pSyncNode, const char* debugStr);
|
||||
void syncNodeBecomeLearner(SSyncNode* pSyncNode, const char* debugStr);
|
||||
void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr);
|
||||
void syncNodeCandidate2Leader(SSyncNode* pSyncNode);
|
||||
void syncNodeFollower2Candidate(SSyncNode* pSyncNode);
|
||||
|
|
|
@ -237,6 +237,7 @@ typedef struct SyncLeaderTransfer {
|
|||
typedef enum {
|
||||
SYNC_LOCAL_CMD_STEP_DOWN = 100,
|
||||
SYNC_LOCAL_CMD_FOLLOWER_CMT,
|
||||
SYNC_LOCAL_CMD_LEARNER_CMT,
|
||||
} ESyncLocalCmd;
|
||||
|
||||
typedef struct SyncLocalCmd {
|
||||
|
|
|
@ -56,6 +56,8 @@ typedef struct SSyncLogBuffer {
|
|||
int64_t size;
|
||||
TdThreadMutex mutex;
|
||||
TdThreadMutexAttr attr;
|
||||
int64_t totalIndex;
|
||||
bool isCatchup;
|
||||
} SSyncLogBuffer;
|
||||
|
||||
// SSyncLogRepMgr
|
||||
|
|
|
@ -137,8 +137,10 @@ int32_t syncNodeOnAppendEntries(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
|
|||
pReply->term = pMsg->term;
|
||||
}
|
||||
|
||||
syncNodeStepDown(ths, pMsg->term);
|
||||
resetElect = true;
|
||||
if(ths->raftCfg.cfg.nodeInfo[ths->raftCfg.cfg.myIndex].nodeRole != TAOS_SYNC_ROLE_LEARNER){
|
||||
syncNodeStepDown(ths, pMsg->term);
|
||||
resetElect = true;
|
||||
}
|
||||
|
||||
if (pMsg->dataLen < sizeof(SSyncRaftEntry)) {
|
||||
sError("vgId:%d, incomplete append entries received. prev index:%" PRId64 ", term:%" PRId64 ", datalen:%d",
|
||||
|
|
|
@ -41,6 +41,8 @@ static int32_t syncNodeRequestVotePeers(SSyncNode* pNode) {
|
|||
|
||||
int32_t ret = 0;
|
||||
for (int i = 0; i < pNode->peersNum; ++i) {
|
||||
if(pNode->peersNodeInfo[i].nodeRole == TAOS_SYNC_ROLE_LEARNER) continue;
|
||||
|
||||
SRpcMsg rpcMsg = {0};
|
||||
ret = syncBuildRequestVote(&rpcMsg, pNode->vgId);
|
||||
if (ret < 0) {
|
||||
|
|
|
@ -26,6 +26,7 @@ SSyncIndexMgr *syncIndexMgrCreate(SSyncNode *pNode) {
|
|||
|
||||
pIndexMgr->replicas = &pNode->replicasId;
|
||||
pIndexMgr->replicaNum = pNode->replicaNum;
|
||||
pIndexMgr->totalReplicaNum = pNode->totalReplicaNum;
|
||||
pIndexMgr->pNode = pNode;
|
||||
syncIndexMgrClear(pIndexMgr);
|
||||
|
||||
|
@ -35,6 +36,7 @@ SSyncIndexMgr *syncIndexMgrCreate(SSyncNode *pNode) {
|
|||
void syncIndexMgrUpdate(SSyncIndexMgr *pIndexMgr, SSyncNode *pNode) {
|
||||
pIndexMgr->replicas = &pNode->replicasId;
|
||||
pIndexMgr->replicaNum = pNode->replicaNum;
|
||||
pIndexMgr->totalReplicaNum = pNode->totalReplicaNum;
|
||||
pIndexMgr->pNode = pNode;
|
||||
syncIndexMgrClear(pIndexMgr);
|
||||
}
|
||||
|
@ -50,14 +52,14 @@ void syncIndexMgrClear(SSyncIndexMgr *pIndexMgr) {
|
|||
memset(pIndexMgr->privateTerm, 0, sizeof(pIndexMgr->privateTerm));
|
||||
|
||||
int64_t timeNow = taosGetTimestampMs();
|
||||
for (int i = 0; i < pIndexMgr->replicaNum; ++i) {
|
||||
for (int i = 0; i < pIndexMgr->totalReplicaNum; ++i) {
|
||||
pIndexMgr->startTimeArr[i] = 0;
|
||||
pIndexMgr->recvTimeArr[i] = timeNow;
|
||||
}
|
||||
}
|
||||
|
||||
void syncIndexMgrSetIndex(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId, SyncIndex index) {
|
||||
for (int i = 0; i < pIndexMgr->replicaNum; ++i) {
|
||||
for (int i = 0; i < pIndexMgr->totalReplicaNum; ++i) {
|
||||
if (syncUtilSameId(&((*(pIndexMgr->replicas))[i]), pRaftId)) {
|
||||
(pIndexMgr->index)[i] = index;
|
||||
return;
|
||||
|
@ -69,7 +71,7 @@ void syncIndexMgrSetIndex(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId, Sync
|
|||
}
|
||||
|
||||
SSyncLogReplMgr *syncNodeGetLogReplMgr(SSyncNode *pNode, SRaftId *pRaftId) {
|
||||
for (int i = 0; i < pNode->replicaNum; i++) {
|
||||
for (int i = 0; i < pNode->totalReplicaNum; i++) {
|
||||
if (syncUtilSameId(&pNode->replicasId[i], pRaftId)) {
|
||||
return pNode->logReplMgrs[i];
|
||||
}
|
||||
|
@ -80,7 +82,7 @@ SSyncLogReplMgr *syncNodeGetLogReplMgr(SSyncNode *pNode, SRaftId *pRaftId) {
|
|||
}
|
||||
|
||||
SyncIndex syncIndexMgrGetIndex(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId) {
|
||||
for (int i = 0; i < pIndexMgr->replicaNum; ++i) {
|
||||
for (int i = 0; i < pIndexMgr->totalReplicaNum; ++i) {
|
||||
if (syncUtilSameId(&((*(pIndexMgr->replicas))[i]), pRaftId)) {
|
||||
SyncIndex idx = (pIndexMgr->index)[i];
|
||||
return idx;
|
||||
|
@ -93,7 +95,7 @@ SyncIndex syncIndexMgrGetIndex(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId)
|
|||
}
|
||||
|
||||
void syncIndexMgrSetStartTime(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId, int64_t startTime) {
|
||||
for (int i = 0; i < pIndexMgr->replicaNum; ++i) {
|
||||
for (int i = 0; i < pIndexMgr->totalReplicaNum; ++i) {
|
||||
if (syncUtilSameId(&((*(pIndexMgr->replicas))[i]), pRaftId)) {
|
||||
(pIndexMgr->startTimeArr)[i] = startTime;
|
||||
return;
|
||||
|
@ -105,7 +107,7 @@ void syncIndexMgrSetStartTime(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId,
|
|||
}
|
||||
|
||||
int64_t syncIndexMgrGetStartTime(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId) {
|
||||
for (int i = 0; i < pIndexMgr->replicaNum; ++i) {
|
||||
for (int i = 0; i < pIndexMgr->totalReplicaNum; ++i) {
|
||||
if (syncUtilSameId(&((*(pIndexMgr->replicas))[i]), pRaftId)) {
|
||||
int64_t startTime = (pIndexMgr->startTimeArr)[i];
|
||||
return startTime;
|
||||
|
@ -118,7 +120,7 @@ int64_t syncIndexMgrGetStartTime(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftI
|
|||
}
|
||||
|
||||
void syncIndexMgrSetRecvTime(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId, int64_t recvTime) {
|
||||
for (int i = 0; i < pIndexMgr->replicaNum; ++i) {
|
||||
for (int i = 0; i < pIndexMgr->totalReplicaNum; ++i) {
|
||||
if (syncUtilSameId(&((*(pIndexMgr->replicas))[i]), pRaftId)) {
|
||||
(pIndexMgr->recvTimeArr)[i] = recvTime;
|
||||
return;
|
||||
|
@ -130,7 +132,7 @@ void syncIndexMgrSetRecvTime(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId, i
|
|||
}
|
||||
|
||||
int64_t syncIndexMgrGetRecvTime(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId) {
|
||||
for (int i = 0; i < pIndexMgr->replicaNum; ++i) {
|
||||
for (int i = 0; i < pIndexMgr->totalReplicaNum; ++i) {
|
||||
if (syncUtilSameId(&((*(pIndexMgr->replicas))[i]), pRaftId)) {
|
||||
int64_t recvTime = (pIndexMgr->recvTimeArr)[i];
|
||||
return recvTime;
|
||||
|
@ -143,7 +145,7 @@ int64_t syncIndexMgrGetRecvTime(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId
|
|||
}
|
||||
|
||||
void syncIndexMgrSetTerm(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId, SyncTerm term) {
|
||||
for (int i = 0; i < pIndexMgr->replicaNum; ++i) {
|
||||
for (int i = 0; i < pIndexMgr->totalReplicaNum; ++i) {
|
||||
if (syncUtilSameId(&((*(pIndexMgr->replicas))[i]), pRaftId)) {
|
||||
(pIndexMgr->privateTerm)[i] = term;
|
||||
return;
|
||||
|
@ -155,7 +157,7 @@ void syncIndexMgrSetTerm(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId, SyncT
|
|||
}
|
||||
|
||||
SyncTerm syncIndexMgrGetTerm(SSyncIndexMgr *pIndexMgr, const SRaftId *pRaftId) {
|
||||
for (int i = 0; i < pIndexMgr->replicaNum; ++i) {
|
||||
for (int i = 0; i < pIndexMgr->totalReplicaNum; ++i) {
|
||||
if (syncUtilSameId(&((*(pIndexMgr->replicas))[i]), pRaftId)) {
|
||||
SyncTerm term = (pIndexMgr->privateTerm)[i];
|
||||
return term;
|
||||
|
|
|
@ -141,6 +141,13 @@ int32_t syncReconfig(int64_t rid, SSyncCfg* pNewCfg) {
|
|||
SSyncNode* pSyncNode = syncNodeAcquire(rid);
|
||||
if (pSyncNode == NULL) return -1;
|
||||
|
||||
if(pSyncNode->raftCfg.lastConfigIndex >= pNewCfg->lastIndex){
|
||||
syncNodeRelease(pSyncNode);
|
||||
sInfo("vgId:%d, no need Reconfig, current index:%" PRId64 ", new index:%" PRId64, pSyncNode->vgId,
|
||||
pSyncNode->raftCfg.lastConfigIndex, pNewCfg->lastIndex);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!syncNodeCheckNewConfig(pSyncNode, pNewCfg)) {
|
||||
syncNodeRelease(pSyncNode);
|
||||
terrno = TSDB_CODE_SYN_NEW_CONFIG_ERROR;
|
||||
|
@ -149,12 +156,12 @@ int32_t syncReconfig(int64_t rid, SSyncCfg* pNewCfg) {
|
|||
}
|
||||
|
||||
syncNodeUpdateNewConfigIndex(pSyncNode, pNewCfg);
|
||||
syncNodeDoConfigChange(pSyncNode, pNewCfg, SYNC_INDEX_INVALID);
|
||||
syncNodeDoConfigChange(pSyncNode, pNewCfg, pNewCfg->lastIndex);
|
||||
|
||||
if (pSyncNode->state == TAOS_SYNC_STATE_LEADER) {
|
||||
syncNodeStopHeartbeatTimer(pSyncNode);
|
||||
|
||||
for (int32_t i = 0; i < TSDB_MAX_REPLICA; ++i) {
|
||||
for (int32_t i = 0; i < TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA; ++i) {
|
||||
syncHbTimerInit(pSyncNode, &pSyncNode->peerHeartbeatTimerArr[i], pSyncNode->replicasId[i]);
|
||||
}
|
||||
|
||||
|
@ -315,8 +322,9 @@ int32_t syncBeginSnapshot(int64_t rid, int64_t lastApplyIndex) {
|
|||
}
|
||||
}
|
||||
|
||||
if (pSyncNode->replicaNum > 1) {
|
||||
if (pSyncNode->state != TAOS_SYNC_STATE_LEADER && pSyncNode->state != TAOS_SYNC_STATE_FOLLOWER) {
|
||||
if (pSyncNode->totalReplicaNum > 1) {
|
||||
if (pSyncNode->state != TAOS_SYNC_STATE_LEADER && pSyncNode->state != TAOS_SYNC_STATE_FOLLOWER
|
||||
&& pSyncNode->state != TAOS_SYNC_STATE_LEARNER) {
|
||||
sNTrace(pSyncNode, "new-snapshot-index:%" PRId64 " candidate or unknown state, do not delete wal",
|
||||
lastApplyIndex);
|
||||
syncNodeRelease(pSyncNode);
|
||||
|
@ -537,7 +545,8 @@ void syncGetRetryEpSet(int64_t rid, SEpSet* pEpSet) {
|
|||
SSyncNode* pSyncNode = syncNodeAcquire(rid);
|
||||
if (pSyncNode == NULL) return;
|
||||
|
||||
for (int32_t i = 0; i < pSyncNode->raftCfg.cfg.replicaNum; ++i) {
|
||||
for (int32_t i = 0; i < pSyncNode->raftCfg.cfg.totalReplicaNum; ++i) {
|
||||
if(pSyncNode->raftCfg.cfg.nodeInfo[i].nodeRole == TAOS_SYNC_ROLE_LEARNER) continue;
|
||||
SEp* pEp = &pEpSet->eps[i];
|
||||
tstrncpy(pEp->fqdn, pSyncNode->raftCfg.cfg.nodeInfo[i].nodeFqdn, TSDB_FQDN_LEN);
|
||||
pEp->port = (pSyncNode->raftCfg.cfg.nodeInfo)[i].nodePort;
|
||||
|
@ -564,6 +573,34 @@ int32_t syncPropose(int64_t rid, SRpcMsg* pMsg, bool isWeak, int64_t* seq) {
|
|||
return ret;
|
||||
}
|
||||
|
||||
int32_t syncIsCatchUp(int64_t rid) {
|
||||
SSyncNode* pSyncNode = syncNodeAcquire(rid);
|
||||
if (pSyncNode == NULL) {
|
||||
sError("sync Node Acquire error since %d", errno);
|
||||
return -1;
|
||||
}
|
||||
|
||||
while(1){
|
||||
if(pSyncNode->pLogBuf->totalIndex < 0 || pSyncNode->pLogBuf->commitIndex < 0 ||
|
||||
pSyncNode->pLogBuf->totalIndex < pSyncNode->pLogBuf->commitIndex ||
|
||||
pSyncNode->pLogBuf->totalIndex - pSyncNode->pLogBuf->commitIndex > SYNC_LEARNER_CATCHUP){
|
||||
sInfo("vgId:%d, Not catch up, wait one second, totalIndex:%" PRId64 " commitIndex:%" PRId64 " matchIndex:%" PRId64,
|
||||
pSyncNode->vgId, pSyncNode->pLogBuf->totalIndex, pSyncNode->pLogBuf->commitIndex,
|
||||
pSyncNode->pLogBuf->matchIndex);
|
||||
taosSsleep(1);
|
||||
}
|
||||
else{
|
||||
sInfo("vgId:%d, Catch up, totalIndex:%" PRId64 " commitIndex:%" PRId64 " matchIndex:%" PRId64,
|
||||
pSyncNode->vgId, pSyncNode->pLogBuf->totalIndex, pSyncNode->pLogBuf->commitIndex,
|
||||
pSyncNode->pLogBuf->matchIndex);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
syncNodeRelease(pSyncNode);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int32_t syncNodePropose(SSyncNode* pSyncNode, SRpcMsg* pMsg, bool isWeak, int64_t* seq) {
|
||||
if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) {
|
||||
terrno = TSDB_CODE_SYN_NOT_LEADER;
|
||||
|
@ -655,6 +692,8 @@ static int32_t syncHbTimerStart(SSyncNode* pSyncNode, SSyncTimer* pSyncTimer) {
|
|||
pData->logicClock = pSyncTimer->logicClock;
|
||||
pData->execTime = tsNow + pSyncTimer->timerMS;
|
||||
|
||||
sTrace("vgId:%d, start hb timer, rid:%" PRId64 " addr:%" PRId64, pSyncNode->vgId, pData->rid, pData->destId.addr);
|
||||
|
||||
taosTmrReset(pSyncTimer->timerCb, pSyncTimer->timerMS / HEARTBEAT_TICK_NUM, (void*)(pData->rid),
|
||||
syncEnv()->pTimerManager, &pSyncTimer->pTimer);
|
||||
} else {
|
||||
|
@ -719,7 +758,7 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) {
|
|||
sInfo("vgId:%d, create a new raft config file", pSyncNode->vgId);
|
||||
pSyncNode->raftCfg.isStandBy = pSyncInfo->isStandBy;
|
||||
pSyncNode->raftCfg.snapshotStrategy = pSyncInfo->snapshotStrategy;
|
||||
pSyncNode->raftCfg.lastConfigIndex = SYNC_INDEX_INVALID;
|
||||
pSyncNode->raftCfg.lastConfigIndex = pSyncInfo->syncCfg.lastIndex;
|
||||
pSyncNode->raftCfg.batchSize = pSyncInfo->batchSize;
|
||||
pSyncNode->raftCfg.cfg = pSyncInfo->syncCfg;
|
||||
pSyncNode->raftCfg.configIndexCount = 1;
|
||||
|
@ -736,7 +775,7 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) {
|
|||
goto _error;
|
||||
}
|
||||
|
||||
if (pSyncInfo->syncCfg.replicaNum > 0 && syncIsConfigChanged(&pSyncNode->raftCfg.cfg, &pSyncInfo->syncCfg)) {
|
||||
if (pSyncInfo->syncCfg.totalReplicaNum > 0 && syncIsConfigChanged(&pSyncNode->raftCfg.cfg, &pSyncInfo->syncCfg)) {
|
||||
sInfo("vgId:%d, use sync config from input options and write to cfg file", pSyncNode->vgId);
|
||||
pSyncNode->raftCfg.cfg = pSyncInfo->syncCfg;
|
||||
if (syncWriteCfgFile(pSyncNode) != 0) {
|
||||
|
@ -753,8 +792,9 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) {
|
|||
pSyncNode->vgId = pSyncInfo->vgId;
|
||||
SSyncCfg* pCfg = &pSyncNode->raftCfg.cfg;
|
||||
bool updated = false;
|
||||
sInfo("vgId:%d, start to open sync node, replica:%d selfIndex:%d", pSyncNode->vgId, pCfg->replicaNum, pCfg->myIndex);
|
||||
for (int32_t i = 0; i < pCfg->replicaNum; ++i) {
|
||||
sInfo("vgId:%d, start to open sync node, totalReplicaNum:%d replicaNum:%d selfIndex:%d",
|
||||
pSyncNode->vgId, pCfg->totalReplicaNum, pCfg->replicaNum, pCfg->myIndex);
|
||||
for (int32_t i = 0; i < pCfg->totalReplicaNum; ++i) {
|
||||
SNodeInfo* pNode = &pCfg->nodeInfo[i];
|
||||
if (tmsgUpdateDnodeInfo(&pNode->nodeId, &pNode->clusterId, pNode->nodeFqdn, &pNode->nodePort)) {
|
||||
updated = true;
|
||||
|
@ -792,9 +832,9 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) {
|
|||
}
|
||||
|
||||
// init peersNum, peers, peersId
|
||||
pSyncNode->peersNum = pSyncNode->raftCfg.cfg.replicaNum - 1;
|
||||
pSyncNode->peersNum = pSyncNode->raftCfg.cfg.totalReplicaNum - 1;
|
||||
int32_t j = 0;
|
||||
for (int32_t i = 0; i < pSyncNode->raftCfg.cfg.replicaNum; ++i) {
|
||||
for (int32_t i = 0; i < pSyncNode->raftCfg.cfg.totalReplicaNum; ++i) {
|
||||
if (i != pSyncNode->raftCfg.cfg.myIndex) {
|
||||
pSyncNode->peersNodeInfo[j] = pSyncNode->raftCfg.cfg.nodeInfo[i];
|
||||
syncUtilNodeInfo2EpSet(&pSyncNode->peersNodeInfo[j], &pSyncNode->peersEpset[j]);
|
||||
|
@ -810,7 +850,8 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) {
|
|||
|
||||
// init replicaNum, replicasId
|
||||
pSyncNode->replicaNum = pSyncNode->raftCfg.cfg.replicaNum;
|
||||
for (int32_t i = 0; i < pSyncNode->raftCfg.cfg.replicaNum; ++i) {
|
||||
pSyncNode->totalReplicaNum = pSyncNode->raftCfg.cfg.totalReplicaNum;
|
||||
for (int32_t i = 0; i < pSyncNode->raftCfg.cfg.totalReplicaNum; ++i) {
|
||||
if (!syncUtilNodeInfo2RaftId(&pSyncNode->raftCfg.cfg.nodeInfo[i], pSyncNode->vgId, &pSyncNode->replicasId[i])) {
|
||||
sError("vgId:%d, failed to determine raft member id, replica:%d", pSyncNode->vgId, i);
|
||||
goto _error;
|
||||
|
@ -934,7 +975,7 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) {
|
|||
pSyncNode->heartbeatTimerCounter = 0;
|
||||
|
||||
// init peer heartbeat timer
|
||||
for (int32_t i = 0; i < TSDB_MAX_REPLICA; ++i) {
|
||||
for (int32_t i = 0; i < TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA; ++i) {
|
||||
syncHbTimerInit(pSyncNode, &(pSyncNode->peerHeartbeatTimerArr[i]), (pSyncNode->replicasId)[i]);
|
||||
}
|
||||
|
||||
|
@ -949,7 +990,7 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) {
|
|||
pSyncNode->restoreFinish = false;
|
||||
|
||||
// snapshot senders
|
||||
for (int32_t i = 0; i < TSDB_MAX_REPLICA; ++i) {
|
||||
for (int32_t i = 0; i < TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA; ++i) {
|
||||
SSyncSnapshotSender* pSender = snapshotSenderCreate(pSyncNode, i);
|
||||
if (pSender == NULL) return NULL;
|
||||
|
||||
|
@ -1059,14 +1100,19 @@ int32_t syncNodeRestore(SSyncNode* pSyncNode) {
|
|||
|
||||
int32_t syncNodeStart(SSyncNode* pSyncNode) {
|
||||
// start raft
|
||||
if (pSyncNode->replicaNum == 1) {
|
||||
raftStoreNextTerm(pSyncNode);
|
||||
syncNodeBecomeLeader(pSyncNode, "one replica start");
|
||||
if(pSyncNode->raftCfg.cfg.nodeInfo[pSyncNode->raftCfg.cfg.myIndex].nodeRole == TAOS_SYNC_ROLE_LEARNER){
|
||||
syncNodeBecomeLearner(pSyncNode, "first start");
|
||||
}
|
||||
else{
|
||||
if (pSyncNode->replicaNum == 1) {
|
||||
raftStoreNextTerm(pSyncNode);
|
||||
syncNodeBecomeLeader(pSyncNode, "one replica start");
|
||||
|
||||
// Raft 3.6.2 Committing entries from previous terms
|
||||
syncNodeAppendNoop(pSyncNode);
|
||||
} else {
|
||||
syncNodeBecomeFollower(pSyncNode, "first start");
|
||||
// Raft 3.6.2 Committing entries from previous terms
|
||||
syncNodeAppendNoop(pSyncNode);
|
||||
} else {
|
||||
syncNodeBecomeFollower(pSyncNode, "first start");
|
||||
}
|
||||
}
|
||||
|
||||
int32_t ret = 0;
|
||||
|
@ -1157,7 +1203,7 @@ void syncNodeClose(SSyncNode* pSyncNode) {
|
|||
syncLogBufferDestroy(pSyncNode->pLogBuf);
|
||||
pSyncNode->pLogBuf = NULL;
|
||||
|
||||
for (int32_t i = 0; i < TSDB_MAX_REPLICA; ++i) {
|
||||
for (int32_t i = 0; i < TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA; ++i) {
|
||||
if (pSyncNode->senders[i] != NULL) {
|
||||
sDebug("vgId:%d, snapshot sender destroy while close, data:%p", pSyncNode->vgId, pSyncNode->senders[i]);
|
||||
|
||||
|
@ -1350,7 +1396,7 @@ inline bool syncNodeInConfig(SSyncNode* pNode, const SSyncCfg* pCfg) {
|
|||
bool b1 = false;
|
||||
bool b2 = false;
|
||||
|
||||
for (int32_t i = 0; i < pCfg->replicaNum; ++i) {
|
||||
for (int32_t i = 0; i < pCfg->totalReplicaNum; ++i) {
|
||||
if (strcmp(pCfg->nodeInfo[i].nodeFqdn, pNode->myNodeInfo.nodeFqdn) == 0 &&
|
||||
pCfg->nodeInfo[i].nodePort == pNode->myNodeInfo.nodePort) {
|
||||
b1 = true;
|
||||
|
@ -1358,7 +1404,7 @@ inline bool syncNodeInConfig(SSyncNode* pNode, const SSyncCfg* pCfg) {
|
|||
}
|
||||
}
|
||||
|
||||
for (int32_t i = 0; i < pCfg->replicaNum; ++i) {
|
||||
for (int32_t i = 0; i < pCfg->totalReplicaNum; ++i) {
|
||||
SRaftId raftId = {
|
||||
.addr = SYNC_ADDR(&pCfg->nodeInfo[i]),
|
||||
.vgId = pNode->vgId,
|
||||
|
@ -1375,13 +1421,14 @@ inline bool syncNodeInConfig(SSyncNode* pNode, const SSyncCfg* pCfg) {
|
|||
}
|
||||
|
||||
static bool syncIsConfigChanged(const SSyncCfg* pOldCfg, const SSyncCfg* pNewCfg) {
|
||||
if (pOldCfg->replicaNum != pNewCfg->replicaNum) return true;
|
||||
if (pOldCfg->totalReplicaNum != pNewCfg->totalReplicaNum) return true;
|
||||
if (pOldCfg->myIndex != pNewCfg->myIndex) return true;
|
||||
for (int32_t i = 0; i < pOldCfg->replicaNum; ++i) {
|
||||
for (int32_t i = 0; i < pOldCfg->totalReplicaNum; ++i) {
|
||||
const SNodeInfo* pOldInfo = &pOldCfg->nodeInfo[i];
|
||||
const SNodeInfo* pNewInfo = &pNewCfg->nodeInfo[i];
|
||||
if (strcmp(pOldInfo->nodeFqdn, pNewInfo->nodeFqdn) != 0) return true;
|
||||
if (pOldInfo->nodePort != pNewInfo->nodePort) return true;
|
||||
if(pOldInfo->nodeRole != pNewInfo->nodeRole) return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
|
@ -1418,8 +1465,10 @@ void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncInde
|
|||
}
|
||||
|
||||
// log begin config change
|
||||
sNInfo(pSyncNode, "begin do config change, from %d to %d, replicas:%d", pSyncNode->vgId, oldConfig.replicaNum,
|
||||
pNewConfig->replicaNum);
|
||||
sNInfo(pSyncNode, "begin do config change, from %d to %d, from %" PRId64 " to %" PRId64 ", replicas:%d",
|
||||
pSyncNode->vgId,
|
||||
oldConfig.totalReplicaNum, pNewConfig->totalReplicaNum,
|
||||
oldConfig.lastIndex, pNewConfig->lastIndex);
|
||||
|
||||
if (IamInNew) {
|
||||
pSyncNode->raftCfg.isStandBy = 0; // change isStandBy to normal
|
||||
|
@ -1436,11 +1485,10 @@ void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncInde
|
|||
int32_t ret = 0;
|
||||
|
||||
// save snapshot senders
|
||||
int32_t oldReplicaNum = pSyncNode->replicaNum;
|
||||
SRaftId oldReplicasId[TSDB_MAX_REPLICA];
|
||||
SRaftId oldReplicasId[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
|
||||
memcpy(oldReplicasId, pSyncNode->replicasId, sizeof(oldReplicasId));
|
||||
SSyncSnapshotSender* oldSenders[TSDB_MAX_REPLICA];
|
||||
for (int32_t i = 0; i < TSDB_MAX_REPLICA; ++i) {
|
||||
SSyncSnapshotSender* oldSenders[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
|
||||
for (int32_t i = 0; i < TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA; ++i) {
|
||||
oldSenders[i] = pSyncNode->senders[i];
|
||||
sSTrace(oldSenders[i], "snapshot sender save old");
|
||||
}
|
||||
|
@ -1450,9 +1498,9 @@ void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncInde
|
|||
syncUtilNodeInfo2RaftId(&pSyncNode->myNodeInfo, pSyncNode->vgId, &pSyncNode->myRaftId);
|
||||
|
||||
// init peersNum, peers, peersId
|
||||
pSyncNode->peersNum = pSyncNode->raftCfg.cfg.replicaNum - 1;
|
||||
pSyncNode->peersNum = pSyncNode->raftCfg.cfg.totalReplicaNum - 1;
|
||||
int32_t j = 0;
|
||||
for (int32_t i = 0; i < pSyncNode->raftCfg.cfg.replicaNum; ++i) {
|
||||
for (int32_t i = 0; i < pSyncNode->raftCfg.cfg.totalReplicaNum; ++i) {
|
||||
if (i != pSyncNode->raftCfg.cfg.myIndex) {
|
||||
pSyncNode->peersNodeInfo[j] = pSyncNode->raftCfg.cfg.nodeInfo[i];
|
||||
syncUtilNodeInfo2EpSet(&pSyncNode->peersNodeInfo[j], &pSyncNode->peersEpset[j]);
|
||||
|
@ -1465,7 +1513,8 @@ void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncInde
|
|||
|
||||
// init replicaNum, replicasId
|
||||
pSyncNode->replicaNum = pSyncNode->raftCfg.cfg.replicaNum;
|
||||
for (int32_t i = 0; i < pSyncNode->raftCfg.cfg.replicaNum; ++i) {
|
||||
pSyncNode->totalReplicaNum = pSyncNode->raftCfg.cfg.totalReplicaNum;
|
||||
for (int32_t i = 0; i < pSyncNode->raftCfg.cfg.totalReplicaNum; ++i) {
|
||||
syncUtilNodeInfo2RaftId(&pSyncNode->raftCfg.cfg.nodeInfo[i], pSyncNode->vgId, &pSyncNode->replicasId[i]);
|
||||
}
|
||||
|
||||
|
@ -1480,15 +1529,15 @@ void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncInde
|
|||
// reset snapshot senders
|
||||
|
||||
// clear new
|
||||
for (int32_t i = 0; i < TSDB_MAX_REPLICA; ++i) {
|
||||
for (int32_t i = 0; i < TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA; ++i) {
|
||||
pSyncNode->senders[i] = NULL;
|
||||
}
|
||||
|
||||
// reset new
|
||||
for (int32_t i = 0; i < pSyncNode->replicaNum; ++i) {
|
||||
for (int32_t i = 0; i < pSyncNode->totalReplicaNum; ++i) {
|
||||
// reset sender
|
||||
bool reset = false;
|
||||
for (int32_t j = 0; j < TSDB_MAX_REPLICA; ++j) {
|
||||
for (int32_t j = 0; j < TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA; ++j) {
|
||||
if (syncUtilSameId(&(pSyncNode->replicasId)[i], &oldReplicasId[j]) && oldSenders[j] != NULL) {
|
||||
sNTrace(pSyncNode, "snapshot sender reset for:%" PRId64 ", newIndex:%d, dnode:%d, %p",
|
||||
(pSyncNode->replicasId)[i].addr, i, DID(&pSyncNode->replicasId[i]), oldSenders[j]);
|
||||
|
@ -1510,7 +1559,7 @@ void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncInde
|
|||
}
|
||||
|
||||
// create new
|
||||
for (int32_t i = 0; i < TSDB_MAX_REPLICA; ++i) {
|
||||
for (int32_t i = 0; i < TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA; ++i) {
|
||||
if (pSyncNode->senders[i] == NULL) {
|
||||
pSyncNode->senders[i] = snapshotSenderCreate(pSyncNode, i);
|
||||
if (pSyncNode->senders[i] == NULL) {
|
||||
|
@ -1525,7 +1574,7 @@ void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncInde
|
|||
}
|
||||
|
||||
// free old
|
||||
for (int32_t i = 0; i < TSDB_MAX_REPLICA; ++i) {
|
||||
for (int32_t i = 0; i < TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA; ++i) {
|
||||
if (oldSenders[i] != NULL) {
|
||||
sSDebug(oldSenders[i], "snapshot sender destroy old, data:%p replica-index:%d", oldSenders[i], i);
|
||||
snapshotSenderDestroy(oldSenders[i]);
|
||||
|
@ -1550,12 +1599,12 @@ void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncInde
|
|||
} else {
|
||||
// persist cfg
|
||||
syncWriteCfgFile(pSyncNode);
|
||||
sNInfo(pSyncNode, "do not config change from %d to %d", oldConfig.replicaNum, pNewConfig->replicaNum);
|
||||
sNInfo(pSyncNode, "do not config change from %d to %d", oldConfig.totalReplicaNum, pNewConfig->totalReplicaNum);
|
||||
}
|
||||
|
||||
_END:
|
||||
// log end config change
|
||||
sNInfo(pSyncNode, "end do config change, from %d to %d", oldConfig.replicaNum, pNewConfig->replicaNum);
|
||||
sNInfo(pSyncNode, "end do config change, from %d to %d", oldConfig.totalReplicaNum, pNewConfig->totalReplicaNum);
|
||||
}
|
||||
|
||||
// raft state change --------------
|
||||
|
@ -1635,6 +1684,27 @@ void syncNodeBecomeFollower(SSyncNode* pSyncNode, const char* debugStr) {
|
|||
syncNodeResetElectTimer(pSyncNode);
|
||||
}
|
||||
|
||||
void syncNodeBecomeLearner(SSyncNode* pSyncNode, const char* debugStr) {
|
||||
pSyncNode->hbSlowNum = 0;
|
||||
|
||||
// state change
|
||||
pSyncNode->state = TAOS_SYNC_STATE_LEARNER;
|
||||
|
||||
// trace log
|
||||
sNTrace(pSyncNode, "become learner %s", debugStr);
|
||||
|
||||
// call back
|
||||
if (pSyncNode->pFsm != NULL && pSyncNode->pFsm->FpBecomeLearnerCb != NULL) {
|
||||
pSyncNode->pFsm->FpBecomeLearnerCb(pSyncNode->pFsm);
|
||||
}
|
||||
|
||||
// min match index
|
||||
pSyncNode->minMatchIndex = SYNC_INDEX_INVALID;
|
||||
|
||||
// reset log buffer
|
||||
syncLogBufferReset(pSyncNode->pLogBuf, pSyncNode);
|
||||
}
|
||||
|
||||
// TLA+ Spec
|
||||
// \* Candidate i transitions to leader.
|
||||
// BecomeLeader(i) ==
|
||||
|
@ -1752,7 +1822,7 @@ void syncNodeCandidate2Leader(SSyncNode* pSyncNode) {
|
|||
bool syncNodeIsMnode(SSyncNode* pSyncNode) { return (pSyncNode->vgId == 1); }
|
||||
|
||||
int32_t syncNodePeerStateInit(SSyncNode* pSyncNode) {
|
||||
for (int32_t i = 0; i < TSDB_MAX_REPLICA; ++i) {
|
||||
for (int32_t i = 0; i < TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA; ++i) {
|
||||
pSyncNode->peerStates[i].lastSendIndex = SYNC_INDEX_INVALID;
|
||||
pSyncNode->peerStates[i].lastSendTime = 0;
|
||||
}
|
||||
|
@ -2039,7 +2109,7 @@ static void syncNodeEqHeartbeatTimer(void* param, void* tmrId) {
|
|||
if (!syncIsInit()) return;
|
||||
|
||||
SSyncNode* pNode = param;
|
||||
if (pNode->replicaNum > 1) {
|
||||
if (pNode->totalReplicaNum > 1) {
|
||||
if (atomic_load_64(&pNode->heartbeatTimerLogicClockUser) <= atomic_load_64(&pNode->heartbeatTimerLogicClock)) {
|
||||
SRpcMsg rpcMsg = {0};
|
||||
int32_t code = syncBuildTimeout(&rpcMsg, SYNC_TIMEOUT_HEARTBEAT, atomic_load_64(&pNode->heartbeatTimerLogicClock),
|
||||
|
@ -2074,7 +2144,7 @@ static void syncNodeEqPeerHeartbeatTimer(void* param, void* tmrId) {
|
|||
|
||||
SSyncHbTimerData* pData = syncHbTimerDataAcquire(hbDataRid);
|
||||
if (pData == NULL) {
|
||||
sError("hb timer get pData NULL, %" PRId64, hbDataRid);
|
||||
sError("hb timer get pData NULL, rid:%" PRId64 " addr:%" PRId64, hbDataRid, pData->destId.addr);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -2101,9 +2171,9 @@ static void syncNodeEqPeerHeartbeatTimer(void* param, void* tmrId) {
|
|||
return;
|
||||
}
|
||||
|
||||
// sTrace("vgId:%d, eq peer hb timer", pSyncNode->vgId);
|
||||
sTrace("vgId:%d, eq peer hb timer, rid:%" PRId64 " addr:%" PRId64, pSyncNode->vgId, hbDataRid, pData->destId.addr);
|
||||
|
||||
if (pSyncNode->replicaNum > 1) {
|
||||
if (pSyncNode->totalReplicaNum > 1) {
|
||||
int64_t timerLogicClock = atomic_load_64(&pSyncTimer->logicClock);
|
||||
int64_t msgLogicClock = atomic_load_64(&pData->logicClock);
|
||||
|
||||
|
@ -2130,6 +2200,7 @@ static void syncNodeEqPeerHeartbeatTimer(void* param, void* tmrId) {
|
|||
pSyncTimer->timeStamp = tsNow;
|
||||
|
||||
// send msg
|
||||
sTrace("vgId:%d, send heartbeat to dnode:%d", pSyncNode->vgId, DID(&(pSyncMsg->destId)));
|
||||
syncLogSendHeartbeat(pSyncNode, pSyncMsg, false, timerElapsed, pData->execTime);
|
||||
syncNodeSendHeartbeat(pSyncNode, &pSyncMsg->destId, &rpcMsg);
|
||||
} else {
|
||||
|
@ -2212,7 +2283,7 @@ int32_t syncNodeAppend(SSyncNode* ths, SSyncRaftEntry* pEntry) {
|
|||
}
|
||||
|
||||
bool syncNodeHeartbeatReplyTimeout(SSyncNode* pSyncNode) {
|
||||
if (pSyncNode->replicaNum == 1) {
|
||||
if (pSyncNode->totalReplicaNum == 1) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -2237,7 +2308,7 @@ bool syncNodeHeartbeatReplyTimeout(SSyncNode* pSyncNode) {
|
|||
bool syncNodeSnapshotSending(SSyncNode* pSyncNode) {
|
||||
if (pSyncNode == NULL) return false;
|
||||
bool b = false;
|
||||
for (int32_t i = 0; i < pSyncNode->replicaNum; ++i) {
|
||||
for (int32_t i = 0; i < pSyncNode->totalReplicaNum; ++i) {
|
||||
if (pSyncNode->senders[i] != NULL && pSyncNode->senders[i]->start) {
|
||||
b = true;
|
||||
break;
|
||||
|
@ -2328,13 +2399,17 @@ int32_t syncNodeOnHeartbeat(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
|
|||
pMsgReply->startTime = ths->startTime;
|
||||
pMsgReply->timeStamp = tsMs;
|
||||
|
||||
sTrace(
|
||||
"vgId:%d, heartbeat msg from dnode:%d, cluster:%d, Msgterm:%" PRId64 " currentTerm:%" PRId64,
|
||||
ths->vgId, DID(&(pMsg->srcId)), CID(&(pMsg->srcId)), pMsg->term, currentTerm);
|
||||
|
||||
if (pMsg->term == currentTerm && ths->state != TAOS_SYNC_STATE_LEADER) {
|
||||
syncIndexMgrSetRecvTime(ths->pNextIndex, &(pMsg->srcId), tsMs);
|
||||
resetElect = true;
|
||||
|
||||
ths->minMatchIndex = pMsg->minMatchIndex;
|
||||
|
||||
if (ths->state == TAOS_SYNC_STATE_FOLLOWER) {
|
||||
if (ths->state == TAOS_SYNC_STATE_FOLLOWER || ths->state == TAOS_SYNC_STATE_LEARNER) {
|
||||
SRpcMsg rpcMsgLocalCmd = {0};
|
||||
(void)syncBuildLocalCmd(&rpcMsgLocalCmd, ths->vgId);
|
||||
|
||||
|
@ -2356,7 +2431,7 @@ int32_t syncNodeOnHeartbeat(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
|
|||
}
|
||||
}
|
||||
|
||||
if (pMsg->term >= currentTerm && ths->state != TAOS_SYNC_STATE_FOLLOWER) {
|
||||
if (pMsg->term >= currentTerm && ths->state == TAOS_SYNC_STATE_LEADER) {
|
||||
SRpcMsg rpcMsgLocalCmd = {0};
|
||||
(void)syncBuildLocalCmd(&rpcMsgLocalCmd, ths->vgId);
|
||||
|
||||
|
@ -2376,6 +2451,26 @@ int32_t syncNodeOnHeartbeat(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
|
|||
}
|
||||
}
|
||||
|
||||
if (pMsg->term >= currentTerm && ths->state == TAOS_SYNC_STATE_LEARNER) {
|
||||
SRpcMsg rpcMsgLocalCmd = {0};
|
||||
(void)syncBuildLocalCmd(&rpcMsgLocalCmd, ths->vgId);
|
||||
|
||||
SyncLocalCmd* pSyncMsg = rpcMsgLocalCmd.pCont;
|
||||
pSyncMsg->cmd = SYNC_LOCAL_CMD_LEARNER_CMT;
|
||||
pSyncMsg->currentTerm = pMsg->term;
|
||||
pSyncMsg->commitIndex = pMsg->commitIndex;
|
||||
|
||||
if (ths->syncEqMsg != NULL && ths->msgcb != NULL) {
|
||||
int32_t code = ths->syncEqMsg(ths->msgcb, &rpcMsgLocalCmd);
|
||||
if (code != 0) {
|
||||
sError("vgId:%d, sync enqueue step-down msg error, code:%d", ths->vgId, code);
|
||||
rpcFreeCont(rpcMsgLocalCmd.pCont);
|
||||
} else {
|
||||
sTrace("vgId:%d, sync enqueue step-down msg, new-term:%" PRId64, ths->vgId, pSyncMsg->currentTerm);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// reply
|
||||
syncNodeSendMsgById(&pMsgReply->destId, ths, &rpcMsg);
|
||||
|
||||
|
@ -2439,7 +2534,20 @@ int32_t syncNodeOnLocalCmd(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
|
|||
sError("vgId:%d, failed to commit raft log since %s. commit index:%" PRId64 "", ths->vgId, terrstr(),
|
||||
ths->commitIndex);
|
||||
}
|
||||
} else {
|
||||
} else if (pMsg->cmd == SYNC_LOCAL_CMD_LEARNER_CMT){
|
||||
if (syncLogBufferIsEmpty(ths->pLogBuf)) {
|
||||
sError("vgId:%d, sync log buffer is empty.", ths->vgId);
|
||||
return 0;
|
||||
}
|
||||
raftStoreSetTerm(ths, pMsg->currentTerm);
|
||||
(void)syncNodeUpdateCommitIndex(ths, pMsg->commitIndex);
|
||||
sTrace("vgId:%d, start to commit raft log in heartbeat. commit index:%" PRId64 "", ths->vgId, ths->commitIndex);
|
||||
if (syncLogBufferCommit(ths->pLogBuf, ths, ths->commitIndex) < 0) {
|
||||
sError("vgId:%d, failed to commit raft log since %s. commit index:%" PRId64 "", ths->vgId, terrstr(),
|
||||
ths->commitIndex);
|
||||
}
|
||||
}
|
||||
else {
|
||||
sError("error local cmd");
|
||||
}
|
||||
|
||||
|
@ -2502,13 +2610,15 @@ const char* syncStr(ESyncState state) {
|
|||
return "error";
|
||||
case TAOS_SYNC_STATE_OFFLINE:
|
||||
return "offline";
|
||||
case TAOS_SYNC_STATE_LEARNER:
|
||||
return "learner";
|
||||
default:
|
||||
return "unknown";
|
||||
}
|
||||
}
|
||||
|
||||
int32_t syncNodeUpdateNewConfigIndex(SSyncNode* ths, SSyncCfg* pNewCfg) {
|
||||
for (int32_t i = 0; i < pNewCfg->replicaNum; ++i) {
|
||||
for (int32_t i = 0; i < pNewCfg->totalReplicaNum; ++i) {
|
||||
SRaftId raftId = {
|
||||
.addr = SYNC_ADDR(&pNewCfg->nodeInfo[i]),
|
||||
.vgId = ths->vgId,
|
||||
|
@ -2528,7 +2638,7 @@ bool syncNodeIsOptimizedOneReplica(SSyncNode* ths, SRpcMsg* pMsg) {
|
|||
}
|
||||
|
||||
bool syncNodeInRaftGroup(SSyncNode* ths, SRaftId* pRaftId) {
|
||||
for (int32_t i = 0; i < ths->replicaNum; ++i) {
|
||||
for (int32_t i = 0; i < ths->totalReplicaNum; ++i) {
|
||||
if (syncUtilSameId(&((ths->replicasId)[i]), pRaftId)) {
|
||||
return true;
|
||||
}
|
||||
|
@ -2538,7 +2648,7 @@ bool syncNodeInRaftGroup(SSyncNode* ths, SRaftId* pRaftId) {
|
|||
|
||||
SSyncSnapshotSender* syncNodeGetSnapshotSender(SSyncNode* ths, SRaftId* pDestId) {
|
||||
SSyncSnapshotSender* pSender = NULL;
|
||||
for (int32_t i = 0; i < ths->replicaNum; ++i) {
|
||||
for (int32_t i = 0; i < ths->totalReplicaNum; ++i) {
|
||||
if (syncUtilSameId(pDestId, &((ths->replicasId)[i]))) {
|
||||
pSender = (ths->senders)[i];
|
||||
}
|
||||
|
@ -2548,7 +2658,7 @@ SSyncSnapshotSender* syncNodeGetSnapshotSender(SSyncNode* ths, SRaftId* pDestId)
|
|||
|
||||
SSyncTimer* syncNodeGetHbTimer(SSyncNode* ths, SRaftId* pDestId) {
|
||||
SSyncTimer* pTimer = NULL;
|
||||
for (int32_t i = 0; i < ths->replicaNum; ++i) {
|
||||
for (int32_t i = 0; i < ths->totalReplicaNum; ++i) {
|
||||
if (syncUtilSameId(pDestId, &((ths->replicasId)[i]))) {
|
||||
pTimer = &((ths->peerHeartbeatTimerArr)[i]);
|
||||
}
|
||||
|
@ -2558,7 +2668,7 @@ SSyncTimer* syncNodeGetHbTimer(SSyncNode* ths, SRaftId* pDestId) {
|
|||
|
||||
SPeerState* syncNodeGetPeerState(SSyncNode* ths, const SRaftId* pDestId) {
|
||||
SPeerState* pState = NULL;
|
||||
for (int32_t i = 0; i < ths->replicaNum; ++i) {
|
||||
for (int32_t i = 0; i < ths->totalReplicaNum; ++i) {
|
||||
if (syncUtilSameId(pDestId, &((ths->replicasId)[i]))) {
|
||||
pState = &((ths->peerStates)[i]);
|
||||
}
|
||||
|
|
|
@ -242,6 +242,8 @@ int32_t syncLogBufferInitWithoutLock(SSyncLogBuffer* pBuf, SSyncNode* pNode) {
|
|||
// update startIndex
|
||||
pBuf->startIndex = takeDummy ? index : index + 1;
|
||||
|
||||
pBuf->isCatchup = false;
|
||||
|
||||
sInfo("vgId:%d, init sync log buffer. buffer: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 ")", pNode->vgId,
|
||||
pBuf->startIndex, pBuf->commitIndex, pBuf->matchIndex, pBuf->endIndex);
|
||||
|
||||
|
@ -324,6 +326,15 @@ int32_t syncLogBufferAccept(SSyncLogBuffer* pBuf, SSyncNode* pNode, SSyncRaftEnt
|
|||
goto _out;
|
||||
}
|
||||
|
||||
if(pNode->raftCfg.cfg.nodeInfo[pNode->raftCfg.cfg.myIndex].nodeRole == TAOS_SYNC_ROLE_LEARNER &&
|
||||
index > 0 && index > pBuf->totalIndex){
|
||||
pBuf->totalIndex = index;
|
||||
sTrace("vgId:%d, update learner progress. index:%" PRId64 ", term:%" PRId64 ": prevterm:%" PRId64
|
||||
" != lastmatch:%" PRId64 ". log buffer: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 ")",
|
||||
pNode->vgId, pEntry->index, pEntry->term, prevTerm, lastMatchTerm, pBuf->startIndex, pBuf->commitIndex,
|
||||
pBuf->matchIndex, pBuf->endIndex);
|
||||
}
|
||||
|
||||
if (index - pBuf->startIndex >= pBuf->size) {
|
||||
sWarn("vgId:%d, out of buffer range. index:%" PRId64 ", term:%" PRId64 ". log buffer: [%" PRId64 " %" PRId64
|
||||
" %" PRId64 ", %" PRId64 ")",
|
||||
|
@ -483,7 +494,7 @@ _out:
|
|||
}
|
||||
|
||||
int32_t syncFsmExecute(SSyncNode* pNode, SSyncFSM* pFsm, ESyncState role, SyncTerm term, SSyncRaftEntry* pEntry,
|
||||
int32_t applyCode) {
|
||||
int32_t applyCode) {
|
||||
if (pNode->replicaNum == 1 && pNode->restoreFinish && pNode->vgId != 1) {
|
||||
return 0;
|
||||
}
|
||||
|
@ -957,7 +968,7 @@ void syncLogReplDestroy(SSyncLogReplMgr* pMgr) {
|
|||
}
|
||||
|
||||
int32_t syncNodeLogReplInit(SSyncNode* pNode) {
|
||||
for (int i = 0; i < TSDB_MAX_REPLICA; i++) {
|
||||
for (int i = 0; i < TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA; i++) {
|
||||
ASSERT(pNode->logReplMgrs[i] == NULL);
|
||||
pNode->logReplMgrs[i] = syncLogReplCreate();
|
||||
if (pNode->logReplMgrs[i] == NULL) {
|
||||
|
@ -970,7 +981,7 @@ int32_t syncNodeLogReplInit(SSyncNode* pNode) {
|
|||
}
|
||||
|
||||
void syncNodeLogReplDestroy(SSyncNode* pNode) {
|
||||
for (int i = 0; i < TSDB_MAX_REPLICA; i++) {
|
||||
for (int i = 0; i < TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA; i++) {
|
||||
syncLogReplDestroy(pNode->logReplMgrs[i]);
|
||||
pNode->logReplMgrs[i] = NULL;
|
||||
}
|
||||
|
@ -1100,7 +1111,7 @@ int32_t syncLogBufferReset(SSyncLogBuffer* pBuf, SSyncNode* pNode) {
|
|||
pBuf->endIndex = pBuf->matchIndex + 1;
|
||||
|
||||
// reset repl mgr
|
||||
for (int i = 0; i < pNode->replicaNum; i++) {
|
||||
for (int i = 0; i < pNode->totalReplicaNum; i++) {
|
||||
SSyncLogReplMgr* pMgr = pNode->logReplMgrs[i];
|
||||
syncLogReplReset(pMgr);
|
||||
}
|
||||
|
|
|
@ -18,21 +18,45 @@
|
|||
#include "syncUtil.h"
|
||||
#include "tjson.h"
|
||||
|
||||
const char* syncRoleToStr(ESyncRole role) {
|
||||
switch (role) {
|
||||
case TAOS_SYNC_ROLE_VOTER:
|
||||
return "voter";
|
||||
case TAOS_SYNC_ROLE_LEARNER:
|
||||
return "learner";
|
||||
default:
|
||||
return "unknown";
|
||||
}
|
||||
}
|
||||
|
||||
const ESyncRole syncStrToRole(char* str) {
|
||||
if(strcmp(str, "voter") == 0){
|
||||
return TAOS_SYNC_ROLE_VOTER;
|
||||
}
|
||||
if(strcmp(str, "learner") == 0){
|
||||
return TAOS_SYNC_ROLE_LEARNER;
|
||||
}
|
||||
|
||||
return TAOS_SYNC_ROLE_ERROR;
|
||||
}
|
||||
|
||||
static int32_t syncEncodeSyncCfg(const void *pObj, SJson *pJson) {
|
||||
SSyncCfg *pCfg = (SSyncCfg *)pObj;
|
||||
if (tjsonAddDoubleToObject(pJson, "totalReplicaNum", pCfg->totalReplicaNum) < 0) return -1;
|
||||
if (tjsonAddDoubleToObject(pJson, "replicaNum", pCfg->replicaNum) < 0) return -1;
|
||||
if (tjsonAddDoubleToObject(pJson, "myIndex", pCfg->myIndex) < 0) return -1;
|
||||
|
||||
SJson *nodeInfo = tjsonCreateArray();
|
||||
if (nodeInfo == NULL) return -1;
|
||||
if (tjsonAddItemToObject(pJson, "nodeInfo", nodeInfo) < 0) return -1;
|
||||
for (int32_t i = 0; i < pCfg->replicaNum; ++i) {
|
||||
for (int32_t i = 0; i < pCfg->totalReplicaNum; ++i) {
|
||||
SJson *info = tjsonCreateObject();
|
||||
if (info == NULL) return -1;
|
||||
if (tjsonAddDoubleToObject(info, "nodePort", pCfg->nodeInfo[i].nodePort) < 0) return -1;
|
||||
if (tjsonAddStringToObject(info, "nodeFqdn", pCfg->nodeInfo[i].nodeFqdn) < 0) return -1;
|
||||
if (tjsonAddIntegerToObject(info, "nodeId", pCfg->nodeInfo[i].nodeId) < 0) return -1;
|
||||
if (tjsonAddIntegerToObject(info, "clusterId", pCfg->nodeInfo[i].clusterId) < 0) return -1;
|
||||
if (tjsonAddStringToObject(info, "nodeRole", syncRoleToStr(pCfg->nodeInfo[i].nodeRole)) < 0) return -1;
|
||||
if (tjsonAddItemToArray(nodeInfo, info) < 0) return -1;
|
||||
}
|
||||
|
||||
|
@ -90,7 +114,8 @@ int32_t syncWriteCfgFile(SSyncNode *pNode) {
|
|||
if (taosRenameFile(file, realfile) != 0) goto _OVER;
|
||||
|
||||
code = 0;
|
||||
sInfo("vgId:%d, succeed to write sync cfg file:%s, len:%d", pNode->vgId, realfile, len);
|
||||
sInfo("vgId:%d, succeed to write sync cfg file:%s, len:%d, lastConfigIndex:%" PRId64, pNode->vgId,
|
||||
realfile, len, pNode->raftCfg.lastConfigIndex);
|
||||
|
||||
_OVER:
|
||||
if (pJson != NULL) tjsonDelete(pJson);
|
||||
|
@ -108,6 +133,7 @@ static int32_t syncDecodeSyncCfg(const SJson *pJson, void *pObj) {
|
|||
SSyncCfg *pCfg = (SSyncCfg *)pObj;
|
||||
int32_t code = 0;
|
||||
|
||||
tjsonGetInt32ValueFromDouble(pJson, "totalReplicaNum", pCfg->totalReplicaNum, code);
|
||||
tjsonGetInt32ValueFromDouble(pJson, "replicaNum", pCfg->replicaNum, code);
|
||||
if (code < 0) return -1;
|
||||
tjsonGetInt32ValueFromDouble(pJson, "myIndex", pCfg->myIndex, code);
|
||||
|
@ -115,9 +141,9 @@ static int32_t syncDecodeSyncCfg(const SJson *pJson, void *pObj) {
|
|||
|
||||
SJson *nodeInfo = tjsonGetObjectItem(pJson, "nodeInfo");
|
||||
if (nodeInfo == NULL) return -1;
|
||||
pCfg->replicaNum = tjsonGetArraySize(nodeInfo);
|
||||
pCfg->totalReplicaNum = tjsonGetArraySize(nodeInfo);
|
||||
|
||||
for (int32_t i = 0; i < pCfg->replicaNum; ++i) {
|
||||
for (int32_t i = 0; i < pCfg->totalReplicaNum; ++i) {
|
||||
SJson *info = tjsonGetArrayItem(nodeInfo, i);
|
||||
if (info == NULL) return -1;
|
||||
tjsonGetUInt16ValueFromDouble(info, "nodePort", pCfg->nodeInfo[i].nodePort, code);
|
||||
|
@ -126,6 +152,10 @@ static int32_t syncDecodeSyncCfg(const SJson *pJson, void *pObj) {
|
|||
if (code < 0) return -1;
|
||||
tjsonGetNumberValue(info, "nodeId", pCfg->nodeInfo[i].nodeId, code);
|
||||
tjsonGetNumberValue(info, "clusterId", pCfg->nodeInfo[i].clusterId, code);
|
||||
char role[10];
|
||||
code = tjsonGetStringValue(info, "nodeRole", role);
|
||||
if(code < 0) return -1;
|
||||
pCfg->nodeInfo[i].nodeRole = syncStrToRole(role);
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -66,10 +66,10 @@ int32_t syncNodeReplicate(SSyncNode* pNode) {
|
|||
}
|
||||
|
||||
int32_t syncNodeReplicateWithoutLock(SSyncNode* pNode) {
|
||||
if (pNode->state != TAOS_SYNC_STATE_LEADER || pNode->replicaNum == 1) {
|
||||
if (pNode->state != TAOS_SYNC_STATE_LEADER || pNode->raftCfg.cfg.totalReplicaNum == 1) {
|
||||
return -1;
|
||||
}
|
||||
for (int32_t i = 0; i < pNode->replicaNum; i++) {
|
||||
for (int32_t i = 0; i < pNode->totalReplicaNum; i++) {
|
||||
if (syncUtilSameId(&pNode->replicasId[i], &pNode->myRaftId)) {
|
||||
continue;
|
||||
}
|
||||
|
|
|
@ -795,13 +795,18 @@ int32_t syncNodeOnSnapshot(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) {
|
|||
return -1;
|
||||
}
|
||||
|
||||
if (pMsg->term > raftStoreGetTerm(pSyncNode)) {
|
||||
syncNodeStepDown(pSyncNode, pMsg->term);
|
||||
if(pSyncNode->raftCfg.cfg.nodeInfo[pSyncNode->raftCfg.cfg.myIndex].nodeRole != TAOS_SYNC_ROLE_LEARNER){
|
||||
if (pMsg->term > raftStoreGetTerm(pSyncNode)) {
|
||||
syncNodeStepDown(pSyncNode, pMsg->term);
|
||||
}
|
||||
}
|
||||
else{
|
||||
syncNodeUpdateTermWithoutStepDown(pSyncNode, pMsg->term);
|
||||
}
|
||||
|
||||
// state, term, seq/ack
|
||||
int32_t code = 0;
|
||||
if (pSyncNode->state == TAOS_SYNC_STATE_FOLLOWER) {
|
||||
if (pSyncNode->state == TAOS_SYNC_STATE_FOLLOWER || pSyncNode->state == TAOS_SYNC_STATE_LEARNER) {
|
||||
if (pMsg->term == raftStoreGetTerm(pSyncNode)) {
|
||||
if (pMsg->seq == SYNC_SNAPSHOT_SEQ_PREP_SNAPSHOT) {
|
||||
syncLogRecvSyncSnapshotSend(pSyncNode, pMsg, "process seq pre-snapshot");
|
||||
|
|
|
@ -58,7 +58,7 @@ static void syncNodeCleanConfigIndex(SSyncNode* ths) {
|
|||
static int32_t syncNodeTimerRoutine(SSyncNode* ths) {
|
||||
ths->tmrRoutineNum++;
|
||||
|
||||
if (ths->tmrRoutineNum % 60 == 0 && ths->replicaNum > 1) {
|
||||
if (ths->tmrRoutineNum % 60 == 0 && ths->totalReplicaNum > 1) {
|
||||
sNInfo(ths, "timer routines");
|
||||
} else {
|
||||
sNTrace(ths, "timer routines");
|
||||
|
|
|
@ -30,7 +30,7 @@ SVotesGranted *voteGrantedCreate(SSyncNode *pNode) {
|
|||
return NULL;
|
||||
}
|
||||
|
||||
pVotesGranted->replicas = &pNode->replicasId;
|
||||
pVotesGranted->replicas = (void*)&pNode->replicasId;
|
||||
pVotesGranted->replicaNum = pNode->replicaNum;
|
||||
voteGrantedClearVotes(pVotesGranted);
|
||||
|
||||
|
@ -49,7 +49,7 @@ void voteGrantedDestroy(SVotesGranted *pVotesGranted) {
|
|||
}
|
||||
|
||||
void voteGrantedUpdate(SVotesGranted *pVotesGranted, SSyncNode *pNode) {
|
||||
pVotesGranted->replicas = &pNode->replicasId;
|
||||
pVotesGranted->replicas = (void*)&pNode->replicasId;
|
||||
pVotesGranted->replicaNum = pNode->replicaNum;
|
||||
voteGrantedClearVotes(pVotesGranted);
|
||||
|
||||
|
@ -115,7 +115,7 @@ SVotesRespond *votesRespondCreate(SSyncNode *pNode) {
|
|||
return NULL;
|
||||
}
|
||||
|
||||
pVotesRespond->replicas = &pNode->replicasId;
|
||||
pVotesRespond->replicas = (void*)&pNode->replicasId;
|
||||
pVotesRespond->replicaNum = pNode->replicaNum;
|
||||
pVotesRespond->term = 0;
|
||||
pVotesRespond->pNode = pNode;
|
||||
|
@ -130,7 +130,7 @@ void votesRespondDestory(SVotesRespond *pVotesRespond) {
|
|||
}
|
||||
|
||||
void votesRespondUpdate(SVotesRespond *pVotesRespond, SSyncNode *pNode) {
|
||||
pVotesRespond->replicas = &pNode->replicasId;
|
||||
pVotesRespond->replicas = (void*)&pNode->replicasId;
|
||||
pVotesRespond->replicaNum = pNode->replicaNum;
|
||||
pVotesRespond->term = 0;
|
||||
pVotesRespond->pNode = pNode;
|
||||
|
|
Loading…
Reference in New Issue