TD-1382
This commit is contained in:
parent
a0f01c7dfe
commit
fa5d57a49a
|
@ -35,8 +35,6 @@ int tsSyncTcpThreads = 2;
|
|||
int tsMaxWatchFiles = 500;
|
||||
int tsMaxFwdInfo = 200;
|
||||
int tsSyncTimer = 1;
|
||||
//int sDebugFlag = 135;
|
||||
//char tsArbitrator[TSDB_FQDN_LEN] = {0};
|
||||
|
||||
// module global, not configurable
|
||||
int tsSyncNum; // number of sync in process in whole system
|
||||
|
@ -164,9 +162,10 @@ void *syncStart(const SSyncInfo *pInfo) {
|
|||
for (int i = 0; i < pCfg->replica; ++i) {
|
||||
const SNodeInfo *pNodeInfo = pCfg->nodeInfo + i;
|
||||
pNode->peerInfo[i] = syncAddPeer(pNode, pNodeInfo);
|
||||
if ((strcmp(pNodeInfo->nodeFqdn, tsNodeFqdn) == 0) && (pNodeInfo->nodePort == tsSyncPort))
|
||||
if ((strcmp(pNodeInfo->nodeFqdn, tsNodeFqdn) == 0) && (pNodeInfo->nodePort == tsSyncPort)) {
|
||||
pNode->selfIndex = i;
|
||||
}
|
||||
}
|
||||
|
||||
if (pNode->selfIndex < 0) {
|
||||
sInfo("vgId:%d, this node is not configured", pNode->vgId);
|
||||
|
@ -198,8 +197,9 @@ void *syncStart(const SSyncInfo *pInfo) {
|
|||
syncAddNodeRef(pNode);
|
||||
taosHashPut(vgIdHash, (const char *)&pNode->vgId, sizeof(int32_t), (char *)(&pNode), sizeof(SSyncNode *));
|
||||
|
||||
if (pNode->notifyRole)
|
||||
if (pNode->notifyRole) {
|
||||
(*pNode->notifyRole)(pNode->ahandle, nodeRole);
|
||||
}
|
||||
|
||||
return pNode;
|
||||
}
|
||||
|
@ -234,8 +234,8 @@ int32_t syncReconfig(void *param, const SSyncCfg *pNewCfg) {
|
|||
int i, j;
|
||||
|
||||
if (pNode == NULL) return TSDB_CODE_SYN_INVALID_CONFIG;
|
||||
sInfo("vgId:%d, reconfig, role:%s replica:%d old:%d", pNode->vgId, syncRole[nodeRole],
|
||||
pNewCfg->replica, pNode->replica);
|
||||
sInfo("vgId:%d, reconfig, role:%s replica:%d old:%d", pNode->vgId, syncRole[nodeRole], pNewCfg->replica,
|
||||
pNode->replica);
|
||||
|
||||
pthread_mutex_lock(&(pNode->mutex));
|
||||
|
||||
|
@ -268,17 +268,19 @@ int32_t syncReconfig(void *param, const SSyncCfg *pNewCfg) {
|
|||
newPeers[i] = pNode->peerInfo[j];
|
||||
}
|
||||
|
||||
if ((strcmp(pNewNode->nodeFqdn, tsNodeFqdn) == 0) && (pNewNode->nodePort == tsSyncPort))
|
||||
if ((strcmp(pNewNode->nodeFqdn, tsNodeFqdn) == 0) && (pNewNode->nodePort == tsSyncPort)) {
|
||||
pNode->selfIndex = i;
|
||||
}
|
||||
}
|
||||
|
||||
pNode->replica = pNewCfg->replica;
|
||||
pNode->quorum = pNewCfg->quorum;
|
||||
if (pNode->quorum > pNode->replica) pNode->quorum = pNode->replica;
|
||||
memcpy(pNode->peerInfo, newPeers, sizeof(SSyncPeer *) * pNewCfg->replica);
|
||||
|
||||
for (i = pNewCfg->replica; i < TAOS_SYNC_MAX_REPLICA; ++i)
|
||||
for (i = pNewCfg->replica; i < TAOS_SYNC_MAX_REPLICA; ++i) {
|
||||
pNode->peerInfo[i] = NULL;
|
||||
}
|
||||
|
||||
syncAddArbitrator(pNode);
|
||||
|
||||
|
@ -290,7 +292,8 @@ int32_t syncReconfig(void *param, const SSyncCfg *pNewCfg) {
|
|||
|
||||
pthread_mutex_unlock(&(pNode->mutex));
|
||||
|
||||
sInfo("vgId:%d, %d replicas are configured, quorum:%d role:%s", pNode->vgId, pNode->replica, pNode->quorum, syncRole[nodeRole]);
|
||||
sInfo("vgId:%d, %d replicas are configured, quorum:%d role:%s", pNode->vgId, pNode->replica, pNode->quorum,
|
||||
syncRole[nodeRole]);
|
||||
syncBroadcastStatus(pNode);
|
||||
|
||||
return 0;
|
||||
|
@ -439,13 +442,11 @@ static void syncAddArbitrator(SSyncNode *pNode) {
|
|||
pNode->peerInfo[TAOS_SYNC_MAX_REPLICA] = syncAddPeer(pNode, &nodeInfo);
|
||||
}
|
||||
|
||||
static void syncAddNodeRef(SSyncNode *pNode)
|
||||
{
|
||||
static void syncAddNodeRef(SSyncNode *pNode) {
|
||||
atomic_add_fetch_8(&pNode->refCount, 1);
|
||||
}
|
||||
|
||||
static void syncDecNodeRef(SSyncNode *pNode)
|
||||
{
|
||||
static void syncDecNodeRef(SSyncNode *pNode) {
|
||||
if (atomic_sub_fetch_8(&pNode->refCount, 1) == 0) {
|
||||
pthread_mutex_destroy(&pNode->mutex);
|
||||
taosTFree(pNode->pRecv);
|
||||
|
@ -547,18 +548,20 @@ static void syncChooseMaster(SSyncNode *pNode) {
|
|||
sDebug("vgId:%d, choose master", pNode->vgId);
|
||||
|
||||
for (int i = 0; i < pNode->replica; ++i) {
|
||||
if (pNode->peerInfo[i]->role != TAOS_SYNC_ROLE_OFFLINE)
|
||||
if (pNode->peerInfo[i]->role != TAOS_SYNC_ROLE_OFFLINE) {
|
||||
onlineNum++;
|
||||
}
|
||||
}
|
||||
|
||||
if (onlineNum == pNode->replica) {
|
||||
// if all peers are online, peer with highest version shall be master
|
||||
index = 0;
|
||||
for (int i = 1; i < pNode->replica; ++i) {
|
||||
if (pNode->peerInfo[i]->version > pNode->peerInfo[index]->version)
|
||||
if (pNode->peerInfo[i]->version > pNode->peerInfo[index]->version) {
|
||||
index = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// add arbitrator connection
|
||||
SSyncPeer *pArb = pNode->peerInfo[TAOS_SYNC_MAX_REPLICA];
|
||||
|
@ -573,11 +576,12 @@ static void syncChooseMaster(SSyncNode *pNode) {
|
|||
//slave with highest version shall be master
|
||||
pPeer = pNode->peerInfo[i];
|
||||
if (pPeer->role == TAOS_SYNC_ROLE_SLAVE || pPeer->role == TAOS_SYNC_ROLE_MASTER) {
|
||||
if (index < 0 || pPeer->version > pNode->peerInfo[index]->version)
|
||||
if (index < 0 || pPeer->version > pNode->peerInfo[index]->version) {
|
||||
index = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (index >= 0) {
|
||||
if (index == pNode->selfIndex) {
|
||||
|
@ -600,9 +604,10 @@ static SSyncPeer *syncCheckMaster(SSyncNode *pNode) {
|
|||
int replica = pNode->replica;
|
||||
|
||||
for (int i = 0; i < pNode->replica; ++i) {
|
||||
if (pNode->peerInfo[i]->role != TAOS_SYNC_ROLE_OFFLINE)
|
||||
if (pNode->peerInfo[i]->role != TAOS_SYNC_ROLE_OFFLINE) {
|
||||
onlineNum++;
|
||||
}
|
||||
}
|
||||
|
||||
// add arbitrator connection
|
||||
SSyncPeer *pArb = pNode->peerInfo[TAOS_SYNC_MAX_REPLICA];
|
||||
|
@ -666,8 +671,7 @@ static void syncCheckRole(SSyncPeer *pPeer, SPeerStatus peersStatus[], int8_t ne
|
|||
pNode->peerInfo[pNode->selfIndex]->version = nodeVersion;
|
||||
pPeer->role = newRole;
|
||||
|
||||
sDebug("%s, own role:%s, new peer role:%s", pPeer->id,
|
||||
syncRole[nodeRole], syncRole[pPeer->role]);
|
||||
sDebug("%s, own role:%s, new peer role:%s", pPeer->id, syncRole[nodeRole], syncRole[pPeer->role]);
|
||||
|
||||
SSyncPeer *pMaster = syncCheckMaster(pNode);
|
||||
|
||||
|
@ -704,20 +708,23 @@ static void syncCheckRole(SSyncPeer *pPeer, SPeerStatus peersStatus[], int8_t ne
|
|||
if (pNode->replica == 2) consistent = 1;
|
||||
}
|
||||
|
||||
if (consistent)
|
||||
if (consistent) {
|
||||
syncChooseMaster(pNode);
|
||||
}
|
||||
}
|
||||
|
||||
if (syncRequired) {
|
||||
syncRecoverFromMaster(pMaster);
|
||||
}
|
||||
|
||||
if (peerOldRole != newRole || nodeRole != selfOldRole)
|
||||
if (peerOldRole != newRole || nodeRole != selfOldRole) {
|
||||
syncBroadcastStatus(pNode);
|
||||
}
|
||||
|
||||
if (nodeRole != TAOS_SYNC_ROLE_MASTER)
|
||||
if (nodeRole != TAOS_SYNC_ROLE_MASTER) {
|
||||
syncResetFlowCtrl(pNode);
|
||||
}
|
||||
}
|
||||
|
||||
static void syncRestartPeer(SSyncPeer *pPeer) {
|
||||
sDebug("%s, restart connection", pPeer->id);
|
||||
|
@ -727,9 +734,10 @@ static void syncRestartPeer(SSyncPeer *pPeer) {
|
|||
pPeer->sstatus = TAOS_SYNC_STATUS_INIT;
|
||||
|
||||
int ret = strcmp(pPeer->fqdn, tsNodeFqdn);
|
||||
if (ret > 0 || (ret == 0 && pPeer->port > tsSyncPort))
|
||||
if (ret > 0 || (ret == 0 && pPeer->port > tsSyncPort)) {
|
||||
taosTmrReset(syncCheckPeerConnection, tsSyncTimer * 1000, pPeer, syncTmrCtrl, &pPeer->timer);
|
||||
}
|
||||
}
|
||||
|
||||
void syncRestartConnection(SSyncPeer *pPeer) {
|
||||
if (pPeer->ip == 0) return;
|
||||
|
@ -810,7 +818,7 @@ static void syncRecoverFromMaster(SSyncPeer *pPeer) {
|
|||
return;
|
||||
}
|
||||
|
||||
sDebug("%s, try to sync", pPeer->id)
|
||||
sDebug("%s, try to sync", pPeer->id);
|
||||
|
||||
SFirstPkt firstPkt;
|
||||
memset(&firstPkt, 0, sizeof(firstPkt));
|
||||
|
@ -827,8 +835,6 @@ static void syncRecoverFromMaster(SSyncPeer *pPeer) {
|
|||
nodeSStatus = TAOS_SYNC_STATUS_START;
|
||||
sInfo("%s, sync-req is sent", pPeer->id);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
static void syncProcessFwdResponse(char *cont, SSyncPeer *pPeer) {
|
||||
|
@ -882,9 +888,10 @@ static void syncProcessPeersStatusMsg(char *cont, SSyncPeer *pPeer) {
|
|||
pPeer->version = pPeersStatus->version;
|
||||
syncCheckRole(pPeer, pPeersStatus->peersStatus, pPeersStatus->role);
|
||||
|
||||
if (pPeersStatus->ack)
|
||||
if (pPeersStatus->ack) {
|
||||
syncSendPeersStatusMsgToPeer(pPeer, 0);
|
||||
}
|
||||
}
|
||||
|
||||
static int syncReadPeerMsg(SSyncPeer *pPeer, SSyncHead *pHead, char *cont) {
|
||||
if (pPeer->peerFd < 0) return -1;
|
||||
|
@ -1070,8 +1077,7 @@ static void syncProcessIncommingConnection(int connFd, uint32_t sourceIp) {
|
|||
SSyncPeer *pPeer;
|
||||
for (i = 0; i < pNode->replica; ++i) {
|
||||
pPeer = pNode->peerInfo[i];
|
||||
if (pPeer && (strcmp(pPeer->fqdn, firstPkt.fqdn) == 0) && (pPeer->port == firstPkt.port))
|
||||
break;
|
||||
if (pPeer && (strcmp(pPeer->fqdn, firstPkt.fqdn) == 0) && (pPeer->port == firstPkt.port)) break;
|
||||
}
|
||||
|
||||
pPeer = (i < pNode->replica) ? pNode->peerInfo[i] : NULL;
|
||||
|
@ -1096,8 +1102,6 @@ static void syncProcessIncommingConnection(int connFd, uint32_t sourceIp) {
|
|||
}
|
||||
|
||||
pthread_mutex_unlock(&(pNode->mutex));
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
static void syncProcessBrokenLink(void *param) {
|
||||
|
@ -1128,8 +1132,10 @@ static void syncSaveFwdInfo(SSyncNode *pNode, uint64_t version, void *mhandle) {
|
|||
pSyncFwds->fwds--;
|
||||
}
|
||||
|
||||
if (pSyncFwds->fwds > 0)
|
||||
if (pSyncFwds->fwds > 0) {
|
||||
pSyncFwds->last = (pSyncFwds->last + 1) % tsMaxFwdInfo;
|
||||
}
|
||||
|
||||
SFwdInfo *pFwdInfo = pSyncFwds->fwdInfo + pSyncFwds->last;
|
||||
pFwdInfo->version = version;
|
||||
pFwdInfo->mhandle = mhandle;
|
||||
|
@ -1164,13 +1170,15 @@ static void syncProcessFwdAck(SSyncNode *pNode, SFwdInfo *pFwdInfo, int32_t code
|
|||
|
||||
if (code == 0) {
|
||||
pFwdInfo->acks++;
|
||||
if (pFwdInfo->acks >= pNode->quorum-1)
|
||||
if (pFwdInfo->acks >= pNode->quorum - 1) {
|
||||
confirm = 1;
|
||||
}
|
||||
} else {
|
||||
pFwdInfo->nacks++;
|
||||
if (pFwdInfo->nacks > pNode->replica-pNode->quorum)
|
||||
if (pFwdInfo->nacks > pNode->replica - pNode->quorum) {
|
||||
confirm = 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (confirm && pFwdInfo->confirmed == 0) {
|
||||
sDebug("vgId:%d, forward is confirmed, ver:%" PRIu64 " code:%x", pNode->vgId, pFwdInfo->version, pFwdInfo->code);
|
||||
|
|
Loading…
Reference in New Issue