commit
11a5fe2719
|
@ -172,6 +172,7 @@ typedef struct SSyncNode {
|
||||||
// sync module global
|
// sync module global
|
||||||
extern int32_t tsSyncNum;
|
extern int32_t tsSyncNum;
|
||||||
extern char tsNodeFqdn[TSDB_FQDN_LEN];
|
extern char tsNodeFqdn[TSDB_FQDN_LEN];
|
||||||
|
extern char * syncStatus[];
|
||||||
|
|
||||||
void *syncRetrieveData(void *param);
|
void *syncRetrieveData(void *param);
|
||||||
void *syncRestoreData(void *param);
|
void *syncRestoreData(void *param);
|
||||||
|
|
|
@ -73,6 +73,14 @@ char* syncRole[] = {
|
||||||
"master"
|
"master"
|
||||||
};
|
};
|
||||||
|
|
||||||
|
char *syncStatus[] = {
|
||||||
|
"init",
|
||||||
|
"start",
|
||||||
|
"file",
|
||||||
|
"cache",
|
||||||
|
"invalid"
|
||||||
|
};
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
SYNC_STATUS_BROADCAST,
|
SYNC_STATUS_BROADCAST,
|
||||||
SYNC_STATUS_BROADCAST_RSP,
|
SYNC_STATUS_BROADCAST_RSP,
|
||||||
|
@ -282,7 +290,7 @@ void syncStop(int64_t rid) {
|
||||||
pPeer = pNode->peerInfo[TAOS_SYNC_MAX_REPLICA];
|
pPeer = pNode->peerInfo[TAOS_SYNC_MAX_REPLICA];
|
||||||
if (pPeer) syncRemovePeer(pPeer);
|
if (pPeer) syncRemovePeer(pPeer);
|
||||||
|
|
||||||
pthread_mutex_unlock(&(pNode->mutex));
|
pthread_mutex_unlock(&pNode->mutex);
|
||||||
|
|
||||||
taosReleaseRef(tsSyncRefId, rid);
|
taosReleaseRef(tsSyncRefId, rid);
|
||||||
taosRemoveRef(tsSyncRefId, rid);
|
taosRemoveRef(tsSyncRefId, rid);
|
||||||
|
@ -350,7 +358,7 @@ int32_t syncReconfig(int64_t rid, const SSyncCfg *pNewCfg) {
|
||||||
(*pNode->notifyRole)(pNode->vgId, nodeRole);
|
(*pNode->notifyRole)(pNode->vgId, nodeRole);
|
||||||
}
|
}
|
||||||
|
|
||||||
pthread_mutex_unlock(&(pNode->mutex));
|
pthread_mutex_unlock(&pNode->mutex);
|
||||||
|
|
||||||
sInfo("vgId:%d, %d replicas are configured, quorum:%d", pNode->vgId, pNode->replica, pNode->quorum);
|
sInfo("vgId:%d, %d replicas are configured, quorum:%d", pNode->vgId, pNode->replica, pNode->quorum);
|
||||||
syncBroadcastStatus(pNode);
|
syncBroadcastStatus(pNode);
|
||||||
|
@ -423,7 +431,7 @@ void syncRecover(int64_t rid) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pthread_mutex_unlock(&(pNode->mutex));
|
pthread_mutex_unlock(&pNode->mutex);
|
||||||
|
|
||||||
taosReleaseRef(tsSyncRefId, rid);
|
taosReleaseRef(tsSyncRefId, rid);
|
||||||
}
|
}
|
||||||
|
@ -498,6 +506,8 @@ int32_t syncDecPeerRef(SSyncPeer *pPeer) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static void syncClosePeerConn(SSyncPeer *pPeer) {
|
static void syncClosePeerConn(SSyncPeer *pPeer) {
|
||||||
|
sDebug("%s, pfd:%d sfd:%d will be closed", pPeer->id, pPeer->peerFd, pPeer->syncFd);
|
||||||
|
|
||||||
taosTmrStopA(&pPeer->timer);
|
taosTmrStopA(&pPeer->timer);
|
||||||
taosClose(pPeer->syncFd);
|
taosClose(pPeer->syncFd);
|
||||||
if (pPeer->peerFd >= 0) {
|
if (pPeer->peerFd >= 0) {
|
||||||
|
@ -751,7 +761,7 @@ static void syncCheckRole(SSyncPeer *pPeer, SPeerStatus* peersStatus, int8_t new
|
||||||
sDebug("vgId:%d, choose master", pNode->vgId);
|
sDebug("vgId:%d, choose master", pNode->vgId);
|
||||||
syncChooseMaster(pNode);
|
syncChooseMaster(pNode);
|
||||||
} else {
|
} else {
|
||||||
sDebug("vgId:%d, version inconsistent, cannot choose master", pNode->vgId);
|
sDebug("vgId:%d, cannot choose master since roles inconsistent", pNode->vgId);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -770,11 +780,12 @@ static void syncCheckRole(SSyncPeer *pPeer, SPeerStatus* peersStatus, int8_t new
|
||||||
}
|
}
|
||||||
|
|
||||||
static void syncRestartPeer(SSyncPeer *pPeer) {
|
static void syncRestartPeer(SSyncPeer *pPeer) {
|
||||||
sDebug("%s, restart peer connection", pPeer->id);
|
sDebug("%s, restart peer connection, last sstatus:%s", pPeer->id, syncStatus[pPeer->sstatus]);
|
||||||
|
|
||||||
syncClosePeerConn(pPeer);
|
syncClosePeerConn(pPeer);
|
||||||
|
|
||||||
pPeer->sstatus = TAOS_SYNC_STATUS_INIT;
|
pPeer->sstatus = TAOS_SYNC_STATUS_INIT;
|
||||||
|
sDebug("%s, peer conn is restart and set sstatus:%s", pPeer->id, syncStatus[pPeer->sstatus]);
|
||||||
|
|
||||||
int32_t ret = strcmp(pPeer->fqdn, tsNodeFqdn);
|
int32_t ret = strcmp(pPeer->fqdn, tsNodeFqdn);
|
||||||
if (ret > 0 || (ret == 0 && pPeer->port > tsSyncPort)) {
|
if (ret > 0 || (ret == 0 && pPeer->port > tsSyncPort)) {
|
||||||
|
@ -803,7 +814,7 @@ static void syncProcessSyncRequest(char *msg, SSyncPeer *pPeer) {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (pPeer->sstatus != TAOS_SYNC_STATUS_INIT) {
|
if (pPeer->sstatus != TAOS_SYNC_STATUS_INIT) {
|
||||||
sDebug("%s, sync is already started", pPeer->id);
|
sDebug("%s, sync is already started for sstatus:%s", pPeer->id, syncStatus[pPeer->sstatus]);
|
||||||
return; // already started
|
return; // already started
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -821,7 +832,7 @@ static void syncProcessSyncRequest(char *msg, SSyncPeer *pPeer) {
|
||||||
syncDecPeerRef(pPeer);
|
syncDecPeerRef(pPeer);
|
||||||
} else {
|
} else {
|
||||||
pPeer->sstatus = TAOS_SYNC_STATUS_START;
|
pPeer->sstatus = TAOS_SYNC_STATUS_START;
|
||||||
sDebug("%s, thread is created to retrieve data", pPeer->id);
|
sDebug("%s, thread is created to retrieve data, set sstatus:%s", pPeer->id, syncStatus[pPeer->sstatus]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -831,9 +842,10 @@ static void syncNotStarted(void *param, void *tmrId) {
|
||||||
|
|
||||||
pthread_mutex_lock(&pNode->mutex);
|
pthread_mutex_lock(&pNode->mutex);
|
||||||
pPeer->timer = NULL;
|
pPeer->timer = NULL;
|
||||||
sInfo("%s, sync connection is still not up, restart", pPeer->id);
|
pPeer->sstatus = TAOS_SYNC_STATUS_INIT;
|
||||||
|
sInfo("%s, sync conn is still not up, restart and set sstatus:%s", pPeer->id, syncStatus[pPeer->sstatus]);
|
||||||
syncRestartConnection(pPeer);
|
syncRestartConnection(pPeer);
|
||||||
pthread_mutex_unlock(&(pNode->mutex));
|
pthread_mutex_unlock(&pNode->mutex);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void syncTryRecoverFromMaster(void *param, void *tmrId) {
|
static void syncTryRecoverFromMaster(void *param, void *tmrId) {
|
||||||
|
@ -842,14 +854,14 @@ static void syncTryRecoverFromMaster(void *param, void *tmrId) {
|
||||||
|
|
||||||
pthread_mutex_lock(&pNode->mutex);
|
pthread_mutex_lock(&pNode->mutex);
|
||||||
syncRecoverFromMaster(pPeer);
|
syncRecoverFromMaster(pPeer);
|
||||||
pthread_mutex_unlock(&(pNode->mutex));
|
pthread_mutex_unlock(&pNode->mutex);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void syncRecoverFromMaster(SSyncPeer *pPeer) {
|
static void syncRecoverFromMaster(SSyncPeer *pPeer) {
|
||||||
SSyncNode *pNode = pPeer->pSyncNode;
|
SSyncNode *pNode = pPeer->pSyncNode;
|
||||||
|
|
||||||
if (nodeSStatus != TAOS_SYNC_STATUS_INIT) {
|
if (nodeSStatus != TAOS_SYNC_STATUS_INIT) {
|
||||||
sDebug("%s, sync is already started, status:%d", pPeer->id, nodeSStatus);
|
sDebug("%s, sync is already started since sstatus:%s", pPeer->id, syncStatus[nodeSStatus]);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -877,7 +889,7 @@ static void syncRecoverFromMaster(SSyncPeer *pPeer) {
|
||||||
sError("%s, failed to send sync-req to peer", pPeer->id);
|
sError("%s, failed to send sync-req to peer", pPeer->id);
|
||||||
} else {
|
} else {
|
||||||
nodeSStatus = TAOS_SYNC_STATUS_START;
|
nodeSStatus = TAOS_SYNC_STATUS_START;
|
||||||
sInfo("%s, sync-req is sent", pPeer->id);
|
sInfo("%s, sync-req is sent to peer, set sstatus:%s", pPeer->id, syncStatus[nodeSStatus]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -915,7 +927,8 @@ static void syncProcessForwardFromPeer(char *cont, SSyncPeer *pPeer) {
|
||||||
if (nodeSStatus != TAOS_SYNC_STATUS_INIT) {
|
if (nodeSStatus != TAOS_SYNC_STATUS_INIT) {
|
||||||
syncSaveIntoBuffer(pPeer, pHead);
|
syncSaveIntoBuffer(pPeer, pHead);
|
||||||
} else {
|
} else {
|
||||||
sError("%s, forward discarded, hver:%" PRIu64, pPeer->id, pHead->version);
|
sError("%s, forward discarded since sstatus:%s, hver:%" PRIu64, pPeer->id, syncStatus[nodeSStatus],
|
||||||
|
pHead->version);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -924,8 +937,9 @@ static void syncProcessPeersStatusMsg(char *cont, SSyncPeer *pPeer) {
|
||||||
SSyncNode * pNode = pPeer->pSyncNode;
|
SSyncNode * pNode = pPeer->pSyncNode;
|
||||||
SPeersStatus *pPeersStatus = (SPeersStatus *)cont;
|
SPeersStatus *pPeersStatus = (SPeersStatus *)cont;
|
||||||
|
|
||||||
sDebug("%s, status msg is received, self:%s sver:%" PRIu64 " peer:%s sver:%" PRIu64 ", ack:%d tranId:%u type:%s", pPeer->id,
|
sDebug("%s, status is received, self:%s:%s:%" PRIu64 ", peer:%s:%" PRIu64 ", ack:%d tranId:%u type:%s pfd:%d",
|
||||||
syncRole[nodeRole], nodeVersion, syncRole[pPeersStatus->role], pPeersStatus->version, pPeersStatus->ack, pPeersStatus->tranId, statusType[pPeersStatus->type]);
|
pPeer->id, syncRole[nodeRole], syncStatus[nodeSStatus], nodeVersion, syncRole[pPeersStatus->role],
|
||||||
|
pPeersStatus->version, pPeersStatus->ack, pPeersStatus->tranId, statusType[pPeersStatus->type], pPeer->peerFd);
|
||||||
|
|
||||||
pPeer->version = pPeersStatus->version;
|
pPeer->version = pPeersStatus->version;
|
||||||
syncCheckRole(pPeer, pPeersStatus->peersStatus, pPeersStatus->role);
|
syncCheckRole(pPeer, pPeersStatus->peersStatus, pPeersStatus->role);
|
||||||
|
@ -982,7 +996,7 @@ static int32_t syncProcessPeerMsg(void *param, void *buffer) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pthread_mutex_unlock(&(pNode->mutex));
|
pthread_mutex_unlock(&pNode->mutex);
|
||||||
|
|
||||||
return code;
|
return code;
|
||||||
}
|
}
|
||||||
|
@ -1014,8 +1028,10 @@ static void syncSendPeersStatusMsgToPeer(SSyncPeer *pPeer, char ack, int8_t type
|
||||||
|
|
||||||
int32_t retLen = taosWriteMsg(pPeer->peerFd, msg, statusMsgLen);
|
int32_t retLen = taosWriteMsg(pPeer->peerFd, msg, statusMsgLen);
|
||||||
if (retLen == statusMsgLen) {
|
if (retLen == statusMsgLen) {
|
||||||
sDebug("%s, status msg is sent, self:%s sver:%" PRIu64 ", ack:%d tranId:%u type:%s", pPeer->id, syncRole[pPeersStatus->role],
|
sDebug("%s, status is sent, self:%s:%s:%" PRIu64 ", peer:%s:%s:%" PRIu64 ", ack:%d tranId:%u type:%s pfd:%d",
|
||||||
pPeersStatus->version, pPeersStatus->ack, pPeersStatus->tranId, statusType[pPeersStatus->type]);
|
pPeer->id, syncRole[nodeRole], syncStatus[nodeSStatus], nodeVersion, syncRole[pPeer->role],
|
||||||
|
syncStatus[pPeer->sstatus], pPeer->version, pPeersStatus->ack, pPeersStatus->tranId,
|
||||||
|
statusType[pPeersStatus->type], pPeer->peerFd);
|
||||||
} else {
|
} else {
|
||||||
sDebug("%s, failed to send status msg, restart", pPeer->id);
|
sDebug("%s, failed to send status msg, restart", pPeer->id);
|
||||||
syncRestartConnection(pPeer);
|
syncRestartConnection(pPeer);
|
||||||
|
@ -1048,7 +1064,7 @@ static void syncSetupPeerConnection(SSyncPeer *pPeer) {
|
||||||
firstPkt.sourceId = pNode->vgId; // tell arbitrator its vgId
|
firstPkt.sourceId = pNode->vgId; // tell arbitrator its vgId
|
||||||
|
|
||||||
if (taosWriteMsg(connFd, &firstPkt, sizeof(firstPkt)) == sizeof(firstPkt)) {
|
if (taosWriteMsg(connFd, &firstPkt, sizeof(firstPkt)) == sizeof(firstPkt)) {
|
||||||
sDebug("%s, connection to peer server is setup", pPeer->id);
|
sDebug("%s, connection to peer server is setup, pfd:%d sfd:%d", pPeer->id, connFd, pPeer->syncFd);
|
||||||
pPeer->peerFd = connFd;
|
pPeer->peerFd = connFd;
|
||||||
pPeer->role = TAOS_SYNC_ROLE_UNSYNCED;
|
pPeer->role = TAOS_SYNC_ROLE_UNSYNCED;
|
||||||
pPeer->pConn = taosAllocateTcpConn(tsTcpPool, pPeer, connFd);
|
pPeer->pConn = taosAllocateTcpConn(tsTcpPool, pPeer, connFd);
|
||||||
|
@ -1069,7 +1085,7 @@ static void syncCheckPeerConnection(void *param, void *tmrId) {
|
||||||
sDebug("%s, check peer connection", pPeer->id);
|
sDebug("%s, check peer connection", pPeer->id);
|
||||||
syncSetupPeerConnection(pPeer);
|
syncSetupPeerConnection(pPeer);
|
||||||
|
|
||||||
pthread_mutex_unlock(&(pNode->mutex));
|
pthread_mutex_unlock(&pNode->mutex);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void syncCreateRestoreDataThread(SSyncPeer *pPeer) {
|
static void syncCreateRestoreDataThread(SSyncPeer *pPeer) {
|
||||||
|
@ -1135,7 +1151,8 @@ static void syncProcessIncommingConnection(int32_t connFd, uint32_t sourceIp) {
|
||||||
pPeer->syncFd = connFd;
|
pPeer->syncFd = connFd;
|
||||||
syncCreateRestoreDataThread(pPeer);
|
syncCreateRestoreDataThread(pPeer);
|
||||||
} else {
|
} else {
|
||||||
sDebug("%s, TCP connection is already up, close one", pPeer->id);
|
sDebug("%s, TCP connection is already up(pfd:%d), close one, new pfd:%d sfd:%d", pPeer->id, pPeer->peerFd, connFd,
|
||||||
|
pPeer->syncFd);
|
||||||
syncClosePeerConn(pPeer);
|
syncClosePeerConn(pPeer);
|
||||||
pPeer->peerFd = connFd;
|
pPeer->peerFd = connFd;
|
||||||
pPeer->pConn = taosAllocateTcpConn(tsTcpPool, pPeer, connFd);
|
pPeer->pConn = taosAllocateTcpConn(tsTcpPool, pPeer, connFd);
|
||||||
|
@ -1145,7 +1162,7 @@ static void syncProcessIncommingConnection(int32_t connFd, uint32_t sourceIp) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pthread_mutex_unlock(&(pNode->mutex));
|
pthread_mutex_unlock(&pNode->mutex);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void syncProcessBrokenLink(void *param) {
|
static void syncProcessBrokenLink(void *param) {
|
||||||
|
@ -1156,14 +1173,14 @@ static void syncProcessBrokenLink(void *param) {
|
||||||
if (taosAcquireRef(tsSyncRefId, pNode->rid) == NULL) return;
|
if (taosAcquireRef(tsSyncRefId, pNode->rid) == NULL) return;
|
||||||
pthread_mutex_lock(&pNode->mutex);
|
pthread_mutex_lock(&pNode->mutex);
|
||||||
|
|
||||||
sDebug("%s, TCP link is broken since %s", pPeer->id, strerror(errno));
|
sDebug("%s, TCP link is broken since %s, pfd:%d sfd:%d", pPeer->id, strerror(errno), pPeer->peerFd, pPeer->syncFd);
|
||||||
pPeer->peerFd = -1;
|
pPeer->peerFd = -1;
|
||||||
|
|
||||||
if (syncDecPeerRef(pPeer) != 0) {
|
if (syncDecPeerRef(pPeer) != 0) {
|
||||||
syncRestartConnection(pPeer);
|
syncRestartConnection(pPeer);
|
||||||
}
|
}
|
||||||
|
|
||||||
pthread_mutex_unlock(&(pNode->mutex));
|
pthread_mutex_unlock(&pNode->mutex);
|
||||||
taosReleaseRef(tsSyncRefId, pNode->rid);
|
taosReleaseRef(tsSyncRefId, pNode->rid);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1239,10 +1256,13 @@ static void syncMonitorNodeRole(void *param, void *tmrId) {
|
||||||
if (index == pNode->selfIndex) continue;
|
if (index == pNode->selfIndex) continue;
|
||||||
|
|
||||||
SSyncPeer *pPeer = pNode->peerInfo[index];
|
SSyncPeer *pPeer = pNode->peerInfo[index];
|
||||||
if (pPeer->role > TAOS_SYNC_ROLE_UNSYNCED && nodeRole > TAOS_SYNC_ROLE_UNSYNCED) continue;
|
if (/*pPeer->role > TAOS_SYNC_ROLE_UNSYNCED && */ nodeRole > TAOS_SYNC_ROLE_UNSYNCED) continue;
|
||||||
if (pPeer->sstatus > TAOS_SYNC_STATUS_INIT || nodeSStatus > TAOS_SYNC_STATUS_INIT) continue;
|
if (/*pPeer->sstatus > TAOS_SYNC_STATUS_INIT || */ nodeSStatus > TAOS_SYNC_STATUS_INIT) continue;
|
||||||
|
|
||||||
|
sDebug("%s, check roles since self:%s sstatus:%s, peer:%s sstatus:%s", pPeer->id, syncRole[pPeer->role],
|
||||||
|
syncStatus[pPeer->sstatus], syncRole[nodeRole], syncStatus[nodeSStatus]);
|
||||||
syncSendPeersStatusMsgToPeer(pPeer, 1, SYNC_STATUS_CHECK_ROLE, syncGenTranId());
|
syncSendPeersStatusMsgToPeer(pPeer, 1, SYNC_STATUS_CHECK_ROLE, syncGenTranId());
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
pNode->pRoleTimer = taosTmrStart(syncMonitorNodeRole, SYNC_ROLE_TIMER, (void *)pNode->rid, tsSyncTmrCtrl);
|
pNode->pRoleTimer = taosTmrStart(syncMonitorNodeRole, SYNC_ROLE_TIMER, (void *)pNode->rid, tsSyncTmrCtrl);
|
||||||
|
@ -1271,7 +1291,7 @@ static void syncMonitorFwdInfos(void *param, void *tmrId) {
|
||||||
}
|
}
|
||||||
|
|
||||||
syncRemoveConfirmedFwdInfo(pNode);
|
syncRemoveConfirmedFwdInfo(pNode);
|
||||||
pthread_mutex_unlock(&(pNode->mutex));
|
pthread_mutex_unlock(&pNode->mutex);
|
||||||
}
|
}
|
||||||
|
|
||||||
pNode->pFwdTimer = taosTmrStart(syncMonitorFwdInfos, SYNC_FWD_TIMER, (void *)pNode->rid, tsSyncTmrCtrl);
|
pNode->pFwdTimer = taosTmrStart(syncMonitorFwdInfos, SYNC_FWD_TIMER, (void *)pNode->rid, tsSyncTmrCtrl);
|
||||||
|
@ -1339,7 +1359,7 @@ static int32_t syncForwardToPeerImpl(SSyncNode *pNode, void *data, void *mhandle
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pthread_mutex_unlock(&(pNode->mutex));
|
pthread_mutex_unlock(&pNode->mutex);
|
||||||
|
|
||||||
return code;
|
return code;
|
||||||
}
|
}
|
||||||
|
|
|
@ -65,7 +65,10 @@ static int32_t syncRestoreFile(SSyncPeer *pPeer, uint64_t *fversion) {
|
||||||
while (1) {
|
while (1) {
|
||||||
// read file info
|
// read file info
|
||||||
int32_t ret = taosReadMsg(pPeer->syncFd, &(minfo), sizeof(minfo));
|
int32_t ret = taosReadMsg(pPeer->syncFd, &(minfo), sizeof(minfo));
|
||||||
if (ret < 0) break;
|
if (ret < 0) {
|
||||||
|
sError("%s, failed to read file info while restore file since %s", pPeer->id, strerror(errno));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
// if no more file from master, break;
|
// if no more file from master, break;
|
||||||
if (minfo.name[0] == 0 || minfo.magic == 0) {
|
if (minfo.name[0] == 0 || minfo.magic == 0) {
|
||||||
|
@ -83,7 +86,7 @@ static int32_t syncRestoreFile(SSyncPeer *pPeer, uint64_t *fversion) {
|
||||||
|
|
||||||
// check the file info
|
// check the file info
|
||||||
sinfo = minfo;
|
sinfo = minfo;
|
||||||
sDebug("%s, get file info:%s", pPeer->id, minfo.name);
|
sDebug("%s, get file:%s info size:%" PRId64, pPeer->id, minfo.name, minfo.size);
|
||||||
sinfo.magic = (*pNode->getFileInfo)(pNode->vgId, sinfo.name, &sinfo.index, TAOS_SYNC_MAX_INDEX, &sinfo.size,
|
sinfo.magic = (*pNode->getFileInfo)(pNode->vgId, sinfo.name, &sinfo.index, TAOS_SYNC_MAX_INDEX, &sinfo.size,
|
||||||
&sinfo.fversion);
|
&sinfo.fversion);
|
||||||
|
|
||||||
|
@ -92,8 +95,11 @@ static int32_t syncRestoreFile(SSyncPeer *pPeer, uint64_t *fversion) {
|
||||||
fileAck.sync = (sinfo.magic != minfo.magic || sinfo.name[0] == 0) ? 1 : 0;
|
fileAck.sync = (sinfo.magic != minfo.magic || sinfo.name[0] == 0) ? 1 : 0;
|
||||||
|
|
||||||
// send file ack
|
// send file ack
|
||||||
ret = taosWriteMsg(pPeer->syncFd, &(fileAck), sizeof(fileAck));
|
ret = taosWriteMsg(pPeer->syncFd, &fileAck, sizeof(fileAck));
|
||||||
if (ret < 0) break;
|
if (ret < 0) {
|
||||||
|
sError("%s, failed to write file:%s ack while restore file since %s", pPeer->id, minfo.name, strerror(errno));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
// if sync is not required, continue
|
// if sync is not required, continue
|
||||||
if (fileAck.sync == 0) {
|
if (fileAck.sync == 0) {
|
||||||
|
@ -108,14 +114,17 @@ static int32_t syncRestoreFile(SSyncPeer *pPeer, uint64_t *fversion) {
|
||||||
|
|
||||||
int32_t dfd = open(name, O_WRONLY | O_CREAT | O_TRUNC, S_IRWXU | S_IRWXG | S_IRWXO);
|
int32_t dfd = open(name, O_WRONLY | O_CREAT | O_TRUNC, S_IRWXU | S_IRWXG | S_IRWXO);
|
||||||
if (dfd < 0) {
|
if (dfd < 0) {
|
||||||
sError("%s, failed to open file:%s", pPeer->id, name);
|
sError("%s, failed to open file:%s while restore file since %s", pPeer->id, minfo.name, strerror(errno));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = taosCopyFds(pPeer->syncFd, dfd, minfo.size);
|
ret = taosCopyFds(pPeer->syncFd, dfd, minfo.size);
|
||||||
fsync(dfd);
|
fsync(dfd);
|
||||||
close(dfd);
|
close(dfd);
|
||||||
if (ret < 0) break;
|
if (ret < 0) {
|
||||||
|
sError("%s, failed to copy file:%s while restore file since %s", pPeer->id, minfo.name, strerror(errno));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
fileChanged = true;
|
fileChanged = true;
|
||||||
sDebug("%s, %s is received, size:%" PRId64, pPeer->id, minfo.name, minfo.size);
|
sDebug("%s, %s is received, size:%" PRId64, pPeer->id, minfo.name, minfo.size);
|
||||||
|
@ -125,6 +134,7 @@ static int32_t syncRestoreFile(SSyncPeer *pPeer, uint64_t *fversion) {
|
||||||
// data file is changed, code shall be set to 1
|
// data file is changed, code shall be set to 1
|
||||||
*fversion = minfo.fversion;
|
*fversion = minfo.fversion;
|
||||||
code = 1;
|
code = 1;
|
||||||
|
sDebug("%s, file changed while restore file", pPeer->id);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (code < 0) {
|
if (code < 0) {
|
||||||
|
@ -146,15 +156,22 @@ static int32_t syncRestoreWal(SSyncPeer *pPeer) {
|
||||||
|
|
||||||
while (1) {
|
while (1) {
|
||||||
ret = taosReadMsg(pPeer->syncFd, pHead, sizeof(SWalHead));
|
ret = taosReadMsg(pPeer->syncFd, pHead, sizeof(SWalHead));
|
||||||
if (ret < 0) break;
|
if (ret < 0) {
|
||||||
|
sError("%s, failed to read walhead while restore wal since %s", pPeer->id, strerror(errno));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
if (pHead->len == 0) {
|
if (pHead->len == 0) {
|
||||||
|
sDebug("%s, wal is synced over", pPeer->id);
|
||||||
code = 0;
|
code = 0;
|
||||||
break;
|
break;
|
||||||
} // wal sync over
|
} // wal sync over
|
||||||
|
|
||||||
ret = taosReadMsg(pPeer->syncFd, pHead->cont, pHead->len);
|
ret = taosReadMsg(pPeer->syncFd, pHead->cont, pHead->len);
|
||||||
if (ret < 0) break;
|
if (ret < 0) {
|
||||||
|
sError("%s, failed to read walcont, len:%d while restore wal since %s", pPeer->id, pHead->len, strerror(errno));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
sTrace("%s, restore a record, qtype:wal len:%d hver:%" PRIu64, pPeer->id, pHead->len, pHead->version);
|
sTrace("%s, restore a record, qtype:wal len:%d hver:%" PRIu64, pPeer->id, pHead->len, pHead->version);
|
||||||
|
|
||||||
|
@ -267,7 +284,7 @@ static int32_t syncRestoreDataStepByStep(SSyncPeer *pPeer) {
|
||||||
nodeSStatus = TAOS_SYNC_STATUS_FILE;
|
nodeSStatus = TAOS_SYNC_STATUS_FILE;
|
||||||
uint64_t fversion = 0;
|
uint64_t fversion = 0;
|
||||||
|
|
||||||
sDebug("%s, start to restore file", pPeer->id);
|
sDebug("%s, start to restore file, set sstatus:%s", pPeer->id, syncStatus[nodeSStatus]);
|
||||||
int32_t code = syncRestoreFile(pPeer, &fversion);
|
int32_t code = syncRestoreFile(pPeer, &fversion);
|
||||||
if (code < 0) {
|
if (code < 0) {
|
||||||
sError("%s, failed to restore file", pPeer->id);
|
sError("%s, failed to restore file", pPeer->id);
|
||||||
|
@ -291,7 +308,7 @@ static int32_t syncRestoreDataStepByStep(SSyncPeer *pPeer) {
|
||||||
}
|
}
|
||||||
|
|
||||||
nodeSStatus = TAOS_SYNC_STATUS_CACHE;
|
nodeSStatus = TAOS_SYNC_STATUS_CACHE;
|
||||||
sDebug("%s, start to insert buffered points", pPeer->id);
|
sDebug("%s, start to insert buffered points, set sstatus:%s", pPeer->id, syncStatus[nodeSStatus]);
|
||||||
if (syncProcessBufferedFwd(pPeer) < 0) {
|
if (syncProcessBufferedFwd(pPeer) < 0) {
|
||||||
sError("%s, failed to insert buffered points", pPeer->id);
|
sError("%s, failed to insert buffered points", pPeer->id);
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -327,6 +344,8 @@ void *syncRestoreData(void *param) {
|
||||||
(*pNode->notifyRole)(pNode->vgId, nodeRole);
|
(*pNode->notifyRole)(pNode->vgId, nodeRole);
|
||||||
|
|
||||||
nodeSStatus = TAOS_SYNC_STATUS_INIT;
|
nodeSStatus = TAOS_SYNC_STATUS_INIT;
|
||||||
|
sInfo("%s, sync over, set sstatus:%s", pPeer->id, syncStatus[nodeSStatus]);
|
||||||
|
|
||||||
taosClose(pPeer->syncFd);
|
taosClose(pPeer->syncFd);
|
||||||
syncCloseRecvBuffer(pNode);
|
syncCloseRecvBuffer(pNode);
|
||||||
__sync_fetch_and_sub(&tsSyncNum, 1);
|
__sync_fetch_and_sub(&tsSyncNum, 1);
|
||||||
|
|
|
@ -114,7 +114,10 @@ static int32_t syncRetrieveFile(SSyncPeer *pPeer) {
|
||||||
|
|
||||||
// send the file info
|
// send the file info
|
||||||
int32_t ret = taosWriteMsg(pPeer->syncFd, &(fileInfo), sizeof(fileInfo));
|
int32_t ret = taosWriteMsg(pPeer->syncFd, &(fileInfo), sizeof(fileInfo));
|
||||||
if (ret < 0) break;
|
if (ret < 0) {
|
||||||
|
sError("%s, failed to write file:%s info while retrieve file since %s", pPeer->id, fileInfo.name, strerror(errno));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
// if no file anymore, break
|
// if no file anymore, break
|
||||||
if (fileInfo.magic == 0 || fileInfo.name[0] == 0) {
|
if (fileInfo.magic == 0 || fileInfo.name[0] == 0) {
|
||||||
|
@ -124,8 +127,11 @@ static int32_t syncRetrieveFile(SSyncPeer *pPeer) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// wait for the ack from peer
|
// wait for the ack from peer
|
||||||
ret = taosReadMsg(pPeer->syncFd, &(fileAck), sizeof(fileAck));
|
ret = taosReadMsg(pPeer->syncFd, &fileAck, sizeof(fileAck));
|
||||||
if (ret < 0) break;
|
if (ret < 0) {
|
||||||
|
sError("%s, failed to read file:%s ack while retrieve file since %s", pPeer->id, fileInfo.name, strerror(errno));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
// set the peer sync version
|
// set the peer sync version
|
||||||
pPeer->sversion = fileInfo.fversion;
|
pPeer->sversion = fileInfo.fversion;
|
||||||
|
@ -134,7 +140,10 @@ static int32_t syncRetrieveFile(SSyncPeer *pPeer) {
|
||||||
snprintf(name, sizeof(name), "%s/%s", pNode->path, fileInfo.name);
|
snprintf(name, sizeof(name), "%s/%s", pNode->path, fileInfo.name);
|
||||||
|
|
||||||
// add the file into watch list
|
// add the file into watch list
|
||||||
if (syncAddIntoWatchList(pPeer, name) < 0) break;
|
if (syncAddIntoWatchList(pPeer, name) < 0) {
|
||||||
|
sError("%s, failed to watch file:%s while retrieve file since %s", pPeer->id, fileInfo.name, strerror(errno));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
// if sync is not required, continue
|
// if sync is not required, continue
|
||||||
if (fileAck.sync == 0) {
|
if (fileAck.sync == 0) {
|
||||||
|
@ -145,21 +154,30 @@ static int32_t syncRetrieveFile(SSyncPeer *pPeer) {
|
||||||
|
|
||||||
// send the file to peer
|
// send the file to peer
|
||||||
int32_t sfd = open(name, O_RDONLY);
|
int32_t sfd = open(name, O_RDONLY);
|
||||||
if (sfd < 0) break;
|
if (sfd < 0) {
|
||||||
|
sError("%s, failed to open file:%s while retrieve file since %s", pPeer->id, fileInfo.name, strerror(errno));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
ret = taosSendFile(pPeer->syncFd, sfd, NULL, fileInfo.size);
|
ret = taosSendFile(pPeer->syncFd, sfd, NULL, fileInfo.size);
|
||||||
close(sfd);
|
close(sfd);
|
||||||
if (ret < 0) break;
|
if (ret < 0) {
|
||||||
|
sError("%s, failed to send file:%s while retrieve file since %s", pPeer->id, fileInfo.name, strerror(errno));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
sDebug("%s, %s is sent, size:%" PRId64, pPeer->id, name, fileInfo.size);
|
sDebug("%s, %s is sent, size:%" PRId64, pPeer->id, name, fileInfo.size);
|
||||||
fileInfo.index++;
|
fileInfo.index++;
|
||||||
|
|
||||||
// check if processed files are modified
|
// check if processed files are modified
|
||||||
if (syncAreFilesModified(pPeer) != 0) break;
|
if (syncAreFilesModified(pPeer) != 0) {
|
||||||
|
sInfo("%s, file:%s are modified while retrieve file since %s", pPeer->id, fileInfo.name, strerror(errno));
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (code < 0) {
|
if (code < 0) {
|
||||||
sError("%s, failed to retrieve file since %s", pPeer->id, strerror(errno));
|
sError("%s, failed to retrieve file", pPeer->id);
|
||||||
}
|
}
|
||||||
|
|
||||||
return code;
|
return code;
|
||||||
|
@ -318,6 +336,7 @@ static int32_t syncProcessLastWal(SSyncPeer *pPeer, char *wname, int64_t index)
|
||||||
if (((event & IN_MODIFY) == 0) || once) {
|
if (((event & IN_MODIFY) == 0) || once) {
|
||||||
if (fversion == 0) {
|
if (fversion == 0) {
|
||||||
pPeer->sstatus = TAOS_SYNC_STATUS_CACHE; // start to forward pkt
|
pPeer->sstatus = TAOS_SYNC_STATUS_CACHE; // start to forward pkt
|
||||||
|
sDebug("%s, fversion is 0 then set sstatus:%s", pPeer->id, syncStatus[pPeer->sstatus]);
|
||||||
fversion = nodeVersion; // must read data to fversion
|
fversion = nodeVersion; // must read data to fversion
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -416,8 +435,9 @@ static int32_t syncRetrieveWal(SSyncPeer *pPeer) {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (code == 0) {
|
if (code == 0) {
|
||||||
sInfo("%s, wal retrieve is finished", pPeer->id);
|
|
||||||
pPeer->sstatus = TAOS_SYNC_STATUS_CACHE;
|
pPeer->sstatus = TAOS_SYNC_STATUS_CACHE;
|
||||||
|
sInfo("%s, wal retrieve is finished, set sstatus:%s", pPeer->id, syncStatus[pPeer->sstatus]);
|
||||||
|
|
||||||
SWalHead walHead;
|
SWalHead walHead;
|
||||||
memset(&walHead, 0, sizeof(walHead));
|
memset(&walHead, 0, sizeof(walHead));
|
||||||
code = taosWriteMsg(pPeer->syncFd, &walHead, sizeof(walHead));
|
code = taosWriteMsg(pPeer->syncFd, &walHead, sizeof(walHead));
|
||||||
|
@ -445,7 +465,7 @@ static int32_t syncRetrieveDataStepByStep(SSyncPeer *pPeer) {
|
||||||
|
|
||||||
pPeer->sversion = 0;
|
pPeer->sversion = 0;
|
||||||
pPeer->sstatus = TAOS_SYNC_STATUS_FILE;
|
pPeer->sstatus = TAOS_SYNC_STATUS_FILE;
|
||||||
sInfo("%s, start to retrieve file", pPeer->id);
|
sInfo("%s, start to retrieve file, set sstatus:%s", pPeer->id, syncStatus[pPeer->sstatus]);
|
||||||
if (syncRetrieveFile(pPeer) < 0) {
|
if (syncRetrieveFile(pPeer) < 0) {
|
||||||
sError("%s, failed to retrieve file", pPeer->id);
|
sError("%s, failed to retrieve file", pPeer->id);
|
||||||
return -1;
|
return -1;
|
||||||
|
|
|
@ -107,7 +107,7 @@ int32_t taosWriteMsg(SOCKET fd, void *buf, int32_t nbytes) {
|
||||||
while (nleft > 0) {
|
while (nleft > 0) {
|
||||||
nwritten = (int32_t)taosWriteSocket(fd, (char *)ptr, (size_t)nleft);
|
nwritten = (int32_t)taosWriteSocket(fd, (char *)ptr, (size_t)nleft);
|
||||||
if (nwritten <= 0) {
|
if (nwritten <= 0) {
|
||||||
if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK)
|
if (errno == EINTR /* || errno == EAGAIN || errno == EWOULDBLOCK */)
|
||||||
continue;
|
continue;
|
||||||
else
|
else
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -133,7 +133,7 @@ int32_t taosReadMsg(SOCKET fd, void *buf, int32_t nbytes) {
|
||||||
if (nread == 0) {
|
if (nread == 0) {
|
||||||
break;
|
break;
|
||||||
} else if (nread < 0) {
|
} else if (nread < 0) {
|
||||||
if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK) {
|
if (errno == EINTR/* || errno == EAGAIN || errno == EWOULDBLOCK*/) {
|
||||||
continue;
|
continue;
|
||||||
} else {
|
} else {
|
||||||
return -1;
|
return -1;
|
||||||
|
|
Loading…
Reference in New Issue