Merge pull request #4321 from taosdata/feature/wal

TD-2157
This commit is contained in:
Shengliang Guan 2020-11-23 21:57:49 +08:00 committed by GitHub
commit 11a5fe2719
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 110 additions and 50 deletions

View File

@ -172,6 +172,7 @@ typedef struct SSyncNode {
// sync module global // sync module global
extern int32_t tsSyncNum; extern int32_t tsSyncNum;
extern char tsNodeFqdn[TSDB_FQDN_LEN]; extern char tsNodeFqdn[TSDB_FQDN_LEN];
extern char * syncStatus[];
void *syncRetrieveData(void *param); void *syncRetrieveData(void *param);
void *syncRestoreData(void *param); void *syncRestoreData(void *param);

View File

@ -73,6 +73,14 @@ char* syncRole[] = {
"master" "master"
}; };
char *syncStatus[] = {
"init",
"start",
"file",
"cache",
"invalid"
};
typedef enum { typedef enum {
SYNC_STATUS_BROADCAST, SYNC_STATUS_BROADCAST,
SYNC_STATUS_BROADCAST_RSP, SYNC_STATUS_BROADCAST_RSP,
@ -282,7 +290,7 @@ void syncStop(int64_t rid) {
pPeer = pNode->peerInfo[TAOS_SYNC_MAX_REPLICA]; pPeer = pNode->peerInfo[TAOS_SYNC_MAX_REPLICA];
if (pPeer) syncRemovePeer(pPeer); if (pPeer) syncRemovePeer(pPeer);
pthread_mutex_unlock(&(pNode->mutex)); pthread_mutex_unlock(&pNode->mutex);
taosReleaseRef(tsSyncRefId, rid); taosReleaseRef(tsSyncRefId, rid);
taosRemoveRef(tsSyncRefId, rid); taosRemoveRef(tsSyncRefId, rid);
@ -350,7 +358,7 @@ int32_t syncReconfig(int64_t rid, const SSyncCfg *pNewCfg) {
(*pNode->notifyRole)(pNode->vgId, nodeRole); (*pNode->notifyRole)(pNode->vgId, nodeRole);
} }
pthread_mutex_unlock(&(pNode->mutex)); pthread_mutex_unlock(&pNode->mutex);
sInfo("vgId:%d, %d replicas are configured, quorum:%d", pNode->vgId, pNode->replica, pNode->quorum); sInfo("vgId:%d, %d replicas are configured, quorum:%d", pNode->vgId, pNode->replica, pNode->quorum);
syncBroadcastStatus(pNode); syncBroadcastStatus(pNode);
@ -423,7 +431,7 @@ void syncRecover(int64_t rid) {
} }
} }
pthread_mutex_unlock(&(pNode->mutex)); pthread_mutex_unlock(&pNode->mutex);
taosReleaseRef(tsSyncRefId, rid); taosReleaseRef(tsSyncRefId, rid);
} }
@ -498,6 +506,8 @@ int32_t syncDecPeerRef(SSyncPeer *pPeer) {
} }
static void syncClosePeerConn(SSyncPeer *pPeer) { static void syncClosePeerConn(SSyncPeer *pPeer) {
sDebug("%s, pfd:%d sfd:%d will be closed", pPeer->id, pPeer->peerFd, pPeer->syncFd);
taosTmrStopA(&pPeer->timer); taosTmrStopA(&pPeer->timer);
taosClose(pPeer->syncFd); taosClose(pPeer->syncFd);
if (pPeer->peerFd >= 0) { if (pPeer->peerFd >= 0) {
@ -751,7 +761,7 @@ static void syncCheckRole(SSyncPeer *pPeer, SPeerStatus* peersStatus, int8_t new
sDebug("vgId:%d, choose master", pNode->vgId); sDebug("vgId:%d, choose master", pNode->vgId);
syncChooseMaster(pNode); syncChooseMaster(pNode);
} else { } else {
sDebug("vgId:%d, version inconsistent, cannot choose master", pNode->vgId); sDebug("vgId:%d, cannot choose master since roles inconsistent", pNode->vgId);
} }
} }
@ -770,11 +780,12 @@ static void syncCheckRole(SSyncPeer *pPeer, SPeerStatus* peersStatus, int8_t new
} }
static void syncRestartPeer(SSyncPeer *pPeer) { static void syncRestartPeer(SSyncPeer *pPeer) {
sDebug("%s, restart peer connection", pPeer->id); sDebug("%s, restart peer connection, last sstatus:%s", pPeer->id, syncStatus[pPeer->sstatus]);
syncClosePeerConn(pPeer); syncClosePeerConn(pPeer);
pPeer->sstatus = TAOS_SYNC_STATUS_INIT; pPeer->sstatus = TAOS_SYNC_STATUS_INIT;
sDebug("%s, peer conn is restart and set sstatus:%s", pPeer->id, syncStatus[pPeer->sstatus]);
int32_t ret = strcmp(pPeer->fqdn, tsNodeFqdn); int32_t ret = strcmp(pPeer->fqdn, tsNodeFqdn);
if (ret > 0 || (ret == 0 && pPeer->port > tsSyncPort)) { if (ret > 0 || (ret == 0 && pPeer->port > tsSyncPort)) {
@ -803,7 +814,7 @@ static void syncProcessSyncRequest(char *msg, SSyncPeer *pPeer) {
} }
if (pPeer->sstatus != TAOS_SYNC_STATUS_INIT) { if (pPeer->sstatus != TAOS_SYNC_STATUS_INIT) {
sDebug("%s, sync is already started", pPeer->id); sDebug("%s, sync is already started for sstatus:%s", pPeer->id, syncStatus[pPeer->sstatus]);
return; // already started return; // already started
} }
@ -821,7 +832,7 @@ static void syncProcessSyncRequest(char *msg, SSyncPeer *pPeer) {
syncDecPeerRef(pPeer); syncDecPeerRef(pPeer);
} else { } else {
pPeer->sstatus = TAOS_SYNC_STATUS_START; pPeer->sstatus = TAOS_SYNC_STATUS_START;
sDebug("%s, thread is created to retrieve data", pPeer->id); sDebug("%s, thread is created to retrieve data, set sstatus:%s", pPeer->id, syncStatus[pPeer->sstatus]);
} }
} }
@ -831,9 +842,10 @@ static void syncNotStarted(void *param, void *tmrId) {
pthread_mutex_lock(&pNode->mutex); pthread_mutex_lock(&pNode->mutex);
pPeer->timer = NULL; pPeer->timer = NULL;
sInfo("%s, sync connection is still not up, restart", pPeer->id); pPeer->sstatus = TAOS_SYNC_STATUS_INIT;
sInfo("%s, sync conn is still not up, restart and set sstatus:%s", pPeer->id, syncStatus[pPeer->sstatus]);
syncRestartConnection(pPeer); syncRestartConnection(pPeer);
pthread_mutex_unlock(&(pNode->mutex)); pthread_mutex_unlock(&pNode->mutex);
} }
static void syncTryRecoverFromMaster(void *param, void *tmrId) { static void syncTryRecoverFromMaster(void *param, void *tmrId) {
@ -842,14 +854,14 @@ static void syncTryRecoverFromMaster(void *param, void *tmrId) {
pthread_mutex_lock(&pNode->mutex); pthread_mutex_lock(&pNode->mutex);
syncRecoverFromMaster(pPeer); syncRecoverFromMaster(pPeer);
pthread_mutex_unlock(&(pNode->mutex)); pthread_mutex_unlock(&pNode->mutex);
} }
static void syncRecoverFromMaster(SSyncPeer *pPeer) { static void syncRecoverFromMaster(SSyncPeer *pPeer) {
SSyncNode *pNode = pPeer->pSyncNode; SSyncNode *pNode = pPeer->pSyncNode;
if (nodeSStatus != TAOS_SYNC_STATUS_INIT) { if (nodeSStatus != TAOS_SYNC_STATUS_INIT) {
sDebug("%s, sync is already started, status:%d", pPeer->id, nodeSStatus); sDebug("%s, sync is already started since sstatus:%s", pPeer->id, syncStatus[nodeSStatus]);
return; return;
} }
@ -877,7 +889,7 @@ static void syncRecoverFromMaster(SSyncPeer *pPeer) {
sError("%s, failed to send sync-req to peer", pPeer->id); sError("%s, failed to send sync-req to peer", pPeer->id);
} else { } else {
nodeSStatus = TAOS_SYNC_STATUS_START; nodeSStatus = TAOS_SYNC_STATUS_START;
sInfo("%s, sync-req is sent", pPeer->id); sInfo("%s, sync-req is sent to peer, set sstatus:%s", pPeer->id, syncStatus[nodeSStatus]);
} }
} }
@ -915,7 +927,8 @@ static void syncProcessForwardFromPeer(char *cont, SSyncPeer *pPeer) {
if (nodeSStatus != TAOS_SYNC_STATUS_INIT) { if (nodeSStatus != TAOS_SYNC_STATUS_INIT) {
syncSaveIntoBuffer(pPeer, pHead); syncSaveIntoBuffer(pPeer, pHead);
} else { } else {
sError("%s, forward discarded, hver:%" PRIu64, pPeer->id, pHead->version); sError("%s, forward discarded since sstatus:%s, hver:%" PRIu64, pPeer->id, syncStatus[nodeSStatus],
pHead->version);
} }
} }
} }
@ -924,8 +937,9 @@ static void syncProcessPeersStatusMsg(char *cont, SSyncPeer *pPeer) {
SSyncNode * pNode = pPeer->pSyncNode; SSyncNode * pNode = pPeer->pSyncNode;
SPeersStatus *pPeersStatus = (SPeersStatus *)cont; SPeersStatus *pPeersStatus = (SPeersStatus *)cont;
sDebug("%s, status msg is received, self:%s sver:%" PRIu64 " peer:%s sver:%" PRIu64 ", ack:%d tranId:%u type:%s", pPeer->id, sDebug("%s, status is received, self:%s:%s:%" PRIu64 ", peer:%s:%" PRIu64 ", ack:%d tranId:%u type:%s pfd:%d",
syncRole[nodeRole], nodeVersion, syncRole[pPeersStatus->role], pPeersStatus->version, pPeersStatus->ack, pPeersStatus->tranId, statusType[pPeersStatus->type]); pPeer->id, syncRole[nodeRole], syncStatus[nodeSStatus], nodeVersion, syncRole[pPeersStatus->role],
pPeersStatus->version, pPeersStatus->ack, pPeersStatus->tranId, statusType[pPeersStatus->type], pPeer->peerFd);
pPeer->version = pPeersStatus->version; pPeer->version = pPeersStatus->version;
syncCheckRole(pPeer, pPeersStatus->peersStatus, pPeersStatus->role); syncCheckRole(pPeer, pPeersStatus->peersStatus, pPeersStatus->role);
@ -982,7 +996,7 @@ static int32_t syncProcessPeerMsg(void *param, void *buffer) {
} }
} }
pthread_mutex_unlock(&(pNode->mutex)); pthread_mutex_unlock(&pNode->mutex);
return code; return code;
} }
@ -1014,8 +1028,10 @@ static void syncSendPeersStatusMsgToPeer(SSyncPeer *pPeer, char ack, int8_t type
int32_t retLen = taosWriteMsg(pPeer->peerFd, msg, statusMsgLen); int32_t retLen = taosWriteMsg(pPeer->peerFd, msg, statusMsgLen);
if (retLen == statusMsgLen) { if (retLen == statusMsgLen) {
sDebug("%s, status msg is sent, self:%s sver:%" PRIu64 ", ack:%d tranId:%u type:%s", pPeer->id, syncRole[pPeersStatus->role], sDebug("%s, status is sent, self:%s:%s:%" PRIu64 ", peer:%s:%s:%" PRIu64 ", ack:%d tranId:%u type:%s pfd:%d",
pPeersStatus->version, pPeersStatus->ack, pPeersStatus->tranId, statusType[pPeersStatus->type]); pPeer->id, syncRole[nodeRole], syncStatus[nodeSStatus], nodeVersion, syncRole[pPeer->role],
syncStatus[pPeer->sstatus], pPeer->version, pPeersStatus->ack, pPeersStatus->tranId,
statusType[pPeersStatus->type], pPeer->peerFd);
} else { } else {
sDebug("%s, failed to send status msg, restart", pPeer->id); sDebug("%s, failed to send status msg, restart", pPeer->id);
syncRestartConnection(pPeer); syncRestartConnection(pPeer);
@ -1048,7 +1064,7 @@ static void syncSetupPeerConnection(SSyncPeer *pPeer) {
firstPkt.sourceId = pNode->vgId; // tell arbitrator its vgId firstPkt.sourceId = pNode->vgId; // tell arbitrator its vgId
if (taosWriteMsg(connFd, &firstPkt, sizeof(firstPkt)) == sizeof(firstPkt)) { if (taosWriteMsg(connFd, &firstPkt, sizeof(firstPkt)) == sizeof(firstPkt)) {
sDebug("%s, connection to peer server is setup", pPeer->id); sDebug("%s, connection to peer server is setup, pfd:%d sfd:%d", pPeer->id, connFd, pPeer->syncFd);
pPeer->peerFd = connFd; pPeer->peerFd = connFd;
pPeer->role = TAOS_SYNC_ROLE_UNSYNCED; pPeer->role = TAOS_SYNC_ROLE_UNSYNCED;
pPeer->pConn = taosAllocateTcpConn(tsTcpPool, pPeer, connFd); pPeer->pConn = taosAllocateTcpConn(tsTcpPool, pPeer, connFd);
@ -1069,7 +1085,7 @@ static void syncCheckPeerConnection(void *param, void *tmrId) {
sDebug("%s, check peer connection", pPeer->id); sDebug("%s, check peer connection", pPeer->id);
syncSetupPeerConnection(pPeer); syncSetupPeerConnection(pPeer);
pthread_mutex_unlock(&(pNode->mutex)); pthread_mutex_unlock(&pNode->mutex);
} }
static void syncCreateRestoreDataThread(SSyncPeer *pPeer) { static void syncCreateRestoreDataThread(SSyncPeer *pPeer) {
@ -1135,7 +1151,8 @@ static void syncProcessIncommingConnection(int32_t connFd, uint32_t sourceIp) {
pPeer->syncFd = connFd; pPeer->syncFd = connFd;
syncCreateRestoreDataThread(pPeer); syncCreateRestoreDataThread(pPeer);
} else { } else {
sDebug("%s, TCP connection is already up, close one", pPeer->id); sDebug("%s, TCP connection is already up(pfd:%d), close one, new pfd:%d sfd:%d", pPeer->id, pPeer->peerFd, connFd,
pPeer->syncFd);
syncClosePeerConn(pPeer); syncClosePeerConn(pPeer);
pPeer->peerFd = connFd; pPeer->peerFd = connFd;
pPeer->pConn = taosAllocateTcpConn(tsTcpPool, pPeer, connFd); pPeer->pConn = taosAllocateTcpConn(tsTcpPool, pPeer, connFd);
@ -1145,7 +1162,7 @@ static void syncProcessIncommingConnection(int32_t connFd, uint32_t sourceIp) {
} }
} }
pthread_mutex_unlock(&(pNode->mutex)); pthread_mutex_unlock(&pNode->mutex);
} }
static void syncProcessBrokenLink(void *param) { static void syncProcessBrokenLink(void *param) {
@ -1156,14 +1173,14 @@ static void syncProcessBrokenLink(void *param) {
if (taosAcquireRef(tsSyncRefId, pNode->rid) == NULL) return; if (taosAcquireRef(tsSyncRefId, pNode->rid) == NULL) return;
pthread_mutex_lock(&pNode->mutex); pthread_mutex_lock(&pNode->mutex);
sDebug("%s, TCP link is broken since %s", pPeer->id, strerror(errno)); sDebug("%s, TCP link is broken since %s, pfd:%d sfd:%d", pPeer->id, strerror(errno), pPeer->peerFd, pPeer->syncFd);
pPeer->peerFd = -1; pPeer->peerFd = -1;
if (syncDecPeerRef(pPeer) != 0) { if (syncDecPeerRef(pPeer) != 0) {
syncRestartConnection(pPeer); syncRestartConnection(pPeer);
} }
pthread_mutex_unlock(&(pNode->mutex)); pthread_mutex_unlock(&pNode->mutex);
taosReleaseRef(tsSyncRefId, pNode->rid); taosReleaseRef(tsSyncRefId, pNode->rid);
} }
@ -1239,10 +1256,13 @@ static void syncMonitorNodeRole(void *param, void *tmrId) {
if (index == pNode->selfIndex) continue; if (index == pNode->selfIndex) continue;
SSyncPeer *pPeer = pNode->peerInfo[index]; SSyncPeer *pPeer = pNode->peerInfo[index];
if (pPeer->role > TAOS_SYNC_ROLE_UNSYNCED && nodeRole > TAOS_SYNC_ROLE_UNSYNCED) continue; if (/*pPeer->role > TAOS_SYNC_ROLE_UNSYNCED && */ nodeRole > TAOS_SYNC_ROLE_UNSYNCED) continue;
if (pPeer->sstatus > TAOS_SYNC_STATUS_INIT || nodeSStatus > TAOS_SYNC_STATUS_INIT) continue; if (/*pPeer->sstatus > TAOS_SYNC_STATUS_INIT || */ nodeSStatus > TAOS_SYNC_STATUS_INIT) continue;
sDebug("%s, check roles since self:%s sstatus:%s, peer:%s sstatus:%s", pPeer->id, syncRole[pPeer->role],
syncStatus[pPeer->sstatus], syncRole[nodeRole], syncStatus[nodeSStatus]);
syncSendPeersStatusMsgToPeer(pPeer, 1, SYNC_STATUS_CHECK_ROLE, syncGenTranId()); syncSendPeersStatusMsgToPeer(pPeer, 1, SYNC_STATUS_CHECK_ROLE, syncGenTranId());
break;
} }
pNode->pRoleTimer = taosTmrStart(syncMonitorNodeRole, SYNC_ROLE_TIMER, (void *)pNode->rid, tsSyncTmrCtrl); pNode->pRoleTimer = taosTmrStart(syncMonitorNodeRole, SYNC_ROLE_TIMER, (void *)pNode->rid, tsSyncTmrCtrl);
@ -1271,7 +1291,7 @@ static void syncMonitorFwdInfos(void *param, void *tmrId) {
} }
syncRemoveConfirmedFwdInfo(pNode); syncRemoveConfirmedFwdInfo(pNode);
pthread_mutex_unlock(&(pNode->mutex)); pthread_mutex_unlock(&pNode->mutex);
} }
pNode->pFwdTimer = taosTmrStart(syncMonitorFwdInfos, SYNC_FWD_TIMER, (void *)pNode->rid, tsSyncTmrCtrl); pNode->pFwdTimer = taosTmrStart(syncMonitorFwdInfos, SYNC_FWD_TIMER, (void *)pNode->rid, tsSyncTmrCtrl);
@ -1339,7 +1359,7 @@ static int32_t syncForwardToPeerImpl(SSyncNode *pNode, void *data, void *mhandle
} }
} }
pthread_mutex_unlock(&(pNode->mutex)); pthread_mutex_unlock(&pNode->mutex);
return code; return code;
} }

View File

@ -65,7 +65,10 @@ static int32_t syncRestoreFile(SSyncPeer *pPeer, uint64_t *fversion) {
while (1) { while (1) {
// read file info // read file info
int32_t ret = taosReadMsg(pPeer->syncFd, &(minfo), sizeof(minfo)); int32_t ret = taosReadMsg(pPeer->syncFd, &(minfo), sizeof(minfo));
if (ret < 0) break; if (ret < 0) {
sError("%s, failed to read file info while restore file since %s", pPeer->id, strerror(errno));
break;
}
// if no more file from master, break; // if no more file from master, break;
if (minfo.name[0] == 0 || minfo.magic == 0) { if (minfo.name[0] == 0 || minfo.magic == 0) {
@ -83,7 +86,7 @@ static int32_t syncRestoreFile(SSyncPeer *pPeer, uint64_t *fversion) {
// check the file info // check the file info
sinfo = minfo; sinfo = minfo;
sDebug("%s, get file info:%s", pPeer->id, minfo.name); sDebug("%s, get file:%s info size:%" PRId64, pPeer->id, minfo.name, minfo.size);
sinfo.magic = (*pNode->getFileInfo)(pNode->vgId, sinfo.name, &sinfo.index, TAOS_SYNC_MAX_INDEX, &sinfo.size, sinfo.magic = (*pNode->getFileInfo)(pNode->vgId, sinfo.name, &sinfo.index, TAOS_SYNC_MAX_INDEX, &sinfo.size,
&sinfo.fversion); &sinfo.fversion);
@ -92,8 +95,11 @@ static int32_t syncRestoreFile(SSyncPeer *pPeer, uint64_t *fversion) {
fileAck.sync = (sinfo.magic != minfo.magic || sinfo.name[0] == 0) ? 1 : 0; fileAck.sync = (sinfo.magic != minfo.magic || sinfo.name[0] == 0) ? 1 : 0;
// send file ack // send file ack
ret = taosWriteMsg(pPeer->syncFd, &(fileAck), sizeof(fileAck)); ret = taosWriteMsg(pPeer->syncFd, &fileAck, sizeof(fileAck));
if (ret < 0) break; if (ret < 0) {
sError("%s, failed to write file:%s ack while restore file since %s", pPeer->id, minfo.name, strerror(errno));
break;
}
// if sync is not required, continue // if sync is not required, continue
if (fileAck.sync == 0) { if (fileAck.sync == 0) {
@ -108,14 +114,17 @@ static int32_t syncRestoreFile(SSyncPeer *pPeer, uint64_t *fversion) {
int32_t dfd = open(name, O_WRONLY | O_CREAT | O_TRUNC, S_IRWXU | S_IRWXG | S_IRWXO); int32_t dfd = open(name, O_WRONLY | O_CREAT | O_TRUNC, S_IRWXU | S_IRWXG | S_IRWXO);
if (dfd < 0) { if (dfd < 0) {
sError("%s, failed to open file:%s", pPeer->id, name); sError("%s, failed to open file:%s while restore file since %s", pPeer->id, minfo.name, strerror(errno));
break; break;
} }
ret = taosCopyFds(pPeer->syncFd, dfd, minfo.size); ret = taosCopyFds(pPeer->syncFd, dfd, minfo.size);
fsync(dfd); fsync(dfd);
close(dfd); close(dfd);
if (ret < 0) break; if (ret < 0) {
sError("%s, failed to copy file:%s while restore file since %s", pPeer->id, minfo.name, strerror(errno));
break;
}
fileChanged = true; fileChanged = true;
sDebug("%s, %s is received, size:%" PRId64, pPeer->id, minfo.name, minfo.size); sDebug("%s, %s is received, size:%" PRId64, pPeer->id, minfo.name, minfo.size);
@ -125,6 +134,7 @@ static int32_t syncRestoreFile(SSyncPeer *pPeer, uint64_t *fversion) {
// data file is changed, code shall be set to 1 // data file is changed, code shall be set to 1
*fversion = minfo.fversion; *fversion = minfo.fversion;
code = 1; code = 1;
sDebug("%s, file changed while restore file", pPeer->id);
} }
if (code < 0) { if (code < 0) {
@ -146,15 +156,22 @@ static int32_t syncRestoreWal(SSyncPeer *pPeer) {
while (1) { while (1) {
ret = taosReadMsg(pPeer->syncFd, pHead, sizeof(SWalHead)); ret = taosReadMsg(pPeer->syncFd, pHead, sizeof(SWalHead));
if (ret < 0) break; if (ret < 0) {
sError("%s, failed to read walhead while restore wal since %s", pPeer->id, strerror(errno));
break;
}
if (pHead->len == 0) { if (pHead->len == 0) {
sDebug("%s, wal is synced over", pPeer->id);
code = 0; code = 0;
break; break;
} // wal sync over } // wal sync over
ret = taosReadMsg(pPeer->syncFd, pHead->cont, pHead->len); ret = taosReadMsg(pPeer->syncFd, pHead->cont, pHead->len);
if (ret < 0) break; if (ret < 0) {
sError("%s, failed to read walcont, len:%d while restore wal since %s", pPeer->id, pHead->len, strerror(errno));
break;
}
sTrace("%s, restore a record, qtype:wal len:%d hver:%" PRIu64, pPeer->id, pHead->len, pHead->version); sTrace("%s, restore a record, qtype:wal len:%d hver:%" PRIu64, pPeer->id, pHead->len, pHead->version);
@ -267,7 +284,7 @@ static int32_t syncRestoreDataStepByStep(SSyncPeer *pPeer) {
nodeSStatus = TAOS_SYNC_STATUS_FILE; nodeSStatus = TAOS_SYNC_STATUS_FILE;
uint64_t fversion = 0; uint64_t fversion = 0;
sDebug("%s, start to restore file", pPeer->id); sDebug("%s, start to restore file, set sstatus:%s", pPeer->id, syncStatus[nodeSStatus]);
int32_t code = syncRestoreFile(pPeer, &fversion); int32_t code = syncRestoreFile(pPeer, &fversion);
if (code < 0) { if (code < 0) {
sError("%s, failed to restore file", pPeer->id); sError("%s, failed to restore file", pPeer->id);
@ -291,7 +308,7 @@ static int32_t syncRestoreDataStepByStep(SSyncPeer *pPeer) {
} }
nodeSStatus = TAOS_SYNC_STATUS_CACHE; nodeSStatus = TAOS_SYNC_STATUS_CACHE;
sDebug("%s, start to insert buffered points", pPeer->id); sDebug("%s, start to insert buffered points, set sstatus:%s", pPeer->id, syncStatus[nodeSStatus]);
if (syncProcessBufferedFwd(pPeer) < 0) { if (syncProcessBufferedFwd(pPeer) < 0) {
sError("%s, failed to insert buffered points", pPeer->id); sError("%s, failed to insert buffered points", pPeer->id);
return -1; return -1;
@ -327,6 +344,8 @@ void *syncRestoreData(void *param) {
(*pNode->notifyRole)(pNode->vgId, nodeRole); (*pNode->notifyRole)(pNode->vgId, nodeRole);
nodeSStatus = TAOS_SYNC_STATUS_INIT; nodeSStatus = TAOS_SYNC_STATUS_INIT;
sInfo("%s, sync over, set sstatus:%s", pPeer->id, syncStatus[nodeSStatus]);
taosClose(pPeer->syncFd); taosClose(pPeer->syncFd);
syncCloseRecvBuffer(pNode); syncCloseRecvBuffer(pNode);
__sync_fetch_and_sub(&tsSyncNum, 1); __sync_fetch_and_sub(&tsSyncNum, 1);

View File

@ -114,7 +114,10 @@ static int32_t syncRetrieveFile(SSyncPeer *pPeer) {
// send the file info // send the file info
int32_t ret = taosWriteMsg(pPeer->syncFd, &(fileInfo), sizeof(fileInfo)); int32_t ret = taosWriteMsg(pPeer->syncFd, &(fileInfo), sizeof(fileInfo));
if (ret < 0) break; if (ret < 0) {
sError("%s, failed to write file:%s info while retrieve file since %s", pPeer->id, fileInfo.name, strerror(errno));
break;
}
// if no file anymore, break // if no file anymore, break
if (fileInfo.magic == 0 || fileInfo.name[0] == 0) { if (fileInfo.magic == 0 || fileInfo.name[0] == 0) {
@ -124,8 +127,11 @@ static int32_t syncRetrieveFile(SSyncPeer *pPeer) {
} }
// wait for the ack from peer // wait for the ack from peer
ret = taosReadMsg(pPeer->syncFd, &(fileAck), sizeof(fileAck)); ret = taosReadMsg(pPeer->syncFd, &fileAck, sizeof(fileAck));
if (ret < 0) break; if (ret < 0) {
sError("%s, failed to read file:%s ack while retrieve file since %s", pPeer->id, fileInfo.name, strerror(errno));
break;
}
// set the peer sync version // set the peer sync version
pPeer->sversion = fileInfo.fversion; pPeer->sversion = fileInfo.fversion;
@ -134,7 +140,10 @@ static int32_t syncRetrieveFile(SSyncPeer *pPeer) {
snprintf(name, sizeof(name), "%s/%s", pNode->path, fileInfo.name); snprintf(name, sizeof(name), "%s/%s", pNode->path, fileInfo.name);
// add the file into watch list // add the file into watch list
if (syncAddIntoWatchList(pPeer, name) < 0) break; if (syncAddIntoWatchList(pPeer, name) < 0) {
sError("%s, failed to watch file:%s while retrieve file since %s", pPeer->id, fileInfo.name, strerror(errno));
break;
}
// if sync is not required, continue // if sync is not required, continue
if (fileAck.sync == 0) { if (fileAck.sync == 0) {
@ -145,21 +154,30 @@ static int32_t syncRetrieveFile(SSyncPeer *pPeer) {
// send the file to peer // send the file to peer
int32_t sfd = open(name, O_RDONLY); int32_t sfd = open(name, O_RDONLY);
if (sfd < 0) break; if (sfd < 0) {
sError("%s, failed to open file:%s while retrieve file since %s", pPeer->id, fileInfo.name, strerror(errno));
break;
}
ret = taosSendFile(pPeer->syncFd, sfd, NULL, fileInfo.size); ret = taosSendFile(pPeer->syncFd, sfd, NULL, fileInfo.size);
close(sfd); close(sfd);
if (ret < 0) break; if (ret < 0) {
sError("%s, failed to send file:%s while retrieve file since %s", pPeer->id, fileInfo.name, strerror(errno));
break;
}
sDebug("%s, %s is sent, size:%" PRId64, pPeer->id, name, fileInfo.size); sDebug("%s, %s is sent, size:%" PRId64, pPeer->id, name, fileInfo.size);
fileInfo.index++; fileInfo.index++;
// check if processed files are modified // check if processed files are modified
if (syncAreFilesModified(pPeer) != 0) break; if (syncAreFilesModified(pPeer) != 0) {
sInfo("%s, file:%s are modified while retrieve file since %s", pPeer->id, fileInfo.name, strerror(errno));
break;
}
} }
if (code < 0) { if (code < 0) {
sError("%s, failed to retrieve file since %s", pPeer->id, strerror(errno)); sError("%s, failed to retrieve file", pPeer->id);
} }
return code; return code;
@ -318,6 +336,7 @@ static int32_t syncProcessLastWal(SSyncPeer *pPeer, char *wname, int64_t index)
if (((event & IN_MODIFY) == 0) || once) { if (((event & IN_MODIFY) == 0) || once) {
if (fversion == 0) { if (fversion == 0) {
pPeer->sstatus = TAOS_SYNC_STATUS_CACHE; // start to forward pkt pPeer->sstatus = TAOS_SYNC_STATUS_CACHE; // start to forward pkt
sDebug("%s, fversion is 0 then set sstatus:%s", pPeer->id, syncStatus[pPeer->sstatus]);
fversion = nodeVersion; // must read data to fversion fversion = nodeVersion; // must read data to fversion
} }
} }
@ -416,8 +435,9 @@ static int32_t syncRetrieveWal(SSyncPeer *pPeer) {
} }
if (code == 0) { if (code == 0) {
sInfo("%s, wal retrieve is finished", pPeer->id);
pPeer->sstatus = TAOS_SYNC_STATUS_CACHE; pPeer->sstatus = TAOS_SYNC_STATUS_CACHE;
sInfo("%s, wal retrieve is finished, set sstatus:%s", pPeer->id, syncStatus[pPeer->sstatus]);
SWalHead walHead; SWalHead walHead;
memset(&walHead, 0, sizeof(walHead)); memset(&walHead, 0, sizeof(walHead));
code = taosWriteMsg(pPeer->syncFd, &walHead, sizeof(walHead)); code = taosWriteMsg(pPeer->syncFd, &walHead, sizeof(walHead));
@ -445,7 +465,7 @@ static int32_t syncRetrieveDataStepByStep(SSyncPeer *pPeer) {
pPeer->sversion = 0; pPeer->sversion = 0;
pPeer->sstatus = TAOS_SYNC_STATUS_FILE; pPeer->sstatus = TAOS_SYNC_STATUS_FILE;
sInfo("%s, start to retrieve file", pPeer->id); sInfo("%s, start to retrieve file, set sstatus:%s", pPeer->id, syncStatus[pPeer->sstatus]);
if (syncRetrieveFile(pPeer) < 0) { if (syncRetrieveFile(pPeer) < 0) {
sError("%s, failed to retrieve file", pPeer->id); sError("%s, failed to retrieve file", pPeer->id);
return -1; return -1;

View File

@ -107,7 +107,7 @@ int32_t taosWriteMsg(SOCKET fd, void *buf, int32_t nbytes) {
while (nleft > 0) { while (nleft > 0) {
nwritten = (int32_t)taosWriteSocket(fd, (char *)ptr, (size_t)nleft); nwritten = (int32_t)taosWriteSocket(fd, (char *)ptr, (size_t)nleft);
if (nwritten <= 0) { if (nwritten <= 0) {
if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK) if (errno == EINTR /* || errno == EAGAIN || errno == EWOULDBLOCK */)
continue; continue;
else else
return -1; return -1;
@ -133,7 +133,7 @@ int32_t taosReadMsg(SOCKET fd, void *buf, int32_t nbytes) {
if (nread == 0) { if (nread == 0) {
break; break;
} else if (nread < 0) { } else if (nread < 0) {
if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK) { if (errno == EINTR/* || errno == EAGAIN || errno == EWOULDBLOCK*/) {
continue; continue;
} else { } else {
return -1; return -1;