fix: restart snapshot sender when the receiver restarts
This commit is contained in:
parent
773423a6b6
commit
b59bee6696
|
@ -112,7 +112,7 @@ SyncTerm syncLogReplMgrGetPrevLogTerm(SSyncLogReplMgr* pMgr, SSyncNode* pNode, S
|
|||
return prevLogTerm;
|
||||
}
|
||||
|
||||
sError("vgId:%d, failed to get log term since %s. index: %" PRId64 "", pNode->vgId, terrstr(), prevIndex);
|
||||
sInfo("vgId:%d, failed to get log term since %s. index:%" PRId64, pNode->vgId, terrstr(), prevIndex);
|
||||
terrno = TSDB_CODE_WAL_LOG_NOT_EXIST;
|
||||
return -1;
|
||||
}
|
||||
|
|
|
@ -115,8 +115,8 @@ static int32_t raftLogRestoreFromSnapshot(struct SSyncLogStore* pLogStore, SyncI
|
|||
const char* sysErrStr = strerror(errno);
|
||||
|
||||
sNError(pData->pSyncNode,
|
||||
"wal restore from snapshot error, index:%" PRId64 ", err:%d %X, msg:%s, syserr:%d, sysmsg:%s",
|
||||
snapshotIndex, err, err, errStr, sysErr, sysErrStr);
|
||||
"wal restore from snapshot error, index:%" PRId64 ", err:0x%x, msg:%s, syserr:%d, sysmsg:%s", snapshotIndex,
|
||||
err, errStr, sysErr, sysErrStr);
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
@ -212,8 +212,8 @@ static int32_t raftLogAppendEntry(struct SSyncLogStore* pLogStore, SSyncRaftEntr
|
|||
int32_t sysErr = errno;
|
||||
const char* sysErrStr = strerror(errno);
|
||||
|
||||
sNError(pData->pSyncNode, "wal write error, index:%" PRId64 ", err:%d %X, msg:%s, syserr:%d, sysmsg:%s",
|
||||
pEntry->index, err, err, errStr, sysErr, sysErrStr);
|
||||
sNError(pData->pSyncNode, "wal write error, index:%" PRId64 ", err:0x%x, msg:%s, syserr:%d, sysmsg:%s",
|
||||
pEntry->index, err, errStr, sysErr, sysErrStr);
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
@ -257,11 +257,11 @@ int32_t raftLogGetEntry(struct SSyncLogStore* pLogStore, SyncIndex index, SSyncR
|
|||
const char* sysErrStr = strerror(errno);
|
||||
|
||||
if (terrno == TSDB_CODE_WAL_LOG_NOT_EXIST) {
|
||||
sNTrace(pData->pSyncNode, "wal read not exist, index:%" PRId64 ", err:%d %X, msg:%s, syserr:%d, sysmsg:%s", index,
|
||||
err, err, errStr, sysErr, sysErrStr);
|
||||
sNTrace(pData->pSyncNode, "wal read not exist, index:%" PRId64 ", err:0x%x, msg:%s, syserr:%d, sysmsg:%s", index,
|
||||
err, errStr, sysErr, sysErrStr);
|
||||
} else {
|
||||
sNTrace(pData->pSyncNode, "wal read error, index:%" PRId64 ", err:%d %X, msg:%s, syserr:%d, sysmsg:%s", index,
|
||||
err, err, errStr, sysErr, sysErrStr);
|
||||
sNTrace(pData->pSyncNode, "wal read error, index:%" PRId64 ", err:0x%x, msg:%s, syserr:%d, sysmsg:%s", index, err,
|
||||
errStr, sysErr, sysErrStr);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -341,8 +341,8 @@ static int32_t raftLogTruncate(struct SSyncLogStore* pLogStore, SyncIndex fromIn
|
|||
const char* errStr = tstrerror(err);
|
||||
int32_t sysErr = errno;
|
||||
const char* sysErrStr = strerror(errno);
|
||||
sError("vgId:%d, wal truncate error, from-index:%" PRId64 ", err:%d %X, msg:%s, syserr:%d, sysmsg:%s",
|
||||
pData->pSyncNode->vgId, fromIndex, err, err, errStr, sysErr, sysErrStr);
|
||||
sError("vgId:%d, wal truncate error, from-index:%" PRId64 ", err:0x%x, msg:%s, syserr:%d, sysmsg:%s",
|
||||
pData->pSyncNode->vgId, fromIndex, err, errStr, sysErr, sysErrStr);
|
||||
}
|
||||
|
||||
// event log
|
||||
|
@ -392,8 +392,8 @@ int32_t raftLogUpdateCommitIndex(SSyncLogStore* pLogStore, SyncIndex index) {
|
|||
const char* errStr = tstrerror(err);
|
||||
int32_t sysErr = errno;
|
||||
const char* sysErrStr = strerror(errno);
|
||||
sError("vgId:%d, wal update commit index error, index:%" PRId64 ", err:%d %X, msg:%s, syserr:%d, sysmsg:%s",
|
||||
pData->pSyncNode->vgId, index, err, err, errStr, sysErr, sysErrStr);
|
||||
sError("vgId:%d, wal update commit index error, index:%" PRId64 ", err:0x%x, msg:%s, syserr:%d, sysmsg:%s",
|
||||
pData->pSyncNode->vgId, index, err, errStr, sysErr, sysErrStr);
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
|
|
|
@ -747,7 +747,7 @@ static int32_t syncNodeOnSnapshotTransfering(SSyncNode *pSyncNode, SyncSnapshotS
|
|||
pRspMsg->snapBeginIndex = pReceiver->snapshotParam.start;
|
||||
|
||||
// send msg
|
||||
syncLogSendSyncSnapshotRsp(pSyncNode, pRspMsg, "snapshot receiver receiving");
|
||||
syncLogSendSyncSnapshotRsp(pSyncNode, pRspMsg, "snapshot receiver received");
|
||||
if (syncNodeSendMsgById(&pRspMsg->destId, pSyncNode, &rpcMsg) != 0) {
|
||||
sRError(pReceiver, "snapshot receiver send resp failed since %s", terrstr());
|
||||
return -1;
|
||||
|
@ -979,32 +979,31 @@ int32_t syncNodeOnSnapshotRsp(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) {
|
|||
return -1;
|
||||
}
|
||||
|
||||
if (pMsg->startTime != pSender->startTime) {
|
||||
syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "sender:% " PRId64 " receiver:%" PRId64 " time not match");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// state, term, seq/ack
|
||||
if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) {
|
||||
syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "snapshot sender not leader");
|
||||
sSError(pSender, "snapshot sender not leader");
|
||||
return -1;
|
||||
goto _ERROR;
|
||||
}
|
||||
|
||||
if (pMsg->startTime != pSender->startTime) {
|
||||
syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "snapshot sender and receiver time not match");
|
||||
sSError(pSender, "sender:%" PRId64 " receiver:%" PRId64 " time not match, code:0x%x", pMsg->startTime,
|
||||
pSender->startTime, pMsg->code);
|
||||
goto _ERROR;
|
||||
}
|
||||
|
||||
if (pMsg->term != pSyncNode->pRaftStore->currentTerm) {
|
||||
sSError(pSender, "snapshot sender term not equal");
|
||||
return -1;
|
||||
syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "snapshot sender and receiver term not match");
|
||||
sSError(pSender, "snapshot sender term not equal, msg term:%" PRId64 " currentTerm:%" PRId64, pMsg->term,
|
||||
pSyncNode->pRaftStore->currentTerm);
|
||||
goto _ERROR;
|
||||
}
|
||||
|
||||
if (pMsg->code != 0) {
|
||||
syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "receive error code");
|
||||
sSError(pSender, "snapshot sender receive error code:0x%x and stop sender", pMsg->code);
|
||||
snapshotSenderStop(pSender, true);
|
||||
SSyncLogReplMgr *pMgr = syncNodeGetLogReplMgr(pSyncNode, &pMsg->srcId);
|
||||
if (pMgr) {
|
||||
syncLogReplMgrReset(pMgr);
|
||||
}
|
||||
|
||||
return -1;
|
||||
goto _ERROR;
|
||||
}
|
||||
|
||||
// prepare <begin, end>, send begin msg
|
||||
|
@ -1068,4 +1067,14 @@ int32_t syncNodeOnSnapshotRsp(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) {
|
|||
}
|
||||
|
||||
return 0;
|
||||
|
||||
_ERROR:
|
||||
snapshotSenderStop(pSender, true);
|
||||
SSyncLogReplMgr *pMgr = syncNodeGetLogReplMgr(pSyncNode, &pMsg->srcId);
|
||||
if (pMgr) {
|
||||
syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "reset repl mgr");
|
||||
syncLogReplMgrReset(pMgr);
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue