enh: terminate snap replication on timeout

This commit is contained in:
Benguang Zhao 2023-10-27 19:21:54 +08:00
parent c3f9cae36b
commit 70e261f662
3 changed files with 15 additions and 7 deletions

View File

@ -46,7 +46,7 @@ extern "C" {
/*
 * Timing thresholds for the sync module; the _MS suffix indicates milliseconds.
 * Every arithmetic replacement list is parenthesized so the macro behaves as a
 * single operand in any expression (e.g. `x / SYNC_SNAP_RESEND_MS`).
 */
#define SYNC_HEARTBEAT_SLOW_MS 1500
#define SYNC_HEARTBEAT_REPLY_SLOW_MS 1500
/* 60 s: threshold compared against the time since a snapshot sender last sent. */
#define SYNC_SNAP_RESEND_MS (1000 * 60)
/*
 * NOTE(review): two definitions of SYNC_SNAP_TIMEOUT_MS appear here (180 s and
 * 600 s); they look like the before/after lines of the change. The later one
 * is what the preprocessor uses from that point on -- confirm only one remains
 * in the real header.
 */
#define SYNC_SNAP_TIMEOUT_MS (1000 * 180)
#define SYNC_SNAP_TIMEOUT_MS (1000 * 600)
#define SYNC_VND_COMMIT_MIN_MS 3000

View File

@ -327,7 +327,6 @@ _OUT:;
int32_t snapshotReSend(SSyncSnapshotSender *pSender) {
SSyncSnapBuffer *pSndBuf = pSender->pSndBuf;
int32_t code = -1;
taosThreadMutexLock(&pSndBuf->mutex);
for (int32_t seq = pSndBuf->cursor + 1; seq < pSndBuf->end; ++seq) {
@ -366,12 +365,11 @@ int32_t snapshotReSend(SSyncSnapshotSender *pSender) {
goto _out;
}
pBlk->sendTimeMs = nowMs;
pSender->lastSendTime = nowMs;
}
code = 0;
_out:;
taosThreadMutexUnlock(&pSndBuf->mutex);
return 0;
return code;
}
// return 0, start ok

View File

@ -77,9 +77,19 @@ static int32_t syncNodeTimerRoutine(SSyncNode* ths) {
for (int i = 0; i < ths->peersNum; ++i) {
SSyncSnapshotSender* pSender = syncNodeGetSnapshotSender(ths, &(ths->peersId[i]));
if (pSender != NULL) {
if (ths->isStart && ths->state == TAOS_SYNC_STATE_LEADER && pSender->start &&
timeNow - pSender->lastSendTime > SYNC_SNAP_RESEND_MS) {
snapshotReSend(pSender);
if (ths->isStart && ths->state == TAOS_SYNC_STATE_LEADER && pSender->start) {
int64_t elapsedMs = timeNow - pSender->lastSendTime;
if (elapsedMs < SYNC_SNAP_RESEND_MS) {
continue;
}
if (elapsedMs > SYNC_SNAP_TIMEOUT_MS) {
sSError(pSender, "snap replication timeout, terminate.");
snapshotSenderStop(pSender, false);
} else {
sSWarn(pSender, "snap replication resend.");
snapshotReSend(pSender);
}
}
}
}