From 2526df745ee62db8786adcd732e8b0cd4646a723 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Thu, 28 Dec 2023 17:15:09 +0800 Subject: [PATCH 1/3] enh: adjust threshold of snap replication timeout --- include/libs/sync/sync.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/libs/sync/sync.h b/include/libs/sync/sync.h index e54237fe8b..cb053d2548 100644 --- a/include/libs/sync/sync.h +++ b/include/libs/sync/sync.h @@ -46,8 +46,8 @@ extern "C" { #define SYNC_HEARTBEAT_SLOW_MS 1500 #define SYNC_HEARTBEAT_REPLY_SLOW_MS 1500 -#define SYNC_SNAP_RESEND_MS 1000 * 300 -#define SYNC_SNAP_TIMEOUT_MS 1000 * 1800 +#define SYNC_SNAP_RESEND_MS 1000 * 60 +#define SYNC_SNAP_TIMEOUT_MS 1000 * 300 #define SYNC_VND_COMMIT_MIN_MS 3000 From d83f85bf2642982d8fdcb5bd53b58011cc8f81e1 Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Thu, 28 Dec 2023 17:26:46 +0800 Subject: [PATCH 2/3] enh: send rsp msg on rejecting snap replication due to smaller term --- source/libs/sync/src/syncSnapshot.c | 1 + 1 file changed, 1 insertion(+) diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index 10a8734617..f0e457ef8d 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -1001,6 +1001,7 @@ int32_t syncNodeOnSnapshot(SSyncNode *pSyncNode, SRpcMsg *pRpcMsg) { sRError(pReceiver, "reject snap replication with smaller term. msg term:%" PRId64 ", seq:%d", pMsg->term, pMsg->seq); terrno = TSDB_CODE_SYN_MISMATCHED_SIGNATURE; + syncSnapSendRsp(pReceiver, pMsg, NULL, 0, 0, terrno); return -1; } From baa7f9c895faf0ed12d59c5ff02460ae8eb3f1da Mon Sep 17 00:00:00 2001 From: Benguang Zhao Date: Thu, 28 Dec 2023 17:29:49 +0800 Subject: [PATCH 3/3] enh: adjust error msg as warn for not ready to propose etc --- source/dnode/vnode/src/vnd/vnodeSync.c | 8 ++++---- source/libs/sync/src/syncMain.c | 6 +++--- source/libs/sync/src/syncPipeline.c | 6 +++--- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index 048092131d..0f491e9a58 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -220,8 +220,8 @@ void vnodeProposeWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) isWeak, isBlock, msg, numOfMsgs, arrayPos, pMsg->info.handle); if (!pVnode->restored) { - vGError("vgId:%d, msg:%p failed to process since restore not finished, type:%s", vgId, pMsg, - TMSG_INFO(pMsg->msgType)); + vGWarn("vgId:%d, msg:%p failed to process since restore not finished, type:%s", vgId, pMsg, + TMSG_INFO(pMsg->msgType)); terrno = TSDB_CODE_SYN_RESTORING; vnodeHandleProposeError(pVnode, pMsg, TSDB_CODE_SYN_RESTORING); rpcFreeCont(pMsg->pCont); @@ -284,8 +284,8 @@ void vnodeProposeWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) vnodeIsMsgBlock(pMsg->msgType), msg, numOfMsgs, pMsg->info.handle); if (!pVnode->restored) { - vGError("vgId:%d, msg:%p failed to process since restore not finished, type:%s", vgId, pMsg, - TMSG_INFO(pMsg->msgType)); + vGWarn("vgId:%d, msg:%p failed to process since restore not finished, type:%s", vgId, pMsg, + TMSG_INFO(pMsg->msgType)); vnodeHandleProposeError(pVnode, pMsg, TSDB_CODE_SYN_RESTORING); rpcFreeCont(pMsg->pCont); taosFreeQitem(pMsg); diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index 6f3b3fdf98..52557f7b9c 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -662,14 +662,14 @@ ESyncRole syncGetRole(int64_t rid) { int32_t syncNodePropose(SSyncNode* pSyncNode, SRpcMsg* pMsg, bool isWeak, int64_t* seq) { if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) { terrno = TSDB_CODE_SYN_NOT_LEADER; - sNError(pSyncNode, "sync propose not leader, type:%s", TMSG_INFO(pMsg->msgType)); + sNWarn(pSyncNode, "sync propose not leader, type:%s", TMSG_INFO(pMsg->msgType)); return -1; } if (!pSyncNode->restoreFinish) { terrno = TSDB_CODE_SYN_PROPOSE_NOT_READY; - sNError(pSyncNode, "failed to sync propose since not ready, type:%s, last:%" PRId64 ", cmt:%" PRId64, - TMSG_INFO(pMsg->msgType), syncNodeGetLastIndex(pSyncNode), pSyncNode->commitIndex); + sNWarn(pSyncNode, "failed to sync propose since not ready, type:%s, last:%" PRId64 ", cmt:%" PRId64, + TMSG_INFO(pMsg->msgType), syncNodeGetLastIndex(pSyncNode), pSyncNode->commitIndex); return -1; } diff --git a/source/libs/sync/src/syncPipeline.c b/source/libs/sync/src/syncPipeline.c index 28ee5ba841..70bdd4a837 100644 --- a/source/libs/sync/src/syncPipeline.c +++ b/source/libs/sync/src/syncPipeline.c @@ -206,7 +206,7 @@ int32_t syncLogBufferInitWithoutLock(SSyncLogBuffer* pBuf, SSyncNode* pNode) { } if (pLogStore->syncLogGetEntry(pLogStore, index, &pEntry) < 0) { - sError("vgId:%d, failed to get log entry since %s. index:%" PRId64 "", pNode->vgId, terrstr(), index); + sWarn("vgId:%d, failed to get log entry since %s. index:%" PRId64 "", pNode->vgId, terrstr(), index); break; } @@ -1237,7 +1237,7 @@ SSyncRaftEntry* syncLogBufferGetOneEntry(SSyncLogBuffer* pBuf, SSyncNode* pNode, } else { *pInBuf = false; if (pNode->pLogStore->syncLogGetEntry(pNode->pLogStore, index, &pEntry) < 0) { - sError("vgId:%d, failed to get log entry since %s. index:%" PRId64 "", pNode->vgId, terrstr(), index); + sWarn("vgId:%d, failed to get log entry since %s. index:%" PRId64 "", pNode->vgId, terrstr(), index); } } return pEntry; @@ -1253,7 +1253,7 @@ int32_t syncLogReplSendTo(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncIndex ind pEntry = syncLogBufferGetOneEntry(pBuf, pNode, index, &inBuf); if (pEntry == NULL) { - sError("vgId:%d, failed to get raft entry for index:%" PRId64 "", pNode->vgId, index); + sWarn("vgId:%d, failed to get raft entry for index:%" PRId64 "", pNode->vgId, index); if (terrno == TSDB_CODE_WAL_LOG_NOT_EXIST) { SSyncLogReplMgr* pMgr = syncNodeGetLogReplMgr(pNode, pDestId); if (pMgr) {