From b0f048a3f2d539fc54003f1820cc3595b863fa3b Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Wed, 15 Jun 2022 20:57:55 +0800 Subject: [PATCH 1/2] feat: redistribute vgroup --- source/dnode/mnode/impl/src/mndTrans.c | 19 +++++++++++++++---- source/dnode/vnode/src/vnd/vnodeSync.c | 14 +++++++++++++- ...distribute_vgroup_replica3_v1_follower.sim | 19 +++++++++++-------- 3 files changed, 39 insertions(+), 13 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndTrans.c b/source/dnode/mnode/impl/src/mndTrans.c index 4be40e6a5c..5b819ab092 100644 --- a/source/dnode/mnode/impl/src/mndTrans.c +++ b/source/dnode/mnode/impl/src/mndTrans.c @@ -1078,6 +1078,8 @@ static int32_t mndTransExecuteRedoActionsSerial(SMnode *pMnode, STrans *pTrans) if (numOfActions == 0) return code; if (pTrans->redoActionPos >= numOfActions) return code; + int32_t retryTimes = 0; + for (int32_t action = pTrans->redoActionPos; action < numOfActions; ++action) { STransAction *pAction = taosArrayGet(pTrans->redoActions, pTrans->redoActionPos); @@ -1126,17 +1128,26 @@ static int32_t mndTransExecuteRedoActionsSerial(SMnode *pMnode, STrans *pTrans) pTrans->code = terrno; mError("trans:%d, %s:%d is executed and failed to sync to other mnodes since %s", pTrans->id, mndTransStr(pAction->stage), pAction->id, terrstr()); - break; + if (retryTimes >= 3) + break; + else + continue; } + retryTimes = 0; } else if (code == TSDB_CODE_ACTION_IN_PROGRESS) { mDebug("trans:%d, %s:%d is in progress and wait it finish", pTrans->id, mndTransStr(pAction->stage), pAction->id); break; } else { terrno = code; pTrans->code = code; - mError("trans:%d, %s:%d failed to execute since %s", pTrans->id, mndTransStr(pAction->stage), pAction->id, - terrstr()); - break; + pAction->epSet.inUse++; + if (pAction->epSet.inUse >= pAction->epSet.numOfEps) pAction->epSet.inUse = 0; + mError("trans:%d, %s:%d failed to execute since %s, inUse set to %d", pTrans->id, mndTransStr(pAction->stage), + pAction->id, terrstr(), pAction->epSet.inUse); + if (retryTimes >= 3) + break; + else + continue; } } diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index 975dff9fb6..ce0c21bc35 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -72,7 +72,19 @@ static int32_t vnodeProcessAlterReplicaReq(SVnode *pVnode, SRpcMsg *pMsg) { return -1; } - return syncPropose(pVnode->sync, &rpcMsg, false); + int32_t code = syncPropose(pVnode->sync, &rpcMsg, false); + if (code != 0) { + vDebug("vgId:%d, failed to propose reconfig msg since %s", TD_VID(pVnode), terrstr()); + if (syncLeaderTransfer(pVnode->sync) != 0) { + vError("vgId:%d, failed to transfer leader since %s", TD_VID(pVnode), terrstr()); + } else { + vDebug("vgId:%d, transfer leader success, propose reconfig config again", TD_VID(pVnode)); + taosMsleep(10); + code = syncPropose(pVnode->sync, &rpcMsg, false); + } + } + + return code; } void vnodeProposeMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) { diff --git a/tests/script/tsim/dnode/redistribute_vgroup_replica3_v1_follower.sim b/tests/script/tsim/dnode/redistribute_vgroup_replica3_v1_follower.sim index 73be4a35bf..3532332174 100644 --- a/tests/script/tsim/dnode/redistribute_vgroup_replica3_v1_follower.sim +++ b/tests/script/tsim/dnode/redistribute_vgroup_replica3_v1_follower.sim @@ -4,6 +4,11 @@ system sh/deploy.sh -n dnode2 -i 2 system sh/deploy.sh -n dnode3 -i 3 system sh/deploy.sh -n dnode4 -i 4 system sh/deploy.sh -n dnode5 -i 5 +system sh/cfg.sh -n dnode1 -c transPullupInterval -v 1 +system sh/cfg.sh -n dnode2 -c transPullupInterval -v 1 +system sh/cfg.sh -n dnode3 -c transPullupInterval -v 1 +system sh/cfg.sh -n dnode4 -c transPullupInterval -v 1 +system sh/cfg.sh -n dnode5 -c transPullupInterval -v 1 system sh/cfg.sh -n dnode1 -c supportVnodes -v 0 system sh/exec.sh -n dnode1 -s start system sh/exec.sh -n dnode2 -s start @@ -128,9 +133,9 @@ if $rows != 1 then endi if $data(2)[4] == leader then $leaderExist = 1 - $leaderVnode = 4 - $follower1 = 2 - $follower2 = 3 + $leaderVnode = 2 + $follower1 = 3 + $follower2 = 4 endi if $data(2)[6] == leader then $leaderExist = 1 @@ -140,9 +145,9 @@ if $data(2)[6] == leader then endi if $data(2)[8] == leader then $leaderExist = 1 - $leaderVnode = 2 - $follower1 = 3 - $follower2 = 4 + $leaderVnode = 4 + $follower1 = 2 + $follower2 = 3 endi if $leaderExist != 1 then goto step3 @@ -171,8 +176,6 @@ if $rows != 1 then return -1 endi -return - print =============== step33: move follower1 print redistribute vgroup 2 dnode $leaderVnode dnode $follower1 dnode 5 sql redistribute vgroup 2 dnode $leaderVnode dnode $follower1 dnode 5 From fdee8a0cb37910013b7de71a8452ea98f5163b54 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Thu, 16 Jun 2022 09:55:03 +0800 Subject: [PATCH 2/2] feat: redistribute vgroup --- source/dnode/mnode/impl/src/mndTrans.c | 19 +++---------------- source/dnode/vnode/src/vnd/vnodeSync.c | 2 -- 2 files changed, 3 insertions(+), 18 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndTrans.c b/source/dnode/mnode/impl/src/mndTrans.c index 5b819ab092..dafb2fa1d1 100644 --- a/source/dnode/mnode/impl/src/mndTrans.c +++ b/source/dnode/mnode/impl/src/mndTrans.c @@ -873,7 +873,8 @@ static void mndTransResetActions(SMnode *pMnode, STrans *pTrans, SArray *pArray) pAction->rawWritten = 0; pAction->msgSent = 0; pAction->msgReceived = 0; - if (pAction->errCode == TSDB_CODE_RPC_REDIRECT) { + if (pAction->errCode == TSDB_CODE_RPC_REDIRECT || pAction->errCode == TSDB_CODE_SYN_NOT_IN_NEW_CONFIG || + pAction->errCode == TSDB_CODE_SYN_INTERNAL_ERROR || pAction->errCode == TSDB_CODE_SYN_NOT_LEADER) { pAction->epSet.inUse = (pAction->epSet.inUse + 1) % pAction->epSet.numOfEps; mDebug("trans:%d, %s:%d execute status is reset and set epset inuse:%d", pTrans->id, mndTransStr(pAction->stage), action, pAction->epSet.inUse); @@ -1078,8 +1079,6 @@ static int32_t mndTransExecuteRedoActionsSerial(SMnode *pMnode, STrans *pTrans) if (numOfActions == 0) return code; if (pTrans->redoActionPos >= numOfActions) return code; - int32_t retryTimes = 0; - for (int32_t action = pTrans->redoActionPos; action < numOfActions; ++action) { STransAction *pAction = taosArrayGet(pTrans->redoActions, pTrans->redoActionPos); @@ -1128,26 +1127,14 @@ static int32_t mndTransExecuteRedoActionsSerial(SMnode *pMnode, STrans *pTrans) pTrans->code = terrno; mError("trans:%d, %s:%d is executed and failed to sync to other mnodes since %s", pTrans->id, mndTransStr(pAction->stage), pAction->id, terrstr()); - if (retryTimes >= 3) - break; - else - continue; } - retryTimes = 0; } else if (code == TSDB_CODE_ACTION_IN_PROGRESS) { mDebug("trans:%d, %s:%d is in progress and wait it finish", pTrans->id, mndTransStr(pAction->stage), pAction->id); break; } else { terrno = code; pTrans->code = code; - pAction->epSet.inUse++; - if (pAction->epSet.inUse >= pAction->epSet.numOfEps) pAction->epSet.inUse = 0; - mError("trans:%d, %s:%d failed to execute since %s, inUse set to %d", pTrans->id, mndTransStr(pAction->stage), - pAction->id, terrstr(), pAction->epSet.inUse); - if (retryTimes >= 3) - break; - else - continue; + break; } } diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index ce0c21bc35..25661d2e69 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -79,8 +79,6 @@ static int32_t vnodeProcessAlterReplicaReq(SVnode *pVnode, SRpcMsg *pMsg) { vError("vgId:%d, failed to transfer leader since %s", TD_VID(pVnode), terrstr()); } else { vDebug("vgId:%d, transfer leader success, propose reconfig config again", TD_VID(pVnode)); - taosMsleep(10); - code = syncPropose(pVnode->sync, &rpcMsg, false); } }