From 2c28cdcbb59c935db1b9ddf88c96f49d6776b910 Mon Sep 17 00:00:00 2001 From: Shengliang Guan Date: Fri, 12 Aug 2022 18:49:28 +0800 Subject: [PATCH] fix: deadlock of mnode if its state changed --- source/dnode/mnode/impl/src/mndMnode.c | 2 ++ source/dnode/mnode/impl/src/mndSync.c | 23 +++++++++++++++-------- source/dnode/vnode/src/vnd/vnodeSync.c | 7 +++++++ 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndMnode.c b/source/dnode/mnode/impl/src/mndMnode.c index 13655ac21f..4f07d9e014 100644 --- a/source/dnode/mnode/impl/src/mndMnode.c +++ b/source/dnode/mnode/impl/src/mndMnode.c @@ -742,7 +742,9 @@ static int32_t mndProcessAlterMnodeReq(SRpcMsg *pReq) { return code; } else { pMgmt->errCode = 0; + taosWLockLatch(&pMgmt->lock); pMgmt->transId = -1; + taosWUnLockLatch(&pMgmt->lock); tsem_wait(&pMgmt->syncSem); mInfo("alter mnode sync result:0x%x %s", pMgmt->errCode, tstrerror(pMgmt->errCode)); terrno = pMgmt->errCode; diff --git a/source/dnode/mnode/impl/src/mndSync.c b/source/dnode/mnode/impl/src/mndSync.c index 03e5c2b3a2..e899f71052 100644 --- a/source/dnode/mnode/impl/src/mndSync.c +++ b/source/dnode/mnode/impl/src/mndSync.c @@ -60,22 +60,22 @@ void mndSyncCommitMsg(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbM sdbSetApplyInfo(pMnode->pSdb, cbMeta.index, cbMeta.term, cbMeta.lastConfigIndex); } - taosRLockLatch(&pMgmt->lock); + taosWLockLatch(&pMgmt->lock); if (transId <= 0) { - taosRUnLockLatch(&pMgmt->lock); + taosWUnLockLatch(&pMgmt->lock); mError("trans:%d, invalid commit msg", transId); } else if (transId == pMgmt->transId) { - taosRUnLockLatch(&pMgmt->lock); if (pMgmt->errCode != 0) { mError("trans:%d, failed to propose since %s", transId, tstrerror(pMgmt->errCode)); } pMgmt->transId = 0; + taosWUnLockLatch(&pMgmt->lock); tsem_post(&pMgmt->syncSem); } else { - taosRUnLockLatch(&pMgmt->lock); + taosWUnLockLatch(&pMgmt->lock); STrans *pTrans = mndAcquireTrans(pMnode, transId); if (pTrans != NULL) { - 
mDebug("trans:%d, execute in mnode which not leader", transId); + mInfo("trans:%d, execute in mnode which not leader", transId); mndTransExecute(pMnode, pTrans); mndReleaseTrans(pMnode, pTrans); // sdbWriteFile(pMnode->pSdb, SDB_WRITE_DELTA); @@ -275,9 +275,16 @@ int32_t mndSyncPropose(SMnode *pMnode, SSdbRaw *pRaw, int32_t transId) { pMgmt->errCode = 0; taosWLockLatch(&pMgmt->lock); - pMgmt->transId = transId; - taosWUnLockLatch(&pMgmt->lock); - mTrace("trans:%d, will be proposed", pMgmt->transId); + if (pMgmt->transId != 0) { + mInfo("trans:%d, can't be proposed since trans:%d already waiting for confirm", transId, pMgmt->transId); + taosWUnLockLatch(&pMgmt->lock); + terrno = TSDB_CODE_APP_NOT_READY; + return -1; + } else { + pMgmt->transId = transId; + mDebug("trans:%d, will be proposed", pMgmt->transId); + taosWUnLockLatch(&pMgmt->lock); + } const bool isWeak = false; int32_t code = syncPropose(pMgmt->sync, &req, isWeak); diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index fdd930ebd8..c7b5ce052d 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -696,6 +696,13 @@ static void vnodeBecomeFollower(struct SSyncFSM *pFsm) { static void vnodeBecomeLeader(struct SSyncFSM *pFsm) { SVnode *pVnode = pFsm->data; vDebug("vgId:%d, become leader", pVnode->config.vgId); + + taosThreadMutexLock(&pVnode->lock); + if (pVnode->blocked) { + pVnode->blocked = false; + tsem_post(&pVnode->syncSem); + } + taosThreadMutexUnlock(&pVnode->lock); } static SSyncFSM *vnodeSyncMakeFsm(SVnode *pVnode) {