fix: deadlock of mnode if its state changed

This commit is contained in:
Shengliang Guan 2022-08-12 18:49:28 +08:00
parent 646cb378ae
commit 2c28cdcbb5
3 changed files with 24 additions and 8 deletions

View File

@ -742,7 +742,9 @@ static int32_t mndProcessAlterMnodeReq(SRpcMsg *pReq) {
return code; return code;
} else { } else {
pMgmt->errCode = 0; pMgmt->errCode = 0;
taosWLockLatch(&pMgmt->lock);
pMgmt->transId = -1; pMgmt->transId = -1;
taosWUnLockLatch(&pMgmt->lock);
tsem_wait(&pMgmt->syncSem); tsem_wait(&pMgmt->syncSem);
mInfo("alter mnode sync result:0x%x %s", pMgmt->errCode, tstrerror(pMgmt->errCode)); mInfo("alter mnode sync result:0x%x %s", pMgmt->errCode, tstrerror(pMgmt->errCode));
terrno = pMgmt->errCode; terrno = pMgmt->errCode;

View File

@ -60,22 +60,22 @@ void mndSyncCommitMsg(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbM
sdbSetApplyInfo(pMnode->pSdb, cbMeta.index, cbMeta.term, cbMeta.lastConfigIndex); sdbSetApplyInfo(pMnode->pSdb, cbMeta.index, cbMeta.term, cbMeta.lastConfigIndex);
} }
taosRLockLatch(&pMgmt->lock); taosWLockLatch(&pMgmt->lock);
if (transId <= 0) { if (transId <= 0) {
taosRUnLockLatch(&pMgmt->lock); taosWUnLockLatch(&pMgmt->lock);
mError("trans:%d, invalid commit msg", transId); mError("trans:%d, invalid commit msg", transId);
} else if (transId == pMgmt->transId) { } else if (transId == pMgmt->transId) {
taosRUnLockLatch(&pMgmt->lock);
if (pMgmt->errCode != 0) { if (pMgmt->errCode != 0) {
mError("trans:%d, failed to propose since %s", transId, tstrerror(pMgmt->errCode)); mError("trans:%d, failed to propose since %s", transId, tstrerror(pMgmt->errCode));
} }
pMgmt->transId = 0; pMgmt->transId = 0;
taosWUnLockLatch(&pMgmt->lock);
tsem_post(&pMgmt->syncSem); tsem_post(&pMgmt->syncSem);
} else { } else {
taosRUnLockLatch(&pMgmt->lock); taosWUnLockLatch(&pMgmt->lock);
STrans *pTrans = mndAcquireTrans(pMnode, transId); STrans *pTrans = mndAcquireTrans(pMnode, transId);
if (pTrans != NULL) { if (pTrans != NULL) {
mDebug("trans:%d, execute in mnode which not leader", transId); mInfo("trans:%d, execute in mnode which not leader", transId);
mndTransExecute(pMnode, pTrans); mndTransExecute(pMnode, pTrans);
mndReleaseTrans(pMnode, pTrans); mndReleaseTrans(pMnode, pTrans);
// sdbWriteFile(pMnode->pSdb, SDB_WRITE_DELTA); // sdbWriteFile(pMnode->pSdb, SDB_WRITE_DELTA);
@ -275,9 +275,16 @@ int32_t mndSyncPropose(SMnode *pMnode, SSdbRaw *pRaw, int32_t transId) {
pMgmt->errCode = 0; pMgmt->errCode = 0;
taosWLockLatch(&pMgmt->lock); taosWLockLatch(&pMgmt->lock);
pMgmt->transId = transId; if (pMgmt->transId != 0) {
taosWUnLockLatch(&pMgmt->lock); mInfo("trans:%d, can't be proposed since trans:%s alrady waiting for confirm", transId, pMgmt->transId);
mTrace("trans:%d, will be proposed", pMgmt->transId); taosWUnLockLatch(&pMgmt->lock);
terrno = TSDB_CODE_APP_NOT_READY;
return -1;
} else {
pMgmt->transId = transId;
mDebug("trans:%d, will be proposed", pMgmt->transId);
taosWUnLockLatch(&pMgmt->lock);
}
const bool isWeak = false; const bool isWeak = false;
int32_t code = syncPropose(pMgmt->sync, &req, isWeak); int32_t code = syncPropose(pMgmt->sync, &req, isWeak);

View File

@ -696,6 +696,13 @@ static void vnodeBecomeFollower(struct SSyncFSM *pFsm) {
static void vnodeBecomeLeader(struct SSyncFSM *pFsm) { static void vnodeBecomeLeader(struct SSyncFSM *pFsm) {
SVnode *pVnode = pFsm->data; SVnode *pVnode = pFsm->data;
vDebug("vgId:%d, become leader", pVnode->config.vgId); vDebug("vgId:%d, become leader", pVnode->config.vgId);
taosThreadMutexLock(&pVnode->lock);
if (pVnode->blocked) {
pVnode->blocked = false;
tsem_post(&pVnode->syncSem);
}
taosThreadMutexUnlock(&pVnode->lock);
} }
static SSyncFSM *vnodeSyncMakeFsm(SVnode *pVnode) { static SSyncFSM *vnodeSyncMakeFsm(SVnode *pVnode) {