fix: error in sync sem

This commit is contained in:
Shengliang Guan 2022-05-27 15:21:23 +08:00
parent 9d93b75020
commit c44ec05229
9 changed files with 65 additions and 23 deletions

View File

@ -81,6 +81,7 @@ typedef struct {
bool standby; bool standby;
bool restored; bool restored;
int32_t errCode; int32_t errCode;
int32_t transId;
} SSyncMgmt; } SSyncMgmt;
typedef struct { typedef struct {

View File

@ -25,7 +25,7 @@ extern "C" {
int32_t mndInitSync(SMnode *pMnode); int32_t mndInitSync(SMnode *pMnode);
void mndCleanupSync(SMnode *pMnode); void mndCleanupSync(SMnode *pMnode);
bool mndIsMaster(SMnode *pMnode); bool mndIsMaster(SMnode *pMnode);
int32_t mndSyncPropose(SMnode *pMnode, SSdbRaw *pRaw); int32_t mndSyncPropose(SMnode *pMnode, SSdbRaw *pRaw, int32_t transId);
void mndSyncStart(SMnode *pMnode); void mndSyncStart(SMnode *pMnode);
void mndSyncStop(SMnode *pMnode); void mndSyncStop(SMnode *pMnode);

View File

@ -702,14 +702,17 @@ static int32_t mndProcessAlterMnodeReq(SRpcMsg *pReq) {
} }
} }
mTrace("trans:-1, sync reconfig will be proposed");
SSyncMgmt *pMgmt = &pMnode->syncMgmt; SSyncMgmt *pMgmt = &pMnode->syncMgmt;
pMgmt->standby = 0; pMgmt->standby = 0;
int32_t code = syncReconfig(pMgmt->sync, &cfg); int32_t code = syncReconfig(pMgmt->sync, &cfg);
if (code != 0) { if (code != 0) {
mError("failed to alter mnode sync since %s", terrstr()); mError("trans:-1, failed to propose sync reconfig since %s", terrstr());
return code; return code;
} else { } else {
pMgmt->errCode = 0; pMgmt->errCode = 0;
pMgmt->transId = -1;
tsem_wait(&pMgmt->syncSem); tsem_wait(&pMgmt->syncSem);
mInfo("alter mnode sync result:%s", tstrerror(pMgmt->errCode)); mInfo("alter mnode sync result:%s", tstrerror(pMgmt->errCode));
terrno = pMgmt->errCode; terrno = pMgmt->errCode;

View File

@ -28,16 +28,26 @@ int32_t mndSyncEqMsg(const SMsgCb *msgcb, SRpcMsg *pMsg) {
int32_t mndSyncSendMsg(const SEpSet *pEpSet, SRpcMsg *pMsg) { return tmsgSendReq(pEpSet, pMsg); } int32_t mndSyncSendMsg(const SEpSet *pEpSet, SRpcMsg *pMsg) { return tmsgSendReq(pEpSet, pMsg); }
void mndSyncCommitMsg(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbMeta) { void mndSyncCommitMsg(struct SSyncFSM *pFsm, const SRpcMsg *pMsg, SFsmCbMeta cbMeta) {
SMnode *pMnode = pFsm->data; SMnode *pMnode = pFsm->data;
SSdbRaw *pRaw = pMsg->pCont; SSyncMgmt *pMgmt = &pMnode->syncMgmt;
SSdbRaw *pRaw = pMsg->pCont;
mTrace("raw:%p, apply to sdb, ver:%" PRId64 " term:%" PRId64 " role:%s", pRaw, cbMeta.index, cbMeta.term, int32_t transId = sdbGetIdFromRaw(pRaw);
syncStr(cbMeta.state)); pMgmt->errCode = cbMeta.code;
sdbWriteWithoutFree(pMnode->pSdb, pRaw); mTrace("trans:%d, is proposed, savedTransId:%d code:0x%x, ver:%" PRId64 " term:%" PRId64 " role:%s raw:%p", transId,
sdbSetApplyIndex(pMnode->pSdb, cbMeta.index); pMgmt->transId, cbMeta.code, cbMeta.index, cbMeta.term, syncStr(cbMeta.state), pRaw);
sdbSetApplyTerm(pMnode->pSdb, cbMeta.term);
if (cbMeta.state == TAOS_SYNC_STATE_LEADER) { if (pMgmt->errCode == 0) {
tsem_post(&pMnode->syncMgmt.syncSem); sdbWriteWithoutFree(pMnode->pSdb, pRaw);
sdbSetApplyIndex(pMnode->pSdb, cbMeta.index);
sdbSetApplyTerm(pMnode->pSdb, cbMeta.term);
}
if (pMgmt->transId == transId) {
if (pMgmt->errCode != 0) {
mError("trans:%d, failed to propose since %s", transId, tstrerror(pMgmt->errCode));
}
tsem_post(&pMgmt->syncSem);
} }
} }
@ -78,11 +88,19 @@ int32_t mndSnapshotApply(struct SSyncFSM* pFsm, const SSnapshot* pSnapshot, char
} }
void mndReConfig(struct SSyncFSM *pFsm, SSyncCfg newCfg, SReConfigCbMeta cbMeta) { void mndReConfig(struct SSyncFSM *pFsm, SSyncCfg newCfg, SReConfigCbMeta cbMeta) {
mInfo("mndReConfig cbMeta.code:%d, cbMeta.currentTerm:%" PRId64 ", cbMeta.term:%" PRId64 ", cbMeta.index:%" PRId64, SMnode *pMnode = pFsm->data;
cbMeta.code, cbMeta.currentTerm, cbMeta.term, cbMeta.index); SSyncMgmt *pMgmt = &pMnode->syncMgmt;
SMnode *pMnode = pFsm->data;
pMnode->syncMgmt.errCode = cbMeta.code; pMgmt->errCode = cbMeta.code;
tsem_post(&pMnode->syncMgmt.syncSem); mInfo("trans:-1, sync reconfig is proposed, savedTransId:%d code:0x%x, curTerm:%" PRId64 " term:%" PRId64,
pMgmt->transId, cbMeta.code, cbMeta.index, cbMeta.term);
if (pMgmt->transId == -1) {
if (pMgmt->errCode != 0) {
mError("trans:-1, failed to propose sync reconfig since %s", tstrerror(pMgmt->errCode));
}
tsem_post(&pMgmt->syncSem);
}
} }
SSyncFSM *mndSyncMakeFsm(SMnode *pMnode) { SSyncFSM *mndSyncMakeFsm(SMnode *pMnode) {
@ -165,15 +183,17 @@ void mndCleanupSync(SMnode *pMnode) {
memset(pMgmt, 0, sizeof(SSyncMgmt)); memset(pMgmt, 0, sizeof(SSyncMgmt));
} }
int32_t mndSyncPropose(SMnode *pMnode, SSdbRaw *pRaw) { int32_t mndSyncPropose(SMnode *pMnode, SSdbRaw *pRaw, int32_t transId) {
SSyncMgmt *pMgmt = &pMnode->syncMgmt; SSyncMgmt *pMgmt = &pMnode->syncMgmt;
pMgmt->errCode = 0; SRpcMsg rsp = {.code = TDMT_MND_APPLY_MSG, .contLen = sdbGetRawTotalSize(pRaw)};
SRpcMsg rsp = {.code = TDMT_MND_APPLY_MSG, .contLen = sdbGetRawTotalSize(pRaw)};
rsp.pCont = rpcMallocCont(rsp.contLen); rsp.pCont = rpcMallocCont(rsp.contLen);
if (rsp.pCont == NULL) return -1; if (rsp.pCont == NULL) return -1;
memcpy(rsp.pCont, pRaw, rsp.contLen); memcpy(rsp.pCont, pRaw, rsp.contLen);
pMgmt->errCode = 0;
pMgmt->transId = transId;
mTrace("trans:%d, will be proposed", pMgmt->transId);
const bool isWeak = false; const bool isWeak = false;
int32_t code = syncPropose(pMgmt->sync, &rsp, isWeak); int32_t code = syncPropose(pMgmt->sync, &rsp, isWeak);
if (code == 0) { if (code == 0) {
@ -187,7 +207,11 @@ int32_t mndSyncPropose(SMnode *pMnode, SSdbRaw *pRaw) {
} }
rpcFreeCont(rsp.pCont); rpcFreeCont(rsp.pCont);
if (code != 0) return code; if (code != 0) {
mError("trans:%d, failed to propose, code:0x%x", pMgmt->transId, code);
return code;
}
return pMgmt->errCode; return pMgmt->errCode;
} }

View File

@ -680,7 +680,7 @@ static int32_t mndTransSync(SMnode *pMnode, STrans *pTrans) {
sdbSetRawStatus(pRaw, SDB_STATUS_READY); sdbSetRawStatus(pRaw, SDB_STATUS_READY);
mDebug("trans:%d, sync to other nodes", pTrans->id); mDebug("trans:%d, sync to other nodes", pTrans->id);
int32_t code = mndSyncPropose(pMnode, pRaw); int32_t code = mndSyncPropose(pMnode, pRaw, pTrans->id);
if (code != 0) { if (code != 0) {
mError("trans:%d, failed to sync since %s", pTrans->id, terrstr()); mError("trans:%d, failed to sync since %s", pTrans->id, terrstr());
sdbFreeRaw(pRaw); sdbFreeRaw(pRaw);

View File

@ -386,6 +386,8 @@ SSdbIter *sdbIterRead(SSdb *pSdb, SSdbIter *iter, char **ppBuf, int32_t *len);
const char *sdbTableName(ESdbType type); const char *sdbTableName(ESdbType type);
void sdbPrintOper(SSdb *pSdb, SSdbRow *pRow, const char *oper); void sdbPrintOper(SSdb *pSdb, SSdbRow *pRow, const char *oper);
int32_t sdbGetIdFromRaw(SSdbRaw *pRaw);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

View File

@ -16,6 +16,11 @@
#define _DEFAULT_SOURCE #define _DEFAULT_SOURCE
#include "sdb.h" #include "sdb.h"
int32_t sdbGetIdFromRaw(SSdbRaw *pRaw) {
int32_t id = *((int32_t *)(pRaw->pData));
return id;
}
SSdbRaw *sdbAllocRaw(ESdbType type, int8_t sver, int32_t dataLen) { SSdbRaw *sdbAllocRaw(ESdbType type, int8_t sver, int32_t dataLen) {
SSdbRaw *pRaw = taosMemoryCalloc(1, dataLen + sizeof(SSdbRaw)); SSdbRaw *pRaw = taosMemoryCalloc(1, dataLen + sizeof(SSdbRaw));
if (pRaw == NULL) { if (pRaw == NULL) {

View File

@ -123,5 +123,12 @@ sql create table db.stb (ts timestamp, c1 int, c2 binary(4)) tags(t1 int, t2 flo
sql create table db.ctb using db.stb tags(101, 102, "103") sql create table db.ctb using db.stb tags(101, 102, "103")
sql insert into db.ctb values(now, 1, "2") sql insert into db.ctb values(now, 1, "2")
sql select * from db.ctb
print $data[0][0] $data[0][1] $data[0][2] $data[0][3] $data[0][4] $data[0][5] $data[0][6]
if $rows != 1 then
return -1
endi
system sh/exec.sh -n dnode1 -s stop system sh/exec.sh -n dnode1 -s stop
system sh/exec.sh -n dnode2 -s stop system sh/exec.sh -n dnode2 -s stop

View File

@ -71,7 +71,7 @@ print ====> start to check if there are ERRORS in vagrind log file for each dnod
# -n : dnode[x] be check # -n : dnode[x] be check
system_content sh/checkValgrind.sh -n dnode1 system_content sh/checkValgrind.sh -n dnode1
print cmd return result----> [ $system_content ] print cmd return result----> [ $system_content ]
if $system_content <= 1 then if $system_content <= 2 then
return 0 return 0
endi endi