From c1557c423f72a74e8c9e3ecc3674beb90f6a785d Mon Sep 17 00:00:00 2001 From: dmchen Date: Tue, 15 Oct 2024 01:12:52 +0000 Subject: [PATCH] fix/TD-32555-reset-actionpos-when-rollbak --- source/dnode/mnode/impl/src/mndTrans.c | 44 +++++++++++++++----------- source/util/src/terror.c | 2 +- 2 files changed, 27 insertions(+), 19 deletions(-) diff --git a/source/dnode/mnode/impl/src/mndTrans.c b/source/dnode/mnode/impl/src/mndTrans.c index fcf7905cd1..657601f5ae 100644 --- a/source/dnode/mnode/impl/src/mndTrans.c +++ b/source/dnode/mnode/impl/src/mndTrans.c @@ -1252,8 +1252,9 @@ int32_t mndTransProcessRsp(SRpcMsg *pRsp) { pAction->errCode = pRsp->code; pTrans->lastErrorNo = pRsp->code; - mInfo("trans:%d, %s:%d response is received, code:0x%x, accept:0x%x retry:0x%x", transId, - mndTransStr(pAction->stage), action, pRsp->code, pAction->acceptableCode, pAction->retryCode); + mInfo("trans:%d, %s:%d response is received, received code:0x%x(%s), accept:0x%x(%s) retry:0x%x(%s)", transId, + mndTransStr(pAction->stage), action, pRsp->code, tstrerror(pRsp->code), pAction->acceptableCode, + tstrerror(pAction->acceptableCode), pAction->retryCode, tstrerror(pAction->retryCode)); } else { mInfo("trans:%d, invalid action, index:%d, code:0x%x", transId, action, pRsp->code); } @@ -1469,8 +1470,8 @@ static int32_t mndTransExecuteActions(SMnode *pMnode, STrans *pTrans, SArray *pA static int32_t mndTransExecuteRedoActions(SMnode *pMnode, STrans *pTrans, bool topHalf) { int32_t code = mndTransExecuteActions(pMnode, pTrans, pTrans->redoActions, topHalf); if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS && code != TSDB_CODE_MND_TRANS_CTX_SWITCH) { - mError("trans:%d, failed to execute redoActions since:%s, code:0x%x, topHalf:%d", pTrans->id, terrstr(), terrno, - topHalf); + mError("trans:%d, failed to execute redoActions since:%s, code:0x%x, topHalf(TransContext):%d", pTrans->id, + terrstr(), terrno, topHalf); } return code; } @@ -1478,7 +1479,8 @@ static int32_t mndTransExecuteRedoActions(SMnode *pMnode, STrans *pTrans, bool t static int32_t mndTransExecuteUndoActions(SMnode *pMnode, STrans *pTrans, bool topHalf) { int32_t code = mndTransExecuteActions(pMnode, pTrans, pTrans->undoActions, topHalf); if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS && code != TSDB_CODE_MND_TRANS_CTX_SWITCH) { - mError("trans:%d, failed to execute undoActions since %s. topHalf:%d", pTrans->id, terrstr(), topHalf); + mError("trans:%d, failed to execute undoActions since %s. topHalf(TransContext):%d", pTrans->id, terrstr(), + topHalf); } return code; } @@ -1486,7 +1488,8 @@ static int32_t mndTransExecuteUndoActions(SMnode *pMnode, STrans *pTrans, bool t static int32_t mndTransExecuteCommitActions(SMnode *pMnode, STrans *pTrans, bool topHalf) { int32_t code = mndTransExecuteActions(pMnode, pTrans, pTrans->commitActions, topHalf); if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS && code != TSDB_CODE_MND_TRANS_CTX_SWITCH) { - mError("trans:%d, failed to execute commitActions since %s. topHalf:%d", pTrans->id, terrstr(), topHalf); + mError("trans:%d, failed to execute commitActions since %s. topHalf(TransContext):%d", pTrans->id, terrstr(), + topHalf); } return code; } @@ -1500,11 +1503,15 @@ static int32_t mndTransExecuteActionsSerial(SMnode *pMnode, STrans *pTrans, SArr return code; } - mInfo("trans:%d, execute %d actions serial, current action:%d", pTrans->id, numOfActions, pTrans->actionPos); + mInfo("trans:%d, execute %d actions serial, begin at action:%d, stage:%s", pTrans->id, numOfActions, + pTrans->actionPos, mndTransStr(pTrans->stage)); for (int32_t action = pTrans->actionPos; action < numOfActions; ++action) { STransAction *pAction = taosArrayGet(pActions, action); + mInfo("trans:%d, current action:%d, stage:%s, actionType(0:log,1:msg):%d", pTrans->id, pTrans->actionPos, + mndTransStr(pAction->stage), pAction->actionType); + code = mndTransExecSingleAction(pMnode, pTrans, pAction, topHalf); if (code == 0) { if (pAction->msgSent) { @@ -1536,8 +1543,8 @@ static int32_t mndTransExecuteActionsSerial(SMnode *pMnode, STrans *pTrans, SArr if (mndCannotExecuteTransAction(pMnode, topHalf)) { pTrans->lastErrorNo = code; pTrans->code = code; - mInfo("trans:%d, %s:%d, topHalf:%d, not execute next action, code:%s", pTrans->id, mndTransStr(pAction->stage), - action, topHalf, tstrerror(code)); + mInfo("trans:%d, %s:%d, topHalf(TransContext):%d, not execute next action, code:%s", pTrans->id, + mndTransStr(pAction->stage), action, topHalf, tstrerror(code)); break; } @@ -1561,7 +1568,8 @@ static int32_t mndTransExecuteActionsSerial(SMnode *pMnode, STrans *pTrans, SArr break; } else if (code == pAction->retryCode || code == TSDB_CODE_SYN_PROPOSE_NOT_READY || code == TSDB_CODE_SYN_RESTORING || code == TSDB_CODE_SYN_NOT_LEADER) { - mInfo("trans:%d, %s:%d receive code:0x%x and retry", pTrans->id, mndTransStr(pAction->stage), pAction->id, code); + mInfo("trans:%d, %s:%d receive code:0x%x(%s) and retry", pTrans->id, mndTransStr(pAction->stage), pAction->id, + code, tstrerror(code)); pTrans->lastErrorNo = code; taosMsleep(300); action--; @@ -1570,8 +1578,8 @@ static int32_t mndTransExecuteActionsSerial(SMnode *pMnode, STrans *pTrans, SArr terrno = code; pTrans->lastErrorNo = code; pTrans->code = code; - mInfo("trans:%d, %s:%d receive code:0x%x and wait another schedule, failedTimes:%d", pTrans->id, - mndTransStr(pAction->stage), pAction->id, code, pTrans->failedTimes); + mInfo("trans:%d, %s:%d receive code:0x%x(%s) and wait another schedule, failedTimes:%d", pTrans->id, + mndTransStr(pAction->stage), pAction->id, code, tstrerror(code), pTrans->failedTimes); break; } } @@ -1647,8 +1655,8 @@ static bool mndTransPerformRedoActionStage(SMnode *pMnode, STrans *pTrans, bool } else { continueExec = false; } - mInfo("trans:%d, cannot execute redo action stage, topHalf:%d, continueExec:%d, code:%s", pTrans->id, topHalf, - continueExec, tstrerror(code)); + mInfo("trans:%d, cannot execute redo action stage, topHalf(TransContext):%d, continueExec:%d, code:%s", pTrans->id, + topHalf, continueExec, tstrerror(code)); return continueExec; } @@ -1680,7 +1688,9 @@ static bool mndTransPerformRedoActionStage(SMnode *pMnode, STrans *pTrans, bool } pTrans->stage = TRN_STAGE_ROLLBACK; - mError("trans:%d, stage from redoAction to rollback since %s", pTrans->id, terrstr()); + pTrans->actionPos = 0; + mError("trans:%d, stage from redoAction to rollback since %s, and set actionPos to %d", pTrans->id, terrstr(), + pTrans->actionPos); continueExec = true; } else { mError("trans:%d, stage keep on redoAction since %s, failedTimes:%d", pTrans->id, terrstr(), pTrans->failedTimes); @@ -1773,8 +1783,6 @@ static bool mndTransPerformRollbackStage(SMnode *pMnode, STrans *pTrans, bool to if (code == 0) { pTrans->stage = TRN_STAGE_UNDO_ACTION; - pTrans->actionPos = 0; - mInfo("trans:%d, stage from rollback to undoAction, actionPos:%d", pTrans->id, pTrans->actionPos); continueExec = true; } else { pTrans->failedTimes++; @@ -1829,7 +1837,7 @@ void mndTransExecuteImp(SMnode *pMnode, STrans *pTrans, bool topHalf) { bool continueExec = true; while (continueExec) { - mInfo("trans:%d, continue to execute, stage:%s createTime:%" PRId64 " topHalf:%d", pTrans->id, + mInfo("trans:%d, continue to execute, stage:%s createTime:%" PRId64 " topHalf(TransContext):%d", pTrans->id, mndTransStr(pTrans->stage), pTrans->createdTime, topHalf); pTrans->lastExecTime = taosGetTimestampMs(); switch (pTrans->stage) { diff --git a/source/util/src/terror.c b/source/util/src/terror.c index 1c94a5f2e4..35f0bc73eb 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -314,7 +314,7 @@ TAOS_DEFINE_ERROR(TSDB_CODE_MND_TRANS_CLOG_IS_NULL, "Transaction commitlog TAOS_DEFINE_ERROR(TSDB_CODE_MND_TRANS_NETWORK_UNAVAILL, "Unable to establish connection While execute transaction and will continue in the background") TAOS_DEFINE_ERROR(TSDB_CODE_MND_LAST_TRANS_NOT_FINISHED, "Last Transaction not finished") TAOS_DEFINE_ERROR(TSDB_CODE_MND_TRANS_SYNC_TIMEOUT, "Sync timeout While execute transaction and will continue in the background") -TAOS_DEFINE_ERROR(TSDB_CODE_MND_TRANS_CTX_SWITCH, "Transaction context switch") +TAOS_DEFINE_ERROR(TSDB_CODE_MND_TRANS_CTX_SWITCH, "Wrong transaction execution context") TAOS_DEFINE_ERROR(TSDB_CODE_MND_TRANS_CONFLICT_COMPACT, "Transaction not completed due to conflict with compact") TAOS_DEFINE_ERROR(TSDB_CODE_MND_TRANS_UNKNOW_ERROR, "Unknown transaction error")