fix: adjust raft logs (#30258)

This commit is contained in:
Simon Guan 2025-03-19 21:44:39 +08:00 committed by GitHub
parent 276aa602e8
commit 0a96294056
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
15 changed files with 146 additions and 115 deletions

View File

@ -48,7 +48,7 @@ int meteEncodeColRefEntry(SEncoder *pCoder, const SMetaEntry *pME) {
const SColRefWrapper *pw = &pME->colRef; const SColRefWrapper *pw = &pME->colRef;
TAOS_CHECK_RETURN(tEncodeI32v(pCoder, pw->nCols)); TAOS_CHECK_RETURN(tEncodeI32v(pCoder, pw->nCols));
TAOS_CHECK_RETURN(tEncodeI32v(pCoder, pw->version)); TAOS_CHECK_RETURN(tEncodeI32v(pCoder, pw->version));
uDebug("encode cols:%d", pw->nCols); uTrace("encode cols:%d", pw->nCols);
for (int32_t i = 0; i < pw->nCols; i++) { for (int32_t i = 0; i < pw->nCols; i++) {
SColRef *p = &pw->pColRef[i]; SColRef *p = &pw->pColRef[i];
@ -171,7 +171,7 @@ int meteEncodeColCmprEntry(SEncoder *pCoder, const SMetaEntry *pME) {
const SColCmprWrapper *pw = &pME->colCmpr; const SColCmprWrapper *pw = &pME->colCmpr;
TAOS_CHECK_RETURN(tEncodeI32v(pCoder, pw->nCols)); TAOS_CHECK_RETURN(tEncodeI32v(pCoder, pw->nCols));
TAOS_CHECK_RETURN(tEncodeI32v(pCoder, pw->version)); TAOS_CHECK_RETURN(tEncodeI32v(pCoder, pw->version));
uDebug("encode cols:%d", pw->nCols); uTrace("encode cols:%d", pw->nCols);
for (int32_t i = 0; i < pw->nCols; i++) { for (int32_t i = 0; i < pw->nCols; i++) {
SColCmpr *p = &pw->pColCmpr[i]; SColCmpr *p = &pw->pColCmpr[i];

View File

@ -2515,8 +2515,8 @@ int32_t metaHandleEntry2(SMeta *pMeta, const SMetaEntry *pEntry) {
if (TSDB_CODE_SUCCESS == code) { if (TSDB_CODE_SUCCESS == code) {
pMeta->changed = true; pMeta->changed = true;
metaDebug("vgId:%d, %s success, version:%" PRId64 " type:%d uid:%" PRId64 " name:%s", vgId, __func__, metaDebug("vgId:%d, index:%" PRId64 ", handle meta entry success, type:%d tb:%s uid:%" PRId64, vgId, pEntry->version,
pEntry->version, pEntry->type, pEntry->uid, pEntry->type > 0 ? pEntry->name : ""); pEntry->type, pEntry->type > 0 ? pEntry->name : "", pEntry->uid);
} else { } else {
metaErr(vgId, code); metaErr(vgId, code);
} }

View File

@ -381,8 +381,8 @@ static int32_t metaCreateChildTable(SMeta *pMeta, int64_t version, SVCreateTbReq
// handle entry // handle entry
code = metaHandleEntry2(pMeta, &entry); code = metaHandleEntry2(pMeta, &entry);
if (TSDB_CODE_SUCCESS == code) { if (TSDB_CODE_SUCCESS == code) {
metaInfo("vgId:%d, child table:%s uid %" PRId64 " suid:%" PRId64 " is created, version:%" PRId64, metaInfo("vgId:%d, index:%" PRId64 ", child table is created, tb:%s uid:%" PRId64 " suid:%" PRId64,
TD_VID(pMeta->pVnode), pReq->name, pReq->uid, pReq->ctb.suid, version); TD_VID(pMeta->pVnode), version, pReq->name, pReq->uid, pReq->ctb.suid);
} else { } else {
metaError("vgId:%d, %s failed at %s:%d since %s, uid:%" PRId64 " name:%s suid:%" PRId64 " version:%" PRId64, metaError("vgId:%d, %s failed at %s:%d since %s, uid:%" PRId64 " name:%s suid:%" PRId64 " version:%" PRId64,
TD_VID(pMeta->pVnode), __func__, __FILE__, __LINE__, tstrerror(code), pReq->uid, pReq->name, TD_VID(pMeta->pVnode), __func__, __FILE__, __LINE__, tstrerror(code), pReq->uid, pReq->name,
@ -493,8 +493,8 @@ static int32_t metaCreateNormalTable(SMeta *pMeta, int64_t version, SVCreateTbRe
// handle entry // handle entry
code = metaHandleEntry2(pMeta, &entry); code = metaHandleEntry2(pMeta, &entry);
if (TSDB_CODE_SUCCESS == code) { if (TSDB_CODE_SUCCESS == code) {
metaInfo("vgId:%d, normal table:%s uid %" PRId64 " is created, version:%" PRId64, TD_VID(pMeta->pVnode), pReq->name, metaInfo("vgId:%d, index:%" PRId64 ", normal table is created, tb:%s uid:%" PRId64, TD_VID(pMeta->pVnode), version,
pReq->uid, version); pReq->name, pReq->uid);
} else { } else {
metaError("vgId:%d, %s failed at %s:%d since %s, uid:%" PRId64 " name:%s version:%" PRId64, TD_VID(pMeta->pVnode), metaError("vgId:%d, %s failed at %s:%d since %s, uid:%" PRId64 " name:%s version:%" PRId64, TD_VID(pMeta->pVnode),
__func__, __FILE__, __LINE__, tstrerror(code), pReq->uid, pReq->name, version); __func__, __FILE__, __LINE__, tstrerror(code), pReq->uid, pReq->name, version);
@ -557,8 +557,8 @@ static int32_t metaCreateVirtualNormalTable(SMeta *pMeta, int64_t version, SVCre
// handle entry // handle entry
code = metaHandleEntry2(pMeta, &entry); code = metaHandleEntry2(pMeta, &entry);
if (TSDB_CODE_SUCCESS == code) { if (TSDB_CODE_SUCCESS == code) {
metaInfo("vgId:%d, normal table:%s uid %" PRId64 " is created, version:%" PRId64, TD_VID(pMeta->pVnode), pReq->name, metaInfo("vgId:%d, index:%" PRId64 ", virtual normal table is created, tb:%s uid:%" PRId64, TD_VID(pMeta->pVnode),
pReq->uid, version); version, pReq->name, pReq->uid);
} else { } else {
metaError("vgId:%d, %s failed at %s:%d since %s, uid:%" PRId64 " name:%s version:%" PRId64, TD_VID(pMeta->pVnode), metaError("vgId:%d, %s failed at %s:%d since %s, uid:%" PRId64 " name:%s version:%" PRId64, TD_VID(pMeta->pVnode),
__func__, __FILE__, __LINE__, tstrerror(code), pReq->uid, pReq->name, version); __func__, __FILE__, __LINE__, tstrerror(code), pReq->uid, pReq->name, version);
@ -625,8 +625,8 @@ static int32_t metaCreateVirtualChildTable(SMeta *pMeta, int64_t version, SVCrea
// handle entry // handle entry
code = metaHandleEntry2(pMeta, &entry); code = metaHandleEntry2(pMeta, &entry);
if (TSDB_CODE_SUCCESS == code) { if (TSDB_CODE_SUCCESS == code) {
metaInfo("vgId:%d, normal table:%s uid %" PRId64 " is created, version:%" PRId64, TD_VID(pMeta->pVnode), pReq->name, metaInfo("vgId:%d, index:%" PRId64 ", virtual child table is created, tb:%s uid:%" PRId64, TD_VID(pMeta->pVnode),
pReq->uid, version); version, pReq->name, pReq->uid);
} else { } else {
metaError("vgId:%d, %s failed at %s:%d since %s, uid:%" PRId64 " name:%s version:%" PRId64, TD_VID(pMeta->pVnode), metaError("vgId:%d, %s failed at %s:%d since %s, uid:%" PRId64 " name:%s version:%" PRId64, TD_VID(pMeta->pVnode),
__func__, __FILE__, __LINE__, tstrerror(code), pReq->uid, pReq->name, version); __func__, __FILE__, __LINE__, tstrerror(code), pReq->uid, pReq->name, version);

View File

@ -1354,7 +1354,7 @@ static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq,
} }
} }
vDebug("vgId:%d, add %d new created tables into query table list", TD_VID(pVnode), (int32_t)taosArrayGetSize(tbUids)); vTrace("vgId:%d, add %d new created tables into query table list", TD_VID(pVnode), (int32_t)taosArrayGetSize(tbUids));
if (tqUpdateTbUidList(pVnode->pTq, tbUids, true) < 0) { if (tqUpdateTbUidList(pVnode->pTq, tbUids, true) < 0) {
vError("vgId:%d, failed to update tbUid list since %s", TD_VID(pVnode), tstrerror(terrno)); vError("vgId:%d, failed to update tbUid list since %s", TD_VID(pVnode), tstrerror(terrno));
} }

View File

@ -335,12 +335,12 @@ void vnodeApplyWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) {
if (taosGetQitem(qall, (void **)&pMsg) == 0) continue; if (taosGetQitem(qall, (void **)&pMsg) == 0) continue;
if (vnodeIsMsgBlock(pMsg->msgType)) { if (vnodeIsMsgBlock(pMsg->msgType)) {
vGTrace(&pMsg->info.traceId, "vgId:%d, msg:%p, get from vnode-apply queue, type:%s handle:%p index:%" PRId64 vGDebug(&pMsg->info.traceId, "vgId:%d, msg:%p, get from vnode-apply queue, type:%s handle:%p index:%" PRId64
", blocking msg obtained sec:%d seq:%" PRId64, ", blocking msg obtained sec:%d seq:%" PRId64,
vgId, pMsg, TMSG_INFO(pMsg->msgType), pMsg->info.handle, pMsg->info.conn.applyIndex, pVnode->blockSec, vgId, pMsg, TMSG_INFO(pMsg->msgType), pMsg->info.handle, pMsg->info.conn.applyIndex, pVnode->blockSec,
pVnode->blockSeq); pVnode->blockSeq);
} else { } else {
vGTrace(&pMsg->info.traceId, "vgId:%d, msg:%p, get from vnode-apply queue, type:%s handle:%p index:%" PRId64, vgId, pMsg, vGDebug(&pMsg->info.traceId, "vgId:%d, msg:%p, get from vnode-apply queue, type:%s handle:%p index:%" PRId64, vgId, pMsg,
TMSG_INFO(pMsg->msgType), pMsg->info.handle, pMsg->info.conn.applyIndex); TMSG_INFO(pMsg->msgType), pMsg->info.handle, pMsg->info.conn.applyIndex);
} }
@ -437,10 +437,10 @@ static int32_t vnodeSyncApplyMsg(const SSyncFSM *pFsm, SRpcMsg *pMsg, const SFsm
pMsg->info.conn.applyIndex = pMeta->index; pMsg->info.conn.applyIndex = pMeta->index;
pMsg->info.conn.applyTerm = pMeta->term; pMsg->info.conn.applyTerm = pMeta->term;
vGTrace(&pMsg->info.traceId, vGDebug(&pMsg->info.traceId,
"vgId:%d, commit-cb is excuted, fsm:%p, index:%" PRId64 ", term:%" PRIu64 ", msg-index:%" PRId64 "vgId:%d, index:%" PRId64 ", execute commit cb, fsm:%p, term:%" PRIu64 ", msg-index:%" PRId64
", weak:%d, code:%d, state:%d %s, type:%s code:0x%x", ", weak:%d, code:%d, state:%d %s, type:%s code:0x%x",
pVnode->config.vgId, pFsm, pMeta->index, pMeta->term, pMsg->info.conn.applyIndex, pMeta->isWeak, pMeta->code, pVnode->config.vgId, pMeta->index, pFsm, pMeta->term, pMsg->info.conn.applyIndex, pMeta->isWeak, pMeta->code,
pMeta->state, syncStr(pMeta->state), TMSG_INFO(pMsg->msgType), pMsg->code); pMeta->state, syncStr(pMeta->state), TMSG_INFO(pMsg->msgType), pMsg->code);
int32_t code = tmsgPutToQueue(&pVnode->msgCb, APPLY_QUEUE, pMsg); int32_t code = tmsgPutToQueue(&pVnode->msgCb, APPLY_QUEUE, pMsg);
@ -456,7 +456,11 @@ static int32_t vnodeSyncCommitMsg(const SSyncFSM *pFsm, SRpcMsg *pMsg, SFsmCbMet
SVnode *pVnode = pFsm->data; SVnode *pVnode = pFsm->data;
vnodePostBlockMsg(pVnode, pMsg); vnodePostBlockMsg(pVnode, pMsg);
SRpcMsg rsp = {.code = pMsg->code, .info = pMsg->info}; SRpcMsg rsp = {
.code = pMsg->code,
.info = pMsg->info,
};
if (rsp.info.handle != NULL) { if (rsp.info.handle != NULL) {
tmsgSendRsp(&rsp); tmsgSendRsp(&rsp);
} }
@ -482,9 +486,10 @@ static SyncIndex vnodeSyncAppliedIndex(const SSyncFSM *pFSM) {
static void vnodeSyncRollBackMsg(const SSyncFSM *pFsm, SRpcMsg *pMsg, SFsmCbMeta *pMeta) { static void vnodeSyncRollBackMsg(const SSyncFSM *pFsm, SRpcMsg *pMsg, SFsmCbMeta *pMeta) {
SVnode *pVnode = pFsm->data; SVnode *pVnode = pFsm->data;
vTrace("vgId:%d, rollback-cb is excuted, fsm:%p, index:%" PRId64 ", weak:%d, code:%d, state:%d %s, type:%s", vGDebug(&pMsg->info.traceId,
pVnode->config.vgId, pFsm, pMeta->index, pMeta->isWeak, pMeta->code, pMeta->state, syncStr(pMeta->state), "vgId:%d, rollback-cb is excuted, fsm:%p, index:%" PRId64 ", weak:%d, code:%d, state:%d %s, type:%s",
TMSG_INFO(pMsg->msgType)); pVnode->config.vgId, pFsm, pMeta->index, pMeta->isWeak, pMeta->code, pMeta->state, syncStr(pMeta->state),
TMSG_INFO(pMsg->msgType));
} }
static int32_t vnodeSnapshotStartRead(const SSyncFSM *pFsm, void *pParam, void **ppReader) { static int32_t vnodeSnapshotStartRead(const SSyncFSM *pFsm, void *pParam, void **ppReader) {

View File

@ -51,7 +51,7 @@ void syncMaybeAdvanceCommitIndex(SSyncNode* pSyncNode);
bool syncNodeAgreedUpon(SSyncNode* pNode, SyncIndex index); bool syncNodeAgreedUpon(SSyncNode* pNode, SyncIndex index);
int64_t syncNodeUpdateCommitIndex(SSyncNode* ths, SyncIndex commitIndex); int64_t syncNodeUpdateCommitIndex(SSyncNode* ths, SyncIndex commitIndex);
int64_t syncNodeCheckCommitIndex(SSyncNode* ths, SyncIndex indexLikely); int64_t syncNodeCheckCommitIndex(SSyncNode* ths, SyncIndex indexLikely, const STraceId *trace);
int64_t syncNodeUpdateAssignedCommitIndex(SSyncNode* ths, SyncIndex assignedCommitIndex); int64_t syncNodeUpdateAssignedCommitIndex(SSyncNode* ths, SyncIndex assignedCommitIndex);

View File

@ -108,8 +108,10 @@ bool syncLogBufferIsEmpty(SSyncLogBuffer* pBuf);
int32_t syncLogBufferAppend(SSyncLogBuffer* pBuf, SSyncNode* pNode, SSyncRaftEntry* pEntry); int32_t syncLogBufferAppend(SSyncLogBuffer* pBuf, SSyncNode* pNode, SSyncRaftEntry* pEntry);
int32_t syncLogBufferAccept(SSyncLogBuffer* pBuf, SSyncNode* pNode, SSyncRaftEntry* pEntry, SyncTerm prevTerm); int32_t syncLogBufferAccept(SSyncLogBuffer* pBuf, SSyncNode* pNode, SSyncRaftEntry* pEntry, SyncTerm prevTerm);
int64_t syncLogBufferProceed(SSyncLogBuffer* pBuf, SSyncNode* pNode, SyncTerm* pMatchTerm, char *str, const SRpcMsg *pMsg); int64_t syncLogBufferProceed(SSyncLogBuffer* pBuf, SSyncNode* pNode, SyncTerm* pMatchTerm, char* str,
int32_t syncLogBufferCommit(SSyncLogBuffer* pBuf, SSyncNode* pNode, int64_t commitIndex); const SRpcMsg* pMsg);
int32_t syncLogBufferCommit(SSyncLogBuffer* pBuf, SSyncNode* pNode, int64_t commitIndex, const STraceId* trace,
const char* src);
int32_t syncLogBufferReset(SSyncLogBuffer* pBuf, SSyncNode* pNode); int32_t syncLogBufferReset(SSyncLogBuffer* pBuf, SSyncNode* pNode);
// private // private

View File

@ -36,7 +36,7 @@ extern "C" {
#define sGFatal(trace, param, ...) do { if (sDebugFlag & DEBUG_FATAL) { sFatal(param ", QID:0x%" PRIx64 ":0x%" PRIx64, __VA_ARGS__, (trace) ? (trace)->rootId : 0, (trace) ? (trace)->msgId : 0);}} while(0) #define sGFatal(trace, param, ...) do { if (sDebugFlag & DEBUG_FATAL) { sFatal(param ", QID:0x%" PRIx64 ":0x%" PRIx64, __VA_ARGS__, (trace) ? (trace)->rootId : 0, (trace) ? (trace)->msgId : 0);}} while(0)
#define sGError(trace, param, ...) do { if (sDebugFlag & DEBUG_ERROR) { sError(param ", QID:0x%" PRIx64 ":0x%" PRIx64, __VA_ARGS__, (trace) ? (trace)->rootId : 0, (trace) ? (trace)->msgId : 0);}} while(0) #define sGError(trace, param, ...) do { if (sDebugFlag & DEBUG_ERROR) { sError(param ", QID:0x%" PRIx64 ":0x%" PRIx64, __VA_ARGS__, (trace) ? (trace)->rootId : 0, (trace) ? (trace)->msgId : 0);}} while(0)
#define sGWarn(trace, param, ...) do { if (sDebugFlag & DEBUG_WARN) { sWarn(param ", QID:0x%" PRIx64 ":0x%" PRIx64, __VA_ARGS__, (trace) ? (trace)->rootId : 0, (trace) ? (trace)->msgId : 0);}} while(0) #define sGWarn(trace, param, ...) do { if (sDebugFlag & DEBUG_WARN) { sWarn(param ", QID:0x%" PRIx64 ":0x%" PRIx64, __VA_ARGS__, (trace) ? (trace)->rootId : 0, (trace) ? (trace)->msgId : 0);}} while(0)
#define sGInfo(ptrace, aram, ...) do { if (sDebugFlag & DEBUG_INFO) { sInfo(param ", QID:0x%" PRIx64 ":0x%" PRIx64, __VA_ARGS__, (trace) ? (trace)->rootId : 0, (trace) ? (trace)->msgId : 0);}} while(0) #define sGInfo(trace, param, ...) do { if (sDebugFlag & DEBUG_INFO) { sInfo(param ", QID:0x%" PRIx64 ":0x%" PRIx64, __VA_ARGS__, (trace) ? (trace)->rootId : 0, (trace) ? (trace)->msgId : 0);}} while(0)
#define sGDebug(trace, param, ...) do { if (sDebugFlag & DEBUG_DEBUG) { sDebug(param ", QID:0x%" PRIx64 ":0x%" PRIx64, __VA_ARGS__, (trace) ? (trace)->rootId : 0, (trace) ? (trace)->msgId : 0);}} while(0) #define sGDebug(trace, param, ...) do { if (sDebugFlag & DEBUG_DEBUG) { sDebug(param ", QID:0x%" PRIx64 ":0x%" PRIx64, __VA_ARGS__, (trace) ? (trace)->rootId : 0, (trace) ? (trace)->msgId : 0);}} while(0)
#define sLFatal(...) if (sDebugFlag & DEBUG_FATAL) { taosPrintLongString("SYN FATAL ", DEBUG_FATAL, 255, __VA_ARGS__); } #define sLFatal(...) if (sDebugFlag & DEBUG_FATAL) { taosPrintLongString("SYN FATAL ", DEBUG_FATAL, 255, __VA_ARGS__); }

View File

@ -112,6 +112,7 @@ int32_t syncNodeOnAppendEntries(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
syncLogRecvAppendEntries(ths, pMsg, "build rsp error", &pRpcMsg->info.traceId); syncLogRecvAppendEntries(ths, pMsg, "build rsp error", &pRpcMsg->info.traceId);
goto _IGNORE; goto _IGNORE;
} }
rpcRsp.info.traceId = pRpcMsg->info.traceId;
SyncAppendEntriesReply* pReply = rpcRsp.pCont; SyncAppendEntriesReply* pReply = rpcRsp.pCont;
// prepare response msg // prepare response msg
@ -155,7 +156,8 @@ int32_t syncNodeOnAppendEntries(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
goto _IGNORE; goto _IGNORE;
} }
sGTrace(&pRpcMsg->info.traceId, "vgId:%d, recv append entries msg. index:%" PRId64 ", term:%" PRId64 ", preLogIndex:%" PRId64 sGDebug(&pRpcMsg->info.traceId,
"vgId:%d, index:%" PRId64 ", recv append entries msg, term:%" PRId64 ", preLogIndex:%" PRId64
", prevLogTerm:%" PRId64 " commitIndex:%" PRId64 " entryterm:%" PRId64, ", prevLogTerm:%" PRId64 " commitIndex:%" PRId64 " entryterm:%" PRId64,
pMsg->vgId, pMsg->prevLogIndex + 1, pMsg->term, pMsg->prevLogIndex, pMsg->prevLogTerm, pMsg->commitIndex, pMsg->vgId, pMsg->prevLogIndex + 1, pMsg->term, pMsg->prevLogIndex, pMsg->prevLogTerm, pMsg->commitIndex,
pEntry->term); pEntry->term);
@ -181,22 +183,25 @@ _SEND_RESPONSE:
pReply->success = true; pReply->success = true;
// update commit index only after matching // update commit index only after matching
SyncIndex returnIndex = syncNodeUpdateCommitIndex(ths, TMIN(pMsg->commitIndex, pReply->lastSendIndex)); SyncIndex returnIndex = syncNodeUpdateCommitIndex(ths, TMIN(pMsg->commitIndex, pReply->lastSendIndex));
sTrace("vgId:%d, update commit return index:%" PRId64, ths->vgId, returnIndex); sGDebug(&rpcRsp.info.traceId, "vgId:%d, index:%" PRId64 ", last commit index:%" PRId64, ths->vgId,
pMsg->prevLogIndex + 1, returnIndex);
} }
TRACE_SET_MSGID(&(rpcRsp.info.traceId), tGenIdPI64()); TRACE_SET_MSGID(&(rpcRsp.info.traceId), tGenIdPI64());
{ {
sGTrace(&rpcRsp.info.traceId, sGDebug(&rpcRsp.info.traceId,
"vgId:%d, send append reply matchIndex:%" PRId64 " term:%" PRId64 " lastSendIndex:%" PRId64 "vgId:%d, index:%" PRId64 ", send append entries reply, matchIndex:%" PRId64 " term:%" PRId64
" to dest:0x%016" PRIx64, " lastSendIndex:%" PRId64 " to dest addr:0x%016" PRIx64,
ths->vgId, pReply->matchIndex, pReply->term, pReply->lastSendIndex, pReply->destId.addr); ths->vgId, pMsg->prevLogIndex + 1, pReply->matchIndex, pReply->term, pReply->lastSendIndex,
pReply->destId.addr);
} }
// ack, i.e. send response // ack, i.e. send response
TAOS_CHECK_RETURN(syncNodeSendMsgById(&pReply->destId, ths, &rpcRsp)); TAOS_CHECK_RETURN(syncNodeSendMsgById(&pReply->destId, ths, &rpcRsp));
// commit index, i.e. leader notice me // commit index, i.e. leader notice me
if (ths->fsmState != SYNC_FSM_STATE_INCOMPLETE && syncLogBufferCommit(ths->pLogBuf, ths, ths->commitIndex) < 0) { if (ths->fsmState != SYNC_FSM_STATE_INCOMPLETE &&
sError("vgId:%d, failed to commit raft fsm log since %s.", ths->vgId, terrstr()); syncLogBufferCommit(ths->pLogBuf, ths, ths->commitIndex, &pRpcMsg->info.traceId, "sync-append-entries") < 0) {
sGError(&pRpcMsg->info.traceId, "vgId:%d, failed to commit raft fsm log since %s", ths->vgId, terrstr());
} }
if (resetElect) syncNodeResetElectTimer(ths); if (resetElect) syncNodeResetElectTimer(ths);

View File

@ -63,7 +63,7 @@ int32_t syncNodeOnAppendEntriesReply(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
return TSDB_CODE_SYN_WRONG_TERM; return TSDB_CODE_SYN_WRONG_TERM;
} }
sGDebug(&pRpcMsg->info.traceId, "vgId:%d, received append entries reply, srcId:0x%016" PRIx64 ", term:%" PRId64 ", matchIndex:%" PRId64, sGDebug(&pRpcMsg->info.traceId, "vgId:%d, received append entries reply, src addr:0x%" PRIx64 ", term:%" PRId64 ", matchIndex:%" PRId64,
pMsg->vgId, pMsg->srcId.addr, pMsg->term, pMsg->matchIndex); pMsg->vgId, pMsg->srcId.addr, pMsg->term, pMsg->matchIndex);
if (pMsg->success) { if (pMsg->success) {
@ -74,13 +74,13 @@ int32_t syncNodeOnAppendEntriesReply(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
// commit if needed // commit if needed
SyncIndex indexLikely = TMIN(pMsg->matchIndex, ths->pLogBuf->matchIndex); SyncIndex indexLikely = TMIN(pMsg->matchIndex, ths->pLogBuf->matchIndex);
SyncIndex commitIndex = syncNodeCheckCommitIndex(ths, indexLikely); SyncIndex commitIndex = syncNodeCheckCommitIndex(ths, indexLikely, &pRpcMsg->info.traceId);
if (ths->state == TAOS_SYNC_STATE_ASSIGNED_LEADER) { if (ths->state == TAOS_SYNC_STATE_ASSIGNED_LEADER) {
if (commitIndex >= ths->assignedCommitIndex) { if (commitIndex >= ths->assignedCommitIndex) {
syncNodeStepDown(ths, pMsg->term, pMsg->destId); syncNodeStepDown(ths, pMsg->term, pMsg->destId);
} }
} else { } else {
TAOS_CHECK_RETURN(syncLogBufferCommit(ths->pLogBuf, ths, commitIndex)); TAOS_CHECK_RETURN(syncLogBufferCommit(ths->pLogBuf, ths, commitIndex, &pRpcMsg->info.traceId, "sync-append-entries-reply"));
} }
} }
@ -89,7 +89,7 @@ int32_t syncNodeOnAppendEntriesReply(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
if (pMgr == NULL) { if (pMgr == NULL) {
code = TSDB_CODE_MND_RETURN_VALUE_NULL; code = TSDB_CODE_MND_RETURN_VALUE_NULL;
if (terrno != 0) code = terrno; if (terrno != 0) code = terrno;
sError("vgId:%d, failed to get log repl mgr for src addr: 0x%016" PRIx64, ths->vgId, pMsg->srcId.addr); sError("vgId:%d, failed to get log repl mgr for src addr:0x%" PRIx64, ths->vgId, pMsg->srcId.addr);
TAOS_RETURN(code); TAOS_RETURN(code);
} }
TAOS_CHECK_RETURN(syncLogReplProcessReply(pMgr, ths, pMsg)); TAOS_CHECK_RETURN(syncLogReplProcessReply(pMgr, ths, pMsg));

View File

@ -81,13 +81,13 @@ int64_t syncNodeUpdateCommitIndex(SSyncNode* ths, SyncIndex commitIndex) {
return ths->commitIndex; return ths->commitIndex;
} }
int64_t syncNodeCheckCommitIndex(SSyncNode* ths, SyncIndex indexLikely) { int64_t syncNodeCheckCommitIndex(SSyncNode* ths, SyncIndex indexLikely, const STraceId *trace) {
int32_t code = 0; int32_t code = 0;
if (indexLikely > ths->commitIndex && syncNodeAgreedUpon(ths, indexLikely)) { if (indexLikely > ths->commitIndex && syncNodeAgreedUpon(ths, indexLikely)) {
SyncIndex commitIndex = indexLikely; SyncIndex commitIndex = indexLikely;
SyncIndex returnIndex = syncNodeUpdateCommitIndex(ths, commitIndex); SyncIndex returnIndex = syncNodeUpdateCommitIndex(ths, commitIndex);
sTrace("vgId:%d, agreed upon, role:%d, term:%" PRId64 ", index:%" PRId64 ", return:%" PRId64, ths->vgId, ths->state, sGDebug(trace, "vgId:%d, index:%" PRId64 ", agreed upon, role:%d term:%" PRId64 " return index:%" PRId64, ths->vgId,
raftStoreGetTerm(ths), commitIndex, returnIndex); commitIndex, ths->state, raftStoreGetTerm(ths), returnIndex);
} }
return ths->commitIndex; return ths->commitIndex;
} }

View File

@ -744,7 +744,7 @@ int32_t syncUpdateArbTerm(int64_t rid, SyncTerm arbTerm) {
} }
SyncIndex syncNodeGetSnapshotConfigIndex(SSyncNode* pSyncNode, SyncIndex snapshotLastApplyIndex) { SyncIndex syncNodeGetSnapshotConfigIndex(SSyncNode* pSyncNode, SyncIndex snapshotLastApplyIndex) {
if (!(pSyncNode->raftCfg.configIndexCount >= 1)) { if (pSyncNode->raftCfg.configIndexCount < 1) {
sError("vgId:%d, failed get snapshot config index, configIndexCount:%d", pSyncNode->vgId, sError("vgId:%d, failed get snapshot config index, configIndexCount:%d", pSyncNode->vgId,
pSyncNode->raftCfg.configIndexCount); pSyncNode->raftCfg.configIndexCount);
terrno = TSDB_CODE_SYN_INTERNAL_ERROR; terrno = TSDB_CODE_SYN_INTERNAL_ERROR;
@ -758,8 +758,8 @@ SyncIndex syncNodeGetSnapshotConfigIndex(SSyncNode* pSyncNode, SyncIndex snapsho
lastIndex = (pSyncNode->raftCfg.configIndexArr)[i]; lastIndex = (pSyncNode->raftCfg.configIndexArr)[i];
} }
} }
sTrace("vgId:%d, sync get last config index, index:%" PRId64 " lcindex:%" PRId64, pSyncNode->vgId, sTrace("vgId:%d, index:%" PRId64 ", get last snapshot config index:%" PRId64, pSyncNode->vgId, snapshotLastApplyIndex,
snapshotLastApplyIndex, lastIndex); lastIndex);
return lastIndex; return lastIndex;
} }
@ -1010,7 +1010,7 @@ static int32_t syncHbTimerStart(SSyncNode* pSyncNode, SSyncTimer* pSyncTimer) {
pData->logicClock = pSyncTimer->logicClock; pData->logicClock = pSyncTimer->logicClock;
pData->execTime = tsNow + pSyncTimer->timerMS; pData->execTime = tsNow + pSyncTimer->timerMS;
sTrace("vgId:%d, start hb timer, rid:%" PRId64 " addr:%" PRId64 " at %d", pSyncNode->vgId, pData->rid, sTrace("vgId:%d, start hb timer, rid:%" PRId64 " addr:0x%" PRIx64 " at %d", pSyncNode->vgId, pData->rid,
pData->destId.addr, pSyncTimer->timerMS); pData->destId.addr, pSyncTimer->timerMS);
bool stopped = taosTmrResetPriority(pSyncTimer->timerCb, pSyncTimer->timerMS, (void*)(pData->rid), bool stopped = taosTmrResetPriority(pSyncTimer->timerCb, pSyncTimer->timerMS, (void*)(pData->rid),
@ -1474,7 +1474,7 @@ int32_t syncNodeRestore(SSyncNode* pSyncNode) {
sInfo("vgId:%d, restore began, and keep syncing until commitIndex:%" PRId64, pSyncNode->vgId, pSyncNode->commitIndex); sInfo("vgId:%d, restore began, and keep syncing until commitIndex:%" PRId64, pSyncNode->vgId, pSyncNode->commitIndex);
if (pSyncNode->fsmState != SYNC_FSM_STATE_INCOMPLETE && if (pSyncNode->fsmState != SYNC_FSM_STATE_INCOMPLETE &&
(code = syncLogBufferCommit(pSyncNode->pLogBuf, pSyncNode, pSyncNode->commitIndex)) < 0) { (code = syncLogBufferCommit(pSyncNode->pLogBuf, pSyncNode, pSyncNode->commitIndex, NULL, "restore")) < 0) {
TAOS_RETURN(code); TAOS_RETURN(code);
} }
@ -1708,7 +1708,7 @@ int32_t syncNodeStopElectTimer(SSyncNode* pSyncNode) {
int32_t code = 0; int32_t code = 0;
(void)atomic_add_fetch_64(&pSyncNode->electTimerLogicClock, 1); (void)atomic_add_fetch_64(&pSyncNode->electTimerLogicClock, 1);
bool stop = taosTmrStop(pSyncNode->pElectTimer); bool stop = taosTmrStop(pSyncNode->pElectTimer);
sDebug("vgId:%d, stop elect timer, stop:%d", pSyncNode->vgId, stop); sTrace("vgId:%d, stop elect timer, stop:%d", pSyncNode->vgId, stop);
pSyncNode->pElectTimer = NULL; pSyncNode->pElectTimer = NULL;
return code; return code;
@ -1821,7 +1821,7 @@ int32_t syncNodeSendMsgById(const SRaftId* destRaftId, SSyncNode* pNode, SRpcMsg
} }
if (code < 0) { if (code < 0) {
sError("vgId:%d, failed to send sync msg since %s. epset:%p dnode:%d addr:%" PRId64, pNode->vgId, tstrerror(code), sError("vgId:%d, failed to send sync msg since %s. epset:%p dnode:%d addr:0x%" PRIx64, pNode->vgId, tstrerror(code),
epSet, DID(destRaftId), destRaftId->addr); epSet, DID(destRaftId), destRaftId->addr);
rpcFreeCont(pMsg->pCont); rpcFreeCont(pMsg->pCont);
} }
@ -2754,7 +2754,7 @@ static void syncNodeEqPeerHeartbeatTimer(void* param, void* tmrId) {
return; return;
} }
sTrace("vgId:%d, peer hb timer execution, rid:%" PRId64 " addr:%" PRId64, pSyncNode->vgId, hbDataRid, sTrace("vgId:%d, peer hb timer execution, rid:%" PRId64 " addr:0x%" PRIx64, pSyncNode->vgId, hbDataRid,
pData->destId.addr); pData->destId.addr);
if (pSyncNode->totalReplicaNum > 1) { if (pSyncNode->totalReplicaNum > 1) {
@ -2790,6 +2790,7 @@ static void syncNodeEqPeerHeartbeatTimer(void* param, void* tmrId) {
// send msg // send msg
TRACE_SET_MSGID(&(rpcMsg.info.traceId), tGenIdPI64()); TRACE_SET_MSGID(&(rpcMsg.info.traceId), tGenIdPI64());
TRACE_SET_ROOTID(&(rpcMsg.info.traceId), tGenIdPI64());
sGTrace(&rpcMsg.info.traceId, "vgId:%d, send sync-heartbeat to dnode:%d", pSyncNode->vgId, sGTrace(&rpcMsg.info.traceId, "vgId:%d, send sync-heartbeat to dnode:%d", pSyncNode->vgId,
DID(&(pSyncMsg->destId))); DID(&(pSyncMsg->destId)));
syncLogSendHeartbeat(pSyncNode, pSyncMsg, false, timerElapsed, pData->execTime); syncLogSendHeartbeat(pSyncNode, pSyncMsg, false, timerElapsed, pData->execTime);
@ -2949,7 +2950,7 @@ void syncNodeLogConfigInfo(SSyncNode* ths, SSyncCfg* cfg, char* str) {
} }
for (int32_t i = 0; i < ths->peersNum; ++i) { for (int32_t i = 0; i < ths->peersNum; ++i) {
sInfo("vgId:%d, %s, peersId%d, addr:%" PRId64, ths->vgId, str, i, ths->peersId[i].addr); sInfo("vgId:%d, %s, peersId%d, addr:0x%" PRIx64, ths->vgId, str, i, ths->peersId[i].addr);
} }
for (int32_t i = 0; i < ths->raftCfg.cfg.totalReplicaNum; ++i) { for (int32_t i = 0; i < ths->raftCfg.cfg.totalReplicaNum; ++i) {
@ -2960,7 +2961,7 @@ void syncNodeLogConfigInfo(SSyncNode* ths, SSyncCfg* cfg, char* str) {
} }
for (int32_t i = 0; i < ths->raftCfg.cfg.totalReplicaNum; ++i) { for (int32_t i = 0; i < ths->raftCfg.cfg.totalReplicaNum; ++i) {
sInfo("vgId:%d, %s, replicasId%d, addr:%" PRId64, ths->vgId, str, i, ths->replicasId[i].addr); sInfo("vgId:%d, %s, replicasId%d, addr:0x%" PRIx64, ths->vgId, str, i, ths->replicasId[i].addr);
} }
} }
@ -3198,7 +3199,7 @@ void syncNodeChangeToVoter(SSyncNode* ths) {
ths->replicaNum = ths->raftCfg.cfg.replicaNum; ths->replicaNum = ths->raftCfg.cfg.replicaNum;
sDebug("vgId:%d, totalReplicaNum:%d", ths->vgId, ths->totalReplicaNum); sDebug("vgId:%d, totalReplicaNum:%d", ths->vgId, ths->totalReplicaNum);
for (int32_t i = 0; i < ths->totalReplicaNum; ++i) { for (int32_t i = 0; i < ths->totalReplicaNum; ++i) {
sDebug("vgId:%d, i:%d, replicaId.addr:%" PRIx64, ths->vgId, i, ths->replicasId[i].addr); sDebug("vgId:%d, i:%d, replicaId.addr:0x%" PRIx64, ths->vgId, i, ths->replicasId[i].addr);
} }
// pMatchIndex, pNextIndex, only need to change replicaNum when 1->3 // pMatchIndex, pNextIndex, only need to change replicaNum when 1->3
@ -3430,10 +3431,11 @@ int32_t syncNodeAppend(SSyncNode* ths, SSyncRaftEntry* pEntry, SRpcMsg* pMsg) {
_out:; _out:;
// proceed match index, with replicating on needed // proceed match index, with replicating on needed
SyncIndex matchIndex = syncLogBufferProceed(ths->pLogBuf, ths, NULL, "Append", pMsg); SyncIndex matchIndex = syncLogBufferProceed(ths->pLogBuf, ths, NULL, "Append", pMsg);
const STraceId* trace = pEntry ? &pEntry->originRpcTraceId : NULL;
if (pEntry != NULL) { if (pEntry != NULL) {
sGDebug(&pEntry->originRpcTraceId, sGDebug(trace,
"vgId:%d, index:%" PRId64 ", append raft entry, msg:%p term:%" PRId64 " buf:[%" PRId64 " %" PRId64 "vgId:%d, index:%" PRId64 ", raft entry appended, msg:%p term:%" PRId64 " buf:[%" PRId64 " %" PRId64
" %" PRId64 ", %" PRId64 ")", " %" PRId64 ", %" PRId64 ")",
ths->vgId, pEntry->index, pMsg, pEntry->term, ths->pLogBuf->startIndex, ths->pLogBuf->commitIndex, ths->vgId, pEntry->index, pMsg, pEntry->term, ths->pLogBuf->startIndex, ths->pLogBuf->commitIndex,
ths->pLogBuf->matchIndex, ths->pLogBuf->endIndex); ths->pLogBuf->matchIndex, ths->pLogBuf->endIndex);
@ -3441,14 +3443,12 @@ _out:;
if (code == 0 && ths->state == TAOS_SYNC_STATE_ASSIGNED_LEADER) { if (code == 0 && ths->state == TAOS_SYNC_STATE_ASSIGNED_LEADER) {
int64_t index = syncNodeUpdateAssignedCommitIndex(ths, matchIndex); int64_t index = syncNodeUpdateAssignedCommitIndex(ths, matchIndex);
sGTrace(pEntry ? &pEntry->originRpcTraceId : NULL, "vgId:%d, index:%" PRId64 ", update assigned commit, msg:%p", sGTrace(trace, "vgId:%d, index:%" PRId64 ", update assigned commit, msg:%p", ths->vgId, index, pMsg);
ths->vgId, index, pMsg);
if (ths->fsmState != SYNC_FSM_STATE_INCOMPLETE && if (ths->fsmState != SYNC_FSM_STATE_INCOMPLETE &&
syncLogBufferCommit(ths->pLogBuf, ths, ths->assignedCommitIndex) < 0) { syncLogBufferCommit(ths->pLogBuf, ths, ths->assignedCommitIndex, trace, "append-entry") < 0) {
sGError(pEntry ? &pEntry->originRpcTraceId : NULL, sGError(trace, "vgId:%d, index:%" PRId64 ", failed to commit, msg:%p commit index:%" PRId64, ths->vgId, index,
"vgId:%d, index:%" PRId64 ", failed to commit, msg:%p commit index:%" PRId64, ths->vgId, index, pMsg, pMsg, ths->commitIndex);
ths->commitIndex);
code = TSDB_CODE_SYN_INTERNAL_ERROR; code = TSDB_CODE_SYN_INTERNAL_ERROR;
} }
} }
@ -3460,13 +3460,12 @@ _out:;
// single replica // single replica
SyncIndex returnIndex = syncNodeUpdateCommitIndex(ths, matchIndex); SyncIndex returnIndex = syncNodeUpdateCommitIndex(ths, matchIndex);
sGTrace(pEntry ? &pEntry->originRpcTraceId : NULL, sGTrace(trace, "vgId:%d, index:%" PRId64 ", raft entry update commit, msg:%p return index:%" PRId64, ths->vgId,
"vgId:%d, index:%" PRId64 ", raft entry update commit, msg:%p return index:%" PRId64, ths->vgId, matchIndex, matchIndex, pMsg, returnIndex);
pMsg, returnIndex);
if (ths->fsmState != SYNC_FSM_STATE_INCOMPLETE && if (ths->fsmState != SYNC_FSM_STATE_INCOMPLETE &&
(code = syncLogBufferCommit(ths->pLogBuf, ths, ths->commitIndex)) < 0) { (code = syncLogBufferCommit(ths->pLogBuf, ths, ths->commitIndex, trace, "append-entry")) < 0) {
sGError(pEntry ? &pEntry->originRpcTraceId : NULL, sGError(trace,
"vgId:%d, index:%" PRId64 ", failed to commit, msg:%p commit index:%" PRId64 " return index:%" PRId64, "vgId:%d, index:%" PRId64 ", failed to commit, msg:%p commit index:%" PRId64 " return index:%" PRId64,
ths->vgId, matchIndex, pMsg, ths->commitIndex, returnIndex); ths->vgId, matchIndex, pMsg, ths->commitIndex, returnIndex);
} }
@ -3592,18 +3591,20 @@ int32_t syncNodeOnHeartbeat(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
pMsgReply->privateTerm = 8864; // magic number pMsgReply->privateTerm = 8864; // magic number
pMsgReply->startTime = ths->startTime; pMsgReply->startTime = ths->startTime;
pMsgReply->timeStamp = tsMs; pMsgReply->timeStamp = tsMs;
rpcMsg.info.traceId = pRpcMsg->info.traceId;
// reply // reply
TRACE_SET_MSGID(&(rpcMsg.info.traceId), tGenIdPI64()); TRACE_SET_MSGID(&(rpcMsg.info.traceId), tGenIdPI64());
sGTrace(&rpcMsg.info.traceId, "vgId:%d, send sync-heartbeat-reply to dnode:%d term:%" PRId64 " timestamp:%" PRId64, sGDebug(&rpcMsg.info.traceId, "vgId:%d, send sync-heartbeat-reply to dnode:%d term:%" PRId64 " timestamp:%" PRId64,
ths->vgId, DID(&(pMsgReply->destId)), pMsgReply->term, pMsgReply->timeStamp); ths->vgId, DID(&(pMsgReply->destId)), pMsgReply->term, pMsgReply->timeStamp);
TAOS_CHECK_RETURN(syncNodeSendMsgById(&pMsgReply->destId, ths, &rpcMsg)); TAOS_CHECK_RETURN(syncNodeSendMsgById(&pMsgReply->destId, ths, &rpcMsg));
if (currentTerm == 0) currentTerm = raftStoreGetTerm(ths); if (currentTerm == 0) currentTerm = raftStoreGetTerm(ths);
sGTrace(&rpcMsg.info.traceId, sGDebug(&rpcMsg.info.traceId,
"vgId:%d, process sync-heartbeat msg from dnode:%d, cluster:%d, Msgterm:%" PRId64 " currentTerm:%" PRId64, "vgId:%d, process sync-heartbeat msg from dnode:%d, commit-index:%" PRId64 ", cluster:%d msgTerm:%" PRId64
ths->vgId, DID(&(pMsg->srcId)), CID(&(pMsg->srcId)), pMsg->term, currentTerm); " currentTerm:%" PRId64,
ths->vgId, DID(&(pMsg->srcId)), pMsg->commitIndex, CID(&(pMsg->srcId)), pMsg->term, currentTerm);
if (pMsg->term > currentTerm && ths->state == TAOS_SYNC_STATE_LEARNER) { if (pMsg->term > currentTerm && ths->state == TAOS_SYNC_STATE_LEARNER) {
raftStoreSetTerm(ths, pMsg->term); raftStoreSetTerm(ths, pMsg->term);
@ -3620,6 +3621,7 @@ int32_t syncNodeOnHeartbeat(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
if (ths->state == TAOS_SYNC_STATE_FOLLOWER || ths->state == TAOS_SYNC_STATE_LEARNER) { if (ths->state == TAOS_SYNC_STATE_FOLLOWER || ths->state == TAOS_SYNC_STATE_LEARNER) {
SRpcMsg rpcMsgLocalCmd = {0}; SRpcMsg rpcMsgLocalCmd = {0};
TAOS_CHECK_RETURN(syncBuildLocalCmd(&rpcMsgLocalCmd, ths->vgId)); TAOS_CHECK_RETURN(syncBuildLocalCmd(&rpcMsgLocalCmd, ths->vgId));
rpcMsgLocalCmd.info.traceId = rpcMsg.info.traceId;
SyncLocalCmd* pSyncMsg = rpcMsgLocalCmd.pCont; SyncLocalCmd* pSyncMsg = rpcMsgLocalCmd.pCont;
pSyncMsg->cmd = pSyncMsg->cmd =
@ -3633,8 +3635,9 @@ int32_t syncNodeOnHeartbeat(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
sError("vgId:%d, failed to enqueue commit msg from heartbeat since %s, code:%d", ths->vgId, terrstr(), code); sError("vgId:%d, failed to enqueue commit msg from heartbeat since %s, code:%d", ths->vgId, terrstr(), code);
rpcFreeCont(rpcMsgLocalCmd.pCont); rpcFreeCont(rpcMsgLocalCmd.pCont);
} else { } else {
sTrace("vgId:%d, enqueue commit msg from heartbeat, commit-index:%" PRId64 ", term:%" PRId64, ths->vgId, sGTrace(&rpcMsg.info.traceId,
pMsg->commitIndex, pMsg->term); "vgId:%d, enqueue commit msg from heartbeat, commit-index:%" PRId64 ", term:%" PRId64, ths->vgId,
pMsg->commitIndex, pMsg->term);
} }
} }
} }
@ -3644,6 +3647,7 @@ int32_t syncNodeOnHeartbeat(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
(ths->state == TAOS_SYNC_STATE_LEADER || ths->state == TAOS_SYNC_STATE_ASSIGNED_LEADER)) { (ths->state == TAOS_SYNC_STATE_LEADER || ths->state == TAOS_SYNC_STATE_ASSIGNED_LEADER)) {
SRpcMsg rpcMsgLocalCmd = {0}; SRpcMsg rpcMsgLocalCmd = {0};
TAOS_CHECK_RETURN(syncBuildLocalCmd(&rpcMsgLocalCmd, ths->vgId)); TAOS_CHECK_RETURN(syncBuildLocalCmd(&rpcMsgLocalCmd, ths->vgId));
rpcMsgLocalCmd.info.traceId = rpcMsg.info.traceId;
SyncLocalCmd* pSyncMsg = rpcMsgLocalCmd.pCont; SyncLocalCmd* pSyncMsg = rpcMsgLocalCmd.pCont;
pSyncMsg->cmd = SYNC_LOCAL_CMD_STEP_DOWN; pSyncMsg->cmd = SYNC_LOCAL_CMD_STEP_DOWN;
@ -3718,9 +3722,10 @@ int32_t syncNodeOnLocalCmd(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
} }
if (pMsg->currentTerm == matchTerm) { if (pMsg->currentTerm == matchTerm) {
SyncIndex returnIndex = syncNodeUpdateCommitIndex(ths, pMsg->commitIndex); SyncIndex returnIndex = syncNodeUpdateCommitIndex(ths, pMsg->commitIndex);
sTrace("vgId:%d, raft entry update commit return index:%" PRId64, ths->vgId, returnIndex); sTrace("vgId:%d, raft entry update commit, return index:%" PRId64, ths->vgId, returnIndex);
} }
if (ths->fsmState != SYNC_FSM_STATE_INCOMPLETE && syncLogBufferCommit(ths->pLogBuf, ths, ths->commitIndex) < 0) { if (ths->fsmState != SYNC_FSM_STATE_INCOMPLETE &&
syncLogBufferCommit(ths->pLogBuf, ths, ths->commitIndex, &pRpcMsg->info.traceId, "heartbeat") < 0) {
sError("vgId:%d, failed to commit raft log since %s. commit index:%" PRId64, ths->vgId, terrstr(), sError("vgId:%d, failed to commit raft log since %s. commit index:%" PRId64, ths->vgId, terrstr(),
ths->commitIndex); ths->commitIndex);
} }

View File

@ -526,7 +526,7 @@ int32_t syncLogStorePersist(SSyncLogStore* pLogStore, SSyncNode* pNode, SSyncRaf
bool doFsync = syncLogStoreNeedFlush(pEntry, pNode->replicaNum); bool doFsync = syncLogStoreNeedFlush(pEntry, pNode->replicaNum);
if ((code = pLogStore->syncLogAppendEntry(pLogStore, pEntry, doFsync)) < 0) { if ((code = pLogStore->syncLogAppendEntry(pLogStore, pEntry, doFsync)) < 0) {
sError("failed to append sync log entry since %s, index:%" PRId64 ", term:%" PRId64, tstrerror(code), sError("failed to persist raft entry since %s, index:%" PRId64 ", term:%" PRId64, tstrerror(code),
pEntry->index, pEntry->term); pEntry->index, pEntry->term);
TAOS_RETURN(code); TAOS_RETURN(code);
} }
@ -725,29 +725,37 @@ int32_t syncFsmExecute(SSyncNode* pNode, SSyncFSM* pFsm, ESyncState role, SyncTe
cbMeta.term = pEntry->term; cbMeta.term = pEntry->term;
cbMeta.currentTerm = term; cbMeta.currentTerm = term;
cbMeta.flag = -1; cbMeta.flag = -1;
rpcMsg.info.traceId = pEntry->originRpcTraceId;
int32_t num = syncRespMgrGetAndDel(pNode->pSyncRespMgr, cbMeta.seqNum, &rpcMsg.info); int32_t num = syncRespMgrGetAndDel(pNode->pSyncRespMgr, cbMeta.seqNum, &rpcMsg.info);
sDebug("vgId:%d, get response info, seqNum:%" PRId64 ", num:%d", pNode->vgId, cbMeta.seqNum, num); sGDebug(&rpcMsg.info.traceId, "vgId:%d, index:%" PRId64 ", get response info, handle:%p seq:%" PRId64 " num:%d",
pNode->vgId, pEntry->index, &rpcMsg.info, cbMeta.seqNum, num);
code = pFsm->FpCommitCb(pFsm, &rpcMsg, &cbMeta); code = pFsm->FpCommitCb(pFsm, &rpcMsg, &cbMeta);
retry = (code != 0) && (terrno == TSDB_CODE_OUT_OF_RPC_MEMORY_QUEUE); retry = (code != 0) && (terrno == TSDB_CODE_OUT_OF_RPC_MEMORY_QUEUE);
sDebug("vgId:%d, fsm execute, index:%" PRId64 ", term:%" PRId64 ", type:%s, code:%d, retry:%d", pNode->vgId,
pEntry->index, pEntry->term, TMSG_INFO(pEntry->originalRpcType), code, retry); sGTrace(&rpcMsg.info.traceId,
"vgId:%d, index:%" PRId64 ", fsm execute, term:%" PRId64 ", type:%s, code:%d, retry:%d", pNode->vgId,
pEntry->index, pEntry->term, TMSG_INFO(pEntry->originalRpcType), code, retry);
if (retry) { if (retry) {
taosMsleep(10); taosMsleep(10);
if (code == TSDB_CODE_OUT_OF_RPC_MEMORY_QUEUE) { if (code == TSDB_CODE_OUT_OF_RPC_MEMORY_QUEUE) {
sError("vgId:%d, will retry to execute fsm after 10ms, last error is %s, index:%" PRId64, pNode->vgId, sGError(&rpcMsg.info.traceId,
tstrerror(code), pEntry->index); "vgId:%d, index:%" PRId64 ", will retry to execute fsm after 10ms, last error is %s", pNode->vgId,
pEntry->index, tstrerror(code));
} else { } else {
sDebug("vgId:%d, will retry to execute fsm after 10ms, last error is %s, index:%" PRId64, pNode->vgId, sGTrace(&rpcMsg.info.traceId,
tstrerror(code), pEntry->index); "vgId:%d, index:%" PRId64 ", will retry to execute fsm after 10ms, last error is %s", pNode->vgId,
pEntry->index, tstrerror(code));
} }
} }
} while (retry); } while (retry);
_exit: _exit:
if (code < 0) { if (code < 0) {
sError("vgId:%d, failed to execute fsm at line %d since %s, index:%" PRId64 ", term:%" PRId64 ", type:%s", sError("vgId:%d, index:%" PRId64 ", failed to execute fsm at line %d since %s, term:%" PRId64 ", type:%s",
pNode->vgId, lino, tstrerror(code), pEntry->index, pEntry->term, TMSG_INFO(pEntry->originalRpcType)); pNode->vgId, pEntry->index, lino, tstrerror(code), pEntry->term, TMSG_INFO(pEntry->originalRpcType));
} }
return code; return code;
} }
@ -780,7 +788,8 @@ int32_t syncLogBufferValidate(SSyncLogBuffer* pBuf) {
return 0; return 0;
} }
int32_t syncLogBufferCommit(SSyncLogBuffer* pBuf, SSyncNode* pNode, int64_t commitIndex) { int32_t syncLogBufferCommit(SSyncLogBuffer* pBuf, SSyncNode* pNode, int64_t commitIndex, const STraceId* trace,
const char* src) {
TAOS_CHECK_RETURN(syncLogBufferValidate(pBuf)); TAOS_CHECK_RETURN(syncLogBufferValidate(pBuf));
(void)taosThreadMutexLock(&pBuf->mutex); (void)taosThreadMutexLock(&pBuf->mutex);
@ -798,13 +807,14 @@ int32_t syncLogBufferCommit(SSyncLogBuffer* pBuf, SSyncNode* pNode, int64_t comm
bool restoreFinishAtThisCommit = false; bool restoreFinishAtThisCommit = false;
if (commitIndex <= pBuf->commitIndex) { if (commitIndex <= pBuf->commitIndex) {
sDebug("vgId:%d, stale commit index, current:%" PRId64 ", notified:%" PRId64, vgId, pBuf->commitIndex, sGDebug(trace, "vgId:%d, stale commit index:%" PRId64 ", notified:%" PRId64, vgId, commitIndex, pBuf->commitIndex);
commitIndex);
goto _out; goto _out;
} }
sTrace("vgId:%d, log commit, buffer: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 "), role:%d, term:%" PRId64, sGDebug(trace,
pNode->vgId, pBuf->startIndex, pBuf->commitIndex, pBuf->matchIndex, pBuf->endIndex, role, currentTerm); "vgId:%d, log commit since %s, buffer:[%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64
"), role:%d, term:%" PRId64,
pNode->vgId, src, pBuf->startIndex, pBuf->commitIndex, pBuf->matchIndex, pBuf->endIndex, role, currentTerm);
// execute in fsm // execute in fsm
for (int64_t index = pBuf->commitIndex + 1; index <= upperIndex; index++) { for (int64_t index = pBuf->commitIndex + 1; index <= upperIndex; index++) {
@ -816,14 +826,16 @@ int32_t syncLogBufferCommit(SSyncLogBuffer* pBuf, SSyncNode* pNode, int64_t comm
// execute it // execute it
if (!syncUtilUserCommit(pEntry->originalRpcType)) { if (!syncUtilUserCommit(pEntry->originalRpcType)) {
sInfo("vgId:%d, log commit sync barrier, index:%" PRId64 ", term:%" PRId64 ", type:%s", vgId, pEntry->index, sGInfo(&pEntry->originRpcTraceId,
pEntry->term, TMSG_INFO(pEntry->originalRpcType)); "vgId:%d, index:%" PRId64 ", log commit sync barrier, term:%" PRId64 ", type:%s", vgId, pEntry->index,
pEntry->term, TMSG_INFO(pEntry->originalRpcType));
} }
if ((code = syncFsmExecute(pNode, pFsm, role, currentTerm, pEntry, 0, false)) != 0) { if ((code = syncFsmExecute(pNode, pFsm, role, currentTerm, pEntry, 0, false)) != 0) {
sError("vgId:%d, failed to execute sync log entry, index:%" PRId64 ", term:%" PRId64 sGError(&pEntry->originRpcTraceId,
", role:%d, current term:%" PRId64, "vgId:%d, index:%" PRId64 ", failed to execute sync log entry, term:%" PRId64
vgId, pEntry->index, pEntry->term, role, currentTerm); ", role:%d, current term:%" PRId64,
vgId, pEntry->index, pEntry->term, role, currentTerm);
goto _out; goto _out;
} }
pBuf->commitIndex = index; pBuf->commitIndex = index;
@ -967,7 +979,7 @@ int32_t syncLogReplRetryOnNeed(SSyncLogReplMgr* pMgr, SSyncNode* pNode) {
SRaftId* pDestId = &pNode->replicasId[pMgr->peerId]; SRaftId* pDestId = &pNode->replicasId[pMgr->peerId];
if (pMgr->retryBackoff == SYNC_MAX_RETRY_BACKOFF) { if (pMgr->retryBackoff == SYNC_MAX_RETRY_BACKOFF) {
syncLogReplReset(pMgr); syncLogReplReset(pMgr);
sWarn("vgId:%d, reset sync log repl since retry backoff exceeding limit, peer:%" PRIx64, pNode->vgId, sWarn("vgId:%d, reset sync log repl since retry backoff exceeding limit, peer addr:0x%" PRIx64, pNode->vgId,
pDestId->addr); pDestId->addr);
return TSDB_CODE_OUT_OF_RANGE; return TSDB_CODE_OUT_OF_RANGE;
} }
@ -995,7 +1007,7 @@ int32_t syncLogReplRetryOnNeed(SSyncLogReplMgr* pMgr, SSyncNode* pNode) {
if (pMgr->states[pos].acked) { if (pMgr->states[pos].acked) {
if (pMgr->matchIndex < index && pMgr->states[pos].timeMs + (syncGetRetryMaxWaitMs() << 3) < nowMs) { if (pMgr->matchIndex < index && pMgr->states[pos].timeMs + (syncGetRetryMaxWaitMs() << 3) < nowMs) {
syncLogReplReset(pMgr); syncLogReplReset(pMgr);
sWarn("vgId:%d, reset sync log repl since stagnation, index:%" PRId64 ", peer:%" PRIx64, pNode->vgId, index, sWarn("vgId:%d, reset sync log repl since stagnation, index:%" PRId64 ", peer addr:0x%" PRIx64, pNode->vgId, index,
pDestId->addr); pDestId->addr);
code = TSDB_CODE_ACTION_IN_PROGRESS; code = TSDB_CODE_ACTION_IN_PROGRESS;
goto _out; goto _out;
@ -1005,7 +1017,7 @@ int32_t syncLogReplRetryOnNeed(SSyncLogReplMgr* pMgr, SSyncNode* pNode) {
bool barrier = false; bool barrier = false;
if ((code = syncLogReplSendTo(pMgr, pNode, index, &term, pDestId, &barrier)) < 0) { if ((code = syncLogReplSendTo(pMgr, pNode, index, &term, pDestId, &barrier)) < 0) {
sError("vgId:%d, failed to replicate sync log entry since %s, index:%" PRId64 ", dest:%" PRIx64, pNode->vgId, sError("vgId:%d, failed to replicate sync log entry since %s, index:%" PRId64 ", dest addr:0x%" PRIx64, pNode->vgId,
tstrerror(code), index, pDestId->addr); tstrerror(code), index, pDestId->addr);
goto _out; goto _out;
} }
@ -1029,7 +1041,7 @@ _out:
if (retried) { if (retried) {
pMgr->retryBackoff = syncLogReplGetNextRetryBackoff(pMgr); pMgr->retryBackoff = syncLogReplGetNextRetryBackoff(pMgr);
SSyncLogBuffer* pBuf = pNode->pLogBuf; SSyncLogBuffer* pBuf = pNode->pLogBuf;
sInfo("vgId:%d, resend %d sync log entries, dest:%" PRIx64 ", indexes:%" PRId64 " ..., terms: .., %" PRId64 sInfo("vgId:%d, resend %d sync log entries, dest addr:0x%" PRIx64 ", indexes:%" PRId64 " ..., terms: .., %" PRId64
", retryWaitMs:%" PRId64 ", repl-mgr:[%" PRId64 " %" PRId64 ", %" PRId64 "), buffer: [%" PRId64 " %" PRId64 ", retryWaitMs:%" PRId64 ", repl-mgr:[%" PRId64 " %" PRId64 ", %" PRId64 "), buffer: [%" PRId64 " %" PRId64
" %" PRId64 ", %" PRId64 ")", " %" PRId64 ", %" PRId64 ")",
pNode->vgId, count, pDestId->addr, firstIndex, term, retryWaitMs, pMgr->startIndex, pMgr->matchIndex, pNode->vgId, count, pDestId->addr, firstIndex, term, retryWaitMs, pMgr->startIndex, pMgr->matchIndex,
@ -1044,7 +1056,7 @@ int32_t syncLogReplRecover(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncAppendEn
int32_t code = 0; int32_t code = 0;
if (pMgr->restored != false) return TSDB_CODE_SYN_INTERNAL_ERROR; if (pMgr->restored != false) return TSDB_CODE_SYN_INTERNAL_ERROR;
sTrace("vgId:%d, begin to recover sync log repl, peer: dnode:%d (%" PRIx64 "), repl-mgr:[%" PRId64 ", %" PRId64 sTrace("vgId:%d, begin to recover sync log repl, peer dnode:%d (0x%" PRIx64 "), repl-mgr:[%" PRId64 ", %" PRId64
", %" PRId64 ") restore:%d, buffer: [%" PRId64 ", %" PRId64 ", %" PRId64 ", %" PRId64 ", %" PRId64 ") restore:%d, buffer: [%" PRId64 ", %" PRId64 ", %" PRId64 ", %" PRId64
"), msg: {lastSendIndex:%" PRId64 ", matchIndex:%" PRId64 ", fsmState:%d, success:%d, lastMatchTerm:%" PRId64 "), msg: {lastSendIndex:%" PRId64 ", matchIndex:%" PRId64 ", fsmState:%d, success:%d, lastMatchTerm:%" PRId64
"}", "}",
@ -1057,7 +1069,7 @@ int32_t syncLogReplRecover(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncAppendEn
if (pMgr->matchIndex != 0) return TSDB_CODE_SYN_INTERNAL_ERROR; if (pMgr->matchIndex != 0) return TSDB_CODE_SYN_INTERNAL_ERROR;
if (pMsg->matchIndex < 0) { if (pMsg->matchIndex < 0) {
pMgr->restored = true; pMgr->restored = true;
sInfo("vgId:%d, sync log repl restored, peer: dnode:%d (%" PRIx64 "), repl-mgr:[%" PRId64 " %" PRId64 ", %" PRId64 sInfo("vgId:%d, sync log repl restored, peer dnode:%d (0x%" PRIx64 "), repl-mgr:[%" PRId64 " %" PRId64 ", %" PRId64
"), buffer: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 ")", "), buffer: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 ")",
pNode->vgId, DID(&destId), destId.addr, pMgr->startIndex, pMgr->matchIndex, pMgr->endIndex, pNode->vgId, DID(&destId), destId.addr, pMgr->startIndex, pMgr->matchIndex, pMgr->endIndex,
pBuf->startIndex, pBuf->commitIndex, pBuf->matchIndex, pBuf->endIndex); pBuf->startIndex, pBuf->commitIndex, pBuf->matchIndex, pBuf->endIndex);
@ -1074,7 +1086,7 @@ int32_t syncLogReplRecover(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncAppendEn
if (pMsg->success && pMsg->matchIndex == pMsg->lastSendIndex) { if (pMsg->success && pMsg->matchIndex == pMsg->lastSendIndex) {
pMgr->matchIndex = pMsg->matchIndex; pMgr->matchIndex = pMsg->matchIndex;
pMgr->restored = true; pMgr->restored = true;
sInfo("vgId:%d, sync log repl restored, peer: dnode:%d (%" PRIx64 "), repl-mgr:[%" PRId64 " %" PRId64 ", %" PRId64 sInfo("vgId:%d, sync log repl restored, peer dnode:%d (0x%" PRIx64 "), repl-mgr:[%" PRId64 " %" PRId64 ", %" PRId64
"), buffer: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 ")", "), buffer: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 ")",
pNode->vgId, DID(&destId), destId.addr, pMgr->startIndex, pMgr->matchIndex, pMgr->endIndex, pNode->vgId, DID(&destId), destId.addr, pMgr->startIndex, pMgr->matchIndex, pMgr->endIndex,
pBuf->startIndex, pBuf->commitIndex, pBuf->matchIndex, pBuf->endIndex); pBuf->startIndex, pBuf->commitIndex, pBuf->matchIndex, pBuf->endIndex);
@ -1110,7 +1122,7 @@ int32_t syncLogReplRecover(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncAppendEn
if (pMsg->matchIndex == -1) { if (pMsg->matchIndex == -1) {
// first time to restore // first time to restore
sInfo("vgId:%d, first time to restore sync log repl, peer: dnode:%d (%" PRIx64 "), repl-mgr:[%" PRId64 " %" PRId64 sInfo("vgId:%d, first time to restore sync log repl, peer dnode:%d (0x%" PRIx64 "), repl-mgr:[%" PRId64 " %" PRId64
", %" PRId64 "), buffer: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 "), index:%" PRId64 ", %" PRId64 "), buffer: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 "), index:%" PRId64
", firstVer:%" PRId64 ", term:%" PRId64 ", lastMatchTerm:%" PRId64, ", firstVer:%" PRId64 ", term:%" PRId64 ", lastMatchTerm:%" PRId64,
pNode->vgId, DID(&destId), destId.addr, pMgr->startIndex, pMgr->matchIndex, pMgr->endIndex, pNode->vgId, DID(&destId), destId.addr, pMgr->startIndex, pMgr->matchIndex, pMgr->endIndex,
@ -1148,7 +1160,7 @@ int32_t syncLogReplProcessHeartbeatReply(SSyncLogReplMgr* pMgr, SSyncNode* pNode
SSyncLogBuffer* pBuf = pNode->pLogBuf; SSyncLogBuffer* pBuf = pNode->pLogBuf;
(void)taosThreadMutexLock(&pMgr->mutex); (void)taosThreadMutexLock(&pMgr->mutex);
if (pMsg->startTime != 0 && pMsg->startTime != pMgr->peerStartTime) { if (pMsg->startTime != 0 && pMsg->startTime != pMgr->peerStartTime) {
sInfo("vgId:%d, reset sync log repl in heartbeat, peer:%" PRIx64 ", start time:%" PRId64 ", old:%" PRId64, sInfo("vgId:%d, reset sync log repl in heartbeat, peer addr:0x%" PRIx64 ", start time:%" PRId64 ", old:%" PRId64,
pNode->vgId, pMsg->srcId.addr, pMsg->startTime, pMgr->peerStartTime); pNode->vgId, pMsg->srcId.addr, pMsg->startTime, pMgr->peerStartTime);
syncLogReplReset(pMgr); syncLogReplReset(pMgr);
pMgr->peerStartTime = pMsg->startTime; pMgr->peerStartTime = pMsg->startTime;
@ -1161,7 +1173,7 @@ int32_t syncLogReplProcessReply(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncApp
SSyncLogBuffer* pBuf = pNode->pLogBuf; SSyncLogBuffer* pBuf = pNode->pLogBuf;
(void)taosThreadMutexLock(&pMgr->mutex); (void)taosThreadMutexLock(&pMgr->mutex);
if (pMsg->startTime != pMgr->peerStartTime) { if (pMsg->startTime != pMgr->peerStartTime) {
sInfo("vgId:%d, reset sync log repl in appendlog reply, peer:%" PRIx64 ", start time:%" PRId64 ", old:%" PRId64, sInfo("vgId:%d, reset sync log repl in appendlog reply, peer addr:0x%" PRIx64 ", start time:%" PRId64 ", old:%" PRId64,
pNode->vgId, pMsg->srcId.addr, pMsg->startTime, pMgr->peerStartTime); pNode->vgId, pMsg->srcId.addr, pMsg->startTime, pMgr->peerStartTime);
syncLogReplReset(pMgr); syncLogReplReset(pMgr);
pMgr->peerStartTime = pMsg->startTime; pMgr->peerStartTime = pMsg->startTime;
@ -1200,7 +1212,7 @@ int32_t syncLogReplProbe(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncIndex inde
int64_t nowMs = taosGetMonoTimestampMs(); int64_t nowMs = taosGetMonoTimestampMs();
int32_t code = 0; int32_t code = 0;
sTrace("vgId:%d, begin to probe peer:%" PRIx64 " with msg of index:%" PRId64 ", repl-mgr:[%" PRId64 ", %" PRId64 sTrace("vgId:%d, begin to probe peer addr:0x%" PRIx64 " with msg of index:%" PRId64 ", repl-mgr:[%" PRId64 ", %" PRId64
", %" PRId64 "), restored:%d", ", %" PRId64 "), restored:%d",
pNode->vgId, pNode->replicasId[pMgr->peerId].addr, index, pMgr->startIndex, pMgr->matchIndex, pMgr->endIndex, pNode->vgId, pNode->replicasId[pMgr->peerId].addr, index, pMgr->startIndex, pMgr->matchIndex, pMgr->endIndex,
pMgr->restored); pMgr->restored);
@ -1215,7 +1227,7 @@ int32_t syncLogReplProbe(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncIndex inde
bool barrier = false; bool barrier = false;
SyncTerm term = -1; SyncTerm term = -1;
if ((code = syncLogReplSendTo(pMgr, pNode, index, &term, pDestId, &barrier)) < 0) { if ((code = syncLogReplSendTo(pMgr, pNode, index, &term, pDestId, &barrier)) < 0) {
sError("vgId:%d, failed to replicate log entry since %s, index:%" PRId64 ", dest:0x%016" PRIx64, pNode->vgId, sError("vgId:%d, failed to replicate log entry since %s, index:%" PRId64 ", dest addr:0x%016" PRIx64, pNode->vgId,
tstrerror(code), index, pDestId->addr); tstrerror(code), index, pDestId->addr);
TAOS_RETURN(code); TAOS_RETURN(code);
} }
@ -1230,7 +1242,7 @@ int32_t syncLogReplProbe(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncIndex inde
pMgr->endIndex = index + 1; pMgr->endIndex = index + 1;
SSyncLogBuffer* pBuf = pNode->pLogBuf; SSyncLogBuffer* pBuf = pNode->pLogBuf;
sTrace("vgId:%d, probe peer:%" PRIx64 " with msg of index:%" PRId64 " term:%" PRId64 ", repl-mgr:[%" PRId64 sTrace("vgId:%d, probe peer addr:0x%" PRIx64 " with msg of index:%" PRId64 " term:%" PRId64 ", repl-mgr:[%" PRId64
" %" PRId64 ", %" PRId64 "), buffer: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 ")", " %" PRId64 ", %" PRId64 "), buffer: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 ")",
pNode->vgId, pDestId->addr, index, term, pMgr->startIndex, pMgr->matchIndex, pMgr->endIndex, pBuf->startIndex, pNode->vgId, pDestId->addr, index, term, pMgr->startIndex, pMgr->matchIndex, pMgr->endIndex, pBuf->startIndex,
pBuf->commitIndex, pBuf->matchIndex, pBuf->endIndex); pBuf->commitIndex, pBuf->matchIndex, pBuf->endIndex);
@ -1240,7 +1252,7 @@ int32_t syncLogReplProbe(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncIndex inde
int32_t syncLogReplAttempt(SSyncLogReplMgr* pMgr, SSyncNode* pNode) { int32_t syncLogReplAttempt(SSyncLogReplMgr* pMgr, SSyncNode* pNode) {
if (!pMgr->restored) return TSDB_CODE_SYN_INTERNAL_ERROR; if (!pMgr->restored) return TSDB_CODE_SYN_INTERNAL_ERROR;
sTrace("vgId:%d, replicate log entries from end to match, repl-mgr:[%" PRId64 ", %" PRId64 ", %" PRId64 sTrace("vgId:%d, replicate raft entries from end to match, repl-mgr:[%" PRId64 ", %" PRId64 ", %" PRId64
"), restore:%d", "), restore:%d",
pNode->vgId, pMgr->startIndex, pMgr->matchIndex, pMgr->endIndex, pMgr->restored); pNode->vgId, pMgr->startIndex, pMgr->matchIndex, pMgr->endIndex, pMgr->restored);
@ -1267,7 +1279,7 @@ int32_t syncLogReplAttempt(SSyncLogReplMgr* pMgr, SSyncNode* pNode) {
code = syncLogReplSendTo(pMgr, pNode, index, &term, pDestId, &barrier); code = syncLogReplSendTo(pMgr, pNode, index, &term, pDestId, &barrier);
if (code < 0) { if (code < 0) {
sError("vgId:%d, failed to replicate log entry since %s, index:%" PRId64 ", dest:0x%016" PRIx64, pNode->vgId, sError("vgId:%d, failed to replicate log entry since %s, index:%" PRId64 ", dest addr:0x%016" PRIx64, pNode->vgId,
tstrerror(code), index, pDestId->addr); tstrerror(code), index, pDestId->addr);
TAOS_RETURN(code); TAOS_RETURN(code);
} }
@ -1294,7 +1306,7 @@ int32_t syncLogReplAttempt(SSyncLogReplMgr* pMgr, SSyncNode* pNode) {
TAOS_CHECK_RETURN(syncLogReplRetryOnNeed(pMgr, pNode)); TAOS_CHECK_RETURN(syncLogReplRetryOnNeed(pMgr, pNode));
SSyncLogBuffer* pBuf = pNode->pLogBuf; SSyncLogBuffer* pBuf = pNode->pLogBuf;
sTrace("vgId:%d, replicated %d msgs to peer:%" PRIx64 ", indexes:%" PRId64 "..., terms: ...%" PRId64 sTrace("vgId:%d, replicated %d msgs to peer addr:0x%" PRIx64 ", indexes:%" PRId64 "..., terms: ...%" PRId64
", repl-mgr:[%" PRId64 " %" PRId64 ", %" PRId64 "), buffer: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 ", repl-mgr:[%" PRId64 " %" PRId64 ", %" PRId64 "), buffer: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64
")", ")",
pNode->vgId, count, pDestId->addr, firstIndex, term, pMgr->startIndex, pMgr->matchIndex, pMgr->endIndex, pNode->vgId, count, pDestId->addr, firstIndex, term, pMgr->startIndex, pMgr->matchIndex, pMgr->endIndex,
@ -1555,7 +1567,7 @@ int32_t syncLogReplSendTo(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncIndex ind
if (code == TSDB_CODE_WAL_LOG_NOT_EXIST) { if (code == TSDB_CODE_WAL_LOG_NOT_EXIST) {
SSyncLogReplMgr* pMgr = syncNodeGetLogReplMgr(pNode, pDestId); SSyncLogReplMgr* pMgr = syncNodeGetLogReplMgr(pNode, pDestId);
if (pMgr) { if (pMgr) {
sInfo("vgId:%d, reset sync log repl of peer:%" PRIx64 " since %s, index:%" PRId64, pNode->vgId, pDestId->addr, sInfo("vgId:%d, reset sync log repl of peer addr:0x%" PRIx64 " since %s, index:%" PRId64, pNode->vgId, pDestId->addr,
tstrerror(code), index); tstrerror(code), index);
syncLogReplReset(pMgr); syncLogReplReset(pMgr);
} }
@ -1579,8 +1591,8 @@ int32_t syncLogReplSendTo(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncIndex ind
TRACE_SET_MSGID(&(msgOut.info.traceId), tGenIdPI64()); TRACE_SET_MSGID(&(msgOut.info.traceId), tGenIdPI64());
sGDebug(&msgOut.info.traceId, sGDebug(&msgOut.info.traceId,
"vgId:%d, replicate one msg index:%" PRId64 " term:%" PRId64 " prevterm:%" PRId64 " to dest:0x%016" PRIx64, "vgId:%d, index:%" PRId64 ", replicate one msg to dest addr:0x%" PRIx64 ", term:%" PRId64 " prevterm:%" PRId64,
pNode->vgId, pEntry->index, pEntry->term, prevLogTerm, pDestId->addr); pNode->vgId, pEntry->index, pDestId->addr, pEntry->term, prevLogTerm);
TAOS_CHECK_GOTO(syncNodeSendAppendEntries(pNode, pDestId, &msgOut), &lino, _err); TAOS_CHECK_GOTO(syncNodeSendAppendEntries(pNode, pDestId, &msgOut), &lino, _err);
if (!inBuf) { if (!inBuf) {

View File

@ -249,8 +249,10 @@ static int32_t raftLogAppendEntry(struct SSyncLogStore* pLogStore, SSyncRaftEntr
TAOS_RETURN(code); TAOS_RETURN(code);
} }
sNTrace(pData->pSyncNode, "index:%" PRId64 ", append raft log, type:%s origin type:%s elapsed:%" PRId64, pEntry->index, sGDebug(&pEntry->originRpcTraceId,
TMSG_INFO(pEntry->msgType), TMSG_INFO(pEntry->originalRpcType), tsElapsed); "vgId:%d, index:%" PRId64 ", persist raft entry, type:%s origin type:%s elapsed:%" PRId64,
pData->pSyncNode->vgId, pEntry->index, TMSG_INFO(pEntry->msgType), TMSG_INFO(pEntry->originalRpcType),
tsElapsed);
TAOS_RETURN(TSDB_CODE_SUCCESS); TAOS_RETURN(TSDB_CODE_SUCCESS);
} }

View File

@ -69,7 +69,7 @@ bool syncUtilNodeInfo2RaftId(const SNodeInfo* pInfo, SyncGroupId vgId, SRaftId*
raftId->addr = SYNC_ADDR(pInfo); raftId->addr = SYNC_ADDR(pInfo);
raftId->vgId = vgId; raftId->vgId = vgId;
sInfo("vgId:%d, sync addr:%" PRIu64 " is resolved, ep:%s:%u ip:%s ipv4:%u dnode:%d cluster:%" PRId64, vgId, sInfo("vgId:%d, sync addr:0x%" PRIx64 " is resolved, ep:%s:%u ip:%s ipv4:%u dnode:%d cluster:%" PRId64, vgId,
raftId->addr, pInfo->nodeFqdn, pInfo->nodePort, ipbuf, ipv4, pInfo->nodeId, pInfo->clusterId); raftId->addr, pInfo->nodeFqdn, pInfo->nodePort, ipbuf, ipv4, pInfo->nodeId, pInfo->clusterId);
return true; return true;
} }