Merge pull request #18544 from taosdata/feature/3.0_mhli
refactor(sync): add trace log for elect count, become leader count, c…
This commit is contained in:
commit
a7cd465a6a
|
@ -162,7 +162,10 @@ typedef struct SSyncFSM {
|
|||
// SWal implements it
|
||||
typedef struct SSyncLogStore {
|
||||
SLRUCache* pCache;
|
||||
void* data;
|
||||
int32_t cacheHit;
|
||||
int32_t cacheMiss;
|
||||
|
||||
void* data;
|
||||
|
||||
int32_t (*syncLogUpdateCommitIndex)(struct SSyncLogStore* pLogStore, SyncIndex index);
|
||||
SyncIndex (*syncLogCommitIndex)(struct SSyncLogStore* pLogStore);
|
||||
|
|
|
@ -192,6 +192,10 @@ typedef struct SSyncNode {
|
|||
int64_t leaderTime;
|
||||
int64_t lastReplicateTime;
|
||||
|
||||
int32_t electNum;
|
||||
int32_t becomeLeaderNum;
|
||||
int32_t configChangeNum;
|
||||
|
||||
bool isStart;
|
||||
|
||||
} SSyncNode;
|
||||
|
|
|
@ -205,9 +205,11 @@ int32_t syncNodeOnAppendEntries(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
|
|||
pLocalEntry = (SSyncRaftEntry*)taosLRUCacheValue(pCache, hLocal);
|
||||
code = 0;
|
||||
|
||||
ths->pLogStore->cacheHit++;
|
||||
sNTrace(ths, "hit cache index:%" PRId64 ", bytes:%u, %p", appendIndex, pLocalEntry->bytes, pLocalEntry);
|
||||
|
||||
} else {
|
||||
ths->pLogStore->cacheMiss++;
|
||||
sNTrace(ths, "miss cache index:%" PRId64, appendIndex);
|
||||
|
||||
code = ths->pLogStore->syncLogGetEntry(ths->pLogStore, appendIndex, &pLocalEntry);
|
||||
|
|
|
@ -117,9 +117,11 @@ void syncMaybeAdvanceCommitIndex(SSyncNode* pSyncNode) {
|
|||
if (h) {
|
||||
pEntry = (SSyncRaftEntry*)taosLRUCacheValue(pCache, h);
|
||||
|
||||
pSyncNode->pLogStore->cacheHit++;
|
||||
sNTrace(pSyncNode, "hit cache index:%" PRId64 ", bytes:%u, %p", index, pEntry->bytes, pEntry);
|
||||
|
||||
} else {
|
||||
pSyncNode->pLogStore->cacheMiss++;
|
||||
sNTrace(pSyncNode, "miss cache index:%" PRId64, index);
|
||||
|
||||
int32_t code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, index, &pEntry);
|
||||
|
|
|
@ -61,7 +61,8 @@ static int32_t syncNodeRequestVotePeers(SSyncNode* pNode) {
|
|||
}
|
||||
|
||||
int32_t syncNodeElect(SSyncNode* pSyncNode) {
|
||||
sNTrace(pSyncNode, "begin election");
|
||||
sNInfo(pSyncNode, "begin election");
|
||||
pSyncNode->electNum++;
|
||||
|
||||
int32_t ret = 0;
|
||||
if (pSyncNode->state == TAOS_SYNC_STATE_FOLLOWER) {
|
||||
|
@ -86,7 +87,7 @@ int32_t syncNodeElect(SSyncNode* pSyncNode) {
|
|||
syncNodeCandidate2Leader(pSyncNode);
|
||||
pSyncNode->pVotesGranted->toLeader = true;
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
if (pSyncNode->replicaNum == 1) {
|
||||
// only myself, to leader
|
||||
|
@ -98,7 +99,6 @@ int32_t syncNodeElect(SSyncNode* pSyncNode) {
|
|||
syncNodeCandidate2Leader(pSyncNode);
|
||||
pSyncNode->pVotesGranted->toLeader = true;
|
||||
return ret;
|
||||
|
||||
}
|
||||
|
||||
ret = syncNodeRequestVotePeers(pSyncNode);
|
||||
|
|
|
@ -410,9 +410,11 @@ bool syncIsReadyForRead(int64_t rid) {
|
|||
pEntry = (SSyncRaftEntry*)taosLRUCacheValue(pCache, h);
|
||||
code = 0;
|
||||
|
||||
pSyncNode->pLogStore->cacheHit++;
|
||||
sNTrace(pSyncNode, "hit cache index:%" PRId64 ", bytes:%u, %p", lastIndex, pEntry->bytes, pEntry);
|
||||
|
||||
} else {
|
||||
pSyncNode->pLogStore->cacheMiss++;
|
||||
sNTrace(pSyncNode, "miss cache index:%" PRId64, lastIndex);
|
||||
|
||||
code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, lastIndex, &pEntry);
|
||||
|
@ -1008,6 +1010,10 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) {
|
|||
atomic_store_64(&pSyncNode->snapshottingIndex, SYNC_INDEX_INVALID);
|
||||
|
||||
pSyncNode->isStart = true;
|
||||
pSyncNode->electNum = 0;
|
||||
pSyncNode->becomeLeaderNum = 0;
|
||||
pSyncNode->configChangeNum = 0;
|
||||
|
||||
sNTrace(pSyncNode, "sync open, node:%p", pSyncNode);
|
||||
|
||||
return pSyncNode;
|
||||
|
@ -1340,6 +1346,8 @@ void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncInde
|
|||
pSyncNode->pRaftCfg->cfg = *pNewConfig;
|
||||
pSyncNode->pRaftCfg->lastConfigIndex = lastConfigChangeIndex;
|
||||
|
||||
pSyncNode->configChangeNum++;
|
||||
|
||||
bool IamInOld = syncNodeInConfig(pSyncNode, &oldConfig);
|
||||
bool IamInNew = syncNodeInConfig(pSyncNode, pNewConfig);
|
||||
|
||||
|
@ -1363,7 +1371,7 @@ void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncInde
|
|||
char newCfgStr[1024] = {0};
|
||||
syncCfg2SimpleStr(&oldConfig, oldCfgStr, sizeof(oldCfgStr));
|
||||
syncCfg2SimpleStr(pNewConfig, oldCfgStr, sizeof(oldCfgStr));
|
||||
sNTrace(pSyncNode, "begin do config change, from %s to %s", oldCfgStr, oldCfgStr);
|
||||
sNInfo(pSyncNode, "begin do config change, from %s to %s", oldCfgStr, oldCfgStr);
|
||||
|
||||
if (IamInNew) {
|
||||
pSyncNode->pRaftCfg->isStandBy = 0; // change isStandBy to normal
|
||||
|
@ -1495,13 +1503,13 @@ void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncInde
|
|||
} else {
|
||||
// persist cfg
|
||||
raftCfgPersist(pSyncNode->pRaftCfg);
|
||||
sNTrace(pSyncNode, "do not config change from %d to %d, index:%" PRId64 ", %s --> %s", oldConfig.replicaNum,
|
||||
pNewConfig->replicaNum, lastConfigChangeIndex, oldCfgStr, newCfgStr);
|
||||
sNInfo(pSyncNode, "do not config change from %d to %d, index:%" PRId64 ", %s --> %s", oldConfig.replicaNum,
|
||||
pNewConfig->replicaNum, lastConfigChangeIndex, oldCfgStr, newCfgStr);
|
||||
}
|
||||
|
||||
_END:
|
||||
// log end config change
|
||||
sNTrace(pSyncNode, "end do config change, from %s to %s", oldCfgStr, newCfgStr);
|
||||
sNInfo(pSyncNode, "end do config change, from %s to %s", oldCfgStr, newCfgStr);
|
||||
}
|
||||
|
||||
// raft state change --------------
|
||||
|
@ -1598,6 +1606,8 @@ void syncNodeBecomeFollower(SSyncNode* pSyncNode, const char* debugStr) {
|
|||
void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr) {
|
||||
pSyncNode->leaderTime = taosGetTimestampMs();
|
||||
|
||||
pSyncNode->becomeLeaderNum++;
|
||||
|
||||
// reset restoreFinish
|
||||
pSyncNode->restoreFinish = false;
|
||||
|
||||
|
@ -1666,7 +1676,7 @@ void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr) {
|
|||
pSyncNode->minMatchIndex = SYNC_INDEX_INVALID;
|
||||
|
||||
// trace log
|
||||
sNTrace(pSyncNode, "become leader %s", debugStr);
|
||||
sNInfo(pSyncNode, "become leader %s", debugStr);
|
||||
}
|
||||
|
||||
void syncNodeCandidate2Leader(SSyncNode* pSyncNode) {
|
||||
|
@ -1842,9 +1852,11 @@ SyncTerm syncNodeGetPreTerm(SSyncNode* pSyncNode, SyncIndex index) {
|
|||
pPreEntry = (SSyncRaftEntry*)taosLRUCacheValue(pCache, h);
|
||||
code = 0;
|
||||
|
||||
pSyncNode->pLogStore->cacheHit++;
|
||||
sNTrace(pSyncNode, "hit cache index:%" PRId64 ", bytes:%u, %p", preIndex, pPreEntry->bytes, pPreEntry);
|
||||
|
||||
} else {
|
||||
pSyncNode->pLogStore->cacheMiss++;
|
||||
sNTrace(pSyncNode, "miss cache index:%" PRId64, preIndex);
|
||||
|
||||
code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, preIndex, &pPreEntry);
|
||||
|
@ -1971,7 +1983,7 @@ static void syncNodeEqHeartbeatTimer(void* param, void* tmrId) {
|
|||
return;
|
||||
}
|
||||
|
||||
sTrace("enqueue heartbeat timer");
|
||||
sTrace("vgId:%d, enqueue heartbeat timer", pNode->vgId);
|
||||
code = pNode->syncEqMsg(pNode->msgcb, &rpcMsg);
|
||||
if (code != 0) {
|
||||
sError("failed to enqueue heartbeat msg since %s", terrstr());
|
||||
|
@ -2526,9 +2538,11 @@ int32_t syncNodeDoCommit(SSyncNode* ths, SyncIndex beginIndex, SyncIndex endInde
|
|||
if (h) {
|
||||
pEntry = (SSyncRaftEntry*)taosLRUCacheValue(pCache, h);
|
||||
|
||||
ths->pLogStore->cacheHit++;
|
||||
sNTrace(ths, "hit cache index:%" PRId64 ", bytes:%u, %p", i, pEntry->bytes, pEntry);
|
||||
|
||||
} else {
|
||||
ths->pLogStore->cacheMiss++;
|
||||
sNTrace(ths, "miss cache index:%" PRId64, i);
|
||||
|
||||
code = ths->pLogStore->syncLogGetEntry(ths->pLogStore, i, &pEntry);
|
||||
|
|
|
@ -45,6 +45,9 @@ SSyncLogStore* logStoreCreate(SSyncNode* pSyncNode) {
|
|||
return NULL;
|
||||
}
|
||||
|
||||
pLogStore->cacheHit = 0;
|
||||
pLogStore->cacheMiss = 0;
|
||||
|
||||
taosLRUCacheSetStrictCapacity(pLogStore->pCache, false);
|
||||
|
||||
pLogStore->data = taosMemoryMalloc(sizeof(SSyncLogStoreData));
|
||||
|
|
|
@ -80,9 +80,11 @@ int32_t syncNodeReplicateOne(SSyncNode* pSyncNode, SRaftId* pDestId, bool snapsh
|
|||
pEntry = (SSyncRaftEntry*)taosLRUCacheValue(pCache, h);
|
||||
code = 0;
|
||||
|
||||
pSyncNode->pLogStore->cacheHit++;
|
||||
sNTrace(pSyncNode, "hit cache index:%" PRId64 ", bytes:%u, %p", nextIndex, pEntry->bytes, pEntry);
|
||||
|
||||
} else {
|
||||
pSyncNode->pLogStore->cacheMiss++;
|
||||
sNTrace(pSyncNode, "miss cache index:%" PRId64, nextIndex);
|
||||
|
||||
code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, nextIndex, &pEntry);
|
||||
|
|
|
@ -52,7 +52,7 @@ static void syncNodeCleanConfigIndex(SSyncNode* ths) {
|
|||
}
|
||||
|
||||
static int32_t syncNodeTimerRoutine(SSyncNode* ths) {
|
||||
sNTrace(ths, "timer routines");
|
||||
sNInfo(ths, "timer routines");
|
||||
|
||||
// timer replicate
|
||||
syncNodeReplicate(ths);
|
||||
|
|
|
@ -242,6 +242,9 @@ void syncPrintNodeLog(const char* flags, ELogLevel level, int32_t dflag, SSyncNo
|
|||
logBeginIndex = pNode->pLogStore->syncLogBeginIndex(pNode->pLogStore);
|
||||
}
|
||||
|
||||
int32_t cacheHit = pNode->pLogStore->cacheHit;
|
||||
int32_t cacheMiss = pNode->pLogStore->cacheMiss;
|
||||
|
||||
char cfgStr[1024];
|
||||
if (pNode->pRaftCfg != NULL) {
|
||||
syncCfg2SimpleStr(&(pNode->pRaftCfg->cfg), cfgStr, sizeof(cfgStr));
|
||||
|
@ -275,18 +278,18 @@ void syncPrintNodeLog(const char* flags, ELogLevel level, int32_t dflag, SSyncNo
|
|||
terrno = errCode;
|
||||
|
||||
if (pNode != NULL && pNode->pRaftCfg != NULL) {
|
||||
taosPrintLog(flags, level, dflag,
|
||||
"vgId:%d, sync %s "
|
||||
"%s"
|
||||
", tm:%" PRIu64 ", cmt:%" PRId64 ", fst:%" PRId64 ", lst:%" PRId64 ", min:%" PRId64 ", snap:%" PRId64
|
||||
", snap-tm:%" PRIu64 ", sby:%d, aq:%d, snaping:%" PRId64 ", r-num:%d, lcfg:%" PRId64
|
||||
", chging:%d, rsto:%d, dquorum:%d, elt:%" PRId64 ", hb:%" PRId64 ", %s, %s, %s, %s",
|
||||
pNode->vgId, syncStr(pNode->state), eventLog, currentTerm, pNode->commitIndex, logBeginIndex,
|
||||
logLastIndex, pNode->minMatchIndex, snapshot.lastApplyIndex, snapshot.lastApplyTerm,
|
||||
pNode->pRaftCfg->isStandBy, aqItems, pNode->snapshottingIndex, pNode->replicaNum,
|
||||
pNode->pRaftCfg->lastConfigIndex, pNode->changing, pNode->restoreFinish, quorum,
|
||||
pNode->electTimerLogicClock, pNode->heartbeatTimerLogicClockUser, peerStr, cfgStr, hbTimeStr,
|
||||
hbrTimeStr);
|
||||
taosPrintLog(
|
||||
flags, level, dflag,
|
||||
"vgId:%d, sync %s "
|
||||
"%s"
|
||||
", tm:%" PRIu64 ", cmt:%" PRId64 ", fst:%" PRId64 ", lst:%" PRId64 ", min:%" PRId64 ", snap:%" PRId64
|
||||
", snap-tm:%" PRIu64 ", elt-num:%d, bl-num:%d, cc-num:%d, hit:%d, mis:%d, aq:%d, snaping:%" PRId64
|
||||
", r-num:%d, lcfg:%" PRId64 ", chging:%d, rsto:%d, dquorum:%d, elt:%" PRId64 ", hb:%" PRId64 ", %s, %s, %s, %s",
|
||||
pNode->vgId, syncStr(pNode->state), eventLog, currentTerm, pNode->commitIndex, logBeginIndex, logLastIndex,
|
||||
pNode->minMatchIndex, snapshot.lastApplyIndex, snapshot.lastApplyTerm, pNode->electNum, pNode->becomeLeaderNum,
|
||||
pNode->configChangeNum, cacheHit, cacheMiss, aqItems, pNode->snapshottingIndex, pNode->replicaNum,
|
||||
pNode->pRaftCfg->lastConfigIndex, pNode->changing, pNode->restoreFinish, quorum, pNode->electTimerLogicClock,
|
||||
pNode->heartbeatTimerLogicClockUser, peerStr, cfgStr, hbTimeStr, hbrTimeStr);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -438,7 +441,8 @@ void syncLogRecvAppendEntriesReply(SSyncNode* pSyncNode, const SyncAppendEntries
|
|||
host, port, pMsg->term, pMsg->privateTerm, pMsg->success, pMsg->lastSendIndex, pMsg->matchIndex, s);
|
||||
}
|
||||
|
||||
void syncLogSendHeartbeat(SSyncNode* pSyncNode, const SyncHeartbeat* pMsg, bool printX, int64_t timerElapsed, int64_t execTime) {
|
||||
void syncLogSendHeartbeat(SSyncNode* pSyncNode, const SyncHeartbeat* pMsg, bool printX, int64_t timerElapsed,
|
||||
int64_t execTime) {
|
||||
if (!(sDebugFlag & DEBUG_TRACE)) return;
|
||||
|
||||
char host[64];
|
||||
|
|
Loading…
Reference in New Issue