Merge pull request #20300 from taosdata/FIX/TD-23009-main

enh: rotate mnode epSet on sending dnode status req failure
This commit is contained in:
Shengliang Guan 2023-03-07 17:21:26 +08:00 committed by GitHub
commit 0874b71bf2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 38 additions and 6 deletions

View File

@ -61,6 +61,16 @@ static void dmProcessStatusRsp(SDnodeMgmt *pMgmt, SRpcMsg *pRsp) {
rpcFreeCont(pRsp->pCont);
}
void dmEpSetToStr(char *buf, int32_t len, SEpSet *epSet) {
int32_t n = 0;
n += snprintf(buf + n, len - n, "%s", "{");
for (int i = 0; i < epSet->numOfEps; i++) {
n += snprintf(buf + n, len - n, "%s:%d%s", epSet->eps[i].fqdn, epSet->eps[i].port,
(i + 1 < epSet->numOfEps ? ", " : ""));
}
n += snprintf(buf + n, len - n, "%s", "}");
}
void dmSendStatusReq(SDnodeMgmt *pMgmt) {
SStatusReq req = {0};
@ -119,11 +129,10 @@ void dmSendStatusReq(SDnodeMgmt *pMgmt) {
dmGetMnodeEpSet(pMgmt->pData, &epSet);
rpcSendRecv(pMgmt->msgCb.clientRpc, &epSet, &rpcMsg, &rpcRsp);
if (rpcRsp.code != 0) {
dError("failed to send status req since %s, numOfEps:%d inUse:%d", tstrerror(rpcRsp.code), epSet.numOfEps,
epSet.inUse);
for (int32_t i = 0; i < epSet.numOfEps; ++i) {
dDebug("index:%d, mnode ep:%s:%u", i, epSet.eps[i].fqdn, epSet.eps[i].port);
}
dmRotateMnodeEpSet(pMgmt->pData);
char tbuf[256];
dmEpSetToStr(tbuf, sizeof(tbuf), &epSet);
dError("failed to send status req since %s, epSet:%s, inUse:%d", tstrerror(rpcRsp.code), tbuf, epSet.inUse);
}
dmProcessStatusRsp(pMgmt, &rpcRsp);
}

View File

@ -166,6 +166,7 @@ int32_t dmReadEps(SDnodeData *pData);
int32_t dmWriteEps(SDnodeData *pData);
void dmUpdateEps(SDnodeData *pData, SArray *pDnodeEps);
void dmGetMnodeEpSet(SDnodeData *pData, SEpSet *pEpSet);
void dmRotateMnodeEpSet(SDnodeData *pData);
void dmGetMnodeEpSetForRedirect(SDnodeData *pData, SRpcMsg *pMsg, SEpSet *pEpSet);
void dmSetMnodeEpSet(SDnodeData *pData, SEpSet *pEpSet);
bool dmUpdateDnodeInfo(void *pData, int32_t *dnodeId, int64_t *clusterId, char *fqdn, uint16_t *port);

View File

@ -325,6 +325,28 @@ void dmGetMnodeEpSet(SDnodeData *pData, SEpSet *pEpSet) {
taosThreadRwlockUnlock(&pData->lock);
}
static FORCE_INLINE void dmSwapEps(SEp *epLhs, SEp *epRhs) {
SEp epTmp;
epTmp.port = epLhs->port;
tstrncpy(epTmp.fqdn, epLhs->fqdn, tListLen(epTmp.fqdn));
epLhs->port = epRhs->port;
tstrncpy(epLhs->fqdn, epRhs->fqdn, tListLen(epLhs->fqdn));
epRhs->port = epTmp.port;
tstrncpy(epRhs->fqdn, epTmp.fqdn, tListLen(epRhs->fqdn));
}
void dmRotateMnodeEpSet(SDnodeData *pData) {
taosThreadRwlockRdlock(&pData->lock);
SEpSet *pEpSet = &pData->mnodeEps;
for (int i = 1; i < pEpSet->numOfEps; i++) {
dmSwapEps(&pEpSet->eps[i - 1], &pEpSet->eps[i]);
}
taosThreadRwlockUnlock(&pData->lock);
}
void dmGetMnodeEpSetForRedirect(SDnodeData *pData, SRpcMsg *pMsg, SEpSet *pEpSet) {
dmGetMnodeEpSet(pData, pEpSet);
dTrace("msg is redirected, handle:%p num:%d use:%d", pMsg->info.handle, pEpSet->numOfEps, pEpSet->inUse);

View File

@ -139,7 +139,7 @@ int32_t raftStoreWriteFile(SSyncNode *pNode) {
if (taosRenameFile(file, realfile) != 0) goto _OVER;
code = 0;
sInfo("vgId:%d, succeed to write raft store file:%s, len:%d", pNode->vgId, realfile, len);
sInfo("vgId:%d, succeed to write raft store file:%s, term:%" PRId64, pNode->vgId, realfile, pStore->currentTerm);
_OVER:
if (pJson != NULL) tjsonDelete(pJson);