From ab3f6619b979e01144c8692d984f3c90baabe2e9 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Wed, 14 Sep 2022 23:29:38 +0800 Subject: [PATCH 1/5] enh(tsc): handle deadlock --- source/client/src/clientHb.c | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/source/client/src/clientHb.c b/source/client/src/clientHb.c index 84a827ed78..fc03a66655 100644 --- a/source/client/src/clientHb.c +++ b/source/client/src/clientHb.c @@ -173,7 +173,8 @@ static int32_t hbQueryHbRspHandle(SAppHbMgr *pAppHbMgr, SClientHbRsp *pRsp) { pTscObj->pAppInfo->totalDnodes = pRsp->query->totalDnodes; pTscObj->pAppInfo->onlineDnodes = pRsp->query->onlineDnodes; pTscObj->connId = pRsp->query->connId; - tscTrace("conn %p hb rsp, dnodes %d/%d", pTscObj->connId, pTscObj->pAppInfo->onlineDnodes, pTscObj->pAppInfo->totalDnodes); + tscTrace("conn %p hb rsp, dnodes %d/%d", pTscObj->connId, pTscObj->pAppInfo->onlineDnodes, + pTscObj->pAppInfo->totalDnodes); if (pRsp->query->killRid) { tscDebug("request rid %" PRIx64 " need to be killed now", pRsp->query->killRid); @@ -297,7 +298,8 @@ static int32_t hbAsyncCallBack(void *param, SDataBuf *pMsg, int32_t code) { if (code != 0) { (*pInst)->onlineDnodes = ((*pInst)->totalDnodes ? 0 : -1); - tscDebug("hb rsp error %s, update server status %d/%d", tstrerror(code), (*pInst)->onlineDnodes, (*pInst)->totalDnodes); + tscDebug("hb rsp error %s, update server status %d/%d", tstrerror(code), (*pInst)->onlineDnodes, + (*pInst)->totalDnodes); } if (rspNum) { @@ -654,6 +656,8 @@ int32_t hbGatherAppInfo(void) { for (int32_t i = 0; i < sz; ++i) { SAppHbMgr *pAppHbMgr = taosArrayGetP(clientHbMgr.appHbMgrs, i); + if (pAppHbMgr == NULL) continue; + uint64_t clusterId = pAppHbMgr->pAppInstInfo->clusterId; SAppHbReq *pApp = taosHashGet(clientHbMgr.appSummary, &clusterId, sizeof(clusterId)); if (NULL == pApp) { @@ -691,15 +695,20 @@ static void *hbThreadFunc(void *param) { hbGatherAppInfo(); } + SArray *mgr = taosArrayInit(sz, sizeof(void *)); for (int i = 0; i < sz; i++) { SAppHbMgr *pAppHbMgr = taosArrayGetP(clientHbMgr.appHbMgrs, i); + if (pAppHbMgr == NULL) { + continue; + } int32_t connCnt = atomic_load_32(&pAppHbMgr->connKeyCnt); if (connCnt == 0) { continue; } SClientHbBatchReq *pReq = hbGatherAllInfo(pAppHbMgr); - if (pReq == NULL) { + if (pReq == NULL || taosArrayGetP(clientHbMgr.appHbMgrs, i) == NULL) { + tFreeClientHbBatchReq(pReq); continue; } int tlen = tSerializeSClientHbBatchReq(NULL, 0, pReq); @@ -726,7 +735,7 @@ static void *hbThreadFunc(void *param) { pInfo->msgInfo.len = tlen; pInfo->msgType = TDMT_MND_HEARTBEAT; pInfo->param = strdup(pAppHbMgr->key); - pInfo->paramFreeFp = taosMemoryFree; + pInfo->paramFreeFp = taosMemoryFree; pInfo->requestId = generateRequestId(); pInfo->requestObjRefId = 0; @@ -738,8 +747,12 @@ static void *hbThreadFunc(void *param) { // hbClearReqInfo(pAppHbMgr); atomic_add_fetch_32(&pAppHbMgr->reportCnt, 1); + taosArrayPush(mgr, &pAppHbMgr); } + taosArrayDestroy(clientHbMgr.appHbMgrs); + clientHbMgr.appHbMgrs = mgr; + taosThreadMutexUnlock(&clientHbMgr.lock); taosMsleep(HEARTBEAT_INTERVAL); @@ -831,7 +844,7 @@ void hbRemoveAppHbMrg(SAppHbMgr **pAppHbMgr) { if (pItem == *pAppHbMgr) { hbFreeAppHbMgr(*pAppHbMgr); *pAppHbMgr = NULL; - taosArrayRemove(clientHbMgr.appHbMgrs, i); + taosArraySet(clientHbMgr.appHbMgrs, i, NULL); break; } } @@ -856,7 +869,15 @@ int hbMgrInit() { clientHbMgr.appSummary = taosHashInit(10, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, HASH_NO_LOCK); clientHbMgr.appHbMgrs = taosArrayInit(0, sizeof(void *)); - taosThreadMutexInit(&clientHbMgr.lock, NULL); + + TdThreadMutexAttr attr = {0}; + taosThreadMutexAttrSetType(&attr, PTHREAD_MUTEX_RECURSIVE); + + int ret = taosThreadMutexAttrInit(&attr); + assert(ret == 0); + + taosThreadMutexInit(&clientHbMgr.lock, &attr); + taosThreadMutexAttrDestroy(&attr); // init handle funcs hbMgrInitHandle(); From ef18966fa95ffbe23dcff2a1c520ad8517b89dae Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 15 Sep 2022 09:53:32 +0800 Subject: [PATCH 2/5] enh(tsc): handle deadlock --- source/client/src/clientHb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/client/src/clientHb.c b/source/client/src/clientHb.c index fc03a66655..cf968937ac 100644 --- a/source/client/src/clientHb.c +++ b/source/client/src/clientHb.c @@ -844,7 +844,7 @@ void hbRemoveAppHbMrg(SAppHbMgr **pAppHbMgr) { if (pItem == *pAppHbMgr) { hbFreeAppHbMgr(*pAppHbMgr); *pAppHbMgr = NULL; - taosArraySet(clientHbMgr.appHbMgrs, i, NULL); + taosArraySet(clientHbMgr.appHbMgrs, i, pAppHbMgr); break; } } From 1091302e8d948737c101d1cd1e0d4e2062d23ab3 Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 15 Sep 2022 10:30:17 +0800 Subject: [PATCH 3/5] fix: fix mem leak --- source/client/src/clientHb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/source/client/src/clientHb.c b/source/client/src/clientHb.c index cf968937ac..9b85d403be 100644 --- a/source/client/src/clientHb.c +++ b/source/client/src/clientHb.c @@ -855,6 +855,7 @@ void appHbMgrCleanup(void) { int sz = taosArrayGetSize(clientHbMgr.appHbMgrs); for (int i = 0; i < sz; i++) { SAppHbMgr *pTarget = taosArrayGetP(clientHbMgr.appHbMgrs, i); + if (pTarget == NULL) continue; hbFreeAppHbMgr(pTarget); } } From 091c8ecfcd07f716873ecbcad3d5970c438cdfef Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 15 Sep 2022 11:30:27 +0800 Subject: [PATCH 4/5] enh(tsc): handle deadlock --- source/client/src/clientHb.c | 1 - 1 file changed, 1 deletion(-) diff --git a/source/client/src/clientHb.c b/source/client/src/clientHb.c index 9b85d403be..1cf53881a8 100644 --- a/source/client/src/clientHb.c +++ b/source/client/src/clientHb.c @@ -873,7 +873,6 @@ int hbMgrInit() { TdThreadMutexAttr attr = {0}; taosThreadMutexAttrSetType(&attr, PTHREAD_MUTEX_RECURSIVE); - int ret = taosThreadMutexAttrInit(&attr); assert(ret == 0); From fe29f000193f79977b79b45fe33194c988e1078c Mon Sep 17 00:00:00 2001 From: yihaoDeng Date: Thu, 15 Sep 2022 17:23:48 +0800 Subject: [PATCH 5/5] fix: fix deadlock --- source/client/src/clientHb.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/source/client/src/clientHb.c b/source/client/src/clientHb.c index a7e42a01a3..7ce80553a0 100644 --- a/source/client/src/clientHb.c +++ b/source/client/src/clientHb.c @@ -707,6 +707,7 @@ static void *hbThreadFunc(void *param) { int32_t connCnt = atomic_load_32(&pAppHbMgr->connKeyCnt); if (connCnt == 0) { + taosArrayPush(mgr, &pAppHbMgr); continue; } SClientHbBatchReq *pReq = hbGatherAllInfo(pAppHbMgr); @@ -720,6 +721,7 @@ static void *hbThreadFunc(void *param) { terrno = TSDB_CODE_TSC_OUT_OF_MEMORY; tFreeClientHbBatchReq(pReq); // hbClearReqInfo(pAppHbMgr); + taosArrayPush(mgr, &pAppHbMgr); break; } @@ -731,6 +733,7 @@ static void *hbThreadFunc(void *param) { tFreeClientHbBatchReq(pReq); // hbClearReqInfo(pAppHbMgr); taosMemoryFree(buf); + taosArrayPush(mgr, &pAppHbMgr); break; } pInfo->fp = hbAsyncCallBack;