From 4d77d321df142b9f9371111dcaf10e63ab06a43c Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Thu, 27 Jun 2024 14:13:18 +0800 Subject: [PATCH 01/27] fix:print error msg --- source/client/src/clientMonitor.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/source/client/src/clientMonitor.c b/source/client/src/clientMonitor.c index 9e990dd545..3067961696 100644 --- a/source/client/src/clientMonitor.c +++ b/source/client/src/clientMonitor.c @@ -147,7 +147,8 @@ static void monitorReadSendSlowLog(TdFilePtr pFile, void* pTransporter, SEpSet * while(1){ int64_t readSize = taosReadFile(pFile, buf + offset, SLOW_LOG_SEND_SIZE - offset); if (readSize <= 0) { - uError("failed to read len from file:%p since %s", pFile, terrstr()); + if (readSize < 0) + uError("failed to read len from file:%p since %s", pFile, terrstr()); return; } @@ -423,7 +424,7 @@ void monitorCounterInc(int64_t clusterId, const char* counterName, const char** MonitorClient* pMonitor = *ppMonitor; taos_counter_t** ppCounter = (taos_counter_t**)taosHashGet(pMonitor->counters, counterName, strlen(counterName)); - if (ppCounter == NULL || *ppCounter != NULL) { + if (ppCounter == NULL || *ppCounter == NULL) { uError("monitorCounterInc not found pCounter %"PRIx64":%s.", clusterId, counterName); goto end; } @@ -543,10 +544,6 @@ static void* monitorThreadFunc(void *param){ } #endif - if (-1 != atomic_val_compare_exchange_32(&slowLogFlag, -1, 0)) { - return NULL; - } - char tmpPath[PATH_MAX] = {0}; if (getSlowLogTmpDir(tmpPath, sizeof(tmpPath)) < 0){ return NULL; @@ -568,6 +565,10 @@ static void* monitorThreadFunc(void *param){ uError("open queue error since %s", terrstr()); return NULL; } + + if (-1 != atomic_val_compare_exchange_32(&slowLogFlag, -1, 0)) { + return NULL; + } uDebug("monitorThreadFunc start"); while (1) { if (slowLogFlag > 0) break; @@ -666,8 +667,8 @@ int32_t monitorPutData2MonitorQueue(int64_t clusterId, char* value){ slowLogData->clusterId = clusterId; slowLogData->value = value; uDebug("[monitor] write slow log to queue, clusterId:%"PRIx64 " value:%s", slowLogData->clusterId, slowLogData->value); - while (monitorQueue == NULL) { - taosMsleep(100); + while (atomic_load_32(&slowLogFlag) == -1) { + taosMsleep(5); } if (taosWriteQitem(monitorQueue, slowLogData) == 0){ tsem2_post(&monitorSem); From e73f6ba47e44159d606a8cd1f3b32d0554e7760f Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Thu, 27 Jun 2024 16:05:49 +0800 Subject: [PATCH 02/27] fix:[TD-30769]make monitorInterval effect right now --- source/client/src/clientHb.c | 8 ++++++++ source/client/src/clientMonitor.c | 31 ++++++++++++++++++++++++++----- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/source/client/src/clientHb.c b/source/client/src/clientHb.c index 19b6655af1..0a480e1cbd 100644 --- a/source/client/src/clientHb.c +++ b/source/client/src/clientHb.c @@ -19,6 +19,7 @@ #include "scheduler.h" #include "trpc.h" #include "tglobal.h" +#include "clientMonitor.h" typedef struct { union { @@ -546,7 +547,14 @@ static int32_t hbAsyncCallBack(void *param, SDataBuf *pMsg, int32_t code) { } SAppInstInfo *pInst = pAppHbMgr->pAppInstInfo; + int32_t oldInterval = pInst->monitorParas.tsMonitorInterval; pInst->monitorParas = pRsp.monitorParas; + if(oldInterval > pInst->monitorParas.tsMonitorInterval){ + char* value = taosStrdup(""); + if(monitorPutData2MonitorQueue(pInst->clusterId, value) < 0){ + taosMemoryFree(value); + } + } tscDebug("[monitor] paras from hb, clusterId:%" PRIx64 " monitorParas threshold:%d scope:%d", pInst->clusterId, pRsp.monitorParas.tsSlowLogThreshold, pRsp.monitorParas.tsSlowLogScope); diff --git a/source/client/src/clientMonitor.c b/source/client/src/clientMonitor.c index 3067961696..260a0ebbaa 100644 --- a/source/client/src/clientMonitor.c +++ b/source/client/src/clientMonitor.c @@ -221,8 +221,8 @@ static void reportSendProcess(void* param, void* tmrId) { SEpSet ep = getEpSet_s(&pInst->mgmtEp); generateClusterReport(pMonitor->registry, pInst->pTransporter, &ep); - taosRUnLockLatch(&monitorLock); taosTmrReset(reportSendProcess, pInst->monitorParas.tsMonitorInterval * 1000, param, monitorTimer, &tmrId); + taosRUnLockLatch(&monitorLock); } static void sendAllSlowLog(){ @@ -450,7 +450,7 @@ static void monitorFreeSlowLogData(MonitorSlowLogData* pData) { static void monitorThreadFuncUnexpectedStopped(void) { atomic_store_32(&slowLogFlag, -1); } static void reportSlowLog(void* param, void* tmrId) { - taosRLockLatch(&monitorLock); + taosWLockLatch(&monitorLock); if (atomic_load_32(&monitorFlag) == 1) { taosRUnLockLatch(&monitorLock); return; @@ -471,9 +471,11 @@ static void reportSlowLog(void* param, void* tmrId) { SEpSet ep = getEpSet_s(&pInst->mgmtEp); monitorReadSendSlowLog((*(SlowLogClient**)tmp)->pFile, pInst->pTransporter, &ep); - taosRUnLockLatch(&monitorLock); - taosTmrReset(reportSlowLog, pInst->monitorParas.tsMonitorInterval * 1000, param, monitorTimer, &tmrId); + if((*(SlowLogClient**)tmp)->timer == tmrId){ + taosTmrReset(reportSlowLog, pInst->monitorParas.tsMonitorInterval * 1000, param, monitorTimer, &(*(SlowLogClient**)tmp)->timer); + } + taosWUnLockLatch(&monitorLock); } static void monitorWriteSlowLog2File(MonitorSlowLogData* slowLogData, char *tmpPath){ @@ -535,6 +537,23 @@ FAILED: taosWUnLockLatch(&monitorLock); } +static void restartReportTimer(int64_t clusterId){ + taosWLockLatch(&monitorLock); + + void* tmp = taosHashGet(monitorSlowLogHash, &clusterId, LONG_BYTES); + if(tmp){ + taosTmrStopA(&(*(SlowLogClient**)tmp)->timer); + SAppInstInfo* pInst = getAppInstByClusterId(clusterId); + if(pInst == NULL){ + uError("failed to get app inst, clusterId:%"PRIx64, clusterId); + return; + } + (*(SlowLogClient**)tmp)->timer = taosTmrStart(reportSlowLog, pInst->monitorParas.tsMonitorInterval * 1000, (void*)clusterId, monitorTimer); + + } + taosWUnLockLatch(&monitorLock); +} + static void* monitorThreadFunc(void *param){ setThreadName("client-monitor-slowlog"); @@ -579,7 +598,9 @@ static void* monitorThreadFunc(void *param){ uDebug("[monitor] read slow log data from queue, clusterId:%" PRIx64 " value:%s", slowLogData->clusterId, slowLogData->value); if (slowLogData->value == NULL){ monitorSendAllSlowLogFromTempDir(slowLogData->clusterId); - }else{ + } else if(strlen(slowLogData->value) == 0){ + restartReportTimer(slowLogData->clusterId); + } else{ monitorWriteSlowLog2File(slowLogData, tmpPath); } } From ad6eb205241de8eb7846d9121f2268a03f34946d Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Thu, 27 Jun 2024 16:52:10 +0800 Subject: [PATCH 03/27] fix:disable slow log monitor --- source/client/src/clientMonitor.c | 1 + 1 file changed, 1 insertion(+) diff --git a/source/client/src/clientMonitor.c b/source/client/src/clientMonitor.c index 260a0ebbaa..1c636a5505 100644 --- a/source/client/src/clientMonitor.c +++ b/source/client/src/clientMonitor.c @@ -680,6 +680,7 @@ void monitorClose() { } int32_t monitorPutData2MonitorQueue(int64_t clusterId, char* value){ + return -1; // disable slow log monitor MonitorSlowLogData* slowLogData = taosAllocateQitem(sizeof(MonitorSlowLogData), DEF_QITEM, 0); if (slowLogData == NULL) { uError("[monitor] failed to allocate slow log data"); From b5a29541a3e14d770e1557423a8db0e401f519e5 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Fri, 28 Jun 2024 09:14:01 +0800 Subject: [PATCH 04/27] fix:type error in stmt & open slow log --- source/client/src/clientMonitor.c | 1 - source/client/src/clientSml.c | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/source/client/src/clientMonitor.c b/source/client/src/clientMonitor.c index 1c636a5505..260a0ebbaa 100644 --- a/source/client/src/clientMonitor.c +++ b/source/client/src/clientMonitor.c @@ -680,7 +680,6 @@ void monitorClose() { } int32_t monitorPutData2MonitorQueue(int64_t clusterId, char* value){ - return -1; // disable slow log monitor MonitorSlowLogData* slowLogData = taosAllocateQitem(sizeof(MonitorSlowLogData), DEF_QITEM, 0); if (slowLogData == NULL) { uError("[monitor] failed to allocate slow log data"); diff --git a/source/client/src/clientSml.c b/source/client/src/clientSml.c index a771fc1635..7e63dcd36a 100644 --- a/source/client/src/clientSml.c +++ b/source/client/src/clientSml.c @@ -1380,7 +1380,7 @@ void freeSSmlKv(void *data) { void smlDestroyInfo(SSmlHandle *info) { if (!info) return; - qDestroyQuery(info->pQuery); +// qDestroyQuery(info->pQuery); taosHashCleanup(info->pVgHash); taosHashCleanup(info->childTables); @@ -1912,6 +1912,7 @@ TAOS_RES *taos_schemaless_insert_inner(TAOS *taos, char *lines[], char *rawLine, return (TAOS_RES *)request; } info->pRequest = request; + info->pRequest->pQuery = info->pQuery; info->ttl = ttl; info->precision = precision; info->protocol = (TSDB_SML_PROTOCOL_TYPE)protocol; From 328dfa3146d320a2e291fe2bd63f8cf5db6ae6ff Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Fri, 28 Jun 2024 20:58:48 +0800 Subject: [PATCH 05/27] fix:remove using alarm when sending slow log & use one thread to send all slow log --- include/libs/monitor/clientMonitor.h | 3 +- source/client/src/clientEnv.c | 34 ++--- source/client/src/clientHb.c | 6 - source/client/src/clientMonitor.c | 189 ++++++++++++--------------- 4 files changed, 99 insertions(+), 133 deletions(-) diff --git a/include/libs/monitor/clientMonitor.h b/include/libs/monitor/clientMonitor.h index 68b55e71a9..a8cdbe4ad1 100644 --- a/include/libs/monitor/clientMonitor.h +++ b/include/libs/monitor/clientMonitor.h @@ -43,7 +43,8 @@ typedef struct { typedef struct { TdFilePtr pFile; - void* timer; + int64_t lastCheckTime; + char path[PATH_MAX]; } SlowLogClient; typedef struct { diff --git a/source/client/src/clientEnv.c b/source/client/src/clientEnv.c index af396876bb..1248f87f19 100644 --- a/source/client/src/clientEnv.c +++ b/source/client/src/clientEnv.c @@ -202,24 +202,24 @@ static void deregisterRequest(SRequestObj *pRequest) { pRequest->self, pTscObj->id, pRequest->requestId, duration / 1000.0, num, currentInst); if (TSDB_CODE_SUCCESS == nodesSimAcquireAllocator(pRequest->allocatorRefId)) { - if (pRequest->pQuery && pRequest->pQuery->pRoot) { - if (QUERY_NODE_VNODE_MODIFY_STMT == pRequest->pQuery->pRoot->type && - (0 == ((SVnodeModifyOpStmt *)pRequest->pQuery->pRoot)->sqlNodeType)) { - tscDebug("insert duration %" PRId64 "us: parseCost:%" PRId64 "us, ctgCost:%" PRId64 "us, analyseCost:%" PRId64 - "us, planCost:%" PRId64 "us, exec:%" PRId64 "us", - duration, pRequest->metric.parseCostUs, pRequest->metric.ctgCostUs, pRequest->metric.analyseCostUs, - pRequest->metric.planCostUs, pRequest->metric.execCostUs); - atomic_add_fetch_64((int64_t *)&pActivity->insertElapsedTime, duration); - reqType = SLOW_LOG_TYPE_INSERT; - } else if (QUERY_NODE_SELECT_STMT == pRequest->stmtType) { - tscDebug("query duration %" PRId64 "us: parseCost:%" PRId64 "us, ctgCost:%" PRId64 "us, analyseCost:%" PRId64 - "us, planCost:%" PRId64 "us, exec:%" PRId64 "us", - duration, pRequest->metric.parseCostUs, pRequest->metric.ctgCostUs, pRequest->metric.analyseCostUs, - pRequest->metric.planCostUs, pRequest->metric.execCostUs); + if ((pRequest->pQuery && pRequest->pQuery->pRoot && + QUERY_NODE_VNODE_MODIFY_STMT == pRequest->pQuery->pRoot->type && + (0 == ((SVnodeModifyOpStmt *)pRequest->pQuery->pRoot)->sqlNodeType)) || + QUERY_NODE_VNODE_MODIFY_STMT == pRequest->stmtType) { + tscDebug("insert duration %" PRId64 "us: parseCost:%" PRId64 "us, ctgCost:%" PRId64 "us, analyseCost:%" PRId64 + "us, planCost:%" PRId64 "us, exec:%" PRId64 "us", + duration, pRequest->metric.parseCostUs, pRequest->metric.ctgCostUs, pRequest->metric.analyseCostUs, + pRequest->metric.planCostUs, pRequest->metric.execCostUs); + atomic_add_fetch_64((int64_t *)&pActivity->insertElapsedTime, duration); + reqType = SLOW_LOG_TYPE_INSERT; + } else if (QUERY_NODE_SELECT_STMT == pRequest->stmtType) { + tscDebug("query duration %" PRId64 "us: parseCost:%" PRId64 "us, ctgCost:%" PRId64 "us, analyseCost:%" PRId64 + "us, planCost:%" PRId64 "us, exec:%" PRId64 "us", + duration, pRequest->metric.parseCostUs, pRequest->metric.ctgCostUs, pRequest->metric.analyseCostUs, + pRequest->metric.planCostUs, pRequest->metric.execCostUs); - atomic_add_fetch_64((int64_t *)&pActivity->queryElapsedTime, duration); - reqType = SLOW_LOG_TYPE_QUERY; - } + atomic_add_fetch_64((int64_t *)&pActivity->queryElapsedTime, duration); + reqType = SLOW_LOG_TYPE_QUERY; } nodesSimReleaseAllocator(pRequest->allocatorRefId); diff --git a/source/client/src/clientHb.c b/source/client/src/clientHb.c index 0a480e1cbd..07a9e0c463 100644 --- a/source/client/src/clientHb.c +++ b/source/client/src/clientHb.c @@ -549,12 +549,6 @@ static int32_t hbAsyncCallBack(void *param, SDataBuf *pMsg, int32_t code) { SAppInstInfo *pInst = pAppHbMgr->pAppInstInfo; int32_t oldInterval = pInst->monitorParas.tsMonitorInterval; pInst->monitorParas = pRsp.monitorParas; - if(oldInterval > pInst->monitorParas.tsMonitorInterval){ - char* value = taosStrdup(""); - if(monitorPutData2MonitorQueue(pInst->clusterId, value) < 0){ - taosMemoryFree(value); - } - } tscDebug("[monitor] paras from hb, clusterId:%" PRIx64 " monitorParas threshold:%d scope:%d", pInst->clusterId, pRsp.monitorParas.tsSlowLogThreshold, pRsp.monitorParas.tsSlowLogScope); diff --git a/source/client/src/clientMonitor.c b/source/client/src/clientMonitor.c index 260a0ebbaa..5911d5d04c 100644 --- a/source/client/src/clientMonitor.c +++ b/source/client/src/clientMonitor.c @@ -45,19 +45,6 @@ static void destroySlowLogClient(void* data){ return; } SlowLogClient* slowLogClient = *(SlowLogClient**)data; - if(slowLogClient == NULL){ - return; - } - taosTmrStopA(&(*(SlowLogClient**)data)->timer); - - TdFilePtr pFile = slowLogClient->pFile; - if(pFile == NULL){ - taosMemoryFree(slowLogClient); - return; - } - - taosUnLockFile(pFile); - taosCloseFile(&pFile); taosMemoryFree(slowLogClient); } @@ -85,6 +72,11 @@ static SAppInstInfo* getAppInstByClusterId(int64_t clusterId) { return *(SAppInstInfo**)p; } +typedef struct { + tsem_t sem; + int32_t code; +} SlowLogParam; + static int32_t monitorReportAsyncCB(void* param, SDataBuf* pMsg, int32_t code) { if (TSDB_CODE_SUCCESS != code) { uError("found error in monitorReport send callback, code:%d, please check the network.", code); @@ -93,10 +85,15 @@ static int32_t monitorReportAsyncCB(void* param, SDataBuf* pMsg, int32_t code) { taosMemoryFree(pMsg->pData); taosMemoryFree(pMsg->pEpSet); } + if(param != NULL){ + SlowLogParam* p = (SlowLogParam*)param; + p->code = code; + tsem_post(&p->sem); + } return code; } -static int32_t sendReport(void* pTransporter, SEpSet *epSet, char* pCont, MONITOR_TYPE type) { +static int32_t sendReport(void* pTransporter, SEpSet *epSet, char* pCont, MONITOR_TYPE type, void* param) { SStatisReq sStatisReq; sStatisReq.pCont = pCont; sStatisReq.contLen = strlen(pCont); @@ -122,9 +119,8 @@ static int32_t sendReport(void* pTransporter, SEpSet *epSet, char* pCont, MONITO pInfo->msgInfo.pData = buf; pInfo->msgInfo.len = tlen; pInfo->msgType = TDMT_MND_STATIS; - // pInfo->param = taosMemoryMalloc(sizeof(int32_t)); - // *(int32_t*)pInfo->param = i; - pInfo->paramFreeFp = taosMemoryFree; + pInfo->param = param; +// pInfo->paramFreeFp = taosMemoryFree; pInfo->requestId = tGenIdPI64(); pInfo->requestObjRefId = 0; @@ -136,20 +132,22 @@ static int32_t sendReport(void* pTransporter, SEpSet *epSet, char* pCont, MONITO return code; } -static void monitorReadSendSlowLog(TdFilePtr pFile, void* pTransporter, SEpSet *epSet){ +static bool monitorReadSendSlowLog(TdFilePtr pFile, void* pTransporter, SEpSet *epSet){ char buf[SLOW_LOG_SEND_SIZE + 1] = {0}; // +1 for \0, for print log char pCont[SLOW_LOG_SEND_SIZE + 1] = {0}; // +1 for \0, for print log int32_t offset = 0; if(taosLSeekFile(pFile, 0, SEEK_SET) < 0){ uError("failed to seek file:%p code: %d", pFile, errno); - return; + return false; } while(1){ int64_t readSize = taosReadFile(pFile, buf + offset, SLOW_LOG_SEND_SIZE - offset); if (readSize <= 0) { - if (readSize < 0) + if (readSize < 0){ uError("failed to read len from file:%p since %s", pFile, terrstr()); - return; + return false; + } + break; } memset(pCont, 0, sizeof(pCont)); @@ -165,13 +163,30 @@ static void monitorReadSendSlowLog(TdFilePtr pFile, void* pTransporter, SEpSet * } strcat(pCont, "]"); if (pTransporter && pCont != NULL) { - if(sendReport(pTransporter, epSet, pCont, MONITOR_TYPE_SLOW_LOG) != 0){ + SlowLogParam* pParam = taosMemoryMalloc(sizeof(SlowLogParam)); + if (pParam == NULL) { + return false; + } + if (tsem_init(&pParam->sem, 0, 0) != 0){ + taosMemoryFree(pParam); + return false; + } + pParam->code = sendReport(pTransporter, epSet, pCont, MONITOR_TYPE_SLOW_LOG, pParam); + if(pParam->code == TSDB_CODE_SUCCESS){ + tsem_wait(&pParam->sem); + } + tsem_destroy(&pParam->sem); + + if(pParam->code != TSDB_CODE_SUCCESS){ if(taosLSeekFile(pFile, -readSize, SEEK_CUR) < 0){ uError("failed to seek file:%p code: %d", pFile, errno); } uError("failed to send report:%s", pCont); - return; + taosMemoryFree(pParam); + return false; } + taosMemoryFree(pParam); + uDebug("[monitor] monitorReadSendSlowLog send slow log to mnode:%s", pCont) } @@ -188,6 +203,7 @@ static void monitorReadSendSlowLog(TdFilePtr pFile, void* pTransporter, SEpSet * uError("failed to truncate file:%p code: %d", pFile, errno); } uDebug("[monitor] monitorReadSendSlowLog send slow log file:%p", pFile); + return true; } static void generateClusterReport(taos_collector_registry_t* registry, void* pTransporter, SEpSet *epSet) { @@ -199,7 +215,7 @@ static void generateClusterReport(taos_collector_registry_t* registry, void* pTr return; } - if (strlen(pCont) != 0 && sendReport(pTransporter, epSet, pCont, MONITOR_TYPE_COUNTER) == 0) { + if (strlen(pCont) != 0 && sendReport(pTransporter, epSet, pCont, MONITOR_TYPE_COUNTER, NULL) == 0) { taos_collector_registry_clear_batch(registry); } taosMemoryFreeClear(pCont); @@ -225,25 +241,6 @@ static void reportSendProcess(void* param, void* tmrId) { taosRUnLockLatch(&monitorLock); } -static void sendAllSlowLog(){ - void* data = taosHashIterate(monitorSlowLogHash, NULL); - while (data != NULL) { - TdFilePtr pFile = (*(SlowLogClient**)data)->pFile; - if (pFile != NULL){ - int64_t clusterId = *(int64_t*)taosHashGetKey(data, NULL); - SAppInstInfo* pInst = getAppInstByClusterId(clusterId); - if(pInst == NULL){ - taosHashCancelIterate(monitorSlowLogHash, data); - break; - } - SEpSet ep = getEpSet_s(&pInst->mgmtEp); - monitorReadSendSlowLog(pFile, pInst->pTransporter, &ep); - } - data = taosHashIterate(monitorSlowLogHash, data); - } - uDebug("[monitor] sendAllSlowLog when client close"); -} - static void monitorSendAllSlowLogFromTempDir(int64_t clusterId){ SAppInstInfo* pInst = getAppInstByClusterId((int64_t)clusterId); @@ -257,16 +254,14 @@ static void monitorSendAllSlowLogFromTempDir(int64_t clusterId){ return; } - taosRLockLatch(&monitorLock); - char tmpPath[PATH_MAX] = {0}; if (getSlowLogTmpDir(tmpPath, sizeof(tmpPath)) < 0) { - goto END; + return; } TdDirPtr pDir = taosOpenDir(tmpPath); if (pDir == NULL) { - goto END; + return; } TdDirEntryPtr de = NULL; @@ -296,18 +291,18 @@ static void monitorSendAllSlowLogFromTempDir(int64_t clusterId){ continue; } SEpSet ep = getEpSet_s(&pInst->mgmtEp); - monitorReadSendSlowLog(pFile, pInst->pTransporter, &ep); + bool truncated = monitorReadSendSlowLog(pFile, pInst->pTransporter, &ep); taosUnLockFile(pFile); taosCloseFile(&pFile); - taosRemoveFile(filename); + + if(truncated){ + taosRemoveFile(filename); + } uDebug("[monitor] send and delete slow log file when reveive connect rsp:%s", filename); } taosCloseDir(&pDir); - -END: - taosRUnLockLatch(&monitorLock); } static void sendAllCounter(){ @@ -449,37 +444,7 @@ static void monitorFreeSlowLogData(MonitorSlowLogData* pData) { static void monitorThreadFuncUnexpectedStopped(void) { atomic_store_32(&slowLogFlag, -1); } -static void reportSlowLog(void* param, void* tmrId) { - taosWLockLatch(&monitorLock); - if (atomic_load_32(&monitorFlag) == 1) { - taosRUnLockLatch(&monitorLock); - return; - } - SAppInstInfo* pInst = getAppInstByClusterId((int64_t)param); - if(pInst == NULL){ - uError("failed to get app inst, clusterId:%"PRIx64, (int64_t)param); - taosRUnLockLatch(&monitorLock); - return; - } - - void* tmp = taosHashGet(monitorSlowLogHash, ¶m, LONG_BYTES); - if(tmp == NULL){ - uError("failed to get file inst, clusterId:%"PRIx64, (int64_t)param); - taosRUnLockLatch(&monitorLock); - return; - } - - SEpSet ep = getEpSet_s(&pInst->mgmtEp); - monitorReadSendSlowLog((*(SlowLogClient**)tmp)->pFile, pInst->pTransporter, &ep); - - if((*(SlowLogClient**)tmp)->timer == tmrId){ - taosTmrReset(reportSlowLog, pInst->monitorParas.tsMonitorInterval * 1000, param, monitorTimer, &(*(SlowLogClient**)tmp)->timer); - } - taosWUnLockLatch(&monitorLock); -} - static void monitorWriteSlowLog2File(MonitorSlowLogData* slowLogData, char *tmpPath){ - taosWLockLatch(&monitorLock); TdFilePtr pFile = NULL; void* tmp = taosHashGet(monitorSlowLogHash, &slowLogData->clusterId, LONG_BYTES); if (tmp == NULL){ @@ -487,7 +452,7 @@ static void monitorWriteSlowLog2File(MonitorSlowLogData* slowLogData, char *tmpP char clusterId[32] = {0}; if (snprintf(clusterId, sizeof(clusterId), "%" PRIx64, slowLogData->clusterId) < 0){ uError("failed to generate clusterId:%" PRIx64, slowLogData->clusterId); - goto FAILED; + return; } taosGetTmpfilePath(tmpPath, clusterId, path); uInfo("[monitor] create slow log file:%s", path); @@ -495,35 +460,35 @@ static void monitorWriteSlowLog2File(MonitorSlowLogData* slowLogData, char *tmpP if (pFile == NULL) { terrno = TAOS_SYSTEM_ERROR(errno); uError("failed to open file:%s since %s", path, terrstr()); - goto FAILED; + return; } SlowLogClient *pClient = taosMemoryCalloc(1, sizeof(SlowLogClient)); if (pClient == NULL){ uError("failed to allocate memory for slow log client"); taosCloseFile(&pFile); - goto FAILED; + return; } + pClient->lastCheckTime = taosGetMonoTimestampMs(); + strcpy(pClient->path, path); pClient->pFile = pFile; if (taosHashPut(monitorSlowLogHash, &slowLogData->clusterId, LONG_BYTES, &pClient, POINTER_BYTES) != 0){ uError("failed to put clusterId:%" PRId64 " to hash table", slowLogData->clusterId); taosCloseFile(&pFile); taosMemoryFree(pClient); - goto FAILED; + return; } if(taosLockFile(pFile) < 0){ uError("failed to lock file:%p since %s", pFile, terrstr()); - goto FAILED; + return; } SAppInstInfo* pInst = getAppInstByClusterId(slowLogData->clusterId); if(pInst == NULL){ uError("failed to get app instance by clusterId:%" PRId64, slowLogData->clusterId); - goto FAILED; + return; } - - pClient->timer = taosTmrStart(reportSlowLog, pInst->monitorParas.tsMonitorInterval * 1000, (void*)slowLogData->clusterId, monitorTimer); }else{ pFile = (*(SlowLogClient**)tmp)->pFile; } @@ -532,26 +497,30 @@ static void monitorWriteSlowLog2File(MonitorSlowLogData* slowLogData, char *tmpP uError("failed to write len to file:%p since %s", pFile, terrstr()); } uDebug("[monitor] write slow log to file:%p, clusterId:%"PRIx64, pFile, slowLogData->clusterId); - -FAILED: - taosWUnLockLatch(&monitorLock); } -static void restartReportTimer(int64_t clusterId){ - taosWLockLatch(&monitorLock); +static void monitorSendAllSlowLog(bool quit){ + int64_t t = taosGetMonoTimestampMs(); - void* tmp = taosHashGet(monitorSlowLogHash, &clusterId, LONG_BYTES); - if(tmp){ - taosTmrStopA(&(*(SlowLogClient**)tmp)->timer); - SAppInstInfo* pInst = getAppInstByClusterId(clusterId); - if(pInst == NULL){ - uError("failed to get app inst, clusterId:%"PRIx64, clusterId); - return; + void* pIter = NULL; + while ((pIter = taosHashIterate(monitorSlowLogHash, pIter))) { + int64_t* clusterId = (int64_t*)taosHashGetKey(pIter, NULL); + SAppInstInfo* pInst = getAppInstByClusterId(*clusterId); + bool truncated = false; + if(quit || (pInst != NULL && t - (*(SlowLogClient**)pIter)->lastCheckTime > pInst->monitorParas.tsMonitorInterval * 1000)) { + (*(SlowLogClient**)pIter)->lastCheckTime = t; + SEpSet ep = getEpSet_s(&pInst->mgmtEp); + truncated = monitorReadSendSlowLog((*(SlowLogClient**)pIter)->pFile, pInst->pTransporter, &ep); } - (*(SlowLogClient**)tmp)->timer = taosTmrStart(reportSlowLog, pInst->monitorParas.tsMonitorInterval * 1000, (void*)clusterId, monitorTimer); + if(quit){ + taosUnLockFile((*(SlowLogClient**)pIter)->pFile); + taosCloseFile(&((*(SlowLogClient**)pIter)->pFile)); + if(truncated){ + taosRemoveFile((*(SlowLogClient**)pIter)->path); + } + } } - taosWUnLockLatch(&monitorLock); } static void* monitorThreadFunc(void *param){ @@ -590,7 +559,10 @@ static void* monitorThreadFunc(void *param){ } uDebug("monitorThreadFunc start"); while (1) { - if (slowLogFlag > 0) break; + if (slowLogFlag > 0) { + monitorSendAllSlowLog(true); + break; + } MonitorSlowLogData* slowLogData = NULL; taosReadQitem(monitorQueue, (void**)&slowLogData); @@ -598,15 +570,15 @@ static void* monitorThreadFunc(void *param){ uDebug("[monitor] read slow log data from queue, clusterId:%" PRIx64 " value:%s", slowLogData->clusterId, slowLogData->value); if (slowLogData->value == NULL){ monitorSendAllSlowLogFromTempDir(slowLogData->clusterId); - } else if(strlen(slowLogData->value) == 0){ - restartReportTimer(slowLogData->clusterId); } else{ monitorWriteSlowLog2File(slowLogData, tmpPath); } } monitorFreeSlowLogData(slowLogData); taosFreeQitem(slowLogData); - tsem2_timewait(&monitorSem, 500); + + monitorSendAllSlowLog(false); + tsem2_timewait(&monitorSem, 100); } taosCloseQueue(monitorQueue); @@ -671,7 +643,6 @@ void monitorClose() { uDebug("[monitor] monitorFlag is not 0"); } tscMonitorStop(); - sendAllSlowLog(); sendAllCounter(); taosHashCleanup(monitorCounterHash); taosHashCleanup(monitorSlowLogHash); From 69f235d6b51fc064dedee43b0887ad324cce676d Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Sat, 29 Jun 2024 16:16:36 +0800 Subject: [PATCH 06/27] fix:remove using alarm when sending slow log & use one thread to send all slow log --- source/client/src/clientMonitor.c | 15 ++++++++++++--- utils/test/c/sml_test.c | 1 + 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/source/client/src/clientMonitor.c b/source/client/src/clientMonitor.c index 5911d5d04c..fe5848f24c 100644 --- a/source/client/src/clientMonitor.c +++ b/source/client/src/clientMonitor.c @@ -132,7 +132,16 @@ static int32_t sendReport(void* pTransporter, SEpSet *epSet, char* pCont, MONITO return code; } -static bool monitorReadSendSlowLog(TdFilePtr pFile, void* pTransporter, SEpSet *epSet){ +static bool monitorReadSendSlowLog(TdFilePtr pFile, char* path, void* pTransporter, SEpSet *epSet){ + int64_t filesize = 0; + if (taosStatFile(path, &filesize, NULL, NULL) < 0) { + return false; + } + + if (filesize == 0) { + return true; + } + char buf[SLOW_LOG_SEND_SIZE + 1] = {0}; // +1 for \0, for print log char pCont[SLOW_LOG_SEND_SIZE + 1] = {0}; // +1 for \0, for print log int32_t offset = 0; @@ -291,7 +300,7 @@ static void monitorSendAllSlowLogFromTempDir(int64_t clusterId){ continue; } SEpSet ep = getEpSet_s(&pInst->mgmtEp); - bool truncated = monitorReadSendSlowLog(pFile, pInst->pTransporter, &ep); + bool truncated = monitorReadSendSlowLog(pFile, filename, pInst->pTransporter, &ep); taosUnLockFile(pFile); taosCloseFile(&pFile); @@ -510,7 +519,7 @@ static void monitorSendAllSlowLog(bool quit){ if(quit || (pInst != NULL && t - (*(SlowLogClient**)pIter)->lastCheckTime > pInst->monitorParas.tsMonitorInterval * 1000)) { (*(SlowLogClient**)pIter)->lastCheckTime = t; SEpSet ep = getEpSet_s(&pInst->mgmtEp); - truncated = monitorReadSendSlowLog((*(SlowLogClient**)pIter)->pFile, pInst->pTransporter, &ep); + truncated = monitorReadSendSlowLog((*(SlowLogClient**)pIter)->pFile, (*(SlowLogClient**)pIter)->path, pInst->pTransporter, &ep); } if(quit){ diff --git a/utils/test/c/sml_test.c b/utils/test/c/sml_test.c index f91b14b6d8..2338674ae3 100644 --- a/utils/test/c/sml_test.c +++ b/utils/test/c/sml_test.c @@ -1065,6 +1065,7 @@ int sml_escape1_Test() { for(int i = 0; i < sizeof(sql) / sizeof(sql[0]); i++){ pRes = taos_schemaless_insert(taos, (char**)&sql[i], 1, TSDB_SML_LINE_PROTOCOL, 0); int code = taos_errno(pRes); + taos_free_result(pRes); ASSERT(code); } From a17ff3dbf5a3f4cdb642d865c05720897b061b87 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Mon, 1 Jul 2024 19:08:16 +0800 Subject: [PATCH 07/27] fix:[TS-4921]refactor reporting logic for slow log --- include/libs/monitor/clientMonitor.h | 22 +- source/client/src/clientEnv.c | 3 +- source/client/src/clientMonitor.c | 462 ++++++++++++++++----------- source/client/src/clientMsgHandler.c | 3 +- 4 files changed, 295 insertions(+), 195 deletions(-) diff --git a/include/libs/monitor/clientMonitor.h b/include/libs/monitor/clientMonitor.h index a8cdbe4ad1..5e89c07b2b 100644 --- a/include/libs/monitor/clientMonitor.h +++ b/include/libs/monitor/clientMonitor.h @@ -25,12 +25,19 @@ extern "C" { #include "query.h" #include "tqueue.h" -typedef enum SQL_RESULT_CODE { +typedef enum { SQL_RESULT_SUCCESS = 0, SQL_RESULT_FAILED = 1, SQL_RESULT_CANCEL = 2, } SQL_RESULT_CODE; +typedef enum { + SLOW_LOG_WRITE = 0, + SLOW_LOG_READ_RUNNING = 1, + SLOW_LOG_READ_BEGINNIG = 2, + SLOW_LOG_READ_QUIT = 2, +} SLOW_LOG_QUEUE_TYPE; + #define SLOW_LOG_SEND_SIZE 32*1024 typedef struct { @@ -45,11 +52,18 @@ typedef struct { TdFilePtr pFile; int64_t lastCheckTime; char path[PATH_MAX]; + int64_t offset; } SlowLogClient; typedef struct { - int64_t clusterId; - char *value; + int64_t clusterId; + SLOW_LOG_QUEUE_TYPE type; + union{ + char* data; + int64_t offset; + }; + TdFilePtr pFile; + char* fileName; } MonitorSlowLogData; void monitorClose(); @@ -61,7 +75,7 @@ void monitorCreateClient(int64_t clusterId); void monitorCreateClientCounter(int64_t clusterId, const char* name, const char* help, size_t label_key_count, const char** label_keys); void monitorCounterInc(int64_t clusterId, const char* counterName, const char** label_values); const char* monitorResultStr(SQL_RESULT_CODE code); -int32_t monitorPutData2MonitorQueue(int64_t clusterId, char* value); +int32_t monitorPutData2MonitorQueue(MonitorSlowLogData data); #ifdef __cplusplus } #endif diff --git a/source/client/src/clientEnv.c b/source/client/src/clientEnv.c index 1248f87f19..6689334c1e 100644 --- a/source/client/src/clientEnv.c +++ b/source/client/src/clientEnv.c @@ -157,7 +157,8 @@ static void generateWriteSlowLog(STscObj *pTscObj, SRequestObj *pRequest, int32_ } char* value = cJSON_PrintUnformatted(json); - if(monitorPutData2MonitorQueue(pTscObj->pAppInfo->clusterId, value) < 0){ + MonitorSlowLogData data = {.clusterId = pTscObj->pAppInfo->clusterId, .type = SLOW_LOG_WRITE, .data = value}; + if(monitorPutData2MonitorQueue(data) < 0){ taosMemoryFree(value); } diff --git a/source/client/src/clientMonitor.c b/source/client/src/clientMonitor.c index fe5848f24c..a4be6210bb 100644 --- a/source/client/src/clientMonitor.c +++ b/source/client/src/clientMonitor.c @@ -13,6 +13,7 @@ void* monitorTimer; SHashObj* monitorCounterHash; int32_t slowLogFlag = -1; int32_t monitorFlag = -1; +int32_t quitCnt = 0; tsem2_t monitorSem; STaosQueue* monitorQueue; SHashObj* monitorSlowLogHash; @@ -63,6 +64,19 @@ static void destroyMonitorClient(void* data){ taosMemoryFree(pMonitor); } +static void monitorFreeSlowLogData(void *paras) { + MonitorSlowLogData* pData = (MonitorSlowLogData*)paras; + if (pData == NULL) { + return; + } + if (pData->type == SLOW_LOG_WRITE){ + taosMemoryFree(pData->data); + } + if (pData->type == SLOW_LOG_READ_BEGINNIG){ + taosMemoryFree(pData->fileName); + } +} + static SAppInstInfo* getAppInstByClusterId(int64_t clusterId) { void *p = taosHashGet(appInfo.pInstMapByClusterId, &clusterId, LONG_BYTES); if(p == NULL){ @@ -72,11 +86,6 @@ static SAppInstInfo* getAppInstByClusterId(int64_t clusterId) { return *(SAppInstInfo**)p; } -typedef struct { - tsem_t sem; - int32_t code; -} SlowLogParam; - static int32_t monitorReportAsyncCB(void* param, SDataBuf* pMsg, int32_t code) { if (TSDB_CODE_SUCCESS != code) { uError("found error in monitorReport send callback, code:%d, please check the network.", code); @@ -86,9 +95,13 @@ static int32_t monitorReportAsyncCB(void* param, SDataBuf* pMsg, int32_t code) { taosMemoryFree(pMsg->pEpSet); } if(param != NULL){ - SlowLogParam* p = (SlowLogParam*)param; - p->code = code; - tsem_post(&p->sem); + MonitorSlowLogData* p = (MonitorSlowLogData*)param; + if(code != 0){ + uError("failed to send slow log:%s, clusterId:%" PRIx64, p->data, p->clusterId); + } + if(monitorPutData2MonitorQueue(*p) == 0){ + p->fileName = NULL; + } } return code; } @@ -100,11 +113,15 @@ static int32_t sendReport(void* pTransporter, SEpSet *epSet, char* pCont, MONITO sStatisReq.type = type; int tlen = tSerializeSStatisReq(NULL, 0, &sStatisReq); - if (tlen < 0) return 0; + if (tlen < 0) { + monitorFreeSlowLogData(param); + return -1; + } void* buf = taosMemoryMalloc(tlen); if (buf == NULL) { uError("sendReport failed, out of memory, len:%d", tlen); terrno = TSDB_CODE_OUT_OF_MEMORY; + monitorFreeSlowLogData(param); return -1; } tSerializeSStatisReq(buf, tlen, &sStatisReq); @@ -113,6 +130,8 @@ static int32_t sendReport(void* pTransporter, SEpSet *epSet, char* pCont, MONITO if (pInfo == NULL) { uError("sendReport failed, out of memory send info"); terrno = TSDB_CODE_OUT_OF_MEMORY; + monitorFreeSlowLogData(param); + taosMemoryFree(buf); return -1; } pInfo->fp = monitorReportAsyncCB; @@ -120,7 +139,7 @@ static int32_t sendReport(void* pTransporter, SEpSet *epSet, char* pCont, MONITO pInfo->msgInfo.len = tlen; pInfo->msgType = TDMT_MND_STATIS; pInfo->param = param; -// pInfo->paramFreeFp = taosMemoryFree; + pInfo->paramFreeFp = monitorFreeSlowLogData; pInfo->requestId = tGenIdPI64(); pInfo->requestObjRefId = 0; @@ -132,89 +151,6 @@ static int32_t sendReport(void* pTransporter, SEpSet *epSet, char* pCont, MONITO return code; } -static bool monitorReadSendSlowLog(TdFilePtr pFile, char* path, void* pTransporter, SEpSet *epSet){ - int64_t filesize = 0; - if (taosStatFile(path, &filesize, NULL, NULL) < 0) { - return false; - } - - if (filesize == 0) { - return true; - } - - char buf[SLOW_LOG_SEND_SIZE + 1] = {0}; // +1 for \0, for print log - char pCont[SLOW_LOG_SEND_SIZE + 1] = {0}; // +1 for \0, for print log - int32_t offset = 0; - if(taosLSeekFile(pFile, 0, SEEK_SET) < 0){ - uError("failed to seek file:%p code: %d", pFile, errno); - return false; - } - while(1){ - int64_t readSize = taosReadFile(pFile, buf + offset, SLOW_LOG_SEND_SIZE - offset); - if (readSize <= 0) { - if (readSize < 0){ - uError("failed to read len from file:%p since %s", pFile, terrstr()); - return false; - } - break; - } - - memset(pCont, 0, sizeof(pCont)); - strcat(pCont, "["); - char* string = buf; - for(int i = 0; i < readSize + offset; i++){ - if (buf[i] == '\0') { - if (string != buf) strcat(pCont, ","); - strcat(pCont, string); - uDebug("[monitor] monitorReadSendSlowLog slow log:%s", string); - string = buf + i + 1; - } - } - strcat(pCont, "]"); - if (pTransporter && pCont != NULL) { - SlowLogParam* pParam = taosMemoryMalloc(sizeof(SlowLogParam)); - if (pParam == NULL) { - return false; - } - if (tsem_init(&pParam->sem, 0, 0) != 0){ - taosMemoryFree(pParam); - return false; - } - pParam->code = sendReport(pTransporter, epSet, pCont, MONITOR_TYPE_SLOW_LOG, pParam); - if(pParam->code == TSDB_CODE_SUCCESS){ - tsem_wait(&pParam->sem); - } - tsem_destroy(&pParam->sem); - - if(pParam->code != TSDB_CODE_SUCCESS){ - if(taosLSeekFile(pFile, -readSize, SEEK_CUR) < 0){ - uError("failed to seek file:%p code: %d", pFile, errno); - } - uError("failed to send report:%s", pCont); - taosMemoryFree(pParam); - return false; - } - taosMemoryFree(pParam); - - uDebug("[monitor] monitorReadSendSlowLog send slow log to mnode:%s", pCont) - } - - if (readSize + offset < SLOW_LOG_SEND_SIZE) { - break; - } - offset = SLOW_LOG_SEND_SIZE - (string - buf); - if(buf != string && offset != 0){ - memmove(buf, string, offset); - uDebug("[monitor] monitorReadSendSlowLog left slow log:%s", buf) - } - } - if(taosFtruncateFile(pFile, 0) < 0){ - uError("failed to truncate file:%p code: %d", pFile, errno); - } - uDebug("[monitor] monitorReadSendSlowLog send slow log file:%p", pFile); - return true; -} - static void generateClusterReport(taos_collector_registry_t* registry, void* pTransporter, SEpSet *epSet) { char ts[50] = {0}; sprintf(ts, "%" PRId64, taosGetTimestamp(TSDB_TIME_PRECISION_MILLI)); @@ -250,70 +186,6 @@ static void reportSendProcess(void* param, void* tmrId) { taosRUnLockLatch(&monitorLock); } -static void monitorSendAllSlowLogFromTempDir(int64_t clusterId){ - SAppInstInfo* pInst = getAppInstByClusterId((int64_t)clusterId); - - if(pInst == NULL || !pInst->monitorParas.tsEnableMonitor){ - uInfo("[monitor] monitor is disabled, skip send slow log"); - return; - } - char namePrefix[PATH_MAX] = {0}; - if (snprintf(namePrefix, sizeof(namePrefix), "%s%"PRIx64, TD_TMP_FILE_PREFIX, pInst->clusterId) < 0) { - uError("failed to generate slow log file name prefix"); - return; - } - - char tmpPath[PATH_MAX] = {0}; - if (getSlowLogTmpDir(tmpPath, sizeof(tmpPath)) < 0) { - return; - } - - TdDirPtr pDir = taosOpenDir(tmpPath); - if (pDir == NULL) { - return; - } - - TdDirEntryPtr de = NULL; - while ((de = taosReadDir(pDir)) != NULL) { - if (taosDirEntryIsDir(de)) { - continue; - } - - char *name = taosGetDirEntryName(de); - if (strcmp(name, ".") == 0 || - strcmp(name, "..") == 0 || - strstr(name, namePrefix) == NULL) { - uInfo("skip file:%s, for cluster id:%"PRIx64, name, pInst->clusterId); - continue; - } - - char filename[PATH_MAX] = {0}; - snprintf(filename, sizeof(filename), "%s%s", tmpPath, name); - TdFilePtr pFile = taosOpenFile(filename, TD_FILE_READ); - if (pFile == NULL) { - uError("failed to open file:%s since %s", filename, terrstr()); - continue; - } - if (taosLockFile(pFile) < 0) { - uError("failed to lock file:%s since %s, maybe used by other process", filename, terrstr()); - taosCloseFile(&pFile); - continue; - } - SEpSet ep = getEpSet_s(&pInst->mgmtEp); - bool truncated = monitorReadSendSlowLog(pFile, filename, pInst->pTransporter, &ep); - taosUnLockFile(pFile); - taosCloseFile(&pFile); - - if(truncated){ - taosRemoveFile(filename); - } - uDebug("[monitor] send and delete slow log file when reveive connect rsp:%s", filename); - - } - - taosCloseDir(&pDir); -} - static void sendAllCounter(){ MonitorClient** ppMonitor = (MonitorClient**)taosHashIterate(monitorCounterHash, NULL); while (ppMonitor != NULL) { @@ -444,13 +316,6 @@ const char* monitorResultStr(SQL_RESULT_CODE code) { return result_state[code]; } -static void monitorFreeSlowLogData(MonitorSlowLogData* pData) { - if (pData == NULL) { - return; - } - taosMemoryFree(pData->value); -} - static void monitorThreadFuncUnexpectedStopped(void) { atomic_store_32(&slowLogFlag, -1); } static void monitorWriteSlowLog2File(MonitorSlowLogData* slowLogData, char *tmpPath){ @@ -480,6 +345,7 @@ static void monitorWriteSlowLog2File(MonitorSlowLogData* slowLogData, char *tmpP } pClient->lastCheckTime = taosGetMonoTimestampMs(); strcpy(pClient->path, path); + pClient->offset = 0; pClient->pFile = pFile; if (taosHashPut(monitorSlowLogHash, &slowLogData->clusterId, LONG_BYTES, &pClient, POINTER_BYTES) != 0){ uError("failed to put clusterId:%" PRId64 " to hash table", slowLogData->clusterId); @@ -502,36 +368,236 @@ static void monitorWriteSlowLog2File(MonitorSlowLogData* slowLogData, char *tmpP pFile = (*(SlowLogClient**)tmp)->pFile; } - if (taosWriteFile(pFile, slowLogData->value, strlen(slowLogData->value) + 1) < 0){ + if(taosLSeekFile(pFile, 0, SEEK_END) < 0){ + uError("failed to seek file:%p code: %d", pFile, errno); + return; + } + if (taosWriteFile(pFile, slowLogData->data, strlen(slowLogData->data) + 1) < 0){ uError("failed to write len to file:%p since %s", pFile, terrstr()); } uDebug("[monitor] write slow log to file:%p, clusterId:%"PRIx64, pFile, slowLogData->clusterId); } -static void monitorSendAllSlowLog(bool quit){ - int64_t t = taosGetMonoTimestampMs(); +static char* readFile(TdFilePtr pFile, int64_t *offset, bool* isEnd){ + if(taosLSeekFile(pFile, *offset, SEEK_SET) < 0){ + uError("failed to seek file:%p code: %d", pFile, errno); + return NULL; + } + int64_t totalSize = 0; + char* pCont = taosMemoryCalloc(1, SLOW_LOG_SEND_SIZE); + if(pCont == NULL){ + return NULL; + } + strcat(pCont, "["); + + while(1) { + char* pLine = NULL; + int64_t readLen = taosGetLineFile(pFile, &pLine); + + if(totalSize + readLen >= SLOW_LOG_SEND_SIZE){ + break; + } + if (readLen <= 0) { + if (readLen < 0) { + uError("failed to read len from file:%p since %s", pFile, terrstr()); + }else{ + *isEnd = true; + } + break; + } + + if (totalSize != 0) strcat(pCont, ","); + strcat(pCont, pLine); + totalSize += readLen; + } + strcat(pCont, "]"); + uDebug("[monitor] monitorReadSendSlowLog slow log:%s", pCont); + *offset += totalSize; + return pCont; +} + +static bool isFileEmpty(char* path){ + int64_t filesize = 0; + if (taosStatFile(path, &filesize, NULL, NULL) < 0) { + return false; + } + + if (filesize == 0) { + return true; + } + return false; +} + +static int32_t sendSlowLog(int64_t clusterId, char* data, TdFilePtr pFile, int64_t offset, SLOW_LOG_QUEUE_TYPE type, char* fileName, void* pTransporter, SEpSet *epSet){ + MonitorSlowLogData* pParam = taosMemoryMalloc(sizeof(MonitorSlowLogData)); + pParam->data = data; + pParam->offset = offset; + pParam->clusterId = clusterId; + pParam->type = type; + pParam->pFile = pFile; + pParam->fileName = fileName; + return sendReport(pTransporter, epSet, data, MONITOR_TYPE_SLOW_LOG, pParam); +} + +static void monitorSendSlowLogAtBeginning(int64_t clusterId, char* fileName, TdFilePtr pFile, int64_t offset, void* pTransporter, SEpSet *epSet){ + bool isEnd = false; + char* data = readFile(pFile, &offset, &isEnd); + if(isEnd){ + taosFtruncateFile(pFile, 0); + taosUnLockFile(pFile); + taosCloseFile(&pFile); + taosRemoveFile(fileName); + uDebug("[monitor] monitorSendSlowLogAtBeginning delete file:%s", fileName); + }else{ + sendSlowLog(clusterId, data, pFile, offset, SLOW_LOG_READ_BEGINNIG, taosStrdup(fileName), pTransporter, epSet); + uDebug("[monitor] monitorSendSlowLogAtBeginning send slow log file:%p", pFile); + } +} + +static void monitorSendSlowLogAtRunning(int64_t clusterId){ + void* tmp = taosHashGet(monitorSlowLogHash, &clusterId, LONG_BYTES); + SlowLogClient* pClient = (*(SlowLogClient**)tmp); + bool isEnd = false; + char* data = readFile(pClient->pFile, &pClient->offset, &isEnd); + if(isEnd){ + if(taosFtruncateFile(pClient->pFile, 0) < 0){ + uError("failed to truncate file:%p code: %d", pClient->pFile, errno); + } + pClient->offset = 0; + }else if(data != NULL){ + SAppInstInfo* pInst = getAppInstByClusterId(clusterId); + if(pInst == NULL){ + uError("failed to get app instance by clusterId:%" PRId64, clusterId); + return; + } + SEpSet ep = getEpSet_s(&pInst->mgmtEp); + sendSlowLog(clusterId, data, pClient->pFile, pClient->offset, SLOW_LOG_READ_RUNNING, NULL, pInst->pTransporter, &ep); + uDebug("[monitor] monitorReadSendSlowLog send slow log:%s", data); + } +} + +static bool monitorSendSlowLogAtQuit(int64_t clusterId) { + void* tmp = taosHashGet(monitorSlowLogHash, &clusterId, LONG_BYTES); + SlowLogClient* pClient = (*(SlowLogClient**)tmp); + + bool isEnd = false; + char* data = readFile(pClient->pFile, &pClient->offset, &isEnd); + if(isEnd){ + taosUnLockFile(pClient->pFile); + taosCloseFile(&(pClient->pFile)); + taosRemoveFile(pClient->path); + if((--quitCnt) == 0){ + return true; + } + }else if(data != NULL){ + SAppInstInfo* pInst = getAppInstByClusterId(clusterId); + if(pInst == NULL) { + return true; + } + SEpSet ep = getEpSet_s(&pInst->mgmtEp); + sendSlowLog(clusterId, data, pClient->pFile, pClient->offset, SLOW_LOG_READ_QUIT, NULL, pInst->pTransporter, &ep); + uDebug("[monitor] monitorReadSendSlowLog send slow log:%s", data); + } + return false; +} +static void monitorSendAllSlowLogAtQuit(){ void* pIter = NULL; while ((pIter = taosHashIterate(monitorSlowLogHash, pIter))) { int64_t* clusterId = (int64_t*)taosHashGetKey(pIter, NULL); SAppInstInfo* pInst = getAppInstByClusterId(*clusterId); - bool truncated = false; - if(quit || (pInst != NULL && t - (*(SlowLogClient**)pIter)->lastCheckTime > pInst->monitorParas.tsMonitorInterval * 1000)) { - (*(SlowLogClient**)pIter)->lastCheckTime = t; - SEpSet ep = getEpSet_s(&pInst->mgmtEp); - truncated = monitorReadSendSlowLog((*(SlowLogClient**)pIter)->pFile, (*(SlowLogClient**)pIter)->path, pInst->pTransporter, &ep); - } + if(pInst == NULL) return; + SlowLogClient* pClient = (*(SlowLogClient**)pIter); + SEpSet ep = getEpSet_s(&pInst->mgmtEp); + bool isEnd = false; + int64_t offset = 0; + char* data = readFile(pClient->pFile, &offset, &isEnd); - if(quit){ - taosUnLockFile((*(SlowLogClient**)pIter)->pFile); - taosCloseFile(&((*(SlowLogClient**)pIter)->pFile)); - if(truncated){ - taosRemoveFile((*(SlowLogClient**)pIter)->path); + if(data != NULL && sendSlowLog(*clusterId, data, NULL, offset, SLOW_LOG_READ_QUIT, NULL, pInst->pTransporter, &ep) == 0){ + quitCnt ++; + } + uDebug("[monitor] monitorSendAllSlowLogAtQuit send slow log :%s", data); + } +} + +static void monitorSendAllSlowLog(){ + int64_t t = taosGetMonoTimestampMs(); + void* pIter = NULL; + while ((pIter = taosHashIterate(monitorSlowLogHash, pIter))) { + int64_t* clusterId = (int64_t*)taosHashGetKey(pIter, NULL); + SAppInstInfo* pInst = getAppInstByClusterId(*clusterId); + SlowLogClient* pClient = (*(SlowLogClient**)pIter); + if (pInst != NULL && t - pClient->lastCheckTime > pInst->monitorParas.tsMonitorInterval * 1000 && + pClient->offset == 0 && !isFileEmpty(pClient->path)) { + pClient->lastCheckTime = t; + SEpSet ep = getEpSet_s(&pInst->mgmtEp); + bool isEnd = false; + int64_t offset = 0; + char* data = readFile(pClient->pFile, &offset, &isEnd); + if(data){ + sendSlowLog(*clusterId, data, NULL, offset, SLOW_LOG_READ_RUNNING, NULL, pInst->pTransporter, &ep); } + uDebug("[monitor] monitorSendAllSlowLog send slow log :%s", data); } } } +static void monitorSendAllSlowLogFromTempDir(int64_t clusterId){ + SAppInstInfo* pInst = getAppInstByClusterId((int64_t)clusterId); + + if(pInst == NULL || !pInst->monitorParas.tsEnableMonitor){ + uInfo("[monitor] monitor is disabled, skip send slow log"); + return; + } + char namePrefix[PATH_MAX] = {0}; + if (snprintf(namePrefix, sizeof(namePrefix), "%s%"PRIx64, TD_TMP_FILE_PREFIX, pInst->clusterId) < 0) { + uError("failed to generate slow log file name prefix"); + return; + } + + char tmpPath[PATH_MAX] = {0}; + if (getSlowLogTmpDir(tmpPath, sizeof(tmpPath)) < 0) { + return; + } + + TdDirPtr pDir = taosOpenDir(tmpPath); + if (pDir == NULL) { + return; + } + + TdDirEntryPtr de = NULL; + while ((de = taosReadDir(pDir)) != NULL) { + if (taosDirEntryIsDir(de)) { + continue; + } + + char *name = taosGetDirEntryName(de); + if (strcmp(name, ".") == 0 || + strcmp(name, "..") == 0 || + strstr(name, namePrefix) == NULL) { + uInfo("skip file:%s, for cluster id:%"PRIx64, name, pInst->clusterId); + continue; + } + + char filename[PATH_MAX] = {0}; + snprintf(filename, sizeof(filename), "%s%s", tmpPath, name); + TdFilePtr pFile = taosOpenFile(filename, TD_FILE_READ); + if (pFile == NULL) { + uError("failed to open file:%s since %s", filename, terrstr()); + continue; + } + if (taosLockFile(pFile) < 0) { + uError("failed to lock file:%s since %s, maybe used by other process", filename, terrstr()); + taosCloseFile(&pFile); + continue; + } + SEpSet ep = getEpSet_s(&pInst->mgmtEp); + monitorSendSlowLogAtBeginning(pInst->clusterId, filename, pFile, 0, pInst->pTransporter, &ep); + } + + taosCloseDir(&pDir); +} + static void* monitorThreadFunc(void *param){ setThreadName("client-monitor-slowlog"); @@ -567,26 +633,45 @@ static void* monitorThreadFunc(void *param){ return NULL; } uDebug("monitorThreadFunc start"); + int64_t quitTime = 0; while (1) { if (slowLogFlag > 0) { - monitorSendAllSlowLog(true); - break; + if(quitCnt == 0){ + monitorSendAllSlowLogAtQuit(); + quitTime = taosGetMonoTimestampMs(); + } + if(taosGetMonoTimestampMs() - quitTime > 500){ //quit at most 500ms + break; + } } MonitorSlowLogData* slowLogData = NULL; taosReadQitem(monitorQueue, (void**)&slowLogData); if (slowLogData != NULL) { - uDebug("[monitor] read slow log data from queue, clusterId:%" PRIx64 " value:%s", slowLogData->clusterId, slowLogData->value); - if (slowLogData->value == NULL){ - monitorSendAllSlowLogFromTempDir(slowLogData->clusterId); - } else{ + if (slowLogData->type == SLOW_LOG_READ_BEGINNIG){ + if(slowLogData->pFile != NULL){ + SAppInstInfo* pInst = getAppInstByClusterId(slowLogData->clusterId); + if(pInst != NULL) { + SEpSet ep = getEpSet_s(&pInst->mgmtEp); + monitorSendSlowLogAtBeginning(slowLogData->clusterId, slowLogData->fileName, slowLogData->pFile, slowLogData->offset, pInst->pTransporter, &ep); + } + }else{ + monitorSendAllSlowLogFromTempDir(slowLogData->clusterId); + } + } else if(slowLogData->type == SLOW_LOG_WRITE){ monitorWriteSlowLog2File(slowLogData, tmpPath); + } else if(slowLogData->type == SLOW_LOG_READ_RUNNING){ + monitorSendSlowLogAtRunning(slowLogData->clusterId); + }else if(slowLogData->type == SLOW_LOG_READ_QUIT){ + if(monitorSendSlowLogAtQuit(slowLogData->clusterId)){ + break; + } } } monitorFreeSlowLogData(slowLogData); taosFreeQitem(slowLogData); - monitorSendAllSlowLog(false); + monitorSendAllSlowLog(); tsem2_timewait(&monitorSem, 100); } @@ -659,15 +744,14 @@ void monitorClose() { taosWUnLockLatch(&monitorLock); } -int32_t monitorPutData2MonitorQueue(int64_t clusterId, char* value){ +int32_t monitorPutData2MonitorQueue(MonitorSlowLogData data){ MonitorSlowLogData* slowLogData = taosAllocateQitem(sizeof(MonitorSlowLogData), DEF_QITEM, 0); if (slowLogData == NULL) { uError("[monitor] failed to allocate slow log data"); return -1; } - slowLogData->clusterId = clusterId; - slowLogData->value = value; - uDebug("[monitor] write slow log to queue, clusterId:%"PRIx64 " value:%s", slowLogData->clusterId, slowLogData->value); + *slowLogData = data; + uDebug("[monitor] write slow log to queue, clusterId:%"PRIx64 " type:%d", slowLogData->clusterId, slowLogData->type); while (atomic_load_32(&slowLogFlag) == -1) { taosMsleep(5); } diff --git a/source/client/src/clientMsgHandler.c b/source/client/src/clientMsgHandler.c index 417cb8b562..87c334218f 100644 --- a/source/client/src/clientMsgHandler.c +++ b/source/client/src/clientMsgHandler.c @@ -155,7 +155,8 @@ int32_t processConnectRsp(void* param, SDataBuf* pMsg, int32_t code) { if(taosHashPut(appInfo.pInstMapByClusterId, &connectRsp.clusterId, LONG_BYTES, &pTscObj->pAppInfo, POINTER_BYTES) != 0){ tscError("failed to put appInfo into appInfo.pInstMapByClusterId"); } - monitorPutData2MonitorQueue(pTscObj->pAppInfo->clusterId, NULL); + MonitorSlowLogData data ={.clusterId = pTscObj->pAppInfo->clusterId, .type = SLOW_LOG_READ_BEGINNIG, .pFile = NULL}; + monitorPutData2MonitorQueue(data); monitorClientSlowQueryInit(connectRsp.clusterId); monitorClientSQLReqInit(connectRsp.clusterId); } From ce77caa66398c47bd4474f783ca02df8917142fc Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Mon, 1 Jul 2024 19:20:06 +0800 Subject: [PATCH 08/27] fix:[TS-4921]refactor reporting logic for slow log --- source/client/src/clientMonitor.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/source/client/src/clientMonitor.c b/source/client/src/clientMonitor.c index a4be6210bb..e4d256185d 100644 --- a/source/client/src/clientMonitor.c +++ b/source/client/src/clientMonitor.c @@ -401,7 +401,7 @@ static char* readFile(TdFilePtr pFile, int64_t *offset, bool* isEnd){ if (readLen <= 0) { if (readLen < 0) { uError("failed to read len from file:%p since %s", pFile, terrstr()); - }else{ + }else if(totalSize == 0){ *isEnd = true; } break; @@ -414,6 +414,10 @@ static char* readFile(TdFilePtr pFile, int64_t *offset, bool* isEnd){ strcat(pCont, "]"); uDebug("[monitor] monitorReadSendSlowLog slow log:%s", pCont); *offset += totalSize; + if(*isEnd){ + taosMemoryFree(pCont); + return NULL; + } return pCont; } @@ -450,7 +454,9 @@ static void monitorSendSlowLogAtBeginning(int64_t clusterId, char* fileName, TdF taosRemoveFile(fileName); uDebug("[monitor] monitorSendSlowLogAtBeginning delete file:%s", fileName); }else{ - sendSlowLog(clusterId, data, pFile, offset, SLOW_LOG_READ_BEGINNIG, taosStrdup(fileName), pTransporter, epSet); + if(data != NULL){ + sendSlowLog(clusterId, data, pFile, offset, SLOW_LOG_READ_BEGINNIG, taosStrdup(fileName), pTransporter, epSet); + } uDebug("[monitor] monitorSendSlowLogAtBeginning send slow log file:%p", pFile); } } @@ -534,7 +540,7 @@ static void monitorSendAllSlowLog(){ bool isEnd = false; int64_t offset = 0; char* data = readFile(pClient->pFile, &offset, &isEnd); - if(data){ + if(data != NULL){ sendSlowLog(*clusterId, data, NULL, offset, SLOW_LOG_READ_RUNNING, NULL, pInst->pTransporter, &ep); } uDebug("[monitor] monitorSendAllSlowLog send slow log :%s", data); From 0f0fa840eae06d403406316d4b404c1a2a18539a Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Tue, 2 Jul 2024 09:34:32 +0800 Subject: [PATCH 09/27] fix:[TS-4921]refactor reporting logic for slow log --- source/client/src/clientMonitor.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/source/client/src/clientMonitor.c b/source/client/src/clientMonitor.c index e4d256185d..b30224c510 100644 --- a/source/client/src/clientMonitor.c +++ b/source/client/src/clientMonitor.c @@ -330,7 +330,7 @@ static void monitorWriteSlowLog2File(MonitorSlowLogData* slowLogData, char *tmpP } taosGetTmpfilePath(tmpPath, clusterId, path); uInfo("[monitor] create slow log file:%s", path); - pFile = taosOpenFile(path, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND | TD_FILE_READ | TD_FILE_TRUNC); + pFile = taosOpenFile(path, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND | TD_FILE_READ | TD_FILE_TRUNC | TD_FILE_STREAM); if (pFile == NULL) { terrno = TAOS_SYSTEM_ERROR(errno); uError("failed to open file:%s since %s", path, terrstr()); @@ -378,7 +378,7 @@ static void monitorWriteSlowLog2File(MonitorSlowLogData* slowLogData, char *tmpP uDebug("[monitor] write slow log to file:%p, clusterId:%"PRIx64, pFile, slowLogData->clusterId); } -static char* readFile(TdFilePtr pFile, int64_t *offset, bool* isEnd){ +static char* readFileByLine(TdFilePtr pFile, int64_t *offset, bool* isEnd){ if(taosLSeekFile(pFile, *offset, SEEK_SET) < 0){ uError("failed to seek file:%p code: %d", pFile, errno); return NULL; @@ -446,7 +446,7 @@ static int32_t sendSlowLog(int64_t clusterId, char* data, TdFilePtr pFile, int64 static void monitorSendSlowLogAtBeginning(int64_t clusterId, char* fileName, TdFilePtr pFile, int64_t offset, void* pTransporter, SEpSet *epSet){ bool isEnd = false; - char* data = readFile(pFile, &offset, &isEnd); + char* data = readFileByLine(pFile, &offset, &isEnd); if(isEnd){ taosFtruncateFile(pFile, 0); taosUnLockFile(pFile); @@ -465,7 +465,7 @@ static void monitorSendSlowLogAtRunning(int64_t clusterId){ void* tmp = taosHashGet(monitorSlowLogHash, &clusterId, LONG_BYTES); SlowLogClient* pClient = (*(SlowLogClient**)tmp); bool isEnd = false; - char* data = readFile(pClient->pFile, &pClient->offset, &isEnd); + char* data = readFileByLine(pClient->pFile, &pClient->offset, &isEnd); if(isEnd){ if(taosFtruncateFile(pClient->pFile, 0) < 0){ uError("failed to truncate file:%p code: %d", pClient->pFile, errno); @@ -488,7 +488,7 @@ static bool monitorSendSlowLogAtQuit(int64_t clusterId) { SlowLogClient* pClient = (*(SlowLogClient**)tmp); bool isEnd = false; - char* data = readFile(pClient->pFile, &pClient->offset, &isEnd); + char* data = readFileByLine(pClient->pFile, &pClient->offset, &isEnd); if(isEnd){ taosUnLockFile(pClient->pFile); taosCloseFile(&(pClient->pFile)); @@ -517,8 +517,7 @@ static void monitorSendAllSlowLogAtQuit(){ SEpSet ep = getEpSet_s(&pInst->mgmtEp); bool isEnd = false; int64_t offset = 0; - char* data = readFile(pClient->pFile, &offset, &isEnd); - + char* data = readFileByLine(pClient->pFile, &offset, &isEnd); if(data != NULL && sendSlowLog(*clusterId, data, NULL, offset, SLOW_LOG_READ_QUIT, NULL, pInst->pTransporter, &ep) == 0){ quitCnt ++; } @@ -539,7 +538,7 @@ static void monitorSendAllSlowLog(){ SEpSet ep = getEpSet_s(&pInst->mgmtEp); bool isEnd = false; int64_t offset = 0; - char* data = readFile(pClient->pFile, &offset, &isEnd); + char* data = readFileByLine(pClient->pFile, &offset, &isEnd); if(data != NULL){ sendSlowLog(*clusterId, data, NULL, offset, SLOW_LOG_READ_RUNNING, NULL, pInst->pTransporter, &ep); } @@ -587,7 +586,7 @@ static void monitorSendAllSlowLogFromTempDir(int64_t clusterId){ char filename[PATH_MAX] = {0}; snprintf(filename, sizeof(filename), "%s%s", tmpPath, name); - TdFilePtr pFile = taosOpenFile(filename, TD_FILE_READ); + TdFilePtr pFile = taosOpenFile(filename, TD_FILE_READ | TD_FILE_STREAM | TD_FILE_TRUNC); if (pFile == NULL) { uError("failed to open file:%s since %s", filename, terrstr()); continue; From 65696ce97fe07bd8b166c4217c27f239a683f8cf Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Tue, 2 Jul 2024 14:52:20 +0800 Subject: [PATCH 10/27] fix:[TS-4921]refactor reporting logic for slow log --- include/libs/monitor/clientMonitor.h | 2 +- source/client/src/clientMonitor.c | 174 ++++++++++++--------- source/client/test/clientMonitorTests.cpp | 175 +++++++++++++++------- 3 files changed, 227 insertions(+), 124 deletions(-) diff --git a/include/libs/monitor/clientMonitor.h b/include/libs/monitor/clientMonitor.h index 5e89c07b2b..aa6ee6c9fc 100644 --- a/include/libs/monitor/clientMonitor.h +++ b/include/libs/monitor/clientMonitor.h @@ -38,7 +38,7 @@ typedef enum { SLOW_LOG_READ_QUIT = 2, } SLOW_LOG_QUEUE_TYPE; -#define SLOW_LOG_SEND_SIZE 32*1024 +#define SLOW_LOG_SEND_SIZE_MAX 128*1024*1024 typedef struct { int64_t clusterId; diff --git a/source/client/src/clientMonitor.c b/source/client/src/clientMonitor.c index b30224c510..94b1fa9388 100644 --- a/source/client/src/clientMonitor.c +++ b/source/client/src/clientMonitor.c @@ -187,19 +187,19 @@ static void reportSendProcess(void* param, void* tmrId) { } static void sendAllCounter(){ - MonitorClient** ppMonitor = (MonitorClient**)taosHashIterate(monitorCounterHash, NULL); - while (ppMonitor != NULL) { + MonitorClient** ppMonitor = NULL; + while ((ppMonitor = taosHashIterate(monitorSlowLogHash, ppMonitor))) { MonitorClient* pMonitor = *ppMonitor; - if (pMonitor != NULL){ - SAppInstInfo* pInst = getAppInstByClusterId(pMonitor->clusterId); - if(pInst == NULL){ - taosHashCancelIterate(monitorCounterHash, ppMonitor); - break; - } - SEpSet ep = getEpSet_s(&pInst->mgmtEp); - generateClusterReport(pMonitor->registry, pInst->pTransporter, &ep); + if (pMonitor == NULL){ + continue; } - ppMonitor = taosHashIterate(monitorCounterHash, ppMonitor); + SAppInstInfo* pInst = getAppInstByClusterId(pMonitor->clusterId); + if(pInst == NULL){ + taosHashCancelIterate(monitorCounterHash, ppMonitor); + break; + } + SEpSet ep = getEpSet_s(&pInst->mgmtEp); + generateClusterReport(pMonitor->registry, pInst->pTransporter, &ep); } } @@ -330,7 +330,7 @@ static void monitorWriteSlowLog2File(MonitorSlowLogData* slowLogData, char *tmpP } taosGetTmpfilePath(tmpPath, clusterId, path); uInfo("[monitor] create slow log file:%s", path); - pFile = taosOpenFile(path, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND | TD_FILE_READ | TD_FILE_TRUNC | TD_FILE_STREAM); + pFile = taosOpenFile(path, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND | TD_FILE_READ | TD_FILE_TRUNC); if (pFile == NULL) { terrno = TAOS_SYSTEM_ERROR(errno); uError("failed to open file:%s since %s", path, terrstr()); @@ -378,63 +378,70 @@ static void monitorWriteSlowLog2File(MonitorSlowLogData* slowLogData, char *tmpP uDebug("[monitor] write slow log to file:%p, clusterId:%"PRIx64, pFile, slowLogData->clusterId); } -static char* readFileByLine(TdFilePtr pFile, int64_t *offset, bool* isEnd){ +static char* readFile(TdFilePtr pFile, int64_t *offset, int64_t size){ if(taosLSeekFile(pFile, *offset, SEEK_SET) < 0){ uError("failed to seek file:%p code: %d", pFile, errno); return NULL; } + ASSERT(size > *offset); + char* pCont = NULL; int64_t totalSize = 0; - char* pCont = taosMemoryCalloc(1, SLOW_LOG_SEND_SIZE); + if (size - *offset >= SLOW_LOG_SEND_SIZE_MAX) { + pCont = taosMemoryCalloc(1, 2 * SLOW_LOG_SEND_SIZE_MAX); + totalSize = 2 * SLOW_LOG_SEND_SIZE_MAX; + }else{ + pCont = taosMemoryCalloc(1, 2 * (size - *offset)); + totalSize = 2 * (size - *offset); + } + if(pCont == NULL){ + uError("failed to allocate memory for slow log, size:%" PRId64, totalSize); return NULL; } - strcat(pCont, "["); - - while(1) { - char* pLine = NULL; - int64_t readLen = taosGetLineFile(pFile, &pLine); - - if(totalSize + readLen >= SLOW_LOG_SEND_SIZE){ - break; + char* buf = pCont; + strcat(buf++, "["); + int64_t readSize = taosReadFile(pFile, buf, SLOW_LOG_SEND_SIZE_MAX); + if (readSize <= 0) { + if (readSize < 0){ + uError("failed to read len from file:%p since %s", pFile, terrstr()); } - if (readLen <= 0) { - if (readLen < 0) { - uError("failed to read len from file:%p since %s", pFile, terrstr()); - }else if(totalSize == 0){ - *isEnd = true; - } - break; - } - - if (totalSize != 0) strcat(pCont, ","); - strcat(pCont, pLine); - totalSize += readLen; - } - strcat(pCont, "]"); - uDebug("[monitor] monitorReadSendSlowLog slow log:%s", pCont); - *offset += totalSize; - if(*isEnd){ taosMemoryFree(pCont); return NULL; } + + totalSize = 0; + while(1){ + size_t len = strlen(buf); + totalSize += (len+1); + if (totalSize > readSize || len == 0) { + *(buf-1) = ']'; + *buf = '\0'; + break; + } + buf[len] = ','; // replace '\0' with ',' + buf += (len + 1); + *offset += (len+1); + } + + uDebug("[monitor] monitorReadSendSlowLog slow log:%s", pCont); return pCont; } -static bool isFileEmpty(char* path){ - int64_t filesize = 0; - if (taosStatFile(path, &filesize, NULL, NULL) < 0) { - return false; +static int64_t getFileSize(char* path){ + int64_t fileSize = 0; + if (taosStatFile(path, &fileSize, NULL, NULL) < 0) { + return -1; } - if (filesize == 0) { - return true; - } - return false; + return fileSize; } static int32_t sendSlowLog(int64_t clusterId, char* data, TdFilePtr pFile, int64_t offset, SLOW_LOG_QUEUE_TYPE type, char* fileName, void* pTransporter, SEpSet *epSet){ MonitorSlowLogData* pParam = taosMemoryMalloc(sizeof(MonitorSlowLogData)); + if(pParam == NULL){ + return -1; + } pParam->data = data; pParam->offset = offset; pParam->clusterId = clusterId; @@ -445,15 +452,15 @@ static int32_t sendSlowLog(int64_t clusterId, char* data, TdFilePtr pFile, int64 } static void monitorSendSlowLogAtBeginning(int64_t clusterId, char* fileName, TdFilePtr pFile, int64_t offset, void* pTransporter, SEpSet *epSet){ - bool isEnd = false; - char* data = readFileByLine(pFile, &offset, &isEnd); - if(isEnd){ + int64_t size = getFileSize(fileName); + if(size <= offset){ taosFtruncateFile(pFile, 0); taosUnLockFile(pFile); taosCloseFile(&pFile); taosRemoveFile(fileName); uDebug("[monitor] monitorSendSlowLogAtBeginning delete file:%s", fileName); }else{ + char* data = readFile(pFile, &offset, size); if(data != NULL){ sendSlowLog(clusterId, data, pFile, offset, SLOW_LOG_READ_BEGINNIG, taosStrdup(fileName), pTransporter, epSet); } @@ -463,46 +470,61 @@ static void monitorSendSlowLogAtBeginning(int64_t clusterId, char* fileName, TdF static void monitorSendSlowLogAtRunning(int64_t clusterId){ void* tmp = taosHashGet(monitorSlowLogHash, &clusterId, LONG_BYTES); + if (tmp == NULL){ + return; + } SlowLogClient* pClient = (*(SlowLogClient**)tmp); - bool isEnd = false; - char* data = readFileByLine(pClient->pFile, &pClient->offset, &isEnd); - if(isEnd){ + if (pClient == NULL){ + return; + } + int64_t size = getFileSize(pClient->path); + if(size <= pClient->offset){ if(taosFtruncateFile(pClient->pFile, 0) < 0){ uError("failed to truncate file:%p code: %d", pClient->pFile, errno); } pClient->offset = 0; - }else if(data != NULL){ + }else{ SAppInstInfo* pInst = getAppInstByClusterId(clusterId); if(pInst == NULL){ uError("failed to get app instance by clusterId:%" PRId64, clusterId); return; } SEpSet ep = getEpSet_s(&pInst->mgmtEp); - sendSlowLog(clusterId, data, pClient->pFile, pClient->offset, SLOW_LOG_READ_RUNNING, NULL, pInst->pTransporter, &ep); + char* data = readFile(pClient->pFile, &pClient->offset, size); + if(data != NULL){ + sendSlowLog(clusterId, data, pClient->pFile, pClient->offset, SLOW_LOG_READ_RUNNING, NULL, pInst->pTransporter, &ep); + } uDebug("[monitor] monitorReadSendSlowLog send slow log:%s", data); } } static bool monitorSendSlowLogAtQuit(int64_t clusterId) { void* tmp = taosHashGet(monitorSlowLogHash, &clusterId, LONG_BYTES); + if (tmp == NULL){ + return true; + } SlowLogClient* pClient = (*(SlowLogClient**)tmp); - - bool isEnd = false; - char* data = readFileByLine(pClient->pFile, &pClient->offset, &isEnd); - if(isEnd){ + if (pClient == NULL){ + return true; + } + int64_t size = getFileSize(pClient->path); + if(size <= pClient->offset){ taosUnLockFile(pClient->pFile); taosCloseFile(&(pClient->pFile)); taosRemoveFile(pClient->path); if((--quitCnt) == 0){ return true; } - }else if(data != NULL){ + }else{ SAppInstInfo* pInst = getAppInstByClusterId(clusterId); if(pInst == NULL) { return true; } SEpSet ep = getEpSet_s(&pInst->mgmtEp); - sendSlowLog(clusterId, data, pClient->pFile, pClient->offset, SLOW_LOG_READ_QUIT, NULL, pInst->pTransporter, &ep); + char* data = readFile(pClient->pFile, &pClient->offset, size); + if(data != NULL){ + sendSlowLog(clusterId, data, pClient->pFile, pClient->offset, SLOW_LOG_READ_QUIT, NULL, pInst->pTransporter, &ep); + } uDebug("[monitor] monitorReadSendSlowLog send slow log:%s", data); } return false; @@ -510,14 +532,21 @@ static bool monitorSendSlowLogAtQuit(int64_t clusterId) { static void monitorSendAllSlowLogAtQuit(){ void* pIter = NULL; while ((pIter = taosHashIterate(monitorSlowLogHash, pIter))) { + SlowLogClient* pClient = (*(SlowLogClient**)pIter); + if(pClient == NULL) { + taosHashCancelIterate(monitorSlowLogHash, pIter); + return; + } int64_t* clusterId = (int64_t*)taosHashGetKey(pIter, NULL); SAppInstInfo* pInst = getAppInstByClusterId(*clusterId); - if(pInst == NULL) return; - SlowLogClient* pClient = (*(SlowLogClient**)pIter); + if(pInst == NULL) { + taosHashCancelIterate(monitorSlowLogHash, pIter); + return; + } SEpSet ep = getEpSet_s(&pInst->mgmtEp); - bool isEnd = false; int64_t offset = 0; - char* data = readFileByLine(pClient->pFile, &offset, &isEnd); + int64_t size = getFileSize(pClient->path); + char* data = readFile(pClient->pFile, &offset, size); if(data != NULL && sendSlowLog(*clusterId, data, NULL, offset, SLOW_LOG_READ_QUIT, NULL, pInst->pTransporter, &ep) == 0){ quitCnt ++; } @@ -532,13 +561,20 @@ static void monitorSendAllSlowLog(){ int64_t* clusterId = (int64_t*)taosHashGetKey(pIter, NULL); SAppInstInfo* pInst = getAppInstByClusterId(*clusterId); SlowLogClient* pClient = (*(SlowLogClient**)pIter); + if (pClient == NULL){ + taosHashCancelIterate(monitorSlowLogHash, pIter); + return; + } if (pInst != NULL && t - pClient->lastCheckTime > pInst->monitorParas.tsMonitorInterval * 1000 && - pClient->offset == 0 && !isFileEmpty(pClient->path)) { + pClient->offset == 0) { + int64_t size = getFileSize(pClient->path); + if(size <= 0){ + continue; + } pClient->lastCheckTime = t; SEpSet ep = getEpSet_s(&pInst->mgmtEp); - bool isEnd = false; int64_t offset = 0; - char* data = readFileByLine(pClient->pFile, &offset, &isEnd); + char* data = readFile(pClient->pFile, &offset, size); if(data != NULL){ sendSlowLog(*clusterId, data, NULL, offset, SLOW_LOG_READ_RUNNING, NULL, pInst->pTransporter, &ep); } @@ -586,7 +622,7 @@ static void monitorSendAllSlowLogFromTempDir(int64_t clusterId){ char filename[PATH_MAX] = {0}; snprintf(filename, sizeof(filename), "%s%s", tmpPath, name); - TdFilePtr pFile = taosOpenFile(filename, TD_FILE_READ | TD_FILE_STREAM | TD_FILE_TRUNC); + TdFilePtr pFile = taosOpenFile(filename, TD_FILE_READ | TD_FILE_TRUNC); if (pFile == NULL) { uError("failed to open file:%s since %s", filename, terrstr()); continue; diff --git a/source/client/test/clientMonitorTests.cpp b/source/client/test/clientMonitorTests.cpp index 2d3ce87f38..6285be5a4c 100644 --- a/source/client/test/clientMonitorTests.cpp +++ b/source/client/test/clientMonitorTests.cpp @@ -64,6 +64,65 @@ int main(int argc, char** argv) { // clusterMonitorClose(cluster2); //} +static char* readFile(TdFilePtr pFile, int64_t *offset, int64_t size){ + if(taosLSeekFile(pFile, *offset, SEEK_SET) < 0){ + uError("failed to seek file:%p code: %d", pFile, errno); + return NULL; + } + + ASSERT(size > *offset); + char* pCont = NULL; + int64_t totalSize = 0; + if (size - *offset >= SLOW_LOG_SEND_SIZE_MAX) { + pCont = (char*)taosMemoryCalloc(1, 2 * SLOW_LOG_SEND_SIZE_MAX); + totalSize = 2 * SLOW_LOG_SEND_SIZE_MAX; + }else{ + pCont = (char*)taosMemoryCalloc(1, 2 * (size - *offset)); + totalSize = 2 * (size - *offset); + } + + if(pCont == NULL){ + uError("failed to allocate memory for slow log, size:%" PRId64, totalSize); + return NULL; + } + char* buf = pCont; + strcat(buf++, "["); + int64_t readSize = taosReadFile(pFile, buf, SLOW_LOG_SEND_SIZE_MAX); + if (readSize <= 0) { + if (readSize < 0){ + uError("failed to read len from file:%p since %s", pFile, terrstr()); + } + taosMemoryFree(pCont); + return NULL; + } + + totalSize = 0; + while(1){ + size_t len = strlen(buf); + totalSize += (len+1); + if (totalSize > readSize || len == 0) { + *(buf-1) = ']'; + *buf = '\0'; + break; + } + buf[len] = ','; // replace '\0' with ',' + buf += (len + 1); + *offset += (len+1); + } + + uDebug("[monitor] monitorReadSendSlowLog slow log:%s", pCont); + return pCont; +} + +static int64_t getFileSize(char* path){ + int64_t fileSize = 0; + if (taosStatFile(path, &fileSize, NULL, NULL) < 0) { + return -1; + } + + return fileSize; +} + TEST(clientMonitorTest, sendTest) { TAOS* taos = taos_connect("127.0.0.1", "root", "taosdata", NULL, 0); ASSERT_TRUE(taos != NULL); @@ -91,8 +150,8 @@ TEST(clientMonitorTest, ReadOneFile) { } const int batch = 10; - const int size = SLOW_LOG_SEND_SIZE/batch; - for(int i = 0; i < batch + 1; i++){ + const int size = 10; + for(int i = 0; i < batch; i++){ char value[size] = {0}; memset(value, '0' + i, size - 1); if (taosWriteFile(pFile, value, strlen(value) + 1) < 0){ @@ -106,64 +165,72 @@ TEST(clientMonitorTest, ReadOneFile) { // Create an SEpSet object and set it up for testing SEpSet* epSet = NULL; + int64_t fileSize = getFileSize("./tdengine-1-wewe"); // Call the function to be tested -// monitorReadSendSlowLog(pFile, (int64_t)pTransporter, epSet); - - char value[size] = {0}; - memset(value, '0', size - 1); - if (taosWriteFile(pFile, value, strlen(value) + 1) < 0){ - uError("failed to write len to file:%p since %s", pFile, terrstr()); + int64_t offset = 0; + while(1){ + if (offset >= fileSize) { + break; + } + char* val = readFile(pFile, &offset, fileSize); + printf("offset:%" PRId64",fileSize:%"PRId64",val:%s\n", offset, fileSize, val); } +// char value[size] = {0}; +// memset(value, '0', size - 1); +// if (taosWriteFile(pFile, value, strlen(value) + 1) < 0){ +// uError("failed to write len to file:%p since %s", pFile, terrstr()); +// } + // monitorReadSendSlowLog(pFile, (int64_t)pTransporter, epSet); // Clean up any resources created for testing taosCloseFile(&pFile); } -TEST(clientMonitorTest, ReadTwoFile) { - // Create a TdFilePtr object and set it up for testing - - TdFilePtr pFile = taosOpenFile("/tmp/tdengine_slow_log/tdengine-1-wewe", TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND | TD_FILE_READ | TD_FILE_TRUNC); - if (pFile == NULL) { - uError("failed to open file:./test.txt since %s", terrstr()); - return; - } - - const int batch = 10; - const int size = SLOW_LOG_SEND_SIZE/batch; - for(int i = 0; i < batch + 1; i++){ - char value[size] = {0}; - memset(value, '0' + i, size - 1); - if (taosWriteFile(pFile, value, strlen(value) + 1) < 0){ - uError("failed to write len to file:%p since %s", pFile, terrstr()); - } - } - - taosFsyncFile(pFile); - taosCloseFile(&pFile); - - pFile = taosOpenFile("/tmp/tdengine_slow_log/tdengine-2-wewe", TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND | TD_FILE_READ | TD_FILE_TRUNC); - if (pFile == NULL) { - uError("failed to open file:./test.txt since %s", terrstr()); - return; - } - - for(int i = 0; i < batch + 1; i++){ - char value[size] = {0}; - memset(value, '0' + i, size - 1); - if (taosWriteFile(pFile, value, strlen(value) + 1) < 0){ - uError("failed to write len to file:%p since %s", pFile, terrstr()); - } - } - - taosFsyncFile(pFile); - taosCloseFile(&pFile); - - SAppInstInfo pAppInfo = {0}; - pAppInfo.clusterId = 2; - pAppInfo.monitorParas.tsEnableMonitor = 1; - strcpy(tsTempDir,"/tmp"); -// monitorSendAllSlowLogFromTempDir(&pAppInfo); - -} \ No newline at end of file +//TEST(clientMonitorTest, ReadTwoFile) { +// // Create a TdFilePtr object and set it up for testing +// +// TdFilePtr pFile = taosOpenFile("/tmp/tdengine_slow_log/tdengine-1-wewe", TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND | TD_FILE_READ | TD_FILE_TRUNC); +// if (pFile == NULL) { +// uError("failed to open file:./test.txt since %s", terrstr()); +// return; +// } +// +// const int batch = 10; +// const int size = SLOW_LOG_SEND_SIZE/batch; +// for(int i = 0; i < batch + 1; i++){ +// char value[size] = {0}; +// memset(value, '0' + i, size - 1); +// if (taosWriteFile(pFile, value, strlen(value) + 1) < 0){ +// uError("failed to write len to file:%p since %s", pFile, terrstr()); +// } +// } +// +// taosFsyncFile(pFile); +// taosCloseFile(&pFile); +// +// pFile = taosOpenFile("/tmp/tdengine_slow_log/tdengine-2-wewe", TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND | TD_FILE_READ | TD_FILE_TRUNC); +// if (pFile == NULL) { +// uError("failed to open file:./test.txt since %s", terrstr()); +// return; +// } +// +// for(int i = 0; i < batch + 1; i++){ +// char value[size] = {0}; +// memset(value, '0' + i, size - 1); +// if (taosWriteFile(pFile, value, strlen(value) + 1) < 0){ +// uError("failed to write len to file:%p since %s", pFile, terrstr()); +// } +// } +// +// taosFsyncFile(pFile); +// taosCloseFile(&pFile); +// +// SAppInstInfo pAppInfo = {0}; +// pAppInfo.clusterId = 2; +// pAppInfo.monitorParas.tsEnableMonitor = 1; +// strcpy(tsTempDir,"/tmp"); +//// monitorSendAllSlowLogFromTempDir(&pAppInfo); +// +//} \ No newline at end of file From 595c3f8e0773e81d822382977df09e10ae7ce296 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Tue, 2 Jul 2024 15:07:29 +0800 Subject: [PATCH 11/27] fix:[TS-4921]refactor reporting logic for slow log --- source/client/src/clientMonitor.c | 32 ++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/source/client/src/clientMonitor.c b/source/client/src/clientMonitor.c index 94b1fa9388..b7e2efaf1a 100644 --- a/source/client/src/clientMonitor.c +++ b/source/client/src/clientMonitor.c @@ -529,7 +529,7 @@ static bool monitorSendSlowLogAtQuit(int64_t clusterId) { } return false; } -static void monitorSendAllSlowLogAtQuit(){ +static void monitorSendAllSlowLogAtQuit(){ void* pIter = NULL; while ((pIter = taosHashIterate(monitorSlowLogHash, pIter))) { SlowLogClient* pClient = (*(SlowLogClient**)pIter); @@ -537,20 +537,26 @@ static void monitorSendAllSlowLogAtQuit(){ taosHashCancelIterate(monitorSlowLogHash, pIter); return; } - int64_t* clusterId = (int64_t*)taosHashGetKey(pIter, NULL); - SAppInstInfo* pInst = getAppInstByClusterId(*clusterId); - if(pInst == NULL) { - taosHashCancelIterate(monitorSlowLogHash, pIter); - return; - } - SEpSet ep = getEpSet_s(&pInst->mgmtEp); - int64_t offset = 0; int64_t size = getFileSize(pClient->path); - char* data = readFile(pClient->pFile, &offset, size); - if(data != NULL && sendSlowLog(*clusterId, data, NULL, offset, SLOW_LOG_READ_QUIT, NULL, pInst->pTransporter, &ep) == 0){ - quitCnt ++; + if(size <= pClient->offset){ + taosUnLockFile(pClient->pFile); + taosCloseFile(&(pClient->pFile)); + taosRemoveFile(pClient->path); + }else{ + int64_t* clusterId = (int64_t*)taosHashGetKey(pIter, NULL); + SAppInstInfo* pInst = getAppInstByClusterId(*clusterId); + if(pInst == NULL) { + taosHashCancelIterate(monitorSlowLogHash, pIter); + return; + } + SEpSet ep = getEpSet_s(&pInst->mgmtEp); + int64_t offset = 0; + char* data = readFile(pClient->pFile, &offset, size); + if(data != NULL && sendSlowLog(*clusterId, data, NULL, offset, SLOW_LOG_READ_QUIT, NULL, pInst->pTransporter, &ep) == 0){ + quitCnt ++; + } + uDebug("[monitor] monitorSendAllSlowLogAtQuit send slow log :%s", data); } - uDebug("[monitor] monitorSendAllSlowLogAtQuit send slow log :%s", data); } } From dd45eb999a7fccf2518329624cee95a9d6cb910e Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Tue, 2 Jul 2024 15:39:08 +0800 Subject: [PATCH 12/27] fix:[TS-4921]refactor reporting logic for slow log --- source/client/src/clientMonitor.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/source/client/src/clientMonitor.c b/source/client/src/clientMonitor.c index b7e2efaf1a..2271a2dfbd 100644 --- a/source/client/src/clientMonitor.c +++ b/source/client/src/clientMonitor.c @@ -102,6 +102,8 @@ static int32_t monitorReportAsyncCB(void* param, SDataBuf* pMsg, int32_t code) { if(monitorPutData2MonitorQueue(*p) == 0){ p->fileName = NULL; } + monitorFreeSlowLogData(p); + taosMemoryFree(p); } return code; } @@ -114,15 +116,13 @@ static int32_t sendReport(void* pTransporter, SEpSet *epSet, char* pCont, MONITO int tlen = tSerializeSStatisReq(NULL, 0, &sStatisReq); if (tlen < 0) { - monitorFreeSlowLogData(param); - return -1; + goto FAILED; } void* buf = taosMemoryMalloc(tlen); if (buf == NULL) { uError("sendReport failed, out of memory, len:%d", tlen); terrno = TSDB_CODE_OUT_OF_MEMORY; - monitorFreeSlowLogData(param); - return -1; + goto FAILED; } tSerializeSStatisReq(buf, tlen, &sStatisReq); @@ -130,25 +130,28 @@ static int32_t sendReport(void* pTransporter, SEpSet *epSet, char* pCont, MONITO if (pInfo == NULL) { uError("sendReport failed, out of memory send info"); terrno = TSDB_CODE_OUT_OF_MEMORY; - monitorFreeSlowLogData(param); taosMemoryFree(buf); - return -1; + goto FAILED; } pInfo->fp = monitorReportAsyncCB; pInfo->msgInfo.pData = buf; pInfo->msgInfo.len = tlen; pInfo->msgType = TDMT_MND_STATIS; pInfo->param = param; - pInfo->paramFreeFp = monitorFreeSlowLogData; pInfo->requestId = tGenIdPI64(); pInfo->requestObjRefId = 0; int64_t transporterId = 0; int32_t code = asyncSendMsgToServer(pTransporter, epSet, &transporterId, pInfo); - if (code != TSDB_CODE_SUCCESS) { - uError("sendReport failed, code:%d", code); + if (code == TSDB_CODE_SUCCESS) { + return code; } - return code; + +FAILED: + uError("sendReport failed, code:%d", code); + monitorFreeSlowLogData(param); + taosMemoryFree(param); + return -1; } static void generateClusterReport(taos_collector_registry_t* registry, void* pTransporter, SEpSet *epSet) { @@ -464,6 +467,7 @@ static void monitorSendSlowLogAtBeginning(int64_t clusterId, char* fileName, TdF if(data != NULL){ sendSlowLog(clusterId, data, pFile, offset, SLOW_LOG_READ_BEGINNIG, taosStrdup(fileName), pTransporter, epSet); } + taosMemoryFree(data); uDebug("[monitor] monitorSendSlowLogAtBeginning send slow log file:%p", pFile); } } @@ -494,6 +498,7 @@ static void monitorSendSlowLogAtRunning(int64_t clusterId){ if(data != NULL){ sendSlowLog(clusterId, data, pClient->pFile, pClient->offset, SLOW_LOG_READ_RUNNING, NULL, pInst->pTransporter, &ep); } + taosMemoryFree(data); uDebug("[monitor] monitorReadSendSlowLog send slow log:%s", data); } } @@ -525,6 +530,7 @@ static bool monitorSendSlowLogAtQuit(int64_t clusterId) { if(data != NULL){ sendSlowLog(clusterId, data, pClient->pFile, pClient->offset, SLOW_LOG_READ_QUIT, NULL, pInst->pTransporter, &ep); } + taosMemoryFree(data); uDebug("[monitor] monitorReadSendSlowLog send slow log:%s", data); } return false; @@ -555,6 +561,7 @@ static void monitorSendAllSlowLogAtQuit(){ if(data != NULL && sendSlowLog(*clusterId, data, NULL, offset, SLOW_LOG_READ_QUIT, NULL, pInst->pTransporter, &ep) == 0){ quitCnt ++; } + taosMemoryFree(data); uDebug("[monitor] monitorSendAllSlowLogAtQuit send slow log :%s", data); } } @@ -584,6 +591,7 @@ static void monitorSendAllSlowLog(){ if(data != NULL){ sendSlowLog(*clusterId, data, NULL, offset, SLOW_LOG_READ_RUNNING, NULL, pInst->pTransporter, &ep); } + taosMemoryFree(data); uDebug("[monitor] monitorSendAllSlowLog send slow log :%s", data); } } From a410f4bb0589c509057b8428b56d17c4438934c3 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Tue, 2 Jul 2024 15:42:06 +0800 Subject: [PATCH 13/27] fix:[TS-4921]refactor reporting logic for slow log --- source/client/src/clientMonitor.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/source/client/src/clientMonitor.c b/source/client/src/clientMonitor.c index 2271a2dfbd..5fb1d25ba4 100644 --- a/source/client/src/clientMonitor.c +++ b/source/client/src/clientMonitor.c @@ -467,8 +467,8 @@ static void monitorSendSlowLogAtBeginning(int64_t clusterId, char* fileName, TdF if(data != NULL){ sendSlowLog(clusterId, data, pFile, offset, SLOW_LOG_READ_BEGINNIG, taosStrdup(fileName), pTransporter, epSet); } + uDebug("[monitor] monitorSendSlowLogAtBeginning send slow log file:%p, data:%s", pFile, data); taosMemoryFree(data); - uDebug("[monitor] monitorSendSlowLogAtBeginning send slow log file:%p", pFile); } } @@ -498,8 +498,8 @@ static void monitorSendSlowLogAtRunning(int64_t clusterId){ if(data != NULL){ sendSlowLog(clusterId, data, pClient->pFile, pClient->offset, SLOW_LOG_READ_RUNNING, NULL, pInst->pTransporter, &ep); } - taosMemoryFree(data); uDebug("[monitor] monitorReadSendSlowLog send slow log:%s", data); + taosMemoryFree(data); } } @@ -530,8 +530,8 @@ static bool monitorSendSlowLogAtQuit(int64_t clusterId) { if(data != NULL){ sendSlowLog(clusterId, data, pClient->pFile, pClient->offset, SLOW_LOG_READ_QUIT, NULL, pInst->pTransporter, &ep); } - taosMemoryFree(data); uDebug("[monitor] monitorReadSendSlowLog send slow log:%s", data); + taosMemoryFree(data); } return false; } @@ -561,8 +561,8 @@ static void monitorSendAllSlowLogAtQuit(){ if(data != NULL && sendSlowLog(*clusterId, data, NULL, offset, SLOW_LOG_READ_QUIT, NULL, pInst->pTransporter, &ep) == 0){ quitCnt ++; } - taosMemoryFree(data); uDebug("[monitor] monitorSendAllSlowLogAtQuit send slow log :%s", data); + taosMemoryFree(data); } } } @@ -591,8 +591,8 @@ static void monitorSendAllSlowLog(){ if(data != NULL){ sendSlowLog(*clusterId, data, NULL, offset, SLOW_LOG_READ_RUNNING, NULL, pInst->pTransporter, &ep); } - taosMemoryFree(data); uDebug("[monitor] monitorSendAllSlowLog send slow log :%s", data); + taosMemoryFree(data); } } } From d3fcab5c0d1ebcf7edd21ecb22839421868d5448 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Tue, 2 Jul 2024 16:19:02 +0800 Subject: [PATCH 14/27] fix:[TS-4921]refactor reporting logic for slow log --- source/client/src/clientMonitor.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/source/client/src/clientMonitor.c b/source/client/src/clientMonitor.c index 5fb1d25ba4..f9a9b12bea 100644 --- a/source/client/src/clientMonitor.c +++ b/source/client/src/clientMonitor.c @@ -427,7 +427,7 @@ static char* readFile(TdFilePtr pFile, int64_t *offset, int64_t size){ *offset += (len+1); } - uDebug("[monitor] monitorReadSendSlowLog slow log:%s", pCont); + uDebug("[monitor] readFile slow log:%s", pCont); return pCont; } @@ -693,6 +693,9 @@ static void* monitorThreadFunc(void *param){ if (slowLogFlag > 0) { if(quitCnt == 0){ monitorSendAllSlowLogAtQuit(); + if(quitCnt == 0){ + break; + } quitTime = taosGetMonoTimestampMs(); } if(taosGetMonoTimestampMs() - quitTime > 500){ //quit at most 500ms @@ -726,7 +729,9 @@ static void* monitorThreadFunc(void *param){ monitorFreeSlowLogData(slowLogData); taosFreeQitem(slowLogData); - monitorSendAllSlowLog(); + if (quitCnt == 0) { + monitorSendAllSlowLog(); + } tsem2_timewait(&monitorSem, 100); } From a21aed7084c75c1b9dc35594acf3fb8ca14dd8b7 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Tue, 2 Jul 2024 16:23:55 +0800 Subject: [PATCH 15/27] fix:[TS-4921]refactor reporting logic for slow log --- source/client/src/clientMonitor.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/client/src/clientMonitor.c b/source/client/src/clientMonitor.c index f9a9b12bea..2e2036479e 100644 --- a/source/client/src/clientMonitor.c +++ b/source/client/src/clientMonitor.c @@ -498,7 +498,7 @@ static void monitorSendSlowLogAtRunning(int64_t clusterId){ if(data != NULL){ sendSlowLog(clusterId, data, pClient->pFile, pClient->offset, SLOW_LOG_READ_RUNNING, NULL, pInst->pTransporter, &ep); } - uDebug("[monitor] monitorReadSendSlowLog send slow log:%s", data); + uDebug("[monitor] monitorSendSlowLogAtRunning send slow log:%s", data); taosMemoryFree(data); } } @@ -530,7 +530,7 @@ static bool monitorSendSlowLogAtQuit(int64_t clusterId) { if(data != NULL){ sendSlowLog(clusterId, data, pClient->pFile, pClient->offset, SLOW_LOG_READ_QUIT, NULL, pInst->pTransporter, &ep); } - uDebug("[monitor] monitorReadSendSlowLog send slow log:%s", data); + uDebug("[monitor] monitorSendSlowLogAtQuit send slow log:%s", data); taosMemoryFree(data); } return false; From 08245d6eb2bfe8c77d428ed9b631ff7efec10a77 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Tue, 2 Jul 2024 16:59:39 +0800 Subject: [PATCH 16/27] fix:[TS-4921]refactor reporting logic for slow log --- source/client/src/clientMonitor.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/source/client/src/clientMonitor.c b/source/client/src/clientMonitor.c index 2e2036479e..01cf923a55 100644 --- a/source/client/src/clientMonitor.c +++ b/source/client/src/clientMonitor.c @@ -486,6 +486,7 @@ static void monitorSendSlowLogAtRunning(int64_t clusterId){ if(taosFtruncateFile(pClient->pFile, 0) < 0){ uError("failed to truncate file:%p code: %d", pClient->pFile, errno); } + uDebug("[monitor] monitorSendSlowLogAtRunning truncate file to 0 file:%p", pClient->pFile); pClient->offset = 0; }else{ SAppInstInfo* pInst = getAppInstByClusterId(clusterId); @@ -517,6 +518,7 @@ static bool monitorSendSlowLogAtQuit(int64_t clusterId) { taosUnLockFile(pClient->pFile); taosCloseFile(&(pClient->pFile)); taosRemoveFile(pClient->path); + uInfo("[monitor] monitorSendSlowLogAtQuit remove file:%s", pClient->path); if((--quitCnt) == 0){ return true; } @@ -530,7 +532,7 @@ static bool monitorSendSlowLogAtQuit(int64_t clusterId) { if(data != NULL){ sendSlowLog(clusterId, data, pClient->pFile, pClient->offset, SLOW_LOG_READ_QUIT, NULL, pInst->pTransporter, &ep); } - uDebug("[monitor] monitorSendSlowLogAtQuit send slow log:%s", data); + uInfo("[monitor] monitorSendSlowLogAtQuit send slow log:%s", data); taosMemoryFree(data); } return false; @@ -548,7 +550,7 @@ static void monitorSendAllSlowLogAtQuit(){ taosUnLockFile(pClient->pFile); taosCloseFile(&(pClient->pFile)); taosRemoveFile(pClient->path); - }else{ + }else if(pClient->offset == 0){ int64_t* clusterId = (int64_t*)taosHashGetKey(pIter, NULL); SAppInstInfo* pInst = getAppInstByClusterId(*clusterId); if(pInst == NULL) { @@ -556,12 +558,11 @@ static void monitorSendAllSlowLogAtQuit(){ return; } SEpSet ep = getEpSet_s(&pInst->mgmtEp); - int64_t offset = 0; - char* data = readFile(pClient->pFile, &offset, size); - if(data != NULL && sendSlowLog(*clusterId, data, NULL, offset, SLOW_LOG_READ_QUIT, NULL, pInst->pTransporter, &ep) == 0){ + char* data = readFile(pClient->pFile, &pClient->offset, size); + if(data != NULL && sendSlowLog(*clusterId, data, NULL, pClient->offset, SLOW_LOG_READ_QUIT, NULL, pInst->pTransporter, &ep) == 0){ quitCnt ++; } - uDebug("[monitor] monitorSendAllSlowLogAtQuit send slow log :%s", data); + uInfo("[monitor] monitorSendAllSlowLogAtQuit send slow log :%s", data); taosMemoryFree(data); } } @@ -578,18 +579,21 @@ static void monitorSendAllSlowLog(){ taosHashCancelIterate(monitorSlowLogHash, pIter); return; } - if (pInst != NULL && t - pClient->lastCheckTime > pInst->monitorParas.tsMonitorInterval * 1000 && - pClient->offset == 0) { + if (t - pClient->lastCheckTime > pInst->monitorParas.tsMonitorInterval * 1000){ + pClient->lastCheckTime = t; + } else { + continue; + } + + if (pInst != NULL && pClient->offset == 0) { int64_t size = getFileSize(pClient->path); if(size <= 0){ continue; } - pClient->lastCheckTime = t; SEpSet ep = getEpSet_s(&pInst->mgmtEp); - int64_t offset = 0; - char* data = readFile(pClient->pFile, &offset, size); + char* data = readFile(pClient->pFile, &pClient->offset, size); if(data != NULL){ - sendSlowLog(*clusterId, data, NULL, offset, SLOW_LOG_READ_RUNNING, NULL, pInst->pTransporter, &ep); + sendSlowLog(*clusterId, data, NULL, pClient->offset, SLOW_LOG_READ_RUNNING, NULL, pInst->pTransporter, &ep); } uDebug("[monitor] monitorSendAllSlowLog send slow log :%s", data); taosMemoryFree(data); From 3b6cb21771bda7dacf93045dda59c2733c88c4f5 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Tue, 2 Jul 2024 17:34:28 +0800 Subject: [PATCH 17/27] fix:[TS-4921]refactor reporting logic for slow log --- source/client/src/clientMonitor.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/source/client/src/clientMonitor.c b/source/client/src/clientMonitor.c index 01cf923a55..320908e3de 100644 --- a/source/client/src/clientMonitor.c +++ b/source/client/src/clientMonitor.c @@ -568,6 +568,19 @@ static void monitorSendAllSlowLogAtQuit(){ } } +static void processFileRemoved(SlowLogClient* pClient){ + taosUnLockFile(pClient->pFile); + taosCloseFile(&(pClient->pFile)); + + TdFilePtr pFile = taosOpenFile(pClient->path, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND | TD_FILE_READ | TD_FILE_TRUNC); + if (pFile == NULL) { + terrno = TAOS_SYSTEM_ERROR(errno); + uError("failed to open file:%s since %s", pClient->path, terrstr()); + }else{ + pClient->pFile = pFile; + } +} + static void monitorSendAllSlowLog(){ int64_t t = taosGetMonoTimestampMs(); void* pIter = NULL; @@ -588,6 +601,12 @@ static void monitorSendAllSlowLog(){ if (pInst != NULL && pClient->offset == 0) { int64_t size = getFileSize(pClient->path); if(size <= 0){ + if(size < 0){ + uError("[monitor] monitorSendAllSlowLog failed to get file size:%s, err:%d", pClient->path, errno); + if(errno == ENOENT){ + processFileRemoved(pClient); + } + } continue; } SEpSet ep = getEpSet_s(&pInst->mgmtEp); From 470796ae8811361a2e161c5fdc29799f92328897 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Tue, 2 Jul 2024 18:03:26 +0800 Subject: [PATCH 18/27] fix:[TS-4921]refactor reporting logic for slow log --- include/libs/monitor/clientMonitor.h | 2 +- source/client/src/clientMonitor.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/include/libs/monitor/clientMonitor.h b/include/libs/monitor/clientMonitor.h index aa6ee6c9fc..abf7df1031 100644 --- a/include/libs/monitor/clientMonitor.h +++ b/include/libs/monitor/clientMonitor.h @@ -35,7 +35,7 @@ typedef enum { SLOW_LOG_WRITE = 0, SLOW_LOG_READ_RUNNING = 1, SLOW_LOG_READ_BEGINNIG = 2, - SLOW_LOG_READ_QUIT = 2, + SLOW_LOG_READ_QUIT = 3, } SLOW_LOG_QUEUE_TYPE; #define SLOW_LOG_SEND_SIZE_MAX 128*1024*1024 diff --git a/source/client/src/clientMonitor.c b/source/client/src/clientMonitor.c index 320908e3de..3752aa64a4 100644 --- a/source/client/src/clientMonitor.c +++ b/source/client/src/clientMonitor.c @@ -717,11 +717,13 @@ static void* monitorThreadFunc(void *param){ if(quitCnt == 0){ monitorSendAllSlowLogAtQuit(); if(quitCnt == 0){ + uInfo("monitorThreadFunc quit since no slow log to send"); break; } quitTime = taosGetMonoTimestampMs(); } if(taosGetMonoTimestampMs() - quitTime > 500){ //quit at most 500ms + uInfo("monitorThreadFunc quit since timeout") break; } } @@ -745,6 +747,7 @@ static void* monitorThreadFunc(void *param){ monitorSendSlowLogAtRunning(slowLogData->clusterId); }else if(slowLogData->type == SLOW_LOG_READ_QUIT){ if(monitorSendSlowLogAtQuit(slowLogData->clusterId)){ + uInfo("monitorThreadFunc quit since all slow log sended"); break; } } From 44d81eab56a3e359c237cf4ce293758f3698a836 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Tue, 2 Jul 2024 18:45:12 +0800 Subject: [PATCH 19/27] fix:remove using alarm when sending slow log & use one thread to send all slow log --- source/client/src/clientMonitor.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/source/client/src/clientMonitor.c b/source/client/src/clientMonitor.c index 3752aa64a4..b603676ee8 100644 --- a/source/client/src/clientMonitor.c +++ b/source/client/src/clientMonitor.c @@ -659,7 +659,7 @@ static void monitorSendAllSlowLogFromTempDir(int64_t clusterId){ char filename[PATH_MAX] = {0}; snprintf(filename, sizeof(filename), "%s%s", tmpPath, name); - TdFilePtr pFile = taosOpenFile(filename, TD_FILE_READ | TD_FILE_TRUNC); + TdFilePtr pFile = taosOpenFile(filename, TD_FILE_READ | TD_FILE_WRITE); if (pFile == NULL) { uError("failed to open file:%s since %s", filename, terrstr()); continue; @@ -748,6 +748,8 @@ static void* monitorThreadFunc(void *param){ }else if(slowLogData->type == SLOW_LOG_READ_QUIT){ if(monitorSendSlowLogAtQuit(slowLogData->clusterId)){ uInfo("monitorThreadFunc quit since all slow log sended"); + monitorFreeSlowLogData(slowLogData); + taosFreeQitem(slowLogData); break; } } From f593b644fc58b4154259df5cef00cbd0a4babad0 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Tue, 2 Jul 2024 22:48:32 +0800 Subject: [PATCH 20/27] fix:compile error in windows & modify the size of send max --- include/libs/monitor/clientMonitor.h | 2 +- source/client/src/clientMonitor.c | 3 ++- source/client/test/clientMonitorTests.cpp | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/include/libs/monitor/clientMonitor.h b/include/libs/monitor/clientMonitor.h index abf7df1031..69926c62e5 100644 --- a/include/libs/monitor/clientMonitor.h +++ b/include/libs/monitor/clientMonitor.h @@ -38,7 +38,7 @@ typedef enum { SLOW_LOG_READ_QUIT = 3, } SLOW_LOG_QUEUE_TYPE; -#define SLOW_LOG_SEND_SIZE_MAX 128*1024*1024 +#define SLOW_LOG_SEND_SIZE_MAX 1024*1024 typedef struct { int64_t clusterId; diff --git a/source/client/src/clientMonitor.c b/source/client/src/clientMonitor.c index b603676ee8..9cec7f2d11 100644 --- a/source/client/src/clientMonitor.c +++ b/source/client/src/clientMonitor.c @@ -382,6 +382,7 @@ static void monitorWriteSlowLog2File(MonitorSlowLogData* slowLogData, char *tmpP } static char* readFile(TdFilePtr pFile, int64_t *offset, int64_t size){ + uDebug("[monitor] readFile slow begin pFile:%p, offset:%"PRId64 ", size:%"PRId64, pFile, *offset, size); if(taosLSeekFile(pFile, *offset, SEEK_SET) < 0){ uError("failed to seek file:%p code: %d", pFile, errno); return NULL; @@ -427,7 +428,7 @@ static char* readFile(TdFilePtr pFile, int64_t *offset, int64_t size){ *offset += (len+1); } - uDebug("[monitor] readFile slow log:%s", pCont); + uDebug("[monitor] readFile slow log end, data:%s, offset:%"PRId64, pCont, *offset); return pCont; } diff --git a/source/client/test/clientMonitorTests.cpp b/source/client/test/clientMonitorTests.cpp index 6285be5a4c..e0518a2ce2 100644 --- a/source/client/test/clientMonitorTests.cpp +++ b/source/client/test/clientMonitorTests.cpp @@ -173,7 +173,7 @@ TEST(clientMonitorTest, ReadOneFile) { break; } char* val = readFile(pFile, &offset, fileSize); - printf("offset:%" PRId64",fileSize:%"PRId64",val:%s\n", offset, fileSize, val); + printf("offset:%lld,fileSize:%lld,val:%s\n", offset, fileSize, val); } // char value[size] = {0}; From 4739607b91e57ace5e59d5b09adff07078aa27b3 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Wed, 3 Jul 2024 09:42:54 +0800 Subject: [PATCH 21/27] fix:[TS-4921]refactor reporting logic for slow log --- source/client/src/clientMonitor.c | 19 ++++--------------- source/client/src/clientMsgHandler.c | 2 +- 2 files changed, 5 insertions(+), 16 deletions(-) diff --git a/source/client/src/clientMonitor.c b/source/client/src/clientMonitor.c index 9cec7f2d11..156e30e0e3 100644 --- a/source/client/src/clientMonitor.c +++ b/source/client/src/clientMonitor.c @@ -69,9 +69,7 @@ static void monitorFreeSlowLogData(void *paras) { if (pData == NULL) { return; } - if (pData->type == SLOW_LOG_WRITE){ - taosMemoryFree(pData->data); - } + taosMemoryFree(pData->data); if (pData->type == SLOW_LOG_READ_BEGINNIG){ taosMemoryFree(pData->fileName); } @@ -101,6 +99,7 @@ static int32_t monitorReportAsyncCB(void* param, SDataBuf* pMsg, int32_t code) { } if(monitorPutData2MonitorQueue(*p) == 0){ p->fileName = NULL; + p->data = NULL; } monitorFreeSlowLogData(p); taosMemoryFree(p); @@ -361,12 +360,6 @@ static void monitorWriteSlowLog2File(MonitorSlowLogData* slowLogData, char *tmpP uError("failed to lock file:%p since %s", pFile, terrstr()); return; } - - SAppInstInfo* pInst = getAppInstByClusterId(slowLogData->clusterId); - if(pInst == NULL){ - uError("failed to get app instance by clusterId:%" PRId64, slowLogData->clusterId); - return; - } }else{ pFile = (*(SlowLogClient**)tmp)->pFile; } @@ -444,6 +437,7 @@ static int64_t getFileSize(char* path){ static int32_t sendSlowLog(int64_t clusterId, char* data, TdFilePtr pFile, int64_t offset, SLOW_LOG_QUEUE_TYPE type, char* fileName, void* pTransporter, SEpSet *epSet){ MonitorSlowLogData* pParam = taosMemoryMalloc(sizeof(MonitorSlowLogData)); if(pParam == NULL){ + taosMemoryFree(data); return -1; } pParam->data = data; @@ -469,7 +463,6 @@ static void monitorSendSlowLogAtBeginning(int64_t clusterId, char* fileName, TdF sendSlowLog(clusterId, data, pFile, offset, SLOW_LOG_READ_BEGINNIG, taosStrdup(fileName), pTransporter, epSet); } uDebug("[monitor] monitorSendSlowLogAtBeginning send slow log file:%p, data:%s", pFile, data); - taosMemoryFree(data); } } @@ -501,7 +494,6 @@ static void monitorSendSlowLogAtRunning(int64_t clusterId){ sendSlowLog(clusterId, data, pClient->pFile, pClient->offset, SLOW_LOG_READ_RUNNING, NULL, pInst->pTransporter, &ep); } uDebug("[monitor] monitorSendSlowLogAtRunning send slow log:%s", data); - taosMemoryFree(data); } } @@ -534,7 +526,6 @@ static bool monitorSendSlowLogAtQuit(int64_t clusterId) { sendSlowLog(clusterId, data, pClient->pFile, pClient->offset, SLOW_LOG_READ_QUIT, NULL, pInst->pTransporter, &ep); } uInfo("[monitor] monitorSendSlowLogAtQuit send slow log:%s", data); - taosMemoryFree(data); } return false; } @@ -564,7 +555,6 @@ static void monitorSendAllSlowLogAtQuit(){ quitCnt ++; } uInfo("[monitor] monitorSendAllSlowLogAtQuit send slow log :%s", data); - taosMemoryFree(data); } } } @@ -616,7 +606,6 @@ static void monitorSendAllSlowLog(){ sendSlowLog(*clusterId, data, NULL, pClient->offset, SLOW_LOG_READ_RUNNING, NULL, pInst->pTransporter, &ep); } uDebug("[monitor] monitorSendAllSlowLog send slow log :%s", data); - taosMemoryFree(data); } } } @@ -746,7 +735,7 @@ static void* monitorThreadFunc(void *param){ monitorWriteSlowLog2File(slowLogData, tmpPath); } else if(slowLogData->type == SLOW_LOG_READ_RUNNING){ monitorSendSlowLogAtRunning(slowLogData->clusterId); - }else if(slowLogData->type == SLOW_LOG_READ_QUIT){ + } else if(slowLogData->type == SLOW_LOG_READ_QUIT){ if(monitorSendSlowLogAtQuit(slowLogData->clusterId)){ uInfo("monitorThreadFunc quit since all slow log sended"); monitorFreeSlowLogData(slowLogData); diff --git a/source/client/src/clientMsgHandler.c b/source/client/src/clientMsgHandler.c index 87c334218f..43600f60e1 100644 --- a/source/client/src/clientMsgHandler.c +++ b/source/client/src/clientMsgHandler.c @@ -155,7 +155,7 @@ int32_t processConnectRsp(void* param, SDataBuf* pMsg, int32_t code) { if(taosHashPut(appInfo.pInstMapByClusterId, &connectRsp.clusterId, LONG_BYTES, &pTscObj->pAppInfo, POINTER_BYTES) != 0){ tscError("failed to put appInfo into appInfo.pInstMapByClusterId"); } - MonitorSlowLogData data ={.clusterId = pTscObj->pAppInfo->clusterId, .type = SLOW_LOG_READ_BEGINNIG, .pFile = NULL}; + MonitorSlowLogData data ={.clusterId = pTscObj->pAppInfo->clusterId, .type = SLOW_LOG_READ_BEGINNIG, .pFile = NULL, .data = NULL}; monitorPutData2MonitorQueue(data); monitorClientSlowQueryInit(connectRsp.clusterId); monitorClientSQLReqInit(connectRsp.clusterId); From 2dad4d0053b986ebe2e0b1997bc0d0a70ab6e5a7 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Wed, 3 Jul 2024 09:53:42 +0800 Subject: [PATCH 22/27] fix:[TS-4921]refactor reporting logic for slow log --- source/client/src/clientMonitor.c | 145 +++++++++++++++--------------- 1 file changed, 73 insertions(+), 72 deletions(-) diff --git a/source/client/src/clientMonitor.c b/source/client/src/clientMonitor.c index 156e30e0e3..c7adce8214 100644 --- a/source/client/src/clientMonitor.c +++ b/source/client/src/clientMonitor.c @@ -1,4 +1,5 @@ #include "clientMonitor.h" +#include "clientLog.h" #include "os.h" #include "tmisce.h" #include "ttime.h" @@ -24,7 +25,7 @@ static int32_t getSlowLogTmpDir(char* tmpPath, int32_t size){ } int ret = snprintf(tmpPath, size, "%s/tdengine_slow_log/", tsTempDir); if (ret < 0){ - uError("failed to get tmp path ret:%d", ret); + tscError("failed to get tmp path ret:%d", ret); return ret; } return 0; @@ -78,7 +79,7 @@ static void monitorFreeSlowLogData(void *paras) { static SAppInstInfo* getAppInstByClusterId(int64_t clusterId) { void *p = taosHashGet(appInfo.pInstMapByClusterId, &clusterId, LONG_BYTES); if(p == NULL){ - uError("failed to get app inst, clusterId:%" PRIx64, clusterId); + tscError("failed to get app inst, clusterId:%" PRIx64, clusterId); return NULL; } return *(SAppInstInfo**)p; @@ -86,7 +87,7 @@ static SAppInstInfo* getAppInstByClusterId(int64_t clusterId) { static int32_t monitorReportAsyncCB(void* param, SDataBuf* pMsg, int32_t code) { if (TSDB_CODE_SUCCESS != code) { - uError("found error in monitorReport send callback, code:%d, please check the network.", code); + tscError("found error in monitorReport send callback, code:%d, please check the network.", code); } if (pMsg) { taosMemoryFree(pMsg->pData); @@ -95,7 +96,7 @@ static int32_t monitorReportAsyncCB(void* param, SDataBuf* pMsg, int32_t code) { if(param != NULL){ MonitorSlowLogData* p = (MonitorSlowLogData*)param; if(code != 0){ - uError("failed to send slow log:%s, clusterId:%" PRIx64, p->data, p->clusterId); + tscError("failed to send slow log:%s, clusterId:%" PRIx64, p->data, p->clusterId); } if(monitorPutData2MonitorQueue(*p) == 0){ p->fileName = NULL; @@ -119,7 +120,7 @@ static int32_t sendReport(void* pTransporter, SEpSet *epSet, char* pCont, MONITO } void* buf = taosMemoryMalloc(tlen); if (buf == NULL) { - uError("sendReport failed, out of memory, len:%d", tlen); + tscError("sendReport failed, out of memory, len:%d", tlen); terrno = TSDB_CODE_OUT_OF_MEMORY; goto FAILED; } @@ -127,7 +128,7 @@ static int32_t sendReport(void* pTransporter, SEpSet *epSet, char* pCont, MONITO SMsgSendInfo* pInfo = taosMemoryCalloc(1, sizeof(SMsgSendInfo)); if (pInfo == NULL) { - uError("sendReport failed, out of memory send info"); + tscError("sendReport failed, out of memory send info"); terrno = TSDB_CODE_OUT_OF_MEMORY; taosMemoryFree(buf); goto FAILED; @@ -147,7 +148,7 @@ static int32_t sendReport(void* pTransporter, SEpSet *epSet, char* pCont, MONITO } FAILED: - uError("sendReport failed, code:%d", code); + tscError("sendReport failed, code:%d", code); monitorFreeSlowLogData(param); taosMemoryFree(param); return -1; @@ -158,7 +159,7 @@ static void generateClusterReport(taos_collector_registry_t* registry, void* pTr sprintf(ts, "%" PRId64, taosGetTimestamp(TSDB_TIME_PRECISION_MILLI)); char* pCont = (char*)taos_collector_registry_bridge_new(registry, ts, "%" PRId64, NULL); if(NULL == pCont) { - uError("generateClusterReport failed, get null content."); + tscError("generateClusterReport failed, get null content."); return; } @@ -209,58 +210,58 @@ void monitorCreateClient(int64_t clusterId) { MonitorClient* pMonitor = NULL; taosWLockLatch(&monitorLock); if (taosHashGet(monitorCounterHash, &clusterId, LONG_BYTES) == NULL) { - uInfo("[monitor] monitorCreateClient for %" PRIx64, clusterId); + tscInfo("[monitor] monitorCreateClient for %" PRIx64, clusterId); pMonitor = taosMemoryCalloc(1, sizeof(MonitorClient)); if (pMonitor == NULL) { - uError("failed to create monitor client"); + tscError("failed to create monitor client"); goto fail; } pMonitor->clusterId = clusterId; char clusterKey[32] = {0}; if(snprintf(clusterKey, sizeof(clusterKey), "%"PRId64, clusterId) < 0){ - uError("failed to create cluster key"); + tscError("failed to create cluster key"); goto fail; } pMonitor->registry = taos_collector_registry_new(clusterKey); if(pMonitor->registry == NULL){ - uError("failed to create registry"); + tscError("failed to create registry"); goto fail; } pMonitor->colector = taos_collector_new(clusterKey); if(pMonitor->colector == NULL){ - uError("failed to create collector"); + tscError("failed to create collector"); goto fail; } taos_collector_registry_register_collector(pMonitor->registry, pMonitor->colector); pMonitor->counters = (SHashObj*)taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK); if (pMonitor->counters == NULL) { - uError("failed to create monitor counters"); + tscError("failed to create monitor counters"); goto fail; } // taosHashSetFreeFp(pMonitor->counters, destroyCounter); if(taosHashPut(monitorCounterHash, &clusterId, LONG_BYTES, &pMonitor, POINTER_BYTES) != 0){ - uError("failed to put monitor client to hash"); + tscError("failed to put monitor client to hash"); goto fail; } SAppInstInfo* pInst = getAppInstByClusterId(clusterId); if(pInst == NULL){ - uError("failed to get app instance by cluster id"); + tscError("failed to get app instance by cluster id"); pMonitor = NULL; goto fail; } pMonitor->timer = taosTmrStart(reportSendProcess, pInst->monitorParas.tsMonitorInterval * 1000, (void*)pMonitor, monitorTimer); if(pMonitor->timer == NULL){ - uError("failed to start timer"); + tscError("failed to start timer"); goto fail; } - uInfo("[monitor] monitorCreateClient for %"PRIx64 "finished %p.", clusterId, pMonitor); + tscInfo("[monitor] monitorCreateClient for %"PRIx64 "finished %p.", clusterId, pMonitor); } taosWUnLockLatch(&monitorLock); if (-1 != atomic_val_compare_exchange_32(&monitorFlag, -1, 0)) { - uDebug("[monitor] monitorFlag already is 0"); + tscDebug("[monitor] monitorFlag already is 0"); } return; @@ -273,7 +274,7 @@ void monitorCreateClientCounter(int64_t clusterId, const char* name, const char* taosWLockLatch(&monitorLock); MonitorClient** ppMonitor = (MonitorClient**)taosHashGet(monitorCounterHash, &clusterId, LONG_BYTES); if (ppMonitor == NULL || *ppMonitor == NULL) { - uError("failed to get monitor client"); + tscError("failed to get monitor client"); goto end; } taos_counter_t* newCounter = taos_counter_new(name, help, label_key_count, label_keys); @@ -282,11 +283,11 @@ void monitorCreateClientCounter(int64_t clusterId, const char* name, const char* MonitorClient* pMonitor = *ppMonitor; taos_collector_add_metric(pMonitor->colector, newCounter); if(taosHashPut(pMonitor->counters, name, strlen(name), &newCounter, POINTER_BYTES) != 0){ - uError("failed to put counter to monitor"); + tscError("failed to put counter to monitor"); taos_counter_destroy(newCounter); goto end; } - uInfo("[monitor] monitorCreateClientCounter %"PRIx64"(%p):%s : %p.", pMonitor->clusterId, pMonitor, name, newCounter); + tscInfo("[monitor] monitorCreateClientCounter %"PRIx64"(%p):%s : %p.", pMonitor->clusterId, pMonitor, name, newCounter); end: taosWUnLockLatch(&monitorLock); @@ -296,18 +297,18 @@ void monitorCounterInc(int64_t clusterId, const char* counterName, const char** taosWLockLatch(&monitorLock); MonitorClient** ppMonitor = (MonitorClient**)taosHashGet(monitorCounterHash, &clusterId, LONG_BYTES); if (ppMonitor == NULL || *ppMonitor == NULL) { - uError("monitorCounterInc not found pMonitor %"PRId64, clusterId); + tscError("monitorCounterInc not found pMonitor %"PRId64, clusterId); goto end; } MonitorClient* pMonitor = *ppMonitor; taos_counter_t** ppCounter = (taos_counter_t**)taosHashGet(pMonitor->counters, counterName, strlen(counterName)); if (ppCounter == NULL || *ppCounter == NULL) { - uError("monitorCounterInc not found pCounter %"PRIx64":%s.", clusterId, counterName); + tscError("monitorCounterInc not found pCounter %"PRIx64":%s.", clusterId, counterName); goto end; } taos_counter_inc(*ppCounter, label_values); - uInfo("[monitor] monitorCounterInc %"PRIx64"(%p):%s", pMonitor->clusterId, pMonitor, counterName); + tscInfo("[monitor] monitorCounterInc %"PRIx64"(%p):%s", pMonitor->clusterId, pMonitor, counterName); end: taosWUnLockLatch(&monitorLock); @@ -327,21 +328,21 @@ static void monitorWriteSlowLog2File(MonitorSlowLogData* slowLogData, char *tmpP char path[PATH_MAX] = {0}; char clusterId[32] = {0}; if (snprintf(clusterId, sizeof(clusterId), "%" PRIx64, slowLogData->clusterId) < 0){ - uError("failed to generate clusterId:%" PRIx64, slowLogData->clusterId); + tscError("failed to generate clusterId:%" PRIx64, slowLogData->clusterId); return; } taosGetTmpfilePath(tmpPath, clusterId, path); - uInfo("[monitor] create slow log file:%s", path); + tscInfo("[monitor] create slow log file:%s", path); pFile = taosOpenFile(path, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND | TD_FILE_READ | TD_FILE_TRUNC); if (pFile == NULL) { terrno = TAOS_SYSTEM_ERROR(errno); - uError("failed to open file:%s since %s", path, terrstr()); + tscError("failed to open file:%s since %s", path, terrstr()); return; } SlowLogClient *pClient = taosMemoryCalloc(1, sizeof(SlowLogClient)); if (pClient == NULL){ - uError("failed to allocate memory for slow log client"); + tscError("failed to allocate memory for slow log client"); taosCloseFile(&pFile); return; } @@ -350,14 +351,14 @@ static void monitorWriteSlowLog2File(MonitorSlowLogData* slowLogData, char *tmpP pClient->offset = 0; pClient->pFile = pFile; if (taosHashPut(monitorSlowLogHash, &slowLogData->clusterId, LONG_BYTES, &pClient, POINTER_BYTES) != 0){ - uError("failed to put clusterId:%" PRId64 " to hash table", slowLogData->clusterId); + tscError("failed to put clusterId:%" PRId64 " to hash table", slowLogData->clusterId); taosCloseFile(&pFile); taosMemoryFree(pClient); return; } if(taosLockFile(pFile) < 0){ - uError("failed to lock file:%p since %s", pFile, terrstr()); + tscError("failed to lock file:%p since %s", pFile, terrstr()); return; } }else{ @@ -365,19 +366,19 @@ static void monitorWriteSlowLog2File(MonitorSlowLogData* slowLogData, char *tmpP } if(taosLSeekFile(pFile, 0, SEEK_END) < 0){ - uError("failed to seek file:%p code: %d", pFile, errno); + tscError("failed to seek file:%p code: %d", pFile, errno); return; } if (taosWriteFile(pFile, slowLogData->data, strlen(slowLogData->data) + 1) < 0){ - uError("failed to write len to file:%p since %s", pFile, terrstr()); + tscError("failed to write len to file:%p since %s", pFile, terrstr()); } - uDebug("[monitor] write slow log to file:%p, clusterId:%"PRIx64, pFile, slowLogData->clusterId); + tscDebug("[monitor] write slow log to file:%p, clusterId:%"PRIx64, pFile, slowLogData->clusterId); } static char* readFile(TdFilePtr pFile, int64_t *offset, int64_t size){ - uDebug("[monitor] readFile slow begin pFile:%p, offset:%"PRId64 ", size:%"PRId64, pFile, *offset, size); + tscDebug("[monitor] readFile slow begin pFile:%p, offset:%"PRId64 ", size:%"PRId64, pFile, *offset, size); if(taosLSeekFile(pFile, *offset, SEEK_SET) < 0){ - uError("failed to seek file:%p code: %d", pFile, errno); + tscError("failed to seek file:%p code: %d", pFile, errno); return NULL; } @@ -393,7 +394,7 @@ static char* readFile(TdFilePtr pFile, int64_t *offset, int64_t size){ } if(pCont == NULL){ - uError("failed to allocate memory for slow log, size:%" PRId64, totalSize); + tscError("failed to allocate memory for slow log, size:%" PRId64, totalSize); return NULL; } char* buf = pCont; @@ -401,7 +402,7 @@ static char* readFile(TdFilePtr pFile, int64_t *offset, int64_t size){ int64_t readSize = taosReadFile(pFile, buf, SLOW_LOG_SEND_SIZE_MAX); if (readSize <= 0) { if (readSize < 0){ - uError("failed to read len from file:%p since %s", pFile, terrstr()); + tscError("failed to read len from file:%p since %s", pFile, terrstr()); } taosMemoryFree(pCont); return NULL; @@ -421,7 +422,7 @@ static char* readFile(TdFilePtr pFile, int64_t *offset, int64_t size){ *offset += (len+1); } - uDebug("[monitor] readFile slow log end, data:%s, offset:%"PRId64, pCont, *offset); + tscDebug("[monitor] readFile slow log end, data:%s, offset:%"PRId64, pCont, *offset); return pCont; } @@ -456,13 +457,13 @@ static void monitorSendSlowLogAtBeginning(int64_t clusterId, char* fileName, TdF taosUnLockFile(pFile); taosCloseFile(&pFile); taosRemoveFile(fileName); - uDebug("[monitor] monitorSendSlowLogAtBeginning delete file:%s", fileName); + tscDebug("[monitor] monitorSendSlowLogAtBeginning delete file:%s", fileName); }else{ char* data = readFile(pFile, &offset, size); if(data != NULL){ sendSlowLog(clusterId, data, pFile, offset, SLOW_LOG_READ_BEGINNIG, taosStrdup(fileName), pTransporter, epSet); } - uDebug("[monitor] monitorSendSlowLogAtBeginning send slow log file:%p, data:%s", pFile, data); + tscDebug("[monitor] monitorSendSlowLogAtBeginning send slow log file:%p, data:%s", pFile, data); } } @@ -478,14 +479,14 @@ static void monitorSendSlowLogAtRunning(int64_t clusterId){ int64_t size = getFileSize(pClient->path); if(size <= pClient->offset){ if(taosFtruncateFile(pClient->pFile, 0) < 0){ - uError("failed to truncate file:%p code: %d", pClient->pFile, errno); + tscError("failed to truncate file:%p code: %d", pClient->pFile, errno); } - uDebug("[monitor] monitorSendSlowLogAtRunning truncate file to 0 file:%p", pClient->pFile); + tscDebug("[monitor] monitorSendSlowLogAtRunning truncate file to 0 file:%p", pClient->pFile); pClient->offset = 0; }else{ SAppInstInfo* pInst = getAppInstByClusterId(clusterId); if(pInst == NULL){ - uError("failed to get app instance by clusterId:%" PRId64, clusterId); + tscError("failed to get app instance by clusterId:%" PRId64, clusterId); return; } SEpSet ep = getEpSet_s(&pInst->mgmtEp); @@ -493,7 +494,7 @@ static void monitorSendSlowLogAtRunning(int64_t clusterId){ if(data != NULL){ sendSlowLog(clusterId, data, pClient->pFile, pClient->offset, SLOW_LOG_READ_RUNNING, NULL, pInst->pTransporter, &ep); } - uDebug("[monitor] monitorSendSlowLogAtRunning send slow log:%s", data); + tscDebug("[monitor] monitorSendSlowLogAtRunning send slow log:%s", data); } } @@ -511,7 +512,7 @@ static bool monitorSendSlowLogAtQuit(int64_t clusterId) { taosUnLockFile(pClient->pFile); taosCloseFile(&(pClient->pFile)); taosRemoveFile(pClient->path); - uInfo("[monitor] monitorSendSlowLogAtQuit remove file:%s", pClient->path); + tscInfo("[monitor] monitorSendSlowLogAtQuit remove file:%s", pClient->path); if((--quitCnt) == 0){ return true; } @@ -525,7 +526,7 @@ static bool monitorSendSlowLogAtQuit(int64_t clusterId) { if(data != NULL){ sendSlowLog(clusterId, data, pClient->pFile, pClient->offset, SLOW_LOG_READ_QUIT, NULL, pInst->pTransporter, &ep); } - uInfo("[monitor] monitorSendSlowLogAtQuit send slow log:%s", data); + tscInfo("[monitor] monitorSendSlowLogAtQuit send slow log:%s", data); } return false; } @@ -554,7 +555,7 @@ static void monitorSendAllSlowLogAtQuit(){ if(data != NULL && sendSlowLog(*clusterId, data, NULL, pClient->offset, SLOW_LOG_READ_QUIT, NULL, pInst->pTransporter, &ep) == 0){ quitCnt ++; } - uInfo("[monitor] monitorSendAllSlowLogAtQuit send slow log :%s", data); + tscInfo("[monitor] monitorSendAllSlowLogAtQuit send slow log :%s", data); } } } @@ -566,7 +567,7 @@ static void processFileRemoved(SlowLogClient* pClient){ TdFilePtr pFile = taosOpenFile(pClient->path, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND | TD_FILE_READ | TD_FILE_TRUNC); if (pFile == NULL) { terrno = TAOS_SYSTEM_ERROR(errno); - uError("failed to open file:%s since %s", pClient->path, terrstr()); + tscError("failed to open file:%s since %s", pClient->path, terrstr()); }else{ pClient->pFile = pFile; } @@ -593,7 +594,7 @@ static void monitorSendAllSlowLog(){ int64_t size = getFileSize(pClient->path); if(size <= 0){ if(size < 0){ - uError("[monitor] monitorSendAllSlowLog failed to get file size:%s, err:%d", pClient->path, errno); + tscError("[monitor] monitorSendAllSlowLog failed to get file size:%s, err:%d", pClient->path, errno); if(errno == ENOENT){ processFileRemoved(pClient); } @@ -605,7 +606,7 @@ static void monitorSendAllSlowLog(){ if(data != NULL){ sendSlowLog(*clusterId, data, NULL, pClient->offset, SLOW_LOG_READ_RUNNING, NULL, pInst->pTransporter, &ep); } - uDebug("[monitor] monitorSendAllSlowLog send slow log :%s", data); + tscDebug("[monitor] monitorSendAllSlowLog send slow log :%s", data); } } } @@ -614,12 +615,12 @@ static void monitorSendAllSlowLogFromTempDir(int64_t clusterId){ SAppInstInfo* pInst = getAppInstByClusterId((int64_t)clusterId); if(pInst == NULL || !pInst->monitorParas.tsEnableMonitor){ - uInfo("[monitor] monitor is disabled, skip send slow log"); + tscInfo("[monitor] monitor is disabled, skip send slow log"); return; } char namePrefix[PATH_MAX] = {0}; if (snprintf(namePrefix, sizeof(namePrefix), "%s%"PRIx64, TD_TMP_FILE_PREFIX, pInst->clusterId) < 0) { - uError("failed to generate slow log file name prefix"); + tscError("failed to generate slow log file name prefix"); return; } @@ -643,7 +644,7 @@ static void monitorSendAllSlowLogFromTempDir(int64_t clusterId){ if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0 || strstr(name, namePrefix) == NULL) { - uInfo("skip file:%s, for cluster id:%"PRIx64, name, pInst->clusterId); + tscInfo("skip file:%s, for cluster id:%"PRIx64, name, pInst->clusterId); continue; } @@ -651,11 +652,11 @@ static void monitorSendAllSlowLogFromTempDir(int64_t clusterId){ snprintf(filename, sizeof(filename), "%s%s", tmpPath, name); TdFilePtr pFile = taosOpenFile(filename, TD_FILE_READ | TD_FILE_WRITE); if (pFile == NULL) { - uError("failed to open file:%s since %s", filename, terrstr()); + tscError("failed to open file:%s since %s", filename, terrstr()); continue; } if (taosLockFile(pFile) < 0) { - uError("failed to lock file:%s since %s, maybe used by other process", filename, terrstr()); + tscError("failed to lock file:%s since %s, maybe used by other process", filename, terrstr()); taosCloseFile(&pFile); continue; } @@ -687,33 +688,33 @@ static void* monitorThreadFunc(void *param){ } if (tsem2_init(&monitorSem, 0, 0) != 0) { - uError("sem init error since %s", terrstr()); + tscError("sem init error since %s", terrstr()); return NULL; } monitorQueue = taosOpenQueue(); if(monitorQueue == NULL){ - uError("open queue error since %s", terrstr()); + tscError("open queue error since %s", terrstr()); return NULL; } if (-1 != atomic_val_compare_exchange_32(&slowLogFlag, -1, 0)) { return NULL; } - uDebug("monitorThreadFunc start"); + tscDebug("monitorThreadFunc start"); int64_t quitTime = 0; while (1) { if (slowLogFlag > 0) { if(quitCnt == 0){ monitorSendAllSlowLogAtQuit(); if(quitCnt == 0){ - uInfo("monitorThreadFunc quit since no slow log to send"); + tscInfo("monitorThreadFunc quit since no slow log to send"); break; } quitTime = taosGetMonoTimestampMs(); } if(taosGetMonoTimestampMs() - quitTime > 500){ //quit at most 500ms - uInfo("monitorThreadFunc quit since timeout") + tscInfo("monitorThreadFunc quit since timeout"); break; } } @@ -737,7 +738,7 @@ static void* monitorThreadFunc(void *param){ monitorSendSlowLogAtRunning(slowLogData->clusterId); } else if(slowLogData->type == SLOW_LOG_READ_QUIT){ if(monitorSendSlowLogAtQuit(slowLogData->clusterId)){ - uInfo("monitorThreadFunc quit since all slow log sended"); + tscInfo("monitorThreadFunc quit since all slow log sended"); monitorFreeSlowLogData(slowLogData); taosFreeQitem(slowLogData); break; @@ -765,7 +766,7 @@ static int32_t tscMonitortInit() { taosThreadAttrSetDetachState(&thAttr, PTHREAD_CREATE_JOINABLE); TdThread monitorThread; if (taosThreadCreate(&monitorThread, &thAttr, monitorThreadFunc, NULL) != 0) { - uError("failed to create monitor thread since %s", strerror(errno)); + tscError("failed to create monitor thread since %s", strerror(errno)); return -1; } @@ -775,7 +776,7 @@ static int32_t tscMonitortInit() { static void tscMonitorStop() { if (atomic_val_compare_exchange_32(&slowLogFlag, 0, 1)) { - uDebug("monitor thread already stopped"); + tscDebug("monitor thread already stopped"); return; } @@ -785,22 +786,22 @@ static void tscMonitorStop() { } void monitorInit() { - uInfo("[monitor] tscMonitor init"); + tscInfo("[monitor] tscMonitor init"); monitorCounterHash = (SHashObj*)taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, HASH_ENTRY_LOCK); if (monitorCounterHash == NULL) { - uError("failed to create monitorCounterHash"); + tscError("failed to create monitorCounterHash"); } taosHashSetFreeFp(monitorCounterHash, destroyMonitorClient); monitorSlowLogHash = (SHashObj*)taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, HASH_ENTRY_LOCK); if (monitorSlowLogHash == NULL) { - uError("failed to create monitorSlowLogHash"); + tscError("failed to create monitorSlowLogHash"); } taosHashSetFreeFp(monitorSlowLogHash, destroySlowLogClient); monitorTimer = taosTmrInit(0, 0, 0, "MONITOR"); if (monitorTimer == NULL) { - uError("failed to create monitor timer"); + tscError("failed to create monitor timer"); } taosInitRWLatch(&monitorLock); @@ -808,11 +809,11 @@ void monitorInit() { } void monitorClose() { - uInfo("[monitor] tscMonitor close"); + tscInfo("[monitor] tscMonitor close"); taosWLockLatch(&monitorLock); if (atomic_val_compare_exchange_32(&monitorFlag, 0, 1)) { - uDebug("[monitor] monitorFlag is not 0"); + tscDebug("[monitor] monitorFlag is not 0"); } tscMonitorStop(); sendAllCounter(); @@ -825,11 +826,11 @@ void monitorClose() { int32_t monitorPutData2MonitorQueue(MonitorSlowLogData data){ MonitorSlowLogData* slowLogData = taosAllocateQitem(sizeof(MonitorSlowLogData), DEF_QITEM, 0); if (slowLogData == NULL) { - uError("[monitor] failed to allocate slow log data"); + tscError("[monitor] failed to allocate slow log data"); return -1; } *slowLogData = data; - uDebug("[monitor] write slow log to queue, clusterId:%"PRIx64 " type:%d", slowLogData->clusterId, slowLogData->type); + tscDebug("[monitor] write slow log to queue, clusterId:%"PRIx64 " type:%d", slowLogData->clusterId, slowLogData->type); while (atomic_load_32(&slowLogFlag) == -1) { taosMsleep(5); } From 8805650be5209ed136410e338928404f52bf82e9 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Wed, 3 Jul 2024 11:22:50 +0800 Subject: [PATCH 23/27] fix:[TS-4921]refactor reporting logic for slow log --- source/client/src/clientMonitor.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/source/client/src/clientMonitor.c b/source/client/src/clientMonitor.c index c7adce8214..79b91c994a 100644 --- a/source/client/src/clientMonitor.c +++ b/source/client/src/clientMonitor.c @@ -308,7 +308,7 @@ void monitorCounterInc(int64_t clusterId, const char* counterName, const char** goto end; } taos_counter_inc(*ppCounter, label_values); - tscInfo("[monitor] monitorCounterInc %"PRIx64"(%p):%s", pMonitor->clusterId, pMonitor, counterName); + tscDebug("[monitor] monitorCounterInc %"PRIx64"(%p):%s", pMonitor->clusterId, pMonitor, counterName); end: taosWUnLockLatch(&monitorLock); @@ -386,11 +386,11 @@ static char* readFile(TdFilePtr pFile, int64_t *offset, int64_t size){ char* pCont = NULL; int64_t totalSize = 0; if (size - *offset >= SLOW_LOG_SEND_SIZE_MAX) { - pCont = taosMemoryCalloc(1, 2 * SLOW_LOG_SEND_SIZE_MAX); - totalSize = 2 * SLOW_LOG_SEND_SIZE_MAX; + pCont = taosMemoryCalloc(1, 4 + SLOW_LOG_SEND_SIZE_MAX); //4 reserved for [] + totalSize = 4 + SLOW_LOG_SEND_SIZE_MAX; }else{ - pCont = taosMemoryCalloc(1, 2 * (size - *offset)); - totalSize = 2 * (size - *offset); + pCont = taosMemoryCalloc(1, 4 + (size - *offset)); + totalSize = 4 + (size - *offset); } if(pCont == NULL){ @@ -509,6 +509,7 @@ static bool monitorSendSlowLogAtQuit(int64_t clusterId) { } int64_t size = getFileSize(pClient->path); if(size <= pClient->offset){ + taosFtruncateFile(pClient->pFile, 0); taosUnLockFile(pClient->pFile); taosCloseFile(&(pClient->pFile)); taosRemoveFile(pClient->path); @@ -535,11 +536,11 @@ static void monitorSendAllSlowLogAtQuit(){ while ((pIter = taosHashIterate(monitorSlowLogHash, pIter))) { SlowLogClient* pClient = (*(SlowLogClient**)pIter); if(pClient == NULL) { - taosHashCancelIterate(monitorSlowLogHash, pIter); - return; + continue; } int64_t size = getFileSize(pClient->path); if(size <= pClient->offset){ + taosFtruncateFile(pClient->pFile, 0); taosUnLockFile(pClient->pFile); taosCloseFile(&(pClient->pFile)); taosRemoveFile(pClient->path); @@ -547,8 +548,7 @@ static void monitorSendAllSlowLogAtQuit(){ int64_t* clusterId = (int64_t*)taosHashGetKey(pIter, NULL); SAppInstInfo* pInst = getAppInstByClusterId(*clusterId); if(pInst == NULL) { - taosHashCancelIterate(monitorSlowLogHash, pIter); - return; + continue; } SEpSet ep = getEpSet_s(&pInst->mgmtEp); char* data = readFile(pClient->pFile, &pClient->offset, size); From cfbd475fc28e7a08fde957ac4b9a746f70dae702 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Wed, 3 Jul 2024 15:21:56 +0800 Subject: [PATCH 24/27] fix:[TS-4921]refactor reporting logic for slow log --- include/libs/monitor/clientMonitor.h | 6 +- source/client/src/clientMonitor.c | 89 +++++++++++++++------------- 2 files changed, 51 insertions(+), 44 deletions(-) diff --git a/include/libs/monitor/clientMonitor.h b/include/libs/monitor/clientMonitor.h index 69926c62e5..4c7ab6f65a 100644 --- a/include/libs/monitor/clientMonitor.h +++ b/include/libs/monitor/clientMonitor.h @@ -58,10 +58,8 @@ typedef struct { typedef struct { int64_t clusterId; SLOW_LOG_QUEUE_TYPE type; - union{ - char* data; - int64_t offset; - }; + char* data; + int64_t offset; TdFilePtr pFile; char* fileName; } MonitorSlowLogData; diff --git a/source/client/src/clientMonitor.c b/source/client/src/clientMonitor.c index 79b91c994a..a102995c0d 100644 --- a/source/client/src/clientMonitor.c +++ b/source/client/src/clientMonitor.c @@ -31,22 +31,34 @@ static int32_t getSlowLogTmpDir(char* tmpPath, int32_t size){ return 0; } -//static void destroyCounter(void* data){ -// if (data == NULL) { -// return; -// } -// taos_counter_t* conuter = *(taos_counter_t**)data; -// if(conuter == NULL){ -// return; -// } -// taos_counter_destroy(conuter); -//} +static void processFileInTheEnd(TdFilePtr pFile, char* path){ + if(pFile == NULL){ + return; + } + if(taosFtruncateFile(pFile, 0) != 0){ + tscError("failed to truncate file:%s, errno:%d", path, errno); + return; + } + if(taosUnLockFile(pFile) != 0){ + tscError("failed to unlock file:%s, errno:%d", path, errno); + return; + } + if(taosCloseFile(&(pFile)) != 0){ + tscError("failed to close file:%s, errno:%d", path, errno); + return; + } + if(taosRemoveFile(path) != 0){ + tscError("failed to remove file:%s, errno:%d", path, errno); + return; + } +} static void destroySlowLogClient(void* data){ if (data == NULL) { return; } SlowLogClient* slowLogClient = *(SlowLogClient**)data; + processFileInTheEnd(slowLogClient->pFile, slowLogClient->path); taosMemoryFree(slowLogClient); } @@ -70,12 +82,17 @@ static void monitorFreeSlowLogData(void *paras) { if (pData == NULL) { return; } - taosMemoryFree(pData->data); + taosMemoryFreeClear(pData->data); if (pData->type == SLOW_LOG_READ_BEGINNIG){ taosMemoryFree(pData->fileName); } } +static void monitorFreeSlowLogDataEx(void *paras) { + monitorFreeSlowLogData(paras); + taosMemoryFree(paras); +} + static SAppInstInfo* getAppInstByClusterId(int64_t clusterId) { void *p = taosHashGet(appInfo.pInstMapByClusterId, &clusterId, LONG_BYTES); if(p == NULL){ @@ -98,12 +115,11 @@ static int32_t monitorReportAsyncCB(void* param, SDataBuf* pMsg, int32_t code) { if(code != 0){ tscError("failed to send slow log:%s, clusterId:%" PRIx64, p->data, p->clusterId); } - if(monitorPutData2MonitorQueue(*p) == 0){ + MonitorSlowLogData tmp = {.clusterId = p->clusterId, .type = p->type, .fileName = p->fileName, + .pFile= p->pFile, .offset = p->offset, .data = NULL}; + if(monitorPutData2MonitorQueue(tmp) == 0){ p->fileName = NULL; - p->data = NULL; } - monitorFreeSlowLogData(p); - taosMemoryFree(p); } return code; } @@ -138,19 +154,15 @@ static int32_t sendReport(void* pTransporter, SEpSet *epSet, char* pCont, MONITO pInfo->msgInfo.len = tlen; pInfo->msgType = TDMT_MND_STATIS; pInfo->param = param; + pInfo->paramFreeFp = monitorFreeSlowLogDataEx; pInfo->requestId = tGenIdPI64(); pInfo->requestObjRefId = 0; int64_t transporterId = 0; - int32_t code = asyncSendMsgToServer(pTransporter, epSet, &transporterId, pInfo); - if (code == TSDB_CODE_SUCCESS) { - return code; - } + return asyncSendMsgToServer(pTransporter, epSet, &transporterId, pInfo); FAILED: - tscError("sendReport failed, code:%d", code); - monitorFreeSlowLogData(param); - taosMemoryFree(param); + monitorFreeSlowLogDataEx(param); return -1; } @@ -439,6 +451,7 @@ static int32_t sendSlowLog(int64_t clusterId, char* data, TdFilePtr pFile, int64 MonitorSlowLogData* pParam = taosMemoryMalloc(sizeof(MonitorSlowLogData)); if(pParam == NULL){ taosMemoryFree(data); + taosMemoryFree(fileName); return -1; } pParam->data = data; @@ -450,18 +463,16 @@ static int32_t sendSlowLog(int64_t clusterId, char* data, TdFilePtr pFile, int64 return sendReport(pTransporter, epSet, data, MONITOR_TYPE_SLOW_LOG, pParam); } -static void monitorSendSlowLogAtBeginning(int64_t clusterId, char* fileName, TdFilePtr pFile, int64_t offset, void* pTransporter, SEpSet *epSet){ - int64_t size = getFileSize(fileName); +static void monitorSendSlowLogAtBeginning(int64_t clusterId, char** fileName, TdFilePtr pFile, int64_t offset, void* pTransporter, SEpSet *epSet){ + int64_t size = getFileSize(*fileName); if(size <= offset){ - taosFtruncateFile(pFile, 0); - taosUnLockFile(pFile); - taosCloseFile(&pFile); - taosRemoveFile(fileName); - tscDebug("[monitor] monitorSendSlowLogAtBeginning delete file:%s", fileName); + processFileInTheEnd(pFile, *fileName); + tscDebug("[monitor] monitorSendSlowLogAtBeginning delete file:%s", *fileName); }else{ char* data = readFile(pFile, &offset, size); if(data != NULL){ - sendSlowLog(clusterId, data, pFile, offset, SLOW_LOG_READ_BEGINNIG, taosStrdup(fileName), pTransporter, epSet); + sendSlowLog(clusterId, data, pFile, offset, SLOW_LOG_READ_BEGINNIG, *fileName, pTransporter, epSet); + *fileName = NULL; } tscDebug("[monitor] monitorSendSlowLogAtBeginning send slow log file:%p, data:%s", pFile, data); } @@ -509,10 +520,8 @@ static bool monitorSendSlowLogAtQuit(int64_t clusterId) { } int64_t size = getFileSize(pClient->path); if(size <= pClient->offset){ - taosFtruncateFile(pClient->pFile, 0); - taosUnLockFile(pClient->pFile); - taosCloseFile(&(pClient->pFile)); - taosRemoveFile(pClient->path); + processFileInTheEnd(pClient->pFile, pClient->path); + pClient->pFile = NULL; tscInfo("[monitor] monitorSendSlowLogAtQuit remove file:%s", pClient->path); if((--quitCnt) == 0){ return true; @@ -540,10 +549,8 @@ static void monitorSendAllSlowLogAtQuit(){ } int64_t size = getFileSize(pClient->path); if(size <= pClient->offset){ - taosFtruncateFile(pClient->pFile, 0); - taosUnLockFile(pClient->pFile); - taosCloseFile(&(pClient->pFile)); - taosRemoveFile(pClient->path); + processFileInTheEnd(pClient->pFile, pClient->path); + pClient->pFile = NULL; }else if(pClient->offset == 0){ int64_t* clusterId = (int64_t*)taosHashGetKey(pIter, NULL); SAppInstInfo* pInst = getAppInstByClusterId(*clusterId); @@ -661,7 +668,9 @@ static void monitorSendAllSlowLogFromTempDir(int64_t clusterId){ continue; } SEpSet ep = getEpSet_s(&pInst->mgmtEp); - monitorSendSlowLogAtBeginning(pInst->clusterId, filename, pFile, 0, pInst->pTransporter, &ep); + char *tmp = taosStrdup(filename); + monitorSendSlowLogAtBeginning(pInst->clusterId, &tmp, pFile, 0, pInst->pTransporter, &ep); + taosMemoryFree(tmp); } taosCloseDir(&pDir); @@ -727,7 +736,7 @@ static void* monitorThreadFunc(void *param){ SAppInstInfo* pInst = getAppInstByClusterId(slowLogData->clusterId); if(pInst != NULL) { SEpSet ep = getEpSet_s(&pInst->mgmtEp); - monitorSendSlowLogAtBeginning(slowLogData->clusterId, slowLogData->fileName, slowLogData->pFile, slowLogData->offset, pInst->pTransporter, &ep); + monitorSendSlowLogAtBeginning(slowLogData->clusterId, &(slowLogData->fileName), slowLogData->pFile, slowLogData->offset, pInst->pTransporter, &ep); } }else{ monitorSendAllSlowLogFromTempDir(slowLogData->clusterId); From 8db6a395aa7fcbd7b350a10a09848fffeed1b408 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Wed, 3 Jul 2024 19:04:16 +0800 Subject: [PATCH 25/27] fix:[TD-30879]check if hash is freed before inc counter --- source/client/src/clientMonitor.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/source/client/src/clientMonitor.c b/source/client/src/clientMonitor.c index a102995c0d..479ea76fe3 100644 --- a/source/client/src/clientMonitor.c +++ b/source/client/src/clientMonitor.c @@ -307,6 +307,11 @@ end: void monitorCounterInc(int64_t clusterId, const char* counterName, const char** label_values) { taosWLockLatch(&monitorLock); + if (atomic_load_32(&monitorFlag) == 1) { + taosRUnLockLatch(&monitorLock); + return; + } + MonitorClient** ppMonitor = (MonitorClient**)taosHashGet(monitorCounterHash, &clusterId, LONG_BYTES); if (ppMonitor == NULL || *ppMonitor == NULL) { tscError("monitorCounterInc not found pMonitor %"PRId64, clusterId); From 522deafed5403551c488caaac274db0cf45fb076 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Wed, 3 Jul 2024 22:11:25 +0800 Subject: [PATCH 26/27] fix:[TS-4921]refactor reporting logic for slow log --- source/client/src/clientEnv.c | 5 ++++- source/client/src/clientHb.c | 1 - source/client/src/clientMsgHandler.c | 4 +++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/source/client/src/clientEnv.c b/source/client/src/clientEnv.c index 6689334c1e..3a821768f8 100644 --- a/source/client/src/clientEnv.c +++ b/source/client/src/clientEnv.c @@ -157,7 +157,10 @@ static void generateWriteSlowLog(STscObj *pTscObj, SRequestObj *pRequest, int32_ } char* value = cJSON_PrintUnformatted(json); - MonitorSlowLogData data = {.clusterId = pTscObj->pAppInfo->clusterId, .type = SLOW_LOG_WRITE, .data = value}; + MonitorSlowLogData data = {0}; + data.clusterId = pTscObj->pAppInfo->clusterId; + data.type = SLOW_LOG_WRITE; + data.data = value; if(monitorPutData2MonitorQueue(data) < 0){ taosMemoryFree(value); } diff --git a/source/client/src/clientHb.c b/source/client/src/clientHb.c index 07a9e0c463..81c9e1252f 100644 --- a/source/client/src/clientHb.c +++ b/source/client/src/clientHb.c @@ -547,7 +547,6 @@ static int32_t hbAsyncCallBack(void *param, SDataBuf *pMsg, int32_t code) { } SAppInstInfo *pInst = pAppHbMgr->pAppInstInfo; - int32_t oldInterval = pInst->monitorParas.tsMonitorInterval; pInst->monitorParas = pRsp.monitorParas; tscDebug("[monitor] paras from hb, clusterId:%" PRIx64 " monitorParas threshold:%d scope:%d", pInst->clusterId, pRsp.monitorParas.tsSlowLogThreshold, pRsp.monitorParas.tsSlowLogScope); diff --git a/source/client/src/clientMsgHandler.c b/source/client/src/clientMsgHandler.c index 43600f60e1..e5baa7137e 100644 --- a/source/client/src/clientMsgHandler.c +++ b/source/client/src/clientMsgHandler.c @@ -155,7 +155,9 @@ int32_t processConnectRsp(void* param, SDataBuf* pMsg, int32_t code) { if(taosHashPut(appInfo.pInstMapByClusterId, &connectRsp.clusterId, LONG_BYTES, &pTscObj->pAppInfo, POINTER_BYTES) != 0){ tscError("failed to put appInfo into appInfo.pInstMapByClusterId"); } - MonitorSlowLogData data ={.clusterId = pTscObj->pAppInfo->clusterId, .type = SLOW_LOG_READ_BEGINNIG, .pFile = NULL, .data = NULL}; + MonitorSlowLogData data = {0}; + data.clusterId = pTscObj->pAppInfo->clusterId; + data.type = SLOW_LOG_READ_BEGINNIG; monitorPutData2MonitorQueue(data); monitorClientSlowQueryInit(connectRsp.clusterId); monitorClientSQLReqInit(connectRsp.clusterId); From 6c3410d064f75bf8c42f26ff81d5db7ca8ca770a Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Thu, 4 Jul 2024 10:02:52 +0800 Subject: [PATCH 27/27] fix:[TS-4921]set monitor paras if hb callback success --- source/client/src/clientHb.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/source/client/src/clientHb.c b/source/client/src/clientHb.c index 81c9e1252f..c5dbfd70c7 100644 --- a/source/client/src/clientHb.c +++ b/source/client/src/clientHb.c @@ -547,10 +547,6 @@ static int32_t hbAsyncCallBack(void *param, SDataBuf *pMsg, int32_t code) { } SAppInstInfo *pInst = pAppHbMgr->pAppInstInfo; - pInst->monitorParas = pRsp.monitorParas; - tscDebug("[monitor] paras from hb, clusterId:%" PRIx64 " monitorParas threshold:%d scope:%d", - pInst->clusterId, pRsp.monitorParas.tsSlowLogThreshold, pRsp.monitorParas.tsSlowLogScope); - if (code != 0) { pInst->onlineDnodes = pInst->totalDnodes ? 0 : -1; tscDebug("hb rsp error %s, update server status %d/%d", tstrerror(code), pInst->onlineDnodes, pInst->totalDnodes); @@ -561,6 +557,10 @@ static int32_t hbAsyncCallBack(void *param, SDataBuf *pMsg, int32_t code) { return -1; } + pInst->monitorParas = pRsp.monitorParas; + tscDebug("[monitor] paras from hb, clusterId:%" PRIx64 " monitorParas threshold:%d scope:%d", + pInst->clusterId, pRsp.monitorParas.tsSlowLogThreshold, pRsp.monitorParas.tsSlowLogScope); + if (rspNum) { tscDebug("hb got %d rsp, %d empty rsp received before", rspNum, atomic_val_compare_exchange_32(&emptyRspNum, emptyRspNum, 0));