From 64e7c4c84266ac4ef74353eff09863cb66767242 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Thu, 4 Jul 2024 14:56:56 +0800 Subject: [PATCH 1/5] fix:[TS-4921] send data to queue error if monitor thread starts later or failed --- include/libs/monitor/clientMonitor.h | 2 +- source/client/src/clientEnv.c | 6 ++- source/client/src/clientMonitor.c | 64 ++++++++++++++++------------ source/client/src/clientMsgHandler.c | 13 +++--- 4 files changed, 48 insertions(+), 37 deletions(-) diff --git a/include/libs/monitor/clientMonitor.h b/include/libs/monitor/clientMonitor.h index 4c7ab6f65a..1e6db8c00a 100644 --- a/include/libs/monitor/clientMonitor.h +++ b/include/libs/monitor/clientMonitor.h @@ -65,7 +65,7 @@ typedef struct { } MonitorSlowLogData; void monitorClose(); -void monitorInit(); +int32_t monitorInit(); void monitorClientSQLReqInit(int64_t clusterKey); void monitorClientSlowQueryInit(int64_t clusterId); diff --git a/source/client/src/clientEnv.c b/source/client/src/clientEnv.c index 3a821768f8..b227a6bd96 100644 --- a/source/client/src/clientEnv.c +++ b/source/client/src/clientEnv.c @@ -867,7 +867,10 @@ void taos_init_imp(void) { tscError("failed to init conv"); return; } - + if (monitorInit() != 0){ + tscError("failed to init monitor"); + return; + } rpcInit(); SCatalogCfg cfg = {.maxDBCacheNum = 100, .maxTblCacheNum = 100}; @@ -891,7 +894,6 @@ void taos_init_imp(void) { taosThreadMutexInit(&appInfo.mutex, NULL); tscCrashReportInit(); - monitorInit(); tscDebug("client is initialized successfully"); } diff --git a/source/client/src/clientMonitor.c b/source/client/src/clientMonitor.c index 479ea76fe3..c3345bf58d 100644 --- a/source/client/src/clientMonitor.c +++ b/source/client/src/clientMonitor.c @@ -18,6 +18,7 @@ int32_t quitCnt = 0; tsem2_t monitorSem; STaosQueue* monitorQueue; SHashObj* monitorSlowLogHash; +char tmpSlowLogPath[PATH_MAX] = {0}; static int32_t getSlowLogTmpDir(char* tmpPath, int32_t size){ if (tsTempDir == NULL) { @@ -690,28 +691,6 @@ static void* monitorThreadFunc(void *param){ } #endif - char tmpPath[PATH_MAX] = {0}; - if (getSlowLogTmpDir(tmpPath, sizeof(tmpPath)) < 0){ - return NULL; - } - - if (taosMulModeMkDir(tmpPath, 0777, true) != 0) { - terrno = TAOS_SYSTEM_ERROR(errno); - printf("failed to create dir:%s since %s", tmpPath, terrstr()); - return NULL; - } - - if (tsem2_init(&monitorSem, 0, 0) != 0) { - tscError("sem init error since %s", terrstr()); - return NULL; - } - - monitorQueue = taosOpenQueue(); - if(monitorQueue == NULL){ - tscError("open queue error since %s", terrstr()); - return NULL; - } - if (-1 != atomic_val_compare_exchange_32(&slowLogFlag, -1, 0)) { return NULL; } @@ -747,7 +726,7 @@ static void* monitorThreadFunc(void *param){ monitorSendAllSlowLogFromTempDir(slowLogData->clusterId); } } else if(slowLogData->type == SLOW_LOG_WRITE){ - monitorWriteSlowLog2File(slowLogData, tmpPath); + monitorWriteSlowLog2File(slowLogData, tmpSlowLogPath); } else if(slowLogData->type == SLOW_LOG_READ_RUNNING){ monitorSendSlowLogAtRunning(slowLogData->clusterId); } else if(slowLogData->type == SLOW_LOG_READ_QUIT){ @@ -799,27 +778,59 @@ static void tscMonitorStop() { } } -void monitorInit() { +int32_t monitorInit() { tscInfo("[monitor] tscMonitor init"); monitorCounterHash = (SHashObj*)taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, HASH_ENTRY_LOCK); if (monitorCounterHash == NULL) { tscError("failed to create monitorCounterHash"); + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; } taosHashSetFreeFp(monitorCounterHash, destroyMonitorClient); monitorSlowLogHash = (SHashObj*)taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BIGINT), false, HASH_ENTRY_LOCK); if (monitorSlowLogHash == NULL) { tscError("failed to create monitorSlowLogHash"); + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; } taosHashSetFreeFp(monitorSlowLogHash, destroySlowLogClient); monitorTimer = taosTmrInit(0, 0, 0, "MONITOR"); if (monitorTimer == NULL) { tscError("failed to create monitor timer"); + terrno = TSDB_CODE_OUT_OF_MEMORY; + return -1; + } + + if (getSlowLogTmpDir(tmpSlowLogPath, sizeof(tmpSlowLogPath)) < 0){ + terrno = TSDB_CODE_TSC_INTERNAL_ERROR; + return -1; + } + + if (taosMulModeMkDir(tmpSlowLogPath, 0777, true) != 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + tscError("failed to create dir:%s since %s", tmpSlowLogPath, terrstr()); + return -1; + } + + if (tsem2_init(&monitorSem, 0, 0) != 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + tscError("sem init error since %s", terrstr()); + return -1; + } + + monitorQueue = taosOpenQueue(); + if(monitorQueue == NULL){ + tscError("open queue error since %s", terrstr()); + return -1; } taosInitRWLatch(&monitorLock); - tscMonitortInit(); + if (tscMonitortInit() != 0){ + return -1; + } + return 0; } void monitorClose() { @@ -845,9 +856,6 @@ int32_t monitorPutData2MonitorQueue(MonitorSlowLogData data){ } *slowLogData = data; tscDebug("[monitor] write slow log to queue, clusterId:%"PRIx64 " type:%d", slowLogData->clusterId, slowLogData->type); - while (atomic_load_32(&slowLogFlag) == -1) { - taosMsleep(5); - } if (taosWriteQitem(monitorQueue, slowLogData) == 0){ tsem2_post(&monitorSem); }else{ diff --git a/source/client/src/clientMsgHandler.c b/source/client/src/clientMsgHandler.c index e5baa7137e..d587deffc5 100644 --- a/source/client/src/clientMsgHandler.c +++ b/source/client/src/clientMsgHandler.c @@ -154,13 +154,14 @@ int32_t processConnectRsp(void* param, SDataBuf* pMsg, int32_t code) { if(taosHashGet(appInfo.pInstMapByClusterId, &connectRsp.clusterId, LONG_BYTES) == NULL){ if(taosHashPut(appInfo.pInstMapByClusterId, &connectRsp.clusterId, LONG_BYTES, &pTscObj->pAppInfo, POINTER_BYTES) != 0){ tscError("failed to put appInfo into appInfo.pInstMapByClusterId"); + }else{ + MonitorSlowLogData data = {0}; + data.clusterId = pTscObj->pAppInfo->clusterId; + data.type = SLOW_LOG_READ_BEGINNIG; + monitorPutData2MonitorQueue(data); + monitorClientSlowQueryInit(connectRsp.clusterId); + monitorClientSQLReqInit(connectRsp.clusterId); } - MonitorSlowLogData data = {0}; - data.clusterId = pTscObj->pAppInfo->clusterId; - data.type = SLOW_LOG_READ_BEGINNIG; - monitorPutData2MonitorQueue(data); - monitorClientSlowQueryInit(connectRsp.clusterId); - monitorClientSQLReqInit(connectRsp.clusterId); } taosThreadMutexLock(&clientHbMgr.lock); From 8087dbe16d8db8beaa76c134312d45793cc7d9f2 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Fri, 5 Jul 2024 09:41:34 +0800 Subject: [PATCH 2/5] fix[TS-4921] set flag -1 if init monitor failed --- source/client/src/clientEnv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/client/src/clientEnv.c b/source/client/src/clientEnv.c index b227a6bd96..ecfa1e3392 100644 --- a/source/client/src/clientEnv.c +++ b/source/client/src/clientEnv.c @@ -864,10 +864,12 @@ void taos_init_imp(void) { initQueryModuleMsgHandle(); if (taosConvInit() != 0) { + tscInitRes = -1; tscError("failed to init conv"); return; } if (monitorInit() != 0){ + tscInitRes = -1; tscError("failed to init monitor"); return; } From 20c8e3168c8e7924e7abb41693bc2cdcdc64bc6a Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Fri, 5 Jul 2024 09:51:28 +0800 Subject: [PATCH 3/5] fix[TD-30895] heap use after free --- source/client/src/clientMonitor.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/source/client/src/clientMonitor.c b/source/client/src/clientMonitor.c index c3345bf58d..032b7cdeea 100644 --- a/source/client/src/clientMonitor.c +++ b/source/client/src/clientMonitor.c @@ -480,7 +480,6 @@ static void monitorSendSlowLogAtBeginning(int64_t clusterId, char** fileName, Td sendSlowLog(clusterId, data, pFile, offset, SLOW_LOG_READ_BEGINNIG, *fileName, pTransporter, epSet); *fileName = NULL; } - tscDebug("[monitor] monitorSendSlowLogAtBeginning send slow log file:%p, data:%s", pFile, data); } } @@ -511,7 +510,6 @@ static void monitorSendSlowLogAtRunning(int64_t clusterId){ if(data != NULL){ sendSlowLog(clusterId, data, pClient->pFile, pClient->offset, SLOW_LOG_READ_RUNNING, NULL, pInst->pTransporter, &ep); } - tscDebug("[monitor] monitorSendSlowLogAtRunning send slow log:%s", data); } } @@ -542,7 +540,6 @@ static bool monitorSendSlowLogAtQuit(int64_t clusterId) { if(data != NULL){ sendSlowLog(clusterId, data, pClient->pFile, pClient->offset, SLOW_LOG_READ_QUIT, NULL, pInst->pTransporter, &ep); } - tscInfo("[monitor] monitorSendSlowLogAtQuit send slow log:%s", data); } return false; } @@ -568,7 +565,6 @@ static void monitorSendAllSlowLogAtQuit(){ if(data != NULL && sendSlowLog(*clusterId, data, NULL, pClient->offset, SLOW_LOG_READ_QUIT, NULL, pInst->pTransporter, &ep) == 0){ quitCnt ++; } - tscInfo("[monitor] monitorSendAllSlowLogAtQuit send slow log :%s", data); } } } @@ -619,7 +615,6 @@ static void monitorSendAllSlowLog(){ if(data != NULL){ sendSlowLog(*clusterId, data, NULL, pClient->offset, SLOW_LOG_READ_RUNNING, NULL, pInst->pTransporter, &ep); } - tscDebug("[monitor] monitorSendAllSlowLog send slow log :%s", data); } } } From 6c5acdfc4bb5c247fb5c85347e356b3cd9b43387 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Fri, 5 Jul 2024 14:03:12 +0800 Subject: [PATCH 4/5] fix:[TS-4921]refactor code --- include/libs/monitor/clientMonitor.h | 7 +++ source/client/src/clientMonitor.c | 80 ++++++++++++---------------- 2 files changed, 40 insertions(+), 47 deletions(-) diff --git a/include/libs/monitor/clientMonitor.h b/include/libs/monitor/clientMonitor.h index 1e6db8c00a..3bb325921e 100644 --- a/include/libs/monitor/clientMonitor.h +++ b/include/libs/monitor/clientMonitor.h @@ -38,6 +38,13 @@ typedef enum { SLOW_LOG_READ_QUIT = 3, } SLOW_LOG_QUEUE_TYPE; +char* queueTypeStr[] = { + "SLOW_LOG_WRITE", + "SLOW_LOG_READ_RUNNING", + "SLOW_LOG_READ_BEGINNIG", + "SLOW_LOG_READ_QUIT" +}; + #define SLOW_LOG_SEND_SIZE_MAX 1024*1024 typedef struct { diff --git a/source/client/src/clientMonitor.c b/source/client/src/clientMonitor.c index 032b7cdeea..d1a9897caa 100644 --- a/source/client/src/clientMonitor.c +++ b/source/client/src/clientMonitor.c @@ -454,6 +454,10 @@ static int64_t getFileSize(char* path){ } static int32_t sendSlowLog(int64_t clusterId, char* data, TdFilePtr pFile, int64_t offset, SLOW_LOG_QUEUE_TYPE type, char* fileName, void* pTransporter, SEpSet *epSet){ + if (data == NULL){ + taosMemoryFree(fileName); + return -1; + } MonitorSlowLogData* pParam = taosMemoryMalloc(sizeof(MonitorSlowLogData)); if(pParam == NULL){ taosMemoryFree(data); @@ -469,17 +473,26 @@ static int32_t sendSlowLog(int64_t clusterId, char* data, TdFilePtr pFile, int64 return sendReport(pTransporter, epSet, data, MONITOR_TYPE_SLOW_LOG, pParam); } -static void monitorSendSlowLogAtBeginning(int64_t clusterId, char** fileName, TdFilePtr pFile, int64_t offset, void* pTransporter, SEpSet *epSet){ +static int32_t monitorReadSend(int64_t clusterId, TdFilePtr pFile, int64_t* offset, int64_t size, SLOW_LOG_QUEUE_TYPE type, char* fileName){ + SAppInstInfo* pInst = getAppInstByClusterId(clusterId); + if(pInst == NULL){ + tscError("failed to get app instance by clusterId:%" PRId64, clusterId); + return -1; + } + SEpSet ep = getEpSet_s(&pInst->mgmtEp); + char* data = readFile(pFile, offset, size); + return sendSlowLog(clusterId, data, (type == SLOW_LOG_READ_BEGINNIG ? pFile : NULL), *offset, type, fileName, pInst->pTransporter, &ep); +} + +static void monitorSendSlowLogAtBeginning(int64_t clusterId, char** fileName, TdFilePtr pFile, int64_t offset){ int64_t size = getFileSize(*fileName); if(size <= offset){ processFileInTheEnd(pFile, *fileName); tscDebug("[monitor] monitorSendSlowLogAtBeginning delete file:%s", *fileName); }else{ - char* data = readFile(pFile, &offset, size); - if(data != NULL){ - sendSlowLog(clusterId, data, pFile, offset, SLOW_LOG_READ_BEGINNIG, *fileName, pTransporter, epSet); - *fileName = NULL; - } + int32_t code = monitorReadSend(clusterId, pFile, &offset, size, SLOW_LOG_READ_BEGINNIG, *fileName); + tscDebug("[monitor] monitorSendSlowLogAtBeginning send slow log clusterId:%"PRId64",ret:%d", clusterId, code); + *fileName = NULL; } } @@ -500,16 +513,8 @@ static void monitorSendSlowLogAtRunning(int64_t clusterId){ tscDebug("[monitor] monitorSendSlowLogAtRunning truncate file to 0 file:%p", pClient->pFile); pClient->offset = 0; }else{ - SAppInstInfo* pInst = getAppInstByClusterId(clusterId); - if(pInst == NULL){ - tscError("failed to get app instance by clusterId:%" PRId64, clusterId); - return; - } - SEpSet ep = getEpSet_s(&pInst->mgmtEp); - char* data = readFile(pClient->pFile, &pClient->offset, size); - if(data != NULL){ - sendSlowLog(clusterId, data, pClient->pFile, pClient->offset, SLOW_LOG_READ_RUNNING, NULL, pInst->pTransporter, &ep); - } + int32_t code = monitorReadSend(clusterId, pClient->pFile, &pClient->offset, size, SLOW_LOG_READ_RUNNING, NULL); + tscDebug("[monitor] monitorSendSlowLogAtRunning send slow log clusterId:%"PRId64",ret:%d", clusterId, code); } } @@ -531,15 +536,8 @@ static bool monitorSendSlowLogAtQuit(int64_t clusterId) { return true; } }else{ - SAppInstInfo* pInst = getAppInstByClusterId(clusterId); - if(pInst == NULL) { - return true; - } - SEpSet ep = getEpSet_s(&pInst->mgmtEp); - char* data = readFile(pClient->pFile, &pClient->offset, size); - if(data != NULL){ - sendSlowLog(clusterId, data, pClient->pFile, pClient->offset, SLOW_LOG_READ_QUIT, NULL, pInst->pTransporter, &ep); - } + int32_t code = monitorReadSend(clusterId, pClient->pFile, &pClient->offset, size, SLOW_LOG_READ_QUIT, NULL); + tscDebug("[monitor] monitorSendSlowLogAtQuit send slow log clusterId:%"PRId64",ret:%d", clusterId, code); } return false; } @@ -556,13 +554,9 @@ static void monitorSendAllSlowLogAtQuit(){ pClient->pFile = NULL; }else if(pClient->offset == 0){ int64_t* clusterId = (int64_t*)taosHashGetKey(pIter, NULL); - SAppInstInfo* pInst = getAppInstByClusterId(*clusterId); - if(pInst == NULL) { - continue; - } - SEpSet ep = getEpSet_s(&pInst->mgmtEp); - char* data = readFile(pClient->pFile, &pClient->offset, size); - if(data != NULL && sendSlowLog(*clusterId, data, NULL, pClient->offset, SLOW_LOG_READ_QUIT, NULL, pInst->pTransporter, &ep) == 0){ + int32_t code = monitorReadSend(*clusterId, pClient->pFile, &pClient->offset, size, SLOW_LOG_READ_QUIT, NULL); + tscDebug("[monitor] monitorSendAllSlowLogAtQuit send slow log clusterId:%"PRId64",ret:%d", *clusterId, code); + if (code == 0){ quitCnt ++; } } @@ -610,11 +604,8 @@ static void monitorSendAllSlowLog(){ } continue; } - SEpSet ep = getEpSet_s(&pInst->mgmtEp); - char* data = readFile(pClient->pFile, &pClient->offset, size); - if(data != NULL){ - sendSlowLog(*clusterId, data, NULL, pClient->offset, SLOW_LOG_READ_RUNNING, NULL, pInst->pTransporter, &ep); - } + int32_t code = monitorReadSend(*clusterId, pClient->pFile, &pClient->offset, size, SLOW_LOG_READ_RUNNING, NULL); + tscDebug("[monitor] monitorSendAllSlowLog send slow log clusterId:%"PRId64",ret:%d", *clusterId, code); } } } @@ -627,7 +618,7 @@ static void monitorSendAllSlowLogFromTempDir(int64_t clusterId){ return; } char namePrefix[PATH_MAX] = {0}; - if (snprintf(namePrefix, sizeof(namePrefix), "%s%"PRIx64, TD_TMP_FILE_PREFIX, pInst->clusterId) < 0) { + if (snprintf(namePrefix, sizeof(namePrefix), "%s%"PRIx64, TD_TMP_FILE_PREFIX, clusterId) < 0) { tscError("failed to generate slow log file name prefix"); return; } @@ -652,7 +643,7 @@ static void monitorSendAllSlowLogFromTempDir(int64_t clusterId){ if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0 || strstr(name, namePrefix) == NULL) { - tscInfo("skip file:%s, for cluster id:%"PRIx64, name, pInst->clusterId); + tscInfo("skip file:%s, for cluster id:%"PRIx64, name, clusterId); continue; } @@ -668,9 +659,8 @@ static void monitorSendAllSlowLogFromTempDir(int64_t clusterId){ taosCloseFile(&pFile); continue; } - SEpSet ep = getEpSet_s(&pInst->mgmtEp); char *tmp = taosStrdup(filename); - monitorSendSlowLogAtBeginning(pInst->clusterId, &tmp, pFile, 0, pInst->pTransporter, &ep); + monitorSendSlowLogAtBeginning(clusterId, &tmp, pFile, 0); taosMemoryFree(tmp); } @@ -712,11 +702,7 @@ static void* monitorThreadFunc(void *param){ if (slowLogData != NULL) { if (slowLogData->type == SLOW_LOG_READ_BEGINNIG){ if(slowLogData->pFile != NULL){ - SAppInstInfo* pInst = getAppInstByClusterId(slowLogData->clusterId); - if(pInst != NULL) { - SEpSet ep = getEpSet_s(&pInst->mgmtEp); - monitorSendSlowLogAtBeginning(slowLogData->clusterId, &(slowLogData->fileName), slowLogData->pFile, slowLogData->offset, pInst->pTransporter, &ep); - } + monitorSendSlowLogAtBeginning(slowLogData->clusterId, &(slowLogData->fileName), slowLogData->pFile, slowLogData->offset); }else{ monitorSendAllSlowLogFromTempDir(slowLogData->clusterId); } @@ -850,7 +836,7 @@ int32_t monitorPutData2MonitorQueue(MonitorSlowLogData data){ return -1; } *slowLogData = data; - tscDebug("[monitor] write slow log to queue, clusterId:%"PRIx64 " type:%d", slowLogData->clusterId, slowLogData->type); + tscDebug("[monitor] write slow log to queue, clusterId:%"PRIx64 " type:%s, data:%s", slowLogData->clusterId, queueTypeStr[slowLogData->type], slowLogData->data); if (taosWriteQitem(monitorQueue, slowLogData) == 0){ tsem2_post(&monitorSem); }else{ From 9d42b31d4a017b9737da7a342e5b988765059a62 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Fri, 5 Jul 2024 16:12:13 +0800 Subject: [PATCH 5/5] fix:[TS-4921]refactor code --- include/libs/monitor/clientMonitor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/libs/monitor/clientMonitor.h b/include/libs/monitor/clientMonitor.h index 3bb325921e..0085173ecd 100644 --- a/include/libs/monitor/clientMonitor.h +++ b/include/libs/monitor/clientMonitor.h @@ -38,7 +38,7 @@ typedef enum { SLOW_LOG_READ_QUIT = 3, } SLOW_LOG_QUEUE_TYPE; -char* queueTypeStr[] = { +static char* queueTypeStr[] = { "SLOW_LOG_WRITE", "SLOW_LOG_READ_RUNNING", "SLOW_LOG_READ_BEGINNIG",