From ddead1417ed3446ad39086eea68471416051e2bf Mon Sep 17 00:00:00 2001 From: Alex Duan <417921451@qq.com> Date: Wed, 8 Jan 2025 18:03:04 +0800 Subject: [PATCH 01/10] fix: update version to taospy==2.7.21 taos-ws-py=0.3.8 --- Jenkinsfile2 | 4 ++-- tests/ci/Dockerfile | 2 +- tests/docs-examples-test/python.sh | 2 +- tests/parallel_test/run_case.sh | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Jenkinsfile2 b/Jenkinsfile2 index 1b2f28908c..395beb72db 100644 --- a/Jenkinsfile2 +++ b/Jenkinsfile2 @@ -368,8 +368,8 @@ def pre_test_build_win() { ''' bat ''' cd %WIN_COMMUNITY_ROOT%/tests/ci - pip3 install taospy==2.7.16 - pip3 install taos-ws-py==0.3.5 + pip3 install taospy==2.7.21 + pip3 install taos-ws-py==0.3.8 xcopy /e/y/i/f %WIN_INTERNAL_ROOT%\\debug\\build\\lib\\taos.dll C:\\Windows\\System32 ''' return 1 diff --git a/tests/ci/Dockerfile b/tests/ci/Dockerfile index 1caa6fea9e..226b0193f6 100644 --- a/tests/ci/Dockerfile +++ b/tests/ci/Dockerfile @@ -7,7 +7,7 @@ RUN apt-get install -y locales psmisc sudo tree libgeos-dev libgflags2.2 libgfl RUN sed -i 's/# en_US.UTF-8/en_US.UTF-8/' /etc/locale.gen && locale-gen RUN pip3 config set global.index-url http://admin:123456@192.168.0.212:3141/admin/dev/+simple/ RUN pip3 config set global.trusted-host 192.168.0.212 -RUN pip3 install taospy==2.7.16 taos-ws-py==0.3.5 pandas psutil fabric2 requests faker simplejson toml pexpect tzlocal distro decorator loguru hyperloglog +RUN pip3 install taospy==2.7.21 taos-ws-py==0.3.8 pandas psutil fabric2 requests faker simplejson toml pexpect tzlocal distro decorator loguru hyperloglog ENV LANG=en_US.UTF-8 LANGUAGE=en_US.UTF-8 LC_ALL=en_US.UTF-8 RUN apt-key adv --keyserver keyserver.ubuntu.com --recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 RUN add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu focal-cran40/' diff --git a/tests/docs-examples-test/python.sh b/tests/docs-examples-test/python.sh index 536155437b..8e43f26d5c 100644 --- a/tests/docs-examples-test/python.sh +++ b/tests/docs-examples-test/python.sh @@ -130,7 +130,7 @@ pip3 install kafka-python python3 kafka_example_consumer.py # 21 -pip3 install taos-ws-py==0.3.5 +pip3 install taos-ws-py==0.3.8 python3 conn_websocket_pandas.py # 22 diff --git a/tests/parallel_test/run_case.sh b/tests/parallel_test/run_case.sh index 5dc1cef673..b72a7cacca 100755 --- a/tests/parallel_test/run_case.sh +++ b/tests/parallel_test/run_case.sh @@ -76,9 +76,9 @@ ulimit -c unlimited md5sum /usr/lib/libtaos.so.1 md5sum /home/TDinternal/debug/build/lib/libtaos.so -#get python connector and update: taospy 2.7.16 taos-ws-py 0.3.5 +#get python connector and update: taospy and taos-ws-py to latest pip3 install taospy==2.7.21 -pip3 install taos-ws-py==0.3.5 +pip3 install taos-ws-py==0.3.8 $TIMEOUT_CMD $cmd RET=$? echo "cmd exit code: $RET" From 893e7c1af45389190b9df2a02eff400e32c1bafe Mon Sep 17 00:00:00 2001 From: factosea <285808407@qq.com> Date: Thu, 9 Jan 2025 10:00:43 +0800 Subject: [PATCH 02/10] fix: write crash log in new thread --- include/util/tlog.h | 5 +- source/client/src/clientEnv.c | 28 ++++----- source/dnode/mgmt/exe/dmMain.c | 32 ++++------ source/dnode/mgmt/mgmt_dnode/src/dmWorker.c | 8 ++- source/util/src/tlog.c | 67 ++++++++++++++++++++- tools/shell/src/shellMain.c | 2 - 6 files changed, 101 insertions(+), 41 deletions(-) diff --git a/include/util/tlog.h b/include/util/tlog.h index d0e42e3660..acec5c511e 100644 --- a/include/util/tlog.h +++ b/include/util/tlog.h @@ -115,10 +115,13 @@ bool taosAssertRelease(bool condition); #endif #endif -void taosLogCrashInfo(char *nodeType, char *pMsg, int64_t msgLen, int signum, void *sigInfo); void taosReadCrashInfo(char *filepath, char **pMsg, int64_t *pMsgLen, TdFilePtr *pFd); void taosReleaseCrashLogFile(TdFilePtr pFile, bool truncateFile); +int32_t initCrashLogWriter(); +void checkAndPrepareCrashInfo(); +void writeCrashLogToFile(int signum, void *sigInfo, char *nodeType, int64_t clusterId, int64_t startTime); + // clang-format off #define uFatal(...) { if (uDebugFlag & DEBUG_FATAL) { taosPrintLog("UTL FATAL", DEBUG_FATAL, tsLogEmbedded ? 255 : uDebugFlag, __VA_ARGS__); }} #define uError(...) { if (uDebugFlag & DEBUG_ERROR) { taosPrintLog("UTL ERROR ", DEBUG_ERROR, tsLogEmbedded ? 255 : uDebugFlag, __VA_ARGS__); }} diff --git a/source/client/src/clientEnv.c b/source/client/src/clientEnv.c index df93920303..41968d8155 100644 --- a/source/client/src/clientEnv.c +++ b/source/client/src/clientEnv.c @@ -43,7 +43,7 @@ #endif #ifndef CUS_PROMPT -#define CUS_PROMPT "tao" +#define CUS_PROMPT "taos" #endif #define TSC_VAR_NOT_RELEASE 1 @@ -815,6 +815,12 @@ static void *tscCrashReportThreadFp(void *param) { int32_t reportPeriodNum = 3600 * 1000 / sleepTime; int32_t loopTimes = reportPeriodNum; + code = initCrashLogWriter(); + if (code) { + tscError("failed to init crash log writer, code:%s", tstrerror(code)); + return NULL; + } + #ifdef WINDOWS if (taosCheckCurrentInDll()) { atexit(crashReportThreadFuncUnexpectedStopped); @@ -832,8 +838,10 @@ static void *tscCrashReportThreadFp(void *param) { } while (1) { + checkAndPrepareCrashInfo(); if (clientStop > 0) break; if (loopTimes++ < reportPeriodNum) { + if(loopTimes < 0) loopTimes = reportPeriodNum; taosMsleep(sleepTime); continue; } @@ -921,21 +929,7 @@ void tscStopCrashReport() { } void tscWriteCrashInfo(int signum, void *sigInfo, void *context) { - char *pMsg = NULL; - const char *flags = "UTL FATAL "; - ELogLevel level = DEBUG_FATAL; - int32_t dflag = 255; - int64_t msgLen = -1; - - if (tsEnableCrashReport) { - if (taosGenCrashJsonMsg(signum, &pMsg, lastClusterId, appInfo.startTime)) { - taosPrintLog(flags, level, dflag, "failed to generate crash json msg"); - } else { - msgLen = strlen(pMsg); - } - } - - taosLogCrashInfo("taos", pMsg, msgLen, signum, sigInfo); + writeCrashLogToFile(signum, sigInfo, CUS_PROMPT, lastClusterId, appInfo.startTime); } void taos_init_imp(void) { @@ -969,7 +963,7 @@ void taos_init_imp(void) { } taosHashSetFreeFp(appInfo.pInstMap, destroyAppInst); - const char *logName = CUS_PROMPT "slog"; + const char *logName = CUS_PROMPT "log"; ENV_ERR_RET(taosInitLogOutput(&logName), "failed to init log output"); if (taosCreateLog(logName, 10, configDir, NULL, NULL, NULL, NULL, 1) != 0) { (void)printf(" WARING: Create %s failed:%s. configDir=%s\n", logName, strerror(errno), configDir); diff --git a/source/dnode/mgmt/exe/dmMain.c b/source/dnode/mgmt/exe/dmMain.c index ddaf1d3c13..b5eeb78b5e 100644 --- a/source/dnode/mgmt/exe/dmMain.c +++ b/source/dnode/mgmt/exe/dmMain.c @@ -131,25 +131,7 @@ void dmLogCrash(int signum, void *sigInfo, void *context) { if (taosIgnSignal(SIGSEGV) != 0) { dWarn("failed to ignore signal SIGABRT"); } - - char *pMsg = NULL; - const char *flags = "UTL FATAL "; - ELogLevel level = DEBUG_FATAL; - int32_t dflag = 255; - int64_t msgLen = -1; - - if (tsEnableCrashReport) { - if (taosGenCrashJsonMsg(signum, &pMsg, dmGetClusterId(), global.startTime)) { - taosPrintLog(flags, level, dflag, "failed to generate crash json msg"); - goto _return; - } else { - msgLen = strlen(pMsg); - } - } - -_return: - - taosLogCrashInfo(CUS_PROMPT "d", pMsg, msgLen, signum, sigInfo); + writeCrashLogToFile(signum, sigInfo, CUS_PROMPT "d", dmGetClusterId(), global.startTime); #ifdef _TD_DARWIN_64 exit(signum); @@ -177,6 +159,15 @@ static void dmSetSignalHandle() { if (taosSetSignal(SIGBREAK, dmStopDnode) != 0) { dWarn("failed to set signal SIGUSR1"); } + if (taosSetSignal(SIGABRT, dmLogCrash) != 0) { + dWarn("failed to set signal SIGUSR1"); + } + if (taosSetSignal(SIGFPE, dmLogCrash) != 0) { + dWarn("failed to set signal SIGUSR1"); + } + if (taosSetSignal(SIGSEGV, dmLogCrash) != 0) { + dWarn("failed to set signal SIGUSR1"); + } #ifndef WINDOWS if (taosSetSignal(SIGTSTP, dmStopDnode) != 0) { dWarn("failed to set signal SIGUSR1"); @@ -184,6 +175,9 @@ static void dmSetSignalHandle() { if (taosSetSignal(SIGQUIT, dmStopDnode) != 0) { dWarn("failed to set signal SIGUSR1"); } + if (taosSetSignal(SIGBUS, dmLogCrash) != 0) { + dWarn("failed to set signal SIGUSR1"); + } #endif } diff --git a/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c b/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c index ef4e76031d..9a1287adee 100644 --- a/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c +++ b/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c @@ -274,14 +274,20 @@ static void *dmCrashReportThreadFp(void *param) { dError("failed to init telemetry since %s", tstrerror(code)); return NULL; } + code = initCrashLogWriter(); + if (code != 0) { + dError("failed to init crash log writer since %s", tstrerror(code)); + return NULL; + } while (1) { + checkAndPrepareCrashInfo(); if (pMgmt->pData->dropped || pMgmt->pData->stopped) break; if (loopTimes++ < reportPeriodNum) { taosMsleep(sleepTime); + if(loopTimes < 0) loopTimes = reportPeriodNum; continue; } - taosReadCrashInfo(filepath, &pMsg, &msgLen, &pFile); if (pMsg && msgLen > 0) { if (taosSendTelemReport(&mgt, tsSvrCrashReportUri, tsTelemPort, pMsg, msgLen, HTTP_FLAT) != 0) { diff --git a/source/util/src/tlog.c b/source/util/src/tlog.c index 90753ae7e8..5251076fa5 100644 --- a/source/util/src/tlog.c +++ b/source/util/src/tlog.c @@ -21,6 +21,7 @@ #include "tjson.h" #include "ttime.h" #include "tutil.h" +#include "tcommon.h" #define LOG_MAX_LINE_SIZE (10024) #define LOG_MAX_LINE_BUFFER_SIZE (LOG_MAX_LINE_SIZE + 3) @@ -1264,6 +1265,8 @@ _return: taosPrintLog(flags, level, dflag, "crash signal is %d", signum); +// print the stack trace +#if 0 #ifdef _TD_DARWIN_64 taosPrintTrace(flags, level, dflag, 4); #elif !defined(WINDOWS) @@ -1273,10 +1276,72 @@ _return: #else taosPrintTrace(flags, level, dflag, 8); #endif - +#endif taosMemoryFree(pMsg); } +typedef struct crashBasicInfo { + bool init; + bool isCrash; + int64_t clusterId; + int64_t startTime; + char *nodeType; + int signum; + void *sigInfo; + tsem_t sem; +} crashBasicInfo; + +crashBasicInfo gCrashBasicInfo = {0}; +static void writeCrashLogToFileInNewThead() { + if (!gCrashBasicInfo.init || !gCrashBasicInfo.isCrash) return; + char *pMsg = NULL; + const char *flags = "UTL FATAL "; + ELogLevel level = DEBUG_FATAL; + int32_t dflag = 255; + int64_t msgLen = -1; + + if (tsEnableCrashReport) { + if (taosGenCrashJsonMsg(gCrashBasicInfo.signum, &pMsg, gCrashBasicInfo.clusterId, gCrashBasicInfo.startTime)) { + taosPrintLog(flags, level, dflag, "failed to generate crash json msg"); + } else { + msgLen = strlen(pMsg); + } + } + taosLogCrashInfo(gCrashBasicInfo.nodeType, pMsg, msgLen, gCrashBasicInfo.signum, gCrashBasicInfo.sigInfo); + gCrashBasicInfo.isCrash = false; + tsem_post(&gCrashBasicInfo.sem); +} + +void checkAndPrepareCrashInfo() { + return writeCrashLogToFileInNewThead(); +} + +int32_t initCrashLogWriter() { + gCrashBasicInfo.init = true; + gCrashBasicInfo.isCrash = false; + int32_t code = tsem_init(&gCrashBasicInfo.sem, 0, 0); + uInfo("crashLogWriter init finished."); + return code; +} + +void writeCrashLogToFile(int signum, void *sigInfo, char *nodeType, int64_t clusterId, int64_t startTime) { + if (!gCrashBasicInfo.init) { + uInfo("crashLogWriter has not init!"); + return; + } + uInfo("write crash log to file, signum:%d, nodeType:%s, clusterId:%" PRId64, signum, nodeType, clusterId); + gCrashBasicInfo.isCrash = true; + gCrashBasicInfo.clusterId = clusterId; + gCrashBasicInfo.startTime = startTime; + gCrashBasicInfo.nodeType = nodeType; + gCrashBasicInfo.signum = signum; + gCrashBasicInfo.sigInfo = sigInfo; + + tsem_wait(&gCrashBasicInfo.sem); + + uInfo("write crash log to file done, signum:%d, nodeType:%s, clusterId:%" PRId64, signum, nodeType, clusterId); +} + void taosReadCrashInfo(char *filepath, char **pMsg, int64_t *pMsgLen, TdFilePtr *pFd) { const char *flags = "UTL FATAL "; ELogLevel level = DEBUG_FATAL; diff --git a/tools/shell/src/shellMain.c b/tools/shell/src/shellMain.c index fc6ba0f7d8..1f6f8fe3df 100644 --- a/tools/shell/src/shellMain.c +++ b/tools/shell/src/shellMain.c @@ -49,14 +49,12 @@ int main(int argc, char *argv[]) { shell.args.local = false; #endif -#if 0 #if !defined(WINDOWS) taosSetSignal(SIGBUS, shellCrashHandler); #endif taosSetSignal(SIGABRT, shellCrashHandler); taosSetSignal(SIGFPE, shellCrashHandler); taosSetSignal(SIGSEGV, shellCrashHandler); -#endif if (shellCheckIntSize() != 0) { return -1; From 1e269c8dd69ea3aeb22c0cd78985ba83b3c30eb1 Mon Sep 17 00:00:00 2001 From: facetosea <25808407@qq.com> Date: Thu, 9 Jan 2025 10:56:20 +0800 Subject: [PATCH 03/10] fix: test case --- include/util/tlog.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/util/tlog.h b/include/util/tlog.h index acec5c511e..48db94baaa 100644 --- a/include/util/tlog.h +++ b/include/util/tlog.h @@ -115,6 +115,7 @@ bool taosAssertRelease(bool condition); #endif #endif +void taosLogCrashInfo(char *nodeType, char *pMsg, int64_t msgLen, int signum, void *sigInfo); void taosReadCrashInfo(char *filepath, char **pMsg, int64_t *pMsgLen, TdFilePtr *pFd); void taosReleaseCrashLogFile(TdFilePtr pFile, bool truncateFile); From 2a8ddc254bdae50f7bca1ba2c545fe9627a96537 Mon Sep 17 00:00:00 2001 From: wangmm0220 Date: Thu, 9 Jan 2025 15:50:07 +0800 Subject: [PATCH 04/10] fix:[TD-33498]clear msg if unsubscribe --- source/client/src/clientTmq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/source/client/src/clientTmq.c b/source/client/src/clientTmq.c index 17990761a4..4f1c890295 100644 --- a/source/client/src/clientTmq.c +++ b/source/client/src/clientTmq.c @@ -2606,6 +2606,7 @@ int32_t tmq_unsubscribe(tmq_t* tmq) { } code = tmq_subscribe(tmq, lst); tmq_list_destroy(lst); + tmqClearUnhandleMsg(tmq); if(code != 0){ goto END; } From 852f5b6fa45f0157744c124ba0cf71d4dc7461e7 Mon Sep 17 00:00:00 2001 From: factosea <285808407@qq.com> Date: Thu, 9 Jan 2025 16:02:11 +0800 Subject: [PATCH 05/10] fix: write crash log --- source/client/src/clientEnv.c | 12 ++++++------ source/common/src/tmisce.c | 3 ++- source/util/src/tlog.c | 15 +++++++-------- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/source/client/src/clientEnv.c b/source/client/src/clientEnv.c index 41968d8155..c22bae3fbf 100644 --- a/source/client/src/clientEnv.c +++ b/source/client/src/clientEnv.c @@ -815,12 +815,6 @@ static void *tscCrashReportThreadFp(void *param) { int32_t reportPeriodNum = 3600 * 1000 / sleepTime; int32_t loopTimes = reportPeriodNum; - code = initCrashLogWriter(); - if (code) { - tscError("failed to init crash log writer, code:%s", tstrerror(code)); - return NULL; - } - #ifdef WINDOWS if (taosCheckCurrentInDll()) { atexit(crashReportThreadFuncUnexpectedStopped); @@ -837,6 +831,12 @@ static void *tscCrashReportThreadFp(void *param) { return NULL; } + code = initCrashLogWriter(); + if (code) { + tscError("failed to init crash log writer, code:%s", tstrerror(code)); + return NULL; + } + while (1) { checkAndPrepareCrashInfo(); if (clientStop > 0) break; diff --git a/source/common/src/tmisce.c b/source/common/src/tmisce.c index 144a1542cb..a966513629 100644 --- a/source/common/src/tmisce.c +++ b/source/common/src/tmisce.c @@ -231,6 +231,7 @@ int32_t taosGenCrashJsonMsg(int signum, char** pMsg, int64_t clusterId, int64_t TAOS_CHECK_GOTO(tjsonAddIntegerToObject(pJson, "crashSig", signum), NULL, _exit); TAOS_CHECK_GOTO(tjsonAddIntegerToObject(pJson, "crashTs", taosGetTimestampUs()), NULL, _exit); +#if 0 #ifdef _TD_DARWIN_64 taosLogTraceToBuf(tmp, sizeof(tmp), 4); #elif !defined(WINDOWS) @@ -240,7 +241,7 @@ int32_t taosGenCrashJsonMsg(int signum, char** pMsg, int64_t clusterId, int64_t #endif TAOS_CHECK_GOTO(tjsonAddStringToObject(pJson, "stackInfo", tmp), NULL, _exit); - +#endif char* pCont = tjsonToString(pJson); if (pCont == NULL) { code = terrno; diff --git a/source/util/src/tlog.c b/source/util/src/tlog.c index 5251076fa5..a06b6c0746 100644 --- a/source/util/src/tlog.c +++ b/source/util/src/tlog.c @@ -1317,29 +1317,28 @@ void checkAndPrepareCrashInfo() { } int32_t initCrashLogWriter() { - gCrashBasicInfo.init = true; - gCrashBasicInfo.isCrash = false; int32_t code = tsem_init(&gCrashBasicInfo.sem, 0, 0); - uInfo("crashLogWriter init finished."); + if (code != 0) { + uError("failed to init sem for crashLogWriter, code:%d", code); + return code; + } + gCrashBasicInfo.isCrash = false; + gCrashBasicInfo.init = true; return code; } void writeCrashLogToFile(int signum, void *sigInfo, char *nodeType, int64_t clusterId, int64_t startTime) { if (!gCrashBasicInfo.init) { - uInfo("crashLogWriter has not init!"); return; } - uInfo("write crash log to file, signum:%d, nodeType:%s, clusterId:%" PRId64, signum, nodeType, clusterId); - gCrashBasicInfo.isCrash = true; gCrashBasicInfo.clusterId = clusterId; gCrashBasicInfo.startTime = startTime; gCrashBasicInfo.nodeType = nodeType; gCrashBasicInfo.signum = signum; gCrashBasicInfo.sigInfo = sigInfo; + gCrashBasicInfo.isCrash = true; tsem_wait(&gCrashBasicInfo.sem); - - uInfo("write crash log to file done, signum:%d, nodeType:%s, clusterId:%" PRId64, signum, nodeType, clusterId); } void taosReadCrashInfo(char *filepath, char **pMsg, int64_t *pMsgLen, TdFilePtr *pFd) { From fabcff4c9ab595c5dfc504413bc17d8b9b1e7aad Mon Sep 17 00:00:00 2001 From: tjuzyp Date: Thu, 9 Jan 2025 10:56:30 +0800 Subject: [PATCH 06/10] docs(agent): add client port range configuration options --- docs/zh/14-reference/01-components/05-taosx-agent.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/docs/zh/14-reference/01-components/05-taosx-agent.md b/docs/zh/14-reference/01-components/05-taosx-agent.md index 1f1276e834..16ce3ae61c 100644 --- a/docs/zh/14-reference/01-components/05-taosx-agent.md +++ b/docs/zh/14-reference/01-components/05-taosx-agent.md @@ -14,6 +14,8 @@ sidebar_label: taosX-Agent - `instanceId`:当前 taosx-agent 服务的实例 ID,如果同一台机器上启动了多个 taosx-agent 实例,必须保证各个实例的实例 ID 互不相同。 - `compression`: 非必填,可配置为 `true` 或 `false`, 默认为 `false`。配置为`true`, 则开启 `Agent` 和 `taosX` 通信数据压缩。 - `in_memory_cache_capacity`: 非必填,表示可在内存中缓存的最大消息批次数,可配置为大于 0 的整数。默认为 `64`。 +- `client_port_range.min`:非必填,取值范围 `[49152-65535]`,默认为 `49152`, 当 agent 向 taosx 创建 socket 连接时,socket 客户端会随机监听一个端口,此配置限制了端口范围的最小值。 +- `client_port_range.max`:非必填,取值范围 `[49152-65535]`,默认为 `65535`,同 `client_port_range.max` 一样,比配置为最大值。 - `log_level`: 非必填,日志级别,默认为 `info`, 同 `taosX` 一样,支持 `error`,`warn`,`info`,`debug`,`trace` 五级。已弃用,请使用 `log.level` 代替。 - `log_keep_days`:非必填,日志保存天数,默认为 `30` 天。已弃用,请使用 `log.keepDays` 代替。 - `log.path`:日志文件存放的目录。 @@ -49,6 +51,15 @@ sidebar_label: taosX-Agent # #in_memory_cache_capacity = 64 +[client_port_range] +# Minimum boundary of listening port of agent, can not less than 49152 +# +# min = 49152 + +# Maximum boundary of listening port of agent, can not greater than 65535 +# +# max = 65535 + # log configuration [log] # All log files are stored in this directory From ab2af9355cd0e49e840718138824bc4c7bb9fa3a Mon Sep 17 00:00:00 2001 From: tjuzyp Date: Thu, 9 Jan 2025 11:19:01 +0800 Subject: [PATCH 07/10] docs(agent): typo --- docs/zh/14-reference/01-components/05-taosx-agent.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/zh/14-reference/01-components/05-taosx-agent.md b/docs/zh/14-reference/01-components/05-taosx-agent.md index 16ce3ae61c..3245b74fc0 100644 --- a/docs/zh/14-reference/01-components/05-taosx-agent.md +++ b/docs/zh/14-reference/01-components/05-taosx-agent.md @@ -14,8 +14,8 @@ sidebar_label: taosX-Agent - `instanceId`:当前 taosx-agent 服务的实例 ID,如果同一台机器上启动了多个 taosx-agent 实例,必须保证各个实例的实例 ID 互不相同。 - `compression`: 非必填,可配置为 `true` 或 `false`, 默认为 `false`。配置为`true`, 则开启 `Agent` 和 `taosX` 通信数据压缩。 - `in_memory_cache_capacity`: 非必填,表示可在内存中缓存的最大消息批次数,可配置为大于 0 的整数。默认为 `64`。 -- `client_port_range.min`:非必填,取值范围 `[49152-65535]`,默认为 `49152`, 当 agent 向 taosx 创建 socket 连接时,socket 客户端会随机监听一个端口,此配置限制了端口范围的最小值。 -- `client_port_range.max`:非必填,取值范围 `[49152-65535]`,默认为 `65535`,同 `client_port_range.max` 一样,比配置为最大值。 +- `client_port_range.min`:非必填,取值范围 `[49152-65535]`,默认为 `49152`,当 agent 向 taosx 创建 socket 连接时,socket 客户端会随机监听一个端口,此配置限制了端口范围的最小值。 +- `client_port_range.max`:非必填,取值范围 `[49152-65535]`,默认为 `65535`,此配置限制了端口范围的最大值。 - `log_level`: 非必填,日志级别,默认为 `info`, 同 `taosX` 一样,支持 `error`,`warn`,`info`,`debug`,`trace` 五级。已弃用,请使用 `log.level` 代替。 - `log_keep_days`:非必填,日志保存天数,默认为 `30` 天。已弃用,请使用 `log.keepDays` 代替。 - `log.path`:日志文件存放的目录。 From 2d99f1833ef5c2b1b47520862d7d05ddd27c6fb1 Mon Sep 17 00:00:00 2001 From: factosea <285808407@qq.com> Date: Fri, 10 Jan 2025 06:09:55 +0800 Subject: [PATCH 08/10] crash log: safe code --- include/util/tlog.h | 1 + source/client/src/clientEnv.c | 4 +- source/dnode/mgmt/mgmt_dnode/src/dmWorker.c | 4 +- source/util/src/tlog.c | 93 ++++++++++++++------- 4 files changed, 69 insertions(+), 33 deletions(-) diff --git a/include/util/tlog.h b/include/util/tlog.h index 48db94baaa..f573d61e73 100644 --- a/include/util/tlog.h +++ b/include/util/tlog.h @@ -121,6 +121,7 @@ void taosReleaseCrashLogFile(TdFilePtr pFile, bool truncateFile); int32_t initCrashLogWriter(); void checkAndPrepareCrashInfo(); +bool reportThreadSetQuit(); void writeCrashLogToFile(int signum, void *sigInfo, char *nodeType, int64_t clusterId, int64_t startTime); // clang-format off diff --git a/source/client/src/clientEnv.c b/source/client/src/clientEnv.c index c22bae3fbf..b69585a356 100644 --- a/source/client/src/clientEnv.c +++ b/source/client/src/clientEnv.c @@ -839,9 +839,9 @@ static void *tscCrashReportThreadFp(void *param) { while (1) { checkAndPrepareCrashInfo(); - if (clientStop > 0) break; + if (clientStop > 0 && reportThreadSetQuit()) break; if (loopTimes++ < reportPeriodNum) { - if(loopTimes < 0) loopTimes = reportPeriodNum; + if (loopTimes < 0) loopTimes = reportPeriodNum; taosMsleep(sleepTime); continue; } diff --git a/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c b/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c index 9a1287adee..b2cb8e2f2e 100644 --- a/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c +++ b/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c @@ -282,7 +282,9 @@ static void *dmCrashReportThreadFp(void *param) { while (1) { checkAndPrepareCrashInfo(); - if (pMgmt->pData->dropped || pMgmt->pData->stopped) break; + if ((pMgmt->pData->dropped || pMgmt->pData->stopped) && reportThreadSetQuit()) { + break; + } if (loopTimes++ < reportPeriodNum) { taosMsleep(sleepTime); if(loopTimes < 0) loopTimes = reportPeriodNum; diff --git a/source/util/src/tlog.c b/source/util/src/tlog.c index a06b6c0746..2adf6601e3 100644 --- a/source/util/src/tlog.c +++ b/source/util/src/tlog.c @@ -1280,9 +1280,15 @@ _return: taosMemoryFree(pMsg); } +typedef enum { + CRASH_LOG_WRITER_UNKNOWN = 0, + CRASH_LOG_WRITER_INIT = 1, + CRASH_LOG_WRITER_WAIT, + CRASH_LOG_WRITER_RUNNING, + CRASH_LOG_WRITER_QUIT +} CrashStatus; typedef struct crashBasicInfo { - bool init; - bool isCrash; + int8_t status; int64_t clusterId; int64_t startTime; char *nodeType; @@ -1292,24 +1298,55 @@ typedef struct crashBasicInfo { } crashBasicInfo; crashBasicInfo gCrashBasicInfo = {0}; -static void writeCrashLogToFileInNewThead() { - if (!gCrashBasicInfo.init || !gCrashBasicInfo.isCrash) return; - char *pMsg = NULL; - const char *flags = "UTL FATAL "; - ELogLevel level = DEBUG_FATAL; - int32_t dflag = 255; - int64_t msgLen = -1; - if (tsEnableCrashReport) { - if (taosGenCrashJsonMsg(gCrashBasicInfo.signum, &pMsg, gCrashBasicInfo.clusterId, gCrashBasicInfo.startTime)) { - taosPrintLog(flags, level, dflag, "failed to generate crash json msg"); - } else { - msgLen = strlen(pMsg); - } +void setCrashWriterStatus(int8_t status) { atomic_store_8(&gCrashBasicInfo.status, status); } +bool reportThreadSetQuit() { + CrashStatus status = + atomic_val_compare_exchange_8(&gCrashBasicInfo.status, CRASH_LOG_WRITER_INIT, CRASH_LOG_WRITER_QUIT); + if (status == CRASH_LOG_WRITER_INIT) { + return true; + } else { + return false; + } +} + +bool setReportThreadWait() { + CrashStatus status = + atomic_val_compare_exchange_8(&gCrashBasicInfo.status, CRASH_LOG_WRITER_INIT, CRASH_LOG_WRITER_WAIT); + if (status == CRASH_LOG_WRITER_INIT) { + return true; + } else { + return false; + } +} +bool setReportThreadRunning() { + CrashStatus status = + atomic_val_compare_exchange_8(&gCrashBasicInfo.status, CRASH_LOG_WRITER_WAIT, CRASH_LOG_WRITER_RUNNING); + if (status == CRASH_LOG_WRITER_RUNNING) { + return true; + } else { + return false; + } +} +static void writeCrashLogToFileInNewThead() { + if (setReportThreadRunning()) { + char *pMsg = NULL; + const char *flags = "UTL FATAL "; + ELogLevel level = DEBUG_FATAL; + int32_t dflag = 255; + int64_t msgLen = -1; + + if (tsEnableCrashReport) { + if (taosGenCrashJsonMsg(gCrashBasicInfo.signum, &pMsg, gCrashBasicInfo.clusterId, gCrashBasicInfo.startTime)) { + taosPrintLog(flags, level, dflag, "failed to generate crash json msg"); + } else { + msgLen = strlen(pMsg); + } + } + taosLogCrashInfo(gCrashBasicInfo.nodeType, pMsg, msgLen, gCrashBasicInfo.signum, gCrashBasicInfo.sigInfo); + setCrashWriterStatus(CRASH_LOG_WRITER_INIT); + tsem_post(&gCrashBasicInfo.sem); } - taosLogCrashInfo(gCrashBasicInfo.nodeType, pMsg, msgLen, gCrashBasicInfo.signum, gCrashBasicInfo.sigInfo); - gCrashBasicInfo.isCrash = false; - tsem_post(&gCrashBasicInfo.sem); } void checkAndPrepareCrashInfo() { @@ -1322,23 +1359,19 @@ int32_t initCrashLogWriter() { uError("failed to init sem for crashLogWriter, code:%d", code); return code; } - gCrashBasicInfo.isCrash = false; - gCrashBasicInfo.init = true; + setCrashWriterStatus(CRASH_LOG_WRITER_INIT); return code; } void writeCrashLogToFile(int signum, void *sigInfo, char *nodeType, int64_t clusterId, int64_t startTime) { - if (!gCrashBasicInfo.init) { - return; + if (setReportThreadWait()) { + gCrashBasicInfo.clusterId = clusterId; + gCrashBasicInfo.startTime = startTime; + gCrashBasicInfo.nodeType = nodeType; + gCrashBasicInfo.signum = signum; + gCrashBasicInfo.sigInfo = sigInfo; + tsem_wait(&gCrashBasicInfo.sem); } - gCrashBasicInfo.clusterId = clusterId; - gCrashBasicInfo.startTime = startTime; - gCrashBasicInfo.nodeType = nodeType; - gCrashBasicInfo.signum = signum; - gCrashBasicInfo.sigInfo = sigInfo; - gCrashBasicInfo.isCrash = true; - - tsem_wait(&gCrashBasicInfo.sem); } void taosReadCrashInfo(char *filepath, char **pMsg, int64_t *pMsgLen, TdFilePtr *pFd) { From 624da05e519c1ae9e7917c4772d03d3d68544b5f Mon Sep 17 00:00:00 2001 From: factosea <285808407@qq.com> Date: Fri, 10 Jan 2025 07:11:36 +0800 Subject: [PATCH 09/10] enh: crash thread --- source/util/src/tlog.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/source/util/src/tlog.c b/source/util/src/tlog.c index 2adf6601e3..fb625d7933 100644 --- a/source/util/src/tlog.c +++ b/source/util/src/tlog.c @@ -1295,6 +1295,7 @@ typedef struct crashBasicInfo { int signum; void *sigInfo; tsem_t sem; + int64_t reportThread; } crashBasicInfo; crashBasicInfo gCrashBasicInfo = {0}; @@ -1359,11 +1360,15 @@ int32_t initCrashLogWriter() { uError("failed to init sem for crashLogWriter, code:%d", code); return code; } + gCrashBasicInfo.reportThread = taosGetSelfPthreadId(); setCrashWriterStatus(CRASH_LOG_WRITER_INIT); return code; } void writeCrashLogToFile(int signum, void *sigInfo, char *nodeType, int64_t clusterId, int64_t startTime) { + if (gCrashBasicInfo.reportThread == taosGetSelfPthreadId()) { + return; + } if (setReportThreadWait()) { gCrashBasicInfo.clusterId = clusterId; gCrashBasicInfo.startTime = startTime; From 629b5a63c96c4a659910b07016ee868d97d7fd15 Mon Sep 17 00:00:00 2001 From: factosea <285808407@qq.com> Date: Fri, 10 Jan 2025 09:35:06 +0800 Subject: [PATCH 10/10] fix: status error --- source/util/src/tlog.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/util/src/tlog.c b/source/util/src/tlog.c index fb625d7933..4f5ca8d789 100644 --- a/source/util/src/tlog.c +++ b/source/util/src/tlog.c @@ -1323,13 +1323,13 @@ bool setReportThreadWait() { bool setReportThreadRunning() { CrashStatus status = atomic_val_compare_exchange_8(&gCrashBasicInfo.status, CRASH_LOG_WRITER_WAIT, CRASH_LOG_WRITER_RUNNING); - if (status == CRASH_LOG_WRITER_RUNNING) { + if (status == CRASH_LOG_WRITER_WAIT) { return true; } else { return false; } } -static void writeCrashLogToFileInNewThead() { +static void checkWriteCrashLogToFileInNewThead() { if (setReportThreadRunning()) { char *pMsg = NULL; const char *flags = "UTL FATAL "; @@ -1351,7 +1351,7 @@ static void writeCrashLogToFileInNewThead() { } void checkAndPrepareCrashInfo() { - return writeCrashLogToFileInNewThead(); + return checkWriteCrashLogToFileInNewThead(); } int32_t initCrashLogWriter() {