From 5c60015cadcd3b45f81ceca64200003838effa29 Mon Sep 17 00:00:00 2001 From: factosea <285808407@qq.com> Date: Thu, 9 Jan 2025 10:00:43 +0800 Subject: [PATCH 1/6] fix: write crash log in new thread --- include/util/tlog.h | 5 +- source/client/src/clientEnv.c | 28 ++++----- source/dnode/mgmt/exe/dmMain.c | 32 ++++------ source/dnode/mgmt/mgmt_dnode/src/dmWorker.c | 8 ++- source/util/src/tlog.c | 67 ++++++++++++++++++++- tools/shell/src/shellMain.c | 2 - 6 files changed, 101 insertions(+), 41 deletions(-) diff --git a/include/util/tlog.h b/include/util/tlog.h index d0e42e3660..acec5c511e 100644 --- a/include/util/tlog.h +++ b/include/util/tlog.h @@ -115,10 +115,13 @@ bool taosAssertRelease(bool condition); #endif #endif -void taosLogCrashInfo(char *nodeType, char *pMsg, int64_t msgLen, int signum, void *sigInfo); void taosReadCrashInfo(char *filepath, char **pMsg, int64_t *pMsgLen, TdFilePtr *pFd); void taosReleaseCrashLogFile(TdFilePtr pFile, bool truncateFile); +int32_t initCrashLogWriter(); +void checkAndPrepareCrashInfo(); +void writeCrashLogToFile(int signum, void *sigInfo, char *nodeType, int64_t clusterId, int64_t startTime); + // clang-format off #define uFatal(...) { if (uDebugFlag & DEBUG_FATAL) { taosPrintLog("UTL FATAL", DEBUG_FATAL, tsLogEmbedded ? 255 : uDebugFlag, __VA_ARGS__); }} #define uError(...) { if (uDebugFlag & DEBUG_ERROR) { taosPrintLog("UTL ERROR ", DEBUG_ERROR, tsLogEmbedded ? 255 : uDebugFlag, __VA_ARGS__); }} diff --git a/source/client/src/clientEnv.c b/source/client/src/clientEnv.c index df93920303..41968d8155 100644 --- a/source/client/src/clientEnv.c +++ b/source/client/src/clientEnv.c @@ -43,7 +43,7 @@ #endif #ifndef CUS_PROMPT -#define CUS_PROMPT "tao" +#define CUS_PROMPT "taos" #endif #define TSC_VAR_NOT_RELEASE 1 @@ -815,6 +815,12 @@ static void *tscCrashReportThreadFp(void *param) { int32_t reportPeriodNum = 3600 * 1000 / sleepTime; int32_t loopTimes = reportPeriodNum; + code = initCrashLogWriter(); + if (code) { + tscError("failed to init crash log writer, code:%s", tstrerror(code)); + return NULL; + } + #ifdef WINDOWS if (taosCheckCurrentInDll()) { atexit(crashReportThreadFuncUnexpectedStopped); @@ -832,8 +838,10 @@ static void *tscCrashReportThreadFp(void *param) { } while (1) { + checkAndPrepareCrashInfo(); if (clientStop > 0) break; if (loopTimes++ < reportPeriodNum) { + if(loopTimes < 0) loopTimes = reportPeriodNum; taosMsleep(sleepTime); continue; } @@ -921,21 +929,7 @@ void tscStopCrashReport() { } void tscWriteCrashInfo(int signum, void *sigInfo, void *context) { - char *pMsg = NULL; - const char *flags = "UTL FATAL "; - ELogLevel level = DEBUG_FATAL; - int32_t dflag = 255; - int64_t msgLen = -1; - - if (tsEnableCrashReport) { - if (taosGenCrashJsonMsg(signum, &pMsg, lastClusterId, appInfo.startTime)) { - taosPrintLog(flags, level, dflag, "failed to generate crash json msg"); - } else { - msgLen = strlen(pMsg); - } - } - - taosLogCrashInfo("taos", pMsg, msgLen, signum, sigInfo); + writeCrashLogToFile(signum, sigInfo, CUS_PROMPT, lastClusterId, appInfo.startTime); } void taos_init_imp(void) { @@ -969,7 +963,7 @@ void taos_init_imp(void) { } taosHashSetFreeFp(appInfo.pInstMap, destroyAppInst); - const char *logName = CUS_PROMPT "slog"; + const char *logName = CUS_PROMPT "log"; ENV_ERR_RET(taosInitLogOutput(&logName), "failed to init log output"); if (taosCreateLog(logName, 10, configDir, NULL, NULL, NULL, NULL, 1) != 0) { (void)printf(" WARING: Create %s failed:%s. configDir=%s\n", logName, strerror(errno), configDir); diff --git a/source/dnode/mgmt/exe/dmMain.c b/source/dnode/mgmt/exe/dmMain.c index ddaf1d3c13..b5eeb78b5e 100644 --- a/source/dnode/mgmt/exe/dmMain.c +++ b/source/dnode/mgmt/exe/dmMain.c @@ -131,25 +131,7 @@ void dmLogCrash(int signum, void *sigInfo, void *context) { if (taosIgnSignal(SIGSEGV) != 0) { dWarn("failed to ignore signal SIGABRT"); } - - char *pMsg = NULL; - const char *flags = "UTL FATAL "; - ELogLevel level = DEBUG_FATAL; - int32_t dflag = 255; - int64_t msgLen = -1; - - if (tsEnableCrashReport) { - if (taosGenCrashJsonMsg(signum, &pMsg, dmGetClusterId(), global.startTime)) { - taosPrintLog(flags, level, dflag, "failed to generate crash json msg"); - goto _return; - } else { - msgLen = strlen(pMsg); - } - } - -_return: - - taosLogCrashInfo(CUS_PROMPT "d", pMsg, msgLen, signum, sigInfo); + writeCrashLogToFile(signum, sigInfo, CUS_PROMPT "d", dmGetClusterId(), global.startTime); #ifdef _TD_DARWIN_64 exit(signum); @@ -177,6 +159,15 @@ static void dmSetSignalHandle() { if (taosSetSignal(SIGBREAK, dmStopDnode) != 0) { dWarn("failed to set signal SIGUSR1"); } + if (taosSetSignal(SIGABRT, dmLogCrash) != 0) { + dWarn("failed to set signal SIGUSR1"); + } + if (taosSetSignal(SIGFPE, dmLogCrash) != 0) { + dWarn("failed to set signal SIGUSR1"); + } + if (taosSetSignal(SIGSEGV, dmLogCrash) != 0) { + dWarn("failed to set signal SIGUSR1"); + } #ifndef WINDOWS if (taosSetSignal(SIGTSTP, dmStopDnode) != 0) { dWarn("failed to set signal SIGUSR1"); @@ -184,6 +175,9 @@ static void dmSetSignalHandle() { if (taosSetSignal(SIGQUIT, dmStopDnode) != 0) { dWarn("failed to set signal SIGUSR1"); } + if (taosSetSignal(SIGBUS, dmLogCrash) != 0) { + dWarn("failed to set signal SIGUSR1"); + } #endif } diff --git a/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c b/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c index ef4e76031d..9a1287adee 100644 --- a/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c +++ b/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c @@ -274,14 +274,20 @@ static void *dmCrashReportThreadFp(void *param) { dError("failed to init telemetry since %s", tstrerror(code)); return NULL; } + code = initCrashLogWriter(); + if (code != 0) { + dError("failed to init crash log writer since %s", tstrerror(code)); + return NULL; + } while (1) { + checkAndPrepareCrashInfo(); if (pMgmt->pData->dropped || pMgmt->pData->stopped) break; if (loopTimes++ < reportPeriodNum) { taosMsleep(sleepTime); + if(loopTimes < 0) loopTimes = reportPeriodNum; continue; } - taosReadCrashInfo(filepath, &pMsg, &msgLen, &pFile); if (pMsg && msgLen > 0) { if (taosSendTelemReport(&mgt, tsSvrCrashReportUri, tsTelemPort, pMsg, msgLen, HTTP_FLAT) != 0) { diff --git a/source/util/src/tlog.c b/source/util/src/tlog.c index 90753ae7e8..5251076fa5 100644 --- a/source/util/src/tlog.c +++ b/source/util/src/tlog.c @@ -21,6 +21,7 @@ #include "tjson.h" #include "ttime.h" #include "tutil.h" +#include "tcommon.h" #define LOG_MAX_LINE_SIZE (10024) #define LOG_MAX_LINE_BUFFER_SIZE (LOG_MAX_LINE_SIZE + 3) @@ -1264,6 +1265,8 @@ _return: taosPrintLog(flags, level, dflag, "crash signal is %d", signum); +// print the stack trace +#if 0 #ifdef _TD_DARWIN_64 taosPrintTrace(flags, level, dflag, 4); #elif !defined(WINDOWS) @@ -1273,10 +1276,72 @@ _return: #else taosPrintTrace(flags, level, dflag, 8); #endif - +#endif taosMemoryFree(pMsg); } +typedef struct crashBasicInfo { + bool init; + bool isCrash; + int64_t clusterId; + int64_t startTime; + char *nodeType; + int signum; + void *sigInfo; + tsem_t sem; +} crashBasicInfo; + +crashBasicInfo gCrashBasicInfo = {0}; +static void writeCrashLogToFileInNewThead() { + if (!gCrashBasicInfo.init || !gCrashBasicInfo.isCrash) return; + char *pMsg = NULL; + const char *flags = "UTL FATAL "; + ELogLevel level = DEBUG_FATAL; + int32_t dflag = 255; + int64_t msgLen = -1; + + if (tsEnableCrashReport) { + if (taosGenCrashJsonMsg(gCrashBasicInfo.signum, &pMsg, gCrashBasicInfo.clusterId, gCrashBasicInfo.startTime)) { + taosPrintLog(flags, level, dflag, "failed to generate crash json msg"); + } else { + msgLen = strlen(pMsg); + } + } + taosLogCrashInfo(gCrashBasicInfo.nodeType, pMsg, msgLen, gCrashBasicInfo.signum, gCrashBasicInfo.sigInfo); + gCrashBasicInfo.isCrash = false; + tsem_post(&gCrashBasicInfo.sem); +} + +void checkAndPrepareCrashInfo() { + return writeCrashLogToFileInNewThead(); +} + +int32_t initCrashLogWriter() { + gCrashBasicInfo.init = true; + gCrashBasicInfo.isCrash = false; + int32_t code = tsem_init(&gCrashBasicInfo.sem, 0, 0); + uInfo("crashLogWriter init finished."); + return code; +} + +void writeCrashLogToFile(int signum, void *sigInfo, char *nodeType, int64_t clusterId, int64_t startTime) { + if (!gCrashBasicInfo.init) { + uInfo("crashLogWriter has not init!"); + return; + } + uInfo("write crash log to file, signum:%d, nodeType:%s, clusterId:%" PRId64, signum, nodeType, clusterId); + gCrashBasicInfo.isCrash = true; + gCrashBasicInfo.clusterId = clusterId; + gCrashBasicInfo.startTime = startTime; + gCrashBasicInfo.nodeType = nodeType; + gCrashBasicInfo.signum = signum; + gCrashBasicInfo.sigInfo = sigInfo; + + tsem_wait(&gCrashBasicInfo.sem); + + uInfo("write crash log to file done, signum:%d, nodeType:%s, clusterId:%" PRId64, signum, nodeType, clusterId); +} + void taosReadCrashInfo(char *filepath, char **pMsg, int64_t *pMsgLen, TdFilePtr *pFd) { const char *flags = "UTL FATAL "; ELogLevel level = DEBUG_FATAL; diff --git a/tools/shell/src/shellMain.c b/tools/shell/src/shellMain.c index fc6ba0f7d8..1f6f8fe3df 100644 --- a/tools/shell/src/shellMain.c +++ b/tools/shell/src/shellMain.c @@ -49,14 +49,12 @@ int main(int argc, char *argv[]) { shell.args.local = false; #endif -#if 0 #if !defined(WINDOWS) taosSetSignal(SIGBUS, shellCrashHandler); #endif taosSetSignal(SIGABRT, shellCrashHandler); taosSetSignal(SIGFPE, shellCrashHandler); taosSetSignal(SIGSEGV, shellCrashHandler); -#endif if (shellCheckIntSize() != 0) { return -1; From 560240d1f873d7995ffc33bcec634b07ac2e4460 Mon Sep 17 00:00:00 2001 From: facetosea <25808407@qq.com> Date: Thu, 9 Jan 2025 10:56:20 +0800 Subject: [PATCH 2/6] fix: test case --- include/util/tlog.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/util/tlog.h b/include/util/tlog.h index acec5c511e..48db94baaa 100644 --- a/include/util/tlog.h +++ b/include/util/tlog.h @@ -115,6 +115,7 @@ bool taosAssertRelease(bool condition); #endif #endif +void taosLogCrashInfo(char *nodeType, char *pMsg, int64_t msgLen, int signum, void *sigInfo); void taosReadCrashInfo(char *filepath, char **pMsg, int64_t *pMsgLen, TdFilePtr *pFd); void taosReleaseCrashLogFile(TdFilePtr pFile, bool truncateFile); From 6629bf4d80fb4013815e77aeeaa6988c46728527 Mon Sep 17 00:00:00 2001 From: factosea <285808407@qq.com> Date: Thu, 9 Jan 2025 16:02:11 +0800 Subject: [PATCH 3/6] fix: write crash log --- source/client/src/clientEnv.c | 12 ++++++------ source/common/src/tmisce.c | 3 ++- source/util/src/tlog.c | 15 +++++++-------- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/source/client/src/clientEnv.c b/source/client/src/clientEnv.c index 41968d8155..c22bae3fbf 100644 --- a/source/client/src/clientEnv.c +++ b/source/client/src/clientEnv.c @@ -815,12 +815,6 @@ static void *tscCrashReportThreadFp(void *param) { int32_t reportPeriodNum = 3600 * 1000 / sleepTime; int32_t loopTimes = reportPeriodNum; - code = initCrashLogWriter(); - if (code) { - tscError("failed to init crash log writer, code:%s", tstrerror(code)); - return NULL; - } - #ifdef WINDOWS if (taosCheckCurrentInDll()) { atexit(crashReportThreadFuncUnexpectedStopped); @@ -837,6 +831,12 @@ static void *tscCrashReportThreadFp(void *param) { return NULL; } + code = initCrashLogWriter(); + if (code) { + tscError("failed to init crash log writer, code:%s", tstrerror(code)); + return NULL; + } + while (1) { checkAndPrepareCrashInfo(); if (clientStop > 0) break; diff --git a/source/common/src/tmisce.c b/source/common/src/tmisce.c index 144a1542cb..a966513629 100644 --- a/source/common/src/tmisce.c +++ b/source/common/src/tmisce.c @@ -231,6 +231,7 @@ int32_t taosGenCrashJsonMsg(int signum, char** pMsg, int64_t clusterId, int64_t TAOS_CHECK_GOTO(tjsonAddIntegerToObject(pJson, "crashSig", signum), NULL, _exit); TAOS_CHECK_GOTO(tjsonAddIntegerToObject(pJson, "crashTs", taosGetTimestampUs()), NULL, _exit); +#if 0 #ifdef _TD_DARWIN_64 taosLogTraceToBuf(tmp, sizeof(tmp), 4); #elif !defined(WINDOWS) @@ -240,7 +241,7 @@ int32_t taosGenCrashJsonMsg(int signum, char** pMsg, int64_t clusterId, int64_t #endif TAOS_CHECK_GOTO(tjsonAddStringToObject(pJson, "stackInfo", tmp), NULL, _exit); - +#endif char* pCont = tjsonToString(pJson); if (pCont == NULL) { code = terrno; diff --git a/source/util/src/tlog.c b/source/util/src/tlog.c index 5251076fa5..a06b6c0746 100644 --- a/source/util/src/tlog.c +++ b/source/util/src/tlog.c @@ -1317,29 +1317,28 @@ void checkAndPrepareCrashInfo() { } int32_t initCrashLogWriter() { - gCrashBasicInfo.init = true; - gCrashBasicInfo.isCrash = false; int32_t code = tsem_init(&gCrashBasicInfo.sem, 0, 0); - uInfo("crashLogWriter init finished."); + if (code != 0) { + uError("failed to init sem for crashLogWriter, code:%d", code); + return code; + } + gCrashBasicInfo.isCrash = false; + gCrashBasicInfo.init = true; return code; } void writeCrashLogToFile(int signum, void *sigInfo, char *nodeType, int64_t clusterId, int64_t startTime) { if (!gCrashBasicInfo.init) { - uInfo("crashLogWriter has not init!"); return; } - uInfo("write crash log to file, signum:%d, nodeType:%s, clusterId:%" PRId64, signum, nodeType, clusterId); - gCrashBasicInfo.isCrash = true; gCrashBasicInfo.clusterId = clusterId; gCrashBasicInfo.startTime = startTime; gCrashBasicInfo.nodeType = nodeType; gCrashBasicInfo.signum = signum; gCrashBasicInfo.sigInfo = sigInfo; + gCrashBasicInfo.isCrash = true; tsem_wait(&gCrashBasicInfo.sem); - - uInfo("write crash log to file done, signum:%d, nodeType:%s, clusterId:%" PRId64, signum, nodeType, clusterId); } void taosReadCrashInfo(char *filepath, char **pMsg, int64_t *pMsgLen, TdFilePtr *pFd) { From bd9ec860613cc0335aa9e15ec90eba5ff44afe41 Mon Sep 17 00:00:00 2001 From: factosea <285808407@qq.com> Date: Fri, 10 Jan 2025 06:09:55 +0800 Subject: [PATCH 4/6] crash log: safe code --- include/util/tlog.h | 1 + source/client/src/clientEnv.c | 4 +- source/dnode/mgmt/mgmt_dnode/src/dmWorker.c | 4 +- source/util/src/tlog.c | 93 ++++++++++++++------- 4 files changed, 69 insertions(+), 33 deletions(-) diff --git a/include/util/tlog.h b/include/util/tlog.h index 48db94baaa..f573d61e73 100644 --- a/include/util/tlog.h +++ b/include/util/tlog.h @@ -121,6 +121,7 @@ void taosReleaseCrashLogFile(TdFilePtr pFile, bool truncateFile); int32_t initCrashLogWriter(); void checkAndPrepareCrashInfo(); +bool reportThreadSetQuit(); void writeCrashLogToFile(int signum, void *sigInfo, char *nodeType, int64_t clusterId, int64_t startTime); // clang-format off diff --git a/source/client/src/clientEnv.c b/source/client/src/clientEnv.c index c22bae3fbf..b69585a356 100644 --- a/source/client/src/clientEnv.c +++ b/source/client/src/clientEnv.c @@ -839,9 +839,9 @@ static void *tscCrashReportThreadFp(void *param) { while (1) { checkAndPrepareCrashInfo(); - if (clientStop > 0) break; + if (clientStop > 0 && reportThreadSetQuit()) break; if (loopTimes++ < reportPeriodNum) { - if(loopTimes < 0) loopTimes = reportPeriodNum; + if (loopTimes < 0) loopTimes = reportPeriodNum; taosMsleep(sleepTime); continue; } diff --git a/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c b/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c index 9a1287adee..b2cb8e2f2e 100644 --- a/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c +++ b/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c @@ -282,7 +282,9 @@ static void *dmCrashReportThreadFp(void *param) { while (1) { checkAndPrepareCrashInfo(); - if (pMgmt->pData->dropped || pMgmt->pData->stopped) break; + if ((pMgmt->pData->dropped || pMgmt->pData->stopped) && reportThreadSetQuit()) { + break; + } if (loopTimes++ < reportPeriodNum) { taosMsleep(sleepTime); if(loopTimes < 0) loopTimes = reportPeriodNum; diff --git a/source/util/src/tlog.c b/source/util/src/tlog.c index a06b6c0746..2adf6601e3 100644 --- a/source/util/src/tlog.c +++ b/source/util/src/tlog.c @@ -1280,9 +1280,15 @@ _return: taosMemoryFree(pMsg); } +typedef enum { + CRASH_LOG_WRITER_UNKNOWN = 0, + CRASH_LOG_WRITER_INIT = 1, + CRASH_LOG_WRITER_WAIT, + CRASH_LOG_WRITER_RUNNING, + CRASH_LOG_WRITER_QUIT +} CrashStatus; typedef struct crashBasicInfo { - bool init; - bool isCrash; + int8_t status; int64_t clusterId; int64_t startTime; char *nodeType; @@ -1292,24 +1298,55 @@ typedef struct crashBasicInfo { } crashBasicInfo; crashBasicInfo gCrashBasicInfo = {0}; -static void writeCrashLogToFileInNewThead() { - if (!gCrashBasicInfo.init || !gCrashBasicInfo.isCrash) return; - char *pMsg = NULL; - const char *flags = "UTL FATAL "; - ELogLevel level = DEBUG_FATAL; - int32_t dflag = 255; - int64_t msgLen = -1; - if (tsEnableCrashReport) { - if (taosGenCrashJsonMsg(gCrashBasicInfo.signum, &pMsg, gCrashBasicInfo.clusterId, gCrashBasicInfo.startTime)) { - taosPrintLog(flags, level, dflag, "failed to generate crash json msg"); - } else { - msgLen = strlen(pMsg); - } +void setCrashWriterStatus(int8_t status) { atomic_store_8(&gCrashBasicInfo.status, status); } +bool reportThreadSetQuit() { + CrashStatus status = + atomic_val_compare_exchange_8(&gCrashBasicInfo.status, CRASH_LOG_WRITER_INIT, CRASH_LOG_WRITER_QUIT); + if (status == CRASH_LOG_WRITER_INIT) { + return true; + } else { + return false; + } +} + +bool setReportThreadWait() { + CrashStatus status = + atomic_val_compare_exchange_8(&gCrashBasicInfo.status, CRASH_LOG_WRITER_INIT, CRASH_LOG_WRITER_WAIT); + if (status == CRASH_LOG_WRITER_INIT) { + return true; + } else { + return false; + } +} +bool setReportThreadRunning() { + CrashStatus status = + atomic_val_compare_exchange_8(&gCrashBasicInfo.status, CRASH_LOG_WRITER_WAIT, CRASH_LOG_WRITER_RUNNING); + if (status == CRASH_LOG_WRITER_RUNNING) { + return true; + } else { + return false; + } +} +static void writeCrashLogToFileInNewThead() { + if (setReportThreadRunning()) { + char *pMsg = NULL; + const char *flags = "UTL FATAL "; + ELogLevel level = DEBUG_FATAL; + int32_t dflag = 255; + int64_t msgLen = -1; + + if (tsEnableCrashReport) { + if (taosGenCrashJsonMsg(gCrashBasicInfo.signum, &pMsg, gCrashBasicInfo.clusterId, gCrashBasicInfo.startTime)) { + taosPrintLog(flags, level, dflag, "failed to generate crash json msg"); + } else { + msgLen = strlen(pMsg); + } + } + taosLogCrashInfo(gCrashBasicInfo.nodeType, pMsg, msgLen, gCrashBasicInfo.signum, gCrashBasicInfo.sigInfo); + setCrashWriterStatus(CRASH_LOG_WRITER_INIT); + tsem_post(&gCrashBasicInfo.sem); } - taosLogCrashInfo(gCrashBasicInfo.nodeType, pMsg, msgLen, gCrashBasicInfo.signum, gCrashBasicInfo.sigInfo); - gCrashBasicInfo.isCrash = false; - tsem_post(&gCrashBasicInfo.sem); } void checkAndPrepareCrashInfo() { @@ -1322,23 +1359,19 @@ int32_t initCrashLogWriter() { uError("failed to init sem for crashLogWriter, code:%d", code); return code; } - gCrashBasicInfo.isCrash = false; - gCrashBasicInfo.init = true; + setCrashWriterStatus(CRASH_LOG_WRITER_INIT); return code; } void writeCrashLogToFile(int signum, void *sigInfo, char *nodeType, int64_t clusterId, int64_t startTime) { - if (!gCrashBasicInfo.init) { - return; + if (setReportThreadWait()) { + gCrashBasicInfo.clusterId = clusterId; + gCrashBasicInfo.startTime = startTime; + gCrashBasicInfo.nodeType = nodeType; + gCrashBasicInfo.signum = signum; + gCrashBasicInfo.sigInfo = sigInfo; + tsem_wait(&gCrashBasicInfo.sem); } - gCrashBasicInfo.clusterId = clusterId; - gCrashBasicInfo.startTime = startTime; - gCrashBasicInfo.nodeType = nodeType; - gCrashBasicInfo.signum = signum; - gCrashBasicInfo.sigInfo = sigInfo; - gCrashBasicInfo.isCrash = true; - - tsem_wait(&gCrashBasicInfo.sem); } void taosReadCrashInfo(char *filepath, char **pMsg, int64_t *pMsgLen, TdFilePtr *pFd) { From 4b03091501e6fb612f60dde03e52e87b8300abb6 Mon Sep 17 00:00:00 2001 From: factosea <285808407@qq.com> Date: Fri, 10 Jan 2025 07:11:36 +0800 Subject: [PATCH 5/6] enh: crash thread --- source/util/src/tlog.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/source/util/src/tlog.c b/source/util/src/tlog.c index 2adf6601e3..fb625d7933 100644 --- a/source/util/src/tlog.c +++ b/source/util/src/tlog.c @@ -1295,6 +1295,7 @@ typedef struct crashBasicInfo { int signum; void *sigInfo; tsem_t sem; + int64_t reportThread; } crashBasicInfo; crashBasicInfo gCrashBasicInfo = {0}; @@ -1359,11 +1360,15 @@ int32_t initCrashLogWriter() { uError("failed to init sem for crashLogWriter, code:%d", code); return code; } + gCrashBasicInfo.reportThread = taosGetSelfPthreadId(); setCrashWriterStatus(CRASH_LOG_WRITER_INIT); return code; } void writeCrashLogToFile(int signum, void *sigInfo, char *nodeType, int64_t clusterId, int64_t startTime) { + if (gCrashBasicInfo.reportThread == taosGetSelfPthreadId()) { + return; + } if (setReportThreadWait()) { gCrashBasicInfo.clusterId = clusterId; gCrashBasicInfo.startTime = startTime; From 936c7f93e5adfb5b45e33638f27fd37f8ef4e3f3 Mon Sep 17 00:00:00 2001 From: factosea <285808407@qq.com> Date: Fri, 10 Jan 2025 09:35:06 +0800 Subject: [PATCH 6/6] fix: status error --- source/util/src/tlog.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/util/src/tlog.c b/source/util/src/tlog.c index fb625d7933..4f5ca8d789 100644 --- a/source/util/src/tlog.c +++ b/source/util/src/tlog.c @@ -1323,13 +1323,13 @@ bool setReportThreadWait() { bool setReportThreadRunning() { CrashStatus status = atomic_val_compare_exchange_8(&gCrashBasicInfo.status, CRASH_LOG_WRITER_WAIT, CRASH_LOG_WRITER_RUNNING); - if (status == CRASH_LOG_WRITER_RUNNING) { + if (status == CRASH_LOG_WRITER_WAIT) { return true; } else { return false; } } -static void writeCrashLogToFileInNewThead() { +static void checkWriteCrashLogToFileInNewThead() { if (setReportThreadRunning()) { char *pMsg = NULL; const char *flags = "UTL FATAL "; @@ -1351,7 +1351,7 @@ static void writeCrashLogToFileInNewThead() { } void checkAndPrepareCrashInfo() { - return writeCrashLogToFileInNewThead(); + return checkWriteCrashLogToFileInNewThead(); } int32_t initCrashLogWriter() {