From bd9ec860613cc0335aa9e15ec90eba5ff44afe41 Mon Sep 17 00:00:00 2001 From: factosea <285808407@qq.com> Date: Fri, 10 Jan 2025 06:09:55 +0800 Subject: [PATCH] crash log: safe code --- include/util/tlog.h | 1 + source/client/src/clientEnv.c | 4 +- source/dnode/mgmt/mgmt_dnode/src/dmWorker.c | 4 +- source/util/src/tlog.c | 93 ++++++++++++++------- 4 files changed, 69 insertions(+), 33 deletions(-) diff --git a/include/util/tlog.h b/include/util/tlog.h index 48db94baaa..f573d61e73 100644 --- a/include/util/tlog.h +++ b/include/util/tlog.h @@ -121,6 +121,7 @@ void taosReleaseCrashLogFile(TdFilePtr pFile, bool truncateFile); int32_t initCrashLogWriter(); void checkAndPrepareCrashInfo(); +bool reportThreadSetQuit(); void writeCrashLogToFile(int signum, void *sigInfo, char *nodeType, int64_t clusterId, int64_t startTime); // clang-format off diff --git a/source/client/src/clientEnv.c b/source/client/src/clientEnv.c index c22bae3fbf..b69585a356 100644 --- a/source/client/src/clientEnv.c +++ b/source/client/src/clientEnv.c @@ -839,9 +839,9 @@ static void *tscCrashReportThreadFp(void *param) { while (1) { checkAndPrepareCrashInfo(); - if (clientStop > 0) break; + if (clientStop > 0 && reportThreadSetQuit()) break; if (loopTimes++ < reportPeriodNum) { - if(loopTimes < 0) loopTimes = reportPeriodNum; + if (loopTimes < 0) loopTimes = reportPeriodNum; taosMsleep(sleepTime); continue; } diff --git a/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c b/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c index 9a1287adee..b2cb8e2f2e 100644 --- a/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c +++ b/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c @@ -282,7 +282,9 @@ static void *dmCrashReportThreadFp(void *param) { while (1) { checkAndPrepareCrashInfo(); - if (pMgmt->pData->dropped || pMgmt->pData->stopped) break; + if ((pMgmt->pData->dropped || pMgmt->pData->stopped) && reportThreadSetQuit()) { + break; + } if (loopTimes++ < reportPeriodNum) { taosMsleep(sleepTime); if(loopTimes < 0) loopTimes = 
reportPeriodNum; diff --git a/source/util/src/tlog.c b/source/util/src/tlog.c index a06b6c0746..2adf6601e3 100644 --- a/source/util/src/tlog.c +++ b/source/util/src/tlog.c @@ -1280,9 +1280,15 @@ _return: taosMemoryFree(pMsg); } +typedef enum { /* states of gCrashBasicInfo.status, the hand-off flag between a crashing thread and the crash-log writer */ + CRASH_LOG_WRITER_UNKNOWN = 0, /* value of the zero-initialised global until initCrashLogWriter() stores INIT */ + CRASH_LOG_WRITER_INIT = 1, /* stored by initCrashLogWriter() and again after a crash log has been written */ + CRASH_LOG_WRITER_WAIT, /* entered through setReportThreadWait(), called by writeCrashLogToFile() */ + CRASH_LOG_WRITER_RUNNING, /* entered through setReportThreadRunning(), called by the writer */ + CRASH_LOG_WRITER_QUIT /* entered through reportThreadSetQuit() */ +} CrashStatus; typedef struct crashBasicInfo { - bool init; - bool isCrash; + int8_t status; /* holds a CrashStatus value; accessed only through the atomic helpers below */ int64_t clusterId; int64_t startTime; char *nodeType; @@ -1292,24 +1298,55 @@ typedef struct crashBasicInfo { } crashBasicInfo; crashBasicInfo gCrashBasicInfo = {0}; -static void writeCrashLogToFileInNewThead() { - if (!gCrashBasicInfo.init || !gCrashBasicInfo.isCrash) return; - char *pMsg = NULL; - const char *flags = "UTL FATAL "; - ELogLevel level = DEBUG_FATAL; - int32_t dflag = 255; - int64_t msgLen = -1; - if (tsEnableCrashReport) { - if (taosGenCrashJsonMsg(gCrashBasicInfo.signum, &pMsg, gCrashBasicInfo.clusterId, gCrashBasicInfo.startTime)) { - taosPrintLog(flags, level, dflag, "failed to generate crash json msg"); - } else { - msgLen = strlen(pMsg); - } +/* Unconditionally stores a new state. */ void setCrashWriterStatus(int8_t status) { atomic_store_8(&gCrashBasicInfo.status, status); } +/* INIT -> QUIT. Returns true only when this call performed the transition. */ bool reportThreadSetQuit() { + CrashStatus status = + atomic_val_compare_exchange_8(&gCrashBasicInfo.status, CRASH_LOG_WRITER_INIT, CRASH_LOG_WRITER_QUIT); + if (status == CRASH_LOG_WRITER_INIT) { + return true; + } else { + return false; + } +} + +/* INIT -> WAIT. Returns true only when this call performed the transition. */ bool setReportThreadWait() { + CrashStatus status = + atomic_val_compare_exchange_8(&gCrashBasicInfo.status, CRASH_LOG_WRITER_INIT, CRASH_LOG_WRITER_WAIT); + if (status == CRASH_LOG_WRITER_INIT) { + return true; + } else { + return false; + } +} +/* WAIT -> RUNNING. Returns true only when this call performed the transition. */ bool setReportThreadRunning() { + CrashStatus status = + atomic_val_compare_exchange_8(&gCrashBasicInfo.status, CRASH_LOG_WRITER_WAIT, CRASH_LOG_WRITER_RUNNING); + if (status == CRASH_LOG_WRITER_WAIT) { /* the exchange yields the PREVIOUS value, so success is the expected old state (WAIT), as in the two helpers above; testing for RUNNING reported a failed exchange as success and a successful one as failure */ + return true; + } else { + return false; + } +} 
+/* Writes the pending crash report. Does nothing unless setReportThreadRunning() returns true; afterwards it stores INIT again and posts the semaphore that writeCrashLogToFile() blocks on. */ static void writeCrashLogToFileInNewThead() { + if (setReportThreadRunning()) { + char *pMsg = NULL; + const char *flags = "UTL FATAL "; + ELogLevel level = DEBUG_FATAL; + int32_t dflag = 255; + int64_t msgLen = -1; /* stays -1 when no JSON message was generated */ + + if (tsEnableCrashReport) { + if (taosGenCrashJsonMsg(gCrashBasicInfo.signum, &pMsg, gCrashBasicInfo.clusterId, gCrashBasicInfo.startTime)) { + taosPrintLog(flags, level, dflag, "failed to generate crash json msg"); + } else { + msgLen = strlen(pMsg); + } + } + taosLogCrashInfo(gCrashBasicInfo.nodeType, pMsg, msgLen, gCrashBasicInfo.signum, gCrashBasicInfo.sigInfo); + setCrashWriterStatus(CRASH_LOG_WRITER_INIT); /* reset the state before waking the waiter */ + tsem_post(&gCrashBasicInfo.sem); } - taosLogCrashInfo(gCrashBasicInfo.nodeType, pMsg, msgLen, gCrashBasicInfo.signum, gCrashBasicInfo.sigInfo); - gCrashBasicInfo.isCrash = false; - tsem_post(&gCrashBasicInfo.sem); } void checkAndPrepareCrashInfo() { @@ -1322,23 +1359,19 @@ int32_t initCrashLogWriter() { uError("failed to init sem for crashLogWriter, code:%d", code); return code; } - gCrashBasicInfo.isCrash = false; - gCrashBasicInfo.init = true; + setCrashWriterStatus(CRASH_LOG_WRITER_INIT); /* replaces the former init/isCrash flags with a single state value */ return code; } void writeCrashLogToFile(int signum, void *sigInfo, char *nodeType, int64_t clusterId, int64_t startTime) { - if (!gCrashBasicInfo.init) { - return; + if (setReportThreadWait()) { /* only the caller that moves the state INIT -> WAIT records its crash context and then blocks until the writer posts the semaphore; any other caller returns immediately. NOTE(review): the state is already WAIT when the fields below are stored - confirm the writer cannot read them before they are filled in. */ + gCrashBasicInfo.clusterId = clusterId; + gCrashBasicInfo.startTime = startTime; + gCrashBasicInfo.nodeType = nodeType; + gCrashBasicInfo.signum = signum; + gCrashBasicInfo.sigInfo = sigInfo; + tsem_wait(&gCrashBasicInfo.sem); } - gCrashBasicInfo.clusterId = clusterId; - gCrashBasicInfo.startTime = startTime; - gCrashBasicInfo.nodeType = nodeType; - gCrashBasicInfo.signum = signum; - gCrashBasicInfo.sigInfo = sigInfo; - gCrashBasicInfo.isCrash = true; - - tsem_wait(&gCrashBasicInfo.sem); } void taosReadCrashInfo(char *filepath, char **pMsg, int64_t *pMsgLen, TdFilePtr *pFd) {