diff --git a/include/util/tlog.h b/include/util/tlog.h
index d0e42e3660..acec5c511e 100644
--- a/include/util/tlog.h
+++ b/include/util/tlog.h
@@ -115,10 +115,16 @@ bool taosAssertRelease(bool condition);
 #endif
 #endif
 
-void taosLogCrashInfo(char *nodeType, char *pMsg, int64_t msgLen, int signum, void *sigInfo);
 void taosReadCrashInfo(char *filepath, char **pMsg, int64_t *pMsgLen, TdFilePtr *pFd);
 void taosReleaseCrashLogFile(TdFilePtr pFile, bool truncateFile);
 
+// Initializes the shared crash-log state and its semaphore; returns the tsem_init() result code.
+int32_t initCrashLogWriter();
+// Writes the pending crash log, if one was recorded by writeCrashLogToFile(); polled by the crash report threads.
+void checkAndPrepareCrashInfo();
+// Records the crash details and blocks until checkAndPrepareCrashInfo() has written them out.
+void writeCrashLogToFile(int signum, void *sigInfo, char *nodeType, int64_t clusterId, int64_t startTime);
+
 // clang-format off
 #define uFatal(...) { if (uDebugFlag & DEBUG_FATAL) { taosPrintLog("UTL FATAL", DEBUG_FATAL, tsLogEmbedded ? 255 : uDebugFlag, __VA_ARGS__); }}
 #define uError(...) { if (uDebugFlag & DEBUG_ERROR) { taosPrintLog("UTL ERROR ", DEBUG_ERROR, tsLogEmbedded ? 255 : uDebugFlag, __VA_ARGS__); }}
diff --git a/source/client/src/clientEnv.c b/source/client/src/clientEnv.c
index df93920303..41968d8155 100644
--- a/source/client/src/clientEnv.c
+++ b/source/client/src/clientEnv.c
@@ -43,7 +43,7 @@
 #endif
 
 #ifndef CUS_PROMPT
-#define CUS_PROMPT "tao"
+#define CUS_PROMPT "taos"
 #endif
 
 #define TSC_VAR_NOT_RELEASE 1
@@ -815,6 +815,12 @@ static void *tscCrashReportThreadFp(void *param) {
   int32_t reportPeriodNum = 3600 * 1000 / sleepTime;
   int32_t loopTimes = reportPeriodNum;
 
+  code = initCrashLogWriter();
+  if (code) {
+    tscError("failed to init crash log writer, code:%s", tstrerror(code));
+    return NULL;
+  }
+
 #ifdef WINDOWS
   if (taosCheckCurrentInDll()) {
     atexit(crashReportThreadFuncUnexpectedStopped);
@@ -832,8 +838,10 @@ static void *tscCrashReportThreadFp(void *param) {
   }
 
   while (1) {
+    checkAndPrepareCrashInfo();
     if (clientStop > 0) break;
     if (loopTimes++ < reportPeriodNum) {
+      if(loopTimes < 0) loopTimes = reportPeriodNum;
       taosMsleep(sleepTime);
       continue;
     }
@@ -921,21 +929,7 @@ void tscStopCrashReport() {
 }
 
 void tscWriteCrashInfo(int signum, void *sigInfo, void *context) {
-  char *pMsg = NULL;
-  const char *flags = "UTL FATAL ";
-  ELogLevel level = DEBUG_FATAL;
-  int32_t dflag = 255;
-  int64_t msgLen = -1;
-
-  if (tsEnableCrashReport) {
-    if (taosGenCrashJsonMsg(signum, &pMsg, lastClusterId, appInfo.startTime)) {
-      taosPrintLog(flags, level, dflag, "failed to generate crash json msg");
-    } else {
-      msgLen = strlen(pMsg);
-    }
-  }
-
-  taosLogCrashInfo("taos", pMsg, msgLen, signum, sigInfo);
+  writeCrashLogToFile(signum, sigInfo, CUS_PROMPT, lastClusterId, appInfo.startTime);
 }
 
 void taos_init_imp(void) {
@@ -969,7 +963,7 @@ void taos_init_imp(void) {
   }
   taosHashSetFreeFp(appInfo.pInstMap, destroyAppInst);
 
-  const char *logName = CUS_PROMPT "slog";
+  const char *logName = CUS_PROMPT "log";
   ENV_ERR_RET(taosInitLogOutput(&logName), "failed to init log output");
   if (taosCreateLog(logName, 10, configDir, NULL, NULL, NULL, NULL, 1) != 0) {
     (void)printf(" WARING: Create %s failed:%s. configDir=%s\n", logName, strerror(errno), configDir);
diff --git a/source/dnode/mgmt/exe/dmMain.c b/source/dnode/mgmt/exe/dmMain.c
index ddaf1d3c13..b5eeb78b5e 100644
--- a/source/dnode/mgmt/exe/dmMain.c
+++ b/source/dnode/mgmt/exe/dmMain.c
@@ -131,25 +131,7 @@ void dmLogCrash(int signum, void *sigInfo, void *context) {
   if (taosIgnSignal(SIGSEGV) != 0) {
     dWarn("failed to ignore signal SIGABRT");
   }
-
-  char *pMsg = NULL;
-  const char *flags = "UTL FATAL ";
-  ELogLevel level = DEBUG_FATAL;
-  int32_t dflag = 255;
-  int64_t msgLen = -1;
-
-  if (tsEnableCrashReport) {
-    if (taosGenCrashJsonMsg(signum, &pMsg, dmGetClusterId(), global.startTime)) {
-      taosPrintLog(flags, level, dflag, "failed to generate crash json msg");
-      goto _return;
-    } else {
-      msgLen = strlen(pMsg);
-    }
-  }
-
-_return:
-
-  taosLogCrashInfo(CUS_PROMPT "d", pMsg, msgLen, signum, sigInfo);
+  writeCrashLogToFile(signum, sigInfo, CUS_PROMPT "d", dmGetClusterId(), global.startTime);
 
 #ifdef _TD_DARWIN_64
   exit(signum);
@@ -177,6 +159,15 @@ static void dmSetSignalHandle() {
   if (taosSetSignal(SIGBREAK, dmStopDnode) != 0) {
     dWarn("failed to set signal SIGUSR1");
   }
+  if (taosSetSignal(SIGABRT, dmLogCrash) != 0) {
+    dWarn("failed to set signal SIGABRT");
+  }
+  if (taosSetSignal(SIGFPE, dmLogCrash) != 0) {
+    dWarn("failed to set signal SIGFPE");
+  }
+  if (taosSetSignal(SIGSEGV, dmLogCrash) != 0) {
+    dWarn("failed to set signal SIGSEGV");
+  }
 #ifndef WINDOWS
   if (taosSetSignal(SIGTSTP, dmStopDnode) != 0) {
     dWarn("failed to set signal SIGUSR1");
@@ -184,6 +175,9 @@ static void dmSetSignalHandle() {
   if (taosSetSignal(SIGQUIT, dmStopDnode) != 0) {
     dWarn("failed to set signal SIGUSR1");
   }
+  if (taosSetSignal(SIGBUS, dmLogCrash) != 0) {
+    dWarn("failed to set signal SIGBUS");
+  }
 #endif
 }
 
diff --git a/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c b/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c
index ef4e76031d..9a1287adee 100644
--- a/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c
+++ b/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c
@@ -274,14 +274,20 @@ static void *dmCrashReportThreadFp(void *param) {
     dError("failed to init telemetry since %s", tstrerror(code));
     return NULL;
   }
+  code = initCrashLogWriter();
+  if (code != 0) {
+    dError("failed to init crash log writer since %s", tstrerror(code));
+    return NULL;
+  }
 
   while (1) {
+    checkAndPrepareCrashInfo();
     if (pMgmt->pData->dropped || pMgmt->pData->stopped) break;
     if (loopTimes++ < reportPeriodNum) {
       taosMsleep(sleepTime);
+      if(loopTimes < 0) loopTimes = reportPeriodNum;
       continue;
     }
-
     taosReadCrashInfo(filepath, &pMsg, &msgLen, &pFile);
     if (pMsg && msgLen > 0) {
       if (taosSendTelemReport(&mgt, tsSvrCrashReportUri, tsTelemPort, pMsg, msgLen, HTTP_FLAT) != 0) {
diff --git a/source/util/src/tlog.c b/source/util/src/tlog.c
index 90753ae7e8..5251076fa5 100644
--- a/source/util/src/tlog.c
+++ b/source/util/src/tlog.c
@@ -21,6 +21,7 @@
 #include "tjson.h"
 #include "ttime.h"
 #include "tutil.h"
+#include "tcommon.h"
 
 #define LOG_MAX_LINE_SIZE (10024)
 #define LOG_MAX_LINE_BUFFER_SIZE (LOG_MAX_LINE_SIZE + 3)
@@ -1264,6 +1265,8 @@ _return:
 
   taosPrintLog(flags, level, dflag, "crash signal is %d", signum);
 
+// print the stack trace
+#if 0
 #ifdef _TD_DARWIN_64
   taosPrintTrace(flags, level, dflag, 4);
 #elif !defined(WINDOWS)
@@ -1273,10 +1276,80 @@ _return:
 #else
   taosPrintTrace(flags, level, dflag, 8);
 #endif
-
+#endif
   taosMemoryFree(pMsg);
 }
 
+// Crash details handed over from the crash signal handler to the crash report thread.
+typedef struct crashBasicInfo {
+  bool    init;     // true once initCrashLogWriter() has set up `sem` successfully
+  bool    isCrash;  // true while a recorded crash is waiting to be written
+  int64_t clusterId;
+  int64_t startTime;
+  char   *nodeType;
+  int     signum;
+  void   *sigInfo;
+  tsem_t  sem;      // posted once the pending crash log has been written
+} crashBasicInfo;
+
+crashBasicInfo gCrashBasicInfo = {0};
+
+// Writes the pending crash log (if any) and posts the semaphore the crash handler waits on.
+static void writeCrashLogToFileInNewThread() {
+  if (!gCrashBasicInfo.init || !gCrashBasicInfo.isCrash) return;
+  char *pMsg = NULL;
+  const char *flags = "UTL FATAL ";
+  ELogLevel level = DEBUG_FATAL;
+  int32_t dflag = 255;
+  int64_t msgLen = -1;
+
+  if (tsEnableCrashReport) {
+    if (taosGenCrashJsonMsg(gCrashBasicInfo.signum, &pMsg, gCrashBasicInfo.clusterId, gCrashBasicInfo.startTime)) {
+      taosPrintLog(flags, level, dflag, "failed to generate crash json msg");
+    } else {
+      msgLen = strlen(pMsg);
+    }
+  }
+  taosLogCrashInfo(gCrashBasicInfo.nodeType, pMsg, msgLen, gCrashBasicInfo.signum, gCrashBasicInfo.sigInfo);
+  gCrashBasicInfo.isCrash = false;
+  tsem_post(&gCrashBasicInfo.sem);
+}
+
+// Polled from the crash report thread loops; a no-op unless a crash has been recorded.
+void checkAndPrepareCrashInfo() {
+  writeCrashLogToFileInNewThread();
+}
+
+// Resets the crash flag and initializes the semaphore; returns the tsem_init() result code.
+int32_t initCrashLogWriter() {
+  gCrashBasicInfo.isCrash = false;
+  int32_t code = tsem_init(&gCrashBasicInfo.sem, 0, 0);
+  // Mark the writer usable only after the semaphore exists, and only if tsem_init() succeeded.
+  gCrashBasicInfo.init = (code == 0);
+  uInfo("crashLogWriter init finished.");
+  return code;
+}
+
+// Records the crash details, then blocks until the crash report thread has written the log.
+void writeCrashLogToFile(int signum, void *sigInfo, char *nodeType, int64_t clusterId, int64_t startTime) {
+  if (!gCrashBasicInfo.init) {
+    uInfo("crashLogWriter has not init!");
+    return;
+  }
+  uInfo("write crash log to file, signum:%d, nodeType:%s, clusterId:%" PRId64, signum, nodeType, clusterId);
+  gCrashBasicInfo.clusterId = clusterId;
+  gCrashBasicInfo.startTime = startTime;
+  gCrashBasicInfo.nodeType = nodeType;
+  gCrashBasicInfo.signum = signum;
+  gCrashBasicInfo.sigInfo = sigInfo;
+  // Set the flag after the fields above so the polling thread does not read a half-filled record.
+  gCrashBasicInfo.isCrash = true;
+
+  tsem_wait(&gCrashBasicInfo.sem);
+
+  uInfo("write crash log to file done, signum:%d, nodeType:%s, clusterId:%" PRId64, signum, nodeType, clusterId);
+}
+
 void taosReadCrashInfo(char *filepath, char **pMsg, int64_t *pMsgLen, TdFilePtr *pFd) {
   const char *flags = "UTL FATAL ";
   ELogLevel level = DEBUG_FATAL;
diff --git a/tools/shell/src/shellMain.c b/tools/shell/src/shellMain.c
index fc6ba0f7d8..1f6f8fe3df 100644
--- a/tools/shell/src/shellMain.c
+++ b/tools/shell/src/shellMain.c
@@ -49,14 +49,12 @@ int main(int argc, char *argv[]) {
   shell.args.local = false;
 #endif
 
-#if 0
 #if !defined(WINDOWS)
   taosSetSignal(SIGBUS, shellCrashHandler);
 #endif
   taosSetSignal(SIGABRT, shellCrashHandler);
   taosSetSignal(SIGFPE, shellCrashHandler);
   taosSetSignal(SIGSEGV, shellCrashHandler);
-#endif
 
   if (shellCheckIntSize() != 0) {
     return -1;