fix: write crash log in new thread

This commit is contained in:
factosea 2025-01-09 10:00:43 +08:00 committed by facetosea
parent e08b886829
commit 5c60015cad
6 changed files with 101 additions and 41 deletions

View File

@ -115,10 +115,13 @@ bool taosAssertRelease(bool condition);
#endif #endif
#endif #endif
void taosLogCrashInfo(char *nodeType, char *pMsg, int64_t msgLen, int signum, void *sigInfo);
void taosReadCrashInfo(char *filepath, char **pMsg, int64_t *pMsgLen, TdFilePtr *pFd); void taosReadCrashInfo(char *filepath, char **pMsg, int64_t *pMsgLen, TdFilePtr *pFd);
void taosReleaseCrashLogFile(TdFilePtr pFile, bool truncateFile); void taosReleaseCrashLogFile(TdFilePtr pFile, bool truncateFile);
int32_t initCrashLogWriter();
void checkAndPrepareCrashInfo();
void writeCrashLogToFile(int signum, void *sigInfo, char *nodeType, int64_t clusterId, int64_t startTime);
// clang-format off // clang-format off
#define uFatal(...) { if (uDebugFlag & DEBUG_FATAL) { taosPrintLog("UTL FATAL", DEBUG_FATAL, tsLogEmbedded ? 255 : uDebugFlag, __VA_ARGS__); }} #define uFatal(...) { if (uDebugFlag & DEBUG_FATAL) { taosPrintLog("UTL FATAL", DEBUG_FATAL, tsLogEmbedded ? 255 : uDebugFlag, __VA_ARGS__); }}
#define uError(...) { if (uDebugFlag & DEBUG_ERROR) { taosPrintLog("UTL ERROR ", DEBUG_ERROR, tsLogEmbedded ? 255 : uDebugFlag, __VA_ARGS__); }} #define uError(...) { if (uDebugFlag & DEBUG_ERROR) { taosPrintLog("UTL ERROR ", DEBUG_ERROR, tsLogEmbedded ? 255 : uDebugFlag, __VA_ARGS__); }}

View File

@ -43,7 +43,7 @@
#endif #endif
#ifndef CUS_PROMPT #ifndef CUS_PROMPT
#define CUS_PROMPT "tao" #define CUS_PROMPT "taos"
#endif #endif
#define TSC_VAR_NOT_RELEASE 1 #define TSC_VAR_NOT_RELEASE 1
@ -815,6 +815,12 @@ static void *tscCrashReportThreadFp(void *param) {
int32_t reportPeriodNum = 3600 * 1000 / sleepTime; int32_t reportPeriodNum = 3600 * 1000 / sleepTime;
int32_t loopTimes = reportPeriodNum; int32_t loopTimes = reportPeriodNum;
code = initCrashLogWriter();
if (code) {
tscError("failed to init crash log writer, code:%s", tstrerror(code));
return NULL;
}
#ifdef WINDOWS #ifdef WINDOWS
if (taosCheckCurrentInDll()) { if (taosCheckCurrentInDll()) {
atexit(crashReportThreadFuncUnexpectedStopped); atexit(crashReportThreadFuncUnexpectedStopped);
@ -832,8 +838,10 @@ static void *tscCrashReportThreadFp(void *param) {
} }
while (1) { while (1) {
checkAndPrepareCrashInfo();
if (clientStop > 0) break; if (clientStop > 0) break;
if (loopTimes++ < reportPeriodNum) { if (loopTimes++ < reportPeriodNum) {
if(loopTimes < 0) loopTimes = reportPeriodNum;
taosMsleep(sleepTime); taosMsleep(sleepTime);
continue; continue;
} }
@ -921,21 +929,7 @@ void tscStopCrashReport() {
} }
void tscWriteCrashInfo(int signum, void *sigInfo, void *context) { void tscWriteCrashInfo(int signum, void *sigInfo, void *context) {
char *pMsg = NULL; writeCrashLogToFile(signum, sigInfo, CUS_PROMPT, lastClusterId, appInfo.startTime);
const char *flags = "UTL FATAL ";
ELogLevel level = DEBUG_FATAL;
int32_t dflag = 255;
int64_t msgLen = -1;
if (tsEnableCrashReport) {
if (taosGenCrashJsonMsg(signum, &pMsg, lastClusterId, appInfo.startTime)) {
taosPrintLog(flags, level, dflag, "failed to generate crash json msg");
} else {
msgLen = strlen(pMsg);
}
}
taosLogCrashInfo("taos", pMsg, msgLen, signum, sigInfo);
} }
void taos_init_imp(void) { void taos_init_imp(void) {
@ -969,7 +963,7 @@ void taos_init_imp(void) {
} }
taosHashSetFreeFp(appInfo.pInstMap, destroyAppInst); taosHashSetFreeFp(appInfo.pInstMap, destroyAppInst);
const char *logName = CUS_PROMPT "slog"; const char *logName = CUS_PROMPT "log";
ENV_ERR_RET(taosInitLogOutput(&logName), "failed to init log output"); ENV_ERR_RET(taosInitLogOutput(&logName), "failed to init log output");
if (taosCreateLog(logName, 10, configDir, NULL, NULL, NULL, NULL, 1) != 0) { if (taosCreateLog(logName, 10, configDir, NULL, NULL, NULL, NULL, 1) != 0) {
(void)printf(" WARING: Create %s failed:%s. configDir=%s\n", logName, strerror(errno), configDir); (void)printf(" WARING: Create %s failed:%s. configDir=%s\n", logName, strerror(errno), configDir);

View File

@ -131,25 +131,7 @@ void dmLogCrash(int signum, void *sigInfo, void *context) {
if (taosIgnSignal(SIGSEGV) != 0) { if (taosIgnSignal(SIGSEGV) != 0) {
dWarn("failed to ignore signal SIGABRT"); dWarn("failed to ignore signal SIGABRT");
} }
writeCrashLogToFile(signum, sigInfo, CUS_PROMPT "d", dmGetClusterId(), global.startTime);
char *pMsg = NULL;
const char *flags = "UTL FATAL ";
ELogLevel level = DEBUG_FATAL;
int32_t dflag = 255;
int64_t msgLen = -1;
if (tsEnableCrashReport) {
if (taosGenCrashJsonMsg(signum, &pMsg, dmGetClusterId(), global.startTime)) {
taosPrintLog(flags, level, dflag, "failed to generate crash json msg");
goto _return;
} else {
msgLen = strlen(pMsg);
}
}
_return:
taosLogCrashInfo(CUS_PROMPT "d", pMsg, msgLen, signum, sigInfo);
#ifdef _TD_DARWIN_64 #ifdef _TD_DARWIN_64
exit(signum); exit(signum);
@ -177,6 +159,15 @@ static void dmSetSignalHandle() {
if (taosSetSignal(SIGBREAK, dmStopDnode) != 0) { if (taosSetSignal(SIGBREAK, dmStopDnode) != 0) {
dWarn("failed to set signal SIGUSR1"); dWarn("failed to set signal SIGUSR1");
} }
if (taosSetSignal(SIGABRT, dmLogCrash) != 0) {
dWarn("failed to set signal SIGUSR1");
}
if (taosSetSignal(SIGFPE, dmLogCrash) != 0) {
dWarn("failed to set signal SIGUSR1");
}
if (taosSetSignal(SIGSEGV, dmLogCrash) != 0) {
dWarn("failed to set signal SIGUSR1");
}
#ifndef WINDOWS #ifndef WINDOWS
if (taosSetSignal(SIGTSTP, dmStopDnode) != 0) { if (taosSetSignal(SIGTSTP, dmStopDnode) != 0) {
dWarn("failed to set signal SIGUSR1"); dWarn("failed to set signal SIGUSR1");
@ -184,6 +175,9 @@ static void dmSetSignalHandle() {
if (taosSetSignal(SIGQUIT, dmStopDnode) != 0) { if (taosSetSignal(SIGQUIT, dmStopDnode) != 0) {
dWarn("failed to set signal SIGUSR1"); dWarn("failed to set signal SIGUSR1");
} }
if (taosSetSignal(SIGBUS, dmLogCrash) != 0) {
dWarn("failed to set signal SIGUSR1");
}
#endif #endif
} }

View File

@ -274,14 +274,20 @@ static void *dmCrashReportThreadFp(void *param) {
dError("failed to init telemetry since %s", tstrerror(code)); dError("failed to init telemetry since %s", tstrerror(code));
return NULL; return NULL;
} }
code = initCrashLogWriter();
if (code != 0) {
dError("failed to init crash log writer since %s", tstrerror(code));
return NULL;
}
while (1) { while (1) {
checkAndPrepareCrashInfo();
if (pMgmt->pData->dropped || pMgmt->pData->stopped) break; if (pMgmt->pData->dropped || pMgmt->pData->stopped) break;
if (loopTimes++ < reportPeriodNum) { if (loopTimes++ < reportPeriodNum) {
taosMsleep(sleepTime); taosMsleep(sleepTime);
if(loopTimes < 0) loopTimes = reportPeriodNum;
continue; continue;
} }
taosReadCrashInfo(filepath, &pMsg, &msgLen, &pFile); taosReadCrashInfo(filepath, &pMsg, &msgLen, &pFile);
if (pMsg && msgLen > 0) { if (pMsg && msgLen > 0) {
if (taosSendTelemReport(&mgt, tsSvrCrashReportUri, tsTelemPort, pMsg, msgLen, HTTP_FLAT) != 0) { if (taosSendTelemReport(&mgt, tsSvrCrashReportUri, tsTelemPort, pMsg, msgLen, HTTP_FLAT) != 0) {

View File

@ -21,6 +21,7 @@
#include "tjson.h" #include "tjson.h"
#include "ttime.h" #include "ttime.h"
#include "tutil.h" #include "tutil.h"
#include "tcommon.h"
#define LOG_MAX_LINE_SIZE (10024) #define LOG_MAX_LINE_SIZE (10024)
#define LOG_MAX_LINE_BUFFER_SIZE (LOG_MAX_LINE_SIZE + 3) #define LOG_MAX_LINE_BUFFER_SIZE (LOG_MAX_LINE_SIZE + 3)
@ -1264,6 +1265,8 @@ _return:
taosPrintLog(flags, level, dflag, "crash signal is %d", signum); taosPrintLog(flags, level, dflag, "crash signal is %d", signum);
// print the stack trace
#if 0
#ifdef _TD_DARWIN_64 #ifdef _TD_DARWIN_64
taosPrintTrace(flags, level, dflag, 4); taosPrintTrace(flags, level, dflag, 4);
#elif !defined(WINDOWS) #elif !defined(WINDOWS)
@ -1273,10 +1276,72 @@ _return:
#else #else
taosPrintTrace(flags, level, dflag, 8); taosPrintTrace(flags, level, dflag, 8);
#endif #endif
#endif
taosMemoryFree(pMsg); taosMemoryFree(pMsg);
} }
/*
 * Hand-off record between the thread that receives a crash signal
 * (writeCrashLogToFile) and the thread that actually writes the crash log
 * (writeCrashLogToFileInNewThead, polled through checkAndPrepareCrashInfo).
 */
typedef struct crashBasicInfo {
/* Set to true by initCrashLogWriter(); both sides do nothing while it is false. */
bool init;
/* Raised by writeCrashLogToFile(), cleared by the writer once the log has been written. */
bool isCrash;
/* Cluster id forwarded to taosGenCrashJsonMsg(). */
int64_t clusterId;
/* Start time forwarded to taosGenCrashJsonMsg(). */
int64_t startTime;
/* Node type string forwarded to taosLogCrashInfo(); the pointer is stored, not copied. */
char *nodeType;
/* Signal number passed to writeCrashLogToFile(). */
int signum;
/* Opaque signal info pointer passed to writeCrashLogToFile(); forwarded to taosLogCrashInfo(). */
void *sigInfo;
/* Initialised with value 0; posted by the writer, waited on by writeCrashLogToFile(). */
tsem_t sem;
} crashBasicInfo;
/* Single shared instance, zero-initialised so that init and isCrash start out false. */
crashBasicInfo gCrashBasicInfo = {0};
/**
 * Write the pending crash report, if there is one.
 *
 * Does nothing unless the writer has been initialised and a crash has been
 * flagged in gCrashBasicInfo. Otherwise it optionally builds the crash JSON
 * message (only when tsEnableCrashReport is set), passes the recorded crash
 * details to taosLogCrashInfo(), clears the crash flag and posts the
 * semaphore so the thread blocked in writeCrashLogToFile() can continue.
 */
static void writeCrashLogToFileInNewThead(void) {
  if (!gCrashBasicInfo.init || !gCrashBasicInfo.isCrash) return;

  const char *flags = "UTL FATAL ";
  ELogLevel   level = DEBUG_FATAL;
  int32_t     dflag = 255;
  char       *pMsg = NULL;
  int64_t     msgLen = -1;  // stays -1 when no JSON message could be produced

  if (tsEnableCrashReport) {
    if (taosGenCrashJsonMsg(gCrashBasicInfo.signum, &pMsg, gCrashBasicInfo.clusterId, gCrashBasicInfo.startTime)) {
      taosPrintLog(flags, level, dflag, "failed to generate crash json msg");
    } else {
      msgLen = strlen(pMsg);
    }
  }

  // NOTE(review): pMsg is not freed here; presumably taosLogCrashInfo() takes ownership - confirm.
  taosLogCrashInfo(gCrashBasicInfo.nodeType, pMsg, msgLen, gCrashBasicInfo.signum, gCrashBasicInfo.sigInfo);

  // Clear the flag before waking the waiter so the next poll does not write the same crash again.
  gCrashBasicInfo.isCrash = false;

  // A failed post leaves the crashed thread blocked, so at least leave a trace in the log.
  int32_t code = tsem_post(&gCrashBasicInfo.sem);
  if (code != 0) {
    taosPrintLog(flags, level, dflag, "failed to post crash log semaphore, code:%d", code);
  }
}
/**
 * Polling entry point for the crash-report threads: writes the crash log if a
 * crash has been flagged since the previous call, otherwise returns at once.
 */
void checkAndPrepareCrashInfo() {
  // Plain call: ISO C does not allow `return expr;` in a function returning void.
  writeCrashLogToFileInNewThead();
}
/**
 * Prepare the shared crash-log hand-off state.
 *
 * The writer is marked as initialised only after its semaphore has been
 * created successfully, so a failed init can never leave other code waiting
 * on an invalid semaphore. Calling this again after a successful init is a
 * no-op, because re-initialising a semaphore that may already be in use is
 * not safe.
 *
 * NOTE(review): two threads calling this for the first time at the same
 * moment are not serialised here - confirm the callers cannot overlap.
 *
 * @return 0 on success (or if already initialised), otherwise the code
 *         returned by tsem_init().
 */
int32_t initCrashLogWriter() {
  if (gCrashBasicInfo.init) {
    return 0;
  }

  int32_t code = tsem_init(&gCrashBasicInfo.sem, 0, 0);
  if (code != 0) {
    // Leave init == false; the caller reports the error.
    return code;
  }

  gCrashBasicInfo.isCrash = false;
  gCrashBasicInfo.init = true;
  uInfo("crashLogWriter init finished.");
  return code;
}
/**
 * Ask the crash-log writer to record a crash and wait until it has done so.
 *
 * Stores the crash details in gCrashBasicInfo, raises the crash flag and then
 * blocks on the semaphore that the writer posts once the log is on disk.
 * Returns immediately if the writer has not been initialised.
 *
 * NOTE(review): if the crashing thread is the one that polls
 * checkAndPrepareCrashInfo(), nobody is left to post the semaphore and this
 * call would block forever - confirm how that case is meant to be handled.
 *
 * @param signum     signal number of the crash
 * @param sigInfo    opaque signal info pointer, forwarded to the writer
 * @param nodeType   node type string; the pointer is stored, not copied
 * @param clusterId  cluster id included in the crash report
 * @param startTime  start time included in the crash report
 */
void writeCrashLogToFile(int signum, void *sigInfo, char *nodeType, int64_t clusterId, int64_t startTime) {
  if (!gCrashBasicInfo.init) {
    uInfo("crashLogWriter has not init!");
    return;
  }
  uInfo("write crash log to file, signum:%d, nodeType:%s, clusterId:%" PRId64, signum, nodeType, clusterId);

  // Store every detail first and raise isCrash last: the writer starts reading
  // these fields as soon as it sees the flag.
  // NOTE(review): these are plain stores with no memory barrier - confirm
  // whether the project's atomic helpers should be used here.
  gCrashBasicInfo.clusterId = clusterId;
  gCrashBasicInfo.startTime = startTime;
  gCrashBasicInfo.nodeType = nodeType;
  gCrashBasicInfo.signum = signum;
  gCrashBasicInfo.sigInfo = sigInfo;
  gCrashBasicInfo.isCrash = true;

  int32_t code = tsem_wait(&gCrashBasicInfo.sem);
  if (code != 0) {
    // Do not claim success when the wait itself failed.
    uInfo("failed to wait for crash log writer, signum:%d, code:%d", signum, code);
    return;
  }
  uInfo("write crash log to file done, signum:%d, nodeType:%s, clusterId:%" PRId64, signum, nodeType, clusterId);
}
void taosReadCrashInfo(char *filepath, char **pMsg, int64_t *pMsgLen, TdFilePtr *pFd) { void taosReadCrashInfo(char *filepath, char **pMsg, int64_t *pMsgLen, TdFilePtr *pFd) {
const char *flags = "UTL FATAL "; const char *flags = "UTL FATAL ";
ELogLevel level = DEBUG_FATAL; ELogLevel level = DEBUG_FATAL;

View File

@ -49,14 +49,12 @@ int main(int argc, char *argv[]) {
shell.args.local = false; shell.args.local = false;
#endif #endif
#if 0
#if !defined(WINDOWS) #if !defined(WINDOWS)
taosSetSignal(SIGBUS, shellCrashHandler); taosSetSignal(SIGBUS, shellCrashHandler);
#endif #endif
taosSetSignal(SIGABRT, shellCrashHandler); taosSetSignal(SIGABRT, shellCrashHandler);
taosSetSignal(SIGFPE, shellCrashHandler); taosSetSignal(SIGFPE, shellCrashHandler);
taosSetSignal(SIGSEGV, shellCrashHandler); taosSetSignal(SIGSEGV, shellCrashHandler);
#endif
if (shellCheckIntSize() != 0) { if (shellCheckIntSize() != 0) {
return -1; return -1;