Merge pull request #29534 from taosdata/fix/TD-31605/crashLogMain
fix: ts31605, write crash log
This commit is contained in:
commit
ad6e9c46d8
|
@ -119,6 +119,11 @@ void taosLogCrashInfo(char *nodeType, char *pMsg, int64_t msgLen, int signum, vo
|
||||||
void taosReadCrashInfo(char *filepath, char **pMsg, int64_t *pMsgLen, TdFilePtr *pFd);
|
void taosReadCrashInfo(char *filepath, char **pMsg, int64_t *pMsgLen, TdFilePtr *pFd);
|
||||||
void taosReleaseCrashLogFile(TdFilePtr pFile, bool truncateFile);
|
void taosReleaseCrashLogFile(TdFilePtr pFile, bool truncateFile);
|
||||||
|
|
||||||
|
int32_t initCrashLogWriter();
|
||||||
|
void checkAndPrepareCrashInfo();
|
||||||
|
bool reportThreadSetQuit();
|
||||||
|
void writeCrashLogToFile(int signum, void *sigInfo, char *nodeType, int64_t clusterId, int64_t startTime);
|
||||||
|
|
||||||
// clang-format off
|
// clang-format off
|
||||||
#define uFatal(...) { if (uDebugFlag & DEBUG_FATAL) { taosPrintLog("UTL FATAL", DEBUG_FATAL, tsLogEmbedded ? 255 : uDebugFlag, __VA_ARGS__); }}
|
#define uFatal(...) { if (uDebugFlag & DEBUG_FATAL) { taosPrintLog("UTL FATAL", DEBUG_FATAL, tsLogEmbedded ? 255 : uDebugFlag, __VA_ARGS__); }}
|
||||||
#define uError(...) { if (uDebugFlag & DEBUG_ERROR) { taosPrintLog("UTL ERROR ", DEBUG_ERROR, tsLogEmbedded ? 255 : uDebugFlag, __VA_ARGS__); }}
|
#define uError(...) { if (uDebugFlag & DEBUG_ERROR) { taosPrintLog("UTL ERROR ", DEBUG_ERROR, tsLogEmbedded ? 255 : uDebugFlag, __VA_ARGS__); }}
|
||||||
|
|
|
@ -43,7 +43,7 @@
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef CUS_PROMPT
|
#ifndef CUS_PROMPT
|
||||||
#define CUS_PROMPT "tao"
|
#define CUS_PROMPT "taos"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define TSC_VAR_NOT_RELEASE 1
|
#define TSC_VAR_NOT_RELEASE 1
|
||||||
|
@ -831,9 +831,17 @@ static void *tscCrashReportThreadFp(void *param) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
code = initCrashLogWriter();
|
||||||
|
if (code) {
|
||||||
|
tscError("failed to init crash log writer, code:%s", tstrerror(code));
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
while (1) {
|
while (1) {
|
||||||
if (clientStop > 0) break;
|
checkAndPrepareCrashInfo();
|
||||||
|
if (clientStop > 0 && reportThreadSetQuit()) break;
|
||||||
if (loopTimes++ < reportPeriodNum) {
|
if (loopTimes++ < reportPeriodNum) {
|
||||||
|
if (loopTimes < 0) loopTimes = reportPeriodNum;
|
||||||
taosMsleep(sleepTime);
|
taosMsleep(sleepTime);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -921,21 +929,7 @@ void tscStopCrashReport() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void tscWriteCrashInfo(int signum, void *sigInfo, void *context) {
|
void tscWriteCrashInfo(int signum, void *sigInfo, void *context) {
|
||||||
char *pMsg = NULL;
|
writeCrashLogToFile(signum, sigInfo, CUS_PROMPT, lastClusterId, appInfo.startTime);
|
||||||
const char *flags = "UTL FATAL ";
|
|
||||||
ELogLevel level = DEBUG_FATAL;
|
|
||||||
int32_t dflag = 255;
|
|
||||||
int64_t msgLen = -1;
|
|
||||||
|
|
||||||
if (tsEnableCrashReport) {
|
|
||||||
if (taosGenCrashJsonMsg(signum, &pMsg, lastClusterId, appInfo.startTime)) {
|
|
||||||
taosPrintLog(flags, level, dflag, "failed to generate crash json msg");
|
|
||||||
} else {
|
|
||||||
msgLen = strlen(pMsg);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
taosLogCrashInfo("taos", pMsg, msgLen, signum, sigInfo);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void taos_init_imp(void) {
|
void taos_init_imp(void) {
|
||||||
|
@ -969,7 +963,7 @@ void taos_init_imp(void) {
|
||||||
}
|
}
|
||||||
taosHashSetFreeFp(appInfo.pInstMap, destroyAppInst);
|
taosHashSetFreeFp(appInfo.pInstMap, destroyAppInst);
|
||||||
|
|
||||||
const char *logName = CUS_PROMPT "slog";
|
const char *logName = CUS_PROMPT "log";
|
||||||
ENV_ERR_RET(taosInitLogOutput(&logName), "failed to init log output");
|
ENV_ERR_RET(taosInitLogOutput(&logName), "failed to init log output");
|
||||||
if (taosCreateLog(logName, 10, configDir, NULL, NULL, NULL, NULL, 1) != 0) {
|
if (taosCreateLog(logName, 10, configDir, NULL, NULL, NULL, NULL, 1) != 0) {
|
||||||
(void)printf(" WARING: Create %s failed:%s. configDir=%s\n", logName, strerror(errno), configDir);
|
(void)printf(" WARING: Create %s failed:%s. configDir=%s\n", logName, strerror(errno), configDir);
|
||||||
|
|
|
@ -231,6 +231,7 @@ int32_t taosGenCrashJsonMsg(int signum, char** pMsg, int64_t clusterId, int64_t
|
||||||
TAOS_CHECK_GOTO(tjsonAddIntegerToObject(pJson, "crashSig", signum), NULL, _exit);
|
TAOS_CHECK_GOTO(tjsonAddIntegerToObject(pJson, "crashSig", signum), NULL, _exit);
|
||||||
TAOS_CHECK_GOTO(tjsonAddIntegerToObject(pJson, "crashTs", taosGetTimestampUs()), NULL, _exit);
|
TAOS_CHECK_GOTO(tjsonAddIntegerToObject(pJson, "crashTs", taosGetTimestampUs()), NULL, _exit);
|
||||||
|
|
||||||
|
#if 0
|
||||||
#ifdef _TD_DARWIN_64
|
#ifdef _TD_DARWIN_64
|
||||||
taosLogTraceToBuf(tmp, sizeof(tmp), 4);
|
taosLogTraceToBuf(tmp, sizeof(tmp), 4);
|
||||||
#elif !defined(WINDOWS)
|
#elif !defined(WINDOWS)
|
||||||
|
@ -240,7 +241,7 @@ int32_t taosGenCrashJsonMsg(int signum, char** pMsg, int64_t clusterId, int64_t
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
TAOS_CHECK_GOTO(tjsonAddStringToObject(pJson, "stackInfo", tmp), NULL, _exit);
|
TAOS_CHECK_GOTO(tjsonAddStringToObject(pJson, "stackInfo", tmp), NULL, _exit);
|
||||||
|
#endif
|
||||||
char* pCont = tjsonToString(pJson);
|
char* pCont = tjsonToString(pJson);
|
||||||
if (pCont == NULL) {
|
if (pCont == NULL) {
|
||||||
code = terrno;
|
code = terrno;
|
||||||
|
|
|
@ -131,25 +131,7 @@ void dmLogCrash(int signum, void *sigInfo, void *context) {
|
||||||
if (taosIgnSignal(SIGSEGV) != 0) {
|
if (taosIgnSignal(SIGSEGV) != 0) {
|
||||||
dWarn("failed to ignore signal SIGABRT");
|
dWarn("failed to ignore signal SIGABRT");
|
||||||
}
|
}
|
||||||
|
writeCrashLogToFile(signum, sigInfo, CUS_PROMPT "d", dmGetClusterId(), global.startTime);
|
||||||
char *pMsg = NULL;
|
|
||||||
const char *flags = "UTL FATAL ";
|
|
||||||
ELogLevel level = DEBUG_FATAL;
|
|
||||||
int32_t dflag = 255;
|
|
||||||
int64_t msgLen = -1;
|
|
||||||
|
|
||||||
if (tsEnableCrashReport) {
|
|
||||||
if (taosGenCrashJsonMsg(signum, &pMsg, dmGetClusterId(), global.startTime)) {
|
|
||||||
taosPrintLog(flags, level, dflag, "failed to generate crash json msg");
|
|
||||||
goto _return;
|
|
||||||
} else {
|
|
||||||
msgLen = strlen(pMsg);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
_return:
|
|
||||||
|
|
||||||
taosLogCrashInfo(CUS_PROMPT "d", pMsg, msgLen, signum, sigInfo);
|
|
||||||
|
|
||||||
#ifdef _TD_DARWIN_64
|
#ifdef _TD_DARWIN_64
|
||||||
exit(signum);
|
exit(signum);
|
||||||
|
@ -177,6 +159,15 @@ static void dmSetSignalHandle() {
|
||||||
if (taosSetSignal(SIGBREAK, dmStopDnode) != 0) {
|
if (taosSetSignal(SIGBREAK, dmStopDnode) != 0) {
|
||||||
dWarn("failed to set signal SIGUSR1");
|
dWarn("failed to set signal SIGUSR1");
|
||||||
}
|
}
|
||||||
|
if (taosSetSignal(SIGABRT, dmLogCrash) != 0) {
|
||||||
|
dWarn("failed to set signal SIGUSR1");
|
||||||
|
}
|
||||||
|
if (taosSetSignal(SIGFPE, dmLogCrash) != 0) {
|
||||||
|
dWarn("failed to set signal SIGUSR1");
|
||||||
|
}
|
||||||
|
if (taosSetSignal(SIGSEGV, dmLogCrash) != 0) {
|
||||||
|
dWarn("failed to set signal SIGUSR1");
|
||||||
|
}
|
||||||
#ifndef WINDOWS
|
#ifndef WINDOWS
|
||||||
if (taosSetSignal(SIGTSTP, dmStopDnode) != 0) {
|
if (taosSetSignal(SIGTSTP, dmStopDnode) != 0) {
|
||||||
dWarn("failed to set signal SIGUSR1");
|
dWarn("failed to set signal SIGUSR1");
|
||||||
|
@ -184,6 +175,9 @@ static void dmSetSignalHandle() {
|
||||||
if (taosSetSignal(SIGQUIT, dmStopDnode) != 0) {
|
if (taosSetSignal(SIGQUIT, dmStopDnode) != 0) {
|
||||||
dWarn("failed to set signal SIGUSR1");
|
dWarn("failed to set signal SIGUSR1");
|
||||||
}
|
}
|
||||||
|
if (taosSetSignal(SIGBUS, dmLogCrash) != 0) {
|
||||||
|
dWarn("failed to set signal SIGUSR1");
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -274,14 +274,22 @@ static void *dmCrashReportThreadFp(void *param) {
|
||||||
dError("failed to init telemetry since %s", tstrerror(code));
|
dError("failed to init telemetry since %s", tstrerror(code));
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
code = initCrashLogWriter();
|
||||||
|
if (code != 0) {
|
||||||
|
dError("failed to init crash log writer since %s", tstrerror(code));
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
while (1) {
|
while (1) {
|
||||||
if (pMgmt->pData->dropped || pMgmt->pData->stopped) break;
|
checkAndPrepareCrashInfo();
|
||||||
|
if ((pMgmt->pData->dropped || pMgmt->pData->stopped) && reportThreadSetQuit()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
if (loopTimes++ < reportPeriodNum) {
|
if (loopTimes++ < reportPeriodNum) {
|
||||||
taosMsleep(sleepTime);
|
taosMsleep(sleepTime);
|
||||||
|
if(loopTimes < 0) loopTimes = reportPeriodNum;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
taosReadCrashInfo(filepath, &pMsg, &msgLen, &pFile);
|
taosReadCrashInfo(filepath, &pMsg, &msgLen, &pFile);
|
||||||
if (pMsg && msgLen > 0) {
|
if (pMsg && msgLen > 0) {
|
||||||
if (taosSendTelemReport(&mgt, tsSvrCrashReportUri, tsTelemPort, pMsg, msgLen, HTTP_FLAT) != 0) {
|
if (taosSendTelemReport(&mgt, tsSvrCrashReportUri, tsTelemPort, pMsg, msgLen, HTTP_FLAT) != 0) {
|
||||||
|
|
|
@ -21,6 +21,7 @@
|
||||||
#include "tjson.h"
|
#include "tjson.h"
|
||||||
#include "ttime.h"
|
#include "ttime.h"
|
||||||
#include "tutil.h"
|
#include "tutil.h"
|
||||||
|
#include "tcommon.h"
|
||||||
|
|
||||||
#define LOG_MAX_LINE_SIZE (10024)
|
#define LOG_MAX_LINE_SIZE (10024)
|
||||||
#define LOG_MAX_LINE_BUFFER_SIZE (LOG_MAX_LINE_SIZE + 3)
|
#define LOG_MAX_LINE_BUFFER_SIZE (LOG_MAX_LINE_SIZE + 3)
|
||||||
|
@ -1264,6 +1265,8 @@ _return:
|
||||||
|
|
||||||
taosPrintLog(flags, level, dflag, "crash signal is %d", signum);
|
taosPrintLog(flags, level, dflag, "crash signal is %d", signum);
|
||||||
|
|
||||||
|
// print the stack trace
|
||||||
|
#if 0
|
||||||
#ifdef _TD_DARWIN_64
|
#ifdef _TD_DARWIN_64
|
||||||
taosPrintTrace(flags, level, dflag, 4);
|
taosPrintTrace(flags, level, dflag, 4);
|
||||||
#elif !defined(WINDOWS)
|
#elif !defined(WINDOWS)
|
||||||
|
@ -1273,10 +1276,109 @@ _return:
|
||||||
#else
|
#else
|
||||||
taosPrintTrace(flags, level, dflag, 8);
|
taosPrintTrace(flags, level, dflag, 8);
|
||||||
#endif
|
#endif
|
||||||
|
#endif
|
||||||
taosMemoryFree(pMsg);
|
taosMemoryFree(pMsg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Life-cycle states of the crash-log writer handshake stored in gCrashBasicInfo.status.
typedef enum {
  CRASH_LOG_WRITER_UNKNOWN = 0,  // value of a zero-initialized status (writer not initialized)
  CRASH_LOG_WRITER_INIT    = 1,  // set by initCrashLogWriter(); also restored after a crash log is written
  CRASH_LOG_WRITER_WAIT    = 2,  // a crashing thread has requested a crash log
  CRASH_LOG_WRITER_RUNNING = 3,  // the checker has picked up the request and is writing the log
  CRASH_LOG_WRITER_QUIT    = 4   // set by reportThreadSetQuit() when leaving from the INIT state
} CrashStatus;
|
||||||
|
typedef struct crashBasicInfo {
|
||||||
|
int8_t status;
|
||||||
|
int64_t clusterId;
|
||||||
|
int64_t startTime;
|
||||||
|
char *nodeType;
|
||||||
|
int signum;
|
||||||
|
void *sigInfo;
|
||||||
|
tsem_t sem;
|
||||||
|
int64_t reportThread;
|
||||||
|
} crashBasicInfo;
|
||||||
|
|
||||||
|
// Single shared crash hand-off record. Zero-initialized, so status starts as CRASH_LOG_WRITER_UNKNOWN
// until initCrashLogWriter() runs.
crashBasicInfo gCrashBasicInfo = {0};
|
||||||
|
|
||||||
|
// Unconditionally overwrites gCrashBasicInfo.status with `status` using an atomic 8-bit store.
void setCrashWriterStatus(int8_t status) { atomic_store_8(&gCrashBasicInfo.status, status); }
|
||||||
|
bool reportThreadSetQuit() {
|
||||||
|
CrashStatus status =
|
||||||
|
atomic_val_compare_exchange_8(&gCrashBasicInfo.status, CRASH_LOG_WRITER_INIT, CRASH_LOG_WRITER_QUIT);
|
||||||
|
if (status == CRASH_LOG_WRITER_INIT) {
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool setReportThreadWait() {
|
||||||
|
CrashStatus status =
|
||||||
|
atomic_val_compare_exchange_8(&gCrashBasicInfo.status, CRASH_LOG_WRITER_INIT, CRASH_LOG_WRITER_WAIT);
|
||||||
|
if (status == CRASH_LOG_WRITER_INIT) {
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
bool setReportThreadRunning() {
|
||||||
|
CrashStatus status =
|
||||||
|
atomic_val_compare_exchange_8(&gCrashBasicInfo.status, CRASH_LOG_WRITER_WAIT, CRASH_LOG_WRITER_RUNNING);
|
||||||
|
if (status == CRASH_LOG_WRITER_WAIT) {
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
static void checkWriteCrashLogToFileInNewThead() {
|
||||||
|
if (setReportThreadRunning()) {
|
||||||
|
char *pMsg = NULL;
|
||||||
|
const char *flags = "UTL FATAL ";
|
||||||
|
ELogLevel level = DEBUG_FATAL;
|
||||||
|
int32_t dflag = 255;
|
||||||
|
int64_t msgLen = -1;
|
||||||
|
|
||||||
|
if (tsEnableCrashReport) {
|
||||||
|
if (taosGenCrashJsonMsg(gCrashBasicInfo.signum, &pMsg, gCrashBasicInfo.clusterId, gCrashBasicInfo.startTime)) {
|
||||||
|
taosPrintLog(flags, level, dflag, "failed to generate crash json msg");
|
||||||
|
} else {
|
||||||
|
msgLen = strlen(pMsg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
taosLogCrashInfo(gCrashBasicInfo.nodeType, pMsg, msgLen, gCrashBasicInfo.signum, gCrashBasicInfo.sigInfo);
|
||||||
|
setCrashWriterStatus(CRASH_LOG_WRITER_INIT);
|
||||||
|
tsem_post(&gCrashBasicInfo.sem);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Public entry point polled by the crash report threads: services a pending crash-log request,
// if any, by delegating to checkWriteCrashLogToFileInNewThead().
void checkAndPrepareCrashInfo() {
  // Plain call: a `return <expr>;` is not permitted in a function returning void (C11 6.8.6.4).
  checkWriteCrashLogToFileInNewThead();
}
|
||||||
|
|
||||||
|
int32_t initCrashLogWriter() {
|
||||||
|
int32_t code = tsem_init(&gCrashBasicInfo.sem, 0, 0);
|
||||||
|
if (code != 0) {
|
||||||
|
uError("failed to init sem for crashLogWriter, code:%d", code);
|
||||||
|
return code;
|
||||||
|
}
|
||||||
|
gCrashBasicInfo.reportThread = taosGetSelfPthreadId();
|
||||||
|
setCrashWriterStatus(CRASH_LOG_WRITER_INIT);
|
||||||
|
return code;
|
||||||
|
}
|
||||||
|
|
||||||
|
void writeCrashLogToFile(int signum, void *sigInfo, char *nodeType, int64_t clusterId, int64_t startTime) {
|
||||||
|
if (gCrashBasicInfo.reportThread == taosGetSelfPthreadId()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (setReportThreadWait()) {
|
||||||
|
gCrashBasicInfo.clusterId = clusterId;
|
||||||
|
gCrashBasicInfo.startTime = startTime;
|
||||||
|
gCrashBasicInfo.nodeType = nodeType;
|
||||||
|
gCrashBasicInfo.signum = signum;
|
||||||
|
gCrashBasicInfo.sigInfo = sigInfo;
|
||||||
|
tsem_wait(&gCrashBasicInfo.sem);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void taosReadCrashInfo(char *filepath, char **pMsg, int64_t *pMsgLen, TdFilePtr *pFd) {
|
void taosReadCrashInfo(char *filepath, char **pMsg, int64_t *pMsgLen, TdFilePtr *pFd) {
|
||||||
const char *flags = "UTL FATAL ";
|
const char *flags = "UTL FATAL ";
|
||||||
ELogLevel level = DEBUG_FATAL;
|
ELogLevel level = DEBUG_FATAL;
|
||||||
|
|
|
@ -49,14 +49,12 @@ int main(int argc, char *argv[]) {
|
||||||
shell.args.local = false;
|
shell.args.local = false;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if 0
|
|
||||||
#if !defined(WINDOWS)
|
#if !defined(WINDOWS)
|
||||||
taosSetSignal(SIGBUS, shellCrashHandler);
|
taosSetSignal(SIGBUS, shellCrashHandler);
|
||||||
#endif
|
#endif
|
||||||
taosSetSignal(SIGABRT, shellCrashHandler);
|
taosSetSignal(SIGABRT, shellCrashHandler);
|
||||||
taosSetSignal(SIGFPE, shellCrashHandler);
|
taosSetSignal(SIGFPE, shellCrashHandler);
|
||||||
taosSetSignal(SIGSEGV, shellCrashHandler);
|
taosSetSignal(SIGSEGV, shellCrashHandler);
|
||||||
#endif
|
|
||||||
|
|
||||||
if (shellCheckIntSize() != 0) {
|
if (shellCheckIntSize() != 0) {
|
||||||
return -1;
|
return -1;
|
||||||
|
|
Loading…
Reference in New Issue