crash log: safe code

This commit is contained in:
factosea 2025-01-10 06:09:55 +08:00
parent 6629bf4d80
commit bd9ec86061
4 changed files with 69 additions and 33 deletions

View File

@ -121,6 +121,7 @@ void taosReleaseCrashLogFile(TdFilePtr pFile, bool truncateFile);
int32_t initCrashLogWriter(); int32_t initCrashLogWriter();
void checkAndPrepareCrashInfo(); void checkAndPrepareCrashInfo();
bool reportThreadSetQuit();
void writeCrashLogToFile(int signum, void *sigInfo, char *nodeType, int64_t clusterId, int64_t startTime); void writeCrashLogToFile(int signum, void *sigInfo, char *nodeType, int64_t clusterId, int64_t startTime);
// clang-format off // clang-format off

View File

@ -839,9 +839,9 @@ static void *tscCrashReportThreadFp(void *param) {
while (1) { while (1) {
checkAndPrepareCrashInfo(); checkAndPrepareCrashInfo();
if (clientStop > 0) break; if (clientStop > 0 && reportThreadSetQuit()) break;
if (loopTimes++ < reportPeriodNum) { if (loopTimes++ < reportPeriodNum) {
if(loopTimes < 0) loopTimes = reportPeriodNum; if (loopTimes < 0) loopTimes = reportPeriodNum;
taosMsleep(sleepTime); taosMsleep(sleepTime);
continue; continue;
} }

View File

@ -282,7 +282,9 @@ static void *dmCrashReportThreadFp(void *param) {
while (1) { while (1) {
checkAndPrepareCrashInfo(); checkAndPrepareCrashInfo();
if (pMgmt->pData->dropped || pMgmt->pData->stopped) break; if ((pMgmt->pData->dropped || pMgmt->pData->stopped) && reportThreadSetQuit()) {
break;
}
if (loopTimes++ < reportPeriodNum) { if (loopTimes++ < reportPeriodNum) {
taosMsleep(sleepTime); taosMsleep(sleepTime);
if(loopTimes < 0) loopTimes = reportPeriodNum; if(loopTimes < 0) loopTimes = reportPeriodNum;

View File

@ -1280,9 +1280,15 @@ _return:
taosMemoryFree(pMsg); taosMemoryFree(pMsg);
} }
/*
 * States of the crash-log writer, kept in gCrashBasicInfo.status and
 * changed only through the atomic helpers in this file.
 * Numeric values are spelled out so they are visible when reading a raw
 * status byte.
 */
typedef enum {
  CRASH_LOG_WRITER_UNKNOWN = 0, /* value of a zero-initialized status */
  CRASH_LOG_WRITER_INIT    = 1,
  CRASH_LOG_WRITER_WAIT    = 2,
  CRASH_LOG_WRITER_RUNNING = 3,
  CRASH_LOG_WRITER_QUIT    = 4
} CrashStatus;
typedef struct crashBasicInfo { typedef struct crashBasicInfo {
bool init; int8_t status;
bool isCrash;
int64_t clusterId; int64_t clusterId;
int64_t startTime; int64_t startTime;
char *nodeType; char *nodeType;
@ -1292,8 +1298,38 @@ typedef struct crashBasicInfo {
} crashBasicInfo; } crashBasicInfo;
crashBasicInfo gCrashBasicInfo = {0}; crashBasicInfo gCrashBasicInfo = {0};
/*
 * Unconditionally overwrite the crash-log writer status with `status`,
 * using an atomic 8-bit store on gCrashBasicInfo.status.
 */
void setCrashWriterStatus(int8_t status) {
  atomic_store_8(&gCrashBasicInfo.status, status);
}
/*
 * Try to move the crash-log writer status from INIT to QUIT with a single
 * compare-and-exchange.
 *
 * Returns true when the value handed back by the exchange is INIT, false
 * for any other value.
 */
bool reportThreadSetQuit() {
  const int8_t prev =
      atomic_val_compare_exchange_8(&gCrashBasicInfo.status, CRASH_LOG_WRITER_INIT, CRASH_LOG_WRITER_QUIT);
  return prev == CRASH_LOG_WRITER_INIT;
}
/*
 * Try to move the crash-log writer status from INIT to WAIT with a single
 * compare-and-exchange.
 *
 * Returns true when the value handed back by the exchange is INIT, false
 * for any other value.
 */
bool setReportThreadWait() {
  const int8_t prev =
      atomic_val_compare_exchange_8(&gCrashBasicInfo.status, CRASH_LOG_WRITER_INIT, CRASH_LOG_WRITER_WAIT);
  return prev == CRASH_LOG_WRITER_INIT;
}
/*
 * Try to move the crash-log writer status from WAIT to RUNNING with a
 * single compare-and-exchange.
 *
 * Returns true only when this call performed the WAIT -> RUNNING
 * transition, false otherwise.
 *
 * The result is compared against CRASH_LOG_WRITER_WAIT (the expected old
 * value), matching reportThreadSetQuit() and setReportThreadWait(), which
 * both test the exchange result against the value they expected to replace.
 * Comparing against CRASH_LOG_WRITER_RUNNING reported failure on every
 * successful transition.
 * NOTE(review): assumes atomic_val_compare_exchange_8 returns the value held
 * before the exchange - confirm against its definition.
 */
bool setReportThreadRunning() {
  CrashStatus status =
      atomic_val_compare_exchange_8(&gCrashBasicInfo.status, CRASH_LOG_WRITER_WAIT, CRASH_LOG_WRITER_RUNNING);
  return status == CRASH_LOG_WRITER_WAIT;
}
static void writeCrashLogToFileInNewThead() { static void writeCrashLogToFileInNewThead() {
if (!gCrashBasicInfo.init || !gCrashBasicInfo.isCrash) return; if (setReportThreadRunning()) {
char *pMsg = NULL; char *pMsg = NULL;
const char *flags = "UTL FATAL "; const char *flags = "UTL FATAL ";
ELogLevel level = DEBUG_FATAL; ELogLevel level = DEBUG_FATAL;
@ -1308,8 +1344,9 @@ static void writeCrashLogToFileInNewThead() {
} }
} }
taosLogCrashInfo(gCrashBasicInfo.nodeType, pMsg, msgLen, gCrashBasicInfo.signum, gCrashBasicInfo.sigInfo); taosLogCrashInfo(gCrashBasicInfo.nodeType, pMsg, msgLen, gCrashBasicInfo.signum, gCrashBasicInfo.sigInfo);
gCrashBasicInfo.isCrash = false; setCrashWriterStatus(CRASH_LOG_WRITER_INIT);
tsem_post(&gCrashBasicInfo.sem); tsem_post(&gCrashBasicInfo.sem);
}
} }
void checkAndPrepareCrashInfo() { void checkAndPrepareCrashInfo() {
@ -1322,23 +1359,19 @@ int32_t initCrashLogWriter() {
uError("failed to init sem for crashLogWriter, code:%d", code); uError("failed to init sem for crashLogWriter, code:%d", code);
return code; return code;
} }
gCrashBasicInfo.isCrash = false; setCrashWriterStatus(CRASH_LOG_WRITER_INIT);
gCrashBasicInfo.init = true;
return code; return code;
} }
void writeCrashLogToFile(int signum, void *sigInfo, char *nodeType, int64_t clusterId, int64_t startTime) { void writeCrashLogToFile(int signum, void *sigInfo, char *nodeType, int64_t clusterId, int64_t startTime) {
if (!gCrashBasicInfo.init) { if (setReportThreadWait()) {
return;
}
gCrashBasicInfo.clusterId = clusterId; gCrashBasicInfo.clusterId = clusterId;
gCrashBasicInfo.startTime = startTime; gCrashBasicInfo.startTime = startTime;
gCrashBasicInfo.nodeType = nodeType; gCrashBasicInfo.nodeType = nodeType;
gCrashBasicInfo.signum = signum; gCrashBasicInfo.signum = signum;
gCrashBasicInfo.sigInfo = sigInfo; gCrashBasicInfo.sigInfo = sigInfo;
gCrashBasicInfo.isCrash = true;
tsem_wait(&gCrashBasicInfo.sem); tsem_wait(&gCrashBasicInfo.sem);
}
} }
void taosReadCrashInfo(char *filepath, char **pMsg, int64_t *pMsgLen, TdFilePtr *pFd) { void taosReadCrashInfo(char *filepath, char **pMsg, int64_t *pMsgLen, TdFilePtr *pFd) {