diff --git a/include/common/tglobal.h b/include/common/tglobal.h index d445fc26e8..2331f0b23c 100644 --- a/include/common/tglobal.h +++ b/include/common/tglobal.h @@ -69,6 +69,9 @@ extern int32_t tsElectInterval; extern int32_t tsHeartbeatInterval; extern int32_t tsHeartbeatTimeout; +// vnode +extern int64_t tsVndCommitMaxIntervalMs; + // monitor extern bool tsEnableMonitor; extern int32_t tsMonitorInterval; @@ -82,6 +85,10 @@ extern bool tsEnableTelem; extern int32_t tsTelemInterval; extern char tsTelemServer[]; extern uint16_t tsTelemPort; +extern bool tsEnableCrashReport; +extern char* tsTelemUri; +extern char* tsClientCrashReportUri; +extern char* tsSvrCrashReportUri; // query buffer management extern int32_t tsQueryBufferSize; // maximum allowed usage buffer size in MB for each data node during query processing diff --git a/include/libs/sync/sync.h b/include/libs/sync/sync.h index f93bf9a326..4c23c1f557 100644 --- a/include/libs/sync/sync.h +++ b/include/libs/sync/sync.h @@ -49,10 +49,12 @@ extern "C" { #define SYNC_HEARTBEAT_REPLY_SLOW_MS 1500 #define SYNC_SNAP_RESEND_MS 1000 * 60 +#define SYNC_VND_COMMIT_MIN_MS 1000 + #define SYNC_MAX_BATCH_SIZE 1 #define SYNC_INDEX_BEGIN 0 #define SYNC_INDEX_INVALID -1 -#define SYNC_TERM_INVALID -1 // 0xFFFFFFFFFFFFFFFF +#define SYNC_TERM_INVALID -1 typedef enum { SYNC_STRATEGY_NO_SNAPSHOT = 0, diff --git a/include/libs/transport/thttp.h b/include/libs/transport/thttp.h index 7d8c588bfc..9a6aee4187 100644 --- a/include/libs/transport/thttp.h +++ b/include/libs/transport/thttp.h @@ -24,7 +24,7 @@ extern "C" { typedef enum { HTTP_GZIP, HTTP_FLAT } EHttpCompFlag; -int32_t taosSendHttpReport(const char* server, uint16_t port, char* pCont, int32_t contLen, EHttpCompFlag flag); +int32_t taosSendHttpReport(const char* server, const char* uri, uint16_t port, char* pCont, int32_t contLen, EHttpCompFlag flag); #ifdef __cplusplus } diff --git a/include/os/osSystem.h b/include/os/osSystem.h index 58f34d26f0..5154c56e4b 100644 --- a/include/os/osSystem.h +++ b/include/os/osSystem.h @@ -46,27 +46,73 @@ void taosSetTerminalMode(); int32_t taosGetOldTerminalMode(); void taosResetTerminalMode(); +#define STACKSIZE 100 + #if !defined(WINDOWS) -#define taosPrintTrace(flags, level, dflag) \ - { \ - void* array[100]; \ - int32_t size = backtrace(array, 100); \ - char** strings = backtrace_symbols(array, size); \ - if (strings != NULL) { \ - taosPrintLog(flags, level, dflag, "obtained %d stack frames", size); \ - for (int32_t i = 0; i < size; i++) { \ - taosPrintLog(flags, level, dflag, "frame:%d, %s", i, strings[i]); \ - } \ - } \ - \ - taosMemoryFree(strings); \ +#define taosLogTraceToBuf(buf, bufSize, ignoreNum) { \ + void* array[STACKSIZE]; \ + int32_t size = backtrace(array, STACKSIZE); \ + char** strings = backtrace_symbols(array, size); \ + int32_t offset = 0; \ + if (strings != NULL) { \ + offset = snprintf(buf, bufSize - 1, "obtained %d stack frames\n", (ignoreNum > 0) ? size - ignoreNum : size); \ + for (int32_t i = (ignoreNum > 0) ? ignoreNum : 0; i < size; i++) { \ + offset += snprintf(buf + offset, bufSize - 1 - offset, "frame:%d, %s\n", (ignoreNum > 0) ? i - ignoreNum : i, strings[i]); \ + } \ + } \ + \ + taosMemoryFree(strings); \ +} + +#define taosPrintTrace(flags, level, dflag, ignoreNum) \ + { \ + void* array[STACKSIZE]; \ + int32_t size = backtrace(array, STACKSIZE); \ + char** strings = backtrace_symbols(array, size); \ + if (strings != NULL) { \ + taosPrintLog(flags, level, dflag, "obtained %d stack frames", (ignoreNum > 0) ? size - ignoreNum : size); \ + for (int32_t i = (ignoreNum > 0) ? ignoreNum : 0; i < size; i++) { \ + taosPrintLog(flags, level, dflag, "frame:%d, %s", (ignoreNum > 0) ? i - ignoreNum : i, strings[i]); \ + } \ + } \ + \ + taosMemoryFree(strings); \ } #else + #include #include -#define STACKSIZE 64 -#define taosPrintTrace(flags, level, dflag) \ +#define taosLogTraceToBuf(buf, bufSize, ignoreNum) { \ + unsigned int i; \ + void* stack[STACKSIZE]; \ + unsigned short frames; \ + SYMBOL_INFO* symbol; \ + HANDLE process; \ + int32_t offset = 0; \ + \ + process = GetCurrentProcess(); \ + \ + SymInitialize(process, NULL, TRUE); \ + \ + frames = CaptureStackBackTrace(0, STACKSIZE, stack, NULL); \ + symbol = (SYMBOL_INFO*)calloc(sizeof(SYMBOL_INFO) + 256 * sizeof(char), 1); \ + if (symbol != NULL) { \ + symbol->MaxNameLen = 255; \ + symbol->SizeOfStruct = sizeof(SYMBOL_INFO); \ + \ + if (frames > 0) { \ + offset = snprintf(buf, bufSize - 1, "obtained %d stack frames\n", (ignoreNum > 0) ? frames - ignoreNum : frames); \ + for (i = (ignoreNum > 0) ? ignoreNum : 0; i < frames; i++) { \ + SymFromAddr(process, (DWORD64)(stack[i]), 0, symbol); \ + offset += snprintf(buf + offset, bufSize - 1 - offset, "frame:%i, %s - 0x%0X\n", (ignoreNum > 0) ? i - ignoreNum : i, symbol->Name, symbol->Address); \ + } \ + } \ + free(symbol); \ + } \ + } + +#define taosPrintTrace(flags, level, dflag, ignoreNum) \ { \ unsigned int i; \ void* stack[STACKSIZE]; \ @@ -85,10 +131,10 @@ void taosResetTerminalMode(); symbol->SizeOfStruct = sizeof(SYMBOL_INFO); \ \ if (frames > 0) { \ - taosPrintLog(flags, level, dflag, "obtained %d stack frames", frames); \ - for (i = 0; i < frames; i++) { \ + taosPrintLog(flags, level, dflag, "obtained %d stack frames\n", (ignoreNum > 0) ? frames - ignoreNum : frames); \ + for (i = (ignoreNum > 0) ? ignoreNum : 0; i < frames; i++) { \ SymFromAddr(process, (DWORD64)(stack[i]), 0, symbol); \ - taosPrintLog(flags, level, dflag, "frame:%i: %s - 0x%0X", frames - i - 1, symbol->Name, symbol->Address); \ + taosPrintLog(flags, level, dflag, "frame:%i, %s - 0x%0X\n", (ignoreNum > 0) ? i - ignoreNum : i, symbol->Name, symbol->Address); \ } \ } \ free(symbol); \ diff --git a/include/util/tlog.h b/include/util/tlog.h index 6e9b304e1d..808377fa77 100644 --- a/include/util/tlog.h +++ b/include/util/tlog.h @@ -99,6 +99,11 @@ bool taosAssertRelease(bool condition); #endif #endif +void taosLogCrashInfo(char* nodeType, char* pMsg, int64_t msgLen, int signum, void *sigInfo); +void taosReadCrashInfo(char* filepath, char** pMsg, int64_t* pMsgLen, TdFilePtr* pFd); +void taosReleaseCrashLogFile(TdFilePtr pFile, bool truncateFile); +int32_t taosGenCrashJsonMsg(int signum, char** pMsg, int64_t clusterId, int64_t startTime); + // clang-format off #define uFatal(...) { if (uDebugFlag & DEBUG_FATAL) { taosPrintLog("UTL FATAL", DEBUG_FATAL, tsLogEmbedded ? 255 : uDebugFlag, __VA_ARGS__); }} #define uError(...) { if (uDebugFlag & DEBUG_ERROR) { taosPrintLog("UTL ERROR ", DEBUG_ERROR, tsLogEmbedded ? 255 : uDebugFlag, __VA_ARGS__); }} diff --git a/packaging/cfg/taos.cfg b/packaging/cfg/taos.cfg index e22aa85c97..3d3dfc8e73 100644 --- a/packaging/cfg/taos.cfg +++ b/packaging/cfg/taos.cfg @@ -43,6 +43,9 @@ # Switch for allowing TDengine to collect and report service usage information # telemetryReporting 1 +# Switch for allowing TDengine to collect and report crash information +# crashReporting 1 + # The maximum number of vnodes supported by this dnode # supportVnodes 0 diff --git a/source/client/inc/clientInt.h b/source/client/inc/clientInt.h index c3b79d7829..3fc5cc9cec 100644 --- a/source/client/inc/clientInt.h +++ b/source/client/inc/clientInt.h @@ -312,6 +312,8 @@ extern SAppInfo appInfo; extern int32_t clientReqRefPool; extern int32_t clientConnRefPool; extern int32_t timestampDeltaLimit; +extern int64_t lastClusterId; + __async_send_cb_fn_t getMsgRspHandle(int32_t msgType); @@ -339,6 +341,7 @@ void resetConnectDB(STscObj* pTscObj); int taos_options_imp(TSDB_OPTION option, const char* str); void* openTransporter(const char* user, const char* auth, int32_t numOfThreads); +void tscStopCrashReport(); typedef struct AsyncArg { SRpcMsg msg; diff --git a/source/client/src/clientEnv.c b/source/client/src/clientEnv.c index 4561070479..8bf646486d 100644 --- a/source/client/src/clientEnv.c +++ b/source/client/src/clientEnv.c @@ -28,13 +28,16 @@ #include "trpc.h" #include "tsched.h" #include "ttime.h" +#include "thttp.h" #define TSC_VAR_NOT_RELEASE 1 #define TSC_VAR_RELEASED 0 SAppInfo appInfo; +int64_t lastClusterId = 0; int32_t clientReqRefPool = -1; int32_t clientConnRefPool = -1; +int32_t clientStop = 0; int32_t timestampDeltaLimit = 900; // s @@ -393,6 +396,146 @@ void destroyRequest(SRequestObj *pRequest) { removeRequest(pRequest->self); } +void taosClientCrash(int signum, void *sigInfo, void *context) { + taosIgnSignal(SIGTERM); + taosIgnSignal(SIGHUP); + taosIgnSignal(SIGINT); + taosIgnSignal(SIGBREAK); + +#if !defined(WINDOWS) + taosIgnSignal(SIGBUS); +#endif + taosIgnSignal(SIGABRT); + taosIgnSignal(SIGFPE); + taosIgnSignal(SIGSEGV); + + char *pMsg = NULL; + const char *flags = "UTL FATAL "; + ELogLevel level = DEBUG_FATAL; + int32_t dflag = 255; + int64_t msgLen= -1; + + if (tsEnableCrashReport) { + if (taosGenCrashJsonMsg(signum, &pMsg, lastClusterId, appInfo.startTime)) { + taosPrintLog(flags, level, dflag, "failed to generate crash json msg"); + goto _return; + } else { + msgLen = strlen(pMsg); + } + } + +_return: + + taosLogCrashInfo("taos", pMsg, msgLen, signum, sigInfo); + + exit(signum); +} + +void crashReportThreadFuncUnexpectedStopped(void) { atomic_store_32(&clientStop, -1); } + +static void *tscCrashReportThreadFp(void *param) { + setThreadName("client-crashReport"); + char filepath[PATH_MAX] = {0}; + snprintf(filepath, sizeof(filepath), "%s%s.taosCrashLog", tsLogDir, TD_DIRSEP); + char *pMsg = NULL; + int64_t msgLen = 0; + TdFilePtr pFile = NULL; + bool truncateFile = false; + int32_t sleepTime = 200; + int32_t reportPeriodNum = 3600 * 1000 / sleepTime; + int32_t loopTimes = reportPeriodNum; + +#ifdef WINDOWS + if (taosCheckCurrentInDll()) { + atexit(crashReportThreadFuncUnexpectedStopped); + } +#endif + + while (1) { + if (clientStop) break; + if (loopTimes++ < reportPeriodNum) { + taosMsleep(sleepTime); + continue; + } + + taosReadCrashInfo(filepath, &pMsg, &msgLen, &pFile); + if (pMsg && msgLen > 0) { + if (taosSendHttpReport(tsTelemServer, tsClientCrashReportUri, tsTelemPort, pMsg, msgLen, HTTP_FLAT) != 0) { + tscError("failed to send crash report"); + if (pFile) { + taosReleaseCrashLogFile(pFile, false); + continue; + } + } else { + tscInfo("succeed to send crash report"); + truncateFile = true; + } + } else { + tscDebug("no crash info"); + } + + taosMemoryFree(pMsg); + + if (pMsg && msgLen > 0) { + pMsg = NULL; + continue; + } + + if (pFile) { + taosReleaseCrashLogFile(pFile, truncateFile); + truncateFile = false; + } + + taosMsleep(sleepTime); + loopTimes = 0; + } + + clientStop = -1; + return NULL; +} + +int32_t tscCrashReportInit() { + if (!tsEnableCrashReport) { + return 0; + } + + TdThreadAttr thAttr; + taosThreadAttrInit(&thAttr); + taosThreadAttrSetDetachState(&thAttr, PTHREAD_CREATE_JOINABLE); + TdThread crashReportThread; + if (taosThreadCreate(&crashReportThread, &thAttr, tscCrashReportThreadFp, NULL) != 0) { + tscError("failed to create crashReport thread since %s", strerror(errno)); + return -1; + } + + taosThreadAttrDestroy(&thAttr); + return 0; +} + +void tscStopCrashReport() { + if (!tsEnableCrashReport) { + return; + } + + if (atomic_val_compare_exchange_32(&clientStop, 0, 1)) { + tscDebug("hb thread already stopped"); + return; + } + + while (atomic_load_32(&clientStop) > 0) { + taosMsleep(100); + } +} + +static void tscSetSignalHandle() { +#if !defined(WINDOWS) + taosSetSignal(SIGBUS, taosClientCrash); +#endif + taosSetSignal(SIGABRT, taosClientCrash); + taosSetSignal(SIGFPE, taosClientCrash); + taosSetSignal(SIGSEGV, taosClientCrash); +} + void taos_init_imp(void) { // In the APIs of other program language, taos_cleanup is not available yet. // So, to make sure taos_cleanup will be invoked to clean up the allocated resource to suppress the valgrind warning. @@ -400,6 +543,10 @@ void taos_init_imp(void) { errno = TSDB_CODE_SUCCESS; taosSeedRand(taosGetTimestampSec()); + appInfo.pid = taosGetPId(); + appInfo.startTime = taosGetTimestampMs(); + appInfo.pInstMap = taosHashInit(4, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK); + deltaToUtcInitOnce(); if (taosCreateLog("taoslog", 10, configDir, NULL, NULL, NULL, NULL, 1) != 0) { @@ -412,6 +559,8 @@ void taos_init_imp(void) { return; } + tscSetSignalHandle(); + initQueryModuleMsgHandle(); if (taosConvInit() != 0) { @@ -442,9 +591,8 @@ void taos_init_imp(void) { taosGetAppName(appInfo.appName, NULL); taosThreadMutexInit(&appInfo.mutex, NULL); - appInfo.pid = taosGetPId(); - appInfo.startTime = taosGetTimestampMs(); - appInfo.pInstMap = taosHashInit(4, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), true, HASH_ENTRY_LOCK); + tscCrashReportInit(); + tscDebug("client is initialized successfully"); } diff --git a/source/client/src/clientImpl.c b/source/client/src/clientImpl.c index 4e3a20c168..aeb73784f2 100644 --- a/source/client/src/clientImpl.c +++ b/source/client/src/clientImpl.c @@ -1232,7 +1232,7 @@ STscObj* taosConnectImpl(const char* user, const char* auth, const char* db, __t int64_t transporterId = 0; asyncSendMsgToServer(pTscObj->pAppInfo->pTransporter, &pTscObj->pAppInfo->mgmtEp.epSet, &transporterId, body); - + tsem_wait(&pRequest->body.rspSem); if (pRequest->code != TSDB_CODE_SUCCESS) { const char* errorMsg = diff --git a/source/client/src/clientMain.c b/source/client/src/clientMain.c index 638369b1a3..e1df2a4540 100644 --- a/source/client/src/clientMain.c +++ b/source/client/src/clientMain.c @@ -55,6 +55,8 @@ void taos_cleanup(void) { return; } + tscStopCrashReport(); + int32_t id = clientReqRefPool; clientReqRefPool = -1; taosCloseRef(id); @@ -106,7 +108,7 @@ TAOS *taos_connect(const char *ip, const char *user, const char *pass, const cha if (pass == NULL) { pass = TSDB_DEFAULT_PASS; } - + STscObj *pObj = taos_connect_internal(ip, user, pass, NULL, db, port, CONN_TYPE__QUERY); if (pObj) { int64_t *rid = taosMemoryCalloc(1, sizeof(int64_t)); diff --git a/source/client/src/clientMsgHandler.c b/source/client/src/clientMsgHandler.c index ecd433e66e..2191c54315 100644 --- a/source/client/src/clientMsgHandler.c +++ b/source/client/src/clientMsgHandler.c @@ -119,6 +119,7 @@ int32_t processConnectRsp(void* param, SDataBuf* pMsg, int32_t code) { // update the appInstInfo pTscObj->pAppInfo->clusterId = connectRsp.clusterId; + lastClusterId = connectRsp.clusterId; pTscObj->connType = connectRsp.connType; diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index deefa65595..037c8a4541 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -60,6 +60,9 @@ int32_t tsElectInterval = 25 * 1000; int32_t tsHeartbeatInterval = 1000; int32_t tsHeartbeatTimeout = 20 * 1000; +// vnode +int64_t tsVndCommitMaxIntervalMs = 60 * 1000; + // monitor bool tsEnableMonitor = true; int32_t tsMonitorInterval = 30; @@ -73,6 +76,11 @@ bool tsEnableTelem = true; int32_t tsTelemInterval = 43200; char tsTelemServer[TSDB_FQDN_LEN] = "telemetry.taosdata.com"; uint16_t tsTelemPort = 80; +char* tsTelemUri = "/report"; + +bool tsEnableCrashReport = true; +char* tsClientCrashReportUri = "/ccrashreport"; +char* tsSvrCrashReportUri = "/dcrashreport"; // schemaless char tsSmlTagName[TSDB_COL_NAME_LEN] = "_tag_null"; @@ -202,7 +210,9 @@ int32_t taosSetTfsCfg(SConfig *pCfg) { int32_t taosSetTfsCfg(SConfig *pCfg); #endif -struct SConfig *taosGetCfg() { return tsCfg; } +struct SConfig *taosGetCfg() { + return tsCfg; +} static int32_t taosLoadCfg(SConfig *pCfg, const char **envCmd, const char *inputCfgDir, const char *envFile, char *apolloUrl) { @@ -314,6 +324,7 @@ static int32_t taosAddClientCfg(SConfig *pCfg) { if (cfgAddInt32(pCfg, "maxMemUsedByInsert", tsMaxMemUsedByInsert, 1, INT32_MAX, true) != 0) return -1; if (cfgAddInt32(pCfg, "maxRetryWaitTime", tsMaxRetryWaitTime, 0, 86400000, 0) != 0) return -1; if (cfgAddBool(pCfg, "useAdapter", tsUseAdapter, true) != 0) return -1; + if (cfgAddBool(pCfg, "crashReporting", tsEnableCrashReport, true) != 0) return -1; tsNumOfTaskQueueThreads = tsNumOfCores / 2; tsNumOfTaskQueueThreads = TMAX(tsNumOfTaskQueueThreads, 4); @@ -377,7 +388,7 @@ static int32_t taosAddServerCfg(SConfig *pCfg) { if (cfgAddInt32(pCfg, "queryRspPolicy", tsQueryRspPolicy, 0, 1, 0) != 0) return -1; tsNumOfRpcThreads = tsNumOfCores / 2; - tsNumOfRpcThreads = TRANGE(tsNumOfRpcThreads, 1, 4); + tsNumOfRpcThreads = TRANGE(tsNumOfRpcThreads, 1, TSDB_MAX_RPC_THREADS); if (cfgAddInt32(pCfg, "numOfRpcThreads", tsNumOfRpcThreads, 1, 1024, 0) != 0) return -1; tsNumOfCommitThreads = tsNumOfCores / 2; @@ -427,6 +438,8 @@ static int32_t taosAddServerCfg(SConfig *pCfg) { if (cfgAddInt32(pCfg, "syncHeartbeatInterval", tsHeartbeatInterval, 10, 1000 * 60 * 24 * 2, 0) != 0) return -1; if (cfgAddInt32(pCfg, "syncHeartbeatTimeout", tsHeartbeatTimeout, 10, 1000 * 60 * 24 * 2, 0) != 0) return -1; + if (cfgAddInt64(pCfg, "vndCommitMaxInterval", tsVndCommitMaxIntervalMs, 1000, 1000 * 60 * 60, 0) != 0) return -1; + if (cfgAddBool(pCfg, "monitor", tsEnableMonitor, 0) != 0) return -1; if (cfgAddInt32(pCfg, "monitorInterval", tsMonitorInterval, 1, 200000, 0) != 0) return -1; if (cfgAddString(pCfg, "monitorFqdn", tsMonitorFqdn, 0) != 0) return -1; @@ -434,6 +447,7 @@ static int32_t taosAddServerCfg(SConfig *pCfg) { if (cfgAddInt32(pCfg, "monitorMaxLogs", tsMonitorMaxLogs, 1, 1000000, 0) != 0) return -1; if (cfgAddBool(pCfg, "monitorComp", tsMonitorComp, 0) != 0) return -1; + if (cfgAddBool(pCfg, "crashReporting", tsEnableCrashReport, 0) != 0) return -1; if (cfgAddBool(pCfg, "telemetryReporting", tsEnableTelem, 0) != 0) return -1; if (cfgAddInt32(pCfg, "telemetryInterval", tsTelemInterval, 1, 200000, 0) != 0) return -1; if (cfgAddString(pCfg, "telemetryServer", tsTelemServer, 0) != 0) return -1; @@ -665,6 +679,7 @@ static int32_t taosSetClientCfg(SConfig *pCfg) { tsQueryUseNodeAllocator = cfgGetItem(pCfg, "queryUseNodeAllocator")->bval; tsKeepColumnName = cfgGetItem(pCfg, "keepColumnName")->bval; tsUseAdapter = cfgGetItem(pCfg, "useAdapter")->bval; + tsEnableCrashReport = cfgGetItem(pCfg, "crashReporting")->bval; tsMaxRetryWaitTime = cfgGetItem(pCfg, "maxRetryWaitTime")->i32; return 0; @@ -715,7 +730,7 @@ static int32_t taosSetServerCfg(SConfig *pCfg) { tsNumOfSnodeWriteThreads = cfgGetItem(pCfg, "numOfSnodeUniqueThreads")->i32; tsRpcQueueMemoryAllowed = cfgGetItem(pCfg, "rpcQueueMemoryAllowed")->i64; - tsSIMDBuiltins = (bool) cfgGetItem(pCfg, "SIMD-builtins")->bval; + tsSIMDBuiltins = (bool)cfgGetItem(pCfg, "SIMD-builtins")->bval; tsEnableMonitor = cfgGetItem(pCfg, "monitor")->bval; tsMonitorInterval = cfgGetItem(pCfg, "monitorInterval")->i32; @@ -726,6 +741,7 @@ static int32_t taosSetServerCfg(SConfig *pCfg) { tsQueryRspPolicy = cfgGetItem(pCfg, "queryRspPolicy")->i32; tsEnableTelem = cfgGetItem(pCfg, "telemetryReporting")->bval; + tsEnableCrashReport = cfgGetItem(pCfg, "crashReporting")->bval; tsTelemInterval = cfgGetItem(pCfg, "telemetryInterval")->i32; tstrncpy(tsTelemServer, cfgGetItem(pCfg, "telemetryServer")->str, TSDB_FQDN_LEN); tsTelemPort = (uint16_t)cfgGetItem(pCfg, "telemetryPort")->i32; @@ -743,6 +759,8 @@ static int32_t taosSetServerCfg(SConfig *pCfg) { tsHeartbeatInterval = cfgGetItem(pCfg, "syncHeartbeatInterval")->i32; tsHeartbeatTimeout = cfgGetItem(pCfg, "syncHeartbeatTimeout")->i32; + tsVndCommitMaxIntervalMs = cfgGetItem(pCfg, "vndCommitMaxInterval")->i64; + tsStartUdfd = cfgGetItem(pCfg, "udf")->bval; tstrncpy(tsUdfdResFuncs, cfgGetItem(pCfg, "udfdResFuncs")->str, sizeof(tsUdfdResFuncs)); tstrncpy(tsUdfdLdLibPath, cfgGetItem(pCfg, "udfdLdLibPath")->str, sizeof(tsUdfdLdLibPath)); @@ -795,6 +813,8 @@ int32_t taosSetCfg(SConfig *pCfg, char *name) { tsCountAlwaysReturnValue = cfgGetItem(pCfg, "countAlwaysReturnValue")->i32; } else if (strcasecmp("cDebugFlag", name) == 0) { cDebugFlag = cfgGetItem(pCfg, "cDebugFlag")->i32; + } else if (strcasecmp("crashReporting", name) == 0) { + tsEnableCrashReport = cfgGetItem(pCfg, "crashReporting")->bval; } break; } diff --git a/source/dnode/mgmt/exe/dmMain.c b/source/dnode/mgmt/exe/dmMain.c index d308d3e618..711280ea58 100644 --- a/source/dnode/mgmt/exe/dmMain.c +++ b/source/dnode/mgmt/exe/dmMain.c @@ -44,6 +44,7 @@ static struct { char apolloUrl[PATH_MAX]; const char **envCmd; SArray *pArgs; // SConfigPair + int64_t startTime; } global = {0}; static void dmSetDebugFlag(int32_t signum, void *sigInfo, void *context) { taosSetAllDebugFlag(143, true); } @@ -67,23 +68,67 @@ static void dmStopDnode(int signum, void *sigInfo, void *context) { dmStop(); } +void dmLogCrash(int signum, void *sigInfo, void *context) { + taosIgnSignal(SIGTERM); + taosIgnSignal(SIGHUP); + taosIgnSignal(SIGINT); + taosIgnSignal(SIGBREAK); + +#ifndef WINDOWS + taosIgnSignal(SIGBUS); +#endif + taosIgnSignal(SIGABRT); + taosIgnSignal(SIGFPE); + taosIgnSignal(SIGSEGV); + + char *pMsg = NULL; + const char *flags = "UTL FATAL "; + ELogLevel level = DEBUG_FATAL; + int32_t dflag = 255; + int64_t msgLen= -1; + + if (tsEnableCrashReport) { + if (taosGenCrashJsonMsg(signum, &pMsg, dmGetClusterId(), global.startTime)) { + taosPrintLog(flags, level, dflag, "failed to generate crash json msg"); + goto _return; + } else { + msgLen = strlen(pMsg); + } + } + +_return: + + taosLogCrashInfo("taosd", pMsg, msgLen, signum, sigInfo); + + exit(signum); +} + static void dmSetSignalHandle() { taosSetSignal(SIGUSR1, dmSetDebugFlag); taosSetSignal(SIGUSR2, dmSetAssert); taosSetSignal(SIGTERM, dmStopDnode); taosSetSignal(SIGHUP, dmStopDnode); taosSetSignal(SIGINT, dmStopDnode); - taosSetSignal(SIGABRT, dmStopDnode); taosSetSignal(SIGBREAK, dmStopDnode); #ifndef WINDOWS taosSetSignal(SIGTSTP, dmStopDnode); taosSetSignal(SIGQUIT, dmStopDnode); #endif + +#ifndef WINDOWS + taosSetSignal(SIGBUS, dmLogCrash); +#endif + taosSetSignal(SIGABRT, dmLogCrash); + taosSetSignal(SIGFPE, dmLogCrash); + taosSetSignal(SIGSEGV, dmLogCrash); } static int32_t dmParseArgs(int32_t argc, char const *argv[]) { + global.startTime = taosGetTimestampMs(); + int32_t cmdEnvIndex = 0; if (argc < 2) return 0; + global.envCmd = taosMemoryMalloc((argc - 1) * sizeof(char *)); memset(global.envCmd, 0, (argc - 1) * sizeof(char *)); for (int32_t i = 1; i < argc; ++i) { diff --git a/source/dnode/mgmt/mgmt_dnode/inc/dmInt.h b/source/dnode/mgmt/mgmt_dnode/inc/dmInt.h index c776beb3f0..ff32cbcb08 100644 --- a/source/dnode/mgmt/mgmt_dnode/inc/dmInt.h +++ b/source/dnode/mgmt/mgmt_dnode/inc/dmInt.h @@ -29,6 +29,7 @@ typedef struct SDnodeMgmt { const char *name; TdThread statusThread; TdThread monitorThread; + TdThread crashReportThread; SSingleWorker mgmtWorker; ProcessCreateNodeFp processCreateNodeFp; ProcessDropNodeFp processDropNodeFp; @@ -55,6 +56,8 @@ int32_t dmStartStatusThread(SDnodeMgmt *pMgmt); void dmStopStatusThread(SDnodeMgmt *pMgmt); int32_t dmStartMonitorThread(SDnodeMgmt *pMgmt); void dmStopMonitorThread(SDnodeMgmt *pMgmt); +int32_t dmStartCrashReportThread(SDnodeMgmt *pMgmt); +void dmStopCrashReportThread(SDnodeMgmt *pMgmt); int32_t dmStartWorker(SDnodeMgmt *pMgmt); void dmStopWorker(SDnodeMgmt *pMgmt); diff --git a/source/dnode/mgmt/mgmt_dnode/src/dmInt.c b/source/dnode/mgmt/mgmt_dnode/src/dmInt.c index d2db1a4a62..51df293ba7 100644 --- a/source/dnode/mgmt/mgmt_dnode/src/dmInt.c +++ b/source/dnode/mgmt/mgmt_dnode/src/dmInt.c @@ -23,6 +23,9 @@ static int32_t dmStartMgmt(SDnodeMgmt *pMgmt) { if (dmStartMonitorThread(pMgmt) != 0) { return -1; } + if (dmStartCrashReportThread(pMgmt) != 0) { + return -1; + } return 0; } @@ -30,6 +33,7 @@ static void dmStopMgmt(SDnodeMgmt *pMgmt) { pMgmt->pData->stopped = true; dmStopMonitorThread(pMgmt); dmStopStatusThread(pMgmt); + dmStopCrashReportThread(pMgmt); } static int32_t dmOpenMgmt(SMgmtInputOpt *pInput, SMgmtOutputOpt *pOutput) { diff --git a/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c b/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c index 80c040a5e8..76c8e09b70 100644 --- a/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c +++ b/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c @@ -15,6 +15,7 @@ #define _DEFAULT_SOURCE #include "dmInt.h" +#include "thttp.h" static void *dmStatusThreadFp(void *param) { SDnodeMgmt *pMgmt = param; @@ -63,6 +64,63 @@ static void *dmMonitorThreadFp(void *param) { return NULL; } +static void *dmCrashReportThreadFp(void *param) { + SDnodeMgmt *pMgmt = param; + int64_t lastTime = taosGetTimestampMs(); + setThreadName("dnode-crashReport"); + char filepath[PATH_MAX] = {0}; + snprintf(filepath, sizeof(filepath), "%s%s.taosdCrashLog", tsLogDir, TD_DIRSEP); + char *pMsg = NULL; + int64_t msgLen = 0; + TdFilePtr pFile = NULL; + bool truncateFile = false; + int32_t sleepTime = 200; + int32_t reportPeriodNum = 3600 * 1000 / sleepTime;; + int32_t loopTimes = reportPeriodNum; + + while (1) { + if (pMgmt->pData->dropped || pMgmt->pData->stopped) break; + if (loopTimes++ < reportPeriodNum) { + taosMsleep(sleepTime); + continue; + } + + taosReadCrashInfo(filepath, &pMsg, &msgLen, &pFile); + if (pMsg && msgLen > 0) { + if (taosSendHttpReport(tsTelemServer, tsSvrCrashReportUri, tsTelemPort, pMsg, msgLen, HTTP_FLAT) != 0) { + dError("failed to send crash report"); + if (pFile) { + taosReleaseCrashLogFile(pFile, false); + continue; + } + } else { + dInfo("succeed to send crash report"); + truncateFile = true; + } + } else { + dDebug("no crash info"); + } + + taosMemoryFree(pMsg); + + if (pMsg && msgLen > 0) { + pMsg = NULL; + continue; + } + + if (pFile) { + taosReleaseCrashLogFile(pFile, truncateFile); + truncateFile = false; + } + + taosMsleep(sleepTime); + loopTimes = 0; + } + + return NULL; +} + + int32_t dmStartStatusThread(SDnodeMgmt *pMgmt) { TdThreadAttr thAttr; taosThreadAttrInit(&thAttr); @@ -105,6 +163,36 @@ void dmStopMonitorThread(SDnodeMgmt *pMgmt) { } } +int32_t dmStartCrashReportThread(SDnodeMgmt *pMgmt) { + if (!tsEnableCrashReport) { + return 0; + } + + TdThreadAttr thAttr; + taosThreadAttrInit(&thAttr); + taosThreadAttrSetDetachState(&thAttr, PTHREAD_CREATE_JOINABLE); + if (taosThreadCreate(&pMgmt->crashReportThread, &thAttr, dmCrashReportThreadFp, pMgmt) != 0) { + dError("failed to create crashReport thread since %s", strerror(errno)); + return -1; + } + + taosThreadAttrDestroy(&thAttr); + tmsgReportStartup("dnode-crashReport", "initialized"); + return 0; +} + +void dmStopCrashReportThread(SDnodeMgmt *pMgmt) { + if (!tsEnableCrashReport) { + return; + } + + if (taosCheckPthreadValid(pMgmt->crashReportThread)) { + taosThreadJoin(pMgmt->crashReportThread, NULL); + taosThreadClear(&pMgmt->crashReportThread); + } +} + + static void dmProcessMgmtQueue(SQueueInfo *pInfo, SRpcMsg *pMsg) { SDnodeMgmt *pMgmt = pInfo->ahandle; int32_t code = -1; diff --git a/source/dnode/mgmt/mgmt_mnode/src/mmFile.c b/source/dnode/mgmt/mgmt_mnode/src/mmFile.c index f736ffd0c8..dd05fe673a 100644 --- a/source/dnode/mgmt/mgmt_mnode/src/mmFile.c +++ b/source/dnode/mgmt/mgmt_mnode/src/mmFile.c @@ -15,6 +15,7 @@ #define _DEFAULT_SOURCE #include "mmInt.h" +#include "tjson.h" int32_t mmReadFile(const char *path, SMnodeOpt *pOption) { int32_t code = TSDB_CODE_INVALID_JSON_FORMAT; @@ -130,56 +131,70 @@ _OVER: return code; } +static int32_t mmEncodeOption(SJson *pJson, const SMnodeOpt *pOption) { + if (pOption->deploy && pOption->numOfReplicas > 0) { + if (tjsonAddDoubleToObject(pJson, "selfIndex", pOption->selfIndex) < 0) return -1; + + SJson *replicas = tjsonCreateArray(); + if (replicas == NULL) return -1; + if (tjsonAddItemToObject(pJson, "replicas", replicas) < 0) return -1; + + for (int32_t i = 0; i < pOption->numOfReplicas; ++i) { + SJson *replica = tjsonCreateObject(); + if (replica == NULL) return -1; + + const SReplica *pReplica = pOption->replicas + i; + if (tjsonAddDoubleToObject(replica, "id", pReplica->id) < 0) return -1; + if (tjsonAddStringToObject(replica, "fqdn", pReplica->fqdn) < 0) return -1; + if (tjsonAddDoubleToObject(replica, "port", pReplica->port) < 0) return -1; + if (tjsonAddItemToArray(replicas, replica) < 0) return -1; + } + } + + if (tjsonAddDoubleToObject(pJson, "deployed", pOption->deploy) < 0) return -1; + + return 0; +} + int32_t mmWriteFile(const char *path, const SMnodeOpt *pOption) { - char file[PATH_MAX] = {0}; - char realfile[PATH_MAX] = {0}; + int32_t code = -1; + char *buffer = NULL; + SJson *pJson = NULL; + TdFilePtr pFile = NULL; + char file[PATH_MAX] = {0}; + char realfile[PATH_MAX] = {0}; snprintf(file, sizeof(file), "%s%smnode.json.bak", path, TD_DIRSEP); snprintf(realfile, sizeof(realfile), "%s%smnode.json", path, TD_DIRSEP); - TdFilePtr pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); - if (pFile == NULL) { - terrno = TAOS_SYSTEM_ERROR(errno); - dError("failed to write %s since %s", file, terrstr()); - return -1; - } + terrno = TSDB_CODE_OUT_OF_MEMORY; + pJson = tjsonCreateObject(); + if (pJson == NULL) goto _OVER; + if (mmEncodeOption(pJson, pOption) != 0) goto _OVER; + buffer = tjsonToString(pJson); + if (buffer == NULL) goto _OVER; + terrno = 0; - int32_t len = 0; - int32_t maxLen = 4096; - char *content = taosMemoryCalloc(1, maxLen + 1); + pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); + if (pFile == NULL) goto _OVER; - len += snprintf(content + len, maxLen - len, "{\n"); - if (pOption->deploy && pOption->numOfReplicas > 0) { - len += snprintf(content + len, maxLen - len, " \"selfIndex\": %d,\n", pOption->selfIndex); - len += snprintf(content + len, maxLen - len, " \"replicas\": [{\n"); + int32_t len = strlen(buffer); + if (taosWriteFile(pFile, buffer, len) <= 0) goto _OVER; + if (taosFsyncFile(pFile) < 0) goto _OVER; - for (int32_t i = 0; i < pOption->numOfReplicas; ++i) { - const SReplica *pReplica = pOption->replicas + i; - if (pReplica != NULL && pReplica->id > 0) { - len += snprintf(content + len, maxLen - len, " \"id\": %d,\n", pReplica->id); - len += snprintf(content + len, maxLen - len, " \"fqdn\": \"%s\",\n", pReplica->fqdn); - len += snprintf(content + len, maxLen - len, " \"port\": %u\n", pReplica->port); - } - if (i < pOption->numOfReplicas - 1) { - len += snprintf(content + len, maxLen - len, " },{\n"); - } else { - len += snprintf(content + len, maxLen - len, " }],\n"); - } - } - } - len += snprintf(content + len, maxLen - len, " \"deployed\": %d\n", pOption->deploy); - len += snprintf(content + len, maxLen - len, "}\n"); - - taosWriteFile(pFile, content, len); - taosFsyncFile(pFile); taosCloseFile(&pFile); - taosMemoryFree(content); + if (taosRenameFile(file, realfile) != 0) goto _OVER; - if (taosRenameFile(file, realfile) != 0) { - terrno = TAOS_SYSTEM_ERROR(errno); - dError("failed to rename %s since %s", file, terrstr()); - return -1; + code = 0; + dInfo("succeed to write mnode file:%s, deloyed:%d", realfile, pOption->deploy); + +_OVER: + if (pJson != NULL) tjsonDelete(pJson); + if (buffer != NULL) taosMemoryFree(buffer); + if (pFile != NULL) taosCloseFile(&pFile); + + if (code != 0) { + if (terrno == 0) terrno = TAOS_SYSTEM_ERROR(errno); + dError("failed to write mnode file:%s since %s, deloyed:%d", realfile, terrstr(), pOption->deploy); } - - dDebug("succeed to write %s, deployed:%d", realfile, pOption->deploy); - return 0; + return code; } diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmFile.c b/source/dnode/mgmt/mgmt_vnode/src/vmFile.c index dc32054fd7..8337fb5d10 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmFile.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmFile.c @@ -15,6 +15,7 @@ #define _DEFAULT_SOURCE #include "vmInt.h" +#include "tjson.h" #define MAX_CONTENT_LEN 2 * 1024 * 1024 @@ -144,65 +145,66 @@ _OVER: return code; } -int32_t vmWriteVnodeListToFile(SVnodeMgmt *pMgmt) { - int32_t code = 0; - char file[PATH_MAX] = {0}; - char realfile[PATH_MAX] = {0}; - snprintf(file, sizeof(file), "%s%svnodes.json.bak", pMgmt->path, TD_DIRSEP); - snprintf(realfile, sizeof(file), "%s%svnodes.json", pMgmt->path, TD_DIRSEP); +static int32_t vmEncodeVnodeList(SJson *pJson, SVnodeObj **ppVnodes, int32_t numOfVnodes) { + SJson *vnodes = tjsonCreateArray(); + if (vnodes == NULL) return -1; + if (tjsonAddItemToObject(pJson, "vnodes", vnodes) < 0) return -1; - TdFilePtr pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); - if (pFile == NULL) { - terrno = TAOS_SYSTEM_ERROR(errno); - dError("failed to write %s since %s", file, terrstr()); - return -1; - } - - int32_t numOfVnodes = 0; - SVnodeObj **ppVnodes = vmGetVnodeListFromHash(pMgmt, &numOfVnodes); - if (ppVnodes == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - code = -1; - dError("failed to write %s while get vnodelist", file); - goto _OVER; - } - - int32_t len = 0; - int32_t maxLen = MAX_CONTENT_LEN; - char *content = taosMemoryCalloc(1, maxLen + 1); - if (content == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - code = -1; - dError("failed to write %s while malloc content", file); - goto _OVER; - } - - len += snprintf(content + len, maxLen - len, "{\n"); - len += snprintf(content + len, maxLen - len, " \"vnodes\": [\n"); for (int32_t i = 0; i < numOfVnodes; ++i) { SVnodeObj *pVnode = ppVnodes[i]; if (pVnode == NULL) continue; - len += snprintf(content + len, maxLen - len, " {\n"); - len += snprintf(content + len, maxLen - len, " \"vgId\": %d,\n", pVnode->vgId); - len += snprintf(content + len, maxLen - len, " \"dropped\": %d,\n", pVnode->dropped); - len += snprintf(content + len, maxLen - len, " \"vgVersion\": %d\n", pVnode->vgVersion); - if (i < numOfVnodes - 1) { - len += snprintf(content + len, maxLen - len, " },\n"); - } else { - len += snprintf(content + len, maxLen - len, " }\n"); - } + SJson *vnode = tjsonCreateObject(); + if (vnode == NULL) return -1; + if (tjsonAddDoubleToObject(vnode, "vgId", pVnode->vgId) < 0) return -1; + if (tjsonAddDoubleToObject(vnode, "dropped", pVnode->dropped) < 0) return -1; + if (tjsonAddDoubleToObject(vnode, "vgVersion", pVnode->vgVersion) < 0) return -1; + if (tjsonAddItemToArray(vnodes, vnode) < 0) return -1; } - len += snprintf(content + len, maxLen - len, " ]\n"); - len += snprintf(content + len, maxLen - len, "}\n"); + + return 0; +} + +int32_t vmWriteVnodeListToFile(SVnodeMgmt *pMgmt) { + int32_t code = -1; + char *buffer = NULL; + SJson *pJson = NULL; + TdFilePtr pFile = NULL; + SVnodeObj **ppVnodes = NULL; + char file[PATH_MAX] = {0}; + char realfile[PATH_MAX] = {0}; + snprintf(file, sizeof(file), "%s%svnodes.json.bak", pMgmt->path, TD_DIRSEP); + snprintf(realfile, sizeof(realfile), "%s%svnodes.json", pMgmt->path, TD_DIRSEP); + + int32_t numOfVnodes = 0; + ppVnodes = vmGetVnodeListFromHash(pMgmt, &numOfVnodes); + if (ppVnodes == NULL) goto _OVER; + + terrno = TSDB_CODE_OUT_OF_MEMORY; + pJson = tjsonCreateObject(); + if (pJson == NULL) goto _OVER; + if (vmEncodeVnodeList(pJson, ppVnodes, numOfVnodes) != 0) goto _OVER; + buffer = tjsonToString(pJson); + if (buffer == NULL) goto _OVER; terrno = 0; -_OVER: - taosWriteFile(pFile, content, len); - taosFsyncFile(pFile); - taosCloseFile(&pFile); - taosMemoryFree(content); + pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); + if (pFile == NULL) goto _OVER; + int32_t len = strlen(buffer); + if (taosWriteFile(pFile, buffer, len) <= 0) goto _OVER; + if (taosFsyncFile(pFile) < 0) goto _OVER; + + taosCloseFile(&pFile); + if (taosRenameFile(file, realfile) != 0) goto _OVER; + + code = 0; + dInfo("succeed to write vnodes file:%s, vnodes:%d", realfile, numOfVnodes); + +_OVER: + if (pJson != NULL) tjsonDelete(pJson); + if (buffer != NULL) taosMemoryFree(buffer); + if (pFile != NULL) taosCloseFile(&pFile); if (ppVnodes != NULL) { for (int32_t i = 0; i < numOfVnodes; ++i) { SVnodeObj *pVnode = ppVnodes[i]; @@ -213,14 +215,9 @@ _OVER: taosMemoryFree(ppVnodes); } - if (code != 0) return -1; - - dInfo("succeed to write %s, numOfVnodes:%d", realfile, numOfVnodes); - code = taosRenameFile(file, realfile); - if (code != 0) { - dError("failed to rename %s to %s", file, realfile); + if (terrno == 0) terrno = TAOS_SYSTEM_ERROR(errno); + dError("failed to write vnodes file:%s since %s, vnodes:%d", realfile, terrstr(), numOfVnodes); } - return code; } \ No newline at end of file diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmInt.c b/source/dnode/mgmt/mgmt_vnode/src/vmInt.c index 6b274261e5..693fe97daa 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmInt.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmInt.c @@ -79,6 +79,8 @@ int32_t vmOpenVnode(SVnodeMgmt *pMgmt, SWrapperCfg *pCfg, SVnode *pImpl) { void vmCloseVnode(SVnodeMgmt *pMgmt, SVnodeObj *pVnode) { char path[TSDB_FILENAME_LEN] = {0}; + vnodeProposeCommitOnNeed(pVnode->pImpl); + taosThreadRwlockWrlock(&pMgmt->lock); taosHashRemove(pMgmt->hash, &pVnode->vgId, sizeof(int32_t)); taosThreadRwlockUnlock(&pMgmt->lock); diff --git a/source/dnode/mgmt/node_mgmt/inc/dmMgmt.h b/source/dnode/mgmt/node_mgmt/inc/dmMgmt.h index 7e85e6b722..02cd678433 100644 --- a/source/dnode/mgmt/node_mgmt/inc/dmMgmt.h +++ b/source/dnode/mgmt/node_mgmt/inc/dmMgmt.h @@ -85,6 +85,7 @@ typedef struct SDnode { // dmEnv.c SDnode *dmInstance(); void dmReportStartup(const char *pName, const char *pDesc); +int64_t dmGetClusterId(); // dmMgmt.c int32_t dmInitDnode(SDnode *pDnode); diff --git a/source/dnode/mgmt/node_mgmt/src/dmEnv.c b/source/dnode/mgmt/node_mgmt/src/dmEnv.c index e3bda5a3f0..1d0236c0c5 100644 --- a/source/dnode/mgmt/node_mgmt/src/dmEnv.c +++ b/source/dnode/mgmt/node_mgmt/src/dmEnv.c @@ -268,3 +268,8 @@ void dmReportStartup(const char *pName, const char *pDesc) { tstrncpy(pStartup->desc, pDesc, TSDB_STEP_DESC_LEN); dDebug("step:%s, %s", pStartup->name, pStartup->desc); } + +int64_t dmGetClusterId() { + return global.data.clusterId; +} + diff --git a/source/dnode/mgmt/node_util/src/dmEps.c b/source/dnode/mgmt/node_util/src/dmEps.c index a7a63fbaca..3e2d8b53aa 100644 --- a/source/dnode/mgmt/node_util/src/dmEps.c +++ b/source/dnode/mgmt/node_util/src/dmEps.c @@ -15,6 +15,7 @@ #define _DEFAULT_SOURCE #include "dmUtil.h" +#include "tjson.h" #include "tmisce.h" static void dmPrintEps(SDnodeData *pData); @@ -181,81 +182,73 @@ _OVER: return code; } -int32_t dmWriteEps(SDnodeData *pData) { - int32_t code = -1; - char *content = NULL; - TdFilePtr pFile = NULL; +static int32_t dmEncodeEps(SJson *pJson, SDnodeData *pData) { + if (tjsonAddDoubleToObject(pJson, "dnodeId", pData->dnodeId) < 0) return -1; + if (tjsonAddIntegerToObject(pJson, "dnodeVer", pData->dnodeVer) < 0) return -1; + if (tjsonAddIntegerToObject(pJson, "clusterId", pData->clusterId) < 0) return -1; + if (tjsonAddDoubleToObject(pJson, "dropped", pData->dropped) < 0) return -1; - char file[PATH_MAX] = {0}; - char realfile[PATH_MAX] = {0}; - snprintf(file, sizeof(file), "%s%sdnode%sdnode.json.bak", tsDataDir, TD_DIRSEP, TD_DIRSEP); - snprintf(realfile, sizeof(realfile), "%s%sdnode%sdnode.json", tsDataDir, TD_DIRSEP, TD_DIRSEP); - - pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); - if (pFile == NULL) { - dError("failed to open %s since %s", file, strerror(errno)); - terrno = TAOS_SYSTEM_ERROR(errno); - goto _OVER; - } - - int32_t len = 0; - int32_t maxLen = 256 * 1024; - content = taosMemoryCalloc(1, maxLen + 1); - - len += snprintf(content + len, maxLen - len, "{\n"); - len += snprintf(content + len, maxLen - len, " \"dnodeId\": %d,\n", pData->dnodeId); - len += snprintf(content + len, maxLen - len, " \"dnodeVer\": \"%" PRId64 "\",\n", pData->dnodeVer); - len += snprintf(content + len, maxLen - len, " \"clusterId\": \"%" PRId64 "\",\n", pData->clusterId); - len += snprintf(content + len, maxLen - len, " \"dropped\": %d,\n", pData->dropped); - len += snprintf(content + len, maxLen - len, " \"dnodes\": [{\n"); + SJson *dnodes = tjsonCreateArray(); + if (dnodes == NULL) return -1; + if (tjsonAddItemToObject(pJson, "dnodes", dnodes) < 0) return -1; int32_t numOfEps = (int32_t)taosArrayGetSize(pData->dnodeEps); for (int32_t i = 0; i < numOfEps; ++i) { SDnodeEp *pDnodeEp = taosArrayGet(pData->dnodeEps, i); - len += snprintf(content + len, maxLen - len, " \"id\": %d,\n", pDnodeEp->id); - len += snprintf(content + len, maxLen - len, " \"fqdn\": \"%s\",\n", pDnodeEp->ep.fqdn); - len += snprintf(content + len, maxLen - len, " \"port\": %u,\n", pDnodeEp->ep.port); - len += snprintf(content + len, maxLen - len, " \"isMnode\": %d\n", pDnodeEp->isMnode); - if (i < numOfEps - 1) { - len += snprintf(content + len, maxLen - len, " },{\n"); - } else { - len += snprintf(content + len, maxLen - len, " }]\n"); - } - } - len += snprintf(content + len, maxLen - len, "}\n"); + SJson *dnode = tjsonCreateObject(); + if (dnode == NULL) return -1; - if (taosWriteFile(pFile, content, len) != len) { - dError("failed to write %s since %s", file, strerror(errno)); - terrno = TAOS_SYSTEM_ERROR(errno); - goto _OVER; + if (tjsonAddDoubleToObject(dnode, "id", pDnodeEp->id) < 0) return -1; + if (tjsonAddStringToObject(dnode, "fqdn", pDnodeEp->ep.fqdn) < 0) return -1; + if (tjsonAddDoubleToObject(dnode, "port", pDnodeEp->ep.port) < 0) return -1; + if (tjsonAddDoubleToObject(dnode, "isMnode", pDnodeEp->isMnode) < 0) return -1; + if (tjsonAddItemToArray(dnodes, dnode) < 0) return -1; } - if (taosFsyncFile(pFile) < 0) { - dError("failed to fsync %s since %s", file, strerror(errno)); - terrno = TAOS_SYSTEM_ERROR(errno); - goto _OVER; - } + return 0; +} + +int32_t dmWriteEps(SDnodeData *pData) { + int32_t code = -1; + char *buffer = NULL; + SJson *pJson = NULL; + TdFilePtr pFile = NULL; + char file[PATH_MAX] = {0}; + char realfile[PATH_MAX] = {0}; + snprintf(file, sizeof(file), "%s%sdnode%sdnode.json.bak", tsDataDir, TD_DIRSEP, TD_DIRSEP); + snprintf(realfile, sizeof(realfile), "%s%sdnode%sdnode.json", tsDataDir, TD_DIRSEP, TD_DIRSEP); + + terrno = TSDB_CODE_OUT_OF_MEMORY; + pJson = tjsonCreateObject(); + if (pJson == NULL) goto _OVER; + if (dmEncodeEps(pJson, pData) != 0) goto _OVER; + buffer = tjsonToString(pJson); + if (buffer == NULL) goto _OVER; + terrno = 0; + + pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); + if (pFile == NULL) goto _OVER; + + int32_t len = strlen(buffer); + if (taosWriteFile(pFile, buffer, len) <= 0) goto _OVER; + if (taosFsyncFile(pFile) < 0) goto _OVER; taosCloseFile(&pFile); - taosMemoryFreeClear(content); - - if (taosRenameFile(file, realfile) != 0) { - terrno = TAOS_SYSTEM_ERROR(errno); - dError("failed to rename %s since %s", file, terrstr()); - goto _OVER; - } + if (taosRenameFile(file, realfile) != 0) goto _OVER; code = 0; pData->updateTime = taosGetTimestampMs(); - dInfo("succeed to write %s, dnodeVer:%" PRId64, realfile, pData->dnodeVer); + dInfo("succeed to write dnode file:%s, dnodeVer:%" PRId64, realfile, pData->dnodeVer); _OVER: - if (content != NULL) taosMemoryFreeClear(content); + if (pJson != NULL) tjsonDelete(pJson); + if (buffer != NULL) taosMemoryFree(buffer); if (pFile != NULL) taosCloseFile(&pFile); - if (code != 0) { - dError("failed to write file %s since %s", realfile, terrstr()); - } + if (code != 0) { + if (terrno == 0) terrno = TAOS_SYSTEM_ERROR(errno); + dInfo("succeed to write dnode file:%s since %s, dnodeVer:%" PRId64, realfile, terrstr(), pData->dnodeVer); + } return code; } diff --git a/source/dnode/mgmt/node_util/src/dmFile.c b/source/dnode/mgmt/node_util/src/dmFile.c index 2eb1462efc..4dcc962a20 100644 --- a/source/dnode/mgmt/node_util/src/dmFile.c +++ b/source/dnode/mgmt/node_util/src/dmFile.c @@ -15,6 +15,7 @@ #define _DEFAULT_SOURCE #include "dmUtil.h" +#include "tjson.h" #define MAXLEN 1024 @@ -63,56 +64,51 @@ _OVER: return code; } +static int32_t dmEncodeFile(SJson *pJson, bool deployed) { + if (tjsonAddDoubleToObject(pJson, "deployed", deployed) < 0) return -1; + return 0; +} + int32_t dmWriteFile(const char *path, const char *name, bool deployed) { int32_t code = -1; - int32_t len = 0; - char content[MAXLEN + 1] = {0}; + char *buffer = NULL; + SJson *pJson = NULL; + TdFilePtr pFile = NULL; char file[PATH_MAX] = {0}; char realfile[PATH_MAX] = {0}; - TdFilePtr pFile = NULL; - snprintf(file, sizeof(file), "%s%s%s.json", path, TD_DIRSEP, name); snprintf(realfile, sizeof(realfile), "%s%s%s.json", path, TD_DIRSEP, name); + terrno = TSDB_CODE_OUT_OF_MEMORY; + pJson = tjsonCreateObject(); + if (pJson == NULL) goto _OVER; + if (dmEncodeFile(pJson, deployed) != 0) goto _OVER; + buffer = tjsonToString(pJson); + if (buffer == NULL) goto _OVER; + terrno = 0; + pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); - if (pFile == NULL) { - terrno = TAOS_SYSTEM_ERROR(errno); - dError("failed to write %s since %s", file, terrstr()); - goto _OVER; - } + if (pFile == NULL) goto _OVER; - len += snprintf(content + len, MAXLEN - len, "{\n"); - len += snprintf(content + len, MAXLEN - len, " \"deployed\": %d\n", deployed); - len += snprintf(content + len, MAXLEN - len, "}\n"); - - if (taosWriteFile(pFile, content, len) != len) { - terrno = TAOS_SYSTEM_ERROR(errno); - dError("failed to write file:%s since %s", file, terrstr()); - goto _OVER; - } - - if (taosFsyncFile(pFile) != 0) { - terrno = TAOS_SYSTEM_ERROR(errno); - dError("failed to fsync file:%s since %s", file, terrstr()); - goto _OVER; - } + int32_t len = strlen(buffer); + if (taosWriteFile(pFile, buffer, len) <= 0) goto _OVER; + if (taosFsyncFile(pFile) < 0) goto _OVER; taosCloseFile(&pFile); + if (taosRenameFile(file, realfile) != 0) goto _OVER; - if (taosRenameFile(file, realfile) != 0) { - terrno = TAOS_SYSTEM_ERROR(errno); - dError("failed to rename %s since %s", file, terrstr()); - return -1; - } - - dInfo("succeed to write %s, deployed:%d", realfile, deployed); code = 0; + dInfo("succeed to write file:%s, deloyed:%d", realfile, deployed); _OVER: - if (pFile != NULL) { - taosCloseFile(&pFile); - } + if (pJson != NULL) tjsonDelete(pJson); + if (buffer != NULL) taosMemoryFree(buffer); + if (pFile != NULL) taosCloseFile(&pFile); + if (code != 0) { + if (terrno == 0) terrno = TAOS_SYSTEM_ERROR(errno); + dError("failed to write file:%s since %s, deloyed:%d", realfile, terrstr(), deployed); + } return code; } diff --git a/source/dnode/mnode/impl/inc/mndUser.h b/source/dnode/mnode/impl/inc/mndUser.h index cf7deba397..8943ba703e 100644 --- a/source/dnode/mnode/impl/inc/mndUser.h +++ b/source/dnode/mnode/impl/inc/mndUser.h @@ -34,6 +34,8 @@ SHashObj *mndDupDbHash(SHashObj *pOld); SHashObj *mndDupTopicHash(SHashObj *pOld); int32_t mndValidateUserAuthInfo(SMnode *pMnode, SUserAuthVersion *pUsers, int32_t numOfUses, void **ppRsp, int32_t *pRspLen); +int32_t mndUserRemoveDb(SMnode *pMnode, STrans *pTrans, char *db); +int32_t mndUserRemoveTopic(SMnode *pMnode, STrans *pTrans, char *topic); #ifdef __cplusplus } diff --git a/source/dnode/mnode/impl/src/mndDb.c b/source/dnode/mnode/impl/src/mndDb.c index 87b5a5c42d..7e5c29d56f 100644 --- a/source/dnode/mnode/impl/src/mndDb.c +++ b/source/dnode/mnode/impl/src/mndDb.c @@ -1051,17 +1051,7 @@ static int32_t mndDropDb(SMnode *pMnode, SRpcMsg *pReq, SDbObj *pDb) { if (mndDropStreamByDb(pMnode, pTrans, pDb) != 0) goto _OVER; if (mndDropSmasByDb(pMnode, pTrans, pDb) != 0) goto _OVER; if (mndSetDropDbRedoActions(pMnode, pTrans, pDb) != 0) goto _OVER; - - SUserObj *pUser = mndAcquireUser(pMnode, pDb->createUser); - if (pUser != NULL) { - pUser->authVersion++; - SSdbRaw *pCommitRaw = mndUserActionEncode(pUser); - if (pCommitRaw == NULL || mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) { - mError("trans:%d, failed to append redo log since %s", pTrans->id, terrstr()); - goto _OVER; - } - (void)sdbSetRawStatus(pCommitRaw, SDB_STATUS_READY); - } + if (mndUserRemoveDb(pMnode, pTrans, pDb->name) != 0) goto _OVER; int32_t rspLen = 0; void *pRsp = NULL; diff --git a/source/dnode/mnode/impl/src/mndMnode.c b/source/dnode/mnode/impl/src/mndMnode.c index c8c8e06c5e..9b3934c40c 100644 --- a/source/dnode/mnode/impl/src/mndMnode.c +++ b/source/dnode/mnode/impl/src/mndMnode.c @@ -785,9 +785,9 @@ static void mndReloadSyncConfig(SMnode *pMnode) { int32_t code = syncReconfig(pMnode->syncMgmt.sync, &cfg); if (code != 0) { - mError("vgId:1, failed to reconfig mnode sync since %s", terrstr()); + mError("vgId:1, mnode sync reconfig failed since %s", terrstr()); } else { - mInfo("vgId:1, reconfig mnode sync success"); + mInfo("vgId:1, mnode sync reconfig success"); } } } diff --git a/source/dnode/mnode/impl/src/mndTelem.c b/source/dnode/mnode/impl/src/mndTelem.c index b0b49b42dc..679fafa28d 100644 --- a/source/dnode/mnode/impl/src/mndTelem.c +++ b/source/dnode/mnode/impl/src/mndTelem.c @@ -131,7 +131,7 @@ static int32_t mndProcessTelemTimer(SRpcMsg* pReq) { taosThreadMutexUnlock(&pMgmt->lock); if (pCont != NULL) { - if (taosSendHttpReport(tsTelemServer, tsTelemPort, pCont, strlen(pCont), HTTP_FLAT) != 0) { + if (taosSendHttpReport(tsTelemServer, tsTelemUri, tsTelemPort, pCont, strlen(pCont), HTTP_FLAT) != 0) { mError("failed to send telemetry report"); } else { mInfo("succeed to send telemetry report"); diff --git a/source/dnode/mnode/impl/src/mndTopic.c b/source/dnode/mnode/impl/src/mndTopic.c index ce04c72344..96dba24566 100644 --- a/source/dnode/mnode/impl/src/mndTopic.c +++ b/source/dnode/mnode/impl/src/mndTopic.c @@ -604,22 +604,19 @@ _OVER: } static int32_t mndDropTopic(SMnode *pMnode, STrans *pTrans, SRpcMsg *pReq, SMqTopicObj *pTopic) { + int32_t code = -1; + if (mndUserRemoveTopic(pMnode, pTrans, pTopic->name) != 0) goto _OVER; + SSdbRaw *pCommitRaw = mndTopicActionEncode(pTopic); - if (pCommitRaw == NULL || mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) { - mError("trans:%d, failed to append commit log since %s", pTrans->id, terrstr()); - mndTransDrop(pTrans); - return -1; - } + if (pCommitRaw == NULL || mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) goto _OVER; (void)sdbSetRawStatus(pCommitRaw, SDB_STATUS_DROPPED); - if (mndTransPrepare(pMnode, pTrans) != 0) { - mError("trans:%d, failed to prepare since %s", pTrans->id, terrstr()); - mndTransDrop(pTrans); - return -1; - } + if (mndTransPrepare(pMnode, pTrans) != 0) goto _OVER; + code = 0; +_OVER: mndTransDrop(pTrans); - return 0; + return code; } static int32_t mndProcessDropTopicReq(SRpcMsg *pReq) { @@ -885,6 +882,7 @@ int32_t mndCheckTopicExist(SMnode *pMnode, SDbObj *pDb) { return 0; } +#if 0 int32_t mndDropTopicByDB(SMnode *pMnode, STrans *pTrans, SDbObj *pDb) { int32_t code = 0; SSdb *pSdb = pMnode->pSdb; @@ -912,3 +910,4 @@ int32_t mndDropTopicByDB(SMnode *pMnode, STrans *pTrans, SDbObj *pDb) { return code; } +#endif \ No newline at end of file diff --git a/source/dnode/mnode/impl/src/mndUser.c b/source/dnode/mnode/impl/src/mndUser.c index 85a92c7aef..b965e13316 100644 --- a/source/dnode/mnode/impl/src/mndUser.c +++ b/source/dnode/mnode/impl/src/mndUser.c @@ -285,14 +285,35 @@ static int32_t mndUserActionInsert(SSdb *pSdb, SUserObj *pUser) { return 0; } -static int32_t mndUserActionDelete(SSdb *pSdb, SUserObj *pUser) { - mTrace("user:%s, perform delete action, row:%p", pUser->user, pUser); +static int32_t mndUserDupObj(SUserObj *pUser, SUserObj *pNew) { + memcpy(pNew, pUser, sizeof(SUserObj)); + pNew->authVersion++; + pNew->updateTime = taosGetTimestampMs(); + + taosRLockLatch(&pUser->lock); + pNew->readDbs = mndDupDbHash(pUser->readDbs); + pNew->writeDbs = mndDupDbHash(pUser->writeDbs); + pNew->topics = mndDupTopicHash(pUser->topics); + taosRUnLockLatch(&pUser->lock); + + if (pNew->readDbs == NULL || pNew->writeDbs == NULL || pNew->topics == NULL) { + return -1; + } + return 0; +} + +static void mndUserFreeObj(SUserObj *pUser) { taosHashCleanup(pUser->readDbs); taosHashCleanup(pUser->writeDbs); taosHashCleanup(pUser->topics); pUser->readDbs = NULL; pUser->writeDbs = NULL; pUser->topics = NULL; +} + +static int32_t mndUserActionDelete(SSdb *pSdb, SUserObj *pUser) { + mTrace("user:%s, perform delete action, row:%p", pUser->user, pUser); + mndUserFreeObj(pUser); return 0; } @@ -516,19 +537,7 @@ static int32_t mndProcessAlterUserReq(SRpcMsg *pReq) { goto _OVER; } - memcpy(&newUser, pUser, sizeof(SUserObj)); - newUser.authVersion++; - newUser.updateTime = taosGetTimestampMs(); - - taosRLockLatch(&pUser->lock); - newUser.readDbs = mndDupDbHash(pUser->readDbs); - newUser.writeDbs = mndDupDbHash(pUser->writeDbs); - newUser.topics = mndDupTopicHash(pUser->topics); - taosRUnLockLatch(&pUser->lock); - - if (newUser.readDbs == NULL || newUser.writeDbs == NULL || newUser.topics == NULL) { - goto _OVER; - } + if (mndUserDupObj(pUser, &newUser) != 0) goto _OVER; if (alterReq.alterType == TSDB_ALTER_USER_PASSWD) { char pass[TSDB_PASSWORD_LEN + 1] = {0}; @@ -654,9 +663,7 @@ _OVER: mndReleaseUser(pMnode, pOperUser); mndReleaseUser(pMnode, pUser); - taosHashCleanup(newUser.writeDbs); - taosHashCleanup(newUser.readDbs); - taosHashCleanup(newUser.topics); + mndUserFreeObj(&newUser); return code; } @@ -1007,3 +1014,74 @@ _OVER: tFreeSUserAuthBatchRsp(&batchRsp); return code; } + +int32_t mndUserRemoveDb(SMnode *pMnode, STrans *pTrans, char *db) { + int32_t code = 0; + SSdb *pSdb = pMnode->pSdb; + int32_t len = strlen(db) + 1; + void *pIter = NULL; + SUserObj *pUser = NULL; + SUserObj newUser = {0}; + + while (1) { + pIter = sdbFetch(pSdb, SDB_USER, pIter, (void **)&pUser); + if (pIter == NULL) break; + + code = -1; + if (mndUserDupObj(pUser, &newUser) != 0) break; + + bool inRead = (taosHashGet(newUser.readDbs, db, len) != NULL); + bool inWrite = (taosHashGet(newUser.writeDbs, db, len) != NULL); + if (inRead || inWrite) { + (void)taosHashRemove(newUser.readDbs, db, len); + (void)taosHashRemove(newUser.writeDbs, db, len); + + SSdbRaw *pCommitRaw = mndUserActionEncode(&newUser); + if (pCommitRaw == NULL || mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) break; + (void)sdbSetRawStatus(pCommitRaw, SDB_STATUS_READY); + } + + mndUserFreeObj(&newUser); + sdbRelease(pSdb, pUser); + code = 0; + } + + if (pUser != NULL) sdbRelease(pSdb, pUser); + if (pIter != NULL) sdbCancelFetch(pSdb, pIter); + mndUserFreeObj(&newUser); + return code; +} + +int32_t mndUserRemoveTopic(SMnode *pMnode, STrans *pTrans, char *topic) { + int32_t code = 0; + SSdb *pSdb = pMnode->pSdb; + int32_t len = strlen(topic) + 1; + void *pIter = NULL; + SUserObj *pUser = NULL; + SUserObj newUser = {0}; + + while (1) { + pIter = sdbFetch(pSdb, SDB_USER, pIter, (void **)&pUser); + if (pIter == NULL) break; + + code = -1; + if (mndUserDupObj(pUser, &newUser) != 0) break; + + bool inTopic = (taosHashGet(newUser.topics, topic, len) != NULL); + if (inTopic) { + (void)taosHashRemove(newUser.topics, topic, len); + SSdbRaw *pCommitRaw = mndUserActionEncode(&newUser); + if (pCommitRaw == NULL || mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) break; + (void)sdbSetRawStatus(pCommitRaw, SDB_STATUS_READY); + } + + mndUserFreeObj(&newUser); + sdbRelease(pSdb, pUser); + code = 0; + } + + if (pUser != NULL) sdbRelease(pSdb, pUser); + if (pIter != NULL) sdbCancelFetch(pSdb, pIter); + mndUserFreeObj(&newUser); + return code; +} diff --git a/source/dnode/mnode/sdb/src/sdbFile.c b/source/dnode/mnode/sdb/src/sdbFile.c index f43b6bdb25..9e830b83e6 100644 --- a/source/dnode/mnode/sdb/src/sdbFile.c +++ b/source/dnode/mnode/sdb/src/sdbFile.c @@ -243,7 +243,7 @@ static int32_t sdbReadFileImp(SSdb *pSdb) { if (pFile == NULL) { taosMemoryFree(pRaw); terrno = TAOS_SYSTEM_ERROR(errno); - mDebug("failed to read sdb file:%s since %s", file, terrstr()); + mInfo("read sdb file:%s finished since %s", file, terrstr()); return 0; } @@ -636,15 +636,20 @@ int32_t sdbStartWrite(SSdb *pSdb, SSdbIter **ppIter) { } int32_t sdbStopWrite(SSdb *pSdb, SSdbIter *pIter, bool isApply, int64_t index, int64_t term, int64_t config) { - int32_t code = 0; + int32_t code = -1; if (!isApply) { mInfo("sdbiter:%p, not apply to sdb", pIter); - sdbCloseIter(pIter); - return 0; + code = 0; + goto _OVER; + } + + if (taosFsyncFile(pIter->file) != 0) { + terrno = TAOS_SYSTEM_ERROR(errno); + mError("sdbiter:%p, failed to fasync file %s since %s", pIter, pIter->name, terrstr()); + goto _OVER; } - taosFsyncFile(pIter->file); taosCloseFile(&pIter->file); pIter->file = NULL; @@ -653,14 +658,12 @@ int32_t sdbStopWrite(SSdb *pSdb, SSdbIter *pIter, bool isApply, int64_t index, i if (taosRenameFile(pIter->name, datafile) != 0) { terrno = TAOS_SYSTEM_ERROR(errno); mError("sdbiter:%p, failed to rename file %s to %s since %s", pIter, pIter->name, datafile, terrstr()); - sdbCloseIter(pIter); - return -1; + goto _OVER; } if (sdbReadFile(pSdb) != 0) { mError("sdbiter:%p, failed to read from %s since %s", pIter, datafile, terrstr()); - sdbCloseIter(pIter); - return -1; + goto _OVER; } if (config > 0) { @@ -674,8 +677,11 @@ int32_t sdbStopWrite(SSdb *pSdb, SSdbIter *pIter, bool isApply, int64_t index, i } mInfo("sdbiter:%p, success applyed to sdb", pIter); + code = 0; + +_OVER: sdbCloseIter(pIter); - return 0; + return code; } int32_t sdbDoWrite(SSdb *pSdb, SSdbIter *pIter, void *pBuf, int32_t len) { diff --git a/source/dnode/snode/src/snode.c b/source/dnode/snode/src/snode.c index b133226ed3..860db20fa8 100644 --- a/source/dnode/snode/src/snode.c +++ b/source/dnode/snode/src/snode.c @@ -124,6 +124,7 @@ FAIL: } void sndClose(SSnode *pSnode) { + streamMetaCommit(pSnode->pMeta); streamMetaClose(pSnode->pMeta); taosMemoryFree(pSnode->path); taosMemoryFree(pSnode); diff --git a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h index 4ae25c33e4..b1093e0f23 100644 --- a/source/dnode/vnode/inc/vnode.h +++ b/source/dnode/vnode/inc/vnode.h @@ -89,6 +89,7 @@ int32_t vnodeProcessQueryMsg(SVnode *pVnode, SRpcMsg *pMsg); int32_t vnodeProcessFetchMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo); void vnodeProposeWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs); void vnodeApplyWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs); +void vnodeProposeCommitOnNeed(SVnode *pVnode); // meta typedef struct SMeta SMeta; // todo: remove diff --git a/source/dnode/vnode/src/inc/vnd.h b/source/dnode/vnode/src/inc/vnd.h index 93750ed585..e075d8a6ca 100644 --- a/source/dnode/vnode/src/inc/vnd.h +++ b/source/dnode/vnode/src/inc/vnd.h @@ -86,6 +86,7 @@ int32_t vnodeGetBatchMeta(SVnode* pVnode, SRpcMsg* pMsg); // vnodeCommit.c int32_t vnodeBegin(SVnode* pVnode); int32_t vnodeShouldCommit(SVnode* pVnode); +void vnodeUpdCommitSched(SVnode* pVnode); void vnodeRollback(SVnode* pVnode); int32_t vnodeSaveInfo(const char* dir, const SVnodeInfo* pCfg); int32_t vnodeCommitInfo(const char* dir, const SVnodeInfo* pInfo); @@ -103,6 +104,7 @@ void vnodeSyncClose(SVnode* pVnode); void vnodeRedirectRpcMsg(SVnode* pVnode, SRpcMsg* pMsg, int32_t code); bool vnodeIsLeader(SVnode* pVnode); bool vnodeIsRoleLeader(SVnode* pVnode); +int vnodeShouldCommit(SVnode* pVnode); #ifdef __cplusplus } diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index e7f0a6d297..81873a38bc 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -325,6 +325,11 @@ struct STsdbKeepCfg { int32_t keep2; }; +typedef struct SVCommitSched { + int64_t commitMs; + int64_t maxWaitMs; +} SVCommitSched; + struct SVnode { char* path; SVnodeCfg config; @@ -343,6 +348,7 @@ struct SVnode { STQ* pTq; SSink* pSink; tsem_t canCommit; + SVCommitSched commitSched; int64_t sync; TdThreadMutex lock; bool blocked; diff --git a/source/dnode/vnode/src/meta/metaOpen.c b/source/dnode/vnode/src/meta/metaOpen.c index a78239eb56..609ffc58c3 100644 --- a/source/dnode/vnode/src/meta/metaOpen.c +++ b/source/dnode/vnode/src/meta/metaOpen.c @@ -203,6 +203,7 @@ _err: int metaClose(SMeta *pMeta) { if (pMeta) { + if (pMeta->pEnv) tdbAbort(pMeta->pEnv, pMeta->txn); if (pMeta->pCache) metaCacheClose(pMeta); if (pMeta->pIdx) metaCloseIdx(pMeta); if (pMeta->pStreamDb) tdbTbClose(pMeta->pStreamDb); diff --git a/source/dnode/vnode/src/sma/smaCommit.c b/source/dnode/vnode/src/sma/smaCommit.c index 0aa20e9e1d..38f04bb8e8 100644 --- a/source/dnode/vnode/src/sma/smaCommit.c +++ b/source/dnode/vnode/src/sma/smaCommit.c @@ -239,6 +239,11 @@ static int32_t tdProcessRSmaAsyncCommitImpl(SSma *pSma, SCommitInfo *pInfo) { int32_t lino = 0; SVnode *pVnode = pSma->pVnode; + SSmaEnv *pSmaEnv = SMA_RSMA_ENV(pSma); + if (!pSmaEnv) { + goto _exit; + } + code = tdRSmaFSCommit(pSma); TSDB_CHECK_CODE(code, lino, _exit); diff --git a/source/dnode/vnode/src/tq/tqCommit.c b/source/dnode/vnode/src/tq/tqCommit.c index dabd97a345..7fc66c4919 100644 --- a/source/dnode/vnode/src/tq/tqCommit.c +++ b/source/dnode/vnode/src/tq/tqCommit.c @@ -15,4 +15,11 @@ #include "tq.h" -int tqCommit(STQ* pTq) { return tqOffsetCommitFile(pTq->pOffsetStore); } +int tqCommit(STQ* pTq) { + if (streamMetaCommit(pTq->pStreamMeta) < 0) { + tqError("vgId:%d, failed to commit stream meta since %s", TD_VID(pTq->pVnode), terrstr()); + return -1; + } + + return tqOffsetCommitFile(pTq->pOffsetStore); +} diff --git a/source/dnode/vnode/src/tsdb/tsdbCache.c b/source/dnode/vnode/src/tsdb/tsdbCache.c index c7b36ebb22..d18014c37b 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCache.c +++ b/source/dnode/vnode/src/tsdb/tsdbCache.c @@ -1153,6 +1153,8 @@ static int32_t nextRowIterGet(CacheNextRowIter *pIter, TSDBROW **ppRow) { iMax[nMax] = i; max[nMax++] = pIter->input[i].pRow; + } else { + pIter->input[i].next = false; } } } diff --git a/source/dnode/vnode/src/tsdb/tsdbRead.c b/source/dnode/vnode/src/tsdb/tsdbRead.c index 87b5e59bc0..e658d10b3f 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead.c @@ -2421,6 +2421,19 @@ static int32_t loadNeighborIfOverlap(SFileDataBlockInfo* pBlockInfo, STableBlock return code; } +static void updateComposedBlockInfo(STsdbReader* pReader, double el, STableBlockScanInfo* pBlockScanInfo) { + SSDataBlock* pResBlock = pReader->pResBlock; + + pResBlock->info.id.uid = (pBlockScanInfo != NULL) ? pBlockScanInfo->uid : 0; + pResBlock->info.dataLoad = 1; + blockDataUpdateTsWindow(pResBlock, pReader->suppInfo.slotId[0]); + + setComposedBlockFlag(pReader, true); + + pReader->cost.composedBlocks += 1; + pReader->cost.buildComposedBlockTime += el; +} + static int32_t buildComposedDataBlock(STsdbReader* pReader) { int32_t code = TSDB_CODE_SUCCESS; @@ -2432,6 +2445,7 @@ static int32_t buildComposedDataBlock(STsdbReader* pReader) { bool asc = ASCENDING_TRAVERSE(pReader->order); int64_t st = taosGetTimestampUs(); int32_t step = asc ? 1 : -1; + double el = 0; STableBlockScanInfo* pBlockScanInfo = NULL; if (pBlockInfo != NULL) { @@ -2494,10 +2508,8 @@ static int32_t buildComposedDataBlock(STsdbReader* pReader) { } } - bool hasBlockLData = hasDataInLastBlock(pLastBlockReader); - // no data in last block and block, no need to proceed. - if ((hasBlockData == false) && (hasBlockLData == false)) { + if (hasBlockData == false) { break; } @@ -2516,15 +2528,8 @@ static int32_t buildComposedDataBlock(STsdbReader* pReader) { } _end: - pResBlock->info.id.uid = (pBlockScanInfo != NULL) ? pBlockScanInfo->uid : 0; - pResBlock->info.dataLoad = 1; - blockDataUpdateTsWindow(pResBlock, pReader->suppInfo.slotId[0]); - - setComposedBlockFlag(pReader, true); - double el = (taosGetTimestampUs() - st) / 1000.0; - - pReader->cost.composedBlocks += 1; - pReader->cost.buildComposedBlockTime += el; + el = (taosGetTimestampUs() - st) / 1000.0; + updateComposedBlockInfo(pReader, el, pBlockScanInfo); if (pResBlock->info.rows > 0) { tsdbDebug("%p uid:%" PRIu64 ", composed data block created, brange:%" PRIu64 "-%" PRIu64 @@ -2768,6 +2773,8 @@ static int32_t doLoadLastBlockSequentially(STsdbReader* pReader) { return code; } + SSDataBlock* pResBlock = pReader->pResBlock; + while (1) { // load the last data block of current table STableBlockScanInfo* pScanInfo = *(STableBlockScanInfo**)pStatus->pTableIter; @@ -2778,15 +2785,33 @@ static int32_t doLoadLastBlockSequentially(STsdbReader* pReader) { if (!hasNexTable) { return TSDB_CODE_SUCCESS; } + continue; } - code = doBuildDataBlock(pReader); - if (code != TSDB_CODE_SUCCESS) { - return code; + int64_t st = taosGetTimestampUs(); + while (1) { + bool hasBlockLData = hasDataInLastBlock(pLastBlockReader); + + // no data in last block and block, no need to proceed. + if (hasBlockLData == false) { + break; + } + + buildComposedDataBlockImpl(pReader, pScanInfo, &pReader->status.fileBlockData, pLastBlockReader); + if (pResBlock->info.rows >= pReader->capacity) { + break; + } } - if (pReader->pResBlock->info.rows > 0) { + double el = (taosGetTimestampUs() - st) / 1000.0; + updateComposedBlockInfo(pReader, el, pScanInfo); + + if (pResBlock->info.rows > 0) { + tsdbDebug("%p uid:%" PRIu64 ", composed data block created, brange:%" PRIu64 "-%" PRIu64 + " rows:%d, elapsed time:%.2f ms %s", + pReader, pResBlock->info.id.uid, pResBlock->info.window.skey, pResBlock->info.window.ekey, + pResBlock->info.rows, el, pReader->idStr); return TSDB_CODE_SUCCESS; } diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index 1fc8049d00..5c7bfee5a7 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -192,6 +192,7 @@ static int32_t tsdbSnapNextRow(STsdbSnapReader* pReader) { int64_t rowVer = pIter->bData.aVersion[pIter->iRow]; if (rowVer >= pReader->sver && rowVer <= pReader->ever) { + pIter->rInfo.suid = pIter->bData.suid; pIter->rInfo.uid = pIter->bData.uid ? pIter->bData.uid : pIter->bData.aUid[pIter->iRow]; pIter->rInfo.row = tsdbRowFromBlockData(&pIter->bData, pIter->iRow); goto _out; diff --git a/source/dnode/vnode/src/vnd/vnodeCommit.c b/source/dnode/vnode/src/vnd/vnodeCommit.c index ab7f73121f..b42bdf564d 100644 --- a/source/dnode/vnode/src/vnd/vnodeCommit.c +++ b/source/dnode/vnode/src/vnd/vnodeCommit.c @@ -58,7 +58,25 @@ int vnodeBegin(SVnode *pVnode) { return 0; } +void vnodeUpdCommitSched(SVnode *pVnode) { + int64_t randNum = taosRand(); + pVnode->commitSched.commitMs = taosGetMonoTimestampMs(); + pVnode->commitSched.maxWaitMs = tsVndCommitMaxIntervalMs + (randNum % tsVndCommitMaxIntervalMs); +} + int vnodeShouldCommit(SVnode *pVnode) { + if (!pVnode->inUse || !osDataSpaceAvailable()) { + return false; + } + + SVCommitSched *pSched = &pVnode->commitSched; + int64_t nowMs = taosGetMonoTimestampMs(); + + return (((pVnode->inUse->size > pVnode->inUse->node.size) && (pSched->commitMs + SYNC_VND_COMMIT_MIN_MS < nowMs)) || + (pVnode->inUse->size > 0 && pSched->commitMs + pSched->maxWaitMs < nowMs)); +} + +int vnodeShouldCommitOld(SVnode *pVnode) { if (pVnode->inUse) { return osDataSpaceAvailable() && (pVnode->inUse->size > pVnode->inUse->node.size); } @@ -249,6 +267,7 @@ _exit: taosMemoryFree(pInfo); return code; } + int vnodeAsyncCommit(SVnode *pVnode) { int32_t code = 0; @@ -297,7 +316,9 @@ static int vnodeCommitImpl(SCommitInfo *pInfo) { SVnode *pVnode = pInfo->pVnode; vInfo("vgId:%d, start to commit, commitId:%" PRId64 " version:%" PRId64 " term: %" PRId64, TD_VID(pVnode), - pVnode->state.commitID, pVnode->state.applied, pVnode->state.applyTerm); + pInfo->info.state.commitID, pInfo->info.state.committed, pInfo->info.state.commitTerm); + + vnodeUpdCommitSched(pVnode); // persist wal before starting if (walPersist(pVnode->pWal) < 0) { diff --git a/source/dnode/vnode/src/vnd/vnodeOpen.c b/source/dnode/vnode/src/vnd/vnodeOpen.c index cea85a0c11..8c07d0cec7 100644 --- a/source/dnode/vnode/src/vnd/vnodeOpen.c +++ b/source/dnode/vnode/src/vnd/vnodeOpen.c @@ -160,6 +160,8 @@ SVnode *vnodeOpen(const char *path, STfs *pTfs, SMsgCb msgCb) { taosThreadMutexInit(&pVnode->mutex, NULL); taosThreadCondInit(&pVnode->poolNotEmpty, NULL); + vnodeUpdCommitSched(pVnode); + int8_t rollback = vnodeShouldRollback(pVnode); // open buffer pool @@ -254,7 +256,7 @@ void vnodePostClose(SVnode *pVnode) { vnodeSyncPostClose(pVnode); } void vnodeClose(SVnode *pVnode) { if (pVnode) { - vnodeSyncCommit(pVnode); + tsem_wait(&pVnode->canCommit); vnodeSyncClose(pVnode); vnodeQueryClose(pVnode); walClose(pVnode->pWal); @@ -263,6 +265,8 @@ void vnodeClose(SVnode *pVnode) { smaClose(pVnode->pSma); metaClose(pVnode->pMeta); vnodeCloseBufPool(pVnode); + tsem_post(&pVnode->canCommit); + // destroy handle tsem_destroy(&(pVnode->canCommit)); tsem_destroy(&pVnode->syncSem); diff --git a/source/dnode/vnode/src/vnd/vnodeSnapshot.c b/source/dnode/vnode/src/vnd/vnodeSnapshot.c index cc22668b29..e75dc24329 100644 --- a/source/dnode/vnode/src/vnd/vnodeSnapshot.c +++ b/source/dnode/vnode/src/vnd/vnodeSnapshot.c @@ -406,8 +406,10 @@ static int32_t vnodeSnapWriteInfo(SVSnapWriter *pWriter, uint8_t *pData, uint32_ snprintf(dir, TSDB_FILENAME_LEN, "%s", pWriter->pVnode->path); } - SVnode *pVnode = pWriter->pVnode; + SVnodeStats vndStats = pWriter->info.config.vndStats; + SVnode *pVnode = pWriter->pVnode; pWriter->info.config = pVnode->config; + pWriter->info.config.vndStats = vndStats; vDebug("vgId:%d, save config while write snapshot", pWriter->pVnode->config.vgId); if (vnodeSaveInfo(dir, &pWriter->info) < 0) { code = terrno; diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index b0bae55bdb..deb9da2050 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -223,6 +223,7 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRp // skip header pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); len = pMsg->contLen - sizeof(SMsgHead); + bool needCommit = false; switch (pMsg->msgType) { /* META */ @@ -319,9 +320,8 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRp vnodeProcessAlterConfigReq(pVnode, version, pReq, len, pRsp); break; case TDMT_VND_COMMIT: - vnodeSyncCommit(pVnode); - vnodeBegin(pVnode); - goto _exit; + needCommit = true; + break; default: vError("vgId:%d, unprocessed msg, %d", TD_VID(pVnode), pMsg->msgType); return -1; @@ -338,7 +338,7 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRp } // commit if need - if (vnodeShouldCommit(pVnode)) { + if (needCommit) { vInfo("vgId:%d, commit at version %" PRId64, TD_VID(pVnode), version); if (vnodeAsyncCommit(pVnode) < 0) { vError("vgId:%d, failed to vnode async commit since %s.", TD_VID(pVnode), tstrerror(terrno)); diff --git a/source/dnode/vnode/src/vnd/vnodeSync.c b/source/dnode/vnode/src/vnd/vnodeSync.c index 65e17cfaad..6f3788616a 100644 --- a/source/dnode/vnode/src/vnd/vnodeSync.c +++ b/source/dnode/vnode/src/vnd/vnodeSync.c @@ -101,6 +101,64 @@ static void vnodeHandleProposeError(SVnode *pVnode, SRpcMsg *pMsg, int32_t code) } } +static int32_t inline vnodeProposeMsg(SVnode *pVnode, SRpcMsg *pMsg, bool isWeak) { + int64_t seq = 0; + + taosThreadMutexLock(&pVnode->lock); + int32_t code = syncPropose(pVnode->sync, pMsg, isWeak, &seq); + bool wait = (code == 0 && vnodeIsMsgBlock(pMsg->msgType)); + if (wait) { + ASSERT(!pVnode->blocked); + pVnode->blocked = true; + pVnode->blockSec = taosGetTimestampSec(); + pVnode->blockSeq = seq; +#if 0 + pVnode->blockInfo = pMsg->info; +#endif + } + taosThreadMutexUnlock(&pVnode->lock); + + if (code > 0) { + vnodeHandleWriteMsg(pVnode, pMsg); + } else if (code < 0) { + if (terrno != 0) code = terrno; + vnodeHandleProposeError(pVnode, pMsg, code); + } + + if (wait) vnodeWaitBlockMsg(pVnode, pMsg); + return code; +} + +void vnodeProposeCommitOnNeed(SVnode *pVnode) { + if (!vnodeShouldCommit(pVnode)) { + return; + } + + int32_t contLen = sizeof(SMsgHead); + SMsgHead *pHead = rpcMallocCont(contLen); + pHead->contLen = contLen; + pHead->vgId = pVnode->config.vgId; + + SRpcMsg rpcMsg = {0}; + rpcMsg.msgType = TDMT_VND_COMMIT; + rpcMsg.contLen = contLen; + rpcMsg.pCont = pHead; + rpcMsg.info.noResp = 1; + + bool isWeak = false; + if (vnodeProposeMsg(pVnode, &rpcMsg, isWeak) < 0) { + vTrace("vgId:%d, failed to propose vnode commit since %s", pVnode->config.vgId, terrstr()); + goto _out; + } + + vInfo("vgId:%d, proposed vnode commit", pVnode->config.vgId); + +_out: + vnodeUpdCommitSched(pVnode); + rpcFreeCont(rpcMsg.pCont); + rpcMsg.pCont = NULL; +} + #if BATCH_ENABLE static void inline vnodeProposeBatchMsg(SVnode *pVnode, SRpcMsg **pMsgArr, bool *pIsWeakArr, int32_t *arrSize) { @@ -178,6 +236,8 @@ void vnodeProposeWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) continue; } + vnodeProposeCommitOnNeed(pVnode); + code = vnodePreProcessWriteMsg(pVnode, pMsg); if (code != 0) { vGError("vgId:%d, msg:%p failed to pre-process since %s", vgId, pMsg, terrstr()); @@ -205,34 +265,6 @@ void vnodeProposeWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) #else -static int32_t inline vnodeProposeMsg(SVnode *pVnode, SRpcMsg *pMsg, bool isWeak) { - int64_t seq = 0; - - taosThreadMutexLock(&pVnode->lock); - int32_t code = syncPropose(pVnode->sync, pMsg, isWeak, &seq); - bool wait = (code == 0 && vnodeIsMsgBlock(pMsg->msgType)); - if (wait) { - ASSERT(!pVnode->blocked); - pVnode->blocked = true; - pVnode->blockSec = taosGetTimestampSec(); - pVnode->blockSeq = seq; -#if 0 - pVnode->blockInfo = pMsg->info; -#endif - } - taosThreadMutexUnlock(&pVnode->lock); - - if (code > 0) { - vnodeHandleWriteMsg(pVnode, pMsg); - } else if (code < 0) { - if (terrno != 0) code = terrno; - vnodeHandleProposeError(pVnode, pMsg, code); - } - - if (wait) vnodeWaitBlockMsg(pVnode, pMsg); - return code; -} - void vnodeProposeWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) { SVnode *pVnode = pInfo->ahandle; int32_t vgId = pVnode->config.vgId; @@ -256,6 +288,8 @@ void vnodeProposeWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs) continue; } + vnodeProposeCommitOnNeed(pVnode); + code = vnodePreProcessWriteMsg(pVnode, pMsg); if (code != 0) { vGError("vgId:%d, msg:%p failed to pre-process since %s", vgId, pMsg, terrstr()); diff --git a/source/libs/catalog/src/ctgAsync.c b/source/libs/catalog/src/ctgAsync.c index b8590c9255..438128203e 100644 --- a/source/libs/catalog/src/ctgAsync.c +++ b/source/libs/catalog/src/ctgAsync.c @@ -483,7 +483,7 @@ int32_t ctgInitTask(SCtgJob* pJob, CTG_TASK_TYPE type, void* param, int32_t* tas _return: CTG_UNLOCK(CTG_WRITE, &pJob->taskLock); - + return code; } @@ -905,6 +905,14 @@ int32_t ctgCallUserCb(void* param) { return TSDB_CODE_SUCCESS; } +void ctgUpdateJobErrCode(SCtgJob* pJob, int32_t errCode) { + if (!NEED_CLIENT_REFRESH_VG_ERROR(errCode) || errCode == TSDB_CODE_SUCCESS) return; + + atomic_store_32(&pJob->jobResCode, errCode); + qDebug("QID:0x%" PRIx64 " ctg job errCode updated to %s", pJob->queryId, tstrerror(errCode)); + return; +} + int32_t ctgHandleTaskEnd(SCtgTask* pTask, int32_t rspCode) { SCtgJob* pJob = pTask->pJob; int32_t code = 0; @@ -924,6 +932,8 @@ int32_t ctgHandleTaskEnd(SCtgTask* pTask, int32_t rspCode) { if (taskDone < taosArrayGetSize(pJob->pTasks)) { qDebug("QID:0x%" PRIx64 " task done: %d, total: %d", pJob->queryId, taskDone, (int32_t)taosArrayGetSize(pJob->pTasks)); + + ctgUpdateJobErrCode(pJob, rspCode); return TSDB_CODE_SUCCESS; } @@ -931,7 +941,8 @@ int32_t ctgHandleTaskEnd(SCtgTask* pTask, int32_t rspCode) { _return: - pJob->jobResCode = code; + ctgUpdateJobErrCode(pJob, rspCode); + // pJob->jobResCode = code; // taosSsleep(2); // qDebug("QID:0x%" PRIx64 " ctg after sleep", pJob->queryId); @@ -1098,7 +1109,8 @@ _return: } if (code) { - ctgTaskError("Get table %d.%s.%s meta failed with error %s", pName->acctId, pName->dbname, pName->tname, tstrerror(code)); + ctgTaskError("Get table %d.%s.%s meta failed with error %s", pName->acctId, pName->dbname, pName->tname, + tstrerror(code)); } if (pTask->res || code) { ctgHandleTaskEnd(pTask, code); @@ -1286,7 +1298,8 @@ _return: TSWAP(pTask->res, ctx->pResList); taskDone = true; } - ctgTaskError("Get table %d.%s.%s meta failed with error %s", pName->acctId, pName->dbname, pName->tname, tstrerror(code)); + ctgTaskError("Get table %d.%s.%s meta failed with error %s", pName->acctId, pName->dbname, pName->tname, + tstrerror(code)); } if (pTask->res && taskDone) { diff --git a/source/libs/executor/src/tsort.c b/source/libs/executor/src/tsort.c index 06ef36664a..03be1ee6f2 100644 --- a/source/libs/executor/src/tsort.c +++ b/source/libs/executor/src/tsort.c @@ -108,7 +108,7 @@ static int32_t sortComparCleanup(SMsortComparParam* cmpParam) { return TSDB_CODE_SUCCESS; } -void tsortClearOrderdSource(SArray *pOrderedSource) { +void tsortClearOrderdSource(SArray* pOrderedSource) { for (size_t i = 0; i < taosArrayGetSize(pOrderedSource); i++) { SSortSource** pSource = taosArrayGet(pOrderedSource, i); if (NULL == *pSource) { @@ -121,6 +121,12 @@ void tsortClearOrderdSource(SArray *pOrderedSource) { if ((*pSource)->param && !(*pSource)->onlyRef) { taosMemoryFree((*pSource)->param); } + + if (!(*pSource)->onlyRef && (*pSource)->src.pBlock) { + blockDataDestroy((*pSource)->src.pBlock); + (*pSource)->src.pBlock = NULL; + } + taosMemoryFreeClear(*pSource); } @@ -629,9 +635,9 @@ static int32_t createInitialSources(SSortHandle* pHandle) { if (pHandle->type == SORT_SINGLESOURCE_SORT) { SSortSource** pSource = taosArrayGet(pHandle->pOrderedSource, 0); - SSortSource* source = *pSource; + SSortSource* source = *pSource; *pSource = NULL; - + tsortClearOrderdSource(pHandle->pOrderedSource); while (1) { @@ -659,6 +665,10 @@ static int32_t createInitialSources(SSortHandle* pHandle) { if (source->param && !source->onlyRef) { taosMemoryFree(source->param); } + if (!source->onlyRef && source->src.pBlock) { + blockDataDestroy(source->src.pBlock); + source->src.pBlock = NULL; + } taosMemoryFree(source); return code; } @@ -672,6 +682,10 @@ static int32_t createInitialSources(SSortHandle* pHandle) { if (source->param && !source->onlyRef) { taosMemoryFree(source->param); } + if (!source->onlyRef && source->src.pBlock) { + blockDataDestroy(source->src.pBlock); + source->src.pBlock = NULL; + } taosMemoryFree(source); return code; } @@ -849,8 +863,8 @@ SSortExecInfo tsortGetSortExecInfo(SSortHandle* pHandle) { SSortExecInfo info = {0}; if (pHandle == NULL) { - info.sortMethod = SORT_QSORT_T; // by default - info.sortBuffer = 2 * 1048576; // 2mb by default + info.sortMethod = SORT_QSORT_T; // by default + info.sortBuffer = 2 * 1048576; // 2mb by default } else { info.sortBuffer = pHandle->pageSize * pHandle->numOfPages; info.sortMethod = pHandle->inMemSort ? SORT_QSORT_T : SORT_SPILLED_MERGE_SORT_T; diff --git a/source/libs/monitor/src/monMain.c b/source/libs/monitor/src/monMain.c index b3ca0fa452..b23a36d4df 100644 --- a/source/libs/monitor/src/monMain.c +++ b/source/libs/monitor/src/monMain.c @@ -20,6 +20,7 @@ #include "ttime.h" static SMonitor tsMonitor = {0}; +static char* tsMonUri = "/report"; void monRecordLog(int64_t ts, ELogLevel level, const char *content) { taosThreadMutexLock(&tsMonitor.lock); @@ -550,7 +551,7 @@ void monSendReport() { // uDebugL("report cont:%s\n", pCont); if (pCont != NULL) { EHttpCompFlag flag = tsMonitor.cfg.comp ? HTTP_GZIP : HTTP_FLAT; - if (taosSendHttpReport(tsMonitor.cfg.server, tsMonitor.cfg.port, pCont, strlen(pCont), flag) != 0) { + if (taosSendHttpReport(tsMonitor.cfg.server, tsMonUri, tsMonitor.cfg.port, pCont, strlen(pCont), flag) != 0) { uError("failed to send monitor msg"); } taosMemoryFree(pCont); diff --git a/source/libs/planner/src/planLogicCreater.c b/source/libs/planner/src/planLogicCreater.c index 559182bf7b..fef5bd654e 100644 --- a/source/libs/planner/src/planLogicCreater.c +++ b/source/libs/planner/src/planLogicCreater.c @@ -1359,6 +1359,7 @@ static int32_t createSetOpLogicNode(SLogicPlanContext* pCxt, SSetOperator* pSetO } if (TSDB_CODE_SUCCESS == code) { + pSetOp->precision = pSetOperator->precision; *pLogicNode = (SLogicNode*)pSetOp; } else { nodesDestroyNode((SNode*)pSetOp); diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 2b22a4ed29..6f77769dec 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -69,8 +69,7 @@ _err: } void streamMetaClose(SStreamMeta* pMeta) { - tdbCommit(pMeta->db, pMeta->txn); - tdbPostCommit(pMeta->db, pMeta->txn); + tdbAbort(pMeta->db, pMeta->txn); tdbTbClose(pMeta->pTaskDb); tdbTbClose(pMeta->pCheckpointDb); tdbClose(pMeta->db); @@ -88,6 +87,7 @@ void streamMetaClose(SStreamMeta* pMeta) { /*streamMetaReleaseTask(pMeta, pTask);*/ } taosHashCleanup(pMeta->pTasks); + taosHashCleanup(pMeta->pRecoverStatus); taosMemoryFree(pMeta->path); taosMemoryFree(pMeta); } diff --git a/source/libs/sync/inc/syncMessage.h b/source/libs/sync/inc/syncMessage.h index 3bd94dbab5..49486bc12d 100644 --- a/source/libs/sync/inc/syncMessage.h +++ b/source/libs/sync/inc/syncMessage.h @@ -247,8 +247,8 @@ typedef struct SyncLocalCmd { SRaftId destId; int32_t cmd; - SyncTerm sdNewTerm; // step down new term - SyncIndex fcIndex; // follower commit index + SyncTerm currentTerm; // step down new term + SyncIndex commitIndex; // follower commit index } SyncLocalCmd; int32_t syncBuildTimeout(SRpcMsg* pMsg, ESyncTimeoutType ttype, uint64_t logicClock, int32_t ms, SSyncNode* pNode); diff --git a/source/libs/sync/inc/syncPipeline.h b/source/libs/sync/inc/syncPipeline.h index a0a0691694..55cb0d7db6 100644 --- a/source/libs/sync/inc/syncPipeline.h +++ b/source/libs/sync/inc/syncPipeline.h @@ -98,6 +98,7 @@ int32_t syncLogBufferReInit(SSyncLogBuffer* pBuf, SSyncNode* pNode); // access int64_t syncLogBufferGetEndIndex(SSyncLogBuffer* pBuf); +SyncTerm syncLogBufferGetLastMatchTerm(SSyncLogBuffer* pBuf); int32_t syncLogBufferAppend(SSyncLogBuffer* pBuf, SSyncNode* pNode, SSyncRaftEntry* pEntry); int32_t syncLogBufferAccept(SSyncLogBuffer* pBuf, SSyncNode* pNode, SSyncRaftEntry* pEntry, SyncTerm prevTerm); int64_t syncLogBufferProceed(SSyncLogBuffer* pBuf, SSyncNode* pNode, SyncTerm* pMatchTerm); diff --git a/source/libs/sync/inc/syncSnapshot.h b/source/libs/sync/inc/syncSnapshot.h index 974a8f968e..5277e7818f 100644 --- a/source/libs/sync/inc/syncSnapshot.h +++ b/source/libs/sync/inc/syncSnapshot.h @@ -57,7 +57,6 @@ void snapshotSenderDestroy(SSyncSnapshotSender *pSender); bool snapshotSenderIsStart(SSyncSnapshotSender *pSender); int32_t snapshotSenderStart(SSyncSnapshotSender *pSender); void snapshotSenderStop(SSyncSnapshotSender *pSender, bool finish); -int32_t snapshotSend(SSyncSnapshotSender *pSender); int32_t snapshotReSend(SSyncSnapshotSender *pSender); typedef struct SSyncSnapshotReceiver { @@ -82,7 +81,6 @@ void snapshotReceiverDestroy(SSyncSnapshotReceiver *pReceiver) void snapshotReceiverStart(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend *pBeginMsg); void snapshotReceiverStop(SSyncSnapshotReceiver *pReceiver); bool snapshotReceiverIsStart(SSyncSnapshotReceiver *pReceiver); -void snapshotReceiverForceStop(SSyncSnapshotReceiver *pReceiver); // on message int32_t syncNodeOnSnapshot(SSyncNode *ths, const SRpcMsg *pMsg); diff --git a/source/libs/sync/src/syncAppendEntries.c b/source/libs/sync/src/syncAppendEntries.c index 1dc6905b88..026ebdb37c 100644 --- a/source/libs/sync/src/syncAppendEntries.c +++ b/source/libs/sync/src/syncAppendEntries.c @@ -90,6 +90,7 @@ // int32_t syncNodeFollowerCommit(SSyncNode* ths, SyncIndex newCommitIndex) { + ASSERT(false && "deprecated"); if (ths->state != TAOS_SYNC_STATE_FOLLOWER) { sNTrace(ths, "can not do follower commit"); return -1; @@ -206,12 +207,13 @@ int32_t syncNodeOnAppendEntries(SSyncNode* ths, const SRpcMsg* pRpcMsg) { accepted = true; _SEND_RESPONSE: + pEntry = NULL; pReply->matchIndex = syncLogBufferProceed(ths->pLogBuf, ths, &pReply->lastMatchTerm); bool matched = (pReply->matchIndex >= pReply->lastSendIndex); if (accepted && matched) { pReply->success = true; // update commit index only after matching - (void)syncNodeUpdateCommitIndex(ths, pMsg->commitIndex); + (void)syncNodeUpdateCommitIndex(ths, TMIN(pMsg->commitIndex, pReply->lastSendIndex)); } // ack, i.e. send response diff --git a/source/libs/sync/src/syncCommit.c b/source/libs/sync/src/syncCommit.c index 5fdcbeb91c..152fddb7e6 100644 --- a/source/libs/sync/src/syncCommit.c +++ b/source/libs/sync/src/syncCommit.c @@ -44,6 +44,7 @@ // /\ UNCHANGED <> // void syncOneReplicaAdvance(SSyncNode* pSyncNode) { + ASSERT(false && "deprecated"); if (pSyncNode == NULL) { sError("pSyncNode is NULL"); return; diff --git a/source/libs/sync/src/syncEnv.c b/source/libs/sync/src/syncEnv.c index 0d6d0f93f1..1fa67cfa4d 100644 --- a/source/libs/sync/src/syncEnv.c +++ b/source/libs/sync/src/syncEnv.c @@ -114,7 +114,7 @@ void syncHbTimerDataRemove(int64_t rid) { taosRemoveRef(gHbDataRefId, rid); } SSyncHbTimerData *syncHbTimerDataAcquire(int64_t rid) { SSyncHbTimerData *pData = taosAcquireRef(gHbDataRefId, rid); - if (pData == NULL) { + if (pData == NULL && rid > 0) { sInfo("failed to acquire hb-timer-data from refId:%" PRId64, rid); terrno = TSDB_CODE_SYN_INTERNAL_ERROR; } diff --git a/source/libs/sync/src/syncMain.c b/source/libs/sync/src/syncMain.c index 5b9fb7c59c..c2bf6dc837 100644 --- a/source/libs/sync/src/syncMain.c +++ b/source/libs/sync/src/syncMain.c @@ -1030,6 +1030,7 @@ SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) { } } pSyncNode->commitIndex = commitIndex; + sInfo("vgId:%d, sync node commitIndex initialized as %" PRId64, pSyncNode->vgId, pSyncNode->commitIndex); if (syncNodeLogStoreRestoreOnNeed(pSyncNode) < 0) { goto _error; @@ -1170,9 +1171,10 @@ int32_t syncNodeRestore(SSyncNode* pSyncNode) { } ASSERT(endIndex == lastVer + 1); - commitIndex = TMAX(pSyncNode->commitIndex, commitIndex); + pSyncNode->commitIndex = TMAX(pSyncNode->commitIndex, commitIndex); + sInfo("vgId:%d, restore sync until commitIndex:%" PRId64, pSyncNode->vgId, pSyncNode->commitIndex); - if (syncLogBufferCommit(pSyncNode->pLogBuf, pSyncNode, commitIndex) < 0) { + if (syncLogBufferCommit(pSyncNode->pLogBuf, pSyncNode, pSyncNode->commitIndex) < 0) { return -1; } @@ -1247,7 +1249,7 @@ void syncNodePreClose(SSyncNode* pSyncNode) { #if 0 if (pSyncNode->pNewNodeReceiver != NULL) { if (snapshotReceiverIsStart(pSyncNode->pNewNodeReceiver)) { - snapshotReceiverForceStop(pSyncNode->pNewNodeReceiver); + snapshotReceiverStop(pSyncNode->pNewNodeReceiver); } sDebug("vgId:%d, snapshot receiver destroy while preclose sync node, data:%p", pSyncNode->vgId, @@ -1270,7 +1272,7 @@ void syncNodePreClose(SSyncNode* pSyncNode) { void syncNodePostClose(SSyncNode* pSyncNode) { if (pSyncNode->pNewNodeReceiver != NULL) { if (snapshotReceiverIsStart(pSyncNode->pNewNodeReceiver)) { - snapshotReceiverForceStop(pSyncNode->pNewNodeReceiver); + snapshotReceiverStop(pSyncNode->pNewNodeReceiver); } sDebug("vgId:%d, snapshot receiver destroy while preclose sync node, data:%p", pSyncNode->vgId, @@ -1325,7 +1327,7 @@ void syncNodeClose(SSyncNode* pSyncNode) { if (pSyncNode->pNewNodeReceiver != NULL) { if (snapshotReceiverIsStart(pSyncNode->pNewNodeReceiver)) { - snapshotReceiverForceStop(pSyncNode->pNewNodeReceiver); + snapshotReceiverStop(pSyncNode->pNewNodeReceiver); } sDebug("vgId:%d, snapshot receiver destroy while close, data:%p", pSyncNode->vgId, pSyncNode->pNewNodeReceiver); @@ -1480,16 +1482,21 @@ int32_t syncNodeSendMsgById(const SRaftId* destRaftId, SSyncNode* pNode, SRpcMsg } } + int32_t code = -1; if (pNode->syncSendMSg != NULL && epSet != NULL) { syncUtilMsgHtoN(pMsg->pCont); pMsg->info.noResp = 1; - return pNode->syncSendMSg(epSet, pMsg); - } else { - sError("vgId:%d, sync send msg by id error, fp:%p epset:%p", pNode->vgId, pNode->syncSendMSg, epSet); + code = pNode->syncSendMSg(epSet, pMsg); + } + + if (code < 0) { + sError("vgId:%d, sync send msg by id error, epset:%p dnode:%d addr:%" PRId64 " err:0x%x", pNode->vgId, epSet, + DID(destRaftId), destRaftId->addr, terrno); rpcFreeCont(pMsg->pCont); terrno = TSDB_CODE_SYN_INTERNAL_ERROR; - return -1; } + + return code; } inline bool syncNodeInConfig(SSyncNode* pNode, const SSyncCfg* pCfg) { @@ -1855,7 +1862,7 @@ void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr) { // close receiver if (pSyncNode != NULL && pSyncNode->pNewNodeReceiver != NULL && snapshotReceiverIsStart(pSyncNode->pNewNodeReceiver)) { - snapshotReceiverForceStop(pSyncNode->pNewNodeReceiver); + snapshotReceiverStop(pSyncNode->pNewNodeReceiver); } // stop elect timer @@ -2537,8 +2544,9 @@ int32_t syncNodeOnHeartbeat(SSyncNode* ths, const SRpcMsg* pRpcMsg) { SyncLocalCmd* pSyncMsg = rpcMsgLocalCmd.pCont; pSyncMsg->cmd = SYNC_LOCAL_CMD_FOLLOWER_CMT; - pSyncMsg->fcIndex = pMsg->commitIndex; - SyncIndex fcIndex = pSyncMsg->fcIndex; + pSyncMsg->commitIndex = pMsg->commitIndex; + pSyncMsg->currentTerm = pMsg->term; + SyncIndex fcIndex = pSyncMsg->commitIndex; if (ths->syncEqMsg != NULL && ths->msgcb != NULL) { int32_t code = ths->syncEqMsg(ths->msgcb, &rpcMsgLocalCmd); @@ -2559,7 +2567,8 @@ int32_t syncNodeOnHeartbeat(SSyncNode* ths, const SRpcMsg* pRpcMsg) { SyncLocalCmd* pSyncMsg = rpcMsgLocalCmd.pCont; pSyncMsg->cmd = SYNC_LOCAL_CMD_STEP_DOWN; - pSyncMsg->sdNewTerm = pMsg->term; + pSyncMsg->currentTerm = pMsg->term; + pSyncMsg->commitIndex = pMsg->commitIndex; if (ths->syncEqMsg != NULL && ths->msgcb != NULL) { int32_t code = ths->syncEqMsg(ths->msgcb, &rpcMsgLocalCmd); @@ -2567,7 +2576,7 @@ int32_t syncNodeOnHeartbeat(SSyncNode* ths, const SRpcMsg* pRpcMsg) { sError("vgId:%d, sync enqueue step-down msg error, code:%d", ths->vgId, code); rpcFreeCont(rpcMsgLocalCmd.pCont); } else { - sTrace("vgId:%d, sync enqueue step-down msg, new-term: %" PRId64, ths->vgId, pSyncMsg->sdNewTerm); + sTrace("vgId:%d, sync enqueue step-down msg, new-term: %" PRId64, ths->vgId, pSyncMsg->currentTerm); } } } @@ -2625,10 +2634,13 @@ int32_t syncNodeOnLocalCmd(SSyncNode* ths, const SRpcMsg* pRpcMsg) { syncLogRecvLocalCmd(ths, pMsg, ""); if (pMsg->cmd == SYNC_LOCAL_CMD_STEP_DOWN) { - syncNodeStepDown(ths, pMsg->sdNewTerm); + syncNodeStepDown(ths, pMsg->currentTerm); } else if (pMsg->cmd == SYNC_LOCAL_CMD_FOLLOWER_CMT) { - (void)syncNodeUpdateCommitIndex(ths, pMsg->fcIndex); + SyncTerm matchTerm = syncLogBufferGetLastMatchTerm(ths->pLogBuf); + if (pMsg->currentTerm == matchTerm) { + (void)syncNodeUpdateCommitIndex(ths, pMsg->commitIndex); + } if (syncLogBufferCommit(ths->pLogBuf, ths, ths->commitIndex) < 0) { sError("vgId:%d, failed to commit raft log since %s. commit index: %" PRId64 "", ths->vgId, terrstr(), ths->commitIndex); @@ -2641,14 +2653,15 @@ int32_t syncNodeOnLocalCmd(SSyncNode* ths, const SRpcMsg* pRpcMsg) { } int32_t syncNodeOnLocalCmdOld(SSyncNode* ths, const SRpcMsg* pRpcMsg) { + ASSERT(false && "deprecated"); SyncLocalCmd* pMsg = pRpcMsg->pCont; syncLogRecvLocalCmd(ths, pMsg, ""); if (pMsg->cmd == SYNC_LOCAL_CMD_STEP_DOWN) { - syncNodeStepDown(ths, pMsg->sdNewTerm); + syncNodeStepDown(ths, pMsg->currentTerm); } else if (pMsg->cmd == SYNC_LOCAL_CMD_FOLLOWER_CMT) { - syncNodeFollowerCommit(ths, pMsg->fcIndex); + syncNodeFollowerCommit(ths, pMsg->commitIndex); } else { sError("error local cmd"); diff --git a/source/libs/sync/src/syncPipeline.c b/source/libs/sync/src/syncPipeline.c index 410986b87a..f878044bca 100644 --- a/source/libs/sync/src/syncPipeline.c +++ b/source/libs/sync/src/syncPipeline.c @@ -31,6 +31,10 @@ static bool syncIsMsgBlock(tmsg_t type) { (type == TDMT_VND_UPDATE_TAG_VAL) || (type == TDMT_VND_ALTER_CONFIRM); } +FORCE_INLINE static int64_t syncGetRetryMaxWaitMs() { + return SYNC_LOG_REPL_RETRY_WAIT_MS * (1 << SYNC_MAX_RETRY_BACKOFF); +} + int64_t syncLogBufferGetEndIndex(SSyncLogBuffer* pBuf) { taosThreadMutexLock(&pBuf->mutex); int64_t index = pBuf->endIndex; @@ -264,20 +268,27 @@ int32_t syncLogBufferReInit(SSyncLogBuffer* pBuf, SSyncNode* pNode) { return ret; } -FORCE_INLINE SyncTerm syncLogBufferGetLastMatchTerm(SSyncLogBuffer* pBuf) { +FORCE_INLINE SyncTerm syncLogBufferGetLastMatchTermWithoutLock(SSyncLogBuffer* pBuf) { SyncIndex index = pBuf->matchIndex; SSyncRaftEntry* pEntry = pBuf->entries[(index + pBuf->size) % pBuf->size].pItem; ASSERT(pEntry != NULL); return pEntry->term; } +SyncTerm syncLogBufferGetLastMatchTerm(SSyncLogBuffer* pBuf) { + taosThreadMutexLock(&pBuf->mutex); + SyncTerm term = syncLogBufferGetLastMatchTermWithoutLock(pBuf); + taosThreadMutexUnlock(&pBuf->mutex); + return term; +} + int32_t syncLogBufferAccept(SSyncLogBuffer* pBuf, SSyncNode* pNode, SSyncRaftEntry* pEntry, SyncTerm prevTerm) { taosThreadMutexLock(&pBuf->mutex); syncLogBufferValidate(pBuf); int32_t ret = -1; SyncIndex index = pEntry->index; SyncIndex prevIndex = pEntry->index - 1; - SyncTerm lastMatchTerm = syncLogBufferGetLastMatchTerm(pBuf); + SyncTerm lastMatchTerm = syncLogBufferGetLastMatchTermWithoutLock(pBuf); SSyncRaftEntry* pExist = NULL; bool inBuf = true; @@ -329,6 +340,8 @@ int32_t syncLogBufferAccept(SSyncLogBuffer* pBuf, SSyncNode* pNode, SSyncRaftEnt } // update + ASSERT(pBuf->startIndex < index); + ASSERT(index - pBuf->startIndex < pBuf->size); ASSERT(pBuf->entries[index % pBuf->size].pItem == NULL); SSyncLogBufEntry tmp = {.pItem = pEntry, .prevLogIndex = prevIndex, .prevLogTerm = prevTerm}; pEntry = NULL; @@ -456,6 +469,11 @@ int32_t syncLogFsmExecute(SSyncNode* pNode, SSyncFSM* pFsm, ESyncState role, Syn pNode->vgId, pEntry->index, pEntry->term, TMSG_INFO(pEntry->originalRpcType), applyCode); } + if (pEntry->originalRpcType == TDMT_VND_COMMIT) { + sInfo("vgId:%d, fsm execute vnode commit. index: %" PRId64 ", term: %" PRId64 "", pNode->vgId, pEntry->index, + pEntry->term); + } + SRpcMsg rpcMsg = {.code = applyCode}; syncEntry2OriginalRpc(pEntry, &rpcMsg); @@ -552,7 +570,8 @@ int32_t syncLogBufferCommit(SSyncLogBuffer* pBuf, SSyncNode* pNode, int64_t comm ret = 0; _out: // mark as restored if needed - if (!pNode->restoreFinish && pBuf->commitIndex >= pNode->commitIndex) { + if (!pNode->restoreFinish && pBuf->commitIndex >= pNode->commitIndex && pEntry != NULL && + pNode->pRaftStore->currentTerm <= pEntry->term) { pNode->pFsm->FpRestoreFinishCb(pNode->pFsm); pNode->restoreFinish = true; sInfo("vgId:%d, restore finished. log buffer: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 ")", pNode->vgId, @@ -613,6 +632,12 @@ int32_t syncLogReplMgrRetryOnNeed(SSyncLogReplMgr* pMgr, SSyncNode* pNode) { } if (pMgr->states[pos].acked) { + if (pMgr->matchIndex < index && pMgr->states[pos].timeMs + (syncGetRetryMaxWaitMs() << 3) < nowMs) { + syncLogReplMgrReset(pMgr); + sWarn("vgId:%d, reset sync log repl mgr since stagnation. index: %" PRId64 ", peer: %" PRIx64, pNode->vgId, + index, pDestId->addr); + goto _out; + } continue; } @@ -639,10 +664,12 @@ int32_t syncLogReplMgrRetryOnNeed(SSyncLogReplMgr* pMgr, SSyncNode* pNode) { _out: if (retried) { pMgr->retryBackoff = syncLogGetNextRetryBackoff(pMgr); - sInfo("vgId:%d, resent %d sync log entries. dest: %" PRIx64 ", indexes: %" PRId64 " ..., terms: ... %" PRId64 - ", retryWaitMs: %" PRId64 ", repl mgr: [%" PRId64 " %" PRId64 ", %" PRId64 ")", + SSyncLogBuffer* pBuf = pNode->pLogBuf; + sInfo("vgId:%d, resend %d sync log entries. dest: %" PRIx64 ", indexes: %" PRId64 " ..., terms: ... %" PRId64 + ", retryWaitMs: %" PRId64 ", mgr: [%" PRId64 " %" PRId64 ", %" PRId64 "), buffer: [%" PRId64 " %" PRId64 + " %" PRId64 ", %" PRId64 ")", pNode->vgId, count, pDestId->addr, firstIndex, term, retryWaitMs, pMgr->startIndex, pMgr->matchIndex, - pMgr->endIndex); + pMgr->endIndex, pBuf->startIndex, pBuf->commitIndex, pBuf->matchIndex, pBuf->endIndex); } return ret; } @@ -771,7 +798,7 @@ int32_t syncLogReplMgrReplicateOnce(SSyncLogReplMgr* pMgr, SSyncNode* pNode) { int32_t syncLogReplMgrReplicateProbeOnce(SSyncLogReplMgr* pMgr, SSyncNode* pNode, SyncIndex index) { ASSERT(!pMgr->restored); ASSERT(pMgr->startIndex >= 0); - int64_t retryMaxWaitMs = SYNC_LOG_REPL_RETRY_WAIT_MS * (1 << SYNC_MAX_RETRY_BACKOFF); + int64_t retryMaxWaitMs = syncGetRetryMaxWaitMs(); int64_t nowMs = taosGetMonoTimestampMs(); if (pMgr->endIndex > pMgr->startIndex && @@ -799,11 +826,10 @@ int32_t syncLogReplMgrReplicateProbeOnce(SSyncLogReplMgr* pMgr, SSyncNode* pNode pMgr->endIndex = index + 1; SSyncLogBuffer* pBuf = pNode->pLogBuf; - sTrace("vgId:%d, attempted to probe the %d'th peer with msg of index:%" PRId64 " term: %" PRId64 - ". pMgr(rs:%d): [%" PRId64 " %" PRId64 ", %" PRId64 "), pBuf: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 - ")", - pNode->vgId, pMgr->peerId, index, term, pMgr->restored, pMgr->startIndex, pMgr->matchIndex, pMgr->endIndex, - pBuf->startIndex, pBuf->commitIndex, pBuf->matchIndex, pBuf->endIndex); + sInfo("vgId:%d, probe peer:%" PRIx64 " with msg of index:%" PRId64 " term: %" PRId64 ". mgr (rs:%d): [%" PRId64 + " %" PRId64 ", %" PRId64 "), buffer: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 ")", + pNode->vgId, pDestId->addr, index, term, pMgr->restored, pMgr->startIndex, pMgr->matchIndex, pMgr->endIndex, + pBuf->startIndex, pBuf->commitIndex, pBuf->matchIndex, pBuf->endIndex); return 0; } @@ -815,9 +841,11 @@ int32_t syncLogReplMgrReplicateAttemptedOnce(SSyncLogReplMgr* pMgr, SSyncNode* p int32_t count = 0; int64_t nowMs = taosGetMonoTimestampMs(); int64_t limit = pMgr->size >> 1; + SyncTerm term = -1; + SyncIndex firstIndex = -1; for (SyncIndex index = pMgr->endIndex; index <= pNode->pLogBuf->matchIndex; index++) { - if (batchSize < count++ || limit <= index - pMgr->startIndex) { + if (batchSize < count || limit <= index - pMgr->startIndex) { break; } if (pMgr->startIndex + 1 < index && pMgr->states[(index - 1) % pMgr->size].barrier) { @@ -826,7 +854,6 @@ int32_t syncLogReplMgrReplicateAttemptedOnce(SSyncLogReplMgr* pMgr, SSyncNode* p int64_t pos = index % pMgr->size; SRaftId* pDestId = &pNode->replicasId[pMgr->peerId]; bool barrier = false; - SyncTerm term = -1; if (syncLogBufferReplicateOneTo(pMgr, pNode, index, &term, pDestId, &barrier) < 0) { sError("vgId:%d, failed to replicate log entry since %s. index: %" PRId64 ", dest: 0x%016" PRIx64 "", pNode->vgId, terrstr(), index, pDestId->addr); @@ -837,6 +864,9 @@ int32_t syncLogReplMgrReplicateAttemptedOnce(SSyncLogReplMgr* pMgr, SSyncNode* p pMgr->states[pos].term = term; pMgr->states[pos].acked = false; + if (firstIndex == -1) firstIndex = index; + count++; + pMgr->endIndex = index + 1; if (barrier) { sInfo("vgId:%d, replicated sync barrier to dest: %" PRIx64 ". index: %" PRId64 ", term: %" PRId64 @@ -850,10 +880,11 @@ int32_t syncLogReplMgrReplicateAttemptedOnce(SSyncLogReplMgr* pMgr, SSyncNode* p syncLogReplMgrRetryOnNeed(pMgr, pNode); SSyncLogBuffer* pBuf = pNode->pLogBuf; - sTrace("vgId:%d, attempted to replicate %d msgs to the %d'th peer. pMgr(rs:%d): [%" PRId64 " %" PRId64 ", %" PRId64 - "), pBuf: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 ")", - pNode->vgId, count, pMgr->peerId, pMgr->restored, pMgr->startIndex, pMgr->matchIndex, pMgr->endIndex, - pBuf->startIndex, pBuf->commitIndex, pBuf->matchIndex, pBuf->endIndex); + sTrace("vgId:%d, replicated %d msgs to peer: %" PRIx64 ". indexes: %" PRId64 "..., terms: ...%" PRId64 + ", mgr: (rs:%d) [%" PRId64 " %" PRId64 ", %" PRId64 "), buffer: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 + ")", + pNode->vgId, count, pDestId->addr, firstIndex, term, pMgr->restored, pMgr->startIndex, pMgr->matchIndex, + pMgr->endIndex, pBuf->startIndex, pBuf->commitIndex, pBuf->matchIndex, pBuf->endIndex); return 0; } @@ -985,6 +1016,10 @@ void syncLogBufferDestroy(SSyncLogBuffer* pBuf) { int32_t syncLogBufferRollback(SSyncLogBuffer* pBuf, SSyncNode* pNode, SyncIndex toIndex) { ASSERT(pBuf->commitIndex < toIndex && toIndex <= pBuf->endIndex); + if (toIndex == pBuf->endIndex) { + return 0; + } + sInfo("vgId:%d, rollback sync log buffer. toindex: %" PRId64 ", buffer: [%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 ")", pNode->vgId, toIndex, pBuf->startIndex, pBuf->commitIndex, pBuf->matchIndex, pBuf->endIndex); diff --git a/source/libs/sync/src/syncRaftCfg.c b/source/libs/sync/src/syncRaftCfg.c index 86ea1f48cc..806949c81e 100644 --- a/source/libs/sync/src/syncRaftCfg.c +++ b/source/libs/sync/src/syncRaftCfg.c @@ -71,31 +71,23 @@ int32_t syncWriteCfgFile(SSyncNode *pNode) { char file[PATH_MAX] = {0}; snprintf(file, sizeof(file), "%s.bak", realfile); - pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); - if (pFile == NULL) { - terrno = TAOS_SYSTEM_ERROR(errno); - sError("vgId:%d, failed to open sync cfg file:%s since %s", pNode->vgId, realfile, terrstr()); - goto _OVER; - } - terrno = TSDB_CODE_OUT_OF_MEMORY; pJson = tjsonCreateObject(); if (pJson == NULL) goto _OVER; if (tjsonAddObject(pJson, "RaftCfg", syncEncodeRaftCfg, pCfg) < 0) goto _OVER; - buffer = tjsonToString(pJson); if (buffer == NULL) goto _OVER; + terrno = 0; + + pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); + if (pFile == NULL) goto _OVER; int32_t len = strlen(buffer); if (taosWriteFile(pFile, buffer, len) <= 0) goto _OVER; if (taosFsyncFile(pFile) < 0) goto _OVER; - taosCloseFile(&pFile); - if (taosRenameFile(file, realfile) != 0) { - terrno = TAOS_SYSTEM_ERROR(errno); - sError("vgId:%d, failed to rename sync cfg file:%s to %s since %s", pNode->vgId, file, realfile, terrstr()); - goto _OVER; - } + taosCloseFile(&pFile); + if (taosRenameFile(file, realfile) != 0) goto _OVER; code = 0; sInfo("vgId:%d, succeed to write sync cfg file:%s, len:%d", pNode->vgId, realfile, len); @@ -106,6 +98,7 @@ _OVER: if (pFile != NULL) taosCloseFile(&pFile); if (code != 0) { + if (terrno == 0) terrno = TAOS_SYSTEM_ERROR(errno); sError("vgId:%d, failed to write sync cfg file:%s since %s", pNode->vgId, realfile, terrstr()); } return code; diff --git a/source/libs/sync/src/syncRaftLog.c b/source/libs/sync/src/syncRaftLog.c index 3f9f397ef5..03c3fe154d 100644 --- a/source/libs/sync/src/syncRaftLog.c +++ b/source/libs/sync/src/syncRaftLog.c @@ -219,6 +219,10 @@ static int32_t raftLogAppendEntry(struct SSyncLogStore* pLogStore, SSyncRaftEntr ASSERT(pEntry->index == index); + if (pEntry->originalRpcType == TDMT_VND_COMMIT) { + walFsync(pWal, true); + } + sNTrace(pData->pSyncNode, "write index:%" PRId64 ", type:%s, origin type:%s, elapsed:%" PRId64, pEntry->index, TMSG_INFO(pEntry->msgType), TMSG_INFO(pEntry->originalRpcType), tsElapsed); return 0; @@ -312,29 +316,6 @@ static int32_t raftLogTruncate(struct SSyncLogStore* pLogStore, SyncIndex fromIn SSyncLogStoreData* pData = pLogStore->data; SWal* pWal = pData->pWal; - // need not truncate - SyncIndex wallastVer = walGetLastVer(pWal); - if (fromIndex > wallastVer) { - return 0; - } - - // need not truncate - SyncIndex walCommitVer = walGetCommittedVer(pWal); - if (fromIndex <= walCommitVer) { - return 0; - } - - // delete from cache - for (SyncIndex index = fromIndex; index <= wallastVer; ++index) { - SLRUCache* pCache = pData->pSyncNode->pLogStore->pCache; - LRUHandle* h = taosLRUCacheLookup(pCache, &index, sizeof(index)); - if (h) { - sNTrace(pData->pSyncNode, "cache delete index:%" PRId64, index); - - taosLRUCacheRelease(pData->pSyncNode->pLogStore->pCache, h, true); - } - } - int32_t code = walRollback(pWal, fromIndex); if (code != 0) { int32_t err = terrno; diff --git a/source/libs/sync/src/syncSnapshot.c b/source/libs/sync/src/syncSnapshot.c index b68b735f46..defb7402f4 100644 --- a/source/libs/sync/src/syncSnapshot.c +++ b/source/libs/sync/src/syncSnapshot.c @@ -150,7 +150,7 @@ void snapshotSenderStop(SSyncSnapshotSender *pSender, bool finish) { // when sender receive ack, call this function to send msg from seq // seq = ack + 1, already updated -int32_t snapshotSend(SSyncSnapshotSender *pSender) { +static int32_t snapshotSend(SSyncSnapshotSender *pSender) { // free memory last time (current seq - 1) if (pSender->pCurrentBlock != NULL) { taosMemoryFree(pSender->pCurrentBlock); @@ -342,23 +342,6 @@ void snapshotReceiverDestroy(SSyncSnapshotReceiver *pReceiver) { bool snapshotReceiverIsStart(SSyncSnapshotReceiver *pReceiver) { return pReceiver->start; } -// force stop -void snapshotReceiverForceStop(SSyncSnapshotReceiver *pReceiver) { - sRInfo(pReceiver, "snapshot receiver force stop, writer:%p"); - - // force close, abandon incomplete data - if (pReceiver->pWriter != NULL) { - int32_t ret = pReceiver->pSyncNode->pFsm->FpSnapshotStopWrite(pReceiver->pSyncNode->pFsm, pReceiver->pWriter, false, - &pReceiver->snapshot); - if (ret != 0) { - sRInfo(pReceiver, "snapshot receiver force stop failed since %s", terrstr()); - } - pReceiver->pWriter = NULL; - } - - pReceiver->start = false; -} - static int32_t snapshotReceiverStartWriter(SSyncSnapshotReceiver *pReceiver, SyncSnapshotSend *pBeginMsg) { if (pReceiver->pWriter != NULL) { sRError(pReceiver, "vgId:%d, snapshot receiver writer is not null"); @@ -590,7 +573,7 @@ _START_RECEIVER: if (snapshotReceiverIsStart(pReceiver)) { sRInfo(pReceiver, "snapshot receiver already start and force stop pre one"); - snapshotReceiverForceStop(pReceiver); + snapshotReceiverStop(pReceiver); } snapshotReceiverStart(pReceiver, pMsg); // set start-time same with sender @@ -842,7 +825,7 @@ int32_t syncNodeOnSnapshot(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) { } else if (pMsg->seq == SYNC_SNAPSHOT_SEQ_FORCE_CLOSE) { // force close, no response syncLogRecvSyncSnapshotSend(pSyncNode, pMsg, "process force stop"); - snapshotReceiverForceStop(pReceiver); + snapshotReceiverStop(pReceiver); } else if (pMsg->seq > SYNC_SNAPSHOT_SEQ_BEGIN && pMsg->seq < SYNC_SNAPSHOT_SEQ_END) { syncLogRecvSyncSnapshotSend(pSyncNode, pMsg, "process seq data"); code = syncNodeOnSnapshotReceive(pSyncNode, pMsg); @@ -989,6 +972,13 @@ int32_t syncNodeOnSnapshotRsp(SSyncNode *pSyncNode, const SRpcMsg *pRpcMsg) { return syncNodeOnSnapshotPreRsp(pSyncNode, pSender, pMsg); } + if (pSender->pReader == NULL || pSender->finish) { + syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "snapshot sender invalid"); + sSError(pSender, "snapshot sender invalid, pReader:%p finish:%d", pMsg->code, pSender->pReader, pSender->finish); + terrno = pMsg->code; + goto _ERROR; + } + if (pMsg->ack == SYNC_SNAPSHOT_SEQ_BEGIN) { syncLogRecvSyncSnapshotRsp(pSyncNode, pMsg, "process seq begin"); if (snapshotSenderUpdateProgress(pSender, pMsg) != 0) { diff --git a/source/libs/sync/src/syncUtil.c b/source/libs/sync/src/syncUtil.c index 0ec24d5326..e4a65837f7 100644 --- a/source/libs/sync/src/syncUtil.c +++ b/source/libs/sync/src/syncUtil.c @@ -17,20 +17,20 @@ #include "syncUtil.h" #include "syncIndexMgr.h" #include "syncMessage.h" +#include "syncPipeline.h" #include "syncRaftCfg.h" #include "syncRaftStore.h" #include "syncSnapshot.h" void syncCfg2SimpleStr(const SSyncCfg* pCfg, char* buf, int32_t bufLen) { - int32_t len = snprintf(buf, bufLen, "{r-num:%d, my:%d, ", pCfg->replicaNum, pCfg->myIndex); - + int32_t len = snprintf(buf, bufLen, "{num:%d, as:%d, [", pCfg->replicaNum, pCfg->myIndex); for (int32_t i = 0; i < pCfg->replicaNum; ++i) { + len += snprintf(buf + len, bufLen - len, "%s:%d", pCfg->nodeInfo[i].nodeFqdn, pCfg->nodeInfo[i].nodePort); if (i < pCfg->replicaNum - 1) { - len += snprintf(buf + len, bufLen - len, "%s:%d, ", pCfg->nodeInfo[i].nodeFqdn, pCfg->nodeInfo[i].nodePort); - } else { - len += snprintf(buf + len, bufLen - len, "%s:%d}", pCfg->nodeInfo[i].nodeFqdn, pCfg->nodeInfo[i].nodePort); + len += snprintf(buf + len, bufLen - len, "%s", ", "); } } + len += snprintf(buf + len, bufLen - len, "%s", "]}"); } void syncUtilNodeInfo2EpSet(const SNodeInfo* pInfo, SEpSet* pEpSet) { @@ -89,32 +89,55 @@ bool syncUtilUserRollback(tmsg_t msgType) { return msgType != TDMT_SYNC_NOOP && // for leader static void syncHearbeatReplyTime2Str(SSyncNode* pSyncNode, char* buf, int32_t bufLen) { - int32_t len = 5; - + int32_t len = 0; + len += snprintf(buf + len, bufLen - len, "%s", "{"); for (int32_t i = 0; i < pSyncNode->replicaNum; ++i) { int64_t tsMs = syncIndexMgrGetRecvTime(pSyncNode->pMatchIndex, &(pSyncNode->replicasId[i])); - + len += snprintf(buf + len, bufLen - len, "%d:%" PRId64, i, tsMs); if (i < pSyncNode->replicaNum - 1) { - len += snprintf(buf + len, bufLen - len, "%d:%" PRId64 ",", i, tsMs); - } else { - len += snprintf(buf + len, bufLen - len, "%d:%" PRId64 "}", i, tsMs); + len += snprintf(buf + len, bufLen - len, "%s", ","); } } + len += snprintf(buf + len, bufLen - len, "%s", "}"); } // for follower static void syncHearbeatTime2Str(SSyncNode* pSyncNode, char* buf, int32_t bufLen) { - int32_t len = 4; - + int32_t len = 0; + len += snprintf(buf + len, bufLen - len, "%s", "{"); for (int32_t i = 0; i < pSyncNode->replicaNum; ++i) { int64_t tsMs = syncIndexMgrGetRecvTime(pSyncNode->pNextIndex, &(pSyncNode->replicasId[i])); - + len += snprintf(buf + len, bufLen - len, "%d:%" PRId64, i, tsMs); if (i < pSyncNode->replicaNum - 1) { - len += snprintf(buf + len, bufLen - len, "%d:%" PRId64 ",", i, tsMs); - } else { - len += snprintf(buf + len, bufLen - len, "%d:%" PRId64 "}", i, tsMs); + len += snprintf(buf + len, bufLen - len, "%s", ","); } } + len += snprintf(buf + len, bufLen - len, "%s", "}"); +} + +static void syncLogBufferStates2Str(SSyncNode* pSyncNode, char* buf, int32_t bufLen) { + SSyncLogBuffer* pBuf = pSyncNode->pLogBuf; + if (pBuf == NULL) { + return; + } + int len = 0; + len += snprintf(buf + len, bufLen - len, "[%" PRId64 " %" PRId64 " %" PRId64 ", %" PRId64 ")", pBuf->startIndex, + pBuf->commitIndex, pBuf->matchIndex, pBuf->endIndex); +} + +static void syncLogReplMgrStates2Str(SSyncNode* pSyncNode, char* buf, int32_t bufLen) { + int len = 0; + len += snprintf(buf + len, bufLen - len, "%s", "{"); + for (int32_t i = 0; i < pSyncNode->replicaNum; i++) { + SSyncLogReplMgr* pMgr = pSyncNode->logReplMgrs[i]; + if (pMgr == NULL) break; + len += snprintf(buf + len, bufLen - len, "%d:%d [%" PRId64 " %" PRId64 ", %" PRId64 ")", i, pMgr->restored, + pMgr->startIndex, pMgr->matchIndex, pMgr->endIndex); + if (i + 1 < pSyncNode->replicaNum) { + len += snprintf(buf + len, bufLen - len, "%s", ", "); + } + } + len += snprintf(buf + len, bufLen - len, "%s", "}"); } static void syncPeerState2Str(SSyncNode* pSyncNode, char* buf, int32_t bufLen) { @@ -156,16 +179,19 @@ void syncPrintNodeLog(const char* flags, ELogLevel level, int32_t dflag, SSyncNo int32_t cacheHit = pNode->pLogStore->cacheHit; int32_t cacheMiss = pNode->pLogStore->cacheMiss; - char cfgStr[1024]; + char cfgStr[1024] = ""; syncCfg2SimpleStr(&pNode->raftCfg.cfg, cfgStr, sizeof(cfgStr)); - char peerStr[1024] = "{"; - syncPeerState2Str(pNode, peerStr, sizeof(peerStr)); + char replMgrStatesStr[1024] = ""; + syncLogReplMgrStates2Str(pNode, replMgrStatesStr, sizeof(replMgrStatesStr)); - char hbrTimeStr[256] = "hbr:{"; + char bufferStatesStr[256] = ""; + syncLogBufferStates2Str(pNode, bufferStatesStr, sizeof(bufferStatesStr)); + + char hbrTimeStr[256] = ""; syncHearbeatReplyTime2Str(pNode, hbrTimeStr, sizeof(hbrTimeStr)); - char hbTimeStr[256] = "hb:{"; + char hbTimeStr[256] = ""; syncHearbeatTime2Str(pNode, hbTimeStr, sizeof(hbTimeStr)); char eventLog[512]; // {0}; @@ -181,21 +207,21 @@ void syncPrintNodeLog(const char* flags, ELogLevel level, int32_t dflag, SSyncNo // restore error code terrno = errCode; - + if (pNode != NULL) { taosPrintLog(flags, level, dflag, "vgId:%d, %s, sync:%s, term:%" PRIu64 ", commit-index:%" PRId64 ", first-ver:%" PRId64 ", last-ver:%" PRId64 ", min:%" PRId64 ", snap:%" PRId64 ", snap-term:%" PRIu64 - ", elect-times:%d, as-leader-times:%d, cfg-ch-times:%d, hit:%d, mis:%d, hb-slow:%d, hbr-slow:%d, " + ", elect-times:%d, as-leader-times:%d, cfg-ch-times:%d, hb-slow:%d, hbr-slow:%d, " "aq-items:%d, snaping:%" PRId64 ", replicas:%d, last-cfg:%" PRId64 - ", chging:%d, restore:%d, quorum:%d, elect-lc-timer:%" PRId64 ", hb:%" PRId64 ", %s, %s, %s, %s", + ", chging:%d, restore:%d, quorum:%d, elect-lc-timer:%" PRId64 ", hb:%" PRId64 + ", buffer:%s, repl-mgrs:%s, members:%s, hb:%s, hb-reply:%s", pNode->vgId, eventLog, syncStr(pNode->state), currentTerm, pNode->commitIndex, logBeginIndex, logLastIndex, pNode->minMatchIndex, snapshot.lastApplyIndex, snapshot.lastApplyTerm, pNode->electNum, - pNode->becomeLeaderNum, pNode->configChangeNum, cacheHit, cacheMiss, pNode->hbSlowNum, - pNode->hbrSlowNum, aqItems, pNode->snapshottingIndex, pNode->replicaNum, - pNode->raftCfg.lastConfigIndex, pNode->changing, pNode->restoreFinish, syncNodeDynamicQuorum(pNode), - pNode->electTimerLogicClock, pNode->heartbeatTimerLogicClockUser, peerStr, cfgStr, hbTimeStr, - hbrTimeStr); + pNode->becomeLeaderNum, pNode->configChangeNum, pNode->hbSlowNum, pNode->hbrSlowNum, aqItems, + pNode->snapshottingIndex, pNode->replicaNum, pNode->raftCfg.lastConfigIndex, pNode->changing, + pNode->restoreFinish, syncNodeDynamicQuorum(pNode), pNode->electTimerLogicClock, pNode->heartbeatTimerLogicClockUser, + bufferStatesStr, replMgrStatesStr, cfgStr, hbTimeStr, hbrTimeStr); } } @@ -216,7 +242,7 @@ void syncPrintSnapshotSenderLog(const char* flags, ELogLevel level, int32_t dfla logBeginIndex = pNode->pLogStore->syncLogBeginIndex(pNode->pLogStore); } - char cfgStr[1024]; + char cfgStr[1024] = ""; syncCfg2SimpleStr(&pNode->raftCfg.cfg, cfgStr, sizeof(cfgStr)); char peerStr[1024] = "{"; @@ -262,7 +288,7 @@ void syncPrintSnapshotReceiverLog(const char* flags, ELogLevel level, int32_t df logBeginIndex = pNode->pLogStore->syncLogBeginIndex(pNode->pLogStore); } - char cfgStr[1024]; + char cfgStr[1024] = ""; syncCfg2SimpleStr(&pNode->raftCfg.cfg, cfgStr, sizeof(cfgStr)); char peerStr[1024] = "{"; @@ -304,7 +330,7 @@ void syncLogRecvTimer(SSyncNode* pSyncNode, const SyncTimeout* pMsg, const char* void syncLogRecvLocalCmd(SSyncNode* pSyncNode, const SyncLocalCmd* pMsg, const char* s) { sNTrace(pSyncNode, "recv sync-local-cmd {cmd:%d-%s, sd-new-term:%" PRId64 ", fc-index:%" PRId64 "}, %s", pMsg->cmd, - syncLocalCmdGetStr(pMsg->cmd), pMsg->sdNewTerm, pMsg->fcIndex, s); + syncLocalCmdGetStr(pMsg->cmd), pMsg->currentTerm, pMsg->commitIndex, s); } void syncLogSendAppendEntriesReply(SSyncNode* pSyncNode, const SyncAppendEntriesReply* pMsg, const char* s) { diff --git a/source/libs/tdb/inc/tdb.h b/source/libs/tdb/inc/tdb.h index 10a99bb1fa..0e20941b3a 100644 --- a/source/libs/tdb/inc/tdb.h +++ b/source/libs/tdb/inc/tdb.h @@ -74,7 +74,12 @@ int32_t tdbTbcUpsert(TBC *pTbc, const void *pKey, int nKey, const void *pData, i int32_t tdbTxnOpen(TXN *pTxn, int64_t txnid, void *(*xMalloc)(void *, size_t), void (*xFree)(void *, void *), void *xArg, int flags); -int32_t tdbTxnClose(TXN *pTxn); +int32_t tdbTxnCloseImpl(TXN *pTxn); +#define tdbTxnClose(pTxn) \ + do { \ + tdbTxnCloseImpl(pTxn); \ + (pTxn) = NULL; \ + } while (0) // other void tdbFree(void *); diff --git a/source/libs/tdb/src/db/tdbTxn.c b/source/libs/tdb/src/db/tdbTxn.c index bc23fdb759..0aeed3c140 100644 --- a/source/libs/tdb/src/db/tdbTxn.c +++ b/source/libs/tdb/src/db/tdbTxn.c @@ -31,13 +31,18 @@ int tdbTxnOpen(TXN *pTxn, int64_t txnid, void *(*xMalloc)(void *, size_t), void return 0; } -int tdbTxnClose(TXN *pTxn) { +int tdbTxnCloseImpl(TXN *pTxn) { if (pTxn) { if (pTxn->jPageSet) { hashset_destroy(pTxn->jPageSet); pTxn->jPageSet = NULL; } + if (pTxn->jfd) { + tdbOsClose(pTxn->jfd); + ASSERT(pTxn->jfd == NULL); + } + tdbOsFree(pTxn); } diff --git a/source/libs/transport/inc/transComm.h b/source/libs/transport/inc/transComm.h index bf9a6c0051..5f964f6b1a 100644 --- a/source/libs/transport/inc/transComm.h +++ b/source/libs/transport/inc/transComm.h @@ -100,14 +100,7 @@ typedef void* queue[2]; #define TRANS_READ_TIMEOUT 3000 // read timeout (ms) #define TRANS_PACKET_LIMIT 1024 * 1024 * 512 -#define TRANS_MAGIC_NUM 0x5f375a86 - -#define TRANS_NOVALID_PACKET(src) ((src) != TRANS_MAGIC_NUM ? 1 : 0) - -#define TRANS_PACKET_LIMIT 1024 * 1024 * 512 - -#define TRANS_MAGIC_NUM 0x5f375a86 - +#define TRANS_MAGIC_NUM 0x5f375a86 #define TRANS_NOVALID_PACKET(src) ((src) != TRANS_MAGIC_NUM ? 1 : 0) typedef SRpcMsg STransMsg; diff --git a/source/libs/transport/src/thttp.c b/source/libs/transport/src/thttp.c index 00854b5ee5..cd508f6fe9 100644 --- a/source/libs/transport/src/thttp.c +++ b/source/libs/transport/src/thttp.c @@ -35,6 +35,7 @@ typedef struct SHttpModule { typedef struct SHttpMsg { queue q; char* server; + char* uri; int32_t port; char* cont; int32_t len; @@ -63,26 +64,26 @@ static void httpHandleReq(SHttpMsg* msg); static void httpHandleQuit(SHttpMsg* msg); static int32_t httpSendQuit(); -static int32_t taosSendHttpReportImpl(const char* server, uint16_t port, char* pCont, int32_t contLen, +static int32_t taosSendHttpReportImpl(const char* server, const char* uri, uint16_t port, char* pCont, int32_t contLen, EHttpCompFlag flag); -static int32_t taosBuildHttpHeader(const char* server, int32_t contLen, char* pHead, int32_t headLen, +static int32_t taosBuildHttpHeader(const char* server, const char* uri, int32_t contLen, char* pHead, int32_t headLen, EHttpCompFlag flag) { if (flag == HTTP_FLAT) { return snprintf(pHead, headLen, - "POST /report HTTP/1.1\n" + "POST %s HTTP/1.1\n" "Host: %s\n" "Content-Type: application/json\n" "Content-Length: %d\n\n", - server, contLen); + uri, server, contLen); } else if (flag == HTTP_GZIP) { return snprintf(pHead, headLen, - "POST /report HTTP/1.1\n" + "POST %s HTTP/1.1\n" "Host: %s\n" "Content-Type: application/json\n" "Content-Encoding: gzip\n" "Content-Length: %d\n\n", - server, contLen); + uri, server, contLen); } else { terrno = TSDB_CODE_INVALID_CFG; return -1; @@ -181,6 +182,7 @@ static void httpDestroyMsg(SHttpMsg* msg) { if (msg == NULL) return; taosMemoryFree(msg->server); + taosMemoryFree(msg->uri); taosMemoryFree(msg->cont); taosMemoryFree(msg); } @@ -293,10 +295,11 @@ int32_t httpSendQuit() { return 0; } -static int32_t taosSendHttpReportImpl(const char* server, uint16_t port, char* pCont, int32_t contLen, +static int32_t taosSendHttpReportImpl(const char* server, const char* uri, uint16_t port, char* pCont, int32_t contLen, EHttpCompFlag flag) { SHttpMsg* msg = taosMemoryMalloc(sizeof(SHttpMsg)); msg->server = strdup(server); + msg->uri = strdup(uri); msg->port = port; msg->cont = taosMemoryMalloc(contLen); memcpy(msg->cont, pCont, contLen); @@ -309,12 +312,10 @@ static int32_t taosSendHttpReportImpl(const char* server, uint16_t port, char* p httpDestroyMsg(msg); tError("http-report already released"); return -1; - } else { - msg->http = load; - transAsyncSend(load->asyncPool, &(msg->q)); } - - return 0; + + msg->http = load; + return transAsyncSend(load->asyncPool, &(msg->q)); } static void httpDestroyClientCb(uv_handle_t* handle) { @@ -360,7 +361,7 @@ static void httpHandleReq(SHttpMsg* msg) { int32_t len = 2048; char* header = taosMemoryCalloc(1, len); - int32_t headLen = taosBuildHttpHeader(msg->server, msg->len, header, len, msg->flag); + int32_t headLen = taosBuildHttpHeader(msg->server, msg->uri, msg->len, header, len, msg->flag); if (headLen < 0) { taosMemoryFree(header); goto END; @@ -380,6 +381,7 @@ static void httpHandleReq(SHttpMsg* msg) { cli->port = msg->port; cli->dest = dest; + taosMemoryFree(msg->uri); taosMemoryFree(msg); uv_tcp_init(http->loop, &cli->tcp); @@ -406,9 +408,9 @@ END: httpDestroyMsg(msg); } -int32_t taosSendHttpReport(const char* server, uint16_t port, char* pCont, int32_t contLen, EHttpCompFlag flag) { +int32_t taosSendHttpReport(const char* server, const char* uri, uint16_t port, char* pCont, int32_t contLen, EHttpCompFlag flag) { taosThreadOnce(&transHttpInit, transHttpEnvInit); - return taosSendHttpReportImpl(server, port, pCont, contLen, flag); + return taosSendHttpReportImpl(server, uri, port, pCont, contLen, flag); } static void transHttpEnvInit() { diff --git a/source/libs/transport/src/transCli.c b/source/libs/transport/src/transCli.c index 2632d290e9..1a99db5f99 100644 --- a/source/libs/transport/src/transCli.c +++ b/source/libs/transport/src/transCli.c @@ -1126,7 +1126,7 @@ void cliHandleReq(SCliMsg* pMsg, SCliThrd* pThrd) { int ret = uv_tcp_connect(&conn->connReq, (uv_tcp_t*)(conn->stream), (const struct sockaddr*)&addr, cliConnCb); if (ret != 0) { - tGTrace("%s conn %p failed to connect to %s:%d, reason:%s", pTransInst->label, conn, conn->ip, conn->port, + tGError("%s conn %p failed to connect to %s:%d, reason:%s", pTransInst->label, conn, conn->ip, conn->port, uv_err_name(ret)); uv_timer_stop(conn->timer); diff --git a/source/libs/wal/src/walMeta.c b/source/libs/wal/src/walMeta.c index da8cf24627..4a0570bee3 100644 --- a/source/libs/wal/src/walMeta.c +++ b/source/libs/wal/src/walMeta.c @@ -289,19 +289,10 @@ void walAlignVersions(SWal* pWal) { } pWal->vers.lastVer = pWal->vers.snapshotVer; } - if (pWal->vers.commitVer < pWal->vers.snapshotVer) { - wWarn("vgId:%d, commitVer:%" PRId64 " is less than snapshotVer:%" PRId64 ". align with it.", pWal->cfg.vgId, - pWal->vers.commitVer, pWal->vers.snapshotVer); - pWal->vers.commitVer = pWal->vers.snapshotVer; - } - if (pWal->vers.appliedVer < pWal->vers.snapshotVer) { - wWarn("vgId:%d, appliedVer:%" PRId64 " is less than snapshotVer:%" PRId64 ". align with it.", pWal->cfg.vgId, - pWal->vers.appliedVer, pWal->vers.snapshotVer); - pWal->vers.appliedVer = pWal->vers.snapshotVer; - } - - pWal->vers.commitVer = TMIN(pWal->vers.lastVer, pWal->vers.commitVer); - pWal->vers.appliedVer = TMIN(pWal->vers.commitVer, pWal->vers.appliedVer); + // reset commitVer and appliedVer + pWal->vers.commitVer = pWal->vers.snapshotVer; + pWal->vers.appliedVer = pWal->vers.snapshotVer; + wInfo("vgId:%d, reset commitVer to %" PRId64, pWal->cfg.vgId, pWal->vers.commitVer); } bool walLogEntriesComplete(const SWal* pWal) { diff --git a/source/libs/wal/src/walWrite.c b/source/libs/wal/src/walWrite.c index 4233c089a4..f49c9a53af 100644 --- a/source/libs/wal/src/walWrite.c +++ b/source/libs/wal/src/walWrite.c @@ -103,7 +103,7 @@ int32_t walRollback(SWal *pWal, int64_t ver) { wInfo("vgId:%d, wal rollback for version %" PRId64, pWal->cfg.vgId, ver); int64_t code; char fnameStr[WAL_FILE_LEN]; - if (ver > pWal->vers.lastVer || ver < pWal->vers.commitVer || ver <= pWal->vers.snapshotVer) { + if (ver > pWal->vers.lastVer || ver <= pWal->vers.commitVer || ver <= pWal->vers.snapshotVer) { terrno = TSDB_CODE_WAL_INVALID_VER; taosThreadMutexUnlock(&pWal->mutex); return -1; diff --git a/source/os/test/osTests.cpp b/source/os/test/osTests.cpp index f831f457f9..2e24bb0526 100644 --- a/source/os/test/osTests.cpp +++ b/source/os/test/osTests.cpp @@ -33,7 +33,7 @@ TEST(osTest, osSystem) { const char *flags = "UTL FATAL "; ELogLevel level = DEBUG_FATAL; int32_t dflag = 255; // tsLogEmbedded ? 255 : uDebugFlag - taosPrintTrace(flags, level, dflag); + taosPrintTrace(flags, level, dflag, 0); } void fileOperateOnFree(void *param) { diff --git a/source/util/src/tlog.c b/source/util/src/tlog.c index 53d0cad5ea..34ad9ae6bc 100644 --- a/source/util/src/tlog.c +++ b/source/util/src/tlog.c @@ -18,6 +18,8 @@ #include "os.h" #include "tconfig.h" #include "tutil.h" +#include "tjson.h" +#include "tglobal.h" #define LOG_MAX_LINE_SIZE (1024) #define LOG_MAX_LINE_BUFFER_SIZE (LOG_MAX_LINE_SIZE + 3) @@ -808,7 +810,7 @@ bool taosAssertDebug(bool condition, const char *file, int32_t line, const char taosPrintLogImp(1, 255, buffer, len); taosPrintLog(flags, level, dflag, "tAssert at file %s:%d exit:%d", file, line, tsAssert); - taosPrintTrace(flags, level, dflag); + taosPrintTrace(flags, level, dflag, -1); if (tsAssert) { // taosCloseLog(); @@ -824,6 +826,216 @@ bool taosAssertDebug(bool condition, const char *file, int32_t line, const char return true; } +int32_t taosGenCrashJsonMsg(int signum, char** pMsg, int64_t clusterId, int64_t startTime) { + SJson* pJson = tjsonCreateObject(); + if (pJson == NULL) return -1; + char tmp[4096] = {0}; + + tjsonAddDoubleToObject(pJson, "reportVersion", 1); + + tjsonAddIntegerToObject(pJson, "clusterId", clusterId); + tjsonAddIntegerToObject(pJson, "startTime", startTime); + + taosGetFqdn(tmp); + tjsonAddStringToObject(pJson, "fqdn", tmp); + + tjsonAddIntegerToObject(pJson, "pid", taosGetPId()); + + taosGetAppName(tmp, NULL); + tjsonAddStringToObject(pJson, "appName", tmp); + + if (taosGetOsReleaseName(tmp, sizeof(tmp)) == 0) { + tjsonAddStringToObject(pJson, "os", tmp); + } + + float numOfCores = 0; + if (taosGetCpuInfo(tmp, sizeof(tmp), &numOfCores) == 0) { + tjsonAddStringToObject(pJson, "cpuModel", tmp); + tjsonAddDoubleToObject(pJson, "numOfCpu", numOfCores); + } else { + tjsonAddDoubleToObject(pJson, "numOfCpu", tsNumOfCores); + } + + snprintf(tmp, sizeof(tmp), "%" PRId64 " kB", tsTotalMemoryKB); + tjsonAddStringToObject(pJson, "memory", tmp); + + tjsonAddStringToObject(pJson, "version", version); + tjsonAddStringToObject(pJson, "buildInfo", buildinfo); + tjsonAddStringToObject(pJson, "gitInfo", gitinfo); + + tjsonAddIntegerToObject(pJson, "crashSig", signum); + tjsonAddIntegerToObject(pJson, "crashTs", taosGetTimestampUs()); + +#ifdef _TD_DARWIN_64 + taosLogTraceToBuf(tmp, sizeof(tmp), 4); +#elif !defined(WINDOWS) + taosLogTraceToBuf(tmp, sizeof(tmp), 3); +#else + taosLogTraceToBuf(tmp, sizeof(tmp), 8); +#endif + + tjsonAddStringToObject(pJson, "stackInfo", tmp); + + char* pCont = tjsonToString(pJson); + tjsonDelete(pJson); + + *pMsg = pCont; + + return TSDB_CODE_SUCCESS; +} + + +void taosLogCrashInfo(char* nodeType, char* pMsg, int64_t msgLen, int signum, void *sigInfo) { + const char *flags = "UTL FATAL "; + ELogLevel level = DEBUG_FATAL; + int32_t dflag = 255; + char filepath[PATH_MAX] = {0}; + TdFilePtr pFile = NULL; + + if (pMsg && msgLen > 0) { + snprintf(filepath, sizeof(filepath), "%s%s.%sCrashLog", tsLogDir, TD_DIRSEP, nodeType); + + pFile = taosOpenFile(filepath, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_APPEND); + if (pFile == NULL) { + taosPrintLog(flags, level, dflag, "failed to open file:%s since %s", filepath, terrstr()); + goto _return; + } + + taosLockFile(pFile); + + int64_t writeSize = taosWriteFile(pFile, &msgLen, sizeof(msgLen)); + if (sizeof(msgLen) != writeSize) { + taosUnLockFile(pFile); + taosPrintLog(flags, level, dflag, "failed to write len to file:%s,%p wlen:%" PRId64 " tlen:%lu since %s", + filepath, pFile, writeSize, sizeof(msgLen), terrstr()); + goto _return; + } + + writeSize = taosWriteFile(pFile, pMsg, msgLen); + if (msgLen != writeSize) { + taosUnLockFile(pFile); + taosPrintLog(flags, level, dflag, "failed to write file:%s,%p wlen:%" PRId64 " tlen:%" PRId64 " since %s", + filepath, pFile, writeSize, msgLen, terrstr()); + goto _return; + } + + taosUnLockFile(pFile); + } + +_return: + + if (pFile) taosCloseFile(&pFile); + + terrno = TAOS_SYSTEM_ERROR(errno); + taosPrintLog(flags, level, dflag, "crash signal is %d", signum); + +#ifdef _TD_DARWIN_64 + taosPrintTrace(flags, level, dflag, 4); +#elif !defined(WINDOWS) + taosPrintLog(flags, level, dflag, "sender PID:%d cmdline:%s", ((siginfo_t *)sigInfo)->si_pid, + taosGetCmdlineByPID(((siginfo_t *)sigInfo)->si_pid)); + taosPrintTrace(flags, level, dflag, 3); +#else + taosPrintTrace(flags, level, dflag, 8); +#endif + + taosMemoryFree(pMsg); +} + +void taosReadCrashInfo(char* filepath, char** pMsg, int64_t* pMsgLen, TdFilePtr* pFd) { + const char *flags = "UTL FATAL "; + ELogLevel level = DEBUG_FATAL; + int32_t dflag = 255; + TdFilePtr pFile = NULL; + bool truncateFile = false; + char* buf = NULL; + + if (NULL == *pFd) { + int64_t filesize = 0; + if (taosStatFile(filepath, &filesize, NULL) < 0) { + if (ENOENT == errno) { + return; + } + + terrno = TAOS_SYSTEM_ERROR(errno); + taosPrintLog(flags, level, dflag, "failed to stat file:%s since %s", filepath, terrstr()); + return; + } + + if (filesize <= 0) { + return; + } + + pFile = taosOpenFile(filepath, TD_FILE_READ|TD_FILE_WRITE); + if (pFile == NULL) { + if (ENOENT == errno) { + return; + } + + terrno = TAOS_SYSTEM_ERROR(errno); + taosPrintLog(flags, level, dflag, "failed to open file:%s since %s", filepath, terrstr()); + return; + } + + taosLockFile(pFile); + } else { + pFile = *pFd; + } + + int64_t msgLen = 0; + int64_t readSize = taosReadFile(pFile, &msgLen, sizeof(msgLen)); + if (sizeof(msgLen) != readSize) { + truncateFile = true; + if (readSize < 0) { + taosPrintLog(flags, level, dflag, "failed to read len from file:%s,%p wlen:%" PRId64 " tlen:%lu since %s", + filepath, pFile, readSize, sizeof(msgLen), terrstr()); + } + goto _return; + } + + buf = taosMemoryMalloc(msgLen); + if (NULL == buf) { + taosPrintLog(flags, level, dflag, "failed to malloc buf, size:%" PRId64, msgLen); + goto _return; + } + + readSize = taosReadFile(pFile, buf, msgLen); + if (msgLen != readSize) { + truncateFile = true; + taosPrintLog(flags, level, dflag, "failed to read file:%s,%p wlen:%" PRId64 " tlen:%" PRId64 " since %s", + filepath, pFile, readSize, msgLen, terrstr()); + goto _return; + } + + *pMsg = buf; + *pMsgLen = msgLen; + *pFd = pFile; + + return; + +_return: + + if (truncateFile) { + taosFtruncateFile(pFile, 0); + } + taosUnLockFile(pFile); + taosCloseFile(&pFile); + taosMemoryFree(buf); + + *pMsg = NULL; + *pMsgLen = 0; + *pFd = NULL; +} + +void taosReleaseCrashLogFile(TdFilePtr pFile, bool truncateFile) { + if (truncateFile) { + taosFtruncateFile(pFile, 0); + } + + taosUnLockFile(pFile); + taosCloseFile(&pFile); +} + #ifdef NDEBUG bool taosAssertRelease(bool condition) { if (condition) return false; @@ -833,7 +1045,7 @@ bool taosAssertRelease(bool condition) { int32_t dflag = 255; // tsLogEmbedded ? 255 : uDebugFlag taosPrintLog(flags, level, dflag, "tAssert called in release mode, exit:%d", tsAssert); - taosPrintTrace(flags, level, dflag); + taosPrintTrace(flags, level, dflag, 0); if (tsAssert) { taosMsleep(300); @@ -842,4 +1054,4 @@ bool taosAssertRelease(bool condition) { return true; } -#endif \ No newline at end of file +#endif diff --git a/tests/script/tsim/parser/regressiontest.sim b/tests/script/tsim/parser/regressiontest.sim new file mode 100644 index 0000000000..98cb0248a1 --- /dev/null +++ b/tests/script/tsim/parser/regressiontest.sim @@ -0,0 +1,61 @@ +system sh/stop_dnodes.sh +system sh/deploy.sh -n dnode1 -i 1 +system sh/exec.sh -n dnode1 -s start +sql connect + +$dbPrefix = reg_db +$tb = tb +$rowNum = 8200 + +$ts0 = 1537146000000 +$delta = 100 +print ========== reg.sim +$i = 0 +$db = $dbPrefix . $i + +sql drop database if exists $db -x step1 +step1: +sql create database $db vgroups 1; + +sql use $db +sql create table $tb (ts timestamp, c1 int) + +$i = 0 +$ts = $ts0 + +$x = 0 +while $x < $rowNum +$xs = $x * $delta +$ts = $ts0 + $xs +sql insert into $tb values ( $ts , $x ) +$x = $x + 1 +endw + +print ================== restart server to commit data into disk +system sh/exec.sh -n dnode1 -s stop -x SIGINT +system sh/exec.sh -n dnode1 -s start +print ================== server restart completed +sql connect + +sql use $db +sql delete from $tb where ts=1537146000000 +sql delete from $tb where ts=1537146409500 + +print =========================> TS-2410 +sql select * from $tb limit 20 offset 4090 +print $data00 +print $data10 +print $data20 +print $data30 +print $data40 +print $data50 +print $data60 +print $data70 +print $data80 +print $data90 + +if $data40 != @18-09-17 09:06:49.600@ then + return -1 +endi + +system sh/exec.sh -n dnode1 -s stop -x SIGINT diff --git a/tests/script/tsim/sma/rsmaCreateInsertQuery.sim b/tests/script/tsim/sma/rsmaCreateInsertQuery.sim index 04cf09715c..508e6f88c1 100644 --- a/tests/script/tsim/sma/rsmaCreateInsertQuery.sim +++ b/tests/script/tsim/sma/rsmaCreateInsertQuery.sim @@ -82,8 +82,8 @@ endi #=================================================================== - #==================== reboot to trigger commit data to file +sql flush database d0; system sh/exec.sh -n dnode1 -s stop -x SIGINT system sh/exec.sh -n dnode1 -s start diff --git a/tests/script/tsim/sma/rsmaPersistenceRecovery.sim b/tests/script/tsim/sma/rsmaPersistenceRecovery.sim index faff48b61c..4117a2403d 100644 --- a/tests/script/tsim/sma/rsmaPersistenceRecovery.sim +++ b/tests/script/tsim/sma/rsmaPersistenceRecovery.sim @@ -85,6 +85,7 @@ endi #==================== reboot to trigger commit data to file +sql flush database d0; system sh/exec.sh -n dnode1 -s stop -x SIGINT system sh/exec.sh -n dnode1 -s start