diff --git a/cmake/cmake.define b/cmake/cmake.define index 44b36d0efa..7710c071eb 100644 --- a/cmake/cmake.define +++ b/cmake/cmake.define @@ -97,7 +97,15 @@ ENDIF() SET(JEMALLOC_ENABLED OFF) IF (TD_WINDOWS) MESSAGE("${Yellow} set compiler flag for Windows! ${ColourReset}") - SET(COMMON_FLAGS "/w /D_WIN32 /DWIN32 /Zi /MTd") + IF (${CMAKE_BUILD_TYPE} MATCHES "Release") + MESSAGE("${Green} will build Release version! ${ColourReset}") + SET(COMMON_FLAGS "/W3 /D_WIN32 /DWIN32 /Zi- /O2 /GL /MD") + + ELSE () + MESSAGE("${Green} will build Debug version! ${ColourReset}") + SET(COMMON_FLAGS "/w /D_WIN32 /DWIN32 /Zi /MTd") + ENDIF() + SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /MANIFEST:NO") # IF (MSVC AND (MSVC_VERSION GREATER_EQUAL 1900)) # SET(COMMON_FLAGS "${COMMON_FLAGS} /Wv:18") diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index c5715bd53f..1b0a091e9d 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -317,7 +317,8 @@ if (${BUILD_WITH_ROCKSDB}) SET(CMAKE_BUILD_TYPE Release) endif() endif(${TD_LINUX}) - MESSAGE(STATUS "CXXXX STATUS CONFIG: " ${CMAKE_CXX_FLAGS}) + MESSAGE(STATUS "ROCKSDB CXXXX STATUS CONFIG: " ${CMAKE_CXX_FLAGS}) + MESSAGE(STATUS "ROCKSDB C STATUS CONFIG: " ${CMAKE_C_FLAGS}) if(${TD_DARWIN}) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=maybe-uninitialized") @@ -329,8 +330,12 @@ if (${BUILD_WITH_ROCKSDB}) if (${TD_WINDOWS}) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4244 /wd4819") + SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /wd4244 /wd4819") option(WITH_JNI "" OFF) - option(WITH_MD_LIBRARY "build with MD" OFF) + if(CMAKE_C_FLAGS MATCHES "/MT" OR CMAKE_C_FLAGS MATCHES "/MTd") + message("Rocksdb build runtime lib use /MT or /MTd") + option(WITH_MD_LIBRARY "build with MD" OFF) + endif() set(SYSTEM_LIBS ${SYSTEM_LIBS} shlwapi.lib rpcrt4.lib) endif(${TD_WINDOWS}) @@ -361,9 +366,11 @@ if (${BUILD_WITH_ROCKSDB}) ) else() if (NOT ${TD_LINUX}) - MESSAGE(STATUS "CXXXX STATUS CONFIG: " ${CMAKE_CXX_FLAGS}) + MESSAGE(STATUS "ROCKSDB CXX STATUS CONFIG: " ${CMAKE_CXX_FLAGS}) + MESSAGE(STATUS "ROCKSDB C STATUS CONFIG: " ${CMAKE_C_FLAGS}) if(${TD_DARWIN}) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=maybe-uninitialized") + SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-error=maybe-uninitialized") endif(${TD_DARWIN}) if (${TD_DARWIN_ARM64}) @@ -372,8 +379,12 @@ if (${BUILD_WITH_ROCKSDB}) if (${TD_WINDOWS}) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4244 /wd4819") + SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /wd4244 /wd4819") option(WITH_JNI "" OFF) - option(WITH_MD_LIBRARY "build with MD" OFF) + if(CMAKE_C_FLAGS MATCHES "/MT" OR CMAKE_C_FLAGS MATCHES "/MTd") + message("Rocksdb build runtime lib use /MT or /MTd") + option(WITH_MD_LIBRARY "build with MD" OFF) + endif() set(SYSTEM_LIBS ${SYSTEM_LIBS} shlwapi.lib rpcrt4.lib) endif(${TD_WINDOWS}) @@ -456,7 +467,9 @@ endif(${BUILD_WITH_NURAFT}) # pthread if(${BUILD_PTHREAD}) - set(CMAKE_BUILD_TYPE debug) + if ("${CMAKE_BUILD_TYPE}" STREQUAL "") + SET(CMAKE_BUILD_TYPE Release) + endif() add_definitions(-DPTW32_STATIC_LIB) add_subdirectory(pthread EXCLUDE_FROM_ALL) set_target_properties(libpthreadVC3 PROPERTIES OUTPUT_NAME pthread) @@ -640,13 +653,18 @@ if(${BUILD_GEOS}) if(${TD_LINUX}) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS_REL}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS_REL}") - IF ("${CMAKE_BUILD_TYPE}" STREQUAL "") + if ("${CMAKE_BUILD_TYPE}" STREQUAL "") SET(CMAKE_BUILD_TYPE Release) endif() endif(${TD_LINUX}) option(BUILD_SHARED_LIBS "Build GEOS with shared libraries" OFF) add_subdirectory(geos EXCLUDE_FROM_ALL) - unset(CMAKE_CXX_STANDARD CACHE) # undo libgeos's setting of global CMAKE_CXX_STANDARD + if (${TD_WINDOWS}) + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") + else () + unset(CMAKE_CXX_STANDARD CACHE) # undo libgeos's setting of global CMAKE_CXX_STANDARD + endif(${TD_WINDOWS}) target_include_directories( geos_c PUBLIC $ diff --git a/include/common/tmsg.h b/include/common/tmsg.h index 86d34502c6..e17a72992c 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -3112,7 +3112,7 @@ typedef struct { int32_t tSerializeSMDropStreamReq(void* buf, int32_t bufLen, const SMDropStreamReq* pReq); int32_t tDeserializeSMDropStreamReq(void* buf, int32_t bufLen, SMDropStreamReq* pReq); -void tFreeSMDropStreamReq(SMDropStreamReq* pReq); +void tFreeMDropStreamReq(SMDropStreamReq* pReq); typedef struct { char name[TSDB_STREAM_FNAME_LEN]; diff --git a/include/dnode/snode/snode.h b/include/dnode/snode/snode.h index e8c64b07c4..c3dfd3a611 100644 --- a/include/dnode/snode/snode.h +++ b/include/dnode/snode/snode.h @@ -45,6 +45,7 @@ typedef struct { */ SSnode *sndOpen(const char *path, const SSnodeOpt *pOption); +int32_t sndInit(SSnode * pSnode); /** * @brief Stop Snode in Dnode. * diff --git a/include/dnode/vnode/stream.h b/include/dnode/vnode/stream.h new file mode 100644 index 0000000000..6d86847542 --- /dev/null +++ b/include/dnode/vnode/stream.h @@ -0,0 +1,18 @@ +// +// Created by mingming wanng on 2023/11/15. +// + +#ifndef TDENGINE_STREAM_H +#define TDENGINE_STREAM_H + +#define STREAM_EXEC_EXTRACT_DATA_IN_WAL_ID (-1) +#define STREAM_EXEC_START_ALL_TASKS_ID (-2) +#define STREAM_EXEC_RESTART_ALL_TASKS_ID (-3) + +typedef struct STaskUpdateEntry { + int64_t streamId; + int32_t taskId; + int32_t transId; +} STaskUpdateEntry; + +#endif // TDENGINE_STREAM_H diff --git a/include/libs/executor/storageapi.h b/include/libs/executor/storageapi.h index d5f1da957d..045f2bad70 100644 --- a/include/libs/executor/storageapi.h +++ b/include/libs/executor/storageapi.h @@ -150,19 +150,6 @@ typedef struct { int32_t colNum; } SMetaStbStats; -// void tqReaderSetColIdList(STqReader *pReader, SArray *pColIdList); -// int32_t tqReaderSetTbUidList(STqReader *pReader, const SArray *tbUidList); -// int32_t tqReaderAddTbUidList(STqReader *pReader, const SArray *pTableUidList); -// int32_t tqReaderRemoveTbUidList(STqReader *pReader, const SArray *tbUidList); -// bool tqReaderIsQueriedTable(STqReader* pReader, uint64_t uid); -// bool tqCurrentBlockConsumed(const STqReader* pReader); -// int32_t tqReaderSeek(STqReader *pReader, int64_t ver, const char *id); -// bool tqNextBlockInWal(STqReader* pReader, const char* idstr); -// bool tqNextBlockImpl(STqReader *pReader, const char* idstr); -// int32_t getTableInfoFromSnapshot(SSnapContext *ctx, void **pBuf, int32_t *contLen, int16_t *type, int64_t -// *uid); SMetaTableInfo getMetaTableInfoFromSnapshot(SSnapContext *ctx); int32_t setForSnapShot(SSnapContext -// *ctx, int64_t uid); int32_t destroySnapContext(SSnapContext *ctx); - // clang-format off /*-------------------------------------------------new api format---------------------------------------------------*/ typedef struct TsdReader { @@ -197,27 +184,6 @@ typedef struct SStoreCacheReader { // clang-format on /*------------------------------------------------------------------------------------------------------------------*/ -/* -void tqReaderSetColIdList(STqReader *pReader, SArray *pColIdList); -int32_t tqReaderSetTbUidList(STqReader *pReader, const SArray *tbUidList); -int32_t tqReaderAddTbUidList(STqReader *pReader, const SArray *pTableUidList); -int32_t tqReaderRemoveTbUidList(STqReader *pReader, const SArray *tbUidList); -bool tqReaderIsQueriedTable(STqReader* pReader, uint64_t uid); -bool tqCurrentBlockConsumed(const STqReader* pReader); - -int32_t tqReaderSeek(STqReader *pReader, int64_t ver, const char *id); -bool tqNextBlockInWal(STqReader* pReader, const char* idstr); -bool tqNextBlockImpl(STqReader *pReader, const char* idstr); - - int32_t tqRetrieveDataBlock(STqReader *pReader, SSDataBlock **pRes, const char* idstr); -STqReader *tqReaderOpen(void *pVnode); -void tqReaderClose(STqReader *); - -int32_t tqReaderSetSubmitMsg(STqReader *pReader, void *msgStr, int32_t msgLen, int64_t ver); -bool tqNextDataBlockFilterOut(STqReader *pReader, SHashObj *filterOutUids); -SWalReader* tqGetWalReader(STqReader* pReader); -int32_t tqRetrieveTaosxBlock(STqReader *pReader, SArray *blocks, SArray *schemas, SSubmitTbData **pSubmitTbDataRet); -*/ // todo rename typedef struct SStoreTqReader { struct STqReader* (*tqReaderOpen)(); @@ -281,28 +247,18 @@ typedef struct SStoreMeta { void* (*storeGetIndexInfo)(); void* (*getInvertIndex)(void* pVnode); - int32_t (*getChildTableList)( - void* pVnode, int64_t suid, - SArray* list); // support filter and non-filter cases. [vnodeGetCtbIdList & vnodeGetCtbIdListByFilter] - int32_t (*storeGetTableList)(void* pVnode, int8_t type, SArray* pList); // vnodeGetStbIdList & vnodeGetAllTableList + // support filter and non-filter cases. [vnodeGetCtbIdList & vnodeGetCtbIdListByFilter] + int32_t (*getChildTableList)( void* pVnode, int64_t suid, SArray* list); + int32_t (*storeGetTableList)(void* pVnode, int8_t type, SArray* pList); void* storeGetVersionRange; void* storeGetLastTimestamp; int32_t (*getTableSchema)(void* pVnode, int64_t uid, STSchema** pSchema, int64_t* suid); // tsdbGetTableSchema + int32_t (*getNumOfChildTables)( void* pVnode, int64_t uid, int64_t* numOfTables, int32_t* numOfCols); + void (*getBasicInfo)(void* pVnode, const char** dbname, int32_t* vgId, int64_t* numOfTables, int64_t* numOfNormalTables); - // db name, vgId, numOfTables, numOfSTables - int32_t (*getNumOfChildTables)( - void* pVnode, int64_t uid, int64_t* numOfTables, - int32_t* numOfCols); // int32_t metaGetStbStats(SMeta *pMeta, int64_t uid, SMetaStbStats *pInfo); - void (*getBasicInfo)(void* pVnode, const char** dbname, int32_t* vgId, int64_t* numOfTables, - int64_t* numOfNormalTables); // vnodeGetInfo(void *pVnode, const char **dbname, int32_t *vgId) & - // metaGetTbNum(SMeta *pMeta) & metaGetNtbNum(SMeta *pMeta); int64_t (*getNumOfRowsInMem)(void* pVnode); - /** -int32_t vnodeGetCtbIdList(void *pVnode, int64_t suid, SArray *list); -int32_t vnodeGetCtbIdListByFilter(void *pVnode, int64_t suid, SArray *list, bool (*filter)(void *arg), void *arg); -int32_t vnodeGetStbIdList(void *pVnode, int64_t suid, SArray *list); - */ + SMCtbCursor* (*openCtbCursor)(void *pVnode, tb_uid_t uid, int lock); int32_t (*resumeCtbCursor)(SMCtbCursor* pCtbCur, int8_t first); void (*pauseCtbCursor)(SMCtbCursor* pCtbCur); diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index 6e191e412d..bbec0028d7 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -305,6 +305,7 @@ typedef struct SCheckpointInfo { int64_t processedVer; // already processed ver, that has generated results version. int64_t nextProcessVer; // current offset in WAL, not serialize it int64_t failedId; // record the latest failed checkpoint id + bool dispatchCheckpointTrigger; } SCheckpointInfo; typedef struct SStreamStatus { @@ -590,7 +591,7 @@ typedef struct { int32_t downstreamNodeId; int32_t downstreamTaskId; int32_t childId; - int32_t oldStage; + int64_t oldStage; int8_t status; } SStreamTaskCheckRsp; @@ -655,7 +656,7 @@ int32_t tDecodeStreamCheckpointReadyMsg(SDecoder* pDecoder, SStreamCheckpointRea typedef struct STaskStatusEntry { STaskId id; int32_t status; - int32_t stage; + int64_t stage; int32_t nodeId; int64_t verStart; // start version in WAL, only valid for source task int64_t verEnd; // end version in WAL, only valid for source task @@ -758,7 +759,7 @@ void initRpcMsg(SRpcMsg* pMsg, int32_t msgType, void* pCont, int32_t contLen) // recover and fill history void streamTaskCheckDownstream(SStreamTask* pTask); -int32_t streamTaskCheckStatus(SStreamTask* pTask, int32_t upstreamTaskId, int32_t vgId, int64_t stage); +int32_t streamTaskCheckStatus(SStreamTask* pTask, int32_t upstreamTaskId, int32_t vgId, int64_t stage, int64_t* oldStage); int32_t streamTaskUpdateEpsetInfo(SStreamTask* pTask, SArray* pNodeList); void streamTaskResetUpstreamStageInfo(SStreamTask* pTask); bool streamTaskAllUpstreamClosed(SStreamTask* pTask); @@ -787,7 +788,6 @@ int32_t streamQueueGetNumOfItems(const SStreamQueue* pQueue); int32_t streamRestoreParam(SStreamTask* pTask); void streamTaskPause(SStreamTask* pTask, SStreamMeta* pMeta); void streamTaskResume(SStreamTask* pTask); -void streamTaskEnablePause(SStreamTask* pTask); int32_t streamTaskSetUpstreamInfo(SStreamTask* pTask, const SStreamTask* pUpstreamTask); void streamTaskUpdateUpstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpSet* pEpSet); void streamTaskUpdateDownstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpSet* pEpSet); @@ -829,7 +829,6 @@ int32_t streamMetaLoadAllTasks(SStreamMeta* pMeta); int64_t streamMetaGetLatestCheckpointId(SStreamMeta* pMeta); void streamMetaNotifyClose(SStreamMeta* pMeta); void streamMetaStartHb(SStreamMeta* pMeta); -void streamMetaInitForSnode(SStreamMeta* pMeta); bool streamMetaTaskInTimer(SStreamMeta* pMeta); int32_t streamMetaUpdateTaskDownstreamStatus(SStreamTask* pTask, int64_t startTs, int64_t endTs, bool succ); void streamMetaRLock(SStreamMeta* pMeta); diff --git a/include/os/osFile.h b/include/os/osFile.h index 63483dc906..e409936468 100644 --- a/include/os/osFile.h +++ b/include/os/osFile.h @@ -54,15 +54,16 @@ extern "C" { typedef struct TdFile *TdFilePtr; -#define TD_FILE_CREATE 0x0001 -#define TD_FILE_WRITE 0x0002 -#define TD_FILE_READ 0x0004 -#define TD_FILE_TRUNC 0x0008 -#define TD_FILE_APPEND 0x0010 -#define TD_FILE_TEXT 0x0020 -#define TD_FILE_AUTO_DEL 0x0040 -#define TD_FILE_EXCL 0x0080 -#define TD_FILE_STREAM 0x0100 // Only support taosFprintfFile, taosGetLineFile, taosEOFFile +#define TD_FILE_CREATE 0x0001 +#define TD_FILE_WRITE 0x0002 +#define TD_FILE_READ 0x0004 +#define TD_FILE_TRUNC 0x0008 +#define TD_FILE_APPEND 0x0010 +#define TD_FILE_TEXT 0x0020 +#define TD_FILE_AUTO_DEL 0x0040 +#define TD_FILE_EXCL 0x0080 +#define TD_FILE_STREAM 0x0100 // Only support taosFprintfFile, taosGetLineFile, taosEOFFile +#define TD_FILE_WRITE_THROUGH 0x0200 TdFilePtr taosOpenFile(const char *path, int32_t tdFileOptions); TdFilePtr taosCreateFile(const char *path, int32_t tdFileOptions); diff --git a/include/util/tlog.h b/include/util/tlog.h index a6d146a79e..6d393bfefb 100644 --- a/include/util/tlog.h +++ b/include/util/tlog.h @@ -66,6 +66,7 @@ extern int32_t udfDebugFlag; extern int32_t smaDebugFlag; extern int32_t idxDebugFlag; extern int32_t tdbDebugFlag; +extern int32_t sndDebugFlag; int32_t taosInitLog(const char *logName, int32_t maxFiles); void taosCloseLog(); diff --git a/source/common/src/systable.c b/source/common/src/systable.c index a1f8d74571..89995fc326 100644 --- a/source/common/src/systable.c +++ b/source/common/src/systable.c @@ -167,7 +167,7 @@ static const SSysDbTableSchema streamTaskSchema[] = { {.name = "node_id", .bytes = 4, .type = TSDB_DATA_TYPE_INT, .sysInfo = false}, {.name = "level", .bytes = 10 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, {.name = "status", .bytes = 15 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, - {.name = "stage", .bytes = 4, .type = TSDB_DATA_TYPE_INT, .sysInfo = false}, + {.name = "stage", .bytes = 8, .type = TSDB_DATA_TYPE_BIGINT, .sysInfo = false}, {.name = "in_queue", .bytes = 20, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, // {.name = "out_queue", .bytes = 20, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, {.name = "info", .bytes = 25, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, diff --git a/source/common/src/tdataformat.c b/source/common/src/tdataformat.c index d220da0d84..f97f9c0c11 100644 --- a/source/common/src/tdataformat.c +++ b/source/common/src/tdataformat.c @@ -3590,9 +3590,9 @@ void (*tColDataCalcSMA[])(SColData *pColData, int64_t *sum, int64_t *max, int64_ tColDataCalcSMAUInt, // TSDB_DATA_TYPE_UINT tColDataCalcSMAUBigInt, // TSDB_DATA_TYPE_UBIGINT tColDataCalcSMAVarType, // TSDB_DATA_TYPE_JSON - NULL, // TSDB_DATA_TYPE_VARBINARY - NULL, // TSDB_DATA_TYPE_DECIMAL - NULL, // TSDB_DATA_TYPE_BLOB + tColDataCalcSMAVarType, // TSDB_DATA_TYPE_VARBINARY + tColDataCalcSMAVarType, // TSDB_DATA_TYPE_DECIMAL + tColDataCalcSMAVarType, // TSDB_DATA_TYPE_BLOB NULL, // TSDB_DATA_TYPE_MEDIUMBLOB tColDataCalcSMAVarType // TSDB_DATA_TYPE_GEOMETRY }; diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index f5df2fef21..ebdf3df720 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -354,11 +354,16 @@ static int32_t taosLoadCfg(SConfig *pCfg, const char **envCmd, const char *input char cfgFile[PATH_MAX + 100] = {0}; taosExpandDir(inputCfgDir, cfgDir, PATH_MAX); + char lastC = cfgDir[strlen(cfgDir) - 1]; + char *tdDirsep = TD_DIRSEP; + if (lastC == '\\' || lastC == '/') { + tdDirsep = ""; + } if (taosIsDir(cfgDir)) { #ifdef CUS_PROMPT - snprintf(cfgFile, sizeof(cfgFile), "%s" TD_DIRSEP "%s.cfg", cfgDir, CUS_PROMPT); + snprintf(cfgFile, sizeof(cfgFile), "%s" "%s" "%s.cfg", cfgDir, tdDirsep, CUS_PROMPT); #else - snprintf(cfgFile, sizeof(cfgFile), "%s" TD_DIRSEP "taos.cfg", cfgDir); + snprintf(cfgFile, sizeof(cfgFile), "%s" "%s" "taos.cfg", cfgDir, tdDirsep); #endif } else { tstrncpy(cfgFile, cfgDir, sizeof(cfgDir)); @@ -431,6 +436,7 @@ static int32_t taosAddServerLogCfg(SConfig *pCfg) { if (cfgAddInt32(pCfg, "tdbDebugFlag", tdbDebugFlag, 0, 255, CFG_SCOPE_SERVER, CFG_DYN_SERVER) != 0) return -1; if (cfgAddInt32(pCfg, "metaDebugFlag", metaDebugFlag, 0, 255, CFG_SCOPE_SERVER, CFG_DYN_SERVER) != 0) return -1; if (cfgAddInt32(pCfg, "stDebugFlag", stDebugFlag, 0, 255, CFG_SCOPE_SERVER, CFG_DYN_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "sndDebugFlag", sndDebugFlag, 0, 255, CFG_SCOPE_SERVER, CFG_DYN_SERVER) != 0) return -1; return 0; } @@ -951,6 +957,7 @@ static void taosSetServerLogCfg(SConfig *pCfg) { tdbDebugFlag = cfgGetItem(pCfg, "tdbDebugFlag")->i32; metaDebugFlag = cfgGetItem(pCfg, "metaDebugFlag")->i32; stDebugFlag = cfgGetItem(pCfg, "stDebugFlag")->i32; + sndDebugFlag = cfgGetItem(pCfg, "sndDebugFlag")->i32; } static int32_t taosSetSlowLogScope(char *pScope) { @@ -1424,7 +1431,7 @@ static int32_t taosCfgDynamicOptionsForServer(SConfig *pCfg, char *name) { {"smaDebugFlag", &smaDebugFlag}, {"idxDebugFlag", &idxDebugFlag}, {"tdbDebugFlag", &tdbDebugFlag}, {"tmrDebugFlag", &tmrDebugFlag}, {"uDebugFlag", &uDebugFlag}, {"smaDebugFlag", &smaDebugFlag}, {"rpcDebugFlag", &rpcDebugFlag}, {"qDebugFlag", &qDebugFlag}, {"metaDebugFlag", &metaDebugFlag}, - {"jniDebugFlag", &jniDebugFlag}, {"stDebugFlag", &stDebugFlag}, + {"jniDebugFlag", &jniDebugFlag}, {"stDebugFlag", &stDebugFlag}, {"sndDebugFlag", &sndDebugFlag}, }; static OptionNameAndVar options[] = { @@ -1732,6 +1739,7 @@ void taosSetAllDebugFlag(int32_t flag, bool rewrite) { taosSetDebugFlag(&tdbDebugFlag, "tdbDebugFlag", flag, rewrite); taosSetDebugFlag(&metaDebugFlag, "metaDebugFlag", flag, rewrite); taosSetDebugFlag(&stDebugFlag, "stDebugFlag", flag, rewrite); + taosSetDebugFlag(&sndDebugFlag, "sndDebugFlag", flag, rewrite); uInfo("all debug flag are set to %d", flag); } diff --git a/source/common/src/tmsg.c b/source/common/src/tmsg.c index dc3ba7934f..01b1df9d5f 100644 --- a/source/common/src/tmsg.c +++ b/source/common/src/tmsg.c @@ -7152,7 +7152,7 @@ int32_t tDeserializeSMDropStreamReq(void *buf, int32_t bufLen, SMDropStreamReq * return 0; } -void tFreeSMDropStreamReq(SMDropStreamReq *pReq) { +void tFreeMDropStreamReq(SMDropStreamReq *pReq) { FREESQL(); } diff --git a/source/dnode/mgmt/mgmt_mnode/src/mmFile.c b/source/dnode/mgmt/mgmt_mnode/src/mmFile.c index 64e18ef06d..27baa5ede5 100644 --- a/source/dnode/mgmt/mgmt_mnode/src/mmFile.c +++ b/source/dnode/mgmt/mgmt_mnode/src/mmFile.c @@ -169,7 +169,7 @@ int32_t mmWriteFile(const char *path, const SMnodeOpt *pOption) { if (buffer == NULL) goto _OVER; terrno = 0; - pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); + pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC | TD_FILE_WRITE_THROUGH); if (pFile == NULL) goto _OVER; int32_t len = strlen(buffer); diff --git a/source/dnode/mgmt/mgmt_snode/src/smHandle.c b/source/dnode/mgmt/mgmt_snode/src/smHandle.c index b29c5c1eb4..cd81b9873f 100644 --- a/source/dnode/mgmt/mgmt_snode/src/smHandle.c +++ b/source/dnode/mgmt/mgmt_snode/src/smHandle.c @@ -73,6 +73,7 @@ SArray *smGetMsgHandles() { SArray *pArray = taosArrayInit(4, sizeof(SMgmtHandle)); if (pArray == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_UPDATE, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DEPLOY, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DROP, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RUN, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; @@ -87,7 +88,8 @@ SArray *smGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_CHECK_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_SCAN_HISTORY_FINISH, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_SCAN_HISTORY_FINISH_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_CHECK_POINT_SOURCE, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; + if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECKPOINT_READY, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; + code = 0; _OVER: diff --git a/source/dnode/mgmt/mgmt_snode/src/smInt.c b/source/dnode/mgmt/mgmt_snode/src/smInt.c index 47c2993014..56744e4654 100644 --- a/source/dnode/mgmt/mgmt_snode/src/smInt.c +++ b/source/dnode/mgmt/mgmt_snode/src/smInt.c @@ -76,9 +76,14 @@ int32_t smOpen(SMgmtInputOpt *pInput, SMgmtOutputOpt *pOutput) { return 0; } +static int32_t smStartSnodes(SSnodeMgmt *pMgmt) { + return sndInit(pMgmt->pSnode); +} + SMgmtFunc smGetMgmtFunc() { SMgmtFunc mgmtFunc = {0}; mgmtFunc.openFp = smOpen; + mgmtFunc.startFp = (NodeStartFp)smStartSnodes; mgmtFunc.closeFp = (NodeCloseFp)smClose; mgmtFunc.createFp = (NodeCreateFp)smProcessCreateReq; mgmtFunc.dropFp = (NodeDropFp)smProcessDropReq; diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmFile.c b/source/dnode/mgmt/mgmt_vnode/src/vmFile.c index ed32e75d18..53139330a3 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmFile.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmFile.c @@ -200,7 +200,7 @@ int32_t vmWriteVnodeListToFile(SVnodeMgmt *pMgmt) { if (buffer == NULL) goto _OVER; terrno = 0; - pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); + pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC | TD_FILE_WRITE_THROUGH); if (pFile == NULL) goto _OVER; int32_t len = strlen(buffer); diff --git a/source/dnode/mgmt/node_util/src/dmEps.c b/source/dnode/mgmt/node_util/src/dmEps.c index 3e948678a4..bee77528bd 100644 --- a/source/dnode/mgmt/node_util/src/dmEps.c +++ b/source/dnode/mgmt/node_util/src/dmEps.c @@ -232,7 +232,7 @@ int32_t dmWriteEps(SDnodeData *pData) { if (buffer == NULL) goto _OVER; terrno = 0; - pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); + pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC | TD_FILE_WRITE_THROUGH); if (pFile == NULL) goto _OVER; int32_t len = strlen(buffer); diff --git a/source/dnode/mgmt/node_util/src/dmFile.c b/source/dnode/mgmt/node_util/src/dmFile.c index c81efddcc1..03c6734e0c 100644 --- a/source/dnode/mgmt/node_util/src/dmFile.c +++ b/source/dnode/mgmt/node_util/src/dmFile.c @@ -120,7 +120,7 @@ int32_t dmWriteFile(const char *path, const char *name, bool deployed) { if (buffer == NULL) goto _OVER; terrno = 0; - pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); + pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC | TD_FILE_WRITE_THROUGH); if (pFile == NULL) goto _OVER; int32_t len = strlen(buffer); diff --git a/source/dnode/mnode/impl/inc/mndDef.h b/source/dnode/mnode/impl/inc/mndDef.h index fc9086eebf..27fef1e81e 100644 --- a/source/dnode/mnode/impl/inc/mndDef.h +++ b/source/dnode/mnode/impl/inc/mndDef.h @@ -651,8 +651,7 @@ typedef struct SStreamConf { } SStreamConf; typedef struct { - char name[TSDB_STREAM_FNAME_LEN]; - // ctl + char name[TSDB_STREAM_FNAME_LEN]; SRWLatch lock; // create info diff --git a/source/dnode/mnode/impl/inc/mndStream.h b/source/dnode/mnode/impl/inc/mndStream.h index 19fd2a3fd4..244a6d08dd 100644 --- a/source/dnode/mnode/impl/inc/mndStream.h +++ b/source/dnode/mnode/impl/inc/mndStream.h @@ -22,17 +22,37 @@ extern "C" { #endif -int32_t mndInitStream(SMnode *pMnode); -void mndCleanupStream(SMnode *pMnode); +typedef struct SStreamTransInfo { + int64_t startTime; + int32_t transId; + const char *name; +} SStreamTransInfo; +typedef struct SStreamTransMgmt { + SHashObj *pDBTrans; +} SStreamTransMgmt; + +typedef struct SStreamExecInfo { + SArray *pNodeList; + int64_t ts; // snapshot ts + SStreamTransMgmt transMgmt; + int64_t activeCheckpoint; // active check point id + SHashObj * pTaskMap; + SArray * pTaskList; + TdThreadMutex lock; +} SStreamExecInfo; + +extern SStreamExecInfo execInfo; + +int32_t mndInitStream(SMnode *pMnode); +void mndCleanupStream(SMnode *pMnode); SStreamObj *mndAcquireStream(SMnode *pMnode, char *streamName); void mndReleaseStream(SMnode *pMnode, SStreamObj *pStream); +int32_t mndDropStreamByDb(SMnode *pMnode, STrans *pTrans, SDbObj *pDb); +int32_t mndPersistStream(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream); -SSdbRaw *mndStreamActionEncode(SStreamObj *pStream); -SSdbRow *mndStreamActionDecode(SSdbRaw *pRaw); - -int32_t mndDropStreamByDb(SMnode *pMnode, STrans *pTrans, SDbObj *pDb); -int32_t mndPersistStream(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream); +int32_t mndStreamRegisterTrans(STrans* pTrans, const char* pName, const char* pSrcDb, const char* pDstDb); +bool streamTransConflictOtherTrans(SMnode *pMnode, const char *pSrcDb, const char *pDstDb); // for sma // TODO refactor diff --git a/source/dnode/mnode/impl/src/mndDb.c b/source/dnode/mnode/impl/src/mndDb.c index 54f89f9bc7..077c0a9c2a 100644 --- a/source/dnode/mnode/impl/src/mndDb.c +++ b/source/dnode/mnode/impl/src/mndDb.c @@ -919,7 +919,7 @@ static int32_t mndSetAlterDbRedoActions(SMnode *pMnode, STrans *pTrans, SDbObj * if (pIter == NULL) break; if (mndVgroupInDb(pVgroup, pNewDb->uid)) { - if (mndBuildRaftAlterVgroupAction(pMnode, pTrans, pOldDb, pNewDb, pVgroup, pArray) != 0) { + if (mndBuildAlterVgroupAction(pMnode, pTrans, pOldDb, pNewDb, pVgroup, pArray) != 0) { sdbCancelFetch(pSdb, pIter); sdbRelease(pSdb, pVgroup); taosArrayDestroy(pArray); diff --git a/source/dnode/mnode/impl/src/mndDump.c b/source/dnode/mnode/impl/src/mndDump.c index 481495cbe5..5efebbc16e 100644 --- a/source/dnode/mnode/impl/src/mndDump.c +++ b/source/dnode/mnode/impl/src/mndDump.c @@ -605,7 +605,7 @@ void mndDumpSdb() { char *pCont = tjsonToString(json); int32_t contLen = strlen(pCont); char file[] = "sdb.json"; - TdFilePtr pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); + TdFilePtr pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC| TD_FILE_WRITE_THROUGH); if (pFile == NULL) { terrno = TAOS_SYSTEM_ERROR(errno); mError("failed to write %s since %s", file, terrstr()); diff --git a/source/dnode/mnode/impl/src/mndScheduler.c b/source/dnode/mnode/impl/src/mndScheduler.c index 88a06cb513..3ef4c9a4d2 100644 --- a/source/dnode/mnode/impl/src/mndScheduler.c +++ b/source/dnode/mnode/impl/src/mndScheduler.c @@ -28,7 +28,7 @@ extern bool tsDeployOnSnode; static int32_t doAddSinkTask(SStreamObj* pStream, SArray* pTaskList, SMnode* pMnode, int32_t vgId, SVgObj* pVgroup, - SEpSet* pEpset, bool isFillhistory); + SEpSet* pEpset, bool isFillhistory); int32_t mndConvertRsmaTask(char** pDst, int32_t* pDstLen, const char* ast, int64_t uid, int8_t triggerType, int64_t watermark, int64_t deleteMark) { diff --git a/source/dnode/mnode/impl/src/mndStream.c b/source/dnode/mnode/impl/src/mndStream.c index 02d401d924..d2f0a13038 100644 --- a/source/dnode/mnode/impl/src/mndStream.c +++ b/source/dnode/mnode/impl/src/mndStream.c @@ -34,7 +34,13 @@ #define MND_STREAM_VER_NUMBER 4 #define MND_STREAM_RESERVE_SIZE 64 #define MND_STREAM_MAX_NUM 60 -#define MND_STREAM_CHECKPOINT_NAME "stream-checkpoint" + +#define MND_STREAM_CHECKPOINT_NAME "stream-checkpoint" +#define MND_STREAM_PAUSE_NAME "stream-pause" +#define MND_STREAM_RESUME_NAME "stream-resume" +#define MND_STREAM_DROP_NAME "stream-drop" +#define MND_STREAM_TASK_RESET_NAME "stream-task-reset" +#define MND_STREAM_TASK_UPDATE_NAME "stream-task-update" typedef struct SNodeEntry { int32_t nodeId; @@ -43,22 +49,13 @@ typedef struct SNodeEntry { int64_t hbTimestamp; // second } SNodeEntry; -typedef struct SStreamExecInfo { - SArray *pNodeList; - int64_t ts; // snapshot ts - int64_t activeCheckpoint; // active check point id - SHashObj * pTaskMap; - SArray * pTaskList; - TdThreadMutex lock; -} SStreamExecInfo; - typedef struct SVgroupChangeInfo { SHashObj *pDBMap; SArray * pUpdateNodeList; // SArray } SVgroupChangeInfo; -static int32_t mndNodeCheckSentinel = 0; -static SStreamExecInfo execInfo; +static int32_t mndNodeCheckSentinel = 0; +SStreamExecInfo execInfo; static int32_t mndStreamActionInsert(SSdb *pSdb, SStreamObj *pStream); static int32_t mndStreamActionDelete(SSdb *pSdb, SStreamObj *pStream); @@ -83,17 +80,20 @@ static SArray *mndTakeVgroupSnapshot(SMnode *pMnode, bool *allReady); static SVgroupChangeInfo mndFindChangedNodeInfo(SMnode *pMnode, const SArray *pPrevNodeList, const SArray *pNodeList); -static STrans *doCreateTrans(SMnode *pMnode, SStreamObj *pStream, const char *name); +static STrans *doCreateTrans(SMnode *pMnode, SStreamObj *pStream, SRpcMsg *pReq, const char *name, const char* pMsg); static int32_t mndPersistTransLog(SStreamObj *pStream, STrans *pTrans); static void initTransAction(STransAction *pAction, void *pCont, int32_t contLen, int32_t msgType, const SEpSet *pEpset, int32_t retryCode); static int32_t createStreamUpdateTrans(SStreamObj *pStream, SVgroupChangeInfo *pInfo, STrans *pTrans); static void removeStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode); -static void keepStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode); +static void saveStreamTasksInfo(SStreamObj *pStream, SStreamExecInfo *pExecNode); static int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot); -static int32_t doKillActiveCheckpointTrans(SMnode *pMnode); +static int32_t killActiveCheckpointTrans(SMnode *pMnode, const char* pDbName, size_t len); static int32_t setNodeEpsetExpiredFlag(const SArray *pNodeList); +static SSdbRaw *mndStreamActionEncode(SStreamObj *pStream); +static SSdbRow *mndStreamActionDecode(SSdbRaw *pRaw); + int32_t mndInitStream(SMnode *pMnode) { SSdbTable table = { .sdbType = SDB_STREAM, @@ -133,8 +133,11 @@ int32_t mndInitStream(SMnode *pMnode) { mndAddShowFreeIterHandle(pMnode, TSDB_MGMT_TABLE_STREAM_TASKS, mndCancelGetNextStreamTask); taosThreadMutexInit(&execInfo.lock, NULL); - execInfo.pTaskMap = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_VARCHAR), true, HASH_NO_LOCK); + _hash_fn_t fn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_VARCHAR); + execInfo.pTaskList = taosArrayInit(4, sizeof(STaskId)); + execInfo.pTaskMap = taosHashInit(64, fn, true, HASH_NO_LOCK); + execInfo.transMgmt.pDBTrans = taosHashInit(32, fn, true, HASH_NO_LOCK); return sdbSetTable(pMnode->pSdb, table); } @@ -142,6 +145,7 @@ int32_t mndInitStream(SMnode *pMnode) { void mndCleanupStream(SMnode *pMnode) { taosArrayDestroy(execInfo.pTaskList); taosHashCleanup(execInfo.pTaskMap); + taosHashCleanup(execInfo.transMgmt.pDBTrans); taosThreadMutexDestroy(&execInfo.lock); mDebug("mnd stream exec info cleanup"); } @@ -335,7 +339,7 @@ static int32_t mndStreamGetPlanString(const char *ast, int8_t triggerType, int64 .pAstRoot = pAst, .topicQuery = false, .streamQuery = true, - .triggerType = triggerType == STREAM_TRIGGER_MAX_DELAY ? STREAM_TRIGGER_WINDOW_CLOSE : triggerType, + .triggerType = (triggerType == STREAM_TRIGGER_MAX_DELAY) ? STREAM_TRIGGER_WINDOW_CLOSE : triggerType, .watermark = watermark, }; code = qCreateQueryPlan(&cxt, &pPlan, NULL); @@ -720,6 +724,34 @@ int32_t mndDropStreamTasks(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream) return 0; } +static int32_t checkForNumOfStreams(SMnode *pMnode, SStreamObj *pStreamObj) { // check for number of existed tasks + int32_t numOfStream = 0; + SStreamObj *pStream = NULL; + void *pIter = NULL; + + while ((pIter = sdbFetch(pMnode->pSdb, SDB_STREAM, pIter, (void **)&pStream)) != NULL) { + if (pStream->sourceDbUid == pStreamObj->sourceDbUid) { + ++numOfStream; + } + + sdbRelease(pMnode->pSdb, pStream); + + if (numOfStream > MND_STREAM_MAX_NUM) { + mError("too many streams, no more than %d for each database", MND_STREAM_MAX_NUM); + sdbCancelFetch(pMnode->pSdb, pIter); + return TSDB_CODE_MND_TOO_MANY_STREAMS; + } + + if (pStream->targetStbUid == pStreamObj->targetStbUid) { + mError("Cannot write the same stable as other stream:%s", pStream->name); + sdbCancelFetch(pMnode->pSdb, pIter); + return TSDB_CODE_MND_INVALID_TARGET_TABLE; + } + } + + return TSDB_CODE_SUCCESS; +} + static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { SMnode * pMnode = pReq->info.node; int32_t code = -1; @@ -732,6 +764,7 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { terrno = TSDB_CODE_INVALID_MSG; goto _OVER; } + #ifdef WINDOWS terrno = TSDB_CODE_MND_INVALID_PLATFORM; goto _OVER; @@ -772,42 +805,9 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { goto _OVER; } - { - int32_t numOfStream = 0; - - SStreamObj *pStream = NULL; - void * pIter = NULL; - - while (1) { - pIter = sdbFetch(pMnode->pSdb, SDB_STREAM, pIter, (void **)&pStream); - if (pIter == NULL) { - if (numOfStream > MND_STREAM_MAX_NUM) { - mError("too many streams, no more than %d for each database", MND_STREAM_MAX_NUM); - terrno = TSDB_CODE_MND_TOO_MANY_STREAMS; - goto _OVER; - } - break; - } - - if (pStream->sourceDbUid == streamObj.sourceDbUid) { - ++numOfStream; - } - - sdbRelease(pMnode->pSdb, pStream); - if (numOfStream > MND_STREAM_MAX_NUM) { - mError("too many streams, no more than %d for each database", MND_STREAM_MAX_NUM); - terrno = TSDB_CODE_MND_TOO_MANY_STREAMS; - sdbCancelFetch(pMnode->pSdb, pIter); - goto _OVER; - } - - if (pStream->targetStbUid == streamObj.targetStbUid) { - mError("Cannot write the same stable as other stream:%s", pStream->name); - terrno = TSDB_CODE_MND_INVALID_TARGET_TABLE; - sdbCancelFetch(pMnode->pSdb, pIter); - goto _OVER; - } - } + code = checkForNumOfStreams(pMnode, &streamObj); + if (code != TSDB_CODE_SUCCESS) { + goto _OVER; } STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_ROLLBACK, TRN_CONFLICT_DB_INSIDE, pReq, "create-stream"); @@ -866,7 +866,7 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { taosThreadMutexLock(&execInfo.lock); mDebug("stream tasks register into node list"); - keepStreamTasksInBuf(&streamObj, &execInfo); + saveStreamTasksInfo(&streamObj, &execInfo); taosThreadMutexUnlock(&execInfo.lock); code = TSDB_CODE_ACTION_IN_PROGRESS; @@ -893,7 +893,6 @@ _OVER: } mndReleaseStream(pMnode, pStream); - tFreeSCMCreateStreamReq(&createStreamReq); tFreeStreamObj(&streamObj); if(sql != NULL){ @@ -1268,7 +1267,7 @@ static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) { SMStreamDoCheckpointMsg *pMsg = (SMStreamDoCheckpointMsg *)pReq->pCont; int64_t checkpointId = pMsg->checkpointId; - STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB, NULL, MND_STREAM_CHECKPOINT_NAME); + STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, NULL, MND_STREAM_CHECKPOINT_NAME); if (pTrans == NULL) { mError("failed to trigger checkpoint, reason: %s", tstrerror(TSDB_CODE_OUT_OF_MEMORY)); return -1; @@ -1277,7 +1276,8 @@ static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) { mDebug("start to trigger checkpoint, checkpointId: %" PRId64, checkpointId); const char *pDb = mndGetStreamDB(pMnode); - mndTransSetDbName(pTrans, pDb, "checkpoint"); + mndTransSetDbName(pTrans, pDb, pDb); + mndStreamRegisterTrans(pTrans, MND_STREAM_CHECKPOINT_NAME, pDb, pDb); taosMemoryFree((void *)pDb); if (mndTransCheckConflict(pMnode, pTrans) != 0) { @@ -1329,46 +1329,56 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { if (dropReq.igNotExists) { mInfo("stream:%s not exist, ignore not exist is set", dropReq.name); sdbRelease(pMnode->pSdb, pStream); - tFreeSMDropStreamReq(&dropReq); + tFreeMDropStreamReq(&dropReq); return 0; } else { terrno = TSDB_CODE_MND_STREAM_NOT_EXIST; mError("stream:%s not exist failed to drop", dropReq.name); - tFreeSMDropStreamReq(&dropReq); + tFreeMDropStreamReq(&dropReq); return -1; } } if (mndCheckDbPrivilegeByName(pMnode, pReq->info.conn.user, MND_OPER_WRITE_DB, pStream->targetDb) != 0) { sdbRelease(pMnode->pSdb, pStream); - tFreeSMDropStreamReq(&dropReq); + tFreeMDropStreamReq(&dropReq); return -1; } - STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, pReq, "drop-stream"); + // check if it is conflict with other trans in both sourceDb and targetDb. + bool conflict = streamTransConflictOtherTrans(pMnode, pStream->sourceDb, pStream->targetDb); + if (conflict) { + sdbRelease(pMnode->pSdb, pStream); + tFreeMDropStreamReq(&dropReq); + return -1; + } + + STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, pReq, MND_STREAM_DROP_NAME); if (pTrans == NULL) { mError("stream:%s, failed to drop since %s", dropReq.name, terrstr()); sdbRelease(pMnode->pSdb, pStream); - tFreeSMDropStreamReq(&dropReq); + tFreeMDropStreamReq(&dropReq); return -1; } - mInfo("trans:%d, used to drop stream:%s", pTrans->id, dropReq.name); + mInfo("trans:%d used to drop stream:%s", pTrans->id, dropReq.name); mndTransSetDbName(pTrans, pStream->sourceDb, pStream->targetDb); if (mndTransCheckConflict(pMnode, pTrans) != 0) { sdbRelease(pMnode->pSdb, pStream); mndTransDrop(pTrans); - tFreeSMDropStreamReq(&dropReq); + tFreeMDropStreamReq(&dropReq); return -1; } + int32_t code = mndStreamRegisterTrans(pTrans, MND_STREAM_DROP_NAME, pStream->sourceDb, pStream->targetDb); + // drop all tasks if (mndDropStreamTasks(pMnode, pTrans, pStream) < 0) { mError("stream:%s, failed to drop task since %s", dropReq.name, terrstr()); sdbRelease(pMnode->pSdb, pStream); mndTransDrop(pTrans); - tFreeSMDropStreamReq(&dropReq); + tFreeMDropStreamReq(&dropReq); return -1; } @@ -1376,7 +1386,7 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { if (mndPersistDropStreamLog(pMnode, pTrans, pStream) < 0) { sdbRelease(pMnode->pSdb, pStream); mndTransDrop(pTrans); - tFreeSMDropStreamReq(&dropReq); + tFreeMDropStreamReq(&dropReq); return -1; } @@ -1384,7 +1394,7 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { mError("trans:%d, failed to prepare drop stream trans since %s", pTrans->id, terrstr()); sdbRelease(pMnode->pSdb, pStream); mndTransDrop(pTrans); - tFreeSMDropStreamReq(&dropReq); + tFreeMDropStreamReq(&dropReq); return -1; } @@ -1392,13 +1402,12 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { SName name = {0}; tNameFromString(&name, dropReq.name, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE); - // reuse this function for stream auditRecord(pReq, pMnode->clusterId, "dropStream", "", name.dbname, dropReq.sql, dropReq.sqlLen); sdbRelease(pMnode->pSdb, pStream); mndTransDrop(pTrans); - tFreeSMDropStreamReq(&dropReq); + tFreeMDropStreamReq(&dropReq); return TSDB_CODE_ACTION_IN_PROGRESS; } @@ -1814,6 +1823,13 @@ static int32_t mndProcessPauseStreamReq(SRpcMsg *pReq) { return -1; } + // check if it is conflict with other trans in both sourceDb and targetDb. + bool conflict = streamTransConflictOtherTrans(pMnode, pStream->sourceDb, pStream->targetDb); + if (conflict) { + sdbRelease(pMnode->pSdb, pStream); + return -1; + } + bool updated = taskNodeIsUpdated(pMnode); if (updated) { mError("tasks are not ready for pause, node update detected"); @@ -1822,7 +1838,7 @@ static int32_t mndProcessPauseStreamReq(SRpcMsg *pReq) { STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, pReq, "pause-stream"); if (pTrans == NULL) { - mError("stream:%s, failed to pause stream since %s", pauseReq.name, terrstr()); + mError("stream:%s failed to pause stream since %s", pauseReq.name, terrstr()); sdbRelease(pMnode->pSdb, pStream); return -1; } @@ -1836,7 +1852,9 @@ static int32_t mndProcessPauseStreamReq(SRpcMsg *pReq) { return -1; } - // pause all tasks + int32_t code = mndStreamRegisterTrans(pTrans, MND_STREAM_PAUSE_NAME, pStream->sourceDb, pStream->targetDb); + + // if nodeUpdate happened, not send pause trans if (mndPauseAllStreamTasks(pMnode, pTrans, pStream) < 0) { mError("stream:%s, failed to pause task since %s", pauseReq.name, terrstr()); sdbRelease(pMnode->pSdb, pStream); @@ -1940,13 +1958,21 @@ static int32_t mndProcessResumeStreamReq(SRpcMsg *pReq) { return -1; } - STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, pReq, "pause-stream"); - if (pTrans == NULL) { - mError("stream:%s, failed to pause stream since %s", pauseReq.name, terrstr()); + // check if it is conflict with other trans in both sourceDb and targetDb. + bool conflict = streamTransConflictOtherTrans(pMnode, pStream->sourceDb, pStream->targetDb); + if (conflict) { sdbRelease(pMnode->pSdb, pStream); return -1; } - mInfo("trans:%d, used to pause stream:%s", pTrans->id, pauseReq.name); + + STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, pReq, MND_STREAM_RESUME_NAME); + if (pTrans == NULL) { + mError("stream:%s, failed to resume stream since %s", pauseReq.name, terrstr()); + sdbRelease(pMnode->pSdb, pStream); + return -1; + } + + mInfo("trans:%d used to resume stream:%s", pTrans->id, pauseReq.name); mndTransSetDbName(pTrans, pStream->sourceDb, pStream->targetDb); if (mndTransCheckConflict(pMnode, pTrans) != 0) { @@ -1955,6 +1981,8 @@ static int32_t mndProcessResumeStreamReq(SRpcMsg *pReq) { return -1; } + int32_t code = mndStreamRegisterTrans(pTrans, MND_STREAM_RESUME_NAME, pStream->sourceDb, pStream->targetDb); + // resume all tasks if (mndResumeAllStreamTasks(pTrans, pMnode, pStream, pauseReq.igUntreated) < 0) { mError("stream:%s, failed to drop task since %s", pauseReq.name, terrstr()); @@ -2145,6 +2173,9 @@ static SVgroupChangeInfo mndFindChangedNodeInfo(SMnode *pMnode, const SArray *pP epsetAssign(&updateInfo.newEp, &pCurrent->epset); taosArrayPush(info.pUpdateNodeList, &updateInfo); + + } + if(pCurrent->nodeId != SNODE_HANDLE){ SVgObj *pVgroup = mndAcquireVgroup(pMnode, pCurrent->nodeId); taosHashPut(info.pDBMap, pVgroup->dbName, strlen(pVgroup->dbName), NULL, 0); mndReleaseVgroup(pMnode, pVgroup); @@ -2200,6 +2231,24 @@ static SArray *mndTakeVgroupSnapshot(SMnode *pMnode, bool *allReady) { sdbRelease(pSdb, pVgroup); } + SSnodeObj *pObj = NULL; + while (1) { + pIter = sdbFetch(pSdb, SDB_SNODE, pIter, (void **)&pObj); + if (pIter == NULL) { + break; + } + + SNodeEntry entry = {0}; + addEpIntoEpSet(&entry.epset, pObj->pDnode->fqdn, pObj->pDnode->port); + entry.nodeId = SNODE_HANDLE; + + char buf[256] = {0}; + EPSET_TO_STR(&entry.epset, buf); + mDebug("take snode snapshot, nodeId:%d %s", entry.nodeId, buf); + taosArrayPush(pVgroupListSnapshot, &entry); + sdbRelease(pSdb, pObj); + } + return pVgroupListSnapshot; } @@ -2219,7 +2268,7 @@ static int32_t mndProcessVgroupChange(SMnode *pMnode, SVgroupChangeInfo *pChange // here create only one trans if (pTrans == NULL) { - pTrans = doCreateTrans(pMnode, pStream, "stream-task-update"); + pTrans = doCreateTrans(pMnode, pStream, NULL, MND_STREAM_TASK_UPDATE_NAME, "update task epsets"); if (pTrans == NULL) { sdbRelease(pSdb, pStream); sdbCancelFetch(pSdb, pIter); @@ -2329,7 +2378,7 @@ static void doExtractTasksFromStream(SMnode *pMnode) { break; } - keepStreamTasksInBuf(pStream, &execInfo); + saveStreamTasksInfo(pStream, &execInfo); sdbRelease(pSdb, pStream); } } @@ -2377,6 +2426,8 @@ int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot) { STaskId * pId = taosArrayGet(execInfo.pTaskList, i); STaskStatusEntry *pEntry = taosHashGet(execInfo.pTaskMap, pId, sizeof(*pId)); + if(pEntry->nodeId == SNODE_HANDLE) continue; + bool existed = taskNodeExists(pNodeSnapshot, pEntry->nodeId); if (!existed) { taosArrayPush(pRemovedTasks, pId); @@ -2413,6 +2464,17 @@ int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot) { return 0; } +static void killAllCheckpointTrans(SMnode* pMnode, SVgroupChangeInfo* pChangeInfo) { + void* pIter = NULL; + while((pIter = taosHashIterate(pChangeInfo->pDBMap, pIter)) != NULL) { + char* pDb = (char*) pIter; + + size_t len = 0; + void* pKey = taosHashGetKey(pDb, &len); + killActiveCheckpointTrans(pMnode, pKey, len); + } +} + // this function runs by only one thread, so it is not multi-thread safe static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) { int32_t code = 0; @@ -2454,7 +2516,8 @@ static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) { SVgroupChangeInfo changeInfo = mndFindChangedNodeInfo(pMnode, execInfo.pNodeList, pNodeSnapshot); if (taosArrayGetSize(changeInfo.pUpdateNodeList) > 0) { // kill current active checkpoint transaction, since the transaction is vnode wide. - doKillActiveCheckpointTrans(pMnode); + killAllCheckpointTrans(pMnode, &changeInfo); + code = mndProcessVgroupChange(pMnode, &changeInfo); // keep the new vnode snapshot @@ -2500,7 +2563,7 @@ static int32_t mndProcessNodeCheck(SRpcMsg *pReq) { return 0; } -void keepStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode) { +void saveStreamTasksInfo(SStreamObj *pStream, SStreamExecInfo *pExecNode) { int32_t level = taosArrayGetSize(pStream->tasks); for (int32_t i = 0; i < level; i++) { @@ -2543,8 +2606,9 @@ void removeStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode) { STaskId *pId = taosArrayGet(pExecNode->pTaskList, k); if (pId->taskId == id.taskId && pId->streamId == id.streamId) { taosArrayRemove(pExecNode->pTaskList, k); - mInfo("s-task:0x%x removed from buffer, remain:%d", (int32_t)id.taskId, - (int32_t)taosArrayGetSize(pExecNode->pTaskList)); + + int32_t num = taosArrayGetSize(pExecNode->pTaskList); + mInfo("s-task:0x%x removed from buffer, remain:%d", (int32_t)id.taskId, num); break; } } @@ -2555,15 +2619,15 @@ void removeStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode) { ASSERT(taosHashGetSize(pExecNode->pTaskMap) == taosArrayGetSize(pExecNode->pTaskList)); } -STrans *doCreateTrans(SMnode *pMnode, SStreamObj *pStream, const char *name) { - STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, NULL, name); +STrans *doCreateTrans(SMnode *pMnode, SStreamObj *pStream, SRpcMsg *pReq, const char *name, const char* pMsg) { + STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, pReq, name); if (pTrans == NULL) { mError("failed to build trans:%s, reason: %s", name, tstrerror(TSDB_CODE_OUT_OF_MEMORY)); terrno = TSDB_CODE_OUT_OF_MEMORY; return NULL; } - mDebug("start to build stream:0x%" PRIx64 " task DAG update", pStream->uid); + mDebug("s-task:0x%"PRIx64" start to build trans %s", pStream->uid, pMsg); mndTransSetDbName(pTrans, pStream->sourceDb, pStream->targetDb); if (mndTransCheckConflict(pMnode, pTrans) != 0) { @@ -2578,7 +2642,7 @@ STrans *doCreateTrans(SMnode *pMnode, SStreamObj *pStream, const char *name) { } int32_t createStreamResetStatusTrans(SMnode *pMnode, SStreamObj *pStream) { - STrans *pTrans = doCreateTrans(pMnode, pStream, "stream-task-reset"); + STrans *pTrans = doCreateTrans(pMnode, pStream, NULL, MND_STREAM_TASK_RESET_NAME, " reset from failed checkpoint"); if (pTrans == NULL) { return terrno; } @@ -2642,43 +2706,36 @@ int32_t createStreamResetStatusTrans(SMnode *pMnode, SStreamObj *pStream) { return TSDB_CODE_ACTION_IN_PROGRESS; } -int32_t doKillActiveCheckpointTrans(SMnode *pMnode) { - int32_t transId = 0; - SSdb * pSdb = pMnode->pSdb; - STrans *pTrans = NULL; - void * pIter = NULL; - - while (1) { - pIter = sdbFetch(pSdb, SDB_TRANS, pIter, (void **)&pTrans); - if (pIter == NULL) { - break; - } - - if (strncmp(pTrans->opername, MND_STREAM_CHECKPOINT_NAME, tListLen(pTrans->opername) - 1) == 0) { - transId = pTrans->id; - sdbRelease(pSdb, pTrans); - sdbCancelFetch(pSdb, pIter); - break; - } - - sdbRelease(pSdb, pTrans); - } - - if (transId == 0) { - mDebug("failed to find the checkpoint trans, reset not executed"); +int32_t killActiveCheckpointTrans(SMnode *pMnode, const char* pDBName, size_t len) { + // data in the hash table will be removed automatically, no need to remove it here. + SStreamTransInfo* pTransInfo = taosHashGet(execInfo.transMgmt.pDBTrans, pDBName, len); + if (pTransInfo == NULL) { return TSDB_CODE_SUCCESS; } - pTrans = mndAcquireTrans(pMnode, transId); - mInfo("kill checkpoint trans:%d", transId); + // not checkpoint trans, ignore + if (strcmp(pTransInfo->name, MND_STREAM_CHECKPOINT_NAME) != 0) { + mDebug("not checkpoint trans, not kill it, name:%s, transId:%d", pTransInfo->name, pTransInfo->transId); + return TSDB_CODE_SUCCESS; + } + + STrans* pTrans = mndAcquireTrans(pMnode, pTransInfo->transId); + if (pTrans != NULL) { + mInfo("kill checkpoint transId:%d in Db:%s", pTransInfo->transId, pDBName); + mndKillTrans(pMnode, pTrans); + mndReleaseTrans(pMnode, pTrans); + } - mndKillTrans(pMnode, pTrans); - mndReleaseTrans(pMnode, pTrans); return TSDB_CODE_SUCCESS; } -int32_t mndResetFromCheckpoint(SMnode *pMnode) { - doKillActiveCheckpointTrans(pMnode); +int32_t mndResetStatusFromCheckpoint(SMnode *pMnode, int32_t transId) { + STrans* pTrans = mndAcquireTrans(pMnode, transId); + if (pTrans != NULL) { + mInfo("kill checkpoint transId:%d to reset task status", transId); + mndKillTrans(pMnode, pTrans); + mndReleaseTrans(pMnode, pTrans); + } // set all tasks status to be normal, refactor later to be stream level, instead of vnode level. SSdb * pSdb = pMnode->pSdb; @@ -2690,7 +2747,13 @@ int32_t mndResetFromCheckpoint(SMnode *pMnode) { break; } - // todo this transaction should exist be only one + bool conflict = streamTransConflictOtherTrans(pMnode, pStream->sourceDb, pStream->targetDb); + if (conflict) { + mError("stream:%s other trans exists in DB:%s & %s failed to start reset-status trans", + pStream->name, pStream->sourceDb, pStream->targetDb); + continue; + } + mDebug("stream:%s (0x%" PRIx64 ") reset checkpoint procedure, create reset trans", pStream->name, pStream->uid); int32_t code = createStreamResetStatusTrans(pMnode, pStream); if (code != TSDB_CODE_SUCCESS) { @@ -2725,12 +2788,12 @@ int32_t setNodeEpsetExpiredFlag(const SArray *pNodeList) { return TSDB_CODE_SUCCESS; } -static void updateStageInfo(STaskStatusEntry* pTaskEntry, int32_t stage) { +static void updateStageInfo(STaskStatusEntry* pTaskEntry, int64_t stage) { int32_t numOfNodes = taosArrayGetSize(execInfo.pNodeList); for(int32_t j = 0; j < numOfNodes; ++j) { SNodeEntry* pNodeEntry = taosArrayGet(execInfo.pNodeList, j); if (pNodeEntry->nodeId == pTaskEntry->nodeId) { - mInfo("vgId:%d stage updated from %d to %d, nodeUpdate trigger by s-task:0x%" PRIx64, pTaskEntry->nodeId, + mInfo("vgId:%d stage updated from %"PRId64 " to %"PRId64 ", nodeUpdate trigger by s-task:0x%" PRIx64, pTaskEntry->nodeId, pTaskEntry->stage, stage, pTaskEntry->id.taskId); pNodeEntry->stageUpdated = true; @@ -2775,6 +2838,7 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { setNodeEpsetExpiredFlag(req.pUpdateNodes); } + bool snodeChanged = false; for (int32_t i = 0; i < req.numOfTasks; ++i) { STaskStatusEntry *p = taosArrayGet(req.pTaskStatus, i); STaskStatusEntry *pTaskEntry = taosHashGet(execInfo.pTaskMap, &p->id, sizeof(p->id)); @@ -2785,6 +2849,7 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { if (pTaskEntry->stage != p->stage && pTaskEntry->stage != -1) { updateStageInfo(pTaskEntry, p->stage); + if(pTaskEntry->nodeId == SNODE_HANDLE) snodeChanged = true; } else { streamTaskStatusCopy(pTaskEntry, p); if (p->activeCheckpointId != 0) { @@ -2813,11 +2878,11 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) { SArray *p = mndTakeVgroupSnapshot(pMnode, &allReady); taosArrayDestroy(p); - if (allReady) { + if (allReady || snodeChanged) { // if the execInfo.activeCheckpoint == 0, the checkpoint is restoring from wal mInfo("checkpointId:%" PRId64 " failed, issue task-reset trans to reset all tasks status", execInfo.activeCheckpoint); - mndResetFromCheckpoint(pMnode); + mndResetStatusFromCheckpoint(pMnode, activeCheckpointId); } else { mInfo("not all vgroups are ready, wait for next HB from stream tasks"); } diff --git a/source/dnode/mnode/impl/src/mndStreamTrans.c b/source/dnode/mnode/impl/src/mndStreamTrans.c new file mode 100644 index 0000000000..2345de290a --- /dev/null +++ b/source/dnode/mnode/impl/src/mndStreamTrans.c @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include "mndTrans.h" +#include "mndStream.h" + +typedef struct SKeyInfo { + void* pKey; + int32_t keyLen; +} SKeyInfo; + +static int32_t clearFinishedTrans(SMnode* pMnode); + +int32_t mndStreamRegisterTrans(STrans* pTrans, const char* pName, const char* pSrcDb, const char* pDstDb) { + SStreamTransInfo info = {.transId = pTrans->id, .startTime = taosGetTimestampMs(), .name = pName}; + taosHashPut(execInfo.transMgmt.pDBTrans, pSrcDb, strlen(pSrcDb), &info, sizeof(SStreamTransInfo)); + + if (strcmp(pSrcDb, pDstDb) != 0) { + taosHashPut(execInfo.transMgmt.pDBTrans, pDstDb, strlen(pDstDb), &info, sizeof(SStreamTransInfo)); + } + + return 0; +} + +int32_t clearFinishedTrans(SMnode* pMnode) { + SArray* pList = taosArrayInit(4, sizeof(SKeyInfo)); + size_t keyLen = 0; + + taosThreadMutexLock(&execInfo.lock); + + void* pIter = NULL; + while ((pIter = taosHashIterate(execInfo.transMgmt.pDBTrans, pIter)) != NULL) { + SStreamTransInfo *pEntry = (SStreamTransInfo *)pIter; + STrans* pTrans = mndAcquireTrans(pMnode, pEntry->transId); + + // let's clear the finished trans + if (pTrans == NULL) { + void* pKey = taosHashGetKey(pEntry, &keyLen); + // key is the name of src/dst db name + SKeyInfo info = {.pKey = pKey, .keyLen = keyLen}; + + mDebug("transId:%d %s startTs:%" PRId64 "cleared due to finished", pEntry->transId, pEntry->name, + pEntry->startTime); + taosArrayPush(pList, &info); + } else { + mndReleaseTrans(pMnode, pTrans); + } + } + + size_t num = taosArrayGetSize(pList); + for(int32_t i = 0; i < num; ++i) { + SKeyInfo* pKey = taosArrayGet(pList, i); + taosHashRemove(execInfo.transMgmt.pDBTrans, pKey->pKey, pKey->keyLen); + } + + mDebug("clear %d finished stream-trans, remained:%d", (int32_t) num, taosHashGetSize(execInfo.transMgmt.pDBTrans)); + taosThreadMutexUnlock(&execInfo.lock); + + terrno = TSDB_CODE_SUCCESS; + taosArrayDestroy(pList); + return 0; +} + +bool streamTransConflictOtherTrans(SMnode* pMnode, const char* pSrcDb, const char* pDstDb) { + clearFinishedTrans(pMnode); + + taosThreadMutexLock(&execInfo.lock); + int32_t num = taosHashGetSize(execInfo.transMgmt.pDBTrans); + if (num <= 0) { + taosThreadMutexUnlock(&execInfo.lock); + return false; + } + + SStreamTransInfo *pEntry = taosHashGet(execInfo.transMgmt.pDBTrans, pSrcDb, strlen(pSrcDb)); + if (pEntry != NULL) { + taosThreadMutexUnlock(&execInfo.lock); + mWarn("conflict with other transId:%d in Db:%s, trans:%s", pEntry->transId, pSrcDb, pEntry->name); + return true; + } + + pEntry = taosHashGet(execInfo.transMgmt.pDBTrans, pDstDb, strlen(pDstDb)); + if (pEntry != NULL) { + taosThreadMutexUnlock(&execInfo.lock); + mWarn("conflict with other transId:%d in Db:%s, trans:%s", pEntry->transId, pSrcDb, pEntry->name); + return true; + } + + taosThreadMutexUnlock(&execInfo.lock); + return false; +} + + + diff --git a/source/dnode/mnode/impl/test/trans/trans1.cpp b/source/dnode/mnode/impl/test/trans/trans1.cpp index 92a442aa5e..aff1156449 100644 --- a/source/dnode/mnode/impl/test/trans/trans1.cpp +++ b/source/dnode/mnode/impl/test/trans/trans1.cpp @@ -38,7 +38,7 @@ class MndTestTrans1 : public ::testing::Test { test.ServerStop(); - pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); + pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC | TD_FILE_WRITE_THROUGH); int32_t writeLen = taosWriteFile(pFile, buffer, readLen); if (writeLen < 0 || writeLen == readLen) { ASSERT(1); diff --git a/source/dnode/snode/CMakeLists.txt b/source/dnode/snode/CMakeLists.txt index ebfe80ecab..2da1f9adac 100644 --- a/source/dnode/snode/CMakeLists.txt +++ b/source/dnode/snode/CMakeLists.txt @@ -3,6 +3,7 @@ add_library(snode STATIC ${SNODE_SRC}) target_include_directories( snode PUBLIC "${TD_SOURCE_DIR}/include/dnode/snode" + PUBLIC "${TD_SOURCE_DIR}/include/dnode/vnode" private "${CMAKE_CURRENT_SOURCE_DIR}/inc" ) target_link_libraries( diff --git a/source/dnode/snode/src/snode.c b/source/dnode/snode/src/snode.c index 6f5b370826..9a14258752 100644 --- a/source/dnode/snode/src/snode.c +++ b/source/dnode/snode/src/snode.c @@ -18,6 +18,28 @@ #include "sndInt.h" #include "tstream.h" #include "tuuid.h" +#include "stream.h" + +#define sndError(...) \ + do { \ + if (sndDebugFlag & DEBUG_ERROR) { \ + taosPrintLog("SND ERROR ", DEBUG_ERROR, sndDebugFlag, __VA_ARGS__); \ + } \ + } while (0) + +#define sndInfo(...) \ + do { \ + if (sndDebugFlag & DEBUG_INFO) { \ + taosPrintLog("SND INFO ", DEBUG_INFO, sndDebugFlag, __VA_ARGS__); \ + } \ + } while (0) + +#define sndDebug(...) \ + do { \ + if (sndDebugFlag & DEBUG_DEBUG) { \ + taosPrintLog("SND ", DEBUG_DEBUG, sndDebugFlag, __VA_ARGS__); \ + } \ + } while (0) void sndEnqueueStreamDispatch(SSnode *pSnode, SRpcMsg *pMsg) { char *msgStr = pMsg->pCont; @@ -40,10 +62,14 @@ void sndEnqueueStreamDispatch(SSnode *pSnode, SRpcMsg *pMsg) { if (pTask) { SRpcMsg rsp = { .info = pMsg->info, .code = 0 }; streamProcessDispatchMsg(pTask, &req, &rsp); + tDeleteStreamDispatchReq(&req); streamMetaReleaseTask(pSnode->pMeta, pTask); rpcFreeCont(pMsg->pCont); taosFreeQitem(pMsg); return; + } else { + tDeleteStreamDispatchReq(&req); + return; } FAIL: @@ -63,20 +89,37 @@ int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t nextProcessVer streamTaskOpenAllUpstreamInput(pTask); - pTask->pState = streamStateOpen(pSnode->path, pTask, false, -1, -1); - if (pTask->pState == NULL) { - qError("s-task:%s failed to open state for task", pTask->id.idStr); - return -1; - } else { - qDebug("s-task:%s state:%p", pTask->id.idStr, pTask->pState); + SStreamTask* pSateTask = pTask; + SStreamTask task = {0}; + if (pTask->info.fillHistory) { + task.id.streamId = pTask->streamTaskId.streamId; + task.id.taskId = pTask->streamTaskId.taskId; + task.pMeta = pTask->pMeta; + pSateTask = &task; } - int32_t numOfChildEp = taosArrayGetSize(pTask->upstreamInfo.pList); - SReadHandle handle = { .vnode = NULL, .numOfVgroups = numOfChildEp, .pStateBackend = pTask->pState, .fillHistory = pTask->info.fillHistory }; + pTask->pState = streamStateOpen(pSnode->path, pSateTask, false, -1, -1); + if (pTask->pState == NULL) { + sndError("s-task:%s failed to open state for task", pTask->id.idStr); + return -1; + } else { + sndDebug("s-task:%s state:%p", pTask->id.idStr, pTask->pState); + } + + int32_t numOfVgroups = (int32_t)taosArrayGetSize(pTask->upstreamInfo.pList); + SReadHandle handle = { + .checkpointId = pTask->chkInfo.checkpointId, + .vnode = NULL, + .numOfVgroups = numOfVgroups, + .pStateBackend = pTask->pState, + .fillHistory = pTask->info.fillHistory, + .winRange = pTask->dataRange.window, + }; initStreamStateAPI(&handle.api); - pTask->exec.pExecutor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &handle, 0, pTask->id.taskId); + pTask->exec.pExecutor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &handle, SNODE_HANDLE, pTask->id.taskId); ASSERT(pTask->exec.pExecutor); + qSetTaskId(pTask->exec.pExecutor, pTask->id.taskId, pTask->id.streamId); streamTaskResetUpstreamStageInfo(pTask); streamSetupScheduleTrigger(pTask); @@ -85,7 +128,7 @@ int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t nextProcessVer // checkpoint ver is the kept version, handled data should be the next version. if (pTask->chkInfo.checkpointId != 0) { pTask->chkInfo.nextProcessVer = pTask->chkInfo.checkpointVer + 1; - qInfo("s-task:%s restore from the checkpointId:%" PRId64 " ver:%" PRId64 " nextProcessVer:%" PRId64, pTask->id.idStr, + sndInfo("s-task:%s restore from the checkpointId:%" PRId64 " ver:%" PRId64 " nextProcessVer:%" PRId64, pTask->id.idStr, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer); } else { if (pTask->chkInfo.nextProcessVer == -1) { @@ -96,7 +139,7 @@ int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t nextProcessVer char* p = NULL; streamTaskGetStatus(pTask, &p); - qInfo("snode:%d expand stream task, s-task:%s, checkpointId:%" PRId64 " checkpointVer:%" PRId64 + sndInfo("snode:%d expand stream task, s-task:%s, checkpointId:%" PRId64 " checkpointVer:%" PRId64 " nextProcessVer:%" PRId64 " child id:%d, level:%d, status:%s fill-history:%d, trigger:%" PRId64 " ms", SNODE_HANDLE, pTask->id.idStr, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer, pTask->info.selfChildId, pTask->info.taskLevel, p, pTask->info.fillHistory, pTask->info.triggerParam); @@ -104,6 +147,142 @@ int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t nextProcessVer return 0; } +int32_t sndStartStreamTasks(SSnode* pSnode) { + int32_t code = TSDB_CODE_SUCCESS; + int32_t vgId = SNODE_HANDLE; + SStreamMeta* pMeta = pSnode->pMeta; + + int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); + sndDebug("vgId:%d start to check all %d stream task(s) downstream status", vgId, numOfTasks); + if (numOfTasks == 0) { + return TSDB_CODE_SUCCESS; + } + + SArray* pTaskList = NULL; + streamMetaWLock(pMeta); + pTaskList = taosArrayDup(pMeta->pTaskList, NULL); + taosHashClear(pMeta->startInfo.pReadyTaskSet); + taosHashClear(pMeta->startInfo.pFailedTaskSet); + pMeta->startInfo.startTs = taosGetTimestampMs(); + streamMetaWUnLock(pMeta); + + // broadcast the check downstream tasks msg + for (int32_t i = 0; i < numOfTasks; ++i) { + SStreamTaskId* pTaskId = taosArrayGet(pTaskList, i); + SStreamTask* pTask = streamMetaAcquireTask(pMeta, pTaskId->streamId, pTaskId->taskId); + if (pTask == NULL) { + continue; + } + + // fill-history task can only be launched by related stream tasks. + if (pTask->info.fillHistory == 1) { + streamMetaReleaseTask(pMeta, pTask); + continue; + } + + if (pTask->status.downstreamReady == 1) { + if (HAS_RELATED_FILLHISTORY_TASK(pTask)) { + sndDebug("s-task:%s downstream ready, no need to check downstream, check only related fill-history task", + pTask->id.idStr); + streamLaunchFillHistoryTask(pTask); + } + + streamMetaUpdateTaskDownstreamStatus(pTask, pTask->execInfo.init, pTask->execInfo.start, true); + streamMetaReleaseTask(pMeta, pTask); + continue; + } + + EStreamTaskEvent event = (HAS_RELATED_FILLHISTORY_TASK(pTask)) ? TASK_EVENT_INIT_STREAM_SCANHIST : TASK_EVENT_INIT; + int32_t ret = streamTaskHandleEvent(pTask->status.pSM, event); + if (ret != TSDB_CODE_SUCCESS) { + code = ret; + } + + streamMetaReleaseTask(pMeta, pTask); + } + + taosArrayDestroy(pTaskList); + return code; +} + +int32_t sndResetStreamTaskStatus(SSnode* pSnode) { + SStreamMeta* pMeta = pSnode->pMeta; + int32_t vgId = pMeta->vgId; + int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); + + sndDebug("vgId:%d reset all %d stream task(s) status to be uninit", vgId, numOfTasks); + if (numOfTasks == 0) { + return TSDB_CODE_SUCCESS; + } + + for (int32_t i = 0; i < numOfTasks; ++i) { + SStreamTaskId* pTaskId = taosArrayGet(pMeta->pTaskList, i); + + STaskId id = {.streamId = pTaskId->streamId, .taskId = pTaskId->taskId}; + SStreamTask** pTask = taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); + streamTaskResetStatus(*pTask); + } + + return 0; +} + +int32_t sndRestartStreamTasks(SSnode* pSnode) { + SStreamMeta* pMeta = pSnode->pMeta; + int32_t vgId = pMeta->vgId; + int32_t code = 0; + int64_t st = taosGetTimestampMs(); + + while(1) { + int32_t startVal = atomic_val_compare_exchange_32(&pMeta->startInfo.taskStarting, 0, 1); + if (startVal == 0) { + break; + } + + sndDebug("vgId:%d in start stream tasks procedure, wait for 500ms and recheck", vgId); + taosMsleep(500); + } + + terrno = 0; + sndInfo("vgId:%d tasks are all updated and stopped, restart all tasks, triggered by transId:%d", vgId, + pMeta->updateInfo.transId); + + while (streamMetaTaskInTimer(pMeta)) { + sndDebug("vgId:%d some tasks in timer, wait for 100ms and recheck", pMeta->vgId); + taosMsleep(100); + } + + streamMetaWLock(pMeta); + + code = streamMetaReopen(pMeta); + if (code != TSDB_CODE_SUCCESS) { + sndError("vgId:%d failed to reopen stream meta", vgId); + streamMetaWUnLock(pMeta); + code = terrno; + return code; + } + + streamMetaInitBackend(pMeta); + int64_t el = taosGetTimestampMs() - st; + + sndInfo("vgId:%d close&reload state elapsed time:%.3fs", vgId, el/1000.); + + code = streamMetaLoadAllTasks(pMeta); + if (code != TSDB_CODE_SUCCESS) { + sndError("vgId:%d failed to load stream tasks, code:%s", vgId, tstrerror(terrno)); + streamMetaWUnLock(pMeta); + code = terrno; + return code; + } + sndInfo("vgId:%d restart all stream tasks after all tasks being updated", vgId); + sndResetStreamTaskStatus(pSnode); + + streamMetaWUnLock(pMeta); + sndStartStreamTasks(pSnode); + + code = terrno; + return code; +} + SSnode *sndOpen(const char *path, const SSnodeOpt *pOption) { SSnode *pSnode = taosMemoryCalloc(1, sizeof(SSnode)); if (pSnode == NULL) { @@ -117,17 +296,19 @@ SSnode *sndOpen(const char *path, const SSnodeOpt *pOption) { } pSnode->msgCb = pOption->msgCb; - pSnode->pMeta = streamMetaOpen(path, pSnode, (FTaskExpand *)sndExpandTask, SNODE_HANDLE, -1); + pSnode->pMeta = streamMetaOpen(path, pSnode, (FTaskExpand *)sndExpandTask, SNODE_HANDLE, taosGetTimestampMs()); if (pSnode->pMeta == NULL) { terrno = TSDB_CODE_OUT_OF_MEMORY; goto FAIL; } + if (streamMetaLoadAllTasks(pSnode->pMeta) < 0) { + goto FAIL; + } + stopRsync(); startRsync(); - // todo fix it: send msg to mnode to rollback to an existed checkpoint - streamMetaInitForSnode(pSnode->pMeta); return pSnode; FAIL: @@ -136,7 +317,14 @@ FAIL: return NULL; } +int32_t sndInit(SSnode * pSnode) { + sndResetStreamTaskStatus(pSnode); + sndStartStreamTasks(pSnode); + return 0; +} + void sndClose(SSnode *pSnode) { + stopRsync(); streamMetaNotifyClose(pSnode->pMeta); streamMetaCommit(pSnode->pMeta); streamMetaClose(pSnode->pMeta); @@ -146,6 +334,33 @@ void sndClose(SSnode *pSnode) { int32_t sndGetLoad(SSnode *pSnode, SSnodeLoad *pLoad) { return 0; } +int32_t sndStartStreamTaskAsync(SSnode* pSnode, bool restart) { + SStreamMeta* pMeta = pSnode->pMeta; + int32_t vgId = pMeta->vgId; + + int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); + if (numOfTasks == 0) { + sndDebug("vgId:%d no stream tasks existed to run", vgId); + return 0; + } + + SStreamTaskRunReq* pRunReq = rpcMallocCont(sizeof(SStreamTaskRunReq)); + if (pRunReq == NULL) { + terrno = TSDB_CODE_OUT_OF_MEMORY; + sndError("vgId:%d failed to create msg to start wal scanning to launch stream tasks, code:%s", vgId, terrstr()); + return -1; + } + + sndDebug("vgId:%d start all %d stream task(s) async", vgId, numOfTasks); + pRunReq->head.vgId = vgId; + pRunReq->streamId = 0; + pRunReq->taskId = restart? STREAM_EXEC_RESTART_ALL_TASKS_ID:STREAM_EXEC_START_ALL_TASKS_ID; + + SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq)}; + tmsgPutToQueue(&pSnode->msgCb, STREAM_QUEUE, &msg); + return 0; +} + int32_t sndProcessTaskDeployReq(SSnode *pSnode, char *msg, int32_t msgLen) { int32_t code; @@ -184,24 +399,23 @@ int32_t sndProcessTaskDeployReq(SSnode *pSnode, char *msg, int32_t msgLen) { char* p = NULL; streamTaskGetStatus(pTask, &p); - qDebug("snode:%d s-task:%s is deployed on snode and add into meta, status:%s, numOfTasks:%d", SNODE_HANDLE, + sndDebug("snode:%d s-task:%s is deployed on snode and add into meta, status:%s, numOfTasks:%d", SNODE_HANDLE, pTask->id.idStr, p, numOfTasks); EStreamTaskEvent event = (HAS_RELATED_FILLHISTORY_TASK(pTask)) ? TASK_EVENT_INIT_STREAM_SCANHIST : TASK_EVENT_INIT; streamTaskHandleEvent(pTask->status.pSM, event); - streamTaskCheckDownstream(pTask); return 0; } int32_t sndProcessTaskDropReq(SSnode *pSnode, char *msg, int32_t msgLen) { SVDropStreamTaskReq *pReq = (SVDropStreamTaskReq *)msg; - qDebug("snode:%d receive msg to drop stream task:0x%x", pSnode->pMeta->vgId, pReq->taskId); + sndDebug("snode:%d receive msg to drop stream task:0x%x", pSnode->pMeta->vgId, pReq->taskId); streamMetaUnregisterTask(pSnode->pMeta, pReq->streamId, pReq->taskId); // commit the update streamMetaWLock(pSnode->pMeta); int32_t numOfTasks = streamMetaGetNumOfTasks(pSnode->pMeta); - qDebug("vgId:%d task:0x%x dropped, remain tasks:%d", pSnode->pMeta->vgId, pReq->taskId, numOfTasks); + sndDebug("vgId:%d task:0x%x dropped, remain tasks:%d", pSnode->pMeta->vgId, pReq->taskId, numOfTasks); if (streamMetaCommit(pSnode->pMeta) < 0) { // persist to disk @@ -213,6 +427,16 @@ int32_t sndProcessTaskDropReq(SSnode *pSnode, char *msg, int32_t msgLen) { int32_t sndProcessTaskRunReq(SSnode *pSnode, SRpcMsg *pMsg) { SStreamTaskRunReq *pReq = pMsg->pCont; + int32_t taskId = pReq->taskId; + + if (taskId == STREAM_EXEC_START_ALL_TASKS_ID) { + sndStartStreamTasks(pSnode); + return 0; + } else if (taskId == STREAM_EXEC_RESTART_ALL_TASKS_ID) { + sndRestartStreamTasks(pSnode); + return 0; + } + SStreamTask *pTask = streamMetaAcquireTask(pSnode->pMeta, pReq->streamId, pReq->taskId); if (pTask) { streamExecTask(pTask); @@ -231,14 +455,17 @@ int32_t sndProcessTaskDispatchReq(SSnode *pSnode, SRpcMsg *pMsg, bool exec) { SDecoder decoder; tDecoderInit(&decoder, (uint8_t *)msgBody, msgLen); tDecodeStreamDispatchReq(&decoder, &req); + tDecoderClear(&decoder); SStreamTask *pTask = streamMetaAcquireTask(pSnode->pMeta, req.streamId, req.taskId); if (pTask) { SRpcMsg rsp = {.info = pMsg->info, .code = 0}; streamProcessDispatchMsg(pTask, &req, &rsp); + tDeleteStreamDispatchReq(&req); streamMetaReleaseTask(pSnode->pMeta, pTask); return 0; } else { + tDeleteStreamDispatchReq(&req); return -1; } } @@ -270,6 +497,9 @@ int32_t sndProcessTaskDispatchRsp(SSnode *pSnode, SRpcMsg *pMsg) { pRsp->upstreamTaskId = htonl(pRsp->upstreamTaskId); pRsp->streamId = htobe64(pRsp->streamId); + pRsp->downstreamTaskId = htonl(pRsp->downstreamTaskId); + pRsp->downstreamNodeId = htonl(pRsp->downstreamNodeId); + pRsp->stage = htobe64(pRsp->stage); pRsp->msgId = htonl(pRsp->msgId); SStreamTask *pTask = streamMetaAcquireTask(pSnode->pMeta, pRsp->streamId, pRsp->upstreamTaskId); @@ -287,23 +517,7 @@ int32_t sndProcessTaskRetrieveRsp(SSnode *pSnode, SRpcMsg *pMsg) { return 0; } -int32_t sndProcessWriteMsg(SSnode *pSnode, SRpcMsg *pMsg, SRpcMsg *pRsp) { - switch (pMsg->msgType) { - case TDMT_STREAM_TASK_DEPLOY: { - void *pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); - int32_t len = pMsg->contLen - sizeof(SMsgHead); - return sndProcessTaskDeployReq(pSnode, pReq, len); - } - - case TDMT_STREAM_TASK_DROP: - return sndProcessTaskDropReq(pSnode, pMsg->pCont, pMsg->contLen); - default: - ASSERT(0); - } - return 0; -} - -int32_t sndProcessStreamTaskScanHistoryFinishReq(SSnode *pSnode, SRpcMsg *pMsg) { +int32_t sndProcessTaskScanHistoryFinishReq(SSnode *pSnode, SRpcMsg *pMsg) { char *msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); int32_t msgLen = pMsg->contLen - sizeof(SMsgHead); @@ -330,11 +544,73 @@ int32_t sndProcessStreamTaskScanHistoryFinishReq(SSnode *pSnode, SRpcMsg *pMsg) return 0; } -int32_t sndProcessTaskRecoverFinishRsp(SSnode *pSnode, SRpcMsg *pMsg) { - // +int32_t sndProcessTaskScanHistoryFinishRsp(SSnode *pSnode, SRpcMsg *pMsg) { + char* msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); + int32_t msgLen = pMsg->contLen - sizeof(SMsgHead); + + // deserialize + SStreamCompleteHistoryMsg req = {0}; + + SDecoder decoder; + tDecoderInit(&decoder, (uint8_t*)msg, msgLen); + tDecodeCompleteHistoryDataMsg(&decoder, &req); + tDecoderClear(&decoder); + + SStreamTask* pTask = streamMetaAcquireTask(pSnode->pMeta, req.streamId, req.upstreamTaskId); + if (pTask == NULL) { + sndError("vgId:%d process scan history finish rsp, failed to find task:0x%x, it may be destroyed", + pSnode->pMeta->vgId, req.upstreamTaskId); + return -1; + } + + int32_t remain = atomic_sub_fetch_32(&pTask->notReadyTasks, 1); + if (remain > 0) { + sndDebug("s-task:%s scan-history finish rsp received from downstream task:0x%x, unfinished remain:%d", + pTask->id.idStr, req.downstreamId, remain); + } else { + sndDebug( + "s-task:%s scan-history finish rsp received from downstream task:0x%x, all downstream tasks rsp scan-history " + "completed msg", + pTask->id.idStr, req.downstreamId); + streamProcessScanHistoryFinishRsp(pTask); + } + + streamMetaReleaseTask(pSnode->pMeta, pTask); return 0; } +// downstream task has complete the stream task checkpoint procedure, let's start the handle the rsp by execute task +int32_t sndProcessTaskCheckpointReadyMsg(SSnode *pSnode, SRpcMsg* pMsg) { + SStreamMeta* pMeta = pSnode->pMeta; + char* msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); + int32_t len = pMsg->contLen - sizeof(SMsgHead); + int32_t code = 0; + + SStreamCheckpointReadyMsg req = {0}; + + SDecoder decoder; + tDecoderInit(&decoder, (uint8_t*)msg, len); + if (tDecodeStreamCheckpointReadyMsg(&decoder, &req) < 0) { + code = TSDB_CODE_MSG_DECODE_ERROR; + tDecoderClear(&decoder); + return code; + } + tDecoderClear(&decoder); + + SStreamTask* pTask = streamMetaAcquireTask(pMeta, req.streamId, req.upstreamTaskId); + if (pTask == NULL) { + sndError("vgId:%d failed to find s-task:0x%x, it may have been destroyed already", pMeta->vgId, req.downstreamTaskId); + return code; + } + + sndDebug("snode vgId:%d s-task:%s received the checkpoint ready msg from task:0x%x (vgId:%d), handle it", pMeta->vgId, + pTask->id.idStr, req.downstreamTaskId, req.downstreamNodeId); + + streamProcessCheckpointReadyMsg(pTask); + streamMetaReleaseTask(pMeta, pTask); + return code; +} + int32_t sndProcessStreamTaskCheckReq(SSnode *pSnode, SRpcMsg *pMsg) { char *msgStr = pMsg->pCont; char *msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead)); @@ -362,15 +638,15 @@ int32_t sndProcessStreamTaskCheckReq(SSnode *pSnode, SRpcMsg *pMsg) { SStreamTask *pTask = streamMetaAcquireTask(pSnode->pMeta, req.streamId, taskId); if (pTask != NULL) { - rsp.status = streamTaskCheckStatus(pTask, req.upstreamTaskId, req.upstreamNodeId, req.stage); + rsp.status = streamTaskCheckStatus(pTask, req.upstreamTaskId, req.upstreamNodeId, req.stage, &rsp.oldStage); streamMetaReleaseTask(pSnode->pMeta, pTask); char* p = NULL; streamTaskGetStatus(pTask, &p); - qDebug("s-task:%s status:%s, recv task check req(reqId:0x%" PRIx64 ") task:0x%x (vgId:%d), ready:%d", + sndDebug("s-task:%s status:%s, recv task check req(reqId:0x%" PRIx64 ") task:0x%x (vgId:%d), ready:%d", pTask->id.idStr, p, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status); } else { rsp.status = TASK_DOWNSTREAM_NOT_READY; - qDebug("recv task check(taskId:0x%x not built yet) req(reqId:0x%" PRIx64 ") from task:0x%x (vgId:%d), rsp status %d", + sndDebug("recv task check(taskId:0x%x not built yet) req(reqId:0x%" PRIx64 ") from task:0x%x (vgId:%d), rsp status %d", taskId, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status); } @@ -380,7 +656,7 @@ int32_t sndProcessStreamTaskCheckReq(SSnode *pSnode, SRpcMsg *pMsg) { tEncodeSize(tEncodeStreamTaskCheckRsp, &rsp, len, code); if (code < 0) { - qError("vgId:%d failed to encode task check rsp, task:0x%x", pSnode->pMeta->vgId, taskId); + sndError("vgId:%d failed to encode task check rsp, task:0x%x", pSnode->pMeta->vgId, taskId); return -1; } @@ -415,12 +691,12 @@ int32_t sndProcessStreamTaskCheckRsp(SSnode* pSnode, SRpcMsg* pMsg) { } tDecoderClear(&decoder); - qDebug("tq task:0x%x (vgId:%d) recv check rsp(reqId:0x%" PRIx64 ") from 0x%x (vgId:%d) status %d", + sndDebug("tq task:0x%x (vgId:%d) recv check rsp(reqId:0x%" PRIx64 ") from 0x%x (vgId:%d) status %d", rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, rsp.status); SStreamTask* pTask = streamMetaAcquireTask(pSnode->pMeta, rsp.streamId, rsp.upstreamTaskId); if (pTask == NULL) { - qError("tq failed to locate the stream task:0x%x (vgId:%d), it may have been destroyed", rsp.upstreamTaskId, + sndError("tq failed to locate the stream task:0x%x (vgId:%d), it may have been destroyed", rsp.upstreamTaskId, pSnode->pMeta->vgId); return -1; } @@ -430,6 +706,181 @@ int32_t sndProcessStreamTaskCheckRsp(SSnode* pSnode, SRpcMsg* pMsg) { return code; } +int32_t sndProcessTaskUpdateReq(SSnode* pSnode, SRpcMsg* pMsg) { + SStreamMeta* pMeta = pSnode->pMeta; + int32_t vgId = SNODE_HANDLE; + char* msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); + int32_t len = pMsg->contLen - sizeof(SMsgHead); + SRpcMsg rsp = {.info = pMsg->info, .code = TSDB_CODE_SUCCESS}; + + SStreamTaskNodeUpdateMsg req = {0}; + + SDecoder decoder; + tDecoderInit(&decoder, (uint8_t*)msg, len); + if (tDecodeStreamTaskUpdateMsg(&decoder, &req) < 0) { + rsp.code = TSDB_CODE_MSG_DECODE_ERROR; + sndError("vgId:%d failed to decode task update msg, code:%s", vgId, tstrerror(rsp.code)); + tDecoderClear(&decoder); + return rsp.code; + } + + tDecoderClear(&decoder); + + // update the nodeEpset when it exists + streamMetaWLock(pMeta); + + // the task epset may be updated again and again, when replaying the WAL, the task may be in stop status. + STaskId id = {.streamId = req.streamId, .taskId = req.taskId}; + SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasksMap, &id, sizeof(id)); + if (ppTask == NULL || *ppTask == NULL) { + sndError("vgId:%d failed to acquire task:0x%x when handling update, it may have been dropped already", pMeta->vgId, + req.taskId); + rsp.code = TSDB_CODE_SUCCESS; + streamMetaWUnLock(pMeta); + + taosArrayDestroy(req.pNodeList); + return rsp.code; + } + + SStreamTask* pTask = *ppTask; + + if (pMeta->updateInfo.transId != req.transId) { + pMeta->updateInfo.transId = req.transId; + sndInfo("s-task:%s receive new trans to update nodeEp msg from mnode, transId:%d", pTask->id.idStr, req.transId); + // info needs to be kept till the new trans to update the nodeEp arrived. + taosHashClear(pMeta->updateInfo.pTasks); + } else { + sndDebug("s-task:%s recv trans to update nodeEp from mnode, transId:%d", pTask->id.idStr, req.transId); + } + + STaskUpdateEntry entry = {.streamId = req.streamId, .taskId = req.taskId, .transId = req.transId}; + void* exist = taosHashGet(pMeta->updateInfo.pTasks, &entry, sizeof(STaskUpdateEntry)); + if (exist != NULL) { + sndDebug("s-task:%s (vgId:%d) already update in trans:%d, discard the nodeEp update msg", pTask->id.idStr, vgId, + req.transId); + rsp.code = TSDB_CODE_SUCCESS; + streamMetaWUnLock(pMeta); + taosArrayDestroy(req.pNodeList); + return rsp.code; + } + + streamMetaWUnLock(pMeta); + + // the following two functions should not be executed within the scope of meta lock to avoid deadlock + streamTaskUpdateEpsetInfo(pTask, req.pNodeList); + streamTaskResetStatus(pTask); + + // continue after lock the meta again + streamMetaWLock(pMeta); + + SStreamTask** ppHTask = NULL; + if (HAS_RELATED_FILLHISTORY_TASK(pTask)) { + ppHTask = (SStreamTask**)taosHashGet(pMeta->pTasksMap, &pTask->hTaskInfo.id, sizeof(pTask->hTaskInfo.id)); + if (ppHTask == NULL || *ppHTask == NULL) { + sndError("vgId:%d failed to acquire fill-history task:0x%x when handling update, it may have been dropped already", + pMeta->vgId, req.taskId); + CLEAR_RELATED_FILLHISTORY_TASK(pTask); + } else { + sndDebug("s-task:%s fill-history task update nodeEp along with stream task", (*ppHTask)->id.idStr); + streamTaskUpdateEpsetInfo(*ppHTask, req.pNodeList); + } + } + + { + streamMetaSaveTask(pMeta, pTask); + if (ppHTask != NULL) { + streamMetaSaveTask(pMeta, *ppHTask); + } + + if (streamMetaCommit(pMeta) < 0) { + // persist to disk + } + } + + streamTaskStop(pTask); + + // keep the already handled info + taosHashPut(pMeta->updateInfo.pTasks, &entry, sizeof(entry), NULL, 0); + + if (ppHTask != NULL) { + streamTaskStop(*ppHTask); + sndDebug("s-task:%s task nodeEp update completed, streamTask and related fill-history task closed", pTask->id.idStr); + taosHashPut(pMeta->updateInfo.pTasks, &(*ppHTask)->id, sizeof(pTask->id), NULL, 0); + } else { + sndDebug("s-task:%s task nodeEp update completed, streamTask closed", pTask->id.idStr); + } + + rsp.code = 0; + + // possibly only handle the stream task. + int32_t numOfTasks = streamMetaGetNumOfTasks(pMeta); + int32_t updateTasks = taosHashGetSize(pMeta->updateInfo.pTasks); + + pMeta->startInfo.tasksWillRestart = 1; + + if (updateTasks < numOfTasks) { + sndDebug("vgId:%d closed tasks:%d, unclosed:%d, all tasks will be started when nodeEp update completed", vgId, + updateTasks, (numOfTasks - updateTasks)); + streamMetaWUnLock(pMeta); + } else { + sndDebug("vgId:%d all %d task(s) nodeEp updated and closed", vgId, numOfTasks); +#if 1 + sndStartStreamTaskAsync(pSnode, true); + streamMetaWUnLock(pMeta); +#else + streamMetaWUnLock(pMeta); + + // For debug purpose. + // the following procedure consume many CPU resource, result in the re-election of leader + // with high probability. So we employ it as a test case for the stream processing framework, with + // checkpoint/restart/nodeUpdate etc. + while(1) { + int32_t startVal = atomic_val_compare_exchange_32(&pMeta->startInfo.taskStarting, 0, 1); + if (startVal == 0) { + break; + } + + tqDebug("vgId:%d in start stream tasks procedure, wait for 500ms and recheck", vgId); + taosMsleep(500); + } + + while (streamMetaTaskInTimer(pMeta)) { + tqDebug("vgId:%d some tasks in timer, wait for 100ms and recheck", pMeta->vgId); + taosMsleep(100); + } + + streamMetaWLock(pMeta); + + int32_t code = streamMetaReopen(pMeta); + if (code != 0) { + tqError("vgId:%d failed to reopen stream meta", vgId); + streamMetaWUnLock(pMeta); + taosArrayDestroy(req.pNodeList); + return -1; + } + + if (streamMetaLoadAllTasks(pTq->pStreamMeta) < 0) { + tqError("vgId:%d failed to load stream tasks", vgId); + streamMetaWUnLock(pMeta); + taosArrayDestroy(req.pNodeList); + return -1; + } + + if (vnodeIsRoleLeader(pTq->pVnode) && !tsDisableStream) { + tqInfo("vgId:%d start all stream tasks after all being updated", vgId); + tqResetStreamTaskStatus(pTq); + tqStartStreamTaskAsync(pTq, false); + } else { + tqInfo("vgId:%d, follower node not start stream tasks", vgId); + } + streamMetaWUnLock(pMeta); +#endif + } + + taosArrayDestroy(req.pNodeList); + return rsp.code; +} + int32_t sndProcessStreamMsg(SSnode *pSnode, SRpcMsg *pMsg) { switch (pMsg->msgType) { case TDMT_STREAM_TASK_RUN: @@ -443,15 +894,36 @@ int32_t sndProcessStreamMsg(SSnode *pSnode, SRpcMsg *pMsg) { case TDMT_STREAM_RETRIEVE_RSP: return sndProcessTaskRetrieveRsp(pSnode, pMsg); case TDMT_VND_STREAM_SCAN_HISTORY_FINISH: - return sndProcessStreamTaskScanHistoryFinishReq(pSnode, pMsg); + return sndProcessTaskScanHistoryFinishReq(pSnode, pMsg); case TDMT_VND_STREAM_SCAN_HISTORY_FINISH_RSP: - return sndProcessTaskRecoverFinishRsp(pSnode, pMsg); + return sndProcessTaskScanHistoryFinishRsp(pSnode, pMsg); case TDMT_VND_STREAM_TASK_CHECK: return sndProcessStreamTaskCheckReq(pSnode, pMsg); case TDMT_VND_STREAM_TASK_CHECK_RSP: return sndProcessStreamTaskCheckRsp(pSnode, pMsg); + case TDMT_STREAM_TASK_CHECKPOINT_READY: + return sndProcessTaskCheckpointReadyMsg(pSnode, pMsg); default: ASSERT(0); } return 0; } + +int32_t sndProcessWriteMsg(SSnode *pSnode, SRpcMsg *pMsg, SRpcMsg *pRsp) { + switch (pMsg->msgType) { + case TDMT_STREAM_TASK_DEPLOY: { + void *pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); + int32_t len = pMsg->contLen - sizeof(SMsgHead); + return sndProcessTaskDeployReq(pSnode, pReq, len); + } + + case TDMT_STREAM_TASK_DROP: + return sndProcessTaskDropReq(pSnode, pMsg->pCont, pMsg->contLen); + case TDMT_VND_STREAM_TASK_UPDATE: + sndProcessTaskUpdateReq(pSnode, pMsg); + break; + default: + ASSERT(0); + } + return 0; +} \ No newline at end of file diff --git a/source/dnode/vnode/CMakeLists.txt b/source/dnode/vnode/CMakeLists.txt index dc43da7fe7..635c15aa41 100644 --- a/source/dnode/vnode/CMakeLists.txt +++ b/source/dnode/vnode/CMakeLists.txt @@ -138,6 +138,11 @@ else() endif() endif() +target_include_directories( + vnode + PUBLIC "${TD_SOURCE_DIR}/include/dnode/vnode" +) + target_link_libraries( vnode PUBLIC os diff --git a/source/dnode/vnode/src/inc/tq.h b/source/dnode/vnode/src/inc/tq.h index fdd449bf36..b3f8317add 100644 --- a/source/dnode/vnode/src/inc/tq.h +++ b/source/dnode/vnode/src/inc/tq.h @@ -43,9 +43,6 @@ extern "C" { typedef struct STqOffsetStore STqOffsetStore; -#define STREAM_EXEC_EXTRACT_DATA_IN_WAL_ID (-1) -#define STREAM_EXEC_START_ALL_TASKS_ID (-2) -#define STREAM_EXEC_RESTART_ALL_TASKS_ID (-3) #define IS_OFFSET_RESET_TYPE(_t) ((_t) < 0) // tqExec diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index 6dbeaef6cb..4ad40b27f2 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -15,17 +15,12 @@ #include "tq.h" #include "vnd.h" +#include "stream.h" typedef struct { int8_t inited; } STqMgmt; -typedef struct STaskUpdateEntry { - int64_t streamId; - int32_t taskId; - int32_t transId; -} STaskUpdateEntry; - static STqMgmt tqMgmt = {0}; // 0: not init @@ -928,12 +923,12 @@ int32_t tqProcessTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) { } else { SStreamTask* pTask = streamMetaAcquireTask(pMeta, req.streamId, taskId); if (pTask != NULL) { - rsp.status = streamTaskCheckStatus(pTask, req.upstreamTaskId, req.upstreamNodeId, req.stage); + rsp.status = streamTaskCheckStatus(pTask, req.upstreamTaskId, req.upstreamNodeId, req.stage, &rsp.oldStage); streamMetaReleaseTask(pMeta, pTask); char* p = NULL; streamTaskGetStatus(pTask, &p); - tqDebug("s-task:%s status:%s, stage:%d recv task check req(reqId:0x%" PRIx64 ") task:0x%x (vgId:%d), check_status:%d", + tqDebug("s-task:%s status:%s, stage:%"PRId64" recv task check req(reqId:0x%" PRIx64 ") task:0x%x (vgId:%d), check_status:%d", pTask->id.idStr, p, rsp.oldStage, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status); } else { rsp.status = TASK_DOWNSTREAM_NOT_READY; @@ -1136,16 +1131,10 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { // let's decide which step should be executed now if (pTask->execInfo.step1Start == 0) { - ASSERT(pTask->status.pauseAllowed == false); int64_t ts = taosGetTimestampMs(); pTask->execInfo.step1Start = ts; tqDebug("s-task:%s start scan-history stage(step 1), status:%s, step1 startTs:%" PRId64, id, pStatus, ts); - - // NOTE: in case of stream task, scan-history data in wal is not allowed to pause - if (pTask->info.fillHistory == 1) { - streamTaskEnablePause(pTask); - } } else { if (pTask->execInfo.step2Start == 0) { tqDebug("s-task:%s continue exec scan-history(step1), original step1 startTs:%" PRId64 ", already elapsed:%.2fs", @@ -1367,6 +1356,7 @@ int32_t tqProcessTaskDispatchReq(STQ* pTq, SRpcMsg* pMsg, bool exec) { if (pTask) { SRpcMsg rsp = {.info = pMsg->info, .code = 0}; streamProcessDispatchMsg(pTask, &req, &rsp); + tDeleteStreamDispatchReq(&req); streamMetaReleaseTask(pTq->pStreamMeta, pTask); return 0; } else { @@ -1605,6 +1595,7 @@ int32_t vnodeEnqueueStreamMsg(SVnode* pVnode, SRpcMsg* pMsg) { if (pTask != NULL) { SRpcMsg rsp = {.info = pMsg->info, .code = 0}; streamProcessDispatchMsg(pTask, &req, &rsp); + tDeleteStreamDispatchReq(&req); streamMetaReleaseTask(pTq->pStreamMeta, pTask); rpcFreeCont(pMsg->pCont); taosFreeQitem(pMsg); diff --git a/source/dnode/vnode/src/tq/tqStreamTask.c b/source/dnode/vnode/src/tq/tqStreamTask.c index 4c0491da86..56dcdb2abc 100644 --- a/source/dnode/vnode/src/tq/tqStreamTask.c +++ b/source/dnode/vnode/src/tq/tqStreamTask.c @@ -15,6 +15,7 @@ #include "tq.h" #include "vnd.h" +#include "stream.h" #define MAX_REPEAT_SCAN_THRESHOLD 3 #define SCAN_WAL_IDLE_DURATION 100 @@ -212,8 +213,10 @@ int32_t tqScanWalAsync(STQ* pTq, bool ckPause) { int32_t vgId = TD_VID(pTq->pVnode); SStreamMeta* pMeta = pTq->pStreamMeta; + bool alreadyRestored = pTq->pVnode->restored; + // do not launch the stream tasks, if it is a follower or not restored vnode. - if (!(vnodeIsRoleLeader(pTq->pVnode) && pTq->pVnode->restored)) { + if (!(vnodeIsRoleLeader(pTq->pVnode) && alreadyRestored)) { return TSDB_CODE_SUCCESS; } @@ -255,7 +258,9 @@ int32_t tqScanWalAsync(STQ* pTq, bool ckPause) { return -1; } - tqDebug("vgId:%d create msg to start wal scan to launch stream tasks, numOfTasks:%d", vgId, numOfTasks); + tqDebug("vgId:%d create msg to start wal scan to launch stream tasks, numOfTasks:%d, restored:%d", vgId, numOfTasks, + alreadyRestored); + pRunReq->head.vgId = vgId; pRunReq->streamId = 0; pRunReq->taskId = STREAM_EXEC_EXTRACT_DATA_IN_WAL_ID; diff --git a/source/dnode/vnode/src/tsdb/tsdbFS.c b/source/dnode/vnode/src/tsdb/tsdbFS.c index c0c74d6b87..732f46467e 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS.c @@ -104,7 +104,7 @@ static int32_t tsdbSaveFSToFile(STsdbFS *pFS, const char *fname) { taosCalcChecksumAppend(0, pData, size); // save to file - TdFilePtr pFD = taosOpenFile(fname, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC); + TdFilePtr pFD = taosOpenFile(fname, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC | TD_FILE_WRITE_THROUGH); if (pFD == NULL) { code = TAOS_SYSTEM_ERROR(errno); TSDB_CHECK_CODE(code, lino, _exit); diff --git a/source/dnode/vnode/src/tsdb/tsdbFS2.c b/source/dnode/vnode/src/tsdb/tsdbFS2.c index 70a83ebdbb..c9ecff8890 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFS2.c +++ b/source/dnode/vnode/src/tsdb/tsdbFS2.c @@ -85,7 +85,7 @@ static int32_t save_json(const cJSON *json, const char *fname) { char *data = cJSON_PrintUnformatted(json); if (data == NULL) return TSDB_CODE_OUT_OF_MEMORY; - TdFilePtr fp = taosOpenFile(fname, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC); + TdFilePtr fp = taosOpenFile(fname, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC | TD_FILE_WRITE_THROUGH); if (fp == NULL) { code = TAOS_SYSTEM_ERROR(code); goto _exit; @@ -1186,11 +1186,6 @@ const char *gFSBgTaskName[] = {NULL, "MERGE", "RETENTION", "COMPACT"}; static int32_t tsdbFSRunBgTask(void *arg) { STFSBgTask *task = (STFSBgTask *)arg; STFileSystem *fs = task->fs; - STFileSet *fset; - - tsdbFSGetFSet(fs, task->fid, &fset); - - ASSERT(fset != NULL && fset->bgTaskRunning == task); task->launchTime = taosGetTimestampMs(); task->run(task->arg); @@ -1203,6 +1198,10 @@ static int32_t tsdbFSRunBgTask(void *arg) { taosThreadMutexLock(&fs->tsdb->mutex); + STFileSet *fset = NULL; + tsdbFSGetFSet(fs, task->fid, &fset); + ASSERT(fset != NULL && fset->bgTaskRunning == task); + // free last tsdbDoDoneBgTask(fs, task); fset->bgTaskRunning = NULL; diff --git a/source/dnode/vnode/src/tsdb/tsdbFile.c b/source/dnode/vnode/src/tsdb/tsdbFile.c index 62b37cd0a6..3ee0c482a7 100644 --- a/source/dnode/vnode/src/tsdb/tsdbFile.c +++ b/source/dnode/vnode/src/tsdb/tsdbFile.c @@ -177,7 +177,7 @@ int32_t tsdbDFileRollback(STsdb *pTsdb, SDFileSet *pSet, EDataFileT ftype) { taosCalcChecksumAppend(0, hdr, TSDB_FHDR_SIZE); // open - pFD = taosOpenFile(fname, TD_FILE_WRITE); + pFD = taosOpenFile(fname, TD_FILE_WRITE | TD_FILE_WRITE_THROUGH); if (pFD == NULL) { code = TAOS_SYSTEM_ERROR(errno); goto _err; diff --git a/source/dnode/vnode/src/tsdb/tsdbRead2.c b/source/dnode/vnode/src/tsdb/tsdbRead2.c index 6169014d9f..751df706ab 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRead2.c +++ b/source/dnode/vnode/src/tsdb/tsdbRead2.c @@ -572,7 +572,12 @@ static int32_t doLoadFileBlock(STsdbReader* pReader, SArray* pIndexList, SBlockN if (isEmptyQueryTimeWindow(&w)) { k += 1; - continue; + + if (k >= numOfTables) { + break; + } else { + continue; + } } // 1. time range check diff --git a/source/dnode/vnode/src/vnd/vnodeCommit.c b/source/dnode/vnode/src/vnd/vnodeCommit.c index 50ca2f5d03..f9b5e9168c 100644 --- a/source/dnode/vnode/src/vnd/vnodeCommit.c +++ b/source/dnode/vnode/src/vnd/vnodeCommit.c @@ -177,7 +177,7 @@ int vnodeSaveInfo(const char *dir, const SVnodeInfo *pInfo) { } // save info to a vnode_tmp.json - pFile = taosOpenFile(fname, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); + pFile = taosOpenFile(fname, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC | TD_FILE_WRITE_THROUGH); if (pFile == NULL) { vError("failed to open info file:%s for write:%s", fname, terrstr()); terrno = TAOS_SYSTEM_ERROR(errno); diff --git a/source/libs/executor/inc/executorInt.h b/source/libs/executor/inc/executorInt.h index cf717472b1..e29583d8fc 100644 --- a/source/libs/executor/inc/executorInt.h +++ b/source/libs/executor/inc/executorInt.h @@ -293,6 +293,9 @@ typedef struct STableMergeScanInfo { int32_t readIdx; SSDataBlock* pResBlock; SSampleExecInfo sample; // sample execution info + SSHashObj* mTableNumRows; // uid->num of table rows + SHashObj* mSkipTables; + int64_t mergeLimit; SSortExecInfo sortExecInfo; } STableMergeScanInfo; diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 1c3db48972..28832ffec8 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -3200,6 +3200,27 @@ _error: return NULL; } +static int32_t tableMergeScanDoSkipTable(STableMergeScanInfo* pInfo, SSDataBlock* pBlock) { + int64_t nRows = 0; + void* pNum = tSimpleHashGet(pInfo->mTableNumRows, &pBlock->info.id.uid, sizeof(pBlock->info.id.uid)); + if (pNum == NULL) { + nRows = pBlock->info.rows; + tSimpleHashPut(pInfo->mTableNumRows, &pBlock->info.id.uid, sizeof(pBlock->info.id.uid), &nRows, sizeof(nRows)); + } else { + *(int64_t*)pNum = *(int64_t*)pNum + pBlock->info.rows; + } + + if (nRows >= pInfo->mergeLimit) { + if (pInfo->mSkipTables == NULL) { + pInfo->mSkipTables = taosHashInit(pInfo->tableEndIndex - pInfo->tableStartIndex + 1, + taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT), false, HASH_NO_LOCK); + } + int bSkip = 1; + taosHashPut(pInfo->mSkipTables, &pBlock->info.id.uid, sizeof(pBlock->info.id.uid), &bSkip, sizeof(bSkip)); + } + return TSDB_CODE_SUCCESS; +} + static SSDataBlock* getBlockForTableMergeScan(void* param) { STableMergeScanSortSourceParam* source = param; SOperatorInfo* pOperator = source->pOperator; @@ -3257,6 +3278,10 @@ static SSDataBlock* getBlockForTableMergeScan(void* param) { pBlock->info.id.groupId = tableListGetTableGroupId(pInfo->base.pTableListInfo, pBlock->info.id.uid); + if (pInfo->mergeLimit != -1) { + tableMergeScanDoSkipTable(pInfo, pBlock); + } + pOperator->resultInfo.totalRows += pBlock->info.rows; pInfo->base.readRecorder.elapsedTime += (taosGetTimestampUs() - st) / 1000.0; @@ -3316,22 +3341,20 @@ int32_t startGroupTableMergeScan(SOperatorInfo* pOperator) { int32_t tableStartIdx = pInfo->tableStartIndex; int32_t tableEndIdx = pInfo->tableEndIndex; - bool hasLimit = pInfo->limitInfo.limit.limit != -1 || pInfo->limitInfo.limit.offset != -1; - int64_t mergeLimit = -1; - if (hasLimit) { - mergeLimit = pInfo->limitInfo.limit.limit + pInfo->limitInfo.limit.offset; - } + tSimpleHashClear(pInfo->mTableNumRows); + size_t szRow = blockDataGetRowSize(pInfo->pResBlock); - if (hasLimit) { - pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_SINGLESOURCE_SORT, -1, -1, - NULL, pTaskInfo->id.str, mergeLimit, szRow+8, tsPQSortMemThreshold * 1024* 1024); - } else { +// if (pInfo->mergeLimit != -1) { +// pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_SINGLESOURCE_SORT, -1, -1, +// NULL, pTaskInfo->id.str, pInfo->mergeLimit, szRow+8, tsPQSortMemThreshold * 1024* 1024); +// } else + { pInfo->sortBufSize = 2048 * pInfo->bufPageSize; int32_t numOfBufPage = pInfo->sortBufSize / pInfo->bufPageSize; pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_BLOCK_TS_MERGE, pInfo->bufPageSize, numOfBufPage, pInfo->pSortInputBlock, pTaskInfo->id.str, 0, 0, 0); - tsortSetMergeLimit(pInfo->pSortHandle, mergeLimit); + tsortSetMergeLimit(pInfo->pSortHandle, pInfo->mergeLimit); tsortSetAbortCheckFn(pInfo->pSortHandle, isTaskKilled, pOperator->pTaskInfo); } @@ -3343,7 +3366,8 @@ int32_t startGroupTableMergeScan(SOperatorInfo* pOperator) { STableMergeScanSortSourceParam *param = taosMemoryCalloc(1, sizeof(STableMergeScanSortSourceParam)); param->pOperator = pOperator; STableKeyInfo* startKeyInfo = tableListGetInfo(pInfo->base.pTableListInfo, tableStartIdx); - pAPI->tsdReader.tsdReaderOpen(pHandle->vnode, &pInfo->base.cond, startKeyInfo, numOfTable, pInfo->pReaderBlock, (void**)&pInfo->base.dataReader, GET_TASKID(pTaskInfo), false, NULL); + pAPI->tsdReader.tsdReaderOpen(pHandle->vnode, &pInfo->base.cond, startKeyInfo, numOfTable, pInfo->pReaderBlock, + (void**)&pInfo->base.dataReader, GET_TASKID(pTaskInfo), false, &pInfo->mSkipTables); SSortSource* ps = taosMemoryCalloc(1, sizeof(SSortSource)); ps->param = param; @@ -3385,6 +3409,8 @@ int32_t stopGroupTableMergeScan(SOperatorInfo* pOperator) { pInfo->pSortHandle = NULL; resetLimitInfoForNextGroup(&pInfo->limitInfo); + taosHashCleanup(pInfo->mSkipTables); + pInfo->mSkipTables = NULL; return TSDB_CODE_SUCCESS; } @@ -3493,7 +3519,10 @@ void destroyTableMergeScanOperatorInfo(void* param) { taosArrayDestroy(pTableScanInfo->sortSourceParams); tsortDestroySortHandle(pTableScanInfo->pSortHandle); pTableScanInfo->pSortHandle = NULL; - + tSimpleHashCleanup(pTableScanInfo->mTableNumRows); + pTableScanInfo->mTableNumRows = NULL; + taosHashCleanup(pTableScanInfo->mSkipTables); + pTableScanInfo->mSkipTables = NULL; destroyTableScanBase(&pTableScanInfo->base, &pTableScanInfo->base.readerAPI); pTableScanInfo->pResBlock = blockDataDestroy(pTableScanInfo->pResBlock); @@ -3583,7 +3612,14 @@ SOperatorInfo* createTableMergeScanOperatorInfo(STableScanPhysiNode* pTableScanN pInfo->pSortInfo = generateSortByTsInfo(pInfo->base.matchInfo.pList, pInfo->base.cond.order); pInfo->pSortInputBlock = createOneDataBlock(pInfo->pResBlock, false); initLimitInfo(pTableScanNode->scan.node.pLimit, pTableScanNode->scan.node.pSlimit, &pInfo->limitInfo); - + pInfo->mTableNumRows = tSimpleHashInit(1024, + taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT)); + pInfo->mergeLimit = -1; + bool hasLimit = pInfo->limitInfo.limit.limit != -1 || pInfo->limitInfo.limit.offset != -1; + if (hasLimit) { + pInfo->mergeLimit = pInfo->limitInfo.limit.limit + pInfo->limitInfo.limit.offset; + pInfo->mSkipTables = NULL; + } pInfo->pReaderBlock = createOneDataBlock(pInfo->pResBlock, false); int32_t rowSize = pInfo->pResBlock->info.rowSize; diff --git a/source/libs/index/src/indexFilter.c b/source/libs/index/src/indexFilter.c index bfdcd2b030..7ed36fbf9e 100644 --- a/source/libs/index/src/indexFilter.c +++ b/source/libs/index/src/indexFilter.c @@ -328,6 +328,7 @@ static int32_t sifInitParam(SNode *node, SIFParam *param, SIFCtx *ctx) { SIF_ERR_RET(scalarGenerateSetFromList((void **)¶m->pFilter, node, nl->node.resType.type)); if (taosHashPut(ctx->pRes, &node, POINTER_BYTES, param, sizeof(*param))) { taosHashCleanup(param->pFilter); + param->pFilter = NULL; indexError("taosHashPut nodeList failed, size:%d", (int32_t)sizeof(*param)); SIF_ERR_RET(TSDB_CODE_OUT_OF_MEMORY); } diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index 4996063578..496e3423e6 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -1447,14 +1447,18 @@ static int32_t dataTypeComp(const SDataType* l, const SDataType* r) { static EDealRes translateOperator(STranslateContext* pCxt, SOperatorNode* pOp) { if (isMultiResFunc(pOp->pLeft)) { - return generateDealNodeErrMsg(pCxt, TSDB_CODE_PAR_WRONG_VALUE_TYPE, ((SExprNode*)(pOp->pLeft))->aliasName); + generateDealNodeErrMsg(pCxt, TSDB_CODE_PAR_WRONG_VALUE_TYPE, ((SExprNode*)(pOp->pLeft))->aliasName); + return DEAL_RES_ERROR; } if (isMultiResFunc(pOp->pRight)) { - return generateDealNodeErrMsg(pCxt, TSDB_CODE_PAR_WRONG_VALUE_TYPE, ((SExprNode*)(pOp->pRight))->aliasName); + generateDealNodeErrMsg(pCxt, TSDB_CODE_PAR_WRONG_VALUE_TYPE, ((SExprNode*)(pOp->pRight))->aliasName); + return DEAL_RES_ERROR; } - if (TSDB_CODE_SUCCESS != scalarGetOperatorResultType(pOp)) { - return generateDealNodeErrMsg(pCxt, TSDB_CODE_PAR_WRONG_VALUE_TYPE, pOp->node.aliasName); + int32_t res = scalarGetOperatorResultType(pOp); + if (TSDB_CODE_SUCCESS != res) { + pCxt->errCode = res; + return DEAL_RES_ERROR; } return DEAL_RES_CONTINUE; @@ -7972,7 +7976,7 @@ static int32_t translateDropStream(STranslateContext* pCxt, SDropStreamStmt* pSt tNameGetFullDbName(&name, dropReq.name); dropReq.igNotExists = pStmt->ignoreNotExists; int32_t code = buildCmdMsg(pCxt, TDMT_MND_DROP_STREAM, (FSerializeFunc)tSerializeSMDropStreamReq, &dropReq); - tFreeSMDropStreamReq(&dropReq); + tFreeMDropStreamReq(&dropReq); return code; } diff --git a/source/libs/scalar/src/scalar.c b/source/libs/scalar/src/scalar.c index 3e003234cf..90cec4522f 100644 --- a/source/libs/scalar/src/scalar.c +++ b/source/libs/scalar/src/scalar.c @@ -208,6 +208,7 @@ void sclFreeParam(SScalarParam *param) { if (param->columnData != NULL) { colDataDestroy(param->columnData); taosMemoryFreeClear(param->columnData); + param->columnData = NULL; } if (param->pHashFilter != NULL) { @@ -845,6 +846,7 @@ int32_t sclExecOperator(SOperatorNode *node, SScalarCtx *ctx, SScalarParam *outp SScalarParam *params = NULL; int32_t rowNum = 0; int32_t code = 0; + int32_t paramNum = 0; // json not support in in operator if (nodeType(node->pLeft) == QUERY_NODE_VALUE) { @@ -865,7 +867,7 @@ int32_t sclExecOperator(SOperatorNode *node, SScalarCtx *ctx, SScalarParam *outp _bin_scalar_fn_t OperatorFn = getBinScalarOperatorFn(node->opType); - int32_t paramNum = scalarGetOperatorParamNum(node->opType); + paramNum = scalarGetOperatorParamNum(node->opType); SScalarParam *pLeft = ¶ms[0]; SScalarParam *pRight = paramNum > 1 ? ¶ms[1] : NULL; diff --git a/source/libs/stream/inc/streamInt.h b/source/libs/stream/inc/streamInt.h index b76a967d0d..6dd1e5c1c3 100644 --- a/source/libs/stream/inc/streamInt.h +++ b/source/libs/stream/inc/streamInt.h @@ -75,7 +75,8 @@ struct STokenBucket { double quotaCapacity; // available capacity for maximum input size, KiloBytes per Second double quotaRemain; // not consumed bytes per second double quotaRate; // number of token per second - int64_t fillTimestamp; // fill timestamp + int64_t tokenFillTimestamp; // fill timestamp + int64_t quotaFillTimestamp; // fill timestamp }; struct SStreamQueue { diff --git a/source/libs/stream/src/stream.c b/source/libs/stream/src/stream.c index 34b4677235..ab7951bb92 100644 --- a/source/libs/stream/src/stream.c +++ b/source/libs/stream/src/stream.c @@ -283,7 +283,6 @@ int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, S tmsgSendRsp(pRsp); } - tDeleteStreamDispatchReq(pReq); streamSchedExec(pTask); return 0; diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index c23483fffb..9699386fd4 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -519,6 +519,7 @@ void* streamBackendInit(const char* streamPath, int64_t chkpId, int32_t vgId) { if (err != NULL) { stError("failed to open rocksdb, path:%s, reason:%s", backendPath, err); taosMemoryFreeClear(err); + rocksdb_list_column_families_destroy(cfs, nCf); goto _EXIT; } } else { diff --git a/source/libs/stream/src/streamCheckpoint.c b/source/libs/stream/src/streamCheckpoint.c index 6201329b95..031bb812de 100644 --- a/source/libs/stream/src/streamCheckpoint.c +++ b/source/libs/stream/src/streamCheckpoint.c @@ -158,6 +158,7 @@ static int32_t continueDispatchCheckpointBlock(SStreamDataBlock* pBlock, SStream int32_t code = taosWriteQitem(pTask->outputq.queue->pQueue, pBlock); if (code == 0) { + ASSERT(pTask->chkInfo.dispatchCheckpointTrigger == false); streamDispatchStreamBlock(pTask); } else { stError("s-task:%s failed to put checkpoint into outputQ, code:%s", pTask->id.idStr, tstrerror(code)); @@ -278,6 +279,7 @@ void streamTaskClearCheckInfo(SStreamTask* pTask) { pTask->chkInfo.startTs = 0; // clear the recorded start time pTask->checkpointNotReadyTasks = 0; pTask->checkpointAlignCnt = 0; + pTask->chkInfo.dispatchCheckpointTrigger = false; streamTaskOpenAllUpstreamInput(pTask); // open inputQ for all upstream tasks } diff --git a/source/libs/stream/src/streamDispatch.c b/source/libs/stream/src/streamDispatch.c index 42280b0d0f..bcf7c8dd27 100644 --- a/source/libs/stream/src/streamDispatch.c +++ b/source/libs/stream/src/streamDispatch.c @@ -371,7 +371,7 @@ static int32_t doBuildDispatchMsg(SStreamTask* pTask, const SStreamDataBlock* pD pTask->msgInfo.pData = pReqs; } - stDebug("s-task:%s build dispatch msg success, msgId:%d", pTask->id.idStr, pTask->execInfo.dispatch); + stDebug("s-task:%s build dispatch msg success, msgId:%d, stage:%" PRId64, pTask->id.idStr, pTask->execInfo.dispatch, pTask->pMeta->stage); return code; } @@ -593,6 +593,12 @@ int32_t streamDispatchStreamBlock(SStreamTask* pTask) { return 0; } + if (pTask->chkInfo.dispatchCheckpointTrigger) { + stDebug("s-task:%s already send checkpoint trigger, not dispatch anymore", id); + atomic_store_8(&pTask->outputq.status, TASK_OUTPUT_STATUS__NORMAL); + return 0; + } + ASSERT(pTask->msgInfo.pData == NULL); stDebug("s-task:%s start to dispatch msg, set output status:%d", id, pTask->outputq.status); @@ -926,8 +932,8 @@ int32_t streamAddCheckpointReadyMsg(SStreamTask* pTask, int32_t upstreamTaskId, initRpcMsg(&info.msg, TDMT_STREAM_TASK_CHECKPOINT_READY, buf, tlen + sizeof(SMsgHead)); info.msg.info.noResp = 1; // refactor later. - stDebug("s-task:%s (level:%d) prepare checkpoint ready msg to upstream s-task:0x%" PRIx64 ":0x%x (vgId:%d) idx:%d", - pTask->id.idStr, pTask->info.taskLevel, req.streamId, req.upstreamTaskId, req.upstreamNodeId, index); + stDebug("s-task:%s (level:%d) prepare checkpoint ready msg to upstream s-task:0x%" PRIx64 ":0x%x (vgId:%d) idx:%d, vgId:%d", + pTask->id.idStr, pTask->info.taskLevel, req.streamId, req.upstreamTaskId, req.upstreamNodeId, index, req.upstreamNodeId); if (pTask->pReadyMsgList == NULL) { pTask->pReadyMsgList = taosArrayInit(4, sizeof(SStreamChkptReadyInfo)); @@ -1039,30 +1045,14 @@ int32_t streamNotifyUpstreamContinue(SStreamTask* pTask) { return 0; } -static void dispatchDataInFuture(void* param, void* tmrId) { - SStreamTask* pTask = param; - if (streamTaskShouldStop(pTask)) { - int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); - stDebug("s-task:%s should stop, abort from timer, ref:%d", pTask->id.idStr, ref); - return; - } - - ETaskStatus status = streamTaskGetStatus(pTask, NULL); - if (status == TASK_STATUS__CK) { - stDebug("s-task:%s in checkpoint status, wait for 500ms to dispatch data downstream", pTask->id.idStr); - taosTmrReset(dispatchDataInFuture, 500, pTask, streamEnv.timer, &pTask->msgInfo.pTimer); - } else { - int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1); - stDebug("s-task:%s start to dispatch data, jump out of timer, ref:%d", pTask->id.idStr, ref); - streamDispatchStreamBlock(pTask); - } -} - // this message has been sent successfully, let's try next one. static int32_t handleDispatchSuccessRsp(SStreamTask* pTask, int32_t downstreamId) { destroyDispatchMsg(pTask->msgInfo.pData, getNumOfDispatchBranch(pTask)); bool delayDispatch = (pTask->msgInfo.dispatchMsgType == STREAM_INPUT__CHECKPOINT_TRIGGER); + if (delayDispatch) { + pTask->chkInfo.dispatchCheckpointTrigger = true; + } pTask->msgInfo.pData = NULL; pTask->msgInfo.dispatchMsgType = 0; @@ -1083,13 +1073,7 @@ static int32_t handleDispatchSuccessRsp(SStreamTask* pTask, int32_t downstreamId // otherwise, continue dispatch the first block to down stream task in pipeline if (delayDispatch) { - int32_t ref = atomic_add_fetch_32(&pTask->status.timerActive, 1); - stDebug("s-task:%s in checkpoint status, add in timer, try dispatch data in 500ms, ref:%d", pTask->id.idStr, ref); - if (pTask->msgInfo.pTimer != NULL) { - taosTmrReset(dispatchDataInFuture, 500, pTask, streamEnv.timer, &pTask->msgInfo.pTimer); - } else { - pTask->msgInfo.pTimer = taosTmrStart(dispatchDataInFuture, 500, pTask, streamEnv.timer); - } + return 0; } else { streamDispatchStreamBlock(pTask); } @@ -1102,6 +1086,13 @@ int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, i int32_t vgId = pTask->pMeta->vgId; int32_t msgId = pTask->execInfo.dispatch; +#if 0 + // for test purpose, build the failure case + if (pTask->msgInfo.dispatchMsgType == STREAM_INPUT__CHECKPOINT_TRIGGER) { + pRsp->inputStatus = TASK_INPUT_STATUS__REFUSED; + } +#endif + // follower not handle the dispatch rsp if ((pTask->pMeta->role == NODE_ROLE_FOLLOWER) || (pTask->status.downstreamReady != 1)) { stError("s-task:%s vgId:%d is follower or task just re-launched, not handle the dispatch rsp, discard it", id, vgId); @@ -1143,8 +1134,21 @@ int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, i stWarn("s-task:%s inputQ of downstream task:0x%x(vgId:%d) is full, wait for %dms and retry dispatch", id, pRsp->downstreamTaskId, pRsp->downstreamNodeId, DISPATCH_RETRY_INTERVAL_MS); } else if (pRsp->inputStatus == TASK_INPUT_STATUS__REFUSED) { - stError("s-task:%s downstream task:0x%x(vgId:%d) refused the dispatch msg, treat it as success", id, - pRsp->downstreamTaskId, pRsp->downstreamNodeId); + // todo handle the agg task failure, add test case + if (pTask->msgInfo.dispatchMsgType == STREAM_INPUT__CHECKPOINT_TRIGGER && + pTask->info.taskLevel == TASK_LEVEL__SOURCE) { + stError("s-task:%s failed to dispatch checkpoint-trigger msg, checkpointId:%" PRId64 + ", set the current checkpoint failed, and send rsp to mnode", + id, pTask->checkpointingId); + { // send checkpoint failure msg to mnode directly + pTask->chkInfo.failedId = pTask->checkpointingId; // record the latest failed checkpoint id + pTask->checkpointingId = pTask->checkpointingId; + streamTaskSendCheckpointSourceRsp(pTask); + } + } else { + stError("s-task:%s downstream task:0x%x(vgId:%d) refused the dispatch msg, treat it as success", id, + pRsp->downstreamTaskId, pRsp->downstreamNodeId); + } } } diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index cae537a860..43875319b7 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -48,6 +48,7 @@ static int32_t doOutputResultBlockImpl(SStreamTask* pTask, SStreamDataBlock* pBl return code; } + // checkpoint trigger will be checked streamDispatchStreamBlock(pTask); } diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index ae8c92d48e..00b292a69a 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -174,6 +174,7 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF pMeta->ahandle = ahandle; pMeta->expandFunc = expandFunc; pMeta->stage = stage; + pMeta->role = (vgId == SNODE_HANDLE) ? NODE_ROLE_LEADER : NODE_ROLE_UNINIT; // send heartbeat every 5sec. pMeta->rid = taosAddRef(streamMetaId, pMeta); @@ -204,7 +205,6 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF } pMeta->streamBackendRid = taosAddRef(streamBackendId, pMeta->streamBackend); - pMeta->role = NODE_ROLE_UNINIT; code = streamBackendLoadCheckpointInfo(pMeta); taosInitRWLatch(&pMeta->lock); @@ -784,7 +784,7 @@ int32_t tEncodeStreamHbMsg(SEncoder* pEncoder, const SStreamHbMsg* pReq) { if (tEncodeI64(pEncoder, ps->id.streamId) < 0) return -1; if (tEncodeI32(pEncoder, ps->id.taskId) < 0) return -1; if (tEncodeI32(pEncoder, ps->status) < 0) return -1; - if (tEncodeI32(pEncoder, ps->stage) < 0) return -1; + if (tEncodeI64(pEncoder, ps->stage) < 0) return -1; if (tEncodeI32(pEncoder, ps->nodeId) < 0) return -1; if (tEncodeDouble(pEncoder, ps->inputQUsed) < 0) return -1; if (tEncodeDouble(pEncoder, ps->inputRate) < 0) return -1; @@ -822,7 +822,7 @@ int32_t tDecodeStreamHbMsg(SDecoder* pDecoder, SStreamHbMsg* pReq) { if (tDecodeI64(pDecoder, &entry.id.streamId) < 0) return -1; if (tDecodeI32(pDecoder, &taskId) < 0) return -1; if (tDecodeI32(pDecoder, &entry.status) < 0) return -1; - if (tDecodeI32(pDecoder, &entry.stage) < 0) return -1; + if (tDecodeI64(pDecoder, &entry.stage) < 0) return -1; if (tDecodeI32(pDecoder, &entry.nodeId) < 0) return -1; if (tDecodeDouble(pDecoder, &entry.inputQUsed) < 0) return -1; if (tDecodeDouble(pDecoder, &entry.inputRate) < 0) return -1; @@ -938,7 +938,7 @@ void metaHbToMnode(void* param, void* tmrId) { SStreamHbMsg hbMsg = {0}; SEpSet epset = {0}; bool hasMnodeEpset = false; - int32_t stage = 0; + int64_t stage = 0; streamMetaRLock(pMeta); @@ -1117,11 +1117,6 @@ void streamMetaStartHb(SStreamMeta* pMeta) { metaHbToMnode(pRid, NULL); } -void streamMetaInitForSnode(SStreamMeta* pMeta) { - pMeta->stage = 0; - pMeta->role = NODE_ROLE_LEADER; -} - void streamMetaResetStartInfo(STaskStartInfo* pStartInfo) { taosHashClear(pStartInfo->pReadyTaskSet); taosHashClear(pStartInfo->pFailedTaskSet); diff --git a/source/libs/stream/src/streamQueue.c b/source/libs/stream/src/streamQueue.c index 556de169b4..d19dfc13bf 100644 --- a/source/libs/stream/src/streamQueue.c +++ b/source/libs/stream/src/streamQueue.c @@ -388,32 +388,36 @@ int32_t streamTaskInitTokenBucket(STokenBucket* pBucket, int32_t numCap, int32_t pBucket->quotaCapacity = quotaRate * MAX_SMOOTH_BURST_RATIO; pBucket->quotaRemain = pBucket->quotaCapacity; - pBucket->fillTimestamp = taosGetTimestampMs(); + pBucket->tokenFillTimestamp = taosGetTimestampMs(); + pBucket->quotaFillTimestamp = taosGetTimestampMs(); stDebug("s-task:%s sink quotaRate:%.2fMiB, numRate:%d", id, quotaRate, numRate); return TSDB_CODE_SUCCESS; } static void fillTokenBucket(STokenBucket* pBucket, const char* id) { int64_t now = taosGetTimestampMs(); - int64_t delta = now - pBucket->fillTimestamp; + + int64_t deltaToken = now - pBucket->tokenFillTimestamp; ASSERT(pBucket->numOfToken >= 0); - int32_t incNum = (delta / 1000.0) * pBucket->numRate; + int32_t incNum = (deltaToken / 1000.0) * pBucket->numRate; if (incNum > 0) { pBucket->numOfToken = TMIN(pBucket->numOfToken + incNum, pBucket->numCapacity); - pBucket->fillTimestamp = now; + pBucket->tokenFillTimestamp = now; } // increase the new available quota as time goes on - double incSize = (delta / 1000.0) * pBucket->quotaRate; + int64_t deltaQuota = now - pBucket->quotaFillTimestamp; + double incSize = (deltaQuota / 1000.0) * pBucket->quotaRate; if (incSize > 0) { pBucket->quotaRemain = TMIN(pBucket->quotaRemain + incSize, pBucket->quotaCapacity); - pBucket->fillTimestamp = now; + pBucket->quotaFillTimestamp = now; } if (incNum > 0 || incSize > 0) { - stTrace("token/quota available, token:%d inc:%d, quota:%.2fMiB inc:%.3fMiB, ts:%" PRId64 " idle:%" PRId64 "ms, %s", - pBucket->numOfToken, incNum, pBucket->quotaRemain, incSize, now, delta, id); + stTrace("token/quota available, token:%d inc:%d, token_TsDelta:%" PRId64 + ", quota:%.2fMiB inc:%.3fMiB quotaTs:%" PRId64 " now:%" PRId64 "ms, %s", + pBucket->numOfToken, incNum, deltaToken, pBucket->quotaRemain, incSize, deltaQuota, now, id); } } diff --git a/source/libs/stream/src/streamStart.c b/source/libs/stream/src/streamStart.c index 97eb7b79a2..f042687942 100644 --- a/source/libs/stream/src/streamStart.c +++ b/source/libs/stream/src/streamStart.c @@ -168,7 +168,6 @@ int32_t streamTaskStartScanHistory(SStreamTask* pTask) { } else if (level == TASK_LEVEL__AGG) { if (pTask->info.fillHistory) { streamSetParamForScanHistory(pTask); - streamTaskEnablePause(pTask); } } else if (level == TASK_LEVEL__SINK) { stDebug("s-task:%s sink task do nothing to handle scan-history", pTask->id.idStr); @@ -290,10 +289,11 @@ static void recheckDownstreamTasks(void* param, void* tmrId) { stDebug("s-task:%s complete send check in timer, ref:%d", pTask->id.idStr, ref); } -int32_t streamTaskCheckStatus(SStreamTask* pTask, int32_t upstreamTaskId, int32_t vgId, int64_t stage) { +int32_t streamTaskCheckStatus(SStreamTask* pTask, int32_t upstreamTaskId, int32_t vgId, int64_t stage, int64_t* oldStage) { SStreamChildEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, upstreamTaskId); ASSERT(pInfo != NULL); + *oldStage = pInfo->stage; const char* id = pTask->id.idStr; if (stage == -1) { stDebug("s-task:%s receive check msg from upstream task:0x%x(vgId:%d), invalid stageId:%" PRId64 ", not ready", id, @@ -345,7 +345,6 @@ int32_t onNormalTaskReady(SStreamTask* pTask) { stDebug("s-task:%s level:%d status:%s sched-status:%d", id, pTask->info.taskLevel, p, pTask->status.schedStatus); } - streamTaskEnablePause(pTask); return TSDB_CODE_SUCCESS; } @@ -459,9 +458,9 @@ int32_t streamProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRs if (pRsp->status == TASK_UPSTREAM_NEW_STAGE || pRsp->status == TASK_DOWNSTREAM_NOT_LEADER) { if (pRsp->status == TASK_UPSTREAM_NEW_STAGE) { stError( - "s-task:%s vgId:%d self vnode-transfer/leader-change/restart detected, old stage:%d, current stage:%d, " + "s-task:%s vgId:%d self vnode-transfer/leader-change/restart detected, old stage:%"PRId64", current stage:%"PRId64", " "not check wait for downstream task nodeUpdate, and all tasks restart", - id, pRsp->upstreamNodeId, pRsp->oldStage, (int32_t)pTask->pMeta->stage); + id, pRsp->upstreamNodeId, pRsp->oldStage, pTask->pMeta->stage); } else { stError( "s-task:%s downstream taskId:0x%x (vgId:%d) not leader, self dispatch epset needs to be updated, not check " @@ -476,7 +475,7 @@ int32_t streamProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRs STaskRecheckInfo* pInfo = createRecheckInfo(pTask, pRsp); int32_t ref = atomic_add_fetch_32(&pTask->status.timerActive, 1); - stDebug("s-task:%s downstream taskId:0x%x (vgId:%d) not ready, stage:%d, retry in 100ms, ref:%d ", id, + stDebug("s-task:%s downstream taskId:0x%x (vgId:%d) not ready, stage:%"PRId64", retry in 100ms, ref:%d ", id, pRsp->downstreamTaskId, pRsp->downstreamNodeId, pRsp->oldStage, ref); pInfo->checkTimer = taosTmrStart(recheckDownstreamTasks, CHECK_DOWNSTREAM_INTERVAL, pInfo, streamEnv.timer); } @@ -659,9 +658,6 @@ int32_t streamProcessScanHistoryFinishRsp(SStreamTask* pTask) { streamMetaCommit(pMeta); streamMetaWUnLock(pMeta); - // history data scan in the stream time window finished, now let's enable the pause - streamTaskEnablePause(pTask); - // for source tasks, let's continue execute. if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { streamSchedExec(pTask); @@ -926,7 +922,7 @@ int32_t tEncodeStreamTaskCheckRsp(SEncoder* pEncoder, const SStreamTaskCheckRsp* if (tEncodeI32(pEncoder, pRsp->downstreamNodeId) < 0) return -1; if (tEncodeI32(pEncoder, pRsp->downstreamTaskId) < 0) return -1; if (tEncodeI32(pEncoder, pRsp->childId) < 0) return -1; - if (tEncodeI32(pEncoder, pRsp->oldStage) < 0) return -1; + if (tEncodeI64(pEncoder, pRsp->oldStage) < 0) return -1; if (tEncodeI8(pEncoder, pRsp->status) < 0) return -1; tEndEncode(pEncoder); return pEncoder->pos; @@ -941,7 +937,7 @@ int32_t tDecodeStreamTaskCheckRsp(SDecoder* pDecoder, SStreamTaskCheckRsp* pRsp) if (tDecodeI32(pDecoder, &pRsp->downstreamNodeId) < 0) return -1; if (tDecodeI32(pDecoder, &pRsp->downstreamTaskId) < 0) return -1; if (tDecodeI32(pDecoder, &pRsp->childId) < 0) return -1; - if (tDecodeI32(pDecoder, &pRsp->oldStage) < 0) return -1; + if (tDecodeI64(pDecoder, &pRsp->oldStage) < 0) return -1; if (tDecodeI8(pDecoder, &pRsp->status) < 0) return -1; tEndDecode(pDecoder); return 0; @@ -1040,11 +1036,6 @@ void streamTaskResume(SStreamTask* pTask) { } } -void streamTaskEnablePause(SStreamTask* pTask) { - stDebug("s-task:%s enable task pause", pTask->id.idStr); - pTask->status.pauseAllowed = 1; -} - static void displayStatusInfo(SStreamMeta* pMeta, SHashObj* pTaskSet, bool succ) { int32_t vgId = pMeta->vgId; void* pIter = NULL; diff --git a/source/libs/stream/src/streamState.c b/source/libs/stream/src/streamState.c index 2e51200fe4..0f32fd6879 100644 --- a/source/libs/stream/src/streamState.c +++ b/source/libs/stream/src/streamState.c @@ -1086,7 +1086,6 @@ _end: } int32_t streamStatePutParName(SStreamState* pState, int64_t groupId, const char tbname[TSDB_TABLE_NAME_LEN]) { - stDebug("try to write to cf parname"); #ifdef USE_ROCKSDB if (tSimpleHashGetSize(pState->parNameMap) > MAX_TABLE_NAME_NUM) { if (tSimpleHashGet(pState->parNameMap, &groupId, sizeof(int64_t)) == NULL) { diff --git a/source/libs/stream/src/streamTaskSm.c b/source/libs/stream/src/streamTaskSm.c index 04b449aaaf..1c951e1452 100644 --- a/source/libs/stream/src/streamTaskSm.c +++ b/source/libs/stream/src/streamTaskSm.c @@ -269,6 +269,7 @@ int32_t streamTaskHandleEvent(SStreamTaskSM* pSM, EStreamTaskEvent event) { pTask->id.idStr, pSM->current.name, GET_EVT_NAME(evt)); taosMsleep(100); } else { + // no active event trans exists, handle this event directly pTrans = streamTaskFindTransform(pSM->current.state, event); if (pTrans == NULL) { stDebug("s-task:%s failed to handle event:%s", pTask->id.idStr, GET_EVT_NAME(event)); @@ -451,60 +452,43 @@ int32_t initStateTransferTable() { return TSDB_CODE_SUCCESS; } +//clang-format off void doInitStateTransferTable(void) { streamTaskSMTrans = taosArrayInit(8, sizeof(STaskStateTrans)); // initialization event handle - STaskStateTrans trans = createStateTransform(TASK_STATUS__UNINIT, TASK_STATUS__READY, TASK_EVENT_INIT, - streamTaskInitStatus, onNormalTaskReady, false, false); + STaskStateTrans trans = createStateTransform(TASK_STATUS__UNINIT, TASK_STATUS__READY, TASK_EVENT_INIT, streamTaskInitStatus, onNormalTaskReady, false, false); taosArrayPush(streamTaskSMTrans, &trans); - - trans = createStateTransform(TASK_STATUS__UNINIT, TASK_STATUS__SCAN_HISTORY, TASK_EVENT_INIT_SCANHIST, - streamTaskInitStatus, onScanhistoryTaskReady, false, false); + trans = createStateTransform(TASK_STATUS__UNINIT, TASK_STATUS__SCAN_HISTORY, TASK_EVENT_INIT_SCANHIST, streamTaskInitStatus, onScanhistoryTaskReady, false, false); taosArrayPush(streamTaskSMTrans, &trans); - - trans = createStateTransform(TASK_STATUS__UNINIT, TASK_STATUS__STREAM_SCAN_HISTORY, TASK_EVENT_INIT_STREAM_SCANHIST, - streamTaskInitStatus, onScanhistoryTaskReady, false, false); + trans = createStateTransform(TASK_STATUS__UNINIT, TASK_STATUS__STREAM_SCAN_HISTORY, TASK_EVENT_INIT_STREAM_SCANHIST, streamTaskInitStatus, onScanhistoryTaskReady, false, false); taosArrayPush(streamTaskSMTrans, &trans); // scan-history related event - trans = createStateTransform(TASK_STATUS__SCAN_HISTORY, TASK_STATUS__READY, TASK_EVENT_SCANHIST_DONE, NULL, NULL, - NULL, true); + trans = createStateTransform(TASK_STATUS__SCAN_HISTORY, TASK_STATUS__READY, TASK_EVENT_SCANHIST_DONE, NULL, NULL, NULL, true); taosArrayPush(streamTaskSMTrans, &trans); - - trans = createStateTransform(TASK_STATUS__STREAM_SCAN_HISTORY, TASK_STATUS__READY, TASK_EVENT_SCANHIST_DONE, NULL, - NULL, NULL, true); + trans = createStateTransform(TASK_STATUS__STREAM_SCAN_HISTORY, TASK_STATUS__READY, TASK_EVENT_SCANHIST_DONE, NULL, NULL, NULL, true); taosArrayPush(streamTaskSMTrans, &trans); // halt stream task, from other task status - trans = createStateTransform(TASK_STATUS__READY, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL, - streamTaskKeepCurrentVerInWal, NULL, true); + trans = createStateTransform(TASK_STATUS__READY, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL, streamTaskKeepCurrentVerInWal, NULL, true); taosArrayPush(streamTaskSMTrans, &trans); - - trans = createStateTransform(TASK_STATUS__HALT, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL, - streamTaskKeepCurrentVerInWal, NULL, true); + trans = createStateTransform(TASK_STATUS__HALT, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL, streamTaskKeepCurrentVerInWal, NULL, true); taosArrayPush(streamTaskSMTrans, &trans); SAttachedEventInfo info = {.status = TASK_STATUS__READY, .event = TASK_EVENT_HALT}; - trans = createStateTransform(TASK_STATUS__STREAM_SCAN_HISTORY, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL, - streamTaskKeepCurrentVerInWal, &info, true); - taosArrayPush(streamTaskSMTrans, &trans); - trans = createStateTransform(TASK_STATUS__CK, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL, streamTaskKeepCurrentVerInWal, - &info, true); + trans = createStateTransform(TASK_STATUS__STREAM_SCAN_HISTORY, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL, streamTaskKeepCurrentVerInWal, &info, true); taosArrayPush(streamTaskSMTrans, &trans); - - trans = createStateTransform(TASK_STATUS__PAUSE, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL, - streamTaskKeepCurrentVerInWal, NULL, true); + trans = createStateTransform(TASK_STATUS__CK, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL, streamTaskKeepCurrentVerInWal, &info, true); + taosArrayPush(streamTaskSMTrans, &trans); + trans = createStateTransform(TASK_STATUS__PAUSE, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL, streamTaskKeepCurrentVerInWal, NULL, true); taosArrayPush(streamTaskSMTrans, &trans); // checkpoint related event - trans = createStateTransform(TASK_STATUS__READY, TASK_STATUS__CK, TASK_EVENT_GEN_CHECKPOINT, NULL, - streamTaskDoCheckpoint, NULL, true); + trans = createStateTransform(TASK_STATUS__READY, TASK_STATUS__CK, TASK_EVENT_GEN_CHECKPOINT, NULL, streamTaskDoCheckpoint, NULL, true); taosArrayPush(streamTaskSMTrans, &trans); - - trans = - createStateTransform(TASK_STATUS__CK, TASK_STATUS__READY, TASK_EVENT_CHECKPOINT_DONE, NULL, NULL, NULL, true); + trans = createStateTransform(TASK_STATUS__CK, TASK_STATUS__READY, TASK_EVENT_CHECKPOINT_DONE, NULL, NULL, NULL, true); taosArrayPush(streamTaskSMTrans, &trans); // pause & resume related event handle @@ -571,4 +555,5 @@ void doInitStateTransferTable(void) { taosArrayPush(streamTaskSMTrans, &trans); trans = createStateTransform(TASK_STATUS__STREAM_SCAN_HISTORY, TASK_STATUS__DROPPING, TASK_EVENT_DROPPING, NULL, NULL, NULL, true); taosArrayPush(streamTaskSMTrans, &trans); -} \ No newline at end of file +} +//clang-format on \ No newline at end of file diff --git a/source/libs/sync/src/syncRaftCfg.c b/source/libs/sync/src/syncRaftCfg.c index 0dcc3eee29..0e98fe94eb 100644 --- a/source/libs/sync/src/syncRaftCfg.c +++ b/source/libs/sync/src/syncRaftCfg.c @@ -103,7 +103,7 @@ int32_t syncWriteCfgFile(SSyncNode *pNode) { if (buffer == NULL) goto _OVER; terrno = 0; - pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); + pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC | TD_FILE_WRITE_THROUGH); if (pFile == NULL) goto _OVER; int32_t len = strlen(buffer); diff --git a/source/libs/sync/src/syncRaftStore.c b/source/libs/sync/src/syncRaftStore.c index 051106b99d..c200c6cb4b 100644 --- a/source/libs/sync/src/syncRaftStore.c +++ b/source/libs/sync/src/syncRaftStore.c @@ -128,7 +128,7 @@ int32_t raftStoreWriteFile(SSyncNode *pNode) { if (buffer == NULL) goto _OVER; terrno = 0; - pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); + pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC | TD_FILE_WRITE_THROUGH); if (pFile == NULL) goto _OVER; int32_t len = strlen(buffer); diff --git a/source/libs/tfs/src/tfsTier.c b/source/libs/tfs/src/tfsTier.c index d4f228a537..911fdc52b7 100644 --- a/source/libs/tfs/src/tfsTier.c +++ b/source/libs/tfs/src/tfsTier.c @@ -112,7 +112,7 @@ int32_t tfsAllocDiskOnTier(STfsTier *pTier) { int32_t retId = -1; int64_t avail = 0; for (int32_t id = 0; id < TFS_MAX_DISKS_PER_TIER; ++id) { -#if 0 // round-robin +#if 1 // round-robin int32_t diskId = (pTier->nextid + id) % pTier->ndisk; STfsDisk *pDisk = pTier->disks[diskId]; diff --git a/source/libs/wal/src/walMeta.c b/source/libs/wal/src/walMeta.c index f5e5427c68..933014466a 100644 --- a/source/libs/wal/src/walMeta.c +++ b/source/libs/wal/src/walMeta.c @@ -873,7 +873,7 @@ int walSaveMeta(SWal* pWal) { return -1; } - TdFilePtr pMetaFile = taosOpenFile(tmpFnameStr, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); + TdFilePtr pMetaFile = taosOpenFile(tmpFnameStr, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC | TD_FILE_WRITE_THROUGH); if (pMetaFile == NULL) { wError("vgId:%d, failed to open file due to %s. file:%s", pWal->cfg.vgId, strerror(errno), tmpFnameStr); terrno = TAOS_SYSTEM_ERROR(errno); diff --git a/source/libs/wal/src/walWrite.c b/source/libs/wal/src/walWrite.c index ef97bff896..33d8d34514 100644 --- a/source/libs/wal/src/walWrite.c +++ b/source/libs/wal/src/walWrite.c @@ -133,6 +133,7 @@ int32_t walRollback(SWal *pWal, int64_t ver) { } walBuildIdxName(pWal, walGetCurFileFirstVer(pWal), fnameStr); + taosCloseFile(&pWal->pIdxFile); TdFilePtr pIdxFile = taosOpenFile(fnameStr, TD_FILE_WRITE | TD_FILE_READ | TD_FILE_APPEND); if (pIdxFile == NULL) { @@ -153,6 +154,7 @@ int32_t walRollback(SWal *pWal, int64_t ver) { } walBuildLogName(pWal, walGetCurFileFirstVer(pWal), fnameStr); + taosCloseFile(&pWal->pLogFile); TdFilePtr pLogFile = taosOpenFile(fnameStr, TD_FILE_WRITE | TD_FILE_READ | TD_FILE_APPEND); wDebug("vgId:%d, wal truncate file %s", pWal->cfg.vgId, fnameStr); if (pLogFile == NULL) { @@ -204,6 +206,7 @@ int32_t walRollback(SWal *pWal, int64_t ver) { pWal->vers.lastVer = ver - 1; ((SWalFileInfo *)taosArrayGetLast(pWal->fileInfoSet))->lastVer = ver - 1; ((SWalFileInfo *)taosArrayGetLast(pWal->fileInfoSet))->fileSize = entry.offset; + taosCloseFile(&pIdxFile); taosCloseFile(&pLogFile); @@ -605,7 +608,7 @@ int32_t walWriteWithSyncInfo(SWal *pWal, int64_t index, tmsg_t msgType, SWalSync return -1; } - if (pWal->pIdxFile == NULL || pWal->pIdxFile == NULL || pWal->writeCur < 0) { + if (pWal->pIdxFile == NULL || pWal->pLogFile == NULL || pWal->writeCur < 0) { if (walInitWriteFile(pWal) < 0) { taosThreadMutexUnlock(&pWal->mutex); return -1; diff --git a/source/os/src/osFile.c b/source/os/src/osFile.c index 30f079d10d..cf3bce1ad4 100644 --- a/source/os/src/osFile.c +++ b/source/os/src/osFile.c @@ -21,11 +21,12 @@ #include #include #include +#include #define F_OK 0 #define W_OK 2 #define R_OK 4 -#define _SEND_FILE_STEP_ 1000 +#define _SEND_FILE_STEP_ 1024 #else #include @@ -44,12 +45,22 @@ typedef int32_t FileFd; +#ifdef WINDOWS +typedef struct TdFile { + TdThreadRwlock rwlock; + int refId; + HANDLE hFile; + FILE* fp; + int32_t tdFileOptions; +} TdFile; +#else typedef struct TdFile { TdThreadRwlock rwlock; int refId; FileFd fd; FILE *fp; } TdFile; +#endif // WINDOWS #define FILE_WITH_LOCK 1 @@ -240,15 +251,12 @@ int32_t taosStatFile(const char *path, int64_t *size, int32_t *mtime, int32_t *a return 0; } int32_t taosDevInoFile(TdFilePtr pFile, int64_t *stDev, int64_t *stIno) { - if (pFile == NULL || pFile->fd < 0) { +#ifdef WINDOWS + if (pFile == NULL || pFile->hFile == NULL) { return -1; } - -#ifdef WINDOWS - BY_HANDLE_FILE_INFORMATION bhfi; - HANDLE handle = (HANDLE)_get_osfhandle(pFile->fd); - if (GetFileInformationByHandle(handle, &bhfi) == FALSE) { + if (GetFileInformationByHandle(pFile->hFile, &bhfi) == FALSE) { printf("taosFStatFile get file info fail."); return -1; } @@ -262,7 +270,9 @@ int32_t taosDevInoFile(TdFilePtr pFile, int64_t *stDev, int64_t *stIno) { } #else - + if (pFile == NULL || pFile->fd < 0) { + return -1; + } struct stat fileStat; int32_t code = fstat(pFile->fd, &fileStat); if (code < 0) { @@ -282,116 +292,363 @@ int32_t taosDevInoFile(TdFilePtr pFile, int64_t *stDev, int64_t *stIno) { return 0; } -TdFilePtr taosOpenFile(const char *path, int32_t tdFileOptions) { - int fd = -1; - FILE *fp = NULL; - if (tdFileOptions & TD_FILE_STREAM) { - char *mode = NULL; - if (tdFileOptions & TD_FILE_APPEND) { - mode = (tdFileOptions & TD_FILE_TEXT) ? "at+" : "ab+"; - } else if (tdFileOptions & TD_FILE_TRUNC) { - mode = (tdFileOptions & TD_FILE_TEXT) ? "wt+" : "wb+"; - } else if ((tdFileOptions & TD_FILE_READ) && !(tdFileOptions & TD_FILE_WRITE)) { - mode = (tdFileOptions & TD_FILE_TEXT) ? "rt" : "rb"; - } else { - mode = (tdFileOptions & TD_FILE_TEXT) ? "rt+" : "rb+"; - } - ASSERT(!(tdFileOptions & TD_FILE_EXCL)); - if (tdFileOptions & TD_FILE_EXCL) { - return NULL; - } - fp = fopen(path, mode); - if (fp == NULL) { - return NULL; - } +FILE *taosOpenFileForStream(const char *path, int32_t tdFileOptions) { + char *mode = NULL; + if (tdFileOptions & TD_FILE_APPEND) { + mode = (tdFileOptions & TD_FILE_TEXT) ? "at+" : "ab+"; + } else if (tdFileOptions & TD_FILE_TRUNC) { + mode = (tdFileOptions & TD_FILE_TEXT) ? "wt+" : "wb+"; + } else if ((tdFileOptions & TD_FILE_READ) && !(tdFileOptions & TD_FILE_WRITE)) { + mode = (tdFileOptions & TD_FILE_TEXT) ? "rt" : "rb"; } else { - int access = O_BINARY; - access |= (tdFileOptions & TD_FILE_CREATE) ? O_CREAT : 0; - if ((tdFileOptions & TD_FILE_WRITE) && (tdFileOptions & TD_FILE_READ)) { - access |= O_RDWR; - } else if (tdFileOptions & TD_FILE_WRITE) { - access |= O_WRONLY; - } else if (tdFileOptions & TD_FILE_READ) { - access |= O_RDONLY; - } - access |= (tdFileOptions & TD_FILE_TRUNC) ? O_TRUNC : 0; - access |= (tdFileOptions & TD_FILE_APPEND) ? O_APPEND : 0; - access |= (tdFileOptions & TD_FILE_TEXT) ? O_TEXT : 0; - access |= (tdFileOptions & TD_FILE_EXCL) ? O_EXCL : 0; -#ifdef WINDOWS - int32_t pmode = _S_IREAD | _S_IWRITE; - if (tdFileOptions & TD_FILE_AUTO_DEL) { - pmode |= _O_TEMPORARY; - } - fd = _open(path, access, pmode); -#else - fd = open(path, access, S_IRWXU | S_IRWXG | S_IRWXO); -#endif - if (fd == -1) { - return NULL; - } + mode = (tdFileOptions & TD_FILE_TEXT) ? "rt+" : "rb+"; } - - TdFilePtr pFile = (TdFilePtr)taosMemoryMalloc(sizeof(TdFile)); - if (pFile == NULL) { - if (fd >= 0) close(fd); - if (fp != NULL) fclose(fp); + ASSERT(!(tdFileOptions & TD_FILE_EXCL)); + if (tdFileOptions & TD_FILE_EXCL) { return NULL; } - -#if FILE_WITH_LOCK - taosThreadRwlockInit(&(pFile->rwlock), NULL); -#endif - pFile->fd = fd; - pFile->fp = fp; - pFile->refId = 0; - - if (tdFileOptions & TD_FILE_AUTO_DEL) { -#ifdef WINDOWS - // do nothing, since the property of pmode is set with _O_TEMPORARY; the OS will recycle - // the file handle, as well as the space on disk. -#else - // Remove it instantly, so when the program exits normally/abnormally, the file - // will be automatically remove by OS. - unlink(path); -#endif - } - - return pFile; + return fopen(path, mode); } -int32_t taosCloseFile(TdFilePtr *ppFile) { - int32_t code = 0; - if (ppFile == NULL || *ppFile == NULL) { +#ifdef WINDOWS +HANDLE taosOpenFileNotStream(const char *path, int32_t tdFileOptions) { + DWORD openMode = 0; + DWORD access = 0; + DWORD fileFlag = FILE_ATTRIBUTE_NORMAL; + DWORD shareMode = FILE_SHARE_READ; + + openMode = OPEN_EXISTING; + if (tdFileOptions & TD_FILE_CREATE) { + openMode = OPEN_ALWAYS; + } else if (tdFileOptions & TD_FILE_EXCL) { + openMode = CREATE_NEW; + } else if ((tdFileOptions & TD_FILE_TRUNC)) { + openMode = TRUNCATE_EXISTING; + access |= GENERIC_WRITE; + } + if (tdFileOptions & TD_FILE_APPEND) { + access |= FILE_APPEND_DATA; + } + if (tdFileOptions & TD_FILE_WRITE) { + access |= GENERIC_WRITE; + } + + shareMode |= FILE_SHARE_WRITE; + + access |= GENERIC_READ; + + if (tdFileOptions & TD_FILE_AUTO_DEL) { + fileFlag |= FILE_ATTRIBUTE_TEMPORARY; + } + if (tdFileOptions & TD_FILE_WRITE_THROUGH) { + fileFlag |= FILE_FLAG_WRITE_THROUGH; + } + + HANDLE h = CreateFile(path, access, shareMode, NULL, openMode, fileFlag, NULL); + if (h != INVALID_HANDLE_VALUE && (tdFileOptions & TD_FILE_APPEND) && (tdFileOptions & TD_FILE_WRITE)) { + SetFilePointer(h, 0, NULL, FILE_END); + } + if (h == INVALID_HANDLE_VALUE) { + DWORD dwError = GetLastError(); + LPVOID lpMsgBuf; + FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM, NULL, dwError, + 0, + (LPTSTR)&lpMsgBuf, 0, NULL); + printf("CreateFile failed with error %d: %s", dwError, (char*)lpMsgBuf); + LocalFree(lpMsgBuf); + } + return h; +} + +int64_t taosReadFile(TdFilePtr pFile, void *buf, int64_t count) { +#if FILE_WITH_LOCK + taosThreadRwlockRdlock(&(pFile->rwlock)); +#endif + if (pFile->hFile == NULL) { +#if FILE_WITH_LOCK + taosThreadRwlockUnlock(&(pFile->rwlock)); +#endif + return -1; + } + + DWORD bytesRead; + if (!ReadFile(pFile->hFile, buf, count, &bytesRead, NULL)) { + bytesRead = -1; + } +#if FILE_WITH_LOCK + taosThreadRwlockUnlock(&(pFile->rwlock)); +#endif + return bytesRead; +} + +int64_t taosWriteFile(TdFilePtr pFile, const void *buf, int64_t count) { + if (pFile == NULL || pFile->hFile == NULL) { return 0; } #if FILE_WITH_LOCK - taosThreadRwlockWrlock(&((*ppFile)->rwlock)); + taosThreadRwlockWrlock(&(pFile->rwlock)); #endif - if ((*ppFile)->fp != NULL) { - fflush((*ppFile)->fp); - fclose((*ppFile)->fp); - (*ppFile)->fp = NULL; + + DWORD bytesWritten; + if (!WriteFile(pFile->hFile, buf, count, &bytesWritten, NULL)) { + bytesWritten = -1; } - if ((*ppFile)->fd >= 0) { -#ifdef WINDOWS - HANDLE h = (HANDLE)_get_osfhandle((*ppFile)->fd); - !FlushFileBuffers(h); -#else - // warning: never fsync silently in base lib - /*fsync((*ppFile)->fd);*/ -#endif - code = close((*ppFile)->fd); - (*ppFile)->fd = -1; - } - (*ppFile)->refId = 0; + #if FILE_WITH_LOCK - taosThreadRwlockUnlock(&((*ppFile)->rwlock)); - taosThreadRwlockDestroy(&((*ppFile)->rwlock)); + taosThreadRwlockUnlock(&(pFile->rwlock)); #endif - taosMemoryFree(*ppFile); - *ppFile = NULL; - return code; + return bytesWritten; +} + +int64_t taosPWriteFile(TdFilePtr pFile, const void *buf, int64_t count, int64_t offset) { + if (pFile == NULL) { + return 0; + } +#if FILE_WITH_LOCK + taosThreadRwlockWrlock(&(pFile->rwlock)); +#endif + ASSERT(pFile->hFile != NULL); // Please check if you have closed the file. + if (pFile->hFile == NULL) { +#if FILE_WITH_LOCK + taosThreadRwlockUnlock(&(pFile->rwlock)); +#endif + return 0; + } + + DWORD ret = 0; + OVERLAPPED ol = {0}; + ol.OffsetHigh = (uint32_t)((offset & 0xFFFFFFFF00000000LL) >> 0x20); + ol.Offset = (uint32_t)(offset & 0xFFFFFFFFLL); + + SetLastError(0); + BOOL result = WriteFile(pFile->hFile, buf, count, &ret, &ol); + if (!result) { + errno = GetLastError(); + ret = -1; + } + +#if FILE_WITH_LOCK + taosThreadRwlockUnlock(&(pFile->rwlock)); +#endif + return ret; +} + +int64_t taosLSeekFile(TdFilePtr pFile, int64_t offset, int32_t whence) { + if (pFile == NULL || pFile->hFile == NULL) { + return -1; + } +#if FILE_WITH_LOCK + taosThreadRwlockRdlock(&(pFile->rwlock)); +#endif + + LARGE_INTEGER liOffset; + liOffset.QuadPart = offset; + if (!SetFilePointerEx(pFile->hFile, liOffset, NULL, whence)) { + return -1; + } + + liOffset.QuadPart = 0; + if (!SetFilePointerEx(pFile->hFile, liOffset, &liOffset, FILE_CURRENT)) { + return -1; + } +#if FILE_WITH_LOCK + taosThreadRwlockUnlock(&(pFile->rwlock)); +#endif + return liOffset.QuadPart; +} + +int32_t taosFStatFile(TdFilePtr pFile, int64_t *size, int32_t *mtime) { + if (pFile == NULL || pFile->hFile == NULL) { + return 0; + } + + if (size != NULL) { + LARGE_INTEGER fileSize; + if (!GetFileSizeEx(pFile->hFile, &fileSize)) { + return -1; // Error getting file size + } + *size = fileSize.QuadPart; + } + + if (mtime != NULL) { + FILETIME creationTime, lastAccessTime, lastWriteTime; + if (!GetFileTime(pFile->hFile, &creationTime, &lastAccessTime, &lastWriteTime)) { + return -1; // Error getting file time + } + // Convert the FILETIME structure to a time_t value + ULARGE_INTEGER ull; + ull.LowPart = lastWriteTime.dwLowDateTime; + ull.HighPart = lastWriteTime.dwHighDateTime; + *mtime = (int32_t)((ull.QuadPart - 116444736000000000ULL) / 10000000ULL); + } + return 0; +} + +int32_t taosLockFile(TdFilePtr pFile) { + if (pFile == NULL || pFile->hFile == NULL) { + return -1; + } + + BOOL fSuccess = FALSE; + LARGE_INTEGER fileSize; + OVERLAPPED overlapped = {0}; + + fSuccess = LockFileEx(pFile->hFile, LOCKFILE_EXCLUSIVE_LOCK | LOCKFILE_FAIL_IMMEDIATELY, + 0, // reserved + ~0, // number of bytes to lock low + ~0, // number of bytes to lock high + &overlapped // overlapped structure + ); + if (!fSuccess) { + return GetLastError(); + } + return 0; +} + +int32_t taosUnLockFile(TdFilePtr pFile) { + if (pFile == NULL || pFile->hFile == NULL) { + return 0; + } + BOOL fSuccess = FALSE; + OVERLAPPED overlapped = {0}; + + fSuccess = UnlockFileEx(pFile->hFile, 0, ~0, ~0, &overlapped); + if (!fSuccess) { + return GetLastError(); + } + return 0; +} + +int32_t taosFtruncateFile(TdFilePtr pFile, int64_t l_size) { + if (pFile == NULL) { + return 0; + } + if (pFile->hFile == NULL) { + printf("Ftruncate file error, hFile was null\n"); + return -1; + } + + LARGE_INTEGER li_0; + li_0.QuadPart = (int64_t)0; + BOOL cur = SetFilePointerEx(pFile->hFile, li_0, NULL, FILE_CURRENT); + if (!cur) { + printf("SetFilePointerEx Error getting current position in file.\n"); + return -1; + } + + LARGE_INTEGER li_size; + li_size.QuadPart = l_size; + BOOL cur2 = SetFilePointerEx(pFile->hFile, li_size, NULL, FILE_BEGIN); + if (cur2 == 0) { + int error = GetLastError(); + printf("SetFilePointerEx GetLastError is: %d\n", error); + switch (error) { + case ERROR_INVALID_HANDLE: + errno = EBADF; + break; + default: + errno = EIO; + break; + } + return -1; + } + + if (!SetEndOfFile(pFile->hFile)) { + int error = GetLastError(); + printf("SetEndOfFile GetLastError is:%d", error); + switch (error) { + case ERROR_INVALID_HANDLE: + errno = EBADF; + break; + default: + errno = EIO; + break; + } + return -1; + } + return 0; +} + +int64_t taosFSendFile(TdFilePtr pFileOut, TdFilePtr pFileIn, int64_t *offset, int64_t size) { + if (pFileOut == NULL || pFileIn == NULL) { + return 0; + } + if (pFileIn->hFile == NULL || pFileOut->hFile == NULL) { + return 0; + } + + LARGE_INTEGER fileOffset; + fileOffset.QuadPart = *offset; + + if (!SetFilePointerEx(pFileIn->hFile, fileOffset, &fileOffset, FILE_BEGIN)) { + return -1; + } + + int64_t writeLen = 0; + uint8_t buffer[_SEND_FILE_STEP_] = {0}; + + DWORD bytesRead; + DWORD bytesWritten; + for (int64_t len = 0; len < (size - _SEND_FILE_STEP_); len += _SEND_FILE_STEP_) { + if (!ReadFile(pFileIn->hFile, buffer, _SEND_FILE_STEP_, &bytesRead, NULL)) { + return writeLen; + } + + if (bytesRead <= 0) { + return writeLen; + } else if (bytesRead < _SEND_FILE_STEP_) { + if (!WriteFile(pFileOut->hFile, buffer, bytesRead, &bytesWritten, NULL)) { + return -1; + } else { + return (int64_t)(writeLen + bytesRead); + } + } else { + if (!WriteFile(pFileOut->hFile, buffer, _SEND_FILE_STEP_, &bytesWritten, NULL)) { + return -1; + } else { + writeLen += _SEND_FILE_STEP_; + } + } + } + + int64_t remain = size - writeLen; + if (remain > 0) { + DWORD bytesRead; + if (!ReadFile(pFileIn->hFile, buffer, (DWORD)remain, &bytesRead, NULL)) { + return -1; + } + + if (bytesRead <= 0) { + return writeLen; + } else { + DWORD bytesWritten; + if (!WriteFile(pFileOut->hFile, buffer, bytesRead, &bytesWritten, NULL)) { + return -1; + } else { + writeLen += bytesWritten; + } + } + } + return writeLen; +} + +#else +int taosOpenFileNotStream(const char *path, int32_t tdFileOptions) { + int access = O_BINARY; + access |= (tdFileOptions & TD_FILE_CREATE) ? O_CREAT : 0; + if ((tdFileOptions & TD_FILE_WRITE) && (tdFileOptions & TD_FILE_READ)) { + access |= O_RDWR; + } else if (tdFileOptions & TD_FILE_WRITE) { + access |= O_WRONLY; + } else if (tdFileOptions & TD_FILE_READ) { + access |= O_RDONLY; + } + access |= (tdFileOptions & TD_FILE_TRUNC) ? O_TRUNC : 0; + access |= (tdFileOptions & TD_FILE_APPEND) ? O_APPEND : 0; + access |= (tdFileOptions & TD_FILE_TEXT) ? O_TEXT : 0; + access |= (tdFileOptions & TD_FILE_EXCL) ? O_EXCL : 0; + int fd = open(path, access, S_IRWXU | S_IRWXG | S_IRWXO); + return fd; } int64_t taosReadFile(TdFilePtr pFile, void *buf, int64_t count) { @@ -407,7 +664,7 @@ int64_t taosReadFile(TdFilePtr pFile, void *buf, int64_t count) { } int64_t leftbytes = count; int64_t readbytes; - char *tbuf = (char *)buf; + char * tbuf = (char *)buf; while (leftbytes > 0) { #ifdef WINDOWS @@ -441,42 +698,6 @@ int64_t taosReadFile(TdFilePtr pFile, void *buf, int64_t count) { return count; } -int64_t taosPReadFile(TdFilePtr pFile, void *buf, int64_t count, int64_t offset) { - if (pFile == NULL) { - return 0; - } -#if FILE_WITH_LOCK - taosThreadRwlockRdlock(&(pFile->rwlock)); -#endif - ASSERT(pFile->fd >= 0); // Please check if you have closed the file. - if (pFile->fd < 0) { -#if FILE_WITH_LOCK - taosThreadRwlockUnlock(&(pFile->rwlock)); -#endif - return -1; - } -#ifdef WINDOWS - DWORD ret = 0; - OVERLAPPED ol = {0}; - ol.OffsetHigh = (uint32_t)((offset & 0xFFFFFFFF00000000LL) >> 0x20); - ol.Offset = (uint32_t)(offset & 0xFFFFFFFFLL); - - HANDLE handle = (HANDLE)_get_osfhandle(pFile->fd); - SetLastError(0); - BOOL result = ReadFile(handle, buf, count, &ret, &ol); - if (!result && GetLastError() != ERROR_HANDLE_EOF) { - errno = GetLastError(); - ret = -1; - } -#else - int64_t ret = pread(pFile->fd, buf, count, offset); -#endif -#if FILE_WITH_LOCK - taosThreadRwlockUnlock(&(pFile->rwlock)); -#endif - return ret; -} - int64_t taosWriteFile(TdFilePtr pFile, const void *buf, int64_t count) { if (pFile == NULL) { return 0; @@ -493,7 +714,7 @@ int64_t taosWriteFile(TdFilePtr pFile, const void *buf, int64_t count) { int64_t nleft = count; int64_t nwritten = 0; - char *tbuf = (char *)buf; + char * tbuf = (char *)buf; while (nleft > 0) { nwritten = write(pFile->fd, (void *)tbuf, (uint32_t)nleft); @@ -706,25 +927,6 @@ int32_t taosFtruncateFile(TdFilePtr pFile, int64_t l_size) { #endif } -int32_t taosFsyncFile(TdFilePtr pFile) { - if (pFile == NULL) { - return 0; - } - - // this implementation is WRONG - // fflush is not a replacement of fsync - if (pFile->fp != NULL) return fflush(pFile->fp); - if (pFile->fd >= 0) { -#ifdef WINDOWS - HANDLE h = (HANDLE)_get_osfhandle(pFile->fd); - return !FlushFileBuffers(h); -#else - return fsync(pFile->fd); -#endif - } - return 0; -} - int64_t taosFSendFile(TdFilePtr pFileOut, TdFilePtr pFileIn, int64_t *offset, int64_t size) { if (pFileOut == NULL || pFileIn == NULL) { return 0; @@ -824,6 +1026,167 @@ int64_t taosFSendFile(TdFilePtr pFileOut, TdFilePtr pFileIn, int64_t *offset, in #endif } +#endif // WINDOWS + +TdFilePtr taosOpenFile(const char *path, int32_t tdFileOptions) { + FILE *fp = NULL; +#ifdef WINDOWS + HANDLE hFile = NULL; +#else + int fd = -1; +#endif + if (tdFileOptions & TD_FILE_STREAM) { + fp = taosOpenFileForStream(path, tdFileOptions); + if (fp == NULL) return NULL; + } else { +#ifdef WINDOWS + hFile = taosOpenFileNotStream(path, tdFileOptions); + if (hFile == INVALID_HANDLE_VALUE) return NULL; +#else + fd = taosOpenFileNotStream(path, tdFileOptions); + if (fd == -1) return NULL; +#endif + } + + TdFilePtr pFile = (TdFilePtr)taosMemoryMalloc(sizeof(TdFile)); + if (pFile == NULL) { +#ifdef WINDOWS + if (hFile != NULL) CloseHandle(hFile); +#else + if (fd >= 0) close(fd); +#endif + if (fp != NULL) fclose(fp); + return NULL; + } + +#if FILE_WITH_LOCK + taosThreadRwlockInit(&(pFile->rwlock), NULL); +#endif + pFile->fp = fp; + pFile->refId = 0; + + #ifdef WINDOWS + pFile->hFile = hFile; + pFile->tdFileOptions = tdFileOptions; + // do nothing, since the property of pmode is set with _O_TEMPORARY; the OS will recycle + // the file handle, as well as the space on disk. +#else + pFile->fd = fd; + // Remove it instantly, so when the program exits normally/abnormally, the file + // will be automatically remove by OS. + if (tdFileOptions & TD_FILE_AUTO_DEL) { + unlink(path); + } +#endif + return pFile; +} + +int32_t taosCloseFile(TdFilePtr *ppFile) { + int32_t code = 0; + if (ppFile == NULL || *ppFile == NULL) { + return 0; + } +#if FILE_WITH_LOCK + taosThreadRwlockWrlock(&((*ppFile)->rwlock)); +#endif + if ((*ppFile)->fp != NULL) { + fflush((*ppFile)->fp); + fclose((*ppFile)->fp); + (*ppFile)->fp = NULL; + } +#ifdef WINDOWS + if ((*ppFile)->hFile != NULL) { + // FlushFileBuffers((*ppFile)->hFile); + if (!CloseHandle((*ppFile)->hFile)) { + code = -1; + } + (*ppFile)->hFile = NULL; +#else + if ((*ppFile)->fd >= 0) { + // warning: never fsync silently in base lib + /*fsync((*ppFile)->fd);*/ + code = close((*ppFile)->fd); + (*ppFile)->fd = -1; +#endif + } + (*ppFile)->refId = 0; +#if FILE_WITH_LOCK + taosThreadRwlockUnlock(&((*ppFile)->rwlock)); + taosThreadRwlockDestroy(&((*ppFile)->rwlock)); +#endif + taosMemoryFree(*ppFile); + *ppFile = NULL; + return code; +} + +int64_t taosPReadFile(TdFilePtr pFile, void *buf, int64_t count, int64_t offset) { + if (pFile == NULL) { + return 0; + } + +#ifdef WINDOWS +#if FILE_WITH_LOCK + taosThreadRwlockRdlock(&(pFile->rwlock)); +#endif + ASSERT(pFile->hFile != NULL); // Please check if you have closed the file. + if (pFile->hFile == NULL) { +#if FILE_WITH_LOCK + taosThreadRwlockUnlock(&(pFile->rwlock)); +#endif + return -1; + } + DWORD ret = 0; + OVERLAPPED ol = {0}; + ol.OffsetHigh = (uint32_t)((offset & 0xFFFFFFFF00000000LL) >> 0x20); + ol.Offset = (uint32_t)(offset & 0xFFFFFFFFLL); + + SetLastError(0); + BOOL result = ReadFile(pFile->hFile, buf, count, &ret, &ol); + if (!result && GetLastError() != ERROR_HANDLE_EOF) { + errno = GetLastError(); + ret = -1; + } +#else +#if FILE_WITH_LOCK + taosThreadRwlockRdlock(&(pFile->rwlock)); +#endif + ASSERT(pFile->fd >= 0); // Please check if you have closed the file. + if (pFile->fd < 0) { +#if FILE_WITH_LOCK + taosThreadRwlockUnlock(&(pFile->rwlock)); +#endif + return -1; + } + int64_t ret = pread(pFile->fd, buf, count, offset); +#endif +#if FILE_WITH_LOCK + taosThreadRwlockUnlock(&(pFile->rwlock)); +#endif + return ret; +} + +int32_t taosFsyncFile(TdFilePtr pFile) { + if (pFile == NULL) { + return 0; + } + + // this implementation is WRONG + // fflush is not a replacement of fsync + if (pFile->fp != NULL) return fflush(pFile->fp); +#ifdef WINDOWS + if (pFile->hFile != NULL) { + if (pFile->tdFileOptions & TD_FILE_WRITE_THROUGH) { + return 0; + } + return !FlushFileBuffers(pFile->hFile); +#else + if (pFile->fd >= 0) { + return fsync(pFile->fd); +#endif + } + return 0; +} + void taosFprintfFile(TdFilePtr pFile, const char *format, ...) { if (pFile == NULL || pFile->fp == NULL) { return; @@ -834,7 +1197,13 @@ void taosFprintfFile(TdFilePtr pFile, const char *format, ...) { va_end(ap); } -bool taosValidFile(TdFilePtr pFile) { return pFile != NULL && pFile->fd > 0; } +bool taosValidFile(TdFilePtr pFile) { +#ifdef WINDOWS + return pFile != NULL && pFile->hFile != NULL; +#else + return pFile != NULL && pFile->fd > 0; +#endif +} int32_t taosUmaskFile(int32_t maskVal) { #ifdef WINDOWS @@ -960,14 +1329,20 @@ int32_t taosCompressFile(char *srcFileName, char *destFileName) { goto cmp_end; } - pFile = taosOpenFile(destFileName, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); - if (pFile == NULL) { + int access = O_BINARY | O_WRONLY | O_TRUNC | O_CREAT; +#ifdef WINDOWS + int32_t pmode = _S_IREAD | _S_IWRITE; +#else + int32_t pmode = S_IRWXU | S_IRWXG | S_IRWXO; +#endif + int fd = open(destFileName, access, pmode); + if (fd < 0) { ret = -2; goto cmp_end; } // Both gzclose() and fclose() will close the associated fd, so they need to have different fds. - FileFd gzFd = dup(pFile->fd); + FileFd gzFd = dup(fd); if (gzFd < 0) { ret = -4; goto cmp_end; diff --git a/source/os/test/osTests.cpp b/source/os/test/osTests.cpp index a2ccc4de02..e2185aeac2 100644 --- a/source/os/test/osTests.cpp +++ b/source/os/test/osTests.cpp @@ -15,6 +15,7 @@ #include #include +#include #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wwrite-strings" @@ -29,6 +30,10 @@ #include "os.h" #include "tlog.h" +#ifdef WINDOWS +#include +#endif // WINDOWS + TEST(osTest, osSystem) { const char *flags = "UTL FATAL "; ELogLevel level = DEBUG_FATAL; @@ -68,7 +73,8 @@ void fileOperateOnBusy(void *param) { char * fname = (char *)param; TdFilePtr pFile = taosOpenFile(fname, TD_FILE_CREATE | TD_FILE_WRITE); printf("On busy thread open file\n"); - ASSERT_NE(pFile, nullptr); + if (pFile == NULL) return; + // ASSERT_NE(pFile, nullptr); int ret = taosLockFile(pFile); printf("On busy thread lock file ret:%d\n", ret); @@ -97,6 +103,7 @@ TEST(osTest, osFile) { TdFilePtr pOutFD = taosCreateFile(fname, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC); ASSERT_NE(pOutFD, nullptr); printf("create file success\n"); + taosCloseFile(&pOutFD); TdFilePtr pFile = taosOpenFile(fname, TD_FILE_CREATE | TD_FILE_WRITE); printf("open file\n"); @@ -135,4 +142,178 @@ TEST(osTest, osFile) { //printf("remove file success"); } +#ifndef OSFILE_PERFORMANCE_TEST + +#define MAX_WORDS 100 +#define MAX_WORD_LENGTH 20 +#define MAX_TEST_FILE_SIZE 100000 +#define TESTTIMES 1000 + +char *getRandomWord() { + static char words[][MAX_WORD_LENGTH] = { + "Lorem", "ipsum", "dolor", "sit", "amet", "consectetur", "adipiscing", "elit", + "sed", "do", "eiusmod", "tempor", "incididunt", "ut", "labore", "et", "dolore", "magna", + "aliqua", "Ut", "enim", "ad", "minim", "veniam", "quis", "nostrud", "exercitation", "ullamco", + "Why", "do", "programmers", "prefer", "using", "dark", "mode?", "Because", "light", "attracts", + "bugs", "and", "they", "want", "to", "code", "in", "peace,", "like", "a", "ninja", "in", "the", "shadows." + "aliqua", "Ut", "enim", "ad", "minim", "veniam", "quis", "nostrud", "exercitation", "ullamco", + "laboris", "nisi", "ut", "aliquip", "ex", "ea", "commodo", "consequat", "Duis", "aute", "irure", + "dolor", "in", "reprehenderit", "in", "voluptate", "velit", "esse", "cillum", "dolore", "eu", + "fugiat", "nulla", "pariatur", "Excepteur", "sint", "occaecat", "cupidatat", "non", "proident", + "sunt", "in", "culpa", "qui", "officia", "deserunt", "mollit", "anim", "id", "est", "laborum" + }; + + return words[taosRand() % MAX_WORDS]; +} + +int64_t fillBufferWithRandomWords(char *buffer, int64_t maxBufferSize) { + int64_t len = 0; + while (len < maxBufferSize) { + char * word = getRandomWord(); + size_t wordLen = strlen(word); + + if (len + wordLen + 1 < maxBufferSize) { + strcat(buffer, word); + strcat(buffer, " "); + len += wordLen + 1; + } else { + break; + } + } + return len; +} + +int64_t calculateAverage(int64_t arr[], int size) { + int64_t sum = 0; + for (int i = 0; i < size; i++) { + sum += arr[i]; + } + return sum / size; +} + +int64_t calculateMax(int64_t arr[], int size) { + int64_t max = arr[0]; + for (int i = 1; i < size; i++) { + if (arr[i] > max) { + max = arr[i]; + } + } + return max; +} + +int64_t calculateMin(int64_t arr[], int size) { + int64_t min = arr[0]; + for (int i = 1; i < size; i++) { + if (arr[i] < min) { + min = arr[i]; + } + } + return min; +} + +TEST(osTest, osFilePerformance) { + printf("os file performance testting...\n"); + int64_t WriteFileCost; + int64_t ReadFileCost; + int64_t OpenForWriteCloseFileCost; + int64_t OpenForReadCloseFileCost; + + char * buffer; + char * writeBuffer = (char *)taosMemoryCalloc(1, MAX_TEST_FILE_SIZE); + char * readBuffer = (char *)taosMemoryCalloc(1, MAX_TEST_FILE_SIZE); + int64_t size = fillBufferWithRandomWords(writeBuffer, MAX_TEST_FILE_SIZE); + char * fname = "./osFilePerformanceTest.txt"; + + TdFilePtr pOutFD = taosCreateFile(fname, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC); + ASSERT_NE(pOutFD, nullptr); + taosCloseFile(&pOutFD); + + printf("os file performance start write...\n"); + int64_t t1 = taosGetTimestampUs(); + for (int i = 0; i < TESTTIMES; ++i) { + TdFilePtr pFile = taosOpenFile(fname, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_WRITE_THROUGH); + ASSERT_NE(pFile, nullptr); + taosWriteFile(pFile, writeBuffer, size); + taosFsyncFile(pFile); + taosCloseFile(&pFile); + } + + int64_t t2 = taosGetTimestampUs(); + WriteFileCost = t2 - t1; + + printf("os file performance start read...\n"); + for (int i = 0; i < TESTTIMES; ++i) { + TdFilePtr pFile = taosOpenFile(fname, TD_FILE_READ); + ASSERT_NE(pFile, nullptr); + taosReadFile(pFile, readBuffer, size); + taosCloseFile(&pFile); + int readLine = strlen(readBuffer); + ASSERT_EQ(size, readLine); + } + int64_t t3 = taosGetTimestampUs(); + ReadFileCost = t3 - t2; + + printf("os file performance start open1...\n"); + for (int i = 0; i < TESTTIMES; ++i) { + TdFilePtr pFile = taosOpenFile(fname, TD_FILE_CREATE | TD_FILE_WRITE); + ASSERT_NE(pFile, nullptr); + taosCloseFile(&pFile); + } + int64_t t4 = taosGetTimestampUs(); + OpenForWriteCloseFileCost = t4 - t3; + + printf("os file performance start open2...\n"); + for (int i = 0; i < TESTTIMES; ++i) { + TdFilePtr pFile = taosOpenFile(fname, TD_FILE_CREATE | TD_FILE_READ); + ASSERT_NE(pFile, nullptr); + taosCloseFile(&pFile); + } + int64_t t5 = taosGetTimestampUs(); + OpenForReadCloseFileCost = t5 - t4; + +#ifdef WINDOWS + printf("os file performance start window native...\n"); + for (int i = 0; i < TESTTIMES; ++i) { + HANDLE hFile = CreateFile(fname, // 文件名 + GENERIC_WRITE, // 写权限 + FILE_SHARE_READ, // 不共享 + NULL, // 默认安全描述符 + OPEN_ALWAYS, // 打开已存在的文件 + FILE_FLAG_WRITE_THROUGH, // 文件标志,可以根据实际需求调整 + NULL // 模板文件句柄,对于创建新文件不需要 + ); + + if (hFile == INVALID_HANDLE_VALUE) { + printf("Error opening file\n"); + break; + } + + // 写入数据 + DWORD bytesWritten; + if (!WriteFile(hFile, writeBuffer, size, &bytesWritten, NULL)) { + // 处理错误 + printf("Error writing to file\n"); + CloseHandle(hFile); + break; + } + // 关闭文件 + CloseHandle(hFile); + } + int64_t t6 = taosGetTimestampUs(); + int64_t nativeWritCost = t6 - t5; + + printf("Test Write file using native API %d times, cost: %" PRId64 "us\n", TESTTIMES, nativeWritCost); +#endif // WINDOWS + + taosMemoryFree(writeBuffer); + taosMemoryFree(readBuffer); + + printf("Test Write file %d times, cost: %" PRId64 "us\n", TESTTIMES, WriteFileCost); + printf("Test Read file %d times, cost: %" PRId64 "us\n", TESTTIMES, ReadFileCost); + printf("Test OpenForWrite & Close file %d times, cost: %" PRId64 "us\n", TESTTIMES, OpenForWriteCloseFileCost); + printf("Test OpenForRead & Close file %d times, cost: %" PRId64 "us\n", TESTTIMES, OpenForReadCloseFileCost); +} + +#endif OSFILE_PERFORMANCE_TEST + #pragma GCC diagnostic pop diff --git a/source/util/src/tlog.c b/source/util/src/tlog.c index aa6719f604..184e18fc67 100644 --- a/source/util/src/tlog.c +++ b/source/util/src/tlog.c @@ -110,6 +110,7 @@ int32_t metaDebugFlag = 131; int32_t udfDebugFlag = 131; int32_t smaDebugFlag = 131; int32_t idxDebugFlag = 131; +int32_t sndDebugFlag = 131; int64_t dbgEmptyW = 0; int64_t dbgWN = 0; @@ -153,7 +154,12 @@ int32_t taosInitSlowLog() { #endif if (strlen(tsLogDir) != 0) { - snprintf(fullName, PATH_MAX, "%s" TD_DIRSEP "%s", tsLogDir, logFileName); + char lastC = tsLogDir[strlen(tsLogDir) - 1]; + if (lastC == '\\' || lastC == '/') { + snprintf(fullName, PATH_MAX, "%s" "%s", tsLogDir, logFileName); + } else { + snprintf(fullName, PATH_MAX, "%s" TD_DIRSEP "%s", tsLogDir, logFileName); + } } else { snprintf(fullName, PATH_MAX, "%s", logFileName); } @@ -177,7 +183,12 @@ int32_t taosInitLog(const char *logName, int32_t maxFiles) { char fullName[PATH_MAX] = {0}; if (strlen(tsLogDir) != 0) { - snprintf(fullName, PATH_MAX, "%s" TD_DIRSEP "%s", tsLogDir, logName); + char lastC = tsLogDir[strlen(tsLogDir) - 1]; + if (lastC == '\\' || lastC == '/') { + snprintf(fullName, PATH_MAX, "%s" "%s", tsLogDir, logName); + } else { + snprintf(fullName, PATH_MAX, "%s" TD_DIRSEP "%s", tsLogDir, logName); + } } else { snprintf(fullName, PATH_MAX, "%s", logName); } diff --git a/tests/parallel_test/cases.task b/tests/parallel_test/cases.task index fc7281d6e8..7ff42ad05e 100644 --- a/tests/parallel_test/cases.task +++ b/tests/parallel_test/cases.task @@ -20,6 +20,9 @@ ,,y,system-test,./pytest.sh python3 ./test.py -f 8-stream/window_close_session_ext.py ,,y,system-test,./pytest.sh python3 ./test.py -f 8-stream/partition_interval.py ,,y,system-test,./pytest.sh python3 ./test.py -f 8-stream/pause_resume_test.py +#,,n,system-test,python3 ./test.py -f 8-stream/vnode_restart.py -N 4 +#,,n,system-test,python3 ./test.py -f 8-stream/snode_restart.py -N 4 +,,n,system-test,python3 ./test.py -f 8-stream/snode_restart_with_checkpoint.py -N 4 ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/tbname_vgroup.py ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/stbJoin.py diff --git a/tests/script/tsim/stream/pauseAndResume.sim b/tests/script/tsim/stream/pauseAndResume.sim index 673bc77c0f..5eb9eef010 100644 --- a/tests/script/tsim/stream/pauseAndResume.sim +++ b/tests/script/tsim/stream/pauseAndResume.sim @@ -16,9 +16,8 @@ sql create table ts2 using st tags(2,2,2); sql create table ts3 using st tags(3,2,2); sql create table ts4 using st tags(4,2,2); sql create stream streams1 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 watermark 1d into streamt1 as select _wstart, count(*) c1, sum(a) c3 from st interval(10s); -sleep 1000 +sleep 2000 -sleep 1000 sql pause stream streams1; sql insert into ts1 values(1648791213001,1,12,3,1.0); diff --git a/tests/system-test/0-others/view/non_marterial_view/test_view.py b/tests/system-test/0-others/view/non_marterial_view/test_view.py index afb2476305..4b829b4049 100644 --- a/tests/system-test/0-others/view/non_marterial_view/test_view.py +++ b/tests/system-test/0-others/view/non_marterial_view/test_view.py @@ -162,7 +162,7 @@ class TDTestCase: assert('TIMESTAMP' in data_type_list and 'INT' in data_type_list and 'INT UNSIGNED' in data_type_list and 'BIGINT' in data_type_list and 'BIGINT UNSIGNED' in data_type_list and 'FLOAT' in data_type_list and 'DOUBLE' in data_type_list and 'VARCHAR' in data_type_list and 'SMALLINT' in data_type_list and 'SMALLINT UNSIGNED' in data_type_list and 'TINYINT' in data_type_list and 'TINYINT UNSIGNED' in data_type_list and 'BOOL' in data_type_list and 'VARCHAR' in data_type_list and 'NCHAR' in data_type_list and 'GEOMETRY' in data_type_list and 'VARBINARY' in data_type_list) tdSql.execute("create view v2 as select * from tb where c1 >5 and c7 like '%ab%';") self.check_view_num(2) - tdSql.error("create view v3 as select * from tb where c1 like '%ab%';", expectErrInfo='Invalid value type') + tdSql.error("create view v3 as select * from tb where c1 like '%ab%';", expectErrInfo='Invalid operation') tdSql.execute("create view v3 as select first(ts), sum(c1) from tb group by c2 having avg(c4) > 0;") tdSql.execute("create view v4 as select _wstart,sum(c6) from tb interval(10s);") tdSql.execute("create view v5 as select * from tb join v2 on tb.ts = v2.ts;") diff --git a/tests/system-test/2-query/db.py b/tests/system-test/2-query/db.py index 6870c59a0d..0246626e40 100644 --- a/tests/system-test/2-query/db.py +++ b/tests/system-test/2-query/db.py @@ -55,7 +55,7 @@ class TDTestCase: tdSql.checkData(0, 2, 0) tdSql.query("show dnode 1 variables like '%debugFlag'") - tdSql.checkRows(22) + tdSql.checkRows(23) tdSql.query("show dnode 1 variables like '____debugFlag'") tdSql.checkRows(2) diff --git a/tests/system-test/7-tmq/tmqVnodeReplicate.py b/tests/system-test/7-tmq/tmqVnodeReplicate.py index fd8ece02e0..0ee11781ed 100644 --- a/tests/system-test/7-tmq/tmqVnodeReplicate.py +++ b/tests/system-test/7-tmq/tmqVnodeReplicate.py @@ -105,7 +105,6 @@ class TDTestCase: topicNameList = ['topic1'] # expectRowsList = [] - tmqCom.initConsumerTable("cdb", self.replicaVar) tdLog.info("create topics from stb with filter") queryString = "select * from %s.%s"%(paraDict['dbName'], paraDict['stbName']) @@ -133,14 +132,15 @@ class TDTestCase: tmqCom.getStartConsumeNotifyFromTmqsim() tmqCom.getStartCommitNotifyFromTmqsim() - tdSql.query("select * from information_schema.ins_vnodes") - # tdLog.debug(tdSql.queryResult) - tdDnodes = cluster.dnodes - for result in tdSql.queryResult: - if result[2] == 'dbt' and result[3] == 'leader': - tdLog.debug("leader is %d"%(result[0] - 1)) - tdDnodes[result[0] - 1].stoptaosd() - break + tdSql.query("balance vgroup leader") + # tdSql.query("select * from information_schema.ins_vnodes") + # # tdLog.debug(tdSql.queryResult) + # tdDnodes = cluster.dnodes + # for result in tdSql.queryResult: + # if result[2] == 'dbt' and result[3] == 'leader': + # tdLog.debug("leader is %d"%(result[0] - 1)) + # tdDnodes[result[0] - 1].stoptaosd() + # break pInsertThread.join() expectRows = 1 @@ -159,7 +159,6 @@ class TDTestCase: tdLog.printNoPrefix("======== test case 1 end ...... ") def run(self): - tdSql.prepare() self.prepareTestEnv() self.tmqCase1() diff --git a/tests/system-test/8-stream/snode_restart.py b/tests/system-test/8-stream/snode_restart.py new file mode 100644 index 0000000000..3657163ab0 --- /dev/null +++ b/tests/system-test/8-stream/snode_restart.py @@ -0,0 +1,78 @@ + +import taos +import sys +import time +import socket +import os +import threading +import math + +from util.log import * +from util.sql import * +from util.cases import * +from util.dnodes import * +from util.common import * +from util.cluster import * + +class TDTestCase: + updatecfgDict = {'checkpointInterval': 1100} + print("===================: ", updatecfgDict) + + def init(self, conn, logSql, replicaVar=1): + tdLog.debug(f"start to excute {__file__}") + tdSql.init(conn.cursor(), False) + + + def case1(self): + tdLog.debug("========case1 start========") + + os.system("nohup taosBenchmark -y -B 1 -t 4 -S 1000 -n 1000 -i 1000 -v 2 > /dev/null 2>&1 &") + time.sleep(4) + tdSql.query("use test") + tdSql.query("create snode on dnode 4") + tdSql.query("create stream if not exists s1 trigger at_once ignore expired 0 ignore update 0 fill_history 1 into st1 as select _wstart,sum(voltage),groupid from meters partition by groupid interval(2s)") + tdLog.debug("========create stream useing snode and insert data ok========") + time.sleep(4) + + tdDnodes = cluster.dnodes + tdDnodes[3].stoptaosd() + time.sleep(2) + tdDnodes[3].starttaosd() + tdLog.debug("========snode restart ok========") + + time.sleep(30) + os.system("kill -9 `pgrep taosBenchmark`") + tdLog.debug("========stop insert ok========") + time.sleep(2) + + tdSql.query("select _wstart,sum(voltage),groupid from meters partition by groupid interval(2s) order by groupid,_wstart") + rowCnt = tdSql.getRows() + results = [] + for i in range(rowCnt): + results.append(tdSql.getData(i,1)) + + tdSql.query("select * from st1 order by groupid,_wstart") + tdSql.checkRows(rowCnt) + for i in range(rowCnt): + data1 = tdSql.getData(i,1) + data2 = results[i] + if data1 != data2: + tdLog.info("num: %d, act data: %d, expect data: %d"%(i, data1, data2)) + tdLog.exit("check data error!") + + # tdLog.debug("========sleep 500s========") + # time.sleep(500) + + tdLog.debug("case1 end") + + def run(self): + self.case1() + + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + +event = threading.Event() + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) diff --git a/tests/system-test/8-stream/snode_restart_with_checkpoint.py b/tests/system-test/8-stream/snode_restart_with_checkpoint.py new file mode 100644 index 0000000000..9567bbe439 --- /dev/null +++ b/tests/system-test/8-stream/snode_restart_with_checkpoint.py @@ -0,0 +1,78 @@ + +import taos +import sys +import time +import socket +import os +import threading +import math + +from util.log import * +from util.sql import * +from util.cases import * +from util.dnodes import * +from util.common import * +from util.cluster import * + +class TDTestCase: + # updatecfgDict = {'checkpointInterval': 5} + # print("===================: ", updatecfgDict) + + def init(self, conn, logSql, replicaVar=1): + tdLog.debug(f"start to excute {__file__}") + tdSql.init(conn.cursor(), False) + + + def case1(self): + tdLog.debug("========case1 start========") + + os.system("nohup taosBenchmark -y -B 1 -t 4 -S 1000 -n 1000 -i 1000 -v 2 > /dev/null 2>&1 &") + time.sleep(4) + tdSql.query("use test") + tdSql.query("create snode on dnode 4") + tdSql.query("create stream if not exists s1 trigger at_once ignore expired 0 ignore update 0 fill_history 1 into st1 as select _wstart,sum(voltage),groupid from meters partition by groupid interval(2s)") + tdLog.debug("========create stream useing snode and insert data ok========") + time.sleep(60) + + tdDnodes = cluster.dnodes + tdDnodes[3].stoptaosd() + time.sleep(2) + tdDnodes[3].starttaosd() + tdLog.debug("========snode restart ok========") + + time.sleep(30) + os.system("kill -9 `pgrep taosBenchmark`") + tdLog.debug("========stop insert ok========") + time.sleep(2) + + tdSql.query("select _wstart,sum(voltage),groupid from meters partition by groupid interval(2s) order by groupid,_wstart") + rowCnt = tdSql.getRows() + results = [] + for i in range(rowCnt): + results.append(tdSql.getData(i,1)) + + tdSql.query("select * from st1 order by groupid,_wstart") + tdSql.checkRows(rowCnt) + for i in range(rowCnt): + data1 = tdSql.getData(i,1) + data2 = results[i] + if data1 != data2: + tdLog.info("num: %d, act data: %d, expect data: %d"%(i, data1, data2)) + tdLog.exit("check data error!") + + # tdLog.debug("========sleep 500s========") + # time.sleep(500) + + tdLog.debug("case1 end") + + def run(self): + self.case1() + + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + +event = threading.Event() + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) diff --git a/tests/system-test/8-stream/vnode_restart.py b/tests/system-test/8-stream/vnode_restart.py new file mode 100644 index 0000000000..a53432b77a --- /dev/null +++ b/tests/system-test/8-stream/vnode_restart.py @@ -0,0 +1,77 @@ + +import taos +import sys +import time +import socket +import os +import threading +import math + +from util.log import * +from util.sql import * +from util.cases import * +from util.dnodes import * +from util.common import * +from util.cluster import * + +class TDTestCase: + updatecfgDict = {'checkpointInterval': 1100} + print("===================: ", updatecfgDict) + + def init(self, conn, logSql, replicaVar=1): + tdLog.debug(f"start to excute {__file__}") + tdSql.init(conn.cursor(), False) + + + def case1(self): + tdLog.debug("========case1 start========") + + os.system("nohup taosBenchmark -y -B 1 -t 4 -S 1000 -n 1000 -i 1000 -v 2 > /dev/null 2>&1 &") + time.sleep(4) + tdSql.query("use test") + tdSql.query("create stream if not exists s1 trigger at_once ignore expired 0 ignore update 0 fill_history 1 into st1 as select _wstart,sum(voltage),groupid from meters partition by groupid interval(2s)") + tdLog.debug("========create stream useing snode and insert data ok========") + time.sleep(4) + + tdDnodes = cluster.dnodes + tdDnodes[2].stoptaosd() + time.sleep(2) + tdDnodes[2].starttaosd() + tdLog.debug("========vnode restart ok========") + + time.sleep(30) + os.system("kill -9 `pgrep taosBenchmark`") + tdLog.debug("========stop insert ok========") + time.sleep(2) + + tdSql.query("select _wstart,sum(voltage),groupid from meters partition by groupid interval(2s) order by groupid,_wstart") + rowCnt = tdSql.getRows() + results = [] + for i in range(rowCnt): + results.append(tdSql.getData(i,1)) + + tdSql.query("select * from st1 order by groupid,_wstart") + tdSql.checkRows(rowCnt) + for i in range(rowCnt): + data1 = tdSql.getData(i,1) + data2 = results[i] + if data1 != data2: + tdLog.info("num: %d, act data: %d, expect data: %d"%(i, data1, data2)) + tdLog.exit("check data error!") + + # tdLog.debug("========sleep 500s========") + # time.sleep(500) + + tdLog.debug("case1 end") + + def run(self): + self.case1() + + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + +event = threading.Event() + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) diff --git a/tests/system-test/output.txt b/tests/system-test/output.txt deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/system-test/test.py b/tests/system-test/test.py index 81f98fea22..795132b14e 100644 --- a/tests/system-test/test.py +++ b/tests/system-test/test.py @@ -582,7 +582,7 @@ if __name__ == "__main__": tdDnodes.setAsan(asan) tdDnodes.stopAll() for dnode in tdDnodes.dnodes: - tdDnodes.deploy(dnode.index,{}) + tdDnodes.deploy(dnode.index,updateCfgDict) for dnode in tdDnodes.dnodes: tdDnodes.starttaosd(dnode.index) tdCases.logSql(logSql) diff --git a/utils/test/c/tmq_taosx_ci.c b/utils/test/c/tmq_taosx_ci.c index ff89bb1f75..8a7074844a 100644 --- a/utils/test/c/tmq_taosx_ci.c +++ b/utils/test/c/tmq_taosx_ci.c @@ -30,7 +30,7 @@ typedef struct { int meta; int srcVgroups; int dstVgroups; - char dir[64]; + char dir[256]; } Config; Config g_conf = {0}; @@ -409,6 +409,30 @@ int buildStable(TAOS* pConn, TAOS_RES* pRes) { } taos_free_result(pRes); +#ifdef WINDOWS + pRes = taos_query(pConn, + "CREATE STABLE `meters_summary` (`_wstart` TIMESTAMP, `current` FLOAT, `groupid` INT, `location` VARCHAR(16)) TAGS (`group_id` BIGINT UNSIGNED)"); + if (taos_errno(pRes) != 0) { + printf("failed to create super table meters_summary, reason:%s\n", taos_errstr(pRes)); + return -1; + } + taos_free_result(pRes); + + pRes = taos_query(pConn, + " CREATE TABLE `t_d2a450ee819dcf7576f0282d9ac22dbc` USING `meters_summary` (`group_id`) TAGS (13135550082773579308)"); + if (taos_errno(pRes) != 0) { + printf("failed to create super table meters_summary, reason:%s\n", taos_errstr(pRes)); + return -1; + } + taos_free_result(pRes); + + pRes = taos_query(pConn, "insert into t_d2a450ee819dcf7576f0282d9ac22dbc values (now, 120, 1, 'San Francisco')"); + if (taos_errno(pRes) != 0) { + printf("failed to insert into table d0, reason:%s\n", taos_errstr(pRes)); + return -1; + } + taos_free_result(pRes); +#else pRes = taos_query(pConn, "create stream meters_summary_s trigger at_once IGNORE EXPIRED 0 into meters_summary as select _wstart, max(current) as current, " "groupid, location from meters partition by groupid, location interval(10m)"); @@ -417,6 +441,7 @@ int buildStable(TAOS* pConn, TAOS_RES* pRes) { return -1; } taos_free_result(pRes); +#endif pRes = taos_query(pConn, "insert into d0 (ts, current) values (now, 120)"); if (taos_errno(pRes) != 0) { @@ -598,8 +623,8 @@ void basic_consume_loop(tmq_t* tmq, tmq_list_t* topics) { } void initLogFile() { - char f1[256] = {0}; - char f2[256] = {0}; + char f1[1024] = {0}; + char f2[1024] = {0}; if (g_conf.snapShot) { sprintf(f1, "%s/../log/tmq_taosx_tmp_snapshot.source", g_conf.dir);