Merge branch '3.0' of github.com:taosdata/TDengine into szhou/bi-tbname-col-func

This commit is contained in:
slzhou 2023-11-23 10:41:34 +08:00
commit f44ff199f2
73 changed files with 2141 additions and 603 deletions

View File

@ -97,7 +97,15 @@ ENDIF()
SET(JEMALLOC_ENABLED OFF) SET(JEMALLOC_ENABLED OFF)
IF (TD_WINDOWS) IF (TD_WINDOWS)
MESSAGE("${Yellow} set compiler flag for Windows! ${ColourReset}") MESSAGE("${Yellow} set compiler flag for Windows! ${ColourReset}")
IF (${CMAKE_BUILD_TYPE} MATCHES "Release")
MESSAGE("${Green} will build Release version! ${ColourReset}")
SET(COMMON_FLAGS "/W3 /D_WIN32 /DWIN32 /Zi- /O2 /GL /MD")
ELSE ()
MESSAGE("${Green} will build Debug version! ${ColourReset}")
SET(COMMON_FLAGS "/w /D_WIN32 /DWIN32 /Zi /MTd") SET(COMMON_FLAGS "/w /D_WIN32 /DWIN32 /Zi /MTd")
ENDIF()
SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /MANIFEST:NO") SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /MANIFEST:NO")
# IF (MSVC AND (MSVC_VERSION GREATER_EQUAL 1900)) # IF (MSVC AND (MSVC_VERSION GREATER_EQUAL 1900))
# SET(COMMON_FLAGS "${COMMON_FLAGS} /Wv:18") # SET(COMMON_FLAGS "${COMMON_FLAGS} /Wv:18")

View File

@ -317,7 +317,8 @@ if (${BUILD_WITH_ROCKSDB})
SET(CMAKE_BUILD_TYPE Release) SET(CMAKE_BUILD_TYPE Release)
endif() endif()
endif(${TD_LINUX}) endif(${TD_LINUX})
MESSAGE(STATUS "CXXXX STATUS CONFIG: " ${CMAKE_CXX_FLAGS}) MESSAGE(STATUS "ROCKSDB CXXXX STATUS CONFIG: " ${CMAKE_CXX_FLAGS})
MESSAGE(STATUS "ROCKSDB C STATUS CONFIG: " ${CMAKE_C_FLAGS})
if(${TD_DARWIN}) if(${TD_DARWIN})
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=maybe-uninitialized") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=maybe-uninitialized")
@ -329,8 +330,12 @@ if (${BUILD_WITH_ROCKSDB})
if (${TD_WINDOWS}) if (${TD_WINDOWS})
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4244 /wd4819") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4244 /wd4819")
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /wd4244 /wd4819")
option(WITH_JNI "" OFF) option(WITH_JNI "" OFF)
if(CMAKE_C_FLAGS MATCHES "/MT" OR CMAKE_C_FLAGS MATCHES "/MTd")
message("Rocksdb build runtime lib use /MT or /MTd")
option(WITH_MD_LIBRARY "build with MD" OFF) option(WITH_MD_LIBRARY "build with MD" OFF)
endif()
set(SYSTEM_LIBS ${SYSTEM_LIBS} shlwapi.lib rpcrt4.lib) set(SYSTEM_LIBS ${SYSTEM_LIBS} shlwapi.lib rpcrt4.lib)
endif(${TD_WINDOWS}) endif(${TD_WINDOWS})
@ -361,9 +366,11 @@ if (${BUILD_WITH_ROCKSDB})
) )
else() else()
if (NOT ${TD_LINUX}) if (NOT ${TD_LINUX})
MESSAGE(STATUS "CXXXX STATUS CONFIG: " ${CMAKE_CXX_FLAGS}) MESSAGE(STATUS "ROCKSDB CXX STATUS CONFIG: " ${CMAKE_CXX_FLAGS})
MESSAGE(STATUS "ROCKSDB C STATUS CONFIG: " ${CMAKE_C_FLAGS})
if(${TD_DARWIN}) if(${TD_DARWIN})
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=maybe-uninitialized") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=maybe-uninitialized")
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-error=maybe-uninitialized")
endif(${TD_DARWIN}) endif(${TD_DARWIN})
if (${TD_DARWIN_ARM64}) if (${TD_DARWIN_ARM64})
@ -372,8 +379,12 @@ if (${BUILD_WITH_ROCKSDB})
if (${TD_WINDOWS}) if (${TD_WINDOWS})
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4244 /wd4819") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4244 /wd4819")
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /wd4244 /wd4819")
option(WITH_JNI "" OFF) option(WITH_JNI "" OFF)
if(CMAKE_C_FLAGS MATCHES "/MT" OR CMAKE_C_FLAGS MATCHES "/MTd")
message("Rocksdb build runtime lib use /MT or /MTd")
option(WITH_MD_LIBRARY "build with MD" OFF) option(WITH_MD_LIBRARY "build with MD" OFF)
endif()
set(SYSTEM_LIBS ${SYSTEM_LIBS} shlwapi.lib rpcrt4.lib) set(SYSTEM_LIBS ${SYSTEM_LIBS} shlwapi.lib rpcrt4.lib)
endif(${TD_WINDOWS}) endif(${TD_WINDOWS})
@ -456,7 +467,9 @@ endif(${BUILD_WITH_NURAFT})
# pthread # pthread
if(${BUILD_PTHREAD}) if(${BUILD_PTHREAD})
set(CMAKE_BUILD_TYPE debug) if ("${CMAKE_BUILD_TYPE}" STREQUAL "")
SET(CMAKE_BUILD_TYPE Release)
endif()
add_definitions(-DPTW32_STATIC_LIB) add_definitions(-DPTW32_STATIC_LIB)
add_subdirectory(pthread EXCLUDE_FROM_ALL) add_subdirectory(pthread EXCLUDE_FROM_ALL)
set_target_properties(libpthreadVC3 PROPERTIES OUTPUT_NAME pthread) set_target_properties(libpthreadVC3 PROPERTIES OUTPUT_NAME pthread)
@ -640,13 +653,18 @@ if(${BUILD_GEOS})
if(${TD_LINUX}) if(${TD_LINUX})
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS_REL}") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS_REL}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS_REL}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS_REL}")
IF ("${CMAKE_BUILD_TYPE}" STREQUAL "") if ("${CMAKE_BUILD_TYPE}" STREQUAL "")
SET(CMAKE_BUILD_TYPE Release) SET(CMAKE_BUILD_TYPE Release)
endif() endif()
endif(${TD_LINUX}) endif(${TD_LINUX})
option(BUILD_SHARED_LIBS "Build GEOS with shared libraries" OFF) option(BUILD_SHARED_LIBS "Build GEOS with shared libraries" OFF)
add_subdirectory(geos EXCLUDE_FROM_ALL) add_subdirectory(geos EXCLUDE_FROM_ALL)
if (${TD_WINDOWS})
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
else ()
unset(CMAKE_CXX_STANDARD CACHE) # undo libgeos's setting of global CMAKE_CXX_STANDARD unset(CMAKE_CXX_STANDARD CACHE) # undo libgeos's setting of global CMAKE_CXX_STANDARD
endif(${TD_WINDOWS})
target_include_directories( target_include_directories(
geos_c geos_c
PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/geos/include> PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/geos/include>

View File

@ -3112,7 +3112,7 @@ typedef struct {
int32_t tSerializeSMDropStreamReq(void* buf, int32_t bufLen, const SMDropStreamReq* pReq); int32_t tSerializeSMDropStreamReq(void* buf, int32_t bufLen, const SMDropStreamReq* pReq);
int32_t tDeserializeSMDropStreamReq(void* buf, int32_t bufLen, SMDropStreamReq* pReq); int32_t tDeserializeSMDropStreamReq(void* buf, int32_t bufLen, SMDropStreamReq* pReq);
void tFreeSMDropStreamReq(SMDropStreamReq* pReq); void tFreeMDropStreamReq(SMDropStreamReq* pReq);
typedef struct { typedef struct {
char name[TSDB_STREAM_FNAME_LEN]; char name[TSDB_STREAM_FNAME_LEN];

View File

@ -45,6 +45,7 @@ typedef struct {
*/ */
SSnode *sndOpen(const char *path, const SSnodeOpt *pOption); SSnode *sndOpen(const char *path, const SSnodeOpt *pOption);
int32_t sndInit(SSnode * pSnode);
/** /**
* @brief Stop Snode in Dnode. * @brief Stop Snode in Dnode.
* *

View File

@ -0,0 +1,18 @@
//
// Created by mingming wanng on 2023/11/15.
//
#ifndef TDENGINE_STREAM_H
#define TDENGINE_STREAM_H
#define STREAM_EXEC_EXTRACT_DATA_IN_WAL_ID (-1)
#define STREAM_EXEC_START_ALL_TASKS_ID (-2)
#define STREAM_EXEC_RESTART_ALL_TASKS_ID (-3)
typedef struct STaskUpdateEntry {
int64_t streamId;
int32_t taskId;
int32_t transId;
} STaskUpdateEntry;
#endif // TDENGINE_STREAM_H

View File

@ -150,19 +150,6 @@ typedef struct {
int32_t colNum; int32_t colNum;
} SMetaStbStats; } SMetaStbStats;
// void tqReaderSetColIdList(STqReader *pReader, SArray *pColIdList);
// int32_t tqReaderSetTbUidList(STqReader *pReader, const SArray *tbUidList);
// int32_t tqReaderAddTbUidList(STqReader *pReader, const SArray *pTableUidList);
// int32_t tqReaderRemoveTbUidList(STqReader *pReader, const SArray *tbUidList);
// bool tqReaderIsQueriedTable(STqReader* pReader, uint64_t uid);
// bool tqCurrentBlockConsumed(const STqReader* pReader);
// int32_t tqReaderSeek(STqReader *pReader, int64_t ver, const char *id);
// bool tqNextBlockInWal(STqReader* pReader, const char* idstr);
// bool tqNextBlockImpl(STqReader *pReader, const char* idstr);
// int32_t getTableInfoFromSnapshot(SSnapContext *ctx, void **pBuf, int32_t *contLen, int16_t *type, int64_t
// *uid); SMetaTableInfo getMetaTableInfoFromSnapshot(SSnapContext *ctx); int32_t setForSnapShot(SSnapContext
// *ctx, int64_t uid); int32_t destroySnapContext(SSnapContext *ctx);
// clang-format off // clang-format off
/*-------------------------------------------------new api format---------------------------------------------------*/ /*-------------------------------------------------new api format---------------------------------------------------*/
typedef struct TsdReader { typedef struct TsdReader {
@ -197,27 +184,6 @@ typedef struct SStoreCacheReader {
// clang-format on // clang-format on
/*------------------------------------------------------------------------------------------------------------------*/ /*------------------------------------------------------------------------------------------------------------------*/
/*
void tqReaderSetColIdList(STqReader *pReader, SArray *pColIdList);
int32_t tqReaderSetTbUidList(STqReader *pReader, const SArray *tbUidList);
int32_t tqReaderAddTbUidList(STqReader *pReader, const SArray *pTableUidList);
int32_t tqReaderRemoveTbUidList(STqReader *pReader, const SArray *tbUidList);
bool tqReaderIsQueriedTable(STqReader* pReader, uint64_t uid);
bool tqCurrentBlockConsumed(const STqReader* pReader);
int32_t tqReaderSeek(STqReader *pReader, int64_t ver, const char *id);
bool tqNextBlockInWal(STqReader* pReader, const char* idstr);
bool tqNextBlockImpl(STqReader *pReader, const char* idstr);
int32_t tqRetrieveDataBlock(STqReader *pReader, SSDataBlock **pRes, const char* idstr);
STqReader *tqReaderOpen(void *pVnode);
void tqReaderClose(STqReader *);
int32_t tqReaderSetSubmitMsg(STqReader *pReader, void *msgStr, int32_t msgLen, int64_t ver);
bool tqNextDataBlockFilterOut(STqReader *pReader, SHashObj *filterOutUids);
SWalReader* tqGetWalReader(STqReader* pReader);
int32_t tqRetrieveTaosxBlock(STqReader *pReader, SArray *blocks, SArray *schemas, SSubmitTbData **pSubmitTbDataRet);
*/
// todo rename // todo rename
typedef struct SStoreTqReader { typedef struct SStoreTqReader {
struct STqReader* (*tqReaderOpen)(); struct STqReader* (*tqReaderOpen)();
@ -281,28 +247,18 @@ typedef struct SStoreMeta {
void* (*storeGetIndexInfo)(); void* (*storeGetIndexInfo)();
void* (*getInvertIndex)(void* pVnode); void* (*getInvertIndex)(void* pVnode);
int32_t (*getChildTableList)( // support filter and non-filter cases. [vnodeGetCtbIdList & vnodeGetCtbIdListByFilter]
void* pVnode, int64_t suid, int32_t (*getChildTableList)( void* pVnode, int64_t suid, SArray* list);
SArray* list); // support filter and non-filter cases. [vnodeGetCtbIdList & vnodeGetCtbIdListByFilter] int32_t (*storeGetTableList)(void* pVnode, int8_t type, SArray* pList);
int32_t (*storeGetTableList)(void* pVnode, int8_t type, SArray* pList); // vnodeGetStbIdList & vnodeGetAllTableList
void* storeGetVersionRange; void* storeGetVersionRange;
void* storeGetLastTimestamp; void* storeGetLastTimestamp;
int32_t (*getTableSchema)(void* pVnode, int64_t uid, STSchema** pSchema, int64_t* suid); // tsdbGetTableSchema int32_t (*getTableSchema)(void* pVnode, int64_t uid, STSchema** pSchema, int64_t* suid); // tsdbGetTableSchema
int32_t (*getNumOfChildTables)( void* pVnode, int64_t uid, int64_t* numOfTables, int32_t* numOfCols);
void (*getBasicInfo)(void* pVnode, const char** dbname, int32_t* vgId, int64_t* numOfTables, int64_t* numOfNormalTables);
// db name, vgId, numOfTables, numOfSTables
int32_t (*getNumOfChildTables)(
void* pVnode, int64_t uid, int64_t* numOfTables,
int32_t* numOfCols); // int32_t metaGetStbStats(SMeta *pMeta, int64_t uid, SMetaStbStats *pInfo);
void (*getBasicInfo)(void* pVnode, const char** dbname, int32_t* vgId, int64_t* numOfTables,
int64_t* numOfNormalTables); // vnodeGetInfo(void *pVnode, const char **dbname, int32_t *vgId) &
// metaGetTbNum(SMeta *pMeta) & metaGetNtbNum(SMeta *pMeta);
int64_t (*getNumOfRowsInMem)(void* pVnode); int64_t (*getNumOfRowsInMem)(void* pVnode);
/**
int32_t vnodeGetCtbIdList(void *pVnode, int64_t suid, SArray *list);
int32_t vnodeGetCtbIdListByFilter(void *pVnode, int64_t suid, SArray *list, bool (*filter)(void *arg), void *arg);
int32_t vnodeGetStbIdList(void *pVnode, int64_t suid, SArray *list);
*/
SMCtbCursor* (*openCtbCursor)(void *pVnode, tb_uid_t uid, int lock); SMCtbCursor* (*openCtbCursor)(void *pVnode, tb_uid_t uid, int lock);
int32_t (*resumeCtbCursor)(SMCtbCursor* pCtbCur, int8_t first); int32_t (*resumeCtbCursor)(SMCtbCursor* pCtbCur, int8_t first);
void (*pauseCtbCursor)(SMCtbCursor* pCtbCur); void (*pauseCtbCursor)(SMCtbCursor* pCtbCur);

View File

@ -305,6 +305,7 @@ typedef struct SCheckpointInfo {
int64_t processedVer; // already processed ver, that has generated results version. int64_t processedVer; // already processed ver, that has generated results version.
int64_t nextProcessVer; // current offset in WAL, not serialize it int64_t nextProcessVer; // current offset in WAL, not serialize it
int64_t failedId; // record the latest failed checkpoint id int64_t failedId; // record the latest failed checkpoint id
bool dispatchCheckpointTrigger;
} SCheckpointInfo; } SCheckpointInfo;
typedef struct SStreamStatus { typedef struct SStreamStatus {
@ -590,7 +591,7 @@ typedef struct {
int32_t downstreamNodeId; int32_t downstreamNodeId;
int32_t downstreamTaskId; int32_t downstreamTaskId;
int32_t childId; int32_t childId;
int32_t oldStage; int64_t oldStage;
int8_t status; int8_t status;
} SStreamTaskCheckRsp; } SStreamTaskCheckRsp;
@ -655,7 +656,7 @@ int32_t tDecodeStreamCheckpointReadyMsg(SDecoder* pDecoder, SStreamCheckpointRea
typedef struct STaskStatusEntry { typedef struct STaskStatusEntry {
STaskId id; STaskId id;
int32_t status; int32_t status;
int32_t stage; int64_t stage;
int32_t nodeId; int32_t nodeId;
int64_t verStart; // start version in WAL, only valid for source task int64_t verStart; // start version in WAL, only valid for source task
int64_t verEnd; // end version in WAL, only valid for source task int64_t verEnd; // end version in WAL, only valid for source task
@ -758,7 +759,7 @@ void initRpcMsg(SRpcMsg* pMsg, int32_t msgType, void* pCont, int32_t contLen)
// recover and fill history // recover and fill history
void streamTaskCheckDownstream(SStreamTask* pTask); void streamTaskCheckDownstream(SStreamTask* pTask);
int32_t streamTaskCheckStatus(SStreamTask* pTask, int32_t upstreamTaskId, int32_t vgId, int64_t stage); int32_t streamTaskCheckStatus(SStreamTask* pTask, int32_t upstreamTaskId, int32_t vgId, int64_t stage, int64_t* oldStage);
int32_t streamTaskUpdateEpsetInfo(SStreamTask* pTask, SArray* pNodeList); int32_t streamTaskUpdateEpsetInfo(SStreamTask* pTask, SArray* pNodeList);
void streamTaskResetUpstreamStageInfo(SStreamTask* pTask); void streamTaskResetUpstreamStageInfo(SStreamTask* pTask);
bool streamTaskAllUpstreamClosed(SStreamTask* pTask); bool streamTaskAllUpstreamClosed(SStreamTask* pTask);
@ -787,7 +788,6 @@ int32_t streamQueueGetNumOfItems(const SStreamQueue* pQueue);
int32_t streamRestoreParam(SStreamTask* pTask); int32_t streamRestoreParam(SStreamTask* pTask);
void streamTaskPause(SStreamTask* pTask, SStreamMeta* pMeta); void streamTaskPause(SStreamTask* pTask, SStreamMeta* pMeta);
void streamTaskResume(SStreamTask* pTask); void streamTaskResume(SStreamTask* pTask);
void streamTaskEnablePause(SStreamTask* pTask);
int32_t streamTaskSetUpstreamInfo(SStreamTask* pTask, const SStreamTask* pUpstreamTask); int32_t streamTaskSetUpstreamInfo(SStreamTask* pTask, const SStreamTask* pUpstreamTask);
void streamTaskUpdateUpstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpSet* pEpSet); void streamTaskUpdateUpstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpSet* pEpSet);
void streamTaskUpdateDownstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpSet* pEpSet); void streamTaskUpdateDownstreamInfo(SStreamTask* pTask, int32_t nodeId, const SEpSet* pEpSet);
@ -829,7 +829,6 @@ int32_t streamMetaLoadAllTasks(SStreamMeta* pMeta);
int64_t streamMetaGetLatestCheckpointId(SStreamMeta* pMeta); int64_t streamMetaGetLatestCheckpointId(SStreamMeta* pMeta);
void streamMetaNotifyClose(SStreamMeta* pMeta); void streamMetaNotifyClose(SStreamMeta* pMeta);
void streamMetaStartHb(SStreamMeta* pMeta); void streamMetaStartHb(SStreamMeta* pMeta);
void streamMetaInitForSnode(SStreamMeta* pMeta);
bool streamMetaTaskInTimer(SStreamMeta* pMeta); bool streamMetaTaskInTimer(SStreamMeta* pMeta);
int32_t streamMetaUpdateTaskDownstreamStatus(SStreamTask* pTask, int64_t startTs, int64_t endTs, bool succ); int32_t streamMetaUpdateTaskDownstreamStatus(SStreamTask* pTask, int64_t startTs, int64_t endTs, bool succ);
void streamMetaRLock(SStreamMeta* pMeta); void streamMetaRLock(SStreamMeta* pMeta);

View File

@ -63,6 +63,7 @@ typedef struct TdFile *TdFilePtr;
#define TD_FILE_AUTO_DEL 0x0040 #define TD_FILE_AUTO_DEL 0x0040
#define TD_FILE_EXCL 0x0080 #define TD_FILE_EXCL 0x0080
#define TD_FILE_STREAM 0x0100 // Only support taosFprintfFile, taosGetLineFile, taosEOFFile #define TD_FILE_STREAM 0x0100 // Only support taosFprintfFile, taosGetLineFile, taosEOFFile
#define TD_FILE_WRITE_THROUGH 0x0200
TdFilePtr taosOpenFile(const char *path, int32_t tdFileOptions); TdFilePtr taosOpenFile(const char *path, int32_t tdFileOptions);
TdFilePtr taosCreateFile(const char *path, int32_t tdFileOptions); TdFilePtr taosCreateFile(const char *path, int32_t tdFileOptions);

View File

@ -66,6 +66,7 @@ extern int32_t udfDebugFlag;
extern int32_t smaDebugFlag; extern int32_t smaDebugFlag;
extern int32_t idxDebugFlag; extern int32_t idxDebugFlag;
extern int32_t tdbDebugFlag; extern int32_t tdbDebugFlag;
extern int32_t sndDebugFlag;
int32_t taosInitLog(const char *logName, int32_t maxFiles); int32_t taosInitLog(const char *logName, int32_t maxFiles);
void taosCloseLog(); void taosCloseLog();

View File

@ -167,7 +167,7 @@ static const SSysDbTableSchema streamTaskSchema[] = {
{.name = "node_id", .bytes = 4, .type = TSDB_DATA_TYPE_INT, .sysInfo = false}, {.name = "node_id", .bytes = 4, .type = TSDB_DATA_TYPE_INT, .sysInfo = false},
{.name = "level", .bytes = 10 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, {.name = "level", .bytes = 10 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false},
{.name = "status", .bytes = 15 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, {.name = "status", .bytes = 15 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false},
{.name = "stage", .bytes = 4, .type = TSDB_DATA_TYPE_INT, .sysInfo = false}, {.name = "stage", .bytes = 8, .type = TSDB_DATA_TYPE_BIGINT, .sysInfo = false},
{.name = "in_queue", .bytes = 20, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, {.name = "in_queue", .bytes = 20, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false},
// {.name = "out_queue", .bytes = 20, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, // {.name = "out_queue", .bytes = 20, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false},
{.name = "info", .bytes = 25, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false}, {.name = "info", .bytes = 25, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false},

View File

@ -3590,9 +3590,9 @@ void (*tColDataCalcSMA[])(SColData *pColData, int64_t *sum, int64_t *max, int64_
tColDataCalcSMAUInt, // TSDB_DATA_TYPE_UINT tColDataCalcSMAUInt, // TSDB_DATA_TYPE_UINT
tColDataCalcSMAUBigInt, // TSDB_DATA_TYPE_UBIGINT tColDataCalcSMAUBigInt, // TSDB_DATA_TYPE_UBIGINT
tColDataCalcSMAVarType, // TSDB_DATA_TYPE_JSON tColDataCalcSMAVarType, // TSDB_DATA_TYPE_JSON
NULL, // TSDB_DATA_TYPE_VARBINARY tColDataCalcSMAVarType, // TSDB_DATA_TYPE_VARBINARY
NULL, // TSDB_DATA_TYPE_DECIMAL tColDataCalcSMAVarType, // TSDB_DATA_TYPE_DECIMAL
NULL, // TSDB_DATA_TYPE_BLOB tColDataCalcSMAVarType, // TSDB_DATA_TYPE_BLOB
NULL, // TSDB_DATA_TYPE_MEDIUMBLOB NULL, // TSDB_DATA_TYPE_MEDIUMBLOB
tColDataCalcSMAVarType // TSDB_DATA_TYPE_GEOMETRY tColDataCalcSMAVarType // TSDB_DATA_TYPE_GEOMETRY
}; };

View File

@ -354,11 +354,16 @@ static int32_t taosLoadCfg(SConfig *pCfg, const char **envCmd, const char *input
char cfgFile[PATH_MAX + 100] = {0}; char cfgFile[PATH_MAX + 100] = {0};
taosExpandDir(inputCfgDir, cfgDir, PATH_MAX); taosExpandDir(inputCfgDir, cfgDir, PATH_MAX);
char lastC = cfgDir[strlen(cfgDir) - 1];
char *tdDirsep = TD_DIRSEP;
if (lastC == '\\' || lastC == '/') {
tdDirsep = "";
}
if (taosIsDir(cfgDir)) { if (taosIsDir(cfgDir)) {
#ifdef CUS_PROMPT #ifdef CUS_PROMPT
snprintf(cfgFile, sizeof(cfgFile), "%s" TD_DIRSEP "%s.cfg", cfgDir, CUS_PROMPT); snprintf(cfgFile, sizeof(cfgFile), "%s" "%s" "%s.cfg", cfgDir, tdDirsep, CUS_PROMPT);
#else #else
snprintf(cfgFile, sizeof(cfgFile), "%s" TD_DIRSEP "taos.cfg", cfgDir); snprintf(cfgFile, sizeof(cfgFile), "%s" "%s" "taos.cfg", cfgDir, tdDirsep);
#endif #endif
} else { } else {
tstrncpy(cfgFile, cfgDir, sizeof(cfgDir)); tstrncpy(cfgFile, cfgDir, sizeof(cfgDir));
@ -431,6 +436,7 @@ static int32_t taosAddServerLogCfg(SConfig *pCfg) {
if (cfgAddInt32(pCfg, "tdbDebugFlag", tdbDebugFlag, 0, 255, CFG_SCOPE_SERVER, CFG_DYN_SERVER) != 0) return -1; if (cfgAddInt32(pCfg, "tdbDebugFlag", tdbDebugFlag, 0, 255, CFG_SCOPE_SERVER, CFG_DYN_SERVER) != 0) return -1;
if (cfgAddInt32(pCfg, "metaDebugFlag", metaDebugFlag, 0, 255, CFG_SCOPE_SERVER, CFG_DYN_SERVER) != 0) return -1; if (cfgAddInt32(pCfg, "metaDebugFlag", metaDebugFlag, 0, 255, CFG_SCOPE_SERVER, CFG_DYN_SERVER) != 0) return -1;
if (cfgAddInt32(pCfg, "stDebugFlag", stDebugFlag, 0, 255, CFG_SCOPE_SERVER, CFG_DYN_SERVER) != 0) return -1; if (cfgAddInt32(pCfg, "stDebugFlag", stDebugFlag, 0, 255, CFG_SCOPE_SERVER, CFG_DYN_SERVER) != 0) return -1;
if (cfgAddInt32(pCfg, "sndDebugFlag", sndDebugFlag, 0, 255, CFG_SCOPE_SERVER, CFG_DYN_SERVER) != 0) return -1;
return 0; return 0;
} }
@ -951,6 +957,7 @@ static void taosSetServerLogCfg(SConfig *pCfg) {
tdbDebugFlag = cfgGetItem(pCfg, "tdbDebugFlag")->i32; tdbDebugFlag = cfgGetItem(pCfg, "tdbDebugFlag")->i32;
metaDebugFlag = cfgGetItem(pCfg, "metaDebugFlag")->i32; metaDebugFlag = cfgGetItem(pCfg, "metaDebugFlag")->i32;
stDebugFlag = cfgGetItem(pCfg, "stDebugFlag")->i32; stDebugFlag = cfgGetItem(pCfg, "stDebugFlag")->i32;
sndDebugFlag = cfgGetItem(pCfg, "sndDebugFlag")->i32;
} }
static int32_t taosSetSlowLogScope(char *pScope) { static int32_t taosSetSlowLogScope(char *pScope) {
@ -1424,7 +1431,7 @@ static int32_t taosCfgDynamicOptionsForServer(SConfig *pCfg, char *name) {
{"smaDebugFlag", &smaDebugFlag}, {"idxDebugFlag", &idxDebugFlag}, {"tdbDebugFlag", &tdbDebugFlag}, {"smaDebugFlag", &smaDebugFlag}, {"idxDebugFlag", &idxDebugFlag}, {"tdbDebugFlag", &tdbDebugFlag},
{"tmrDebugFlag", &tmrDebugFlag}, {"uDebugFlag", &uDebugFlag}, {"smaDebugFlag", &smaDebugFlag}, {"tmrDebugFlag", &tmrDebugFlag}, {"uDebugFlag", &uDebugFlag}, {"smaDebugFlag", &smaDebugFlag},
{"rpcDebugFlag", &rpcDebugFlag}, {"qDebugFlag", &qDebugFlag}, {"metaDebugFlag", &metaDebugFlag}, {"rpcDebugFlag", &rpcDebugFlag}, {"qDebugFlag", &qDebugFlag}, {"metaDebugFlag", &metaDebugFlag},
{"jniDebugFlag", &jniDebugFlag}, {"stDebugFlag", &stDebugFlag}, {"jniDebugFlag", &jniDebugFlag}, {"stDebugFlag", &stDebugFlag}, {"sndDebugFlag", &sndDebugFlag},
}; };
static OptionNameAndVar options[] = { static OptionNameAndVar options[] = {
@ -1732,6 +1739,7 @@ void taosSetAllDebugFlag(int32_t flag, bool rewrite) {
taosSetDebugFlag(&tdbDebugFlag, "tdbDebugFlag", flag, rewrite); taosSetDebugFlag(&tdbDebugFlag, "tdbDebugFlag", flag, rewrite);
taosSetDebugFlag(&metaDebugFlag, "metaDebugFlag", flag, rewrite); taosSetDebugFlag(&metaDebugFlag, "metaDebugFlag", flag, rewrite);
taosSetDebugFlag(&stDebugFlag, "stDebugFlag", flag, rewrite); taosSetDebugFlag(&stDebugFlag, "stDebugFlag", flag, rewrite);
taosSetDebugFlag(&sndDebugFlag, "sndDebugFlag", flag, rewrite);
uInfo("all debug flag are set to %d", flag); uInfo("all debug flag are set to %d", flag);
} }

View File

@ -7152,7 +7152,7 @@ int32_t tDeserializeSMDropStreamReq(void *buf, int32_t bufLen, SMDropStreamReq *
return 0; return 0;
} }
void tFreeSMDropStreamReq(SMDropStreamReq *pReq) { void tFreeMDropStreamReq(SMDropStreamReq *pReq) {
FREESQL(); FREESQL();
} }

View File

@ -169,7 +169,7 @@ int32_t mmWriteFile(const char *path, const SMnodeOpt *pOption) {
if (buffer == NULL) goto _OVER; if (buffer == NULL) goto _OVER;
terrno = 0; terrno = 0;
pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC | TD_FILE_WRITE_THROUGH);
if (pFile == NULL) goto _OVER; if (pFile == NULL) goto _OVER;
int32_t len = strlen(buffer); int32_t len = strlen(buffer);

View File

@ -73,6 +73,7 @@ SArray *smGetMsgHandles() {
SArray *pArray = taosArrayInit(4, sizeof(SMgmtHandle)); SArray *pArray = taosArrayInit(4, sizeof(SMgmtHandle));
if (pArray == NULL) goto _OVER; if (pArray == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_UPDATE, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DEPLOY, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DEPLOY, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DROP, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DROP, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RUN, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RUN, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER;
@ -87,7 +88,8 @@ SArray *smGetMsgHandles() {
if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_CHECK_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TASK_CHECK_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_SCAN_HISTORY_FINISH, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_SCAN_HISTORY_FINISH, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_SCAN_HISTORY_FINISH_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_SCAN_HISTORY_FINISH_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_CHECK_POINT_SOURCE, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECKPOINT_READY, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER;
code = 0; code = 0;
_OVER: _OVER:

View File

@ -76,9 +76,14 @@ int32_t smOpen(SMgmtInputOpt *pInput, SMgmtOutputOpt *pOutput) {
return 0; return 0;
} }
static int32_t smStartSnodes(SSnodeMgmt *pMgmt) {
return sndInit(pMgmt->pSnode);
}
SMgmtFunc smGetMgmtFunc() { SMgmtFunc smGetMgmtFunc() {
SMgmtFunc mgmtFunc = {0}; SMgmtFunc mgmtFunc = {0};
mgmtFunc.openFp = smOpen; mgmtFunc.openFp = smOpen;
mgmtFunc.startFp = (NodeStartFp)smStartSnodes;
mgmtFunc.closeFp = (NodeCloseFp)smClose; mgmtFunc.closeFp = (NodeCloseFp)smClose;
mgmtFunc.createFp = (NodeCreateFp)smProcessCreateReq; mgmtFunc.createFp = (NodeCreateFp)smProcessCreateReq;
mgmtFunc.dropFp = (NodeDropFp)smProcessDropReq; mgmtFunc.dropFp = (NodeDropFp)smProcessDropReq;

View File

@ -200,7 +200,7 @@ int32_t vmWriteVnodeListToFile(SVnodeMgmt *pMgmt) {
if (buffer == NULL) goto _OVER; if (buffer == NULL) goto _OVER;
terrno = 0; terrno = 0;
pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC | TD_FILE_WRITE_THROUGH);
if (pFile == NULL) goto _OVER; if (pFile == NULL) goto _OVER;
int32_t len = strlen(buffer); int32_t len = strlen(buffer);

View File

@ -232,7 +232,7 @@ int32_t dmWriteEps(SDnodeData *pData) {
if (buffer == NULL) goto _OVER; if (buffer == NULL) goto _OVER;
terrno = 0; terrno = 0;
pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC | TD_FILE_WRITE_THROUGH);
if (pFile == NULL) goto _OVER; if (pFile == NULL) goto _OVER;
int32_t len = strlen(buffer); int32_t len = strlen(buffer);

View File

@ -120,7 +120,7 @@ int32_t dmWriteFile(const char *path, const char *name, bool deployed) {
if (buffer == NULL) goto _OVER; if (buffer == NULL) goto _OVER;
terrno = 0; terrno = 0;
pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC | TD_FILE_WRITE_THROUGH);
if (pFile == NULL) goto _OVER; if (pFile == NULL) goto _OVER;
int32_t len = strlen(buffer); int32_t len = strlen(buffer);

View File

@ -652,7 +652,6 @@ typedef struct SStreamConf {
typedef struct { typedef struct {
char name[TSDB_STREAM_FNAME_LEN]; char name[TSDB_STREAM_FNAME_LEN];
// ctl
SRWLatch lock; SRWLatch lock;
// create info // create info

View File

@ -22,18 +22,38 @@
extern "C" { extern "C" {
#endif #endif
typedef struct SStreamTransInfo {
int64_t startTime;
int32_t transId;
const char *name;
} SStreamTransInfo;
typedef struct SStreamTransMgmt {
SHashObj *pDBTrans;
} SStreamTransMgmt;
typedef struct SStreamExecInfo {
SArray *pNodeList;
int64_t ts; // snapshot ts
SStreamTransMgmt transMgmt;
int64_t activeCheckpoint; // active check point id
SHashObj * pTaskMap;
SArray * pTaskList;
TdThreadMutex lock;
} SStreamExecInfo;
extern SStreamExecInfo execInfo;
int32_t mndInitStream(SMnode *pMnode); int32_t mndInitStream(SMnode *pMnode);
void mndCleanupStream(SMnode *pMnode); void mndCleanupStream(SMnode *pMnode);
SStreamObj *mndAcquireStream(SMnode *pMnode, char *streamName); SStreamObj *mndAcquireStream(SMnode *pMnode, char *streamName);
void mndReleaseStream(SMnode *pMnode, SStreamObj *pStream); void mndReleaseStream(SMnode *pMnode, SStreamObj *pStream);
SSdbRaw *mndStreamActionEncode(SStreamObj *pStream);
SSdbRow *mndStreamActionDecode(SSdbRaw *pRaw);
int32_t mndDropStreamByDb(SMnode *pMnode, STrans *pTrans, SDbObj *pDb); int32_t mndDropStreamByDb(SMnode *pMnode, STrans *pTrans, SDbObj *pDb);
int32_t mndPersistStream(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream); int32_t mndPersistStream(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream);
int32_t mndStreamRegisterTrans(STrans* pTrans, const char* pName, const char* pSrcDb, const char* pDstDb);
bool streamTransConflictOtherTrans(SMnode *pMnode, const char *pSrcDb, const char *pDstDb);
// for sma // for sma
// TODO refactor // TODO refactor
int32_t mndDropStreamTasks(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream); int32_t mndDropStreamTasks(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream);

View File

@ -919,7 +919,7 @@ static int32_t mndSetAlterDbRedoActions(SMnode *pMnode, STrans *pTrans, SDbObj *
if (pIter == NULL) break; if (pIter == NULL) break;
if (mndVgroupInDb(pVgroup, pNewDb->uid)) { if (mndVgroupInDb(pVgroup, pNewDb->uid)) {
if (mndBuildRaftAlterVgroupAction(pMnode, pTrans, pOldDb, pNewDb, pVgroup, pArray) != 0) { if (mndBuildAlterVgroupAction(pMnode, pTrans, pOldDb, pNewDb, pVgroup, pArray) != 0) {
sdbCancelFetch(pSdb, pIter); sdbCancelFetch(pSdb, pIter);
sdbRelease(pSdb, pVgroup); sdbRelease(pSdb, pVgroup);
taosArrayDestroy(pArray); taosArrayDestroy(pArray);

View File

@ -605,7 +605,7 @@ void mndDumpSdb() {
char *pCont = tjsonToString(json); char *pCont = tjsonToString(json);
int32_t contLen = strlen(pCont); int32_t contLen = strlen(pCont);
char file[] = "sdb.json"; char file[] = "sdb.json";
TdFilePtr pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); TdFilePtr pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC| TD_FILE_WRITE_THROUGH);
if (pFile == NULL) { if (pFile == NULL) {
terrno = TAOS_SYSTEM_ERROR(errno); terrno = TAOS_SYSTEM_ERROR(errno);
mError("failed to write %s since %s", file, terrstr()); mError("failed to write %s since %s", file, terrstr());

View File

@ -34,7 +34,13 @@
#define MND_STREAM_VER_NUMBER 4 #define MND_STREAM_VER_NUMBER 4
#define MND_STREAM_RESERVE_SIZE 64 #define MND_STREAM_RESERVE_SIZE 64
#define MND_STREAM_MAX_NUM 60 #define MND_STREAM_MAX_NUM 60
#define MND_STREAM_CHECKPOINT_NAME "stream-checkpoint" #define MND_STREAM_CHECKPOINT_NAME "stream-checkpoint"
#define MND_STREAM_PAUSE_NAME "stream-pause"
#define MND_STREAM_RESUME_NAME "stream-resume"
#define MND_STREAM_DROP_NAME "stream-drop"
#define MND_STREAM_TASK_RESET_NAME "stream-task-reset"
#define MND_STREAM_TASK_UPDATE_NAME "stream-task-update"
typedef struct SNodeEntry { typedef struct SNodeEntry {
int32_t nodeId; int32_t nodeId;
@ -43,22 +49,13 @@ typedef struct SNodeEntry {
int64_t hbTimestamp; // second int64_t hbTimestamp; // second
} SNodeEntry; } SNodeEntry;
typedef struct SStreamExecInfo {
SArray *pNodeList;
int64_t ts; // snapshot ts
int64_t activeCheckpoint; // active check point id
SHashObj * pTaskMap;
SArray * pTaskList;
TdThreadMutex lock;
} SStreamExecInfo;
typedef struct SVgroupChangeInfo { typedef struct SVgroupChangeInfo {
SHashObj *pDBMap; SHashObj *pDBMap;
SArray * pUpdateNodeList; // SArray<SNodeUpdateInfo> SArray * pUpdateNodeList; // SArray<SNodeUpdateInfo>
} SVgroupChangeInfo; } SVgroupChangeInfo;
static int32_t mndNodeCheckSentinel = 0; static int32_t mndNodeCheckSentinel = 0;
static SStreamExecInfo execInfo; SStreamExecInfo execInfo;
static int32_t mndStreamActionInsert(SSdb *pSdb, SStreamObj *pStream); static int32_t mndStreamActionInsert(SSdb *pSdb, SStreamObj *pStream);
static int32_t mndStreamActionDelete(SSdb *pSdb, SStreamObj *pStream); static int32_t mndStreamActionDelete(SSdb *pSdb, SStreamObj *pStream);
@ -83,17 +80,20 @@ static SArray *mndTakeVgroupSnapshot(SMnode *pMnode, bool *allReady);
static SVgroupChangeInfo mndFindChangedNodeInfo(SMnode *pMnode, const SArray *pPrevNodeList, const SArray *pNodeList); static SVgroupChangeInfo mndFindChangedNodeInfo(SMnode *pMnode, const SArray *pPrevNodeList, const SArray *pNodeList);
static STrans *doCreateTrans(SMnode *pMnode, SStreamObj *pStream, const char *name); static STrans *doCreateTrans(SMnode *pMnode, SStreamObj *pStream, SRpcMsg *pReq, const char *name, const char* pMsg);
static int32_t mndPersistTransLog(SStreamObj *pStream, STrans *pTrans); static int32_t mndPersistTransLog(SStreamObj *pStream, STrans *pTrans);
static void initTransAction(STransAction *pAction, void *pCont, int32_t contLen, int32_t msgType, const SEpSet *pEpset, static void initTransAction(STransAction *pAction, void *pCont, int32_t contLen, int32_t msgType, const SEpSet *pEpset,
int32_t retryCode); int32_t retryCode);
static int32_t createStreamUpdateTrans(SStreamObj *pStream, SVgroupChangeInfo *pInfo, STrans *pTrans); static int32_t createStreamUpdateTrans(SStreamObj *pStream, SVgroupChangeInfo *pInfo, STrans *pTrans);
static void removeStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode); static void removeStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode);
static void keepStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode); static void saveStreamTasksInfo(SStreamObj *pStream, SStreamExecInfo *pExecNode);
static int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot); static int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot);
static int32_t doKillActiveCheckpointTrans(SMnode *pMnode); static int32_t killActiveCheckpointTrans(SMnode *pMnode, const char* pDbName, size_t len);
static int32_t setNodeEpsetExpiredFlag(const SArray *pNodeList); static int32_t setNodeEpsetExpiredFlag(const SArray *pNodeList);
static SSdbRaw *mndStreamActionEncode(SStreamObj *pStream);
static SSdbRow *mndStreamActionDecode(SSdbRaw *pRaw);
int32_t mndInitStream(SMnode *pMnode) { int32_t mndInitStream(SMnode *pMnode) {
SSdbTable table = { SSdbTable table = {
.sdbType = SDB_STREAM, .sdbType = SDB_STREAM,
@ -133,8 +133,11 @@ int32_t mndInitStream(SMnode *pMnode) {
mndAddShowFreeIterHandle(pMnode, TSDB_MGMT_TABLE_STREAM_TASKS, mndCancelGetNextStreamTask); mndAddShowFreeIterHandle(pMnode, TSDB_MGMT_TABLE_STREAM_TASKS, mndCancelGetNextStreamTask);
taosThreadMutexInit(&execInfo.lock, NULL); taosThreadMutexInit(&execInfo.lock, NULL);
execInfo.pTaskMap = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_VARCHAR), true, HASH_NO_LOCK); _hash_fn_t fn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_VARCHAR);
execInfo.pTaskList = taosArrayInit(4, sizeof(STaskId)); execInfo.pTaskList = taosArrayInit(4, sizeof(STaskId));
execInfo.pTaskMap = taosHashInit(64, fn, true, HASH_NO_LOCK);
execInfo.transMgmt.pDBTrans = taosHashInit(32, fn, true, HASH_NO_LOCK);
return sdbSetTable(pMnode->pSdb, table); return sdbSetTable(pMnode->pSdb, table);
} }
@ -142,6 +145,7 @@ int32_t mndInitStream(SMnode *pMnode) {
void mndCleanupStream(SMnode *pMnode) { void mndCleanupStream(SMnode *pMnode) {
taosArrayDestroy(execInfo.pTaskList); taosArrayDestroy(execInfo.pTaskList);
taosHashCleanup(execInfo.pTaskMap); taosHashCleanup(execInfo.pTaskMap);
taosHashCleanup(execInfo.transMgmt.pDBTrans);
taosThreadMutexDestroy(&execInfo.lock); taosThreadMutexDestroy(&execInfo.lock);
mDebug("mnd stream exec info cleanup"); mDebug("mnd stream exec info cleanup");
} }
@ -335,7 +339,7 @@ static int32_t mndStreamGetPlanString(const char *ast, int8_t triggerType, int64
.pAstRoot = pAst, .pAstRoot = pAst,
.topicQuery = false, .topicQuery = false,
.streamQuery = true, .streamQuery = true,
.triggerType = triggerType == STREAM_TRIGGER_MAX_DELAY ? STREAM_TRIGGER_WINDOW_CLOSE : triggerType, .triggerType = (triggerType == STREAM_TRIGGER_MAX_DELAY) ? STREAM_TRIGGER_WINDOW_CLOSE : triggerType,
.watermark = watermark, .watermark = watermark,
}; };
code = qCreateQueryPlan(&cxt, &pPlan, NULL); code = qCreateQueryPlan(&cxt, &pPlan, NULL);
@ -720,6 +724,34 @@ int32_t mndDropStreamTasks(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream)
return 0; return 0;
} }
static int32_t checkForNumOfStreams(SMnode *pMnode, SStreamObj *pStreamObj) { // check for number of existed tasks
int32_t numOfStream = 0;
SStreamObj *pStream = NULL;
void *pIter = NULL;
while ((pIter = sdbFetch(pMnode->pSdb, SDB_STREAM, pIter, (void **)&pStream)) != NULL) {
if (pStream->sourceDbUid == pStreamObj->sourceDbUid) {
++numOfStream;
}
sdbRelease(pMnode->pSdb, pStream);
if (numOfStream > MND_STREAM_MAX_NUM) {
mError("too many streams, no more than %d for each database", MND_STREAM_MAX_NUM);
sdbCancelFetch(pMnode->pSdb, pIter);
return TSDB_CODE_MND_TOO_MANY_STREAMS;
}
if (pStream->targetStbUid == pStreamObj->targetStbUid) {
mError("Cannot write the same stable as other stream:%s", pStream->name);
sdbCancelFetch(pMnode->pSdb, pIter);
return TSDB_CODE_MND_INVALID_TARGET_TABLE;
}
}
return TSDB_CODE_SUCCESS;
}
static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) {
SMnode * pMnode = pReq->info.node; SMnode * pMnode = pReq->info.node;
int32_t code = -1; int32_t code = -1;
@ -732,6 +764,7 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) {
terrno = TSDB_CODE_INVALID_MSG; terrno = TSDB_CODE_INVALID_MSG;
goto _OVER; goto _OVER;
} }
#ifdef WINDOWS #ifdef WINDOWS
terrno = TSDB_CODE_MND_INVALID_PLATFORM; terrno = TSDB_CODE_MND_INVALID_PLATFORM;
goto _OVER; goto _OVER;
@ -772,43 +805,10 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) {
goto _OVER; goto _OVER;
} }
{ code = checkForNumOfStreams(pMnode, &streamObj);
int32_t numOfStream = 0; if (code != TSDB_CODE_SUCCESS) {
SStreamObj *pStream = NULL;
void * pIter = NULL;
while (1) {
pIter = sdbFetch(pMnode->pSdb, SDB_STREAM, pIter, (void **)&pStream);
if (pIter == NULL) {
if (numOfStream > MND_STREAM_MAX_NUM) {
mError("too many streams, no more than %d for each database", MND_STREAM_MAX_NUM);
terrno = TSDB_CODE_MND_TOO_MANY_STREAMS;
goto _OVER; goto _OVER;
} }
break;
}
if (pStream->sourceDbUid == streamObj.sourceDbUid) {
++numOfStream;
}
sdbRelease(pMnode->pSdb, pStream);
if (numOfStream > MND_STREAM_MAX_NUM) {
mError("too many streams, no more than %d for each database", MND_STREAM_MAX_NUM);
terrno = TSDB_CODE_MND_TOO_MANY_STREAMS;
sdbCancelFetch(pMnode->pSdb, pIter);
goto _OVER;
}
if (pStream->targetStbUid == streamObj.targetStbUid) {
mError("Cannot write the same stable as other stream:%s", pStream->name);
terrno = TSDB_CODE_MND_INVALID_TARGET_TABLE;
sdbCancelFetch(pMnode->pSdb, pIter);
goto _OVER;
}
}
}
STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_ROLLBACK, TRN_CONFLICT_DB_INSIDE, pReq, "create-stream"); STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_ROLLBACK, TRN_CONFLICT_DB_INSIDE, pReq, "create-stream");
if (pTrans == NULL) { if (pTrans == NULL) {
@ -866,7 +866,7 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) {
taosThreadMutexLock(&execInfo.lock); taosThreadMutexLock(&execInfo.lock);
mDebug("stream tasks register into node list"); mDebug("stream tasks register into node list");
keepStreamTasksInBuf(&streamObj, &execInfo); saveStreamTasksInfo(&streamObj, &execInfo);
taosThreadMutexUnlock(&execInfo.lock); taosThreadMutexUnlock(&execInfo.lock);
code = TSDB_CODE_ACTION_IN_PROGRESS; code = TSDB_CODE_ACTION_IN_PROGRESS;
@ -893,7 +893,6 @@ _OVER:
} }
mndReleaseStream(pMnode, pStream); mndReleaseStream(pMnode, pStream);
tFreeSCMCreateStreamReq(&createStreamReq); tFreeSCMCreateStreamReq(&createStreamReq);
tFreeStreamObj(&streamObj); tFreeStreamObj(&streamObj);
if(sql != NULL){ if(sql != NULL){
@ -1268,7 +1267,7 @@ static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) {
SMStreamDoCheckpointMsg *pMsg = (SMStreamDoCheckpointMsg *)pReq->pCont; SMStreamDoCheckpointMsg *pMsg = (SMStreamDoCheckpointMsg *)pReq->pCont;
int64_t checkpointId = pMsg->checkpointId; int64_t checkpointId = pMsg->checkpointId;
STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB, NULL, MND_STREAM_CHECKPOINT_NAME); STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, NULL, MND_STREAM_CHECKPOINT_NAME);
if (pTrans == NULL) { if (pTrans == NULL) {
mError("failed to trigger checkpoint, reason: %s", tstrerror(TSDB_CODE_OUT_OF_MEMORY)); mError("failed to trigger checkpoint, reason: %s", tstrerror(TSDB_CODE_OUT_OF_MEMORY));
return -1; return -1;
@ -1277,7 +1276,8 @@ static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) {
mDebug("start to trigger checkpoint, checkpointId: %" PRId64, checkpointId); mDebug("start to trigger checkpoint, checkpointId: %" PRId64, checkpointId);
const char *pDb = mndGetStreamDB(pMnode); const char *pDb = mndGetStreamDB(pMnode);
mndTransSetDbName(pTrans, pDb, "checkpoint"); mndTransSetDbName(pTrans, pDb, pDb);
mndStreamRegisterTrans(pTrans, MND_STREAM_CHECKPOINT_NAME, pDb, pDb);
taosMemoryFree((void *)pDb); taosMemoryFree((void *)pDb);
if (mndTransCheckConflict(pMnode, pTrans) != 0) { if (mndTransCheckConflict(pMnode, pTrans) != 0) {
@ -1329,46 +1329,56 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) {
if (dropReq.igNotExists) { if (dropReq.igNotExists) {
mInfo("stream:%s not exist, ignore not exist is set", dropReq.name); mInfo("stream:%s not exist, ignore not exist is set", dropReq.name);
sdbRelease(pMnode->pSdb, pStream); sdbRelease(pMnode->pSdb, pStream);
tFreeSMDropStreamReq(&dropReq); tFreeMDropStreamReq(&dropReq);
return 0; return 0;
} else { } else {
terrno = TSDB_CODE_MND_STREAM_NOT_EXIST; terrno = TSDB_CODE_MND_STREAM_NOT_EXIST;
mError("stream:%s not exist failed to drop", dropReq.name); mError("stream:%s not exist failed to drop", dropReq.name);
tFreeSMDropStreamReq(&dropReq); tFreeMDropStreamReq(&dropReq);
return -1; return -1;
} }
} }
if (mndCheckDbPrivilegeByName(pMnode, pReq->info.conn.user, MND_OPER_WRITE_DB, pStream->targetDb) != 0) { if (mndCheckDbPrivilegeByName(pMnode, pReq->info.conn.user, MND_OPER_WRITE_DB, pStream->targetDb) != 0) {
sdbRelease(pMnode->pSdb, pStream); sdbRelease(pMnode->pSdb, pStream);
tFreeSMDropStreamReq(&dropReq); tFreeMDropStreamReq(&dropReq);
return -1; return -1;
} }
STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, pReq, "drop-stream"); // check if it is conflict with other trans in both sourceDb and targetDb.
bool conflict = streamTransConflictOtherTrans(pMnode, pStream->sourceDb, pStream->targetDb);
if (conflict) {
sdbRelease(pMnode->pSdb, pStream);
tFreeMDropStreamReq(&dropReq);
return -1;
}
STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, pReq, MND_STREAM_DROP_NAME);
if (pTrans == NULL) { if (pTrans == NULL) {
mError("stream:%s, failed to drop since %s", dropReq.name, terrstr()); mError("stream:%s, failed to drop since %s", dropReq.name, terrstr());
sdbRelease(pMnode->pSdb, pStream); sdbRelease(pMnode->pSdb, pStream);
tFreeSMDropStreamReq(&dropReq); tFreeMDropStreamReq(&dropReq);
return -1; return -1;
} }
mInfo("trans:%d, used to drop stream:%s", pTrans->id, dropReq.name); mInfo("trans:%d used to drop stream:%s", pTrans->id, dropReq.name);
mndTransSetDbName(pTrans, pStream->sourceDb, pStream->targetDb); mndTransSetDbName(pTrans, pStream->sourceDb, pStream->targetDb);
if (mndTransCheckConflict(pMnode, pTrans) != 0) { if (mndTransCheckConflict(pMnode, pTrans) != 0) {
sdbRelease(pMnode->pSdb, pStream); sdbRelease(pMnode->pSdb, pStream);
mndTransDrop(pTrans); mndTransDrop(pTrans);
tFreeSMDropStreamReq(&dropReq); tFreeMDropStreamReq(&dropReq);
return -1; return -1;
} }
int32_t code = mndStreamRegisterTrans(pTrans, MND_STREAM_DROP_NAME, pStream->sourceDb, pStream->targetDb);
// drop all tasks // drop all tasks
if (mndDropStreamTasks(pMnode, pTrans, pStream) < 0) { if (mndDropStreamTasks(pMnode, pTrans, pStream) < 0) {
mError("stream:%s, failed to drop task since %s", dropReq.name, terrstr()); mError("stream:%s, failed to drop task since %s", dropReq.name, terrstr());
sdbRelease(pMnode->pSdb, pStream); sdbRelease(pMnode->pSdb, pStream);
mndTransDrop(pTrans); mndTransDrop(pTrans);
tFreeSMDropStreamReq(&dropReq); tFreeMDropStreamReq(&dropReq);
return -1; return -1;
} }
@ -1376,7 +1386,7 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) {
if (mndPersistDropStreamLog(pMnode, pTrans, pStream) < 0) { if (mndPersistDropStreamLog(pMnode, pTrans, pStream) < 0) {
sdbRelease(pMnode->pSdb, pStream); sdbRelease(pMnode->pSdb, pStream);
mndTransDrop(pTrans); mndTransDrop(pTrans);
tFreeSMDropStreamReq(&dropReq); tFreeMDropStreamReq(&dropReq);
return -1; return -1;
} }
@ -1384,7 +1394,7 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) {
mError("trans:%d, failed to prepare drop stream trans since %s", pTrans->id, terrstr()); mError("trans:%d, failed to prepare drop stream trans since %s", pTrans->id, terrstr());
sdbRelease(pMnode->pSdb, pStream); sdbRelease(pMnode->pSdb, pStream);
mndTransDrop(pTrans); mndTransDrop(pTrans);
tFreeSMDropStreamReq(&dropReq); tFreeMDropStreamReq(&dropReq);
return -1; return -1;
} }
@ -1392,13 +1402,12 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) {
SName name = {0}; SName name = {0};
tNameFromString(&name, dropReq.name, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE); tNameFromString(&name, dropReq.name, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE);
// reuse this function for stream
auditRecord(pReq, pMnode->clusterId, "dropStream", "", name.dbname, dropReq.sql, dropReq.sqlLen); auditRecord(pReq, pMnode->clusterId, "dropStream", "", name.dbname, dropReq.sql, dropReq.sqlLen);
sdbRelease(pMnode->pSdb, pStream); sdbRelease(pMnode->pSdb, pStream);
mndTransDrop(pTrans); mndTransDrop(pTrans);
tFreeSMDropStreamReq(&dropReq); tFreeMDropStreamReq(&dropReq);
return TSDB_CODE_ACTION_IN_PROGRESS; return TSDB_CODE_ACTION_IN_PROGRESS;
} }
@ -1814,6 +1823,13 @@ static int32_t mndProcessPauseStreamReq(SRpcMsg *pReq) {
return -1; return -1;
} }
// check if it is conflict with other trans in both sourceDb and targetDb.
bool conflict = streamTransConflictOtherTrans(pMnode, pStream->sourceDb, pStream->targetDb);
if (conflict) {
sdbRelease(pMnode->pSdb, pStream);
return -1;
}
bool updated = taskNodeIsUpdated(pMnode); bool updated = taskNodeIsUpdated(pMnode);
if (updated) { if (updated) {
mError("tasks are not ready for pause, node update detected"); mError("tasks are not ready for pause, node update detected");
@ -1822,7 +1838,7 @@ static int32_t mndProcessPauseStreamReq(SRpcMsg *pReq) {
STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, pReq, "pause-stream"); STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, pReq, "pause-stream");
if (pTrans == NULL) { if (pTrans == NULL) {
mError("stream:%s, failed to pause stream since %s", pauseReq.name, terrstr()); mError("stream:%s failed to pause stream since %s", pauseReq.name, terrstr());
sdbRelease(pMnode->pSdb, pStream); sdbRelease(pMnode->pSdb, pStream);
return -1; return -1;
} }
@ -1836,7 +1852,9 @@ static int32_t mndProcessPauseStreamReq(SRpcMsg *pReq) {
return -1; return -1;
} }
// pause all tasks int32_t code = mndStreamRegisterTrans(pTrans, MND_STREAM_PAUSE_NAME, pStream->sourceDb, pStream->targetDb);
// if nodeUpdate happened, not send pause trans
if (mndPauseAllStreamTasks(pMnode, pTrans, pStream) < 0) { if (mndPauseAllStreamTasks(pMnode, pTrans, pStream) < 0) {
mError("stream:%s, failed to pause task since %s", pauseReq.name, terrstr()); mError("stream:%s, failed to pause task since %s", pauseReq.name, terrstr());
sdbRelease(pMnode->pSdb, pStream); sdbRelease(pMnode->pSdb, pStream);
@ -1940,13 +1958,21 @@ static int32_t mndProcessResumeStreamReq(SRpcMsg *pReq) {
return -1; return -1;
} }
STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, pReq, "pause-stream"); // check if it is conflict with other trans in both sourceDb and targetDb.
if (pTrans == NULL) { bool conflict = streamTransConflictOtherTrans(pMnode, pStream->sourceDb, pStream->targetDb);
mError("stream:%s, failed to pause stream since %s", pauseReq.name, terrstr()); if (conflict) {
sdbRelease(pMnode->pSdb, pStream); sdbRelease(pMnode->pSdb, pStream);
return -1; return -1;
} }
mInfo("trans:%d, used to pause stream:%s", pTrans->id, pauseReq.name);
STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, pReq, MND_STREAM_RESUME_NAME);
if (pTrans == NULL) {
mError("stream:%s, failed to resume stream since %s", pauseReq.name, terrstr());
sdbRelease(pMnode->pSdb, pStream);
return -1;
}
mInfo("trans:%d used to resume stream:%s", pTrans->id, pauseReq.name);
mndTransSetDbName(pTrans, pStream->sourceDb, pStream->targetDb); mndTransSetDbName(pTrans, pStream->sourceDb, pStream->targetDb);
if (mndTransCheckConflict(pMnode, pTrans) != 0) { if (mndTransCheckConflict(pMnode, pTrans) != 0) {
@ -1955,6 +1981,8 @@ static int32_t mndProcessResumeStreamReq(SRpcMsg *pReq) {
return -1; return -1;
} }
int32_t code = mndStreamRegisterTrans(pTrans, MND_STREAM_RESUME_NAME, pStream->sourceDb, pStream->targetDb);
// resume all tasks // resume all tasks
if (mndResumeAllStreamTasks(pTrans, pMnode, pStream, pauseReq.igUntreated) < 0) { if (mndResumeAllStreamTasks(pTrans, pMnode, pStream, pauseReq.igUntreated) < 0) {
mError("stream:%s, failed to drop task since %s", pauseReq.name, terrstr()); mError("stream:%s, failed to drop task since %s", pauseReq.name, terrstr());
@ -2145,6 +2173,9 @@ static SVgroupChangeInfo mndFindChangedNodeInfo(SMnode *pMnode, const SArray *pP
epsetAssign(&updateInfo.newEp, &pCurrent->epset); epsetAssign(&updateInfo.newEp, &pCurrent->epset);
taosArrayPush(info.pUpdateNodeList, &updateInfo); taosArrayPush(info.pUpdateNodeList, &updateInfo);
}
if(pCurrent->nodeId != SNODE_HANDLE){
SVgObj *pVgroup = mndAcquireVgroup(pMnode, pCurrent->nodeId); SVgObj *pVgroup = mndAcquireVgroup(pMnode, pCurrent->nodeId);
taosHashPut(info.pDBMap, pVgroup->dbName, strlen(pVgroup->dbName), NULL, 0); taosHashPut(info.pDBMap, pVgroup->dbName, strlen(pVgroup->dbName), NULL, 0);
mndReleaseVgroup(pMnode, pVgroup); mndReleaseVgroup(pMnode, pVgroup);
@ -2200,6 +2231,24 @@ static SArray *mndTakeVgroupSnapshot(SMnode *pMnode, bool *allReady) {
sdbRelease(pSdb, pVgroup); sdbRelease(pSdb, pVgroup);
} }
SSnodeObj *pObj = NULL;
while (1) {
pIter = sdbFetch(pSdb, SDB_SNODE, pIter, (void **)&pObj);
if (pIter == NULL) {
break;
}
SNodeEntry entry = {0};
addEpIntoEpSet(&entry.epset, pObj->pDnode->fqdn, pObj->pDnode->port);
entry.nodeId = SNODE_HANDLE;
char buf[256] = {0};
EPSET_TO_STR(&entry.epset, buf);
mDebug("take snode snapshot, nodeId:%d %s", entry.nodeId, buf);
taosArrayPush(pVgroupListSnapshot, &entry);
sdbRelease(pSdb, pObj);
}
return pVgroupListSnapshot; return pVgroupListSnapshot;
} }
@ -2219,7 +2268,7 @@ static int32_t mndProcessVgroupChange(SMnode *pMnode, SVgroupChangeInfo *pChange
// here create only one trans // here create only one trans
if (pTrans == NULL) { if (pTrans == NULL) {
pTrans = doCreateTrans(pMnode, pStream, "stream-task-update"); pTrans = doCreateTrans(pMnode, pStream, NULL, MND_STREAM_TASK_UPDATE_NAME, "update task epsets");
if (pTrans == NULL) { if (pTrans == NULL) {
sdbRelease(pSdb, pStream); sdbRelease(pSdb, pStream);
sdbCancelFetch(pSdb, pIter); sdbCancelFetch(pSdb, pIter);
@ -2329,7 +2378,7 @@ static void doExtractTasksFromStream(SMnode *pMnode) {
break; break;
} }
keepStreamTasksInBuf(pStream, &execInfo); saveStreamTasksInfo(pStream, &execInfo);
sdbRelease(pSdb, pStream); sdbRelease(pSdb, pStream);
} }
} }
@ -2377,6 +2426,8 @@ int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot) {
STaskId * pId = taosArrayGet(execInfo.pTaskList, i); STaskId * pId = taosArrayGet(execInfo.pTaskList, i);
STaskStatusEntry *pEntry = taosHashGet(execInfo.pTaskMap, pId, sizeof(*pId)); STaskStatusEntry *pEntry = taosHashGet(execInfo.pTaskMap, pId, sizeof(*pId));
if(pEntry->nodeId == SNODE_HANDLE) continue;
bool existed = taskNodeExists(pNodeSnapshot, pEntry->nodeId); bool existed = taskNodeExists(pNodeSnapshot, pEntry->nodeId);
if (!existed) { if (!existed) {
taosArrayPush(pRemovedTasks, pId); taosArrayPush(pRemovedTasks, pId);
@ -2413,6 +2464,17 @@ int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot) {
return 0; return 0;
} }
static void killAllCheckpointTrans(SMnode* pMnode, SVgroupChangeInfo* pChangeInfo) {
void* pIter = NULL;
while((pIter = taosHashIterate(pChangeInfo->pDBMap, pIter)) != NULL) {
char* pDb = (char*) pIter;
size_t len = 0;
void* pKey = taosHashGetKey(pDb, &len);
killActiveCheckpointTrans(pMnode, pKey, len);
}
}
// this function runs by only one thread, so it is not multi-thread safe // this function runs by only one thread, so it is not multi-thread safe
static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) { static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) {
int32_t code = 0; int32_t code = 0;
@ -2454,7 +2516,8 @@ static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) {
SVgroupChangeInfo changeInfo = mndFindChangedNodeInfo(pMnode, execInfo.pNodeList, pNodeSnapshot); SVgroupChangeInfo changeInfo = mndFindChangedNodeInfo(pMnode, execInfo.pNodeList, pNodeSnapshot);
if (taosArrayGetSize(changeInfo.pUpdateNodeList) > 0) { if (taosArrayGetSize(changeInfo.pUpdateNodeList) > 0) {
// kill current active checkpoint transaction, since the transaction is vnode wide. // kill current active checkpoint transaction, since the transaction is vnode wide.
doKillActiveCheckpointTrans(pMnode); killAllCheckpointTrans(pMnode, &changeInfo);
code = mndProcessVgroupChange(pMnode, &changeInfo); code = mndProcessVgroupChange(pMnode, &changeInfo);
// keep the new vnode snapshot // keep the new vnode snapshot
@ -2500,7 +2563,7 @@ static int32_t mndProcessNodeCheck(SRpcMsg *pReq) {
return 0; return 0;
} }
void keepStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode) { void saveStreamTasksInfo(SStreamObj *pStream, SStreamExecInfo *pExecNode) {
int32_t level = taosArrayGetSize(pStream->tasks); int32_t level = taosArrayGetSize(pStream->tasks);
for (int32_t i = 0; i < level; i++) { for (int32_t i = 0; i < level; i++) {
@ -2543,8 +2606,9 @@ void removeStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode) {
STaskId *pId = taosArrayGet(pExecNode->pTaskList, k); STaskId *pId = taosArrayGet(pExecNode->pTaskList, k);
if (pId->taskId == id.taskId && pId->streamId == id.streamId) { if (pId->taskId == id.taskId && pId->streamId == id.streamId) {
taosArrayRemove(pExecNode->pTaskList, k); taosArrayRemove(pExecNode->pTaskList, k);
mInfo("s-task:0x%x removed from buffer, remain:%d", (int32_t)id.taskId,
(int32_t)taosArrayGetSize(pExecNode->pTaskList)); int32_t num = taosArrayGetSize(pExecNode->pTaskList);
mInfo("s-task:0x%x removed from buffer, remain:%d", (int32_t)id.taskId, num);
break; break;
} }
} }
@ -2555,15 +2619,15 @@ void removeStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode) {
ASSERT(taosHashGetSize(pExecNode->pTaskMap) == taosArrayGetSize(pExecNode->pTaskList)); ASSERT(taosHashGetSize(pExecNode->pTaskMap) == taosArrayGetSize(pExecNode->pTaskList));
} }
STrans *doCreateTrans(SMnode *pMnode, SStreamObj *pStream, const char *name) { STrans *doCreateTrans(SMnode *pMnode, SStreamObj *pStream, SRpcMsg *pReq, const char *name, const char* pMsg) {
STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, NULL, name); STrans *pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_DB_INSIDE, pReq, name);
if (pTrans == NULL) { if (pTrans == NULL) {
mError("failed to build trans:%s, reason: %s", name, tstrerror(TSDB_CODE_OUT_OF_MEMORY)); mError("failed to build trans:%s, reason: %s", name, tstrerror(TSDB_CODE_OUT_OF_MEMORY));
terrno = TSDB_CODE_OUT_OF_MEMORY; terrno = TSDB_CODE_OUT_OF_MEMORY;
return NULL; return NULL;
} }
mDebug("start to build stream:0x%" PRIx64 " task DAG update", pStream->uid); mDebug("s-task:0x%"PRIx64" start to build trans %s", pStream->uid, pMsg);
mndTransSetDbName(pTrans, pStream->sourceDb, pStream->targetDb); mndTransSetDbName(pTrans, pStream->sourceDb, pStream->targetDb);
if (mndTransCheckConflict(pMnode, pTrans) != 0) { if (mndTransCheckConflict(pMnode, pTrans) != 0) {
@ -2578,7 +2642,7 @@ STrans *doCreateTrans(SMnode *pMnode, SStreamObj *pStream, const char *name) {
} }
int32_t createStreamResetStatusTrans(SMnode *pMnode, SStreamObj *pStream) { int32_t createStreamResetStatusTrans(SMnode *pMnode, SStreamObj *pStream) {
STrans *pTrans = doCreateTrans(pMnode, pStream, "stream-task-reset"); STrans *pTrans = doCreateTrans(pMnode, pStream, NULL, MND_STREAM_TASK_RESET_NAME, " reset from failed checkpoint");
if (pTrans == NULL) { if (pTrans == NULL) {
return terrno; return terrno;
} }
@ -2642,43 +2706,36 @@ int32_t createStreamResetStatusTrans(SMnode *pMnode, SStreamObj *pStream) {
return TSDB_CODE_ACTION_IN_PROGRESS; return TSDB_CODE_ACTION_IN_PROGRESS;
} }
int32_t doKillActiveCheckpointTrans(SMnode *pMnode) { int32_t killActiveCheckpointTrans(SMnode *pMnode, const char* pDBName, size_t len) {
int32_t transId = 0; // data in the hash table will be removed automatically, no need to remove it here.
SSdb * pSdb = pMnode->pSdb; SStreamTransInfo* pTransInfo = taosHashGet(execInfo.transMgmt.pDBTrans, pDBName, len);
STrans *pTrans = NULL; if (pTransInfo == NULL) {
void * pIter = NULL;
while (1) {
pIter = sdbFetch(pSdb, SDB_TRANS, pIter, (void **)&pTrans);
if (pIter == NULL) {
break;
}
if (strncmp(pTrans->opername, MND_STREAM_CHECKPOINT_NAME, tListLen(pTrans->opername) - 1) == 0) {
transId = pTrans->id;
sdbRelease(pSdb, pTrans);
sdbCancelFetch(pSdb, pIter);
break;
}
sdbRelease(pSdb, pTrans);
}
if (transId == 0) {
mDebug("failed to find the checkpoint trans, reset not executed");
return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
} }
pTrans = mndAcquireTrans(pMnode, transId); // not checkpoint trans, ignore
mInfo("kill checkpoint trans:%d", transId); if (strcmp(pTransInfo->name, MND_STREAM_CHECKPOINT_NAME) != 0) {
mDebug("not checkpoint trans, not kill it, name:%s, transId:%d", pTransInfo->name, pTransInfo->transId);
return TSDB_CODE_SUCCESS;
}
STrans* pTrans = mndAcquireTrans(pMnode, pTransInfo->transId);
if (pTrans != NULL) {
mInfo("kill checkpoint transId:%d in Db:%s", pTransInfo->transId, pDBName);
mndKillTrans(pMnode, pTrans); mndKillTrans(pMnode, pTrans);
mndReleaseTrans(pMnode, pTrans); mndReleaseTrans(pMnode, pTrans);
}
return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
} }
int32_t mndResetFromCheckpoint(SMnode *pMnode) { int32_t mndResetStatusFromCheckpoint(SMnode *pMnode, int32_t transId) {
doKillActiveCheckpointTrans(pMnode); STrans* pTrans = mndAcquireTrans(pMnode, transId);
if (pTrans != NULL) {
mInfo("kill checkpoint transId:%d to reset task status", transId);
mndKillTrans(pMnode, pTrans);
mndReleaseTrans(pMnode, pTrans);
}
// set all tasks status to be normal, refactor later to be stream level, instead of vnode level. // set all tasks status to be normal, refactor later to be stream level, instead of vnode level.
SSdb * pSdb = pMnode->pSdb; SSdb * pSdb = pMnode->pSdb;
@ -2690,7 +2747,13 @@ int32_t mndResetFromCheckpoint(SMnode *pMnode) {
break; break;
} }
// todo this transaction should exist be only one bool conflict = streamTransConflictOtherTrans(pMnode, pStream->sourceDb, pStream->targetDb);
if (conflict) {
mError("stream:%s other trans exists in DB:%s & %s failed to start reset-status trans",
pStream->name, pStream->sourceDb, pStream->targetDb);
continue;
}
mDebug("stream:%s (0x%" PRIx64 ") reset checkpoint procedure, create reset trans", pStream->name, pStream->uid); mDebug("stream:%s (0x%" PRIx64 ") reset checkpoint procedure, create reset trans", pStream->name, pStream->uid);
int32_t code = createStreamResetStatusTrans(pMnode, pStream); int32_t code = createStreamResetStatusTrans(pMnode, pStream);
if (code != TSDB_CODE_SUCCESS) { if (code != TSDB_CODE_SUCCESS) {
@ -2725,12 +2788,12 @@ int32_t setNodeEpsetExpiredFlag(const SArray *pNodeList) {
return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
} }
static void updateStageInfo(STaskStatusEntry* pTaskEntry, int32_t stage) { static void updateStageInfo(STaskStatusEntry* pTaskEntry, int64_t stage) {
int32_t numOfNodes = taosArrayGetSize(execInfo.pNodeList); int32_t numOfNodes = taosArrayGetSize(execInfo.pNodeList);
for(int32_t j = 0; j < numOfNodes; ++j) { for(int32_t j = 0; j < numOfNodes; ++j) {
SNodeEntry* pNodeEntry = taosArrayGet(execInfo.pNodeList, j); SNodeEntry* pNodeEntry = taosArrayGet(execInfo.pNodeList, j);
if (pNodeEntry->nodeId == pTaskEntry->nodeId) { if (pNodeEntry->nodeId == pTaskEntry->nodeId) {
mInfo("vgId:%d stage updated from %d to %d, nodeUpdate trigger by s-task:0x%" PRIx64, pTaskEntry->nodeId, mInfo("vgId:%d stage updated from %"PRId64 " to %"PRId64 ", nodeUpdate trigger by s-task:0x%" PRIx64, pTaskEntry->nodeId,
pTaskEntry->stage, stage, pTaskEntry->id.taskId); pTaskEntry->stage, stage, pTaskEntry->id.taskId);
pNodeEntry->stageUpdated = true; pNodeEntry->stageUpdated = true;
@ -2775,6 +2838,7 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) {
setNodeEpsetExpiredFlag(req.pUpdateNodes); setNodeEpsetExpiredFlag(req.pUpdateNodes);
} }
bool snodeChanged = false;
for (int32_t i = 0; i < req.numOfTasks; ++i) { for (int32_t i = 0; i < req.numOfTasks; ++i) {
STaskStatusEntry *p = taosArrayGet(req.pTaskStatus, i); STaskStatusEntry *p = taosArrayGet(req.pTaskStatus, i);
STaskStatusEntry *pTaskEntry = taosHashGet(execInfo.pTaskMap, &p->id, sizeof(p->id)); STaskStatusEntry *pTaskEntry = taosHashGet(execInfo.pTaskMap, &p->id, sizeof(p->id));
@ -2785,6 +2849,7 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) {
if (pTaskEntry->stage != p->stage && pTaskEntry->stage != -1) { if (pTaskEntry->stage != p->stage && pTaskEntry->stage != -1) {
updateStageInfo(pTaskEntry, p->stage); updateStageInfo(pTaskEntry, p->stage);
if(pTaskEntry->nodeId == SNODE_HANDLE) snodeChanged = true;
} else { } else {
streamTaskStatusCopy(pTaskEntry, p); streamTaskStatusCopy(pTaskEntry, p);
if (p->activeCheckpointId != 0) { if (p->activeCheckpointId != 0) {
@ -2813,11 +2878,11 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) {
SArray *p = mndTakeVgroupSnapshot(pMnode, &allReady); SArray *p = mndTakeVgroupSnapshot(pMnode, &allReady);
taosArrayDestroy(p); taosArrayDestroy(p);
if (allReady) { if (allReady || snodeChanged) {
// if the execInfo.activeCheckpoint == 0, the checkpoint is restoring from wal // if the execInfo.activeCheckpoint == 0, the checkpoint is restoring from wal
mInfo("checkpointId:%" PRId64 " failed, issue task-reset trans to reset all tasks status", mInfo("checkpointId:%" PRId64 " failed, issue task-reset trans to reset all tasks status",
execInfo.activeCheckpoint); execInfo.activeCheckpoint);
mndResetFromCheckpoint(pMnode); mndResetStatusFromCheckpoint(pMnode, activeCheckpointId);
} else { } else {
mInfo("not all vgroups are ready, wait for next HB from stream tasks"); mInfo("not all vgroups are ready, wait for next HB from stream tasks");
} }

View File

@ -0,0 +1,105 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "mndTrans.h"
#include "mndStream.h"
typedef struct SKeyInfo {
void* pKey;
int32_t keyLen;
} SKeyInfo;
static int32_t clearFinishedTrans(SMnode* pMnode);
int32_t mndStreamRegisterTrans(STrans* pTrans, const char* pName, const char* pSrcDb, const char* pDstDb) {
SStreamTransInfo info = {.transId = pTrans->id, .startTime = taosGetTimestampMs(), .name = pName};
taosHashPut(execInfo.transMgmt.pDBTrans, pSrcDb, strlen(pSrcDb), &info, sizeof(SStreamTransInfo));
if (strcmp(pSrcDb, pDstDb) != 0) {
taosHashPut(execInfo.transMgmt.pDBTrans, pDstDb, strlen(pDstDb), &info, sizeof(SStreamTransInfo));
}
return 0;
}
int32_t clearFinishedTrans(SMnode* pMnode) {
SArray* pList = taosArrayInit(4, sizeof(SKeyInfo));
size_t keyLen = 0;
taosThreadMutexLock(&execInfo.lock);
void* pIter = NULL;
while ((pIter = taosHashIterate(execInfo.transMgmt.pDBTrans, pIter)) != NULL) {
SStreamTransInfo *pEntry = (SStreamTransInfo *)pIter;
STrans* pTrans = mndAcquireTrans(pMnode, pEntry->transId);
// let's clear the finished trans
if (pTrans == NULL) {
void* pKey = taosHashGetKey(pEntry, &keyLen);
// key is the name of src/dst db name
SKeyInfo info = {.pKey = pKey, .keyLen = keyLen};
mDebug("transId:%d %s startTs:%" PRId64 "cleared due to finished", pEntry->transId, pEntry->name,
pEntry->startTime);
taosArrayPush(pList, &info);
} else {
mndReleaseTrans(pMnode, pTrans);
}
}
size_t num = taosArrayGetSize(pList);
for(int32_t i = 0; i < num; ++i) {
SKeyInfo* pKey = taosArrayGet(pList, i);
taosHashRemove(execInfo.transMgmt.pDBTrans, pKey->pKey, pKey->keyLen);
}
mDebug("clear %d finished stream-trans, remained:%d", (int32_t) num, taosHashGetSize(execInfo.transMgmt.pDBTrans));
taosThreadMutexUnlock(&execInfo.lock);
terrno = TSDB_CODE_SUCCESS;
taosArrayDestroy(pList);
return 0;
}
bool streamTransConflictOtherTrans(SMnode* pMnode, const char* pSrcDb, const char* pDstDb) {
clearFinishedTrans(pMnode);
taosThreadMutexLock(&execInfo.lock);
int32_t num = taosHashGetSize(execInfo.transMgmt.pDBTrans);
if (num <= 0) {
taosThreadMutexUnlock(&execInfo.lock);
return false;
}
SStreamTransInfo *pEntry = taosHashGet(execInfo.transMgmt.pDBTrans, pSrcDb, strlen(pSrcDb));
if (pEntry != NULL) {
taosThreadMutexUnlock(&execInfo.lock);
mWarn("conflict with other transId:%d in Db:%s, trans:%s", pEntry->transId, pSrcDb, pEntry->name);
return true;
}
pEntry = taosHashGet(execInfo.transMgmt.pDBTrans, pDstDb, strlen(pDstDb));
if (pEntry != NULL) {
taosThreadMutexUnlock(&execInfo.lock);
mWarn("conflict with other transId:%d in Db:%s, trans:%s", pEntry->transId, pSrcDb, pEntry->name);
return true;
}
taosThreadMutexUnlock(&execInfo.lock);
return false;
}

View File

@ -38,7 +38,7 @@ class MndTestTrans1 : public ::testing::Test {
test.ServerStop(); test.ServerStop();
pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC | TD_FILE_WRITE_THROUGH);
int32_t writeLen = taosWriteFile(pFile, buffer, readLen); int32_t writeLen = taosWriteFile(pFile, buffer, readLen);
if (writeLen < 0 || writeLen == readLen) { if (writeLen < 0 || writeLen == readLen) {
ASSERT(1); ASSERT(1);

View File

@ -3,6 +3,7 @@ add_library(snode STATIC ${SNODE_SRC})
target_include_directories( target_include_directories(
snode snode
PUBLIC "${TD_SOURCE_DIR}/include/dnode/snode" PUBLIC "${TD_SOURCE_DIR}/include/dnode/snode"
PUBLIC "${TD_SOURCE_DIR}/include/dnode/vnode"
private "${CMAKE_CURRENT_SOURCE_DIR}/inc" private "${CMAKE_CURRENT_SOURCE_DIR}/inc"
) )
target_link_libraries( target_link_libraries(

View File

@ -18,6 +18,28 @@
#include "sndInt.h" #include "sndInt.h"
#include "tstream.h" #include "tstream.h"
#include "tuuid.h" #include "tuuid.h"
#include "stream.h"
#define sndError(...) \
do { \
if (sndDebugFlag & DEBUG_ERROR) { \
taosPrintLog("SND ERROR ", DEBUG_ERROR, sndDebugFlag, __VA_ARGS__); \
} \
} while (0)
#define sndInfo(...) \
do { \
if (sndDebugFlag & DEBUG_INFO) { \
taosPrintLog("SND INFO ", DEBUG_INFO, sndDebugFlag, __VA_ARGS__); \
} \
} while (0)
#define sndDebug(...) \
do { \
if (sndDebugFlag & DEBUG_DEBUG) { \
taosPrintLog("SND ", DEBUG_DEBUG, sndDebugFlag, __VA_ARGS__); \
} \
} while (0)
void sndEnqueueStreamDispatch(SSnode *pSnode, SRpcMsg *pMsg) { void sndEnqueueStreamDispatch(SSnode *pSnode, SRpcMsg *pMsg) {
char *msgStr = pMsg->pCont; char *msgStr = pMsg->pCont;
@ -40,10 +62,14 @@ void sndEnqueueStreamDispatch(SSnode *pSnode, SRpcMsg *pMsg) {
if (pTask) { if (pTask) {
SRpcMsg rsp = { .info = pMsg->info, .code = 0 }; SRpcMsg rsp = { .info = pMsg->info, .code = 0 };
streamProcessDispatchMsg(pTask, &req, &rsp); streamProcessDispatchMsg(pTask, &req, &rsp);
tDeleteStreamDispatchReq(&req);
streamMetaReleaseTask(pSnode->pMeta, pTask); streamMetaReleaseTask(pSnode->pMeta, pTask);
rpcFreeCont(pMsg->pCont); rpcFreeCont(pMsg->pCont);
taosFreeQitem(pMsg); taosFreeQitem(pMsg);
return; return;
} else {
tDeleteStreamDispatchReq(&req);
return;
} }
FAIL: FAIL:
@ -63,20 +89,37 @@ int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t nextProcessVer
streamTaskOpenAllUpstreamInput(pTask); streamTaskOpenAllUpstreamInput(pTask);
pTask->pState = streamStateOpen(pSnode->path, pTask, false, -1, -1); SStreamTask* pSateTask = pTask;
if (pTask->pState == NULL) { SStreamTask task = {0};
qError("s-task:%s failed to open state for task", pTask->id.idStr); if (pTask->info.fillHistory) {
return -1; task.id.streamId = pTask->streamTaskId.streamId;
} else { task.id.taskId = pTask->streamTaskId.taskId;
qDebug("s-task:%s state:%p", pTask->id.idStr, pTask->pState); task.pMeta = pTask->pMeta;
pSateTask = &task;
} }
int32_t numOfChildEp = taosArrayGetSize(pTask->upstreamInfo.pList); pTask->pState = streamStateOpen(pSnode->path, pSateTask, false, -1, -1);
SReadHandle handle = { .vnode = NULL, .numOfVgroups = numOfChildEp, .pStateBackend = pTask->pState, .fillHistory = pTask->info.fillHistory }; if (pTask->pState == NULL) {
sndError("s-task:%s failed to open state for task", pTask->id.idStr);
return -1;
} else {
sndDebug("s-task:%s state:%p", pTask->id.idStr, pTask->pState);
}
int32_t numOfVgroups = (int32_t)taosArrayGetSize(pTask->upstreamInfo.pList);
SReadHandle handle = {
.checkpointId = pTask->chkInfo.checkpointId,
.vnode = NULL,
.numOfVgroups = numOfVgroups,
.pStateBackend = pTask->pState,
.fillHistory = pTask->info.fillHistory,
.winRange = pTask->dataRange.window,
};
initStreamStateAPI(&handle.api); initStreamStateAPI(&handle.api);
pTask->exec.pExecutor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &handle, 0, pTask->id.taskId); pTask->exec.pExecutor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &handle, SNODE_HANDLE, pTask->id.taskId);
ASSERT(pTask->exec.pExecutor); ASSERT(pTask->exec.pExecutor);
qSetTaskId(pTask->exec.pExecutor, pTask->id.taskId, pTask->id.streamId);
streamTaskResetUpstreamStageInfo(pTask); streamTaskResetUpstreamStageInfo(pTask);
streamSetupScheduleTrigger(pTask); streamSetupScheduleTrigger(pTask);
@ -85,7 +128,7 @@ int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t nextProcessVer
// checkpoint ver is the kept version, handled data should be the next version. // checkpoint ver is the kept version, handled data should be the next version.
if (pTask->chkInfo.checkpointId != 0) { if (pTask->chkInfo.checkpointId != 0) {
pTask->chkInfo.nextProcessVer = pTask->chkInfo.checkpointVer + 1; pTask->chkInfo.nextProcessVer = pTask->chkInfo.checkpointVer + 1;
qInfo("s-task:%s restore from the checkpointId:%" PRId64 " ver:%" PRId64 " nextProcessVer:%" PRId64, pTask->id.idStr, sndInfo("s-task:%s restore from the checkpointId:%" PRId64 " ver:%" PRId64 " nextProcessVer:%" PRId64, pTask->id.idStr,
pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer); pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer);
} else { } else {
if (pTask->chkInfo.nextProcessVer == -1) { if (pTask->chkInfo.nextProcessVer == -1) {
@ -96,7 +139,7 @@ int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t nextProcessVer
char* p = NULL; char* p = NULL;
streamTaskGetStatus(pTask, &p); streamTaskGetStatus(pTask, &p);
qInfo("snode:%d expand stream task, s-task:%s, checkpointId:%" PRId64 " checkpointVer:%" PRId64 sndInfo("snode:%d expand stream task, s-task:%s, checkpointId:%" PRId64 " checkpointVer:%" PRId64
" nextProcessVer:%" PRId64 " child id:%d, level:%d, status:%s fill-history:%d, trigger:%" PRId64 " ms", " nextProcessVer:%" PRId64 " child id:%d, level:%d, status:%s fill-history:%d, trigger:%" PRId64 " ms",
SNODE_HANDLE, pTask->id.idStr, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer, SNODE_HANDLE, pTask->id.idStr, pChkInfo->checkpointId, pChkInfo->checkpointVer, pChkInfo->nextProcessVer,
pTask->info.selfChildId, pTask->info.taskLevel, p, pTask->info.fillHistory, pTask->info.triggerParam); pTask->info.selfChildId, pTask->info.taskLevel, p, pTask->info.fillHistory, pTask->info.triggerParam);
@ -104,6 +147,142 @@ int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t nextProcessVer
return 0; return 0;
} }
int32_t sndStartStreamTasks(SSnode* pSnode) {
int32_t code = TSDB_CODE_SUCCESS;
int32_t vgId = SNODE_HANDLE;
SStreamMeta* pMeta = pSnode->pMeta;
int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList);
sndDebug("vgId:%d start to check all %d stream task(s) downstream status", vgId, numOfTasks);
if (numOfTasks == 0) {
return TSDB_CODE_SUCCESS;
}
SArray* pTaskList = NULL;
streamMetaWLock(pMeta);
pTaskList = taosArrayDup(pMeta->pTaskList, NULL);
taosHashClear(pMeta->startInfo.pReadyTaskSet);
taosHashClear(pMeta->startInfo.pFailedTaskSet);
pMeta->startInfo.startTs = taosGetTimestampMs();
streamMetaWUnLock(pMeta);
// broadcast the check downstream tasks msg
for (int32_t i = 0; i < numOfTasks; ++i) {
SStreamTaskId* pTaskId = taosArrayGet(pTaskList, i);
SStreamTask* pTask = streamMetaAcquireTask(pMeta, pTaskId->streamId, pTaskId->taskId);
if (pTask == NULL) {
continue;
}
// fill-history task can only be launched by related stream tasks.
if (pTask->info.fillHistory == 1) {
streamMetaReleaseTask(pMeta, pTask);
continue;
}
if (pTask->status.downstreamReady == 1) {
if (HAS_RELATED_FILLHISTORY_TASK(pTask)) {
sndDebug("s-task:%s downstream ready, no need to check downstream, check only related fill-history task",
pTask->id.idStr);
streamLaunchFillHistoryTask(pTask);
}
streamMetaUpdateTaskDownstreamStatus(pTask, pTask->execInfo.init, pTask->execInfo.start, true);
streamMetaReleaseTask(pMeta, pTask);
continue;
}
EStreamTaskEvent event = (HAS_RELATED_FILLHISTORY_TASK(pTask)) ? TASK_EVENT_INIT_STREAM_SCANHIST : TASK_EVENT_INIT;
int32_t ret = streamTaskHandleEvent(pTask->status.pSM, event);
if (ret != TSDB_CODE_SUCCESS) {
code = ret;
}
streamMetaReleaseTask(pMeta, pTask);
}
taosArrayDestroy(pTaskList);
return code;
}
int32_t sndResetStreamTaskStatus(SSnode* pSnode) {
SStreamMeta* pMeta = pSnode->pMeta;
int32_t vgId = pMeta->vgId;
int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList);
sndDebug("vgId:%d reset all %d stream task(s) status to be uninit", vgId, numOfTasks);
if (numOfTasks == 0) {
return TSDB_CODE_SUCCESS;
}
for (int32_t i = 0; i < numOfTasks; ++i) {
SStreamTaskId* pTaskId = taosArrayGet(pMeta->pTaskList, i);
STaskId id = {.streamId = pTaskId->streamId, .taskId = pTaskId->taskId};
SStreamTask** pTask = taosHashGet(pMeta->pTasksMap, &id, sizeof(id));
streamTaskResetStatus(*pTask);
}
return 0;
}
int32_t sndRestartStreamTasks(SSnode* pSnode) {
SStreamMeta* pMeta = pSnode->pMeta;
int32_t vgId = pMeta->vgId;
int32_t code = 0;
int64_t st = taosGetTimestampMs();
while(1) {
int32_t startVal = atomic_val_compare_exchange_32(&pMeta->startInfo.taskStarting, 0, 1);
if (startVal == 0) {
break;
}
sndDebug("vgId:%d in start stream tasks procedure, wait for 500ms and recheck", vgId);
taosMsleep(500);
}
terrno = 0;
sndInfo("vgId:%d tasks are all updated and stopped, restart all tasks, triggered by transId:%d", vgId,
pMeta->updateInfo.transId);
while (streamMetaTaskInTimer(pMeta)) {
sndDebug("vgId:%d some tasks in timer, wait for 100ms and recheck", pMeta->vgId);
taosMsleep(100);
}
streamMetaWLock(pMeta);
code = streamMetaReopen(pMeta);
if (code != TSDB_CODE_SUCCESS) {
sndError("vgId:%d failed to reopen stream meta", vgId);
streamMetaWUnLock(pMeta);
code = terrno;
return code;
}
streamMetaInitBackend(pMeta);
int64_t el = taosGetTimestampMs() - st;
sndInfo("vgId:%d close&reload state elapsed time:%.3fs", vgId, el/1000.);
code = streamMetaLoadAllTasks(pMeta);
if (code != TSDB_CODE_SUCCESS) {
sndError("vgId:%d failed to load stream tasks, code:%s", vgId, tstrerror(terrno));
streamMetaWUnLock(pMeta);
code = terrno;
return code;
}
sndInfo("vgId:%d restart all stream tasks after all tasks being updated", vgId);
sndResetStreamTaskStatus(pSnode);
streamMetaWUnLock(pMeta);
sndStartStreamTasks(pSnode);
code = terrno;
return code;
}
SSnode *sndOpen(const char *path, const SSnodeOpt *pOption) { SSnode *sndOpen(const char *path, const SSnodeOpt *pOption) {
SSnode *pSnode = taosMemoryCalloc(1, sizeof(SSnode)); SSnode *pSnode = taosMemoryCalloc(1, sizeof(SSnode));
if (pSnode == NULL) { if (pSnode == NULL) {
@ -117,17 +296,19 @@ SSnode *sndOpen(const char *path, const SSnodeOpt *pOption) {
} }
pSnode->msgCb = pOption->msgCb; pSnode->msgCb = pOption->msgCb;
pSnode->pMeta = streamMetaOpen(path, pSnode, (FTaskExpand *)sndExpandTask, SNODE_HANDLE, -1); pSnode->pMeta = streamMetaOpen(path, pSnode, (FTaskExpand *)sndExpandTask, SNODE_HANDLE, taosGetTimestampMs());
if (pSnode->pMeta == NULL) { if (pSnode->pMeta == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY; terrno = TSDB_CODE_OUT_OF_MEMORY;
goto FAIL; goto FAIL;
} }
if (streamMetaLoadAllTasks(pSnode->pMeta) < 0) {
goto FAIL;
}
stopRsync(); stopRsync();
startRsync(); startRsync();
// todo fix it: send msg to mnode to rollback to an existed checkpoint
streamMetaInitForSnode(pSnode->pMeta);
return pSnode; return pSnode;
FAIL: FAIL:
@ -136,7 +317,14 @@ FAIL:
return NULL; return NULL;
} }
int32_t sndInit(SSnode * pSnode) {
sndResetStreamTaskStatus(pSnode);
sndStartStreamTasks(pSnode);
return 0;
}
void sndClose(SSnode *pSnode) { void sndClose(SSnode *pSnode) {
stopRsync();
streamMetaNotifyClose(pSnode->pMeta); streamMetaNotifyClose(pSnode->pMeta);
streamMetaCommit(pSnode->pMeta); streamMetaCommit(pSnode->pMeta);
streamMetaClose(pSnode->pMeta); streamMetaClose(pSnode->pMeta);
@ -146,6 +334,33 @@ void sndClose(SSnode *pSnode) {
int32_t sndGetLoad(SSnode *pSnode, SSnodeLoad *pLoad) { return 0; } int32_t sndGetLoad(SSnode *pSnode, SSnodeLoad *pLoad) { return 0; }
int32_t sndStartStreamTaskAsync(SSnode* pSnode, bool restart) {
SStreamMeta* pMeta = pSnode->pMeta;
int32_t vgId = pMeta->vgId;
int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList);
if (numOfTasks == 0) {
sndDebug("vgId:%d no stream tasks existed to run", vgId);
return 0;
}
SStreamTaskRunReq* pRunReq = rpcMallocCont(sizeof(SStreamTaskRunReq));
if (pRunReq == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
sndError("vgId:%d failed to create msg to start wal scanning to launch stream tasks, code:%s", vgId, terrstr());
return -1;
}
sndDebug("vgId:%d start all %d stream task(s) async", vgId, numOfTasks);
pRunReq->head.vgId = vgId;
pRunReq->streamId = 0;
pRunReq->taskId = restart? STREAM_EXEC_RESTART_ALL_TASKS_ID:STREAM_EXEC_START_ALL_TASKS_ID;
SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq)};
tmsgPutToQueue(&pSnode->msgCb, STREAM_QUEUE, &msg);
return 0;
}
int32_t sndProcessTaskDeployReq(SSnode *pSnode, char *msg, int32_t msgLen) { int32_t sndProcessTaskDeployReq(SSnode *pSnode, char *msg, int32_t msgLen) {
int32_t code; int32_t code;
@ -184,24 +399,23 @@ int32_t sndProcessTaskDeployReq(SSnode *pSnode, char *msg, int32_t msgLen) {
char* p = NULL; char* p = NULL;
streamTaskGetStatus(pTask, &p); streamTaskGetStatus(pTask, &p);
qDebug("snode:%d s-task:%s is deployed on snode and add into meta, status:%s, numOfTasks:%d", SNODE_HANDLE, sndDebug("snode:%d s-task:%s is deployed on snode and add into meta, status:%s, numOfTasks:%d", SNODE_HANDLE,
pTask->id.idStr, p, numOfTasks); pTask->id.idStr, p, numOfTasks);
EStreamTaskEvent event = (HAS_RELATED_FILLHISTORY_TASK(pTask)) ? TASK_EVENT_INIT_STREAM_SCANHIST : TASK_EVENT_INIT; EStreamTaskEvent event = (HAS_RELATED_FILLHISTORY_TASK(pTask)) ? TASK_EVENT_INIT_STREAM_SCANHIST : TASK_EVENT_INIT;
streamTaskHandleEvent(pTask->status.pSM, event); streamTaskHandleEvent(pTask->status.pSM, event);
streamTaskCheckDownstream(pTask);
return 0; return 0;
} }
int32_t sndProcessTaskDropReq(SSnode *pSnode, char *msg, int32_t msgLen) { int32_t sndProcessTaskDropReq(SSnode *pSnode, char *msg, int32_t msgLen) {
SVDropStreamTaskReq *pReq = (SVDropStreamTaskReq *)msg; SVDropStreamTaskReq *pReq = (SVDropStreamTaskReq *)msg;
qDebug("snode:%d receive msg to drop stream task:0x%x", pSnode->pMeta->vgId, pReq->taskId); sndDebug("snode:%d receive msg to drop stream task:0x%x", pSnode->pMeta->vgId, pReq->taskId);
streamMetaUnregisterTask(pSnode->pMeta, pReq->streamId, pReq->taskId); streamMetaUnregisterTask(pSnode->pMeta, pReq->streamId, pReq->taskId);
// commit the update // commit the update
streamMetaWLock(pSnode->pMeta); streamMetaWLock(pSnode->pMeta);
int32_t numOfTasks = streamMetaGetNumOfTasks(pSnode->pMeta); int32_t numOfTasks = streamMetaGetNumOfTasks(pSnode->pMeta);
qDebug("vgId:%d task:0x%x dropped, remain tasks:%d", pSnode->pMeta->vgId, pReq->taskId, numOfTasks); sndDebug("vgId:%d task:0x%x dropped, remain tasks:%d", pSnode->pMeta->vgId, pReq->taskId, numOfTasks);
if (streamMetaCommit(pSnode->pMeta) < 0) { if (streamMetaCommit(pSnode->pMeta) < 0) {
// persist to disk // persist to disk
@ -213,6 +427,16 @@ int32_t sndProcessTaskDropReq(SSnode *pSnode, char *msg, int32_t msgLen) {
int32_t sndProcessTaskRunReq(SSnode *pSnode, SRpcMsg *pMsg) { int32_t sndProcessTaskRunReq(SSnode *pSnode, SRpcMsg *pMsg) {
SStreamTaskRunReq *pReq = pMsg->pCont; SStreamTaskRunReq *pReq = pMsg->pCont;
int32_t taskId = pReq->taskId;
if (taskId == STREAM_EXEC_START_ALL_TASKS_ID) {
sndStartStreamTasks(pSnode);
return 0;
} else if (taskId == STREAM_EXEC_RESTART_ALL_TASKS_ID) {
sndRestartStreamTasks(pSnode);
return 0;
}
SStreamTask *pTask = streamMetaAcquireTask(pSnode->pMeta, pReq->streamId, pReq->taskId); SStreamTask *pTask = streamMetaAcquireTask(pSnode->pMeta, pReq->streamId, pReq->taskId);
if (pTask) { if (pTask) {
streamExecTask(pTask); streamExecTask(pTask);
@ -231,14 +455,17 @@ int32_t sndProcessTaskDispatchReq(SSnode *pSnode, SRpcMsg *pMsg, bool exec) {
SDecoder decoder; SDecoder decoder;
tDecoderInit(&decoder, (uint8_t *)msgBody, msgLen); tDecoderInit(&decoder, (uint8_t *)msgBody, msgLen);
tDecodeStreamDispatchReq(&decoder, &req); tDecodeStreamDispatchReq(&decoder, &req);
tDecoderClear(&decoder);
SStreamTask *pTask = streamMetaAcquireTask(pSnode->pMeta, req.streamId, req.taskId); SStreamTask *pTask = streamMetaAcquireTask(pSnode->pMeta, req.streamId, req.taskId);
if (pTask) { if (pTask) {
SRpcMsg rsp = {.info = pMsg->info, .code = 0}; SRpcMsg rsp = {.info = pMsg->info, .code = 0};
streamProcessDispatchMsg(pTask, &req, &rsp); streamProcessDispatchMsg(pTask, &req, &rsp);
tDeleteStreamDispatchReq(&req);
streamMetaReleaseTask(pSnode->pMeta, pTask); streamMetaReleaseTask(pSnode->pMeta, pTask);
return 0; return 0;
} else { } else {
tDeleteStreamDispatchReq(&req);
return -1; return -1;
} }
} }
@ -270,6 +497,9 @@ int32_t sndProcessTaskDispatchRsp(SSnode *pSnode, SRpcMsg *pMsg) {
pRsp->upstreamTaskId = htonl(pRsp->upstreamTaskId); pRsp->upstreamTaskId = htonl(pRsp->upstreamTaskId);
pRsp->streamId = htobe64(pRsp->streamId); pRsp->streamId = htobe64(pRsp->streamId);
pRsp->downstreamTaskId = htonl(pRsp->downstreamTaskId);
pRsp->downstreamNodeId = htonl(pRsp->downstreamNodeId);
pRsp->stage = htobe64(pRsp->stage);
pRsp->msgId = htonl(pRsp->msgId); pRsp->msgId = htonl(pRsp->msgId);
SStreamTask *pTask = streamMetaAcquireTask(pSnode->pMeta, pRsp->streamId, pRsp->upstreamTaskId); SStreamTask *pTask = streamMetaAcquireTask(pSnode->pMeta, pRsp->streamId, pRsp->upstreamTaskId);
@ -287,23 +517,7 @@ int32_t sndProcessTaskRetrieveRsp(SSnode *pSnode, SRpcMsg *pMsg) {
return 0; return 0;
} }
int32_t sndProcessWriteMsg(SSnode *pSnode, SRpcMsg *pMsg, SRpcMsg *pRsp) { int32_t sndProcessTaskScanHistoryFinishReq(SSnode *pSnode, SRpcMsg *pMsg) {
switch (pMsg->msgType) {
case TDMT_STREAM_TASK_DEPLOY: {
void *pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
int32_t len = pMsg->contLen - sizeof(SMsgHead);
return sndProcessTaskDeployReq(pSnode, pReq, len);
}
case TDMT_STREAM_TASK_DROP:
return sndProcessTaskDropReq(pSnode, pMsg->pCont, pMsg->contLen);
default:
ASSERT(0);
}
return 0;
}
int32_t sndProcessStreamTaskScanHistoryFinishReq(SSnode *pSnode, SRpcMsg *pMsg) {
char *msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); char *msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
int32_t msgLen = pMsg->contLen - sizeof(SMsgHead); int32_t msgLen = pMsg->contLen - sizeof(SMsgHead);
@ -330,11 +544,73 @@ int32_t sndProcessStreamTaskScanHistoryFinishReq(SSnode *pSnode, SRpcMsg *pMsg)
return 0; return 0;
} }
int32_t sndProcessTaskRecoverFinishRsp(SSnode *pSnode, SRpcMsg *pMsg) { int32_t sndProcessTaskScanHistoryFinishRsp(SSnode *pSnode, SRpcMsg *pMsg) {
// char* msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
int32_t msgLen = pMsg->contLen - sizeof(SMsgHead);
// deserialize
SStreamCompleteHistoryMsg req = {0};
SDecoder decoder;
tDecoderInit(&decoder, (uint8_t*)msg, msgLen);
tDecodeCompleteHistoryDataMsg(&decoder, &req);
tDecoderClear(&decoder);
SStreamTask* pTask = streamMetaAcquireTask(pSnode->pMeta, req.streamId, req.upstreamTaskId);
if (pTask == NULL) {
sndError("vgId:%d process scan history finish rsp, failed to find task:0x%x, it may be destroyed",
pSnode->pMeta->vgId, req.upstreamTaskId);
return -1;
}
int32_t remain = atomic_sub_fetch_32(&pTask->notReadyTasks, 1);
if (remain > 0) {
sndDebug("s-task:%s scan-history finish rsp received from downstream task:0x%x, unfinished remain:%d",
pTask->id.idStr, req.downstreamId, remain);
} else {
sndDebug(
"s-task:%s scan-history finish rsp received from downstream task:0x%x, all downstream tasks rsp scan-history "
"completed msg",
pTask->id.idStr, req.downstreamId);
streamProcessScanHistoryFinishRsp(pTask);
}
streamMetaReleaseTask(pSnode->pMeta, pTask);
return 0; return 0;
} }
// downstream task has complete the stream task checkpoint procedure, let's start the handle the rsp by execute task
int32_t sndProcessTaskCheckpointReadyMsg(SSnode *pSnode, SRpcMsg* pMsg) {
SStreamMeta* pMeta = pSnode->pMeta;
char* msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
int32_t len = pMsg->contLen - sizeof(SMsgHead);
int32_t code = 0;
SStreamCheckpointReadyMsg req = {0};
SDecoder decoder;
tDecoderInit(&decoder, (uint8_t*)msg, len);
if (tDecodeStreamCheckpointReadyMsg(&decoder, &req) < 0) {
code = TSDB_CODE_MSG_DECODE_ERROR;
tDecoderClear(&decoder);
return code;
}
tDecoderClear(&decoder);
SStreamTask* pTask = streamMetaAcquireTask(pMeta, req.streamId, req.upstreamTaskId);
if (pTask == NULL) {
sndError("vgId:%d failed to find s-task:0x%x, it may have been destroyed already", pMeta->vgId, req.downstreamTaskId);
return code;
}
sndDebug("snode vgId:%d s-task:%s received the checkpoint ready msg from task:0x%x (vgId:%d), handle it", pMeta->vgId,
pTask->id.idStr, req.downstreamTaskId, req.downstreamNodeId);
streamProcessCheckpointReadyMsg(pTask);
streamMetaReleaseTask(pMeta, pTask);
return code;
}
int32_t sndProcessStreamTaskCheckReq(SSnode *pSnode, SRpcMsg *pMsg) { int32_t sndProcessStreamTaskCheckReq(SSnode *pSnode, SRpcMsg *pMsg) {
char *msgStr = pMsg->pCont; char *msgStr = pMsg->pCont;
char *msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead)); char *msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead));
@ -362,15 +638,15 @@ int32_t sndProcessStreamTaskCheckReq(SSnode *pSnode, SRpcMsg *pMsg) {
SStreamTask *pTask = streamMetaAcquireTask(pSnode->pMeta, req.streamId, taskId); SStreamTask *pTask = streamMetaAcquireTask(pSnode->pMeta, req.streamId, taskId);
if (pTask != NULL) { if (pTask != NULL) {
rsp.status = streamTaskCheckStatus(pTask, req.upstreamTaskId, req.upstreamNodeId, req.stage); rsp.status = streamTaskCheckStatus(pTask, req.upstreamTaskId, req.upstreamNodeId, req.stage, &rsp.oldStage);
streamMetaReleaseTask(pSnode->pMeta, pTask); streamMetaReleaseTask(pSnode->pMeta, pTask);
char* p = NULL; char* p = NULL;
streamTaskGetStatus(pTask, &p); streamTaskGetStatus(pTask, &p);
qDebug("s-task:%s status:%s, recv task check req(reqId:0x%" PRIx64 ") task:0x%x (vgId:%d), ready:%d", sndDebug("s-task:%s status:%s, recv task check req(reqId:0x%" PRIx64 ") task:0x%x (vgId:%d), ready:%d",
pTask->id.idStr, p, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status); pTask->id.idStr, p, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status);
} else { } else {
rsp.status = TASK_DOWNSTREAM_NOT_READY; rsp.status = TASK_DOWNSTREAM_NOT_READY;
qDebug("recv task check(taskId:0x%x not built yet) req(reqId:0x%" PRIx64 ") from task:0x%x (vgId:%d), rsp status %d", sndDebug("recv task check(taskId:0x%x not built yet) req(reqId:0x%" PRIx64 ") from task:0x%x (vgId:%d), rsp status %d",
taskId, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status); taskId, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status);
} }
@ -380,7 +656,7 @@ int32_t sndProcessStreamTaskCheckReq(SSnode *pSnode, SRpcMsg *pMsg) {
tEncodeSize(tEncodeStreamTaskCheckRsp, &rsp, len, code); tEncodeSize(tEncodeStreamTaskCheckRsp, &rsp, len, code);
if (code < 0) { if (code < 0) {
qError("vgId:%d failed to encode task check rsp, task:0x%x", pSnode->pMeta->vgId, taskId); sndError("vgId:%d failed to encode task check rsp, task:0x%x", pSnode->pMeta->vgId, taskId);
return -1; return -1;
} }
@ -415,12 +691,12 @@ int32_t sndProcessStreamTaskCheckRsp(SSnode* pSnode, SRpcMsg* pMsg) {
} }
tDecoderClear(&decoder); tDecoderClear(&decoder);
qDebug("tq task:0x%x (vgId:%d) recv check rsp(reqId:0x%" PRIx64 ") from 0x%x (vgId:%d) status %d", sndDebug("tq task:0x%x (vgId:%d) recv check rsp(reqId:0x%" PRIx64 ") from 0x%x (vgId:%d) status %d",
rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, rsp.status); rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, rsp.status);
SStreamTask* pTask = streamMetaAcquireTask(pSnode->pMeta, rsp.streamId, rsp.upstreamTaskId); SStreamTask* pTask = streamMetaAcquireTask(pSnode->pMeta, rsp.streamId, rsp.upstreamTaskId);
if (pTask == NULL) { if (pTask == NULL) {
qError("tq failed to locate the stream task:0x%x (vgId:%d), it may have been destroyed", rsp.upstreamTaskId, sndError("tq failed to locate the stream task:0x%x (vgId:%d), it may have been destroyed", rsp.upstreamTaskId,
pSnode->pMeta->vgId); pSnode->pMeta->vgId);
return -1; return -1;
} }
@ -430,6 +706,181 @@ int32_t sndProcessStreamTaskCheckRsp(SSnode* pSnode, SRpcMsg* pMsg) {
return code; return code;
} }
int32_t sndProcessTaskUpdateReq(SSnode* pSnode, SRpcMsg* pMsg) {
SStreamMeta* pMeta = pSnode->pMeta;
int32_t vgId = SNODE_HANDLE;
char* msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
int32_t len = pMsg->contLen - sizeof(SMsgHead);
SRpcMsg rsp = {.info = pMsg->info, .code = TSDB_CODE_SUCCESS};
SStreamTaskNodeUpdateMsg req = {0};
SDecoder decoder;
tDecoderInit(&decoder, (uint8_t*)msg, len);
if (tDecodeStreamTaskUpdateMsg(&decoder, &req) < 0) {
rsp.code = TSDB_CODE_MSG_DECODE_ERROR;
sndError("vgId:%d failed to decode task update msg, code:%s", vgId, tstrerror(rsp.code));
tDecoderClear(&decoder);
return rsp.code;
}
tDecoderClear(&decoder);
// update the nodeEpset when it exists
streamMetaWLock(pMeta);
// the task epset may be updated again and again, when replaying the WAL, the task may be in stop status.
STaskId id = {.streamId = req.streamId, .taskId = req.taskId};
SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasksMap, &id, sizeof(id));
if (ppTask == NULL || *ppTask == NULL) {
sndError("vgId:%d failed to acquire task:0x%x when handling update, it may have been dropped already", pMeta->vgId,
req.taskId);
rsp.code = TSDB_CODE_SUCCESS;
streamMetaWUnLock(pMeta);
taosArrayDestroy(req.pNodeList);
return rsp.code;
}
SStreamTask* pTask = *ppTask;
if (pMeta->updateInfo.transId != req.transId) {
pMeta->updateInfo.transId = req.transId;
sndInfo("s-task:%s receive new trans to update nodeEp msg from mnode, transId:%d", pTask->id.idStr, req.transId);
// info needs to be kept till the new trans to update the nodeEp arrived.
taosHashClear(pMeta->updateInfo.pTasks);
} else {
sndDebug("s-task:%s recv trans to update nodeEp from mnode, transId:%d", pTask->id.idStr, req.transId);
}
STaskUpdateEntry entry = {.streamId = req.streamId, .taskId = req.taskId, .transId = req.transId};
void* exist = taosHashGet(pMeta->updateInfo.pTasks, &entry, sizeof(STaskUpdateEntry));
if (exist != NULL) {
sndDebug("s-task:%s (vgId:%d) already update in trans:%d, discard the nodeEp update msg", pTask->id.idStr, vgId,
req.transId);
rsp.code = TSDB_CODE_SUCCESS;
streamMetaWUnLock(pMeta);
taosArrayDestroy(req.pNodeList);
return rsp.code;
}
streamMetaWUnLock(pMeta);
// the following two functions should not be executed within the scope of meta lock to avoid deadlock
streamTaskUpdateEpsetInfo(pTask, req.pNodeList);
streamTaskResetStatus(pTask);
// continue after lock the meta again
streamMetaWLock(pMeta);
SStreamTask** ppHTask = NULL;
if (HAS_RELATED_FILLHISTORY_TASK(pTask)) {
ppHTask = (SStreamTask**)taosHashGet(pMeta->pTasksMap, &pTask->hTaskInfo.id, sizeof(pTask->hTaskInfo.id));
if (ppHTask == NULL || *ppHTask == NULL) {
sndError("vgId:%d failed to acquire fill-history task:0x%x when handling update, it may have been dropped already",
pMeta->vgId, req.taskId);
CLEAR_RELATED_FILLHISTORY_TASK(pTask);
} else {
sndDebug("s-task:%s fill-history task update nodeEp along with stream task", (*ppHTask)->id.idStr);
streamTaskUpdateEpsetInfo(*ppHTask, req.pNodeList);
}
}
{
streamMetaSaveTask(pMeta, pTask);
if (ppHTask != NULL) {
streamMetaSaveTask(pMeta, *ppHTask);
}
if (streamMetaCommit(pMeta) < 0) {
// persist to disk
}
}
streamTaskStop(pTask);
// keep the already handled info
taosHashPut(pMeta->updateInfo.pTasks, &entry, sizeof(entry), NULL, 0);
if (ppHTask != NULL) {
streamTaskStop(*ppHTask);
sndDebug("s-task:%s task nodeEp update completed, streamTask and related fill-history task closed", pTask->id.idStr);
taosHashPut(pMeta->updateInfo.pTasks, &(*ppHTask)->id, sizeof(pTask->id), NULL, 0);
} else {
sndDebug("s-task:%s task nodeEp update completed, streamTask closed", pTask->id.idStr);
}
rsp.code = 0;
// possibly only handle the stream task.
int32_t numOfTasks = streamMetaGetNumOfTasks(pMeta);
int32_t updateTasks = taosHashGetSize(pMeta->updateInfo.pTasks);
pMeta->startInfo.tasksWillRestart = 1;
if (updateTasks < numOfTasks) {
sndDebug("vgId:%d closed tasks:%d, unclosed:%d, all tasks will be started when nodeEp update completed", vgId,
updateTasks, (numOfTasks - updateTasks));
streamMetaWUnLock(pMeta);
} else {
sndDebug("vgId:%d all %d task(s) nodeEp updated and closed", vgId, numOfTasks);
#if 1
sndStartStreamTaskAsync(pSnode, true);
streamMetaWUnLock(pMeta);
#else
streamMetaWUnLock(pMeta);
// For debug purpose.
// the following procedure consume many CPU resource, result in the re-election of leader
// with high probability. So we employ it as a test case for the stream processing framework, with
// checkpoint/restart/nodeUpdate etc.
while(1) {
int32_t startVal = atomic_val_compare_exchange_32(&pMeta->startInfo.taskStarting, 0, 1);
if (startVal == 0) {
break;
}
tqDebug("vgId:%d in start stream tasks procedure, wait for 500ms and recheck", vgId);
taosMsleep(500);
}
while (streamMetaTaskInTimer(pMeta)) {
tqDebug("vgId:%d some tasks in timer, wait for 100ms and recheck", pMeta->vgId);
taosMsleep(100);
}
streamMetaWLock(pMeta);
int32_t code = streamMetaReopen(pMeta);
if (code != 0) {
tqError("vgId:%d failed to reopen stream meta", vgId);
streamMetaWUnLock(pMeta);
taosArrayDestroy(req.pNodeList);
return -1;
}
if (streamMetaLoadAllTasks(pTq->pStreamMeta) < 0) {
tqError("vgId:%d failed to load stream tasks", vgId);
streamMetaWUnLock(pMeta);
taosArrayDestroy(req.pNodeList);
return -1;
}
if (vnodeIsRoleLeader(pTq->pVnode) && !tsDisableStream) {
tqInfo("vgId:%d start all stream tasks after all being updated", vgId);
tqResetStreamTaskStatus(pTq);
tqStartStreamTaskAsync(pTq, false);
} else {
tqInfo("vgId:%d, follower node not start stream tasks", vgId);
}
streamMetaWUnLock(pMeta);
#endif
}
taosArrayDestroy(req.pNodeList);
return rsp.code;
}
int32_t sndProcessStreamMsg(SSnode *pSnode, SRpcMsg *pMsg) { int32_t sndProcessStreamMsg(SSnode *pSnode, SRpcMsg *pMsg) {
switch (pMsg->msgType) { switch (pMsg->msgType) {
case TDMT_STREAM_TASK_RUN: case TDMT_STREAM_TASK_RUN:
@ -443,13 +894,34 @@ int32_t sndProcessStreamMsg(SSnode *pSnode, SRpcMsg *pMsg) {
case TDMT_STREAM_RETRIEVE_RSP: case TDMT_STREAM_RETRIEVE_RSP:
return sndProcessTaskRetrieveRsp(pSnode, pMsg); return sndProcessTaskRetrieveRsp(pSnode, pMsg);
case TDMT_VND_STREAM_SCAN_HISTORY_FINISH: case TDMT_VND_STREAM_SCAN_HISTORY_FINISH:
return sndProcessStreamTaskScanHistoryFinishReq(pSnode, pMsg); return sndProcessTaskScanHistoryFinishReq(pSnode, pMsg);
case TDMT_VND_STREAM_SCAN_HISTORY_FINISH_RSP: case TDMT_VND_STREAM_SCAN_HISTORY_FINISH_RSP:
return sndProcessTaskRecoverFinishRsp(pSnode, pMsg); return sndProcessTaskScanHistoryFinishRsp(pSnode, pMsg);
case TDMT_VND_STREAM_TASK_CHECK: case TDMT_VND_STREAM_TASK_CHECK:
return sndProcessStreamTaskCheckReq(pSnode, pMsg); return sndProcessStreamTaskCheckReq(pSnode, pMsg);
case TDMT_VND_STREAM_TASK_CHECK_RSP: case TDMT_VND_STREAM_TASK_CHECK_RSP:
return sndProcessStreamTaskCheckRsp(pSnode, pMsg); return sndProcessStreamTaskCheckRsp(pSnode, pMsg);
case TDMT_STREAM_TASK_CHECKPOINT_READY:
return sndProcessTaskCheckpointReadyMsg(pSnode, pMsg);
default:
ASSERT(0);
}
return 0;
}
int32_t sndProcessWriteMsg(SSnode *pSnode, SRpcMsg *pMsg, SRpcMsg *pRsp) {
switch (pMsg->msgType) {
case TDMT_STREAM_TASK_DEPLOY: {
void *pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
int32_t len = pMsg->contLen - sizeof(SMsgHead);
return sndProcessTaskDeployReq(pSnode, pReq, len);
}
case TDMT_STREAM_TASK_DROP:
return sndProcessTaskDropReq(pSnode, pMsg->pCont, pMsg->contLen);
case TDMT_VND_STREAM_TASK_UPDATE:
sndProcessTaskUpdateReq(pSnode, pMsg);
break;
default: default:
ASSERT(0); ASSERT(0);
} }

View File

@ -138,6 +138,11 @@ else()
endif() endif()
endif() endif()
target_include_directories(
vnode
PUBLIC "${TD_SOURCE_DIR}/include/dnode/vnode"
)
target_link_libraries( target_link_libraries(
vnode vnode
PUBLIC os PUBLIC os

View File

@ -43,9 +43,6 @@ extern "C" {
typedef struct STqOffsetStore STqOffsetStore; typedef struct STqOffsetStore STqOffsetStore;
#define STREAM_EXEC_EXTRACT_DATA_IN_WAL_ID (-1)
#define STREAM_EXEC_START_ALL_TASKS_ID (-2)
#define STREAM_EXEC_RESTART_ALL_TASKS_ID (-3)
#define IS_OFFSET_RESET_TYPE(_t) ((_t) < 0) #define IS_OFFSET_RESET_TYPE(_t) ((_t) < 0)
// tqExec // tqExec

View File

@ -15,17 +15,12 @@
#include "tq.h" #include "tq.h"
#include "vnd.h" #include "vnd.h"
#include "stream.h"
typedef struct { typedef struct {
int8_t inited; int8_t inited;
} STqMgmt; } STqMgmt;
typedef struct STaskUpdateEntry {
int64_t streamId;
int32_t taskId;
int32_t transId;
} STaskUpdateEntry;
static STqMgmt tqMgmt = {0}; static STqMgmt tqMgmt = {0};
// 0: not init // 0: not init
@ -928,12 +923,12 @@ int32_t tqProcessTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) {
} else { } else {
SStreamTask* pTask = streamMetaAcquireTask(pMeta, req.streamId, taskId); SStreamTask* pTask = streamMetaAcquireTask(pMeta, req.streamId, taskId);
if (pTask != NULL) { if (pTask != NULL) {
rsp.status = streamTaskCheckStatus(pTask, req.upstreamTaskId, req.upstreamNodeId, req.stage); rsp.status = streamTaskCheckStatus(pTask, req.upstreamTaskId, req.upstreamNodeId, req.stage, &rsp.oldStage);
streamMetaReleaseTask(pMeta, pTask); streamMetaReleaseTask(pMeta, pTask);
char* p = NULL; char* p = NULL;
streamTaskGetStatus(pTask, &p); streamTaskGetStatus(pTask, &p);
tqDebug("s-task:%s status:%s, stage:%d recv task check req(reqId:0x%" PRIx64 ") task:0x%x (vgId:%d), check_status:%d", tqDebug("s-task:%s status:%s, stage:%"PRId64" recv task check req(reqId:0x%" PRIx64 ") task:0x%x (vgId:%d), check_status:%d",
pTask->id.idStr, p, rsp.oldStage, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status); pTask->id.idStr, p, rsp.oldStage, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status);
} else { } else {
rsp.status = TASK_DOWNSTREAM_NOT_READY; rsp.status = TASK_DOWNSTREAM_NOT_READY;
@ -1136,16 +1131,10 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) {
// let's decide which step should be executed now // let's decide which step should be executed now
if (pTask->execInfo.step1Start == 0) { if (pTask->execInfo.step1Start == 0) {
ASSERT(pTask->status.pauseAllowed == false);
int64_t ts = taosGetTimestampMs(); int64_t ts = taosGetTimestampMs();
pTask->execInfo.step1Start = ts; pTask->execInfo.step1Start = ts;
tqDebug("s-task:%s start scan-history stage(step 1), status:%s, step1 startTs:%" PRId64, id, pStatus, ts); tqDebug("s-task:%s start scan-history stage(step 1), status:%s, step1 startTs:%" PRId64, id, pStatus, ts);
// NOTE: in case of stream task, scan-history data in wal is not allowed to pause
if (pTask->info.fillHistory == 1) {
streamTaskEnablePause(pTask);
}
} else { } else {
if (pTask->execInfo.step2Start == 0) { if (pTask->execInfo.step2Start == 0) {
tqDebug("s-task:%s continue exec scan-history(step1), original step1 startTs:%" PRId64 ", already elapsed:%.2fs", tqDebug("s-task:%s continue exec scan-history(step1), original step1 startTs:%" PRId64 ", already elapsed:%.2fs",
@ -1367,6 +1356,7 @@ int32_t tqProcessTaskDispatchReq(STQ* pTq, SRpcMsg* pMsg, bool exec) {
if (pTask) { if (pTask) {
SRpcMsg rsp = {.info = pMsg->info, .code = 0}; SRpcMsg rsp = {.info = pMsg->info, .code = 0};
streamProcessDispatchMsg(pTask, &req, &rsp); streamProcessDispatchMsg(pTask, &req, &rsp);
tDeleteStreamDispatchReq(&req);
streamMetaReleaseTask(pTq->pStreamMeta, pTask); streamMetaReleaseTask(pTq->pStreamMeta, pTask);
return 0; return 0;
} else { } else {
@ -1605,6 +1595,7 @@ int32_t vnodeEnqueueStreamMsg(SVnode* pVnode, SRpcMsg* pMsg) {
if (pTask != NULL) { if (pTask != NULL) {
SRpcMsg rsp = {.info = pMsg->info, .code = 0}; SRpcMsg rsp = {.info = pMsg->info, .code = 0};
streamProcessDispatchMsg(pTask, &req, &rsp); streamProcessDispatchMsg(pTask, &req, &rsp);
tDeleteStreamDispatchReq(&req);
streamMetaReleaseTask(pTq->pStreamMeta, pTask); streamMetaReleaseTask(pTq->pStreamMeta, pTask);
rpcFreeCont(pMsg->pCont); rpcFreeCont(pMsg->pCont);
taosFreeQitem(pMsg); taosFreeQitem(pMsg);

View File

@ -15,6 +15,7 @@
#include "tq.h" #include "tq.h"
#include "vnd.h" #include "vnd.h"
#include "stream.h"
#define MAX_REPEAT_SCAN_THRESHOLD 3 #define MAX_REPEAT_SCAN_THRESHOLD 3
#define SCAN_WAL_IDLE_DURATION 100 #define SCAN_WAL_IDLE_DURATION 100
@ -212,8 +213,10 @@ int32_t tqScanWalAsync(STQ* pTq, bool ckPause) {
int32_t vgId = TD_VID(pTq->pVnode); int32_t vgId = TD_VID(pTq->pVnode);
SStreamMeta* pMeta = pTq->pStreamMeta; SStreamMeta* pMeta = pTq->pStreamMeta;
bool alreadyRestored = pTq->pVnode->restored;
// do not launch the stream tasks, if it is a follower or not restored vnode. // do not launch the stream tasks, if it is a follower or not restored vnode.
if (!(vnodeIsRoleLeader(pTq->pVnode) && pTq->pVnode->restored)) { if (!(vnodeIsRoleLeader(pTq->pVnode) && alreadyRestored)) {
return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
} }
@ -255,7 +258,9 @@ int32_t tqScanWalAsync(STQ* pTq, bool ckPause) {
return -1; return -1;
} }
tqDebug("vgId:%d create msg to start wal scan to launch stream tasks, numOfTasks:%d", vgId, numOfTasks); tqDebug("vgId:%d create msg to start wal scan to launch stream tasks, numOfTasks:%d, restored:%d", vgId, numOfTasks,
alreadyRestored);
pRunReq->head.vgId = vgId; pRunReq->head.vgId = vgId;
pRunReq->streamId = 0; pRunReq->streamId = 0;
pRunReq->taskId = STREAM_EXEC_EXTRACT_DATA_IN_WAL_ID; pRunReq->taskId = STREAM_EXEC_EXTRACT_DATA_IN_WAL_ID;

View File

@ -104,7 +104,7 @@ static int32_t tsdbSaveFSToFile(STsdbFS *pFS, const char *fname) {
taosCalcChecksumAppend(0, pData, size); taosCalcChecksumAppend(0, pData, size);
// save to file // save to file
TdFilePtr pFD = taosOpenFile(fname, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC); TdFilePtr pFD = taosOpenFile(fname, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC | TD_FILE_WRITE_THROUGH);
if (pFD == NULL) { if (pFD == NULL) {
code = TAOS_SYSTEM_ERROR(errno); code = TAOS_SYSTEM_ERROR(errno);
TSDB_CHECK_CODE(code, lino, _exit); TSDB_CHECK_CODE(code, lino, _exit);

View File

@ -85,7 +85,7 @@ static int32_t save_json(const cJSON *json, const char *fname) {
char *data = cJSON_PrintUnformatted(json); char *data = cJSON_PrintUnformatted(json);
if (data == NULL) return TSDB_CODE_OUT_OF_MEMORY; if (data == NULL) return TSDB_CODE_OUT_OF_MEMORY;
TdFilePtr fp = taosOpenFile(fname, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC); TdFilePtr fp = taosOpenFile(fname, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC | TD_FILE_WRITE_THROUGH);
if (fp == NULL) { if (fp == NULL) {
code = TAOS_SYSTEM_ERROR(code); code = TAOS_SYSTEM_ERROR(code);
goto _exit; goto _exit;
@ -1186,11 +1186,6 @@ const char *gFSBgTaskName[] = {NULL, "MERGE", "RETENTION", "COMPACT"};
static int32_t tsdbFSRunBgTask(void *arg) { static int32_t tsdbFSRunBgTask(void *arg) {
STFSBgTask *task = (STFSBgTask *)arg; STFSBgTask *task = (STFSBgTask *)arg;
STFileSystem *fs = task->fs; STFileSystem *fs = task->fs;
STFileSet *fset;
tsdbFSGetFSet(fs, task->fid, &fset);
ASSERT(fset != NULL && fset->bgTaskRunning == task);
task->launchTime = taosGetTimestampMs(); task->launchTime = taosGetTimestampMs();
task->run(task->arg); task->run(task->arg);
@ -1203,6 +1198,10 @@ static int32_t tsdbFSRunBgTask(void *arg) {
taosThreadMutexLock(&fs->tsdb->mutex); taosThreadMutexLock(&fs->tsdb->mutex);
STFileSet *fset = NULL;
tsdbFSGetFSet(fs, task->fid, &fset);
ASSERT(fset != NULL && fset->bgTaskRunning == task);
// free last // free last
tsdbDoDoneBgTask(fs, task); tsdbDoDoneBgTask(fs, task);
fset->bgTaskRunning = NULL; fset->bgTaskRunning = NULL;

View File

@ -177,7 +177,7 @@ int32_t tsdbDFileRollback(STsdb *pTsdb, SDFileSet *pSet, EDataFileT ftype) {
taosCalcChecksumAppend(0, hdr, TSDB_FHDR_SIZE); taosCalcChecksumAppend(0, hdr, TSDB_FHDR_SIZE);
// open // open
pFD = taosOpenFile(fname, TD_FILE_WRITE); pFD = taosOpenFile(fname, TD_FILE_WRITE | TD_FILE_WRITE_THROUGH);
if (pFD == NULL) { if (pFD == NULL) {
code = TAOS_SYSTEM_ERROR(errno); code = TAOS_SYSTEM_ERROR(errno);
goto _err; goto _err;

View File

@ -572,8 +572,13 @@ static int32_t doLoadFileBlock(STsdbReader* pReader, SArray* pIndexList, SBlockN
if (isEmptyQueryTimeWindow(&w)) { if (isEmptyQueryTimeWindow(&w)) {
k += 1; k += 1;
if (k >= numOfTables) {
break;
} else {
continue; continue;
} }
}
// 1. time range check // 1. time range check
if (pRecord->firstKey > w.ekey || pRecord->lastKey < w.skey) { if (pRecord->firstKey > w.ekey || pRecord->lastKey < w.skey) {

View File

@ -177,7 +177,7 @@ int vnodeSaveInfo(const char *dir, const SVnodeInfo *pInfo) {
} }
// save info to a vnode_tmp.json // save info to a vnode_tmp.json
pFile = taosOpenFile(fname, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); pFile = taosOpenFile(fname, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC | TD_FILE_WRITE_THROUGH);
if (pFile == NULL) { if (pFile == NULL) {
vError("failed to open info file:%s for write:%s", fname, terrstr()); vError("failed to open info file:%s for write:%s", fname, terrstr());
terrno = TAOS_SYSTEM_ERROR(errno); terrno = TAOS_SYSTEM_ERROR(errno);

View File

@ -293,6 +293,9 @@ typedef struct STableMergeScanInfo {
int32_t readIdx; int32_t readIdx;
SSDataBlock* pResBlock; SSDataBlock* pResBlock;
SSampleExecInfo sample; // sample execution info SSampleExecInfo sample; // sample execution info
SSHashObj* mTableNumRows; // uid->num of table rows
SHashObj* mSkipTables;
int64_t mergeLimit;
SSortExecInfo sortExecInfo; SSortExecInfo sortExecInfo;
} STableMergeScanInfo; } STableMergeScanInfo;

View File

@ -3200,6 +3200,27 @@ _error:
return NULL; return NULL;
} }
static int32_t tableMergeScanDoSkipTable(STableMergeScanInfo* pInfo, SSDataBlock* pBlock) {
int64_t nRows = 0;
void* pNum = tSimpleHashGet(pInfo->mTableNumRows, &pBlock->info.id.uid, sizeof(pBlock->info.id.uid));
if (pNum == NULL) {
nRows = pBlock->info.rows;
tSimpleHashPut(pInfo->mTableNumRows, &pBlock->info.id.uid, sizeof(pBlock->info.id.uid), &nRows, sizeof(nRows));
} else {
*(int64_t*)pNum = *(int64_t*)pNum + pBlock->info.rows;
}
if (nRows >= pInfo->mergeLimit) {
if (pInfo->mSkipTables == NULL) {
pInfo->mSkipTables = taosHashInit(pInfo->tableEndIndex - pInfo->tableStartIndex + 1,
taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT), false, HASH_NO_LOCK);
}
int bSkip = 1;
taosHashPut(pInfo->mSkipTables, &pBlock->info.id.uid, sizeof(pBlock->info.id.uid), &bSkip, sizeof(bSkip));
}
return TSDB_CODE_SUCCESS;
}
static SSDataBlock* getBlockForTableMergeScan(void* param) { static SSDataBlock* getBlockForTableMergeScan(void* param) {
STableMergeScanSortSourceParam* source = param; STableMergeScanSortSourceParam* source = param;
SOperatorInfo* pOperator = source->pOperator; SOperatorInfo* pOperator = source->pOperator;
@ -3257,6 +3278,10 @@ static SSDataBlock* getBlockForTableMergeScan(void* param) {
pBlock->info.id.groupId = tableListGetTableGroupId(pInfo->base.pTableListInfo, pBlock->info.id.uid); pBlock->info.id.groupId = tableListGetTableGroupId(pInfo->base.pTableListInfo, pBlock->info.id.uid);
if (pInfo->mergeLimit != -1) {
tableMergeScanDoSkipTable(pInfo, pBlock);
}
pOperator->resultInfo.totalRows += pBlock->info.rows; pOperator->resultInfo.totalRows += pBlock->info.rows;
pInfo->base.readRecorder.elapsedTime += (taosGetTimestampUs() - st) / 1000.0; pInfo->base.readRecorder.elapsedTime += (taosGetTimestampUs() - st) / 1000.0;
@ -3316,22 +3341,20 @@ int32_t startGroupTableMergeScan(SOperatorInfo* pOperator) {
int32_t tableStartIdx = pInfo->tableStartIndex; int32_t tableStartIdx = pInfo->tableStartIndex;
int32_t tableEndIdx = pInfo->tableEndIndex; int32_t tableEndIdx = pInfo->tableEndIndex;
bool hasLimit = pInfo->limitInfo.limit.limit != -1 || pInfo->limitInfo.limit.offset != -1; tSimpleHashClear(pInfo->mTableNumRows);
int64_t mergeLimit = -1;
if (hasLimit) {
mergeLimit = pInfo->limitInfo.limit.limit + pInfo->limitInfo.limit.offset;
}
size_t szRow = blockDataGetRowSize(pInfo->pResBlock); size_t szRow = blockDataGetRowSize(pInfo->pResBlock);
if (hasLimit) { // if (pInfo->mergeLimit != -1) {
pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_SINGLESOURCE_SORT, -1, -1, // pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_SINGLESOURCE_SORT, -1, -1,
NULL, pTaskInfo->id.str, mergeLimit, szRow+8, tsPQSortMemThreshold * 1024* 1024); // NULL, pTaskInfo->id.str, pInfo->mergeLimit, szRow+8, tsPQSortMemThreshold * 1024* 1024);
} else { // } else
{
pInfo->sortBufSize = 2048 * pInfo->bufPageSize; pInfo->sortBufSize = 2048 * pInfo->bufPageSize;
int32_t numOfBufPage = pInfo->sortBufSize / pInfo->bufPageSize; int32_t numOfBufPage = pInfo->sortBufSize / pInfo->bufPageSize;
pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_BLOCK_TS_MERGE, pInfo->bufPageSize, numOfBufPage, pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_BLOCK_TS_MERGE, pInfo->bufPageSize, numOfBufPage,
pInfo->pSortInputBlock, pTaskInfo->id.str, 0, 0, 0); pInfo->pSortInputBlock, pTaskInfo->id.str, 0, 0, 0);
tsortSetMergeLimit(pInfo->pSortHandle, mergeLimit); tsortSetMergeLimit(pInfo->pSortHandle, pInfo->mergeLimit);
tsortSetAbortCheckFn(pInfo->pSortHandle, isTaskKilled, pOperator->pTaskInfo); tsortSetAbortCheckFn(pInfo->pSortHandle, isTaskKilled, pOperator->pTaskInfo);
} }
@ -3343,7 +3366,8 @@ int32_t startGroupTableMergeScan(SOperatorInfo* pOperator) {
STableMergeScanSortSourceParam *param = taosMemoryCalloc(1, sizeof(STableMergeScanSortSourceParam)); STableMergeScanSortSourceParam *param = taosMemoryCalloc(1, sizeof(STableMergeScanSortSourceParam));
param->pOperator = pOperator; param->pOperator = pOperator;
STableKeyInfo* startKeyInfo = tableListGetInfo(pInfo->base.pTableListInfo, tableStartIdx); STableKeyInfo* startKeyInfo = tableListGetInfo(pInfo->base.pTableListInfo, tableStartIdx);
pAPI->tsdReader.tsdReaderOpen(pHandle->vnode, &pInfo->base.cond, startKeyInfo, numOfTable, pInfo->pReaderBlock, (void**)&pInfo->base.dataReader, GET_TASKID(pTaskInfo), false, NULL); pAPI->tsdReader.tsdReaderOpen(pHandle->vnode, &pInfo->base.cond, startKeyInfo, numOfTable, pInfo->pReaderBlock,
(void**)&pInfo->base.dataReader, GET_TASKID(pTaskInfo), false, &pInfo->mSkipTables);
SSortSource* ps = taosMemoryCalloc(1, sizeof(SSortSource)); SSortSource* ps = taosMemoryCalloc(1, sizeof(SSortSource));
ps->param = param; ps->param = param;
@ -3385,6 +3409,8 @@ int32_t stopGroupTableMergeScan(SOperatorInfo* pOperator) {
pInfo->pSortHandle = NULL; pInfo->pSortHandle = NULL;
resetLimitInfoForNextGroup(&pInfo->limitInfo); resetLimitInfoForNextGroup(&pInfo->limitInfo);
taosHashCleanup(pInfo->mSkipTables);
pInfo->mSkipTables = NULL;
return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
} }
@ -3493,7 +3519,10 @@ void destroyTableMergeScanOperatorInfo(void* param) {
taosArrayDestroy(pTableScanInfo->sortSourceParams); taosArrayDestroy(pTableScanInfo->sortSourceParams);
tsortDestroySortHandle(pTableScanInfo->pSortHandle); tsortDestroySortHandle(pTableScanInfo->pSortHandle);
pTableScanInfo->pSortHandle = NULL; pTableScanInfo->pSortHandle = NULL;
tSimpleHashCleanup(pTableScanInfo->mTableNumRows);
pTableScanInfo->mTableNumRows = NULL;
taosHashCleanup(pTableScanInfo->mSkipTables);
pTableScanInfo->mSkipTables = NULL;
destroyTableScanBase(&pTableScanInfo->base, &pTableScanInfo->base.readerAPI); destroyTableScanBase(&pTableScanInfo->base, &pTableScanInfo->base.readerAPI);
pTableScanInfo->pResBlock = blockDataDestroy(pTableScanInfo->pResBlock); pTableScanInfo->pResBlock = blockDataDestroy(pTableScanInfo->pResBlock);
@ -3583,7 +3612,14 @@ SOperatorInfo* createTableMergeScanOperatorInfo(STableScanPhysiNode* pTableScanN
pInfo->pSortInfo = generateSortByTsInfo(pInfo->base.matchInfo.pList, pInfo->base.cond.order); pInfo->pSortInfo = generateSortByTsInfo(pInfo->base.matchInfo.pList, pInfo->base.cond.order);
pInfo->pSortInputBlock = createOneDataBlock(pInfo->pResBlock, false); pInfo->pSortInputBlock = createOneDataBlock(pInfo->pResBlock, false);
initLimitInfo(pTableScanNode->scan.node.pLimit, pTableScanNode->scan.node.pSlimit, &pInfo->limitInfo); initLimitInfo(pTableScanNode->scan.node.pLimit, pTableScanNode->scan.node.pSlimit, &pInfo->limitInfo);
pInfo->mTableNumRows = tSimpleHashInit(1024,
taosGetDefaultHashFunction(TSDB_DATA_TYPE_UBIGINT));
pInfo->mergeLimit = -1;
bool hasLimit = pInfo->limitInfo.limit.limit != -1 || pInfo->limitInfo.limit.offset != -1;
if (hasLimit) {
pInfo->mergeLimit = pInfo->limitInfo.limit.limit + pInfo->limitInfo.limit.offset;
pInfo->mSkipTables = NULL;
}
pInfo->pReaderBlock = createOneDataBlock(pInfo->pResBlock, false); pInfo->pReaderBlock = createOneDataBlock(pInfo->pResBlock, false);
int32_t rowSize = pInfo->pResBlock->info.rowSize; int32_t rowSize = pInfo->pResBlock->info.rowSize;

View File

@ -328,6 +328,7 @@ static int32_t sifInitParam(SNode *node, SIFParam *param, SIFCtx *ctx) {
SIF_ERR_RET(scalarGenerateSetFromList((void **)&param->pFilter, node, nl->node.resType.type)); SIF_ERR_RET(scalarGenerateSetFromList((void **)&param->pFilter, node, nl->node.resType.type));
if (taosHashPut(ctx->pRes, &node, POINTER_BYTES, param, sizeof(*param))) { if (taosHashPut(ctx->pRes, &node, POINTER_BYTES, param, sizeof(*param))) {
taosHashCleanup(param->pFilter); taosHashCleanup(param->pFilter);
param->pFilter = NULL;
indexError("taosHashPut nodeList failed, size:%d", (int32_t)sizeof(*param)); indexError("taosHashPut nodeList failed, size:%d", (int32_t)sizeof(*param));
SIF_ERR_RET(TSDB_CODE_OUT_OF_MEMORY); SIF_ERR_RET(TSDB_CODE_OUT_OF_MEMORY);
} }

View File

@ -1447,14 +1447,18 @@ static int32_t dataTypeComp(const SDataType* l, const SDataType* r) {
static EDealRes translateOperator(STranslateContext* pCxt, SOperatorNode* pOp) { static EDealRes translateOperator(STranslateContext* pCxt, SOperatorNode* pOp) {
if (isMultiResFunc(pOp->pLeft)) { if (isMultiResFunc(pOp->pLeft)) {
return generateDealNodeErrMsg(pCxt, TSDB_CODE_PAR_WRONG_VALUE_TYPE, ((SExprNode*)(pOp->pLeft))->aliasName); generateDealNodeErrMsg(pCxt, TSDB_CODE_PAR_WRONG_VALUE_TYPE, ((SExprNode*)(pOp->pLeft))->aliasName);
return DEAL_RES_ERROR;
} }
if (isMultiResFunc(pOp->pRight)) { if (isMultiResFunc(pOp->pRight)) {
return generateDealNodeErrMsg(pCxt, TSDB_CODE_PAR_WRONG_VALUE_TYPE, ((SExprNode*)(pOp->pRight))->aliasName); generateDealNodeErrMsg(pCxt, TSDB_CODE_PAR_WRONG_VALUE_TYPE, ((SExprNode*)(pOp->pRight))->aliasName);
return DEAL_RES_ERROR;
} }
if (TSDB_CODE_SUCCESS != scalarGetOperatorResultType(pOp)) { int32_t res = scalarGetOperatorResultType(pOp);
return generateDealNodeErrMsg(pCxt, TSDB_CODE_PAR_WRONG_VALUE_TYPE, pOp->node.aliasName); if (TSDB_CODE_SUCCESS != res) {
pCxt->errCode = res;
return DEAL_RES_ERROR;
} }
return DEAL_RES_CONTINUE; return DEAL_RES_CONTINUE;
@ -7972,7 +7976,7 @@ static int32_t translateDropStream(STranslateContext* pCxt, SDropStreamStmt* pSt
tNameGetFullDbName(&name, dropReq.name); tNameGetFullDbName(&name, dropReq.name);
dropReq.igNotExists = pStmt->ignoreNotExists; dropReq.igNotExists = pStmt->ignoreNotExists;
int32_t code = buildCmdMsg(pCxt, TDMT_MND_DROP_STREAM, (FSerializeFunc)tSerializeSMDropStreamReq, &dropReq); int32_t code = buildCmdMsg(pCxt, TDMT_MND_DROP_STREAM, (FSerializeFunc)tSerializeSMDropStreamReq, &dropReq);
tFreeSMDropStreamReq(&dropReq); tFreeMDropStreamReq(&dropReq);
return code; return code;
} }

View File

@ -208,6 +208,7 @@ void sclFreeParam(SScalarParam *param) {
if (param->columnData != NULL) { if (param->columnData != NULL) {
colDataDestroy(param->columnData); colDataDestroy(param->columnData);
taosMemoryFreeClear(param->columnData); taosMemoryFreeClear(param->columnData);
param->columnData = NULL;
} }
if (param->pHashFilter != NULL) { if (param->pHashFilter != NULL) {
@ -845,6 +846,7 @@ int32_t sclExecOperator(SOperatorNode *node, SScalarCtx *ctx, SScalarParam *outp
SScalarParam *params = NULL; SScalarParam *params = NULL;
int32_t rowNum = 0; int32_t rowNum = 0;
int32_t code = 0; int32_t code = 0;
int32_t paramNum = 0;
// json not support in in operator // json not support in in operator
if (nodeType(node->pLeft) == QUERY_NODE_VALUE) { if (nodeType(node->pLeft) == QUERY_NODE_VALUE) {
@ -865,7 +867,7 @@ int32_t sclExecOperator(SOperatorNode *node, SScalarCtx *ctx, SScalarParam *outp
_bin_scalar_fn_t OperatorFn = getBinScalarOperatorFn(node->opType); _bin_scalar_fn_t OperatorFn = getBinScalarOperatorFn(node->opType);
int32_t paramNum = scalarGetOperatorParamNum(node->opType); paramNum = scalarGetOperatorParamNum(node->opType);
SScalarParam *pLeft = &params[0]; SScalarParam *pLeft = &params[0];
SScalarParam *pRight = paramNum > 1 ? &params[1] : NULL; SScalarParam *pRight = paramNum > 1 ? &params[1] : NULL;

View File

@ -75,7 +75,8 @@ struct STokenBucket {
double quotaCapacity; // available capacity for maximum input size, KiloBytes per Second double quotaCapacity; // available capacity for maximum input size, KiloBytes per Second
double quotaRemain; // not consumed bytes per second double quotaRemain; // not consumed bytes per second
double quotaRate; // number of token per second double quotaRate; // number of token per second
int64_t fillTimestamp; // fill timestamp int64_t tokenFillTimestamp; // fill timestamp
int64_t quotaFillTimestamp; // fill timestamp
}; };
struct SStreamQueue { struct SStreamQueue {

View File

@ -283,7 +283,6 @@ int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, S
tmsgSendRsp(pRsp); tmsgSendRsp(pRsp);
} }
tDeleteStreamDispatchReq(pReq);
streamSchedExec(pTask); streamSchedExec(pTask);
return 0; return 0;

View File

@ -519,6 +519,7 @@ void* streamBackendInit(const char* streamPath, int64_t chkpId, int32_t vgId) {
if (err != NULL) { if (err != NULL) {
stError("failed to open rocksdb, path:%s, reason:%s", backendPath, err); stError("failed to open rocksdb, path:%s, reason:%s", backendPath, err);
taosMemoryFreeClear(err); taosMemoryFreeClear(err);
rocksdb_list_column_families_destroy(cfs, nCf);
goto _EXIT; goto _EXIT;
} }
} else { } else {

View File

@ -158,6 +158,7 @@ static int32_t continueDispatchCheckpointBlock(SStreamDataBlock* pBlock, SStream
int32_t code = taosWriteQitem(pTask->outputq.queue->pQueue, pBlock); int32_t code = taosWriteQitem(pTask->outputq.queue->pQueue, pBlock);
if (code == 0) { if (code == 0) {
ASSERT(pTask->chkInfo.dispatchCheckpointTrigger == false);
streamDispatchStreamBlock(pTask); streamDispatchStreamBlock(pTask);
} else { } else {
stError("s-task:%s failed to put checkpoint into outputQ, code:%s", pTask->id.idStr, tstrerror(code)); stError("s-task:%s failed to put checkpoint into outputQ, code:%s", pTask->id.idStr, tstrerror(code));
@ -278,6 +279,7 @@ void streamTaskClearCheckInfo(SStreamTask* pTask) {
pTask->chkInfo.startTs = 0; // clear the recorded start time pTask->chkInfo.startTs = 0; // clear the recorded start time
pTask->checkpointNotReadyTasks = 0; pTask->checkpointNotReadyTasks = 0;
pTask->checkpointAlignCnt = 0; pTask->checkpointAlignCnt = 0;
pTask->chkInfo.dispatchCheckpointTrigger = false;
streamTaskOpenAllUpstreamInput(pTask); // open inputQ for all upstream tasks streamTaskOpenAllUpstreamInput(pTask); // open inputQ for all upstream tasks
} }

View File

@ -371,7 +371,7 @@ static int32_t doBuildDispatchMsg(SStreamTask* pTask, const SStreamDataBlock* pD
pTask->msgInfo.pData = pReqs; pTask->msgInfo.pData = pReqs;
} }
stDebug("s-task:%s build dispatch msg success, msgId:%d", pTask->id.idStr, pTask->execInfo.dispatch); stDebug("s-task:%s build dispatch msg success, msgId:%d, stage:%" PRId64, pTask->id.idStr, pTask->execInfo.dispatch, pTask->pMeta->stage);
return code; return code;
} }
@ -593,6 +593,12 @@ int32_t streamDispatchStreamBlock(SStreamTask* pTask) {
return 0; return 0;
} }
if (pTask->chkInfo.dispatchCheckpointTrigger) {
stDebug("s-task:%s already send checkpoint trigger, not dispatch anymore", id);
atomic_store_8(&pTask->outputq.status, TASK_OUTPUT_STATUS__NORMAL);
return 0;
}
ASSERT(pTask->msgInfo.pData == NULL); ASSERT(pTask->msgInfo.pData == NULL);
stDebug("s-task:%s start to dispatch msg, set output status:%d", id, pTask->outputq.status); stDebug("s-task:%s start to dispatch msg, set output status:%d", id, pTask->outputq.status);
@ -926,8 +932,8 @@ int32_t streamAddCheckpointReadyMsg(SStreamTask* pTask, int32_t upstreamTaskId,
initRpcMsg(&info.msg, TDMT_STREAM_TASK_CHECKPOINT_READY, buf, tlen + sizeof(SMsgHead)); initRpcMsg(&info.msg, TDMT_STREAM_TASK_CHECKPOINT_READY, buf, tlen + sizeof(SMsgHead));
info.msg.info.noResp = 1; // refactor later. info.msg.info.noResp = 1; // refactor later.
stDebug("s-task:%s (level:%d) prepare checkpoint ready msg to upstream s-task:0x%" PRIx64 ":0x%x (vgId:%d) idx:%d", stDebug("s-task:%s (level:%d) prepare checkpoint ready msg to upstream s-task:0x%" PRIx64 ":0x%x (vgId:%d) idx:%d, vgId:%d",
pTask->id.idStr, pTask->info.taskLevel, req.streamId, req.upstreamTaskId, req.upstreamNodeId, index); pTask->id.idStr, pTask->info.taskLevel, req.streamId, req.upstreamTaskId, req.upstreamNodeId, index, req.upstreamNodeId);
if (pTask->pReadyMsgList == NULL) { if (pTask->pReadyMsgList == NULL) {
pTask->pReadyMsgList = taosArrayInit(4, sizeof(SStreamChkptReadyInfo)); pTask->pReadyMsgList = taosArrayInit(4, sizeof(SStreamChkptReadyInfo));
@ -1039,30 +1045,14 @@ int32_t streamNotifyUpstreamContinue(SStreamTask* pTask) {
return 0; return 0;
} }
static void dispatchDataInFuture(void* param, void* tmrId) {
SStreamTask* pTask = param;
if (streamTaskShouldStop(pTask)) {
int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1);
stDebug("s-task:%s should stop, abort from timer, ref:%d", pTask->id.idStr, ref);
return;
}
ETaskStatus status = streamTaskGetStatus(pTask, NULL);
if (status == TASK_STATUS__CK) {
stDebug("s-task:%s in checkpoint status, wait for 500ms to dispatch data downstream", pTask->id.idStr);
taosTmrReset(dispatchDataInFuture, 500, pTask, streamEnv.timer, &pTask->msgInfo.pTimer);
} else {
int32_t ref = atomic_sub_fetch_32(&pTask->status.timerActive, 1);
stDebug("s-task:%s start to dispatch data, jump out of timer, ref:%d", pTask->id.idStr, ref);
streamDispatchStreamBlock(pTask);
}
}
// this message has been sent successfully, let's try next one. // this message has been sent successfully, let's try next one.
static int32_t handleDispatchSuccessRsp(SStreamTask* pTask, int32_t downstreamId) { static int32_t handleDispatchSuccessRsp(SStreamTask* pTask, int32_t downstreamId) {
destroyDispatchMsg(pTask->msgInfo.pData, getNumOfDispatchBranch(pTask)); destroyDispatchMsg(pTask->msgInfo.pData, getNumOfDispatchBranch(pTask));
bool delayDispatch = (pTask->msgInfo.dispatchMsgType == STREAM_INPUT__CHECKPOINT_TRIGGER); bool delayDispatch = (pTask->msgInfo.dispatchMsgType == STREAM_INPUT__CHECKPOINT_TRIGGER);
if (delayDispatch) {
pTask->chkInfo.dispatchCheckpointTrigger = true;
}
pTask->msgInfo.pData = NULL; pTask->msgInfo.pData = NULL;
pTask->msgInfo.dispatchMsgType = 0; pTask->msgInfo.dispatchMsgType = 0;
@ -1083,13 +1073,7 @@ static int32_t handleDispatchSuccessRsp(SStreamTask* pTask, int32_t downstreamId
// otherwise, continue dispatch the first block to down stream task in pipeline // otherwise, continue dispatch the first block to down stream task in pipeline
if (delayDispatch) { if (delayDispatch) {
int32_t ref = atomic_add_fetch_32(&pTask->status.timerActive, 1); return 0;
stDebug("s-task:%s in checkpoint status, add in timer, try dispatch data in 500ms, ref:%d", pTask->id.idStr, ref);
if (pTask->msgInfo.pTimer != NULL) {
taosTmrReset(dispatchDataInFuture, 500, pTask, streamEnv.timer, &pTask->msgInfo.pTimer);
} else {
pTask->msgInfo.pTimer = taosTmrStart(dispatchDataInFuture, 500, pTask, streamEnv.timer);
}
} else { } else {
streamDispatchStreamBlock(pTask); streamDispatchStreamBlock(pTask);
} }
@ -1102,6 +1086,13 @@ int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, i
int32_t vgId = pTask->pMeta->vgId; int32_t vgId = pTask->pMeta->vgId;
int32_t msgId = pTask->execInfo.dispatch; int32_t msgId = pTask->execInfo.dispatch;
#if 0
// for test purpose, build the failure case
if (pTask->msgInfo.dispatchMsgType == STREAM_INPUT__CHECKPOINT_TRIGGER) {
pRsp->inputStatus = TASK_INPUT_STATUS__REFUSED;
}
#endif
// follower not handle the dispatch rsp // follower not handle the dispatch rsp
if ((pTask->pMeta->role == NODE_ROLE_FOLLOWER) || (pTask->status.downstreamReady != 1)) { if ((pTask->pMeta->role == NODE_ROLE_FOLLOWER) || (pTask->status.downstreamReady != 1)) {
stError("s-task:%s vgId:%d is follower or task just re-launched, not handle the dispatch rsp, discard it", id, vgId); stError("s-task:%s vgId:%d is follower or task just re-launched, not handle the dispatch rsp, discard it", id, vgId);
@ -1143,10 +1134,23 @@ int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, i
stWarn("s-task:%s inputQ of downstream task:0x%x(vgId:%d) is full, wait for %dms and retry dispatch", id, stWarn("s-task:%s inputQ of downstream task:0x%x(vgId:%d) is full, wait for %dms and retry dispatch", id,
pRsp->downstreamTaskId, pRsp->downstreamNodeId, DISPATCH_RETRY_INTERVAL_MS); pRsp->downstreamTaskId, pRsp->downstreamNodeId, DISPATCH_RETRY_INTERVAL_MS);
} else if (pRsp->inputStatus == TASK_INPUT_STATUS__REFUSED) { } else if (pRsp->inputStatus == TASK_INPUT_STATUS__REFUSED) {
// todo handle the agg task failure, add test case
if (pTask->msgInfo.dispatchMsgType == STREAM_INPUT__CHECKPOINT_TRIGGER &&
pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
stError("s-task:%s failed to dispatch checkpoint-trigger msg, checkpointId:%" PRId64
", set the current checkpoint failed, and send rsp to mnode",
id, pTask->checkpointingId);
{ // send checkpoint failure msg to mnode directly
pTask->chkInfo.failedId = pTask->checkpointingId; // record the latest failed checkpoint id
pTask->checkpointingId = pTask->checkpointingId;
streamTaskSendCheckpointSourceRsp(pTask);
}
} else {
stError("s-task:%s downstream task:0x%x(vgId:%d) refused the dispatch msg, treat it as success", id, stError("s-task:%s downstream task:0x%x(vgId:%d) refused the dispatch msg, treat it as success", id,
pRsp->downstreamTaskId, pRsp->downstreamNodeId); pRsp->downstreamTaskId, pRsp->downstreamNodeId);
} }
} }
}
int32_t leftRsp = 0; int32_t leftRsp = 0;
if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) {

View File

@ -48,6 +48,7 @@ static int32_t doOutputResultBlockImpl(SStreamTask* pTask, SStreamDataBlock* pBl
return code; return code;
} }
// checkpoint trigger will be checked
streamDispatchStreamBlock(pTask); streamDispatchStreamBlock(pTask);
} }

View File

@ -174,6 +174,7 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF
pMeta->ahandle = ahandle; pMeta->ahandle = ahandle;
pMeta->expandFunc = expandFunc; pMeta->expandFunc = expandFunc;
pMeta->stage = stage; pMeta->stage = stage;
pMeta->role = (vgId == SNODE_HANDLE) ? NODE_ROLE_LEADER : NODE_ROLE_UNINIT;
// send heartbeat every 5sec. // send heartbeat every 5sec.
pMeta->rid = taosAddRef(streamMetaId, pMeta); pMeta->rid = taosAddRef(streamMetaId, pMeta);
@ -204,7 +205,6 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF
} }
pMeta->streamBackendRid = taosAddRef(streamBackendId, pMeta->streamBackend); pMeta->streamBackendRid = taosAddRef(streamBackendId, pMeta->streamBackend);
pMeta->role = NODE_ROLE_UNINIT;
code = streamBackendLoadCheckpointInfo(pMeta); code = streamBackendLoadCheckpointInfo(pMeta);
taosInitRWLatch(&pMeta->lock); taosInitRWLatch(&pMeta->lock);
@ -784,7 +784,7 @@ int32_t tEncodeStreamHbMsg(SEncoder* pEncoder, const SStreamHbMsg* pReq) {
if (tEncodeI64(pEncoder, ps->id.streamId) < 0) return -1; if (tEncodeI64(pEncoder, ps->id.streamId) < 0) return -1;
if (tEncodeI32(pEncoder, ps->id.taskId) < 0) return -1; if (tEncodeI32(pEncoder, ps->id.taskId) < 0) return -1;
if (tEncodeI32(pEncoder, ps->status) < 0) return -1; if (tEncodeI32(pEncoder, ps->status) < 0) return -1;
if (tEncodeI32(pEncoder, ps->stage) < 0) return -1; if (tEncodeI64(pEncoder, ps->stage) < 0) return -1;
if (tEncodeI32(pEncoder, ps->nodeId) < 0) return -1; if (tEncodeI32(pEncoder, ps->nodeId) < 0) return -1;
if (tEncodeDouble(pEncoder, ps->inputQUsed) < 0) return -1; if (tEncodeDouble(pEncoder, ps->inputQUsed) < 0) return -1;
if (tEncodeDouble(pEncoder, ps->inputRate) < 0) return -1; if (tEncodeDouble(pEncoder, ps->inputRate) < 0) return -1;
@ -822,7 +822,7 @@ int32_t tDecodeStreamHbMsg(SDecoder* pDecoder, SStreamHbMsg* pReq) {
if (tDecodeI64(pDecoder, &entry.id.streamId) < 0) return -1; if (tDecodeI64(pDecoder, &entry.id.streamId) < 0) return -1;
if (tDecodeI32(pDecoder, &taskId) < 0) return -1; if (tDecodeI32(pDecoder, &taskId) < 0) return -1;
if (tDecodeI32(pDecoder, &entry.status) < 0) return -1; if (tDecodeI32(pDecoder, &entry.status) < 0) return -1;
if (tDecodeI32(pDecoder, &entry.stage) < 0) return -1; if (tDecodeI64(pDecoder, &entry.stage) < 0) return -1;
if (tDecodeI32(pDecoder, &entry.nodeId) < 0) return -1; if (tDecodeI32(pDecoder, &entry.nodeId) < 0) return -1;
if (tDecodeDouble(pDecoder, &entry.inputQUsed) < 0) return -1; if (tDecodeDouble(pDecoder, &entry.inputQUsed) < 0) return -1;
if (tDecodeDouble(pDecoder, &entry.inputRate) < 0) return -1; if (tDecodeDouble(pDecoder, &entry.inputRate) < 0) return -1;
@ -938,7 +938,7 @@ void metaHbToMnode(void* param, void* tmrId) {
SStreamHbMsg hbMsg = {0}; SStreamHbMsg hbMsg = {0};
SEpSet epset = {0}; SEpSet epset = {0};
bool hasMnodeEpset = false; bool hasMnodeEpset = false;
int32_t stage = 0; int64_t stage = 0;
streamMetaRLock(pMeta); streamMetaRLock(pMeta);
@ -1117,11 +1117,6 @@ void streamMetaStartHb(SStreamMeta* pMeta) {
metaHbToMnode(pRid, NULL); metaHbToMnode(pRid, NULL);
} }
void streamMetaInitForSnode(SStreamMeta* pMeta) {
pMeta->stage = 0;
pMeta->role = NODE_ROLE_LEADER;
}
void streamMetaResetStartInfo(STaskStartInfo* pStartInfo) { void streamMetaResetStartInfo(STaskStartInfo* pStartInfo) {
taosHashClear(pStartInfo->pReadyTaskSet); taosHashClear(pStartInfo->pReadyTaskSet);
taosHashClear(pStartInfo->pFailedTaskSet); taosHashClear(pStartInfo->pFailedTaskSet);

View File

@ -388,32 +388,36 @@ int32_t streamTaskInitTokenBucket(STokenBucket* pBucket, int32_t numCap, int32_t
pBucket->quotaCapacity = quotaRate * MAX_SMOOTH_BURST_RATIO; pBucket->quotaCapacity = quotaRate * MAX_SMOOTH_BURST_RATIO;
pBucket->quotaRemain = pBucket->quotaCapacity; pBucket->quotaRemain = pBucket->quotaCapacity;
pBucket->fillTimestamp = taosGetTimestampMs(); pBucket->tokenFillTimestamp = taosGetTimestampMs();
pBucket->quotaFillTimestamp = taosGetTimestampMs();
stDebug("s-task:%s sink quotaRate:%.2fMiB, numRate:%d", id, quotaRate, numRate); stDebug("s-task:%s sink quotaRate:%.2fMiB, numRate:%d", id, quotaRate, numRate);
return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
} }
static void fillTokenBucket(STokenBucket* pBucket, const char* id) { static void fillTokenBucket(STokenBucket* pBucket, const char* id) {
int64_t now = taosGetTimestampMs(); int64_t now = taosGetTimestampMs();
int64_t delta = now - pBucket->fillTimestamp;
int64_t deltaToken = now - pBucket->tokenFillTimestamp;
ASSERT(pBucket->numOfToken >= 0); ASSERT(pBucket->numOfToken >= 0);
int32_t incNum = (delta / 1000.0) * pBucket->numRate; int32_t incNum = (deltaToken / 1000.0) * pBucket->numRate;
if (incNum > 0) { if (incNum > 0) {
pBucket->numOfToken = TMIN(pBucket->numOfToken + incNum, pBucket->numCapacity); pBucket->numOfToken = TMIN(pBucket->numOfToken + incNum, pBucket->numCapacity);
pBucket->fillTimestamp = now; pBucket->tokenFillTimestamp = now;
} }
// increase the new available quota as time goes on // increase the new available quota as time goes on
double incSize = (delta / 1000.0) * pBucket->quotaRate; int64_t deltaQuota = now - pBucket->quotaFillTimestamp;
double incSize = (deltaQuota / 1000.0) * pBucket->quotaRate;
if (incSize > 0) { if (incSize > 0) {
pBucket->quotaRemain = TMIN(pBucket->quotaRemain + incSize, pBucket->quotaCapacity); pBucket->quotaRemain = TMIN(pBucket->quotaRemain + incSize, pBucket->quotaCapacity);
pBucket->fillTimestamp = now; pBucket->quotaFillTimestamp = now;
} }
if (incNum > 0 || incSize > 0) { if (incNum > 0 || incSize > 0) {
stTrace("token/quota available, token:%d inc:%d, quota:%.2fMiB inc:%.3fMiB, ts:%" PRId64 " idle:%" PRId64 "ms, %s", stTrace("token/quota available, token:%d inc:%d, token_TsDelta:%" PRId64
pBucket->numOfToken, incNum, pBucket->quotaRemain, incSize, now, delta, id); ", quota:%.2fMiB inc:%.3fMiB quotaTs:%" PRId64 " now:%" PRId64 "ms, %s",
pBucket->numOfToken, incNum, deltaToken, pBucket->quotaRemain, incSize, deltaQuota, now, id);
} }
} }

View File

@ -168,7 +168,6 @@ int32_t streamTaskStartScanHistory(SStreamTask* pTask) {
} else if (level == TASK_LEVEL__AGG) { } else if (level == TASK_LEVEL__AGG) {
if (pTask->info.fillHistory) { if (pTask->info.fillHistory) {
streamSetParamForScanHistory(pTask); streamSetParamForScanHistory(pTask);
streamTaskEnablePause(pTask);
} }
} else if (level == TASK_LEVEL__SINK) { } else if (level == TASK_LEVEL__SINK) {
stDebug("s-task:%s sink task do nothing to handle scan-history", pTask->id.idStr); stDebug("s-task:%s sink task do nothing to handle scan-history", pTask->id.idStr);
@ -290,10 +289,11 @@ static void recheckDownstreamTasks(void* param, void* tmrId) {
stDebug("s-task:%s complete send check in timer, ref:%d", pTask->id.idStr, ref); stDebug("s-task:%s complete send check in timer, ref:%d", pTask->id.idStr, ref);
} }
int32_t streamTaskCheckStatus(SStreamTask* pTask, int32_t upstreamTaskId, int32_t vgId, int64_t stage) { int32_t streamTaskCheckStatus(SStreamTask* pTask, int32_t upstreamTaskId, int32_t vgId, int64_t stage, int64_t* oldStage) {
SStreamChildEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, upstreamTaskId); SStreamChildEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, upstreamTaskId);
ASSERT(pInfo != NULL); ASSERT(pInfo != NULL);
*oldStage = pInfo->stage;
const char* id = pTask->id.idStr; const char* id = pTask->id.idStr;
if (stage == -1) { if (stage == -1) {
stDebug("s-task:%s receive check msg from upstream task:0x%x(vgId:%d), invalid stageId:%" PRId64 ", not ready", id, stDebug("s-task:%s receive check msg from upstream task:0x%x(vgId:%d), invalid stageId:%" PRId64 ", not ready", id,
@ -345,7 +345,6 @@ int32_t onNormalTaskReady(SStreamTask* pTask) {
stDebug("s-task:%s level:%d status:%s sched-status:%d", id, pTask->info.taskLevel, p, pTask->status.schedStatus); stDebug("s-task:%s level:%d status:%s sched-status:%d", id, pTask->info.taskLevel, p, pTask->status.schedStatus);
} }
streamTaskEnablePause(pTask);
return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
} }
@ -459,9 +458,9 @@ int32_t streamProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRs
if (pRsp->status == TASK_UPSTREAM_NEW_STAGE || pRsp->status == TASK_DOWNSTREAM_NOT_LEADER) { if (pRsp->status == TASK_UPSTREAM_NEW_STAGE || pRsp->status == TASK_DOWNSTREAM_NOT_LEADER) {
if (pRsp->status == TASK_UPSTREAM_NEW_STAGE) { if (pRsp->status == TASK_UPSTREAM_NEW_STAGE) {
stError( stError(
"s-task:%s vgId:%d self vnode-transfer/leader-change/restart detected, old stage:%d, current stage:%d, " "s-task:%s vgId:%d self vnode-transfer/leader-change/restart detected, old stage:%"PRId64", current stage:%"PRId64", "
"not check wait for downstream task nodeUpdate, and all tasks restart", "not check wait for downstream task nodeUpdate, and all tasks restart",
id, pRsp->upstreamNodeId, pRsp->oldStage, (int32_t)pTask->pMeta->stage); id, pRsp->upstreamNodeId, pRsp->oldStage, pTask->pMeta->stage);
} else { } else {
stError( stError(
"s-task:%s downstream taskId:0x%x (vgId:%d) not leader, self dispatch epset needs to be updated, not check " "s-task:%s downstream taskId:0x%x (vgId:%d) not leader, self dispatch epset needs to be updated, not check "
@ -476,7 +475,7 @@ int32_t streamProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRs
STaskRecheckInfo* pInfo = createRecheckInfo(pTask, pRsp); STaskRecheckInfo* pInfo = createRecheckInfo(pTask, pRsp);
int32_t ref = atomic_add_fetch_32(&pTask->status.timerActive, 1); int32_t ref = atomic_add_fetch_32(&pTask->status.timerActive, 1);
stDebug("s-task:%s downstream taskId:0x%x (vgId:%d) not ready, stage:%d, retry in 100ms, ref:%d ", id, stDebug("s-task:%s downstream taskId:0x%x (vgId:%d) not ready, stage:%"PRId64", retry in 100ms, ref:%d ", id,
pRsp->downstreamTaskId, pRsp->downstreamNodeId, pRsp->oldStage, ref); pRsp->downstreamTaskId, pRsp->downstreamNodeId, pRsp->oldStage, ref);
pInfo->checkTimer = taosTmrStart(recheckDownstreamTasks, CHECK_DOWNSTREAM_INTERVAL, pInfo, streamEnv.timer); pInfo->checkTimer = taosTmrStart(recheckDownstreamTasks, CHECK_DOWNSTREAM_INTERVAL, pInfo, streamEnv.timer);
} }
@ -659,9 +658,6 @@ int32_t streamProcessScanHistoryFinishRsp(SStreamTask* pTask) {
streamMetaCommit(pMeta); streamMetaCommit(pMeta);
streamMetaWUnLock(pMeta); streamMetaWUnLock(pMeta);
// history data scan in the stream time window finished, now let's enable the pause
streamTaskEnablePause(pTask);
// for source tasks, let's continue execute. // for source tasks, let's continue execute.
if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
streamSchedExec(pTask); streamSchedExec(pTask);
@ -926,7 +922,7 @@ int32_t tEncodeStreamTaskCheckRsp(SEncoder* pEncoder, const SStreamTaskCheckRsp*
if (tEncodeI32(pEncoder, pRsp->downstreamNodeId) < 0) return -1; if (tEncodeI32(pEncoder, pRsp->downstreamNodeId) < 0) return -1;
if (tEncodeI32(pEncoder, pRsp->downstreamTaskId) < 0) return -1; if (tEncodeI32(pEncoder, pRsp->downstreamTaskId) < 0) return -1;
if (tEncodeI32(pEncoder, pRsp->childId) < 0) return -1; if (tEncodeI32(pEncoder, pRsp->childId) < 0) return -1;
if (tEncodeI32(pEncoder, pRsp->oldStage) < 0) return -1; if (tEncodeI64(pEncoder, pRsp->oldStage) < 0) return -1;
if (tEncodeI8(pEncoder, pRsp->status) < 0) return -1; if (tEncodeI8(pEncoder, pRsp->status) < 0) return -1;
tEndEncode(pEncoder); tEndEncode(pEncoder);
return pEncoder->pos; return pEncoder->pos;
@ -941,7 +937,7 @@ int32_t tDecodeStreamTaskCheckRsp(SDecoder* pDecoder, SStreamTaskCheckRsp* pRsp)
if (tDecodeI32(pDecoder, &pRsp->downstreamNodeId) < 0) return -1; if (tDecodeI32(pDecoder, &pRsp->downstreamNodeId) < 0) return -1;
if (tDecodeI32(pDecoder, &pRsp->downstreamTaskId) < 0) return -1; if (tDecodeI32(pDecoder, &pRsp->downstreamTaskId) < 0) return -1;
if (tDecodeI32(pDecoder, &pRsp->childId) < 0) return -1; if (tDecodeI32(pDecoder, &pRsp->childId) < 0) return -1;
if (tDecodeI32(pDecoder, &pRsp->oldStage) < 0) return -1; if (tDecodeI64(pDecoder, &pRsp->oldStage) < 0) return -1;
if (tDecodeI8(pDecoder, &pRsp->status) < 0) return -1; if (tDecodeI8(pDecoder, &pRsp->status) < 0) return -1;
tEndDecode(pDecoder); tEndDecode(pDecoder);
return 0; return 0;
@ -1040,11 +1036,6 @@ void streamTaskResume(SStreamTask* pTask) {
} }
} }
void streamTaskEnablePause(SStreamTask* pTask) {
stDebug("s-task:%s enable task pause", pTask->id.idStr);
pTask->status.pauseAllowed = 1;
}
static void displayStatusInfo(SStreamMeta* pMeta, SHashObj* pTaskSet, bool succ) { static void displayStatusInfo(SStreamMeta* pMeta, SHashObj* pTaskSet, bool succ) {
int32_t vgId = pMeta->vgId; int32_t vgId = pMeta->vgId;
void* pIter = NULL; void* pIter = NULL;

View File

@ -1086,7 +1086,6 @@ _end:
} }
int32_t streamStatePutParName(SStreamState* pState, int64_t groupId, const char tbname[TSDB_TABLE_NAME_LEN]) { int32_t streamStatePutParName(SStreamState* pState, int64_t groupId, const char tbname[TSDB_TABLE_NAME_LEN]) {
stDebug("try to write to cf parname");
#ifdef USE_ROCKSDB #ifdef USE_ROCKSDB
if (tSimpleHashGetSize(pState->parNameMap) > MAX_TABLE_NAME_NUM) { if (tSimpleHashGetSize(pState->parNameMap) > MAX_TABLE_NAME_NUM) {
if (tSimpleHashGet(pState->parNameMap, &groupId, sizeof(int64_t)) == NULL) { if (tSimpleHashGet(pState->parNameMap, &groupId, sizeof(int64_t)) == NULL) {

View File

@ -269,6 +269,7 @@ int32_t streamTaskHandleEvent(SStreamTaskSM* pSM, EStreamTaskEvent event) {
pTask->id.idStr, pSM->current.name, GET_EVT_NAME(evt)); pTask->id.idStr, pSM->current.name, GET_EVT_NAME(evt));
taosMsleep(100); taosMsleep(100);
} else { } else {
// no active event trans exists, handle this event directly
pTrans = streamTaskFindTransform(pSM->current.state, event); pTrans = streamTaskFindTransform(pSM->current.state, event);
if (pTrans == NULL) { if (pTrans == NULL) {
stDebug("s-task:%s failed to handle event:%s", pTask->id.idStr, GET_EVT_NAME(event)); stDebug("s-task:%s failed to handle event:%s", pTask->id.idStr, GET_EVT_NAME(event));
@ -451,60 +452,43 @@ int32_t initStateTransferTable() {
return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
} }
//clang-format off
void doInitStateTransferTable(void) { void doInitStateTransferTable(void) {
streamTaskSMTrans = taosArrayInit(8, sizeof(STaskStateTrans)); streamTaskSMTrans = taosArrayInit(8, sizeof(STaskStateTrans));
// initialization event handle // initialization event handle
STaskStateTrans trans = createStateTransform(TASK_STATUS__UNINIT, TASK_STATUS__READY, TASK_EVENT_INIT, STaskStateTrans trans = createStateTransform(TASK_STATUS__UNINIT, TASK_STATUS__READY, TASK_EVENT_INIT, streamTaskInitStatus, onNormalTaskReady, false, false);
streamTaskInitStatus, onNormalTaskReady, false, false);
taosArrayPush(streamTaskSMTrans, &trans); taosArrayPush(streamTaskSMTrans, &trans);
trans = createStateTransform(TASK_STATUS__UNINIT, TASK_STATUS__SCAN_HISTORY, TASK_EVENT_INIT_SCANHIST, streamTaskInitStatus, onScanhistoryTaskReady, false, false);
trans = createStateTransform(TASK_STATUS__UNINIT, TASK_STATUS__SCAN_HISTORY, TASK_EVENT_INIT_SCANHIST,
streamTaskInitStatus, onScanhistoryTaskReady, false, false);
taosArrayPush(streamTaskSMTrans, &trans); taosArrayPush(streamTaskSMTrans, &trans);
trans = createStateTransform(TASK_STATUS__UNINIT, TASK_STATUS__STREAM_SCAN_HISTORY, TASK_EVENT_INIT_STREAM_SCANHIST, streamTaskInitStatus, onScanhistoryTaskReady, false, false);
trans = createStateTransform(TASK_STATUS__UNINIT, TASK_STATUS__STREAM_SCAN_HISTORY, TASK_EVENT_INIT_STREAM_SCANHIST,
streamTaskInitStatus, onScanhistoryTaskReady, false, false);
taosArrayPush(streamTaskSMTrans, &trans); taosArrayPush(streamTaskSMTrans, &trans);
// scan-history related event // scan-history related event
trans = createStateTransform(TASK_STATUS__SCAN_HISTORY, TASK_STATUS__READY, TASK_EVENT_SCANHIST_DONE, NULL, NULL, trans = createStateTransform(TASK_STATUS__SCAN_HISTORY, TASK_STATUS__READY, TASK_EVENT_SCANHIST_DONE, NULL, NULL, NULL, true);
NULL, true);
taosArrayPush(streamTaskSMTrans, &trans); taosArrayPush(streamTaskSMTrans, &trans);
trans = createStateTransform(TASK_STATUS__STREAM_SCAN_HISTORY, TASK_STATUS__READY, TASK_EVENT_SCANHIST_DONE, NULL, NULL, NULL, true);
trans = createStateTransform(TASK_STATUS__STREAM_SCAN_HISTORY, TASK_STATUS__READY, TASK_EVENT_SCANHIST_DONE, NULL,
NULL, NULL, true);
taosArrayPush(streamTaskSMTrans, &trans); taosArrayPush(streamTaskSMTrans, &trans);
// halt stream task, from other task status // halt stream task, from other task status
trans = createStateTransform(TASK_STATUS__READY, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL, trans = createStateTransform(TASK_STATUS__READY, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL, streamTaskKeepCurrentVerInWal, NULL, true);
streamTaskKeepCurrentVerInWal, NULL, true);
taosArrayPush(streamTaskSMTrans, &trans); taosArrayPush(streamTaskSMTrans, &trans);
trans = createStateTransform(TASK_STATUS__HALT, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL, streamTaskKeepCurrentVerInWal, NULL, true);
trans = createStateTransform(TASK_STATUS__HALT, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL,
streamTaskKeepCurrentVerInWal, NULL, true);
taosArrayPush(streamTaskSMTrans, &trans); taosArrayPush(streamTaskSMTrans, &trans);
SAttachedEventInfo info = {.status = TASK_STATUS__READY, .event = TASK_EVENT_HALT}; SAttachedEventInfo info = {.status = TASK_STATUS__READY, .event = TASK_EVENT_HALT};
trans = createStateTransform(TASK_STATUS__STREAM_SCAN_HISTORY, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL,
streamTaskKeepCurrentVerInWal, &info, true);
taosArrayPush(streamTaskSMTrans, &trans);
trans = createStateTransform(TASK_STATUS__CK, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL, streamTaskKeepCurrentVerInWal, trans = createStateTransform(TASK_STATUS__STREAM_SCAN_HISTORY, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL, streamTaskKeepCurrentVerInWal, &info, true);
&info, true);
taosArrayPush(streamTaskSMTrans, &trans); taosArrayPush(streamTaskSMTrans, &trans);
trans = createStateTransform(TASK_STATUS__CK, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL, streamTaskKeepCurrentVerInWal, &info, true);
trans = createStateTransform(TASK_STATUS__PAUSE, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL, taosArrayPush(streamTaskSMTrans, &trans);
streamTaskKeepCurrentVerInWal, NULL, true); trans = createStateTransform(TASK_STATUS__PAUSE, TASK_STATUS__HALT, TASK_EVENT_HALT, NULL, streamTaskKeepCurrentVerInWal, NULL, true);
taosArrayPush(streamTaskSMTrans, &trans); taosArrayPush(streamTaskSMTrans, &trans);
// checkpoint related event // checkpoint related event
trans = createStateTransform(TASK_STATUS__READY, TASK_STATUS__CK, TASK_EVENT_GEN_CHECKPOINT, NULL, trans = createStateTransform(TASK_STATUS__READY, TASK_STATUS__CK, TASK_EVENT_GEN_CHECKPOINT, NULL, streamTaskDoCheckpoint, NULL, true);
streamTaskDoCheckpoint, NULL, true);
taosArrayPush(streamTaskSMTrans, &trans); taosArrayPush(streamTaskSMTrans, &trans);
trans = createStateTransform(TASK_STATUS__CK, TASK_STATUS__READY, TASK_EVENT_CHECKPOINT_DONE, NULL, NULL, NULL, true);
trans =
createStateTransform(TASK_STATUS__CK, TASK_STATUS__READY, TASK_EVENT_CHECKPOINT_DONE, NULL, NULL, NULL, true);
taosArrayPush(streamTaskSMTrans, &trans); taosArrayPush(streamTaskSMTrans, &trans);
// pause & resume related event handle // pause & resume related event handle
@ -572,3 +556,4 @@ void doInitStateTransferTable(void) {
trans = createStateTransform(TASK_STATUS__STREAM_SCAN_HISTORY, TASK_STATUS__DROPPING, TASK_EVENT_DROPPING, NULL, NULL, NULL, true); trans = createStateTransform(TASK_STATUS__STREAM_SCAN_HISTORY, TASK_STATUS__DROPPING, TASK_EVENT_DROPPING, NULL, NULL, NULL, true);
taosArrayPush(streamTaskSMTrans, &trans); taosArrayPush(streamTaskSMTrans, &trans);
} }
//clang-format on

View File

@ -103,7 +103,7 @@ int32_t syncWriteCfgFile(SSyncNode *pNode) {
if (buffer == NULL) goto _OVER; if (buffer == NULL) goto _OVER;
terrno = 0; terrno = 0;
pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC | TD_FILE_WRITE_THROUGH);
if (pFile == NULL) goto _OVER; if (pFile == NULL) goto _OVER;
int32_t len = strlen(buffer); int32_t len = strlen(buffer);

View File

@ -128,7 +128,7 @@ int32_t raftStoreWriteFile(SSyncNode *pNode) {
if (buffer == NULL) goto _OVER; if (buffer == NULL) goto _OVER;
terrno = 0; terrno = 0;
pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); pFile = taosOpenFile(file, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC | TD_FILE_WRITE_THROUGH);
if (pFile == NULL) goto _OVER; if (pFile == NULL) goto _OVER;
int32_t len = strlen(buffer); int32_t len = strlen(buffer);

View File

@ -112,7 +112,7 @@ int32_t tfsAllocDiskOnTier(STfsTier *pTier) {
int32_t retId = -1; int32_t retId = -1;
int64_t avail = 0; int64_t avail = 0;
for (int32_t id = 0; id < TFS_MAX_DISKS_PER_TIER; ++id) { for (int32_t id = 0; id < TFS_MAX_DISKS_PER_TIER; ++id) {
#if 0 // round-robin #if 1 // round-robin
int32_t diskId = (pTier->nextid + id) % pTier->ndisk; int32_t diskId = (pTier->nextid + id) % pTier->ndisk;
STfsDisk *pDisk = pTier->disks[diskId]; STfsDisk *pDisk = pTier->disks[diskId];

View File

@ -873,7 +873,7 @@ int walSaveMeta(SWal* pWal) {
return -1; return -1;
} }
TdFilePtr pMetaFile = taosOpenFile(tmpFnameStr, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); TdFilePtr pMetaFile = taosOpenFile(tmpFnameStr, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC | TD_FILE_WRITE_THROUGH);
if (pMetaFile == NULL) { if (pMetaFile == NULL) {
wError("vgId:%d, failed to open file due to %s. file:%s", pWal->cfg.vgId, strerror(errno), tmpFnameStr); wError("vgId:%d, failed to open file due to %s. file:%s", pWal->cfg.vgId, strerror(errno), tmpFnameStr);
terrno = TAOS_SYSTEM_ERROR(errno); terrno = TAOS_SYSTEM_ERROR(errno);

View File

@ -133,6 +133,7 @@ int32_t walRollback(SWal *pWal, int64_t ver) {
} }
walBuildIdxName(pWal, walGetCurFileFirstVer(pWal), fnameStr); walBuildIdxName(pWal, walGetCurFileFirstVer(pWal), fnameStr);
taosCloseFile(&pWal->pIdxFile);
TdFilePtr pIdxFile = taosOpenFile(fnameStr, TD_FILE_WRITE | TD_FILE_READ | TD_FILE_APPEND); TdFilePtr pIdxFile = taosOpenFile(fnameStr, TD_FILE_WRITE | TD_FILE_READ | TD_FILE_APPEND);
if (pIdxFile == NULL) { if (pIdxFile == NULL) {
@ -153,6 +154,7 @@ int32_t walRollback(SWal *pWal, int64_t ver) {
} }
walBuildLogName(pWal, walGetCurFileFirstVer(pWal), fnameStr); walBuildLogName(pWal, walGetCurFileFirstVer(pWal), fnameStr);
taosCloseFile(&pWal->pLogFile);
TdFilePtr pLogFile = taosOpenFile(fnameStr, TD_FILE_WRITE | TD_FILE_READ | TD_FILE_APPEND); TdFilePtr pLogFile = taosOpenFile(fnameStr, TD_FILE_WRITE | TD_FILE_READ | TD_FILE_APPEND);
wDebug("vgId:%d, wal truncate file %s", pWal->cfg.vgId, fnameStr); wDebug("vgId:%d, wal truncate file %s", pWal->cfg.vgId, fnameStr);
if (pLogFile == NULL) { if (pLogFile == NULL) {
@ -204,6 +206,7 @@ int32_t walRollback(SWal *pWal, int64_t ver) {
pWal->vers.lastVer = ver - 1; pWal->vers.lastVer = ver - 1;
((SWalFileInfo *)taosArrayGetLast(pWal->fileInfoSet))->lastVer = ver - 1; ((SWalFileInfo *)taosArrayGetLast(pWal->fileInfoSet))->lastVer = ver - 1;
((SWalFileInfo *)taosArrayGetLast(pWal->fileInfoSet))->fileSize = entry.offset; ((SWalFileInfo *)taosArrayGetLast(pWal->fileInfoSet))->fileSize = entry.offset;
taosCloseFile(&pIdxFile); taosCloseFile(&pIdxFile);
taosCloseFile(&pLogFile); taosCloseFile(&pLogFile);
@ -605,7 +608,7 @@ int32_t walWriteWithSyncInfo(SWal *pWal, int64_t index, tmsg_t msgType, SWalSync
return -1; return -1;
} }
if (pWal->pIdxFile == NULL || pWal->pIdxFile == NULL || pWal->writeCur < 0) { if (pWal->pIdxFile == NULL || pWal->pLogFile == NULL || pWal->writeCur < 0) {
if (walInitWriteFile(pWal) < 0) { if (walInitWriteFile(pWal) < 0) {
taosThreadMutexUnlock(&pWal->mutex); taosThreadMutexUnlock(&pWal->mutex);
return -1; return -1;

View File

@ -21,11 +21,12 @@
#include <io.h> #include <io.h>
#include <WinBase.h> #include <WinBase.h>
#include <ktmw32.h> #include <ktmw32.h>
#include <windows.h>
#define F_OK 0 #define F_OK 0
#define W_OK 2 #define W_OK 2
#define R_OK 4 #define R_OK 4
#define _SEND_FILE_STEP_ 1000 #define _SEND_FILE_STEP_ 1024
#else #else
#include <fcntl.h> #include <fcntl.h>
@ -44,12 +45,22 @@
typedef int32_t FileFd; typedef int32_t FileFd;
#ifdef WINDOWS
typedef struct TdFile {
TdThreadRwlock rwlock;
int refId;
HANDLE hFile;
FILE* fp;
int32_t tdFileOptions;
} TdFile;
#else
typedef struct TdFile { typedef struct TdFile {
TdThreadRwlock rwlock; TdThreadRwlock rwlock;
int refId; int refId;
FileFd fd; FileFd fd;
FILE *fp; FILE *fp;
} TdFile; } TdFile;
#endif // WINDOWS
#define FILE_WITH_LOCK 1 #define FILE_WITH_LOCK 1
@ -240,15 +251,12 @@ int32_t taosStatFile(const char *path, int64_t *size, int32_t *mtime, int32_t *a
return 0; return 0;
} }
int32_t taosDevInoFile(TdFilePtr pFile, int64_t *stDev, int64_t *stIno) { int32_t taosDevInoFile(TdFilePtr pFile, int64_t *stDev, int64_t *stIno) {
if (pFile == NULL || pFile->fd < 0) { #ifdef WINDOWS
if (pFile == NULL || pFile->hFile == NULL) {
return -1; return -1;
} }
#ifdef WINDOWS
BY_HANDLE_FILE_INFORMATION bhfi; BY_HANDLE_FILE_INFORMATION bhfi;
HANDLE handle = (HANDLE)_get_osfhandle(pFile->fd); if (GetFileInformationByHandle(pFile->hFile, &bhfi) == FALSE) {
if (GetFileInformationByHandle(handle, &bhfi) == FALSE) {
printf("taosFStatFile get file info fail."); printf("taosFStatFile get file info fail.");
return -1; return -1;
} }
@ -262,7 +270,9 @@ int32_t taosDevInoFile(TdFilePtr pFile, int64_t *stDev, int64_t *stIno) {
} }
#else #else
if (pFile == NULL || pFile->fd < 0) {
return -1;
}
struct stat fileStat; struct stat fileStat;
int32_t code = fstat(pFile->fd, &fileStat); int32_t code = fstat(pFile->fd, &fileStat);
if (code < 0) { if (code < 0) {
@ -282,10 +292,7 @@ int32_t taosDevInoFile(TdFilePtr pFile, int64_t *stDev, int64_t *stIno) {
return 0; return 0;
} }
TdFilePtr taosOpenFile(const char *path, int32_t tdFileOptions) { FILE *taosOpenFileForStream(const char *path, int32_t tdFileOptions) {
int fd = -1;
FILE *fp = NULL;
if (tdFileOptions & TD_FILE_STREAM) {
char *mode = NULL; char *mode = NULL;
if (tdFileOptions & TD_FILE_APPEND) { if (tdFileOptions & TD_FILE_APPEND) {
mode = (tdFileOptions & TD_FILE_TEXT) ? "at+" : "ab+"; mode = (tdFileOptions & TD_FILE_TEXT) ? "at+" : "ab+";
@ -300,11 +307,333 @@ TdFilePtr taosOpenFile(const char *path, int32_t tdFileOptions) {
if (tdFileOptions & TD_FILE_EXCL) { if (tdFileOptions & TD_FILE_EXCL) {
return NULL; return NULL;
} }
fp = fopen(path, mode); return fopen(path, mode);
if (fp == NULL) { }
return NULL;
#ifdef WINDOWS
HANDLE taosOpenFileNotStream(const char *path, int32_t tdFileOptions) {
DWORD openMode = 0;
DWORD access = 0;
DWORD fileFlag = FILE_ATTRIBUTE_NORMAL;
DWORD shareMode = FILE_SHARE_READ;
openMode = OPEN_EXISTING;
if (tdFileOptions & TD_FILE_CREATE) {
openMode = OPEN_ALWAYS;
} else if (tdFileOptions & TD_FILE_EXCL) {
openMode = CREATE_NEW;
} else if ((tdFileOptions & TD_FILE_TRUNC)) {
openMode = TRUNCATE_EXISTING;
access |= GENERIC_WRITE;
}
if (tdFileOptions & TD_FILE_APPEND) {
access |= FILE_APPEND_DATA;
}
if (tdFileOptions & TD_FILE_WRITE) {
access |= GENERIC_WRITE;
}
shareMode |= FILE_SHARE_WRITE;
access |= GENERIC_READ;
if (tdFileOptions & TD_FILE_AUTO_DEL) {
fileFlag |= FILE_ATTRIBUTE_TEMPORARY;
}
if (tdFileOptions & TD_FILE_WRITE_THROUGH) {
fileFlag |= FILE_FLAG_WRITE_THROUGH;
}
HANDLE h = CreateFile(path, access, shareMode, NULL, openMode, fileFlag, NULL);
if (h != INVALID_HANDLE_VALUE && (tdFileOptions & TD_FILE_APPEND) && (tdFileOptions & TD_FILE_WRITE)) {
SetFilePointer(h, 0, NULL, FILE_END);
}
if (h == INVALID_HANDLE_VALUE) {
DWORD dwError = GetLastError();
LPVOID lpMsgBuf;
FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM, NULL, dwError,
0,
(LPTSTR)&lpMsgBuf, 0, NULL);
printf("CreateFile failed with error %d: %s", dwError, (char*)lpMsgBuf);
LocalFree(lpMsgBuf);
}
return h;
}
int64_t taosReadFile(TdFilePtr pFile, void *buf, int64_t count) {
#if FILE_WITH_LOCK
taosThreadRwlockRdlock(&(pFile->rwlock));
#endif
if (pFile->hFile == NULL) {
#if FILE_WITH_LOCK
taosThreadRwlockUnlock(&(pFile->rwlock));
#endif
return -1;
}
DWORD bytesRead;
if (!ReadFile(pFile->hFile, buf, count, &bytesRead, NULL)) {
bytesRead = -1;
}
#if FILE_WITH_LOCK
taosThreadRwlockUnlock(&(pFile->rwlock));
#endif
return bytesRead;
}
int64_t taosWriteFile(TdFilePtr pFile, const void *buf, int64_t count) {
if (pFile == NULL || pFile->hFile == NULL) {
return 0;
}
#if FILE_WITH_LOCK
taosThreadRwlockWrlock(&(pFile->rwlock));
#endif
DWORD bytesWritten;
if (!WriteFile(pFile->hFile, buf, count, &bytesWritten, NULL)) {
bytesWritten = -1;
}
#if FILE_WITH_LOCK
taosThreadRwlockUnlock(&(pFile->rwlock));
#endif
return bytesWritten;
}
int64_t taosPWriteFile(TdFilePtr pFile, const void *buf, int64_t count, int64_t offset) {
if (pFile == NULL) {
return 0;
}
#if FILE_WITH_LOCK
taosThreadRwlockWrlock(&(pFile->rwlock));
#endif
ASSERT(pFile->hFile != NULL); // Please check if you have closed the file.
if (pFile->hFile == NULL) {
#if FILE_WITH_LOCK
taosThreadRwlockUnlock(&(pFile->rwlock));
#endif
return 0;
}
DWORD ret = 0;
OVERLAPPED ol = {0};
ol.OffsetHigh = (uint32_t)((offset & 0xFFFFFFFF00000000LL) >> 0x20);
ol.Offset = (uint32_t)(offset & 0xFFFFFFFFLL);
SetLastError(0);
BOOL result = WriteFile(pFile->hFile, buf, count, &ret, &ol);
if (!result) {
errno = GetLastError();
ret = -1;
}
#if FILE_WITH_LOCK
taosThreadRwlockUnlock(&(pFile->rwlock));
#endif
return ret;
}
int64_t taosLSeekFile(TdFilePtr pFile, int64_t offset, int32_t whence) {
if (pFile == NULL || pFile->hFile == NULL) {
return -1;
}
#if FILE_WITH_LOCK
taosThreadRwlockRdlock(&(pFile->rwlock));
#endif
LARGE_INTEGER liOffset;
liOffset.QuadPart = offset;
if (!SetFilePointerEx(pFile->hFile, liOffset, NULL, whence)) {
return -1;
}
liOffset.QuadPart = 0;
if (!SetFilePointerEx(pFile->hFile, liOffset, &liOffset, FILE_CURRENT)) {
return -1;
}
#if FILE_WITH_LOCK
taosThreadRwlockUnlock(&(pFile->rwlock));
#endif
return liOffset.QuadPart;
}
int32_t taosFStatFile(TdFilePtr pFile, int64_t *size, int32_t *mtime) {
if (pFile == NULL || pFile->hFile == NULL) {
return 0;
}
if (size != NULL) {
LARGE_INTEGER fileSize;
if (!GetFileSizeEx(pFile->hFile, &fileSize)) {
return -1; // Error getting file size
}
*size = fileSize.QuadPart;
}
if (mtime != NULL) {
FILETIME creationTime, lastAccessTime, lastWriteTime;
if (!GetFileTime(pFile->hFile, &creationTime, &lastAccessTime, &lastWriteTime)) {
return -1; // Error getting file time
}
// Convert the FILETIME structure to a time_t value
ULARGE_INTEGER ull;
ull.LowPart = lastWriteTime.dwLowDateTime;
ull.HighPart = lastWriteTime.dwHighDateTime;
*mtime = (int32_t)((ull.QuadPart - 116444736000000000ULL) / 10000000ULL);
}
return 0;
}
int32_t taosLockFile(TdFilePtr pFile) {
if (pFile == NULL || pFile->hFile == NULL) {
return -1;
}
BOOL fSuccess = FALSE;
LARGE_INTEGER fileSize;
OVERLAPPED overlapped = {0};
fSuccess = LockFileEx(pFile->hFile, LOCKFILE_EXCLUSIVE_LOCK | LOCKFILE_FAIL_IMMEDIATELY,
0, // reserved
~0, // number of bytes to lock low
~0, // number of bytes to lock high
&overlapped // overlapped structure
);
if (!fSuccess) {
return GetLastError();
}
return 0;
}
int32_t taosUnLockFile(TdFilePtr pFile) {
if (pFile == NULL || pFile->hFile == NULL) {
return 0;
}
BOOL fSuccess = FALSE;
OVERLAPPED overlapped = {0};
fSuccess = UnlockFileEx(pFile->hFile, 0, ~0, ~0, &overlapped);
if (!fSuccess) {
return GetLastError();
}
return 0;
}
int32_t taosFtruncateFile(TdFilePtr pFile, int64_t l_size) {
if (pFile == NULL) {
return 0;
}
if (pFile->hFile == NULL) {
printf("Ftruncate file error, hFile was null\n");
return -1;
}
LARGE_INTEGER li_0;
li_0.QuadPart = (int64_t)0;
BOOL cur = SetFilePointerEx(pFile->hFile, li_0, NULL, FILE_CURRENT);
if (!cur) {
printf("SetFilePointerEx Error getting current position in file.\n");
return -1;
}
LARGE_INTEGER li_size;
li_size.QuadPart = l_size;
BOOL cur2 = SetFilePointerEx(pFile->hFile, li_size, NULL, FILE_BEGIN);
if (cur2 == 0) {
int error = GetLastError();
printf("SetFilePointerEx GetLastError is: %d\n", error);
switch (error) {
case ERROR_INVALID_HANDLE:
errno = EBADF;
break;
default:
errno = EIO;
break;
}
return -1;
}
if (!SetEndOfFile(pFile->hFile)) {
int error = GetLastError();
printf("SetEndOfFile GetLastError is:%d", error);
switch (error) {
case ERROR_INVALID_HANDLE:
errno = EBADF;
break;
default:
errno = EIO;
break;
}
return -1;
}
return 0;
}
int64_t taosFSendFile(TdFilePtr pFileOut, TdFilePtr pFileIn, int64_t *offset, int64_t size) {
if (pFileOut == NULL || pFileIn == NULL) {
return 0;
}
if (pFileIn->hFile == NULL || pFileOut->hFile == NULL) {
return 0;
}
LARGE_INTEGER fileOffset;
fileOffset.QuadPart = *offset;
if (!SetFilePointerEx(pFileIn->hFile, fileOffset, &fileOffset, FILE_BEGIN)) {
return -1;
}
int64_t writeLen = 0;
uint8_t buffer[_SEND_FILE_STEP_] = {0};
DWORD bytesRead;
DWORD bytesWritten;
for (int64_t len = 0; len < (size - _SEND_FILE_STEP_); len += _SEND_FILE_STEP_) {
if (!ReadFile(pFileIn->hFile, buffer, _SEND_FILE_STEP_, &bytesRead, NULL)) {
return writeLen;
}
if (bytesRead <= 0) {
return writeLen;
} else if (bytesRead < _SEND_FILE_STEP_) {
if (!WriteFile(pFileOut->hFile, buffer, bytesRead, &bytesWritten, NULL)) {
return -1;
} else {
return (int64_t)(writeLen + bytesRead);
} }
} else { } else {
if (!WriteFile(pFileOut->hFile, buffer, _SEND_FILE_STEP_, &bytesWritten, NULL)) {
return -1;
} else {
writeLen += _SEND_FILE_STEP_;
}
}
}
int64_t remain = size - writeLen;
if (remain > 0) {
DWORD bytesRead;
if (!ReadFile(pFileIn->hFile, buffer, (DWORD)remain, &bytesRead, NULL)) {
return -1;
}
if (bytesRead <= 0) {
return writeLen;
} else {
DWORD bytesWritten;
if (!WriteFile(pFileOut->hFile, buffer, bytesRead, &bytesWritten, NULL)) {
return -1;
} else {
writeLen += bytesWritten;
}
}
}
return writeLen;
}
#else
int taosOpenFileNotStream(const char *path, int32_t tdFileOptions) {
int access = O_BINARY; int access = O_BINARY;
access |= (tdFileOptions & TD_FILE_CREATE) ? O_CREAT : 0; access |= (tdFileOptions & TD_FILE_CREATE) ? O_CREAT : 0;
if ((tdFileOptions & TD_FILE_WRITE) && (tdFileOptions & TD_FILE_READ)) { if ((tdFileOptions & TD_FILE_WRITE) && (tdFileOptions & TD_FILE_READ)) {
@ -318,80 +647,8 @@ TdFilePtr taosOpenFile(const char *path, int32_t tdFileOptions) {
access |= (tdFileOptions & TD_FILE_APPEND) ? O_APPEND : 0; access |= (tdFileOptions & TD_FILE_APPEND) ? O_APPEND : 0;
access |= (tdFileOptions & TD_FILE_TEXT) ? O_TEXT : 0; access |= (tdFileOptions & TD_FILE_TEXT) ? O_TEXT : 0;
access |= (tdFileOptions & TD_FILE_EXCL) ? O_EXCL : 0; access |= (tdFileOptions & TD_FILE_EXCL) ? O_EXCL : 0;
#ifdef WINDOWS int fd = open(path, access, S_IRWXU | S_IRWXG | S_IRWXO);
int32_t pmode = _S_IREAD | _S_IWRITE; return fd;
if (tdFileOptions & TD_FILE_AUTO_DEL) {
pmode |= _O_TEMPORARY;
}
fd = _open(path, access, pmode);
#else
fd = open(path, access, S_IRWXU | S_IRWXG | S_IRWXO);
#endif
if (fd == -1) {
return NULL;
}
}
TdFilePtr pFile = (TdFilePtr)taosMemoryMalloc(sizeof(TdFile));
if (pFile == NULL) {
if (fd >= 0) close(fd);
if (fp != NULL) fclose(fp);
return NULL;
}
#if FILE_WITH_LOCK
taosThreadRwlockInit(&(pFile->rwlock), NULL);
#endif
pFile->fd = fd;
pFile->fp = fp;
pFile->refId = 0;
if (tdFileOptions & TD_FILE_AUTO_DEL) {
#ifdef WINDOWS
// do nothing, since the property of pmode is set with _O_TEMPORARY; the OS will recycle
// the file handle, as well as the space on disk.
#else
// Remove it instantly, so when the program exits normally/abnormally, the file
// will be automatically remove by OS.
unlink(path);
#endif
}
return pFile;
}
int32_t taosCloseFile(TdFilePtr *ppFile) {
int32_t code = 0;
if (ppFile == NULL || *ppFile == NULL) {
return 0;
}
#if FILE_WITH_LOCK
taosThreadRwlockWrlock(&((*ppFile)->rwlock));
#endif
if ((*ppFile)->fp != NULL) {
fflush((*ppFile)->fp);
fclose((*ppFile)->fp);
(*ppFile)->fp = NULL;
}
if ((*ppFile)->fd >= 0) {
#ifdef WINDOWS
HANDLE h = (HANDLE)_get_osfhandle((*ppFile)->fd);
!FlushFileBuffers(h);
#else
// warning: never fsync silently in base lib
/*fsync((*ppFile)->fd);*/
#endif
code = close((*ppFile)->fd);
(*ppFile)->fd = -1;
}
(*ppFile)->refId = 0;
#if FILE_WITH_LOCK
taosThreadRwlockUnlock(&((*ppFile)->rwlock));
taosThreadRwlockDestroy(&((*ppFile)->rwlock));
#endif
taosMemoryFree(*ppFile);
*ppFile = NULL;
return code;
} }
int64_t taosReadFile(TdFilePtr pFile, void *buf, int64_t count) { int64_t taosReadFile(TdFilePtr pFile, void *buf, int64_t count) {
@ -407,7 +664,7 @@ int64_t taosReadFile(TdFilePtr pFile, void *buf, int64_t count) {
} }
int64_t leftbytes = count; int64_t leftbytes = count;
int64_t readbytes; int64_t readbytes;
char *tbuf = (char *)buf; char * tbuf = (char *)buf;
while (leftbytes > 0) { while (leftbytes > 0) {
#ifdef WINDOWS #ifdef WINDOWS
@ -441,42 +698,6 @@ int64_t taosReadFile(TdFilePtr pFile, void *buf, int64_t count) {
return count; return count;
} }
int64_t taosPReadFile(TdFilePtr pFile, void *buf, int64_t count, int64_t offset) {
if (pFile == NULL) {
return 0;
}
#if FILE_WITH_LOCK
taosThreadRwlockRdlock(&(pFile->rwlock));
#endif
ASSERT(pFile->fd >= 0); // Please check if you have closed the file.
if (pFile->fd < 0) {
#if FILE_WITH_LOCK
taosThreadRwlockUnlock(&(pFile->rwlock));
#endif
return -1;
}
#ifdef WINDOWS
DWORD ret = 0;
OVERLAPPED ol = {0};
ol.OffsetHigh = (uint32_t)((offset & 0xFFFFFFFF00000000LL) >> 0x20);
ol.Offset = (uint32_t)(offset & 0xFFFFFFFFLL);
HANDLE handle = (HANDLE)_get_osfhandle(pFile->fd);
SetLastError(0);
BOOL result = ReadFile(handle, buf, count, &ret, &ol);
if (!result && GetLastError() != ERROR_HANDLE_EOF) {
errno = GetLastError();
ret = -1;
}
#else
int64_t ret = pread(pFile->fd, buf, count, offset);
#endif
#if FILE_WITH_LOCK
taosThreadRwlockUnlock(&(pFile->rwlock));
#endif
return ret;
}
int64_t taosWriteFile(TdFilePtr pFile, const void *buf, int64_t count) { int64_t taosWriteFile(TdFilePtr pFile, const void *buf, int64_t count) {
if (pFile == NULL) { if (pFile == NULL) {
return 0; return 0;
@ -493,7 +714,7 @@ int64_t taosWriteFile(TdFilePtr pFile, const void *buf, int64_t count) {
int64_t nleft = count; int64_t nleft = count;
int64_t nwritten = 0; int64_t nwritten = 0;
char *tbuf = (char *)buf; char * tbuf = (char *)buf;
while (nleft > 0) { while (nleft > 0) {
nwritten = write(pFile->fd, (void *)tbuf, (uint32_t)nleft); nwritten = write(pFile->fd, (void *)tbuf, (uint32_t)nleft);
@ -706,25 +927,6 @@ int32_t taosFtruncateFile(TdFilePtr pFile, int64_t l_size) {
#endif #endif
} }
int32_t taosFsyncFile(TdFilePtr pFile) {
if (pFile == NULL) {
return 0;
}
// this implementation is WRONG
// fflush is not a replacement of fsync
if (pFile->fp != NULL) return fflush(pFile->fp);
if (pFile->fd >= 0) {
#ifdef WINDOWS
HANDLE h = (HANDLE)_get_osfhandle(pFile->fd);
return !FlushFileBuffers(h);
#else
return fsync(pFile->fd);
#endif
}
return 0;
}
int64_t taosFSendFile(TdFilePtr pFileOut, TdFilePtr pFileIn, int64_t *offset, int64_t size) { int64_t taosFSendFile(TdFilePtr pFileOut, TdFilePtr pFileIn, int64_t *offset, int64_t size) {
if (pFileOut == NULL || pFileIn == NULL) { if (pFileOut == NULL || pFileIn == NULL) {
return 0; return 0;
@ -824,6 +1026,167 @@ int64_t taosFSendFile(TdFilePtr pFileOut, TdFilePtr pFileIn, int64_t *offset, in
#endif #endif
} }
#endif // WINDOWS
TdFilePtr taosOpenFile(const char *path, int32_t tdFileOptions) {
FILE *fp = NULL;
#ifdef WINDOWS
HANDLE hFile = NULL;
#else
int fd = -1;
#endif
if (tdFileOptions & TD_FILE_STREAM) {
fp = taosOpenFileForStream(path, tdFileOptions);
if (fp == NULL) return NULL;
} else {
#ifdef WINDOWS
hFile = taosOpenFileNotStream(path, tdFileOptions);
if (hFile == INVALID_HANDLE_VALUE) return NULL;
#else
fd = taosOpenFileNotStream(path, tdFileOptions);
if (fd == -1) return NULL;
#endif
}
TdFilePtr pFile = (TdFilePtr)taosMemoryMalloc(sizeof(TdFile));
if (pFile == NULL) {
#ifdef WINDOWS
if (hFile != NULL) CloseHandle(hFile);
#else
if (fd >= 0) close(fd);
#endif
if (fp != NULL) fclose(fp);
return NULL;
}
#if FILE_WITH_LOCK
taosThreadRwlockInit(&(pFile->rwlock), NULL);
#endif
pFile->fp = fp;
pFile->refId = 0;
#ifdef WINDOWS
pFile->hFile = hFile;
pFile->tdFileOptions = tdFileOptions;
// do nothing, since the property of pmode is set with _O_TEMPORARY; the OS will recycle
// the file handle, as well as the space on disk.
#else
pFile->fd = fd;
// Remove it instantly, so when the program exits normally/abnormally, the file
// will be automatically remove by OS.
if (tdFileOptions & TD_FILE_AUTO_DEL) {
unlink(path);
}
#endif
return pFile;
}
int32_t taosCloseFile(TdFilePtr *ppFile) {
int32_t code = 0;
if (ppFile == NULL || *ppFile == NULL) {
return 0;
}
#if FILE_WITH_LOCK
taosThreadRwlockWrlock(&((*ppFile)->rwlock));
#endif
if ((*ppFile)->fp != NULL) {
fflush((*ppFile)->fp);
fclose((*ppFile)->fp);
(*ppFile)->fp = NULL;
}
#ifdef WINDOWS
if ((*ppFile)->hFile != NULL) {
// FlushFileBuffers((*ppFile)->hFile);
if (!CloseHandle((*ppFile)->hFile)) {
code = -1;
}
(*ppFile)->hFile = NULL;
#else
if ((*ppFile)->fd >= 0) {
// warning: never fsync silently in base lib
/*fsync((*ppFile)->fd);*/
code = close((*ppFile)->fd);
(*ppFile)->fd = -1;
#endif
}
(*ppFile)->refId = 0;
#if FILE_WITH_LOCK
taosThreadRwlockUnlock(&((*ppFile)->rwlock));
taosThreadRwlockDestroy(&((*ppFile)->rwlock));
#endif
taosMemoryFree(*ppFile);
*ppFile = NULL;
return code;
}
int64_t taosPReadFile(TdFilePtr pFile, void *buf, int64_t count, int64_t offset) {
if (pFile == NULL) {
return 0;
}
#ifdef WINDOWS
#if FILE_WITH_LOCK
taosThreadRwlockRdlock(&(pFile->rwlock));
#endif
ASSERT(pFile->hFile != NULL); // Please check if you have closed the file.
if (pFile->hFile == NULL) {
#if FILE_WITH_LOCK
taosThreadRwlockUnlock(&(pFile->rwlock));
#endif
return -1;
}
DWORD ret = 0;
OVERLAPPED ol = {0};
ol.OffsetHigh = (uint32_t)((offset & 0xFFFFFFFF00000000LL) >> 0x20);
ol.Offset = (uint32_t)(offset & 0xFFFFFFFFLL);
SetLastError(0);
BOOL result = ReadFile(pFile->hFile, buf, count, &ret, &ol);
if (!result && GetLastError() != ERROR_HANDLE_EOF) {
errno = GetLastError();
ret = -1;
}
#else
#if FILE_WITH_LOCK
taosThreadRwlockRdlock(&(pFile->rwlock));
#endif
ASSERT(pFile->fd >= 0); // Please check if you have closed the file.
if (pFile->fd < 0) {
#if FILE_WITH_LOCK
taosThreadRwlockUnlock(&(pFile->rwlock));
#endif
return -1;
}
int64_t ret = pread(pFile->fd, buf, count, offset);
#endif
#if FILE_WITH_LOCK
taosThreadRwlockUnlock(&(pFile->rwlock));
#endif
return ret;
}
int32_t taosFsyncFile(TdFilePtr pFile) {
if (pFile == NULL) {
return 0;
}
// this implementation is WRONG
// fflush is not a replacement of fsync
if (pFile->fp != NULL) return fflush(pFile->fp);
#ifdef WINDOWS
if (pFile->hFile != NULL) {
if (pFile->tdFileOptions & TD_FILE_WRITE_THROUGH) {
return 0;
}
return !FlushFileBuffers(pFile->hFile);
#else
if (pFile->fd >= 0) {
return fsync(pFile->fd);
#endif
}
return 0;
}
void taosFprintfFile(TdFilePtr pFile, const char *format, ...) { void taosFprintfFile(TdFilePtr pFile, const char *format, ...) {
if (pFile == NULL || pFile->fp == NULL) { if (pFile == NULL || pFile->fp == NULL) {
return; return;
@ -834,7 +1197,13 @@ void taosFprintfFile(TdFilePtr pFile, const char *format, ...) {
va_end(ap); va_end(ap);
} }
bool taosValidFile(TdFilePtr pFile) { return pFile != NULL && pFile->fd > 0; } bool taosValidFile(TdFilePtr pFile) {
#ifdef WINDOWS
return pFile != NULL && pFile->hFile != NULL;
#else
return pFile != NULL && pFile->fd > 0;
#endif
}
int32_t taosUmaskFile(int32_t maskVal) { int32_t taosUmaskFile(int32_t maskVal) {
#ifdef WINDOWS #ifdef WINDOWS
@ -960,14 +1329,20 @@ int32_t taosCompressFile(char *srcFileName, char *destFileName) {
goto cmp_end; goto cmp_end;
} }
pFile = taosOpenFile(destFileName, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_TRUNC); int access = O_BINARY | O_WRONLY | O_TRUNC | O_CREAT;
if (pFile == NULL) { #ifdef WINDOWS
int32_t pmode = _S_IREAD | _S_IWRITE;
#else
int32_t pmode = S_IRWXU | S_IRWXG | S_IRWXO;
#endif
int fd = open(destFileName, access, pmode);
if (fd < 0) {
ret = -2; ret = -2;
goto cmp_end; goto cmp_end;
} }
// Both gzclose() and fclose() will close the associated fd, so they need to have different fds. // Both gzclose() and fclose() will close the associated fd, so they need to have different fds.
FileFd gzFd = dup(pFile->fd); FileFd gzFd = dup(fd);
if (gzFd < 0) { if (gzFd < 0) {
ret = -4; ret = -4;
goto cmp_end; goto cmp_end;

View File

@ -15,6 +15,7 @@
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include <iostream> #include <iostream>
#include <inttypes.h>
#pragma GCC diagnostic push #pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wwrite-strings" #pragma GCC diagnostic ignored "-Wwrite-strings"
@ -29,6 +30,10 @@
#include "os.h" #include "os.h"
#include "tlog.h" #include "tlog.h"
#ifdef WINDOWS
#include <windows.h>
#endif // WINDOWS
TEST(osTest, osSystem) { TEST(osTest, osSystem) {
const char *flags = "UTL FATAL "; const char *flags = "UTL FATAL ";
ELogLevel level = DEBUG_FATAL; ELogLevel level = DEBUG_FATAL;
@ -68,7 +73,8 @@ void fileOperateOnBusy(void *param) {
char * fname = (char *)param; char * fname = (char *)param;
TdFilePtr pFile = taosOpenFile(fname, TD_FILE_CREATE | TD_FILE_WRITE); TdFilePtr pFile = taosOpenFile(fname, TD_FILE_CREATE | TD_FILE_WRITE);
printf("On busy thread open file\n"); printf("On busy thread open file\n");
ASSERT_NE(pFile, nullptr); if (pFile == NULL) return;
// ASSERT_NE(pFile, nullptr);
int ret = taosLockFile(pFile); int ret = taosLockFile(pFile);
printf("On busy thread lock file ret:%d\n", ret); printf("On busy thread lock file ret:%d\n", ret);
@ -97,6 +103,7 @@ TEST(osTest, osFile) {
TdFilePtr pOutFD = taosCreateFile(fname, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC); TdFilePtr pOutFD = taosCreateFile(fname, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC);
ASSERT_NE(pOutFD, nullptr); ASSERT_NE(pOutFD, nullptr);
printf("create file success\n"); printf("create file success\n");
taosCloseFile(&pOutFD);
TdFilePtr pFile = taosOpenFile(fname, TD_FILE_CREATE | TD_FILE_WRITE); TdFilePtr pFile = taosOpenFile(fname, TD_FILE_CREATE | TD_FILE_WRITE);
printf("open file\n"); printf("open file\n");
@ -135,4 +142,178 @@ TEST(osTest, osFile) {
//printf("remove file success"); //printf("remove file success");
} }
#ifndef OSFILE_PERFORMANCE_TEST
#define MAX_WORDS 100
#define MAX_WORD_LENGTH 20
#define MAX_TEST_FILE_SIZE 100000
#define TESTTIMES 1000
char *getRandomWord() {
static char words[][MAX_WORD_LENGTH] = {
"Lorem", "ipsum", "dolor", "sit", "amet", "consectetur", "adipiscing", "elit",
"sed", "do", "eiusmod", "tempor", "incididunt", "ut", "labore", "et", "dolore", "magna",
"aliqua", "Ut", "enim", "ad", "minim", "veniam", "quis", "nostrud", "exercitation", "ullamco",
"Why", "do", "programmers", "prefer", "using", "dark", "mode?", "Because", "light", "attracts",
"bugs", "and", "they", "want", "to", "code", "in", "peace,", "like", "a", "ninja", "in", "the", "shadows."
"aliqua", "Ut", "enim", "ad", "minim", "veniam", "quis", "nostrud", "exercitation", "ullamco",
"laboris", "nisi", "ut", "aliquip", "ex", "ea", "commodo", "consequat", "Duis", "aute", "irure",
"dolor", "in", "reprehenderit", "in", "voluptate", "velit", "esse", "cillum", "dolore", "eu",
"fugiat", "nulla", "pariatur", "Excepteur", "sint", "occaecat", "cupidatat", "non", "proident",
"sunt", "in", "culpa", "qui", "officia", "deserunt", "mollit", "anim", "id", "est", "laborum"
};
return words[taosRand() % MAX_WORDS];
}
int64_t fillBufferWithRandomWords(char *buffer, int64_t maxBufferSize) {
int64_t len = 0;
while (len < maxBufferSize) {
char * word = getRandomWord();
size_t wordLen = strlen(word);
if (len + wordLen + 1 < maxBufferSize) {
strcat(buffer, word);
strcat(buffer, " ");
len += wordLen + 1;
} else {
break;
}
}
return len;
}
int64_t calculateAverage(int64_t arr[], int size) {
int64_t sum = 0;
for (int i = 0; i < size; i++) {
sum += arr[i];
}
return sum / size;
}
int64_t calculateMax(int64_t arr[], int size) {
int64_t max = arr[0];
for (int i = 1; i < size; i++) {
if (arr[i] > max) {
max = arr[i];
}
}
return max;
}
int64_t calculateMin(int64_t arr[], int size) {
int64_t min = arr[0];
for (int i = 1; i < size; i++) {
if (arr[i] < min) {
min = arr[i];
}
}
return min;
}
TEST(osTest, osFilePerformance) {
printf("os file performance testting...\n");
int64_t WriteFileCost;
int64_t ReadFileCost;
int64_t OpenForWriteCloseFileCost;
int64_t OpenForReadCloseFileCost;
char * buffer;
char * writeBuffer = (char *)taosMemoryCalloc(1, MAX_TEST_FILE_SIZE);
char * readBuffer = (char *)taosMemoryCalloc(1, MAX_TEST_FILE_SIZE);
int64_t size = fillBufferWithRandomWords(writeBuffer, MAX_TEST_FILE_SIZE);
char * fname = "./osFilePerformanceTest.txt";
TdFilePtr pOutFD = taosCreateFile(fname, TD_FILE_WRITE | TD_FILE_CREATE | TD_FILE_TRUNC);
ASSERT_NE(pOutFD, nullptr);
taosCloseFile(&pOutFD);
printf("os file performance start write...\n");
int64_t t1 = taosGetTimestampUs();
for (int i = 0; i < TESTTIMES; ++i) {
TdFilePtr pFile = taosOpenFile(fname, TD_FILE_CREATE | TD_FILE_WRITE | TD_FILE_WRITE_THROUGH);
ASSERT_NE(pFile, nullptr);
taosWriteFile(pFile, writeBuffer, size);
taosFsyncFile(pFile);
taosCloseFile(&pFile);
}
int64_t t2 = taosGetTimestampUs();
WriteFileCost = t2 - t1;
printf("os file performance start read...\n");
for (int i = 0; i < TESTTIMES; ++i) {
TdFilePtr pFile = taosOpenFile(fname, TD_FILE_READ);
ASSERT_NE(pFile, nullptr);
taosReadFile(pFile, readBuffer, size);
taosCloseFile(&pFile);
int readLine = strlen(readBuffer);
ASSERT_EQ(size, readLine);
}
int64_t t3 = taosGetTimestampUs();
ReadFileCost = t3 - t2;
printf("os file performance start open1...\n");
for (int i = 0; i < TESTTIMES; ++i) {
TdFilePtr pFile = taosOpenFile(fname, TD_FILE_CREATE | TD_FILE_WRITE);
ASSERT_NE(pFile, nullptr);
taosCloseFile(&pFile);
}
int64_t t4 = taosGetTimestampUs();
OpenForWriteCloseFileCost = t4 - t3;
printf("os file performance start open2...\n");
for (int i = 0; i < TESTTIMES; ++i) {
TdFilePtr pFile = taosOpenFile(fname, TD_FILE_CREATE | TD_FILE_READ);
ASSERT_NE(pFile, nullptr);
taosCloseFile(&pFile);
}
int64_t t5 = taosGetTimestampUs();
OpenForReadCloseFileCost = t5 - t4;
#ifdef WINDOWS
printf("os file performance start window native...\n");
for (int i = 0; i < TESTTIMES; ++i) {
HANDLE hFile = CreateFile(fname, // 文件名
GENERIC_WRITE, // 写权限
FILE_SHARE_READ, // 不共享
NULL, // 默认安全描述符
OPEN_ALWAYS, // 打开已存在的文件
FILE_FLAG_WRITE_THROUGH, // 文件标志,可以根据实际需求调整
NULL // 模板文件句柄,对于创建新文件不需要
);
if (hFile == INVALID_HANDLE_VALUE) {
printf("Error opening file\n");
break;
}
// 写入数据
DWORD bytesWritten;
if (!WriteFile(hFile, writeBuffer, size, &bytesWritten, NULL)) {
// 处理错误
printf("Error writing to file\n");
CloseHandle(hFile);
break;
}
// 关闭文件
CloseHandle(hFile);
}
int64_t t6 = taosGetTimestampUs();
int64_t nativeWritCost = t6 - t5;
printf("Test Write file using native API %d times, cost: %" PRId64 "us\n", TESTTIMES, nativeWritCost);
#endif // WINDOWS
taosMemoryFree(writeBuffer);
taosMemoryFree(readBuffer);
printf("Test Write file %d times, cost: %" PRId64 "us\n", TESTTIMES, WriteFileCost);
printf("Test Read file %d times, cost: %" PRId64 "us\n", TESTTIMES, ReadFileCost);
printf("Test OpenForWrite & Close file %d times, cost: %" PRId64 "us\n", TESTTIMES, OpenForWriteCloseFileCost);
printf("Test OpenForRead & Close file %d times, cost: %" PRId64 "us\n", TESTTIMES, OpenForReadCloseFileCost);
}
#endif OSFILE_PERFORMANCE_TEST
#pragma GCC diagnostic pop #pragma GCC diagnostic pop

View File

@ -110,6 +110,7 @@ int32_t metaDebugFlag = 131;
int32_t udfDebugFlag = 131; int32_t udfDebugFlag = 131;
int32_t smaDebugFlag = 131; int32_t smaDebugFlag = 131;
int32_t idxDebugFlag = 131; int32_t idxDebugFlag = 131;
int32_t sndDebugFlag = 131;
int64_t dbgEmptyW = 0; int64_t dbgEmptyW = 0;
int64_t dbgWN = 0; int64_t dbgWN = 0;
@ -153,7 +154,12 @@ int32_t taosInitSlowLog() {
#endif #endif
if (strlen(tsLogDir) != 0) { if (strlen(tsLogDir) != 0) {
char lastC = tsLogDir[strlen(tsLogDir) - 1];
if (lastC == '\\' || lastC == '/') {
snprintf(fullName, PATH_MAX, "%s" "%s", tsLogDir, logFileName);
} else {
snprintf(fullName, PATH_MAX, "%s" TD_DIRSEP "%s", tsLogDir, logFileName); snprintf(fullName, PATH_MAX, "%s" TD_DIRSEP "%s", tsLogDir, logFileName);
}
} else { } else {
snprintf(fullName, PATH_MAX, "%s", logFileName); snprintf(fullName, PATH_MAX, "%s", logFileName);
} }
@ -177,7 +183,12 @@ int32_t taosInitLog(const char *logName, int32_t maxFiles) {
char fullName[PATH_MAX] = {0}; char fullName[PATH_MAX] = {0};
if (strlen(tsLogDir) != 0) { if (strlen(tsLogDir) != 0) {
char lastC = tsLogDir[strlen(tsLogDir) - 1];
if (lastC == '\\' || lastC == '/') {
snprintf(fullName, PATH_MAX, "%s" "%s", tsLogDir, logName);
} else {
snprintf(fullName, PATH_MAX, "%s" TD_DIRSEP "%s", tsLogDir, logName); snprintf(fullName, PATH_MAX, "%s" TD_DIRSEP "%s", tsLogDir, logName);
}
} else { } else {
snprintf(fullName, PATH_MAX, "%s", logName); snprintf(fullName, PATH_MAX, "%s", logName);
} }

View File

@ -20,6 +20,9 @@
,,y,system-test,./pytest.sh python3 ./test.py -f 8-stream/window_close_session_ext.py ,,y,system-test,./pytest.sh python3 ./test.py -f 8-stream/window_close_session_ext.py
,,y,system-test,./pytest.sh python3 ./test.py -f 8-stream/partition_interval.py ,,y,system-test,./pytest.sh python3 ./test.py -f 8-stream/partition_interval.py
,,y,system-test,./pytest.sh python3 ./test.py -f 8-stream/pause_resume_test.py ,,y,system-test,./pytest.sh python3 ./test.py -f 8-stream/pause_resume_test.py
#,,n,system-test,python3 ./test.py -f 8-stream/vnode_restart.py -N 4
#,,n,system-test,python3 ./test.py -f 8-stream/snode_restart.py -N 4
,,n,system-test,python3 ./test.py -f 8-stream/snode_restart_with_checkpoint.py -N 4
,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/tbname_vgroup.py ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/tbname_vgroup.py
,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/stbJoin.py ,,y,system-test,./pytest.sh python3 ./test.py -f 2-query/stbJoin.py

View File

@ -16,9 +16,8 @@ sql create table ts2 using st tags(2,2,2);
sql create table ts3 using st tags(3,2,2); sql create table ts3 using st tags(3,2,2);
sql create table ts4 using st tags(4,2,2); sql create table ts4 using st tags(4,2,2);
sql create stream streams1 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 watermark 1d into streamt1 as select _wstart, count(*) c1, sum(a) c3 from st interval(10s); sql create stream streams1 trigger at_once IGNORE EXPIRED 0 IGNORE UPDATE 0 watermark 1d into streamt1 as select _wstart, count(*) c1, sum(a) c3 from st interval(10s);
sleep 1000 sleep 2000
sleep 1000
sql pause stream streams1; sql pause stream streams1;
sql insert into ts1 values(1648791213001,1,12,3,1.0); sql insert into ts1 values(1648791213001,1,12,3,1.0);

View File

@ -162,7 +162,7 @@ class TDTestCase:
assert('TIMESTAMP' in data_type_list and 'INT' in data_type_list and 'INT UNSIGNED' in data_type_list and 'BIGINT' in data_type_list and 'BIGINT UNSIGNED' in data_type_list and 'FLOAT' in data_type_list and 'DOUBLE' in data_type_list and 'VARCHAR' in data_type_list and 'SMALLINT' in data_type_list and 'SMALLINT UNSIGNED' in data_type_list and 'TINYINT' in data_type_list and 'TINYINT UNSIGNED' in data_type_list and 'BOOL' in data_type_list and 'VARCHAR' in data_type_list and 'NCHAR' in data_type_list and 'GEOMETRY' in data_type_list and 'VARBINARY' in data_type_list) assert('TIMESTAMP' in data_type_list and 'INT' in data_type_list and 'INT UNSIGNED' in data_type_list and 'BIGINT' in data_type_list and 'BIGINT UNSIGNED' in data_type_list and 'FLOAT' in data_type_list and 'DOUBLE' in data_type_list and 'VARCHAR' in data_type_list and 'SMALLINT' in data_type_list and 'SMALLINT UNSIGNED' in data_type_list and 'TINYINT' in data_type_list and 'TINYINT UNSIGNED' in data_type_list and 'BOOL' in data_type_list and 'VARCHAR' in data_type_list and 'NCHAR' in data_type_list and 'GEOMETRY' in data_type_list and 'VARBINARY' in data_type_list)
tdSql.execute("create view v2 as select * from tb where c1 >5 and c7 like '%ab%';") tdSql.execute("create view v2 as select * from tb where c1 >5 and c7 like '%ab%';")
self.check_view_num(2) self.check_view_num(2)
tdSql.error("create view v3 as select * from tb where c1 like '%ab%';", expectErrInfo='Invalid value type') tdSql.error("create view v3 as select * from tb where c1 like '%ab%';", expectErrInfo='Invalid operation')
tdSql.execute("create view v3 as select first(ts), sum(c1) from tb group by c2 having avg(c4) > 0;") tdSql.execute("create view v3 as select first(ts), sum(c1) from tb group by c2 having avg(c4) > 0;")
tdSql.execute("create view v4 as select _wstart,sum(c6) from tb interval(10s);") tdSql.execute("create view v4 as select _wstart,sum(c6) from tb interval(10s);")
tdSql.execute("create view v5 as select * from tb join v2 on tb.ts = v2.ts;") tdSql.execute("create view v5 as select * from tb join v2 on tb.ts = v2.ts;")

View File

@ -55,7 +55,7 @@ class TDTestCase:
tdSql.checkData(0, 2, 0) tdSql.checkData(0, 2, 0)
tdSql.query("show dnode 1 variables like '%debugFlag'") tdSql.query("show dnode 1 variables like '%debugFlag'")
tdSql.checkRows(22) tdSql.checkRows(23)
tdSql.query("show dnode 1 variables like '____debugFlag'") tdSql.query("show dnode 1 variables like '____debugFlag'")
tdSql.checkRows(2) tdSql.checkRows(2)

View File

@ -105,7 +105,6 @@ class TDTestCase:
topicNameList = ['topic1'] topicNameList = ['topic1']
# expectRowsList = [] # expectRowsList = []
tmqCom.initConsumerTable("cdb", self.replicaVar)
tdLog.info("create topics from stb with filter") tdLog.info("create topics from stb with filter")
queryString = "select * from %s.%s"%(paraDict['dbName'], paraDict['stbName']) queryString = "select * from %s.%s"%(paraDict['dbName'], paraDict['stbName'])
@ -133,14 +132,15 @@ class TDTestCase:
tmqCom.getStartConsumeNotifyFromTmqsim() tmqCom.getStartConsumeNotifyFromTmqsim()
tmqCom.getStartCommitNotifyFromTmqsim() tmqCom.getStartCommitNotifyFromTmqsim()
tdSql.query("select * from information_schema.ins_vnodes") tdSql.query("balance vgroup leader")
# tdLog.debug(tdSql.queryResult) # tdSql.query("select * from information_schema.ins_vnodes")
tdDnodes = cluster.dnodes # # tdLog.debug(tdSql.queryResult)
for result in tdSql.queryResult: # tdDnodes = cluster.dnodes
if result[2] == 'dbt' and result[3] == 'leader': # for result in tdSql.queryResult:
tdLog.debug("leader is %d"%(result[0] - 1)) # if result[2] == 'dbt' and result[3] == 'leader':
tdDnodes[result[0] - 1].stoptaosd() # tdLog.debug("leader is %d"%(result[0] - 1))
break # tdDnodes[result[0] - 1].stoptaosd()
# break
pInsertThread.join() pInsertThread.join()
expectRows = 1 expectRows = 1
@ -159,7 +159,6 @@ class TDTestCase:
tdLog.printNoPrefix("======== test case 1 end ...... ") tdLog.printNoPrefix("======== test case 1 end ...... ")
def run(self): def run(self):
tdSql.prepare()
self.prepareTestEnv() self.prepareTestEnv()
self.tmqCase1() self.tmqCase1()

View File

@ -0,0 +1,78 @@
import taos
import sys
import time
import socket
import os
import threading
import math
from util.log import *
from util.sql import *
from util.cases import *
from util.dnodes import *
from util.common import *
from util.cluster import *
class TDTestCase:
updatecfgDict = {'checkpointInterval': 1100}
print("===================: ", updatecfgDict)
def init(self, conn, logSql, replicaVar=1):
tdLog.debug(f"start to excute {__file__}")
tdSql.init(conn.cursor(), False)
def case1(self):
tdLog.debug("========case1 start========")
os.system("nohup taosBenchmark -y -B 1 -t 4 -S 1000 -n 1000 -i 1000 -v 2 > /dev/null 2>&1 &")
time.sleep(4)
tdSql.query("use test")
tdSql.query("create snode on dnode 4")
tdSql.query("create stream if not exists s1 trigger at_once ignore expired 0 ignore update 0 fill_history 1 into st1 as select _wstart,sum(voltage),groupid from meters partition by groupid interval(2s)")
tdLog.debug("========create stream useing snode and insert data ok========")
time.sleep(4)
tdDnodes = cluster.dnodes
tdDnodes[3].stoptaosd()
time.sleep(2)
tdDnodes[3].starttaosd()
tdLog.debug("========snode restart ok========")
time.sleep(30)
os.system("kill -9 `pgrep taosBenchmark`")
tdLog.debug("========stop insert ok========")
time.sleep(2)
tdSql.query("select _wstart,sum(voltage),groupid from meters partition by groupid interval(2s) order by groupid,_wstart")
rowCnt = tdSql.getRows()
results = []
for i in range(rowCnt):
results.append(tdSql.getData(i,1))
tdSql.query("select * from st1 order by groupid,_wstart")
tdSql.checkRows(rowCnt)
for i in range(rowCnt):
data1 = tdSql.getData(i,1)
data2 = results[i]
if data1 != data2:
tdLog.info("num: %d, act data: %d, expect data: %d"%(i, data1, data2))
tdLog.exit("check data error!")
# tdLog.debug("========sleep 500s========")
# time.sleep(500)
tdLog.debug("case1 end")
def run(self):
self.case1()
def stop(self):
tdSql.close()
tdLog.success(f"{__file__} successfully executed")
event = threading.Event()
tdCases.addLinux(__file__, TDTestCase())
tdCases.addWindows(__file__, TDTestCase())

View File

@ -0,0 +1,78 @@
import taos
import sys
import time
import socket
import os
import threading
import math
from util.log import *
from util.sql import *
from util.cases import *
from util.dnodes import *
from util.common import *
from util.cluster import *
class TDTestCase:
# updatecfgDict = {'checkpointInterval': 5}
# print("===================: ", updatecfgDict)
def init(self, conn, logSql, replicaVar=1):
tdLog.debug(f"start to excute {__file__}")
tdSql.init(conn.cursor(), False)
def case1(self):
tdLog.debug("========case1 start========")
os.system("nohup taosBenchmark -y -B 1 -t 4 -S 1000 -n 1000 -i 1000 -v 2 > /dev/null 2>&1 &")
time.sleep(4)
tdSql.query("use test")
tdSql.query("create snode on dnode 4")
tdSql.query("create stream if not exists s1 trigger at_once ignore expired 0 ignore update 0 fill_history 1 into st1 as select _wstart,sum(voltage),groupid from meters partition by groupid interval(2s)")
tdLog.debug("========create stream useing snode and insert data ok========")
time.sleep(60)
tdDnodes = cluster.dnodes
tdDnodes[3].stoptaosd()
time.sleep(2)
tdDnodes[3].starttaosd()
tdLog.debug("========snode restart ok========")
time.sleep(30)
os.system("kill -9 `pgrep taosBenchmark`")
tdLog.debug("========stop insert ok========")
time.sleep(2)
tdSql.query("select _wstart,sum(voltage),groupid from meters partition by groupid interval(2s) order by groupid,_wstart")
rowCnt = tdSql.getRows()
results = []
for i in range(rowCnt):
results.append(tdSql.getData(i,1))
tdSql.query("select * from st1 order by groupid,_wstart")
tdSql.checkRows(rowCnt)
for i in range(rowCnt):
data1 = tdSql.getData(i,1)
data2 = results[i]
if data1 != data2:
tdLog.info("num: %d, act data: %d, expect data: %d"%(i, data1, data2))
tdLog.exit("check data error!")
# tdLog.debug("========sleep 500s========")
# time.sleep(500)
tdLog.debug("case1 end")
def run(self):
self.case1()
def stop(self):
tdSql.close()
tdLog.success(f"{__file__} successfully executed")
event = threading.Event()
tdCases.addLinux(__file__, TDTestCase())
tdCases.addWindows(__file__, TDTestCase())

View File

@ -0,0 +1,77 @@
import taos
import sys
import time
import socket
import os
import threading
import math
from util.log import *
from util.sql import *
from util.cases import *
from util.dnodes import *
from util.common import *
from util.cluster import *
class TDTestCase:
updatecfgDict = {'checkpointInterval': 1100}
print("===================: ", updatecfgDict)
def init(self, conn, logSql, replicaVar=1):
tdLog.debug(f"start to excute {__file__}")
tdSql.init(conn.cursor(), False)
def case1(self):
tdLog.debug("========case1 start========")
os.system("nohup taosBenchmark -y -B 1 -t 4 -S 1000 -n 1000 -i 1000 -v 2 > /dev/null 2>&1 &")
time.sleep(4)
tdSql.query("use test")
tdSql.query("create stream if not exists s1 trigger at_once ignore expired 0 ignore update 0 fill_history 1 into st1 as select _wstart,sum(voltage),groupid from meters partition by groupid interval(2s)")
tdLog.debug("========create stream useing snode and insert data ok========")
time.sleep(4)
tdDnodes = cluster.dnodes
tdDnodes[2].stoptaosd()
time.sleep(2)
tdDnodes[2].starttaosd()
tdLog.debug("========vnode restart ok========")
time.sleep(30)
os.system("kill -9 `pgrep taosBenchmark`")
tdLog.debug("========stop insert ok========")
time.sleep(2)
tdSql.query("select _wstart,sum(voltage),groupid from meters partition by groupid interval(2s) order by groupid,_wstart")
rowCnt = tdSql.getRows()
results = []
for i in range(rowCnt):
results.append(tdSql.getData(i,1))
tdSql.query("select * from st1 order by groupid,_wstart")
tdSql.checkRows(rowCnt)
for i in range(rowCnt):
data1 = tdSql.getData(i,1)
data2 = results[i]
if data1 != data2:
tdLog.info("num: %d, act data: %d, expect data: %d"%(i, data1, data2))
tdLog.exit("check data error!")
# tdLog.debug("========sleep 500s========")
# time.sleep(500)
tdLog.debug("case1 end")
def run(self):
self.case1()
def stop(self):
tdSql.close()
tdLog.success(f"{__file__} successfully executed")
event = threading.Event()
tdCases.addLinux(__file__, TDTestCase())
tdCases.addWindows(__file__, TDTestCase())

View File

@ -582,7 +582,7 @@ if __name__ == "__main__":
tdDnodes.setAsan(asan) tdDnodes.setAsan(asan)
tdDnodes.stopAll() tdDnodes.stopAll()
for dnode in tdDnodes.dnodes: for dnode in tdDnodes.dnodes:
tdDnodes.deploy(dnode.index,{}) tdDnodes.deploy(dnode.index,updateCfgDict)
for dnode in tdDnodes.dnodes: for dnode in tdDnodes.dnodes:
tdDnodes.starttaosd(dnode.index) tdDnodes.starttaosd(dnode.index)
tdCases.logSql(logSql) tdCases.logSql(logSql)

View File

@ -30,7 +30,7 @@ typedef struct {
int meta; int meta;
int srcVgroups; int srcVgroups;
int dstVgroups; int dstVgroups;
char dir[64]; char dir[256];
} Config; } Config;
Config g_conf = {0}; Config g_conf = {0};
@ -409,6 +409,30 @@ int buildStable(TAOS* pConn, TAOS_RES* pRes) {
} }
taos_free_result(pRes); taos_free_result(pRes);
#ifdef WINDOWS
pRes = taos_query(pConn,
"CREATE STABLE `meters_summary` (`_wstart` TIMESTAMP, `current` FLOAT, `groupid` INT, `location` VARCHAR(16)) TAGS (`group_id` BIGINT UNSIGNED)");
if (taos_errno(pRes) != 0) {
printf("failed to create super table meters_summary, reason:%s\n", taos_errstr(pRes));
return -1;
}
taos_free_result(pRes);
pRes = taos_query(pConn,
" CREATE TABLE `t_d2a450ee819dcf7576f0282d9ac22dbc` USING `meters_summary` (`group_id`) TAGS (13135550082773579308)");
if (taos_errno(pRes) != 0) {
printf("failed to create super table meters_summary, reason:%s\n", taos_errstr(pRes));
return -1;
}
taos_free_result(pRes);
pRes = taos_query(pConn, "insert into t_d2a450ee819dcf7576f0282d9ac22dbc values (now, 120, 1, 'San Francisco')");
if (taos_errno(pRes) != 0) {
printf("failed to insert into table d0, reason:%s\n", taos_errstr(pRes));
return -1;
}
taos_free_result(pRes);
#else
pRes = taos_query(pConn, pRes = taos_query(pConn,
"create stream meters_summary_s trigger at_once IGNORE EXPIRED 0 into meters_summary as select _wstart, max(current) as current, " "create stream meters_summary_s trigger at_once IGNORE EXPIRED 0 into meters_summary as select _wstart, max(current) as current, "
"groupid, location from meters partition by groupid, location interval(10m)"); "groupid, location from meters partition by groupid, location interval(10m)");
@ -417,6 +441,7 @@ int buildStable(TAOS* pConn, TAOS_RES* pRes) {
return -1; return -1;
} }
taos_free_result(pRes); taos_free_result(pRes);
#endif
pRes = taos_query(pConn, "insert into d0 (ts, current) values (now, 120)"); pRes = taos_query(pConn, "insert into d0 (ts, current) values (now, 120)");
if (taos_errno(pRes) != 0) { if (taos_errno(pRes) != 0) {
@ -598,8 +623,8 @@ void basic_consume_loop(tmq_t* tmq, tmq_list_t* topics) {
} }
void initLogFile() { void initLogFile() {
char f1[256] = {0}; char f1[1024] = {0};
char f2[256] = {0}; char f2[1024] = {0};
if (g_conf.snapShot) { if (g_conf.snapShot) {
sprintf(f1, "%s/../log/tmq_taosx_tmp_snapshot.source", g_conf.dir); sprintf(f1, "%s/../log/tmq_taosx_tmp_snapshot.source", g_conf.dir);