diff --git a/cmake/cmake.platform b/cmake/cmake.platform index 30a33fcdb4..18fd17f018 100644 --- a/cmake/cmake.platform +++ b/cmake/cmake.platform @@ -195,6 +195,20 @@ if (TD_LINUX) ELSE() set(TD_DEPS_DIR "x86") ENDIF() +elseif (TD_DARWIN) + IF (TD_ARM_64 OR TD_ARM_32) + set(TD_DEPS_DIR "darwin/arm") + ELSE () + set(TD_DEPS_DIR "darwin/x64") + ENDIF () +elseif (TD_WINDOWS) + IF (TD_WINDOWS_64) + set(TD_DEPS_DIR "win/x64") + ELSEIF (TD_WINDOWS_32) + set(TD_DEPS_DIR "win/i386") + ENDIF () +else () + MESSAGE(FATAL_ERROR "unsupported platform") endif() MESSAGE(STATUS "DEPS_DIR: " ${TD_DEPS_DIR}) diff --git a/cmake/curl_CMakeLists.txt.in b/cmake/curl_CMakeLists.txt.in index d02e9d5bbf..1f2291c519 100644 --- a/cmake/curl_CMakeLists.txt.in +++ b/cmake/curl_CMakeLists.txt.in @@ -1,6 +1,7 @@ # curl ExternalProject_Add(curl URL https://curl.se/download/curl-8.2.1.tar.gz + URL_HASH MD5=b25588a43556068be05e1624e0e74d41 DOWNLOAD_NO_PROGRESS 1 DOWNLOAD_DIR "${TD_CONTRIB_DIR}/deps-download" #GIT_REPOSITORY https://github.com/curl/curl.git diff --git a/deps/arm/dm_static/libdmodule.a b/deps/arm/dm_static/libdmodule.a new file mode 100644 index 0000000000..5f0f0e38b3 Binary files /dev/null and b/deps/arm/dm_static/libdmodule.a differ diff --git a/deps/darwin/arm/dm_static/libdmodule.a b/deps/darwin/arm/dm_static/libdmodule.a new file mode 100644 index 0000000000..b9dc621e1c Binary files /dev/null and b/deps/darwin/arm/dm_static/libdmodule.a differ diff --git a/deps/darwin/x64/dm_static/libdmodule.a b/deps/darwin/x64/dm_static/libdmodule.a new file mode 100644 index 0000000000..a87b333738 Binary files /dev/null and b/deps/darwin/x64/dm_static/libdmodule.a differ diff --git a/deps/win/x64/dm_static/dmodule.lib b/deps/win/x64/dm_static/dmodule.lib new file mode 100644 index 0000000000..52b8cd407d Binary files /dev/null and b/deps/win/x64/dm_static/dmodule.lib differ diff --git a/deps/x86/dm_static/libdmodule.a b/deps/x86/dm_static/libdmodule.a new file mode 100644 index 
0000000000..f5548e6988 Binary files /dev/null and b/deps/x86/dm_static/libdmodule.a differ diff --git a/docs/en/12-taos-sql/02-database.md b/docs/en/12-taos-sql/02-database.md index e7f3aa8d1b..255263ecfd 100644 --- a/docs/en/12-taos-sql/02-database.md +++ b/docs/en/12-taos-sql/02-database.md @@ -56,7 +56,7 @@ database_option: { - WAL_FSYNC_PERIOD: specifies the interval (in milliseconds) at which data is written from the WAL to disk. This parameter takes effect only when the WAL parameter is set to 2. The default value is 3000. Enter a value between 0 and 180000. The value 0 indicates that incoming data is immediately written to disk. - MAXROWS: specifies the maximum number of rows recorded in a block. The default value is 4096. - MINROWS: specifies the minimum number of rows recorded in a block. The default value is 100. -- KEEP: specifies the time for which data is retained. Enter a value between 1 and 365000. The default value is 3650. The value of the KEEP parameter must be greater than or equal to the value of the DURATION parameter. TDengine automatically deletes data that is older than the value of the KEEP parameter. You can use m (minutes), h (hours), and d (days) as the unit, for example KEEP 100h or KEEP 10d. If you do not include a unit, d is used by default. TDengine Enterprise supports [Tiered Storage](https://docs.tdengine.com/tdinternal/arch/#tiered-storage) function, thus multiple KEEP values (comma separated and up to 3 values supported, and meet keep 0 <= keep 1 <= keep 2, e.g. KEEP 100h,100d,3650d) are supported; TDengine OSS does not support Tiered Storage function (although multiple keep values are configured, they do not take effect, only the maximum keep value is used as KEEP). +- KEEP: specifies the time for which data is retained. Enter a value between 1 and 365000. The default value is 3650. The value of the KEEP parameter must be greater than or equal to three times the value of the DURATION parameter. 
TDengine automatically deletes data that is older than the value of the KEEP parameter. You can use m (minutes), h (hours), and d (days) as the unit, for example KEEP 100h or KEEP 10d. If you do not include a unit, d is used by default. TDengine Enterprise supports [Tiered Storage](https://docs.tdengine.com/tdinternal/arch/#tiered-storage) function, thus multiple KEEP values (comma separated and up to 3 values supported, and meet keep 0 <= keep 1 <= keep 2, e.g. KEEP 100h,100d,3650d) are supported; TDengine OSS does not support Tiered Storage function (although multiple keep values are configured, they do not take effect, only the maximum keep value is used as KEEP). - PAGES: specifies the number of pages in the metadata storage engine cache on each vnode. Enter a value greater than or equal to 64. The default value is 256. The space occupied by metadata storage on each vnode is equal to the product of the values of the PAGESIZE and PAGES parameters. The space occupied by default is 1 MB. - PAGESIZE: specifies the size (in KB) of each page in the metadata storage engine cache on each vnode. The default value is 4. Enter a value between 1 and 16384. - PRECISION: specifies the precision at which a database records timestamps. Enter ms for milliseconds, us for microseconds, or ns for nanoseconds. The default value is ms. 
diff --git a/docs/zh/12-taos-sql/02-database.md b/docs/zh/12-taos-sql/02-database.md index ac435debea..e9ca5405f4 100644 --- a/docs/zh/12-taos-sql/02-database.md +++ b/docs/zh/12-taos-sql/02-database.md @@ -56,7 +56,7 @@ database_option: { - WAL_FSYNC_PERIOD:当 WAL 参数设置为 2 时,落盘的周期。默认为 3000,单位毫秒。最小为 0,表示每次写入立即落盘;最大为 180000,即三分钟。 - MAXROWS:文件块中记录的最大条数,默认为 4096 条。 - MINROWS:文件块中记录的最小条数,默认为 100 条。 -- KEEP:表示数据文件保存的天数,缺省值为 3650,取值范围 [1, 365000],且必须大于或等于 DURATION 参数值。数据库会自动删除保存时间超过 KEEP 值的数据。KEEP 可以使用加单位的表示形式,如 KEEP 100h、KEEP 10d 等,支持 m(分钟)、h(小时)和 d(天)三个单位。也可以不写单位,如 KEEP 50,此时默认单位为天。企业版支持[多级存储](https://docs.taosdata.com/tdinternal/arch/#%E5%A4%9A%E7%BA%A7%E5%AD%98%E5%82%A8)功能, 因此, 可以设置多个保存时间(多个以英文逗号分隔,最多 3 个,满足 keep 0 <= keep 1 <= keep 2,如 KEEP 100h,100d,3650d); 社区版不支持多级存储功能(即使配置了多个保存时间, 也不会生效, KEEP 会取最大的保存时间)。 +- KEEP:表示数据文件保存的天数,缺省值为 3650,取值范围 [1, 365000],且必须大于或等于3倍的 DURATION 参数值。数据库会自动删除保存时间超过 KEEP 值的数据。KEEP 可以使用加单位的表示形式,如 KEEP 100h、KEEP 10d 等,支持 m(分钟)、h(小时)和 d(天)三个单位。也可以不写单位,如 KEEP 50,此时默认单位为天。企业版支持[多级存储](https://docs.taosdata.com/tdinternal/arch/#%E5%A4%9A%E7%BA%A7%E5%AD%98%E5%82%A8)功能, 因此, 可以设置多个保存时间(多个以英文逗号分隔,最多 3 个,满足 keep 0 <= keep 1 <= keep 2,如 KEEP 100h,100d,3650d); 社区版不支持多级存储功能(即使配置了多个保存时间, 也不会生效, KEEP 会取最大的保存时间)。 - PAGES:一个 VNODE 中元数据存储引擎的缓存页个数,默认为 256,最小 64。一个 VNODE 元数据存储占用 PAGESIZE \* PAGES,默认情况下为 1MB 内存。 - PAGESIZE:一个 VNODE 中元数据存储引擎的页大小,单位为 KB,默认为 4 KB。范围为 1 到 16384,即 1 KB 到 16 MB。 - PRECISION:数据库的时间戳精度。ms 表示毫秒,us 表示微秒,ns 表示纳秒,默认 ms 毫秒。 diff --git a/include/common/tgrant.h b/include/common/tgrant.h index 31d34add24..8ff844abe1 100644 --- a/include/common/tgrant.h +++ b/include/common/tgrant.h @@ -51,6 +51,13 @@ typedef enum { } EGrantType; int32_t grantCheck(EGrantType grant); +#ifdef TD_ENTERPRISE +#ifndef TD_GRANT_OPTIMIZE +int32_t grantAlterActiveCode(const char* old, const char* new, char* out, int8_t type); +#else +int32_t grantAlterActiveCode(int32_t did, const char* old, const char* new, char* out, int8_t type); +#endif +#endif #ifndef GRANTS_CFG 
#ifdef TD_ENTERPRISE diff --git a/include/common/tmsg.h b/include/common/tmsg.h index 7f0b9de321..18a0d119f8 100644 --- a/include/common/tmsg.h +++ b/include/common/tmsg.h @@ -2048,6 +2048,7 @@ int32_t tDeserializeSRedistributeVgroupReq(void* buf, int32_t bufLen, SRedistrib typedef struct { int32_t useless; + int32_t vgId; } SBalanceVgroupLeaderReq; int32_t tSerializeSBalanceVgroupLeaderReq(void* buf, int32_t bufLen, SBalanceVgroupLeaderReq* pReq); diff --git a/include/libs/executor/storageapi.h b/include/libs/executor/storageapi.h index d21fb8c459..dbcd682dab 100644 --- a/include/libs/executor/storageapi.h +++ b/include/libs/executor/storageapi.h @@ -38,6 +38,9 @@ extern "C" { #define META_READER_NOLOCK 0x1 +#define STREAM_STATE_BUFF_HASH 1 +#define STREAM_STATE_BUFF_SORT 2 + typedef struct SMeta SMeta; typedef TSKEY (*GetTsFun)(void*); @@ -115,6 +118,7 @@ typedef struct SRowBuffPos { void* pKey; bool beFlushed; bool beUsed; + bool needFree; } SRowBuffPos; // tq @@ -333,6 +337,8 @@ typedef struct { void* db; // rocksdb_t* db; void* pCur; int64_t number; + void* pStreamFileState; + int32_t buffIndex; } SStreamStateCur; typedef struct SStateStore { @@ -340,7 +346,8 @@ typedef struct SStateStore { int32_t (*streamStateGetParName)(SStreamState* pState, int64_t groupId, void** pVal); int32_t (*streamStateAddIfNotExist)(SStreamState* pState, const SWinKey* key, void** pVal, int32_t* pVLen); - int32_t (*streamStateReleaseBuf)(SStreamState* pState, const SWinKey* key, void* pVal); + int32_t (*streamStateReleaseBuf)(SStreamState* pState, void* pVal, bool used); + int32_t (*streamStateClearBuff)(SStreamState* pState, void* pVal); void (*streamStateFreeVal)(void* val); int32_t (*streamStatePut)(SStreamState* pState, const SWinKey* key, const void* value, int32_t vLen); @@ -371,7 +378,7 @@ typedef struct SStateStore { int32_t (*streamStateSessionAddIfNotExist)(SStreamState* pState, SSessionKey* key, TSKEY gap, void** pVal, int32_t* pVLen); - int32_t 
(*streamStateSessionPut)(SStreamState* pState, const SSessionKey* key, const void* value, int32_t vLen); + int32_t (*streamStateSessionPut)(SStreamState* pState, const SSessionKey* key, void* value, int32_t vLen); int32_t (*streamStateSessionGet)(SStreamState* pState, SSessionKey* key, void** pVal, int32_t* pVLen); int32_t (*streamStateSessionDel)(SStreamState* pState, const SSessionKey* key); int32_t (*streamStateSessionClear)(SStreamState* pState); @@ -400,7 +407,7 @@ typedef struct SStateStore { struct SStreamFileState* (*streamFileStateInit)(int64_t memSize, uint32_t keySize, uint32_t rowSize, uint32_t selectRowSize, GetTsFun fp, void* pFile, TSKEY delMark, - const char* id, int64_t ckId); + const char* id, int64_t ckId, int8_t type); void (*streamFileStateDestroy)(struct SStreamFileState* pFileState); void (*streamFileStateClear)(struct SStreamFileState* pFileState); diff --git a/include/libs/monitor/monitor.h b/include/libs/monitor/monitor.h index 708953f45e..91b3a54ea1 100644 --- a/include/libs/monitor/monitor.h +++ b/include/libs/monitor/monitor.h @@ -206,6 +206,11 @@ typedef struct { bool comp; } SMonCfg; +typedef struct { + int8_t state; + tsem_t sem; +} SDmNotifyHandle; + int32_t monInit(const SMonCfg *pCfg); void monCleanup(); void monRecordLog(int64_t ts, ELogLevel level, const char *content); diff --git a/include/libs/stream/streamState.h b/include/libs/stream/streamState.h index 4312da6f2c..b7f100733b 100644 --- a/include/libs/stream/streamState.h +++ b/include/libs/stream/streamState.h @@ -49,26 +49,30 @@ void streamStateSetNumber(SStreamState* pState, int32_t number); int32_t streamStateSaveInfo(SStreamState* pState, void* pKey, int32_t keyLen, void* pVal, int32_t vLen); int32_t streamStateGetInfo(SStreamState* pState, void* pKey, int32_t keyLen, void** pVal, int32_t* pLen); +//session window int32_t streamStateSessionAddIfNotExist(SStreamState* pState, SSessionKey* key, TSKEY gap, void** pVal, int32_t* pVLen); -int32_t 
streamStateSessionPut(SStreamState* pState, const SSessionKey* key, const void* value, int32_t vLen); +int32_t streamStateSessionPut(SStreamState* pState, const SSessionKey* key, void* value, int32_t vLen); int32_t streamStateSessionGet(SStreamState* pState, SSessionKey* key, void** pVal, int32_t* pVLen); int32_t streamStateSessionDel(SStreamState* pState, const SSessionKey* key); int32_t streamStateSessionClear(SStreamState* pState); int32_t streamStateSessionGetKVByCur(SStreamStateCur* pCur, SSessionKey* pKey, void** pVal, int32_t* pVLen); -int32_t streamStateStateAddIfNotExist(SStreamState* pState, SSessionKey* key, char* pKeyData, int32_t keyDataLen, - state_key_cmpr_fn fn, void** pVal, int32_t* pVLen); int32_t streamStateSessionGetKeyByRange(SStreamState* pState, const SSessionKey* range, SSessionKey* curKey); SStreamStateCur* streamStateSessionSeekKeyNext(SStreamState* pState, const SSessionKey* key); SStreamStateCur* streamStateSessionSeekKeyCurrentPrev(SStreamState* pState, const SSessionKey* key); SStreamStateCur* streamStateSessionSeekKeyCurrentNext(SStreamState* pState, const SSessionKey* key); +//state window +int32_t streamStateStateAddIfNotExist(SStreamState* pState, SSessionKey* key, char* pKeyData, int32_t keyDataLen, + state_key_cmpr_fn fn, void** pVal, int32_t* pVLen); + int32_t streamStateFillPut(SStreamState* pState, const SWinKey* key, const void* value, int32_t vLen); int32_t streamStateFillGet(SStreamState* pState, const SWinKey* key, void** pVal, int32_t* pVLen); int32_t streamStateFillDel(SStreamState* pState, const SWinKey* key); int32_t streamStateAddIfNotExist(SStreamState* pState, const SWinKey* key, void** pVal, int32_t* pVLen); -int32_t streamStateReleaseBuf(SStreamState* pState, const SWinKey* key, void* pVal); +int32_t streamStateReleaseBuf(SStreamState* pState, void* pVal, bool used); +int32_t streamStateClearBuff(SStreamState* pState, void* pVal); void streamStateFreeVal(void* val); SStreamStateCur* 
streamStateGetAndCheckCur(SStreamState* pState, SWinKey* key); @@ -76,14 +80,11 @@ SStreamStateCur* streamStateSeekKeyNext(SStreamState* pState, const SWinKey* key SStreamStateCur* streamStateFillSeekKeyNext(SStreamState* pState, const SWinKey* key); SStreamStateCur* streamStateFillSeekKeyPrev(SStreamState* pState, const SWinKey* key); void streamStateFreeCur(SStreamStateCur* pCur); +void streamStateResetCur(SStreamStateCur* pCur); int32_t streamStateGetGroupKVByCur(SStreamStateCur* pCur, SWinKey* pKey, const void** pVal, int32_t* pVLen); int32_t streamStateGetKVByCur(SStreamStateCur* pCur, SWinKey* pKey, const void** pVal, int32_t* pVLen); -int32_t streamStateGetFirst(SStreamState* pState, SWinKey* key); -int32_t streamStateSeekFirst(SStreamState* pState, SStreamStateCur* pCur); -int32_t streamStateSeekLast(SStreamState* pState, SStreamStateCur* pCur); - int32_t streamStateCurNext(SStreamState* pState, SStreamStateCur* pCur); int32_t streamStateCurPrev(SStreamState* pState, SStreamStateCur* pCur); @@ -91,6 +92,7 @@ int32_t streamStatePutParName(SStreamState* pState, int64_t groupId, const char* int32_t streamStateGetParName(SStreamState* pState, int64_t groupId, void** pVal); void streamStateReloadInfo(SStreamState* pState, TSKEY ts); +SStreamStateCur* createStreamStateCursor(); /***compare func **/ diff --git a/include/libs/stream/tstreamFileState.h b/include/libs/stream/tstreamFileState.h index 052231fe39..2b567a7370 100644 --- a/include/libs/stream/tstreamFileState.h +++ b/include/libs/stream/tstreamFileState.h @@ -28,20 +28,33 @@ extern "C" { #endif typedef struct SStreamFileState SStreamFileState; -typedef SList SStreamSnapshot; +typedef SList SStreamSnapshot; + +typedef void* (*_state_buff_get_fn)(void* pRowBuff, const void* pKey, size_t keyLen); +typedef int32_t (*_state_buff_put_fn)(void* pRowBuff, const void* pKey, size_t keyLen, const void* data, size_t dataLen); +typedef int32_t (*_state_buff_remove_fn)(void* pRowBuff, const void* pKey, size_t keyLen); 
+typedef int32_t (*_state_buff_remove_by_pos_fn)(SStreamFileState* pState, SRowBuffPos* pPos); +typedef void (*_state_buff_cleanup_fn)(void* pRowBuff); +typedef void* (*_state_buff_create_statekey_fn)(SRowBuffPos* pPos, int64_t num); + +typedef int32_t (*_state_file_remove_fn)(SStreamFileState* pFileState, const void* pKey); +typedef int32_t (*_state_file_get_fn)(SStreamFileState* pFileState, void* pKey, void* data, int32_t* pDataLen); +typedef int32_t (*_state_file_clear_fn)(SStreamState* pState); SStreamFileState* streamFileStateInit(int64_t memSize, uint32_t keySize, uint32_t rowSize, uint32_t selectRowSize, GetTsFun fp, void* pFile, TSKEY delMark, const char* taskId, - int64_t checkpointId); + int64_t checkpointId, int8_t type); void streamFileStateDestroy(SStreamFileState* pFileState); void streamFileStateClear(SStreamFileState* pFileState); bool needClearDiskBuff(SStreamFileState* pFileState); +void streamFileStateReleaseBuff(SStreamFileState* pFileState, SRowBuffPos* pPos, bool used); +int32_t streamFileStateClearBuff(SStreamFileState* pFileState, SRowBuffPos* pPos); int32_t getRowBuff(SStreamFileState* pFileState, void* pKey, int32_t keyLen, void** pVal, int32_t* pVLen); int32_t deleteRowBuff(SStreamFileState* pFileState, const void* pKey, int32_t keyLen); int32_t getRowBuffByPos(SStreamFileState* pFileState, SRowBuffPos* pPos, void** pVal); -void releaseRowBuffPos(SRowBuffPos* pBuff); bool hasRowBuff(SStreamFileState* pFileState, void* pKey, int32_t keyLen); +void putFreeBuff(SStreamFileState* pFileState, SRowBuffPos* pPos); SStreamSnapshot* getSnapshot(SStreamFileState* pFileState); int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot, bool flushState); @@ -52,6 +65,34 @@ int32_t deleteExpiredCheckPoint(SStreamFileState* pFileState, TSKEY mark); int32_t streamFileStateGeSelectRowSize(SStreamFileState* pFileState); void streamFileStateReloadInfo(SStreamFileState* pFileState, TSKEY ts); +void* getRowStateBuff(SStreamFileState* 
pFileState); +void* getStateFileStore(SStreamFileState* pFileState); +bool isDeteled(SStreamFileState* pFileState, TSKEY ts); +bool isFlushedState(SStreamFileState* pFileState, TSKEY ts, TSKEY gap); +SRowBuffPos* getNewRowPosForWrite(SStreamFileState* pFileState); +int32_t getRowStateRowSize(SStreamFileState* pFileState); + +// session window +int32_t getSessionWinResultBuff(SStreamFileState* pFileState, SSessionKey* pKey, TSKEY gap, void** pVal, int32_t* pVLen); +int32_t putSessionWinResultBuff(SStreamFileState* pFileState, SRowBuffPos* pPos); +int32_t getSessionFlushedBuff(SStreamFileState* pFileState, SSessionKey* pKey, void** pVal, int32_t* pVLen); +int32_t deleteSessionWinStateBuffFn(void* pBuff, const void *key, size_t keyLen); +int32_t deleteSessionWinStateBuffByPosFn(SStreamFileState* pFileState, SRowBuffPos* pPos); + +void sessionWinStateClear(SStreamFileState* pFileState); +void sessionWinStateCleanup(void* pBuff); + +SStreamStateCur* sessionWinStateSeekKeyCurrentPrev(SStreamFileState* pFileState, const SSessionKey* pWinKey); +SStreamStateCur* sessionWinStateSeekKeyCurrentNext(SStreamFileState* pFileState, const SSessionKey* pWinKey); +SStreamStateCur* sessionWinStateSeekKeyNext(SStreamFileState* pFileState, const SSessionKey* pWinKey); +int32_t sessionWinStateGetKVByCur(SStreamStateCur* pCur, SSessionKey* pKey, void** pVal, int32_t* pVLen); +int32_t sessionWinStateMoveToNext(SStreamStateCur* pCur); +int32_t sessionWinStateGetKeyByRange(SStreamFileState* pFileState, const SSessionKey* key, SSessionKey* curKey); + +// state window +int32_t getStateWinResultBuff(SStreamFileState* pFileState, SSessionKey* key, char* pKeyData, int32_t keyDataLen, + state_key_cmpr_fn fn, void** pVal, int32_t* pVLen); + #ifdef __cplusplus } #endif diff --git a/include/os/osSemaphore.h b/include/os/osSemaphore.h index 5fc89d9d24..e26a9d16d1 100644 --- a/include/os/osSemaphore.h +++ b/include/os/osSemaphore.h @@ -33,6 +33,17 @@ int tsem_timewait(tsem_t *sim, int64_t milis); int 
tsem_post(tsem_t *sem); int tsem_destroy(tsem_t *sem); +#elif defined(_TD_WINDOWS_64) || defined(_TD_WINDOWS_32) +#include <windows.h> /* provides HANDLE, which tsem_t aliases in this branch */ + +#define tsem_t HANDLE + +int tsem_init(tsem_t *sem, int pshared, unsigned int value); +int tsem_wait(tsem_t *sem); +int tsem_timewait(tsem_t *sim, int64_t milis); +int tsem_post(tsem_t *sem); +int tsem_destroy(tsem_t *sem); + #else #define tsem_t sem_t diff --git a/include/util/taoserror.h b/include/util/taoserror.h index 39bf2b5681..227e8520e3 100644 --- a/include/util/taoserror.h +++ b/include/util/taoserror.h @@ -541,6 +541,13 @@ int32_t* taosGetErrno(); #define TSDB_CODE_GRANT_CPU_LIMITED TAOS_DEF_ERROR_CODE(0, 0x080B) #define TSDB_CODE_GRANT_STABLE_LIMITED TAOS_DEF_ERROR_CODE(0, 0x080C) #define TSDB_CODE_GRANT_TABLE_LIMITED TAOS_DEF_ERROR_CODE(0, 0x080D) +#define TSDB_CODE_GRANT_PAR_IVLD_ACTIVE TAOS_DEF_ERROR_CODE(0, 0x080E) +#define TSDB_CODE_GRANT_PAR_IVLD_KEY TAOS_DEF_ERROR_CODE(0, 0x080F) +#define TSDB_CODE_GRANT_PAR_DEC_IVLD_KEY TAOS_DEF_ERROR_CODE(0, 0x0810) +#define TSDB_CODE_GRANT_PAR_DEC_IVLD_KLEN TAOS_DEF_ERROR_CODE(0, 0x0811) +#define TSDB_CODE_GRANT_GEN_IVLD_KEY TAOS_DEF_ERROR_CODE(0, 0x0812) +#define TSDB_CODE_GRANT_GEN_APP_LIMIT TAOS_DEF_ERROR_CODE(0, 0x0813) +#define TSDB_CODE_GRANT_GEN_ENC_IVLD_KLEN TAOS_DEF_ERROR_CODE(0, 0x0814) // sync // #define TSDB_CODE_SYN_INVALID_CONFIG TAOS_DEF_ERROR_CODE(0, 0x0900) // 2.x diff --git a/source/common/src/tdataformat.c b/source/common/src/tdataformat.c index f05f7dc3f9..e04ba1515f 100644 --- a/source/common/src/tdataformat.c +++ b/source/common/src/tdataformat.c @@ -1557,11 +1557,13 @@ STSchema *tBuildTSchema(SSchema *aSchema, int32_t numOfCols, int32_t version) { // SColData ======================================== void tColDataDestroy(void *ph) { - SColData *pColData = (SColData *)ph; + if (ph) { + SColData *pColData = (SColData *)ph; - tFree(pColData->pBitMap); - tFree(pColData->aOffset); - tFree(pColData->pData); + tFree(pColData->pBitMap); + tFree(pColData->aOffset); + 
tFree(pColData->pData); + } } void tColDataInit(SColData *pColData, int16_t cid, int8_t type, int8_t smaOn) { diff --git a/source/common/src/tmsg.c b/source/common/src/tmsg.c index 4b5663b126..0ba9539124 100644 --- a/source/common/src/tmsg.c +++ b/source/common/src/tmsg.c @@ -5240,6 +5240,7 @@ int32_t tSerializeSBalanceVgroupLeaderReq(void *buf, int32_t bufLen, SBalanceVgr if (tStartEncode(&encoder) < 0) return -1; if (tEncodeI32(&encoder, pReq->useless) < 0) return -1; + if (tEncodeI32(&encoder, pReq->vgId) < 0) return -1; tEndEncode(&encoder); int32_t tlen = encoder.pos; @@ -5253,6 +5254,9 @@ int32_t tDeserializeSBalanceVgroupLeaderReq(void *buf, int32_t bufLen, SBalanceV if (tStartDecode(&decoder) < 0) return -1; if (tDecodeI32(&decoder, &pReq->useless) < 0) return -1; + if(!tDecodeIsEnd(&decoder)){ + if (tDecodeI32(&decoder, &pReq->vgId) < 0) return -1; + } tEndDecode(&decoder); tDecoderClear(&decoder); diff --git a/source/dnode/mgmt/mgmt_dnode/CMakeLists.txt b/source/dnode/mgmt/mgmt_dnode/CMakeLists.txt index fdd0830a58..f7920d3d8e 100644 --- a/source/dnode/mgmt/mgmt_dnode/CMakeLists.txt +++ b/source/dnode/mgmt/mgmt_dnode/CMakeLists.txt @@ -3,11 +3,22 @@ add_library(mgmt_dnode STATIC ${MGMT_DNODE}) if (DEFINED GRANT_CFG_INCLUDE_DIR) add_definitions(-DGRANTS_CFG) endif() + +IF (NOT BUILD_DM_MODULE) + MESSAGE(STATUS "NOT BUILD_DM_MODULE") + target_link_directories( + mgmt_dnode + PUBLIC "${TD_SOURCE_DIR}/deps/${TD_DEPS_DIR}/dm_static" + ) +ELSE() + MESSAGE(STATUS "BUILD_DM_MODULE") +ENDIF() + target_include_directories( mgmt_dnode PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/inc" PUBLIC "${GRANT_CFG_INCLUDE_DIR}" ) target_link_libraries( - mgmt_dnode node_util + mgmt_dnode node_util dmodule ) \ No newline at end of file diff --git a/source/dnode/mgmt/mgmt_dnode/src/dmInt.c b/source/dnode/mgmt/mgmt_dnode/src/dmInt.c index fd9f4fccbe..4bd32cac20 100644 --- a/source/dnode/mgmt/mgmt_dnode/src/dmInt.c +++ b/source/dnode/mgmt/mgmt_dnode/src/dmInt.c @@ -15,12 +15,13 @@ #define 
_DEFAULT_SOURCE #include "dmInt.h" +#include "libs/function/tudf.h" static int32_t dmStartMgmt(SDnodeMgmt *pMgmt) { if (dmStartStatusThread(pMgmt) != 0) { return -1; } -#if defined(TD_ENTERPRISE) && !defined(_TD_DARWIN_64) +#if defined(TD_ENTERPRISE) if (dmStartNotifyThread(pMgmt) != 0) { return -1; } @@ -38,7 +39,9 @@ static void dmStopMgmt(SDnodeMgmt *pMgmt) { pMgmt->pData->stopped = true; dmStopMonitorThread(pMgmt); dmStopStatusThread(pMgmt); +#if defined(TD_ENTERPRISE) dmStopNotifyThread(pMgmt); +#endif dmStopCrashReportThread(pMgmt); } diff --git a/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c b/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c index f567267ff8..18da1d638c 100644 --- a/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c +++ b/source/dnode/mgmt/mgmt_dnode/src/dmWorker.c @@ -53,21 +53,26 @@ static void *dmStatusThreadFp(void *param) { return NULL; } -tsem_t dmNotifySem; -static void *dmNotifyThreadFp(void *param) { +SDmNotifyHandle dmNotifyHdl = {.state = 0}; +static void *dmNotifyThreadFp(void *param) { SDnodeMgmt *pMgmt = param; - int64_t lastTime = taosGetTimestampMs(); setThreadName("dnode-notify"); - if (tsem_init(&dmNotifySem, 0, 0) != 0) { + if (tsem_init(&dmNotifyHdl.sem, 0, 0) != 0) { return NULL; } + bool wait = true; while (1) { if (pMgmt->pData->dropped || pMgmt->pData->stopped) break; - - tsem_wait(&dmNotifySem); + if (wait) tsem_wait(&dmNotifyHdl.sem); + atomic_store_8(&dmNotifyHdl.state, 1); dmSendNotifyReq(pMgmt); + if (1 == atomic_val_compare_exchange_8(&dmNotifyHdl.state, 1, 0)) { + wait = true; + continue; + } + wait = false; } return NULL; @@ -189,11 +194,11 @@ int32_t dmStartNotifyThread(SDnodeMgmt *pMgmt) { void dmStopNotifyThread(SDnodeMgmt *pMgmt) { if (taosCheckPthreadValid(pMgmt->notifyThread)) { - tsem_post(&dmNotifySem); + tsem_post(&dmNotifyHdl.sem); taosThreadJoin(pMgmt->notifyThread, NULL); taosThreadClear(&pMgmt->notifyThread); } - tsem_destroy(&dmNotifySem); + tsem_destroy(&dmNotifyHdl.sem); } int32_t 
dmStartMonitorThread(SDnodeMgmt *pMgmt) { diff --git a/source/dnode/mgmt/mgmt_qnode/src/qmInt.c b/source/dnode/mgmt/mgmt_qnode/src/qmInt.c index 3b425a0b49..8a042da078 100644 --- a/source/dnode/mgmt/mgmt_qnode/src/qmInt.c +++ b/source/dnode/mgmt/mgmt_qnode/src/qmInt.c @@ -15,6 +15,7 @@ #define _DEFAULT_SOURCE #include "qmInt.h" +#include "libs/function/tudf.h" static int32_t qmRequire(const SMgmtInputOpt *pInput, bool *required) { return dmReadFile(pInput->path, pInput->name, required); diff --git a/source/dnode/mgmt/mgmt_snode/src/smInt.c b/source/dnode/mgmt/mgmt_snode/src/smInt.c index e222349767..47c2993014 100644 --- a/source/dnode/mgmt/mgmt_snode/src/smInt.c +++ b/source/dnode/mgmt/mgmt_snode/src/smInt.c @@ -16,6 +16,7 @@ #define _DEFAULT_SOURCE #include "smInt.h" #include "libs/function/function.h" +#include "libs/function/tudf.h" static int32_t smRequire(const SMgmtInputOpt *pInput, bool *required) { return dmReadFile(pInput->path, pInput->name, required); diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmInt.c b/source/dnode/mgmt/mgmt_vnode/src/vmInt.c index f0ab703b8a..963bfa3197 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmInt.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmInt.c @@ -17,6 +17,7 @@ #include "vmInt.h" #include "tfs.h" #include "vnd.h" +#include "libs/function/tudf.h" int32_t vmAllocPrimaryDisk(SVnodeMgmt *pMgmt, int32_t vgId) { STfs *pTfs = pMgmt->pTfs; diff --git a/source/dnode/mgmt/node_mgmt/inc/dmMgmt.h b/source/dnode/mgmt/node_mgmt/inc/dmMgmt.h index 1c6625ba1a..3cf7a360f9 100644 --- a/source/dnode/mgmt/node_mgmt/inc/dmMgmt.h +++ b/source/dnode/mgmt/node_mgmt/inc/dmMgmt.h @@ -95,6 +95,10 @@ void dmCleanupDnode(SDnode *pDnode); SMgmtWrapper *dmAcquireWrapper(SDnode *pDnode, EDndNodeType nType); int32_t dmMarkWrapper(SMgmtWrapper *pWrapper); void dmReleaseWrapper(SMgmtWrapper *pWrapper); +int32_t dmInitVars(SDnode *pDnode); +void dmClearVars(SDnode *pDnode); +int32_t dmInitModule(SDnode *pDnode); +bool dmRequireNode(SDnode *pDnode, 
SMgmtWrapper *pWrapper); SMgmtInputOpt dmBuildMgmtInputOpt(SMgmtWrapper *pWrapper); void dmSetStatus(SDnode *pDnode, EDndRunStatus stype); void dmProcessServerStartupStatus(SDnode *pDnode, SRpcMsg *pMsg); diff --git a/source/dnode/mgmt/node_mgmt/src/dmEnv.c b/source/dnode/mgmt/node_mgmt/src/dmEnv.c index 8815647047..d560ba1644 100644 --- a/source/dnode/mgmt/node_mgmt/src/dmEnv.c +++ b/source/dnode/mgmt/node_mgmt/src/dmEnv.c @@ -16,24 +16,7 @@ #define _DEFAULT_SOURCE #include "dmMgmt.h" #include "audit.h" - -#define STR_CASE_CMP(s, d) (0 == strcasecmp((s), (d))) -#define STR_STR_CMP(s, d) (strstr((s), (d))) -#define STR_INT_CMP(s, d, c) (taosStr2Int32(s, 0, 10) c(d)) -#define STR_STR_SIGN ("ia") -#define DM_INIT_MON() \ - do { \ - code = (int32_t)(2147483648 | 298); \ - strncpy(stName, tsVersionName, 64); \ - monCfg.maxLogs = tsMonitorMaxLogs; \ - monCfg.port = tsMonitorPort; \ - monCfg.server = tsMonitorFqdn; \ - monCfg.comp = tsMonitorComp; \ - if (monInit(&monCfg) != 0) { \ - if (terrno != 0) code = terrno; \ - goto _exit; \ - } \ - } while (0) +#include "libs/function/tudf.h" #define DM_INIT_AUDIT() \ do { \ @@ -45,15 +28,7 @@ } \ } while (0) -#define DM_ERR_RTN(c) \ - do { \ - code = (c); \ - goto _exit; \ - } while (0) - static SDnode globalDnode = {0}; -static const char *dmOS[10] = {"Ubuntu", "CentOS Linux", "Red Hat", "Debian GNU", "CoreOS", - "FreeBSD", "openSUSE", "SLES", "Fedora", "macOS"}; SDnode *dmInstance() { return &globalDnode; } @@ -76,30 +51,14 @@ static int32_t dmInitSystem() { static int32_t dmInitMonitor() { int32_t code = 0; SMonCfg monCfg = {0}; - char reName[64] = {0}; - char stName[64] = {0}; - char ver[64] = {0}; - DM_INIT_MON(); - - if (STR_STR_CMP(stName, STR_STR_SIGN)) { - DM_ERR_RTN(0); - } - if (taosGetOsReleaseName(reName, stName, ver, 64) != 0) { - DM_ERR_RTN(code); - } - if (STR_CASE_CMP(stName, dmOS[0])) { - if (STR_INT_CMP(ver, 17, >)) { - DM_ERR_RTN(0); - } - } else if (STR_CASE_CMP(stName, dmOS[1])) { - if (STR_INT_CMP(ver, 6, 
>)) { - DM_ERR_RTN(0); - } - } else if (STR_STR_CMP(stName, dmOS[2]) || STR_STR_CMP(stName, dmOS[3]) || STR_STR_CMP(stName, dmOS[4]) || - STR_STR_CMP(stName, dmOS[5]) || STR_STR_CMP(stName, dmOS[6]) || STR_STR_CMP(stName, dmOS[7]) || - STR_STR_CMP(stName, dmOS[8]) || STR_STR_CMP(stName, dmOS[9])) { - DM_ERR_RTN(0); + monCfg.maxLogs = tsMonitorMaxLogs; + monCfg.port = tsMonitorPort; + monCfg.server = tsMonitorFqdn; + monCfg.comp = tsMonitorComp; + if (monInit(&monCfg) != 0) { + if (terrno != 0) code = terrno; + goto _exit; } _exit: diff --git a/source/dnode/mgmt/node_mgmt/src/dmMgmt.c b/source/dnode/mgmt/node_mgmt/src/dmMgmt.c index dedd588c53..15697dc448 100644 --- a/source/dnode/mgmt/node_mgmt/src/dmMgmt.c +++ b/source/dnode/mgmt/node_mgmt/src/dmMgmt.c @@ -24,88 +24,6 @@ #include "tcompression.h" #endif -static bool dmRequireNode(SDnode *pDnode, SMgmtWrapper *pWrapper) { - SMgmtInputOpt input = dmBuildMgmtInputOpt(pWrapper); - - bool required = false; - int32_t code = (*pWrapper->func.requiredFp)(&input, &required); - if (!required) { - dDebug("node:%s, does not require startup", pWrapper->name); - } else { - dDebug("node:%s, required to startup", pWrapper->name); - } - - return required; -} - -static int32_t dmInitVars(SDnode *pDnode) { - SDnodeData *pData = &pDnode->data; - pData->dnodeId = 0; - pData->clusterId = 0; - pData->dnodeVer = 0; - pData->updateTime = 0; - pData->rebootTime = taosGetTimestampMs(); - pData->dropped = 0; - pData->stopped = 0; - - pData->dnodeHash = taosHashInit(4, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), true, HASH_NO_LOCK); - if (pData->dnodeHash == NULL) { - dError("failed to init dnode hash"); - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; - } - - if (dmReadEps(pData) != 0) { - dError("failed to read file since %s", terrstr()); - return -1; - } - - if (pData->dropped) { - dError("dnode will not start since its already dropped"); - return -1; - } - - taosThreadRwlockInit(&pData->lock, NULL); - 
taosThreadMutexInit(&pDnode->mutex, NULL); - return 0; -} - -static void dmClearVars(SDnode *pDnode) { - for (EDndNodeType ntype = DNODE; ntype < NODE_END; ++ntype) { - SMgmtWrapper *pWrapper = &pDnode->wrappers[ntype]; - taosMemoryFreeClear(pWrapper->path); - taosThreadRwlockDestroy(&pWrapper->lock); - } - if (pDnode->lockfile != NULL) { - taosUnLockFile(pDnode->lockfile); - taosCloseFile(&pDnode->lockfile); - pDnode->lockfile = NULL; - } - - SDnodeData *pData = &pDnode->data; - taosThreadRwlockWrlock(&pData->lock); - if (pData->oldDnodeEps != NULL) { - if (dmWriteEps(pData) == 0) { - dmRemoveDnodePairs(pData); - } - taosArrayDestroy(pData->oldDnodeEps); - pData->oldDnodeEps = NULL; - } - if (pData->dnodeEps != NULL) { - taosArrayDestroy(pData->dnodeEps); - pData->dnodeEps = NULL; - } - if (pData->dnodeHash != NULL) { - taosHashCleanup(pData->dnodeHash); - pData->dnodeHash = NULL; - } - taosThreadRwlockUnlock(&pData->lock); - - taosThreadRwlockDestroy(&pData->lock); - taosThreadMutexDestroy(&pDnode->mutex); - memset(&pDnode->mutex, 0, sizeof(pDnode->mutex)); -} - int32_t dmInitDnode(SDnode *pDnode) { dDebug("start to create dnode"); int32_t code = -1; @@ -143,22 +61,12 @@ int32_t dmInitDnode(SDnode *pDnode) { pWrapper->required = dmRequireNode(pDnode, pWrapper); } - if (dmInitMsgHandle(pDnode) != 0) { - dError("failed to init msg handles since %s", terrstr()); - goto _OVER; - } - pDnode->lockfile = dmCheckRunning(tsDataDir); if (pDnode->lockfile == NULL) { goto _OVER; } - if (dmInitServer(pDnode) != 0) { - dError("failed to init transport since %s", terrstr()); - goto _OVER; - } - - if (dmInitClient(pDnode) != 0) { + if(dmInitModule(pDnode) != 0) { goto _OVER; } diff --git a/source/dnode/mgmt/node_util/inc/dmUtil.h b/source/dnode/mgmt/node_util/inc/dmUtil.h index 9d97e6ae9f..0a52c578a5 100644 --- a/source/dnode/mgmt/node_util/inc/dmUtil.h +++ b/source/dnode/mgmt/node_util/inc/dmUtil.h @@ -40,7 +40,6 @@ #include "tfs.h" #include "wal.h" -#include 
"libs/function/tudf.h" #ifdef __cplusplus extern "C" { #endif @@ -94,6 +93,7 @@ typedef int32_t (*ProcessAlterNodeTypeFp)(EDndNodeType ntype, SRpcMsg *pMsg); typedef struct { int32_t dnodeId; + int32_t engineVer; int64_t clusterId; int64_t dnodeVer; int64_t updateTime; @@ -172,6 +172,9 @@ int32_t dmReadFile(const char *path, const char *name, bool *pDeployed); int32_t dmWriteFile(const char *path, const char *name, bool deployed); TdFilePtr dmCheckRunning(const char *dataDir); +// dmodule.c +int32_t dmInitDndInfo(SDnodeData *pData); + // dmEps.c int32_t dmReadEps(SDnodeData *pData); int32_t dmWriteEps(SDnodeData *pData); diff --git a/source/dnode/mgmt/node_util/src/dmEps.c b/source/dnode/mgmt/node_util/src/dmEps.c index 88f6b5da40..3e948678a4 100644 --- a/source/dnode/mgmt/node_util/src/dmEps.c +++ b/source/dnode/mgmt/node_util/src/dmEps.c @@ -57,6 +57,8 @@ static int32_t dmDecodeEps(SJson *pJson, SDnodeData *pData) { if (code < 0) return -1; tjsonGetNumberValue(pJson, "dnodeVer", pData->dnodeVer, code); if (code < 0) return -1; + tjsonGetNumberValue(pJson, "engineVer", pData->engineVer, code); + if (code < 0) return -1; tjsonGetNumberValue(pJson, "clusterId", pData->clusterId, code); if (code < 0) return -1; tjsonGetInt32ValueFromDouble(pJson, "dropped", pData->dropped, code); @@ -96,7 +98,8 @@ int32_t dmReadEps(SDnodeData *pData) { pData->dnodeEps = taosArrayInit(1, sizeof(SDnodeEp)); if (pData->dnodeEps == NULL) { - dError("failed to calloc dnodeEp array since %s", strerror(errno)); + code = terrno; + dError("failed to calloc dnodeEp array since %s", terrstr()); goto _OVER; } @@ -184,6 +187,7 @@ _OVER: static int32_t dmEncodeEps(SJson *pJson, SDnodeData *pData) { if (tjsonAddDoubleToObject(pJson, "dnodeId", pData->dnodeId) < 0) return -1; if (tjsonAddIntegerToObject(pJson, "dnodeVer", pData->dnodeVer) < 0) return -1; + if (tjsonAddIntegerToObject(pJson, "engineVer", pData->engineVer) < 0) return -1; if (tjsonAddIntegerToObject(pJson, "clusterId", 
pData->clusterId) < 0) return -1; if (tjsonAddDoubleToObject(pJson, "dropped", pData->dropped) < 0) return -1; @@ -218,8 +222,11 @@ int32_t dmWriteEps(SDnodeData *pData) { snprintf(realfile, sizeof(realfile), "%s%sdnode%sdnode.json", tsDataDir, TD_DIRSEP, TD_DIRSEP); terrno = TSDB_CODE_OUT_OF_MEMORY; + + if (dmInitDndInfo(pData) != 0) goto _OVER; /* any non-zero result aborts the write */ pJson = tjsonCreateObject(); if (pJson == NULL) goto _OVER; + pData->engineVer = tsVersion; if (dmEncodeEps(pJson, pData) != 0) goto _OVER; buffer = tjsonToString(pJson); if (buffer == NULL) goto _OVER; diff --git a/source/dnode/mnode/impl/inc/mndUser.h b/source/dnode/mnode/impl/inc/mndUser.h index fab8ee4707..1aa01fd59d 100644 --- a/source/dnode/mnode/impl/inc/mndUser.h +++ b/source/dnode/mnode/impl/inc/mndUser.h @@ -47,7 +47,7 @@ void mndUserFreeObj(SUserObj *pUser); int64_t mndGetIpWhiteVer(SMnode *pMnode); -void mndUpdateIpWhite(SMnode *pMnode, char *user, char *fqdn, int8_t type, int8_t lock); +void mndUpdateIpWhiteForAllUser(SMnode *pMnode, char *user, char *fqdn, int8_t type, int8_t lock); int32_t mndRefreshUserIpWhiteList(SMnode *pMnode); diff --git a/source/dnode/mnode/impl/src/mndDb.c b/source/dnode/mnode/impl/src/mndDb.c index 1562160a7b..a27de37daf 100644 --- a/source/dnode/mnode/impl/src/mndDb.c +++ b/source/dnode/mnode/impl/src/mndDb.c @@ -294,6 +294,7 @@ static int32_t mndDbActionUpdate(SSdb *pSdb, SDbObj *pOld, SDbObj *pNew) { pOld->cfg.daysToKeep0 = pNew->cfg.daysToKeep0; pOld->cfg.daysToKeep1 = pNew->cfg.daysToKeep1; pOld->cfg.daysToKeep2 = pNew->cfg.daysToKeep2; + pOld->cfg.keepTimeOffset = pNew->cfg.keepTimeOffset; pOld->cfg.walFsyncPeriod = pNew->cfg.walFsyncPeriod; pOld->cfg.walLevel = pNew->cfg.walLevel; pOld->cfg.walRetentionPeriod = pNew->cfg.walRetentionPeriod; diff --git a/source/dnode/mnode/impl/src/mndDnode.c b/source/dnode/mnode/impl/src/mndDnode.c index ca7a639c09..f1a1bb8102 100644 --- a/source/dnode/mnode/impl/src/mndDnode.c +++ b/source/dnode/mnode/impl/src/mndDnode.c @@ -147,7
+147,7 @@ static int32_t mndCreateDefaultDnode(SMnode *pMnode) { if (mndTransPrepare(pMnode, pTrans) != 0) goto _OVER; code = 0; - mndUpdateIpWhite(pMnode, TSDB_DEFAULT_USER, dnodeObj.fqdn, IP_WHITE_ADD, 1); + mndUpdateIpWhiteForAllUser(pMnode, TSDB_DEFAULT_USER, dnodeObj.fqdn, IP_WHITE_ADD, 1); _OVER: mndTransDrop(pTrans); @@ -720,8 +720,8 @@ static int32_t mndProcessNotifyReq(SRpcMsg *pReq) { mndReleaseVgroup(pMnode, pVgroup); } } + mndUpdClusterInfo(pReq); _OVER: - mndUpdClusterInfo(pReq); tFreeSNotifyReq(&notifyReq); return code; } @@ -752,7 +752,7 @@ static int32_t mndCreateDnode(SMnode *pMnode, SRpcMsg *pReq, SCreateDnodeReq *pC if (mndTransPrepare(pMnode, pTrans) != 0) goto _OVER; code = 0; - mndUpdateIpWhite(pMnode, TSDB_DEFAULT_USER, dnodeObj.fqdn, IP_WHITE_ADD, 1); + mndUpdateIpWhiteForAllUser(pMnode, TSDB_DEFAULT_USER, dnodeObj.fqdn, IP_WHITE_ADD, 1); _OVER: mndTransDrop(pTrans); sdbFreeRaw(pRaw); @@ -763,7 +763,9 @@ static int32_t mndConfigDnode(SMnode *pMnode, SRpcMsg *pReq, SMCfgDnodeReq *pCfg SSdbRaw *pRaw = NULL; STrans *pTrans = NULL; SDnodeObj *pDnode = NULL; + SArray *failRecord = NULL; bool cfgAll = pCfgReq->dnodeId == -1; + int32_t cfgAllErr = 0; int32_t iter = 0; SSdb *pSdb = pMnode->pSdb; @@ -777,28 +779,64 @@ static int32_t mndConfigDnode(SMnode *pMnode, SRpcMsg *pReq, SMCfgDnodeReq *pCfg goto _OVER; } + SDnodeObj tmpDnode = *pDnode; + if (action == DND_ACTIVE_CODE) { +#ifndef TD_GRANT_OPTIMIZE + if (grantAlterActiveCode(pDnode->active, pCfgReq->value, tmpDnode.active, 0) != 0) { +#else + if (grantAlterActiveCode(pDnode->id, pDnode->active, pCfgReq->value, tmpDnode.active, 0) != 0) { +#endif + if (TSDB_CODE_DUP_KEY != terrno) { + mError("dnode:%d, config dnode:%d, app:%p config:%s value:%s failed since %s", pDnode->id, pCfgReq->dnodeId, + pReq->info.ahandle, pCfgReq->config, pCfgReq->value, terrstr()); + if (cfgAll) { // alter all dnodes: + if (!failRecord) failRecord = taosArrayInit(1, sizeof(int32_t)); + if (failRecord)
taosArrayPush(failRecord, &pDnode->id); + if (0 == cfgAllErr) cfgAllErr = terrno; // output 1st terrno. + } + } else { + terrno = 0; // no action for dup active code + } + if (cfgAll) continue; + goto _OVER; + } + } else if (action == DND_CONN_ACTIVE_CODE) { +#ifndef TD_GRANT_OPTIMIZE + if (grantAlterActiveCode(pDnode->connActive, pCfgReq->value, tmpDnode.connActive, 1) != 0) { +#else + if (grantAlterActiveCode(pDnode->id, pDnode->connActive, pCfgReq->value, tmpDnode.connActive, 1) != 0) { +#endif + if (TSDB_CODE_DUP_KEY != terrno) { + mError("dnode:%d, config dnode:%d, app:%p config:%s value:%s failed since %s", pDnode->id, pCfgReq->dnodeId, + pReq->info.ahandle, pCfgReq->config, pCfgReq->value, terrstr()); + if (cfgAll) { + if (!failRecord) failRecord = taosArrayInit(1, sizeof(int32_t)); + if (failRecord) taosArrayPush(failRecord, &pDnode->id); + if (0 == cfgAllErr) cfgAllErr = terrno; + } + } else { + terrno = 0; + } + if (cfgAll) continue; + goto _OVER; + } + } else { + terrno = TSDB_CODE_INVALID_CFG; + goto _OVER; + } + if (!pTrans) { pTrans = mndTransCreate(pMnode, TRN_POLICY_RETRY, TRN_CONFLICT_GLOBAL, pReq, "config-dnode"); if (!pTrans) goto _OVER; if (mndTrancCheckConflict(pMnode, pTrans) != 0) goto _OVER; } - SDnodeObj tmpDnode = *pDnode; - if (action == DND_ACTIVE_CODE) { - strncpy(tmpDnode.active, pCfgReq->value, TSDB_ACTIVE_KEY_LEN); - } else if (action == DND_CONN_ACTIVE_CODE) { - strncpy(tmpDnode.connActive, pCfgReq->value, TSDB_CONN_ACTIVE_KEY_LEN); - } else { - terrno = TSDB_CODE_INVALID_CFG; - goto _OVER; - } - pRaw = mndDnodeActionEncode(&tmpDnode); if (pRaw == NULL || mndTransAppendCommitlog(pTrans, pRaw) != 0) goto _OVER; (void)sdbSetRawStatus(pRaw, SDB_STATUS_READY); pRaw = NULL; - mInfo("dnode:%d, config dnode, cfg:%d, app:%p config:%s value:%s", pDnode->id, pCfgReq->dnodeId, pReq->info.ahandle, + mInfo("dnode:%d, config dnode:%d, app:%p config:%s value:%s", pDnode->id, pCfgReq->dnodeId, pReq->info.ahandle, pCfgReq->config, pCfgReq->value); 
if (cfgAll) { @@ -816,12 +854,19 @@ static int32_t mndConfigDnode(SMnode *pMnode, SRpcMsg *pReq, SMCfgDnodeReq *pCfg _OVER: if (cfgAll) { sdbRelease(pSdb, pDnode); + if (cfgAllErr != 0) terrno = cfgAllErr; + int32_t nFail = taosArrayGetSize(failRecord); + if (nFail > 0) { + mError("config dnode, cfg:%d, app:%p config:%s value:%s. total:%d, fail:%d", pCfgReq->dnodeId, pReq->info.ahandle, + pCfgReq->config, pCfgReq->value, iter, nFail); + } } else { mndReleaseDnode(pMnode, pDnode); } sdbCancelFetch(pSdb, pIter); mndTransDrop(pTrans); sdbFreeRaw(pRaw); + taosArrayDestroy(failRecord); return terrno; } @@ -1045,7 +1090,7 @@ static int32_t mndDropDnode(SMnode *pMnode, SRpcMsg *pReq, SDnodeObj *pDnode, SM if (mndTransPrepare(pMnode, pTrans) != 0) goto _OVER; - mndUpdateIpWhite(pMnode, TSDB_DEFAULT_USER, pDnode->fqdn, IP_WHITE_DROP, 1); + mndUpdateIpWhiteForAllUser(pMnode, TSDB_DEFAULT_USER, pDnode->fqdn, IP_WHITE_DROP, 1); code = 0; _OVER: @@ -1191,7 +1236,7 @@ static int32_t mndProcessConfigDnodeReq(SRpcMsg *pReq) { strcpy(dcfgReq.config, "monitor"); snprintf(dcfgReq.value, TSDB_DNODE_VALUE_LEN, "%d", flag); -} else if (strncasecmp(cfgReq.config, "ttlpushinterval", 14) == 0) { + } else if (strncasecmp(cfgReq.config, "ttlpushinterval", 14) == 0) { int32_t optLen = strlen("ttlpushinterval"); int32_t flag = -1; int32_t code = mndMCfgGetValInt32(&cfgReq, optLen, &flag); @@ -1262,7 +1307,8 @@ static int32_t mndProcessConfigDnodeReq(SRpcMsg *pReq) { snprintf(dcfgReq.value, TSDB_DNODE_VALUE_LEN, "%s", cfgReq.value); if (mndConfigDnode(pMnode, pReq, &cfgReq, opt) != 0) { - mError("dnode:%d, failed to config activeCode since %s", cfgReq.dnodeId, terrstr()); + mError("dnode:%d, failed to config activeCode since %s. 
conf:%s, val:%s", cfgReq.dnodeId, terrstr(), + cfgReq.config, cfgReq.value); return -1; } return 0; diff --git a/source/dnode/mnode/impl/src/mndGrant.c b/source/dnode/mnode/impl/src/mndGrant.c index c4c5e0355d..9f2ac68da5 100644 --- a/source/dnode/mnode/impl/src/mndGrant.c +++ b/source/dnode/mnode/impl/src/mndGrant.c @@ -131,6 +131,13 @@ void grantAdd(EGrantType grant, uint64_t value) {} void grantRestore(EGrantType grant, uint64_t value) {} int32_t dmProcessGrantReq(void *pInfo, SRpcMsg *pMsg) { return TSDB_CODE_SUCCESS; } int32_t dmProcessGrantNotify(void *pInfo, SRpcMsg *pMsg) { return TSDB_CODE_SUCCESS; } +#ifndef TD_GRANT_OPTIMIZE +int32_t grantAlterActiveCode(const char *old, const char *new, char *out, int8_t type) { return TSDB_CODE_SUCCESS; } +#else +int32_t grantAlterActiveCode(int32_t did, const char *old, const char *new, char *out, int8_t type) { + return TSDB_CODE_SUCCESS; +} +#endif #endif diff --git a/source/dnode/mnode/impl/src/mndTrans.c b/source/dnode/mnode/impl/src/mndTrans.c index 5d150b731c..89eda91e8d 100644 --- a/source/dnode/mnode/impl/src/mndTrans.c +++ b/source/dnode/mnode/impl/src/mndTrans.c @@ -543,6 +543,8 @@ STrans *mndAcquireTrans(SMnode *pMnode, int32_t transId) { STrans *pTrans = sdbAcquire(pMnode->pSdb, SDB_TRANS, &transId); if (pTrans == NULL) { terrno = TSDB_CODE_MND_TRANS_NOT_EXIST; + } else { + taosThreadMutexInit(&pTrans->mutex, NULL); } return pTrans; } diff --git a/source/dnode/mnode/impl/src/mndUser.c b/source/dnode/mnode/impl/src/mndUser.c index 3c4bb42379..f38f825302 100644 --- a/source/dnode/mnode/impl/src/mndUser.c +++ b/source/dnode/mnode/impl/src/mndUser.c @@ -58,7 +58,7 @@ static int32_t mndRetrievePrivileges(SRpcMsg *pReq, SShowObj *pShow, SSDataBloc static void mndCancelGetNextPrivileges(SMnode *pMnode, void *pIter); SHashObj *mndFetchAllIpWhite(SMnode *pMnode); static int32_t mndProcesSRetrieveIpWhiteReq(SRpcMsg *pReq); -bool mndUpdateIpWhiteImpl(SHashObj *pIpWhiteTab, char *user, char *fqdn, int8_t type); +bool 
mndUpdateIpWhiteImpl(SHashObj *pIpWhiteTab, char *user, char *fqdn, int8_t type); void ipWhiteMgtUpdateAll(SMnode *pMnode); typedef struct { @@ -103,7 +103,8 @@ int32_t ipWhiteMgtUpdate(SMnode *pMnode, char *user, SIpWhiteList *pNew) { for (int i = 0; i < taosArrayGetSize(fqdns); i++) { char *fqdn = taosArrayGetP(fqdns, i); - mndUpdateIpWhiteImpl(ipWhiteMgt.pIpWhiteTab, TSDB_DEFAULT_USER, fqdn, IP_WHITE_ADD); + update |= mndUpdateIpWhiteImpl(ipWhiteMgt.pIpWhiteTab, TSDB_DEFAULT_USER, fqdn, IP_WHITE_ADD); + update |= mndUpdateIpWhiteImpl(ipWhiteMgt.pIpWhiteTab, user, fqdn, IP_WHITE_ADD); } for (int i = 0; i < taosArrayGetSize(fqdns); i++) { @@ -115,8 +116,7 @@ int32_t ipWhiteMgtUpdate(SMnode *pMnode, char *user, SIpWhiteList *pNew) { // for (int i = 0; i < taosArrayGetSize(pUserNames); i++) { // taosMemoryFree(taosArrayGetP(pUserNames, i)); // } - //taosArrayDestroy(pUserNames); - + // taosArrayDestroy(pUserNames); if (update) ipWhiteMgt.ver++; @@ -302,7 +302,7 @@ int32_t mndRefreshUserIpWhiteList(SMnode *pMnode) { return 0; } -void mndUpdateIpWhite(SMnode *pMnode, char *user, char *fqdn, int8_t type, int8_t lock) { +void mndUpdateIpWhiteForAllUser(SMnode *pMnode, char *user, char *fqdn, int8_t type, int8_t lock) { if (lock) { taosThreadRwlockWrlock(&ipWhiteMgt.rw); if (ipWhiteMgt.ver == 0) { @@ -313,6 +313,20 @@ void mndUpdateIpWhite(SMnode *pMnode, char *user, char *fqdn, int8_t type, int8_ } bool update = mndUpdateIpWhiteImpl(ipWhiteMgt.pIpWhiteTab, user, fqdn, type); + + void *pIter = taosHashIterate(ipWhiteMgt.pIpWhiteTab, NULL); + while (pIter) { + size_t klen = 0; + char *key = taosHashGetKey(pIter, &klen); + + char *keyDup = taosMemoryCalloc(1, klen + 1); + memcpy(keyDup, key, klen); + update |= mndUpdateIpWhiteImpl(ipWhiteMgt.pIpWhiteTab, keyDup, fqdn, type); + taosMemoryFree(keyDup); + + pIter = taosHashIterate(ipWhiteMgt.pIpWhiteTab, pIter); + } + if (update) ipWhiteMgt.ver++; if (lock) taosThreadRwlockUnlock(&ipWhiteMgt.rw); diff --git 
a/source/dnode/snode/src/snodeInitApi.c b/source/dnode/snode/src/snodeInitApi.c index e737e3fa37..389137f630 100644 --- a/source/dnode/snode/src/snodeInitApi.c +++ b/source/dnode/snode/src/snodeInitApi.c @@ -29,13 +29,12 @@ void initStateStoreAPI(SStateStore* pStore) { pStore->streamFileStateInit = streamFileStateInit; pStore->updateInfoDestoryColseWinSBF = updateInfoDestoryColseWinSBF; - pStore->streamStateGetByPos = streamStateGetByPos; - pStore->streamStatePutParName = streamStatePutParName; pStore->streamStateGetParName = streamStateGetParName; pStore->streamStateAddIfNotExist = streamStateAddIfNotExist; pStore->streamStateReleaseBuf = streamStateReleaseBuf; + pStore->streamStateClearBuff = streamStateClearBuff; pStore->streamStateFreeVal = streamStateFreeVal; pStore->streamStatePut = streamStatePut; @@ -91,8 +90,6 @@ void initStateStoreAPI(SStateStore* pStore) { pStore->streamStateSessionSeekKeyCurrentPrev = streamStateSessionSeekKeyCurrentPrev; pStore->streamStateSessionSeekKeyCurrentNext = streamStateSessionSeekKeyCurrentNext; - pStore->streamFileStateInit = streamFileStateInit; - pStore->streamFileStateDestroy = streamFileStateDestroy; pStore->streamFileStateClear = streamFileStateClear; pStore->needClearDiskBuff = needClearDiskBuff; diff --git a/source/dnode/vnode/inc/vnode.h b/source/dnode/vnode/inc/vnode.h index 85d840ccf5..76ed0f4ed0 100644 --- a/source/dnode/vnode/inc/vnode.h +++ b/source/dnode/vnode/inc/vnode.h @@ -132,9 +132,10 @@ tb_uid_t metaGetTableEntryUidByName(SMeta *pMeta, const char *name); int32_t metaGetCachedTbGroup(void *pVnode, tb_uid_t suid, const uint8_t *pKey, int32_t keyLen, SArray **pList); int32_t metaPutTbGroupToCache(void *pVnode, uint64_t suid, const void *pKey, int32_t keyLen, void *pPayload, int32_t payloadLen); -bool metaTbInFilterCache(void *pVnode, tb_uid_t suid, int8_t type); -int32_t metaPutTbToFilterCache(void *pVnode, tb_uid_t suid, int8_t type); -int32_t metaSizeOfTbFilterCache(void *pVnode, int8_t type); +bool 
metaTbInFilterCache(SMeta *pMeta, const void* key, int8_t type); +int32_t metaPutTbToFilterCache(SMeta *pMeta, const void* key, int8_t type); +int32_t metaSizeOfTbFilterCache(SMeta *pMeta, int8_t type); +int32_t metaInitTbFilterCache(SMeta *pMeta); int32_t metaGetStbStats(void *pVnode, int64_t uid, int64_t *numOfTables, int32_t *numOfCols); diff --git a/source/dnode/vnode/src/meta/metaCache.c b/source/dnode/vnode/src/meta/metaCache.c index 6918634b5d..ceb72aa14d 100644 --- a/source/dnode/vnode/src/meta/metaCache.c +++ b/source/dnode/vnode/src/meta/metaCache.c @@ -14,6 +14,13 @@ */ #include "meta.h" +#ifdef TD_ENTERPRISE +extern const char* tkLogStb[]; +extern const char* tkAuditStb[]; +extern const int tkLogStbNum; +extern const int tkAuditStbNum; +#endif + #define TAG_FILTER_RES_KEY_LEN 32 #define META_CACHE_BASE_BUCKET 1024 #define META_CACHE_STATS_BUCKET 16 @@ -69,6 +76,7 @@ struct SMetaCache { struct STbFilterCache { SHashObj* pStb; + SHashObj* pStbName; } STbFilterCache; }; @@ -178,6 +186,13 @@ int32_t metaCacheOpen(SMeta* pMeta) { goto _err2; } + pCache->STbFilterCache.pStbName = + taosHashInit(0, taosGetDefaultHashFunction(TSDB_DATA_TYPE_VARCHAR), false, HASH_NO_LOCK); + if (pCache->STbFilterCache.pStbName == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _err2; + } + pMeta->pCache = pCache; return code; @@ -204,6 +219,7 @@ void metaCacheClose(SMeta* pMeta) { taosHashCleanup(pMeta->pCache->STbGroupResCache.pTableEntry); taosHashCleanup(pMeta->pCache->STbFilterCache.pStb); + taosHashCleanup(pMeta->pCache->STbFilterCache.pStbName); taosMemoryFree(pMeta->pCache); pMeta->pCache = NULL; @@ -893,30 +909,59 @@ int32_t metaTbGroupCacheClear(SMeta* pMeta, uint64_t suid) { return TSDB_CODE_SUCCESS; } -bool metaTbInFilterCache(void* pVnode, tb_uid_t suid, int8_t type) { - SMeta* pMeta = ((SVnode*)pVnode)->pMeta; +bool metaTbInFilterCache(SMeta *pMeta, const void* key, int8_t type) { + if (type == 0 && taosHashGet(pMeta->pCache->STbFilterCache.pStb, key, 
sizeof(tb_uid_t))) { + return true; + } - if (type == 0 && taosHashGet(pMeta->pCache->STbFilterCache.pStb, &suid, sizeof(suid))) { + if (type == 1 && taosHashGet(pMeta->pCache->STbFilterCache.pStbName, key, strlen(key))) { return true; } return false; } -int32_t metaPutTbToFilterCache(void* pVnode, tb_uid_t suid, int8_t type) { - SMeta* pMeta = ((SVnode*)pVnode)->pMeta; - +int32_t metaPutTbToFilterCache(SMeta *pMeta, const void* key, int8_t type) { if (type == 0) { - return taosHashPut(pMeta->pCache->STbFilterCache.pStb, &suid, sizeof(suid), NULL, 0); + return taosHashPut(pMeta->pCache->STbFilterCache.pStb, key, sizeof(tb_uid_t), NULL, 0); + } + + if (type == 1) { + return taosHashPut(pMeta->pCache->STbFilterCache.pStbName, key, strlen(key), NULL, 0); } return 0; } -int32_t metaSizeOfTbFilterCache(void* pVnode, int8_t type) { - SMeta* pMeta = ((SVnode*)pVnode)->pMeta; +int32_t metaSizeOfTbFilterCache(SMeta *pMeta, int8_t type) { if (type == 0) { return taosHashGetSize(pMeta->pCache->STbFilterCache.pStb); } return 0; -} \ No newline at end of file +} + +int32_t metaInitTbFilterCache(SMeta* pMeta) { +#ifdef TD_ENTERPRISE + int32_t tbNum = 0; + const char** pTbArr = NULL; + const char* dbName = NULL; + + if (!(dbName = strchr(pMeta->pVnode->config.dbname, '.'))) return 0; + if (0 == strncmp(++dbName, "log", TSDB_DB_NAME_LEN)) { + tbNum = tkLogStbNum; + pTbArr = (const char**)&tkLogStb; + } else if (0 == strncmp(dbName, "audit", TSDB_DB_NAME_LEN)) { + tbNum = tkAuditStbNum; + pTbArr = (const char**)&tkAuditStb; + } + if (tbNum && pTbArr) { + for (int32_t i = 0; i < tbNum; ++i) { + if (metaPutTbToFilterCache(pMeta, pTbArr[i], 1) != 0) { + return terrno ? 
terrno : -1; + } + } + } +#else +#endif + return 0; +} diff --git a/source/dnode/vnode/src/meta/metaOpen.c b/source/dnode/vnode/src/meta/metaOpen.c index 3d445acd67..8cab17c417 100644 --- a/source/dnode/vnode/src/meta/metaOpen.c +++ b/source/dnode/vnode/src/meta/metaOpen.c @@ -176,6 +176,10 @@ int metaOpen(SVnode *pVnode, SMeta **ppMeta, int8_t rollback) { goto _err; } + if (metaInitTbFilterCache(pMeta) != 0) { + goto _err; + } + metaDebug("vgId:%d, meta is opened", TD_VID(pVnode)); *ppMeta = pMeta; diff --git a/source/dnode/vnode/src/meta/metaTable.c b/source/dnode/vnode/src/meta/metaTable.c index f065fe3268..442a739076 100644 --- a/source/dnode/vnode/src/meta/metaTable.c +++ b/source/dnode/vnode/src/meta/metaTable.c @@ -15,7 +15,7 @@ #include "meta.h" -extern tsem_t dmNotifySem; +extern SDmNotifyHandle dmNotifyHdl; static int metaSaveJsonVarToIdx(SMeta *pMeta, const SMetaEntry *pCtbEntry, const SSchema *pSchema); static int metaDelJsonVarFromIdx(SMeta *pMeta, const SMetaEntry *pCtbEntry, const SSchema *pSchema); @@ -28,7 +28,7 @@ static int metaSaveToSkmDb(SMeta *pMeta, const SMetaEntry *pME); static int metaUpdateCtbIdx(SMeta *pMeta, const SMetaEntry *pME); static int metaUpdateSuidIdx(SMeta *pMeta, const SMetaEntry *pME); static int metaUpdateTagIdx(SMeta *pMeta, const SMetaEntry *pCtbEntry); -static int metaDropTableByUid(SMeta *pMeta, tb_uid_t uid, int *type, tb_uid_t *pSuid); +static int metaDropTableByUid(SMeta *pMeta, tb_uid_t uid, int *type, tb_uid_t *pSuid, int8_t *pSysTbl); static void metaDestroyTagIdxKey(STagIdxKey *pTagIdxKey); // opt ins_tables query static int metaUpdateBtimeIdx(SMeta *pMeta, const SMetaEntry *pME); @@ -195,10 +195,14 @@ int metaDelJsonVarFromIdx(SMeta *pMeta, const SMetaEntry *pCtbEntry, const SSche } static inline void metaTimeSeriesNotifyCheck(SMeta *pMeta) { -#if defined(TD_ENTERPRISE) && !defined(_TD_DARWIN_64) +#if defined(TD_ENTERPRISE) int64_t nTimeSeries = metaGetTimeSeriesNum(pMeta, 0); int64_t deltaTS = nTimeSeries - 
pMeta->pVnode->config.vndStats.numOfReportedTimeSeries; - if (deltaTS > tsTimeSeriesThreshold) tsem_post(&dmNotifySem); + if (deltaTS > tsTimeSeriesThreshold) { + if (0 == atomic_val_compare_exchange_8(&dmNotifyHdl.state, 1, 2)) { + tsem_post(&dmNotifyHdl.sem); + } + } #endif } @@ -303,7 +307,7 @@ int metaDropSTable(SMeta *pMeta, int64_t verison, SVDropStbReq *pReq, SArray *tb for (int32_t iChild = 0; iChild < taosArrayGetSize(tbUidList); iChild++) { tb_uid_t uid = *(tb_uid_t *)taosArrayGet(tbUidList, iChild); - metaDropTableByUid(pMeta, uid, NULL, NULL); + metaDropTableByUid(pMeta, uid, NULL, NULL, NULL); } // drop super table @@ -392,6 +396,7 @@ int metaAlterSTable(SMeta *pMeta, int64_t version, SVCreateStbReq *pReq) { nStbEntry.stbEntry.schemaTag = pReq->schemaTag; int32_t deltaCol = pReq->schemaRow.nCols - oStbEntry.stbEntry.schemaRow.nCols; + bool updStat = deltaCol != 0 && !metaTbInFilterCache(pMeta, pReq->name, 1); metaWLock(pMeta); // compare two entry @@ -407,15 +412,16 @@ int metaAlterSTable(SMeta *pMeta, int64_t version, SVCreateStbReq *pReq) { // metaStatsCacheDrop(pMeta, nStbEntry.uid); - if (deltaCol != 0) { + if (updStat) { metaUpdateStbStats(pMeta, pReq->suid, 0, deltaCol); } metaULock(pMeta); - if (deltaCol != 0) { + if (updStat) { int64_t ctbNum; metaGetStbStats(pMeta->pVnode, pReq->suid, &ctbNum, NULL); pMeta->pVnode->config.vndStats.numOfTimeSeries += (ctbNum * deltaCol); + metaTimeSeriesNotifyCheck(pMeta); } _exit: @@ -760,6 +766,8 @@ int metaCreateTable(SMeta *pMeta, int64_t ver, SVCreateTbReq *pReq, STableMetaRs } metaReaderClear(&mr); + bool sysTbl = (pReq->type == TSDB_CHILD_TABLE) && metaTbInFilterCache(pMeta, pReq->ctb.stbName, 1); + // build SMetaEntry SVnodeStats *pStats = &pMeta->pVnode->config.vndStats; me.version = ver; @@ -796,9 +804,12 @@ int metaCreateTable(SMeta *pMeta, int64_t ver, SVCreateTbReq *pReq, STableMetaRs #endif ++pStats->numOfCTables; - int32_t nCols = 0; - metaGetStbStats(pMeta->pVnode, me.ctbEntry.suid, 0, &nCols); 
- pStats->numOfTimeSeries += nCols - 1; + + if (!sysTbl) { + int32_t nCols = 0; + metaGetStbStats(pMeta->pVnode, me.ctbEntry.suid, 0, &nCols); + pStats->numOfTimeSeries += nCols - 1; + } metaWLock(pMeta); metaUpdateStbStats(pMeta, me.ctbEntry.suid, 1, 0); @@ -852,6 +863,7 @@ int metaDropTable(SMeta *pMeta, int64_t version, SVDropTbReq *pReq, SArray *tbUi int rc = 0; tb_uid_t uid = 0; tb_uid_t suid = 0; + int8_t sysTbl = 0; int type; rc = tdbTbGet(pMeta->pNameIdx, pReq->name, strlen(pReq->name) + 1, &pData, &nData); @@ -862,12 +874,12 @@ int metaDropTable(SMeta *pMeta, int64_t version, SVDropTbReq *pReq, SArray *tbUi uid = *(tb_uid_t *)pData; metaWLock(pMeta); - rc = metaDropTableByUid(pMeta, uid, &type, &suid); + rc = metaDropTableByUid(pMeta, uid, &type, &suid, &sysTbl); metaULock(pMeta); if (rc < 0) goto _exit; - if (type == TSDB_CHILD_TABLE) { + if (!sysTbl && type == TSDB_CHILD_TABLE) { int32_t nCols = 0; SVnodeStats *pStats = &pMeta->pVnode->config.vndStats; if (metaGetStbStats(pMeta->pVnode, suid, NULL, &nCols) == 0) { @@ -898,9 +910,10 @@ void metaDropTables(SMeta *pMeta, SArray *tbUids) { for (int i = 0; i < taosArrayGetSize(tbUids); ++i) { tb_uid_t uid = *(tb_uid_t *)taosArrayGet(tbUids, i); tb_uid_t suid = 0; + int8_t sysTbl = 0; int type; - metaDropTableByUid(pMeta, uid, &type, &suid); - if (type == TSDB_CHILD_TABLE && suid != 0 && suidHash) { + metaDropTableByUid(pMeta, uid, &type, &suid, &sysTbl); + if (!sysTbl && type == TSDB_CHILD_TABLE && suid != 0 && suidHash) { int64_t *pVal = tSimpleHashGet(suidHash, &suid, sizeof(tb_uid_t)); if (pVal) { nCtbDropped = *pVal + 1; @@ -1059,7 +1072,7 @@ static int metaDeleteTtl(SMeta *pMeta, const SMetaEntry *pME) { return ttlMgrDeleteTtl(pMeta->pTtlMgr, &ctx); } -static int metaDropTableByUid(SMeta *pMeta, tb_uid_t uid, int *type, tb_uid_t *pSuid) { +static int metaDropTableByUid(SMeta *pMeta, tb_uid_t uid, int *type, tb_uid_t *pSuid, int8_t* pSysTbl) { void *pData = NULL; int nData = 0; int rc = 0; @@ -1088,7 
+1101,6 @@ static int metaDropTableByUid(SMeta *pMeta, tb_uid_t uid, int *type, tb_uid_t *p void *tData = NULL; int tLen = 0; - if (tdbTbGet(pMeta->pUidIdx, &e.ctbEntry.suid, sizeof(tb_uid_t), &tData, &tLen) == 0) { STbDbKey tbDbKey = {.uid = e.ctbEntry.suid, .version = ((SUidIdxVal *)tData)[0].version}; if (tdbTbGet(pMeta->pTbDb, &tbDbKey, sizeof(tbDbKey), &tData, &tLen) == 0) { @@ -1098,6 +1110,8 @@ static int metaDropTableByUid(SMeta *pMeta, tb_uid_t uid, int *type, tb_uid_t *p tDecoderInit(&tdc, tData, tLen); metaDecodeEntry(&tdc, &stbEntry); + if (pSysTbl) *pSysTbl = metaTbInFilterCache(pMeta, stbEntry.name, 1) ? 1 : 0; + SSchema *pTagColumn = NULL; SSchemaWrapper *pTagSchema = &stbEntry.stbEntry.schemaTag; if (pTagSchema->nCols == 1 && pTagSchema->pSchema[0].type == TSDB_DATA_TYPE_JSON) { diff --git a/source/dnode/vnode/src/tsdb/tsdbCache.c b/source/dnode/vnode/src/tsdb/tsdbCache.c index d1cb3e2005..ca3fb7027f 100644 --- a/source/dnode/vnode/src/tsdb/tsdbCache.c +++ b/source/dnode/vnode/src/tsdb/tsdbCache.c @@ -57,7 +57,7 @@ static int32_t tsdbOpenBCache(STsdb *pTsdb) { // SLRUCache *pCache = taosLRUCacheInit(10 * 1024 * 1024, 0, .5); int32_t szPage = pTsdb->pVnode->config.tsdbPageSize; - SLRUCache *pCache = taosLRUCacheInit(tsS3BlockCacheSize * tsS3BlockSize * szPage, 0, .5); + SLRUCache *pCache = taosLRUCacheInit((int64_t)tsS3BlockCacheSize * tsS3BlockSize * szPage, 0, .5); if (pCache == NULL) { code = TSDB_CODE_OUT_OF_MEMORY; goto _err; @@ -3100,7 +3100,7 @@ int32_t tsdbCacheGetBlockS3(SLRUCache *pCache, STsdbFD *pFD, LRUHandle **handle) taosThreadMutexUnlock(&pTsdb->bMutex); *handle = NULL; - if (!pBlock) { + if (code == TSDB_CODE_SUCCESS && !pBlock) { code = TSDB_CODE_OUT_OF_MEMORY; } return code; diff --git a/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c b/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c index 4a37ff3ad2..8c3efda856 100644 --- a/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c +++ b/source/dnode/vnode/src/tsdb/tsdbReaderWriter.c @@ -63,7 
+63,7 @@ static int32_t tsdbOpenFileImpl(STsdbFD *pFD) { } // not check file size when reading data files. - if (flag != TD_FILE_READ) { + if (flag != TD_FILE_READ && !pFD->s3File) { if (taosStatFile(path, &pFD->szFile, NULL, NULL) < 0) { code = TAOS_SYSTEM_ERROR(errno); // taosMemoryFree(pFD->pBuf); @@ -130,6 +130,9 @@ static int32_t tsdbWriteFilePage(STsdbFD *pFD) { } } + if (pFD->s3File) { + return code; + } if (pFD->pgno > 0) { int64_t n = taosLSeekFile(pFD->pFD, PAGE_OFFSET(pFD->pgno, pFD->szPage), SEEK_SET); if (n < 0) { @@ -172,10 +175,10 @@ static int32_t tsdbReadFilePage(STsdbFD *pFD, int64_t pgno) { LRUHandle *handle = NULL; pFD->blkno = (pgno + tsS3BlockSize - 1) / tsS3BlockSize; - int32_t code = tsdbCacheGetBlockS3(pFD->pTsdb->bCache, pFD, &handle); + code = tsdbCacheGetBlockS3(pFD->pTsdb->bCache, pFD, &handle); if (code != TSDB_CODE_SUCCESS || handle == NULL) { tsdbBCacheRelease(pFD->pTsdb->bCache, handle); - if (!handle) { + if (code == TSDB_CODE_SUCCESS && !handle) { code = TSDB_CODE_OUT_OF_MEMORY; } goto _exit; @@ -282,6 +285,9 @@ _exit: int32_t tsdbFsyncFile(STsdbFD *pFD) { int32_t code = 0; + if (pFD->s3File) { + return code; + } code = tsdbWriteFilePage(pFD); if (code) goto _exit; diff --git a/source/dnode/vnode/src/tsdb/tsdbRetention.c b/source/dnode/vnode/src/tsdb/tsdbRetention.c index cb53876d97..c3b1a18fd8 100644 --- a/source/dnode/vnode/src/tsdb/tsdbRetention.c +++ b/source/dnode/vnode/src/tsdb/tsdbRetention.c @@ -372,6 +372,14 @@ static int32_t tsdbDoRetention2(void *arg) { _exit: if (code) { + if (TARRAY2_DATA(rtner->fopArr)) { + TARRAY2_DESTROY(rtner->fopArr, NULL); + } + TFileSetArray **fsetArr = &rtner->fsetArr; + if (fsetArr[0]) { + tsdbFSDestroyCopySnapshot(&rtner->fsetArr); + } + TSDB_ERROR_LOG(TD_VID(rtner->tsdb->pVnode), lino, code); } return code; diff --git a/source/dnode/vnode/src/vnd/vnodeCos.c b/source/dnode/vnode/src/vnd/vnodeCos.c index 15ecff250d..5f650be97d 100644 --- a/source/dnode/vnode/src/vnd/vnodeCos.c +++ 
b/source/dnode/vnode/src/vnd/vnodeCos.c @@ -111,15 +111,14 @@ int32_t s3PutObjectFromFile2(const char *file_str, const char *object_str) { clt_params = cos_create_resumable_clt_params_content(p, 1024 * 1024, 8, COS_FALSE, NULL); s = cos_resumable_upload_file(options, &bucket, &object, &file, headers, NULL, clt_params, NULL, &resp_headers, NULL); + log_status(s); if (!cos_status_is_ok(s)) { - vError("s3: %s", s->error_msg); + vError("s3: %d(%s)", s->code, s->error_msg); vError("%s failed at line %d since %s", __func__, __LINE__, tstrerror(terrno)); - code = terrno; + code = TAOS_SYSTEM_ERROR(EIO); return code; } - log_status(s); - cos_pool_destroy(p); if (s->code != 200) { @@ -303,7 +302,7 @@ int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t block_ s = cos_get_object_to_buffer(options, &bucket, &object, headers, NULL, &download_buffer, &resp_headers); log_status(s); if (!cos_status_is_ok(s)) { - vError("s3: %s", s->error_msg); + vError("s3: %d(%s)", s->code, s->error_msg); vError("%s failed at line %d since %s", __func__, __LINE__, tstrerror(terrno)); code = TAOS_SYSTEM_ERROR(EIO); return code; diff --git a/source/dnode/vnode/src/vnd/vnodeInitApi.c b/source/dnode/vnode/src/vnd/vnodeInitApi.c index c72ecd4824..72e5e9ca88 100644 --- a/source/dnode/vnode/src/vnd/vnodeInitApi.c +++ b/source/dnode/vnode/src/vnd/vnodeInitApi.c @@ -137,13 +137,12 @@ void initStateStoreAPI(SStateStore* pStore) { pStore->streamFileStateInit = streamFileStateInit; pStore->updateInfoDestoryColseWinSBF = updateInfoDestoryColseWinSBF; - pStore->streamStateGetByPos = streamStateGetByPos; - pStore->streamStatePutParName = streamStatePutParName; pStore->streamStateGetParName = streamStateGetParName; pStore->streamStateAddIfNotExist = streamStateAddIfNotExist; pStore->streamStateReleaseBuf = streamStateReleaseBuf; + pStore->streamStateClearBuff = streamStateClearBuff; pStore->streamStateFreeVal = streamStateFreeVal; pStore->streamStatePut = streamStatePut; @@ -199,8 +198,6 @@ 
void initStateStoreAPI(SStateStore* pStore) { pStore->streamStateSessionSeekKeyCurrentPrev = streamStateSessionSeekKeyCurrentPrev; pStore->streamStateSessionSeekKeyCurrentNext = streamStateSessionSeekKeyCurrentNext; - pStore->streamFileStateInit = streamFileStateInit; - pStore->streamFileStateDestroy = streamFileStateDestroy; pStore->streamFileStateClear = streamFileStateClear; pStore->needClearDiskBuff = needClearDiskBuff; diff --git a/source/dnode/vnode/src/vnd/vnodeQuery.c b/source/dnode/vnode/src/vnd/vnodeQuery.c index 01292f33e4..e9dbc5e659 100644 --- a/source/dnode/vnode/src/vnd/vnodeQuery.c +++ b/source/dnode/vnode/src/vnd/vnodeQuery.c @@ -566,42 +566,55 @@ int32_t vnodeGetStbColumnNum(SVnode *pVnode, tb_uid_t suid, int *num) { } #ifdef TD_ENTERPRISE -#define TK_LOG_STB_NUM 19 -static const char *tkLogStb[TK_LOG_STB_NUM] = {"cluster_info", - "data_dir", - "dnodes_info", - "d_info", - "grants_info", - "keeper_monitor", - "logs", - "log_dir", - "log_summary", - "m_info", - "taosadapter_restful_http_request_fail", - "taosadapter_restful_http_request_in_flight", - "taosadapter_restful_http_request_summary_milliseconds", - "taosadapter_restful_http_request_total", - "taosadapter_system_cpu_percent", - "taosadapter_system_mem_percent", - "temp_dir", - "vgroups_info", - "vnodes_role"}; +const char *tkLogStb[] = {"cluster_info", + "data_dir", + "dnodes_info", + "d_info", + "grants_info", + "keeper_monitor", + "logs", + "log_dir", + "log_summary", + "m_info", + "taosadapter_restful_http_request_fail", + "taosadapter_restful_http_request_in_flight", + "taosadapter_restful_http_request_summary_milliseconds", + "taosadapter_restful_http_request_total", + "taosadapter_system_cpu_percent", + "taosadapter_system_mem_percent", + "temp_dir", + "vgroups_info", + "vnodes_role"}; +const char *tkAuditStb[] = {"operations"}; +const int tkLogStbNum = ARRAY_SIZE(tkLogStb); +const int tkAuditStbNum = ARRAY_SIZE(tkAuditStb); // exclude stbs of taoskeeper log static int32_t 
vnodeGetTimeSeriesBlackList(SVnode *pVnode) { - char *dbName = strchr(pVnode->config.dbname, '.'); - if (!dbName || 0 != strncmp(++dbName, "log", TSDB_DB_NAME_LEN)) { - return 0; + int32_t tbSize = 0; + int32_t tbNum = 0; + const char **pTbArr = NULL; + const char *dbName = NULL; + + if (!(dbName = strchr(pVnode->config.dbname, '.'))) return 0; + if (0 == strncmp(++dbName, "log", TSDB_DB_NAME_LEN)) { + tbNum = tkLogStbNum; + pTbArr = (const char **)&tkLogStb; + } else if (0 == strncmp(dbName, "audit", TSDB_DB_NAME_LEN)) { + tbNum = tkAuditStbNum; + pTbArr = (const char **)&tkAuditStb; } - int32_t tbSize = metaSizeOfTbFilterCache(pVnode, 0); - if (tbSize < TK_LOG_STB_NUM) { - for (int32_t i = 0; i < TK_LOG_STB_NUM; ++i) { - tb_uid_t suid = metaGetTableEntryUidByName(pVnode->pMeta, tkLogStb[i]); - if (suid != 0) { - metaPutTbToFilterCache(pVnode, suid, 0); + if (tbNum && pTbArr) { + tbSize = metaSizeOfTbFilterCache(pVnode->pMeta, 0); + if (tbSize < tbNum) { + for (int32_t i = 0; i < tbNum; ++i) { + tb_uid_t suid = metaGetTableEntryUidByName(pVnode->pMeta, pTbArr[i]); + if (suid != 0) { + metaPutTbToFilterCache(pVnode->pMeta, &suid, 0); + } } + tbSize = metaSizeOfTbFilterCache(pVnode->pMeta, 0); } - tbSize = metaSizeOfTbFilterCache(pVnode, 0); } return tbSize; @@ -611,7 +624,7 @@ static int32_t vnodeGetTimeSeriesBlackList(SVnode *pVnode) { static bool vnodeTimeSeriesFilter(void *arg1, void *arg2) { SVnode *pVnode = (SVnode *)arg1; - if (metaTbInFilterCache(pVnode, *(tb_uid_t *)(arg2), 0)) { + if (metaTbInFilterCache(pVnode->pMeta, arg2, 0)) { return true; } return false; @@ -626,9 +639,9 @@ int32_t vnodeGetTimeSeriesNum(SVnode *pVnode, int64_t *num) { } int32_t tbFilterSize = 0; - #ifdef TD_ENTERPRISE +#ifdef TD_ENTERPRISE tbFilterSize = vnodeGetTimeSeriesBlackList(pVnode); - #endif +#endif if ((!tbFilterSize && vnodeGetStbIdList(pVnode, 0, suidList) < 0) || (tbFilterSize && vnodeGetStbIdListByFilter(pVnode, 0, suidList, vnodeTimeSeriesFilter, pVnode) < 0)) { diff 
--git a/source/libs/executor/inc/executorInt.h b/source/libs/executor/inc/executorInt.h index 8726f57977..d5d144ee65 100644 --- a/source/libs/executor/inc/executorInt.h +++ b/source/libs/executor/inc/executorInt.h @@ -548,9 +548,9 @@ typedef struct SWindowRowsSup { } SWindowRowsSup; typedef struct SResultWindowInfo { - void* pOutputBuf; - SSessionKey sessionWin; - bool isOutput; + SRowBuffPos* pStatePos; + SSessionKey sessionWin; + bool isOutput; } SResultWindowInfo; typedef struct SStreamSessionAggOperatorInfo { @@ -579,6 +579,7 @@ typedef struct SStreamSessionAggOperatorInfo { bool isHistoryOp; bool reCkBlock; SSDataBlock* pCheckpointRes; + bool clearState; } SStreamSessionAggOperatorInfo; typedef struct SStreamStateAggOperatorInfo { @@ -672,8 +673,6 @@ void cleanupAggSup(SAggSupporter* pAggSup); void initResultSizeInfo(SResultInfo* pResultInfo, int32_t numOfRows); -void doBuildStreamResBlock(struct SOperatorInfo* pOperator, SOptrBasicInfo* pbInfo, SGroupResInfo* pGroupResInfo, - SDiskbasedBuf* pBuf); void doBuildResultDatablock(struct SOperatorInfo* pOperator, SOptrBasicInfo* pbInfo, SGroupResInfo* pGroupResInfo, SDiskbasedBuf* pBuf); @@ -739,12 +738,6 @@ int32_t finalizeResultRows(SDiskbasedBuf* pBuf, SResultRowPosition* resultRowPos SSDataBlock* pBlock, SExecTaskInfo* pTaskInfo); bool groupbyTbname(SNodeList* pGroupList); -int32_t buildDataBlockFromGroupRes(struct SOperatorInfo* pOperator, void* pState, SSDataBlock* pBlock, SExprSupp* pSup, - SGroupResInfo* pGroupResInfo); -int32_t saveSessionDiscBuf(void* pState, SSessionKey* key, void* buf, int32_t size, SStateStore* pAPI); -int32_t buildSessionResultDataBlock(struct SOperatorInfo* pOperator, void* pState, SSDataBlock* pBlock, SExprSupp* pSup, - SGroupResInfo* pGroupResInfo); -int32_t releaseOutputBuf(void* pState, SWinKey* pKey, SResultRow* pResult, SStateStore* pAPI); void getNextIntervalWindow(SInterval* pInterval, STimeWindow* tw, int32_t order); int32_t getForwardStepsInBlock(int32_t numOfRows, 
__block_search_fn_t searchFn, TSKEY ekey, int32_t pos, int32_t order, int64_t* pData); diff --git a/source/libs/executor/src/executorInt.c b/source/libs/executor/src/executorInt.c index 519a308c3a..8ad174f366 100644 --- a/source/libs/executor/src/executorInt.c +++ b/source/libs/executor/src/executorInt.c @@ -720,38 +720,6 @@ int32_t doCopyToSDataBlock(SExecTaskInfo* pTaskInfo, SSDataBlock* pBlock, SExprS return 0; } -void doBuildStreamResBlock(SOperatorInfo* pOperator, SOptrBasicInfo* pbInfo, SGroupResInfo* pGroupResInfo, - SDiskbasedBuf* pBuf) { - SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; - SStorageAPI* pAPI = &pTaskInfo->storageAPI; - - SSDataBlock* pBlock = pbInfo->pRes; - - // set output datablock version - pBlock->info.version = pTaskInfo->version; - - blockDataCleanup(pBlock); - if (!hasRemainResults(pGroupResInfo)) { - return; - } - - // clear the existed group id - pBlock->info.id.groupId = 0; - ASSERT(!pbInfo->mergeResultBlock); - doCopyToSDataBlock(pTaskInfo, pBlock, &pOperator->exprSupp, pBuf, pGroupResInfo, pOperator->resultInfo.threshold, - false); - - void* tbname = NULL; - if (pAPI->stateStore.streamStateGetParName((void*)pTaskInfo->streamInfo.pState, pBlock->info.id.groupId, &tbname) < - 0) { - pBlock->info.parTbName[0] = 0; - } else { - memcpy(pBlock->info.parTbName, tbname, TSDB_TABLE_NAME_LEN); - } - - pAPI->stateStore.streamStateFreeVal(tbname); -} - void doBuildResultDatablock(SOperatorInfo* pOperator, SOptrBasicInfo* pbInfo, SGroupResInfo* pGroupResInfo, SDiskbasedBuf* pBuf) { SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; @@ -960,109 +928,6 @@ int32_t createDataSinkParam(SDataSinkNode* pNode, void** pParam, SExecTaskInfo* return TSDB_CODE_SUCCESS; } -int32_t releaseOutputBuf(void* pState, SWinKey* pKey, SResultRow* pResult, SStateStore* pAPI) { - pAPI->streamStateReleaseBuf(pState, pKey, pResult); - return TSDB_CODE_SUCCESS; -} - -int32_t saveSessionDiscBuf(void* pState, SSessionKey* key, void* buf, int32_t size, SStateStore* pAPI) { 
- pAPI->streamStateSessionPut(pState, key, (const void*)buf, size); - releaseOutputBuf(pState, NULL, (SResultRow*)buf, pAPI); - return TSDB_CODE_SUCCESS; -} - -int32_t buildSessionResultDataBlock(SOperatorInfo* pOperator, void* pState, SSDataBlock* pBlock, SExprSupp* pSup, - SGroupResInfo* pGroupResInfo) { - SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; - SStorageAPI* pAPI = &pTaskInfo->storageAPI; - - SExprInfo* pExprInfo = pSup->pExprInfo; - int32_t numOfExprs = pSup->numOfExprs; - int32_t* rowEntryOffset = pSup->rowEntryInfoOffset; - SqlFunctionCtx* pCtx = pSup->pCtx; - - int32_t numOfRows = getNumOfTotalRes(pGroupResInfo); - - for (int32_t i = pGroupResInfo->index; i < numOfRows; i += 1) { - SSessionKey* pKey = taosArrayGet(pGroupResInfo->pRows, i); - int32_t size = 0; - void* pVal = NULL; - int32_t code = pAPI->stateStore.streamStateSessionGet(pState, pKey, &pVal, &size); - // ASSERT(code == 0); - if (code == -1) { - // for history - qWarn("===stream===not found session result key:%" PRId64 ", ekey:%" PRId64 ", groupId:%" PRIu64 "", - pKey->win.skey, pKey->win.ekey, pKey->groupId); - pGroupResInfo->index += 1; - continue; - } - SResultRow* pRow = (SResultRow*)pVal; - doUpdateNumOfRows(pCtx, pRow, numOfExprs, rowEntryOffset); - // no results, continue to check the next one - if (pRow->numOfRows == 0) { - pGroupResInfo->index += 1; - releaseOutputBuf(pState, NULL, pRow, &pAPI->stateStore); - continue; - } - - if (pBlock->info.id.groupId == 0) { - pBlock->info.id.groupId = pKey->groupId; - - void* tbname = NULL; - if (pAPI->stateStore.streamStateGetParName((void*)pTaskInfo->streamInfo.pState, pBlock->info.id.groupId, - &tbname) < 0) { - pBlock->info.parTbName[0] = 0; - } else { - memcpy(pBlock->info.parTbName, tbname, TSDB_TABLE_NAME_LEN); - } - pAPI->stateStore.streamStateFreeVal(tbname); - } else { - // current value belongs to different group, it can't be packed into one datablock - if (pBlock->info.id.groupId != pKey->groupId) { - releaseOutputBuf(pState, 
NULL, pRow, &pAPI->stateStore); - break; - } - } - - if (pBlock->info.rows + pRow->numOfRows > pBlock->info.capacity) { - ASSERT(pBlock->info.rows > 0); - releaseOutputBuf(pState, NULL, pRow, &pAPI->stateStore); - break; - } - - pGroupResInfo->index += 1; - - for (int32_t j = 0; j < numOfExprs; ++j) { - int32_t slotId = pExprInfo[j].base.resSchema.slotId; - - pCtx[j].resultInfo = getResultEntryInfo(pRow, j, rowEntryOffset); - if (pCtx[j].fpSet.finalize) { - int32_t code1 = pCtx[j].fpSet.finalize(&pCtx[j], pBlock); - if (TAOS_FAILED(code1)) { - qError("%s build result data block error, code %s", GET_TASKID(pTaskInfo), tstrerror(code1)); - T_LONG_JMP(pTaskInfo->env, code1); - } - } else if (strcmp(pCtx[j].pExpr->pExpr->_function.functionName, "_select_value") == 0) { - // do nothing, todo refactor - } else { - // expand the result into multiple rows. E.g., _wstart, top(k, 20) - // the _wstart needs to copy to 20 following rows, since the results of top-k expands to 20 different rows. - SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, slotId); - char* in = GET_ROWCELL_INTERBUF(pCtx[j].resultInfo); - for (int32_t k = 0; k < pRow->numOfRows; ++k) { - colDataSetVal(pColInfoData, pBlock->info.rows + k, in, pCtx[j].resultInfo->isNullRes); - } - } - } - - pBlock->info.dataLoad = 1; - pBlock->info.rows += pRow->numOfRows; - releaseOutputBuf(pState, NULL, pRow, &pAPI->stateStore); - } - blockDataUpdateTsWindow(pBlock, 0); - return TSDB_CODE_SUCCESS; -} - void streamOpReleaseState(SOperatorInfo* pOperator) { SOperatorInfo* downstream = pOperator->pDownstream[0]; if (downstream->fpSet.releaseStreamStateFn) { diff --git a/source/libs/executor/src/groupoperator.c b/source/libs/executor/src/groupoperator.c index 1060dd4f0e..f6b0a87f54 100644 --- a/source/libs/executor/src/groupoperator.c +++ b/source/libs/executor/src/groupoperator.c @@ -1057,7 +1057,7 @@ void appendCreateTableRow(void* pState, SExprSupp* pTableSup, SExprSupp* pTagSup } else { 
memcpy(pSrcBlock->info.parTbName, pValue, TSDB_TABLE_NAME_LEN); } - pAPI->streamStateReleaseBuf(pState, NULL, pValue); + pAPI->streamStateFreeVal(pValue); } static SSDataBlock* buildStreamCreateTableResult(SOperatorInfo* pOperator) { diff --git a/source/libs/executor/src/streamtimewindowoperator.c b/source/libs/executor/src/streamtimewindowoperator.c index 1c909cb47d..c9da3c99e7 100644 --- a/source/libs/executor/src/streamtimewindowoperator.c +++ b/source/libs/executor/src/streamtimewindowoperator.c @@ -152,8 +152,7 @@ static int32_t saveResult(SResultWindowInfo winInfo, SSHashObj* pStUpdated) { } static int32_t saveWinResult(SWinKey* pKey, SRowBuffPos* pPos, SSHashObj* pUpdatedMap) { - tSimpleHashPut(pUpdatedMap, pKey, sizeof(SWinKey), &pPos, POINTER_BYTES); - return TSDB_CODE_SUCCESS; + return tSimpleHashPut(pUpdatedMap, pKey, sizeof(SWinKey), &pPos, POINTER_BYTES); } static int32_t saveWinResultInfo(TSKEY ts, uint64_t groupId, SRowBuffPos* pPos, SSHashObj* pUpdatedMap) { @@ -696,7 +695,6 @@ int32_t buildDataBlockFromGroupRes(SOperatorInfo* pOperator, void* pState, SSDat int32_t slotId = pExprInfo[j].base.resSchema.slotId; pCtx[j].resultInfo = getResultEntryInfo(pRow, j, rowEntryOffset); - SResultRowEntryInfo* pEnryInfo = pCtx[j].resultInfo; if (pCtx[j].fpSet.finalize) { int32_t code1 = pCtx[j].fpSet.finalize(&pCtx[j], pBlock); @@ -931,17 +929,6 @@ void* decodeSWinKey(void* buf, SWinKey* key) { return buf; } -int32_t encodeSRowBuffPos(void** buf, SRowBuffPos* pos) { - int32_t tlen = 0; - tlen += encodeSWinKey(buf, pos->pKey); - return tlen; -} - -void* decodeSRowBuffPos(void* buf, SRowBuffPos* pos) { - buf = decodeSWinKey(buf, pos->pKey); - return buf; -} - int32_t encodeSTimeWindowAggSupp(void** buf, STimeWindowAggSupp* pTwAggSup) { int32_t tlen = 0; tlen += taosEncodeFixedI64(buf, pTwAggSup->minTs); @@ -1155,6 +1142,18 @@ static SSDataBlock* buildIntervalResult(SOperatorInfo* pOperator) { return NULL; } +static int32_t copyUpdateResult(SSHashObj** ppWinUpdated, 
SArray* pUpdated, __compar_fn_t compar) { + void* pIte = NULL; + int32_t iter = 0; + while ((pIte = tSimpleHashIterate(*ppWinUpdated, pIte, &iter)) != NULL) { + taosArrayPush(pUpdated, pIte); + } + taosArraySort(pUpdated, compar); + tSimpleHashCleanup(*ppWinUpdated); + *ppWinUpdated = NULL; + return TSDB_CODE_SUCCESS; +} + static SSDataBlock* doStreamFinalIntervalAgg(SOperatorInfo* pOperator) { SStreamIntervalOperatorInfo* pInfo = pOperator->info; SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; @@ -1311,15 +1310,7 @@ static SSDataBlock* doStreamFinalIntervalAgg(SOperatorInfo* pOperator) { } pInfo->binfo.pRes->info.watermark = pInfo->twAggSup.maxTs; - void* pIte = NULL; - int32_t iter = 0; - while ((pIte = tSimpleHashIterate(pInfo->pUpdatedMap, pIte, &iter)) != NULL) { - taosArrayPush(pInfo->pUpdated, pIte); - } - - tSimpleHashCleanup(pInfo->pUpdatedMap); - pInfo->pUpdatedMap = NULL; - taosArraySort(pInfo->pUpdated, winPosCmprImpl); + copyUpdateResult(&pInfo->pUpdatedMap, pInfo->pUpdated, winPosCmprImpl); initMultiResInfoFromArrayList(&pInfo->groupResInfo, pInfo->pUpdated); pInfo->pUpdated = NULL; @@ -1480,7 +1471,7 @@ SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, int32_t funResSize = getMaxFunResSize(&pOperator->exprSupp, numOfCols); pInfo->pState->pFileState = pAPI->stateStore.streamFileStateInit( tsStreamBufferSize, sizeof(SWinKey), pInfo->aggSup.resultRowSize, funResSize, compareTs, pInfo->pState, - pInfo->twAggSup.deleteMark, GET_TASKID(pTaskInfo), pHandle->checkpointId); + pInfo->twAggSup.deleteMark, GET_TASKID(pTaskInfo), pHandle->checkpointId, STREAM_STATE_BUFF_HASH); pInfo->dataVersion = 0; pInfo->stateStore = pTaskInfo->storageAPI.stateStore; pInfo->recvGetAll = false; @@ -1529,6 +1520,7 @@ void destroyStreamAggSupporter(SStreamAggSupporter* pSup) { tSimpleHashCleanup(pSup->pResultRows); destroyDiskbasedBuf(pSup->pResultBuf); blockDataDestroy(pSup->pScanBlock); + 
pSup->stateStore.streamFileStateDestroy(pSup->pState->pFileState); taosMemoryFreeClear(pSup->pState); taosMemoryFreeClear(pSup->pDummyCtx); } @@ -1607,10 +1599,16 @@ void initDownStream(SOperatorInfo* downstream, SStreamAggSupporter* pAggSup, uin pScanInfo->twAggSup = *pTwSup; } -int32_t initStreamAggSupporter(SStreamAggSupporter* pSup, SqlFunctionCtx* pCtx, int32_t numOfOutput, int64_t gap, +static TSKEY sesionTs(void* pKey) { + SSessionKey* pWinKey = (SSessionKey*)pKey; + return pWinKey->win.skey; +} + +int32_t initStreamAggSupporter(SStreamAggSupporter* pSup, SExprSupp* pExpSup, int32_t numOfOutput, int64_t gap, SStreamState* pState, int32_t keySize, int16_t keyType, SStateStore* pStore, - SReadHandle* pHandle, SStorageAPI* pApi) { - pSup->resultRowSize = keySize + getResultRowSize(pCtx, numOfOutput); + SReadHandle* pHandle, STimeWindowAggSupp* pTwAggSup, const char* taskIdStr, + SStorageAPI* pApi) { + pSup->resultRowSize = keySize + getResultRowSize(pExpSup->pCtx, numOfOutput); pSup->pScanBlock = createSpecialDataBlock(STREAM_CLEAR); pSup->gap = gap; pSup->stateKeySize = keySize; @@ -1622,10 +1620,14 @@ int32_t initStreamAggSupporter(SStreamAggSupporter* pSup, SqlFunctionCtx* pCtx, pSup->stateStore = *pStore; - initDummyFunction(pSup->pDummyCtx, pCtx, numOfOutput); + initDummyFunction(pSup->pDummyCtx, pExpSup->pCtx, numOfOutput); pSup->pState = taosMemoryCalloc(1, sizeof(SStreamState)); *(pSup->pState) = *pState; pSup->stateStore.streamStateSetNumber(pSup->pState, -1); + int32_t funResSize = getMaxFunResSize(pExpSup, numOfOutput); + pSup->pState->pFileState = pSup->stateStore.streamFileStateInit( + tsStreamBufferSize, sizeof(SSessionKey), pSup->resultRowSize, funResSize, sesionTs, pSup->pState, + pTwAggSup->deleteMark, taskIdStr, pHandle->checkpointId, STREAM_STATE_BUFF_SORT); _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); pSup->pResultRows = tSimpleHashInit(32, hashFn); @@ -1648,7 +1650,7 @@ int32_t 
initStreamAggSupporter(SStreamAggSupporter* pSup, SqlFunctionCtx* pCtx, int32_t code = createDiskbasedBuf(&pSup->pResultBuf, pageSize, bufSize, "function", tsTempDir); for (int32_t i = 0; i < numOfOutput; ++i) { - pCtx[i].saveHandle.pBuf = pSup->pResultBuf; + pExpSup->pCtx[i].saveHandle.pBuf = pSup->pResultBuf; } pSup->pSessionAPI = pApi; @@ -1687,33 +1689,40 @@ bool inWinRange(STimeWindow* range, STimeWindow* cur) { return false; } +int32_t clearOutputBuf(void* pState, SRowBuffPos* pPos, SStateStore* pAPI) { + return pAPI->streamStateClearBuff(pState, pPos); +} + void setSessionOutputBuf(SStreamAggSupporter* pAggSup, TSKEY startTs, TSKEY endTs, uint64_t groupId, SResultWindowInfo* pCurWin) { pCurWin->sessionWin.groupId = groupId; pCurWin->sessionWin.win.skey = startTs; pCurWin->sessionWin.win.ekey = endTs; int32_t size = pAggSup->resultRowSize; - int32_t code = pAggSup->stateStore.streamStateSessionAddIfNotExist(pAggSup->pState, &pCurWin->sessionWin, - pAggSup->gap, &pCurWin->pOutputBuf, &size); + int32_t code = pAggSup->stateStore.streamStateSessionAddIfNotExist(pAggSup->pState, &pCurWin->sessionWin, + pAggSup->gap, (void**)&pCurWin->pStatePos, &size); if (code == TSDB_CODE_SUCCESS && !inWinRange(&pAggSup->winRange, &pCurWin->sessionWin.win)) { code = TSDB_CODE_FAILED; - releaseOutputBuf(pAggSup->pState, NULL, (SResultRow*)pCurWin->pOutputBuf, &pAggSup->pSessionAPI->stateStore); - pCurWin->pOutputBuf = taosMemoryCalloc(1, size); + clearOutputBuf(pAggSup->pState, pCurWin->pStatePos, &pAggSup->pSessionAPI->stateStore); } if (code == TSDB_CODE_SUCCESS) { pCurWin->isOutput = true; - pAggSup->stateStore.streamStateSessionDel(pAggSup->pState, &pCurWin->sessionWin); + if (pCurWin->pStatePos->needFree) { + pAggSup->stateStore.streamStateSessionDel(pAggSup->pState, &pCurWin->sessionWin); + } } else { pCurWin->sessionWin.win.skey = startTs; pCurWin->sessionWin.win.ekey = endTs; } + qDebug("===stream===set session window buff .start:%" PRId64 ",end:%" PRId64 ",groupid:%" 
PRIu64, + pCurWin->sessionWin.win.skey, pCurWin->sessionWin.win.ekey, pCurWin->sessionWin.groupId); } int32_t getSessionWinBuf(SStreamAggSupporter* pAggSup, SStreamStateCur* pCur, SResultWindowInfo* pWinInfo) { int32_t size = 0; - int32_t code = - pAggSup->stateStore.streamStateSessionGetKVByCur(pCur, &pWinInfo->sessionWin, &pWinInfo->pOutputBuf, &size); + int32_t code = pAggSup->stateStore.streamStateSessionGetKVByCur(pCur, &pWinInfo->sessionWin, + (void**)&pWinInfo->pStatePos, &size); if (code != TSDB_CODE_SUCCESS) { return code; } @@ -1731,9 +1740,23 @@ void saveDeleteRes(SSHashObj* pStDelete, SSessionKey key) { tSimpleHashPut(pStDelete, &key, sizeof(SSessionKey), NULL, 0); } -static void removeSessionResult(SSHashObj* pHashMap, SSHashObj* pResMap, SSessionKey key) { +int32_t releaseOutputBuf(void* pState, SRowBuffPos* pPos, SStateStore* pAPI) { + pAPI->streamStateReleaseBuf(pState, pPos, false); + return TSDB_CODE_SUCCESS; +} + +int32_t reuseOutputBuf(void* pState, SRowBuffPos* pPos, SStateStore* pAPI) { + pAPI->streamStateReleaseBuf(pState, pPos, true); + return TSDB_CODE_SUCCESS; +} + +static void removeSessionResult(SStreamAggSupporter* pAggSup, SSHashObj* pHashMap, SSHashObj* pResMap, SSessionKey key) { key.win.ekey = key.win.skey; - tSimpleHashRemove(pHashMap, &key, sizeof(SSessionKey)); + void* pVal = tSimpleHashGet(pHashMap, &key, sizeof(SSessionKey)); + if (pVal) { + releaseOutputBuf(pAggSup->pState, *(void**)pVal, &pAggSup->pSessionAPI->stateStore); + tSimpleHashRemove(pHashMap, &key, sizeof(SSessionKey)); + } tSimpleHashRemove(pResMap, &key, sizeof(SSessionKey)); } @@ -1742,7 +1765,7 @@ static void getSessionHashKey(const SSessionKey* pKey, SSessionKey* pHashKey) { pHashKey->win.ekey = pKey->win.skey; } -static void removeSessionResults(SSHashObj* pHashMap, SArray* pWins) { +static void removeSessionDeleteResults(SSHashObj* pHashMap, SArray* pWins) { if (tSimpleHashGetSize(pHashMap) == 0) { return; } @@ -1756,7 +1779,25 @@ static void 
removeSessionResults(SSHashObj* pHashMap, SArray* pWins) { } } -int32_t updateSessionWindowInfo(SResultWindowInfo* pWinInfo, TSKEY* pStartTs, TSKEY* pEndTs, uint64_t groupId, +static void removeSessionResults(SStreamAggSupporter* pAggSup, SSHashObj* pHashMap, SArray* pWins) { + if (tSimpleHashGetSize(pHashMap) == 0) { + return; + } + int32_t size = taosArrayGetSize(pWins); + for (int32_t i = 0; i < size; i++) { + SSessionKey* pWin = taosArrayGet(pWins, i); + if (!pWin) continue; + SSessionKey key = {0}; + getSessionHashKey(pWin, &key); + void* pVal = tSimpleHashGet(pHashMap, &key, sizeof(SSessionKey)); + if (pVal) { + releaseOutputBuf(pAggSup->pState, *(void**)pVal, &pAggSup->pSessionAPI->stateStore); + tSimpleHashRemove(pHashMap, &key, sizeof(SSessionKey)); + } + } +} + +int32_t updateSessionWindowInfo(SStreamAggSupporter* pAggSup, SResultWindowInfo* pWinInfo, TSKEY* pStartTs, TSKEY* pEndTs, uint64_t groupId, int32_t rows, int32_t start, int64_t gap, SSHashObj* pResultRows, SSHashObj* pStUpdated, SSHashObj* pStDeleted) { for (int32_t i = start; i < rows; ++i) { @@ -1767,13 +1808,14 @@ int32_t updateSessionWindowInfo(SResultWindowInfo* pWinInfo, TSKEY* pStartTs, TS if (pStDeleted && pWinInfo->isOutput) { saveDeleteRes(pStDeleted, pWinInfo->sessionWin); } - removeSessionResult(pStUpdated, pResultRows, pWinInfo->sessionWin); + removeSessionResult(pAggSup, pStUpdated, pResultRows, pWinInfo->sessionWin); pWinInfo->sessionWin.win.skey = pStartTs[i]; } pWinInfo->sessionWin.win.ekey = TMAX(pWinInfo->sessionWin.win.ekey, pStartTs[i]); if (pEndTs) { pWinInfo->sessionWin.win.ekey = TMAX(pWinInfo->sessionWin.win.ekey, pEndTs[i]); } + memcpy(pWinInfo->pStatePos->pKey, &pWinInfo->sessionWin, sizeof(SSessionKey)); } return rows - start; } @@ -1781,7 +1823,7 @@ int32_t updateSessionWindowInfo(SResultWindowInfo* pWinInfo, TSKEY* pStartTs, TS static int32_t initSessionOutputBuf(SResultWindowInfo* pWinInfo, SResultRow** pResult, SqlFunctionCtx* pCtx, int32_t numOfOutput, int32_t* 
rowEntryInfoOffset) { ASSERT(pWinInfo->sessionWin.win.skey <= pWinInfo->sessionWin.win.ekey); - *pResult = (SResultRow*)pWinInfo->pOutputBuf; + *pResult = (SResultRow*)pWinInfo->pStatePos->pRowBuff; // set time window for current result (*pResult)->win = pWinInfo->sessionWin.win; setResultRowInitCtx(*pResult, pCtx, numOfOutput, rowEntryInfoOffset); @@ -1819,20 +1861,19 @@ static int32_t setSessionWinOutputInfo(SSHashObj* pStUpdated, SResultWindowInfo* return TSDB_CODE_SUCCESS; } -SStreamStateCur* getNextSessionWinInfo(SStreamAggSupporter* pAggSup, SSHashObj* pStUpdated, SResultWindowInfo* pCurWin, +void getNextSessionWinInfo(SStreamAggSupporter* pAggSup, SSHashObj* pStUpdated, SResultWindowInfo* pCurWin, SResultWindowInfo* pNextWin) { SStreamStateCur* pCur = pAggSup->stateStore.streamStateSessionSeekKeyNext(pAggSup->pState, &pCurWin->sessionWin); pNextWin->isOutput = true; setSessionWinOutputInfo(pStUpdated, pNextWin); int32_t size = 0; pNextWin->sessionWin = pCurWin->sessionWin; - int32_t code = - pAggSup->stateStore.streamStateSessionGetKVByCur(pCur, &pNextWin->sessionWin, &pNextWin->pOutputBuf, &size); + int32_t code = pAggSup->stateStore.streamStateSessionGetKVByCur(pCur, &pNextWin->sessionWin, + (void**)&pNextWin->pStatePos, &size); if (code != TSDB_CODE_SUCCESS) { - taosMemoryFreeClear(pNextWin->pOutputBuf); SET_SESSION_WIN_INVALID(*pNextWin); } - return pCur; + pAggSup->stateStore.streamStateFreeCur(pCur); } static int32_t compactSessionWindow(SOperatorInfo* pOperator, SResultWindowInfo* pCurWin, SSHashObj* pStUpdated, @@ -1850,16 +1891,16 @@ static int32_t compactSessionWindow(SOperatorInfo* pOperator, SResultWindowInfo* // Just look for the window behind StartIndex while (1) { SResultWindowInfo winInfo = {0}; - SStreamStateCur* pCur = getNextSessionWinInfo(pAggSup, pStUpdated, pCurWin, &winInfo); + getNextSessionWinInfo(pAggSup, pStUpdated, pCurWin, &winInfo); if (!IS_VALID_SESSION_WIN(winInfo) || !isInWindow(pCurWin, winInfo.sessionWin.win.skey, 
pAggSup->gap) || !inWinRange(&pAggSup->winRange, &winInfo.sessionWin.win)) { - taosMemoryFree(winInfo.pOutputBuf); - pAPI->stateStore.streamStateFreeCur(pCur); + releaseOutputBuf(pAggSup->pState, winInfo.pStatePos, &pAggSup->pSessionAPI->stateStore); break; } SResultRow* pWinResult = NULL; initSessionOutputBuf(&winInfo, &pWinResult, pAggSup->pDummyCtx, numOfOutput, pSup->rowEntryInfoOffset); pCurWin->sessionWin.win.ekey = TMAX(pCurWin->sessionWin.win.ekey, winInfo.sessionWin.win.ekey); + memcpy(pCurWin->pStatePos->pKey, &pCurWin->sessionWin, sizeof(SSessionKey)); int64_t winDelta = 0; if (addGap) { winDelta = pAggSup->gap; @@ -1870,10 +1911,9 @@ static int32_t compactSessionWindow(SOperatorInfo* pOperator, SResultWindowInfo* if (winInfo.isOutput && pStDeleted) { saveDeleteRes(pStDeleted, winInfo.sessionWin); } - removeSessionResult(pStUpdated, pAggSup->pResultRows, winInfo.sessionWin); + removeSessionResult(pAggSup, pStUpdated, pAggSup->pResultRows, winInfo.sessionWin); doDeleteSessionWindow(pAggSup, &winInfo.sessionWin); - pAPI->stateStore.streamStateFreeCur(pCur); - taosMemoryFree(winInfo.pOutputBuf); + releaseOutputBuf(pAggSup->pState, winInfo.pStatePos, &pAggSup->pSessionAPI->stateStore); winNum++; } return winNum; @@ -1890,25 +1930,23 @@ static void compactSessionSemiWindow(SOperatorInfo* pOperator, SResultWindowInfo // Just look for the window behind StartIndex while (1) { SResultWindowInfo winInfo = {0}; - SStreamStateCur* pCur = getNextSessionWinInfo(pAggSup, NULL, pCurWin, &winInfo); + getNextSessionWinInfo(pAggSup, NULL, pCurWin, &winInfo); if (!IS_VALID_SESSION_WIN(winInfo) || !isInWindow(pCurWin, winInfo.sessionWin.win.skey, pAggSup->gap) || !inWinRange(&pAggSup->winRange, &winInfo.sessionWin.win)) { - taosMemoryFree(winInfo.pOutputBuf); - pAPI->stateStore.streamStateFreeCur(pCur); + releaseOutputBuf(pAggSup->pState, winInfo.pStatePos, &pAggSup->pSessionAPI->stateStore); break; } pCurWin->sessionWin.win.ekey = TMAX(pCurWin->sessionWin.win.ekey, 
winInfo.sessionWin.win.ekey); + memcpy(pCurWin->pStatePos->pKey, &pCurWin->sessionWin, sizeof(SSessionKey)); doDeleteSessionWindow(pAggSup, &winInfo.sessionWin); - pAPI->stateStore.streamStateFreeCur(pCur); - taosMemoryFree(winInfo.pOutputBuf); + releaseOutputBuf(pAggSup->pState, winInfo.pStatePos, &pAggSup->pSessionAPI->stateStore); } } int32_t saveSessionOutputBuf(SStreamAggSupporter* pAggSup, SResultWindowInfo* pWinInfo) { - saveSessionDiscBuf(pAggSup->pState, &pWinInfo->sessionWin, pWinInfo->pOutputBuf, pAggSup->resultRowSize, - &pAggSup->stateStore); - pWinInfo->pOutputBuf = NULL; - return TSDB_CODE_SUCCESS; + qDebug("===stream===try save session result skey:%" PRId64 ", ekey:%" PRId64 ".pos%d", + pWinInfo->sessionWin.win.skey, pWinInfo->sessionWin.win.ekey, pWinInfo->pStatePos->needFree); + return pAggSup->stateStore.streamStateSessionPut(pAggSup->pState, &pWinInfo->sessionWin, pWinInfo->pStatePos, pAggSup->resultRowSize); } static void doStreamSessionAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSDataBlock, SSHashObj* pStUpdated, @@ -1946,13 +1984,13 @@ static void doStreamSessionAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSData } SResultWindowInfo winInfo = {0}; setSessionOutputBuf(pAggSup, startTsCols[i], endTsCols[i], groupId, &winInfo); - setSessionWinOutputInfo(pStUpdated, &winInfo); - winRows = updateSessionWindowInfo(&winInfo, startTsCols, endTsCols, groupId, rows, i, pAggSup->gap, - pAggSup->pResultRows, pStUpdated, pStDeleted); // coverity scan error - if (!winInfo.pOutputBuf) { - T_LONG_JMP(pTaskInfo->env, TSDB_CODE_OUT_OF_MEMORY); + if (!winInfo.pStatePos) { + continue; } + setSessionWinOutputInfo(pStUpdated, &winInfo); + winRows = updateSessionWindowInfo(pAggSup, &winInfo, startTsCols, endTsCols, groupId, rows, i, pAggSup->gap, + pAggSup->pResultRows, pStUpdated, pStDeleted); int64_t winDelta = 0; if (addGap) { @@ -2005,8 +2043,10 @@ static void doDeleteTimeWindows(SStreamAggSupporter* pAggSup, SSDataBlock* pBloc } static inline int32_t 
sessionKeyCompareAsc(const void* pKey1, const void* pKey2) { - SSessionKey* pWin1 = (SSessionKey*)pKey1; - SSessionKey* pWin2 = (SSessionKey*)pKey2; + SResultWindowInfo* pWinInfo1 = (SResultWindowInfo*)pKey1; + SResultWindowInfo* pWinInfo2 = (SResultWindowInfo*)pKey2; + SSessionKey* pWin1 = &pWinInfo1->sessionWin; + SSessionKey* pWin2 = &pWinInfo2->sessionWin; if (pWin1->groupId > pWin2->groupId) { return 1; @@ -2023,17 +2063,6 @@ static inline int32_t sessionKeyCompareAsc(const void* pKey1, const void* pKey2) return 0; } -static int32_t copyUpdateResult(SSHashObj* pStUpdated, SArray* pUpdated) { - void* pIte = NULL; - int32_t iter = 0; - while ((pIte = tSimpleHashIterate(pStUpdated, pIte, &iter)) != NULL) { - void* key = tSimpleHashGetKey(pIte, NULL); - taosArrayPush(pUpdated, key); - } - taosArraySort(pUpdated, sessionKeyCompareAsc); - return TSDB_CODE_SUCCESS; -} - void doBuildDeleteDataBlock(SOperatorInfo* pOp, SSHashObj* pStDeleted, SSDataBlock* pBlock, void** Ite) { SStorageAPI* pAPI = &pOp->pTaskInfo->storageAPI; @@ -2111,35 +2140,41 @@ static void rebuildSessionWindow(SOperatorInfo* pOperator, SArray* pWinArray, SS int32_t code = getSessionWinBuf(pChAggSup, pCur, &childWin); if (code == TSDB_CODE_SUCCESS && !inWinRange(&pAggSup->winRange, &childWin.sessionWin.win)) { - releaseOutputBuf(pAggSup->pState, NULL, (SResultRow*)childWin.pOutputBuf, &pAggSup->stateStore); + releaseOutputBuf(pAggSup->pState, childWin.pStatePos, &pAggSup->stateStore); continue; } if (code == TSDB_CODE_SUCCESS && inWinRange(&pWinKey->win, &childWin.sessionWin.win)) { if (num == 0) { setSessionOutputBuf(pAggSup, pWinKey->win.skey, pWinKey->win.ekey, pWinKey->groupId, &parentWin); + parentWin.sessionWin = childWin.sessionWin; + memcpy(parentWin.pStatePos->pKey, &parentWin.sessionWin, sizeof(SSessionKey)); code = initSessionOutputBuf(&parentWin, &pResult, pSup->pCtx, numOfOutput, pSup->rowEntryInfoOffset); if (code != TSDB_CODE_SUCCESS || pResult == NULL) { - 
releaseOutputBuf(pAggSup->pState, NULL, (SResultRow*)childWin.pOutputBuf, &pAggSup->stateStore); + releaseOutputBuf(pAggSup->pState, childWin.pStatePos, &pAggSup->stateStore); break; } } num++; + parentWin.sessionWin.win.skey = TMIN(parentWin.sessionWin.win.skey, childWin.sessionWin.win.skey); + parentWin.sessionWin.win.ekey = TMAX(parentWin.sessionWin.win.ekey, childWin.sessionWin.win.ekey); + memcpy(parentWin.pStatePos->pKey, &parentWin.sessionWin, sizeof(SSessionKey)); + updateTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &parentWin.sessionWin.win, pAggSup->gap); initSessionOutputBuf(&childWin, &pChResult, pChild->exprSupp.pCtx, numOfOutput, pChild->exprSupp.rowEntryInfoOffset); compactFunctions(pSup->pCtx, pChild->exprSupp.pCtx, numOfOutput, pTaskInfo, &pInfo->twAggSup.timeWindowData); compactSessionWindow(pOperator, &parentWin, pStUpdated, NULL, true); - saveResult(parentWin, pStUpdated); - releaseOutputBuf(pAggSup->pState, NULL, (SResultRow*)childWin.pOutputBuf, &pAggSup->stateStore); + releaseOutputBuf(pAggSup->pState, childWin.pStatePos, &pAggSup->stateStore); } else { - releaseOutputBuf(pAggSup->pState, NULL, (SResultRow*)childWin.pOutputBuf, &pAggSup->stateStore); + releaseOutputBuf(pAggSup->pState, childWin.pStatePos, &pAggSup->stateStore); break; } } pAPI->stateStore.streamStateFreeCur(pCur); } if (num > 0) { + saveResult(parentWin, pStUpdated); saveSessionOutputBuf(pAggSup, &parentWin); } } @@ -2203,6 +2238,94 @@ void initGroupResInfoFromArrayList(SGroupResInfo* pGroupResInfo, SArray* pArrayL pGroupResInfo->freeItem = false; } +int32_t buildSessionResultDataBlock(SOperatorInfo* pOperator, void* pState, SSDataBlock* pBlock, SExprSupp* pSup, + SGroupResInfo* pGroupResInfo) { + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + SStorageAPI* pAPI = &pTaskInfo->storageAPI; + SExprInfo* pExprInfo = pSup->pExprInfo; + int32_t numOfExprs = pSup->numOfExprs; + int32_t* rowEntryOffset = pSup->rowEntryInfoOffset; + SqlFunctionCtx* pCtx = pSup->pCtx; + + int32_t 
numOfRows = getNumOfTotalRes(pGroupResInfo); + + for (int32_t i = pGroupResInfo->index; i < numOfRows; i += 1) { + SResultWindowInfo* pWinInfo = taosArrayGet(pGroupResInfo->pRows, i); + SRowBuffPos* pPos = pWinInfo->pStatePos; + SResultRow* pRow = NULL; + SSessionKey* pKey = (SSessionKey*) pPos->pKey; + + if (pBlock->info.id.groupId == 0) { + pBlock->info.id.groupId = pKey->groupId; + + void* tbname = NULL; + if (pAPI->stateStore.streamStateGetParName((void*)pTaskInfo->streamInfo.pState, pBlock->info.id.groupId, + &tbname) < 0) { + pBlock->info.parTbName[0] = 0; + } else { + memcpy(pBlock->info.parTbName, tbname, TSDB_TABLE_NAME_LEN); + } + pAPI->stateStore.streamStateFreeVal(tbname); + } else { + // current value belongs to different group, it can't be packed into one datablock + if (pBlock->info.id.groupId != pKey->groupId) { + break; + } + } + + int32_t code = pAPI->stateStore.streamStateGetByPos(pState, pPos, (void**)&pRow); + if (pBlock->info.rows + pRow->numOfRows > pBlock->info.capacity) { + ASSERT(pBlock->info.rows > 0); + break; + } + + if (code == -1) { + // for history + qWarn("===stream===not found session result key:%" PRId64 ", ekey:%" PRId64 ", groupId:%" PRIu64 "", + pKey->win.skey, pKey->win.ekey, pKey->groupId); + pGroupResInfo->index += 1; + continue; + } + + doUpdateNumOfRows(pCtx, pRow, numOfExprs, rowEntryOffset); + // no results, continue to check the next one + if (pRow->numOfRows == 0) { + pGroupResInfo->index += 1; + continue; + } + + pGroupResInfo->index += 1; + + for (int32_t j = 0; j < numOfExprs; ++j) { + int32_t slotId = pExprInfo[j].base.resSchema.slotId; + + pCtx[j].resultInfo = getResultEntryInfo(pRow, j, rowEntryOffset); + if (pCtx[j].fpSet.finalize) { + int32_t code1 = pCtx[j].fpSet.finalize(&pCtx[j], pBlock); + if (TAOS_FAILED(code1)) { + qError("%s build result data block error, code %s", GET_TASKID(pTaskInfo), tstrerror(code1)); + T_LONG_JMP(pTaskInfo->env, code1); + } + } else if 
(strcmp(pCtx[j].pExpr->pExpr->_function.functionName, "_select_value") == 0) { + // do nothing, todo refactor + } else { + // expand the result into multiple rows. E.g., _wstart, top(k, 20) + // the _wstart needs to copy to 20 following rows, since the results of top-k expands to 20 different rows. + SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, slotId); + char* in = GET_ROWCELL_INTERBUF(pCtx[j].resultInfo); + for (int32_t k = 0; k < pRow->numOfRows; ++k) { + colDataSetVal(pColInfoData, pBlock->info.rows + k, in, pCtx[j].resultInfo->isNullRes); + } + } + } + + pBlock->info.dataLoad = 1; + pBlock->info.rows += pRow->numOfRows; + } + blockDataUpdateTsWindow(pBlock, 0); + return TSDB_CODE_SUCCESS; +} + void doBuildSessionResult(SOperatorInfo* pOperator, void* pState, SGroupResInfo* pGroupResInfo, SSDataBlock* pBlock) { SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; // set output datablock version @@ -2232,6 +2355,7 @@ static SSDataBlock* buildSessionResult(SOperatorInfo* pOperator) { printDataBlock(pInfo->pDelRes, getStreamOpName(pOperator->operatorType), GET_TASKID(pTaskInfo)); return pInfo->pDelRes; } + doBuildSessionResult(pOperator, pAggSup->pState, &pInfo->groupResInfo, pBInfo->pRes); if (pBInfo->pRes->info.rows > 0) { printDataBlock(pBInfo->pRes, getStreamOpName(pOperator->operatorType), GET_TASKID(pTaskInfo)); @@ -2245,15 +2369,16 @@ void getMaxTsWins(const SArray* pAllWins, SArray* pMaxWins) { if (size == 0) { return; } - - SSessionKey* pSeKey = taosArrayGet(pAllWins, size - 1); + SResultWindowInfo* pWinInfo = taosArrayGet(pAllWins, size - 1); + SSessionKey* pSeKey = pWinInfo->pStatePos->pKey; taosArrayPush(pMaxWins, pSeKey); if (pSeKey->groupId == 0) { return; } uint64_t preGpId = pSeKey->groupId; for (int32_t i = size - 2; i >= 0; i--) { - pSeKey = taosArrayGet(pAllWins, i); + pWinInfo = taosArrayGet(pAllWins, i); + pSeKey = pWinInfo->pStatePos->pKey; if (preGpId != pSeKey->groupId) { taosArrayPush(pMaxWins, pSeKey); preGpId = 
pSeKey->groupId; @@ -2283,7 +2408,7 @@ int32_t encodeSResultWindowInfo(void** buf, SResultWindowInfo* key, int32_t outL void* decodeSResultWindowInfo(void* buf, SResultWindowInfo* key, int32_t outLen) { buf = taosDecodeFixedBool(buf, &key->isOutput); - key->pOutputBuf = NULL; + key->pStatePos->pRowBuff = NULL; buf = decodeSSessionKey(buf, &key->sessionWin); return buf; } @@ -2412,7 +2537,7 @@ static SSDataBlock* doStreamSessionAgg(SOperatorInfo* pOperator) { SOperatorInfo* downstream = pOperator->pDownstream[0]; if (!pInfo->pUpdated) { - pInfo->pUpdated = taosArrayInit(16, sizeof(SSessionKey)); + pInfo->pUpdated = taosArrayInit(16, sizeof(SResultWindowInfo)); } if (!pInfo->pStUpdated) { _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); @@ -2430,7 +2555,7 @@ static SSDataBlock* doStreamSessionAgg(SOperatorInfo* pOperator) { SArray* pWins = taosArrayInit(16, sizeof(SSessionKey)); // gap must be 0 doDeleteTimeWindows(pAggSup, pBlock, pWins); - removeSessionResults(pInfo->pStUpdated, pWins); + removeSessionResults(pAggSup, pInfo->pStUpdated, pWins); if (IS_FINAL_SESSION_OP(pOperator)) { int32_t childIndex = getChildIndex(pBlock); SOperatorInfo* pChildOp = taosArrayGetP(pInfo->pChildren, childIndex); @@ -2488,10 +2613,8 @@ static SSDataBlock* doStreamSessionAgg(SOperatorInfo* pOperator) { closeSessionWindow(pAggSup->pResultRows, &pInfo->twAggSup, pInfo->pStUpdated); closeChildSessionWindow(pInfo->pChildren, pInfo->twAggSup.maxTs); - copyUpdateResult(pInfo->pStUpdated, pInfo->pUpdated); - removeSessionResults(pInfo->pStDeleted, pInfo->pUpdated); - tSimpleHashCleanup(pInfo->pStUpdated); - pInfo->pStUpdated = NULL; + copyUpdateResult(&pInfo->pStUpdated, pInfo->pUpdated, sessionKeyCompareAsc); + removeSessionDeleteResults(pInfo->pStDeleted, pInfo->pUpdated); if (pInfo->isHistoryOp) { getMaxTsWins(pInfo->pUpdated, pInfo->historyWins); } @@ -2510,10 +2633,15 @@ static SSDataBlock* doStreamSessionAgg(SOperatorInfo* pOperator) { void 
streamSessionReleaseState(SOperatorInfo* pOperator) { SStreamSessionAggOperatorInfo* pInfo = pOperator->info; - int32_t resSize = taosArrayGetSize(pInfo->historyWins) * sizeof(SSessionKey); + int32_t winSize = taosArrayGetSize(pInfo->historyWins) * sizeof(SSessionKey); + int32_t resSize = winSize + sizeof(TSKEY); + char* pBuff = taosMemoryCalloc(1, resSize); + memcpy(pBuff, pInfo->historyWins->pData, winSize); + memcpy(pBuff + winSize, &pInfo->twAggSup.maxTs, sizeof(TSKEY)); pInfo->streamAggSup.stateStore.streamStateSaveInfo(pInfo->streamAggSup.pState, STREAM_SESSION_OP_STATE_NAME, - strlen(STREAM_SESSION_OP_STATE_NAME), pInfo->historyWins->pData, - resSize); + strlen(STREAM_SESSION_OP_STATE_NAME), pBuff, resSize); + pInfo->streamAggSup.stateStore.streamStateCommit(pInfo->streamAggSup.pState); + taosMemoryFreeClear(pBuff); SOperatorInfo* downstream = pOperator->pDownstream[0]; if (downstream->fpSet.releaseStreamStateFn) { downstream->fpSet.releaseStreamStateFn(downstream); @@ -2525,6 +2653,20 @@ void resetWinRange(STimeWindow* winRange) { winRange->ekey = INT64_MAX; } +void getSessionWindowInfoByKey(SStreamAggSupporter* pAggSup, SSessionKey* pKey, SResultWindowInfo* pWinInfo) { + int32_t rowSize = pAggSup->resultRowSize; + int32_t code = pAggSup->stateStore.streamStateSessionGet(pAggSup->pState, pKey, (void**)&pWinInfo->pStatePos, &rowSize); + if (code == TSDB_CODE_SUCCESS) { + pWinInfo->sessionWin = *pKey; + pWinInfo->isOutput = true; + if (pWinInfo->pStatePos->needFree) { + pAggSup->stateStore.streamStateSessionDel(pAggSup->pState, &pWinInfo->sessionWin); + } + } else { + SET_SESSION_WIN_INVALID((*pWinInfo)); + } +} + void streamSessionSemiReloadState(SOperatorInfo* pOperator) { SStreamSessionAggOperatorInfo* pInfo = pOperator->info; SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; @@ -2535,16 +2677,19 @@ void streamSessionSemiReloadState(SOperatorInfo* pOperator) { void* pBuf = NULL; int32_t code = pAggSup->stateStore.streamStateGetInfo(pAggSup->pState, 
STREAM_SESSION_OP_STATE_NAME, strlen(STREAM_SESSION_OP_STATE_NAME), &pBuf, &size); - int32_t num = size / sizeof(SSessionKey); + int32_t num = (size - sizeof(TSKEY)) / sizeof(SSessionKey); SSessionKey* pSeKeyBuf = (SSessionKey*) pBuf; - ASSERT(size == num * sizeof(SSessionKey)); + ASSERT(size == num * sizeof(SSessionKey) + sizeof(TSKEY)); for (int32_t i = 0; i < num; i++) { SResultWindowInfo winInfo = {0}; - setSessionOutputBuf(pAggSup, pSeKeyBuf[i].win.skey, pSeKeyBuf[i].win.ekey, pSeKeyBuf[i].groupId, &winInfo); + getSessionWindowInfoByKey(pAggSup, pSeKeyBuf + i, &winInfo); compactSessionSemiWindow(pOperator, &winInfo); saveSessionOutputBuf(pAggSup, &winInfo); } + TSKEY ts = *(TSKEY*)((char*)pBuf + size - sizeof(TSKEY)); taosMemoryFree(pBuf); + pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, ts); + pAggSup->stateStore.streamStateReloadInfo(pAggSup->pState, ts); SOperatorInfo* downstream = pOperator->pDownstream[0]; if (downstream->fpSet.reloadStreamStateFn) { @@ -2557,21 +2702,28 @@ void streamSessionReloadState(SOperatorInfo* pOperator) { SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; resetWinRange(&pAggSup->winRange); - SResultWindowInfo winInfo = {0}; int32_t size = 0; void* pBuf = NULL; int32_t code = pAggSup->stateStore.streamStateGetInfo(pAggSup->pState, STREAM_SESSION_OP_STATE_NAME, strlen(STREAM_SESSION_OP_STATE_NAME), &pBuf, &size); - int32_t num = size / sizeof(SSessionKey); + int32_t num = (size - sizeof(TSKEY)) / sizeof(SSessionKey); SSessionKey* pSeKeyBuf = (SSessionKey*)pBuf; - ASSERT(size == num * sizeof(SSessionKey)); + ASSERT(size == num * sizeof(SSessionKey) + sizeof(TSKEY)); + + TSKEY ts = *(TSKEY*)((char*)pBuf + size - sizeof(TSKEY)); + pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, ts); + pAggSup->stateStore.streamStateReloadInfo(pAggSup->pState, ts); + if (!pInfo->pStUpdated && num > 0) { _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); pInfo->pStUpdated = tSimpleHashInit(64, hashFn); } for (int32_t i = 
0; i < num; i++) { SResultWindowInfo winInfo = {0}; - setSessionOutputBuf(pAggSup, pSeKeyBuf[i].win.skey, pSeKeyBuf[i].win.ekey, pSeKeyBuf[i].groupId, &winInfo); + getSessionWindowInfoByKey(pAggSup, pSeKeyBuf + i, &winInfo); + if (!IS_VALID_SESSION_WIN(winInfo)) { + continue; + } int32_t winNum = compactSessionWindow(pOperator, &winInfo, pInfo->pStUpdated, pInfo->pStDeleted, true); if (winNum > 0) { qDebug("===stream=== reload state. save result %" PRId64 ", %" PRIu64, winInfo.sessionWin.win.skey, @@ -2619,18 +2771,18 @@ SOperatorInfo* createStreamSessionAggOperatorInfo(SOperatorInfo* downstream, SPh goto _error; } } - SExprSupp* pSup = &pOperator->exprSupp; + SExprSupp* pExpSup = &pOperator->exprSupp; SExprInfo* pExprInfo = createExprInfo(pSessionNode->window.pFuncs, NULL, &numOfCols); SSDataBlock* pResBlock = createDataBlockFromDescNode(pPhyNode->pOutputDataBlockDesc); - code = initBasicInfoEx(&pInfo->binfo, pSup, pExprInfo, numOfCols, pResBlock, &pTaskInfo->storageAPI.functionStore); + code = initBasicInfoEx(&pInfo->binfo, pExpSup, pExprInfo, numOfCols, pResBlock, &pTaskInfo->storageAPI.functionStore); if (code != TSDB_CODE_SUCCESS) { goto _error; } - code = initStreamAggSupporter(&pInfo->streamAggSup, pSup->pCtx, numOfCols, pSessionNode->gap, + code = initStreamAggSupporter(&pInfo->streamAggSup, pExpSup, numOfCols, pSessionNode->gap, pTaskInfo->streamInfo.pState, 0, 0, &pTaskInfo->storageAPI.stateStore, pHandle, - &pTaskInfo->storageAPI); + &pInfo->twAggSup, GET_TASKID(pTaskInfo), &pTaskInfo->storageAPI); if (code != TSDB_CODE_SUCCESS) { goto _error; } @@ -2670,6 +2822,7 @@ SOperatorInfo* createStreamSessionAggOperatorInfo(SOperatorInfo* downstream, SPh } pInfo->pCheckpointRes = createSpecialDataBlock(STREAM_CHECKPOINT); + pInfo->clearState = false; pOperator->operatorType = QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION; // for stream void* buff = NULL; @@ -2727,18 +2880,25 @@ static SSDataBlock* doStreamSessionSemiAgg(SOperatorInfo* pOperator) { return opRes; } + if 
(pInfo->clearState) { + clearFunctionContext(&pOperator->exprSupp); + // semi session operator clear disk buffer + clearStreamSessionOperator(pInfo); + } + if (pOperator->status == OP_RES_TO_RETURN) { clearFunctionContext(&pOperator->exprSupp); - // semi interval operator clear disk buffer + // semi session operator clear disk buffer clearStreamSessionOperator(pInfo); setOperatorCompleted(pOperator); + pInfo->clearState = false; return NULL; } } SOperatorInfo* downstream = pOperator->pDownstream[0]; if (!pInfo->pUpdated) { - pInfo->pUpdated = taosArrayInit(16, sizeof(SSessionKey)); + pInfo->pUpdated = taosArrayInit(16, sizeof(SResultWindowInfo)); } if (!pInfo->pStUpdated) { _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); @@ -2756,10 +2916,11 @@ static SSDataBlock* doStreamSessionSemiAgg(SOperatorInfo* pOperator) { pBlock->info.type == STREAM_CLEAR) { // gap must be 0 SArray* pWins = taosArrayInit(16, sizeof(SSessionKey)); - doDeleteTimeWindows(&pInfo->streamAggSup, pBlock, pWins); - removeSessionResults(pInfo->pStUpdated, pWins); + doDeleteTimeWindows(pAggSup, pBlock, pWins); + removeSessionResults(pAggSup, pInfo->pStUpdated, pWins); copyDeleteWindowInfo(pWins, pInfo->pStDeleted); taosArrayDestroy(pWins); + pInfo->clearState = true; break; } else if (pBlock->info.type == STREAM_GET_ALL) { getAllSessionWindow(pInfo->streamAggSup.pResultRows, pInfo->pStUpdated); @@ -2787,10 +2948,8 @@ static SSDataBlock* doStreamSessionSemiAgg(SOperatorInfo* pOperator) { pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, maxTs); pBInfo->pRes->info.watermark = pInfo->twAggSup.maxTs; - copyUpdateResult(pInfo->pStUpdated, pInfo->pUpdated); - removeSessionResults(pInfo->pStDeleted, pInfo->pUpdated); - tSimpleHashCleanup(pInfo->pStUpdated); - pInfo->pStUpdated = NULL; + copyUpdateResult(&pInfo->pStUpdated, pInfo->pUpdated, sessionKeyCompareAsc); + removeSessionDeleteResults(pInfo->pStDeleted, pInfo->pUpdated); if(pInfo->isHistoryOp) { getMaxTsWins(pInfo->pUpdated, 
pInfo->historyWins); @@ -2806,7 +2965,7 @@ static SSDataBlock* doStreamSessionSemiAgg(SOperatorInfo* pOperator) { } clearFunctionContext(&pOperator->exprSupp); - // semi interval operator clear disk buffer + // semi session operator clear disk buffer clearStreamSessionOperator(pInfo); setOperatorCompleted(pOperator); return NULL; @@ -2836,7 +2995,7 @@ SOperatorInfo* createStreamFinalSessionAggOperatorInfo(SOperatorInfo* downstream if (numOfChild > 0) { pInfo->pChildren = taosArrayInit(numOfChild, sizeof(void*)); for (int32_t i = 0; i < numOfChild; i++) { - SOperatorInfo* pChildOp = createStreamFinalSessionAggOperatorInfo(NULL, pPhyNode, pTaskInfo, 0, NULL); + SOperatorInfo* pChildOp = createStreamFinalSessionAggOperatorInfo(NULL, pPhyNode, pTaskInfo, 0, pHandle); if (pChildOp == NULL) { goto _error; } @@ -2916,6 +3075,50 @@ bool compareWinStateKey(SStateKeys* left, SStateKeys* right) { return compareVal(left->pData, right); } +void getStateWindowInfoByKey(SStreamAggSupporter* pAggSup, SSessionKey* pKey, SStateWindowInfo* pCurWin, + SStateWindowInfo* pNextWin) { + int32_t size = pAggSup->resultRowSize; + pCurWin->winInfo.sessionWin.groupId = pKey->groupId; + pCurWin->winInfo.sessionWin.win.skey = pKey->win.skey; + pCurWin->winInfo.sessionWin.win.ekey = pKey->win.ekey; + getSessionWindowInfoByKey(pAggSup, pKey, &pCurWin->winInfo); + ASSERT(IS_VALID_SESSION_WIN(pCurWin->winInfo)); + pCurWin->pStateKey = + (SStateKeys*)((char*)pCurWin->winInfo.pStatePos->pRowBuff + (pAggSup->resultRowSize - pAggSup->stateKeySize)); + pCurWin->pStateKey->bytes = pAggSup->stateKeySize - sizeof(SStateKeys); + pCurWin->pStateKey->type = pAggSup->stateKeyType; + pCurWin->pStateKey->pData = (char*)pCurWin->pStateKey + sizeof(SStateKeys); + pCurWin->pStateKey->isNull = false; + pCurWin->winInfo.isOutput = true; + if (pCurWin->winInfo.pStatePos->needFree) { + pAggSup->stateStore.streamStateSessionDel(pAggSup->pState, &pCurWin->winInfo.sessionWin); + } + + qDebug("===stream===get state cur win 
buff. skey:%" PRId64 ", endkey:%" PRId64, pCurWin->winInfo.sessionWin.win.skey, + pCurWin->winInfo.sessionWin.win.ekey); + + pNextWin->winInfo.sessionWin = pCurWin->winInfo.sessionWin; + SStreamStateCur* pCur = + pAggSup->stateStore.streamStateSessionSeekKeyNext(pAggSup->pState, &pNextWin->winInfo.sessionWin); + int32_t nextSize = pAggSup->resultRowSize; + int32_t code = pAggSup->stateStore.streamStateSessionGetKVByCur(pCur, &pNextWin->winInfo.sessionWin, + (void**)&pNextWin->winInfo.pStatePos, &nextSize); + if (code != TSDB_CODE_SUCCESS) { + SET_SESSION_WIN_INVALID(pNextWin->winInfo); + } else { + pNextWin->pStateKey = + (SStateKeys*)((char*)pNextWin->winInfo.pStatePos->pRowBuff + (pAggSup->resultRowSize - pAggSup->stateKeySize)); + pNextWin->pStateKey->bytes = pAggSup->stateKeySize - sizeof(SStateKeys); + pNextWin->pStateKey->type = pAggSup->stateKeyType; + pNextWin->pStateKey->pData = (char*)pNextWin->pStateKey + sizeof(SStateKeys); + pNextWin->pStateKey->isNull = false; + pNextWin->winInfo.isOutput = true; + } + pAggSup->stateStore.streamStateFreeCur(pCur); + qDebug("===stream===get state next win buff. 
skey:%" PRId64 ", endkey:%" PRId64, pNextWin->winInfo.sessionWin.win.skey, + pNextWin->winInfo.sessionWin.win.ekey); +} + void setStateOutputBuf(SStreamAggSupporter* pAggSup, TSKEY ts, uint64_t groupId, char* pKeyData, SStateWindowInfo* pCurWin, SStateWindowInfo* pNextWin) { int32_t size = pAggSup->resultRowSize; @@ -2924,9 +3127,9 @@ void setStateOutputBuf(SStreamAggSupporter* pAggSup, TSKEY ts, uint64_t groupId, pCurWin->winInfo.sessionWin.win.ekey = ts; int32_t code = pAggSup->stateStore.streamStateStateAddIfNotExist(pAggSup->pState, &pCurWin->winInfo.sessionWin, pKeyData, pAggSup->stateKeySize, compareStateKey, - &pCurWin->winInfo.pOutputBuf, &size); + (void**)&pCurWin->winInfo.pStatePos, &size); pCurWin->pStateKey = - (SStateKeys*)((char*)pCurWin->winInfo.pOutputBuf + (pAggSup->resultRowSize - pAggSup->stateKeySize)); + (SStateKeys*)((char*)pCurWin->winInfo.pStatePos->pRowBuff + (pAggSup->resultRowSize - pAggSup->stateKeySize)); pCurWin->pStateKey->bytes = pAggSup->stateKeySize - sizeof(SStateKeys); pCurWin->pStateKey->type = pAggSup->stateKeyType; pCurWin->pStateKey->pData = (char*)pCurWin->pStateKey + sizeof(SStateKeys); @@ -2934,11 +3137,9 @@ void setStateOutputBuf(SStreamAggSupporter* pAggSup, TSKEY ts, uint64_t groupId, if (code == TSDB_CODE_SUCCESS && !inWinRange(&pAggSup->winRange, &pCurWin->winInfo.sessionWin.win)) { code = TSDB_CODE_FAILED; - releaseOutputBuf(pAggSup->pState, NULL, (SResultRow*)pCurWin->winInfo.pOutputBuf, - &pAggSup->pSessionAPI->stateStore); - pCurWin->winInfo.pOutputBuf = taosMemoryCalloc(1, size); + clearOutputBuf(pAggSup->pState, pCurWin->winInfo.pStatePos, &pAggSup->pSessionAPI->stateStore); pCurWin->pStateKey = - (SStateKeys*)((char*)pCurWin->winInfo.pOutputBuf + (pAggSup->resultRowSize - pAggSup->stateKeySize)); + (SStateKeys*)((char*)pCurWin->winInfo.pStatePos->pRowBuff + (pAggSup->resultRowSize - pAggSup->stateKeySize)); pCurWin->pStateKey->bytes = pAggSup->stateKeySize - sizeof(SStateKeys); pCurWin->pStateKey->type = 
pAggSup->stateKeyType; pCurWin->pStateKey->pData = (char*)pCurWin->pStateKey + sizeof(SStateKeys); @@ -2952,7 +3153,9 @@ void setStateOutputBuf(SStreamAggSupporter* pAggSup, TSKEY ts, uint64_t groupId, if (code == TSDB_CODE_SUCCESS) { pCurWin->winInfo.isOutput = true; - pAggSup->stateStore.streamStateSessionDel(pAggSup->pState, &pCurWin->winInfo.sessionWin); + if (pCurWin->winInfo.pStatePos->needFree) { + pAggSup->stateStore.streamStateSessionDel(pAggSup->pState, &pCurWin->winInfo.sessionWin); + } } else if (pKeyData) { if (IS_VAR_DATA_TYPE(pAggSup->stateKeyType)) { varDataCopy(pCurWin->pStateKey->pData, pKeyData); @@ -2961,17 +3164,20 @@ void setStateOutputBuf(SStreamAggSupporter* pAggSup, TSKEY ts, uint64_t groupId, } } + qDebug("===stream===set state cur win buff. skey:%" PRId64 ", endkey:%" PRId64, pCurWin->winInfo.sessionWin.win.skey, + pCurWin->winInfo.sessionWin.win.ekey); + pNextWin->winInfo.sessionWin = pCurWin->winInfo.sessionWin; SStreamStateCur* pCur = pAggSup->stateStore.streamStateSessionSeekKeyNext(pAggSup->pState, &pNextWin->winInfo.sessionWin); int32_t nextSize = pAggSup->resultRowSize; code = pAggSup->stateStore.streamStateSessionGetKVByCur(pCur, &pNextWin->winInfo.sessionWin, - &pNextWin->winInfo.pOutputBuf, &nextSize); + (void**)&pNextWin->winInfo.pStatePos, &nextSize); if (code != TSDB_CODE_SUCCESS) { SET_SESSION_WIN_INVALID(pNextWin->winInfo); } else { pNextWin->pStateKey = - (SStateKeys*)((char*)pNextWin->winInfo.pOutputBuf + (pAggSup->resultRowSize - pAggSup->stateKeySize)); + (SStateKeys*)((char*)pNextWin->winInfo.pStatePos->pRowBuff + (pAggSup->resultRowSize - pAggSup->stateKeySize)); pNextWin->pStateKey->bytes = pAggSup->stateKeySize - sizeof(SStateKeys); pNextWin->pStateKey->type = pAggSup->stateKeyType; pNextWin->pStateKey->pData = (char*)pNextWin->pStateKey + sizeof(SStateKeys); @@ -2979,9 +3185,11 @@ void setStateOutputBuf(SStreamAggSupporter* pAggSup, TSKEY ts, uint64_t groupId, pNextWin->winInfo.isOutput = true; } 
pAggSup->stateStore.streamStateFreeCur(pCur); + qDebug("===stream===set state next win buff. skey:%" PRId64 ", endkey:%" PRId64, pNextWin->winInfo.sessionWin.win.skey, + pNextWin->winInfo.sessionWin.win.ekey); } -int32_t updateStateWindowInfo(SStateWindowInfo* pWinInfo, SStateWindowInfo* pNextWin, TSKEY* pTs, uint64_t groupId, +int32_t updateStateWindowInfo(SStreamAggSupporter* pAggSup, SStateWindowInfo* pWinInfo, SStateWindowInfo* pNextWin, TSKEY* pTs, uint64_t groupId, SColumnInfoData* pKeyCol, int32_t rows, int32_t start, bool* allEqual, SSHashObj* pResultRows, SSHashObj* pSeUpdated, SSHashObj* pSeDeleted) { *allEqual = true; @@ -3004,10 +3212,11 @@ int32_t updateStateWindowInfo(SStateWindowInfo* pWinInfo, SStateWindowInfo* pNex if (pSeDeleted && pWinInfo->winInfo.isOutput) { saveDeleteRes(pSeDeleted, pWinInfo->winInfo.sessionWin); } - removeSessionResult(pSeUpdated, pResultRows, pWinInfo->winInfo.sessionWin); + removeSessionResult(pAggSup, pSeUpdated, pResultRows, pWinInfo->winInfo.sessionWin); pWinInfo->winInfo.sessionWin.win.skey = pTs[i]; } pWinInfo->winInfo.sessionWin.win.ekey = TMAX(pWinInfo->winInfo.sessionWin.win.ekey, pTs[i]); + memcpy(pWinInfo->winInfo.pStatePos->pKey, &pWinInfo->winInfo.sessionWin, sizeof(SSessionKey)); if (!isEqualStateKey(pWinInfo, pKeyData)) { *allEqual = false; } @@ -3056,11 +3265,10 @@ static void doStreamStateAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSDataBl SStateWindowInfo curWin = {0}; SStateWindowInfo nextWin = {0}; setStateOutputBuf(pAggSup, tsCols[i], groupId, pKeyData, &curWin, &nextWin); - if (IS_VALID_SESSION_WIN(nextWin.winInfo)) { - releaseOutputBuf(pAggSup->pState, NULL, (SResultRow*)nextWin.winInfo.pOutputBuf, &pAPI->stateStore); - } + releaseOutputBuf(pAggSup->pState, nextWin.winInfo.pStatePos, &pAPI->stateStore); + setSessionWinOutputInfo(pSeUpdated, &curWin.winInfo); - winRows = updateStateWindowInfo(&curWin, &nextWin, tsCols, groupId, pKeyColInfo, rows, i, &allEqual, + winRows = 
updateStateWindowInfo(pAggSup, &curWin, &nextWin, tsCols, groupId, pKeyColInfo, rows, i, &allEqual, pAggSup->pResultRows, pSeUpdated, pStDeleted); if (!allEqual) { uint64_t uid = 0; @@ -3068,7 +3276,7 @@ static void doStreamStateAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSDataBl &curWin.winInfo.sessionWin.win.ekey, &uid, &groupId, NULL); tSimpleHashRemove(pSeUpdated, &curWin.winInfo.sessionWin, sizeof(SSessionKey)); doDeleteSessionWindow(pAggSup, &curWin.winInfo.sessionWin); - releaseOutputBuf(pAggSup->pState, NULL, (SResultRow*)curWin.winInfo.pOutputBuf, &pAPI->stateStore); + releaseOutputBuf(pAggSup->pState, curWin.winInfo.pStatePos, &pAPI->stateStore); continue; } code = doOneWindowAggImpl(&pInfo->twAggSup.timeWindowData, &curWin.winInfo, &pResult, i, winRows, rows, numOfOutput, @@ -3109,7 +3317,7 @@ int32_t doStreamStateEncodeOpState(void** buf, int32_t len, SOperatorInfo* pOper size_t keyLen = 0; int32_t iter = 0; while ((pIte = tSimpleHashIterate(pInfo->streamAggSup.pResultRows, pIte, &iter)) != NULL) { - void* key = taosHashGetKey(pIte, &keyLen); + void* key = tSimpleHashGetKey(pIte, &keyLen); tlen += encodeSSessionKey(buf, key); tlen += encodeSResultWindowInfo(buf, pIte, pInfo->streamAggSup.resultRowSize); } @@ -3237,7 +3445,7 @@ static SSDataBlock* doStreamStateAgg(SOperatorInfo* pOperator) { SOperatorInfo* downstream = pOperator->pDownstream[0]; if (!pInfo->pUpdated) { - pInfo->pUpdated = taosArrayInit(16, sizeof(SSessionKey)); + pInfo->pUpdated = taosArrayInit(16, sizeof(SResultWindowInfo)); } if (!pInfo->pSeUpdated) { _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); @@ -3254,7 +3462,7 @@ static SSDataBlock* doStreamStateAgg(SOperatorInfo* pOperator) { pBlock->info.type == STREAM_CLEAR) { SArray* pWins = taosArrayInit(16, sizeof(SSessionKey)); doDeleteTimeWindows(&pInfo->streamAggSup, pBlock, pWins); - removeSessionResults(pInfo->pSeUpdated, pWins); + removeSessionResults(&pInfo->streamAggSup, pInfo->pSeUpdated, pWins); 
copyDeleteWindowInfo(pWins, pInfo->pSeDeleted); taosArrayDestroy(pWins); continue; @@ -3285,10 +3493,8 @@ static SSDataBlock* doStreamStateAgg(SOperatorInfo* pOperator) { pOperator->status = OP_RES_TO_RETURN; closeSessionWindow(pInfo->streamAggSup.pResultRows, &pInfo->twAggSup, pInfo->pSeUpdated); - copyUpdateResult(pInfo->pSeUpdated, pInfo->pUpdated); - removeSessionResults(pInfo->pSeDeleted, pInfo->pUpdated); - tSimpleHashCleanup(pInfo->pSeUpdated); - pInfo->pSeUpdated = NULL; + copyUpdateResult(&pInfo->pSeUpdated, pInfo->pUpdated, sessionKeyCompareAsc); + removeSessionDeleteResults(pInfo->pSeDeleted, pInfo->pUpdated); if (pInfo->isHistoryOp) { getMaxTsWins(pInfo->pUpdated, pInfo->historyWins); @@ -3308,11 +3514,17 @@ static SSDataBlock* doStreamStateAgg(SOperatorInfo* pOperator) { void streamStateReleaseState(SOperatorInfo* pOperator) { SStreamStateAggOperatorInfo* pInfo = pOperator->info; - int32_t resSize = taosArrayGetSize(pInfo->historyWins) * sizeof(SSessionKey); + int32_t winSize = taosArrayGetSize(pInfo->historyWins) * sizeof(SSessionKey); + int32_t resSize = winSize + sizeof(TSKEY); + char* pBuff = taosMemoryCalloc(1, resSize); + memcpy(pBuff, pInfo->historyWins->pData, winSize); + memcpy(pBuff + winSize, &pInfo->twAggSup.maxTs, sizeof(TSKEY)); qDebug("===stream=== relase state. 
save result count:%d", (int32_t)taosArrayGetSize(pInfo->historyWins)); pInfo->streamAggSup.stateStore.streamStateSaveInfo(pInfo->streamAggSup.pState, STREAM_STATE_OP_STATE_NAME, - strlen(STREAM_STATE_OP_STATE_NAME), pInfo->historyWins->pData, - resSize); + strlen(STREAM_STATE_OP_STATE_NAME), pBuff, resSize); + pInfo->streamAggSup.stateStore.streamStateCommit(pInfo->streamAggSup.pState); + taosMemoryFreeClear(pBuff); + SOperatorInfo* downstream = pOperator->pDownstream[0]; if (downstream->fpSet.releaseStreamStateFn) { downstream->fpSet.releaseStreamStateFn(downstream); @@ -3333,6 +3545,7 @@ static void compactStateWindow(SOperatorInfo* pOperator, SResultWindowInfo* pCur SResultRow* pWinResult = NULL; initSessionOutputBuf(pNextWin, &pWinResult, pAggSup->pDummyCtx, numOfOutput, pSup->rowEntryInfoOffset); pCurWin->sessionWin.win.ekey = TMAX(pCurWin->sessionWin.win.ekey, pNextWin->sessionWin.win.ekey); + memcpy(pCurWin->pStatePos->pKey, &pCurWin->sessionWin, sizeof(SSessionKey)); updateTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &pCurWin->sessionWin.win, 1); compactFunctions(pSup->pCtx, pAggSup->pDummyCtx, numOfOutput, pTaskInfo, &pInfo->twAggSup.timeWindowData); @@ -3342,9 +3555,9 @@ static void compactStateWindow(SOperatorInfo* pOperator, SResultWindowInfo* pCur pNextWin->sessionWin.groupId); saveDeleteRes(pStDeleted, pNextWin->sessionWin); } - removeSessionResult(pStUpdated, pAggSup->pResultRows, pNextWin->sessionWin); + removeSessionResult(pAggSup, pStUpdated, pAggSup->pResultRows, pNextWin->sessionWin); doDeleteSessionWindow(pAggSup, &pNextWin->sessionWin); - taosMemoryFree(pNextWin->pOutputBuf); + releaseOutputBuf(pAggSup->pState, pNextWin->pStatePos, &pAggSup->pSessionAPI->stateStore); } void streamStateReloadState(SOperatorInfo* pOperator) { @@ -3357,10 +3570,15 @@ void streamStateReloadState(SOperatorInfo* pOperator) { void* pBuf = NULL; int32_t code = pAggSup->stateStore.streamStateGetInfo(pAggSup->pState, STREAM_STATE_OP_STATE_NAME, 
strlen(STREAM_STATE_OP_STATE_NAME), &pBuf, &size); - int32_t num = size / sizeof(SSessionKey); + int32_t num = (size - sizeof(TSKEY)) / sizeof(SSessionKey); qDebug("===stream=== reload state. get result count:%d", num); SSessionKey* pSeKeyBuf = (SSessionKey*)pBuf; - ASSERT(size == num * sizeof(SSessionKey)); + ASSERT(size == num * sizeof(SSessionKey) + sizeof(TSKEY)); + + TSKEY ts = *(TSKEY*)((char*)pBuf + size - sizeof(TSKEY)); + pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, ts); + pAggSup->stateStore.streamStateReloadInfo(pAggSup->pState, ts); + if (!pInfo->pSeUpdated && num > 0) { _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); pInfo->pSeUpdated = tSimpleHashInit(64, hashFn); @@ -3375,7 +3593,7 @@ void streamStateReloadState(SOperatorInfo* pOperator) { SStateWindowInfo dummy = {0}; qDebug("===stream=== reload state. try process result %" PRId64 ", %" PRIu64 ", index:%d", pSeKeyBuf[i].win.skey, pSeKeyBuf[i].groupId, i); - setStateOutputBuf(pAggSup, pSeKeyBuf[i].win.skey, pSeKeyBuf[i].groupId, NULL, &curInfo, &nextInfo); + getStateWindowInfoByKey(pAggSup, pSeKeyBuf + i, &curInfo, &nextInfo); bool cpRes = compareWinStateKey(curInfo.pStateKey, nextInfo.pStateKey); qDebug("===stream=== reload state. 
next window info %" PRId64 ", %" PRIu64 ", compare:%d", nextInfo.winInfo.sessionWin.win.skey, nextInfo.winInfo.sessionWin.groupId, cpRes); @@ -3394,8 +3612,7 @@ void streamStateReloadState(SOperatorInfo* pOperator) { tSimpleHashPut(pAggSup->pResultRows, &key, sizeof(SSessionKey), &curInfo.winInfo, sizeof(SResultWindowInfo)); } } else if (IS_VALID_SESSION_WIN(nextInfo.winInfo)) { - releaseOutputBuf(pAggSup->pState, NULL, (SResultRow*)nextInfo.winInfo.pOutputBuf, - &pAggSup->pSessionAPI->stateStore); + releaseOutputBuf(pAggSup->pState, nextInfo.winInfo.pStatePos, &pAggSup->pSessionAPI->stateStore); } if (IS_VALID_SESSION_WIN(curInfo.winInfo)) { @@ -3444,18 +3661,19 @@ SOperatorInfo* createStreamStateAggOperatorInfo(SOperatorInfo* downstream, SPhys initExecTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &pTaskInfo->window); - SExprSupp* pSup = &pOperator->exprSupp; + SExprSupp* pExpSup = &pOperator->exprSupp; int32_t numOfCols = 0; SExprInfo* pExprInfo = createExprInfo(pStateNode->window.pFuncs, NULL, &numOfCols); SSDataBlock* pResBlock = createDataBlockFromDescNode(pPhyNode->pOutputDataBlockDesc); - code = initBasicInfoEx(&pInfo->binfo, pSup, pExprInfo, numOfCols, pResBlock, &pTaskInfo->storageAPI.functionStore); + code = initBasicInfoEx(&pInfo->binfo, pExpSup, pExprInfo, numOfCols, pResBlock, &pTaskInfo->storageAPI.functionStore); if (code != TSDB_CODE_SUCCESS) { goto _error; } int32_t keySize = sizeof(SStateKeys) + pColNode->node.resType.bytes; int16_t type = pColNode->node.resType.type; - code = initStreamAggSupporter(&pInfo->streamAggSup, pSup->pCtx, numOfCols, 0, pTaskInfo->streamInfo.pState, keySize, - type, &pTaskInfo->storageAPI.stateStore, pHandle, &pTaskInfo->storageAPI); + code = initStreamAggSupporter(&pInfo->streamAggSup, pExpSup, numOfCols, 0, pTaskInfo->streamInfo.pState, keySize, + type, &pTaskInfo->storageAPI.stateStore, pHandle, &pInfo->twAggSup, + GET_TASKID(pTaskInfo), &pTaskInfo->storageAPI); if (code != TSDB_CODE_SUCCESS) { goto _error; } @@ 
-3734,7 +3952,7 @@ SOperatorInfo* createStreamIntervalOperatorInfo(SOperatorInfo* downstream, SPhys pInfo->pState->pFileState = pTaskInfo->storageAPI.stateStore.streamFileStateInit( tsStreamBufferSize, sizeof(SWinKey), pInfo->aggSup.resultRowSize, funResSize, compareTs, pInfo->pState, - pInfo->twAggSup.deleteMark, GET_TASKID(pTaskInfo), pHandle->checkpointId); + pInfo->twAggSup.deleteMark, GET_TASKID(pTaskInfo), pHandle->checkpointId, STREAM_STATE_BUFF_HASH); setOperatorInfo(pOperator, "StreamIntervalOperator", QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL, true, OP_NOT_OPENED, pInfo, pTaskInfo); diff --git a/source/libs/parser/src/parTranslater.c b/source/libs/parser/src/parTranslater.c index 959cd81e06..f0a12eecdc 100644 --- a/source/libs/parser/src/parTranslater.c +++ b/source/libs/parser/src/parTranslater.c @@ -13,8 +13,8 @@ * along with this program. If not, see . */ -#include "parInt.h" #include "parTranslater.h" +#include "parInt.h" #include "catalog.h" #include "cmdnodes.h" @@ -1209,37 +1209,37 @@ static EDealRes translateNormalValue(STranslateContext* pCxt, SValueNode* pVal, break; } case TSDB_DATA_TYPE_VARBINARY: { - if (pVal->node.resType.type != TSDB_DATA_TYPE_BINARY){ + if (pVal->node.resType.type != TSDB_DATA_TYPE_BINARY) { return generateDealNodeErrMsg(pCxt, TSDB_CODE_PAR_WRONG_VALUE_TYPE, pVal->literal); } - void* data = NULL; + void* data = NULL; uint32_t size = 0; - bool isHexChar = isHex(pVal->literal, strlen(pVal->literal)); - if(isHexChar){ - if(!isValidateHex(pVal->literal, strlen(pVal->literal))){ + bool isHexChar = isHex(pVal->literal, strlen(pVal->literal)); + if (isHexChar) { + if (!isValidateHex(pVal->literal, strlen(pVal->literal))) { return TSDB_CODE_PAR_INVALID_VARBINARY; } - if(taosHex2Ascii(pVal->literal, strlen(pVal->literal), &data, &size) < 0){ + if (taosHex2Ascii(pVal->literal, strlen(pVal->literal), &data, &size) < 0) { return TSDB_CODE_OUT_OF_MEMORY; } - }else{ + } else { size = pVal->node.resType.bytes; data = pVal->literal; } if 
(size + VARSTR_HEADER_SIZE > targetDt.bytes) { - if(isHexChar) taosMemoryFree(data); + if (isHexChar) taosMemoryFree(data); return generateDealNodeErrMsg(pCxt, TSDB_CODE_PAR_VALUE_TOO_LONG, pVal->literal); } pVal->datum.p = taosMemoryCalloc(1, size + VARSTR_HEADER_SIZE); if (NULL == pVal->datum.p) { - if(isHexChar) taosMemoryFree(data); + if (isHexChar) taosMemoryFree(data); return generateDealNodeErrMsg(pCxt, TSDB_CODE_OUT_OF_MEMORY); } varDataSetLen(pVal->datum.p, size); memcpy(varDataVal(pVal->datum.p), data, size); - if(isHexChar) taosMemoryFree(data); + if (isHexChar) taosMemoryFree(data); break; } case TSDB_DATA_TYPE_VARCHAR: @@ -1789,7 +1789,6 @@ static int32_t translateBlockDistFunc(STranslateContext* pCtx, SFunctionNode* pF return TSDB_CODE_SUCCESS; } - static bool isStarParam(SNode* pNode) { return nodesIsStar(pNode) || nodesIsTableStar(pNode); } static int32_t translateMultiResFunc(STranslateContext* pCxt, SFunctionNode* pFunc) { @@ -2811,7 +2810,8 @@ static int32_t translateTable(STranslateContext* pCxt, SNode* pTable) { pJoinTable->table.precision = calcJoinTablePrecision(pJoinTable); pJoinTable->table.singleTable = joinTableIsSingleTable(pJoinTable); code = translateExpr(pCxt, &pJoinTable->pOnCond); - pJoinTable->hasSubQuery = (nodeType(pJoinTable->pLeft) != QUERY_NODE_REAL_TABLE) || (nodeType(pJoinTable->pRight) != QUERY_NODE_REAL_TABLE); + pJoinTable->hasSubQuery = (nodeType(pJoinTable->pLeft) != QUERY_NODE_REAL_TABLE) || + (nodeType(pJoinTable->pRight) != QUERY_NODE_REAL_TABLE); if (nodeType(pJoinTable->pLeft) == QUERY_NODE_JOIN_TABLE) { ((SJoinTableNode*)pJoinTable->pLeft)->isLowLevelJoin = true; } @@ -2827,7 +2827,7 @@ static int32_t translateTable(STranslateContext* pCxt, SNode* pTable) { return code; } -static int32_t createAllColumns(STranslateContext* pCxt, bool igTags, SNodeList** pCols) { +static int32_t createAllColumns(STranslateContext* pCxt, bool igTags, SNodeList** pCols) { *pCols = nodesMakeList(); if (NULL == *pCols) { return 
generateSyntaxErrMsg(&pCxt->msgBuf, TSDB_CODE_OUT_OF_MEMORY); @@ -2870,7 +2870,7 @@ static SNode* createMultiResFunc(SFunctionNode* pSrcFunc, SExprNode* pExpr) { } else { len = snprintf(buf, sizeof(buf) - 1, "%s(%s.%s)", pSrcFunc->functionName, pCol->tableAlias, pCol->colName); taosCreateMD5Hash(buf, len); - strncpy(pFunc->node.aliasName, buf, TSDB_COL_NAME_LEN - 1); + strncpy(pFunc->node.aliasName, buf, TSDB_COL_NAME_LEN - 1); len = snprintf(buf, sizeof(buf) - 1, "%s(%s)", pSrcFunc->functionName, pCol->colName); // note: userAlias could be truncated here strncpy(pFunc->node.userAlias, buf, TSDB_COL_NAME_LEN - 1); @@ -2992,11 +2992,8 @@ static int32_t createTags(STranslateContext* pCxt, SNodeList** pOutput) { return TSDB_CODE_SUCCESS; } - #ifndef TD_ENTERPRISE -int32_t biRewriteSelectStar(STranslateContext* pCxt, SSelectStmt* pSelect) { - return TSDB_CODE_SUCCESS; -} +int32_t biRewriteSelectStar(STranslateContext* pCxt, SSelectStmt* pSelect) { return TSDB_CODE_SUCCESS; } #endif static int32_t translateStar(STranslateContext* pCxt, SSelectStmt* pSelect) { @@ -3712,7 +3709,8 @@ static int32_t removeConstantValueFromList(SNodeList** pList) { SNode* pNode = NULL; WHERE_EACH(pNode, *pList) { if (nodeType(pNode) == QUERY_NODE_VALUE || - (nodeType(pNode) == QUERY_NODE_FUNCTION && fmIsConstantResFunc((SFunctionNode*)pNode) && fmIsScalarFunc(((SFunctionNode*)pNode)->funcId))) { + (nodeType(pNode) == QUERY_NODE_FUNCTION && fmIsConstantResFunc((SFunctionNode*)pNode) && + fmIsScalarFunc(((SFunctionNode*)pNode)->funcId))) { ERASE_NODE(*pList); continue; } @@ -4557,9 +4555,9 @@ static int32_t checkOptionsDependency(STranslateContext* pCxt, const char* pDbNa daysPerFile = (-1 == daysPerFile ? dbCfg.daysPerFile : daysPerFile); daysToKeep0 = (-1 == daysToKeep0 ? 
dbCfg.daysToKeep0 : daysToKeep0); } - if (daysPerFile > daysToKeep0) { + if (daysPerFile > daysToKeep0 / 3) { return generateSyntaxErrMsgExt(&pCxt->msgBuf, TSDB_CODE_PAR_INVALID_DB_OPTION, - "Invalid duration value, should be keep2 >= keep1 >= keep0 >= duration"); + "Invalid duration value, should be keep2 >= keep1 >= keep0 >= 3 * duration"); } return TSDB_CODE_SUCCESS; } @@ -8018,9 +8016,29 @@ static int32_t insertCondIntoSelectStmt(SSelectStmt* pSelect, SNode* pCond) { if (pSelect->pWhere == NULL) { pSelect->pWhere = pCond; } else { - SNode* pWhere = NULL; - createLogicCondNode(pSelect->pWhere, pCond, &pWhere, LOGIC_COND_TYPE_AND); - pSelect->pWhere = pWhere; + SNodeList* pLogicCondListWhere = NULL; + SNodeList* pLogicCondList2 = NULL; + if (nodeType(pSelect->pWhere) == QUERY_NODE_LOGIC_CONDITION && + ((SLogicConditionNode*)pSelect->pWhere)->condType == LOGIC_COND_TYPE_AND) { + pLogicCondListWhere = ((SLogicConditionNode*)pSelect->pWhere)->pParameterList; + } else { + nodesListMakeAppend(&pLogicCondListWhere, pSelect->pWhere); + } + + if (nodeType(pCond) == QUERY_NODE_LOGIC_CONDITION && + ((SLogicConditionNode*)pCond)->condType == LOGIC_COND_TYPE_AND) { + pLogicCondList2 = ((SLogicConditionNode*)pCond)->pParameterList; + } else { + nodesListMakeAppend(&pLogicCondList2, pCond); + } + + nodesListAppendList(pLogicCondListWhere, pLogicCondList2); + + SLogicConditionNode* pWhere = (SLogicConditionNode*)nodesMakeNode(QUERY_NODE_LOGIC_CONDITION); + pWhere->condType = LOGIC_COND_TYPE_AND; + pWhere->pParameterList = pLogicCondListWhere; + + pSelect->pWhere = (SNode*)pWhere; } return TSDB_CODE_SUCCESS; } @@ -8094,7 +8112,6 @@ static int32_t addShowKindCond(const SShowStmt* pShow, SSelectStmt* pSelect) { return TSDB_CODE_SUCCESS; } - static int32_t createShowCondition(const SShowStmt* pShow, SSelectStmt* pSelect) { SNode* pDbCond = NULL; SNode* pTbCond = NULL; diff --git a/source/libs/stream/src/streamBackendRocksdb.c b/source/libs/stream/src/streamBackendRocksdb.c index 
0cc8bfb6fe..fb3551778a 100644 --- a/source/libs/stream/src/streamBackendRocksdb.c +++ b/source/libs/stream/src/streamBackendRocksdb.c @@ -779,7 +779,10 @@ int32_t chkpGetAllDbCfHandle(SStreamMeta* pMeta, rocksdb_column_family_handle_t* int64_t id = *(int64_t*)pIter; SBackendCfWrapper* wrapper = taosAcquireRef(streamBackendCfWrapperId, id); - if (wrapper == NULL) continue; + if (wrapper == NULL) { + pIter = taosHashIterate(pMeta->pTaskBackendUnique, pIter); + continue; + } taosThreadRwlockRdlock(&wrapper->rwLock); for (int i = 0; i < sizeof(ginitDict) / sizeof(ginitDict[0]); i++) { @@ -795,6 +798,10 @@ int32_t chkpGetAllDbCfHandle(SStreamMeta* pMeta, rocksdb_column_family_handle_t* } int32_t nCf = taosArrayGetSize(pHandle); + if (nCf == 0) { + taosArrayDestroy(pHandle); + return nCf; + } rocksdb_column_family_handle_t** ppCf = taosMemoryCalloc(nCf, sizeof(rocksdb_column_family_handle_t*)); for (int i = 0; i < nCf; i++) { @@ -827,6 +834,7 @@ _ERROR: return code; } int32_t chkpPreFlushDb(rocksdb_t* db, rocksdb_column_family_handle_t** cf, int32_t nCf) { + if (nCf == 0) return 0; int code = 0; char* err = NULL; @@ -2007,7 +2015,7 @@ SStreamStateCur* streamStateGetAndCheckCur_rocksdb(SStreamState* pState, SWinKey SStreamStateCur* streamStateSeekKeyNext_rocksdb(SStreamState* pState, const SWinKey* key) { qDebug("streamStateSeekKeyNext_rocksdb"); - SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); + SStreamStateCur* pCur = createStreamStateCursor(); if (pCur == NULL) { return NULL; } @@ -2063,7 +2071,7 @@ SStreamStateCur* streamStateSeekToLast_rocksdb(SStreamState* pState, const SWinK qDebug("seek to last:%s", tbuf); } - SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); + SStreamStateCur* pCur = createStreamStateCursor(); if (pCur == NULL) return NULL; pCur->number = pState->number; @@ -2090,7 +2098,7 @@ SStreamStateCur* streamStateGetCur_rocksdb(SStreamState* pState, const SWinKey* qDebug("streamStateGetCur_rocksdb"); 
SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; - SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); + SStreamStateCur* pCur = createStreamStateCursor(); if (pCur == NULL) return NULL; pCur->db = wrapper->rocksdb; @@ -2179,7 +2187,7 @@ SStreamStateCur* streamStateSessionSeekKeyCurrentPrev_rocksdb(SStreamState* pSta qDebug("streamStateSessionSeekKeyCurrentPrev_rocksdb"); SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; - SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); + SStreamStateCur* pCur = createStreamStateCursor(); if (pCur == NULL) { return NULL; } @@ -2219,7 +2227,7 @@ SStreamStateCur* streamStateSessionSeekKeyCurrentPrev_rocksdb(SStreamState* pSta SStreamStateCur* streamStateSessionSeekKeyCurrentNext_rocksdb(SStreamState* pState, SSessionKey* key) { qDebug("streamStateSessionSeekKeyCurrentNext_rocksdb"); SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; - SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); + SStreamStateCur* pCur = createStreamStateCursor(); if (pCur == NULL) { return NULL; } @@ -2257,7 +2265,7 @@ SStreamStateCur* streamStateSessionSeekKeyCurrentNext_rocksdb(SStreamState* pSta SStreamStateCur* streamStateSessionSeekKeyNext_rocksdb(SStreamState* pState, const SSessionKey* key) { qDebug("streamStateSessionSeekKeyNext_rocksdb"); SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; - SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); + SStreamStateCur* pCur = createStreamStateCursor(); if (pCur == NULL) { return NULL; } @@ -2358,7 +2366,7 @@ int32_t streamStateFillDel_rocksdb(SStreamState* pState, const SWinKey* key) { SStreamStateCur* streamStateFillGetCur_rocksdb(SStreamState* pState, const SWinKey* key) { qDebug("streamStateFillGetCur_rocksdb"); - SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); + SStreamStateCur* pCur = createStreamStateCursor(); SBackendCfWrapper* wrapper = 
pState->pTdbState->pBackendCfWrapper; if (pCur == NULL) return NULL; @@ -2419,7 +2427,7 @@ int32_t streamStateFillGetKVByCur_rocksdb(SStreamStateCur* pCur, SWinKey* pKey, SStreamStateCur* streamStateFillSeekKeyNext_rocksdb(SStreamState* pState, const SWinKey* key) { qDebug("streamStateFillSeekKeyNext_rocksdb"); SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; - SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); + SStreamStateCur* pCur = createStreamStateCursor(); if (!pCur) { return NULL; } @@ -2457,7 +2465,7 @@ SStreamStateCur* streamStateFillSeekKeyNext_rocksdb(SStreamState* pState, const SStreamStateCur* streamStateFillSeekKeyPrev_rocksdb(SStreamState* pState, const SWinKey* key) { qDebug("streamStateFillSeekKeyPrev_rocksdb"); SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; - SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); + SStreamStateCur* pCur = createStreamStateCursor(); if (pCur == NULL) { return NULL; } @@ -2495,7 +2503,7 @@ SStreamStateCur* streamStateFillSeekKeyPrev_rocksdb(SStreamState* pState, const int32_t streamStateSessionGetKeyByRange_rocksdb(SStreamState* pState, const SSessionKey* key, SSessionKey* curKey) { qDebug("streamStateSessionGetKeyByRange_rocksdb"); SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; - SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); + SStreamStateCur* pCur = createStreamStateCursor(); if (pCur == NULL) { return -1; } @@ -2765,7 +2773,7 @@ int32_t streamDefaultIterGet_rocksdb(SStreamState* pState, const void* start, co return code; } void* streamDefaultIterCreate_rocksdb(SStreamState* pState) { - SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); + SStreamStateCur* pCur = createStreamStateCursor(); SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; pCur->db = wrapper->rocksdb; diff --git a/source/libs/stream/src/streamMeta.c b/source/libs/stream/src/streamMeta.c index 
70371c4add..85380151f3 100644 --- a/source/libs/stream/src/streamMeta.c +++ b/source/libs/stream/src/streamMeta.c @@ -238,8 +238,6 @@ int32_t streamMetaReopen(SStreamMeta* pMeta, int64_t chkpId) { return -1; } } - taosMemoryFree(defaultPath); - taosMemoryFree(newPath); pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId); while (pMeta->streamBackend == NULL) { @@ -254,6 +252,8 @@ int32_t streamMetaReopen(SStreamMeta* pMeta, int64_t chkpId) { pMeta->streamBackendRid = taosAddRef(streamBackendId, pMeta->streamBackend); streamBackendLoadCheckpointInfo(pMeta); + taosMemoryFree(defaultPath); + taosMemoryFree(newPath); return 0; } diff --git a/source/libs/stream/src/streamSessionState.c b/source/libs/stream/src/streamSessionState.c new file mode 100644 index 0000000000..cc96778762 --- /dev/null +++ b/source/libs/stream/src/streamSessionState.c @@ -0,0 +1,583 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +#include "tstreamFileState.h" + +#include "query.h" +#include "streamBackendRocksdb.h" +#include "taos.h" +#include "tcommon.h" +#include "thash.h" +#include "tsimplehash.h" + + +typedef int (*__session_compare_fn_t) (const SSessionKey* pWin, const void* pDatas, int pos); + +int sessionStateKeyCompare(const SSessionKey* pWin1, const void* pDatas, int pos) { + SRowBuffPos* pPos2 = taosArrayGetP(pDatas, pos); + SSessionKey* pWin2 = (SSessionKey*) pPos2->pKey; + return sessionWinKeyCmpr(pWin1, pWin2); +} + +int32_t binarySearch(void* keyList, int num, const void* key, __session_compare_fn_t cmpFn) { + int firstPos = 0, lastPos = num - 1, midPos = -1; + int numOfRows = 0; + + if (num <= 0) return -1; + // find the first position which is smaller or equal than the key. + // if all data is bigger than the key return -1 + while (1) { + if (cmpFn(key, keyList, lastPos) >= 0) return lastPos; + if (cmpFn(key, keyList, firstPos) == 0) return firstPos; + if (cmpFn(key, keyList, firstPos) < 0) return firstPos - 1; + + numOfRows = lastPos - firstPos + 1; + midPos = (numOfRows >> 1) + firstPos; + + if (cmpFn(key, keyList, midPos) < 0) { + lastPos = midPos - 1; + } else if (cmpFn(key, keyList, midPos) > 0) { + firstPos = midPos + 1; + } else { + break; + } + } + + return midPos; +} + +int64_t getSessionWindowEndkey(void* data, int32_t index) { + SArray* pWinInfos = (SArray*)data; + SRowBuffPos** ppos = taosArrayGet(pWinInfos, index); + SSessionKey* pWin = (SSessionKey*)((*ppos)->pKey); + return pWin->win.ekey; +} + +bool inSessionWindow(SSessionKey* pKey, TSKEY ts, int64_t gap) { + if (ts + gap >= pKey->win.skey && ts - gap <= pKey->win.ekey) { + return true; + } + return false; +} + +static SRowBuffPos* addNewSessionWindow(SStreamFileState* pFileState, SArray* pWinInfos, SSessionKey* pKey) { + SRowBuffPos* pNewPos = getNewRowPosForWrite(pFileState); + ASSERT(pNewPos->pRowBuff); + memcpy(pNewPos->pKey, pKey, sizeof(SSessionKey)); + taosArrayPush(pWinInfos, &pNewPos); + 
return pNewPos; +} + +static SRowBuffPos* insertNewSessionWindow(SStreamFileState* pFileState, SArray* pWinInfos, SSessionKey* pKey, int32_t index) { + SRowBuffPos* pNewPos = getNewRowPosForWrite(pFileState); + ASSERT(pNewPos->pRowBuff); + memcpy(pNewPos->pKey, pKey, sizeof(SSessionKey)); + taosArrayInsert(pWinInfos, index, &pNewPos); + return pNewPos; +} + +SRowBuffPos* createSessionWinBuff(SStreamFileState* pFileState, SSessionKey* pKey, void* p, int32_t* pVLen) { + SRowBuffPos* pNewPos = getNewRowPosForWrite(pFileState); + memcpy(pNewPos->pKey, pKey, sizeof(SSessionKey)); + pNewPos->needFree = true; + memcpy(pNewPos->pRowBuff, p, *pVLen); + taosMemoryFree(p); + return pNewPos; +} + +int32_t getSessionWinResultBuff(SStreamFileState* pFileState, SSessionKey* pKey, TSKEY gap, void** pVal, int32_t* pVLen) { + int32_t code = TSDB_CODE_SUCCESS; + SSHashObj* pSessionBuff = getRowStateBuff(pFileState); + SArray* pWinStates = NULL; + void** ppBuff = tSimpleHashGet(pSessionBuff, &pKey->groupId, sizeof(uint64_t)); + if (ppBuff) { + pWinStates = (SArray*)(*ppBuff); + } else { + pWinStates = taosArrayInit(16, POINTER_BYTES); + tSimpleHashPut(pSessionBuff, &pKey->groupId, sizeof(uint64_t), &pWinStates, POINTER_BYTES); + } + + TSKEY startTs = pKey->win.skey; + TSKEY endTs = pKey->win.ekey; + + int32_t size = taosArrayGetSize(pWinStates); + if (size == 0) { + void* pFileStore = getStateFileStore(pFileState); + void* p = NULL; + int32_t code_file = streamStateSessionAddIfNotExist_rocksdb(pFileStore, pKey, gap, &p, pVLen); + if (code_file == TSDB_CODE_SUCCESS) { + (*pVal) = createSessionWinBuff(pFileState, pKey, p, pVLen); + code = code_file; + qDebug("===stream===0 get session win:%" PRId64 ",%" PRId64 " from disc, res %d", startTs, endTs, code_file); + } else { + (*pVal) = addNewSessionWindow(pFileState, pWinStates, pKey); + code = TSDB_CODE_FAILED; + taosMemoryFree(p); + } + goto _end; + } + + // find the first position which is smaller than the pKey + int32_t index = 
binarySearch(pWinStates, size, pKey, sessionStateKeyCompare); + SRowBuffPos* pPos = NULL; + + if (index >= 0) { + pPos = taosArrayGetP(pWinStates, index); + if (inSessionWindow(pPos->pKey, startTs, gap)) { + (*pVal) = pPos; + SSessionKey* pDestWinKey = (SSessionKey*)pPos->pKey; + pPos->beUsed = true; + *pKey = *pDestWinKey; + goto _end; + } + } + + if (index + 1 < size) { + pPos = taosArrayGetP(pWinStates, index + 1); + if (inSessionWindow(pPos->pKey, startTs, gap) || (endTs != INT64_MIN && inSessionWindow(pPos->pKey, endTs, gap)) ) { + (*pVal) = pPos; + SSessionKey* pDestWinKey = (SSessionKey*)pPos->pKey; + pPos->beUsed = true; + *pKey = *pDestWinKey; + goto _end; + } + } + + if (index + 1 == 0) { + if (!isDeteled(pFileState, endTs) && isFlushedState(pFileState, endTs, gap)) { + void* p = NULL; + void* pFileStore = getStateFileStore(pFileState); + int32_t code_file = streamStateSessionAddIfNotExist_rocksdb(pFileStore, pKey, gap, &p, pVLen); + if (code_file == TSDB_CODE_SUCCESS || isFlushedState(pFileState, endTs, 0)) { + (*pVal) = createSessionWinBuff(pFileState, pKey, p, pVLen); + code = code_file; + qDebug("===stream===1 get session win:%" PRId64 ",%" PRId64 " from disc, res %d", startTs, endTs, code_file); + goto _end; + } else { + taosMemoryFree(p); + } + } + } + + if (index == size - 1) { + (*pVal) = addNewSessionWindow(pFileState, pWinStates, pKey); + code = TSDB_CODE_FAILED; + goto _end; + } + (*pVal) = insertNewSessionWindow(pFileState, pWinStates, pKey, index + 1); + code = TSDB_CODE_FAILED; + +_end: + return code; +} + +int32_t putSessionWinResultBuff(SStreamFileState* pFileState, SRowBuffPos* pPos) { + SSHashObj* pSessionBuff = getRowStateBuff(pFileState); + SSessionKey* pKey = pPos->pKey; + SArray* pWinStates = NULL; + void** ppBuff = tSimpleHashGet(pSessionBuff, &pKey->groupId, sizeof(uint64_t)); + if (ppBuff) { + pWinStates = (SArray*)(*ppBuff); + } else { + pWinStates = taosArrayInit(16, POINTER_BYTES); + tSimpleHashPut(pSessionBuff, &pKey->groupId, 
sizeof(uint64_t), &pWinStates, POINTER_BYTES); + } + + int32_t size = taosArrayGetSize(pWinStates); + if (size == 0) { + taosArrayPush(pWinStates, &pPos); + goto _end; + } + + // find the first position which is smaller than the pKey + int32_t index = binarySearch(pWinStates, size, pKey, sessionStateKeyCompare); + if (index >= 0) { + taosArrayInsert(pWinStates, index, &pPos); + } else { + taosArrayInsert(pWinStates, 0, &pPos); + } + +_end: + pPos->needFree = false; + return TSDB_CODE_SUCCESS; +} + +int32_t getSessionFlushedBuff(SStreamFileState* pFileState, SSessionKey* pKey, void** pVal, int32_t* pVLen) { + SRowBuffPos* pNewPos = getNewRowPosForWrite(pFileState); + memcpy(pNewPos->pKey, pKey, sizeof(SSessionKey)); + pNewPos->needFree = true; + void* pBuff = NULL; + int32_t code = streamStateSessionGet_rocksdb(getStateFileStore(pFileState), pKey, &pBuff, pVLen); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + memcpy(pNewPos->pRowBuff, pBuff, *pVLen); + taosMemoryFreeClear(pBuff); + (*pVal) = pNewPos; + return TSDB_CODE_SUCCESS; +} + +int32_t deleteSessionWinStateBuffFn(void* pBuff, const void *key, size_t keyLen) { + SSHashObj* pSessionBuff = (SSHashObj*) pBuff; + SSessionKey* pWinKey = (SSessionKey*) key; + void** ppBuff = tSimpleHashGet(pSessionBuff, &pWinKey->groupId, sizeof(uint64_t)); + if (!ppBuff) { + return TSDB_CODE_SUCCESS; + } + SArray* pWinStates = (SArray*)(*ppBuff); + int32_t size = taosArrayGetSize(pWinStates); + TSKEY gap = 0; + int32_t index = binarySearch(pWinStates, size, pWinKey, sessionStateKeyCompare); + if (index >= 0) { + SRowBuffPos* pPos = taosArrayGetP(pWinStates, index); + if (inSessionWindow(pPos->pKey, pWinKey->win.skey, gap)) { + pPos->beFlushed = true; + taosArrayRemove(pWinStates, index); + } + } + return TSDB_CODE_SUCCESS; +} + +int32_t deleteSessionWinStateBuffByPosFn(SStreamFileState* pFileState, SRowBuffPos* pPos) { + SSHashObj* pSessionBuff = getRowStateBuff(pFileState); + SSessionKey* pWinKey = (SSessionKey*) 
pPos->pKey; + void** ppBuff = tSimpleHashGet(pSessionBuff, &pWinKey->groupId, sizeof(uint64_t)); + if (!ppBuff) { + return TSDB_CODE_SUCCESS; + } + SArray* pWinStates = (SArray*)(*ppBuff); + int32_t size = taosArrayGetSize(pWinStates); + TSKEY gap = 0; + int32_t index = binarySearch(pWinStates, size, pWinKey, sessionStateKeyCompare); + if (index >= 0) { + SRowBuffPos* pItemPos = taosArrayGetP(pWinStates, index); + if (pItemPos == pPos) { + taosArrayRemove(pWinStates, index); + } + } + return TSDB_CODE_SUCCESS; +} + +void sessionWinStateClear(SStreamFileState* pFileState) { + int32_t buffSize = getRowStateRowSize(pFileState); + void* pIte = NULL; + size_t keyLen = 0; + int32_t iter = 0; + void* pBuff = getRowStateBuff(pFileState); + while ((pIte = tSimpleHashIterate(pBuff, pIte, &iter)) != NULL) { + SArray* pWinStates = *((void**)pIte); + int32_t size = taosArrayGetSize(pWinStates); + for (int32_t i = 0; i < size; i++) { + SRowBuffPos* pPos = taosArrayGetP(pWinStates, i); + memset(pPos->pRowBuff, 0, buffSize); + } + } +} + +void sessionWinStateCleanup(void* pBuff) { + void* pIte = NULL; + size_t keyLen = 0; + int32_t iter = 0; + while ((pIte = tSimpleHashIterate(pBuff, pIte, &iter)) != NULL) { + SArray* pWinStates = (SArray*) (*(void**)pIte); + taosArrayDestroy(pWinStates); + } + tSimpleHashCleanup(pBuff); +} + +static SStreamStateCur* seekKeyCurrentPrev_buff(SStreamFileState* pFileState, const SSessionKey* pWinKey, + SArray** pWins, int32_t* pIndex) { + SStreamStateCur* pCur = NULL; + SSHashObj* pSessionBuff = getRowStateBuff(pFileState); + void** ppBuff = tSimpleHashGet(pSessionBuff, &pWinKey->groupId, sizeof(uint64_t)); + if (!ppBuff) { + return NULL; + } + + SArray* pWinStates = (SArray*)(*ppBuff); + int32_t size = taosArrayGetSize(pWinStates); + TSKEY gap = 0; + int32_t index = binarySearch(pWinStates, size, pWinKey, sessionStateKeyCompare); + + if (pWins) { + (*pWins) = pWinStates; + } + + if (index >= 0) { + pCur = createStreamStateCursor(); + pCur->buffIndex 
= index; + pCur->pStreamFileState = pFileState; + if (pIndex) { + *pIndex = index; + } + } + return pCur; +} + +SStreamStateCur* sessionWinStateSeekKeyCurrentPrev(SStreamFileState* pFileState, const SSessionKey* pWinKey) { + SStreamStateCur* pCur = seekKeyCurrentPrev_buff(pFileState, pWinKey, NULL, NULL); + if (pCur) { + return pCur; + } + + void* pFileStore = getStateFileStore(pFileState); + pCur = streamStateSessionSeekKeyCurrentPrev_rocksdb(pFileStore, pWinKey); + if (!pCur) { + return NULL; + } + pCur->buffIndex = -1; + pCur->pStreamFileState = pFileState; + return pCur; +} +static void transformCursor(SStreamFileState* pFileState, SStreamStateCur* pCur) { + if (!pCur) { + return; + } + streamStateResetCur(pCur); + pCur->buffIndex = 0; + pCur->pStreamFileState = pFileState; +} + +static void checkAndTransformCursor(SStreamFileState* pFileState, const uint64_t groupId, SArray* pWinStates, SStreamStateCur** ppCur) { + SSessionKey key = {.groupId = groupId}; + int32_t code = streamStateSessionGetKVByCur_rocksdb(*ppCur, &key, NULL, NULL); + if (taosArrayGetSize(pWinStates) > 0 && (code == TSDB_CODE_FAILED || sessionStateKeyCompare(&key, pWinStates, 0) >= 0)) { + if ( !(*ppCur) ) { + (*ppCur) = createStreamStateCursor(); + } + transformCursor(pFileState, *ppCur); + } else if (*ppCur) { + (*ppCur)->buffIndex = -1; + (*ppCur)->pStreamFileState = pFileState; + } +} + +SStreamStateCur* sessionWinStateSeekKeyCurrentNext(SStreamFileState* pFileState, const SSessionKey* pWinKey) { + SArray* pWinStates = NULL; + int32_t index = -1; + SStreamStateCur* pCur = seekKeyCurrentPrev_buff(pFileState, pWinKey, &pWinStates, &index); + if (pCur) { + if (sessionStateKeyCompare(pWinKey, pWinStates, index) > 0) { + sessionWinStateMoveToNext(pCur); + } + return pCur; + } + + void* pFileStore = getStateFileStore(pFileState); + pCur = streamStateSessionSeekKeyCurrentNext_rocksdb(pFileStore, (SSessionKey*)pWinKey); + checkAndTransformCursor(pFileState, pWinKey->groupId, pWinStates, &pCur); + 
return pCur; +} + +SStreamStateCur* sessionWinStateSeekKeyNext(SStreamFileState* pFileState, const SSessionKey* pWinKey) { + SArray* pWinStates = NULL; + int32_t index = -1; + SStreamStateCur* pCur = seekKeyCurrentPrev_buff(pFileState, pWinKey, &pWinStates, &index); + if (pCur) { + sessionWinStateMoveToNext(pCur); + return pCur; + } + + void* pFileStore = getStateFileStore(pFileState); + pCur = streamStateSessionSeekKeyNext_rocksdb(pFileStore, pWinKey); + checkAndTransformCursor(pFileState, pWinKey->groupId, pWinStates, &pCur); + return pCur; +} + +int32_t sessionWinStateGetKVByCur(SStreamStateCur* pCur, SSessionKey* pKey, void** pVal, int32_t* pVLen) { + if (!pCur) { + return TSDB_CODE_FAILED; + } + int32_t code = TSDB_CODE_SUCCESS; + + SSHashObj* pSessionBuff = getRowStateBuff(pCur->pStreamFileState); + void** ppBuff = tSimpleHashGet(pSessionBuff, &pKey->groupId, sizeof(uint64_t)); + if (!ppBuff) { + return TSDB_CODE_FAILED; + } + + SArray* pWinStates = (SArray*)(*ppBuff); + int32_t size = taosArrayGetSize(pWinStates); + if (pCur->buffIndex >= 0) { + if (pCur->buffIndex >= size) { + return TSDB_CODE_FAILED; + } + SRowBuffPos* pPos = taosArrayGetP(pWinStates, pCur->buffIndex); + if (pVal) { + *pVal = pPos; + } + *pKey = *(SSessionKey*)(pPos->pKey); + } else { + void* pData = NULL; + code = streamStateSessionGetKVByCur_rocksdb(pCur, pKey, &pData, pVLen); + if (taosArrayGetSize(pWinStates) > 0 && (code == TSDB_CODE_FAILED || sessionStateKeyCompare(pKey, pWinStates, 0) >= 0)) { + transformCursor(pCur->pStreamFileState, pCur); + SRowBuffPos* pPos = taosArrayGetP(pWinStates, pCur->buffIndex); + if (pVal) { + *pVal = pPos; + } + *pKey = *(SSessionKey*)(pPos->pKey); + code = TSDB_CODE_SUCCESS; + } else if (code == TSDB_CODE_SUCCESS && pVal) { + SRowBuffPos* pNewPos = getNewRowPosForWrite(pCur->pStreamFileState); + memcpy(pNewPos->pKey, pKey, sizeof(SSessionKey)); + pNewPos->needFree = true; + memcpy(pNewPos->pRowBuff, pData, *pVLen); + (*pVal) = pNewPos; + } + 
taosMemoryFreeClear(pData); + } + return code; +} + +int32_t sessionWinStateMoveToNext(SStreamStateCur* pCur) { + if (pCur && pCur->buffIndex >= 0) { + pCur->buffIndex++; + } else { + streamStateCurNext_rocksdb(NULL, pCur); + } + return TSDB_CODE_SUCCESS; +} + +int32_t sessionWinStateGetKeyByRange(SStreamFileState* pFileState, const SSessionKey* key, SSessionKey* curKey) { + SStreamStateCur* pCur = sessionWinStateSeekKeyCurrentPrev(pFileState, key); + SSessionKey tmpKey = *key; + int32_t code = sessionWinStateGetKVByCur(pCur, &tmpKey, NULL, NULL); + bool hasCurrentPrev = true; + if (code == TSDB_CODE_FAILED) { + streamStateFreeCur(pCur); + pCur = sessionWinStateSeekKeyNext(pFileState, key); + code = sessionWinStateGetKVByCur(pCur, &tmpKey, NULL, NULL); + hasCurrentPrev = false; + } + + if (code == TSDB_CODE_FAILED) { + code = TSDB_CODE_FAILED; + goto _end; + } + + if (sessionRangeKeyCmpr(key, &tmpKey) == 0) { + *curKey = tmpKey; + goto _end; + } else if (!hasCurrentPrev) { + code = TSDB_CODE_FAILED; + goto _end; + } + + sessionWinStateMoveToNext(pCur); + code = sessionWinStateGetKVByCur(pCur, &tmpKey, NULL, NULL); + if (code == TSDB_CODE_SUCCESS && sessionRangeKeyCmpr(key, &tmpKey) == 0) { + *curKey = tmpKey; + } else { + code = TSDB_CODE_FAILED; + } + +_end: + streamStateFreeCur(pCur); + return code; +} + +int32_t getStateWinResultBuff(SStreamFileState* pFileState, SSessionKey* key, char* pKeyData, int32_t keyDataLen, + state_key_cmpr_fn fn, void** pVal, int32_t* pVLen) { + SSessionKey* pWinKey = key; + TSKEY gap = 0; + int32_t code = TSDB_CODE_SUCCESS; + SSHashObj* pSessionBuff = getRowStateBuff(pFileState); + SArray* pWinStates = NULL; + void** ppBuff = tSimpleHashGet(pSessionBuff, &pWinKey->groupId, sizeof(uint64_t)); + if (ppBuff) { + pWinStates = (SArray*)(*ppBuff); + } else { + pWinStates = taosArrayInit(16, POINTER_BYTES); + tSimpleHashPut(pSessionBuff, &pWinKey->groupId, sizeof(uint64_t), &pWinStates, POINTER_BYTES); + } + + TSKEY startTs = 
pWinKey->win.skey; + TSKEY endTs = pWinKey->win.ekey; + + int32_t size = taosArrayGetSize(pWinStates); + if (size == 0) { + void* pFileStore = getStateFileStore(pFileState); + void* p = NULL; + int32_t code_file = streamStateStateAddIfNotExist_rocksdb(pFileStore, pWinKey, pKeyData, keyDataLen, fn, &p, pVLen); + if (code_file == TSDB_CODE_SUCCESS) { + (*pVal) = createSessionWinBuff(pFileState, pWinKey, p, pVLen); + code = code_file; + qDebug("===stream===0 get state win:%" PRId64 ",%" PRId64 " from disc, res %d", pWinKey->win.skey, pWinKey->win.ekey, code_file); + } else { + (*pVal) = addNewSessionWindow(pFileState, pWinStates, key); + code = TSDB_CODE_FAILED; + taosMemoryFree(p); + } + goto _end; + } + + // find the first position which is smaller than the pWinKey + int32_t index = binarySearch(pWinStates, size, pWinKey, sessionStateKeyCompare); + SRowBuffPos* pPos = NULL; + int32_t valSize = *pVLen; + + if (index >= 0) { + pPos = taosArrayGetP(pWinStates, index); + void* stateKey = (char*)(pPos->pRowBuff) + (valSize - keyDataLen); + if (inSessionWindow(pPos->pKey, startTs, gap) || fn(pKeyData, stateKey) == true) { + (*pVal) = pPos; + SSessionKey* pDestWinKey = (SSessionKey*)pPos->pKey; + pPos->beUsed = true; + *key = *pDestWinKey; + goto _end; + } + } + + if (index + 1 < size) { + pPos = taosArrayGetP(pWinStates, index + 1); + void* stateKey = (char*)(pPos->pRowBuff) + (valSize - keyDataLen); + if (inSessionWindow(pPos->pKey, startTs, gap) || (endTs != INT64_MIN && inSessionWindow(pPos->pKey, endTs, gap)) || fn(pKeyData, stateKey) == true) { + (*pVal) = pPos; + SSessionKey* pDestWinKey = (SSessionKey*)pPos->pKey; + pPos->beUsed = true; + *key = *pDestWinKey; + goto _end; + } + } + + if (index + 1 == 0) { + if (!isDeteled(pFileState, endTs)) { + void* p = NULL; + void* pFileStore = getStateFileStore(pFileState); + int32_t code_file = + streamStateStateAddIfNotExist_rocksdb(pFileStore, pWinKey, pKeyData, keyDataLen, fn, &p, pVLen); + if (code_file == 
TSDB_CODE_SUCCESS || isFlushedState(pFileState, endTs, 0)) { + (*pVal) = createSessionWinBuff(pFileState, pWinKey, p, pVLen); + code = code_file; + qDebug("===stream===1 get state win:%" PRId64 ",%" PRId64 " from disc, res %d", pWinKey->win.skey, pWinKey->win.ekey, code_file); + goto _end; + } else { + taosMemoryFree(p); + } + } + } + + if (index == size - 1) { + (*pVal) = addNewSessionWindow(pFileState, pWinStates, key); + code = TSDB_CODE_FAILED; + goto _end; + } + (*pVal) = insertNewSessionWindow(pFileState, pWinStates, key, index + 1); + code = TSDB_CODE_FAILED; + +_end: + return code; +} diff --git a/source/libs/stream/src/streamState.c b/source/libs/stream/src/streamState.c index 44c7b4f2e0..7c5fcba10c 100644 --- a/source/libs/stream/src/streamState.c +++ b/source/libs/stream/src/streamState.c @@ -351,7 +351,7 @@ bool streamStateCheck(SStreamState* pState, const SWinKey* key) { int32_t streamStateGetByPos(SStreamState* pState, void* pos, void** pVal) { int32_t code = getRowBuffByPos(pState->pFileState, pos, pVal); - releaseRowBuffPos(pos); + streamFileStateReleaseBuff(pState->pFileState, pos, false); return code; } @@ -395,9 +395,6 @@ int32_t streamStateFillDel(SStreamState* pState, const SWinKey* key) { int32_t streamStateClear(SStreamState* pState) { #ifdef USE_ROCKSDB streamFileStateClear(pState->pFileState); - if (needClearDiskBuff(pState->pFileState)) { - streamStateClear_rocksdb(pState); - } return 0; #else SWinKey key = {.ts = 0, .groupId = 0}; @@ -422,19 +419,16 @@ void streamStateSetNumber(SStreamState* pState, int32_t number) { pState->number int32_t streamStateSaveInfo(SStreamState* pState, void* pKey, int32_t keyLen, void* pVal, int32_t vLen) { #ifdef USE_ROCKSDB int32_t code = 0; - void* batch = streamStateCreateBatch(); - code = streamStatePutBatch(pState, "default", batch, pKey, pVal, vLen, 0); + char* cfName = "default"; + void* batch = streamStateCreateBatch(); + code = streamStatePutBatch(pState, cfName, batch, pKey, pVal, vLen, 0); if (code 
!= 0) { streamStateDestroyBatch(batch); return code; } code = streamStatePutBatch_rocksdb(pState, batch); streamStateDestroyBatch(batch); - // code = streamDefaultPut_rocksdb(pState, pKey, pVal, vLen); - // char* Val = NULL; - // int32_t len = 0; - // code = streamDefaultGet_rocksdb(pState, pKey, (void**)&Val, &len); return code; #else return 0; @@ -466,20 +460,24 @@ int32_t streamStateAddIfNotExist(SStreamState* pState, const SWinKey* key, void* #endif } -int32_t streamStateReleaseBuf(SStreamState* pState, const SWinKey* key, void* pVal) { +int32_t streamStateReleaseBuf(SStreamState* pState, void* pVal, bool used) { // todo refactor qDebug("streamStateReleaseBuf"); if (!pVal) { return 0; } #ifdef USE_ROCKSDB - taosMemoryFree(pVal); + streamFileStateReleaseBuff(pState->pFileState, pVal, used); #else streamStateFreeVal(pVal); #endif return 0; } +int32_t streamStateClearBuff(SStreamState* pState, void* pVal) { + return streamFileStateClearBuff(pState->pFileState, pVal); +} + SStreamStateCur* streamStateFillGetCur(SStreamState* pState, const SWinKey* key) { #ifdef USE_ROCKSDB return streamStateFillGetCur_rocksdb(pState, key); @@ -569,39 +567,6 @@ int32_t streamStateGetGroupKVByCur(SStreamStateCur* pCur, SWinKey* pKey, const v #endif } -int32_t streamStateGetFirst(SStreamState* pState, SWinKey* key) { -#ifdef USE_ROCKSDB - return streamStateGetFirst_rocksdb(pState, key); -#else - // todo refactor - SWinKey tmp = {.ts = 0, .groupId = 0}; - streamStatePut(pState, &tmp, NULL, 0); - SStreamStateCur* pCur = streamStateSeekKeyNext(pState, &tmp); - int32_t code = streamStateGetKVByCur(pCur, key, NULL, 0); - streamStateFreeCur(pCur); - streamStateDel(pState, &tmp); - return code; -#endif -} - -int32_t streamStateSeekFirst(SStreamState* pState, SStreamStateCur* pCur) { -#ifdef USE_ROCKSDB - rocksdb_iter_seek_to_first(pCur->iter); - return 0; -#else - return tdbTbcMoveToFirst(pCur->pCur); -#endif -} - -int32_t streamStateSeekLast(SStreamState* pState, SStreamStateCur* pCur) { 
-#ifdef USE_ROCKSDB - rocksdb_iter_seek_to_last(pCur->iter); - return 0; -#else - return tdbTbcMoveToLast(pCur->pCur); -#endif -} - SStreamStateCur* streamStateSeekKeyNext(SStreamState* pState, const SWinKey* key) { #ifdef USE_ROCKSDB return streamStateSeekKeyNext_rocksdb(pState, key); @@ -693,7 +658,7 @@ SStreamStateCur* streamStateFillSeekKeyPrev(SStreamState* pState, const SWinKey* int32_t streamStateCurNext(SStreamState* pState, SStreamStateCur* pCur) { #ifdef USE_ROCKSDB - return streamStateCurNext_rocksdb(pState, pCur); + return sessionWinStateMoveToNext(pCur); #else if (!pCur) { return -1; @@ -713,16 +678,29 @@ int32_t streamStateCurPrev(SStreamState* pState, SStreamStateCur* pCur) { return tdbTbcMoveToPrev(pCur->pCur); #endif } -void streamStateFreeCur(SStreamStateCur* pCur) { + +void streamStateResetCur(SStreamStateCur* pCur) { if (!pCur) { return; } - qDebug("streamStateFreeCur"); - rocksdb_iter_destroy(pCur->iter); + if (pCur->iter) rocksdb_iter_destroy(pCur->iter); if (pCur->snapshot) rocksdb_release_snapshot(pCur->db, pCur->snapshot); - rocksdb_readoptions_destroy(pCur->readOpt); + if (pCur->readOpt) rocksdb_readoptions_destroy(pCur->readOpt); tdbTbcClose(pCur->pCur); + + memset(pCur, 0, sizeof(SStreamStateCur)); + + pCur->buffIndex = -1; +} + +void streamStateFreeCur(SStreamStateCur* pCur) { + if (!pCur || pCur->buffIndex >= 0) { + taosMemoryFree(pCur); + return; + } + qDebug("streamStateFreeCur"); + streamStateResetCur(pCur); taosMemoryFree(pCur); } @@ -734,11 +712,25 @@ void streamStateFreeVal(void* val) { #endif } -int32_t streamStateSessionPut(SStreamState* pState, const SSessionKey* key, const void* value, int32_t vLen) { +int32_t streamStateSessionPut(SStreamState* pState, const SSessionKey* key, void* value, int32_t vLen) { #ifdef USE_ROCKSDB - qDebug("===stream===save skey:%" PRId64 ", ekey:%" PRId64 ", groupId:%" PRIu64, key->win.skey, key->win.ekey, - key->groupId); - return streamStateSessionPut_rocksdb(pState, key, value, vLen); + int32_t 
code = TSDB_CODE_SUCCESS; + SRowBuffPos* pos = (SRowBuffPos*)value; + if (pos->needFree) { + if (isFlushedState(pState->pFileState, key->win.ekey, 0)) { + if (!pos->pRowBuff) { + return code; + } + code = streamStateSessionPut_rocksdb(pState, key, pos->pRowBuff, vLen); + streamStateReleaseBuf(pState, pos, true); + putFreeBuff(pState->pFileState, pos); + qDebug("===stream===save skey:%" PRId64 ", ekey:%" PRId64 ", groupId:%" PRIu64 ".code:%d", key->win.skey, + key->win.ekey, key->groupId, code); + } else { + code = putSessionWinResultBuff(pState->pFileState, value); + } + } + return code; #else SStateSessionKey sKey = {.key = *key, .opNum = pState->number}; return tdbTbUpsert(pState->pTdbState->pSessionStateDb, &sKey, sizeof(SStateSessionKey), value, vLen, @@ -748,7 +740,7 @@ int32_t streamStateSessionPut(SStreamState* pState, const SSessionKey* key, cons int32_t streamStateSessionGet(SStreamState* pState, SSessionKey* key, void** pVal, int32_t* pVLen) { #ifdef USE_ROCKSDB - return streamStateSessionGet_rocksdb(pState, key, pVal, pVLen); + return getSessionFlushedBuff(pState->pFileState, key, pVal, pVLen); #else SStreamStateCur* pCur = streamStateSessionSeekKeyCurrentNext(pState, key); @@ -773,7 +765,7 @@ int32_t streamStateSessionDel(SStreamState* pState, const SSessionKey* key) { #ifdef USE_ROCKSDB qDebug("===stream===delete skey:%" PRId64 ", ekey:%" PRId64 ", groupId:%" PRIu64, key->win.skey, key->win.ekey, key->groupId); - return streamStateSessionDel_rocksdb(pState, key); + return deleteRowBuff(pState->pFileState, key, sizeof(SSessionKey)); #else SStateSessionKey sKey = {.key = *key, .opNum = pState->number}; return tdbTbDelete(pState->pTdbState->pSessionStateDb, &sKey, sizeof(SStateSessionKey), pState->pTdbState->txn); @@ -782,7 +774,7 @@ int32_t streamStateSessionDel(SStreamState* pState, const SSessionKey* key) { SStreamStateCur* streamStateSessionSeekKeyCurrentPrev(SStreamState* pState, const SSessionKey* key) { #ifdef USE_ROCKSDB - return 
streamStateSessionSeekKeyCurrentPrev_rocksdb(pState, key); + return sessionWinStateSeekKeyCurrentPrev(pState->pFileState, key); #else SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); if (pCur == NULL) { @@ -813,7 +805,7 @@ SStreamStateCur* streamStateSessionSeekKeyCurrentPrev(SStreamState* pState, cons SStreamStateCur* streamStateSessionSeekKeyCurrentNext(SStreamState* pState, const SSessionKey* key) { #ifdef USE_ROCKSDB - return streamStateSessionSeekKeyCurrentNext_rocksdb(pState, (SSessionKey*)key); + return sessionWinStateSeekKeyCurrentNext(pState->pFileState, key); #else SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); if (pCur == NULL) { @@ -845,7 +837,7 @@ SStreamStateCur* streamStateSessionSeekKeyCurrentNext(SStreamState* pState, cons SStreamStateCur* streamStateSessionSeekKeyNext(SStreamState* pState, const SSessionKey* key) { #ifdef USE_ROCKSDB - return streamStateSessionSeekKeyNext_rocksdb(pState, key); + return sessionWinStateSeekKeyNext(pState->pFileState, key); #else SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); if (pCur == NULL) { @@ -876,7 +868,7 @@ SStreamStateCur* streamStateSessionSeekKeyNext(SStreamState* pState, const SSess int32_t streamStateSessionGetKVByCur(SStreamStateCur* pCur, SSessionKey* pKey, void** pVal, int32_t* pVLen) { #ifdef USE_ROCKSDB - return streamStateSessionGetKVByCur_rocksdb(pCur, pKey, pVal, pVLen); + return sessionWinStateGetKVByCur(pCur, pKey, pVal, pVLen); #else if (!pCur) { return -1; @@ -899,6 +891,7 @@ int32_t streamStateSessionGetKVByCur(SStreamStateCur* pCur, SSessionKey* pKey, v int32_t streamStateSessionClear(SStreamState* pState) { #ifdef USE_ROCKSDB + sessionWinStateClear(pState->pFileState); return streamStateSessionClear_rocksdb(pState); #else SSessionKey key = {.win.skey = 0, .win.ekey = 0, .groupId = 0}; @@ -923,7 +916,7 @@ int32_t streamStateSessionClear(SStreamState* pState) { int32_t streamStateSessionGetKeyByRange(SStreamState* pState, 
const SSessionKey* key, SSessionKey* curKey) { #ifdef USE_ROCKSDB - return streamStateSessionGetKeyByRange_rocksdb(pState, key, curKey); + return sessionWinStateGetKeyByRange(pState->pFileState, key, curKey); #else SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); if (pCur == NULL) { @@ -976,7 +969,7 @@ int32_t streamStateSessionGetKeyByRange(SStreamState* pState, const SSessionKey* int32_t streamStateSessionAddIfNotExist(SStreamState* pState, SSessionKey* key, TSKEY gap, void** pVal, int32_t* pVLen) { #ifdef USE_ROCKSDB - return streamStateSessionAddIfNotExist_rocksdb(pState, key, gap, pVal, pVLen); + return getSessionWinResultBuff(pState->pFileState, key, gap, pVal, pVLen); #else // todo refactor int32_t res = 0; @@ -1032,7 +1025,7 @@ int32_t streamStateStateAddIfNotExist(SStreamState* pState, SSessionKey* key, ch // todo refactor #ifdef USE_ROCKSDB - return streamStateStateAddIfNotExist_rocksdb(pState, key, pKeyData, keyDataLen, fn, pVal, pVLen); + return getStateWinResultBuff(pState->pFileState, key, pKeyData, keyDataLen, fn, pVal, pVLen); #else int32_t res = 0; SSessionKey tmpKey = *key; @@ -1143,6 +1136,12 @@ int32_t streamStateDeleteCheckPoint(SStreamState* pState, TSKEY mark) { void streamStateReloadInfo(SStreamState* pState, TSKEY ts) { streamFileStateReloadInfo(pState->pFileState, ts); } +SStreamStateCur* createStreamStateCursor() { + SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); + pCur->buffIndex = -1; + return pCur; +} + #if 0 char* streamStateSessionDump(SStreamState* pState) { SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur)); diff --git a/source/libs/stream/src/tstreamFileState.c b/source/libs/stream/src/tstreamFileState.c index be3ad73472..8f4c13c12d 100644 --- a/source/libs/stream/src/tstreamFileState.c +++ b/source/libs/stream/src/tstreamFileState.c @@ -16,7 +16,6 @@ #include "tstreamFileState.h" #include "query.h" -#include "storageapi.h" #include "streamBackendRocksdb.h" #include 
"taos.h" #include "tcommon.h" @@ -29,29 +28,98 @@ #define MIN_NUM_OF_ROW_BUFF 10240 struct SStreamFileState { - SList* usedBuffs; - SList* freeBuffs; - SSHashObj* rowBuffMap; - void* pFileStore; - int32_t rowSize; - int32_t selectivityRowSize; - int32_t keyLen; - uint64_t preCheckPointVersion; - uint64_t checkPointVersion; - TSKEY maxTs; - TSKEY deleteMark; - TSKEY flushMark; - uint64_t maxRowCount; - uint64_t curRowCount; - GetTsFun getTs; - char* id; + SList* usedBuffs; + SList* freeBuffs; + void* rowStateBuff; + void* pFileStore; + int32_t rowSize; + int32_t selectivityRowSize; + int32_t keyLen; + uint64_t preCheckPointVersion; + uint64_t checkPointVersion; + TSKEY maxTs; + TSKEY deleteMark; + TSKEY flushMark; + uint64_t maxRowCount; + uint64_t curRowCount; + GetTsFun getTs; + char* id; + char* cfName; + + _state_buff_cleanup_fn stateBuffCleanupFn; + _state_buff_remove_fn stateBuffRemoveFn; + _state_buff_remove_by_pos_fn stateBuffRemoveByPosFn; + _state_buff_create_statekey_fn stateBuffCreateStateKeyFn; + + _state_file_remove_fn stateFileRemoveFn; + _state_file_get_fn stateFileGetFn; + _state_file_clear_fn stateFileClearFn; }; typedef SRowBuffPos SRowBuffInfo; +int32_t stateHashBuffRemoveFn(void* pBuff, const void *pKey, size_t keyLen) { + SRowBuffPos** pos = tSimpleHashGet(pBuff, pKey, keyLen); + if (pos) { + (*pos)->beFlushed = true; + } + return tSimpleHashRemove(pBuff, pKey, keyLen); +} + +int32_t stateHashBuffRemoveByPosFn(SStreamFileState* pFileState, SRowBuffPos* pPos) { + size_t keyLen = pFileState->keyLen; + SRowBuffPos** ppPos = tSimpleHashGet(pFileState->rowStateBuff, pPos->pKey, keyLen); + if (ppPos) { + if ((*ppPos) == pPos) { + return tSimpleHashRemove(pFileState->rowStateBuff, pPos->pKey, keyLen); + } + } + return TSDB_CODE_SUCCESS; +} + +void stateHashBuffClearFn(void* pBuff) { + tSimpleHashClear(pBuff); +} + +void stateHashBuffCleanupFn(void* pBuff) { + tSimpleHashCleanup(pBuff); +} + +int32_t intervalFileRemoveFn(SStreamFileState* pFileState, 
const void* pKey) { + return streamStateDel_rocksdb(pFileState->pFileStore, pKey); +} + +int32_t intervalFileGetFn(SStreamFileState* pFileState, void* pKey, void* data, int32_t* pDataLen) { + return streamStateGet_rocksdb(pFileState->pFileStore, pKey, data, pDataLen); +} + +void* intervalCreateStateKey(SRowBuffPos* pPos, int64_t num) { + SStateKey* pStateKey = taosMemoryCalloc(1, sizeof(SStateKey)); + SWinKey* pWinKey = pPos->pKey; + pStateKey->key = *pWinKey; + pStateKey->opNum = num; + return pStateKey; +} + +int32_t sessionFileRemoveFn(SStreamFileState* pFileState, const void* pKey) { + return streamStateSessionDel_rocksdb(pFileState->pFileStore, pKey); +} + +int32_t sessionFileGetFn(SStreamFileState* pFileState, void* pKey, void* data, int32_t* pDataLen) { + return streamStateSessionGet_rocksdb(pFileState->pFileStore, pKey, data, pDataLen); +} + +void* sessionCreateStateKey(SRowBuffPos* pPos, int64_t num) { + SStateSessionKey* pStateKey = taosMemoryCalloc(1, sizeof(SStateSessionKey)); + SSessionKey* pWinKey = pPos->pKey; + pStateKey->key = *pWinKey; + pStateKey->opNum = num; + return pStateKey; +} + SStreamFileState* streamFileStateInit(int64_t memSize, uint32_t keySize, uint32_t rowSize, uint32_t selectRowSize, GetTsFun fp, void* pFile, TSKEY delMark, const char* taskId, - int64_t checkpointId) { + int64_t checkpointId, int8_t type) { if (memSize <= 0) { memSize = DEFAULT_MAX_STREAM_BUFFER_SIZE; } @@ -69,8 +137,31 @@ SStreamFileState* streamFileStateInit(int64_t memSize, uint32_t keySize, uint32_ pFileState->freeBuffs = tdListNew(POINTER_BYTES); _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); int32_t cap = TMIN(MIN_NUM_OF_ROW_BUFF, pFileState->maxRowCount); - pFileState->rowBuffMap = tSimpleHashInit(cap, hashFn); - if (!pFileState->usedBuffs || !pFileState->freeBuffs || !pFileState->rowBuffMap) { + if (type == STREAM_STATE_BUFF_HASH) { + pFileState->rowStateBuff = tSimpleHashInit(cap, hashFn); + pFileState->stateBuffCleanupFn = 
stateHashBuffCleanupFn; + pFileState->stateBuffRemoveFn = stateHashBuffRemoveFn; + pFileState->stateBuffRemoveByPosFn = stateHashBuffRemoveByPosFn; + pFileState->stateBuffCreateStateKeyFn = intervalCreateStateKey; + + pFileState->stateFileRemoveFn = intervalFileRemoveFn; + pFileState->stateFileGetFn = intervalFileGetFn; + pFileState->stateFileClearFn = streamStateClear_rocksdb; + pFileState->cfName = taosStrdup("state"); + } else { + pFileState->rowStateBuff = tSimpleHashInit(cap, hashFn); + pFileState->stateBuffCleanupFn = sessionWinStateCleanup; + pFileState->stateBuffRemoveFn = deleteSessionWinStateBuffFn; + pFileState->stateBuffRemoveByPosFn = deleteSessionWinStateBuffByPosFn; + pFileState->stateBuffCreateStateKeyFn = sessionCreateStateKey; + + pFileState->stateFileRemoveFn = sessionFileRemoveFn; + pFileState->stateFileGetFn = sessionFileGetFn; + pFileState->stateFileClearFn = streamStateSessionClear_rocksdb; + pFileState->cfName = taosStrdup("sess"); + } + + if (!pFileState->usedBuffs || !pFileState->freeBuffs || !pFileState->rowStateBuff) { goto _error; } @@ -87,7 +178,10 @@ SStreamFileState* streamFileStateInit(int64_t memSize, uint32_t keySize, uint32_ pFileState->maxTs = INT64_MIN; pFileState->id = taosStrdup(taskId); - recoverSnapshot(pFileState, checkpointId); + //todo(liuyao) optimize + if (type == STREAM_STATE_BUFF_HASH) { + recoverSnapshot(pFileState, checkpointId); + } return pFileState; _error: @@ -132,12 +226,20 @@ void streamFileStateDestroy(SStreamFileState* pFileState) { } taosMemoryFree(pFileState->id); + taosMemoryFree(pFileState->cfName); tdListFreeP(pFileState->usedBuffs, destroyRowBuffAllPosPtr); tdListFreeP(pFileState->freeBuffs, destroyRowBuff); - tSimpleHashCleanup(pFileState->rowBuffMap); + pFileState->stateBuffCleanupFn(pFileState->rowStateBuff); taosMemoryFree(pFileState); } +void putFreeBuff(SStreamFileState* pFileState, SRowBuffPos* pPos) { + if (pPos->pRowBuff) { + tdListAppend(pFileState->freeBuffs, &(pPos->pRowBuff)); + 
pPos->pRowBuff = NULL; + } +} + void clearExpiredRowBuff(SStreamFileState* pFileState, TSKEY ts, bool all) { SListIter iter = {0}; tdListInitIter(pFileState->usedBuffs, &iter, TD_LIST_FORWARD); @@ -146,11 +248,10 @@ void clearExpiredRowBuff(SStreamFileState* pFileState, TSKEY ts, bool all) { while ((pNode = tdListNext(&iter)) != NULL) { SRowBuffPos* pPos = *(SRowBuffPos**)(pNode->data); if (all || (pFileState->getTs(pPos->pKey) < ts && !pPos->beUsed)) { - ASSERT(pPos->pRowBuff != NULL); - tdListAppend(pFileState->freeBuffs, &(pPos->pRowBuff)); - pPos->pRowBuff = NULL; + putFreeBuff(pFileState, pPos); + if (!all) { - tSimpleHashRemove(pFileState->rowBuffMap, pPos->pKey, pFileState->keyLen); + pFileState->stateBuffRemoveByPosFn(pFileState, pPos); } destroyRowBuffPos(pPos); tdListPopNode(pFileState->usedBuffs, pNode); @@ -159,15 +260,40 @@ void clearExpiredRowBuff(SStreamFileState* pFileState, TSKEY ts, bool all) { } } +void clearFlushedRowBuff(SStreamFileState* pFileState, SStreamSnapshot* pFlushList, uint64_t max) { + uint64_t i = 0; + SListIter iter = {0}; + tdListInitIter(pFileState->usedBuffs, &iter, TD_LIST_FORWARD); + + SListNode* pNode = NULL; + while ((pNode = tdListNext(&iter)) != NULL && i < max) { + SRowBuffPos* pPos = *(SRowBuffPos**)pNode->data; + if (isFlushedState(pFileState, pFileState->getTs(pPos->pKey), 0) && !pPos->beUsed) { + tdListAppend(pFlushList, &pPos); + pFileState->flushMark = TMAX(pFileState->flushMark, pFileState->getTs(pPos->pKey)); + pFileState->stateBuffRemoveByPosFn(pFileState, pPos); + tdListPopNode(pFileState->usedBuffs, pNode); + taosMemoryFreeClear(pNode); + if (pPos->pRowBuff) { + i++; + } + } + } +} + void streamFileStateClear(SStreamFileState* pFileState) { pFileState->flushMark = INT64_MIN; pFileState->maxTs = INT64_MIN; - tSimpleHashClear(pFileState->rowBuffMap); + tSimpleHashClear(pFileState->rowStateBuff); clearExpiredRowBuff(pFileState, 0, true); } bool needClearDiskBuff(SStreamFileState* pFileState) { return 
pFileState->flushMark > 0; } +void streamFileStateReleaseBuff(SStreamFileState* pFileState, SRowBuffPos* pPos, bool used) { + pPos->beUsed = used; +} + void popUsedBuffs(SStreamFileState* pFileState, SStreamSnapshot* pFlushList, uint64_t max, bool used) { uint64_t i = 0; SListIter iter = {0}; @@ -179,10 +305,12 @@ void popUsedBuffs(SStreamFileState* pFileState, SStreamSnapshot* pFlushList, uin if (pPos->beUsed == used) { tdListAppend(pFlushList, &pPos); pFileState->flushMark = TMAX(pFileState->flushMark, pFileState->getTs(pPos->pKey)); - tSimpleHashRemove(pFileState->rowBuffMap, pPos->pKey, pFileState->keyLen); + pFileState->stateBuffRemoveByPosFn(pFileState, pPos); tdListPopNode(pFileState->usedBuffs, pNode); taosMemoryFreeClear(pNode); - i++; + if (pPos->pRowBuff) { + i++; + } } } @@ -197,10 +325,13 @@ int32_t flushRowBuff(SStreamFileState* pFileState) { uint64_t num = (uint64_t)(pFileState->curRowCount * FLUSH_RATIO); num = TMAX(num, FLUSH_NUM); - popUsedBuffs(pFileState, pFlushList, num, false); - + clearFlushedRowBuff(pFileState, pFlushList, num); if (isListEmpty(pFlushList)) { - popUsedBuffs(pFileState, pFlushList, num, true); + popUsedBuffs(pFileState, pFlushList, num, false); + + if (isListEmpty(pFlushList)) { + popUsedBuffs(pFileState, pFlushList, num, true); + } } flushSnapshot(pFileState, pFlushList, false); @@ -210,9 +341,7 @@ int32_t flushRowBuff(SStreamFileState* pFileState) { SListNode* pNode = NULL; while ((pNode = tdListNext(&fIter)) != NULL) { SRowBuffPos* pPos = *(SRowBuffPos**)pNode->data; - ASSERT(pPos->pRowBuff != NULL); - tdListAppend(pFileState->freeBuffs, &pPos->pRowBuff); - pPos->pRowBuff = NULL; + putFreeBuff(pFileState, pPos); } tdListFreeP(pFlushList, destroyRowBuffPosPtr); @@ -227,7 +356,9 @@ int32_t clearRowBuff(SStreamFileState* pFileState) { return TSDB_CODE_SUCCESS; } -void* getFreeBuff(SList* lists, int32_t buffSize) { +void* getFreeBuff(SStreamFileState* pFileState) { + SList* lists = pFileState->freeBuffs; + int32_t buffSize = 
pFileState->rowSize; SListNode* pNode = tdListPopHead(lists); if (!pNode) { return NULL; @@ -238,10 +369,18 @@ void* getFreeBuff(SList* lists, int32_t buffSize) { return ptr; } +int32_t streamFileStateClearBuff(SStreamFileState* pFileState, SRowBuffPos* pPos) { + if (pPos->pRowBuff) { + memset(pPos->pRowBuff, 0, pFileState->rowSize); + return TSDB_CODE_SUCCESS; + } + return TSDB_CODE_FAILED; +} + SRowBuffPos* getNewRowPos(SStreamFileState* pFileState) { SRowBuffPos* pPos = taosMemoryCalloc(1, sizeof(SRowBuffPos)); pPos->pKey = taosMemoryCalloc(1, pFileState->keyLen); - void* pBuff = getFreeBuff(pFileState->freeBuffs, pFileState->rowSize); + void* pBuff = getFreeBuff(pFileState); if (pBuff) { pPos->pRowBuff = pBuff; goto _end; @@ -258,7 +397,7 @@ SRowBuffPos* getNewRowPos(SStreamFileState* pFileState) { int32_t code = clearRowBuff(pFileState); ASSERT(code == 0); - pPos->pRowBuff = getFreeBuff(pFileState->freeBuffs, pFileState->rowSize); + pPos->pRowBuff = getFreeBuff(pFileState); _end: tdListAppend(pFileState->usedBuffs, &pPos); @@ -266,9 +405,17 @@ _end: return pPos; } +SRowBuffPos* getNewRowPosForWrite(SStreamFileState* pFileState) { + SRowBuffPos* newPos = getNewRowPos(pFileState); + newPos->beUsed = true; + newPos->beFlushed = false; + newPos->needFree = false; + return newPos; +} + int32_t getRowBuff(SStreamFileState* pFileState, void* pKey, int32_t keyLen, void** pVal, int32_t* pVLen) { pFileState->maxTs = TMAX(pFileState->maxTs, pFileState->getTs(pKey)); - SRowBuffPos** pos = tSimpleHashGet(pFileState->rowBuffMap, pKey, keyLen); + SRowBuffPos** pos = tSimpleHashGet(pFileState->rowStateBuff, pKey, keyLen); if (pos) { *pVLen = pFileState->rowSize; *pVal = *pos; @@ -276,14 +423,12 @@ int32_t getRowBuff(SStreamFileState* pFileState, void* pKey, int32_t keyLen, voi (*pos)->beFlushed = false; return TSDB_CODE_SUCCESS; } - SRowBuffPos* pNewPos = getNewRowPos(pFileState); - pNewPos->beUsed = true; - pNewPos->beFlushed = false; + SRowBuffPos* pNewPos = 
getNewRowPosForWrite(pFileState); ASSERT(pNewPos->pRowBuff); memcpy(pNewPos->pKey, pKey, keyLen); TSKEY ts = pFileState->getTs(pKey); - if (ts > pFileState->maxTs - pFileState->deleteMark && ts < pFileState->flushMark) { + if (!isDeteled(pFileState, ts) && isFlushedState(pFileState, ts, 0)) { int32_t len = 0; void* p = NULL; int32_t code = streamStateGet_rocksdb(pFileState->pFileStore, pKey, &p, &len); @@ -294,7 +439,7 @@ int32_t getRowBuff(SStreamFileState* pFileState, void* pKey, int32_t keyLen, voi taosMemoryFree(p); } - tSimpleHashPut(pFileState->rowBuffMap, pKey, keyLen, &pNewPos, POINTER_BYTES); + tSimpleHashPut(pFileState->rowStateBuff, pKey, keyLen, &pNewPos, POINTER_BYTES); if (pVal) { *pVLen = pFileState->rowSize; *pVal = pNewPos; @@ -303,45 +448,60 @@ int32_t getRowBuff(SStreamFileState* pFileState, void* pKey, int32_t keyLen, voi } int32_t deleteRowBuff(SStreamFileState* pFileState, const void* pKey, int32_t keyLen) { - int32_t code_buff = tSimpleHashRemove(pFileState->rowBuffMap, pKey, keyLen); - int32_t code_rocks = streamStateDel_rocksdb(pFileState->pFileStore, pKey); - return code_buff == TSDB_CODE_SUCCESS ? 
code_buff : code_rocks; + int32_t code_buff = pFileState->stateBuffRemoveFn(pFileState->rowStateBuff, pKey, keyLen); + int32_t code_file = pFileState->stateFileRemoveFn(pFileState, pKey); + if (code_buff == TSDB_CODE_SUCCESS || code_file == TSDB_CODE_SUCCESS) { + return TSDB_CODE_SUCCESS; + } + return TSDB_CODE_FAILED; +} + +static void recoverSessionRowBuff(SStreamFileState* pFileState, SRowBuffPos* pPos) { + int32_t len = 0; + void* pBuff = NULL; + pFileState->stateFileGetFn(pFileState, pPos->pKey, &pBuff, &len); + memcpy(pPos->pRowBuff, pBuff, len); + taosMemoryFree(pBuff); } int32_t getRowBuffByPos(SStreamFileState* pFileState, SRowBuffPos* pPos, void** pVal) { if (pPos->pRowBuff) { + if (pPos->needFree) { + recoverSessionRowBuff(pFileState, pPos); + } (*pVal) = pPos->pRowBuff; return TSDB_CODE_SUCCESS; } - pPos->pRowBuff = getFreeBuff(pFileState->freeBuffs, pFileState->rowSize); + pPos->pRowBuff = getFreeBuff(pFileState); if (!pPos->pRowBuff) { - int32_t code = clearRowBuff(pFileState); - ASSERT(code == 0); - pPos->pRowBuff = getFreeBuff(pFileState->freeBuffs, pFileState->rowSize); + if (pFileState->curRowCount < pFileState->maxRowCount) { + pPos->pRowBuff = taosMemoryCalloc(1, pFileState->rowSize); + pFileState->curRowCount++; + } else { + int32_t code = clearRowBuff(pFileState); + ASSERT(code == 0); + pPos->pRowBuff = getFreeBuff(pFileState); + } ASSERT(pPos->pRowBuff); } - int32_t len = 0; - void* pBuff = NULL; - streamStateGet_rocksdb(pFileState->pFileStore, pPos->pKey, &pBuff, &len); - memcpy(pPos->pRowBuff, pBuff, len); - taosMemoryFree(pBuff); + recoverSessionRowBuff(pFileState, pPos); (*pVal) = pPos->pRowBuff; - tdListPrepend(pFileState->usedBuffs, &pPos); + if (!pPos->needFree) { + tdListPrepend(pFileState->usedBuffs, &pPos); + } return TSDB_CODE_SUCCESS; } bool hasRowBuff(SStreamFileState* pFileState, void* pKey, int32_t keyLen) { - SRowBuffPos** pos = tSimpleHashGet(pFileState->rowBuffMap, pKey, keyLen); + SRowBuffPos** pos = 
tSimpleHashGet(pFileState->rowStateBuff, pKey, keyLen); if (pos) { return true; } return false; } -void releaseRowBuffPos(SRowBuffPos* pBuff) { pBuff->beUsed = false; } - SStreamSnapshot* getSnapshot(SStreamFileState* pFileState) { int64_t mark = (INT64_MIN + pFileState->deleteMark >= pFileState->maxTs) ? INT64_MIN : pFileState->maxTs - pFileState->deleteMark; @@ -349,13 +509,13 @@ SStreamSnapshot* getSnapshot(SStreamFileState* pFileState) { return pFileState->usedBuffs; } -void streamFileStateDecode(TSKEY* key, void* pBuff, int32_t len) { pBuff = taosDecodeFixedI64(pBuff, key); } +void streamFileStateDecode(TSKEY* pKey, void* pBuff, int32_t len) { pBuff = taosDecodeFixedI64(pBuff, pKey); } -void streamFileStateEncode(TSKEY* key, void** pVal, int32_t* pLen) { +void streamFileStateEncode(TSKEY* pKey, void** pVal, int32_t* pLen) { *pLen = sizeof(TSKEY); (*pVal) = taosMemoryCalloc(1, *pLen); void* buff = *pVal; - taosEncodeFixedI64(&buff, *key); + taosEncodeFixedI64(&buff, *pKey); } int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot, bool flushState) { @@ -369,7 +529,7 @@ int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot, int32_t numOfElems = listNEles(pSnapshot); SListNode* pNode = NULL; - int idx = streamStateGetCfIdx(pFileState->pFileStore, "state"); + int idx = streamStateGetCfIdx(pFileState->pFileStore, pFileState->cfName); int32_t len = pFileState->rowSize + sizeof(uint64_t) + sizeof(int32_t) + 1; char* buf = taosMemoryCalloc(1, len); @@ -377,23 +537,23 @@ int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot, void* batch = streamStateCreateBatch(); while ((pNode = tdListNext(&iter)) != NULL && code == TSDB_CODE_SUCCESS) { SRowBuffPos* pPos = *(SRowBuffPos**)pNode->data; - ASSERT(pPos->pRowBuff && pFileState->rowSize > 0); - if (pPos->beFlushed) { + if (pPos->beFlushed || !pPos->pRowBuff) { continue; } pPos->beFlushed = true; + qDebug("===stream===flushed start:%" PRId64, 
pFileState->getTs(pPos->pKey)); if (streamStateGetBatchSize(batch) >= BATCH_LIMIT) { streamStatePutBatch_rocksdb(pFileState->pFileStore, batch); streamStateClearBatch(batch); } - SStateKey sKey = {.key = *((SWinKey*)pPos->pKey), .opNum = ((SStreamState*)pFileState->pFileStore)->number}; - code = streamStatePutBatchOptimize(pFileState->pFileStore, idx, batch, &sKey, pPos->pRowBuff, pFileState->rowSize, + void* pSKey = pFileState->stateBuffCreateStateKeyFn(pPos, ((SStreamState*)pFileState->pFileStore)->number); + code = streamStatePutBatchOptimize(pFileState->pFileStore, idx, batch, pSKey, pPos->pRowBuff, pFileState->rowSize, 0, buf); + taosMemoryFreeClear(pSKey); // todo handle failure memset(buf, 0, len); - // qDebug("===stream===put %" PRId64 " to disc, res %d", sKey.key.ts, code); } taosMemoryFree(buf); @@ -508,7 +668,7 @@ int32_t recoverSnapshot(SStreamFileState* pFileState, int64_t ckId) { } void* pVal = NULL; int32_t pVLen = 0; - SRowBuffPos* pNewPos = getNewRowPos(pFileState); + SRowBuffPos* pNewPos = getNewRowPosForWrite(pFileState); code = streamStateGetKVByCur_rocksdb(pCur, pNewPos->pKey, (const void**)&pVal, &pVLen); if (code != TSDB_CODE_SUCCESS || pFileState->getTs(pNewPos->pKey) < pFileState->flushMark) { destroyRowBuffPos(pNewPos); @@ -521,7 +681,7 @@ int32_t recoverSnapshot(SStreamFileState* pFileState, int64_t ckId) { memcpy(pNewPos->pRowBuff, pVal, pVLen); taosMemoryFreeClear(pVal); pNewPos->beFlushed = true; - code = tSimpleHashPut(pFileState->rowBuffMap, pNewPos->pKey, pFileState->keyLen, &pNewPos, POINTER_BYTES); + code = tSimpleHashPut(pFileState->rowStateBuff, pNewPos->pKey, pFileState->keyLen, &pNewPos, POINTER_BYTES); if (code != TSDB_CODE_SUCCESS) { destroyRowBuffPos(pNewPos); break; @@ -539,3 +699,23 @@ void streamFileStateReloadInfo(SStreamFileState* pFileState, TSKEY ts) { pFileState->flushMark = TMAX(pFileState->flushMark, ts); pFileState->maxTs = TMAX(pFileState->maxTs, ts); } + +void* getRowStateBuff(SStreamFileState* pFileState) { + 
return pFileState->rowStateBuff; +} + +void* getStateFileStore(SStreamFileState* pFileState) { + return pFileState->pFileStore; +} + +bool isDeteled(SStreamFileState* pFileState, TSKEY ts) { + return pFileState->deleteMark > 0 && ts < (pFileState->maxTs - pFileState->deleteMark); +} + +bool isFlushedState(SStreamFileState* pFileState, TSKEY ts, TSKEY gap) { + return ts <= (pFileState->flushMark + gap); +} + +int32_t getRowStateRowSize(SStreamFileState* pFileState) { + return pFileState->rowSize; +} diff --git a/source/os/src/osSemaphore.c b/source/os/src/osSemaphore.c index c1ef57e9c5..dda4b14901 100644 --- a/source/os/src/osSemaphore.c +++ b/source/os/src/osSemaphore.c @@ -68,37 +68,34 @@ int32_t taosGetAppName(char* name, int32_t* len) { } int32_t tsem_wait(tsem_t* sem) { - int ret = 0; - do { - ret = sem_wait(sem); - } while (ret != 0 && errno == EINTR); - return ret; + return WaitForSingleObject(*sem, INFINITE); } -int32_t tsem_timewait(tsem_t* sem, int64_t ms) { - struct timespec ts; - taosClockGetTime(0, &ts); +int32_t tsem_timewait(tsem_t* sem, int64_t timeout_ms) { + DWORD result = WaitForSingleObject(*sem, timeout_ms); + if (result == WAIT_OBJECT_0) { + return 0; // Semaphore acquired + } else if (result == WAIT_TIMEOUT) { + return -1; // Timeout reached + } else { + return result; + } +} - ts.tv_nsec += ms * 1000000; - ts.tv_sec += ts.tv_nsec / 1000000000; - ts.tv_nsec %= 1000000000; - int rc; - while ((rc = sem_timedwait(sem, &ts)) == -1 && errno == EINTR) continue; - return rc; - /* This should have timed out */ - // ASSERT(errno == ETIMEDOUT); - // ASSERT(rc != 0); - // GetSystemTimeAsFileTime(&ft_after); - // // We specified a non-zero wait. Time must advance. - // if (ft_before.dwLowDateTime == ft_after.dwLowDateTime && ft_before.dwHighDateTime == ft_after.dwHighDateTime) - // { - // printf("nanoseconds: %d, rc: %d, code:0x%x. 
before filetime: %d, %d; after filetime: %d, %d\n", - // nanosecs, rc, errno, - // (int)ft_before.dwLowDateTime, (int)ft_before.dwHighDateTime, - // (int)ft_after.dwLowDateTime, (int)ft_after.dwHighDateTime); - // printf("time must advance during sem_timedwait."); - // return 1; - // } +// Inter-process sharing is not currently supported. The pshared parameter is invalid. +int tsem_init(tsem_t* sem, int pshared, unsigned int value) { + *sem = CreateSemaphore(NULL, value, LONG_MAX, NULL); + return (*sem != NULL) ? 0 : -1; +} + +int tsem_post(tsem_t* sem) { + if (ReleaseSemaphore(*sem, 1, NULL)) return 0; + return -1; +} + +int tsem_destroy(tsem_t* sem) { + if (CloseHandle(*sem)) return 0; + return -1; } #elif defined(_TD_DARWIN_64) @@ -133,8 +130,7 @@ int tsem_wait(tsem_t *psem) { int tsem_timewait(tsem_t *psem, int64_t milis) { if (psem == NULL || *psem == NULL) return -1; dispatch_time_t time = dispatch_time(DISPATCH_TIME_NOW, (int64_t)(milis * USEC_PER_SEC)); - dispatch_semaphore_wait(*psem, time); - return 0; + return dispatch_semaphore_wait(*psem, time); } bool taosCheckPthreadValid(TdThread thread) { return thread != 0; } diff --git a/source/os/test/CMakeLists.txt b/source/os/test/CMakeLists.txt index fba4d23e3f..324920f37b 100644 --- a/source/os/test/CMakeLists.txt +++ b/source/os/test/CMakeLists.txt @@ -77,4 +77,11 @@ target_link_libraries(osAtomicTests os util gtest_main) add_test( NAME osAtomicTests COMMAND osAtomicTests -) \ No newline at end of file +) + +add_executable(osSemaphoreTests "osSemaphoreTests.cpp") +target_link_libraries(osSemaphoreTests os util gtest_main) +add_test( + NAME osSemaphoreTests + COMMAND osSemaphoreTests +) diff --git a/source/os/test/osSemaphoreTests.cpp b/source/os/test/osSemaphoreTests.cpp new file mode 100644 index 0000000000..67cf4bb517 --- /dev/null +++ b/source/os/test/osSemaphoreTests.cpp @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. 
+ * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <gtest/gtest.h> +#include <iostream> +#include "os.h" +#include "tlog.h" + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wwrite-strings" +#pragma GCC diagnostic ignored "-Wunused-function" +#pragma GCC diagnostic ignored "-Wunused-variable" +#pragma GCC diagnostic ignored "-Wsign-compare" +#pragma GCC diagnostic ignored "-Wsign-compare" +#pragma GCC diagnostic ignored "-Wformat" +#pragma GCC diagnostic ignored "-Wint-to-pointer-cast" +#pragma GCC diagnostic ignored "-Wpointer-arith" + +TEST(osSemaphoreTests, InitAndDestroy) { + tsem_t sem; + int result = tsem_init(&sem, 0, 1); + EXPECT_EQ(result, 0); + + result = tsem_destroy(&sem); + EXPECT_EQ(result, 0); +} + +TEST(osSemaphoreTests, Destroy) { + tsem_t sem; + int result = tsem_init(&sem, 0, 1); + EXPECT_EQ(result, 0); + + result = tsem_destroy(&sem); + EXPECT_EQ(result, 0); + // result = tsem_destroy(&sem); + // EXPECT_NE(result, 0); // result == 0 if on mac +} + +// skip: tsem_wait cannot be interrupted and would block the test. 
+// TEST(osSemaphoreTests, Wait) { +// tsem_t sem; +// tsem_init(&sem, 0, 0); +// ASSERT_EQ(tsem_wait(&sem), -1); +// tsem_destroy(&sem); +// } + +TEST(osSemaphoreTests, WaitTime0) { + tsem_t sem; + tsem_init(&sem, 0, 0); + EXPECT_NE(tsem_timewait(&sem, 1000), 0); + tsem_destroy(&sem); +} + +TEST(osSemaphoreTests, WaitTime1) { + tsem_t sem; + tsem_init(&sem, 0, 1); + EXPECT_EQ(tsem_timewait(&sem, 1000), 0); + EXPECT_NE(tsem_timewait(&sem, 1000), 0); + tsem_destroy(&sem); +} + + +TEST(osSemaphoreTests, WaitAndPost) { + tsem_t sem; + int result = tsem_init(&sem, 0, 0); + EXPECT_EQ(result, 0); + + std::thread([&sem]() { + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + tsem_post(&sem); + }).detach(); + + result = tsem_wait(&sem); + EXPECT_EQ(result, 0); + + result = tsem_destroy(&sem); + EXPECT_EQ(result, 0); +} + + +TEST(osSemaphoreTests, TimedWait) { + tsem_t sem; + int result = tsem_init(&sem, 0, 0); + EXPECT_EQ(result, 0); + + std::thread([&sem]() { + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + tsem_post(&sem); + }).detach(); + + result = tsem_timewait(&sem, 1000); + EXPECT_EQ(result, 0); + + result = tsem_destroy(&sem); + EXPECT_EQ(result, 0); +} diff --git a/source/util/src/terror.c b/source/util/src/terror.c index 9832720994..b4a1a2eae2 100644 --- a/source/util/src/terror.c +++ b/source/util/src/terror.c @@ -430,6 +430,13 @@ TAOS_DEFINE_ERROR(TSDB_CODE_GRANT_QUERYTIME_LIMITED, "Query time limited by TAOS_DEFINE_ERROR(TSDB_CODE_GRANT_CPU_LIMITED, "CPU cores limited by license") TAOS_DEFINE_ERROR(TSDB_CODE_GRANT_STABLE_LIMITED, "STable creation limited by license") TAOS_DEFINE_ERROR(TSDB_CODE_GRANT_TABLE_LIMITED, "Table creation limited by license") +TAOS_DEFINE_ERROR(TSDB_CODE_GRANT_PAR_IVLD_ACTIVE, "Invalid active code") +TAOS_DEFINE_ERROR(TSDB_CODE_GRANT_PAR_IVLD_KEY, "Invalid key to parse active code") +TAOS_DEFINE_ERROR(TSDB_CODE_GRANT_PAR_DEC_IVLD_KEY, "Invalid key to decode active code") 
+TAOS_DEFINE_ERROR(TSDB_CODE_GRANT_PAR_DEC_IVLD_KLEN, "Invalid klen to decode active code") +TAOS_DEFINE_ERROR(TSDB_CODE_GRANT_GEN_IVLD_KEY, "Invalid key to gen active code") +TAOS_DEFINE_ERROR(TSDB_CODE_GRANT_GEN_APP_LIMIT, "Limited app num to gen active code") +TAOS_DEFINE_ERROR(TSDB_CODE_GRANT_GEN_ENC_IVLD_KLEN, "Invalid klen to encode active code") // sync TAOS_DEFINE_ERROR(TSDB_CODE_SYN_TIMEOUT, "Sync timeout") diff --git a/tests/parallel_test/cases.task b/tests/parallel_test/cases.task index cf161ab004..99c58a4603 100644 --- a/tests/parallel_test/cases.task +++ b/tests/parallel_test/cases.task @@ -209,7 +209,8 @@ ,,n,system-test,python3 ./test.py -f 0-others/tag_index_basic.py ,,n,system-test,python3 ./test.py -f 0-others/udfpy_main.py ,,n,system-test,python3 ./test.py -N 3 -f 0-others/walRetention.py -#,,n,system-test,python3 ./test.py -f 0-others/splitVGroup.py -N 5 +,,y,system-test,./pytest.sh python3 ./test.py -f 0-others/splitVGroupRep1.py -N 3 +,,y,system-test,./pytest.sh python3 ./test.py -f 0-others/splitVGroupRep3.py -N 3 ,,n,system-test,python3 ./test.py -f 0-others/timeRangeWise.py -N 3 ,,y,system-test,./pytest.sh python3 ./test.py -f 1-insert/alter_database.py ,,y,system-test,./pytest.sh python3 ./test.py -f 1-insert/alter_replica.py -N 3 @@ -817,6 +818,7 @@ ,,y,system-test,./pytest.sh python3 ./test.py -f 99-TDcase/TD-20582.py ,,n,system-test,python3 ./test.py -f 5-taos-tools/taosbenchmark/insertMix.py -N 3 ,,n,system-test,python3 ./test.py -f 5-taos-tools/taosbenchmark/stt.py -N 3 +,,n,system-test,python3 ./test.py -f eco-system/meta/database/keep_time_offset.py #tsim test ,,y,script,./test.sh -f tsim/tmq/basic2Of2ConsOverlap.sim diff --git a/tests/script/tsim/parser/alter.sim b/tests/script/tsim/parser/alter.sim index f2481576d1..99e014a011 100644 --- a/tests/script/tsim/parser/alter.sim +++ b/tests/script/tsim/parser/alter.sim @@ -17,7 +17,7 @@ $db = $dbPrefix . $i $mt = $mtPrefix . 
$i sql drop database if exists $db -sql create database $db duration 10 keep 20,20,20 +sql create database $db duration 3 keep 20,20,20 sql use $db sql_error alter database $db keep "20" @@ -27,8 +27,8 @@ sql_error alter database $db keep 20.0 sql_error alter database $db keep 20.0,20.0,20.0 sql_error alter database $db keep 0,0,0 sql_error alter database $db keep -1,-1,-1 -sql_error alter database $db keep 9,20 -sql_error alter database $db keep 9,9,9 +sql_error alter database $db keep 8,20 +sql_error alter database $db keep 8,9,9 sql_error alter database $db keep 20,20,19 sql_error alter database $db keep 20,19,20 sql_error alter database $db keep 20,19,19 diff --git a/tests/script/tsim/parser/alter__for_community_version.sim b/tests/script/tsim/parser/alter__for_community_version.sim index 48fb2f8246..29c748d441 100644 --- a/tests/script/tsim/parser/alter__for_community_version.sim +++ b/tests/script/tsim/parser/alter__for_community_version.sim @@ -17,7 +17,7 @@ $db = $dbPrefix . $i $mt = $mtPrefix . 
$i sql drop database if exists $db -sql create database $db duration 10 keep 20 +sql create database $db duration 3 keep 20 sql use $db sql select * from information_schema.ins_databases if $rows != 3 then @@ -36,7 +36,7 @@ sql_error alter database $db keep 0,0,0 sql_error alter database $db keep 3 sql_error alter database $db keep -1,-1,-1 sql alter database $db keep 20,20 -sql_error alter database $db keep 9,9,9 +sql_error alter database $db keep 8,9,9 sql_error alter database $db keep 20,20,19 sql_error alter database $db keep 20,19,20 sql_error alter database $db keep 20,19,19 diff --git a/tests/script/tsim/parser/create_db.sim b/tests/script/tsim/parser/create_db.sim index db25240262..a5722f9819 100644 --- a/tests/script/tsim/parser/create_db.sim +++ b/tests/script/tsim/parser/create_db.sim @@ -150,26 +150,26 @@ sql_error create database $db keep 12,11,12 sql_error create database $db keep 8 sql_error create database $db keep 12,11 sql_error create database $db keep 365001,365001,365001 -sql create database dbk0 keep 19 +sql create database dbk0 keep 39 sql select * from information_schema.ins_databases if $rows != 3 then return -1 endi -if $data27 != 27360m,27360m,27360m then +if $data27 != 56160m,56160m,56160m then return -1 endi sql drop database dbk0 -sql create database dbka keep 19,20 +sql create database dbka keep 39,40 sql select * from information_schema.ins_databases if $rows != 3 then return -1 endi -if $data27 != 27360m,28800m,28800m then +if $data27 != 56160m,57600m,57600m then return -1 endi sql drop database dbka -sql create database dbk1 keep 11,11,11 +sql create database dbk1 duration 3 keep 11,11,11 sql select * from information_schema.ins_databases if $rows != 3 then return -1 @@ -178,7 +178,7 @@ if $data27 != 15840m,15840m,15840m then return -1 endi sql drop database dbk1 -sql create database dbk2 keep 11,12,13 +sql create database dbk2 duration 3 keep 11,12,13 sql select * from information_schema.ins_databases if $rows != 3 then return -1 
@@ -187,7 +187,7 @@ if $data27 != 15840m,17280m,18720m then return -1 endi sql drop database dbk2 -sql create database dbk3 keep 11,11,13 +sql create database dbk3 duration 3 keep 11,11,13 sql select * from information_schema.ins_databases if $rows != 3 then return -1 @@ -196,7 +196,7 @@ if $data27 != 15840m,15840m,18720m then return -1 endi sql drop database dbk3 -sql create database dbk4 keep 11,13,13 +sql create database dbk4 duration 3 keep 11,13,13 sql select * from information_schema.ins_databases if $rows != 3 then return -1 diff --git a/tests/script/tsim/query/show_db_table_kind.sim b/tests/script/tsim/query/show_db_table_kind.sim index 9be2d36fd1..a35ca78a34 100644 --- a/tests/script/tsim/query/show_db_table_kind.sim +++ b/tests/script/tsim/query/show_db_table_kind.sim @@ -94,4 +94,14 @@ print $rows if $rows != 5 then return -1 endi +sql show child db2.tables like '%' +print $rows +if $rows != 5 then + return -1 +endi +sql show normal db2.tables like '%' +print $rows +if $rows != 0 then + return -1 +endi system sh/exec.sh -n dnode1 -s stop -x SIGINT diff --git a/tests/script/tsim/stream/basic4.sim b/tests/script/tsim/stream/basic4.sim index 29cbef3109..b4e3d62545 100644 --- a/tests/script/tsim/stream/basic4.sim +++ b/tests/script/tsim/stream/basic4.sim @@ -8,10 +8,12 @@ sleep 500 sql connect +print step1============= + sql create database test vgroups 1; sql use test; sql create table t1(ts timestamp, a int, b int , c int, d double); -sql create stream streams1 trigger at_once into streamt as select _wstart, count(*) c1 from t1 interval(1s); +sql create stream streams0 trigger at_once ignore expired 0 ignore update 0 into streamt as select _wstart, count(*) c1 from t1 interval(1s); sql insert into t1 values(1648791211000,1,2,3,1.0); sql insert into t1 values(1648791212001,2,2,3,1.1); @@ -71,13 +73,13 @@ if $rows != 29 then goto loop1 endi - +print step2============= sql create database test2 vgroups 10; sql use test2; sql create stable st(ts timestamp, a 
int, b int , c int, d double) tags(ta int,tb int,tc int); sql create table t1 using st tags(1,1,1); -sql create stream streams2 trigger at_once ignore expired 0 waterMark 200s into streamt2 as select _wstart, count(*) c1 from t1 interval(1s); +sql create stream streams2 trigger at_once ignore expired 0 ignore update 0 waterMark 200s into streamt2 as select _wstart, count(*) c1 from t1 interval(1s); sql insert into t1 values(1648791211000,1,2,3,1.0); sql insert into t1 values(1648791212001,2,2,3,1.1); @@ -137,4 +139,184 @@ if $rows != 29 then goto loop3 endi +print step3============= + +sql create database test1 vgroups 1; +sql use test1; +sql create table t1(ts timestamp, a int, b int , c int, d double); +sql create stream streams1 trigger at_once ignore expired 0 ignore update 0 into streamt1 as select _wstart, count(*) c1 from t1 session(ts, 1s); + +sql insert into t1 values(1648791211000,1,2,3,1.0); +sql insert into t1 values(1648791213000,1,2,3,1.1); +sql insert into t1 values(1648791215000,1,2,3,1.1); +sql insert into t1 values(1648791217000,1,2,3,1.1); +sql insert into t1 values(1648791219000,1,2,3,1.1); +sql insert into t1 values(1648791221000,1,2,3,1.0); +sql insert into t1 values(1648791223000,1,2,3,1.0); +sql insert into t1 values(1648791225000,1,2,3,1.0); +sql insert into t1 values(1648791227000,1,2,3,1.0); +sql insert into t1 values(1648791229000,1,2,3,1.0); + +sql insert into t1 values(1648791231000,1,2,3,1.0); +sql insert into t1 values(1648791233000,1,2,3,1.1); +sql insert into t1 values(1648791235000,1,2,3,1.1); +sql insert into t1 values(1648791237000,1,2,3,1.1); +sql insert into t1 values(1648791239000,1,2,3,1.1); +sql insert into t1 values(1648791241000,1,2,3,1.0); +sql insert into t1 values(1648791243000,1,2,3,1.0); +sql insert into t1 values(1648791245000,1,2,3,1.0); +sql insert into t1 values(1648791247000,1,2,3,1.0); +sql insert into t1 values(1648791249000,1,2,3,1.0); + +sql insert into t1 values(1648791251000,1,2,3,1.0); +sql insert into t1 
values(1648791253000,1,2,3,1.1); +sql insert into t1 values(1648791255000,1,2,3,1.1); +sql insert into t1 values(1648791257000,1,2,3,1.1); +sql insert into t1 values(1648791259000,1,2,3,1.1); +sql insert into t1 values(1648791261000,1,2,3,1.0); +sql insert into t1 values(1648791263000,1,2,3,1.0); +sql insert into t1 values(1648791265000,1,2,3,1.0); +sql insert into t1 values(1648791267000,1,2,3,1.0); +sql insert into t1 values(1648791269000,1,2,3,1.0); + +$loop_count = 0 + +loop4: + +sleep 200 + +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + +print 1 select * from streamt1; +sql select * from streamt1; + +if $rows != 30 then + print =====rows=$rows + goto loop4 +endi + +sql insert into t1 values(1648791211001,1,2,3,1.0); +sql insert into t1 values(1648791213001,1,2,3,1.1); +sql insert into t1 values(1648791215001,1,2,3,1.1); +sql insert into t1 values(1648791217001,1,2,3,1.1); +sql insert into t1 values(1648791219001,1,2,3,1.1); +sql insert into t1 values(1648791221001,1,2,3,1.0); +sql insert into t1 values(1648791223001,1,2,3,1.0); +sql insert into t1 values(1648791225001,1,2,3,1.0); +sql insert into t1 values(1648791227001,1,2,3,1.0); +sql insert into t1 values(1648791229001,1,2,3,1.0); + +$loop_count = 0 + +loop5: + +sleep 200 + +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + +print 1 select * from streamt1; +sql select * from streamt1; + +if $rows != 30 then + print =====rows=$rows + goto loop5 +endi + +if $data01 != 2 then + print =====data01=$data01 + goto loop5 +endi + +if $data91 != 2 then + print =====data91=$data91 + goto loop5 +endi + +sql insert into t1 values(1648791231001,1,2,3,1.0); +sql insert into t1 values(1648791233001,1,2,3,1.1); +sql insert into t1 values(1648791235001,1,2,3,1.1); +sql insert into t1 values(1648791237001,1,2,3,1.1); +sql insert into t1 values(1648791239001,1,2,3,1.1); +sql insert into t1 values(1648791241001,1,2,3,1.0); +sql insert into t1 
values(1648791243001,1,2,3,1.0); +sql insert into t1 values(1648791245001,1,2,3,1.0); +sql insert into t1 values(1648791247001,1,2,3,1.0); +sql insert into t1 values(1648791249001,1,2,3,1.0); + +$loop_count = 0 + +loop6: + +sleep 200 + +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + +print 1 select * from streamt1; +sql select * from streamt1; + +if $rows != 30 then + print =====rows=$rows + goto loop6 +endi + +if $data[10][1] != 2 then + print =====data[10][1]=$data[10][1] + goto loop6 +endi + +if $data[19][1] != 2 then + print =====data[19][1]=$data[19][1] + goto loop6 +endi + +sql insert into t1 values(1648791251001,1,2,3,1.0); +sql insert into t1 values(1648791253001,1,2,3,1.1); +sql insert into t1 values(1648791255001,1,2,3,1.1); +sql insert into t1 values(1648791257001,1,2,3,1.1); +sql insert into t1 values(1648791259001,1,2,3,1.1); +sql insert into t1 values(1648791261001,1,2,3,1.0); +sql insert into t1 values(1648791263001,1,2,3,1.0); +sql insert into t1 values(1648791265001,1,2,3,1.0); +sql insert into t1 values(1648791267001,1,2,3,1.0); +sql insert into t1 values(1648791269001,1,2,3,1.0); + +$loop_count = 0 + +loop7: + +sleep 200 + +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + +print 1 select * from streamt1; +sql select * from streamt1; + +if $rows != 30 then + print =====rows=$rows + goto loop7 +endi + +if $data[20][1] != 2 then + print =====[20][1]=$[20][1] + goto loop7 +endi + +if $data[29][1] != 2 then + print =====[29][1]=$[29][1] + goto loop7 +endi + + system sh/exec.sh -n dnode1 -s stop -x SIGINT \ No newline at end of file diff --git a/tests/script/tsim/stream/basic5.sim b/tests/script/tsim/stream/basic5.sim new file mode 100644 index 0000000000..583c803e4e --- /dev/null +++ b/tests/script/tsim/stream/basic5.sim @@ -0,0 +1,217 @@ +system sh/stop_dnodes.sh +system sh/deploy.sh -n dnode1 -i 1 +system sh/cfg.sh -n dnode1 -c debugflag -v 135 +system sh/cfg.sh -n dnode1 -c streamBufferSize 
-v 10 +system sh/exec.sh -n dnode1 -s start + +sleep 500 + +sql connect + +print step1============= + +sql create database test3 vgroups 1; +sql use test3; +sql create table t1(ts timestamp, a int, b int , c int, d double); +sql create stream streams3 trigger at_once ignore expired 0 ignore update 0 into streamt3 as select _wstart, count(*) c1 from t1 state_window(a); + +sql insert into t1 values(1648791211000,1,2,3,1.0); +sql insert into t1 values(1648791213000,2,2,3,1.1); +sql insert into t1 values(1648791215000,3,2,3,1.1); +sql insert into t1 values(1648791217000,4,2,3,1.1); +sql insert into t1 values(1648791219000,5,2,3,1.1); +sql insert into t1 values(1648791221000,6,2,3,1.0); +sql insert into t1 values(1648791223000,7,2,3,1.0); +sql insert into t1 values(1648791225000,8,2,3,1.0); +sql insert into t1 values(1648791227000,9,2,3,1.0); +sql insert into t1 values(1648791229000,10,2,3,1.0); + +sql insert into t1 values(1648791231000,11,2,3,1.0); +sql insert into t1 values(1648791233000,12,2,3,1.1); +sql insert into t1 values(1648791235000,13,2,3,1.1); +sql insert into t1 values(1648791237000,14,2,3,1.1); +sql insert into t1 values(1648791239000,15,2,3,1.1); +sql insert into t1 values(1648791241000,16,2,3,1.0); +sql insert into t1 values(1648791243000,17,2,3,1.0); +sql insert into t1 values(1648791245000,18,2,3,1.0); +sql insert into t1 values(1648791247000,19,2,3,1.0); +sql insert into t1 values(1648791249000,20,2,3,1.0); + +sql insert into t1 values(1648791251000,21,2,3,1.0); +sql insert into t1 values(1648791253000,22,2,3,1.1); +sql insert into t1 values(1648791255000,23,2,3,1.1); +sql insert into t1 values(1648791257000,24,2,3,1.1); +sql insert into t1 values(1648791259000,25,2,3,1.1); +sql insert into t1 values(1648791261000,26,2,3,1.0); +sql insert into t1 values(1648791263000,27,2,3,1.0); +sql insert into t1 values(1648791265000,28,2,3,1.0); +sql insert into t1 values(1648791267000,29,2,3,1.0); +sql insert into t1 values(1648791269000,30,2,3,1.0); + 
+$loop_count = 0 + +loop8: + +sleep 200 + +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + +print 1 select * from streamt3; +sql select * from streamt3; + +if $rows != 30 then + print =====rows=$rows + goto loop8 +endi + +sql insert into t1 values(1648791211001,1,2,3,1.0); +sql insert into t1 values(1648791213001,2,2,3,1.1); +sql insert into t1 values(1648791215001,3,2,3,1.1); +sql insert into t1 values(1648791217001,4,2,3,1.1); +sql insert into t1 values(1648791219001,5,2,3,1.1); +sql insert into t1 values(1648791221001,6,2,3,1.0); +sql insert into t1 values(1648791223001,7,2,3,1.0); +sql insert into t1 values(1648791225001,8,2,3,1.0); +sql insert into t1 values(1648791227001,9,2,3,1.0); +sql insert into t1 values(1648791229001,10,2,3,1.0); + +$loop_count = 0 + +loop9: + +sleep 200 + +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + +print 1 select * from streamt3; +sql select * from streamt3; + +if $rows != 30 then + print =====rows=$rows + goto loop9 +endi + +if $data01 != 2 then + print =====data01=$data01 + goto loop9 +endi + +if $data91 != 2 then + print =====data91=$data91 + goto loop9 +endi + +sql insert into t1 values(1648791231001,11,2,3,1.0); +sql insert into t1 values(1648791233001,12,2,3,1.1); +sql insert into t1 values(1648791235001,13,2,3,1.1); +sql insert into t1 values(1648791237001,14,2,3,1.1); +sql insert into t1 values(1648791239001,15,2,3,1.1); +sql insert into t1 values(1648791241001,16,2,3,1.0); +sql insert into t1 values(1648791243001,17,2,3,1.0); +sql insert into t1 values(1648791245001,18,2,3,1.0); +sql insert into t1 values(1648791247001,19,2,3,1.0); +sql insert into t1 values(1648791249001,20,2,3,1.0); + +$loop_count = 0 + +loop10: + +sleep 200 + +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + +print 1 select * from streamt3; +sql select * from streamt3; + +if $rows != 30 then + print =====rows=$rows + goto loop10 +endi + +if $data[10][1] != 2 then + 
print =====data[10][1]=$data[10][1] + goto loop10 +endi + +if $data[19][1] != 2 then + print =====data[19][1]=$data[19][1] + goto loop10 +endi + +sql insert into t1 values(1648791251001,21,2,3,1.0); +sql insert into t1 values(1648791253001,22,2,3,1.1); +sql insert into t1 values(1648791255001,23,2,3,1.1); +sql insert into t1 values(1648791257001,24,2,3,1.1); + +#/////////////////////// +$loop_count = 0 + +loop11: + +sleep 200 + +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + +print 1 select * from streamt3; +sql select * from streamt3; + +if $rows != 30 then + print =====rows=$rows + goto loop11 +endi + +if $data[20][1] != 2 then + print =====data[20][1]=$data[20][1] + goto loop11 +endi +#/////////////////////// + +sql insert into t1 values(1648791259001,25,2,3,1.1); +sql insert into t1 values(1648791261001,26,2,3,1.0); +sql insert into t1 values(1648791263001,27,2,3,1.0); +sql insert into t1 values(1648791265001,28,2,3,1.0); +sql insert into t1 values(1648791267001,29,2,3,1.0); +sql insert into t1 values(1648791269001,30,2,3,1.0); + +$loop_count = 0 + +loop12: + +sleep 200 + +$loop_count = $loop_count + 1 +if $loop_count == 10 then + return -1 +endi + +print 1 select * from streamt3; +sql select * from streamt3; + +if $rows != 30 then + print =====rows=$rows + goto loop12 +endi + +if $data[20][1] != 2 then + print =====data[20][1]=$data[20][1] + goto loop12 +endi + +if $data[29][1] != 2 then + print =====data[29][1]=$data[29][1] + goto loop12 +endi + +system sh/exec.sh -n dnode1 -s stop -x SIGINT \ No newline at end of file diff --git a/tests/script/tsim/stream/session1.sim b/tests/script/tsim/stream/session1.sim index 3be604a828..cf42159d84 100644 --- a/tests/script/tsim/stream/session1.sim +++ b/tests/script/tsim/stream/session1.sim @@ -132,85 +132,85 @@ sql select * from streamt order by s desc; # row 0 if $data01 != 2 then - print ======$data01 + print
=====data02=$data02 goto loop2 endi if $data03 != 7 then - print ======$data03 + print =====data03=$data03 goto loop2 endi if $data04 != 22 then - print ======$data04 + print =====data04=$data04 goto loop2 endi # row 1 if $data11 != 3 then - print ======$data11 + print =====data11=$data11 goto loop2 endi if $data12 != 33 then - print ======$data12 + print =====data12=$data12 goto loop2 endi if $data13 != 8 then - print ======$data13 + print =====data13=$data13 goto loop2 endi if $data14 != 21 then - print ======$data14 + print =====data14=$data14 goto loop2 endi # row 2 if $data21 != 4 then - print ======$data21 + print =====data21=$data21 goto loop2 endi if $data22 != 25 then - print ======$data22 + print =====data22=$data22 goto loop2 endi if $data23 != 2 then - print ======$data23 + print =====data23=$data23 goto loop2 endi if $data24 != 20 then - print ======$data24 + print =====data24=$data24 goto loop2 endi # row 3 if $data31 != 10 then - print ======$data31 + print =====data31=$data31 goto loop2 endi if $data32 != 54 then - print ======$data32 + print =====data32=$data32 goto loop2 endi if $data33 != 1 then - print ======$data33 + print =====data33=$data33 goto loop2 endi if $data34 != 19 then - print ======$data34 + print =====data34=$data34 goto loop2 endi diff --git a/tests/system-test/0-others/information_schema.py b/tests/system-test/0-others/information_schema.py index 77bfb5dca8..eaea4e18b2 100644 --- a/tests/system-test/0-others/information_schema.py +++ b/tests/system-test/0-others/information_schema.py @@ -242,11 +242,11 @@ class TDTestCase: self.str510 = self.str255 + self.str255 tdSql.error('alter dnode 1 "activeCode" "a"') tdSql.error('alter dnode 1 "activeCode" "' + self.str107 + '"') - tdSql.execute('alter all dnodes "activeCode" "' + self.str108 + '"') + tdSql.error('alter all dnodes "activeCode" "' + self.str108 + '"') tdSql.error('alter dnode 1 "activeCode" "' + self.str109 + '"') tdSql.error('alter all dnodes "activeCode" "' + self.str510 
+ '"') tdSql.query(f'select * from information_schema.ins_dnodes') - tdSql.checkEqual(tdSql.queryResult[0][8],self.str108) + tdSql.checkEqual(tdSql.queryResult[0][8],"") tdSql.execute('alter dnode 1 "activeCode" ""') tdSql.query(f'select active_code,c_active_code from information_schema.ins_dnodes') tdSql.checkEqual(tdSql.queryResult[0][0],"") @@ -257,14 +257,14 @@ class TDTestCase: tdSql.error('alter all dnodes "cActiveCode" "' + self.str255 + '"') tdSql.error('alter all dnodes "cActiveCode" "' + self.str256 + '"') tdSql.error('alter all dnodes "cActiveCode" "' + self.str257 + '"') - tdSql.execute('alter all dnodes "cActiveCode" "' + self.str254 + '"') + tdSql.error('alter all dnodes "cActiveCode" "' + self.str254 + '"') tdSql.error('alter dnode 1 "cActiveCode" "' + self.str510 + '"') tdSql.query(f'select active_code,c_active_code from information_schema.ins_dnodes') tdSql.checkEqual(tdSql.queryResult[0][0],"") - tdSql.checkEqual(tdSql.queryResult[0][1],self.str254) - tdSql.execute('alter dnode 1 "cActiveCode" "' + self.str109 + '"') + tdSql.checkEqual(tdSql.queryResult[0][1],"") + tdSql.error('alter dnode 1 "cActiveCode" "' + self.str109 + '"') tdSql.query(f'show dnodes') - tdSql.checkEqual(tdSql.queryResult[0][9],self.str109) + tdSql.checkEqual(tdSql.queryResult[0][9],"") tdSql.execute('alter all dnodes "cActiveCode" ""') tdSql.query(f'select c_active_code from information_schema.ins_dnodes') tdSql.checkEqual(tdSql.queryResult[0][0],'') diff --git a/tests/system-test/0-others/multilevel.py b/tests/system-test/0-others/multilevel.py index f086dcb735..66434fff67 100644 --- a/tests/system-test/0-others/multilevel.py +++ b/tests/system-test/0-others/multilevel.py @@ -100,7 +100,7 @@ class TDTestCase: tdDnodes.deploy(1,cfg) tdDnodes.start(1) - tdSql.execute("create database test duration 1") + tdSql.execute("create database test duration 8h") tdSql.execute("use test") tdSql.execute("create table stb(ts timestamp, c int) tags(t int)") @@ -117,7 +117,7 @@ class 
TDTestCase: tdLog.info("================= step3") tdSql.execute('drop database test') for i in range(10): - tdSql.execute("create database test%d duration 1" %(i)) + tdSql.execute("create database test%d duration 8h" %(i)) tdSql.execute("use test%d" %(i)) tdSql.execute("create table tb (ts timestamp,i int)") for j in range(10): @@ -216,7 +216,7 @@ class TDTestCase: tdDnodes.deploy(1,cfg) tdDnodes.start(1) - tdSql.execute('create database dbtest') + tdSql.execute('create database dbtest duration 3') tdSql.execute('use dbtest') tdSql.execute('create table stb (ts timestamp,c0 int) tags(t0 int)') tdSql.execute('create table tb1 using stb tags(1)') diff --git a/tests/system-test/0-others/splitVGroupRep1.py b/tests/system-test/0-others/splitVGroupRep1.py new file mode 100644 index 0000000000..b119ba0a32 --- /dev/null +++ b/tests/system-test/0-others/splitVGroupRep1.py @@ -0,0 +1,440 @@ +################################################################### +# Copyright (c) 2016 by TAOS Technologies, Inc. +# All rights reserved. +# +# This file is proprietary and confidential to TAOS Technologies. +# No part of this file may be reproduced, stored, transmitted, +# disclosed or used in any form or by any means other than as +# expressly provided by the written permission from Jianhui Tao +# +################################################################### + +# -*- coding: utf-8 -*- + +import sys +import random +import time +import copy +import string + +import taos +from util.log import * +from util.cases import * +from util.sql import * + +class TDTestCase: + + # random string + def random_string(self, count): + letters = string.ascii_letters + return ''.join(random.choice(letters) for i in range(count)) + + # get col value and total max min ... 
+ def getColsValue(self, i, j): + # c1 value + if random.randint(1, 10) == 5: + c1 = None + else: + c1 = 1 + + # c2 value + if j % 3200 == 0: + c2 = 8764231 + elif random.randint(1, 10) == 5: + c2 = None + else: + c2 = random.randint(-87654297, 98765321) + + + value = f"({self.ts}, " + + # c1 + if c1 is None: + value += "null," + else: + self.c1Cnt += 1 + value += f"{c1}," + # c2 + if c2 is None: + value += "null," + else: + value += f"{c2}," + # total count + self.c2Cnt += 1 + # max + if self.c2Max is None: + self.c2Max = c2 + else: + if c2 > self.c2Max: + self.c2Max = c2 + # min + if self.c2Min is None: + self.c2Min = c2 + else: + if c2 < self.c2Min: + self.c2Min = c2 + # sum + if self.c2Sum is None: + self.c2Sum = c2 + else: + self.c2Sum += c2 + + # c3 same with ts + value += f"{self.ts})" + + # move next + self.ts += 1 + + return value + + # insert data + def insertData(self): + tdLog.info("insert data ....") + sqls = "" + for i in range(self.childCnt): + # insert child table + values = "" + pre_insert = f"insert into @db_name.t{i} values " + for j in range(self.childRow): + if values == "": + values = self.getColsValue(i, j) + else: + values += "," + self.getColsValue(i, j) + + # batch insert + if j % self.batchSize == 0 and values != "": + sql = pre_insert + values + self.exeDouble(sql) + values = "" + # append last + if values != "": + sql = pre_insert + values + self.exeDouble(sql) + values = "" + + # insert nomal talbe + for i in range(20): + self.ts += 1000 + name = self.random_string(20) + sql = f"insert into @db_name.ta values({self.ts}, {i}, {self.ts%100000}, '{name}', false)" + self.exeDouble(sql) + + # insert finished + tdLog.info(f"insert data successfully.\n" + f" inserted child table = {self.childCnt}\n" + f" inserted child rows = {self.childRow}\n" + f" total inserted rows = {self.childCnt*self.childRow}\n") + return + + def exeDouble(self, sql): + # dbname replace + sql1 = sql.replace("@db_name", self.db1) + + if len(sql1) > 100: + 
tdLog.info(sql1[:100]) + else: + tdLog.info(sql1) + tdSql.execute(sql1) + + sql2 = sql.replace("@db_name", self.db2) + if len(sql2) > 100: + tdLog.info(sql2[:100]) + else: + tdLog.info(sql2) + tdSql.execute(sql2) + + + # prepareEnv + def prepareEnv(self): + # init + self.ts = 1680000000000 + self.childCnt = 4 + self.childRow = 10000 + self.batchSize = 50000 + self.vgroups1 = 1 + self.vgroups2 = 1 + self.db1 = "db1" + self.db2 = "db2" + + # total + self.c1Cnt = 0 + self.c2Cnt = 0 + self.c2Max = None + self.c2Min = None + self.c2Sum = None + + # create database db + sql = f"create database @db_name vgroups {self.vgroups1} replica 1" + self.exeDouble(sql) + + # create super talbe st + sql = f"create table @db_name.st(ts timestamp, c1 int, c2 bigint, ts1 timestamp) tags(area int)" + self.exeDouble(sql) + + # create child table + for i in range(self.childCnt): + sql = f"create table @db_name.t{i} using @db_name.st tags({i}) " + self.exeDouble(sql) + + # create normal table + sql = f"create table @db_name.ta(ts timestamp, c1 int, c2 bigint, c3 binary(32), c4 bool)" + self.exeDouble(sql) + + # insert data + self.insertData() + + # update + self.ts = 1680000000000 + 20000 + self.childRow = 1000 + + + # delete data + sql = "delete from @db_name.st where ts > 1680000019000 and ts < 1680000062000" + self.exeDouble(sql) + sql = "delete from @db_name.st where ts > 1680000099000 and ts < 1680000170000" + self.exeDouble(sql) + + # check data correct + def checkExpect(self, sql, expectVal): + tdSql.query(sql) + rowCnt = tdSql.getRows() + for i in range(rowCnt): + val = tdSql.getData(i,0) + if val != expectVal: + tdLog.exit(f"Not expect . query={val} expect={expectVal} i={i} sql={sql}") + return False + + tdLog.info(f"check expect ok. 
sql={sql} expect ={expectVal} rowCnt={rowCnt}") + return True + + # init + def init(self, conn, logSql, replicaVar=1): + seed = time.time() % 10000 + random.seed(seed) + self.replicaVar = int(replicaVar) + tdLog.debug(f"start to excute {__file__}") + tdSql.init(conn.cursor(), True) + + # check query result same + def queryDouble(self, sql): + # sql + sql1 = sql.replace('@db_name', self.db1) + tdLog.info(sql1) + start1 = time.time() + rows1 = tdSql.query(sql1) + spend1 = time.time() - start1 + res1 = copy.copy(tdSql.queryResult) + + sql2 = sql.replace('@db_name', self.db2) + tdLog.info(sql2) + start2 = time.time() + tdSql.query(sql2) + spend2 = time.time() - start2 + res2 = tdSql.queryResult + + rowlen1 = len(res1) + rowlen2 = len(res2) + + if rowlen1 != rowlen2: + tdLog.exit(f"both row count not equal. rowlen1={rowlen1} rowlen2={rowlen2} ") + return False + + for i in range(rowlen1): + row1 = res1[i] + row2 = res2[i] + collen1 = len(row1) + collen2 = len(row2) + if collen1 != collen2: + tdLog.exit(f"both col count not equal. collen1={collen1} collen2={collen2}") + return False + for j in range(collen1): + if row1[j] != row2[j]: + tdLog.exit(f"both col not equal. 
row={i} col={j} col1={row1[j]} col2={row2[j]} .") + return False + + # warning performance + diff = (spend2 - spend1)*100/spend1 + tdLog.info("spend1=%.6fs spend2=%.6fs diff=%.1f%%"%(spend1, spend2, diff)) + if spend2 > spend1 and diff > 20: + tdLog.info("warning: the diff for performance after spliting is over 20%") + + return True + + + # check result + def checkResult(self): + # check vgroupid + sql = f"select vgroup_id from information_schema.ins_vgroups where db_name='{self.db2}'" + tdSql.query(sql) + tdSql.checkRows(self.vgroups2) + + # check child table count same + sql = "select table_name from information_schema.ins_tables where db_name='@db_name' order by table_name" + self.queryDouble(sql) + + # check row value is ok + sql = "select * from @db_name.st order by ts" + self.queryDouble(sql) + + # where + sql = "select *,tbname from @db_name.st where c1 < 1000 order by ts" + self.queryDouble(sql) + + # max + sql = "select max(c1) from @db_name.st" + self.queryDouble(sql) + + # min + sql = "select min(c2) from @db_name.st" + self.queryDouble(sql) + + # sum + sql = "select sum(c1) from @db_name.st" + self.queryDouble(sql) + + # normal table + + # count + sql = "select count(*) from @db_name.ta" + self.queryDouble(sql) + + # all rows + sql = "select * from @db_name.ta" + self.queryDouble(sql) + + # sum + sql = "select sum(c1) from @db_name.ta" + self.queryDouble(sql) + + + # get vgroup list + def getVGroup(self, db_name): + vgidList = [] + sql = f"select vgroup_id from information_schema.ins_vgroups where db_name='{db_name}'" + res = tdSql.getResult(sql) + rows = len(res) + for i in range(rows): + vgidList.append(res[i][0]) + + return vgidList; + + # split vgroup on db2 + def splitVGroup(self, db_name): + vgids = self.getVGroup(db_name) + selid = random.choice(vgids) + sql = f"split vgroup {selid}" + tdLog.info(sql) + tdSql.execute(sql) + + # wait end + seconds = 300 + for i in range(seconds): + sql ="show transactions;" + rows = tdSql.query(sql) + if rows == 
0: + tdLog.info("split vgroup finished.") + return True + #tdLog.info(f"i={i} wait split vgroup ...") + time.sleep(1) + + tdLog.exit(f"split vgroup transaction is not finished after executing {seconds}s") + return False + + # split error + def expectSplitError(self, dbName): + vgids = self.getVGroup(dbName) + selid = random.choice(vgids) + sql = f"split vgroup {selid}" + tdLog.info(sql) + tdSql.error(sql) + + # expect split ok + def expectSplitOk(self, dbName): + # split vgroup + vgList1 = self.getVGroup(dbName) + self.splitVGroup(dbName) + vgList2 = self.getVGroup(dbName) + vgNum1 = len(vgList1) + 1 + vgNum2 = len(vgList2) + if vgNum1 != vgNum2: + tdLog.exit(f" vglist len={vgNum1} is not same for expect {vgNum2}") + return + + # split empty database + def splitEmptyDB(self): + dbName = "emptydb" + vgNum = 2 + # create database + sql = f"create database {dbName} vgroups {vgNum} replica 1" + tdLog.info(sql) + tdSql.execute(sql) + + # split vgroup + self.expectSplitOk(dbName) + + + # forbid + def checkForbid(self): + # stream + tdLog.info("check forbid split having stream...") + tdSql.execute("create database streamdb;") + tdSql.execute("use streamdb;") + tdSql.execute("create table ta(ts timestamp, age int);") + tdSql.execute("create stream ma into sta as select count(*) from ta interval(1s);") + self.expectSplitError("streamdb") + tdSql.execute("drop stream ma;") + self.expectSplitOk("streamdb") + + # topic + tdLog.info("check forbid split having topic...") + tdSql.execute("create database topicdb wal_retention_period 10;") + tdSql.execute("use topicdb;") + tdSql.execute("create table ta(ts timestamp, age int);") + tdSql.execute("create topic toa as select * from ta;") + + #self.expectSplitError("topicdb") + tdSql.execute("drop topic toa;") + self.expectSplitOk("topicdb") + + # compact and check db2 + def compactAndCheck(self): + tdLog.info("compact db2 and check result ...") + # compact + tdSql.execute(f"compact database {self.db2};") + # check result + 
self.checkResult() + + # run + def run(self): + # prepare env + self.prepareEnv() + + for i in range(3): + # split vgroup on db2 + start = time.time() + self.splitVGroup(self.db2) + end = time.time() + self.vgroups2 += 1 + + # check two db query result same + self.checkResult() + spend = "%.3f"%(end-start) + tdLog.info(f"split vgroup i={i} passed. spend = {spend}s") + + # split empty db + self.splitEmptyDB() + + # check topic and stream forib + self.checkForbid() + + # compact database + self.compactAndCheck() + + # stop + def stop(self): + tdSql.close() + tdLog.success(f"{__file__} successfully executed") + + +tdCases.addLinux(__file__, TDTestCase()) +tdCases.addWindows(__file__, TDTestCase()) diff --git a/tests/system-test/0-others/splitVGroup.py b/tests/system-test/0-others/splitVGroupRep3.py similarity index 93% rename from tests/system-test/0-others/splitVGroup.py rename to tests/system-test/0-others/splitVGroupRep3.py index ed80505ce2..68c915eeaf 100644 --- a/tests/system-test/0-others/splitVGroup.py +++ b/tests/system-test/0-others/splitVGroupRep3.py @@ -137,10 +137,10 @@ class TDTestCase: tdSql.execute(sql1) sql2 = sql.replace("@db_name", self.db2) - if len(sql1) > 100: - tdLog.info(sql1[:100]) + if len(sql2) > 100: + tdLog.info(sql2[:100]) else: - tdLog.info(sql1) + tdLog.info(sql2) tdSql.execute(sql2) @@ -151,8 +151,8 @@ class TDTestCase: self.childCnt = 10 self.childRow = 10000 self.batchSize = 5000 - self.vgroups1 = 4 - self.vgroups2 = 4 + self.vgroups1 = 2 + self.vgroups2 = 2 self.db1 = "db1" self.db2 = "db2" @@ -183,6 +183,16 @@ class TDTestCase: # insert data self.insertData() + # update + self.ts = 1680000000000 + 10000 + self.childRow = 2000 + + # delete data + sql = "delete from @db_name.st where ts > 1680000001900 and ts < 1680000012000" + self.exeDouble(sql) + sql = "delete from @db_name.st where ts > 1680000029000 and ts < 1680000048000" + self.exeDouble(sql) + # check data correct def checkExpect(self, sql, expectVal): tdSql.query(sql) @@ 
-225,7 +235,7 @@ class TDTestCase: rowlen2 = len(res2) if rowlen1 != rowlen2: - tdLog.exit(f"rowlen1={rowlen1} rowlen2={rowlen2} both not equal.") + tdLog.exit(f"both row count not equal. rowlen1={rowlen1} rowlen2={rowlen2} ") return False for i in range(rowlen1): @@ -234,11 +244,11 @@ class TDTestCase: collen1 = len(row1) collen2 = len(row2) if collen1 != collen2: - tdLog.exit(f"collen1={collen1} collen2={collen2} both not equal.") + tdLog.exit(f"both col count not equal. collen1={collen1} collen2={collen2}") return False for j in range(collen1): if row1[j] != row2[j]: - tdLog.exit(f"col={j} col1={row1[j]} col2={row2[j]} both col not equal.") + tdLog.exit(f"both col not equal. row={i} col={j} col1={row1[j]} col2={row2[j]} .") return False # warning performance @@ -354,7 +364,7 @@ class TDTestCase: dbName = "emptydb" vgNum = 2 # create database - sql = f"create database {dbName} vgroups {vgNum}" + sql = f"create database {dbName} vgroups {vgNum} replica 3" tdLog.info(sql) tdSql.execute(sql) @@ -380,7 +390,7 @@ class TDTestCase: tdSql.execute("use topicdb;") tdSql.execute("create table ta(ts timestamp, age int);") tdSql.execute("create topic toa as select * from ta;") - self.expectSplitError("topicdb") + #self.expectSplitError("topicdb") tdSql.execute("drop topic toa;") self.expectSplitOk("topicdb") @@ -397,7 +407,7 @@ class TDTestCase: # prepare env self.prepareEnv() - for i in range(5): + for i in range(2): # split vgroup on db2 start = time.time() self.splitVGroup(self.db2) diff --git a/tests/system-test/1-insert/keep_expired.py b/tests/system-test/1-insert/keep_expired.py index 0cb6d841ed..4c8be55be5 100644 --- a/tests/system-test/1-insert/keep_expired.py +++ b/tests/system-test/1-insert/keep_expired.py @@ -15,7 +15,7 @@ class TDTestCase: self.stbname = "stb" self.ctbname = "ctb" self.keep_value = "2d,2d,2d" - self.duration_value = "1d" + self.duration_value = "16h" self.offset_time = 5 self.sleep_time = self.offset_time*2 diff --git 
a/tests/system-test/eco-system/meta/database/keep_time_offset.py b/tests/system-test/eco-system/meta/database/keep_time_offset.py new file mode 100644 index 0000000000..f1c5398f49 --- /dev/null +++ b/tests/system-test/eco-system/meta/database/keep_time_offset.py @@ -0,0 +1,66 @@ +################################################################### +# Copyright (c) 2016 by TAOS Technologies, Inc. +# All rights reserved. +# +# This file is proprietary and confidential to TAOS Technologies. +# No part of this file may be reproduced, stored, transmitted, +# disclosed or used in any form or by any means other than as +# expressly provided by the written permission from Jianhui Tao +# +################################################################### + +# -*- coding: utf-8 -*- + +import re +from util.log import * +from util.cases import * +from util.sql import * +from util.common import * +from util.sqlset import * + +class TDTestCase: + def init(self, conn, logSql, replicaVar=1): + self.replicaVar = int(replicaVar) + tdLog.debug("start to execute %s" % __file__) + tdSql.init(conn.cursor()) + self.setsql = TDSetSql() + + def create_db(self): + hours = 8 + # create + keep_str = f"KEEP_TIME_OFFSET {hours}" + tdSql.execute(f"create database db {keep_str}") + + # check result + tdSql.query("select `keep_time_offset` from information_schema.ins_databases where name='db'") + tdSql.checkData(0, 0, hours) + + # alter + hours = 4 + keep_str = f"KEEP_TIME_OFFSET {hours}" + tdSql.execute(f"alter database db {keep_str}") + + # check result + tdSql.query("select `keep_time_offset` from information_schema.ins_databases where name='db'") + tdSql.checkData(0, 0, hours) + + + def check_old_syntax(self): + # old syntax would not support again + tdSql.error("alter dnode 1 'keeptimeoffset 10';") + + + def run(self): + # check new syntax right + self.create_db() + + # check old syntax error + self.check_old_syntax() + + + def stop(self): + tdSql.close() + tdLog.success("%s successfully 
executed" % __file__) + +tdCases.addWindows(__file__, TDTestCase()) +tdCases.addLinux(__file__, TDTestCase()) diff --git a/utils/TSZ/sz/src/Huffman.c b/utils/TSZ/sz/src/Huffman.c index 9868f3c0cb..c2c091e328 100644 --- a/utils/TSZ/sz/src/Huffman.c +++ b/utils/TSZ/sz/src/Huffman.c @@ -125,7 +125,10 @@ void build_code(HuffmanTree *huffmanTree, node n, int len, unsigned long out1, u huffmanTree->code[n->c] = (unsigned long*)malloc(2*sizeof(unsigned long)); if(len<=64) { - (huffmanTree->code[n->c])[0] = out1 << (64 - len); + if(len == 0) + (huffmanTree->code[n->c])[0] = 0; + else + (huffmanTree->code[n->c])[0] = out1 << (64 - len); (huffmanTree->code[n->c])[1] = out2; } else