fix:conflict from 3.0

This commit is contained in:
wangmm0220 2023-11-13 18:42:59 +08:00
commit 56381dea90
122 changed files with 6490 additions and 1118 deletions

View File

@ -149,6 +149,8 @@ ELSE ()
CHECK_C_COMPILER_FLAG("-mfma" COMPILER_SUPPORT_FMA) CHECK_C_COMPILER_FLAG("-mfma" COMPILER_SUPPORT_FMA)
CHECK_C_COMPILER_FLAG("-mavx" COMPILER_SUPPORT_AVX) CHECK_C_COMPILER_FLAG("-mavx" COMPILER_SUPPORT_AVX)
CHECK_C_COMPILER_FLAG("-mavx2" COMPILER_SUPPORT_AVX2) CHECK_C_COMPILER_FLAG("-mavx2" COMPILER_SUPPORT_AVX2)
CHECK_C_COMPILER_FLAG("-mavx512f" COMPILER_SUPPORT_AVX512F)
CHECK_C_COMPILER_FLAG("-mavx512vbmi" COMPILER_SUPPORT_AVX512BMI)
IF (COMPILER_SUPPORT_SSE42) IF (COMPILER_SUPPORT_SSE42)
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.2") SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -msse4.2")
@ -168,7 +170,13 @@ ELSE ()
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx2") SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx2")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx2") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx2")
ENDIF() ENDIF()
MESSAGE(STATUS "SIMD instructions (FMA/AVX/AVX2) is ACTIVATED") MESSAGE(STATUS "SIMD instructions (FMA/AVX/AVX2/AVX512) is ACTIVATED")
IF (COMPILER_SUPPORT_AVX512F AND COMPILER_SUPPORT_AVX512BMI)
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512f -mavx512vbmi")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -mavx512vbmi")
MESSAGE(STATUS "avx512 supported by gcc")
ENDIF()
ENDIF() ENDIF()
# build mode # build mode

Binary file not shown.

View File

@ -565,7 +565,7 @@ TO_TIMESTAMP(ts_str_literal, format_str_literal)
- The uppercase or lowercase of `MONTH`, `MON`, `DAY`, `DY` and formtas that output digits have same effect when used in `to_timestamp`, like `to_timestamp('2023-JANUARY-01', 'YYYY-month-dd')`, `month` can be replaced by `MONTH`, or `month`. The cases are ignored. - The uppercase or lowercase of `MONTH`, `MON`, `DAY`, `DY` and formtas that output digits have same effect when used in `to_timestamp`, like `to_timestamp('2023-JANUARY-01', 'YYYY-month-dd')`, `month` can be replaced by `MONTH`, or `month`. The cases are ignored.
- If multi times are specified for one component, the previous will be overwritten. Like `to_timestamp('2023-22-10-10', 'yyyy-yy-MM-dd')`, the output year will be `2022`. - If multi times are specified for one component, the previous will be overwritten. Like `to_timestamp('2023-22-10-10', 'yyyy-yy-MM-dd')`, the output year will be `2022`.
- To avoid unexpected time zone used during the convertion, it's recommended to put time zone in the ts string, e.g. '2023-10-10 10:10:10+08'. If time zone not specified, default will be that in server or client. - To avoid unexpected time zone used during the convertion, it's recommended to put time zone in the ts string, e.g. '2023-10-10 10:10:10+08'. If time zone not specified, default will be that in server or client.
- The default timestamp if some components are not specified will be: `1970-01-01 00:00:00` with the timezone specified or default to local timezone. - The default timestamp if some components are not specified will be: `1970-01-01 00:00:00` with the timezone specified or default to local timezone. Only `DDD` is specified without `DD` is not supported currently, e.g. format 'yyyy-mm-ddd' is not supported, but 'yyyy-mm-dd' is supported.
- If `AM` or `PM` is specified in formats, the Hour must between `1-12`. - If `AM` or `PM` is specified in formats, the Hour must between `1-12`.
- In some cases, `to_timestamp` can convert correctly even the format and the timestamp string are not totally matched. Like `to_timetamp('200101/2', 'yyyyMM1/dd')`, the digit `1` in format string are ignored, and the output timestsamp is `2001-01-02 00:00:00`. Spaces and tabs in formats and tiemstamp string are also ignored automatically. - In some cases, `to_timestamp` can convert correctly even the format and the timestamp string are not totally matched. Like `to_timetamp('200101/2', 'yyyyMM1/dd')`, the digit `1` in format string are ignored, and the output timestsamp is `2001-01-02 00:00:00`. Spaces and tabs in formats and tiemstamp string are also ignored automatically.
- The precision of the output timestamp will be the same as the table in SELECT stmt, millisecond will be used if no table is specified. The output of `select to_timestamp('2023-08-1 10:10:10.123456789', 'yyyy-mm-dd hh:mi:ss.ns')` will be truncated to millisecond precision. If a nano precision table is specified, no truncation will be applied. Like `select to_timestamp('2023-08-1 10:10:10.123456789', 'yyyy-mm-dd hh:mi:ss.ns') from db_ns.table_ns limit 1`. - The precision of the output timestamp will be the same as the table in SELECT stmt, millisecond will be used if no table is specified. The output of `select to_timestamp('2023-08-1 10:10:10.123456789', 'yyyy-mm-dd hh:mi:ss.ns')` will be truncated to millisecond precision. If a nano precision table is specified, no truncation will be applied. Like `select to_timestamp('2023-08-1 10:10:10.123456789', 'yyyy-mm-dd hh:mi:ss.ns') from db_ns.table_ns limit 1`.

View File

@ -38,11 +38,16 @@ Aggregation by time window is supported in TDengine. For example, in the case wh
window_clause: { window_clause: {
SESSION(ts_col, tol_val) SESSION(ts_col, tol_val)
| STATE_WINDOW(col) | STATE_WINDOW(col)
| INTERVAL(interval [, offset]) [SLIDING sliding] [FILL({NONE | VALUE | PREV | NULL | LINEAR | NEXT})] | INTERVAL(interval_val [, offset]) [SLIDING (sliding_value)] [FILL({NONE | VALUE | PREV | NULL | LINEAR | NEXT})]
| EVENT_WINDOW START WITH start_trigger_condition END WITH end_trigger_condition | EVENT_WINDOW START WITH start_trigger_condition END WITH end_trigger_condition
} }
``` ```
Both interval_val and sliding_value are time durations which have 3 forms of representation.
- INTERVAL(1s, 500a) SLIDING(1s), the unit char should be any one of a (millisecond), b (nanosecond), d (day), h (hour), m (minute), n (month), s (second), u (microsecond), w (week), y (year).
- INTERVAL(1000, 500) SLIDING(1000), the unit will the same as the queried database, if there are more than one databases, higher precision will be used.
- INTERVAL('1s', '500a') SLIDING('1s'), unit must be specified, no spaces allowed.
The following restrictions apply: The following restrictions apply:
### Other Rules ### Other Rules

View File

@ -565,7 +565,7 @@ TO_TIMESTAMP(ts_str_literal, format_str_literal)
- `MONTH`, `MON`, `DAY`, `DY` 以及其他输出为数字的格式的大小写意义相同, 如 `to_timestamp('2023-JANUARY-01', 'YYYY-month-dd')`, `month`可以被替换为`MONTH` 或者`Month`. - `MONTH`, `MON`, `DAY`, `DY` 以及其他输出为数字的格式的大小写意义相同, 如 `to_timestamp('2023-JANUARY-01', 'YYYY-month-dd')`, `month`可以被替换为`MONTH` 或者`Month`.
- 如果同一字段被指定了多次, 那么前面的指定将会被覆盖. 如 `to_timestamp('2023-22-10-10', 'yyyy-yy-MM-dd')`, 输出年份是`2022`. - 如果同一字段被指定了多次, 那么前面的指定将会被覆盖. 如 `to_timestamp('2023-22-10-10', 'yyyy-yy-MM-dd')`, 输出年份是`2022`.
- 为避免转换时使用了非预期的时区,推荐在时间中携带时区信息,例如'2023-10-10 10:10:10+08',如果未指定时区则默认时区为服务端或客户端指定的时区。 - 为避免转换时使用了非预期的时区,推荐在时间中携带时区信息,例如'2023-10-10 10:10:10+08',如果未指定时区则默认时区为服务端或客户端指定的时区。
- 如果没有指定完整的时间,那么默认时间值为指定或默认时区的 `1970-01-01 00:00:00`, 未指定部分使用该默认值中的对应部分. - 如果没有指定完整的时间,那么默认时间值为指定或默认时区的 `1970-01-01 00:00:00`, 未指定部分使用该默认值中的对应部分. 暂不支持只指定年日而不指定月日的格式, 如'yyyy-mm-DDD', 支持'yyyy-mm-DD'.
- 如果格式串中有`AM`, `PM`等, 那么小时必须是12小时制, 范围必须是01-12. - 如果格式串中有`AM`, `PM`等, 那么小时必须是12小时制, 范围必须是01-12.
- `to_timestamp`转换具有一定的容错机制, 在格式串和时间戳串不完全对应时, 有时也可转换, 如: `to_timestamp('200101/2', 'yyyyMM1/dd')`, 格式串中多出来的1会被丢弃. 格式串与时间戳串中多余的空格字符(空格, tab等)也会被 自动忽略. 如`to_timestamp(' 23 年 - 1 月 - 01 日 ', 'yy 年-MM月-dd日')` 可以被成功转换. 虽然`MM`等字段需要两个数字对应(只有一位时前面补0), 在`to_timestamp`时, 一个数字也可以成功转换. - `to_timestamp`转换具有一定的容错机制, 在格式串和时间戳串不完全对应时, 有时也可转换, 如: `to_timestamp('200101/2', 'yyyyMM1/dd')`, 格式串中多出来的1会被丢弃. 格式串与时间戳串中多余的空格字符(空格, tab等)也会被 自动忽略. 如`to_timestamp(' 23 年 - 1 月 - 01 日 ', 'yy 年-MM月-dd日')` 可以被成功转换. 虽然`MM`等字段需要两个数字对应(只有一位时前面补0), 在`to_timestamp`时, 一个数字也可以成功转换.
- 输出时间戳的精度与查询表的精度相同, 若查询未指定表, 则输出精度为毫秒. 如`select to_timestamp('2023-08-1 10:10:10.123456789', 'yyyy-mm-dd hh:mi:ss.ns')`的输出将会把微妙和纳秒进行截断. 如果指定一张纳秒表, 那么就不会发生截断, 如`select to_timestamp('2023-08-1 10:10:10.123456789', 'yyyy-mm-dd hh:mi:ss.ns') from db_ns.table_ns limit 1`. - 输出时间戳的精度与查询表的精度相同, 若查询未指定表, 则输出精度为毫秒. 如`select to_timestamp('2023-08-1 10:10:10.123456789', 'yyyy-mm-dd hh:mi:ss.ns')`的输出将会把微妙和纳秒进行截断. 如果指定一张纳秒表, 那么就不会发生截断, 如`select to_timestamp('2023-08-1 10:10:10.123456789', 'yyyy-mm-dd hh:mi:ss.ns') from db_ns.table_ns limit 1`.

View File

@ -44,7 +44,11 @@ window_clause: {
} }
``` ```
在上述语法中的具体限制如下 其中interval_val 和 sliding_val 都表示时间段, 语法上支持三种方式,举例说明如下:
- INTERVAL(1s, 500a) SLIDING(1s), 自带时间单位的形式,其中的时间单位是单字符表示, 分别为: a (毫秒), b (纳秒), d (天), h (小时), m (分钟), n (月), s (秒), u (微妙), w (周), y (年).
- INTERVAL(1000, 500) SLIDING(1000), 不带时间单位的形式,将使用查询库的时间精度作为默认时间单位,当存在多个库时默认采用精度更高的库.
- INTERVAL('1s', '500a') SLIDING('1s'), 自带时间单位的字符串形式,字符串内部不能有任何空格等其它字符.
### 窗口子句的规则 ### 窗口子句的规则

View File

@ -39,8 +39,8 @@ void s3DeleteObjectsByPrefix(const char *prefix);
void s3DeleteObjects(const char *object_name[], int nobject); void s3DeleteObjects(const char *object_name[], int nobject);
bool s3Exists(const char *object_name); bool s3Exists(const char *object_name);
bool s3Get(const char *object_name, const char *path); bool s3Get(const char *object_name, const char *path);
int32_t s3GetObjectsByPrefix(const char *prefix, const char* path); int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t size, bool check, uint8_t **ppBlock);
int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t size, uint8_t **ppBlock); int32_t s3GetObjectsByPrefix(const char *prefix, const char *path);
void s3EvictCache(const char *path, long object_size); void s3EvictCache(const char *path, long object_size);
long s3Size(const char *object_name); long s3Size(const char *object_name);

View File

@ -249,6 +249,7 @@ typedef struct SQueryTableDataCond {
SColumnInfo* colList; SColumnInfo* colList;
int32_t* pSlotList; // the column output destation slot, and it may be null int32_t* pSlotList; // the column output destation slot, and it may be null
int32_t type; // data block load type: int32_t type; // data block load type:
bool skipRollup;
STimeWindow twindows; STimeWindow twindows;
int64_t startVersion; int64_t startVersion;
int64_t endVersion; int64_t endVersion;
@ -364,6 +365,11 @@ typedef struct SSortExecInfo {
int32_t readBytes; // read io bytes int32_t readBytes; // read io bytes
} SSortExecInfo; } SSortExecInfo;
typedef struct SNonSortExecInfo {
int32_t blkNums;
} SNonSortExecInfo;
typedef struct STUidTagInfo { typedef struct STUidTagInfo {
char* name; char* name;
uint64_t uid; uint64_t uid;

View File

@ -3774,6 +3774,7 @@ typedef struct {
int64_t suid; int64_t suid;
SArray* deleteReqs; // SArray<SSingleDeleteReq> SArray* deleteReqs; // SArray<SSingleDeleteReq>
int64_t ctimeMs; // fill by vnode int64_t ctimeMs; // fill by vnode
int8_t level; // 0 tsdb(default), 1 rsma1 , 2 rsma2
} SBatchDeleteReq; } SBatchDeleteReq;
int32_t tEncodeSBatchDeleteReq(SEncoder* pCoder, const SBatchDeleteReq* pReq); int32_t tEncodeSBatchDeleteReq(SEncoder* pCoder, const SBatchDeleteReq* pReq);

View File

@ -49,6 +49,7 @@ typedef struct {
uint64_t checkpointId; uint64_t checkpointId;
bool initTableReader; bool initTableReader;
bool initTqReader; bool initTqReader;
bool skipRollup;
int32_t numOfVgroups; int32_t numOfVgroups;
void* sContext; // SSnapContext* void* sContext; // SSnapContext*
void* pStateBackend; void* pStateBackend;

View File

@ -168,7 +168,6 @@ typedef struct {
struct SStreamFileState *pFileState; struct SStreamFileState *pFileState;
int32_t number; int32_t number;
SSHashObj *parNameMap; SSHashObj *parNameMap;
int64_t checkPointId;
int32_t taskId; int32_t taskId;
int64_t streamId; int64_t streamId;
int64_t streamBackendRid; int64_t streamBackendRid;

View File

@ -121,6 +121,7 @@ int32_t nodesListMakeAppend(SNodeList** pList, SNode* pNode);
int32_t nodesListMakeStrictAppend(SNodeList** pList, SNode* pNode); int32_t nodesListMakeStrictAppend(SNodeList** pList, SNode* pNode);
int32_t nodesListAppendList(SNodeList* pTarget, SNodeList* pSrc); int32_t nodesListAppendList(SNodeList* pTarget, SNodeList* pSrc);
int32_t nodesListStrictAppendList(SNodeList* pTarget, SNodeList* pSrc); int32_t nodesListStrictAppendList(SNodeList* pTarget, SNodeList* pSrc);
int32_t nodesListMakeStrictAppendList(SNodeList** pTarget, SNodeList* pSrc);
int32_t nodesListPushFront(SNodeList* pList, SNode* pNode); int32_t nodesListPushFront(SNodeList* pList, SNode* pNode);
SListCell* nodesListErase(SNodeList* pList, SListCell* pCell); SListCell* nodesListErase(SNodeList* pList, SListCell* pCell);
void nodesListInsertList(SNodeList* pTarget, SListCell* pPos, SNodeList* pSrc); void nodesListInsertList(SNodeList* pTarget, SListCell* pPos, SNodeList* pSrc);

View File

@ -40,6 +40,13 @@ typedef enum EGroupAction {
GROUP_ACTION_CLEAR GROUP_ACTION_CLEAR
} EGroupAction; } EGroupAction;
typedef enum EMergeType {
MERGE_TYPE_SORT = 1,
MERGE_TYPE_NON_SORT,
MERGE_TYPE_COLUMNS,
MERGE_TYPE_MAX_VALUE
} EMergeType;
typedef struct SLogicNode { typedef struct SLogicNode {
ENodeType type; ENodeType type;
bool dynamicOp; bool dynamicOp;
@ -138,6 +145,7 @@ typedef struct SAggLogicNode {
bool hasGroupKeyOptimized; bool hasGroupKeyOptimized;
bool isGroupTb; bool isGroupTb;
bool isPartTb; // true if partition keys has tbname bool isPartTb; // true if partition keys has tbname
bool hasGroup;
} SAggLogicNode; } SAggLogicNode;
typedef struct SProjectLogicNode { typedef struct SProjectLogicNode {
@ -221,6 +229,8 @@ typedef struct SMergeLogicNode {
SNodeList* pInputs; SNodeList* pInputs;
int32_t numOfChannels; int32_t numOfChannels;
int32_t srcGroupId; int32_t srcGroupId;
bool colsMerge;
bool needSort;
bool groupSort; bool groupSort;
bool ignoreGroupId; bool ignoreGroupId;
bool inputWithGroupId; bool inputWithGroupId;
@ -388,6 +398,7 @@ typedef struct SLastRowScanPhysiNode {
SNodeList* pGroupTags; SNodeList* pGroupTags;
bool groupSort; bool groupSort;
bool ignoreNull; bool ignoreNull;
SNodeList* pTargets;
} SLastRowScanPhysiNode; } SLastRowScanPhysiNode;
typedef SLastRowScanPhysiNode STableCountScanPhysiNode; typedef SLastRowScanPhysiNode STableCountScanPhysiNode;
@ -531,6 +542,7 @@ typedef struct SExchangePhysiNode {
typedef struct SMergePhysiNode { typedef struct SMergePhysiNode {
SPhysiNode node; SPhysiNode node;
EMergeType type;
SNodeList* pMergeKeys; SNodeList* pMergeKeys;
SNodeList* pTargets; SNodeList* pTargets;
int32_t numOfChannels; int32_t numOfChannels;

View File

@ -304,6 +304,7 @@ typedef struct SCheckpointInfo {
int64_t startTs; int64_t startTs;
int64_t checkpointId; int64_t checkpointId;
int64_t checkpointVer; // latest checkpointId version int64_t checkpointVer; // latest checkpointId version
int64_t processedVer; // already processed ver, that has generated results version.
int64_t nextProcessVer; // current offset in WAL, not serialize it int64_t nextProcessVer; // current offset in WAL, not serialize it
int64_t failedId; // record the latest failed checkpoint id int64_t failedId; // record the latest failed checkpoint id
} SCheckpointInfo; } SCheckpointInfo;
@ -460,7 +461,7 @@ typedef struct STaskStartInfo {
int32_t taskStarting; // restart flag, sentinel to guard the restart procedure. int32_t taskStarting; // restart flag, sentinel to guard the restart procedure.
SHashObj* pReadyTaskSet; // tasks that are all ready for running stream processing SHashObj* pReadyTaskSet; // tasks that are all ready for running stream processing
SHashObj* pFailedTaskSet; // tasks that are done the check downstream process, may be successful or failed SHashObj* pFailedTaskSet; // tasks that are done the check downstream process, may be successful or failed
int32_t elapsedTime; int64_t elapsedTime;
} STaskStartInfo; } STaskStartInfo;
typedef struct STaskUpdateInfo { typedef struct STaskUpdateInfo {
@ -826,6 +827,7 @@ void streamMetaReleaseTask(SStreamMeta* pMeta, SStreamTask* pTask);
int32_t streamMetaReopen(SStreamMeta* pMeta); int32_t streamMetaReopen(SStreamMeta* pMeta);
int32_t streamMetaCommit(SStreamMeta* pMeta); int32_t streamMetaCommit(SStreamMeta* pMeta);
int32_t streamMetaLoadAllTasks(SStreamMeta* pMeta); int32_t streamMetaLoadAllTasks(SStreamMeta* pMeta);
int64_t streamMetaGetLatestCheckpointId(SStreamMeta* pMeta);
void streamMetaNotifyClose(SStreamMeta* pMeta); void streamMetaNotifyClose(SStreamMeta* pMeta);
void streamMetaStartHb(SStreamMeta* pMeta); void streamMetaStartHb(SStreamMeta* pMeta);
bool streamMetaTaskInTimer(SStreamMeta* pMeta); bool streamMetaTaskInTimer(SStreamMeta* pMeta);
@ -839,6 +841,7 @@ void streamMetaResetStartInfo(STaskStartInfo* pMeta);
// checkpoint // checkpoint
int32_t streamProcessCheckpointSourceReq(SStreamTask* pTask, SStreamCheckpointSourceReq* pReq); int32_t streamProcessCheckpointSourceReq(SStreamTask* pTask, SStreamCheckpointSourceReq* pReq);
int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask); int32_t streamProcessCheckpointReadyMsg(SStreamTask* pTask);
int32_t streamTaskBuildCheckpoint(SStreamTask* pTask);
void streamTaskClearCheckInfo(SStreamTask* pTask); void streamTaskClearCheckInfo(SStreamTask* pTask);
int32_t streamAlignTransferState(SStreamTask* pTask); int32_t streamAlignTransferState(SStreamTask* pTask);
int32_t streamBuildAndSendDropTaskMsg(SMsgCb* pMsgCb, int32_t vgId, SStreamTaskId* pTaskId); int32_t streamBuildAndSendDropTaskMsg(SMsgCb* pMsgCb, int32_t vgId, SStreamTaskId* pTaskId);

View File

@ -36,11 +36,12 @@ extern int64_t tsStreamMax;
extern float tsNumOfCores; extern float tsNumOfCores;
extern int64_t tsTotalMemoryKB; extern int64_t tsTotalMemoryKB;
extern char *tsProcPath; extern char *tsProcPath;
extern char tsSIMDBuiltins; extern char tsSIMDEnable;
extern char tsSSE42Enable; extern char tsSSE42Enable;
extern char tsAVXEnable; extern char tsAVXEnable;
extern char tsAVX2Enable; extern char tsAVX2Enable;
extern char tsFMAEnable; extern char tsFMAEnable;
extern char tsAVX512Enable;
extern char tsTagFilterCache; extern char tsTagFilterCache;
extern char configDir[]; extern char configDir[];

View File

@ -41,7 +41,7 @@ int32_t taosGetOsReleaseName(char *releaseName, char* sName, char* ver, int32_t
int32_t taosGetCpuInfo(char *cpuModel, int32_t maxLen, float *numOfCores); int32_t taosGetCpuInfo(char *cpuModel, int32_t maxLen, float *numOfCores);
int32_t taosGetCpuCores(float *numOfCores, bool physical); int32_t taosGetCpuCores(float *numOfCores, bool physical);
void taosGetCpuUsage(double *cpu_system, double *cpu_engine); void taosGetCpuUsage(double *cpu_system, double *cpu_engine);
int32_t taosGetCpuInstructions(char* sse42, char* avx, char* avx2, char* fma); int32_t taosGetCpuInstructions(char* sse42, char* avx, char* avx2, char* fma, char* avx512);
int32_t taosGetTotalMemory(int64_t *totalKB); int32_t taosGetTotalMemory(int64_t *totalKB);
int32_t taosGetProcMemory(int64_t *usedKB); int32_t taosGetProcMemory(int64_t *usedKB);
int32_t taosGetSysMemory(int64_t *usedKB); int32_t taosGetSysMemory(int64_t *usedKB);

View File

@ -754,6 +754,7 @@ int32_t* taosGetErrno();
#define TSDB_CODE_FUNC_DUP_TIMESTAMP TAOS_DEF_ERROR_CODE(0, 0x2805) #define TSDB_CODE_FUNC_DUP_TIMESTAMP TAOS_DEF_ERROR_CODE(0, 0x2805)
#define TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_FORMAT_ERR TAOS_DEF_ERROR_CODE(0, 0x2806) #define TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_FORMAT_ERR TAOS_DEF_ERROR_CODE(0, 0x2806)
#define TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_TS_ERR TAOS_DEF_ERROR_CODE(0, 0x2807) #define TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_TS_ERR TAOS_DEF_ERROR_CODE(0, 0x2807)
#define TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_NOT_SUPPORTED TAOS_DEF_ERROR_CODE(0, 0x2808)
//udf //udf
#define TSDB_CODE_UDF_STOPPING TAOS_DEF_ERROR_CODE(0, 0x2901) #define TSDB_CODE_UDF_STOPPING TAOS_DEF_ERROR_CODE(0, 0x2901)

View File

@ -25,7 +25,7 @@ static S3UriStyle uriStyleG = S3UriStylePath;
static int retriesG = 5; static int retriesG = 5;
static int timeoutMsG = 0; static int timeoutMsG = 0;
static int32_t s3Begin() { int32_t s3Begin() {
S3Status status; S3Status status;
const char *hostname = tsS3Hostname; const char *hostname = tsS3Hostname;
const char *env_hn = getenv("S3_HOSTNAME"); const char *env_hn = getenv("S3_HOSTNAME");
@ -44,10 +44,12 @@ static int32_t s3Begin() {
return 0; return 0;
} }
static void s3End() { S3_deinitialize(); } void s3End() { S3_deinitialize(); }
int32_t s3Init() { return s3Begin(); }
void s3CleanUp() { s3End(); } int32_t s3Init() { return 0; /*s3Begin();*/ }
void s3CleanUp() { /*s3End();*/
}
static int should_retry() { static int should_retry() {
/* /*
@ -73,8 +75,9 @@ static void s3PrintError(const char *func, S3Status status, char error_details[]
} }
typedef struct { typedef struct {
char err_msg[128]; char err_msg[512];
S3Status status; S3Status status;
uint64_t content_length;
TdFilePtr file; TdFilePtr file;
} TS3GetData; } TS3GetData;
@ -83,10 +86,11 @@ typedef struct {
S3Status status; S3Status status;
uint64_t content_length; uint64_t content_length;
char *buf; char *buf;
int64_t buf_pos;
} TS3SizeCBD; } TS3SizeCBD;
static S3Status responsePropertiesCallbackNull(const S3ResponseProperties *properties, void *callbackData) { static S3Status responsePropertiesCallbackNull(const S3ResponseProperties *properties, void *callbackData) {
// (void)callbackData; // (void)callbackData;
return S3StatusOK; return S3StatusOK;
} }
@ -109,20 +113,22 @@ static void responseCompleteCallback(S3Status status, const S3ErrorDetails *erro
int len = 0; int len = 0;
const int elen = sizeof(cbd->err_msg); const int elen = sizeof(cbd->err_msg);
if (error) { if (error) {
if (error->message) { if (error->message && elen - len > 0) {
len += snprintf(&(cbd->err_msg[len]), elen - len, " Message: %s\n", error->message); len += snprintf(&(cbd->err_msg[len]), elen - len, " Message: %s\n", error->message);
} }
if (error->resource) { if (error->resource && elen - len > 0) {
len += snprintf(&(cbd->err_msg[len]), elen - len, " Resource: %s\n", error->resource); len += snprintf(&(cbd->err_msg[len]), elen - len, " Resource: %s\n", error->resource);
} }
if (error->furtherDetails) { if (error->furtherDetails && elen - len > 0) {
len += snprintf(&(cbd->err_msg[len]), elen - len, " Further Details: %s\n", error->furtherDetails); len += snprintf(&(cbd->err_msg[len]), elen - len, " Further Details: %s\n", error->furtherDetails);
} }
if (error->extraDetailsCount) { if (error->extraDetailsCount && elen - len > 0) {
len += snprintf(&(cbd->err_msg[len]), elen - len, "%s", " Extra Details:\n"); len += snprintf(&(cbd->err_msg[len]), elen - len, "%s", " Extra Details:\n");
for (int i = 0; i < error->extraDetailsCount; i++) { for (int i = 0; i < error->extraDetailsCount; i++) {
len += snprintf(&(cbd->err_msg[len]), elen - len, " %s: %s\n", error->extraDetails[i].name, if (elen - len > 0) {
error->extraDetails[i].value); len += snprintf(&(cbd->err_msg[len]), elen - len, " %s: %s\n", error->extraDetails[i].name,
error->extraDetails[i].value);
}
} }
} }
} }
@ -213,8 +219,9 @@ static void growbuffer_destroy(growbuffer *gb) {
} }
typedef struct put_object_callback_data { typedef struct put_object_callback_data {
char err_msg[128]; char err_msg[512];
S3Status status; S3Status status;
uint64_t content_length;
// FILE *infile; // FILE *infile;
TdFilePtr infileFD; TdFilePtr infileFD;
growbuffer *gb; growbuffer *gb;
@ -226,8 +233,9 @@ typedef struct put_object_callback_data {
#define MULTIPART_CHUNK_SIZE (768 << 20) // multipart is 768M #define MULTIPART_CHUNK_SIZE (768 << 20) // multipart is 768M
typedef struct UploadManager { typedef struct UploadManager {
char err_msg[128]; char err_msg[512];
S3Status status; S3Status status;
uint64_t content_length;
// used for initial multipart // used for initial multipart
char *upload_id; char *upload_id;
@ -241,8 +249,9 @@ typedef struct UploadManager {
} UploadManager; } UploadManager;
typedef struct list_parts_callback_data { typedef struct list_parts_callback_data {
char err_msg[128]; char err_msg[512];
S3Status status; S3Status status;
uint64_t content_length;
int isTruncated; int isTruncated;
char nextPartNumberMarker[24]; char nextPartNumberMarker[24];
char initiatorId[256]; char initiatorId[256];
@ -258,8 +267,6 @@ typedef struct list_parts_callback_data {
} list_parts_callback_data; } list_parts_callback_data;
typedef struct MultipartPartData { typedef struct MultipartPartData {
char err_msg[128];
S3Status status;
put_object_callback_data put_object_data; put_object_callback_data put_object_data;
int seq; int seq;
UploadManager *manager; UploadManager *manager;
@ -267,11 +274,12 @@ typedef struct MultipartPartData {
static int putObjectDataCallback(int bufferSize, char *buffer, void *callbackData) { static int putObjectDataCallback(int bufferSize, char *buffer, void *callbackData) {
put_object_callback_data *data = (put_object_callback_data *)callbackData; put_object_callback_data *data = (put_object_callback_data *)callbackData;
/*
if (data->infileFD == 0) { if (data->infileFD == 0) {
MultipartPartData *mpd = (MultipartPartData *)callbackData; MultipartPartData *mpd = (MultipartPartData *)callbackData;
data = &mpd->put_object_data; data = &mpd->put_object_data;
} }
*/
int ret = 0; int ret = 0;
if (data->contentLength) { if (data->contentLength) {
@ -402,7 +410,8 @@ static int try_get_parts_info(const char *bucketName, const char *key, UploadMan
S3BucketContext bucketContext = {0, tsS3BucketName, protocolG, uriStyleG, tsS3AccessKeyId, tsS3AccessKeySecret, S3BucketContext bucketContext = {0, tsS3BucketName, protocolG, uriStyleG, tsS3AccessKeyId, tsS3AccessKeySecret,
0, awsRegionG}; 0, awsRegionG};
S3ListPartsHandler listPartsHandler = {{&responsePropertiesCallbackNull, &responseCompleteCallback}, &listPartsCallback}; S3ListPartsHandler listPartsHandler = {{&responsePropertiesCallbackNull, &responseCompleteCallback},
&listPartsCallback};
list_parts_callback_data data; list_parts_callback_data data;
@ -448,13 +457,13 @@ int32_t s3PutObjectFromFile2(const char *file, const char *object) {
int metaPropertiesCount = 0; int metaPropertiesCount = 0;
S3NameValue metaProperties[S3_MAX_METADATA_COUNT]; S3NameValue metaProperties[S3_MAX_METADATA_COUNT];
char useServerSideEncryption = 0; char useServerSideEncryption = 0;
int noStatus = 0; put_object_callback_data data = {0};
put_object_callback_data data; // int noStatus = 0;
// data.infile = 0; // data.infile = 0;
data.infileFD = NULL; // data.gb = 0;
data.gb = 0; // data.infileFD = NULL;
data.noStatus = noStatus; // data.noStatus = noStatus;
if (taosStatFile(file, &contentLength, NULL, NULL) < 0) { if (taosStatFile(file, &contentLength, NULL, NULL) < 0) {
uError("ERROR: %s Failed to stat file %s: ", __func__, file); uError("ERROR: %s Failed to stat file %s: ", __func__, file);
@ -571,9 +580,9 @@ int32_t s3PutObjectFromFile2(const char *file, const char *object) {
do { do {
S3_upload_part(&bucketContext, key, &putProperties, &putObjectHandler, seq, manager.upload_id, S3_upload_part(&bucketContext, key, &putProperties, &putObjectHandler, seq, manager.upload_id,
partContentLength, 0, timeoutMsG, &partData); partContentLength, 0, timeoutMsG, &partData);
} while (S3_status_is_retryable(partData.status) && should_retry()); } while (S3_status_is_retryable(partData.put_object_data.status) && should_retry());
if (partData.status != S3StatusOK) { if (partData.put_object_data.status != S3StatusOK) {
s3PrintError(__func__, partData.status, partData.err_msg); s3PrintError(__func__, partData.put_object_data.status, partData.put_object_data.err_msg);
code = TAOS_SYSTEM_ERROR(EIO); code = TAOS_SYSTEM_ERROR(EIO);
goto clean; goto clean;
} }
@ -621,7 +630,7 @@ int32_t s3PutObjectFromFile2(const char *file, const char *object) {
} }
typedef struct list_bucket_callback_data { typedef struct list_bucket_callback_data {
char err_msg[128]; char err_msg[512];
S3Status status; S3Status status;
int isTruncated; int isTruncated;
char nextMarker[1024]; char nextMarker[1024];
@ -670,7 +679,7 @@ static void s3FreeObjectKey(void *pItem) {
taosMemoryFree(key); taosMemoryFree(key);
} }
static SArray* getListByPrefix(const char *prefix){ static SArray *getListByPrefix(const char *prefix) {
S3BucketContext bucketContext = {0, tsS3BucketName, protocolG, uriStyleG, tsS3AccessKeyId, tsS3AccessKeySecret, S3BucketContext bucketContext = {0, tsS3BucketName, protocolG, uriStyleG, tsS3AccessKeyId, tsS3AccessKeySecret,
0, awsRegionG}; 0, awsRegionG};
S3ListBucketHandler listBucketHandler = {{&responsePropertiesCallbackNull, &responseCompleteCallback}, S3ListBucketHandler listBucketHandler = {{&responsePropertiesCallbackNull, &responseCompleteCallback},
@ -679,7 +688,7 @@ static SArray* getListByPrefix(const char *prefix){
const char *marker = 0, *delimiter = 0; const char *marker = 0, *delimiter = 0;
int maxkeys = 0, allDetails = 0; int maxkeys = 0, allDetails = 0;
list_bucket_callback_data data; list_bucket_callback_data data;
data.objectArray = taosArrayInit(32, sizeof(void*)); data.objectArray = taosArrayInit(32, sizeof(void *));
if (!data.objectArray) { if (!data.objectArray) {
uError("%s: %s", __func__, "out of memoty"); uError("%s: %s", __func__, "out of memoty");
return NULL; return NULL;
@ -731,23 +740,27 @@ void s3DeleteObjects(const char *object_name[], int nobject) {
} }
void s3DeleteObjectsByPrefix(const char *prefix) { void s3DeleteObjectsByPrefix(const char *prefix) {
SArray* objectArray = getListByPrefix(prefix); SArray *objectArray = getListByPrefix(prefix);
if(objectArray == NULL)return; if (objectArray == NULL) return;
s3DeleteObjects(TARRAY_DATA(objectArray), TARRAY_SIZE(objectArray)); s3DeleteObjects(TARRAY_DATA(objectArray), TARRAY_SIZE(objectArray));
taosArrayDestroyEx(objectArray, s3FreeObjectKey); taosArrayDestroyEx(objectArray, s3FreeObjectKey);
} }
static S3Status getObjectDataCallback(int bufferSize, const char *buffer, void *callbackData) { static S3Status getObjectDataCallback(int bufferSize, const char *buffer, void *callbackData) {
TS3SizeCBD *cbd = callbackData; TS3SizeCBD *cbd = callbackData;
/*
if (cbd->content_length != bufferSize) { if (cbd->content_length != bufferSize) {
cbd->status = S3StatusAbortedByCallback; cbd->status = S3StatusAbortedByCallback;
return S3StatusAbortedByCallback; return S3StatusAbortedByCallback;
} }
*/
if (!cbd->buf) {
cbd->buf = taosMemoryCalloc(1, cbd->content_length);
}
char *buf = taosMemoryCalloc(1, bufferSize); if (cbd->buf) {
if (buf) { memcpy(cbd->buf + cbd->buf_pos, buffer, bufferSize);
memcpy(buf, buffer, bufferSize); cbd->buf_pos += bufferSize;
cbd->buf = buf;
cbd->status = S3StatusOK; cbd->status = S3StatusOK;
return S3StatusOK; return S3StatusOK;
} else { } else {
@ -756,7 +769,7 @@ static S3Status getObjectDataCallback(int bufferSize, const char *buffer, void *
} }
} }
int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t size, uint8_t **ppBlock) { int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t size, bool check, uint8_t **ppBlock) {
int status = 0; int status = 0;
int64_t ifModifiedSince = -1, ifNotModifiedSince = -1; int64_t ifModifiedSince = -1, ifNotModifiedSince = -1;
const char *ifMatch = 0, *ifNotMatch = 0; const char *ifMatch = 0, *ifNotMatch = 0;
@ -769,6 +782,7 @@ int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t size,
TS3SizeCBD cbd = {0}; TS3SizeCBD cbd = {0};
cbd.content_length = size; cbd.content_length = size;
cbd.buf_pos = 0;
do { do {
S3_get_object(&bucketContext, object_name, &getConditions, offset, size, 0, 0, &getObjectHandler, &cbd); S3_get_object(&bucketContext, object_name, &getConditions, offset, size, 0, 0, &getObjectHandler, &cbd);
} while (S3_status_is_retryable(cbd.status) && should_retry()); } while (S3_status_is_retryable(cbd.status) && should_retry());
@ -778,19 +792,23 @@ int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t size,
return TAOS_SYSTEM_ERROR(EIO); return TAOS_SYSTEM_ERROR(EIO);
} }
if (check && cbd.buf_pos != size) {
uError("%s: %d(%s)", __func__, cbd.status, cbd.err_msg);
return TAOS_SYSTEM_ERROR(EIO);
}
*ppBlock = cbd.buf; *ppBlock = cbd.buf;
return 0; return 0;
} }
static S3Status getObjectCallback(int bufferSize, const char *buffer, void *callbackData) { static S3Status getObjectCallback(int bufferSize, const char *buffer, void *callbackData) {
TS3GetData *cbd = (TS3GetData *) callbackData; TS3GetData *cbd = (TS3GetData *)callbackData;
size_t wrote = taosWriteFile(cbd->file, buffer, bufferSize); size_t wrote = taosWriteFile(cbd->file, buffer, bufferSize);
return ((wrote < (size_t) bufferSize) ? return ((wrote < (size_t)bufferSize) ? S3StatusAbortedByCallback : S3StatusOK);
S3StatusAbortedByCallback : S3StatusOK);
} }
int32_t s3GetObjectToFile(const char *object_name, char* fileName) { int32_t s3GetObjectToFile(const char *object_name, char *fileName) {
int64_t ifModifiedSince = -1, ifNotModifiedSince = -1; int64_t ifModifiedSince = -1, ifNotModifiedSince = -1;
const char *ifMatch = 0, *ifNotMatch = 0; const char *ifMatch = 0, *ifNotMatch = 0;
@ -821,21 +839,21 @@ int32_t s3GetObjectToFile(const char *object_name, char* fileName) {
return 0; return 0;
} }
int32_t s3GetObjectsByPrefix(const char *prefix, const char* path){ int32_t s3GetObjectsByPrefix(const char *prefix, const char *path) {
SArray* objectArray = getListByPrefix(prefix); SArray *objectArray = getListByPrefix(prefix);
if(objectArray == NULL) return -1; if (objectArray == NULL) return -1;
for(size_t i = 0; i < taosArrayGetSize(objectArray); i++){ for (size_t i = 0; i < taosArrayGetSize(objectArray); i++) {
char* object = taosArrayGetP(objectArray, i); char *object = taosArrayGetP(objectArray, i);
const char* tmp = strchr(object, '/'); const char *tmp = strchr(object, '/');
tmp = (tmp == NULL) ? object : tmp + 1; tmp = (tmp == NULL) ? object : tmp + 1;
char fileName[PATH_MAX] = {0}; char fileName[PATH_MAX] = {0};
if(path[strlen(path) - 1] != TD_DIRSEP_CHAR){ if (path[strlen(path) - 1] != TD_DIRSEP_CHAR) {
snprintf(fileName, PATH_MAX, "%s%s%s", path, TD_DIRSEP, tmp); snprintf(fileName, PATH_MAX, "%s%s%s", path, TD_DIRSEP, tmp);
}else{ } else {
snprintf(fileName, PATH_MAX, "%s%s", path, tmp); snprintf(fileName, PATH_MAX, "%s%s", path, tmp);
} }
if(s3GetObjectToFile(object, fileName) != 0){ if (s3GetObjectToFile(object, fileName) != 0) {
taosArrayDestroyEx(objectArray, s3FreeObjectKey); taosArrayDestroyEx(objectArray, s3FreeObjectKey);
return -1; return -1;
} }
@ -1122,7 +1140,8 @@ bool s3Get(const char *object_name, const char *path) {
return ret; return ret;
} }
int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t block_size, uint8_t **ppBlock) { int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t block_size, bool check, uint8_t **ppBlock) {
(void)check;
int32_t code = 0; int32_t code = 0;
cos_pool_t *p = NULL; cos_pool_t *p = NULL;
int is_cname = 0; int is_cname = 0;
@ -1314,9 +1333,11 @@ void s3DeleteObjectsByPrefix(const char *prefix) {}
void s3DeleteObjects(const char *object_name[], int nobject) {} void s3DeleteObjects(const char *object_name[], int nobject) {}
bool s3Exists(const char *object_name) { return false; } bool s3Exists(const char *object_name) { return false; }
bool s3Get(const char *object_name, const char *path) { return false; } bool s3Get(const char *object_name, const char *path) { return false; }
int32_t s3GetObjectsByPrefix(const char *prefix, const char* path) { return 0; } int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t size, bool check, uint8_t **ppBlock) {
int32_t s3GetObjectBlock(const char *object_name, int64_t offset, int64_t size, uint8_t **ppBlock) { return 0; } return 0;
}
void s3EvictCache(const char *path, long object_size) {} void s3EvictCache(const char *path, long object_size) {}
long s3Size(const char *object_name) { return 0; } long s3Size(const char *object_name) { return 0; }
int32_t s3GetObjectsByPrefix(const char *prefix, const char *path) { return 0; }
#endif #endif

View File

@ -2121,6 +2121,7 @@ _end:
char* buildCtbNameByGroupId(const char* stbFullName, uint64_t groupId) { char* buildCtbNameByGroupId(const char* stbFullName, uint64_t groupId) {
char* pBuf = taosMemoryCalloc(1, TSDB_TABLE_NAME_LEN + 1); char* pBuf = taosMemoryCalloc(1, TSDB_TABLE_NAME_LEN + 1);
if (!pBuf) { if (!pBuf) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return NULL; return NULL;
} }
int32_t code = buildCtbNameByGroupIdImpl(stbFullName, groupId, pBuf); int32_t code = buildCtbNameByGroupIdImpl(stbFullName, groupId, pBuf);
@ -2133,6 +2134,7 @@ char* buildCtbNameByGroupId(const char* stbFullName, uint64_t groupId) {
int32_t buildCtbNameByGroupIdImpl(const char* stbFullName, uint64_t groupId, char* cname) { int32_t buildCtbNameByGroupIdImpl(const char* stbFullName, uint64_t groupId, char* cname) {
if (stbFullName[0] == 0) { if (stbFullName[0] == 0) {
terrno = TSDB_CODE_INVALID_PARA;
return TSDB_CODE_FAILED; return TSDB_CODE_FAILED;
} }
@ -2142,6 +2144,7 @@ int32_t buildCtbNameByGroupIdImpl(const char* stbFullName, uint64_t groupId, cha
} }
if (cname == NULL) { if (cname == NULL) {
terrno = TSDB_CODE_INVALID_PARA;
taosArrayDestroy(tags); taosArrayDestroy(tags);
return TSDB_CODE_FAILED; return TSDB_CODE_FAILED;
} }

View File

@ -94,8 +94,8 @@ int32_t tsMonitorMaxLogs = 100;
bool tsMonitorComp = false; bool tsMonitorComp = false;
// audit // audit
bool tsEnableAudit = true; bool tsEnableAudit = true;
bool tsEnableAuditCreateTable = true; bool tsEnableAuditCreateTable = true;
// telem // telem
#ifdef TD_ENTERPRISE #ifdef TD_ENTERPRISE
@ -128,12 +128,12 @@ char tsSmlAutoChildTableNameDelimiter[TSDB_TABLE_NAME_LEN] = "";
// int32_t tsSmlBatchSize = 10000; // int32_t tsSmlBatchSize = 10000;
// checkpoint backup // checkpoint backup
char tsSnodeAddress[TSDB_FQDN_LEN] = {0}; char tsSnodeAddress[TSDB_FQDN_LEN] = {0};
int32_t tsRsyncPort = 873; int32_t tsRsyncPort = 873;
#ifdef WINDOWS #ifdef WINDOWS
char tsCheckpointBackupDir[PATH_MAX] = "C:\\TDengine\\data\\backup\\checkpoint\\"; char tsCheckpointBackupDir[PATH_MAX] = "C:\\TDengine\\data\\backup\\checkpoint\\";
#else #else
char tsCheckpointBackupDir[PATH_MAX] = "/var/lib/taos/backup/checkpoint/"; char tsCheckpointBackupDir[PATH_MAX] = "/var/lib/taos/backup/checkpoint/";
#endif #endif
// tmq // tmq
@ -231,7 +231,7 @@ char tsCompressor[32] = "ZSTD_COMPRESSOR"; // ZSTD_COMPRESSOR or GZIP_COMPR
#ifdef WINDOWS #ifdef WINDOWS
bool tsStartUdfd = false; bool tsStartUdfd = false;
#else #else
bool tsStartUdfd = true; bool tsStartUdfd = true;
#endif #endif
// wal // wal
@ -249,7 +249,7 @@ int32_t tsTransPullupInterval = 2;
int32_t tsMqRebalanceInterval = 2; int32_t tsMqRebalanceInterval = 2;
int32_t tsStreamCheckpointInterval = 60; int32_t tsStreamCheckpointInterval = 60;
float tsSinkDataRate = 2.0; float tsSinkDataRate = 2.0;
int32_t tsStreamNodeCheckInterval = 30; int32_t tsStreamNodeCheckInterval = 15;
int32_t tsTtlUnit = 86400; int32_t tsTtlUnit = 86400;
int32_t tsTtlPushIntervalSec = 10; int32_t tsTtlPushIntervalSec = 10;
int32_t tsTrimVDbIntervalSec = 60 * 60; // interval of trimming db in all vgroups int32_t tsTrimVDbIntervalSec = 60 * 60; // interval of trimming db in all vgroups
@ -335,7 +335,7 @@ int32_t taosSetS3Cfg(SConfig *pCfg) {
} }
if (tsS3BucketName[0] != '<') { if (tsS3BucketName[0] != '<') {
#if defined(USE_COS) || defined(USE_S3) #if defined(USE_COS) || defined(USE_S3)
if(tsDiskCfgNum > 1) tsS3Enabled = true; if (tsDiskCfgNum > 1) tsS3Enabled = true;
tsS3StreamEnabled = true; tsS3StreamEnabled = true;
#endif #endif
} }
@ -343,7 +343,9 @@ int32_t taosSetS3Cfg(SConfig *pCfg) {
return 0; return 0;
} }
struct SConfig *taosGetCfg() { return tsCfg; } struct SConfig *taosGetCfg() {
return tsCfg;
}
static int32_t taosLoadCfg(SConfig *pCfg, const char **envCmd, const char *inputCfgDir, const char *envFile, static int32_t taosLoadCfg(SConfig *pCfg, const char **envCmd, const char *inputCfgDir, const char *envFile,
char *apolloUrl) { char *apolloUrl) {
@ -453,8 +455,8 @@ static int32_t taosAddClientCfg(SConfig *pCfg) {
if (cfgAddBool(pCfg, "enableScience", tsEnableScience, CFG_SCOPE_CLIENT, CFG_DYN_NONE) != 0) return -1; if (cfgAddBool(pCfg, "enableScience", tsEnableScience, CFG_SCOPE_CLIENT, CFG_DYN_NONE) != 0) return -1;
if (cfgAddInt32(pCfg, "querySmaOptimize", tsQuerySmaOptimize, 0, 1, CFG_SCOPE_CLIENT, CFG_DYN_CLIENT) != 0) return -1; if (cfgAddInt32(pCfg, "querySmaOptimize", tsQuerySmaOptimize, 0, 1, CFG_SCOPE_CLIENT, CFG_DYN_CLIENT) != 0) return -1;
if (cfgAddBool(pCfg, "queryPlannerTrace", tsQueryPlannerTrace, CFG_SCOPE_CLIENT, CFG_DYN_CLIENT) != 0) return -1; if (cfgAddBool(pCfg, "queryPlannerTrace", tsQueryPlannerTrace, CFG_SCOPE_CLIENT, CFG_DYN_CLIENT) != 0) return -1;
if (cfgAddInt32(pCfg, "queryNodeChunkSize", tsQueryNodeChunkSize, 1024, 128 * 1024, CFG_SCOPE_CLIENT, CFG_DYN_CLIENT) != if (cfgAddInt32(pCfg, "queryNodeChunkSize", tsQueryNodeChunkSize, 1024, 128 * 1024, CFG_SCOPE_CLIENT,
0) CFG_DYN_CLIENT) != 0)
return -1; return -1;
if (cfgAddBool(pCfg, "queryUseNodeAllocator", tsQueryUseNodeAllocator, CFG_SCOPE_CLIENT, CFG_DYN_CLIENT) != 0) if (cfgAddBool(pCfg, "queryUseNodeAllocator", tsQueryUseNodeAllocator, CFG_SCOPE_CLIENT, CFG_DYN_CLIENT) != 0)
return -1; return -1;
@ -470,7 +472,8 @@ static int32_t taosAddClientCfg(SConfig *pCfg) {
// if (cfgAddInt32(pCfg, "smlBatchSize", tsSmlBatchSize, 1, INT32_MAX, CFG_SCOPE_CLIENT, CFG_DYN_NONE) != 0) // if (cfgAddInt32(pCfg, "smlBatchSize", tsSmlBatchSize, 1, INT32_MAX, CFG_SCOPE_CLIENT, CFG_DYN_NONE) != 0)
// return -1; // return -1;
if (cfgAddInt32(pCfg, "maxShellConns", tsMaxShellConns, 10, 50000000, CFG_SCOPE_CLIENT, CFG_DYN_NONE) != 0) return -1; if (cfgAddInt32(pCfg, "maxShellConns", tsMaxShellConns, 10, 50000000, CFG_SCOPE_CLIENT, CFG_DYN_NONE) != 0) return -1;
if (cfgAddInt32(pCfg, "maxInsertBatchRows", tsMaxInsertBatchRows, 1, INT32_MAX, CFG_SCOPE_CLIENT, CFG_DYN_CLIENT) != 0) if (cfgAddInt32(pCfg, "maxInsertBatchRows", tsMaxInsertBatchRows, 1, INT32_MAX, CFG_SCOPE_CLIENT, CFG_DYN_CLIENT) !=
0)
return -1; return -1;
if (cfgAddInt32(pCfg, "maxRetryWaitTime", tsMaxRetryWaitTime, 0, 86400000, CFG_SCOPE_BOTH, CFG_DYN_CLIENT) != 0) if (cfgAddInt32(pCfg, "maxRetryWaitTime", tsMaxRetryWaitTime, 0, 86400000, CFG_SCOPE_BOTH, CFG_DYN_CLIENT) != 0)
return -1; return -1;
@ -526,7 +529,8 @@ static int32_t taosAddSystemCfg(SConfig *pCfg) {
if (cfgAddBool(pCfg, "avx", tsAVXEnable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; if (cfgAddBool(pCfg, "avx", tsAVXEnable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1;
if (cfgAddBool(pCfg, "avx2", tsAVX2Enable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; if (cfgAddBool(pCfg, "avx2", tsAVX2Enable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1;
if (cfgAddBool(pCfg, "fma", tsFMAEnable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; if (cfgAddBool(pCfg, "fma", tsFMAEnable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1;
if (cfgAddBool(pCfg, "simdEnable", tsSIMDBuiltins, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; if (cfgAddBool(pCfg, "avx512", tsAVX512Enable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1;
if (cfgAddBool(pCfg, "simdEnable", tsSIMDEnable, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1;
if (cfgAddBool(pCfg, "tagFilterCache", tsTagFilterCache, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; if (cfgAddBool(pCfg, "tagFilterCache", tsTagFilterCache, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1;
if (cfgAddInt64(pCfg, "openMax", tsOpenMax, 0, INT64_MAX, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1; if (cfgAddInt64(pCfg, "openMax", tsOpenMax, 0, INT64_MAX, CFG_SCOPE_BOTH, CFG_DYN_NONE) != 0) return -1;
@ -557,7 +561,8 @@ static int32_t taosAddServerCfg(SConfig *pCfg) {
if (cfgAddInt32(pCfg, "supportVnodes", tsNumOfSupportVnodes, 0, 4096, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddInt32(pCfg, "supportVnodes", tsNumOfSupportVnodes, 0, 4096, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1;
if (cfgAddInt32(pCfg, "statusInterval", tsStatusInterval, 1, 30, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddInt32(pCfg, "statusInterval", tsStatusInterval, 1, 30, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1;
if (cfgAddInt32(pCfg, "minSlidingTime", tsMinSlidingTime, 1, 1000000, CFG_SCOPE_CLIENT, CFG_DYN_CLIENT) != 0) return -1; if (cfgAddInt32(pCfg, "minSlidingTime", tsMinSlidingTime, 1, 1000000, CFG_SCOPE_CLIENT, CFG_DYN_CLIENT) != 0)
return -1;
if (cfgAddInt32(pCfg, "minIntervalTime", tsMinIntervalTime, 1, 1000000, CFG_SCOPE_CLIENT, CFG_DYN_CLIENT) != 0) if (cfgAddInt32(pCfg, "minIntervalTime", tsMinIntervalTime, 1, 1000000, CFG_SCOPE_CLIENT, CFG_DYN_CLIENT) != 0)
return -1; return -1;
@ -605,7 +610,7 @@ static int32_t taosAddServerCfg(SConfig *pCfg) {
0) 0)
return -1; return -1;
tsNumOfVnodeRsmaThreads = tsNumOfCores; tsNumOfVnodeRsmaThreads = tsNumOfCores / 4;
tsNumOfVnodeRsmaThreads = TMAX(tsNumOfVnodeRsmaThreads, 4); tsNumOfVnodeRsmaThreads = TMAX(tsNumOfVnodeRsmaThreads, 4);
if (cfgAddInt32(pCfg, "numOfVnodeRsmaThreads", tsNumOfVnodeRsmaThreads, 1, 1024, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) if (cfgAddInt32(pCfg, "numOfVnodeRsmaThreads", tsNumOfVnodeRsmaThreads, 1, 1024, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0)
return -1; return -1;
@ -674,7 +679,8 @@ static int32_t taosAddServerCfg(SConfig *pCfg) {
if (cfgAddInt32(pCfg, "rsyncPort", tsRsyncPort, 1, 65535, CFG_SCOPE_BOTH, CFG_DYN_SERVER) != 0) return -1; if (cfgAddInt32(pCfg, "rsyncPort", tsRsyncPort, 1, 65535, CFG_SCOPE_BOTH, CFG_DYN_SERVER) != 0) return -1;
if (cfgAddString(pCfg, "snodeAddress", tsSnodeAddress, CFG_SCOPE_SERVER, CFG_DYN_SERVER) != 0) return -1; if (cfgAddString(pCfg, "snodeAddress", tsSnodeAddress, CFG_SCOPE_SERVER, CFG_DYN_SERVER) != 0) return -1;
if (cfgAddString(pCfg, "checkpointBackupDir", tsCheckpointBackupDir, CFG_SCOPE_SERVER, CFG_DYN_SERVER) != 0) return -1; if (cfgAddString(pCfg, "checkpointBackupDir", tsCheckpointBackupDir, CFG_SCOPE_SERVER, CFG_DYN_SERVER) != 0)
return -1;
if (cfgAddInt32(pCfg, "tmqMaxTopicNum", tmqMaxTopicNum, 1, 10000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) if (cfgAddInt32(pCfg, "tmqMaxTopicNum", tmqMaxTopicNum, 1, 10000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0)
return -1; return -1;
@ -700,7 +706,8 @@ static int32_t taosAddServerCfg(SConfig *pCfg) {
if (cfgAddInt32(pCfg, "uptimeInterval", tsUptimeInterval, 1, 100000, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddInt32(pCfg, "uptimeInterval", tsUptimeInterval, 1, 100000, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1;
if (cfgAddInt32(pCfg, "queryRsmaTolerance", tsQueryRsmaTolerance, 0, 900000, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) if (cfgAddInt32(pCfg, "queryRsmaTolerance", tsQueryRsmaTolerance, 0, 900000, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0)
return -1; return -1;
if (cfgAddInt32(pCfg, "timeseriesThreshold", tsTimeSeriesThreshold, 0, 2000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0) if (cfgAddInt32(pCfg, "timeseriesThreshold", tsTimeSeriesThreshold, 0, 2000, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) !=
0)
return -1; return -1;
if (cfgAddInt64(pCfg, "walFsyncDataSizeLimit", tsWalFsyncDataSizeLimit, 100 * 1024 * 1024, INT64_MAX, if (cfgAddInt64(pCfg, "walFsyncDataSizeLimit", tsWalFsyncDataSizeLimit, 100 * 1024 * 1024, INT64_MAX,
@ -743,7 +750,7 @@ static int32_t taosAddServerCfg(SConfig *pCfg) {
if (cfgAddString(pCfg, "s3Accesskey", tsS3AccessKey, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddString(pCfg, "s3Accesskey", tsS3AccessKey, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1;
if (cfgAddString(pCfg, "s3Endpoint", tsS3Endpoint, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddString(pCfg, "s3Endpoint", tsS3Endpoint, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1;
if (cfgAddString(pCfg, "s3BucketName", tsS3BucketName, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1; if (cfgAddString(pCfg, "s3BucketName", tsS3BucketName, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) return -1;
if (cfgAddInt32(pCfg, "s3BlockSize", tsS3BlockSize, -100, 1024 * 1024, CFG_SCOPE_SERVER, CFG_DYN_NONE) != 0) if (cfgAddInt32(pCfg, "s3BlockSize", tsS3BlockSize, -1, 1024 * 1024, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != 0)
return -1; return -1;
if (cfgAddInt32(pCfg, "s3BlockCacheSize", tsS3BlockCacheSize, 4, 1024 * 1024, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) != if (cfgAddInt32(pCfg, "s3BlockCacheSize", tsS3BlockCacheSize, 4, 1024 * 1024, CFG_SCOPE_SERVER, CFG_DYN_ENT_SERVER) !=
0) 0)
@ -1095,7 +1102,7 @@ static int32_t taosSetServerCfg(SConfig *pCfg) {
tsNumOfSnodeWriteThreads = cfgGetItem(pCfg, "numOfSnodeUniqueThreads")->i32; tsNumOfSnodeWriteThreads = cfgGetItem(pCfg, "numOfSnodeUniqueThreads")->i32;
tsRpcQueueMemoryAllowed = cfgGetItem(pCfg, "rpcQueueMemoryAllowed")->i64; tsRpcQueueMemoryAllowed = cfgGetItem(pCfg, "rpcQueueMemoryAllowed")->i64;
tsSIMDBuiltins = (bool)cfgGetItem(pCfg, "simdEnable")->bval; tsSIMDEnable = (bool)cfgGetItem(pCfg, "simdEnable")->bval;
tsTagFilterCache = (bool)cfgGetItem(pCfg, "tagFilterCache")->bval; tsTagFilterCache = (bool)cfgGetItem(pCfg, "tagFilterCache")->bval;
tsEnableMonitor = cfgGetItem(pCfg, "monitor")->bval; tsEnableMonitor = cfgGetItem(pCfg, "monitor")->bval;
@ -1687,6 +1694,7 @@ void taosCfgDynamicOptions(const char *option, const char *value) {
{"ttlBatchDropNum", &tsTtlBatchDropNum}, {"ttlBatchDropNum", &tsTtlBatchDropNum},
{"ttlFlushThreshold", &tsTtlFlushThreshold}, {"ttlFlushThreshold", &tsTtlFlushThreshold},
{"ttlPushInterval", &tsTtlPushIntervalSec}, {"ttlPushInterval", &tsTtlPushIntervalSec},
//{"s3BlockSize", &tsS3BlockSize},
{"s3BlockCacheSize", &tsS3BlockCacheSize}, {"s3BlockCacheSize", &tsS3BlockCacheSize},
{"s3PageCacheSize", &tsS3PageCacheSize}, {"s3PageCacheSize", &tsS3PageCacheSize},
{"s3UploadDelaySec", &tsS3UploadDelaySec}, {"s3UploadDelaySec", &tsS3UploadDelaySec},
@ -1710,8 +1718,8 @@ void taosCfgDynamicOptions(const char *option, const char *value) {
switch (pItem->dtype) { switch (pItem->dtype) {
case CFG_DTYPE_BOOL: { case CFG_DTYPE_BOOL: {
int32_t flag = atoi(value); int32_t flag = atoi(value);
bool *pVar = options[d].optionVar; bool *pVar = options[d].optionVar;
uInfo("%s set from %d to %d", optName, *pVar, flag); uInfo("%s set from %d to %d", optName, *pVar, flag);
*pVar = flag; *pVar = flag;
} break; } break;

View File

@ -8337,6 +8337,7 @@ int32_t tEncodeSBatchDeleteReq(SEncoder *pEncoder, const SBatchDeleteReq *pReq)
if (tEncodeSSingleDeleteReq(pEncoder, pOneReq) < 0) return -1; if (tEncodeSSingleDeleteReq(pEncoder, pOneReq) < 0) return -1;
} }
if (tEncodeI64(pEncoder, pReq->ctimeMs) < 0) return -1; if (tEncodeI64(pEncoder, pReq->ctimeMs) < 0) return -1;
if (tEncodeI8(pEncoder, pReq->level) < 0) return -1;
return 0; return 0;
} }
@ -8361,6 +8362,9 @@ int32_t tDecodeSBatchDeleteReq(SDecoder *pDecoder, SBatchDeleteReq *pReq) {
if (!tDecodeIsEnd(pDecoder)) { if (!tDecodeIsEnd(pDecoder)) {
if (tDecodeI64(pDecoder, &pReq->ctimeMs) < 0) return -1; if (tDecodeI64(pDecoder, &pReq->ctimeMs) < 0) return -1;
} }
if (!tDecodeIsEnd(pDecoder)) {
if (tDecodeI8(pDecoder, &pReq->level) < 0) return -1;
}
return 0; return 0;
} }

View File

@ -296,7 +296,10 @@ static int compareKv(const void* p1, const void* p2) {
void buildChildTableName(RandTableName* rName) { void buildChildTableName(RandTableName* rName) {
SStringBuilder sb = {0}; SStringBuilder sb = {0};
taosStringBuilderAppendStringLen(&sb, rName->stbFullName, rName->stbFullNameLen); taosStringBuilderAppendStringLen(&sb, rName->stbFullName, rName->stbFullNameLen);
if (sb.buf == NULL) return; if (sb.buf == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return;
}
taosArraySort(rName->tags, compareKv); taosArraySort(rName->tags, compareKv);
for (int j = 0; j < taosArrayGetSize(rName->tags); ++j) { for (int j = 0; j < taosArrayGetSize(rName->tags); ++j) {
taosStringBuilderAppendChar(&sb, ','); taosStringBuilderAppendChar(&sb, ',');

View File

@ -1580,6 +1580,7 @@ static bool needMoreDigits(SArray* formats, int32_t curIdx) {
/// @retval 0 for success /// @retval 0 for success
/// @retval -1 for format and s mismatch error /// @retval -1 for format and s mismatch error
/// @retval -2 if datetime err, like 2023-13-32 25:61:69 /// @retval -2 if datetime err, like 2023-13-32 25:61:69
/// @retval -3 if not supported
static int32_t char2ts(const char* s, SArray* formats, int64_t* ts, int32_t precision, const char** sErrPos, static int32_t char2ts(const char* s, SArray* formats, int64_t* ts, int32_t precision, const char** sErrPos,
int32_t* fErrIdx) { int32_t* fErrIdx) {
int32_t size = taosArrayGetSize(formats); int32_t size = taosArrayGetSize(formats);
@ -1589,6 +1590,7 @@ static int32_t char2ts(const char* s, SArray* formats, int64_t* ts, int32_t prec
int32_t hour = 0, min = 0, sec = 0, us = 0, ms = 0, ns = 0; int32_t hour = 0, min = 0, sec = 0, us = 0, ms = 0, ns = 0;
int32_t tzSign = 1, tz = tsTimezone; int32_t tzSign = 1, tz = tsTimezone;
int32_t err = 0; int32_t err = 0;
bool withYD = false, withMD = false;
for (int32_t i = 0; i < size && *s != '\0'; ++i) { for (int32_t i = 0; i < size && *s != '\0'; ++i) {
while (isspace(*s) && *s != '\0') { while (isspace(*s) && *s != '\0') {
@ -1782,6 +1784,7 @@ static int32_t char2ts(const char* s, SArray* formats, int64_t* ts, int32_t prec
} else { } else {
s = newPos; s = newPos;
} }
withYD = true;
} break; } break;
case TSFKW_DD: { case TSFKW_DD: {
const char* newPos = tsFormatStr2Int32(&md, s, 2, needMoreDigits(formats, i)); const char* newPos = tsFormatStr2Int32(&md, s, 2, needMoreDigits(formats, i));
@ -1790,6 +1793,7 @@ static int32_t char2ts(const char* s, SArray* formats, int64_t* ts, int32_t prec
} else { } else {
s = newPos; s = newPos;
} }
withMD = true;
} break; } break;
case TSFKW_D: { case TSFKW_D: {
const char* newPos = tsFormatStr2Int32(&wd, s, 1, needMoreDigits(formats, i)); const char* newPos = tsFormatStr2Int32(&wd, s, 1, needMoreDigits(formats, i));
@ -1843,6 +1847,10 @@ static int32_t char2ts(const char* s, SArray* formats, int64_t* ts, int32_t prec
return err; return err;
} }
} }
if (!withMD) {
// yyyy-mm-DDD, currently, the c api can't convert to correct timestamp, return not supported
if (withYD) return -3;
}
struct STm tm = {0}; struct STm tm = {0};
tm.tm.tm_year = year - 1900; tm.tm.tm_year = year - 1900;
tm.tm.tm_mon = mon; tm.tm.tm_mon = mon;
@ -1892,8 +1900,13 @@ int32_t taosChar2Ts(const char* format, SArray** formats, const char* tsStr, int
TSFormatNode* fNode = (taosArrayGet(*formats, fErrIdx)); TSFormatNode* fNode = (taosArrayGet(*formats, fErrIdx));
snprintf(errMsg, errMsgLen, "mismatch format for: %s and %s", sErrPos, snprintf(errMsg, errMsgLen, "mismatch format for: %s and %s", sErrPos,
fErrIdx < taosArrayGetSize(*formats) ? ((TSFormatNode*)taosArrayGet(*formats, fErrIdx))->key->name : ""); fErrIdx < taosArrayGetSize(*formats) ? ((TSFormatNode*)taosArrayGet(*formats, fErrIdx))->key->name : "");
code = TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_FORMAT_ERR;
} else if (code == -2) { } else if (code == -2) {
snprintf(errMsg, errMsgLen, "timestamp format error: %s -> %s", tsStr, format); snprintf(errMsg, errMsgLen, "timestamp format error: %s -> %s", tsStr, format);
code = TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_TS_ERR;
} else if (code == -3) {
snprintf(errMsg, errMsgLen, "timestamp format not supported");
code = TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_NOT_SUPPORTED;
} }
return code; return code;
} }

View File

@ -489,6 +489,7 @@ TEST(timeTest, char2ts) {
ASSERT_EQ(ts, 0); ASSERT_EQ(ts, 0);
ASSERT_EQ(0, TEST_char2ts("yyyy年 a a a MM/ddTZH", &ts, TSDB_TIME_PRECISION_MICRO, "1970年 a a a 1/1+0")); ASSERT_EQ(0, TEST_char2ts("yyyy年 a a a MM/ddTZH", &ts, TSDB_TIME_PRECISION_MICRO, "1970年 a a a 1/1+0"));
ASSERT_EQ(0, TEST_char2ts("yyyy年 a a a a a a a a a a a a a a a MM/ddTZH", &ts, TSDB_TIME_PRECISION_MICRO, "1970年 a ")); ASSERT_EQ(0, TEST_char2ts("yyyy年 a a a a a a a a a a a a a a a MM/ddTZH", &ts, TSDB_TIME_PRECISION_MICRO, "1970年 a "));
ASSERT_EQ(-3, TEST_char2ts("yyyy-mm-DDD", &ts, TSDB_TIME_PRECISION_MILLI, "1970-01-001"));
} }
#pragma GCC diagnostic pop #pragma GCC diagnostic pop

View File

@ -144,7 +144,7 @@ void vmCloseVnode(SVnodeMgmt *pMgmt, SVnodeObj *pVnode, bool commitAndRemoveWal)
char path[TSDB_FILENAME_LEN] = {0}; char path[TSDB_FILENAME_LEN] = {0};
bool atExit = true; bool atExit = true;
if (vnodeIsLeader(pVnode->pImpl)) { if (pVnode->pImpl && vnodeIsLeader(pVnode->pImpl)) {
vnodeProposeCommitOnNeed(pVnode->pImpl, atExit); vnodeProposeCommitOnNeed(pVnode->pImpl, atExit);
} }
@ -153,6 +153,10 @@ void vmCloseVnode(SVnodeMgmt *pMgmt, SVnodeObj *pVnode, bool commitAndRemoveWal)
taosThreadRwlockUnlock(&pMgmt->lock); taosThreadRwlockUnlock(&pMgmt->lock);
vmReleaseVnode(pMgmt, pVnode); vmReleaseVnode(pMgmt, pVnode);
if (pVnode->failed) {
ASSERT(pVnode->pImpl == NULL);
goto _closed;
}
dInfo("vgId:%d, pre close", pVnode->vgId); dInfo("vgId:%d, pre close", pVnode->vgId);
vnodePreClose(pVnode->pImpl); vnodePreClose(pVnode->pImpl);
@ -202,6 +206,8 @@ void vmCloseVnode(SVnodeMgmt *pMgmt, SVnodeObj *pVnode, bool commitAndRemoveWal)
vnodeClose(pVnode->pImpl); vnodeClose(pVnode->pImpl);
pVnode->pImpl = NULL; pVnode->pImpl = NULL;
_closed:
dInfo("vgId:%d, vnode is closed", pVnode->vgId); dInfo("vgId:%d, vnode is closed", pVnode->vgId);
if (commitAndRemoveWal) { if (commitAndRemoveWal) {
@ -386,7 +392,6 @@ static void *vmCloseVnodeInThread(void *param) {
for (int32_t v = 0; v < pThread->vnodeNum; ++v) { for (int32_t v = 0; v < pThread->vnodeNum; ++v) {
SVnodeObj *pVnode = pThread->ppVnodes[v]; SVnodeObj *pVnode = pThread->ppVnodes[v];
if (pVnode->failed) continue;
char stepDesc[TSDB_STEP_DESC_LEN] = {0}; char stepDesc[TSDB_STEP_DESC_LEN] = {0};
snprintf(stepDesc, TSDB_STEP_DESC_LEN, "vgId:%d, start to close, %d of %d have been closed", pVnode->vgId, snprintf(stepDesc, TSDB_STEP_DESC_LEN, "vgId:%d, start to close, %d of %d have been closed", pVnode->vgId,

View File

@ -3,6 +3,17 @@ add_library(dnode STATIC ${IMPLEMENT_SRC})
target_link_libraries( target_link_libraries(
dnode mgmt_mnode mgmt_qnode mgmt_snode mgmt_vnode mgmt_dnode dnode mgmt_mnode mgmt_qnode mgmt_snode mgmt_vnode mgmt_dnode
) )
IF (TD_STORAGE)
IF(${BUILD_WITH_S3})
add_definitions(-DUSE_S3)
ELSEIF(${BUILD_WITH_COS})
add_definitions(-DUSE_COS)
ENDIF()
ENDIF ()
target_include_directories( target_include_directories(
dnode dnode
PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/inc" PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/inc"

View File

@ -18,17 +18,17 @@
#include "audit.h" #include "audit.h"
#include "libs/function/tudf.h" #include "libs/function/tudf.h"
#define DM_INIT_AUDIT() \ #define DM_INIT_AUDIT() \
do { \ do { \
auditCfg.port = tsMonitorPort; \ auditCfg.port = tsMonitorPort; \
auditCfg.server = tsMonitorFqdn; \ auditCfg.server = tsMonitorFqdn; \
auditCfg.comp = tsMonitorComp; \ auditCfg.comp = tsMonitorComp; \
if (auditInit(&auditCfg) != 0) { \ if (auditInit(&auditCfg) != 0) { \
return -1; \ return -1; \
} \ } \
} while (0) } while (0)
static SDnode globalDnode = {0}; static SDnode globalDnode = {0};
SDnode *dmInstance() { return &globalDnode; } SDnode *dmInstance() { return &globalDnode; }
@ -146,6 +146,13 @@ static bool dmCheckDataDirVersion() {
return true; return true;
} }
#if defined(USE_S3)
extern int32_t s3Begin();
extern void s3End();
#endif
int32_t dmInit() { int32_t dmInit() {
dInfo("start to init dnode env"); dInfo("start to init dnode env");
if (dmDiskInit() != 0) return -1; if (dmDiskInit() != 0) return -1;
@ -156,6 +163,9 @@ int32_t dmInit() {
if (dmInitMonitor() != 0) return -1; if (dmInitMonitor() != 0) return -1;
if (dmInitAudit() != 0) return -1; if (dmInitAudit() != 0) return -1;
if (dmInitDnode(dmInstance()) != 0) return -1; if (dmInitDnode(dmInstance()) != 0) return -1;
#if defined(USE_S3)
if (s3Begin() != 0) return -1;
#endif
dInfo("dnode env is initialized"); dInfo("dnode env is initialized");
return 0; return 0;
@ -181,6 +191,9 @@ void dmCleanup() {
udfStopUdfd(); udfStopUdfd();
taosStopCacheRefreshWorker(); taosStopCacheRefreshWorker();
dmDiskClose(); dmDiskClose();
#if defined(USE_S3)
s3End();
#endif
dInfo("dnode env is cleaned up"); dInfo("dnode env is cleaned up");
taosCleanupCfg(); taosCleanupCfg();
@ -265,19 +278,19 @@ static int32_t dmProcessAlterNodeTypeReq(EDndNodeType ntype, SRpcMsg *pMsg) {
pWrapper = &pDnode->wrappers[ntype]; pWrapper = &pDnode->wrappers[ntype];
if(pWrapper->func.nodeRoleFp != NULL){ if (pWrapper->func.nodeRoleFp != NULL) {
ESyncRole role = (*pWrapper->func.nodeRoleFp)(pWrapper->pMgmt); ESyncRole role = (*pWrapper->func.nodeRoleFp)(pWrapper->pMgmt);
dInfo("node:%s, checking node role:%d", pWrapper->name, role); dInfo("node:%s, checking node role:%d", pWrapper->name, role);
if(role == TAOS_SYNC_ROLE_VOTER){ if (role == TAOS_SYNC_ROLE_VOTER) {
dError("node:%s, failed to alter node type since node already is role:%d", pWrapper->name, role); dError("node:%s, failed to alter node type since node already is role:%d", pWrapper->name, role);
terrno = TSDB_CODE_MNODE_ALREADY_IS_VOTER; terrno = TSDB_CODE_MNODE_ALREADY_IS_VOTER;
return -1; return -1;
} }
} }
if(pWrapper->func.isCatchUpFp != NULL){ if (pWrapper->func.isCatchUpFp != NULL) {
dInfo("node:%s, checking node catch up", pWrapper->name); dInfo("node:%s, checking node catch up", pWrapper->name);
if((*pWrapper->func.isCatchUpFp)(pWrapper->pMgmt) != 1){ if ((*pWrapper->func.isCatchUpFp)(pWrapper->pMgmt) != 1) {
terrno = TSDB_CODE_MNODE_NOT_CATCH_UP; terrno = TSDB_CODE_MNODE_NOT_CATCH_UP;
return -1; return -1;
} }
@ -394,7 +407,4 @@ void dmReportStartup(const char *pName, const char *pDesc) {
dDebug("step:%s, %s", pStartup->name, pStartup->desc); dDebug("step:%s, %s", pStartup->name, pStartup->desc);
} }
int64_t dmGetClusterId() { int64_t dmGetClusterId() { return globalDnode.data.clusterId; }
return globalDnode.data.clusterId;
}

View File

@ -1310,6 +1310,22 @@ static int32_t mndProcessConfigDnodeReq(SRpcMsg *pReq) {
} }
tFreeSMCfgDnodeReq(&cfgReq); tFreeSMCfgDnodeReq(&cfgReq);
return 0; return 0;
} else if (strncasecmp(cfgReq.config, "s3blocksize", 11) == 0) {
int32_t optLen = strlen("s3blocksize");
int32_t flag = -1;
int32_t code = mndMCfgGetValInt32(&cfgReq, optLen, &flag);
if (code < 0) return code;
if (flag > 1024 * 1024 || (flag > -1 && flag < 4) || flag < -1) {
mError("dnode:%d, failed to config s3blocksize since value:%d. Valid range: -1 or [4, 1024 * 1024]",
cfgReq.dnodeId, flag);
terrno = TSDB_CODE_INVALID_CFG;
tFreeSMCfgDnodeReq(&cfgReq);
return -1;
}
strcpy(dcfgReq.config, "s3blocksize");
snprintf(dcfgReq.value, TSDB_DNODE_VALUE_LEN, "%d", flag);
#endif #endif
} else { } else {
mndMCfg2DCfg(&cfgReq, &dcfgReq); mndMCfg2DCfg(&cfgReq, &dcfgReq);

View File

@ -313,7 +313,7 @@ _CONNECT:
code = 0; code = 0;
char detail[1000] = {0}; char detail[1000] = {0};
sprintf(detail, "%s:%d, app:%s", ip, pConn->port, connReq.app); sprintf(detail, "app:%s", connReq.app);
auditRecord(pReq, pMnode->clusterId, "login", "", "", detail, strlen(detail)); auditRecord(pReq, pMnode->clusterId, "login", "", "", detail, strlen(detail));

View File

@ -37,23 +37,23 @@
typedef struct SNodeEntry { typedef struct SNodeEntry {
int32_t nodeId; int32_t nodeId;
bool stageUpdated; // the stage has been updated due to the leader/follower change or node reboot. bool stageUpdated; // the stage has been updated due to the leader/follower change or node reboot.
SEpSet epset; // compare the epset to identify the vgroup tranferring between different dnodes. SEpSet epset; // compare the epset to identify the vgroup tranferring between different dnodes.
int64_t hbTimestamp; // second int64_t hbTimestamp; // second
} SNodeEntry; } SNodeEntry;
typedef struct SStreamExecInfo { typedef struct SStreamExecInfo {
SArray *pNodeEntryList; SArray *pNodeList;
int64_t ts; // snapshot ts int64_t ts; // snapshot ts
int64_t activeCheckpoint; // active check point id int64_t activeCheckpoint; // active check point id
SHashObj *pTaskMap; SHashObj * pTaskMap;
SArray *pTaskList; SArray * pTaskList;
TdThreadMutex lock; TdThreadMutex lock;
} SStreamExecInfo; } SStreamExecInfo;
typedef struct SVgroupChangeInfo { typedef struct SVgroupChangeInfo {
SHashObj *pDBMap; SHashObj *pDBMap;
SArray *pUpdateNodeList; // SArray<SNodeUpdateInfo> SArray * pUpdateNodeList; // SArray<SNodeUpdateInfo>
} SVgroupChangeInfo; } SVgroupChangeInfo;
static int32_t mndNodeCheckSentinel = 0; static int32_t mndNodeCheckSentinel = 0;
@ -78,7 +78,7 @@ static int32_t mndBuildStreamCheckpointSourceReq2(void **pBuf, int32_t *pLen, in
static int32_t mndProcessNodeCheck(SRpcMsg *pReq); static int32_t mndProcessNodeCheck(SRpcMsg *pReq);
static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg); static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg);
static SArray *extractNodeListFromStream(SMnode *pMnode); static SArray *extractNodeListFromStream(SMnode *pMnode);
static SArray *mndTakeVgroupSnapshot(SMnode *pMnode, bool* allReady); static SArray *mndTakeVgroupSnapshot(SMnode *pMnode, bool *allReady);
static SVgroupChangeInfo mndFindChangedNodeInfo(SMnode *pMnode, const SArray *pPrevNodeList, const SArray *pNodeList); static SVgroupChangeInfo mndFindChangedNodeInfo(SMnode *pMnode, const SArray *pPrevNodeList, const SArray *pNodeList);
@ -91,7 +91,7 @@ static void removeStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExe
static void keepStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode); static void keepStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode);
static int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot); static int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot);
static int32_t doKillActiveCheckpointTrans(SMnode *pMnode); static int32_t doKillActiveCheckpointTrans(SMnode *pMnode);
static int32_t setNodeEpsetExpiredFlag(const SArray* pNodeList); static int32_t setNodeEpsetExpiredFlag(const SArray *pNodeList);
int32_t mndInitStream(SMnode *pMnode) { int32_t mndInitStream(SMnode *pMnode) {
SSdbTable table = { SSdbTable table = {
@ -193,9 +193,9 @@ STREAM_ENCODE_OVER:
SSdbRow *mndStreamActionDecode(SSdbRaw *pRaw) { SSdbRow *mndStreamActionDecode(SSdbRaw *pRaw) {
terrno = TSDB_CODE_OUT_OF_MEMORY; terrno = TSDB_CODE_OUT_OF_MEMORY;
SSdbRow *pRow = NULL; SSdbRow * pRow = NULL;
SStreamObj *pStream = NULL; SStreamObj *pStream = NULL;
void *buf = NULL; void * buf = NULL;
int8_t sver = 0; int8_t sver = 0;
if (sdbGetRawSoftVer(pRaw, &sver) != 0) { if (sdbGetRawSoftVer(pRaw, &sver) != 0) {
@ -272,7 +272,7 @@ static int32_t mndStreamActionUpdate(SSdb *pSdb, SStreamObj *pOldStream, SStream
} }
SStreamObj *mndAcquireStream(SMnode *pMnode, char *streamName) { SStreamObj *mndAcquireStream(SMnode *pMnode, char *streamName) {
SSdb *pSdb = pMnode->pSdb; SSdb * pSdb = pMnode->pSdb;
SStreamObj *pStream = sdbAcquire(pSdb, SDB_STREAM, streamName); SStreamObj *pStream = sdbAcquire(pSdb, SDB_STREAM, streamName);
if (pStream == NULL && terrno == TSDB_CODE_SDB_OBJ_NOT_THERE) { if (pStream == NULL && terrno == TSDB_CODE_SDB_OBJ_NOT_THERE) {
terrno = TSDB_CODE_MND_STREAM_NOT_EXIST; terrno = TSDB_CODE_MND_STREAM_NOT_EXIST;
@ -325,7 +325,7 @@ static int32_t mndStreamGetPlanString(const char *ast, int8_t triggerType, int64
return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
} }
SNode *pAst = NULL; SNode * pAst = NULL;
int32_t code = nodesStringToNode(ast, &pAst); int32_t code = nodesStringToNode(ast, &pAst);
SQueryPlan *pPlan = NULL; SQueryPlan *pPlan = NULL;
@ -350,7 +350,7 @@ static int32_t mndStreamGetPlanString(const char *ast, int8_t triggerType, int64
} }
static int32_t mndBuildStreamObjFromCreateReq(SMnode *pMnode, SStreamObj *pObj, SCMCreateStreamReq *pCreate) { static int32_t mndBuildStreamObjFromCreateReq(SMnode *pMnode, SStreamObj *pObj, SCMCreateStreamReq *pCreate) {
SNode *pAst = NULL; SNode * pAst = NULL;
SQueryPlan *pPlan = NULL; SQueryPlan *pPlan = NULL;
mInfo("stream:%s to create", pCreate->name); mInfo("stream:%s to create", pCreate->name);
@ -589,7 +589,7 @@ int32_t mndPersistDropStreamLog(SMnode *pMnode, STrans *pTrans, SStreamObj *pStr
static int32_t mndCreateStbForStream(SMnode *pMnode, STrans *pTrans, const SStreamObj *pStream, const char *user) { static int32_t mndCreateStbForStream(SMnode *pMnode, STrans *pTrans, const SStreamObj *pStream, const char *user) {
SStbObj *pStb = NULL; SStbObj *pStb = NULL;
SDbObj *pDb = NULL; SDbObj * pDb = NULL;
SMCreateStbReq createReq = {0}; SMCreateStbReq createReq = {0};
tstrncpy(createReq.name, pStream->targetSTbName, TSDB_TABLE_FNAME_LEN); tstrncpy(createReq.name, pStream->targetSTbName, TSDB_TABLE_FNAME_LEN);
@ -715,10 +715,10 @@ int32_t mndDropStreamTasks(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream)
} }
static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) { static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) {
SMnode *pMnode = pReq->info.node; SMnode * pMnode = pReq->info.node;
int32_t code = -1; int32_t code = -1;
SStreamObj *pStream = NULL; SStreamObj * pStream = NULL;
SDbObj *pDb = NULL; SDbObj * pDb = NULL;
SCMCreateStreamReq createStreamReq = {0}; SCMCreateStreamReq createStreamReq = {0};
SStreamObj streamObj = {0}; SStreamObj streamObj = {0};
@ -761,7 +761,7 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) {
int32_t numOfStream = 0; int32_t numOfStream = 0;
SStreamObj *pStream = NULL; SStreamObj *pStream = NULL;
void *pIter = NULL; void * pIter = NULL;
while (1) { while (1) {
pIter = sdbFetch(pMnode->pSdb, SDB_STREAM, pIter, (void **)&pStream); pIter = sdbFetch(pMnode->pSdb, SDB_STREAM, pIter, (void **)&pStream);
@ -850,7 +850,7 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) {
mndTransDrop(pTrans); mndTransDrop(pTrans);
taosThreadMutexLock(&execInfo.lock); taosThreadMutexLock(&execInfo.lock);
mDebug("register to stream task node list"); mDebug("stream tasks register into node list");
keepStreamTasksInBuf(&streamObj, &execInfo); keepStreamTasksInBuf(&streamObj, &execInfo);
taosThreadMutexUnlock(&execInfo.lock); taosThreadMutexUnlock(&execInfo.lock);
@ -858,12 +858,12 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) {
SName name = {0}; SName name = {0};
tNameFromString(&name, createStreamReq.name, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE); tNameFromString(&name, createStreamReq.name, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE);
//reuse this function for stream // reuse this function for stream
//TODO // TODO
if (createStreamReq.sql != NULL) { if (createStreamReq.sql != NULL) {
auditRecord(pReq, pMnode->clusterId, "createStream", name.dbname, name.tname, auditRecord(pReq, pMnode->clusterId, "createStream", name.dbname, name.tname, createStreamReq.sql,
createStreamReq.sql, strlen(createStreamReq.sql)); strlen(createStreamReq.sql));
} }
_OVER: _OVER:
if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) { if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) {
@ -877,15 +877,30 @@ _OVER:
return code; return code;
} }
int64_t mndStreamGenChkpId(SMnode *pMnode) {
SStreamObj *pStream = NULL;
void * pIter = NULL;
SSdb * pSdb = pMnode->pSdb;
int64_t maxChkpId = 0;
while (1) {
pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream);
if (pIter == NULL) break;
maxChkpId = TMAX(maxChkpId, pStream->checkpointId);
sdbRelease(pSdb, pStream);
}
return maxChkpId + 1;
}
static int32_t mndProcessStreamCheckpointTmr(SRpcMsg *pReq) { static int32_t mndProcessStreamCheckpointTmr(SRpcMsg *pReq) {
SMnode *pMnode = pReq->info.node; SMnode *pMnode = pReq->info.node;
SSdb *pSdb = pMnode->pSdb; SSdb * pSdb = pMnode->pSdb;
if (sdbGetSize(pSdb, SDB_STREAM) <= 0) { if (sdbGetSize(pSdb, SDB_STREAM) <= 0) {
return 0; return 0;
} }
SMStreamDoCheckpointMsg *pMsg = rpcMallocCont(sizeof(SMStreamDoCheckpointMsg)); SMStreamDoCheckpointMsg *pMsg = rpcMallocCont(sizeof(SMStreamDoCheckpointMsg));
pMsg->checkpointId = taosGetTimestampMs(); pMsg->checkpointId = mndStreamGenChkpId(pMnode);
int32_t size = sizeof(SMStreamDoCheckpointMsg); int32_t size = sizeof(SMStreamDoCheckpointMsg);
SRpcMsg rpcMsg = {.msgType = TDMT_MND_STREAM_BEGIN_CHECKPOINT, .pCont = pMsg, .contLen = size}; SRpcMsg rpcMsg = {.msgType = TDMT_MND_STREAM_BEGIN_CHECKPOINT, .pCont = pMsg, .contLen = size};
@ -919,7 +934,7 @@ static int32_t mndBuildStreamCheckpointSourceReq2(void **pBuf, int32_t *pLen, in
return -1; return -1;
} }
void *abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); void * abuf = POINTER_SHIFT(buf, sizeof(SMsgHead));
SEncoder encoder; SEncoder encoder;
tEncoderInit(&encoder, abuf, tlen); tEncoderInit(&encoder, abuf, tlen);
tEncodeStreamCheckpointSourceReq(&encoder, &req); tEncodeStreamCheckpointSourceReq(&encoder, &req);
@ -1042,7 +1057,7 @@ static int32_t mndAddStreamCheckpointToTrans(STrans *pTrans, SStreamObj *pStream
int32_t totLevel = taosArrayGetSize(pStream->tasks); int32_t totLevel = taosArrayGetSize(pStream->tasks);
for (int32_t i = 0; i < totLevel; i++) { for (int32_t i = 0; i < totLevel; i++) {
SArray *pLevel = taosArrayGetP(pStream->tasks, i); SArray * pLevel = taosArrayGetP(pStream->tasks, i);
SStreamTask *pTask = taosArrayGetP(pLevel, 0); SStreamTask *pTask = taosArrayGetP(pLevel, 0);
if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
@ -1059,7 +1074,7 @@ static int32_t mndAddStreamCheckpointToTrans(STrans *pTrans, SStreamObj *pStream
return -1; return -1;
} }
void *buf; void * buf;
int32_t tlen; int32_t tlen;
if (mndBuildStreamCheckpointSourceReq2(&buf, &tlen, pTask->info.nodeId, chkptId, pTask->id.streamId, if (mndBuildStreamCheckpointSourceReq2(&buf, &tlen, pTask->info.nodeId, chkptId, pTask->id.streamId,
pTask->id.taskId) < 0) { pTask->id.taskId) < 0) {
@ -1070,7 +1085,8 @@ static int32_t mndAddStreamCheckpointToTrans(STrans *pTrans, SStreamObj *pStream
STransAction action = {0}; STransAction action = {0};
SEpSet epset = mndGetVgroupEpset(pMnode, pVgObj); SEpSet epset = mndGetVgroupEpset(pMnode, pVgObj);
initTransAction(&action, buf, tlen, TDMT_VND_STREAM_CHECK_POINT_SOURCE, &epset, TSDB_CODE_SYN_PROPOSE_NOT_READY); initTransAction(&action, buf, tlen, TDMT_VND_STREAM_CHECK_POINT_SOURCE, &epset,
TSDB_CODE_SYN_PROPOSE_NOT_READY);
mndReleaseVgroup(pMnode, pVgObj); mndReleaseVgroup(pMnode, pVgObj);
if (mndTransAppendRedoAction(pTrans, &action) != 0) { if (mndTransAppendRedoAction(pTrans, &action) != 0) {
@ -1110,9 +1126,9 @@ static int32_t mndAddStreamCheckpointToTrans(STrans *pTrans, SStreamObj *pStream
} }
static const char *mndGetStreamDB(SMnode *pMnode) { static const char *mndGetStreamDB(SMnode *pMnode) {
SSdb *pSdb = pMnode->pSdb; SSdb * pSdb = pMnode->pSdb;
SStreamObj *pStream = NULL; SStreamObj *pStream = NULL;
void *pIter = NULL; void * pIter = NULL;
pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream);
if (pIter == NULL) { if (pIter == NULL) {
@ -1125,47 +1141,51 @@ static const char *mndGetStreamDB(SMnode *pMnode) {
return p; return p;
} }
static int32_t initStreamNodeList(SMnode* pMnode) {
if (execInfo.pNodeList == NULL || (taosArrayGetSize(execInfo.pNodeList) == 0)) {
execInfo.pNodeList = taosArrayDestroy(execInfo.pNodeList);
execInfo.pNodeList = extractNodeListFromStream(pMnode);
}
return taosArrayGetSize(execInfo.pNodeList);
}
static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) { static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) {
SMnode *pMnode = pReq->info.node; SMnode * pMnode = pReq->info.node;
SSdb *pSdb = pMnode->pSdb; SSdb * pSdb = pMnode->pSdb;
void *pIter = NULL; void * pIter = NULL;
SStreamObj *pStream = NULL; SStreamObj *pStream = NULL;
int32_t code = 0; int32_t code = 0;
{ // check if the node update happens or not { // check if the node update happens or not
int64_t ts = taosGetTimestampSec(); int64_t ts = taosGetTimestampSec();
if (execInfo.pNodeEntryList == NULL || (taosArrayGetSize(execInfo.pNodeEntryList) == 0)) { taosThreadMutexLock(&execInfo.lock);
if (execInfo.pNodeEntryList != NULL) { int32_t numOfNodes = initStreamNodeList(pMnode);
execInfo.pNodeEntryList = taosArrayDestroy(execInfo.pNodeEntryList); taosThreadMutexUnlock(&execInfo.lock);
}
execInfo.pNodeEntryList = extractNodeListFromStream(pMnode); if (numOfNodes == 0) {
}
if (taosArrayGetSize(execInfo.pNodeEntryList) == 0) {
mDebug("stream task node change checking done, no vgroups exist, do nothing"); mDebug("stream task node change checking done, no vgroups exist, do nothing");
execInfo.ts = ts; execInfo.ts = ts;
return 0; return 0;
} }
for(int32_t i = 0; i < taosArrayGetSize(execInfo.pNodeEntryList); ++i) { for(int32_t i = 0; i < numOfNodes; ++i) {
SNodeEntry* pNodeEntry = taosArrayGet(execInfo.pNodeEntryList, i); SNodeEntry* pNodeEntry = taosArrayGet(execInfo.pNodeList, i);
if (pNodeEntry->stageUpdated) { if (pNodeEntry->stageUpdated) {
mDebug("stream task not ready due to node update detected, checkpoint not issued"); mDebug("stream task not ready due to node update detected, checkpoint not issued");
return 0; return 0;
} }
} }
bool allReady = true; bool allReady = true;
SArray *pNodeSnapshot = mndTakeVgroupSnapshot(pMnode, &allReady); SArray *pNodeSnapshot = mndTakeVgroupSnapshot(pMnode, &allReady);
if (!allReady) { if (!allReady) {
mWarn("not all vnodes are ready, ignore the checkpoint") mWarn("not all vnodes are ready, ignore the checkpoint") taosArrayDestroy(pNodeSnapshot);
taosArrayDestroy(pNodeSnapshot);
return 0; return 0;
} }
SVgroupChangeInfo changeInfo = mndFindChangedNodeInfo(pMnode, execInfo.pNodeEntryList, pNodeSnapshot); SVgroupChangeInfo changeInfo = mndFindChangedNodeInfo(pMnode, execInfo.pNodeList, pNodeSnapshot);
bool nodeUpdated = (taosArrayGetSize(changeInfo.pUpdateNodeList) > 0); bool nodeUpdated = (taosArrayGetSize(changeInfo.pUpdateNodeList) > 0);
taosArrayDestroy(changeInfo.pUpdateNodeList); taosArrayDestroy(changeInfo.pUpdateNodeList);
taosHashCleanup(changeInfo.pDBMap); taosHashCleanup(changeInfo.pDBMap);
@ -1182,15 +1202,15 @@ static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) {
taosThreadMutexLock(&execInfo.lock); taosThreadMutexLock(&execInfo.lock);
for (int32_t i = 0; i < taosArrayGetSize(execInfo.pTaskList); ++i) { for (int32_t i = 0; i < taosArrayGetSize(execInfo.pTaskList); ++i) {
STaskId *p = taosArrayGet(execInfo.pTaskList, i); STaskId * p = taosArrayGet(execInfo.pTaskList, i);
STaskStatusEntry* pEntry = taosHashGet(execInfo.pTaskMap, p, sizeof(*p)); STaskStatusEntry *pEntry = taosHashGet(execInfo.pTaskMap, p, sizeof(*p));
if (pEntry == NULL) { if (pEntry == NULL) {
continue; continue;
} }
if (pEntry->status != TASK_STATUS__READY) { if (pEntry->status != TASK_STATUS__READY) {
mDebug("s-task:0x%" PRIx64 "-0x%x (nodeId:%d) status:%s not ready, checkpoint msg not issued", mDebug("s-task:0x%" PRIx64 "-0x%x (nodeId:%d) status:%s not ready, checkpoint msg not issued",
pEntry->id.streamId, (int32_t)pEntry->id.taskId, 0, streamTaskGetStatusStr(pEntry->status)); pEntry->id.streamId, (int32_t)pEntry->id.taskId, 0, streamTaskGetStatusStr(pEntry->status));
ready = false; ready = false;
break; break;
} }
@ -1250,7 +1270,7 @@ static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) {
} }
static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) { static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) {
SMnode *pMnode = pReq->info.node; SMnode * pMnode = pReq->info.node;
SStreamObj *pStream = NULL; SStreamObj *pStream = NULL;
SMDropStreamReq dropReq = {0}; SMDropStreamReq dropReq = {0};
@ -1327,7 +1347,7 @@ static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) {
SName name = {0}; SName name = {0};
tNameFromString(&name, dropReq.name, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE); tNameFromString(&name, dropReq.name, T_NAME_ACCT | T_NAME_DB | T_NAME_TABLE);
//reuse this function for stream // reuse this function for stream
auditRecord(pReq, pMnode->clusterId, "dropStream", name.dbname, name.tname, dropReq.sql, dropReq.sqlLen); auditRecord(pReq, pMnode->clusterId, "dropStream", name.dbname, name.tname, dropReq.sql, dropReq.sqlLen);
@ -1379,7 +1399,7 @@ int32_t mndDropStreamByDb(SMnode *pMnode, STrans *pTrans, SDbObj *pDb) {
} }
int32_t mndGetNumOfStreams(SMnode *pMnode, char *dbName, int32_t *pNumOfStreams) { int32_t mndGetNumOfStreams(SMnode *pMnode, char *dbName, int32_t *pNumOfStreams) {
SSdb *pSdb = pMnode->pSdb; SSdb * pSdb = pMnode->pSdb;
SDbObj *pDb = mndAcquireDb(pMnode, dbName); SDbObj *pDb = mndAcquireDb(pMnode, dbName);
if (pDb == NULL) { if (pDb == NULL) {
terrno = TSDB_CODE_MND_DB_NOT_SELECTED; terrno = TSDB_CODE_MND_DB_NOT_SELECTED;
@ -1387,7 +1407,7 @@ int32_t mndGetNumOfStreams(SMnode *pMnode, char *dbName, int32_t *pNumOfStreams)
} }
int32_t numOfStreams = 0; int32_t numOfStreams = 0;
void *pIter = NULL; void * pIter = NULL;
while (1) { while (1) {
SStreamObj *pStream = NULL; SStreamObj *pStream = NULL;
pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream);
@ -1406,8 +1426,8 @@ int32_t mndGetNumOfStreams(SMnode *pMnode, char *dbName, int32_t *pNumOfStreams)
} }
static int32_t mndRetrieveStream(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, int32_t rows) { static int32_t mndRetrieveStream(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, int32_t rows) {
SMnode *pMnode = pReq->info.node; SMnode * pMnode = pReq->info.node;
SSdb *pSdb = pMnode->pSdb; SSdb * pSdb = pMnode->pSdb;
int32_t numOfRows = 0; int32_t numOfRows = 0;
SStreamObj *pStream = NULL; SStreamObj *pStream = NULL;
@ -1483,8 +1503,8 @@ static void mndCancelGetNextStream(SMnode *pMnode, void *pIter) {
} }
static int32_t mndRetrieveStreamTask(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, int32_t rowsCapacity) { static int32_t mndRetrieveStreamTask(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, int32_t rowsCapacity) {
SMnode *pMnode = pReq->info.node; SMnode * pMnode = pReq->info.node;
SSdb *pSdb = pMnode->pSdb; SSdb * pSdb = pMnode->pSdb;
int32_t numOfRows = 0; int32_t numOfRows = 0;
SStreamObj *pStream = NULL; SStreamObj *pStream = NULL;
@ -1573,13 +1593,13 @@ static int32_t mndRetrieveStreamTask(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock
// status // status
char status[20 + VARSTR_HEADER_SIZE] = {0}; char status[20 + VARSTR_HEADER_SIZE] = {0};
STaskId id = {.streamId = pTask->id.streamId, .taskId = pTask->id.taskId}; STaskId id = {.streamId = pTask->id.streamId, .taskId = pTask->id.taskId};
STaskStatusEntry* pe = taosHashGet(execInfo.pTaskMap, &id, sizeof(id)); STaskStatusEntry *pe = taosHashGet(execInfo.pTaskMap, &id, sizeof(id));
if (pe == NULL) { if (pe == NULL) {
continue; continue;
} }
const char* pStatus = streamTaskGetStatusStr(pe->status); const char *pStatus = streamTaskGetStatusStr(pe->status);
STR_TO_VARSTR(status, pStatus); STR_TO_VARSTR(status, pStatus);
// status // status
@ -1591,24 +1611,24 @@ static int32_t mndRetrieveStreamTask(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock
colDataSetVal(pColInfo, numOfRows, (const char *)&pe->stage, false); colDataSetVal(pColInfo, numOfRows, (const char *)&pe->stage, false);
// input queue // input queue
char vbuf[30] = {0}; char vbuf[30] = {0};
char buf[25] = {0}; char buf[25] = {0};
const char* queueInfoStr = "%4.2fMiB (%5.2f%)"; const char *queueInfoStr = "%4.2fMiB (%5.2f%)";
sprintf(buf, queueInfoStr, pe->inputQUsed, pe->inputRate); sprintf(buf, queueInfoStr, pe->inputQUsed, pe->inputRate);
STR_TO_VARSTR(vbuf, buf); STR_TO_VARSTR(vbuf, buf);
pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
colDataSetVal(pColInfo, numOfRows, (const char*)vbuf, false); colDataSetVal(pColInfo, numOfRows, (const char *)vbuf, false);
// output queue // output queue
// sprintf(buf, queueInfoStr, pe->outputQUsed, pe->outputRate); // sprintf(buf, queueInfoStr, pe->outputQUsed, pe->outputRate);
// STR_TO_VARSTR(vbuf, buf); // STR_TO_VARSTR(vbuf, buf);
// pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); // pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
// colDataSetVal(pColInfo, numOfRows, (const char*)vbuf, false); // colDataSetVal(pColInfo, numOfRows, (const char*)vbuf, false);
if (pTask->info.taskLevel == TASK_LEVEL__SINK) { if (pTask->info.taskLevel == TASK_LEVEL__SINK) {
const char* sinkStr = "%.2fMiB"; const char *sinkStr = "%.2fMiB";
sprintf(buf, sinkStr, pe->sinkDataSize); sprintf(buf, sinkStr, pe->sinkDataSize);
} else if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) { } else if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
// offset info // offset info
@ -1619,7 +1639,7 @@ static int32_t mndRetrieveStreamTask(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock
STR_TO_VARSTR(vbuf, buf); STR_TO_VARSTR(vbuf, buf);
pColInfo = taosArrayGet(pBlock->pDataBlock, cols++); pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
colDataSetVal(pColInfo, numOfRows, (const char*)vbuf, false); colDataSetVal(pColInfo, numOfRows, (const char *)vbuf, false);
numOfRows++; numOfRows++;
} }
@ -1663,7 +1683,7 @@ static int32_t mndPauseStreamTask(STrans *pTrans, SStreamTask *pTask) {
} }
int32_t mndPauseAllStreamTasks(STrans *pTrans, SStreamObj *pStream) { int32_t mndPauseAllStreamTasks(STrans *pTrans, SStreamObj *pStream) {
SArray* tasks = pStream->tasks; SArray *tasks = pStream->tasks;
int32_t size = taosArrayGetSize(tasks); int32_t size = taosArrayGetSize(tasks);
for (int32_t i = 0; i < size; i++) { for (int32_t i = 0; i < size; i++) {
@ -1700,7 +1720,7 @@ static int32_t mndPersistStreamLog(STrans *pTrans, const SStreamObj *pStream, in
} }
static int32_t mndProcessPauseStreamReq(SRpcMsg *pReq) { static int32_t mndProcessPauseStreamReq(SRpcMsg *pReq) {
SMnode *pMnode = pReq->info.node; SMnode * pMnode = pReq->info.node;
SStreamObj *pStream = NULL; SStreamObj *pStream = NULL;
SMPauseStreamReq pauseReq = {0}; SMPauseStreamReq pauseReq = {0};
@ -1816,7 +1836,7 @@ int32_t mndResumeAllStreamTasks(STrans *pTrans, SStreamObj *pStream, int8_t igUn
} }
static int32_t mndProcessResumeStreamReq(SRpcMsg *pReq) { static int32_t mndProcessResumeStreamReq(SRpcMsg *pReq) {
SMnode *pMnode = pReq->info.node; SMnode * pMnode = pReq->info.node;
SStreamObj *pStream = NULL; SStreamObj *pStream = NULL;
SMResumeStreamReq pauseReq = {0}; SMResumeStreamReq pauseReq = {0};
@ -1901,7 +1921,7 @@ static void initNodeUpdateMsg(SStreamTaskNodeUpdateMsg *pMsg, const SVgroupChang
} }
static int32_t doBuildStreamTaskUpdateMsg(void **pBuf, int32_t *pLen, SVgroupChangeInfo *pInfo, int32_t nodeId, static int32_t doBuildStreamTaskUpdateMsg(void **pBuf, int32_t *pLen, SVgroupChangeInfo *pInfo, int32_t nodeId,
SStreamTaskId* pId, int32_t transId) { SStreamTaskId *pId, int32_t transId) {
SStreamTaskNodeUpdateMsg req = {0}; SStreamTaskNodeUpdateMsg req = {0};
initNodeUpdateMsg(&req, pInfo, pId, transId); initNodeUpdateMsg(&req, pInfo, pId, transId);
@ -1924,7 +1944,7 @@ static int32_t doBuildStreamTaskUpdateMsg(void **pBuf, int32_t *pLen, SVgroupCha
return -1; return -1;
} }
void *abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); void * abuf = POINTER_SHIFT(buf, sizeof(SMsgHead));
SEncoder encoder; SEncoder encoder;
tEncoderInit(&encoder, abuf, tlen); tEncoderInit(&encoder, abuf, tlen);
tEncodeStreamTaskUpdateMsg(&encoder, &req); tEncodeStreamTaskUpdateMsg(&encoder, &req);
@ -1991,7 +2011,7 @@ static int32_t createStreamUpdateTrans(SStreamObj *pStream, SVgroupChangeInfo *p
for (int32_t k = 0; k < numOfTasks; ++k) { for (int32_t k = 0; k < numOfTasks; ++k) {
SStreamTask *pTask = taosArrayGetP(pLevel, k); SStreamTask *pTask = taosArrayGetP(pLevel, k);
void *pBuf = NULL; void * pBuf = NULL;
int32_t len = 0; int32_t len = 0;
streamTaskUpdateEpsetInfo(pTask, pInfo->pUpdateNodeList); streamTaskUpdateEpsetInfo(pTask, pInfo->pUpdateNodeList);
doBuildStreamTaskUpdateMsg(&pBuf, &len, pInfo, pTask->info.nodeId, &pTask->id, pTrans->id); doBuildStreamTaskUpdateMsg(&pBuf, &len, pInfo, pTask->info.nodeId, &pTask->id, pTrans->id);
@ -2012,7 +2032,7 @@ static int32_t createStreamUpdateTrans(SStreamObj *pStream, SVgroupChangeInfo *p
static bool isNodeEpsetChanged(const SEpSet *pPrevEpset, const SEpSet *pCurrent) { static bool isNodeEpsetChanged(const SEpSet *pPrevEpset, const SEpSet *pCurrent) {
const SEp *pEp = GET_ACTIVE_EP(pPrevEpset); const SEp *pEp = GET_ACTIVE_EP(pPrevEpset);
const SEp* p = GET_ACTIVE_EP(pCurrent); const SEp *p = GET_ACTIVE_EP(pCurrent);
if (pEp->port == p->port && strncmp(pEp->fqdn, p->fqdn, TSDB_FQDN_LEN) == 0) { if (pEp->port == p->port && strncmp(pEp->fqdn, p->fqdn, TSDB_FQDN_LEN) == 0) {
return false; return false;
@ -2069,9 +2089,9 @@ static SVgroupChangeInfo mndFindChangedNodeInfo(SMnode *pMnode, const SArray *pP
return info; return info;
} }
static SArray *mndTakeVgroupSnapshot(SMnode *pMnode, bool* allReady) { static SArray *mndTakeVgroupSnapshot(SMnode *pMnode, bool *allReady) {
SSdb *pSdb = pMnode->pSdb; SSdb * pSdb = pMnode->pSdb;
void *pIter = NULL; void * pIter = NULL;
SVgObj *pVgroup = NULL; SVgObj *pVgroup = NULL;
*allReady = true; *allReady = true;
@ -2083,20 +2103,21 @@ static SArray *mndTakeVgroupSnapshot(SMnode *pMnode, bool* allReady) {
break; break;
} }
SNodeEntry entry = {0}; SNodeEntry entry = {.nodeId = pVgroup->vgId, .hbTimestamp = pVgroup->updateTime};
entry.epset = mndGetVgroupEpset(pMnode, pVgroup); entry.epset = mndGetVgroupEpset(pMnode, pVgroup);
entry.nodeId = pVgroup->vgId;
entry.hbTimestamp = pVgroup->updateTime;
// if not all ready till now, no need to check the remaining vgroups.
if (*allReady) { if (*allReady) {
for (int32_t i = 0; i < pVgroup->replica; ++i) { for (int32_t i = 0; i < pVgroup->replica; ++i) {
if (!pVgroup->vnodeGid[i].syncRestore) { if (!pVgroup->vnodeGid[i].syncRestore) {
mInfo("vgId:%d not restored, not ready for checkpoint or other operations", pVgroup->vgId);
*allReady = false; *allReady = false;
break; break;
} }
ESyncState state = pVgroup->vnodeGid[i].syncState; ESyncState state = pVgroup->vnodeGid[i].syncState;
if (state == TAOS_SYNC_STATE_OFFLINE || state == TAOS_SYNC_STATE_ERROR) { if (state == TAOS_SYNC_STATE_OFFLINE || state == TAOS_SYNC_STATE_ERROR) {
mInfo("vgId:%d offline/err, not ready for checkpoint or other operations", pVgroup->vgId);
*allReady = false; *allReady = false;
break; break;
} }
@ -2136,8 +2157,8 @@ static int32_t mndProcessVgroupChange(SMnode *pMnode, SVgroupChangeInfo *pChange
// check all streams that involved this vnode should update the epset info // check all streams that involved this vnode should update the epset info
SStreamObj *pStream = NULL; SStreamObj *pStream = NULL;
void *pIter = NULL; void * pIter = NULL;
STrans *pTrans = NULL; STrans * pTrans = NULL;
while (1) { while (1) {
pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream);
@ -2198,9 +2219,9 @@ static int32_t mndProcessVgroupChange(SMnode *pMnode, SVgroupChangeInfo *pChange
} }
static SArray *extractNodeListFromStream(SMnode *pMnode) { static SArray *extractNodeListFromStream(SMnode *pMnode) {
SSdb *pSdb = pMnode->pSdb; SSdb * pSdb = pMnode->pSdb;
SStreamObj *pStream = NULL; SStreamObj *pStream = NULL;
void *pIter = NULL; void * pIter = NULL;
SHashObj *pHash = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false, HASH_NO_LOCK); SHashObj *pHash = taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_INT), false, HASH_NO_LOCK);
while (1) { while (1) {
@ -2247,9 +2268,9 @@ static SArray *extractNodeListFromStream(SMnode *pMnode) {
} }
static void doExtractTasksFromStream(SMnode *pMnode) { static void doExtractTasksFromStream(SMnode *pMnode) {
SSdb *pSdb = pMnode->pSdb; SSdb * pSdb = pMnode->pSdb;
SStreamObj *pStream = NULL; SStreamObj *pStream = NULL;
void *pIter = NULL; void * pIter = NULL;
while (1) { while (1) {
pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream);
@ -2284,11 +2305,11 @@ static int32_t doRemoveTasks(SStreamExecInfo *pExecNode, STaskId *pRemovedId) {
return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
} }
static bool taskNodeExists(SArray* pList, int32_t nodeId) { static bool taskNodeExists(SArray *pList, int32_t nodeId) {
size_t num = taosArrayGetSize(pList); size_t num = taosArrayGetSize(pList);
for(int32_t i = 0; i < num; ++i) { for (int32_t i = 0; i < num; ++i) {
SNodeEntry* pEntry = taosArrayGet(pList, i); SNodeEntry *pEntry = taosArrayGet(pList, i);
if (pEntry->nodeId == nodeId) { if (pEntry->nodeId == nodeId) {
return true; return true;
} }
@ -2298,12 +2319,12 @@ static bool taskNodeExists(SArray* pList, int32_t nodeId) {
} }
int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot) { int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot) {
SArray* pRemovedTasks = taosArrayInit(4, sizeof(STaskId)); SArray *pRemovedTasks = taosArrayInit(4, sizeof(STaskId));
int32_t numOfTask = taosArrayGetSize(execInfo.pTaskList); int32_t numOfTask = taosArrayGetSize(execInfo.pTaskList);
for(int32_t i = 0; i < numOfTask; ++i) { for (int32_t i = 0; i < numOfTask; ++i) {
STaskId* pId = taosArrayGet(execInfo.pTaskList, i); STaskId * pId = taosArrayGet(execInfo.pTaskList, i);
STaskStatusEntry* pEntry = taosHashGet(execInfo.pTaskMap, pId, sizeof(*pId)); STaskStatusEntry *pEntry = taosHashGet(execInfo.pTaskMap, pId, sizeof(*pId));
if(pEntry->nodeId == SNODE_HANDLE) continue; if(pEntry->nodeId == SNODE_HANDLE) continue;
@ -2313,21 +2334,21 @@ int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot) {
} }
} }
for(int32_t i = 0; i < taosArrayGetSize(pRemovedTasks); ++i) { for (int32_t i = 0; i < taosArrayGetSize(pRemovedTasks); ++i) {
STaskId* pId = taosArrayGet(pRemovedTasks, i); STaskId *pId = taosArrayGet(pRemovedTasks, i);
doRemoveTasks(&execInfo, pId); doRemoveTasks(&execInfo, pId);
} }
mDebug("remove invalid stream tasks:%d, remain:%d", (int32_t)taosArrayGetSize(pRemovedTasks), mDebug("remove invalid stream tasks:%d, remain:%d", (int32_t)taosArrayGetSize(pRemovedTasks),
(int32_t) taosArrayGetSize(execInfo.pTaskList)); (int32_t)taosArrayGetSize(execInfo.pTaskList));
int32_t size = taosArrayGetSize(pNodeSnapshot); int32_t size = taosArrayGetSize(pNodeSnapshot);
SArray* pValidNodeEntryList = taosArrayInit(4, sizeof(SNodeEntry)); SArray* pValidNodeEntryList = taosArrayInit(4, sizeof(SNodeEntry));
for(int32_t i = 0; i < taosArrayGetSize(execInfo.pNodeEntryList); ++i) { for(int32_t i = 0; i < taosArrayGetSize(execInfo.pNodeList); ++i) {
SNodeEntry* p = taosArrayGet(execInfo.pNodeEntryList, i); SNodeEntry* p = taosArrayGet(execInfo.pNodeList, i);
for(int32_t j = 0; j < size; ++j) { for (int32_t j = 0; j < size; ++j) {
SNodeEntry* pEntry = taosArrayGet(pNodeSnapshot, j); SNodeEntry *pEntry = taosArrayGet(pNodeSnapshot, j);
if (pEntry->nodeId == p->nodeId) { if (pEntry->nodeId == p->nodeId) {
taosArrayPush(pValidNodeEntryList, p); taosArrayPush(pValidNodeEntryList, p);
break; break;
@ -2335,10 +2356,10 @@ int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot) {
} }
} }
execInfo.pNodeEntryList = taosArrayDestroy(execInfo.pNodeEntryList); taosArrayDestroy(execInfo.pNodeList);
execInfo.pNodeEntryList = pValidNodeEntryList; execInfo.pNodeList = pValidNodeEntryList;
mDebug("remain %d valid node entries", (int32_t) taosArrayGetSize(pValidNodeEntryList)); mDebug("remain %d valid node entries", (int32_t)taosArrayGetSize(pValidNodeEntryList));
taosArrayDestroy(pRemovedTasks); taosArrayDestroy(pRemovedTasks);
return 0; return 0;
} }
@ -2346,6 +2367,7 @@ int32_t removeExpirednodeEntryAndTask(SArray *pNodeSnapshot) {
// this function runs by only one thread, so it is not multi-thread safe // this function runs by only one thread, so it is not multi-thread safe
static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) { static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) {
int32_t code = 0; int32_t code = 0;
int32_t old = atomic_val_compare_exchange_32(&mndNodeCheckSentinel, 0, 1); int32_t old = atomic_val_compare_exchange_32(&mndNodeCheckSentinel, 0, 1);
if (old != 0) { if (old != 0) {
mDebug("still in checking node change"); mDebug("still in checking node change");
@ -2356,23 +2378,21 @@ static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) {
int64_t ts = taosGetTimestampSec(); int64_t ts = taosGetTimestampSec();
SMnode *pMnode = pMsg->info.node; SMnode *pMnode = pMsg->info.node;
if (execInfo.pNodeEntryList == NULL || (taosArrayGetSize(execInfo.pNodeEntryList) == 0)) {
if (execInfo.pNodeEntryList != NULL) {
execInfo.pNodeEntryList = taosArrayDestroy(execInfo.pNodeEntryList);
}
execInfo.pNodeEntryList = extractNodeListFromStream(pMnode);
}
if (taosArrayGetSize(execInfo.pNodeEntryList) == 0) { taosThreadMutexLock(&execInfo.lock);
int32_t numOfNodes = initStreamNodeList(pMnode);
taosThreadMutexUnlock(&execInfo.lock);
if (numOfNodes == 0) {
mDebug("end to do stream task node change checking, no vgroup exists, do nothing"); mDebug("end to do stream task node change checking, no vgroup exists, do nothing");
execInfo.ts = ts; execInfo.ts = ts;
atomic_store_32(&mndNodeCheckSentinel, 0); atomic_store_32(&mndNodeCheckSentinel, 0);
return 0; return 0;
} }
bool allVnodeReady = true; bool allVgroupsReady = true;
SArray *pNodeSnapshot = mndTakeVgroupSnapshot(pMnode, &allVnodeReady); SArray *pNodeSnapshot = mndTakeVgroupSnapshot(pMnode, &allVgroupsReady);
if (!allVnodeReady) { if (!allVgroupsReady) {
taosArrayDestroy(pNodeSnapshot); taosArrayDestroy(pNodeSnapshot);
atomic_store_32(&mndNodeCheckSentinel, 0); atomic_store_32(&mndNodeCheckSentinel, 0);
mWarn("not all vnodes are ready, ignore the exec nodeUpdate check"); mWarn("not all vnodes are ready, ignore the exec nodeUpdate check");
@ -2382,9 +2402,8 @@ static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) {
taosThreadMutexLock(&execInfo.lock); taosThreadMutexLock(&execInfo.lock);
removeExpirednodeEntryAndTask(pNodeSnapshot); removeExpirednodeEntryAndTask(pNodeSnapshot);
SVgroupChangeInfo changeInfo = mndFindChangedNodeInfo(pMnode, execInfo.pNodeEntryList, pNodeSnapshot); SVgroupChangeInfo changeInfo = mndFindChangedNodeInfo(pMnode, execInfo.pNodeList, pNodeSnapshot);
if (taosArrayGetSize(changeInfo.pUpdateNodeList) > 0) { if (taosArrayGetSize(changeInfo.pUpdateNodeList) > 0) {
// kill current active checkpoint transaction, since the transaction is vnode wide. // kill current active checkpoint transaction, since the transaction is vnode wide.
doKillActiveCheckpointTrans(pMnode); doKillActiveCheckpointTrans(pMnode);
code = mndProcessVgroupChange(pMnode, &changeInfo); code = mndProcessVgroupChange(pMnode, &changeInfo);
@ -2392,8 +2411,8 @@ static int32_t mndProcessNodeCheckReq(SRpcMsg *pMsg) {
// keep the new vnode snapshot // keep the new vnode snapshot
if (code == TSDB_CODE_SUCCESS || code == TSDB_CODE_ACTION_IN_PROGRESS) { if (code == TSDB_CODE_SUCCESS || code == TSDB_CODE_ACTION_IN_PROGRESS) {
mDebug("create trans successfully, update cached node list"); mDebug("create trans successfully, update cached node list");
taosArrayDestroy(execInfo.pNodeEntryList); taosArrayDestroy(execInfo.pNodeList);
execInfo.pNodeEntryList = pNodeSnapshot; execInfo.pNodeList = pNodeSnapshot;
execInfo.ts = ts; execInfo.ts = ts;
} else { } else {
mDebug("unexpect code during create nodeUpdate trans, code:%s", tstrerror(code)); mDebug("unexpect code during create nodeUpdate trans, code:%s", tstrerror(code));
@ -2419,7 +2438,7 @@ typedef struct SMStreamNodeCheckMsg {
static int32_t mndProcessNodeCheck(SRpcMsg *pReq) { static int32_t mndProcessNodeCheck(SRpcMsg *pReq) {
SMnode *pMnode = pReq->info.node; SMnode *pMnode = pReq->info.node;
SSdb *pSdb = pMnode->pSdb; SSdb * pSdb = pMnode->pSdb;
if (sdbGetSize(pSdb, SDB_STREAM) <= 0) { if (sdbGetSize(pSdb, SDB_STREAM) <= 0) {
return 0; return 0;
} }
@ -2443,7 +2462,7 @@ void keepStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode) {
SStreamTask *pTask = taosArrayGetP(pLevel, j); SStreamTask *pTask = taosArrayGetP(pLevel, j);
STaskId id = {.streamId = pTask->id.streamId, .taskId = pTask->id.taskId}; STaskId id = {.streamId = pTask->id.streamId, .taskId = pTask->id.taskId};
void *p = taosHashGet(pExecNode->pTaskMap, &id, sizeof(id)); void * p = taosHashGet(pExecNode->pTaskMap, &id, sizeof(id));
if (p == NULL) { if (p == NULL) {
STaskStatusEntry entry = {0}; STaskStatusEntry entry = {0};
streamTaskStatusInit(&entry, pTask); streamTaskStatusInit(&entry, pTask);
@ -2457,7 +2476,7 @@ void keepStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode) {
} }
} }
void removeStreamTasksInBuf(SStreamObj* pStream, SStreamExecInfo * pExecNode) { void removeStreamTasksInBuf(SStreamObj *pStream, SStreamExecInfo *pExecNode) {
int32_t level = taosArrayGetSize(pStream->tasks); int32_t level = taosArrayGetSize(pStream->tasks);
for (int32_t i = 0; i < level; i++) { for (int32_t i = 0; i < level; i++) {
SArray *pLevel = taosArrayGetP(pStream->tasks, i); SArray *pLevel = taosArrayGetP(pStream->tasks, i);
@ -2467,12 +2486,12 @@ void removeStreamTasksInBuf(SStreamObj* pStream, SStreamExecInfo * pExecNode) {
SStreamTask *pTask = taosArrayGetP(pLevel, j); SStreamTask *pTask = taosArrayGetP(pLevel, j);
STaskId id = {.streamId = pTask->id.streamId, .taskId = pTask->id.taskId}; STaskId id = {.streamId = pTask->id.streamId, .taskId = pTask->id.taskId};
void *p = taosHashGet(pExecNode->pTaskMap, &id, sizeof(id)); void * p = taosHashGet(pExecNode->pTaskMap, &id, sizeof(id));
if (p != NULL) { if (p != NULL) {
taosHashRemove(pExecNode->pTaskMap, &id, sizeof(id)); taosHashRemove(pExecNode->pTaskMap, &id, sizeof(id));
for(int32_t k = 0; k < taosArrayGetSize(pExecNode->pTaskList); ++k) { for (int32_t k = 0; k < taosArrayGetSize(pExecNode->pTaskList); ++k) {
STaskId* pId = taosArrayGet(pExecNode->pTaskList, k); STaskId *pId = taosArrayGet(pExecNode->pTaskList, k);
if (pId->taskId == id.taskId && pId->streamId == id.streamId) { if (pId->taskId == id.taskId && pId->streamId == id.streamId) {
taosArrayRemove(pExecNode->pTaskList, k); taosArrayRemove(pExecNode->pTaskList, k);
mInfo("s-task:0x%x removed from buffer, remain:%d", (int32_t)id.taskId, mInfo("s-task:0x%x removed from buffer, remain:%d", (int32_t)id.taskId,
@ -2480,7 +2499,6 @@ void removeStreamTasksInBuf(SStreamObj* pStream, SStreamExecInfo * pExecNode) {
break; break;
} }
} }
} }
} }
} }
@ -2510,7 +2528,7 @@ STrans *doCreateTrans(SMnode *pMnode, SStreamObj *pStream, const char *name) {
return pTrans; return pTrans;
} }
int32_t createStreamResetStatusTrans(SMnode* pMnode, SStreamObj* pStream) { int32_t createStreamResetStatusTrans(SMnode *pMnode, SStreamObj *pStream) {
STrans *pTrans = doCreateTrans(pMnode, pStream, "stream-task-reset"); STrans *pTrans = doCreateTrans(pMnode, pStream, "stream-task-reset");
if (pTrans == NULL) { if (pTrans == NULL) {
return terrno; return terrno;
@ -2527,7 +2545,7 @@ int32_t createStreamResetStatusTrans(SMnode* pMnode, SStreamObj* pStream) {
SStreamTask *pTask = taosArrayGetP(pLevel, k); SStreamTask *pTask = taosArrayGetP(pLevel, k);
// todo extract method, with pause stream task // todo extract method, with pause stream task
SVResetStreamTaskReq* pReq = taosMemoryCalloc(1, sizeof(SVResetStreamTaskReq)); SVResetStreamTaskReq *pReq = taosMemoryCalloc(1, sizeof(SVResetStreamTaskReq));
if (pReq == NULL) { if (pReq == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY; terrno = TSDB_CODE_OUT_OF_MEMORY;
mError("failed to malloc in reset stream, size:%" PRIzu ", code:%s", sizeof(SVResetStreamTaskReq), mError("failed to malloc in reset stream, size:%" PRIzu ", code:%s", sizeof(SVResetStreamTaskReq),
@ -2573,9 +2591,9 @@ int32_t createStreamResetStatusTrans(SMnode* pMnode, SStreamObj* pStream) {
int32_t doKillActiveCheckpointTrans(SMnode *pMnode) { int32_t doKillActiveCheckpointTrans(SMnode *pMnode) {
int32_t transId = 0; int32_t transId = 0;
SSdb *pSdb = pMnode->pSdb; SSdb * pSdb = pMnode->pSdb;
STrans *pTrans = NULL; STrans *pTrans = NULL;
void *pIter = NULL; void * pIter = NULL;
while (1) { while (1) {
pIter = sdbFetch(pSdb, SDB_TRANS, pIter, (void **)&pTrans); pIter = sdbFetch(pSdb, SDB_TRANS, pIter, (void **)&pTrans);
@ -2594,7 +2612,7 @@ int32_t doKillActiveCheckpointTrans(SMnode *pMnode) {
} }
if (transId == 0) { if (transId == 0) {
mError("failed to find the checkpoint trans, reset not executed"); mDebug("failed to find the checkpoint trans, reset not executed");
return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
} }
@ -2606,13 +2624,13 @@ int32_t doKillActiveCheckpointTrans(SMnode *pMnode) {
return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
} }
int32_t mndResetFromCheckpoint(SMnode* pMnode) { int32_t mndResetFromCheckpoint(SMnode *pMnode) {
doKillActiveCheckpointTrans(pMnode); doKillActiveCheckpointTrans(pMnode);
// set all tasks status to be normal, refactor later to be stream level, instead of vnode level. // set all tasks status to be normal, refactor later to be stream level, instead of vnode level.
SSdb *pSdb = pMnode->pSdb; SSdb * pSdb = pMnode->pSdb;
SStreamObj *pStream = NULL; SStreamObj *pStream = NULL;
void *pIter = NULL; void * pIter = NULL;
while (1) { while (1) {
pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream); pIter = sdbFetch(pSdb, SDB_STREAM, pIter, (void **)&pStream);
if (pIter == NULL) { if (pIter == NULL) {
@ -2631,18 +2649,20 @@ int32_t mndResetFromCheckpoint(SMnode* pMnode) {
return 0; return 0;
} }
int32_t setNodeEpsetExpiredFlag(const SArray* pNodeList) { int32_t setNodeEpsetExpiredFlag(const SArray *pNodeList) {
int32_t num = taosArrayGetSize(pNodeList); int32_t num = taosArrayGetSize(pNodeList);
mInfo("set node expired for %d nodes", num);
for (int k = 0; k < num; ++k) { for (int k = 0; k < num; ++k) {
int32_t* pVgId = taosArrayGet(pNodeList, k); int32_t* pVgId = taosArrayGet(pNodeList, k);
mInfo("set node expired for nodeId:%d, total:%d", *pVgId, num);
int32_t numOfNodes = taosArrayGetSize(execInfo.pNodeEntryList); int32_t numOfNodes = taosArrayGetSize(execInfo.pNodeList);
for (int i = 0; i < numOfNodes; ++i) { for (int i = 0; i < numOfNodes; ++i) {
SNodeEntry* pNodeEntry = taosArrayGet(execInfo.pNodeEntryList, i); SNodeEntry* pNodeEntry = taosArrayGet(execInfo.pNodeList, i);
if (pNodeEntry->nodeId == *pVgId) { if (pNodeEntry->nodeId == *pVgId) {
mInfo("vgId:%d expired in stream task, needs update nodeEp", *pVgId); mInfo("vgId:%d expired for some stream tasks, needs update nodeEp", *pVgId);
pNodeEntry->stageUpdated = true; pNodeEntry->stageUpdated = true;
break; break;
} }
@ -2653,11 +2673,10 @@ int32_t setNodeEpsetExpiredFlag(const SArray* pNodeList) {
} }
static void updateStageInfo(STaskStatusEntry* pTaskEntry, int64_t stage) { static void updateStageInfo(STaskStatusEntry* pTaskEntry, int64_t stage) {
int32_t numOfNodes = taosArrayGetSize(execInfo.pNodeEntryList); int32_t numOfNodes = taosArrayGetSize(execInfo.pNodeList);
for(int32_t j = 0; j < numOfNodes; ++j) { for(int32_t j = 0; j < numOfNodes; ++j) {
SNodeEntry* pNodeEntry = taosArrayGet(execInfo.pNodeEntryList, j); SNodeEntry* pNodeEntry = taosArrayGet(execInfo.pNodeList, j);
if (pNodeEntry->nodeId == pTaskEntry->nodeId) { if (pNodeEntry->nodeId == pTaskEntry->nodeId) {
mInfo("vgId:%d stage updated from %"PRId64 " to %"PRId64 ", nodeUpdate trigger by s-task:0x%" PRIx64, pTaskEntry->nodeId, mInfo("vgId:%d stage updated from %"PRId64 " to %"PRId64 ", nodeUpdate trigger by s-task:0x%" PRIx64, pTaskEntry->nodeId,
pTaskEntry->stage, stage, pTaskEntry->id.taskId); pTaskEntry->stage, stage, pTaskEntry->id.taskId);
@ -2669,7 +2688,7 @@ static void updateStageInfo(STaskStatusEntry* pTaskEntry, int64_t stage) {
} }
int32_t mndProcessStreamHb(SRpcMsg *pReq) { int32_t mndProcessStreamHb(SRpcMsg *pReq) {
SMnode *pMnode = pReq->info.node; SMnode * pMnode = pReq->info.node;
SStreamHbMsg req = {0}; SStreamHbMsg req = {0};
bool checkpointFailed = false; bool checkpointFailed = false;
@ -2688,12 +2707,20 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) {
mTrace("receive stream-meta hb from vgId:%d, active numOfTasks:%d", req.vgId, req.numOfTasks); mTrace("receive stream-meta hb from vgId:%d, active numOfTasks:%d", req.vgId, req.numOfTasks);
taosThreadMutexLock(&execInfo.lock); taosThreadMutexLock(&execInfo.lock);
// extract stream task list
int32_t numOfExisted = taosHashGetSize(execInfo.pTaskMap); int32_t numOfExisted = taosHashGetSize(execInfo.pTaskMap);
if (numOfExisted == 0) { if (numOfExisted == 0) {
doExtractTasksFromStream(pMnode); doExtractTasksFromStream(pMnode);
} }
setNodeEpsetExpiredFlag(req.pUpdateNodes); initStreamNodeList(pMnode);
int32_t numOfUpdated = taosArrayGetSize(req.pUpdateNodes);
if (numOfUpdated > 0) {
mDebug("%d stream node(s) need updated from report of hbMsg(vgId:%d)", numOfUpdated, req.vgId);
setNodeEpsetExpiredFlag(req.pUpdateNodes);
}
bool snodeChanged = false; bool snodeChanged = false;
for (int32_t i = 0; i < req.numOfTasks; ++i) { for (int32_t i = 0; i < req.numOfTasks; ++i) {
@ -2724,15 +2751,15 @@ int32_t mndProcessStreamHb(SRpcMsg *pReq) {
pTaskEntry->status = p->status; pTaskEntry->status = p->status;
if (p->status != TASK_STATUS__READY) { if (p->status != TASK_STATUS__READY) {
mDebug("received s-task:0x%"PRIx64" not in ready status:%s", p->id.taskId, streamTaskGetStatusStr(p->status)); mDebug("received s-task:0x%" PRIx64 " not in ready status:%s", p->id.taskId, streamTaskGetStatusStr(p->status));
} }
} }
// current checkpoint is failed, rollback from the checkpoint trans // current checkpoint is failed, rollback from the checkpoint trans
// kill the checkpoint trans and then set all tasks status to be normal // kill the checkpoint trans and then set all tasks status to be normal
if (checkpointFailed && activeCheckpointId != 0) { if (checkpointFailed && activeCheckpointId != 0) {
bool allReady = true; bool allReady = true;
SArray* p = mndTakeVgroupSnapshot(pMnode, &allReady); SArray *p = mndTakeVgroupSnapshot(pMnode, &allReady);
taosArrayDestroy(p); taosArrayDestroy(p);
if (allReady || snodeChanged) { if (allReady || snodeChanged) {

View File

@ -1561,7 +1561,11 @@ static int32_t mndProcessCreateUserReq(SRpcMsg *pReq) {
code = mndCreateUser(pMnode, pOperUser->acct, &createReq, pReq); code = mndCreateUser(pMnode, pOperUser->acct, &createReq, pReq);
if (code == 0) code = TSDB_CODE_ACTION_IN_PROGRESS; if (code == 0) code = TSDB_CODE_ACTION_IN_PROGRESS;
auditRecord(pReq, pMnode->clusterId, "createUser", "", createReq.user, createReq.sql, createReq.sqlLen); char detail[1000] = {0};
sprintf(detail, "enable:%d, superUser:%d, sysInfo:%d, password:xxx",
createReq.enable, createReq.superUser, createReq.sysInfo);
auditRecord(pReq, pMnode->clusterId, "createUser", "", createReq.user, detail, strlen(detail));
_OVER: _OVER:
if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) { if (code != 0 && code != TSDB_CODE_ACTION_IN_PROGRESS) {

View File

@ -108,6 +108,7 @@ struct SRSmaStat {
int64_t refId; // shared by fetch tasks int64_t refId; // shared by fetch tasks
volatile int64_t nBufItems; // number of items in queue buffer volatile int64_t nBufItems; // number of items in queue buffer
SRWLatch lock; // r/w lock for rsma fs(e.g. qtaskinfo) SRWLatch lock; // r/w lock for rsma fs(e.g. qtaskinfo)
volatile int32_t execStat; // 0 succeed, other failed
volatile int32_t nFetchAll; // active number of fetch all volatile int32_t nFetchAll; // active number of fetch all
volatile int8_t triggerStat; // shared by fetch tasks volatile int8_t triggerStat; // shared by fetch tasks
volatile int8_t commitStat; // 0 not in committing, 1 in committing volatile int8_t commitStat; // 0 not in committing, 1 in committing
@ -115,6 +116,7 @@ struct SRSmaStat {
SRSmaFS fs; // for recovery/snapshot r/w SRSmaFS fs; // for recovery/snapshot r/w
SHashObj *infoHash; // key: suid, value: SRSmaInfo SHashObj *infoHash; // key: suid, value: SRSmaInfo
tsem_t notEmpty; // has items in queue buffer tsem_t notEmpty; // has items in queue buffer
SArray *blocks; // SArray<SSDataBlock>
}; };
struct SSmaStat { struct SSmaStat {
@ -136,13 +138,18 @@ struct SSmaStat {
#define RSMA_FS_LOCK(r) (&(r)->lock) #define RSMA_FS_LOCK(r) (&(r)->lock)
struct SRSmaInfoItem { struct SRSmaInfoItem {
int8_t level : 4; int8_t level;
int8_t fetchLevel : 4; int8_t fetchLevel;
int8_t triggerStat; int8_t triggerStat;
uint16_t nScanned; int32_t nScanned;
int32_t maxDelay; // ms int32_t streamFlushed : 1;
tmr_h tmrId; int32_t maxDelay : 31; // ms
void *pStreamState; int64_t submitReqVer;
int64_t fetchResultVer;
tmr_h tmrId;
void *pStreamState;
void *pStreamTask; // SStreamTask
SArray *pResList;
}; };
struct SRSmaInfo { struct SRSmaInfo {
@ -160,12 +167,10 @@ struct SRSmaInfo {
STaosQall *qall; // buffer qall of SubmitReq STaosQall *qall; // buffer qall of SubmitReq
}; };
#define RSMA_INFO_HEAD_LEN offsetof(SRSmaInfo, items) #define RSMA_INFO_IS_DEL(r) ((r)->delFlag == 1)
#define RSMA_INFO_IS_DEL(r) ((r)->delFlag == 1) #define RSMA_INFO_SET_DEL(r) ((r)->delFlag = 1)
#define RSMA_INFO_SET_DEL(r) ((r)->delFlag = 1) #define RSMA_INFO_QTASK(r, i) ((r)->taskInfo[i])
#define RSMA_INFO_QTASK(r, i) ((r)->taskInfo[i]) #define RSMA_INFO_ITEM(r, i) (&(r)->items[i])
#define RSMA_INFO_IQTASK(r, i) ((r)->iTaskInfo[i])
#define RSMA_INFO_ITEM(r, i) (&(r)->items[i])
enum { enum {
TASK_TRIGGER_STAT_INIT = 0, TASK_TRIGGER_STAT_INIT = 0,
@ -214,11 +219,11 @@ static FORCE_INLINE void tdUnRefSmaStat(SSma *pSma, SSmaStat *pStat) {
int32_t smaPreClose(SSma *pSma); int32_t smaPreClose(SSma *pSma);
// rsma // rsma
void *tdFreeRSmaInfo(SSma *pSma, SRSmaInfo *pInfo, bool isDeepFree); void *tdFreeRSmaInfo(SSma *pSma, SRSmaInfo *pInfo);
int32_t tdRSmaRestore(SSma *pSma, int8_t type, int64_t committedVer, int8_t rollback); int32_t tdRSmaRestore(SSma *pSma, int8_t type, int64_t committedVer, int8_t rollback);
int32_t tdRSmaProcessCreateImpl(SSma *pSma, SRSmaParam *param, int64_t suid, const char *tbName); int32_t tdRSmaProcessCreateImpl(SSma *pSma, SRSmaParam *param, int64_t suid, const char *tbName);
int32_t tdRSmaProcessExecImpl(SSma *pSma, ERsmaExecType type); int32_t tdRSmaProcessExecImpl(SSma *pSma, ERsmaExecType type);
// int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash); int32_t tdRSmaPersistExecImpl(SRSmaStat *pRSmaStat, SHashObj *pInfoHash);
int32_t tdRSmaProcessRestoreImpl(SSma *pSma, int8_t type, int64_t qtaskFileVer, int8_t rollback); int32_t tdRSmaProcessRestoreImpl(SSma *pSma, int8_t type, int64_t qtaskFileVer, int8_t rollback);
void tdRSmaQTaskInfoGetFullPath(SVnode *pVnode, tb_uid_t suid, int8_t level, STfs *pTfs, char *outputName); void tdRSmaQTaskInfoGetFullPath(SVnode *pVnode, tb_uid_t suid, int8_t level, STfs *pTfs, char *outputName);

View File

@ -147,7 +147,7 @@ int32_t tqOffsetDelete(STqOffsetStore* pStore, const char* subscribeKey)
int32_t tqOffsetCommitFile(STqOffsetStore* pStore); int32_t tqOffsetCommitFile(STqOffsetStore* pStore);
// tqSink // tqSink
int32_t tqBuildDeleteReq(const char* stbFullName, const SSDataBlock* pDataBlock, SBatchDeleteReq* deleteReq, int32_t tqBuildDeleteReq(STQ* pTq, const char* stbFullName, const SSDataBlock* pDataBlock, SBatchDeleteReq* deleteReq,
const char* pIdStr); const char* pIdStr);
void tqSinkDataIntoDstTable(SStreamTask* pTask, void* vnode, void* data); void tqSinkDataIntoDstTable(SStreamTask* pTask, void* vnode, void* data);
@ -160,7 +160,7 @@ int32_t tqResetStreamTaskStatus(STQ* pTq);
int32_t tqStopStreamTasks(STQ* pTq); int32_t tqStopStreamTasks(STQ* pTq);
// tq util // tq util
int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, SStreamRefDataBlock** pRefBlock); int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, void** pRefBlock, int32_t type);
int32_t tqExtractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, SRpcMsg* pMsg); int32_t tqExtractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, SRpcMsg* pMsg);
int32_t tqDoSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const SMqDataRsp* pRsp, int32_t epoch, int64_t consumerId, int32_t tqDoSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const SMqDataRsp* pRsp, int32_t epoch, int64_t consumerId,
int32_t type, int64_t sver, int64_t ever); int32_t type, int64_t sver, int64_t ever);

View File

@ -209,6 +209,7 @@ int32_t tsdbBegin(STsdb* pTsdb);
// int32_t tsdbCommit(STsdb* pTsdb, SCommitInfo* pInfo); // int32_t tsdbCommit(STsdb* pTsdb, SCommitInfo* pInfo);
int32_t tsdbCacheCommit(STsdb* pTsdb); int32_t tsdbCacheCommit(STsdb* pTsdb);
int32_t tsdbCompact(STsdb* pTsdb, SCompactInfo* pInfo); int32_t tsdbCompact(STsdb* pTsdb, SCompactInfo* pInfo);
int32_t tsdbRetention(STsdb *tsdb, int64_t now, int32_t sync);
// int32_t tsdbFinishCommit(STsdb* pTsdb); // int32_t tsdbFinishCommit(STsdb* pTsdb);
// int32_t tsdbRollbackCommit(STsdb* pTsdb); // int32_t tsdbRollbackCommit(STsdb* pTsdb);
int tsdbScanAndConvertSubmitMsg(STsdb* pTsdb, SSubmitReq2* pMsg); int tsdbScanAndConvertSubmitMsg(STsdb* pTsdb, SSubmitReq2* pMsg);
@ -279,13 +280,14 @@ int32_t smaPrepareAsyncCommit(SSma* pSma);
int32_t smaCommit(SSma* pSma, SCommitInfo* pInfo); int32_t smaCommit(SSma* pSma, SCommitInfo* pInfo);
int32_t smaFinishCommit(SSma* pSma); int32_t smaFinishCommit(SSma* pSma);
int32_t smaPostCommit(SSma* pSma); int32_t smaPostCommit(SSma* pSma);
int32_t smaDoRetention(SSma* pSma, int64_t now); int32_t smaRetention(SSma* pSma, int64_t now);
int32_t tdProcessTSmaCreate(SSma* pSma, int64_t version, const char* msg); int32_t tdProcessTSmaCreate(SSma* pSma, int64_t version, const char* msg);
int32_t tdProcessTSmaInsert(SSma* pSma, int64_t indexUid, const char* msg); int32_t tdProcessTSmaInsert(SSma* pSma, int64_t indexUid, const char* msg);
int32_t tdProcessRSmaCreate(SSma* pSma, SVCreateStbReq* pReq); int32_t tdProcessRSmaCreate(SSma* pSma, SVCreateStbReq* pReq);
int32_t tdProcessRSmaSubmit(SSma* pSma, int64_t version, void* pReq, void* pMsg, int32_t len, int32_t inputType); int32_t tdProcessRSmaSubmit(SSma* pSma, int64_t version, void* pReq, void* pMsg, int32_t len);
int32_t tdProcessRSmaDelete(SSma* pSma, int64_t version, void* pReq, void* pMsg, int32_t len);
int32_t tdProcessRSmaDrop(SSma* pSma, SVDropStbReq* pReq); int32_t tdProcessRSmaDrop(SSma* pSma, SVDropStbReq* pReq);
int32_t tdFetchTbUidList(SSma* pSma, STbUidStore** ppStore, tb_uid_t suid, tb_uid_t uid); int32_t tdFetchTbUidList(SSma* pSma, STbUidStore** ppStore, tb_uid_t suid, tb_uid_t uid);
int32_t tdUpdateTbUidList(SSma* pSma, STbUidStore* pUidStore, bool isAdd); int32_t tdUpdateTbUidList(SSma* pSma, STbUidStore* pUidStore, bool isAdd);

View File

@ -156,10 +156,10 @@ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma, bool isCommit) {
nLoops = 0; nLoops = 0;
while (1) { while (1) {
if (atomic_load_32(&pRSmaStat->nFetchAll) <= 0) { if (atomic_load_32(&pRSmaStat->nFetchAll) <= 0) {
smaDebug("vgId:%d, rsma commit:%d, fetch tasks are all finished", SMA_VID(pSma), isCommit); smaDebug("vgId:%d, rsma commit, type:%d, fetch tasks are all finished", SMA_VID(pSma), isCommit);
break; break;
} else { } else {
smaDebug("vgId:%d, rsma commit%d, fetch tasks are not all finished yet", SMA_VID(pSma), isCommit); smaDebug("vgId:%d, rsma commit, type:%d, fetch tasks are not all finished yet", SMA_VID(pSma), isCommit);
} }
TD_SMA_LOOPS_CHECK(nLoops, 1000); TD_SMA_LOOPS_CHECK(nLoops, 1000);
} }
@ -169,22 +169,24 @@ static int32_t tdProcessRSmaAsyncPreCommitImpl(SSma *pSma, bool isCommit) {
* 1) This is high cost task and should not put in asyncPreCommit originally. * 1) This is high cost task and should not put in asyncPreCommit originally.
* 2) But, if put in asyncCommit, would trigger taskInfo cloning frequently. * 2) But, if put in asyncCommit, would trigger taskInfo cloning frequently.
*/ */
smaInfo("vgId:%d, rsma commit:%d, wait for all items to be consumed, TID:%p", SMA_VID(pSma), isCommit, smaInfo("vgId:%d, rsma commit, type:%d, wait for all items to be consumed, TID:%p", SMA_VID(pSma), isCommit,
(void *)taosGetSelfPthreadId()); (void *)taosGetSelfPthreadId());
nLoops = 0; nLoops = 0;
while (atomic_load_64(&pRSmaStat->nBufItems) > 0) { while (atomic_load_64(&pRSmaStat->nBufItems) > 0) {
TD_SMA_LOOPS_CHECK(nLoops, 1000); TD_SMA_LOOPS_CHECK(nLoops, 1000);
} }
smaInfo("vgId:%d, rsma commit, all items are consumed, TID:%p", SMA_VID(pSma), (void *)taosGetSelfPthreadId());
if (!isCommit) goto _exit; if (!isCommit) goto _exit;
// code = tdRSmaPersistExecImpl(pRSmaStat, RSMA_INFO_HASH(pRSmaStat)); code = atomic_load_32(&pRSmaStat->execStat);
TSDB_CHECK_CODE(code, lino, _exit);
code = tdRSmaPersistExecImpl(pRSmaStat, RSMA_INFO_HASH(pRSmaStat));
TSDB_CHECK_CODE(code, lino, _exit); TSDB_CHECK_CODE(code, lino, _exit);
smaInfo("vgId:%d, rsma commit, operator state committed, TID:%p", SMA_VID(pSma), (void *)taosGetSelfPthreadId()); smaInfo("vgId:%d, rsma commit, operator state committed, TID:%p", SMA_VID(pSma), (void *)taosGetSelfPthreadId());
smaInfo("vgId:%d, rsma commit, all items are consumed, TID:%p", SMA_VID(pSma), (void *)taosGetSelfPthreadId());
// all rsma results are written completely // all rsma results are written completely
STsdb *pTsdb = NULL; STsdb *pTsdb = NULL;
if ((pTsdb = VND_RSMA1(pSma->pVnode))) { if ((pTsdb = VND_RSMA1(pSma->pVnode))) {
@ -215,10 +217,7 @@ static int32_t tdProcessRSmaAsyncCommitImpl(SSma *pSma, SCommitInfo *pInfo) {
int32_t lino = 0; int32_t lino = 0;
SVnode *pVnode = pSma->pVnode; SVnode *pVnode = pSma->pVnode;
SSmaEnv *pSmaEnv = SMA_RSMA_ENV(pSma); if (!SMA_RSMA_ENV(pSma)) goto _exit;
if (!pSmaEnv) {
goto _exit;
}
code = tsdbCommitBegin(VND_RSMA1(pVnode), pInfo); code = tsdbCommitBegin(VND_RSMA1(pVnode), pInfo);
TSDB_CHECK_CODE(code, lino, _exit); TSDB_CHECK_CODE(code, lino, _exit);

View File

@ -179,7 +179,7 @@ static void tRSmaInfoHashFreeNode(void *data) {
if ((pItem = RSMA_INFO_ITEM((SRSmaInfo *)pRSmaInfo, 1)) && pItem->level) { if ((pItem = RSMA_INFO_ITEM((SRSmaInfo *)pRSmaInfo, 1)) && pItem->level) {
taosHashRemove(smaMgmt.refHash, &pItem, POINTER_BYTES); taosHashRemove(smaMgmt.refHash, &pItem, POINTER_BYTES);
} }
tdFreeRSmaInfo(pRSmaInfo->pSma, pRSmaInfo, true); tdFreeRSmaInfo(pRSmaInfo->pSma, pRSmaInfo);
} }
} }
@ -209,6 +209,12 @@ static int32_t tdInitSmaStat(SSmaStat **pSmaStat, int8_t smaType, const SSma *pS
pRSmaStat->pSma = (SSma *)pSma; pRSmaStat->pSma = (SSma *)pSma;
atomic_store_8(RSMA_TRIGGER_STAT(pRSmaStat), TASK_TRIGGER_STAT_INIT); atomic_store_8(RSMA_TRIGGER_STAT(pRSmaStat), TASK_TRIGGER_STAT_INIT);
tsem_init(&pRSmaStat->notEmpty, 0, 0); tsem_init(&pRSmaStat->notEmpty, 0, 0);
if (!(pRSmaStat->blocks = taosArrayInit(1, sizeof(SSDataBlock)))) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);
}
SSDataBlock datablock = {.info.type = STREAM_CHECKPOINT};
taosArrayPush(pRSmaStat->blocks, &datablock);
// init smaMgmt // init smaMgmt
smaInit(); smaInit();
@ -290,6 +296,7 @@ static void tdDestroyRSmaStat(void *pRSmaStat) {
// step 5: free pStat // step 5: free pStat
tsem_destroy(&(pStat->notEmpty)); tsem_destroy(&(pStat->notEmpty));
taosArrayDestroy(pStat->blocks);
taosMemoryFreeClear(pStat); taosMemoryFreeClear(pStat);
} }
} }

File diff suppressed because it is too large Load Diff

View File

@ -188,7 +188,8 @@ int32_t smaBlockToSubmit(SVnode *pVnode, const SArray *pBlocks, const STSchema *
if (pDataBlock->info.type == STREAM_DELETE_RESULT) { if (pDataBlock->info.type == STREAM_DELETE_RESULT) {
pDeleteReq->suid = suid; pDeleteReq->suid = suid;
pDeleteReq->deleteReqs = taosArrayInit(0, sizeof(SSingleDeleteReq)); pDeleteReq->deleteReqs = taosArrayInit(0, sizeof(SSingleDeleteReq));
tqBuildDeleteReq(stbFullName, pDataBlock, pDeleteReq, ""); code = tqBuildDeleteReq(pVnode->pTq, stbFullName, pDataBlock, pDeleteReq, "");
TSDB_CHECK_CODE(code, lino, _exit);
continue; continue;
} }

View File

@ -1668,7 +1668,7 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp)
SStreamCheckpointSourceReq req = {0}; SStreamCheckpointSourceReq req = {0};
if (!vnodeIsRoleLeader(pTq->pVnode)) { if (!vnodeIsRoleLeader(pTq->pVnode)) {
tqDebug("vgId:%d not leader, ignore checkpoint-source msg", vgId); tqDebug("vgId:%d not leader, ignore checkpoint-source msg, s-task:0x%x", vgId, req.taskId);
SRpcMsg rsp = {0}; SRpcMsg rsp = {0};
buildCheckpointSourceRsp(&req, &pMsg->info, &rsp, 0); buildCheckpointSourceRsp(&req, &pMsg->info, &rsp, 0);
tmsgSendRsp(&rsp); // error occurs tmsgSendRsp(&rsp); // error occurs
@ -1676,7 +1676,7 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp)
} }
if (!pTq->pVnode->restored) { if (!pTq->pVnode->restored) {
tqDebug("vgId:%d checkpoint-source msg received during restoring, ignore it", vgId); tqDebug("vgId:%d checkpoint-source msg received during restoring, s-task:0x%x ignore it", vgId, req.taskId);
SRpcMsg rsp = {0}; SRpcMsg rsp = {0};
buildCheckpointSourceRsp(&req, &pMsg->info, &rsp, 0); buildCheckpointSourceRsp(&req, &pMsg->info, &rsp, 0);
tmsgSendRsp(&rsp); // error occurs tmsgSendRsp(&rsp); // error occurs
@ -1696,7 +1696,6 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp)
} }
tDecoderClear(&decoder); tDecoderClear(&decoder);
// todo handle failure to reset from checkpoint procedure
SStreamTask* pTask = streamMetaAcquireTask(pMeta, req.streamId, req.taskId); SStreamTask* pTask = streamMetaAcquireTask(pMeta, req.streamId, req.taskId);
if (pTask == NULL) { if (pTask == NULL) {
tqError("vgId:%d failed to find s-task:0x%x, ignore checkpoint msg. it may have been destroyed already", vgId, tqError("vgId:%d failed to find s-task:0x%x, ignore checkpoint msg. it may have been destroyed already", vgId,
@ -1707,7 +1706,6 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp)
return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
} }
// todo handle failure to reset from checkpoint procedure
// downstream not ready, current the stream tasks are not all ready. Ignore this checkpoint req. // downstream not ready, current the stream tasks are not all ready. Ignore this checkpoint req.
if (pTask->status.downstreamReady != 1) { if (pTask->status.downstreamReady != 1) {
pTask->chkInfo.failedId = req.checkpointId; // record the latest failed checkpoint id pTask->chkInfo.failedId = req.checkpointId; // record the latest failed checkpoint id
@ -1728,17 +1726,32 @@ int32_t tqProcessTaskCheckPointSourceReq(STQ* pTq, SRpcMsg* pMsg, SRpcMsg* pRsp)
ETaskStatus status = streamTaskGetStatus(pTask, NULL); ETaskStatus status = streamTaskGetStatus(pTask, NULL);
if (status == TASK_STATUS__HALT || status == TASK_STATUS__PAUSE) { if (status == TASK_STATUS__HALT || status == TASK_STATUS__PAUSE) {
qError("s-task:%s not ready for checkpoint, since it is halt, ignore this checkpoint:%" PRId64 ", set it failure", tqError("s-task:%s not ready for checkpoint, since it is halt, ignore this checkpoint:%" PRId64 ", set it failure",
pTask->id.idStr, req.checkpointId); pTask->id.idStr, req.checkpointId);
taosThreadMutexUnlock(&pTask->lock);
taosThreadMutexUnlock(&pTask->lock);
streamMetaReleaseTask(pMeta, pTask); streamMetaReleaseTask(pMeta, pTask);
SRpcMsg rsp = {0}; SRpcMsg rsp = {0};
buildCheckpointSourceRsp(&req, &pMsg->info, &rsp, 0); buildCheckpointSourceRsp(&req, &pMsg->info, &rsp, 0);
tmsgSendRsp(&rsp); // error occurs tmsgSendRsp(&rsp); // error occurs
return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
} }
// check if the checkpoint msg already sent or not.
if (status == TASK_STATUS__CK) {
ASSERT(pTask->checkpointingId == req.checkpointId);
tqWarn("s-task:%s recv checkpoint-source msg again checkpointId:%" PRId64
" already received, ignore this msg and continue process checkpoint",
pTask->id.idStr, pTask->checkpointingId);
taosThreadMutexUnlock(&pTask->lock);
streamMetaReleaseTask(pMeta, pTask);
return TSDB_CODE_SUCCESS;
}
streamProcessCheckpointSourceReq(pTask, &req); streamProcessCheckpointSourceReq(pTask, &req);
taosThreadMutexUnlock(&pTask->lock); taosThreadMutexUnlock(&pTask->lock);
@ -1924,8 +1937,59 @@ int32_t tqProcessTaskUpdateReq(STQ* pTq, SRpcMsg* pMsg) {
pMeta->startInfo.tasksWillRestart = 0; pMeta->startInfo.tasksWillRestart = 0;
streamMetaWUnLock(pMeta); streamMetaWUnLock(pMeta);
} else { } else {
streamMetaWUnLock(pMeta); tqDebug("vgId:%d all %d task(s) nodeEp updated and closed", vgId, numOfTasks);
#if 1
tqStartStreamTaskAsync(pTq, true); tqStartStreamTaskAsync(pTq, true);
streamMetaWUnLock(pMeta);
#else
streamMetaWUnLock(pMeta);
// For debug purpose.
// the following procedure consume many CPU resource, result in the re-election of leader
// with high probability. So we employ it as a test case for the stream processing framework, with
// checkpoint/restart/nodeUpdate etc.
while(1) {
int32_t startVal = atomic_val_compare_exchange_32(&pMeta->startInfo.taskStarting, 0, 1);
if (startVal == 0) {
break;
}
tqDebug("vgId:%d in start stream tasks procedure, wait for 500ms and recheck", vgId);
taosMsleep(500);
}
while (streamMetaTaskInTimer(pMeta)) {
tqDebug("vgId:%d some tasks in timer, wait for 100ms and recheck", pMeta->vgId);
taosMsleep(100);
}
streamMetaWLock(pMeta);
int32_t code = streamMetaReopen(pMeta);
if (code != 0) {
tqError("vgId:%d failed to reopen stream meta", vgId);
streamMetaWUnLock(pMeta);
taosArrayDestroy(req.pNodeList);
return -1;
}
if (streamMetaLoadAllTasks(pTq->pStreamMeta) < 0) {
tqError("vgId:%d failed to load stream tasks", vgId);
streamMetaWUnLock(pMeta);
taosArrayDestroy(req.pNodeList);
return -1;
}
if (vnodeIsRoleLeader(pTq->pVnode) && !tsDisableStream) {
tqInfo("vgId:%d start all stream tasks after all being updated", vgId);
tqResetStreamTaskStatus(pTq);
tqStartStreamTaskAsync(pTq, false);
} else {
tqInfo("vgId:%d, follower node not start stream tasks", vgId);
}
streamMetaWUnLock(pMeta);
#endif
} }
} }

View File

@ -343,7 +343,7 @@ int32_t extractMsgFromWal(SWalReader* pReader, void** pItem, int64_t maxVer, con
void* pBody = POINTER_SHIFT(pCont->body, sizeof(SMsgHead)); void* pBody = POINTER_SHIFT(pCont->body, sizeof(SMsgHead));
int32_t len = pCont->bodyLen - sizeof(SMsgHead); int32_t len = pCont->bodyLen - sizeof(SMsgHead);
code = extractDelDataBlock(pBody, len, ver, (SStreamRefDataBlock**)pItem); code = extractDelDataBlock(pBody, len, ver, (void**)pItem, 0);
if (code == TSDB_CODE_SUCCESS) { if (code == TSDB_CODE_SUCCESS) {
if (*pItem == NULL) { if (*pItem == NULL) {
tqDebug("s-task:%s empty delete msg, discard it, len:%d, ver:%" PRId64, id, len, ver); tqDebug("s-task:%s empty delete msg, discard it, len:%d, ver:%" PRId64, id, len, ver);

View File

@ -43,7 +43,7 @@ static SArray* createDefaultTagColName();
static void setCreateTableMsgTableName(SVCreateTbReq* pCreateTableReq, SSDataBlock* pDataBlock, const char* stbFullName, static void setCreateTableMsgTableName(SVCreateTbReq* pCreateTableReq, SSDataBlock* pDataBlock, const char* stbFullName,
int64_t gid); int64_t gid);
int32_t tqBuildDeleteReq(const char* stbFullName, const SSDataBlock* pDataBlock, SBatchDeleteReq* deleteReq, int32_t tqBuildDeleteReq(STQ* pTq, const char* stbFullName, const SSDataBlock* pDataBlock, SBatchDeleteReq* deleteReq,
const char* pIdStr) { const char* pIdStr) {
int32_t totalRows = pDataBlock->info.rows; int32_t totalRows = pDataBlock->info.rows;
SColumnInfoData* pStartTsCol = taosArrayGet(pDataBlock->pDataBlock, START_TS_COLUMN_INDEX); SColumnInfoData* pStartTsCol = taosArrayGet(pDataBlock->pDataBlock, START_TS_COLUMN_INDEX);
@ -58,8 +58,9 @@ int32_t tqBuildDeleteReq(const char* stbFullName, const SSDataBlock* pDataBlock,
int64_t ekey = *(int64_t*)colDataGetData(pEndTsCol, row); int64_t ekey = *(int64_t*)colDataGetData(pEndTsCol, row);
int64_t groupId = *(int64_t*)colDataGetData(pGidCol, row); int64_t groupId = *(int64_t*)colDataGetData(pGidCol, row);
char* name; char* name = NULL;
void* varTbName = NULL; char* originName = NULL;
void* varTbName = NULL;
if (!colDataIsNull(pTbNameCol, totalRows, row, NULL)) { if (!colDataIsNull(pTbNameCol, totalRows, row, NULL)) {
varTbName = colDataGetVarData(pTbNameCol, row); varTbName = colDataGetVarData(pTbNameCol, row);
} }
@ -67,18 +68,29 @@ int32_t tqBuildDeleteReq(const char* stbFullName, const SSDataBlock* pDataBlock,
if (varTbName != NULL && varTbName != (void*)-1) { if (varTbName != NULL && varTbName != (void*)-1) {
name = taosMemoryCalloc(1, TSDB_TABLE_NAME_LEN); name = taosMemoryCalloc(1, TSDB_TABLE_NAME_LEN);
memcpy(name, varDataVal(varTbName), varDataLen(varTbName)); memcpy(name, varDataVal(varTbName), varDataLen(varTbName));
} else { } else if (stbFullName) {
name = buildCtbNameByGroupId(stbFullName, groupId); name = buildCtbNameByGroupId(stbFullName, groupId);
} else {
originName = taosMemoryCalloc(1, TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE);
if (metaGetTableNameByUid(pTq->pVnode, groupId, originName) == 0) {
name = varDataVal(originName);
}
} }
tqDebug("s-task:%s build delete msg groupId:%" PRId64 ", name:%s, skey:%" PRId64 " ekey:%" PRId64, if (!name || *name == '\0') {
pIdStr, groupId, name, skey, ekey); tqWarn("s-task:%s failed to build delete msg groupId:%" PRId64 ", skey:%" PRId64 " ekey:%" PRId64
" since invalid tbname:%s",
pIdStr, groupId, skey, ekey, name ? name : "NULL");
} else {
tqDebug("s-task:%s build delete msg groupId:%" PRId64 ", name:%s, skey:%" PRId64 " ekey:%" PRId64, pIdStr,
groupId, name, skey, ekey);
SSingleDeleteReq req = { .startTs = skey, .endTs = ekey}; SSingleDeleteReq req = {.startTs = skey, .endTs = ekey};
strncpy(req.tbname, name, TSDB_TABLE_NAME_LEN - 1); strncpy(req.tbname, name, TSDB_TABLE_NAME_LEN - 1);
taosMemoryFree(name); taosArrayPush(deleteReq->deleteReqs, &req);
}
taosArrayPush(deleteReq->deleteReqs, &req); if (originName) name = originName;
taosMemoryFreeClear(name);
} }
return 0; return 0;
@ -345,7 +357,7 @@ int32_t doBuildAndSendDeleteMsg(SVnode* pVnode, char* stbFullName, SSDataBlock*
int64_t suid) { int64_t suid) {
SBatchDeleteReq deleteReq = {.suid = suid, .deleteReqs = taosArrayInit(0, sizeof(SSingleDeleteReq))}; SBatchDeleteReq deleteReq = {.suid = suid, .deleteReqs = taosArrayInit(0, sizeof(SSingleDeleteReq))};
int32_t code = tqBuildDeleteReq(stbFullName, pDataBlock, &deleteReq, pTask->id.idStr); int32_t code = tqBuildDeleteReq(pVnode->pTq, stbFullName, pDataBlock, &deleteReq, pTask->id.idStr);
if (code != TSDB_CODE_SUCCESS) { if (code != TSDB_CODE_SUCCESS) {
return code; return code;
} }

View File

@ -299,7 +299,7 @@ int32_t tqResetStreamTaskStatus(STQ* pTq) {
int32_t vgId = TD_VID(pTq->pVnode); int32_t vgId = TD_VID(pTq->pVnode);
int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList); int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList);
tqDebug("vgId:%d start all %d stream task(s)", vgId, numOfTasks); tqDebug("vgId:%d reset all %d stream task(s) status to be uninit", vgId, numOfTasks);
if (numOfTasks == 0) { if (numOfTasks == 0) {
return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
} }
@ -448,7 +448,7 @@ bool doPutDataIntoInputQFromWal(SStreamTask* pTask, int64_t maxVer, int32_t* num
numOfNewItems += 1; numOfNewItems += 1;
int64_t ver = walReaderGetCurrentVer(pTask->exec.pWalReader); int64_t ver = walReaderGetCurrentVer(pTask->exec.pWalReader);
pTask->chkInfo.nextProcessVer = ver; pTask->chkInfo.nextProcessVer = ver;
tqDebug("s-task:%s set the ver:%" PRId64 " from WALReader after extract block from WAL", id, ver); tqDebug("s-task:%s set ver:%" PRId64 " for reader after extract data from WAL", id, ver);
bool itemInFillhistory = handleFillhistoryScanComplete(pTask, ver); bool itemInFillhistory = handleFillhistoryScanComplete(pTask, ver);
if (itemInFillhistory) { if (itemInFillhistory) {

View File

@ -399,7 +399,7 @@ int32_t tqDoSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const SMqDataRsp*
return 0; return 0;
} }
int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, SStreamRefDataBlock** pRefBlock) { int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, void** pRefBlock, int32_t type) {
SDecoder* pCoder = &(SDecoder){0}; SDecoder* pCoder = &(SDecoder){0};
SDeleteRes* pRes = &(SDeleteRes){0}; SDeleteRes* pRes = &(SDeleteRes){0};
@ -442,14 +442,21 @@ int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, SStream
} }
taosArrayDestroy(pRes->uidList); taosArrayDestroy(pRes->uidList);
*pRefBlock = taosAllocateQitem(sizeof(SStreamRefDataBlock), DEF_QITEM, 0); if (type == 0) {
if (*pRefBlock == NULL) { *pRefBlock = taosAllocateQitem(sizeof(SStreamRefDataBlock), DEF_QITEM, 0);
blockDataCleanup(pDelBlock); if (*pRefBlock == NULL) {
taosMemoryFree(pDelBlock); blockDataCleanup(pDelBlock);
return TSDB_CODE_OUT_OF_MEMORY; taosMemoryFree(pDelBlock);
return TSDB_CODE_OUT_OF_MEMORY;
}
((SStreamRefDataBlock*)(*pRefBlock))->type = STREAM_INPUT__REF_DATA_BLOCK;
((SStreamRefDataBlock*)(*pRefBlock))->pBlock = pDelBlock;
} else if (type == 1) {
*pRefBlock = pDelBlock;
} else {
ASSERTS(0, "unknown type:%d", type);
} }
(*pRefBlock)->type = STREAM_INPUT__REF_DATA_BLOCK;
(*pRefBlock)->pBlock = pDelBlock;
return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
} }

View File

@ -3099,7 +3099,7 @@ static int32_t tsdbCacheLoadBlockS3(STsdbFD *pFD, uint8_t **ppBlock) {
} }
*/ */
int64_t block_offset = (pFD->blkno - 1) * tsS3BlockSize * pFD->szPage; int64_t block_offset = (pFD->blkno - 1) * tsS3BlockSize * pFD->szPage;
code = s3GetObjectBlock(pFD->objName, block_offset, tsS3BlockSize * pFD->szPage, ppBlock); code = s3GetObjectBlock(pFD->objName, block_offset, tsS3BlockSize * pFD->szPage, 0, ppBlock);
if (code != TSDB_CODE_SUCCESS) { if (code != TSDB_CODE_SUCCESS) {
// taosMemoryFree(pBlock); // taosMemoryFree(pBlock);
// code = TSDB_CODE_OUT_OF_MEMORY; // code = TSDB_CODE_OUT_OF_MEMORY;

View File

@ -28,13 +28,18 @@ static int32_t saveOneRow(SArray* pRow, SSDataBlock* pBlock, SCacheRowsReader* p
// bool allNullRow = true; // bool allNullRow = true;
if (HASTYPE(pReader->type, CACHESCAN_RETRIEVE_LAST)) { if (HASTYPE(pReader->type, CACHESCAN_RETRIEVE_LAST)) {
uint64_t ts = 0;
SFirstLastRes* p;
col_id_t colId;
for (int32_t i = 0; i < pReader->numOfCols; ++i) { for (int32_t i = 0; i < pReader->numOfCols; ++i) {
SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, dstSlotIds[i]); SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, dstSlotIds[i]);
SFirstLastRes* p = (SFirstLastRes*)varDataVal(pRes[i]);
int32_t slotId = slotIds[i]; int32_t slotId = slotIds[i];
SLastCol* pColVal = (SLastCol*)taosArrayGet(pRow, i); SLastCol* pColVal = (SLastCol*)taosArrayGet(pRow, i);
colId = pColVal->colVal.cid;
p = (SFirstLastRes*)varDataVal(pRes[i]);
p->ts = pColVal->ts; p->ts = pColVal->ts;
ts = p->ts;
p->isNull = !COL_VAL_IS_VALUE(&pColVal->colVal); p->isNull = !COL_VAL_IS_VALUE(&pColVal->colVal);
// allNullRow = p->isNull & allNullRow; // allNullRow = p->isNull & allNullRow;
@ -55,6 +60,19 @@ static int32_t saveOneRow(SArray* pRow, SSDataBlock* pBlock, SCacheRowsReader* p
varDataSetLen(pRes[i], pColInfoData->info.bytes - VARSTR_HEADER_SIZE); varDataSetLen(pRes[i], pColInfoData->info.bytes - VARSTR_HEADER_SIZE);
colDataSetVal(pColInfoData, numOfRows, (const char*)pRes[i], false); colDataSetVal(pColInfoData, numOfRows, (const char*)pRes[i], false);
} }
for (int32_t idx = 0; idx < taosArrayGetSize(pBlock->pDataBlock); ++idx) {
SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, idx);
if (pCol->info.colId == PRIMARYKEY_TIMESTAMP_COL_ID && pCol->info.type == TSDB_DATA_TYPE_TIMESTAMP) {
colDataSetVal(pCol, numOfRows, (const char*)&ts, false);
continue;
} else if (pReader->numOfCols == 1 && idx != dstSlotIds[0] && pCol->info.colId == colId) {
if (!p->isNull) {
colDataSetVal(pCol, numOfRows, p->buf, false);
} else {
colDataSetNULL(pCol, numOfRows);
}
}
}
// pBlock->info.rows += allNullRow ? 0 : 1; // pBlock->info.rows += allNullRow ? 0 : 1;
++pBlock->info.rows; ++pBlock->info.rows;

View File

@ -192,7 +192,7 @@ static int32_t tsdbCommitTombData(SCommitter2 *committer) {
committer->ctx->tbid->uid = record->uid; committer->ctx->tbid->uid = record->uid;
if (metaGetInfo(committer->tsdb->pVnode->pMeta, record->uid, &info, NULL) != 0) { if (metaGetInfo(committer->tsdb->pVnode->pMeta, record->uid, &info, NULL) != 0) {
code = tsdbIterMergerSkipTableData(committer->dataIterMerger, committer->ctx->tbid); code = tsdbIterMergerSkipTableData(committer->tombIterMerger, committer->ctx->tbid);
TSDB_CHECK_CODE(code, lino, _exit); TSDB_CHECK_CODE(code, lino, _exit);
continue; continue;
} }

View File

@ -22,7 +22,7 @@
extern int vnodeScheduleTask(int (*execute)(void *), void *arg); extern int vnodeScheduleTask(int (*execute)(void *), void *arg);
extern int vnodeScheduleTaskEx(int tpid, int (*execute)(void *), void *arg); extern int vnodeScheduleTaskEx(int tpid, int (*execute)(void *), void *arg);
extern void remove_file(const char *fname); extern void remove_file(const char *fname, bool last_level);
#define TSDB_FS_EDIT_MIN TSDB_FEDIT_COMMIT #define TSDB_FS_EDIT_MIN TSDB_FEDIT_COMMIT
#define TSDB_FS_EDIT_MAX (TSDB_FEDIT_MERGE + 1) #define TSDB_FS_EDIT_MAX (TSDB_FEDIT_MERGE + 1)
@ -532,7 +532,8 @@ static int32_t tsdbFSDoSanAndFix(STFileSystem *fs) {
if (taosIsDir(file->aname)) continue; if (taosIsDir(file->aname)) continue;
if (tsdbFSGetFileObjHashEntry(&fobjHash, file->aname) == NULL) { if (tsdbFSGetFileObjHashEntry(&fobjHash, file->aname) == NULL) {
remove_file(file->aname); int32_t nlevel = tfsGetLevel(fs->tsdb->pVnode->pTfs);
remove_file(file->aname, nlevel > 1 && file->did.level == nlevel - 1);
} }
} }

View File

@ -14,6 +14,7 @@
*/ */
#include "tsdbFile2.h" #include "tsdbFile2.h"
#include "cos.h"
// to_json // to_json
static int32_t head_to_json(const STFile *file, cJSON *json); static int32_t head_to_json(const STFile *file, cJSON *json);
@ -41,10 +42,20 @@ static const struct {
[TSDB_FTYPE_STT] = {"stt", stt_to_json, stt_from_json}, [TSDB_FTYPE_STT] = {"stt", stt_to_json, stt_from_json},
}; };
void remove_file(const char *fname) { void remove_file(const char *fname, bool last_level) {
int32_t code = taosRemoveFile(fname); int32_t code = taosRemoveFile(fname);
if (code) { if (code) {
tsdbError("file:%s remove failed", fname); if (tsS3Enabled && last_level) {
const char *object_name = taosDirEntryBaseName((char *)fname);
long s3_size = tsS3Enabled ? s3Size(object_name) : 0;
if (!strncmp(fname + strlen(fname) - 5, ".data", 5) && s3_size > 0) {
s3DeleteObjects(&object_name, 1);
} else {
tsdbError("file:%s remove failed", fname);
}
} else {
tsdbError("file:%s remove failed", fname);
}
} else { } else {
tsdbInfo("file:%s is removed", fname); tsdbInfo("file:%s is removed", fname);
} }
@ -224,6 +235,7 @@ int32_t tsdbTFileObjInit(STsdb *pTsdb, const STFile *f, STFileObj **fobj) {
fobj[0]->state = TSDB_FSTATE_LIVE; fobj[0]->state = TSDB_FSTATE_LIVE;
fobj[0]->ref = 1; fobj[0]->ref = 1;
tsdbTFileName(pTsdb, f, fobj[0]->fname); tsdbTFileName(pTsdb, f, fobj[0]->fname);
fobj[0]->nlevel = tfsGetLevel(pTsdb->pVnode->pTfs);
return 0; return 0;
} }
@ -245,7 +257,7 @@ int32_t tsdbTFileObjUnref(STFileObj *fobj) {
tsdbTrace("unref file %s, fobj:%p ref %d", fobj->fname, fobj, nRef); tsdbTrace("unref file %s, fobj:%p ref %d", fobj->fname, fobj, nRef);
if (nRef == 0) { if (nRef == 0) {
if (fobj->state == TSDB_FSTATE_DEAD) { if (fobj->state == TSDB_FSTATE_DEAD) {
remove_file(fobj->fname); remove_file(fobj->fname, fobj->nlevel > 1 && fobj->f->did.level == fobj->nlevel - 1);
} }
taosMemoryFree(fobj); taosMemoryFree(fobj);
} }
@ -261,7 +273,7 @@ int32_t tsdbTFileObjRemove(STFileObj *fobj) {
taosThreadMutexUnlock(&fobj->mutex); taosThreadMutexUnlock(&fobj->mutex);
tsdbTrace("remove unref file %s, fobj:%p ref %d", fobj->fname, fobj, nRef); tsdbTrace("remove unref file %s, fobj:%p ref %d", fobj->fname, fobj, nRef);
if (nRef == 0) { if (nRef == 0) {
remove_file(fobj->fname); remove_file(fobj->fname, fobj->nlevel > 1 && fobj->f->did.level == fobj->nlevel - 1);
taosMemoryFree(fobj); taosMemoryFree(fobj);
} }
return 0; return 0;

View File

@ -76,6 +76,7 @@ struct STFileObj {
STFile f[1]; STFile f[1];
int32_t state; int32_t state;
int32_t ref; int32_t ref;
int32_t nlevel;
char fname[TSDB_FILENAME_LEN]; char fname[TSDB_FILENAME_LEN];
}; };

View File

@ -89,6 +89,8 @@ int tsdbOpen(SVnode *pVnode, STsdb **ppTsdb, const char *dir, STsdbKeepCfg *pKee
return 0; return 0;
_err: _err:
tsdbCloseFS(&pTsdb->pFS);
taosThreadMutexDestroy(&pTsdb->mutex);
taosMemoryFree(pTsdb); taosMemoryFree(pTsdb);
return -1; return -1;
} }

View File

@ -48,9 +48,9 @@ static int32_t doMergeMemIMemRows(TSDBROW* pRow, TSDBROW* piRow, STableBlockScan
static int32_t mergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pBlockScanInfo, int64_t key, static int32_t mergeRowsInFileBlocks(SBlockData* pBlockData, STableBlockScanInfo* pBlockScanInfo, int64_t key,
STsdbReader* pReader); STsdbReader* pReader);
static int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, int32_t order, SCostSummary* pCost); static int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, int32_t order, SCostSummary* pCost);
static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* retentions, const char* idstr, static STsdb* getTsdbByRetentions(SVnode* pVnode, SQueryTableDataCond* pCond, SRetention* retentions, const char* idstr,
int8_t* pLevel); int8_t* pLevel);
static SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, int8_t level); static SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, int8_t level);
static bool hasDataInLastBlock(SLastBlockReader* pLastBlockReader); static bool hasDataInLastBlock(SLastBlockReader* pLastBlockReader);
static int32_t doBuildDataBlock(STsdbReader* pReader); static int32_t doBuildDataBlock(STsdbReader* pReader);
@ -384,7 +384,7 @@ static int32_t tsdbReaderCreate(SVnode* pVnode, SQueryTableDataCond* pCond, void
initReaderStatus(&pReader->status); initReaderStatus(&pReader->status);
pReader->pTsdb = getTsdbByRetentions(pVnode, pCond->twindows.skey, pVnode->config.tsdbCfg.retentions, idstr, &level); pReader->pTsdb = getTsdbByRetentions(pVnode, pCond, pVnode->config.tsdbCfg.retentions, idstr, &level);
pReader->info.suid = pCond->suid; pReader->info.suid = pCond->suid;
pReader->info.order = pCond->order; pReader->info.order = pCond->order;
@ -3152,9 +3152,9 @@ static int32_t buildBlockFromFiles(STsdbReader* pReader) {
} }
} }
static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* retentions, const char* idStr, static STsdb* getTsdbByRetentions(SVnode* pVnode, SQueryTableDataCond* pCond, SRetention* retentions, const char* idStr,
int8_t* pLevel) { int8_t* pLevel) {
if (VND_IS_RSMA(pVnode)) { if (VND_IS_RSMA(pVnode) && !pCond->skipRollup) {
int8_t level = 0; int8_t level = 0;
int8_t precision = pVnode->config.tsdbCfg.precision; int8_t precision = pVnode->config.tsdbCfg.precision;
int64_t now = taosGetTimestamp(precision); int64_t now = taosGetTimestamp(precision);
@ -3170,7 +3170,7 @@ static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* ret
} }
break; break;
} }
if ((now - pRetention->keep) <= (winSKey + offset)) { if ((now - pRetention->keep) <= (pCond->twindows.skey + offset)) {
break; break;
} }
++level; ++level;

View File

@ -338,9 +338,9 @@ static int32_t tsdbReadFileS3(STsdbFD *pFD, int64_t offset, uint8_t *pBuf, int64
// 2, retrieve pgs from s3 // 2, retrieve pgs from s3
uint8_t *pBlock = NULL; uint8_t *pBlock = NULL;
int64_t retrieve_offset = PAGE_OFFSET(pgno, pFD->szPage); int64_t retrieve_offset = PAGE_OFFSET(pgno, pFD->szPage);
int64_t pgnoEnd = pgno - 1 + (size - n + szPgCont - 1) / szPgCont; int64_t pgnoEnd = pgno - 1 + (bOffset + size - n + szPgCont - 1) / szPgCont;
int64_t retrieve_size = (pgnoEnd - pgno + 1) * pFD->szPage; int64_t retrieve_size = (pgnoEnd - pgno + 1) * pFD->szPage;
code = s3GetObjectBlock(pFD->objName, retrieve_offset, retrieve_size, &pBlock); code = s3GetObjectBlock(pFD->objName, retrieve_offset, retrieve_size, 1, &pBlock);
if (code != TSDB_CODE_SUCCESS) { if (code != TSDB_CODE_SUCCESS) {
goto _exit; goto _exit;
} }

View File

@ -13,9 +13,9 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#include "cos.h"
#include "tsdb.h" #include "tsdb.h"
#include "tsdbFS2.h" #include "tsdbFS2.h"
#include "cos.h"
#include "vnd.h" #include "vnd.h"
typedef struct { typedef struct {
@ -292,15 +292,15 @@ static int32_t tsdbDoRetentionOnFileSet(SRTNer *rtner, STFileSet *fset) {
if (expLevel < 0) { // remove the fileset if (expLevel < 0) { // remove the fileset
for (int32_t ftype = 0; (ftype < TSDB_FTYPE_MAX) && (fobj = fset->farr[ftype], 1); ++ftype) { for (int32_t ftype = 0; (ftype < TSDB_FTYPE_MAX) && (fobj = fset->farr[ftype], 1); ++ftype) {
if (fobj == NULL) continue; if (fobj == NULL) continue;
/*
int32_t nlevel = tfsGetLevel(rtner->tsdb->pVnode->pTfs); int32_t nlevel = tfsGetLevel(rtner->tsdb->pVnode->pTfs);
if (tsS3Enabled && nlevel > 1 && TSDB_FTYPE_DATA == ftype && fobj->f->did.level == nlevel - 1) { if (tsS3Enabled && nlevel > 1 && TSDB_FTYPE_DATA == ftype && fobj->f->did.level == nlevel - 1) {
code = tsdbRemoveFileObjectS3(rtner, fobj); code = tsdbRemoveFileObjectS3(rtner, fobj);
TSDB_CHECK_CODE(code, lino, _exit); TSDB_CHECK_CODE(code, lino, _exit);
} else { } else {*/
code = tsdbDoRemoveFileObject(rtner, fobj); code = tsdbDoRemoveFileObject(rtner, fobj);
TSDB_CHECK_CODE(code, lino, _exit); TSDB_CHECK_CODE(code, lino, _exit);
} //}
} }
SSttLvl *lvl; SSttLvl *lvl;
@ -388,6 +388,8 @@ _exit:
return code; return code;
} }
static void tsdbFreeRtnArg(void *arg) { taosMemoryFree(arg); }
static int32_t tsdbDoRetentionSync(void *arg) { static int32_t tsdbDoRetentionSync(void *arg) {
int32_t code = 0; int32_t code = 0;
int32_t lino = 0; int32_t lino = 0;
@ -410,6 +412,7 @@ _exit:
TSDB_ERROR_LOG(TD_VID(rtner->tsdb->pVnode), lino, code); TSDB_ERROR_LOG(TD_VID(rtner->tsdb->pVnode), lino, code);
} }
tsem_post(&((SRtnArg *)arg)->tsdb->pVnode->canCommit); tsem_post(&((SRtnArg *)arg)->tsdb->pVnode->canCommit);
tsdbFreeRtnArg(arg);
return code; return code;
} }
@ -439,7 +442,7 @@ _exit:
return code; return code;
} }
static void tsdbFreeRtnArg(void *arg) { taosMemoryFree(arg); }
int32_t tsdbRetention(STsdb *tsdb, int64_t now, int32_t sync) { int32_t tsdbRetention(STsdb *tsdb, int64_t now, int32_t sync) {
int32_t code = 0; int32_t code = 0;

View File

@ -39,7 +39,7 @@ int tsdbInsertData(STsdb *pTsdb, int64_t version, SSubmitReq2 *pMsg, SSubmitRsp2
arrSize = taosArrayGetSize(pMsg->aSubmitTbData); arrSize = taosArrayGetSize(pMsg->aSubmitTbData);
// scan and convert // scan and convert
if (tsdbScanAndConvertSubmitMsg(pTsdb, pMsg) < 0) { if ((terrno = tsdbScanAndConvertSubmitMsg(pTsdb, pMsg)) < 0) {
if (terrno != TSDB_CODE_TDB_TABLE_RECONFIGURE) { if (terrno != TSDB_CODE_TDB_TABLE_RECONFIGURE) {
tsdbError("vgId:%d, failed to insert data since %s", TD_VID(pTsdb->pVnode), tstrerror(terrno)); tsdbError("vgId:%d, failed to insert data since %s", TD_VID(pTsdb->pVnode), tstrerror(terrno));
} }

View File

@ -15,8 +15,12 @@
#include "vnd.h" #include "vnd.h"
extern int32_t tsdbRetention(STsdb *tsdb, int64_t now, int32_t sync);
int32_t vnodeDoRetention(SVnode *pVnode, int64_t now) { int32_t vnodeDoRetention(SVnode *pVnode, int64_t now) {
return tsdbRetention(pVnode->pTsdb, now, pVnode->config.sttTrigger == 1); int32_t code = TSDB_CODE_SUCCESS;
code = tsdbRetention(pVnode->pTsdb, now, pVnode->config.sttTrigger == 1);
if (TSDB_CODE_SUCCESS == code) code = smaRetention(pVnode->pSma, now);
return code;
} }

View File

@ -25,9 +25,11 @@
static int32_t vnodeProcessCreateStbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp); static int32_t vnodeProcessCreateStbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp);
static int32_t vnodeProcessAlterStbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp); static int32_t vnodeProcessAlterStbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp);
static int32_t vnodeProcessDropStbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp); static int32_t vnodeProcessDropStbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp);
static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp); static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp,
SRpcMsg *pOriginRpc);
static int32_t vnodeProcessAlterTbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp); static int32_t vnodeProcessAlterTbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp);
static int32_t vnodeProcessDropTbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp); static int32_t vnodeProcessDropTbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp,
SRpcMsg *pOriginRpc);
static int32_t vnodeProcessSubmitReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp); static int32_t vnodeProcessSubmitReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp);
static int32_t vnodeProcessCreateTSmaReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp); static int32_t vnodeProcessCreateTSmaReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp);
static int32_t vnodeProcessAlterConfirmReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp); static int32_t vnodeProcessAlterConfirmReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp);
@ -380,7 +382,7 @@ static int32_t vnodePreProcessDeleteMsg(SVnode *pVnode, SRpcMsg *pMsg) {
SEncoder *pCoder = &(SEncoder){0}; SEncoder *pCoder = &(SEncoder){0};
SDeleteRes res = {0}; SDeleteRes res = {0};
SReadHandle handle = {.vnode = pVnode, .pMsgCb = &pVnode->msgCb}; SReadHandle handle = {.vnode = pVnode, .pMsgCb = &pVnode->msgCb, .skipRollup = 1};
initStorageAPI(&handle.api); initStorageAPI(&handle.api);
code = qWorkerProcessDeleteMsg(&handle, pVnode->pQuery, pMsg, &res); code = qWorkerProcessDeleteMsg(&handle, pVnode->pQuery, pMsg, &res);
@ -509,13 +511,13 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t ver, SRpcMsg
if (vnodeProcessDropStbReq(pVnode, ver, pReq, len, pRsp) < 0) goto _err; if (vnodeProcessDropStbReq(pVnode, ver, pReq, len, pRsp) < 0) goto _err;
break; break;
case TDMT_VND_CREATE_TABLE: case TDMT_VND_CREATE_TABLE:
if (vnodeProcessCreateTbReq(pVnode, ver, pReq, len, pRsp) < 0) goto _err; if (vnodeProcessCreateTbReq(pVnode, ver, pReq, len, pRsp, pMsg) < 0) goto _err;
break; break;
case TDMT_VND_ALTER_TABLE: case TDMT_VND_ALTER_TABLE:
if (vnodeProcessAlterTbReq(pVnode, ver, pReq, len, pRsp) < 0) goto _err; if (vnodeProcessAlterTbReq(pVnode, ver, pReq, len, pRsp) < 0) goto _err;
break; break;
case TDMT_VND_DROP_TABLE: case TDMT_VND_DROP_TABLE:
if (vnodeProcessDropTbReq(pVnode, ver, pReq, len, pRsp) < 0) goto _err; if (vnodeProcessDropTbReq(pVnode, ver, pReq, len, pRsp, pMsg) < 0) goto _err;
break; break;
case TDMT_VND_DROP_TTL_TABLE: case TDMT_VND_DROP_TTL_TABLE:
if (vnodeProcessDropTtlTbReq(pVnode, ver, pReq, len, pRsp) < 0) goto _err; if (vnodeProcessDropTtlTbReq(pVnode, ver, pReq, len, pRsp) < 0) goto _err;
@ -878,7 +880,8 @@ _err:
return -1; return -1;
} }
static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp) { static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp,
SRpcMsg *pOriginRpc) {
SDecoder decoder = {0}; SDecoder decoder = {0};
SEncoder encoder = {0}; SEncoder encoder = {0};
int32_t rcode = 0; int32_t rcode = 0;
@ -928,6 +931,17 @@ static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq,
goto _exit; goto _exit;
} }
if(tsEnableAudit && tsEnableAuditCreateTable){
char* str = taosMemoryCalloc(1, TSDB_TABLE_FNAME_LEN);
if (str == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
rcode = -1;
goto _exit;
}
strcpy(str, pCreateReq->name);
taosArrayPush(tbNames, &str);
}
// validate hash // validate hash
sprintf(tbName, "%s.%s", pVnode->config.dbname, pCreateReq->name); sprintf(tbName, "%s.%s", pVnode->config.dbname, pCreateReq->name);
if (vnodeValidateTableHash(pVnode, tbName) < 0) { if (vnodeValidateTableHash(pVnode, tbName) < 0) {
@ -951,12 +965,6 @@ static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq,
} }
taosArrayPush(rsp.pArray, &cRsp); taosArrayPush(rsp.pArray, &cRsp);
if (tsEnableAuditCreateTable) {
char *str = taosMemoryCalloc(1, TSDB_TABLE_FNAME_LEN);
strcpy(str, pCreateReq->name);
taosArrayPush(tbNames, &str);
}
} }
vDebug("vgId:%d, add %d new created tables into query table list", TD_VID(pVnode), (int32_t)taosArrayGetSize(tbUids)); vDebug("vgId:%d, add %d new created tables into query table list", TD_VID(pVnode), (int32_t)taosArrayGetSize(tbUids));
@ -978,17 +986,17 @@ static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq,
tEncoderInit(&encoder, pRsp->pCont, pRsp->contLen); tEncoderInit(&encoder, pRsp->pCont, pRsp->contLen);
tEncodeSVCreateTbBatchRsp(&encoder, &rsp); tEncodeSVCreateTbBatchRsp(&encoder, &rsp);
if (tsEnableAuditCreateTable) { if(tsEnableAudit && tsEnableAuditCreateTable){
int64_t clusterId = pVnode->config.syncCfg.nodeInfo[0].clusterId; int64_t clusterId = pVnode->config.syncCfg.nodeInfo[0].clusterId;
SName name = {0}; SName name = {0};
tNameFromString(&name, pVnode->config.dbname, T_NAME_ACCT | T_NAME_DB); tNameFromString(&name, pVnode->config.dbname, T_NAME_ACCT | T_NAME_DB);
SStringBuilder sb = {0}; SStringBuilder sb = {0};
for (int32_t iReq = 0; iReq < req.nReqs; iReq++) { for(int32_t i = 0; i < tbNames->size; i++){
char **key = (char **)taosArrayGet(tbNames, iReq); char** key = (char**)taosArrayGet(tbNames, i);
taosStringBuilderAppendStringLen(&sb, *key, strlen(*key)); taosStringBuilderAppendStringLen(&sb, *key, strlen(*key));
if (iReq < req.nReqs - 1) { if(i < tbNames->size - 1){
taosStringBuilderAppendChar(&sb, ','); taosStringBuilderAppendChar(&sb, ',');
} }
taosMemoryFreeClear(*key); taosMemoryFreeClear(*key);
@ -997,7 +1005,7 @@ static int32_t vnodeProcessCreateTbReq(SVnode *pVnode, int64_t ver, void *pReq,
size_t len = 0; size_t len = 0;
char *keyJoined = taosStringBuilderGetResult(&sb, &len); char *keyJoined = taosStringBuilderGetResult(&sb, &len);
auditRecord(NULL, clusterId, "createTable", name.dbname, "", keyJoined, len); auditRecord(pOriginRpc, clusterId, "createTable", name.dbname, "", keyJoined, len);
taosStringBuilderDestroy(&sb); taosStringBuilderDestroy(&sb);
} }
@ -1139,7 +1147,8 @@ _exit:
return 0; return 0;
} }
static int32_t vnodeProcessDropTbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp) { static int32_t vnodeProcessDropTbReq(SVnode *pVnode, int64_t ver, void *pReq, int32_t len, SRpcMsg *pRsp,
SRpcMsg *pOriginRpc) {
SVDropTbBatchReq req = {0}; SVDropTbBatchReq req = {0};
SVDropTbBatchRsp rsp = {0}; SVDropTbBatchRsp rsp = {0};
SDecoder decoder = {0}; SDecoder decoder = {0};
@ -1218,7 +1227,7 @@ static int32_t vnodeProcessDropTbReq(SVnode *pVnode, int64_t ver, void *pReq, in
size_t len = 0; size_t len = 0;
char *keyJoined = taosStringBuilderGetResult(&sb, &len); char *keyJoined = taosStringBuilderGetResult(&sb, &len);
auditRecord(NULL, clusterId, "dropTable", name.dbname, "", keyJoined, len); auditRecord(pOriginRpc, clusterId, "dropTable", name.dbname, "", keyJoined, len);
taosStringBuilderDestroy(&sb); taosStringBuilderDestroy(&sb);
} }
@ -1669,7 +1678,7 @@ _exit:
atomic_add_fetch_64(&pVnode->statis.nBatchInsert, 1); atomic_add_fetch_64(&pVnode->statis.nBatchInsert, 1);
if (code == 0) { if (code == 0) {
atomic_add_fetch_64(&pVnode->statis.nBatchInsertSuccess, 1); atomic_add_fetch_64(&pVnode->statis.nBatchInsertSuccess, 1);
tdProcessRSmaSubmit(pVnode->pSma, ver, pSubmitReq, pReq, len, STREAM_INPUT__DATA_SUBMIT); code = tdProcessRSmaSubmit(pVnode->pSma, ver, pSubmitReq, pReq, len);
} }
// clear // clear
@ -1886,6 +1895,11 @@ static int32_t vnodeProcessBatchDeleteReq(SVnode *pVnode, int64_t ver, void *pRe
SMetaReader mr = {0}; SMetaReader mr = {0};
metaReaderDoInit(&mr, pVnode->pMeta, META_READER_NOLOCK); metaReaderDoInit(&mr, pVnode->pMeta, META_READER_NOLOCK);
STsdb *pTsdb = pVnode->pTsdb;
if (deleteReq.level) {
pTsdb = deleteReq.level == 1 ? VND_RSMA1(pVnode) : VND_RSMA2(pVnode);
}
int32_t sz = taosArrayGetSize(deleteReq.deleteReqs); int32_t sz = taosArrayGetSize(deleteReq.deleteReqs);
for (int32_t i = 0; i < sz; i++) { for (int32_t i = 0; i < sz; i++) {
@ -1898,21 +1912,22 @@ static int32_t vnodeProcessBatchDeleteReq(SVnode *pVnode, int64_t ver, void *pRe
int64_t uid = mr.me.uid; int64_t uid = mr.me.uid;
int32_t code = tsdbDeleteTableData(pVnode->pTsdb, ver, deleteReq.suid, uid, pOneReq->startTs, pOneReq->endTs); int32_t code = tsdbDeleteTableData(pTsdb, ver, deleteReq.suid, uid, pOneReq->startTs, pOneReq->endTs);
if (code < 0) { if (code < 0) {
terrno = code; terrno = code;
vError("vgId:%d, delete error since %s, suid:%" PRId64 ", uid:%" PRId64 ", start ts:%" PRId64 ", end ts:%" PRId64, vError("vgId:%d, delete error since %s, suid:%" PRId64 ", uid:%" PRId64 ", start ts:%" PRId64 ", end ts:%" PRId64,
TD_VID(pVnode), terrstr(), deleteReq.suid, uid, pOneReq->startTs, pOneReq->endTs); TD_VID(pVnode), terrstr(), deleteReq.suid, uid, pOneReq->startTs, pOneReq->endTs);
} }
code = metaUpdateChangeTimeWithLock(pVnode->pMeta, uid, deleteReq.ctimeMs); if (deleteReq.level == 0) {
if (code < 0) { code = metaUpdateChangeTimeWithLock(pVnode->pMeta, uid, deleteReq.ctimeMs);
terrno = code; if (code < 0) {
vError("vgId:%d, update change time error since %s, suid:%" PRId64 ", uid:%" PRId64 ", start ts:%" PRId64 terrno = code;
", end ts:%" PRId64, vError("vgId:%d, update change time error since %s, suid:%" PRId64 ", uid:%" PRId64 ", start ts:%" PRId64
TD_VID(pVnode), terrstr(), deleteReq.suid, uid, pOneReq->startTs, pOneReq->endTs); ", end ts:%" PRId64,
TD_VID(pVnode), terrstr(), deleteReq.suid, uid, pOneReq->startTs, pOneReq->endTs);
}
} }
tDecoderClear(&mr.coder); tDecoderClear(&mr.coder);
} }
metaReaderClear(&mr); metaReaderClear(&mr);
@ -1947,6 +1962,8 @@ static int32_t vnodeProcessDeleteReq(SVnode *pVnode, int64_t ver, void *pReq, in
if (code) goto _err; if (code) goto _err;
} }
code = tdProcessRSmaDelete(pVnode->pSma, ver, pRes, pReq, len);
tDecoderClear(pCoder); tDecoderClear(pCoder);
taosArrayDestroy(pRes->uidList); taosArrayDestroy(pRes->uidList);

View File

@ -37,7 +37,7 @@ extern "C" {
#define EXPLAIN_TABLE_COUNT_SCAN_FORMAT "Table Count Row Scan on %s" #define EXPLAIN_TABLE_COUNT_SCAN_FORMAT "Table Count Row Scan on %s"
#define EXPLAIN_PROJECTION_FORMAT "Projection" #define EXPLAIN_PROJECTION_FORMAT "Projection"
#define EXPLAIN_JOIN_FORMAT "%s" #define EXPLAIN_JOIN_FORMAT "%s"
#define EXPLAIN_AGG_FORMAT "Aggragate" #define EXPLAIN_AGG_FORMAT "%s"
#define EXPLAIN_INDEF_ROWS_FORMAT "Indefinite Rows Function" #define EXPLAIN_INDEF_ROWS_FORMAT "Indefinite Rows Function"
#define EXPLAIN_EXCHANGE_FORMAT "Data Exchange %d:1" #define EXPLAIN_EXCHANGE_FORMAT "Data Exchange %d:1"
#define EXPLAIN_SORT_FORMAT "Sort" #define EXPLAIN_SORT_FORMAT "Sort"
@ -59,7 +59,7 @@ extern "C" {
#define EXPLAIN_TIME_WINDOWS_FORMAT "Time Window: interval=%" PRId64 "%c offset=%" PRId64 "%c sliding=%" PRId64 "%c" #define EXPLAIN_TIME_WINDOWS_FORMAT "Time Window: interval=%" PRId64 "%c offset=%" PRId64 "%c sliding=%" PRId64 "%c"
#define EXPLAIN_WINDOW_FORMAT "Window: gap=%" PRId64 #define EXPLAIN_WINDOW_FORMAT "Window: gap=%" PRId64
#define EXPLAIN_RATIO_TIME_FORMAT "Ratio: %f" #define EXPLAIN_RATIO_TIME_FORMAT "Ratio: %f"
#define EXPLAIN_MERGE_FORMAT "SortMerge" #define EXPLAIN_MERGE_FORMAT "Merge"
#define EXPLAIN_MERGE_KEYS_FORMAT "Merge Key: " #define EXPLAIN_MERGE_KEYS_FORMAT "Merge Key: "
#define EXPLAIN_IGNORE_GROUPID_FORMAT "Ignore Group Id: %s" #define EXPLAIN_IGNORE_GROUPID_FORMAT "Ignore Group Id: %s"
#define EXPLAIN_PARTITION_KETS_FORMAT "Partition Key: " #define EXPLAIN_PARTITION_KETS_FORMAT "Partition Key: "
@ -85,7 +85,9 @@ extern "C" {
#define EXPLAIN_COLUMNS_FORMAT "columns=%d" #define EXPLAIN_COLUMNS_FORMAT "columns=%d"
#define EXPLAIN_PSEUDO_COLUMNS_FORMAT "pseudo_columns=%d" #define EXPLAIN_PSEUDO_COLUMNS_FORMAT "pseudo_columns=%d"
#define EXPLAIN_WIDTH_FORMAT "width=%d" #define EXPLAIN_WIDTH_FORMAT "width=%d"
#define EXPLAIN_TABLE_SCAN_FORMAT "order=[asc|%d desc|%d]" #define EXPLAIN_SCAN_ORDER_FORMAT "order=[asc|%d desc|%d]"
#define EXPLAIN_SCAN_MODE_FORMAT "mode=%s"
#define EXPLAIN_SCAN_DATA_LOAD_FORMAT "data_load=%s"
#define EXPLAIN_GROUPS_FORMAT "groups=%d" #define EXPLAIN_GROUPS_FORMAT "groups=%d"
#define EXPLAIN_WIDTH_FORMAT "width=%d" #define EXPLAIN_WIDTH_FORMAT "width=%d"
#define EXPLAIN_INTERVAL_VALUE_FORMAT "interval=%" PRId64 "%c" #define EXPLAIN_INTERVAL_VALUE_FORMAT "interval=%" PRId64 "%c"
@ -105,6 +107,7 @@ extern "C" {
#define EXPLAIN_UID_SLOT_FORMAT "uid_slot=%d,%d" #define EXPLAIN_UID_SLOT_FORMAT "uid_slot=%d,%d"
#define EXPLAIN_SRC_SCAN_FORMAT "src_scan=%d,%d" #define EXPLAIN_SRC_SCAN_FORMAT "src_scan=%d,%d"
#define EXPLAIN_PLAN_BLOCKING "blocking=%d" #define EXPLAIN_PLAN_BLOCKING "blocking=%d"
#define EXPLAIN_MERGE_MODE_FORMAT "mode=%s"
#define COMMAND_RESET_LOG "resetLog" #define COMMAND_RESET_LOG "resetLog"
#define COMMAND_SCHEDULE_POLICY "schedulePolicy" #define COMMAND_SCHEDULE_POLICY "schedulePolicy"
@ -156,6 +159,7 @@ typedef struct SExplainCtx {
#define EXPLAIN_ORDER_STRING(_order) ((ORDER_ASC == _order) ? "asc" : ORDER_DESC == _order ? "desc" : "unknown") #define EXPLAIN_ORDER_STRING(_order) ((ORDER_ASC == _order) ? "asc" : ORDER_DESC == _order ? "desc" : "unknown")
#define EXPLAIN_JOIN_STRING(_type) ((JOIN_TYPE_INNER == _type) ? "Inner join" : "Join") #define EXPLAIN_JOIN_STRING(_type) ((JOIN_TYPE_INNER == _type) ? "Inner join" : "Join")
#define EXPLAIN_MERGE_MODE_STRING(_mode) ((_mode) == MERGE_TYPE_SORT ? "sort" : ((_mode) == MERGE_TYPE_NON_SORT ? "merge" : "column"))
#define INVERAL_TIME_FROM_PRECISION_TO_UNIT(_t, _u, _p) (((_u) == 'n' || (_u) == 'y') ? (_t) : (convertTimeFromPrecisionToUnit(_t, _p, _u))) #define INVERAL_TIME_FROM_PRECISION_TO_UNIT(_t, _u, _p) (((_u) == 'n' || (_u) == 'y') ? (_t) : (convertTimeFromPrecisionToUnit(_t, _p, _u)))

View File

@ -20,6 +20,7 @@
#include "tcommon.h" #include "tcommon.h"
#include "tdatablock.h" #include "tdatablock.h"
#include "systable.h" #include "systable.h"
#include "functionMgt.h"
int32_t qExplainGenerateResNode(SPhysiNode *pNode, SExplainGroup *group, SExplainResNode **pRes); int32_t qExplainGenerateResNode(SPhysiNode *pNode, SExplainGroup *group, SExplainResNode **pRes);
int32_t qExplainAppendGroupResRows(void *pCtx, int32_t groupId, int32_t level, bool singleChannel); int32_t qExplainAppendGroupResRows(void *pCtx, int32_t groupId, int32_t level, bool singleChannel);
@ -284,10 +285,49 @@ int32_t qExplainResAppendRow(SExplainCtx *ctx, char *tbuf, int32_t len, int32_t
return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
} }
static uint8_t getIntervalPrecision(SIntervalPhysiNode *pIntNode) { static uint8_t qExplainGetIntervalPrecision(SIntervalPhysiNode *pIntNode) {
return ((SColumnNode *)pIntNode->window.pTspk)->node.resType.precision; return ((SColumnNode *)pIntNode->window.pTspk)->node.resType.precision;
} }
static char* qExplainGetScanMode(STableScanPhysiNode* pScan) {
bool isGroupByTbname = false;
bool isGroupByTag = false;
bool seq = false;
bool groupOrder = false;
if (pScan->pGroupTags && LIST_LENGTH(pScan->pGroupTags) == 1) {
SNode* p = nodesListGetNode(pScan->pGroupTags, 0);
if (QUERY_NODE_FUNCTION == nodeType(p) && (strcmp(((struct SFunctionNode*)p)->functionName, "tbname") == 0)) {
isGroupByTbname = true;
}
}
isGroupByTag = (NULL != pScan->pGroupTags) && !isGroupByTbname;
if ((((!isGroupByTag) || isGroupByTbname) && pScan->groupSort) || (isGroupByTag && (pScan->groupSort || pScan->scan.groupOrderScan))) {
return "seq_grp_order";
}
if ((isGroupByTbname && (pScan->groupSort || pScan->scan.groupOrderScan)) || (isGroupByTag && (pScan->groupSort || pScan->scan.groupOrderScan))) {
return "grp_order";
}
return "ts_order";
}
static char* qExplainGetScanDataLoad(STableScanPhysiNode* pScan) {
switch (pScan->dataRequired) {
case FUNC_DATA_REQUIRED_DATA_LOAD:
return "data";
case FUNC_DATA_REQUIRED_SMA_LOAD:
return "sma";
case FUNC_DATA_REQUIRED_NOT_LOAD:
return "no";
default:
break;
}
return "unknown";
}
int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, int32_t level) { int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, int32_t level) {
int32_t tlen = 0; int32_t tlen = 0;
bool isVerboseLine = false; bool isVerboseLine = false;
@ -360,7 +400,11 @@ int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, i
} }
EXPLAIN_ROW_APPEND(EXPLAIN_WIDTH_FORMAT, pTblScanNode->scan.node.pOutputDataBlockDesc->totalRowSize); EXPLAIN_ROW_APPEND(EXPLAIN_WIDTH_FORMAT, pTblScanNode->scan.node.pOutputDataBlockDesc->totalRowSize);
EXPLAIN_ROW_APPEND(EXPLAIN_BLANK_FORMAT); EXPLAIN_ROW_APPEND(EXPLAIN_BLANK_FORMAT);
EXPLAIN_ROW_APPEND(EXPLAIN_TABLE_SCAN_FORMAT, pTblScanNode->scanSeq[0], pTblScanNode->scanSeq[1]); EXPLAIN_ROW_APPEND(EXPLAIN_SCAN_ORDER_FORMAT, pTblScanNode->scanSeq[0], pTblScanNode->scanSeq[1]);
EXPLAIN_ROW_APPEND(EXPLAIN_BLANK_FORMAT);
EXPLAIN_ROW_APPEND(EXPLAIN_SCAN_MODE_FORMAT, qExplainGetScanMode(pTblScanNode));
EXPLAIN_ROW_APPEND(EXPLAIN_BLANK_FORMAT);
EXPLAIN_ROW_APPEND(EXPLAIN_SCAN_DATA_LOAD_FORMAT, qExplainGetScanDataLoad(pTblScanNode));
EXPLAIN_ROW_APPEND(EXPLAIN_RIGHT_PARENTHESIS_FORMAT); EXPLAIN_ROW_APPEND(EXPLAIN_RIGHT_PARENTHESIS_FORMAT);
EXPLAIN_ROW_END(); EXPLAIN_ROW_END();
QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level)); QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level));
@ -599,7 +643,7 @@ int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, i
} }
case QUERY_NODE_PHYSICAL_PLAN_HASH_AGG: { case QUERY_NODE_PHYSICAL_PLAN_HASH_AGG: {
SAggPhysiNode *pAggNode = (SAggPhysiNode *)pNode; SAggPhysiNode *pAggNode = (SAggPhysiNode *)pNode;
EXPLAIN_ROW_NEW(level, EXPLAIN_AGG_FORMAT); EXPLAIN_ROW_NEW(level, EXPLAIN_AGG_FORMAT, (pAggNode->pGroupKeys ? "GroupAggragate" : "Aggragate"));
EXPLAIN_ROW_APPEND(EXPLAIN_LEFT_PARENTHESIS_FORMAT); EXPLAIN_ROW_APPEND(EXPLAIN_LEFT_PARENTHESIS_FORMAT);
if (pResNode->pExecInfo) { if (pResNode->pExecInfo) {
QRY_ERR_RET(qExplainBufAppendExecInfo(pResNode->pExecInfo, tbuf, &tlen)); QRY_ERR_RET(qExplainBufAppendExecInfo(pResNode->pExecInfo, tbuf, &tlen));
@ -841,7 +885,7 @@ int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, i
EXPLAIN_ROW_APPEND_SLIMIT(pIntNode->window.node.pSlimit); EXPLAIN_ROW_APPEND_SLIMIT(pIntNode->window.node.pSlimit);
EXPLAIN_ROW_END(); EXPLAIN_ROW_END();
QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level + 1)); QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level + 1));
uint8_t precision = getIntervalPrecision(pIntNode); uint8_t precision = qExplainGetIntervalPrecision(pIntNode);
EXPLAIN_ROW_NEW(level + 1, EXPLAIN_TIME_WINDOWS_FORMAT, EXPLAIN_ROW_NEW(level + 1, EXPLAIN_TIME_WINDOWS_FORMAT,
INVERAL_TIME_FROM_PRECISION_TO_UNIT(pIntNode->interval, pIntNode->intervalUnit, precision), INVERAL_TIME_FROM_PRECISION_TO_UNIT(pIntNode->interval, pIntNode->intervalUnit, precision),
pIntNode->intervalUnit, pIntNode->offset, getPrecisionUnit(precision), pIntNode->intervalUnit, pIntNode->offset, getPrecisionUnit(precision),
@ -893,7 +937,7 @@ int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, i
EXPLAIN_ROW_APPEND_SLIMIT(pIntNode->window.node.pSlimit); EXPLAIN_ROW_APPEND_SLIMIT(pIntNode->window.node.pSlimit);
EXPLAIN_ROW_END(); EXPLAIN_ROW_END();
QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level + 1)); QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level + 1));
uint8_t precision = getIntervalPrecision(pIntNode); uint8_t precision = qExplainGetIntervalPrecision(pIntNode);
EXPLAIN_ROW_NEW(level + 1, EXPLAIN_TIME_WINDOWS_FORMAT, EXPLAIN_ROW_NEW(level + 1, EXPLAIN_TIME_WINDOWS_FORMAT,
INVERAL_TIME_FROM_PRECISION_TO_UNIT(pIntNode->interval, pIntNode->intervalUnit, precision), INVERAL_TIME_FROM_PRECISION_TO_UNIT(pIntNode->interval, pIntNode->intervalUnit, precision),
pIntNode->intervalUnit, pIntNode->offset, getPrecisionUnit(precision), pIntNode->intervalUnit, pIntNode->offset, getPrecisionUnit(precision),
@ -1119,41 +1163,33 @@ int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, i
EXPLAIN_ROW_APPEND(EXPLAIN_INPUT_ORDER_FORMAT, EXPLAIN_ORDER_STRING(pMergeNode->node.inputTsOrder)); EXPLAIN_ROW_APPEND(EXPLAIN_INPUT_ORDER_FORMAT, EXPLAIN_ORDER_STRING(pMergeNode->node.inputTsOrder));
EXPLAIN_ROW_APPEND(EXPLAIN_BLANK_FORMAT); EXPLAIN_ROW_APPEND(EXPLAIN_BLANK_FORMAT);
EXPLAIN_ROW_APPEND(EXPLAIN_OUTPUT_ORDER_TYPE_FORMAT, EXPLAIN_ORDER_STRING(pMergeNode->node.outputTsOrder)); EXPLAIN_ROW_APPEND(EXPLAIN_OUTPUT_ORDER_TYPE_FORMAT, EXPLAIN_ORDER_STRING(pMergeNode->node.outputTsOrder));
EXPLAIN_ROW_APPEND(EXPLAIN_BLANK_FORMAT);
EXPLAIN_ROW_APPEND(EXPLAIN_MERGE_MODE_FORMAT, EXPLAIN_MERGE_MODE_STRING(pMergeNode->type));
EXPLAIN_ROW_APPEND(EXPLAIN_RIGHT_PARENTHESIS_FORMAT); EXPLAIN_ROW_APPEND(EXPLAIN_RIGHT_PARENTHESIS_FORMAT);
EXPLAIN_ROW_END(); EXPLAIN_ROW_END();
QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level)); QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level));
if (EXPLAIN_MODE_ANALYZE == ctx->mode) { if (EXPLAIN_MODE_ANALYZE == ctx->mode) {
// sort key if (MERGE_TYPE_SORT == pMergeNode->type) {
EXPLAIN_ROW_NEW(level + 1, "Merge Key: "); // sort method
if (pResNode->pExecInfo) { EXPLAIN_ROW_NEW(level + 1, "Sort Method: ");
for (int32_t i = 0; i < LIST_LENGTH(pMergeNode->pMergeKeys); ++i) {
SOrderByExprNode *ptn = (SOrderByExprNode *)nodesListGetNode(pMergeNode->pMergeKeys, i); int32_t nodeNum = taosArrayGetSize(pResNode->pExecInfo);
EXPLAIN_ROW_APPEND("%s ", nodesGetNameFromColumnNode(ptn->pExpr)); SExplainExecInfo *execInfo = taosArrayGet(pResNode->pExecInfo, 0);
SSortExecInfo *pExecInfo = (SSortExecInfo *)execInfo->verboseInfo;
EXPLAIN_ROW_APPEND("%s", pExecInfo->sortMethod == SORT_QSORT_T ? "quicksort" : "merge sort");
if (pExecInfo->sortBuffer > 1024 * 1024) {
EXPLAIN_ROW_APPEND(" Buffers:%.2f Mb", pExecInfo->sortBuffer / (1024 * 1024.0));
} else if (pExecInfo->sortBuffer > 1024) {
EXPLAIN_ROW_APPEND(" Buffers:%.2f Kb", pExecInfo->sortBuffer / (1024.0));
} else {
EXPLAIN_ROW_APPEND(" Buffers:%d b", pExecInfo->sortBuffer);
} }
EXPLAIN_ROW_APPEND(" loops:%d", pExecInfo->loops);
EXPLAIN_ROW_END();
QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level));
} }
EXPLAIN_ROW_END();
QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level));
// sort method
EXPLAIN_ROW_NEW(level + 1, "Sort Method: ");
int32_t nodeNum = taosArrayGetSize(pResNode->pExecInfo);
SExplainExecInfo *execInfo = taosArrayGet(pResNode->pExecInfo, 0);
SSortExecInfo *pExecInfo = (SSortExecInfo *)execInfo->verboseInfo;
EXPLAIN_ROW_APPEND("%s", pExecInfo->sortMethod == SORT_QSORT_T ? "quicksort" : "merge sort");
if (pExecInfo->sortBuffer > 1024 * 1024) {
EXPLAIN_ROW_APPEND(" Buffers:%.2f Mb", pExecInfo->sortBuffer / (1024 * 1024.0));
} else if (pExecInfo->sortBuffer > 1024) {
EXPLAIN_ROW_APPEND(" Buffers:%.2f Kb", pExecInfo->sortBuffer / (1024.0));
} else {
EXPLAIN_ROW_APPEND(" Buffers:%d b", pExecInfo->sortBuffer);
}
EXPLAIN_ROW_APPEND(" loops:%d", pExecInfo->loops);
EXPLAIN_ROW_END();
QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level));
} }
if (verbose) { if (verbose) {
@ -1167,29 +1203,31 @@ int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, i
EXPLAIN_ROW_END(); EXPLAIN_ROW_END();
QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level + 1)); QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level + 1));
EXPLAIN_ROW_NEW(level + 1, EXPLAIN_OUTPUT_FORMAT); if (MERGE_TYPE_SORT == pMergeNode->type) {
EXPLAIN_ROW_APPEND(EXPLAIN_IGNORE_GROUPID_FORMAT, pMergeNode->ignoreGroupId ? "true" : "false"); EXPLAIN_ROW_NEW(level + 1, EXPLAIN_OUTPUT_FORMAT);
EXPLAIN_ROW_END(); EXPLAIN_ROW_APPEND(EXPLAIN_IGNORE_GROUPID_FORMAT, pMergeNode->ignoreGroupId ? "true" : "false");
QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level + 1)); EXPLAIN_ROW_END();
QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level + 1));
EXPLAIN_ROW_NEW(level + 1, EXPLAIN_MERGE_KEYS_FORMAT); EXPLAIN_ROW_NEW(level + 1, EXPLAIN_MERGE_KEYS_FORMAT);
if (pMergeNode->groupSort) { if (pMergeNode->groupSort) {
EXPLAIN_ROW_APPEND(EXPLAIN_STRING_TYPE_FORMAT, "_group_id asc"); EXPLAIN_ROW_APPEND(EXPLAIN_STRING_TYPE_FORMAT, "_group_id asc");
if (LIST_LENGTH(pMergeNode->pMergeKeys) > 0) { if (LIST_LENGTH(pMergeNode->pMergeKeys) > 0) {
EXPLAIN_ROW_APPEND(EXPLAIN_COMMA_FORMAT); EXPLAIN_ROW_APPEND(EXPLAIN_COMMA_FORMAT);
}
} }
} for (int32_t i = 0; i < LIST_LENGTH(pMergeNode->pMergeKeys); ++i) {
for (int32_t i = 0; i < LIST_LENGTH(pMergeNode->pMergeKeys); ++i) { SOrderByExprNode *ptn = (SOrderByExprNode *)nodesListGetNode(pMergeNode->pMergeKeys, i);
SOrderByExprNode *ptn = (SOrderByExprNode *)nodesListGetNode(pMergeNode->pMergeKeys, i); EXPLAIN_ROW_APPEND(EXPLAIN_STRING_TYPE_FORMAT, nodesGetNameFromColumnNode(ptn->pExpr));
EXPLAIN_ROW_APPEND(EXPLAIN_STRING_TYPE_FORMAT, nodesGetNameFromColumnNode(ptn->pExpr)); EXPLAIN_ROW_APPEND(EXPLAIN_BLANK_FORMAT);
EXPLAIN_ROW_APPEND(EXPLAIN_BLANK_FORMAT); EXPLAIN_ROW_APPEND(EXPLAIN_STRING_TYPE_FORMAT, EXPLAIN_ORDER_STRING(ptn->order));
EXPLAIN_ROW_APPEND(EXPLAIN_STRING_TYPE_FORMAT, EXPLAIN_ORDER_STRING(ptn->order)); if (i != LIST_LENGTH(pMergeNode->pMergeKeys) - 1) {
if (i != LIST_LENGTH(pMergeNode->pMergeKeys) - 1) { EXPLAIN_ROW_APPEND(EXPLAIN_COMMA_FORMAT);
EXPLAIN_ROW_APPEND(EXPLAIN_COMMA_FORMAT); }
} }
EXPLAIN_ROW_END();
QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level + 1));
} }
EXPLAIN_ROW_END();
QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level + 1));
if (pMergeNode->node.pConditions) { if (pMergeNode->node.pConditions) {
EXPLAIN_ROW_NEW(level + 1, EXPLAIN_FILTER_FORMAT); EXPLAIN_ROW_NEW(level + 1, EXPLAIN_FILTER_FORMAT);
@ -1419,7 +1457,7 @@ int32_t qExplainResNodeToRowsImpl(SExplainResNode *pResNode, SExplainCtx *ctx, i
EXPLAIN_ROW_APPEND_SLIMIT(pIntNode->window.node.pSlimit); EXPLAIN_ROW_APPEND_SLIMIT(pIntNode->window.node.pSlimit);
EXPLAIN_ROW_END(); EXPLAIN_ROW_END();
QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level + 1)); QRY_ERR_RET(qExplainResAppendRow(ctx, tbuf, tlen, level + 1));
uint8_t precision = getIntervalPrecision(pIntNode); uint8_t precision = qExplainGetIntervalPrecision(pIntNode);
EXPLAIN_ROW_NEW(level + 1, EXPLAIN_TIME_WINDOWS_FORMAT, EXPLAIN_ROW_NEW(level + 1, EXPLAIN_TIME_WINDOWS_FORMAT,
INVERAL_TIME_FROM_PRECISION_TO_UNIT(pIntNode->interval, pIntNode->intervalUnit, precision), INVERAL_TIME_FROM_PRECISION_TO_UNIT(pIntNode->interval, pIntNode->intervalUnit, precision),
pIntNode->intervalUnit, pIntNode->offset, getPrecisionUnit(precision), pIntNode->intervalUnit, pIntNode->offset, getPrecisionUnit(precision),

View File

@ -40,8 +40,10 @@
#define GET_RES_WINDOW_KEY_LEN(_l) ((_l) + sizeof(uint64_t)) #define GET_RES_WINDOW_KEY_LEN(_l) ((_l) + sizeof(uint64_t))
typedef struct SGroupResInfo { typedef struct SGroupResInfo {
int32_t index; int32_t index; // rows consumed in func:doCopyToSDataBlockXX
SArray* pRows; // SArray<SResKeyPos> int32_t iter; // relate to index-1, last consumed data's slot id in hash table
void* dataPos; // relate to index-1, last consumed data's position, in the nodelist of cur slot
SArray* pRows; // SArray<SResKeyPos>
char* pBuf; char* pBuf;
bool freeItem; bool freeItem;
} SGroupResInfo; } SGroupResInfo;
@ -178,7 +180,7 @@ void initExecTimeWindowInfo(SColumnInfoData* pColData, STimeWindow* pQueryWindow
SInterval extractIntervalInfo(const STableScanPhysiNode* pTableScanNode); SInterval extractIntervalInfo(const STableScanPhysiNode* pTableScanNode);
SColumn extractColumnFromColumnNode(SColumnNode* pColNode); SColumn extractColumnFromColumnNode(SColumnNode* pColNode);
int32_t initQueryTableDataCond(SQueryTableDataCond* pCond, const STableScanPhysiNode* pTableScanNode); int32_t initQueryTableDataCond(SQueryTableDataCond* pCond, const STableScanPhysiNode* pTableScanNode, const SReadHandle* readHandle);
void cleanupQueryTableDataCond(SQueryTableDataCond* pCond); void cleanupQueryTableDataCond(SQueryTableDataCond* pCond);
int32_t convertFillType(int32_t mode); int32_t convertFillType(int32_t mode);

View File

@ -679,6 +679,12 @@ void initResultSizeInfo(SResultInfo* pResultInfo, int32_t numOfRows);
void doBuildResultDatablock(struct SOperatorInfo* pOperator, SOptrBasicInfo* pbInfo, SGroupResInfo* pGroupResInfo, void doBuildResultDatablock(struct SOperatorInfo* pOperator, SOptrBasicInfo* pbInfo, SGroupResInfo* pGroupResInfo,
SDiskbasedBuf* pBuf); SDiskbasedBuf* pBuf);
/**
* @brief copydata from hash table, instead of copying from SGroupResInfo's pRow
*/
int32_t doCopyToSDataBlockByHash(SExecTaskInfo* pTaskInfo, SSDataBlock* pBlock, SExprSupp* pSup, SDiskbasedBuf* pBuf,
SGroupResInfo* pGroupResInfo, SSHashObj* pHashmap, int32_t threshold, bool ignoreGroup);
bool hasLimitOffsetInfo(SLimitInfo* pLimitInfo); bool hasLimitOffsetInfo(SLimitInfo* pLimitInfo);
bool hasSlimitOffsetInfo(SLimitInfo* pLimitInfo); bool hasSlimitOffsetInfo(SLimitInfo* pLimitInfo);
void initLimitInfo(const SNode* pLimit, const SNode* pSLimit, SLimitInfo* pLimitInfo); void initLimitInfo(const SNode* pLimit, const SNode* pSLimit, SLimitInfo* pLimitInfo);

View File

@ -54,6 +54,19 @@ static int32_t removeRedundantTsCol(SLastRowScanPhysiNode* pScanNode, SColM
#define SCAN_ROW_TYPE(_t) ((_t) ? CACHESCAN_RETRIEVE_LAST : CACHESCAN_RETRIEVE_LAST_ROW) #define SCAN_ROW_TYPE(_t) ((_t) ? CACHESCAN_RETRIEVE_LAST : CACHESCAN_RETRIEVE_LAST_ROW)
static void setColIdForCacheReadBlock(SSDataBlock* pBlock, SNodeList* pTargets) {
SNode* pNode;
int32_t idx = 0;
FOREACH(pNode, pTargets) {
if (nodeType(pNode) == QUERY_NODE_COLUMN) {
SColumnNode* pCol = (SColumnNode*)pNode;
SColumnInfoData* pColInfo = taosArrayGet(pBlock->pDataBlock, idx);
pColInfo->info.colId = pCol->colId;
}
idx++;
}
}
SOperatorInfo* createCacherowsScanOperator(SLastRowScanPhysiNode* pScanNode, SReadHandle* readHandle, SOperatorInfo* createCacherowsScanOperator(SLastRowScanPhysiNode* pScanNode, SReadHandle* readHandle,
STableListInfo* pTableListInfo, SExecTaskInfo* pTaskInfo) { STableListInfo* pTableListInfo, SExecTaskInfo* pTaskInfo) {
int32_t code = TSDB_CODE_SUCCESS; int32_t code = TSDB_CODE_SUCCESS;
@ -114,10 +127,12 @@ SOperatorInfo* createCacherowsScanOperator(SLastRowScanPhysiNode* pScanNode, SRe
capacity = TMIN(totalTables, 4096); capacity = TMIN(totalTables, 4096);
pInfo->pBufferredRes = createOneDataBlock(pInfo->pRes, false); pInfo->pBufferredRes = createOneDataBlock(pInfo->pRes, false);
setColIdForCacheReadBlock(pInfo->pBufferredRes, pScanNode->pTargets);
blockDataEnsureCapacity(pInfo->pBufferredRes, capacity); blockDataEnsureCapacity(pInfo->pBufferredRes, capacity);
} else { // by tags } else { // by tags
pInfo->retrieveType = CACHESCAN_RETRIEVE_TYPE_SINGLE | SCAN_ROW_TYPE(pScanNode->ignoreNull); pInfo->retrieveType = CACHESCAN_RETRIEVE_TYPE_SINGLE | SCAN_ROW_TYPE(pScanNode->ignoreNull);
capacity = 1; // only one row output capacity = 1; // only one row output
setColIdForCacheReadBlock(pInfo->pRes, pScanNode->pTargets);
} }
initResultSizeInfo(&pOperator->resultInfo, capacity); initResultSizeInfo(&pOperator->resultInfo, capacity);
@ -191,9 +206,9 @@ SSDataBlock* doScanCache(SOperatorInfo* pOperator) {
SSDataBlock* pRes = pInfo->pRes; SSDataBlock* pRes = pInfo->pRes;
if (pInfo->indexOfBufferedRes < pInfo->pBufferredRes->info.rows) { if (pInfo->indexOfBufferedRes < pInfo->pBufferredRes->info.rows) {
for (int32_t i = 0; i < taosArrayGetSize(pInfo->matchInfo.pList); ++i) { for (int32_t i = 0; i < taosArrayGetSize(pInfo->pBufferredRes->pDataBlock); ++i) {
SColMatchItem* pMatchInfo = taosArrayGet(pInfo->matchInfo.pList, i); SColumnInfoData* pCol = taosArrayGet(pRes->pDataBlock, i);
int32_t slotId = pMatchInfo->dstSlotId; int32_t slotId = pCol->info.slotId;
SColumnInfoData* pSrc = taosArrayGet(pInfo->pBufferredRes->pDataBlock, slotId); SColumnInfoData* pSrc = taosArrayGet(pInfo->pBufferredRes->pDataBlock, slotId);
SColumnInfoData* pDst = taosArrayGet(pRes->pDataBlock, slotId); SColumnInfoData* pDst = taosArrayGet(pRes->pDataBlock, slotId);
@ -201,8 +216,10 @@ SSDataBlock* doScanCache(SOperatorInfo* pOperator) {
if (colDataIsNull_s(pSrc, pInfo->indexOfBufferedRes)) { if (colDataIsNull_s(pSrc, pInfo->indexOfBufferedRes)) {
colDataSetNULL(pDst, 0); colDataSetNULL(pDst, 0);
} else { } else {
char* p = colDataGetData(pSrc, pInfo->indexOfBufferedRes); if (pSrc->pData) {
colDataSetVal(pDst, 0, p, false); char* p = colDataGetData(pSrc, pInfo->indexOfBufferedRes);
colDataSetVal(pDst, 0, p, false);
}
} }
} }

View File

@ -1713,7 +1713,7 @@ SColumn extractColumnFromColumnNode(SColumnNode* pColNode) {
return c; return c;
} }
int32_t initQueryTableDataCond(SQueryTableDataCond* pCond, const STableScanPhysiNode* pTableScanNode) { int32_t initQueryTableDataCond(SQueryTableDataCond* pCond, const STableScanPhysiNode* pTableScanNode, const SReadHandle* readHandle) {
pCond->order = pTableScanNode->scanSeq[0] > 0 ? TSDB_ORDER_ASC : TSDB_ORDER_DESC; pCond->order = pTableScanNode->scanSeq[0] > 0 ? TSDB_ORDER_ASC : TSDB_ORDER_DESC;
pCond->numOfCols = LIST_LENGTH(pTableScanNode->scan.pScanCols); pCond->numOfCols = LIST_LENGTH(pTableScanNode->scan.pScanCols);
@ -1732,6 +1732,7 @@ int32_t initQueryTableDataCond(SQueryTableDataCond* pCond, const STableScanPhysi
pCond->type = TIMEWINDOW_RANGE_CONTAINED; pCond->type = TIMEWINDOW_RANGE_CONTAINED;
pCond->startVersion = -1; pCond->startVersion = -1;
pCond->endVersion = -1; pCond->endVersion = -1;
pCond->skipRollup = readHandle->skipRollup;
int32_t j = 0; int32_t j = 0;
for (int32_t i = 0; i < pCond->numOfCols; ++i) { for (int32_t i = 0; i < pCond->numOfCols; ++i) {

View File

@ -69,12 +69,20 @@ static int32_t doSetSMABlock(SOperatorInfo* pOperator, void* input, size_t numOf
} else if (type == STREAM_INPUT__DATA_BLOCK) { } else if (type == STREAM_INPUT__DATA_BLOCK) {
for (int32_t i = 0; i < numOfBlocks; ++i) { for (int32_t i = 0; i < numOfBlocks; ++i) {
SSDataBlock* pDataBlock = &((SSDataBlock*)input)[i]; SSDataBlock* pDataBlock = &((SSDataBlock*)input)[i];
SPackedData tmp = { SPackedData tmp = {.pDataBlock = pDataBlock};
.pDataBlock = pDataBlock,
};
taosArrayPush(pInfo->pBlockLists, &tmp); taosArrayPush(pInfo->pBlockLists, &tmp);
} }
pInfo->blockType = STREAM_INPUT__DATA_BLOCK; pInfo->blockType = STREAM_INPUT__DATA_BLOCK;
} else if (type == STREAM_INPUT__CHECKPOINT) {
SPackedData tmp = {.pDataBlock = input};
taosArrayPush(pInfo->pBlockLists, &tmp);
pInfo->blockType = STREAM_INPUT__CHECKPOINT;
} else if (type == STREAM_INPUT__REF_DATA_BLOCK) {
for (int32_t i = 0; i < numOfBlocks; ++i) {
SPackedData* pReq = POINTER_SHIFT(input, i * sizeof(SPackedData));
taosArrayPush(pInfo->pBlockLists, pReq);
}
pInfo->blockType = STREAM_INPUT__DATA_BLOCK;
} }
return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
@ -633,7 +641,7 @@ int32_t qExecTaskOpt(qTaskInfo_t tinfo, SArray* pResList, uint64_t* useconds, bo
blockIndex += 1; blockIndex += 1;
current += p->info.rows; current += p->info.rows;
ASSERT(p->info.rows > 0); ASSERT(p->info.rows > 0 || p->info.type == STREAM_CHECKPOINT);
taosArrayPush(pResList, &p); taosArrayPush(pResList, &p);
if (current >= rowsThreshold) { if (current >= rowsThreshold) {

View File

@ -655,6 +655,85 @@ int32_t finalizeResultRows(SDiskbasedBuf* pBuf, SResultRowPosition* resultRowPos
return 0; return 0;
} }
int32_t doCopyToSDataBlockByHash(SExecTaskInfo* pTaskInfo, SSDataBlock* pBlock, SExprSupp* pSup, SDiskbasedBuf* pBuf,
SGroupResInfo* pGroupResInfo, SSHashObj* pHashmap, int32_t threshold,
bool ignoreGroup) {
SExprInfo* pExprInfo = pSup->pExprInfo;
int32_t numOfExprs = pSup->numOfExprs;
int32_t* rowEntryOffset = pSup->rowEntryInfoOffset;
SqlFunctionCtx* pCtx = pSup->pCtx;
size_t keyLen = 0;
int32_t numOfRows = tSimpleHashGetSize(pHashmap);
// begin from last iter
void* pData = pGroupResInfo->dataPos;
int32_t iter = pGroupResInfo->iter;
while ((pData = tSimpleHashIterate(pHashmap, pData, &iter)) != NULL) {
void* key = tSimpleHashGetKey(pData, &keyLen);
SResultRowPosition* pos = pData;
uint64_t groupId = *(uint64_t*)key;
SFilePage* page = getBufPage(pBuf, pos->pageId);
if (page == NULL) {
qError("failed to get buffer, code:%s, %s", tstrerror(terrno), GET_TASKID(pTaskInfo));
T_LONG_JMP(pTaskInfo->env, terrno);
}
SResultRow* pRow = (SResultRow*)((char*)page + pos->offset);
doUpdateNumOfRows(pCtx, pRow, numOfExprs, rowEntryOffset);
// no results, continue to check the next one
if (pRow->numOfRows == 0) {
pGroupResInfo->index += 1;
pGroupResInfo->iter = iter;
pGroupResInfo->dataPos = pData;
releaseBufPage(pBuf, page);
continue;
}
if (!ignoreGroup) {
if (pBlock->info.id.groupId == 0) {
pBlock->info.id.groupId = groupId;
} else {
// current value belongs to different group, it can't be packed into one datablock
if (pBlock->info.id.groupId != groupId) {
releaseBufPage(pBuf, page);
break;
}
}
}
if (pBlock->info.rows + pRow->numOfRows > pBlock->info.capacity) {
uint32_t newSize = pBlock->info.rows + pRow->numOfRows + ((numOfRows - iter) > 1 ? 1 : 0);
blockDataEnsureCapacity(pBlock, newSize);
qDebug("datablock capacity not sufficient, expand to required:%d, current capacity:%d, %s", newSize,
pBlock->info.capacity, GET_TASKID(pTaskInfo));
// todo set the pOperator->resultInfo size
}
pGroupResInfo->index += 1;
pGroupResInfo->iter = iter;
pGroupResInfo->dataPos = pData;
copyResultrowToDataBlock(pExprInfo, numOfExprs, pRow, pCtx, pBlock, rowEntryOffset, pTaskInfo);
releaseBufPage(pBuf, page);
pBlock->info.rows += pRow->numOfRows;
if (pBlock->info.rows >= threshold) {
break;
}
}
qDebug("%s result generated, rows:%" PRId64 ", groupId:%" PRIu64, GET_TASKID(pTaskInfo), pBlock->info.rows,
pBlock->info.id.groupId);
pBlock->info.dataLoad = 1;
blockDataUpdateTsWindow(pBlock, 0);
return 0;
}
int32_t doCopyToSDataBlock(SExecTaskInfo* pTaskInfo, SSDataBlock* pBlock, SExprSupp* pSup, SDiskbasedBuf* pBuf, int32_t doCopyToSDataBlock(SExecTaskInfo* pTaskInfo, SSDataBlock* pBlock, SExprSupp* pSup, SDiskbasedBuf* pBuf,
SGroupResInfo* pGroupResInfo, int32_t threshold, bool ignoreGroup) { SGroupResInfo* pGroupResInfo, int32_t threshold, bool ignoreGroup) {
SExprInfo* pExprInfo = pSup->pExprInfo; SExprInfo* pExprInfo = pSup->pExprInfo;

View File

@ -370,6 +370,72 @@ static SSDataBlock* buildGroupResultDataBlock(SOperatorInfo* pOperator) {
return (pRes->info.rows == 0) ? NULL : pRes; return (pRes->info.rows == 0) ? NULL : pRes;
} }
bool hasRemainResultByHash(SOperatorInfo* pOperator) {
SGroupbyOperatorInfo* pInfo = pOperator->info;
SSHashObj* pHashmap = pInfo->aggSup.pResultRowHashTable;
return pInfo->groupResInfo.index < tSimpleHashGetSize(pHashmap);
}
void doBuildResultDatablockByHash(SOperatorInfo* pOperator, SOptrBasicInfo* pbInfo, SGroupResInfo* pGroupResInfo,
SDiskbasedBuf* pBuf) {
SGroupbyOperatorInfo* pInfo = pOperator->info;
SSHashObj* pHashmap = pInfo->aggSup.pResultRowHashTable;
SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
SSDataBlock* pBlock = pInfo->binfo.pRes;
// set output datablock version
pBlock->info.version = pTaskInfo->version;
blockDataCleanup(pBlock);
if (!hasRemainResultByHash(pOperator)) {
return;
}
pBlock->info.id.groupId = 0;
if (!pInfo->binfo.mergeResultBlock) {
doCopyToSDataBlockByHash(pTaskInfo, pBlock, &pOperator->exprSupp, pInfo->aggSup.pResultBuf, &pInfo->groupResInfo,
pHashmap, pOperator->resultInfo.threshold, false);
} else {
while (hasRemainResultByHash(pOperator)) {
doCopyToSDataBlockByHash(pTaskInfo, pBlock, &pOperator->exprSupp, pInfo->aggSup.pResultBuf, &pInfo->groupResInfo,
pHashmap, pOperator->resultInfo.threshold, true);
if (pBlock->info.rows >= pOperator->resultInfo.threshold) {
break;
}
pBlock->info.id.groupId = 0;
}
// clear the group id info in SSDataBlock, since the client does not need it
pBlock->info.id.groupId = 0;
}
}
static SSDataBlock* buildGroupResultDataBlockByHash(SOperatorInfo* pOperator) {
SGroupbyOperatorInfo* pInfo = pOperator->info;
SSDataBlock* pRes = pInfo->binfo.pRes;
// after filter, if result block turn to null, get next from whole set
while (1) {
doBuildResultDatablockByHash(pOperator, &pInfo->binfo, &pInfo->groupResInfo, pInfo->aggSup.pResultBuf);
doFilter(pRes, pOperator->exprSupp.pFilterInfo, NULL);
if (!hasRemainResultByHash(pOperator)) {
setOperatorCompleted(pOperator);
// clean hash after completed
tSimpleHashCleanup(pInfo->aggSup.pResultRowHashTable);
pInfo->aggSup.pResultRowHashTable = NULL;
break;
}
if (pRes->info.rows > 0) {
break;
}
}
pOperator->resultInfo.totalRows += pRes->info.rows;
return (pRes->info.rows == 0) ? NULL : pRes;
}
static SSDataBlock* hashGroupbyAggregate(SOperatorInfo* pOperator) { static SSDataBlock* hashGroupbyAggregate(SOperatorInfo* pOperator) {
if (pOperator->status == OP_EXEC_DONE) { if (pOperator->status == OP_EXEC_DONE) {
return NULL; return NULL;
@ -379,8 +445,9 @@ static SSDataBlock* hashGroupbyAggregate(SOperatorInfo* pOperator) {
SGroupbyOperatorInfo* pInfo = pOperator->info; SGroupbyOperatorInfo* pInfo = pOperator->info;
if (pOperator->status == OP_RES_TO_RETURN) { if (pOperator->status == OP_RES_TO_RETURN) {
return buildGroupResultDataBlock(pOperator); return buildGroupResultDataBlockByHash(pOperator);
} }
SGroupResInfo* pGroupResInfo = &pInfo->groupResInfo;
int32_t order = pInfo->binfo.inputTsOrder; int32_t order = pInfo->binfo.inputTsOrder;
int64_t st = taosGetTimestampUs(); int64_t st = taosGetTimestampUs();
@ -425,10 +492,20 @@ static SSDataBlock* hashGroupbyAggregate(SOperatorInfo* pOperator) {
} }
} }
#endif #endif
initGroupedResultInfo(&pInfo->groupResInfo, pInfo->aggSup.pResultRowHashTable, 0); // initGroupedResultInfo(&pInfo->groupResInfo, pInfo->aggSup.pResultRowHashTable, 0);
if (pGroupResInfo->pRows != NULL) {
taosArrayDestroy(pGroupResInfo->pRows);
}
if (pGroupResInfo->pBuf) {
taosMemoryFree(pGroupResInfo->pBuf);
pGroupResInfo->pBuf = NULL;
}
pGroupResInfo->index = 0;
pGroupResInfo->iter = 0;
pGroupResInfo->dataPos = NULL;
pOperator->cost.openCost = (taosGetTimestampUs() - st) / 1000.0; pOperator->cost.openCost = (taosGetTimestampUs() - st) / 1000.0;
return buildGroupResultDataBlock(pOperator); return buildGroupResultDataBlockByHash(pOperator);
} }
SOperatorInfo* createGroupOperatorInfo(SOperatorInfo* downstream, SAggPhysiNode* pAggNode, SExecTaskInfo* pTaskInfo) { SOperatorInfo* createGroupOperatorInfo(SOperatorInfo* downstream, SAggPhysiNode* pAggNode, SExecTaskInfo* pTaskInfo) {

View File

@ -239,7 +239,7 @@ SOperatorInfo* createMergeJoinOperatorInfo(SOperatorInfo** pDownstream, int32_t
numOfDownstream = 2; numOfDownstream = 2;
} else { } else {
pInfo->downstreamResBlkId[0] = getOperatorResultBlockId(pDownstream[0], 0); pInfo->downstreamResBlkId[0] = getOperatorResultBlockId(pDownstream[0], 0);
pInfo->downstreamResBlkId[1] = getOperatorResultBlockId(pDownstream[1], 1); pInfo->downstreamResBlkId[1] = getOperatorResultBlockId(pDownstream[1], 0);
} }
int32_t numOfCols = 0; int32_t numOfCols = 0;

View File

@ -0,0 +1,531 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "executorInt.h"
#include "filter.h"
#include "operator.h"
#include "querytask.h"
#include "tdatablock.h"
typedef struct SSortMergeInfo {
SArray* pSortInfo;
SSortHandle* pSortHandle;
STupleHandle* prefetchedTuple;
int32_t bufPageSize;
uint32_t sortBufSize; // max buffer size for in-memory sort
SSDataBlock* pIntermediateBlock; // to hold the intermediate result
SSDataBlock* pInputBlock;
SColMatchInfo matchInfo;
} SSortMergeInfo;
typedef struct SNonSortMergeInfo {
int32_t lastSourceIdx;
int32_t sourceWorkIdx;
int32_t sourceNum;
int32_t* pSourceStatus;
} SNonSortMergeInfo;
typedef struct SColsMergeInfo {
SNodeList* pTargets;
uint64_t srcBlkIds[2];
} SColsMergeInfo;
typedef struct SMultiwayMergeOperatorInfo {
SOptrBasicInfo binfo;
EMergeType type;
union {
SSortMergeInfo sortMergeInfo;
SNonSortMergeInfo nsortMergeInfo;
SColsMergeInfo colsMergeInfo;
};
SLimitInfo limitInfo;
bool groupMerge;
bool ignoreGroupId;
uint64_t groupId;
bool inputWithGroupId;
} SMultiwayMergeOperatorInfo;
SSDataBlock* sortMergeloadNextDataBlock(void* param) {
SOperatorInfo* pOperator = (SOperatorInfo*)param;
SSDataBlock* pBlock = pOperator->fpSet.getNextFn(pOperator);
return pBlock;
}
int32_t openSortMergeOperator(SOperatorInfo* pOperator) {
SMultiwayMergeOperatorInfo* pInfo = pOperator->info;
SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
SSortMergeInfo* pSortMergeInfo = &pInfo->sortMergeInfo;
int32_t numOfBufPage = pSortMergeInfo->sortBufSize / pSortMergeInfo->bufPageSize;
pSortMergeInfo->pSortHandle = tsortCreateSortHandle(pSortMergeInfo->pSortInfo, SORT_MULTISOURCE_MERGE, pSortMergeInfo->bufPageSize, numOfBufPage,
pSortMergeInfo->pInputBlock, pTaskInfo->id.str, 0, 0, 0);
tsortSetFetchRawDataFp(pSortMergeInfo->pSortHandle, sortMergeloadNextDataBlock, NULL, NULL);
tsortSetCompareGroupId(pSortMergeInfo->pSortHandle, pInfo->groupMerge);
for (int32_t i = 0; i < pOperator->numOfDownstream; ++i) {
SOperatorInfo* pDownstream = pOperator->pDownstream[i];
if (pDownstream->operatorType == QUERY_NODE_PHYSICAL_PLAN_EXCHANGE) {
pDownstream->fpSet._openFn(pDownstream);
}
SSortSource* ps = taosMemoryCalloc(1, sizeof(SSortSource));
ps->param = pDownstream;
ps->onlyRef = true;
tsortAddSource(pSortMergeInfo->pSortHandle, ps);
}
return tsortOpen(pSortMergeInfo->pSortHandle);
}
static void doGetSortedBlockData(SMultiwayMergeOperatorInfo* pInfo, SSortHandle* pHandle, int32_t capacity,
SSDataBlock* p, bool* newgroup) {
SSortMergeInfo* pSortMergeInfo = &pInfo->sortMergeInfo;
*newgroup = false;
while (1) {
STupleHandle* pTupleHandle = NULL;
if (pInfo->groupMerge || pInfo->inputWithGroupId) {
if (pSortMergeInfo->prefetchedTuple == NULL) {
pTupleHandle = tsortNextTuple(pHandle);
} else {
pTupleHandle = pSortMergeInfo->prefetchedTuple;
pSortMergeInfo->prefetchedTuple = NULL;
uint64_t gid = tsortGetGroupId(pTupleHandle);
if (gid != pInfo->groupId) {
*newgroup = true;
pInfo->groupId = gid;
}
}
} else {
pTupleHandle = tsortNextTuple(pHandle);
pInfo->groupId = 0;
}
if (pTupleHandle == NULL) {
break;
}
if (pInfo->groupMerge || pInfo->inputWithGroupId) {
uint64_t tupleGroupId = tsortGetGroupId(pTupleHandle);
if (pInfo->groupId == 0 || pInfo->groupId == tupleGroupId) {
appendOneRowToDataBlock(p, pTupleHandle);
p->info.id.groupId = tupleGroupId;
pInfo->groupId = tupleGroupId;
} else {
if (p->info.rows == 0) {
appendOneRowToDataBlock(p, pTupleHandle);
p->info.id.groupId = pInfo->groupId = tupleGroupId;
} else {
pSortMergeInfo->prefetchedTuple = pTupleHandle;
break;
}
}
} else {
appendOneRowToDataBlock(p, pTupleHandle);
}
if (p->info.rows >= capacity) {
break;
}
}
}
SSDataBlock* doSortMerge(SOperatorInfo* pOperator) {
SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
SMultiwayMergeOperatorInfo* pInfo = pOperator->info;
SSortMergeInfo* pSortMergeInfo = &pInfo->sortMergeInfo;
SSortHandle* pHandle = pSortMergeInfo->pSortHandle;
SSDataBlock* pDataBlock = pInfo->binfo.pRes;
SArray* pColMatchInfo = pSortMergeInfo->matchInfo.pList;
int32_t capacity = pOperator->resultInfo.capacity;
qDebug("start to merge final sorted rows, %s", GET_TASKID(pTaskInfo));
blockDataCleanup(pDataBlock);
if (pSortMergeInfo->pIntermediateBlock == NULL) {
pSortMergeInfo->pIntermediateBlock = tsortGetSortedDataBlock(pHandle);
if (pSortMergeInfo->pIntermediateBlock == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return NULL;
}
blockDataEnsureCapacity(pSortMergeInfo->pIntermediateBlock, capacity);
} else {
blockDataCleanup(pSortMergeInfo->pIntermediateBlock);
}
SSDataBlock* p = pSortMergeInfo->pIntermediateBlock;
bool newgroup = false;
while (1) {
doGetSortedBlockData(pInfo, pHandle, capacity, p, &newgroup);
if (p->info.rows == 0) {
break;
}
if (newgroup) {
resetLimitInfoForNextGroup(&pInfo->limitInfo);
}
applyLimitOffset(&pInfo->limitInfo, p, pTaskInfo);
if (p->info.rows > 0) {
break;
}
}
if (p->info.rows > 0) {
int32_t numOfCols = taosArrayGetSize(pColMatchInfo);
for (int32_t i = 0; i < numOfCols; ++i) {
SColMatchItem* pmInfo = taosArrayGet(pColMatchInfo, i);
SColumnInfoData* pSrc = taosArrayGet(p->pDataBlock, pmInfo->srcSlotId);
SColumnInfoData* pDst = taosArrayGet(pDataBlock->pDataBlock, pmInfo->dstSlotId);
colDataAssign(pDst, pSrc, p->info.rows, &pDataBlock->info);
}
pDataBlock->info.rows = p->info.rows;
pDataBlock->info.scanFlag = p->info.scanFlag;
if (pInfo->ignoreGroupId) {
pDataBlock->info.id.groupId = 0;
} else {
pDataBlock->info.id.groupId = pInfo->groupId;
}
pDataBlock->info.dataLoad = 1;
}
qDebug("%s get sorted block, groupId:0x%" PRIx64 " rows:%" PRId64 , GET_TASKID(pTaskInfo), pDataBlock->info.id.groupId,
pDataBlock->info.rows);
return (pDataBlock->info.rows > 0) ? pDataBlock : NULL;
}
int32_t getSortMergeExplainExecInfo(SOperatorInfo* pOptr, void** pOptrExplain, uint32_t* len) {
SSortExecInfo* pSortExecInfo = taosMemoryCalloc(1, sizeof(SSortExecInfo));
SMultiwayMergeOperatorInfo* pInfo = (SMultiwayMergeOperatorInfo*)pOptr->info;
SSortMergeInfo* pSortMergeInfo = &pInfo->sortMergeInfo;
*pSortExecInfo = tsortGetSortExecInfo(pSortMergeInfo->pSortHandle);
*pOptrExplain = pSortExecInfo;
*len = sizeof(SSortExecInfo);
return TSDB_CODE_SUCCESS;
}
void destroySortMergeOperatorInfo(void* param) {
SSortMergeInfo* pSortMergeInfo = param;
pSortMergeInfo->pInputBlock = blockDataDestroy(pSortMergeInfo->pInputBlock);
pSortMergeInfo->pIntermediateBlock = blockDataDestroy(pSortMergeInfo->pIntermediateBlock);
taosArrayDestroy(pSortMergeInfo->matchInfo.pList);
tsortDestroySortHandle(pSortMergeInfo->pSortHandle);
taosArrayDestroy(pSortMergeInfo->pSortInfo);
}
#define NON_SORT_NEXT_SRC(_info, _idx) ((++(_idx) >= (_info)->sourceNum) ? ((_info)->sourceWorkIdx) : (_idx))
int32_t openNonSortMergeOperator(SOperatorInfo* pOperator) {
SMultiwayMergeOperatorInfo* pInfo = pOperator->info;
SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
SNonSortMergeInfo* pNonSortMergeInfo = &pInfo->nsortMergeInfo;
pNonSortMergeInfo->sourceWorkIdx = 0;
pNonSortMergeInfo->sourceNum = pOperator->numOfDownstream;
pNonSortMergeInfo->lastSourceIdx = -1;
pNonSortMergeInfo->pSourceStatus = taosMemoryCalloc(pOperator->numOfDownstream, sizeof(*pNonSortMergeInfo->pSourceStatus));
if (NULL == pNonSortMergeInfo->pSourceStatus) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
}
for (int32_t i = 0; i < pOperator->numOfDownstream; ++i) {
pNonSortMergeInfo->pSourceStatus[i] = i;
}
return TSDB_CODE_SUCCESS;
}
SSDataBlock* doNonSortMerge(SOperatorInfo* pOperator) {
SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
SMultiwayMergeOperatorInfo* pInfo = pOperator->info;
SNonSortMergeInfo* pNonSortMerge = &pInfo->nsortMergeInfo;
SSDataBlock* pBlock = NULL;
qDebug("start to merge no sorted rows, %s", GET_TASKID(pTaskInfo));
int32_t idx = NON_SORT_NEXT_SRC(pNonSortMerge, pNonSortMerge->lastSourceIdx);
while (idx < pNonSortMerge->sourceNum) {
pBlock = getNextBlockFromDownstream(pOperator, pNonSortMerge->pSourceStatus[idx]);
if (NULL == pBlock) {
TSWAP(pNonSortMerge->pSourceStatus[pNonSortMerge->sourceWorkIdx], pNonSortMerge->pSourceStatus[idx]);
pNonSortMerge->sourceWorkIdx++;
idx = NON_SORT_NEXT_SRC(pNonSortMerge, idx);
continue;
}
break;
}
return pBlock;
}
void destroyNonSortMergeOperatorInfo(void* param) {
SNonSortMergeInfo* pNonSortMerge = param;
taosMemoryFree(pNonSortMerge->pSourceStatus);
}
int32_t getNonSortMergeExplainExecInfo(SOperatorInfo* pOptr, void** pOptrExplain, uint32_t* len) {
return TSDB_CODE_SUCCESS;
}
int32_t openColsMergeOperator(SOperatorInfo* pOperator) {
return TSDB_CODE_SUCCESS;
}
int32_t copyColumnsValue(SNodeList* pNodeList, uint64_t targetBlkId, SSDataBlock* pDst, SSDataBlock* pSrc) {
bool isNull = (NULL == pSrc || pSrc->info.rows <= 0);
size_t numOfCols = LIST_LENGTH(pNodeList);
for (int32_t i = 0; i < numOfCols; ++i) {
STargetNode* pNode = (STargetNode*)nodesListGetNode(pNodeList, i);
if (nodeType(pNode->pExpr) == QUERY_NODE_COLUMN && ((SColumnNode*)pNode->pExpr)->dataBlockId == targetBlkId) {
SColumnInfoData* pDstCol = taosArrayGet(pDst->pDataBlock, pNode->slotId);
if (isNull) {
colDataSetVal(pDstCol, 0, NULL, true);
} else {
SColumnInfoData* pSrcCol = taosArrayGet(pSrc->pDataBlock, ((SColumnNode*)pNode->pExpr)->slotId);
colDataAssign(pDstCol, pSrcCol, 1, &pDst->info);
}
}
}
return TSDB_CODE_SUCCESS;
}
SSDataBlock* doColsMerge(SOperatorInfo* pOperator) {
SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
SMultiwayMergeOperatorInfo* pInfo = pOperator->info;
SSDataBlock* pBlock = NULL;
SColsMergeInfo* pColsMerge = &pInfo->colsMergeInfo;
int32_t nullBlkNum = 0;
qDebug("start to merge columns, %s", GET_TASKID(pTaskInfo));
for (int32_t i = 0; i < 2; ++i) {
pBlock = getNextBlockFromDownstream(pOperator, i);
if (pBlock && pBlock->info.rows > 1) {
qError("more than 1 row returned from downstream, rows:%" PRId64, pBlock->info.rows);
T_LONG_JMP(pTaskInfo->env, TSDB_CODE_QRY_EXECUTOR_INTERNAL_ERROR);
} else if (NULL == pBlock) {
nullBlkNum++;
}
copyColumnsValue(pColsMerge->pTargets, pColsMerge->srcBlkIds[i], pInfo->binfo.pRes, pBlock);
}
setOperatorCompleted(pOperator);
if (2 == nullBlkNum) {
return NULL;
}
pInfo->binfo.pRes->info.rows = 1;
return pInfo->binfo.pRes;
}
void destroyColsMergeOperatorInfo(void* param) {
}
int32_t getColsMergeExplainExecInfo(SOperatorInfo* pOptr, void** pOptrExplain, uint32_t* len) {
return TSDB_CODE_SUCCESS;
}
SOperatorFpSet gMultiwayMergeFps[MERGE_TYPE_MAX_VALUE] = {
{0},
{._openFn = openSortMergeOperator, .getNextFn = doSortMerge, .closeFn = destroySortMergeOperatorInfo, .getExplainFn = getSortMergeExplainExecInfo},
{._openFn = openNonSortMergeOperator, .getNextFn = doNonSortMerge, .closeFn = destroyNonSortMergeOperatorInfo, .getExplainFn = getNonSortMergeExplainExecInfo},
{._openFn = openColsMergeOperator, .getNextFn = doColsMerge, .closeFn = destroyColsMergeOperatorInfo, .getExplainFn = getColsMergeExplainExecInfo},
};
int32_t openMultiwayMergeOperator(SOperatorInfo* pOperator) {
int32_t code = 0;
SMultiwayMergeOperatorInfo* pInfo = pOperator->info;
SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
if (OPTR_IS_OPENED(pOperator)) {
return TSDB_CODE_SUCCESS;
}
int64_t startTs = taosGetTimestampUs();
if (NULL != gMultiwayMergeFps[pInfo->type]._openFn) {
code = (*gMultiwayMergeFps[pInfo->type]._openFn)(pOperator);
}
pOperator->cost.openCost = (taosGetTimestampUs() - startTs) / 1000.0;
pOperator->status = OP_RES_TO_RETURN;
if (code != TSDB_CODE_SUCCESS) {
T_LONG_JMP(pTaskInfo->env, terrno);
}
OPTR_SET_OPENED(pOperator);
return code;
}
SSDataBlock* doMultiwayMerge(SOperatorInfo* pOperator) {
if (pOperator->status == OP_EXEC_DONE) {
return NULL;
}
SSDataBlock* pBlock = NULL;
SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
SMultiwayMergeOperatorInfo* pInfo = pOperator->info;
int32_t code = pOperator->fpSet._openFn(pOperator);
if (code != TSDB_CODE_SUCCESS) {
T_LONG_JMP(pTaskInfo->env, code);
}
if (NULL != gMultiwayMergeFps[pInfo->type].getNextFn) {
pBlock = (*gMultiwayMergeFps[pInfo->type].getNextFn)(pOperator);
}
if (pBlock != NULL) {
pOperator->resultInfo.totalRows += pBlock->info.rows;
} else {
setOperatorCompleted(pOperator);
}
return pBlock;
}
void destroyMultiwayMergeOperatorInfo(void* param) {
SMultiwayMergeOperatorInfo* pInfo = (SMultiwayMergeOperatorInfo*)param;
pInfo->binfo.pRes = blockDataDestroy(pInfo->binfo.pRes);
if (NULL != gMultiwayMergeFps[pInfo->type].closeFn) {
(*gMultiwayMergeFps[pInfo->type].closeFn)(&pInfo->sortMergeInfo);
}
taosMemoryFreeClear(param);
}
int32_t getMultiwayMergeExplainExecInfo(SOperatorInfo* pOptr, void** pOptrExplain, uint32_t* len) {
int32_t code = 0;
SMultiwayMergeOperatorInfo* pInfo = (SMultiwayMergeOperatorInfo*)pOptr->info;
if (NULL != gMultiwayMergeFps[pInfo->type].getExplainFn) {
code = (*gMultiwayMergeFps[pInfo->type].getExplainFn)(pOptr, pOptrExplain, len);
}
return code;
}
SOperatorInfo* createMultiwayMergeOperatorInfo(SOperatorInfo** downStreams, size_t numStreams,
SMergePhysiNode* pMergePhyNode, SExecTaskInfo* pTaskInfo) {
SPhysiNode* pPhyNode = (SPhysiNode*)pMergePhyNode;
SMultiwayMergeOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SMultiwayMergeOperatorInfo));
SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo));
SDataBlockDescNode* pDescNode = pPhyNode->pOutputDataBlockDesc;
int32_t code = TSDB_CODE_SUCCESS;
if (pInfo == NULL || pOperator == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _error;
}
pInfo->groupMerge = pMergePhyNode->groupSort;
pInfo->ignoreGroupId = pMergePhyNode->ignoreGroupId;
pInfo->binfo.inputTsOrder = pMergePhyNode->node.inputTsOrder;
pInfo->binfo.outputTsOrder = pMergePhyNode->node.outputTsOrder;
pInfo->inputWithGroupId = pMergePhyNode->inputWithGroupId;
pInfo->type = pMergePhyNode->type;
switch (pInfo->type) {
case MERGE_TYPE_SORT: {
SSortMergeInfo* pSortMergeInfo = &pInfo->sortMergeInfo;
initLimitInfo(pMergePhyNode->node.pLimit, pMergePhyNode->node.pSlimit, &pInfo->limitInfo);
pInfo->binfo.pRes = createDataBlockFromDescNode(pDescNode);
SPhysiNode* pChildNode = (SPhysiNode*)nodesListGetNode(pPhyNode->pChildren, 0);
SSDataBlock* pInputBlock = createDataBlockFromDescNode(pChildNode->pOutputDataBlockDesc);
initResultSizeInfo(&pOperator->resultInfo, 1024);
blockDataEnsureCapacity(pInfo->binfo.pRes, pOperator->resultInfo.capacity);
size_t numOfCols = taosArrayGetSize(pInfo->binfo.pRes->pDataBlock);
int32_t rowSize = pInfo->binfo.pRes->info.rowSize;
int32_t numOfOutputCols = 0;
pSortMergeInfo->pSortInfo = createSortInfo(pMergePhyNode->pMergeKeys);
pSortMergeInfo->bufPageSize = getProperSortPageSize(rowSize, numOfCols);
pSortMergeInfo->sortBufSize = pSortMergeInfo->bufPageSize * (numStreams + 1); // one additional is reserved for merged result.
pSortMergeInfo->pInputBlock = pInputBlock;
code = extractColMatchInfo(pMergePhyNode->pTargets, pDescNode, &numOfOutputCols, COL_MATCH_FROM_SLOT_ID,
&pSortMergeInfo->matchInfo);
if (code != TSDB_CODE_SUCCESS) {
goto _error;
}
break;
}
case MERGE_TYPE_NON_SORT: {
SNonSortMergeInfo* pNonSortMerge = &pInfo->nsortMergeInfo;
break;
}
case MERGE_TYPE_COLUMNS: {
SColsMergeInfo* pColsMerge = &pInfo->colsMergeInfo;
pInfo->binfo.pRes = createDataBlockFromDescNode(pDescNode);
initResultSizeInfo(&pOperator->resultInfo, 1);
blockDataEnsureCapacity(pInfo->binfo.pRes, pOperator->resultInfo.capacity);
pColsMerge->pTargets = pMergePhyNode->pTargets;
pColsMerge->srcBlkIds[0] = getOperatorResultBlockId(downStreams[0], 0);
pColsMerge->srcBlkIds[1] = getOperatorResultBlockId(downStreams[1], 0);
break;
}
default:
qError("Invalid merge type: %d", pInfo->type);
code = TSDB_CODE_INVALID_PARA;
goto _error;
}
setOperatorInfo(pOperator, "MultiwayMergeOperator", QUERY_NODE_PHYSICAL_PLAN_MERGE, false, OP_NOT_OPENED, pInfo, pTaskInfo);
pOperator->fpSet = createOperatorFpSet(openMultiwayMergeOperator, doMultiwayMerge, NULL,
destroyMultiwayMergeOperatorInfo, optrDefaultBufFn, getMultiwayMergeExplainExecInfo, optrDefaultGetNextExtFn, NULL);
code = appendDownstream(pOperator, downStreams, numStreams);
if (code != TSDB_CODE_SUCCESS) {
goto _error;
}
return pOperator;
_error:
if (pInfo != NULL) {
destroyMultiwayMergeOperatorInfo(pInfo);
}
pTaskInfo->code = code;
taosMemoryFree(pOperator);
return NULL;
}

View File

@ -1035,7 +1035,7 @@ SOperatorInfo* createTableScanOperatorInfo(STableScanPhysiNode* pTableScanNode,
} }
initLimitInfo(pScanNode->node.pLimit, pScanNode->node.pSlimit, &pInfo->base.limitInfo); initLimitInfo(pScanNode->node.pLimit, pScanNode->node.pSlimit, &pInfo->base.limitInfo);
code = initQueryTableDataCond(&pInfo->base.cond, pTableScanNode); code = initQueryTableDataCond(&pInfo->base.cond, pTableScanNode, readHandle);
if (code != TSDB_CODE_SUCCESS) { if (code != TSDB_CODE_SUCCESS) {
goto _error; goto _error;
} }
@ -3533,7 +3533,7 @@ SOperatorInfo* createTableMergeScanOperatorInfo(STableScanPhysiNode* pTableScanN
goto _error; goto _error;
} }
code = initQueryTableDataCond(&pInfo->base.cond, pTableScanNode); code = initQueryTableDataCond(&pInfo->base.cond, pTableScanNode, readHandle);
if (code != TSDB_CODE_SUCCESS) { if (code != TSDB_CODE_SUCCESS) {
taosArrayDestroy(pInfo->base.matchInfo.pList); taosArrayDestroy(pInfo->base.matchInfo.pList);
goto _error; goto _error;

View File

@ -675,293 +675,5 @@ _error:
return NULL; return NULL;
} }
//=====================================================================================
// Multiway Sort Merge operator
typedef struct SMultiwayMergeOperatorInfo {
SOptrBasicInfo binfo;
int32_t bufPageSize;
uint32_t sortBufSize; // max buffer size for in-memory sort
SLimitInfo limitInfo;
SArray* pSortInfo;
SSortHandle* pSortHandle;
SColMatchInfo matchInfo;
SSDataBlock* pInputBlock;
SSDataBlock* pIntermediateBlock; // to hold the intermediate result
int64_t startTs; // sort start time
bool groupSort;
bool ignoreGroupId;
uint64_t groupId;
STupleHandle* prefetchedTuple;
bool inputWithGroupId;
} SMultiwayMergeOperatorInfo;
int32_t openMultiwayMergeOperator(SOperatorInfo* pOperator) {
SMultiwayMergeOperatorInfo* pInfo = pOperator->info;
SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
if (OPTR_IS_OPENED(pOperator)) {
return TSDB_CODE_SUCCESS;
}
pInfo->startTs = taosGetTimestampUs();
int32_t numOfBufPage = pInfo->sortBufSize / pInfo->bufPageSize;
pInfo->pSortHandle = tsortCreateSortHandle(pInfo->pSortInfo, SORT_MULTISOURCE_MERGE, pInfo->bufPageSize, numOfBufPage,
pInfo->pInputBlock, pTaskInfo->id.str, 0, 0, 0);
tsortSetFetchRawDataFp(pInfo->pSortHandle, loadNextDataBlock, NULL, NULL);
tsortSetCompareGroupId(pInfo->pSortHandle, pInfo->groupSort);
for (int32_t i = 0; i < pOperator->numOfDownstream; ++i) {
SOperatorInfo* pDownstream = pOperator->pDownstream[i];
if (pDownstream->operatorType == QUERY_NODE_PHYSICAL_PLAN_EXCHANGE) {
pDownstream->fpSet._openFn(pDownstream);
}
SSortSource* ps = taosMemoryCalloc(1, sizeof(SSortSource));
ps->param = pDownstream;
ps->onlyRef = true;
tsortAddSource(pInfo->pSortHandle, ps);
}
int32_t code = tsortOpen(pInfo->pSortHandle);
if (code != TSDB_CODE_SUCCESS) {
T_LONG_JMP(pTaskInfo->env, terrno);
}
pOperator->cost.openCost = (taosGetTimestampUs() - pInfo->startTs) / 1000.0;
pOperator->status = OP_RES_TO_RETURN;
OPTR_SET_OPENED(pOperator);
return TSDB_CODE_SUCCESS;
}
static void doGetSortedBlockData(SMultiwayMergeOperatorInfo* pInfo, SSortHandle* pHandle, int32_t capacity,
SSDataBlock* p, bool* newgroup) {
*newgroup = false;
while (1) {
STupleHandle* pTupleHandle = NULL;
if (pInfo->groupSort || pInfo->inputWithGroupId) {
if (pInfo->prefetchedTuple == NULL) {
pTupleHandle = tsortNextTuple(pHandle);
} else {
pTupleHandle = pInfo->prefetchedTuple;
pInfo->prefetchedTuple = NULL;
uint64_t gid = tsortGetGroupId(pTupleHandle);
if (gid != pInfo->groupId) {
*newgroup = true;
pInfo->groupId = gid;
}
}
} else {
pTupleHandle = tsortNextTuple(pHandle);
pInfo->groupId = 0;
}
if (pTupleHandle == NULL) {
break;
}
if (pInfo->groupSort || pInfo->inputWithGroupId) {
uint64_t tupleGroupId = tsortGetGroupId(pTupleHandle);
if (pInfo->groupId == 0 || pInfo->groupId == tupleGroupId) {
appendOneRowToDataBlock(p, pTupleHandle);
p->info.id.groupId = tupleGroupId;
pInfo->groupId = tupleGroupId;
} else {
if (p->info.rows == 0) {
appendOneRowToDataBlock(p, pTupleHandle);
p->info.id.groupId = pInfo->groupId = tupleGroupId;
} else {
pInfo->prefetchedTuple = pTupleHandle;
break;
}
}
} else {
appendOneRowToDataBlock(p, pTupleHandle);
}
if (p->info.rows >= capacity) {
break;
}
}
}
SSDataBlock* getMultiwaySortedBlockData(SSortHandle* pHandle, SSDataBlock* pDataBlock, SArray* pColMatchInfo,
SOperatorInfo* pOperator) {
SMultiwayMergeOperatorInfo* pInfo = pOperator->info;
int32_t capacity = pOperator->resultInfo.capacity;
SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
blockDataCleanup(pDataBlock);
if (pInfo->pIntermediateBlock == NULL) {
pInfo->pIntermediateBlock = tsortGetSortedDataBlock(pHandle);
if (pInfo->pIntermediateBlock == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return NULL;
}
blockDataEnsureCapacity(pInfo->pIntermediateBlock, capacity);
} else {
blockDataCleanup(pInfo->pIntermediateBlock);
}
SSDataBlock* p = pInfo->pIntermediateBlock;
bool newgroup = false;
while (1) {
doGetSortedBlockData(pInfo, pHandle, capacity, p, &newgroup);
if (p->info.rows == 0) {
break;
}
if (newgroup) {
resetLimitInfoForNextGroup(&pInfo->limitInfo);
}
applyLimitOffset(&pInfo->limitInfo, p, pTaskInfo);
if (p->info.rows > 0) {
break;
}
}
if (p->info.rows > 0) {
int32_t numOfCols = taosArrayGetSize(pColMatchInfo);
for (int32_t i = 0; i < numOfCols; ++i) {
SColMatchItem* pmInfo = taosArrayGet(pColMatchInfo, i);
SColumnInfoData* pSrc = taosArrayGet(p->pDataBlock, pmInfo->srcSlotId);
SColumnInfoData* pDst = taosArrayGet(pDataBlock->pDataBlock, pmInfo->dstSlotId);
colDataAssign(pDst, pSrc, p->info.rows, &pDataBlock->info);
}
pDataBlock->info.rows = p->info.rows;
pDataBlock->info.scanFlag = p->info.scanFlag;
if (pInfo->ignoreGroupId) {
pDataBlock->info.id.groupId = 0;
} else {
pDataBlock->info.id.groupId = pInfo->groupId;
}
pDataBlock->info.dataLoad = 1;
}
qDebug("%s get sorted block, groupId:0x%" PRIx64 " rows:%" PRId64 , GET_TASKID(pTaskInfo), pDataBlock->info.id.groupId,
pDataBlock->info.rows);
return (pDataBlock->info.rows > 0) ? pDataBlock : NULL;
}
SSDataBlock* doMultiwayMerge(SOperatorInfo* pOperator) {
if (pOperator->status == OP_EXEC_DONE) {
return NULL;
}
SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
SMultiwayMergeOperatorInfo* pInfo = pOperator->info;
int32_t code = pOperator->fpSet._openFn(pOperator);
if (code != TSDB_CODE_SUCCESS) {
T_LONG_JMP(pTaskInfo->env, code);
}
qDebug("start to merge final sorted rows, %s", GET_TASKID(pTaskInfo));
SSDataBlock* pBlock = getMultiwaySortedBlockData(pInfo->pSortHandle, pInfo->binfo.pRes, pInfo->matchInfo.pList, pOperator);
if (pBlock != NULL) {
pOperator->resultInfo.totalRows += pBlock->info.rows;
} else {
setOperatorCompleted(pOperator);
}
return pBlock;
}
void destroyMultiwayMergeOperatorInfo(void* param) {
SMultiwayMergeOperatorInfo* pInfo = (SMultiwayMergeOperatorInfo*)param;
pInfo->binfo.pRes = blockDataDestroy(pInfo->binfo.pRes);
pInfo->pInputBlock = blockDataDestroy(pInfo->pInputBlock);
pInfo->pIntermediateBlock = blockDataDestroy(pInfo->pIntermediateBlock);
tsortDestroySortHandle(pInfo->pSortHandle);
taosArrayDestroy(pInfo->pSortInfo);
taosArrayDestroy(pInfo->matchInfo.pList);
taosMemoryFreeClear(param);
}
int32_t getMultiwayMergeExplainExecInfo(SOperatorInfo* pOptr, void** pOptrExplain, uint32_t* len) {
SSortExecInfo* pSortExecInfo = taosMemoryCalloc(1, sizeof(SSortExecInfo));
SMultiwayMergeOperatorInfo* pInfo = (SMultiwayMergeOperatorInfo*)pOptr->info;
*pSortExecInfo = tsortGetSortExecInfo(pInfo->pSortHandle);
*pOptrExplain = pSortExecInfo;
*len = sizeof(SSortExecInfo);
return TSDB_CODE_SUCCESS;
}
SOperatorInfo* createMultiwayMergeOperatorInfo(SOperatorInfo** downStreams, size_t numStreams,
SMergePhysiNode* pMergePhyNode, SExecTaskInfo* pTaskInfo) {
SPhysiNode* pPhyNode = (SPhysiNode*)pMergePhyNode;
SMultiwayMergeOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SMultiwayMergeOperatorInfo));
SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo));
SDataBlockDescNode* pDescNode = pPhyNode->pOutputDataBlockDesc;
int32_t code = TSDB_CODE_SUCCESS;
if (pInfo == NULL || pOperator == NULL) {
code = TSDB_CODE_OUT_OF_MEMORY;
goto _error;
}
initLimitInfo(pMergePhyNode->node.pLimit, pMergePhyNode->node.pSlimit, &pInfo->limitInfo);
pInfo->binfo.pRes = createDataBlockFromDescNode(pDescNode);
int32_t rowSize = pInfo->binfo.pRes->info.rowSize;
int32_t numOfOutputCols = 0;
code = extractColMatchInfo(pMergePhyNode->pTargets, pDescNode, &numOfOutputCols, COL_MATCH_FROM_SLOT_ID,
&pInfo->matchInfo);
if (code != TSDB_CODE_SUCCESS) {
goto _error;
}
SPhysiNode* pChildNode = (SPhysiNode*)nodesListGetNode(pPhyNode->pChildren, 0);
SSDataBlock* pInputBlock = createDataBlockFromDescNode(pChildNode->pOutputDataBlockDesc);
initResultSizeInfo(&pOperator->resultInfo, 1024);
blockDataEnsureCapacity(pInfo->binfo.pRes, pOperator->resultInfo.capacity);
pInfo->groupSort = pMergePhyNode->groupSort;
pInfo->ignoreGroupId = pMergePhyNode->ignoreGroupId;
pInfo->pSortInfo = createSortInfo(pMergePhyNode->pMergeKeys);
pInfo->pInputBlock = pInputBlock;
size_t numOfCols = taosArrayGetSize(pInfo->binfo.pRes->pDataBlock);
pInfo->bufPageSize = getProperSortPageSize(rowSize, numOfCols);
pInfo->sortBufSize = pInfo->bufPageSize * (numStreams + 1); // one additional is reserved for merged result.
pInfo->binfo.inputTsOrder = pMergePhyNode->node.inputTsOrder;
pInfo->binfo.outputTsOrder = pMergePhyNode->node.outputTsOrder;
pInfo->inputWithGroupId = pMergePhyNode->inputWithGroupId;
setOperatorInfo(pOperator, "MultiwayMergeOperator", QUERY_NODE_PHYSICAL_PLAN_MERGE, false, OP_NOT_OPENED, pInfo, pTaskInfo);
pOperator->fpSet = createOperatorFpSet(openMultiwayMergeOperator, doMultiwayMerge, NULL,
destroyMultiwayMergeOperatorInfo, optrDefaultBufFn, getMultiwayMergeExplainExecInfo, optrDefaultGetNextExtFn, NULL);
code = appendDownstream(pOperator, downStreams, numStreams);
if (code != TSDB_CODE_SUCCESS) {
goto _error;
}
return pOperator;
_error:
if (pInfo != NULL) {
destroyMultiwayMergeOperatorInfo(pInfo);
}
pTaskInfo->code = code;
taosMemoryFree(pOperator);
return NULL;
}

View File

@ -2772,7 +2772,7 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = {
{ {
.name = "_cache_last", .name = "_cache_last",
.type = FUNCTION_TYPE_CACHE_LAST, .type = FUNCTION_TYPE_CACHE_LAST,
.classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_MULTI_RES_FUNC | FUNC_MGT_FORBID_STREAM_FUNC | FUNC_MGT_FORBID_SYSTABLE_FUNC, .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_MULTI_RES_FUNC | FUNC_MGT_SELECT_FUNC | FUNC_MGT_FORBID_STREAM_FUNC | FUNC_MGT_FORBID_SYSTABLE_FUNC,
.translateFunc = translateFirstLast, .translateFunc = translateFirstLast,
.getEnvFunc = getFirstLastFuncEnv, .getEnvFunc = getFirstLastFuncEnv,
.initFunc = functionSetup, .initFunc = functionSetup,

View File

@ -565,7 +565,7 @@ int32_t avgFunction(SqlFunctionCtx* pCtx) {
numOfElem = pInput->numOfRows; numOfElem = pInput->numOfRows;
pAvgRes->count += pInput->numOfRows; pAvgRes->count += pInput->numOfRows;
bool simdAvailable = tsAVXEnable && tsSIMDBuiltins && (numOfRows > THRESHOLD_SIZE); bool simdAvailable = tsAVXEnable && tsSIMDEnable && (numOfRows > THRESHOLD_SIZE);
switch(type) { switch(type) {
case TSDB_DATA_TYPE_UTINYINT: case TSDB_DATA_TYPE_UTINYINT:

View File

@ -370,7 +370,7 @@ static int32_t findFirstValPosition(const SColumnInfoData* pCol, int32_t start,
static void handleInt8Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc, static void handleInt8Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc,
bool signVal) { bool signVal) {
// AVX2 version to speedup the loop // AVX2 version to speedup the loop
if (tsAVX2Enable && tsSIMDBuiltins) { if (tsAVX2Enable && tsSIMDEnable) {
pBuf->v = i8VectorCmpAVX2(data, numOfRows, isMinFunc, signVal); pBuf->v = i8VectorCmpAVX2(data, numOfRows, isMinFunc, signVal);
} else { } else {
if (!pBuf->assign) { if (!pBuf->assign) {
@ -404,7 +404,7 @@ static void handleInt8Col(const void* data, int32_t start, int32_t numOfRows, SM
static void handleInt16Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc, static void handleInt16Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc,
bool signVal) { bool signVal) {
// AVX2 version to speedup the loop // AVX2 version to speedup the loop
if (tsAVX2Enable && tsSIMDBuiltins) { if (tsAVX2Enable && tsSIMDEnable) {
pBuf->v = i16VectorCmpAVX2(data, numOfRows, isMinFunc, signVal); pBuf->v = i16VectorCmpAVX2(data, numOfRows, isMinFunc, signVal);
} else { } else {
if (!pBuf->assign) { if (!pBuf->assign) {
@ -438,7 +438,7 @@ static void handleInt16Col(const void* data, int32_t start, int32_t numOfRows, S
static void handleInt32Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc, static void handleInt32Col(const void* data, int32_t start, int32_t numOfRows, SMinmaxResInfo* pBuf, bool isMinFunc,
bool signVal) { bool signVal) {
// AVX2 version to speedup the loop // AVX2 version to speedup the loop
if (tsAVX2Enable && tsSIMDBuiltins) { if (tsAVX2Enable && tsSIMDEnable) {
pBuf->v = i32VectorCmpAVX2(data, numOfRows, isMinFunc, signVal); pBuf->v = i32VectorCmpAVX2(data, numOfRows, isMinFunc, signVal);
} else { } else {
if (!pBuf->assign) { if (!pBuf->assign) {
@ -502,7 +502,7 @@ static void handleFloatCol(SColumnInfoData* pCol, int32_t start, int32_t numOfRo
float* val = (float*)&pBuf->v; float* val = (float*)&pBuf->v;
// AVX version to speedup the loop // AVX version to speedup the loop
if (tsAVXEnable && tsSIMDBuiltins) { if (tsAVXEnable && tsSIMDEnable) {
*val = floatVectorCmpAVX(pData, numOfRows, isMinFunc); *val = floatVectorCmpAVX(pData, numOfRows, isMinFunc);
} else { } else {
if (!pBuf->assign) { if (!pBuf->assign) {
@ -533,7 +533,7 @@ static void handleDoubleCol(SColumnInfoData* pCol, int32_t start, int32_t numOfR
double* val = (double*)&pBuf->v; double* val = (double*)&pBuf->v;
// AVX version to speedup the loop // AVX version to speedup the loop
if (tsAVXEnable && tsSIMDBuiltins) { if (tsAVXEnable && tsSIMDEnable) {
*val = (double)doubleVectorCmpAVX(pData, numOfRows, isMinFunc); *val = (double)doubleVectorCmpAVX(pData, numOfRows, isMinFunc);
} else { } else {
if (!pBuf->assign) { if (!pBuf->assign) {

View File

@ -419,6 +419,7 @@ static int32_t logicScanCopy(const SScanLogicNode* pSrc, SScanLogicNode* pDst) {
COPY_SCALAR_FIELD(groupSort); COPY_SCALAR_FIELD(groupSort);
CLONE_NODE_LIST_FIELD(pTags); CLONE_NODE_LIST_FIELD(pTags);
CLONE_NODE_FIELD(pSubtable); CLONE_NODE_FIELD(pSubtable);
COPY_SCALAR_FIELD(cacheLastMode);
COPY_SCALAR_FIELD(igLastNull); COPY_SCALAR_FIELD(igLastNull);
COPY_SCALAR_FIELD(groupOrderScan); COPY_SCALAR_FIELD(groupOrderScan);
COPY_SCALAR_FIELD(onlyMetaCtbIdx); COPY_SCALAR_FIELD(onlyMetaCtbIdx);
@ -443,8 +444,14 @@ static int32_t logicAggCopy(const SAggLogicNode* pSrc, SAggLogicNode* pDst) {
COPY_BASE_OBJECT_FIELD(node, logicNodeCopy); COPY_BASE_OBJECT_FIELD(node, logicNodeCopy);
CLONE_NODE_LIST_FIELD(pGroupKeys); CLONE_NODE_LIST_FIELD(pGroupKeys);
CLONE_NODE_LIST_FIELD(pAggFuncs); CLONE_NODE_LIST_FIELD(pAggFuncs);
COPY_SCALAR_FIELD(hasLastRow);
COPY_SCALAR_FIELD(hasLast);
COPY_SCALAR_FIELD(hasTimeLineFunc);
COPY_SCALAR_FIELD(onlyHasKeepOrderFunc);
COPY_SCALAR_FIELD(hasGroupKeyOptimized); COPY_SCALAR_FIELD(hasGroupKeyOptimized);
COPY_SCALAR_FIELD(isGroupTb);
COPY_SCALAR_FIELD(isPartTb); COPY_SCALAR_FIELD(isPartTb);
COPY_SCALAR_FIELD(hasGroup);
return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
} }
@ -488,6 +495,8 @@ static int32_t logicMergeCopy(const SMergeLogicNode* pSrc, SMergeLogicNode* pDst
CLONE_NODE_LIST_FIELD(pInputs); CLONE_NODE_LIST_FIELD(pInputs);
COPY_SCALAR_FIELD(numOfChannels); COPY_SCALAR_FIELD(numOfChannels);
COPY_SCALAR_FIELD(srcGroupId); COPY_SCALAR_FIELD(srcGroupId);
COPY_SCALAR_FIELD(colsMerge);
COPY_SCALAR_FIELD(needSort);
COPY_SCALAR_FIELD(groupSort); COPY_SCALAR_FIELD(groupSort);
COPY_SCALAR_FIELD(ignoreGroupId); COPY_SCALAR_FIELD(ignoreGroupId);
COPY_SCALAR_FIELD(inputWithGroupId); COPY_SCALAR_FIELD(inputWithGroupId);

View File

@ -1773,6 +1773,7 @@ static int32_t jsonToPhysiTagScanNode(const SJson* pJson, void* pObj) {
static const char* jkLastRowScanPhysiPlanGroupTags = "GroupTags"; static const char* jkLastRowScanPhysiPlanGroupTags = "GroupTags";
static const char* jkLastRowScanPhysiPlanGroupSort = "GroupSort"; static const char* jkLastRowScanPhysiPlanGroupSort = "GroupSort";
static const char* jkLastRowScanPhysiPlanTargets = "Targets";
static int32_t physiLastRowScanNodeToJson(const void* pObj, SJson* pJson) { static int32_t physiLastRowScanNodeToJson(const void* pObj, SJson* pJson) {
const SLastRowScanPhysiNode* pNode = (const SLastRowScanPhysiNode*)pObj; const SLastRowScanPhysiNode* pNode = (const SLastRowScanPhysiNode*)pObj;
@ -1784,6 +1785,9 @@ static int32_t physiLastRowScanNodeToJson(const void* pObj, SJson* pJson) {
if (TSDB_CODE_SUCCESS == code) { if (TSDB_CODE_SUCCESS == code) {
code = tjsonAddBoolToObject(pJson, jkLastRowScanPhysiPlanGroupSort, pNode->groupSort); code = tjsonAddBoolToObject(pJson, jkLastRowScanPhysiPlanGroupSort, pNode->groupSort);
} }
if (TSDB_CODE_SUCCESS == code) {
code = nodeListToJson(pJson, jkLastRowScanPhysiPlanTargets, pNode->pTargets);
}
return code; return code;
} }
@ -1798,6 +1802,9 @@ static int32_t jsonToPhysiLastRowScanNode(const SJson* pJson, void* pObj) {
if (TSDB_CODE_SUCCESS == code) { if (TSDB_CODE_SUCCESS == code) {
code = tjsonGetBoolValue(pJson, jkLastRowScanPhysiPlanGroupSort, &pNode->groupSort); code = tjsonGetBoolValue(pJson, jkLastRowScanPhysiPlanGroupSort, &pNode->groupSort);
} }
if (TSDB_CODE_SUCCESS == code) {
code = jsonToNodeList(pJson, jkLastRowScanPhysiPlanTargets, &pNode->pTargets);
}
return code; return code;
} }
@ -2278,6 +2285,7 @@ static const char* jkMergePhysiPlanSrcGroupId = "SrcGroupId";
static const char* jkMergePhysiPlanGroupSort = "GroupSort"; static const char* jkMergePhysiPlanGroupSort = "GroupSort";
static const char* jkMergePhysiPlanIgnoreGroupID = "IgnoreGroupID"; static const char* jkMergePhysiPlanIgnoreGroupID = "IgnoreGroupID";
static const char* jkMergePhysiPlanInputWithGroupId = "InputWithGroupId"; static const char* jkMergePhysiPlanInputWithGroupId = "InputWithGroupId";
static const char* jkMergePhysiPlanType = "Type";
static int32_t physiMergeNodeToJson(const void* pObj, SJson* pJson) { static int32_t physiMergeNodeToJson(const void* pObj, SJson* pJson) {
const SMergePhysiNode* pNode = (const SMergePhysiNode*)pObj; const SMergePhysiNode* pNode = (const SMergePhysiNode*)pObj;
@ -2304,6 +2312,9 @@ static int32_t physiMergeNodeToJson(const void* pObj, SJson* pJson) {
if (TSDB_CODE_SUCCESS == code) { if (TSDB_CODE_SUCCESS == code) {
code = tjsonAddBoolToObject(pJson, jkMergePhysiPlanInputWithGroupId, pNode->inputWithGroupId); code = tjsonAddBoolToObject(pJson, jkMergePhysiPlanInputWithGroupId, pNode->inputWithGroupId);
} }
if (TSDB_CODE_SUCCESS == code) {
code = tjsonAddIntegerToObject(pJson, jkMergePhysiPlanType, pNode->type);
}
return code; return code;
} }
@ -2330,6 +2341,9 @@ static int32_t jsonToPhysiMergeNode(const SJson* pJson, void* pObj) {
if (TSDB_CODE_SUCCESS == code) { if (TSDB_CODE_SUCCESS == code) {
code = tjsonGetBoolValue(pJson, jkMergePhysiPlanIgnoreGroupID, &pNode->ignoreGroupId); code = tjsonGetBoolValue(pJson, jkMergePhysiPlanIgnoreGroupID, &pNode->ignoreGroupId);
} }
if (TSDB_CODE_SUCCESS == code) {
code = tjsonGetIntValue(pJson, jkMergePhysiPlanType, (int32_t*)&pNode->type);
}
return code; return code;
} }

View File

@ -2052,7 +2052,8 @@ enum {
PHY_LAST_ROW_SCAN_CODE_SCAN = 1, PHY_LAST_ROW_SCAN_CODE_SCAN = 1,
PHY_LAST_ROW_SCAN_CODE_GROUP_TAGS, PHY_LAST_ROW_SCAN_CODE_GROUP_TAGS,
PHY_LAST_ROW_SCAN_CODE_GROUP_SORT, PHY_LAST_ROW_SCAN_CODE_GROUP_SORT,
PHY_LAST_ROW_SCAN_CODE_IGNULL PHY_LAST_ROW_SCAN_CODE_IGNULL,
PHY_LAST_ROW_SCAN_CODE_TARGETS
}; };
static int32_t physiLastRowScanNodeToMsg(const void* pObj, STlvEncoder* pEncoder) { static int32_t physiLastRowScanNodeToMsg(const void* pObj, STlvEncoder* pEncoder) {
@ -2068,6 +2069,9 @@ static int32_t physiLastRowScanNodeToMsg(const void* pObj, STlvEncoder* pEncoder
if (TSDB_CODE_SUCCESS == code) { if (TSDB_CODE_SUCCESS == code) {
code = tlvEncodeBool(pEncoder, PHY_LAST_ROW_SCAN_CODE_IGNULL, pNode->ignoreNull); code = tlvEncodeBool(pEncoder, PHY_LAST_ROW_SCAN_CODE_IGNULL, pNode->ignoreNull);
} }
if (TSDB_CODE_SUCCESS == code) {
code = tlvEncodeObj(pEncoder, PHY_LAST_ROW_SCAN_CODE_TARGETS, nodeListToMsg, pNode->pTargets);
}
return code; return code;
} }
@ -2091,6 +2095,9 @@ static int32_t msgToPhysiLastRowScanNode(STlvDecoder* pDecoder, void* pObj) {
case PHY_LAST_ROW_SCAN_CODE_IGNULL: case PHY_LAST_ROW_SCAN_CODE_IGNULL:
code = tlvDecodeBool(pTlv, &pNode->ignoreNull); code = tlvDecodeBool(pTlv, &pNode->ignoreNull);
break; break;
case PHY_LAST_ROW_SCAN_CODE_TARGETS:
code = msgToNodeListFromTlv(pTlv, (void**)&pNode->pTargets);
break;
default: default:
break; break;
} }
@ -2683,6 +2690,7 @@ enum {
PHY_MERGE_CODE_GROUP_SORT, PHY_MERGE_CODE_GROUP_SORT,
PHY_MERGE_CODE_IGNORE_GROUP_ID, PHY_MERGE_CODE_IGNORE_GROUP_ID,
PHY_MERGE_CODE_INPUT_WITH_GROUP_ID, PHY_MERGE_CODE_INPUT_WITH_GROUP_ID,
PHY_MERGE_CODE_TYPE,
}; };
static int32_t physiMergeNodeToMsg(const void* pObj, STlvEncoder* pEncoder) { static int32_t physiMergeNodeToMsg(const void* pObj, STlvEncoder* pEncoder) {
@ -2710,6 +2718,9 @@ static int32_t physiMergeNodeToMsg(const void* pObj, STlvEncoder* pEncoder) {
if (TSDB_CODE_SUCCESS == code) { if (TSDB_CODE_SUCCESS == code) {
code = tlvEncodeBool(pEncoder, PHY_MERGE_CODE_INPUT_WITH_GROUP_ID, pNode->inputWithGroupId); code = tlvEncodeBool(pEncoder, PHY_MERGE_CODE_INPUT_WITH_GROUP_ID, pNode->inputWithGroupId);
} }
if (TSDB_CODE_SUCCESS == code) {
code = tlvEncodeI32(pEncoder, PHY_MERGE_CODE_TYPE, pNode->type);
}
return code; return code;
} }
@ -2745,6 +2756,9 @@ static int32_t msgToPhysiMergeNode(STlvDecoder* pDecoder, void* pObj) {
case PHY_MERGE_CODE_INPUT_WITH_GROUP_ID: case PHY_MERGE_CODE_INPUT_WITH_GROUP_ID:
code = tlvDecodeBool(pTlv, &pNode->inputWithGroupId); code = tlvDecodeBool(pTlv, &pNode->inputWithGroupId);
break; break;
case PHY_MERGE_CODE_TYPE:
code = tlvDecodeI32(pTlv, (int32_t*)&pNode->type);
break;
default: default:
break; break;
} }

View File

@ -1285,6 +1285,7 @@ void nodesDestroyNode(SNode* pNode) {
SLastRowScanPhysiNode* pPhyNode = (SLastRowScanPhysiNode*)pNode; SLastRowScanPhysiNode* pPhyNode = (SLastRowScanPhysiNode*)pNode;
destroyScanPhysiNode((SScanPhysiNode*)pNode); destroyScanPhysiNode((SScanPhysiNode*)pNode);
nodesDestroyList(pPhyNode->pGroupTags); nodesDestroyList(pPhyNode->pGroupTags);
nodesDestroyList(pPhyNode->pTargets);
break; break;
} }
case QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN: case QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN:
@ -1571,6 +1572,19 @@ int32_t nodesListStrictAppendList(SNodeList* pTarget, SNodeList* pSrc) {
return code; return code;
} }
int32_t nodesListMakeStrictAppendList(SNodeList** pTarget, SNodeList* pSrc) {
if (NULL == *pTarget) {
*pTarget = nodesMakeList();
if (NULL == *pTarget) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return TSDB_CODE_OUT_OF_MEMORY;
}
}
return nodesListStrictAppendList(*pTarget, pSrc);
}
int32_t nodesListPushFront(SNodeList* pList, SNode* pNode) { int32_t nodesListPushFront(SNodeList* pList, SNode* pNode) {
if (NULL == pList || NULL == pNode) { if (NULL == pList || NULL == pNode) {
return TSDB_CODE_FAILED; return TSDB_CODE_FAILED;

View File

@ -176,12 +176,15 @@ static int32_t calcConstStmtCondition(SCalcConstContext* pCxt, SNode** pCond, bo
static EDealRes doFindAndReplaceNode(SNode** pNode, void* pContext) { static EDealRes doFindAndReplaceNode(SNode** pNode, void* pContext) {
SCalcConstContext* pCxt = pContext; SCalcConstContext* pCxt = pContext;
if (pCxt->replaceCxt.pTarget == *pNode) { if (pCxt->replaceCxt.pTarget == *pNode) {
char aliasName[TSDB_COL_NAME_LEN] = {0};
strcpy(aliasName, ((SExprNode*)*pNode)->aliasName);
nodesDestroyNode(*pNode); nodesDestroyNode(*pNode);
*pNode = nodesCloneNode(pCxt->replaceCxt.pNew); *pNode = nodesCloneNode(pCxt->replaceCxt.pNew);
if (NULL == *pNode) { if (NULL == *pNode) {
pCxt->code = TSDB_CODE_OUT_OF_MEMORY; pCxt->code = TSDB_CODE_OUT_OF_MEMORY;
return DEAL_RES_ERROR; return DEAL_RES_ERROR;
} }
strcpy(((SExprNode*)*pNode)->aliasName, aliasName);
pCxt->replaceCxt.replaced = true; pCxt->replaceCxt.replaced = true;
return DEAL_RES_END; return DEAL_RES_END;
@ -211,7 +214,6 @@ static int32_t calcConstProject(SCalcConstContext* pCxt, SNode* pProject, bool d
} }
char aliasName[TSDB_COL_NAME_LEN] = {0}; char aliasName[TSDB_COL_NAME_LEN] = {0};
strcpy(aliasName, ((SExprNode*)pProject)->aliasName);
int32_t code = TSDB_CODE_SUCCESS; int32_t code = TSDB_CODE_SUCCESS;
if (dual) { if (dual) {
code = scalarCalculateConstantsFromDual(pProject, pNew); code = scalarCalculateConstantsFromDual(pProject, pNew);
@ -219,15 +221,20 @@ static int32_t calcConstProject(SCalcConstContext* pCxt, SNode* pProject, bool d
code = scalarCalculateConstants(pProject, pNew); code = scalarCalculateConstants(pProject, pNew);
} }
if (TSDB_CODE_SUCCESS == code) { if (TSDB_CODE_SUCCESS == code) {
strcpy(((SExprNode*)*pNew)->aliasName, aliasName);
if (QUERY_NODE_VALUE == nodeType(*pNew) && NULL != pAssociation) { if (QUERY_NODE_VALUE == nodeType(*pNew) && NULL != pAssociation) {
int32_t size = taosArrayGetSize(pAssociation); int32_t size = taosArrayGetSize(pAssociation);
for (int32_t i = 0; i < size; ++i) { for (int32_t i = 0; i < size; ++i) {
SAssociationNode* pAssNode = taosArrayGet(pAssociation, i); SAssociationNode* pAssNode = taosArrayGet(pAssociation, i);
SNode** pCol = pAssNode->pPlace; SNode** pCol = pAssNode->pPlace;
if (*pCol == pAssNode->pAssociationNode) { if (*pCol == pAssNode->pAssociationNode) {
strcpy(aliasName, ((SExprNode*)*pCol)->aliasName);
SArray* pOrigAss = NULL;
TSWAP(((SExprNode*)*pCol)->pAssociation, pOrigAss);
nodesDestroyNode(*pCol); nodesDestroyNode(*pCol);
*pCol = nodesCloneNode(*pNew); *pCol = nodesCloneNode(*pNew);
TSWAP(pOrigAss, ((SExprNode*)*pCol)->pAssociation);
taosArrayDestroy(pOrigAss);
strcpy(((SExprNode*)*pCol)->aliasName, aliasName);
if (NULL == *pCol) { if (NULL == *pCol) {
code = TSDB_CODE_OUT_OF_MEMORY; code = TSDB_CODE_OUT_OF_MEMORY;
break; break;

View File

@ -3925,6 +3925,267 @@ static int32_t translatePartitionBy(STranslateContext* pCxt, SSelectStmt* pSelec
return code; return code;
} }
typedef struct SEqCondTbNameTableInfo {
SRealTableNode* pRealTable;
SArray* aTbnames;
} SEqCondTbNameTableInfo;
//[tableAlias.]tbname = tbNamVal
static bool isOperatorEqTbnameCond(STranslateContext* pCxt, SOperatorNode* pOperator, char** ppTableAlias, SArray** ppTabNames) {
if (pOperator->opType != OP_TYPE_EQUAL) return false;
SFunctionNode* pTbnameFunc = NULL;
SValueNode* pValueNode = NULL;
if (nodeType(pOperator->pLeft) == QUERY_NODE_FUNCTION &&
((SFunctionNode*)(pOperator->pLeft))->funcType == FUNCTION_TYPE_TBNAME &&
nodeType(pOperator->pRight) == QUERY_NODE_VALUE) {
pTbnameFunc = (SFunctionNode*)pOperator->pLeft;
pValueNode = (SValueNode*)pOperator->pRight;
} else if (nodeType(pOperator->pRight) == QUERY_NODE_FUNCTION &&
((SFunctionNode*)(pOperator->pRight))->funcType == FUNCTION_TYPE_TBNAME &&
nodeType(pOperator->pLeft) == QUERY_NODE_VALUE) {
pTbnameFunc = (SFunctionNode*)pOperator->pRight;
pValueNode = (SValueNode*)pOperator->pLeft;
} else {
return false;
}
if (LIST_LENGTH(pTbnameFunc->pParameterList) == 0) {
*ppTableAlias = NULL;
} else if (LIST_LENGTH(pTbnameFunc->pParameterList) == 1) {
SNode* pQualNode = nodesListGetNode(pTbnameFunc->pParameterList, 0);
if (nodeType(pQualNode) != QUERY_NODE_VALUE) return false;
SValueNode* pQualValNode = (SValueNode*)pQualNode;
*ppTableAlias = pQualValNode->literal;
} else {
return false;
}
*ppTabNames = taosArrayInit(1, sizeof(void*));
taosArrayPush(*ppTabNames, &(pValueNode->literal));
return true;
}
//[tableAlias.]tbname in (value1, value2, ...)
static bool isOperatorTbnameInCond(STranslateContext* pCxt, SOperatorNode* pOperator, char** ppTableAlias, SArray** ppTbNames) {
if (pOperator->opType != OP_TYPE_IN) return false;
if (nodeType(pOperator->pLeft) != QUERY_NODE_FUNCTION ||
((SFunctionNode*)(pOperator->pLeft))->funcType != FUNCTION_TYPE_TBNAME ||
nodeType(pOperator->pRight) != QUERY_NODE_NODE_LIST) {
return false;
}
SFunctionNode* pTbnameFunc = (SFunctionNode*)pOperator->pLeft;
if (LIST_LENGTH(pTbnameFunc->pParameterList) == 0) {
*ppTableAlias = NULL;
} else if (LIST_LENGTH(pTbnameFunc->pParameterList) == 1) {
SNode* pQualNode = nodesListGetNode(pTbnameFunc->pParameterList, 0);
if (nodeType(pQualNode) != QUERY_NODE_VALUE) return false;
SValueNode* pQualValNode = (SValueNode*)pQualNode;
*ppTableAlias = pQualValNode->literal;
} else {
return false;
}
*ppTbNames = taosArrayInit(1, sizeof(void*));
SNodeListNode* pValueListNode = (SNodeListNode*)pOperator->pRight;
SNodeList* pValueNodeList = pValueListNode->pNodeList;
SNode* pValNode = NULL;
FOREACH(pValNode, pValueNodeList) {
if (nodeType(pValNode) != QUERY_NODE_VALUE) {
return false;
}
taosArrayPush(*ppTbNames, &((SValueNode*)pValNode)->literal);
}
return true;
}
static bool findEqCondTbNameInOperatorNode(STranslateContext* pCxt, SNode* pWhere, SEqCondTbNameTableInfo* pInfo) {
int32_t code = TSDB_CODE_SUCCESS;
char* pTableAlias = NULL;
char* pTbNameVal = NULL;
if (isOperatorEqTbnameCond(pCxt, (SOperatorNode*)pWhere, &pTableAlias, &pInfo->aTbnames) ||
isOperatorTbnameInCond(pCxt, (SOperatorNode*)pWhere, &pTableAlias, &pInfo->aTbnames)) {
STableNode* pTable;
if (pTableAlias == NULL) {
pTable = (STableNode*)((SSelectStmt*)(pCxt->pCurrStmt))->pFromTable;
} else {
code = findTable(pCxt, pTableAlias, &pTable);
}
if (code == TSDB_CODE_SUCCESS && nodeType(pTable) == QUERY_NODE_REAL_TABLE &&
((SRealTableNode*)pTable)->pMeta && ((SRealTableNode*)pTable)->pMeta->tableType == TSDB_SUPER_TABLE) {
pInfo->pRealTable = (SRealTableNode*)pTable;
return true;
}
taosArrayDestroy(pInfo->aTbnames);
pInfo->aTbnames = NULL;
}
return false;
}
static bool isTableExistInTableTbnames(SArray* aTableTbNames, SRealTableNode* pTable) {
for (int i = 0; i < taosArrayGetSize(aTableTbNames); ++i) {
SEqCondTbNameTableInfo* info = taosArrayGet(aTableTbNames, i);
if (info->pRealTable == pTable) {
return true;
}
}
return false;
}
static void findEqualCondTbnameInLogicCondAnd(STranslateContext* pCxt, SNode* pWhere, SArray* aTableTbnames) {
SNode* pTmpNode = NULL;
FOREACH(pTmpNode, ((SLogicConditionNode*)pWhere)->pParameterList) {
if (nodeType(pTmpNode) == QUERY_NODE_OPERATOR) {
SEqCondTbNameTableInfo info = {0};
bool bIsEqTbnameCond = findEqCondTbNameInOperatorNode(pCxt, pTmpNode, &info);
if (bIsEqTbnameCond) {
if (!isTableExistInTableTbnames(aTableTbnames, info.pRealTable)) {
//TODO: intersect tbNames of same table? speed
taosArrayPush(aTableTbnames, &info);
} else {
taosArrayDestroy(info.aTbnames);
}
}
}
//TODO: logic cond
}
}
static void unionEqualCondTbnamesOfSameTable(SArray* aTableTbnames, SEqCondTbNameTableInfo* pInfo) {
bool bFoundTable = false;
for (int i = 0; i < taosArrayGetSize(aTableTbnames); ++i) {
SEqCondTbNameTableInfo* info = taosArrayGet(aTableTbnames, i);
if (info->pRealTable == pInfo->pRealTable) {
taosArrayAddAll(info->aTbnames, pInfo->aTbnames);
taosArrayDestroy(pInfo->aTbnames);
pInfo->aTbnames = NULL;
bFoundTable = true;
break;
}
}
if (!bFoundTable) {
taosArrayPush(aTableTbnames, pInfo);
}
}
static void findEqualCondTbnameInLogicCondOr(STranslateContext* pCxt, SNode* pWhere, SArray* aTableTbnames) {
bool bAllTbName = true;
SNode* pTmpNode = NULL;
FOREACH(pTmpNode, ((SLogicConditionNode*)pWhere)->pParameterList) {
//TODO: logic cond
if (nodeType(pTmpNode) == QUERY_NODE_OPERATOR) {
SEqCondTbNameTableInfo info = {0};
bool bIsEqTbnameCond = findEqCondTbNameInOperatorNode(pCxt, pTmpNode, &info);
if (!bIsEqTbnameCond) {
bAllTbName = false;
break;
} else {
unionEqualCondTbnamesOfSameTable(aTableTbnames, &info);
}
} else {
bAllTbName = false;
break;
}
}
if (!bAllTbName) {
for (int i = 0; i < taosArrayGetSize(aTableTbnames); ++i) {
SEqCondTbNameTableInfo* pInfo = taosArrayGet(aTableTbnames, i);
taosArrayDestroy(pInfo->aTbnames);
pInfo->aTbnames = NULL;
}
taosArrayClear(aTableTbnames);
}
}
static int32_t findEqualCondTbname(STranslateContext* pCxt, SNode* pWhere, SArray* aTableTbnames) {
//TODO: optimize nested and/or condition. now only the fist level is processed.
if (nodeType(pWhere) == QUERY_NODE_OPERATOR) {
SEqCondTbNameTableInfo info = {0};
bool bIsEqTbnameCond = findEqCondTbNameInOperatorNode(pCxt, pWhere, &info);
if (bIsEqTbnameCond) {
taosArrayPush(aTableTbnames, &info);
}
} else if (nodeType(pWhere) == QUERY_NODE_LOGIC_CONDITION) {
if (((SLogicConditionNode*)pWhere)->condType == LOGIC_COND_TYPE_AND) {
findEqualCondTbnameInLogicCondAnd(pCxt, pWhere, aTableTbnames);
} else if (((SLogicConditionNode*)pWhere)->condType == LOGIC_COND_TYPE_OR) {
findEqualCondTbnameInLogicCondOr(pCxt, pWhere, aTableTbnames);
}
}
return TSDB_CODE_SUCCESS;
}
static int32_t findVgroupsFromEqualTbname(STranslateContext* pCxt, SEqCondTbNameTableInfo* pInfo, SVgroupsInfo* vgsInfo) {
int32_t nVgroups = 0;
int32_t nTbls = taosArrayGetSize(pInfo->aTbnames);
if (nTbls >= pInfo->pRealTable->pVgroupList->numOfVgroups) {
vgsInfo->numOfVgroups = 0;
return TSDB_CODE_SUCCESS;
}
for (int j = 0; j < nTbls; ++j) {
char* dbName = pInfo->pRealTable->table.dbName;
SName snameTb;
char* tbName = taosArrayGetP(pInfo->aTbnames, j);
toName(pCxt->pParseCxt->acctId, dbName, tbName, &snameTb);
SVgroupInfo vgInfo;
bool bExists;
int32_t code = catalogGetCachedTableHashVgroup(pCxt->pParseCxt->pCatalog, &snameTb, &vgInfo, &bExists);
if (code == TSDB_CODE_SUCCESS && bExists) {
bool bFoundVg = false;
for (int32_t k = 0; k < nVgroups; ++k) {
if (vgsInfo->vgroups[k].vgId == vgInfo.vgId) {
bFoundVg = true;
break;
}
}
if (!bFoundVg) {
vgsInfo->vgroups[nVgroups] = vgInfo;
++nVgroups;
vgsInfo->numOfVgroups = nVgroups;
}
} else {
vgsInfo->numOfVgroups = 0;
break;
}
}
return TSDB_CODE_SUCCESS;
}
static int32_t setEqualTbnameTableVgroups(STranslateContext* pCxt, SSelectStmt* pSelect, SArray* aTables) {
int32_t code = TSDB_CODE_SUCCESS;
for (int i = 0; i < taosArrayGetSize(aTables); ++i) {
SEqCondTbNameTableInfo* pInfo = taosArrayGet(aTables, i);
int32_t nTbls = taosArrayGetSize(pInfo->aTbnames);
SVgroupsInfo* vgsInfo = taosMemoryMalloc(sizeof(SVgroupsInfo) + nTbls * sizeof(SVgroupInfo));
int32_t nVgroups = 0;
findVgroupsFromEqualTbname(pCxt, pInfo, vgsInfo);
if (vgsInfo->numOfVgroups != 0) {
taosMemoryFree(pInfo->pRealTable->pVgroupList);
pInfo->pRealTable->pVgroupList = vgsInfo;
} else {
taosMemoryFree(vgsInfo);
}
}
return TSDB_CODE_SUCCESS;
}
static int32_t setTableVgroupsFromEqualTbnameCond(STranslateContext* pCxt, SSelectStmt* pSelect) {
int32_t code = TSDB_CODE_SUCCESS;
SArray* aTables = taosArrayInit(1, sizeof(SEqCondTbNameTableInfo));
code = findEqualCondTbname(pCxt, pSelect->pWhere, aTables);
if (code == TSDB_CODE_SUCCESS) {
code = setEqualTbnameTableVgroups(pCxt, pSelect, aTables);
}
for (int i = 0; i < taosArrayGetSize(aTables); ++i) {
SEqCondTbNameTableInfo* pInfo = taosArrayGet(aTables, i);
taosArrayDestroy(pInfo->aTbnames);
}
taosArrayDestroy(aTables);
return code;
}
static int32_t translateWhere(STranslateContext* pCxt, SSelectStmt* pSelect) { static int32_t translateWhere(STranslateContext* pCxt, SSelectStmt* pSelect) {
pCxt->currClause = SQL_CLAUSE_WHERE; pCxt->currClause = SQL_CLAUSE_WHERE;
int32_t code = translateExpr(pCxt, &pSelect->pWhere); int32_t code = translateExpr(pCxt, &pSelect->pWhere);
@ -3934,6 +4195,9 @@ static int32_t translateWhere(STranslateContext* pCxt, SSelectStmt* pSelect) {
if (TSDB_CODE_SUCCESS == code && pSelect->timeRange.skey > pSelect->timeRange.ekey) { if (TSDB_CODE_SUCCESS == code && pSelect->timeRange.skey > pSelect->timeRange.ekey) {
pSelect->isEmptyResult = true; pSelect->isEmptyResult = true;
} }
if (pSelect->pWhere != NULL) {
setTableVgroupsFromEqualTbnameCond(pCxt, pSelect);
}
return code; return code;
} }

View File

@ -43,6 +43,7 @@ int32_t optimizeLogicPlan(SPlanContext* pCxt, SLogicSubplan* pLogicSubplan);
int32_t splitLogicPlan(SPlanContext* pCxt, SLogicSubplan* pLogicSubplan); int32_t splitLogicPlan(SPlanContext* pCxt, SLogicSubplan* pLogicSubplan);
int32_t scaleOutLogicPlan(SPlanContext* pCxt, SLogicSubplan* pLogicSubplan, SQueryLogicPlan** pLogicPlan); int32_t scaleOutLogicPlan(SPlanContext* pCxt, SLogicSubplan* pLogicSubplan, SQueryLogicPlan** pLogicPlan);
int32_t createPhysiPlan(SPlanContext* pCxt, SQueryLogicPlan* pLogicPlan, SQueryPlan** pPlan, SArray* pExecNodeList); int32_t createPhysiPlan(SPlanContext* pCxt, SQueryLogicPlan* pLogicPlan, SQueryPlan** pPlan, SArray* pExecNodeList);
int32_t validateQueryPlan(SPlanContext* pCxt, SQueryPlan* pPlan);
bool getBatchScanOptionFromHint(SNodeList* pList); bool getBatchScanOptionFromHint(SNodeList* pList);
bool getSortForGroupOptHint(SNodeList* pList); bool getSortForGroupOptHint(SNodeList* pList);

View File

@ -747,6 +747,7 @@ static int32_t createAggLogicNode(SLogicPlanContext* pCxt, SSelectStmt* pSelect,
pAgg->isGroupTb = pAgg->pGroupKeys ? keysHasTbname(pAgg->pGroupKeys) : 0; pAgg->isGroupTb = pAgg->pGroupKeys ? keysHasTbname(pAgg->pGroupKeys) : 0;
pAgg->isPartTb = pSelect->pPartitionByList ? keysHasTbname(pSelect->pPartitionByList) : 0; pAgg->isPartTb = pSelect->pPartitionByList ? keysHasTbname(pSelect->pPartitionByList) : 0;
pAgg->hasGroup = pAgg->pGroupKeys || pSelect->pPartitionByList;
if (TSDB_CODE_SUCCESS == code) { if (TSDB_CODE_SUCCESS == code) {
*pLogicNode = (SLogicNode*)pAgg; *pLogicNode = (SLogicNode*)pAgg;

View File

@ -26,6 +26,7 @@
#define OPTIMIZE_FLAG_PUSH_DOWN_CONDE OPTIMIZE_FLAG_MASK(1) #define OPTIMIZE_FLAG_PUSH_DOWN_CONDE OPTIMIZE_FLAG_MASK(1)
#define OPTIMIZE_FLAG_SET_MASK(val, mask) (val) |= (mask) #define OPTIMIZE_FLAG_SET_MASK(val, mask) (val) |= (mask)
#define OPTIMIZE_FLAG_CLEAR_MASK(val, mask) (val) &= (~(mask))
#define OPTIMIZE_FLAG_TEST_MASK(val, mask) (((val) & (mask)) != 0) #define OPTIMIZE_FLAG_TEST_MASK(val, mask) (((val) & (mask)) != 0)
typedef struct SOptimizeContext { typedef struct SOptimizeContext {
@ -2478,24 +2479,32 @@ static bool hasSuitableCache(int8_t cacheLastMode, bool hasLastRow, bool hasLast
return false; return false;
} }
static bool lastRowScanOptMayBeOptimized(SLogicNode* pNode) { /// @brief check if we can apply last row scan optimization
if (QUERY_NODE_LOGIC_PLAN_AGG != nodeType(pNode) || 1 != LIST_LENGTH(pNode->pChildren) || /// @param lastColNum how many distinct last col specified
QUERY_NODE_LOGIC_PLAN_SCAN != nodeType(nodesListGetNode(pNode->pChildren, 0))) { /// @param lastColId only used when lastColNum equals 1, the col id of the only one last col
return false; /// @param selectNonPKColNum num of normal cols
} /// @param selectNonPKColId only used when selectNonPKColNum equals 1, the col id of the only one select col
static bool lastRowScanOptCheckColNum(int32_t lastColNum, col_id_t lastColId,
int32_t selectNonPKColNum, col_id_t selectNonPKColId) {
// multi select non pk col + last func: select c1, c2, last(c1)
if (selectNonPKColNum > 1 && lastColNum > 0) return false;
SAggLogicNode* pAgg = (SAggLogicNode*)pNode; if (selectNonPKColNum == 1) {
SScanLogicNode* pScan = (SScanLogicNode*)nodesListGetNode(pNode->pChildren, 0); // select last(c1), last(c2), c1 ...
// Only one of LAST and LASTROW can appear // which is not possible currently
if (pAgg->hasLastRow == pAgg->hasLast || NULL != pAgg->pGroupKeys || NULL != pScan->node.pConditions || if (lastColNum > 1) return false;
!hasSuitableCache(pScan->cacheLastMode, pAgg->hasLastRow, pAgg->hasLast) ||
IS_TSWINDOW_SPECIFIED(pScan->scanRange)) {
return false;
}
bool hasLastFunc = false; // select last(c1), c2 ...
bool hasSelectFunc = false; if (lastColNum == 1 && lastColId != selectNonPKColId) return false;
SNode* pFunc = NULL; }
return true;
}
static bool lastRowScanOptCheckFuncList(SLogicNode* pNode, bool* hasOtherFunc) {
bool hasNonPKSelectFunc = false;
SNode* pFunc = NULL;
int32_t lastColNum = 0, selectNonPKColNum = 0;
col_id_t lastColId = -1, selectNonPKColId = -1;
FOREACH(pFunc, ((SAggLogicNode*)pNode)->pAggFuncs) { FOREACH(pFunc, ((SAggLogicNode*)pNode)->pAggFuncs) {
SFunctionNode* pAggFunc = (SFunctionNode*)pFunc; SFunctionNode* pAggFunc = (SFunctionNode*)pFunc;
if (FUNCTION_TYPE_LAST == pAggFunc->funcType) { if (FUNCTION_TYPE_LAST == pAggFunc->funcType) {
@ -2505,31 +2514,84 @@ static bool lastRowScanOptMayBeOptimized(SLogicNode* pNode) {
if (pCol->colType != COLUMN_TYPE_COLUMN) { if (pCol->colType != COLUMN_TYPE_COLUMN) {
return false; return false;
} }
if (lastColId != pCol->colId) {
lastColId = pCol->colId;
lastColNum++;
}
} }
if (hasSelectFunc || QUERY_NODE_VALUE == nodeType(nodesListGetNode(pAggFunc->pParameterList, 0))) { if (QUERY_NODE_VALUE == nodeType(nodesListGetNode(pAggFunc->pParameterList, 0))) {
return false; return false;
} }
hasLastFunc = true; if (!lastRowScanOptCheckColNum(lastColNum, lastColId, selectNonPKColNum, selectNonPKColId))
return false;
} else if (FUNCTION_TYPE_SELECT_VALUE == pAggFunc->funcType) { } else if (FUNCTION_TYPE_SELECT_VALUE == pAggFunc->funcType) {
if (hasLastFunc) { SNode* pParam = nodesListGetNode(pAggFunc->pParameterList, 0);
if (QUERY_NODE_COLUMN == nodeType(pParam)) {
SColumnNode* pCol = (SColumnNode*)pParam;
if (PRIMARYKEY_TIMESTAMP_COL_ID != pCol->colId) {
if (selectNonPKColId != pCol->colId) {
selectNonPKColId = pCol->colId;
selectNonPKColNum++;
}
} else {
continue;
}
} else if (lastColNum > 0) {
return false; return false;
} }
hasSelectFunc = true; if (!lastRowScanOptCheckColNum(lastColNum, lastColId, selectNonPKColNum, selectNonPKColId))
return false;
} else if (FUNCTION_TYPE_GROUP_KEY == pAggFunc->funcType) { } else if (FUNCTION_TYPE_GROUP_KEY == pAggFunc->funcType) {
if (!lastRowScanOptLastParaIsTag(nodesListGetNode(pAggFunc->pParameterList, 0))) { if (!lastRowScanOptLastParaIsTag(nodesListGetNode(pAggFunc->pParameterList, 0))) {
return false; return false;
} }
} else if (FUNCTION_TYPE_LAST_ROW != pAggFunc->funcType) { } else if (FUNCTION_TYPE_LAST_ROW != pAggFunc->funcType) {
return false; *hasOtherFunc = true;
} }
} }
return true; return true;
} }
static bool lastRowScanOptCheckLastCache(SAggLogicNode* pAgg, SScanLogicNode* pScan) {
// Only one of LAST and LASTROW can appear
if (pAgg->hasLastRow == pAgg->hasLast || (!pAgg->hasLast && !pAgg->hasLastRow) || NULL != pAgg->pGroupKeys || NULL != pScan->node.pConditions ||
!hasSuitableCache(pScan->cacheLastMode, pAgg->hasLastRow, pAgg->hasLast) ||
IS_TSWINDOW_SPECIFIED(pScan->scanRange)) {
return false;
}
return true;
}
static bool lastRowScanOptMayBeOptimized(SLogicNode* pNode) {
if (QUERY_NODE_LOGIC_PLAN_AGG != nodeType(pNode) || 1 != LIST_LENGTH(pNode->pChildren) ||
QUERY_NODE_LOGIC_PLAN_SCAN != nodeType(nodesListGetNode(pNode->pChildren, 0))) {
return false;
}
SAggLogicNode* pAgg = (SAggLogicNode*)pNode;
SScanLogicNode* pScan = (SScanLogicNode*)nodesListGetNode(pNode->pChildren, 0);
if (!lastRowScanOptCheckLastCache(pAgg, pScan)) {
return false;
}
bool hasOtherFunc = false;
if (!lastRowScanOptCheckFuncList(pNode, &hasOtherFunc)) {
return false;
}
if (hasOtherFunc) {
return false;
}
return true;
}
typedef struct SLastRowScanOptSetColDataTypeCxt { typedef struct SLastRowScanOptSetColDataTypeCxt {
bool doAgg; bool doAgg;
SNodeList* pLastCols; SNodeList* pLastCols;
SNodeList* pOtherCols;
} SLastRowScanOptSetColDataTypeCxt; } SLastRowScanOptSetColDataTypeCxt;
static EDealRes lastRowScanOptSetColDataType(SNode* pNode, void* pContext) { static EDealRes lastRowScanOptSetColDataType(SNode* pNode, void* pContext) {
@ -2572,6 +2634,33 @@ static void lastRowScanOptSetLastTargets(SNodeList* pTargets, SNodeList* pLastCo
} }
} }
static void lastRowScanOptRemoveUslessTargets(SNodeList* pTargets, SNodeList* pList1, SNodeList* pList2) {
SNode* pTarget = NULL;
WHERE_EACH(pTarget, pTargets) {
bool found = false;
SNode* pCol = NULL;
FOREACH(pCol, pList1) {
if (nodesEqualNode(pCol, pTarget)) {
found = true;
break;
}
}
if (!found) {
FOREACH(pCol, pList2) {
if (nodesEqualNode(pCol, pTarget)) {
found = true;
break;
}
}
}
if (!found) {
ERASE_NODE(pTargets);
continue;
}
WHERE_NEXT;
}
}
static int32_t lastRowScanOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogicSubplan) { static int32_t lastRowScanOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogicSubplan) {
SAggLogicNode* pAgg = (SAggLogicNode*)optFindPossibleNode(pLogicSubplan->pNode, lastRowScanOptMayBeOptimized); SAggLogicNode* pAgg = (SAggLogicNode*)optFindPossibleNode(pLogicSubplan->pNode, lastRowScanOptMayBeOptimized);
@ -2579,8 +2668,11 @@ static int32_t lastRowScanOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogic
return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
} }
SLastRowScanOptSetColDataTypeCxt cxt = {.doAgg = true, .pLastCols = NULL}; SLastRowScanOptSetColDataTypeCxt cxt = {.doAgg = true, .pLastCols = NULL, .pOtherCols = NULL};
SNode* pNode = NULL; SNode* pNode = NULL;
SColumnNode* pPKTsCol = NULL;
SColumnNode* pNonPKCol = NULL;
FOREACH(pNode, pAgg->pAggFuncs) { FOREACH(pNode, pAgg->pAggFuncs) {
SFunctionNode* pFunc = (SFunctionNode*)pNode; SFunctionNode* pFunc = (SFunctionNode*)pNode;
int32_t funcType = pFunc->funcType; int32_t funcType = pFunc->funcType;
@ -2597,6 +2689,20 @@ static int32_t lastRowScanOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogic
nodesWalkExpr(nodesListGetNode(pFunc->pParameterList, 0), lastRowScanOptSetColDataType, &cxt); nodesWalkExpr(nodesListGetNode(pFunc->pParameterList, 0), lastRowScanOptSetColDataType, &cxt);
nodesListErase(pFunc->pParameterList, nodesListGetCell(pFunc->pParameterList, 1)); nodesListErase(pFunc->pParameterList, nodesListGetCell(pFunc->pParameterList, 1));
} }
} else {
pNode = nodesListGetNode(pFunc->pParameterList, 0);
nodesListMakeAppend(&cxt.pOtherCols, pNode);
if (FUNCTION_TYPE_SELECT_VALUE == funcType) {
if (nodeType(pNode) == QUERY_NODE_COLUMN) {
SColumnNode* pCol = (SColumnNode*)pNode;
if (pCol->colId == PRIMARYKEY_TIMESTAMP_COL_ID) {
pPKTsCol = pCol;
} else {
pNonPKCol = pCol;
}
}
}
} }
} }
@ -2608,6 +2714,17 @@ static int32_t lastRowScanOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogic
lastRowScanOptSetLastTargets(pScan->pScanCols, cxt.pLastCols, true); lastRowScanOptSetLastTargets(pScan->pScanCols, cxt.pLastCols, true);
nodesWalkExprs(pScan->pScanPseudoCols, lastRowScanOptSetColDataType, &cxt); nodesWalkExprs(pScan->pScanPseudoCols, lastRowScanOptSetColDataType, &cxt);
lastRowScanOptSetLastTargets(pScan->node.pTargets, cxt.pLastCols, false); lastRowScanOptSetLastTargets(pScan->node.pTargets, cxt.pLastCols, false);
lastRowScanOptRemoveUslessTargets(pScan->node.pTargets, cxt.pLastCols, cxt.pOtherCols);
if (pPKTsCol && pScan->node.pTargets->length == 1) {
// when select last(ts),ts from ..., we add another ts to targets
sprintf(pPKTsCol->colName, "#sel_val.%p", pPKTsCol);
nodesListAppend(pScan->node.pTargets, nodesCloneNode((SNode*)pPKTsCol));
}
if (pNonPKCol && cxt.pLastCols->length == 1 && nodesEqualNode((SNode*)pNonPKCol, nodesListGetNode(cxt.pLastCols, 0))) {
// when select last(c1), c1 from ..., we add c1 to targets
sprintf(pNonPKCol->colName, "#sel_val.%p", pNonPKCol);
nodesListAppend(pScan->node.pTargets, nodesCloneNode((SNode*)pNonPKCol));
}
nodesClearList(cxt.pLastCols); nodesClearList(cxt.pLastCols);
} }
pAgg->hasLastRow = false; pAgg->hasLastRow = false;
@ -2617,6 +2734,208 @@ static int32_t lastRowScanOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogic
return TSDB_CODE_SUCCESS; return TSDB_CODE_SUCCESS;
} }
static bool splitCacheLastFuncOptMayBeOptimized(SLogicNode* pNode) {
if (QUERY_NODE_LOGIC_PLAN_AGG != nodeType(pNode) || 1 != LIST_LENGTH(pNode->pChildren) ||
QUERY_NODE_LOGIC_PLAN_SCAN != nodeType(nodesListGetNode(pNode->pChildren, 0))) {
return false;
}
SAggLogicNode* pAgg = (SAggLogicNode*)pNode;
SScanLogicNode* pScan = (SScanLogicNode*)nodesListGetNode(pNode->pChildren, 0);
if (!lastRowScanOptCheckLastCache(pAgg, pScan)) {
return false;
}
bool hasOtherFunc = false;
if (!lastRowScanOptCheckFuncList(pNode, &hasOtherFunc)) {
return false;
}
if (pAgg->hasGroup || !hasOtherFunc) {
return false;
}
return true;
}
static int32_t splitCacheLastFuncOptCreateAggLogicNode(SAggLogicNode** pNewAgg, SAggLogicNode* pAgg, SNodeList* pFunc, SNodeList* pTargets) {
SAggLogicNode* pNew = (SAggLogicNode*)nodesMakeNode(QUERY_NODE_LOGIC_PLAN_AGG);
if (NULL == pNew) {
nodesDestroyList(pFunc);
nodesDestroyList(pTargets);
return TSDB_CODE_OUT_OF_MEMORY;
}
pNew->hasLastRow = false;
pNew->hasLast = false;
pNew->hasTimeLineFunc = pAgg->hasTimeLineFunc;
pNew->hasGroupKeyOptimized = false;
pNew->onlyHasKeepOrderFunc = pAgg->onlyHasKeepOrderFunc;
pNew->node.groupAction = pAgg->node.groupAction;
pNew->node.requireDataOrder = pAgg->node.requireDataOrder;
pNew->node.resultDataOrder = pAgg->node.resultDataOrder;
pNew->node.pTargets = pTargets;
pNew->pAggFuncs = pFunc;
pNew->pGroupKeys = nodesCloneList(pAgg->pGroupKeys);
pNew->node.pConditions = nodesCloneNode(pAgg->node.pConditions);
pNew->isGroupTb = pAgg->isGroupTb;
pNew->isPartTb = pAgg->isPartTb;
pNew->hasGroup = pAgg->hasGroup;
pNew->node.pChildren = nodesCloneList(pAgg->node.pChildren);
SNode* pNode = NULL;
FOREACH(pNode, pNew->node.pChildren) {
if (QUERY_NODE_LOGIC_PLAN_SCAN == nodeType(pNode)) {
OPTIMIZE_FLAG_CLEAR_MASK(((SScanLogicNode*)pNode)->node.optimizedFlag, OPTIMIZE_FLAG_SCAN_PATH);
}
}
*pNewAgg = pNew;
return TSDB_CODE_SUCCESS;
}
static int32_t splitCacheLastFuncOptModifyAggLogicNode(SAggLogicNode* pAgg) {
pAgg->hasTimeLineFunc = false;
pAgg->onlyHasKeepOrderFunc = true;
return TSDB_CODE_SUCCESS;
}
static int32_t splitCacheLastFuncOptCreateMergeLogicNode(SMergeLogicNode** pNew, SAggLogicNode* pAgg1, SAggLogicNode* pAgg2) {
SMergeLogicNode* pMerge = (SMergeLogicNode*)nodesMakeNode(QUERY_NODE_LOGIC_PLAN_MERGE);
if (NULL == pMerge) {
return TSDB_CODE_OUT_OF_MEMORY;
}
pMerge->colsMerge = true;
pMerge->numOfChannels = 2;
pMerge->srcGroupId = -1;
pMerge->node.precision = pAgg1->node.precision;
SNode* pNewAgg1 = nodesCloneNode((SNode*)pAgg1);
SNode* pNewAgg2 = nodesCloneNode((SNode*)pAgg2);
if (NULL == pNewAgg1 || NULL == pNewAgg2) {
nodesDestroyNode(pNewAgg1);
nodesDestroyNode(pNewAgg2);
return TSDB_CODE_OUT_OF_MEMORY;
}
((SAggLogicNode*)pNewAgg1)->node.pParent = (SLogicNode*)pMerge;
((SAggLogicNode*)pNewAgg2)->node.pParent = (SLogicNode*)pMerge;
SNode* pNode = NULL;
FOREACH(pNode, ((SAggLogicNode*)pNewAgg1)->node.pChildren) {
((SLogicNode*)pNode)->pParent = (SLogicNode*)pNewAgg1;
}
FOREACH(pNode, ((SAggLogicNode*)pNewAgg2)->node.pChildren) {
((SLogicNode*)pNode)->pParent = (SLogicNode*)pNewAgg2;
}
int32_t code = nodesListMakeStrictAppendList(&pMerge->node.pTargets, nodesCloneList(pAgg1->node.pTargets));
if (TSDB_CODE_SUCCESS == code) {
code = nodesListMakeStrictAppendList(&pMerge->node.pTargets, nodesCloneList(pAgg2->node.pTargets));
}
if (TSDB_CODE_SUCCESS == code) {
code = nodesListMakeStrictAppend(&pMerge->node.pChildren, pNewAgg1);
}
if (TSDB_CODE_SUCCESS == code) {
code = nodesListMakeStrictAppend(&pMerge->node.pChildren, pNewAgg2);
}
if (TSDB_CODE_SUCCESS != code) {
nodesDestroyNode(pNewAgg1);
nodesDestroyNode(pNewAgg2);
nodesDestroyNode((SNode*)pMerge);
} else {
*pNew = pMerge;
}
return code;
}
static int32_t splitCacheLastFuncOptimize(SOptimizeContext* pCxt, SLogicSubplan* pLogicSubplan) {
SAggLogicNode* pAgg = (SAggLogicNode*)optFindPossibleNode(pLogicSubplan->pNode, splitCacheLastFuncOptMayBeOptimized);
if (NULL == pAgg) {
return TSDB_CODE_SUCCESS;
}
SNode* pNode = NULL;
SNodeList* pAggFuncList = NULL;
{
WHERE_EACH(pNode, pAgg->pAggFuncs) {
SFunctionNode* pFunc = (SFunctionNode*)pNode;
int32_t funcType = pFunc->funcType;
if (FUNCTION_TYPE_LAST_ROW != funcType && FUNCTION_TYPE_LAST != funcType &&
FUNCTION_TYPE_SELECT_VALUE != funcType && FUNCTION_TYPE_GROUP_KEY != funcType) {
nodesListMakeStrictAppend(&pAggFuncList, nodesCloneNode(pNode));
ERASE_NODE(pAgg->pAggFuncs);
continue;
}
WHERE_NEXT;
}
}
if (NULL == pAggFuncList) {
planError("empty agg func list while splite projections, funcNum:%d", pAgg->pAggFuncs->length);
return TSDB_CODE_PLAN_INTERNAL_ERROR;
}
SNodeList* pTargets = NULL;
{
WHERE_EACH(pNode, pAgg->node.pTargets) {
SColumnNode* pCol = (SColumnNode*)pNode;
SNode* pFuncNode = NULL;
bool found = false;
FOREACH(pFuncNode, pAggFuncList) {
SFunctionNode* pFunc = (SFunctionNode*)pFuncNode;
if (0 == strcmp(pFunc->node.aliasName, pCol->colName)) {
nodesListMakeStrictAppend(&pTargets, nodesCloneNode(pNode));
found = true;
break;
}
}
if (found) {
ERASE_NODE(pAgg->node.pTargets);
continue;
}
WHERE_NEXT;
}
}
if (NULL == pTargets) {
planError("empty target func list while splite projections, targetsNum:%d", pAgg->node.pTargets->length);
nodesDestroyList(pAggFuncList);
return TSDB_CODE_PLAN_INTERNAL_ERROR;
}
SMergeLogicNode* pMerge = NULL;
SAggLogicNode* pNewAgg = NULL;
int32_t code = splitCacheLastFuncOptCreateAggLogicNode(&pNewAgg, pAgg, pAggFuncList, pTargets);
if (TSDB_CODE_SUCCESS == code) {
code = splitCacheLastFuncOptModifyAggLogicNode(pAgg);
}
if (TSDB_CODE_SUCCESS == code) {
code = splitCacheLastFuncOptCreateMergeLogicNode(&pMerge, pNewAgg, pAgg);
}
if (TSDB_CODE_SUCCESS == code) {
code = replaceLogicNode(pLogicSubplan, (SLogicNode*)pAgg, (SLogicNode*)pMerge);
}
nodesDestroyNode((SNode *)pAgg);
nodesDestroyNode((SNode *)pNewAgg);
if (TSDB_CODE_SUCCESS != code) {
nodesDestroyNode((SNode *)pMerge);
}
pCxt->optimized = true;
return code;
}
// merge projects // merge projects
static bool mergeProjectsMayBeOptimized(SLogicNode* pNode) { static bool mergeProjectsMayBeOptimized(SLogicNode* pNode) {
if (QUERY_NODE_LOGIC_PLAN_PROJECT != nodeType(pNode) || 1 != LIST_LENGTH(pNode->pChildren)) { if (QUERY_NODE_LOGIC_PLAN_PROJECT != nodeType(pNode) || 1 != LIST_LENGTH(pNode->pChildren)) {
@ -3700,6 +4019,7 @@ static const SOptimizeRule optimizeRuleSet[] = {
{.pName = "MergeProjects", .optimizeFunc = mergeProjectsOptimize}, {.pName = "MergeProjects", .optimizeFunc = mergeProjectsOptimize},
{.pName = "RewriteTail", .optimizeFunc = rewriteTailOptimize}, {.pName = "RewriteTail", .optimizeFunc = rewriteTailOptimize},
{.pName = "RewriteUnique", .optimizeFunc = rewriteUniqueOptimize}, {.pName = "RewriteUnique", .optimizeFunc = rewriteUniqueOptimize},
{.pName = "splitCacheLastFunc", .optimizeFunc = splitCacheLastFuncOptimize},
{.pName = "LastRowScan", .optimizeFunc = lastRowScanOptimize}, {.pName = "LastRowScan", .optimizeFunc = lastRowScanOptimize},
{.pName = "TagScan", .optimizeFunc = tagScanOptimize}, {.pName = "TagScan", .optimizeFunc = tagScanOptimize},
{.pName = "TableCountScan", .optimizeFunc = tableCountScanOptimize}, {.pName = "TableCountScan", .optimizeFunc = tableCountScanOptimize},

View File

@ -552,6 +552,7 @@ static int32_t createLastRowScanPhysiNode(SPhysiPlanContext* pCxt, SSubplan* pSu
if (NULL == pScan) { if (NULL == pScan) {
return TSDB_CODE_OUT_OF_MEMORY; return TSDB_CODE_OUT_OF_MEMORY;
} }
pScan->pTargets = nodesCloneList(pScanLogicNode->node.pTargets);
pScan->pGroupTags = nodesCloneList(pScanLogicNode->pGroupTags); pScan->pGroupTags = nodesCloneList(pScanLogicNode->pGroupTags);
if (NULL != pScanLogicNode->pGroupTags && NULL == pScan->pGroupTags) { if (NULL != pScanLogicNode->pGroupTags && NULL == pScan->pGroupTags) {
@ -1950,41 +1951,60 @@ static int32_t createExchangePhysiNodeByMerge(SMergePhysiNode* pMerge) {
return nodesListMakeStrictAppend(&pMerge->node.pChildren, (SNode*)pExchange); return nodesListMakeStrictAppend(&pMerge->node.pChildren, (SNode*)pExchange);
} }
static int32_t createMergePhysiNode(SPhysiPlanContext* pCxt, SMergeLogicNode* pMergeLogicNode, SPhysiNode** pPhyNode) { static int32_t createMergePhysiNode(SPhysiPlanContext* pCxt, SNodeList* pChildren, SMergeLogicNode* pMergeLogicNode, SPhysiNode** pPhyNode) {
int32_t code = TSDB_CODE_SUCCESS;
SMergePhysiNode* pMerge = SMergePhysiNode* pMerge =
(SMergePhysiNode*)makePhysiNode(pCxt, (SLogicNode*)pMergeLogicNode, QUERY_NODE_PHYSICAL_PLAN_MERGE); (SMergePhysiNode*)makePhysiNode(pCxt, (SLogicNode*)pMergeLogicNode, QUERY_NODE_PHYSICAL_PLAN_MERGE);
if (NULL == pMerge) { if (NULL == pMerge) {
return TSDB_CODE_OUT_OF_MEMORY; return TSDB_CODE_OUT_OF_MEMORY;
} }
if (pMergeLogicNode->colsMerge) {
pMerge->type = MERGE_TYPE_COLUMNS;
} else if (pMergeLogicNode->needSort) {
pMerge->type = MERGE_TYPE_SORT;
} else {
pMerge->type = MERGE_TYPE_NON_SORT;
}
pMerge->numOfChannels = pMergeLogicNode->numOfChannels; pMerge->numOfChannels = pMergeLogicNode->numOfChannels;
pMerge->srcGroupId = pMergeLogicNode->srcGroupId; pMerge->srcGroupId = pMergeLogicNode->srcGroupId;
pMerge->groupSort = pMergeLogicNode->groupSort; pMerge->groupSort = pMergeLogicNode->groupSort;
pMerge->ignoreGroupId = pMergeLogicNode->ignoreGroupId; pMerge->ignoreGroupId = pMergeLogicNode->ignoreGroupId;
pMerge->inputWithGroupId = pMergeLogicNode->inputWithGroupId; pMerge->inputWithGroupId = pMergeLogicNode->inputWithGroupId;
int32_t code = addDataBlockSlots(pCxt, pMergeLogicNode->pInputs, pMerge->node.pOutputDataBlockDesc); if (!pMergeLogicNode->colsMerge) {
code = addDataBlockSlots(pCxt, pMergeLogicNode->pInputs, pMerge->node.pOutputDataBlockDesc);
if (TSDB_CODE_SUCCESS == code) { if (TSDB_CODE_SUCCESS == code) {
for (int32_t i = 0; i < pMerge->numOfChannels; ++i) { for (int32_t i = 0; i < pMerge->numOfChannels; ++i) {
code = createExchangePhysiNodeByMerge(pMerge); code = createExchangePhysiNodeByMerge(pMerge);
if (TSDB_CODE_SUCCESS != code) { if (TSDB_CODE_SUCCESS != code) {
break; break;
}
} }
} }
}
if (TSDB_CODE_SUCCESS == code && NULL != pMergeLogicNode->pMergeKeys) { if (TSDB_CODE_SUCCESS == code && NULL != pMergeLogicNode->pMergeKeys) {
code = setListSlotId(pCxt, pMerge->node.pOutputDataBlockDesc->dataBlockId, -1, pMergeLogicNode->pMergeKeys, code = setListSlotId(pCxt, pMerge->node.pOutputDataBlockDesc->dataBlockId, -1, pMergeLogicNode->pMergeKeys,
&pMerge->pMergeKeys); &pMerge->pMergeKeys);
} }
if (TSDB_CODE_SUCCESS == code) { if (TSDB_CODE_SUCCESS == code) {
code = setListSlotId(pCxt, pMerge->node.pOutputDataBlockDesc->dataBlockId, -1, pMergeLogicNode->node.pTargets, code = setListSlotId(pCxt, pMerge->node.pOutputDataBlockDesc->dataBlockId, -1, pMergeLogicNode->node.pTargets,
&pMerge->pTargets); &pMerge->pTargets);
} }
if (TSDB_CODE_SUCCESS == code) { if (TSDB_CODE_SUCCESS == code) {
code = addDataBlockSlots(pCxt, pMerge->pTargets, pMerge->node.pOutputDataBlockDesc); code = addDataBlockSlots(pCxt, pMerge->pTargets, pMerge->node.pOutputDataBlockDesc);
}
} else {
SDataBlockDescNode* pLeftDesc = ((SPhysiNode*)nodesListGetNode(pChildren, 0))->pOutputDataBlockDesc;
SDataBlockDescNode* pRightDesc = ((SPhysiNode*)nodesListGetNode(pChildren, 1))->pOutputDataBlockDesc;
code = setListSlotId(pCxt, pLeftDesc->dataBlockId, pRightDesc->dataBlockId, pMergeLogicNode->node.pTargets, &pMerge->pTargets);
if (TSDB_CODE_SUCCESS == code) {
code = addDataBlockSlots(pCxt, pMerge->pTargets, pMerge->node.pOutputDataBlockDesc);
}
} }
if (TSDB_CODE_SUCCESS == code) { if (TSDB_CODE_SUCCESS == code) {
@ -2022,7 +2042,7 @@ static int32_t doCreatePhysiNode(SPhysiPlanContext* pCxt, SLogicNode* pLogicNode
case QUERY_NODE_LOGIC_PLAN_INTERP_FUNC: case QUERY_NODE_LOGIC_PLAN_INTERP_FUNC:
return createInterpFuncPhysiNode(pCxt, pChildren, (SInterpFuncLogicNode*)pLogicNode, pPhyNode); return createInterpFuncPhysiNode(pCxt, pChildren, (SInterpFuncLogicNode*)pLogicNode, pPhyNode);
case QUERY_NODE_LOGIC_PLAN_MERGE: case QUERY_NODE_LOGIC_PLAN_MERGE:
return createMergePhysiNode(pCxt, (SMergeLogicNode*)pLogicNode, pPhyNode); return createMergePhysiNode(pCxt, pChildren, (SMergeLogicNode*)pLogicNode, pPhyNode);
case QUERY_NODE_LOGIC_PLAN_GROUP_CACHE: case QUERY_NODE_LOGIC_PLAN_GROUP_CACHE:
return createGroupCachePhysiNode(pCxt, pChildren, (SGroupCacheLogicNode*)pLogicNode, pPhyNode); return createGroupCachePhysiNode(pCxt, pChildren, (SGroupCacheLogicNode*)pLogicNode, pPhyNode);
case QUERY_NODE_LOGIC_PLAN_DYN_QUERY_CTRL: case QUERY_NODE_LOGIC_PLAN_DYN_QUERY_CTRL:

View File

@ -248,8 +248,6 @@ static bool stbSplHasMultiTbScan(bool streamQuery, SLogicNode* pNode) {
} }
if (QUERY_NODE_LOGIC_PLAN_SCAN == nodeType(pChild) && stbSplIsMultiTbScan(streamQuery, (SScanLogicNode*)pChild)) { if (QUERY_NODE_LOGIC_PLAN_SCAN == nodeType(pChild) && stbSplIsMultiTbScan(streamQuery, (SScanLogicNode*)pChild)) {
return true; return true;
} else if (QUERY_NODE_LOGIC_PLAN_SORT == nodeType(pChild)) {
return stbSplHasMultiTbScan(streamQuery, (SLogicNode*)pChild);
} }
return false; return false;
} }
@ -540,11 +538,12 @@ static int32_t stbSplRewriteFromMergeNode(SMergeLogicNode* pMerge, SLogicNode* p
} }
static int32_t stbSplCreateMergeNode(SSplitContext* pCxt, SLogicSubplan* pSubplan, SLogicNode* pSplitNode, static int32_t stbSplCreateMergeNode(SSplitContext* pCxt, SLogicSubplan* pSubplan, SLogicNode* pSplitNode,
SNodeList* pMergeKeys, SLogicNode* pPartChild, bool groupSort) { SNodeList* pMergeKeys, SLogicNode* pPartChild, bool groupSort, bool needSort) {
SMergeLogicNode* pMerge = (SMergeLogicNode*)nodesMakeNode(QUERY_NODE_LOGIC_PLAN_MERGE); SMergeLogicNode* pMerge = (SMergeLogicNode*)nodesMakeNode(QUERY_NODE_LOGIC_PLAN_MERGE);
if (NULL == pMerge) { if (NULL == pMerge) {
return TSDB_CODE_OUT_OF_MEMORY; return TSDB_CODE_OUT_OF_MEMORY;
} }
pMerge->needSort = needSort;
pMerge->numOfChannels = stbSplGetNumOfVgroups(pPartChild); pMerge->numOfChannels = stbSplGetNumOfVgroups(pPartChild);
pMerge->srcGroupId = pCxt->groupId; pMerge->srcGroupId = pCxt->groupId;
pMerge->node.precision = pPartChild->precision; pMerge->node.precision = pPartChild->precision;
@ -621,7 +620,7 @@ static int32_t stbSplSplitIntervalForBatch(SSplitContext* pCxt, SStableSplitInfo
code = stbSplCreateMergeKeysByPrimaryKey(((SWindowLogicNode*)pInfo->pSplitNode)->pTspk, code = stbSplCreateMergeKeysByPrimaryKey(((SWindowLogicNode*)pInfo->pSplitNode)->pTspk,
((SWindowLogicNode*)pInfo->pSplitNode)->node.outputTsOrder, &pMergeKeys); ((SWindowLogicNode*)pInfo->pSplitNode)->node.outputTsOrder, &pMergeKeys);
if (TSDB_CODE_SUCCESS == code) { if (TSDB_CODE_SUCCESS == code) {
code = stbSplCreateMergeNode(pCxt, NULL, pInfo->pSplitNode, pMergeKeys, pPartWindow, true); code = stbSplCreateMergeNode(pCxt, NULL, pInfo->pSplitNode, pMergeKeys, pPartWindow, true, true);
} }
if (TSDB_CODE_SUCCESS != code) { if (TSDB_CODE_SUCCESS != code) {
nodesDestroyList(pMergeKeys); nodesDestroyList(pMergeKeys);
@ -712,7 +711,7 @@ static int32_t stbSplSplitSessionOrStateForBatch(SSplitContext* pCxt, SStableSpl
((SWindowLogicNode*)pWindow)->node.inputTsOrder, &pMergeKeys); ((SWindowLogicNode*)pWindow)->node.inputTsOrder, &pMergeKeys);
if (TSDB_CODE_SUCCESS == code) { if (TSDB_CODE_SUCCESS == code) {
code = stbSplCreateMergeNode(pCxt, pInfo->pSubplan, pChild, pMergeKeys, (SLogicNode*)pChild, true); code = stbSplCreateMergeNode(pCxt, pInfo->pSubplan, pChild, pMergeKeys, (SLogicNode*)pChild, true, true);
} }
if (TSDB_CODE_SUCCESS == code) { if (TSDB_CODE_SUCCESS == code) {
@ -982,7 +981,7 @@ static int32_t stbSplAggNodeCreateMerge(SSplitContext* pCtx, SStableSplitInfo* p
} }
} }
} }
code = stbSplCreateMergeNode(pCtx, NULL, pInfo->pSplitNode, pMergeKeys, pChildAgg, groupSort); code = stbSplCreateMergeNode(pCtx, NULL, pInfo->pSplitNode, pMergeKeys, pChildAgg, groupSort, true);
if (TSDB_CODE_SUCCESS == code && sortForGroup) { if (TSDB_CODE_SUCCESS == code && sortForGroup) {
SMergeLogicNode* pMerge = SMergeLogicNode* pMerge =
(SMergeLogicNode*)nodesListGetNode(pInfo->pSplitNode->pChildren, LIST_LENGTH(pInfo->pSplitNode->pChildren) - 1); (SMergeLogicNode*)nodesListGetNode(pInfo->pSplitNode->pChildren, LIST_LENGTH(pInfo->pSplitNode->pChildren) - 1);
@ -1145,7 +1144,7 @@ static int32_t stbSplSplitSortNode(SSplitContext* pCxt, SStableSplitInfo* pInfo)
bool groupSort = ((SSortLogicNode*)pInfo->pSplitNode)->groupSort; bool groupSort = ((SSortLogicNode*)pInfo->pSplitNode)->groupSort;
int32_t code = stbSplCreatePartSortNode((SSortLogicNode*)pInfo->pSplitNode, &pPartSort, &pMergeKeys); int32_t code = stbSplCreatePartSortNode((SSortLogicNode*)pInfo->pSplitNode, &pPartSort, &pMergeKeys);
if (TSDB_CODE_SUCCESS == code) { if (TSDB_CODE_SUCCESS == code) {
code = stbSplCreateMergeNode(pCxt, pInfo->pSubplan, pInfo->pSplitNode, pMergeKeys, pPartSort, groupSort); code = stbSplCreateMergeNode(pCxt, pInfo->pSubplan, pInfo->pSplitNode, pMergeKeys, pPartSort, groupSort, true);
} }
if (TSDB_CODE_SUCCESS == code) { if (TSDB_CODE_SUCCESS == code) {
nodesDestroyNode((SNode*)pInfo->pSplitNode); nodesDestroyNode((SNode*)pInfo->pSplitNode);
@ -1195,7 +1194,7 @@ static int32_t stbSplSplitScanNodeWithPartTags(SSplitContext* pCxt, SStableSplit
SLogicNode* pSplitNode = NULL; SLogicNode* pSplitNode = NULL;
int32_t code = stbSplGetSplitNodeForScan(pInfo, &pSplitNode); int32_t code = stbSplGetSplitNodeForScan(pInfo, &pSplitNode);
if (TSDB_CODE_SUCCESS == code) { if (TSDB_CODE_SUCCESS == code) {
code = stbSplCreateMergeNode(pCxt, pInfo->pSubplan, pSplitNode, NULL, pSplitNode, true); code = stbSplCreateMergeNode(pCxt, pInfo->pSubplan, pSplitNode, NULL, pSplitNode, true, true);
} }
if (TSDB_CODE_SUCCESS == code) { if (TSDB_CODE_SUCCESS == code) {
code = nodesListMakeStrictAppend(&pInfo->pSubplan->pChildren, code = nodesListMakeStrictAppend(&pInfo->pSubplan->pChildren,
@ -1269,7 +1268,7 @@ static int32_t stbSplSplitMergeScanNode(SSplitContext* pCxt, SLogicSubplan* pSub
((SLimitNode*)pMergeScan->pLimit)->limit += ((SLimitNode*)pMergeScan->pLimit)->offset; ((SLimitNode*)pMergeScan->pLimit)->limit += ((SLimitNode*)pMergeScan->pLimit)->offset;
((SLimitNode*)pMergeScan->pLimit)->offset = 0; ((SLimitNode*)pMergeScan->pLimit)->offset = 0;
} }
code = stbSplCreateMergeNode(pCxt, pSubplan, (SLogicNode*)pScan, pMergeKeys, pMergeScan, groupSort); code = stbSplCreateMergeNode(pCxt, pSubplan, (SLogicNode*)pScan, pMergeKeys, pMergeScan, groupSort, true);
} }
if (TSDB_CODE_SUCCESS == code) { if (TSDB_CODE_SUCCESS == code) {
nodesDestroyNode((SNode*)pScan); nodesDestroyNode((SNode*)pScan);
@ -1345,7 +1344,7 @@ static int32_t stbSplSplitPartitionNode(SSplitContext* pCxt, SStableSplitInfo* p
code = stbSplCreateMergeKeysForPartitionNode(pInfo->pSplitNode, &pMergeKeys); code = stbSplCreateMergeKeysForPartitionNode(pInfo->pSplitNode, &pMergeKeys);
} }
if (TSDB_CODE_SUCCESS == code) { if (TSDB_CODE_SUCCESS == code) {
code = stbSplCreateMergeNode(pCxt, pInfo->pSubplan, pInfo->pSplitNode, pMergeKeys, pInfo->pSplitNode, true); code = stbSplCreateMergeNode(pCxt, pInfo->pSubplan, pInfo->pSplitNode, pMergeKeys, pInfo->pSplitNode, true, true);
} }
if (TSDB_CODE_SUCCESS == code) { if (TSDB_CODE_SUCCESS == code) {
code = nodesListMakeStrictAppend(&pInfo->pSubplan->pChildren, code = nodesListMakeStrictAppend(&pInfo->pSubplan->pChildren,
@ -1587,9 +1586,12 @@ typedef struct SSmaIndexSplitInfo {
static bool smaIdxSplFindSplitNode(SSplitContext* pCxt, SLogicSubplan* pSubplan, SLogicNode* pNode, static bool smaIdxSplFindSplitNode(SSplitContext* pCxt, SLogicSubplan* pSubplan, SLogicNode* pNode,
SSmaIndexSplitInfo* pInfo) { SSmaIndexSplitInfo* pInfo) {
if (QUERY_NODE_LOGIC_PLAN_MERGE == nodeType(pNode) && LIST_LENGTH(pNode->pChildren) > 1) { if (QUERY_NODE_LOGIC_PLAN_MERGE == nodeType(pNode) && LIST_LENGTH(pNode->pChildren) > 1) {
pInfo->pMerge = (SMergeLogicNode*)pNode; int32_t nodeType = nodeType(nodesListGetNode(pNode->pChildren, 0));
pInfo->pSubplan = pSubplan; if (nodeType == QUERY_NODE_LOGIC_PLAN_EXCHANGE || nodeType == QUERY_NODE_LOGIC_PLAN_MERGE) {
return true; pInfo->pMerge = (SMergeLogicNode*)pNode;
pInfo->pSubplan = pSubplan;
return true;
}
} }
return false; return false;
} }

View File

@ -0,0 +1,164 @@
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "planInt.h"
#include "catalog.h"
#include "functionMgt.h"
#include "systable.h"
#include "tglobal.h"
typedef struct SValidatePlanContext {
SPlanContext* pPlanCxt;
int32_t errCode;
} SValidatePlanContext;
int32_t doValidatePhysiNode(SValidatePlanContext* pCxt, SNode* pNode);
int32_t validateMergePhysiNode(SValidatePlanContext* pCxt, SMergePhysiNode* pMerge) {
if ((NULL != pMerge->node.pLimit || NULL != pMerge->node.pSlimit) && pMerge->type == MERGE_TYPE_NON_SORT) {
planError("no limit&slimit supported for non sort merge, pLimit:%p", pMerge->node.pLimit);
return TSDB_CODE_PLAN_INTERNAL_ERROR;
}
return TSDB_CODE_SUCCESS;
}
int32_t validateSubplanNode(SValidatePlanContext* pCxt, SSubplan* pSubPlan) {
if (SUBPLAN_TYPE_MODIFY == pSubPlan->subplanType) {
return TSDB_CODE_SUCCESS;
}
return doValidatePhysiNode(pCxt, (SNode*)pSubPlan->pNode);
}
int32_t validateQueryPlanNode(SValidatePlanContext* pCxt, SQueryPlan* pPlan) {
int32_t code = TSDB_CODE_SUCCESS;
SNode* pNode = NULL;
FOREACH(pNode, pPlan->pSubplans) {
if (QUERY_NODE_NODE_LIST != nodeType(pNode)) {
code = TSDB_CODE_PLAN_INTERNAL_ERROR;
break;
}
SNode* pSubNode = NULL;
SNodeListNode* pSubplans = (SNodeListNode*)pNode;
FOREACH(pSubNode, pSubplans->pNodeList) {
if (QUERY_NODE_PHYSICAL_SUBPLAN != nodeType(pNode)) {
code = TSDB_CODE_PLAN_INTERNAL_ERROR;
break;
}
code = doValidatePhysiNode(pCxt, pSubNode);
if (code) {
break;
}
}
}
return code;
}
int32_t doValidatePhysiNode(SValidatePlanContext* pCxt, SNode* pNode) {
switch (nodeType(pNode)) {
case QUERY_NODE_PHYSICAL_PLAN_TAG_SCAN:
case QUERY_NODE_PHYSICAL_PLAN_TABLE_SCAN:
case QUERY_NODE_PHYSICAL_PLAN_TABLE_SEQ_SCAN:
case QUERY_NODE_PHYSICAL_PLAN_TABLE_MERGE_SCAN:
case QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN:
case QUERY_NODE_PHYSICAL_PLAN_SYSTABLE_SCAN:
case QUERY_NODE_PHYSICAL_PLAN_BLOCK_DIST_SCAN:
case QUERY_NODE_PHYSICAL_PLAN_LAST_ROW_SCAN:
case QUERY_NODE_PHYSICAL_PLAN_PROJECT:
case QUERY_NODE_PHYSICAL_PLAN_MERGE_JOIN:
case QUERY_NODE_PHYSICAL_PLAN_HASH_AGG:
case QUERY_NODE_PHYSICAL_PLAN_EXCHANGE:
break;
case QUERY_NODE_PHYSICAL_PLAN_MERGE:
return validateMergePhysiNode(pCxt, (SMergePhysiNode*)pNode);
case QUERY_NODE_PHYSICAL_PLAN_SORT:
case QUERY_NODE_PHYSICAL_PLAN_GROUP_SORT:
case QUERY_NODE_PHYSICAL_PLAN_HASH_INTERVAL:
case QUERY_NODE_PHYSICAL_PLAN_MERGE_INTERVAL:
case QUERY_NODE_PHYSICAL_PLAN_MERGE_ALIGNED_INTERVAL:
case QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL:
case QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_INTERVAL:
case QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_INTERVAL:
case QUERY_NODE_PHYSICAL_PLAN_FILL:
case QUERY_NODE_PHYSICAL_PLAN_STREAM_FILL:
case QUERY_NODE_PHYSICAL_PLAN_MERGE_SESSION:
case QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION:
case QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_SESSION:
case QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_SESSION:
case QUERY_NODE_PHYSICAL_PLAN_MERGE_STATE:
case QUERY_NODE_PHYSICAL_PLAN_STREAM_STATE:
case QUERY_NODE_PHYSICAL_PLAN_PARTITION:
case QUERY_NODE_PHYSICAL_PLAN_STREAM_PARTITION:
case QUERY_NODE_PHYSICAL_PLAN_INDEF_ROWS_FUNC:
case QUERY_NODE_PHYSICAL_PLAN_INTERP_FUNC:
case QUERY_NODE_PHYSICAL_PLAN_DISPATCH:
case QUERY_NODE_PHYSICAL_PLAN_INSERT:
case QUERY_NODE_PHYSICAL_PLAN_QUERY_INSERT:
case QUERY_NODE_PHYSICAL_PLAN_DELETE:
case QUERY_NODE_PHYSICAL_PLAN_TABLE_COUNT_SCAN:
case QUERY_NODE_PHYSICAL_PLAN_MERGE_EVENT:
case QUERY_NODE_PHYSICAL_PLAN_STREAM_EVENT:
case QUERY_NODE_PHYSICAL_PLAN_HASH_JOIN:
case QUERY_NODE_PHYSICAL_PLAN_GROUP_CACHE:
case QUERY_NODE_PHYSICAL_PLAN_DYN_QUERY_CTRL:
break;
case QUERY_NODE_PHYSICAL_SUBPLAN:
return validateSubplanNode(pCxt, (SSubplan*)pNode);
case QUERY_NODE_PHYSICAL_PLAN:
return validateQueryPlanNode(pCxt, (SQueryPlan *)pNode);
default:
break;
}
return TSDB_CODE_SUCCESS;
}
static void destoryValidatePlanContext(SValidatePlanContext* pCxt) {
}
int32_t validateQueryPlan(SPlanContext* pCxt, SQueryPlan* pPlan) {
SValidatePlanContext cxt = {.pPlanCxt = pCxt,
.errCode = TSDB_CODE_SUCCESS
};
int32_t code = TSDB_CODE_SUCCESS;
SNode* pNode = NULL;
FOREACH(pNode, pPlan->pSubplans) {
if (QUERY_NODE_NODE_LIST != nodeType(pNode)) {
code = TSDB_CODE_PLAN_INTERNAL_ERROR;
break;
}
SNode* pSubNode = NULL;
SNodeListNode* pSubplans = (SNodeListNode*)pNode;
FOREACH(pSubNode, pSubplans->pNodeList) {
code = doValidatePhysiNode(&cxt, pSubNode);
if (code) {
break;
}
}
if (code) {
break;
}
}
destoryValidatePlanContext(&cxt);
return code;
}

View File

@ -57,6 +57,9 @@ int32_t qCreateQueryPlan(SPlanContext* pCxt, SQueryPlan** pPlan, SArray* pExecNo
if (TSDB_CODE_SUCCESS == code) { if (TSDB_CODE_SUCCESS == code) {
code = createPhysiPlan(pCxt, pLogicPlan, pPlan, pExecNodeList); code = createPhysiPlan(pCxt, pLogicPlan, pPlan, pExecNodeList);
} }
if (TSDB_CODE_SUCCESS == code) {
code = validateQueryPlan(pCxt, *pPlan);
}
if (TSDB_CODE_SUCCESS == code) { if (TSDB_CODE_SUCCESS == code) {
dumpQueryPlan(*pPlan); dumpQueryPlan(*pPlan);
} }

View File

@ -1230,7 +1230,6 @@ int32_t toTimestampFunction(SScalarParam* pInput, int32_t inputNum, SScalarParam
code = taosChar2Ts(format, &formats, tsStr, &ts, precision, errMsg, 128); code = taosChar2Ts(format, &formats, tsStr, &ts, precision, errMsg, 128);
if (code) { if (code) {
qError("func to_timestamp failed %s", errMsg); qError("func to_timestamp failed %s", errMsg);
code = code == -1 ? TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_FORMAT_ERR : TSDB_CODE_FUNC_TO_TIMESTAMP_FAILED_TS_ERR;
break; break;
} }
colDataSetVal(pOutput->columnData, i, (char *)&ts, false); colDataSetVal(pOutput->columnData, i, (char *)&ts, false);

View File

@ -44,7 +44,7 @@ typedef struct {
int64_t defaultCfInit; int64_t defaultCfInit;
} SBackendWrapper; } SBackendWrapper;
void* streamBackendInit(const char* path, int64_t chkpId); void* streamBackendInit(const char* path, int64_t chkpId, int32_t vgId);
void streamBackendCleanup(void* arg); void streamBackendCleanup(void* arg);
void streamBackendHandleCleanup(void* arg); void streamBackendHandleCleanup(void* arg);
int32_t streamBackendLoadCheckpointInfo(void* pMeta); int32_t streamBackendLoadCheckpointInfo(void* pMeta);

View File

@ -106,7 +106,6 @@ int32_t streamBroadcastToChildren(SStreamTask* pTask, const SSDataBlock* pBlock)
int32_t tEncodeStreamRetrieveReq(SEncoder* pEncoder, const SStreamRetrieveReq* pReq); int32_t tEncodeStreamRetrieveReq(SEncoder* pEncoder, const SStreamRetrieveReq* pReq);
int32_t streamSaveAllTaskStatus(SStreamMeta* pMeta, int64_t checkpointId); int32_t streamSaveAllTaskStatus(SStreamMeta* pMeta, int64_t checkpointId);
int32_t streamTaskBuildCheckpoint(SStreamTask* pTask);
int32_t streamSendCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pReq, int32_t nodeId, SEpSet* pEpSet); int32_t streamSendCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pReq, int32_t nodeId, SEpSet* pEpSet);
int32_t streamAddCheckpointReadyMsg(SStreamTask* pTask, int32_t srcTaskId, int32_t index, int64_t checkpointId); int32_t streamAddCheckpointReadyMsg(SStreamTask* pTask, int32_t srcTaskId, int32_t index, int64_t checkpointId);

View File

@ -469,11 +469,11 @@ int32_t rebuildDirFromCheckpoint(const char* path, int64_t chkpId, char** dst) {
return 0; return 0;
} }
void* streamBackendInit(const char* streamPath, int64_t chkpId) { void* streamBackendInit(const char* streamPath, int64_t chkpId, int32_t vgId) {
char* backendPath = NULL; char* backendPath = NULL;
int32_t code = rebuildDirFromCheckpoint(streamPath, chkpId, &backendPath); int32_t code = rebuildDirFromCheckpoint(streamPath, chkpId, &backendPath);
stDebug("start to init stream backend at %s, checkpointid: %" PRId64 "", backendPath, chkpId); stDebug("start to init stream backend at %s, checkpointid: %" PRId64 " vgId:%d", backendPath, chkpId, vgId);
uint32_t dbMemLimit = nextPow2(tsMaxStreamBackendCache) << 20; uint32_t dbMemLimit = nextPow2(tsMaxStreamBackendCache) << 20;
SBackendWrapper* pHandle = taosMemoryCalloc(1, sizeof(SBackendWrapper)); SBackendWrapper* pHandle = taosMemoryCalloc(1, sizeof(SBackendWrapper));
@ -534,7 +534,7 @@ void* streamBackendInit(const char* streamPath, int64_t chkpId) {
if (cfs != NULL) { if (cfs != NULL) {
rocksdb_list_column_families_destroy(cfs, nCf); rocksdb_list_column_families_destroy(cfs, nCf);
} }
stDebug("succ to init stream backend at %s, backend:%p", backendPath, pHandle); stDebug("succ to init stream backend at %s, backend:%p, vgId:%d", backendPath, pHandle, vgId);
taosMemoryFreeClear(backendPath); taosMemoryFreeClear(backendPath);
return (void*)pHandle; return (void*)pHandle;

View File

@ -299,9 +299,12 @@ int32_t streamSaveAllTaskStatus(SStreamMeta* pMeta, int64_t checkpointId) {
continue; continue;
} }
ASSERT(p->chkInfo.checkpointId < p->checkpointingId && p->checkpointingId == checkpointId); ASSERT(p->chkInfo.checkpointId < p->checkpointingId && p->checkpointingId == checkpointId &&
p->chkInfo.checkpointVer <= p->chkInfo.processedVer);
p->chkInfo.checkpointId = p->checkpointingId; p->chkInfo.checkpointId = p->checkpointingId;
p->chkInfo.checkpointVer = p->chkInfo.processedVer;
streamTaskClearCheckInfo(p); streamTaskClearCheckInfo(p);
char* str = NULL; char* str = NULL;

View File

@ -129,6 +129,7 @@ SStreamDataSubmit* streamDataSubmitNew(SPackedData* pData, int32_t type) {
void streamDataSubmitDestroy(SStreamDataSubmit* pDataSubmit) { void streamDataSubmitDestroy(SStreamDataSubmit* pDataSubmit) {
ASSERT(pDataSubmit->type == STREAM_INPUT__DATA_SUBMIT); ASSERT(pDataSubmit->type == STREAM_INPUT__DATA_SUBMIT);
taosMemoryFree(pDataSubmit->submit.msgStr); taosMemoryFree(pDataSubmit->submit.msgStr);
taosFreeQitem(pDataSubmit);
} }
SStreamMergedSubmit* streamMergedSubmitNew() { SStreamMergedSubmit* streamMergedSubmitNew() {
@ -208,12 +209,10 @@ void streamFreeQitem(SStreamQueueItem* data) {
if (type == STREAM_INPUT__GET_RES) { if (type == STREAM_INPUT__GET_RES) {
blockDataDestroy(((SStreamTrigger*)data)->pBlock); blockDataDestroy(((SStreamTrigger*)data)->pBlock);
taosFreeQitem(data); taosFreeQitem(data);
} else if (type == STREAM_INPUT__DATA_BLOCK || type == STREAM_INPUT__DATA_RETRIEVE || type == STREAM_INPUT__TRANS_STATE) { } else if (type == STREAM_INPUT__DATA_BLOCK || type == STREAM_INPUT__DATA_RETRIEVE) {
taosArrayDestroyEx(((SStreamDataBlock*)data)->blocks, (FDelete)blockDataFreeRes); destroyStreamDataBlock((SStreamDataBlock*)data);
taosFreeQitem(data);
} else if (type == STREAM_INPUT__DATA_SUBMIT) { } else if (type == STREAM_INPUT__DATA_SUBMIT) {
streamDataSubmitDestroy((SStreamDataSubmit*)data); streamDataSubmitDestroy((SStreamDataSubmit*)data);
taosFreeQitem(data);
} else if (type == STREAM_INPUT__MERGED_SUBMIT) { } else if (type == STREAM_INPUT__MERGED_SUBMIT) {
SStreamMergedSubmit* pMerge = (SStreamMergedSubmit*)data; SStreamMergedSubmit* pMerge = (SStreamMergedSubmit*)data;
@ -228,7 +227,7 @@ void streamFreeQitem(SStreamQueueItem* data) {
SStreamRefDataBlock* pRefBlock = (SStreamRefDataBlock*)data; SStreamRefDataBlock* pRefBlock = (SStreamRefDataBlock*)data;
blockDataDestroy(pRefBlock->pBlock); blockDataDestroy(pRefBlock->pBlock);
taosFreeQitem(pRefBlock); taosFreeQitem(pRefBlock);
} else if (type == STREAM_INPUT__CHECKPOINT || type == STREAM_INPUT__CHECKPOINT_TRIGGER) { } else if (type == STREAM_INPUT__CHECKPOINT || type == STREAM_INPUT__CHECKPOINT_TRIGGER || type == STREAM_INPUT__TRANS_STATE) {
SStreamDataBlock* pBlock = (SStreamDataBlock*) data; SStreamDataBlock* pBlock = (SStreamDataBlock*) data;
taosArrayDestroyEx(pBlock->blocks, freeItems); taosArrayDestroyEx(pBlock->blocks, freeItems);
taosFreeQitem(pBlock); taosFreeQitem(pBlock);

View File

@ -593,7 +593,7 @@ int32_t streamExecForAll(SStreamTask* pTask) {
const SStreamQueueItem* pItem = pInput; const SStreamQueueItem* pItem = pInput;
stDebug("s-task:%s start to process batch of blocks, num:%d, type:%d", id, numOfBlocks, pItem->type); stDebug("s-task:%s start to process batch of blocks, num:%d, type:%d", id, numOfBlocks, pItem->type);
int64_t ver = pTask->chkInfo.checkpointVer; int64_t ver = pTask->chkInfo.processedVer;
doSetStreamInputBlock(pTask, pInput, &ver, id); doSetStreamInputBlock(pTask, pInput, &ver, id);
int64_t resSize = 0; int64_t resSize = 0;
@ -604,13 +604,16 @@ int32_t streamExecForAll(SStreamTask* pTask) {
stDebug("s-task:%s batch of input blocks exec end, elapsed time:%.2fs, result size:%.2fMiB, numOfBlocks:%d", id, el, stDebug("s-task:%s batch of input blocks exec end, elapsed time:%.2fs, result size:%.2fMiB, numOfBlocks:%d", id, el,
SIZE_IN_MiB(resSize), totalBlocks); SIZE_IN_MiB(resSize), totalBlocks);
// update the currentVer if processing the submit blocks. SCheckpointInfo* pInfo = &pTask->chkInfo;
ASSERT(pTask->chkInfo.checkpointVer <= pTask->chkInfo.nextProcessVer && ver >= pTask->chkInfo.checkpointVer);
if (ver != pTask->chkInfo.checkpointVer) { // update the currentVer if processing the submit blocks.
stDebug("s-task:%s update checkpointVer(unsaved) from %" PRId64 " to %" PRId64 ", nextProcessVer:%" PRId64, ASSERT(pInfo->checkpointVer <= pInfo->nextProcessVer && ver >= pInfo->checkpointVer);
pTask->id.idStr, pTask->chkInfo.checkpointVer, ver, pTask->chkInfo.nextProcessVer);
pTask->chkInfo.checkpointVer = ver; if (ver != pInfo->processedVer) {
stDebug("s-task:%s update processedVer(unsaved) from %" PRId64 " to %" PRId64 " nextProcessVer:%" PRId64
" ckpt:%" PRId64,
pTask->id.idStr, pInfo->processedVer, ver, pInfo->nextProcessVer, pInfo->checkpointVer);
pInfo->processedVer = ver;
} }
streamFreeQitem(pInput); streamFreeQitem(pInput);

View File

@ -28,7 +28,6 @@ int32_t streamBackendId = 0;
int32_t streamBackendCfWrapperId = 0; int32_t streamBackendCfWrapperId = 0;
int32_t streamMetaId = 0; int32_t streamMetaId = 0;
static int64_t streamGetLatestCheckpointId(SStreamMeta* pMeta);
static void metaHbToMnode(void* param, void* tmrId); static void metaHbToMnode(void* param, void* tmrId);
static void streamMetaClear(SStreamMeta* pMeta); static void streamMetaClear(SStreamMeta* pMeta);
static int32_t streamMetaBegin(SStreamMeta* pMeta); static int32_t streamMetaBegin(SStreamMeta* pMeta);
@ -192,14 +191,14 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF
taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK); taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK);
pMeta->chkpSaved = taosArrayInit(4, sizeof(int64_t)); pMeta->chkpSaved = taosArrayInit(4, sizeof(int64_t));
pMeta->chkpInUse = taosArrayInit(4, sizeof(int64_t)); pMeta->chkpInUse = taosArrayInit(4, sizeof(int64_t));
pMeta->chkpCap = 8; pMeta->chkpCap = 2;
taosInitRWLatch(&pMeta->chkpDirLock); taosInitRWLatch(&pMeta->chkpDirLock);
pMeta->chkpId = streamGetLatestCheckpointId(pMeta); pMeta->chkpId = streamMetaGetLatestCheckpointId(pMeta);
pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId); pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId, pMeta->vgId);
while (pMeta->streamBackend == NULL) { while (pMeta->streamBackend == NULL) {
taosMsleep(100); taosMsleep(100);
pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId); pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId, vgId);
if (pMeta->streamBackend == NULL) { if (pMeta->streamBackend == NULL) {
stInfo("vgId:%d failed to init stream backend, retry in 100ms", pMeta->vgId); stInfo("vgId:%d failed to init stream backend, retry in 100ms", pMeta->vgId);
} }
@ -263,7 +262,8 @@ int32_t streamMetaReopen(SStreamMeta* pMeta) {
} }
} }
while ((pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId)) == NULL) { // todo: not wait in a critical region
while ((pMeta->streamBackend = streamBackendInit(pMeta->path, pMeta->chkpId, pMeta->vgId)) == NULL) {
stInfo("vgId:%d failed to init stream backend, retry in 100ms", pMeta->vgId); stInfo("vgId:%d failed to init stream backend, retry in 100ms", pMeta->vgId);
taosMsleep(100); taosMsleep(100);
} }
@ -602,7 +602,7 @@ int32_t streamMetaCommit(SStreamMeta* pMeta) {
return 0; return 0;
} }
int64_t streamGetLatestCheckpointId(SStreamMeta* pMeta) { int64_t streamMetaGetLatestCheckpointId(SStreamMeta* pMeta) {
int64_t chkpId = 0; int64_t chkpId = 0;
TBC* pCur = NULL; TBC* pCur = NULL;
@ -853,6 +853,37 @@ static void clearHbMsg(SStreamHbMsg* pMsg, SArray* pIdList) {
taosArrayDestroy(pIdList); taosArrayDestroy(pIdList);
} }
static bool existInHbMsg(SStreamHbMsg* pMsg, SDownstreamTaskEpset* pTaskEpset) {
int32_t numOfExisted = taosArrayGetSize(pMsg->pUpdateNodes);
for (int k = 0; k < numOfExisted; ++k) {
if (pTaskEpset->nodeId == *(int32_t*)taosArrayGet(pMsg->pUpdateNodes, k)) {
return true;
}
}
return false;
}
static void addUpdateNodeIntoHbMsg(SStreamTask* pTask, SStreamHbMsg* pMsg) {
SStreamMeta* pMeta = pTask->pMeta;
taosThreadMutexLock(&pTask->lock);
int32_t num = taosArrayGetSize(pTask->outputInfo.pDownstreamUpdateList);
for (int j = 0; j < num; ++j) {
SDownstreamTaskEpset* pTaskEpset = taosArrayGet(pTask->outputInfo.pDownstreamUpdateList, j);
bool exist = existInHbMsg(pMsg, pTaskEpset);
if (!exist) {
taosArrayPush(pMsg->pUpdateNodes, &pTaskEpset->nodeId);
stDebug("vgId:%d nodeId:%d added into hb update list, total:%d", pMeta->vgId, pTaskEpset->nodeId,
(int32_t)taosArrayGetSize(pMsg->pUpdateNodes));
}
}
taosArrayClear(pTask->outputInfo.pDownstreamUpdateList);
taosThreadMutexUnlock(&pTask->lock);
}
void metaHbToMnode(void* param, void* tmrId) { void metaHbToMnode(void* param, void* tmrId) {
int64_t rid = *(int64_t*)param; int64_t rid = *(int64_t*)param;
@ -948,28 +979,7 @@ void metaHbToMnode(void* param, void* tmrId) {
walReaderValidVersionRange((*pTask)->exec.pWalReader, &entry.verStart, &entry.verEnd); walReaderValidVersionRange((*pTask)->exec.pWalReader, &entry.verStart, &entry.verEnd);
} }
taosThreadMutexLock(&(*pTask)->lock); addUpdateNodeIntoHbMsg(*pTask, &hbMsg);
int32_t num = taosArrayGetSize((*pTask)->outputInfo.pDownstreamUpdateList);
for (int j = 0; j < num; ++j) {
int32_t* pNodeId = taosArrayGet((*pTask)->outputInfo.pDownstreamUpdateList, j);
bool exist = false;
int32_t numOfExisted = taosArrayGetSize(hbMsg.pUpdateNodes);
for (int k = 0; k < numOfExisted; ++k) {
if (*pNodeId == *(int32_t*)taosArrayGet(hbMsg.pUpdateNodes, k)) {
exist = true;
break;
}
}
if (!exist) {
taosArrayPush(hbMsg.pUpdateNodes, pNodeId);
}
}
taosArrayClear((*pTask)->outputInfo.pDownstreamUpdateList);
taosThreadMutexUnlock(&(*pTask)->lock);
taosArrayPush(hbMsg.pTaskStatus, &entry); taosArrayPush(hbMsg.pTaskStatus, &entry);
if (!hasMnodeEpset) { if (!hasMnodeEpset) {
epsetAssign(&epset, &(*pTask)->info.mnodeEpset); epsetAssign(&epset, &(*pTask)->info.mnodeEpset);
@ -1009,7 +1019,7 @@ void metaHbToMnode(void* param, void* tmrId) {
pMeta->pHbInfo->hbCount += 1; pMeta->pHbInfo->hbCount += 1;
stDebug("vgId:%d, build and send hb to mnode, numOfTasks:%d total:%d", pMeta->vgId, hbMsg.numOfTasks, stDebug("vgId:%d build and send hb to mnode, numOfTasks:%d total:%d", pMeta->vgId, hbMsg.numOfTasks,
pMeta->pHbInfo->hbCount); pMeta->pHbInfo->hbCount);
tmsgSendReq(&epset, &msg); tmsgSendReq(&epset, &msg);
} else { } else {

View File

@ -270,7 +270,6 @@ int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem)
"s-task:%s inputQ is full, capacity(size:%d num:%dMiB), current(blocks:%d, size:%.2fMiB) stop to push data", "s-task:%s inputQ is full, capacity(size:%d num:%dMiB), current(blocks:%d, size:%.2fMiB) stop to push data",
pTask->id.idStr, STREAM_TASK_QUEUE_CAPACITY, STREAM_TASK_QUEUE_CAPACITY_IN_SIZE, total, size); pTask->id.idStr, STREAM_TASK_QUEUE_CAPACITY, STREAM_TASK_QUEUE_CAPACITY_IN_SIZE, total, size);
streamDataSubmitDestroy(px); streamDataSubmitDestroy(px);
taosFreeQitem(pItem);
return -1; return -1;
} }
@ -280,7 +279,6 @@ int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem)
int32_t code = taosWriteQitem(pQueue, pItem); int32_t code = taosWriteQitem(pQueue, pItem);
if (code != TSDB_CODE_SUCCESS) { if (code != TSDB_CODE_SUCCESS) {
streamDataSubmitDestroy(px); streamDataSubmitDestroy(px);
taosFreeQitem(pItem);
return code; return code;
} }
@ -296,13 +294,13 @@ int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem)
stTrace("s-task:%s input queue is full, capacity:%d size:%d MiB, current(blocks:%d, size:%.2fMiB) abort", stTrace("s-task:%s input queue is full, capacity:%d size:%d MiB, current(blocks:%d, size:%.2fMiB) abort",
pTask->id.idStr, STREAM_TASK_QUEUE_CAPACITY, STREAM_TASK_QUEUE_CAPACITY_IN_SIZE, total, size); pTask->id.idStr, STREAM_TASK_QUEUE_CAPACITY, STREAM_TASK_QUEUE_CAPACITY_IN_SIZE, total, size);
destroyStreamDataBlock((SStreamDataBlock*)pItem); streamFreeQitem(pItem);
return -1; return -1;
} }
int32_t code = taosWriteQitem(pQueue, pItem); int32_t code = taosWriteQitem(pQueue, pItem);
if (code != TSDB_CODE_SUCCESS) { if (code != TSDB_CODE_SUCCESS) {
destroyStreamDataBlock((SStreamDataBlock*)pItem); streamFreeQitem(pItem);
return code; return code;
} }
@ -312,7 +310,7 @@ int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem)
type == STREAM_INPUT__TRANS_STATE) { type == STREAM_INPUT__TRANS_STATE) {
int32_t code = taosWriteQitem(pQueue, pItem); int32_t code = taosWriteQitem(pQueue, pItem);
if (code != TSDB_CODE_SUCCESS) { if (code != TSDB_CODE_SUCCESS) {
taosFreeQitem(pItem); streamFreeQitem(pItem);
return code; return code;
} }
@ -323,7 +321,7 @@ int32_t streamTaskPutDataIntoInputQ(SStreamTask* pTask, SStreamQueueItem* pItem)
// use the default memory limit, refactor later. // use the default memory limit, refactor later.
int32_t code = taosWriteQitem(pQueue, pItem); int32_t code = taosWriteQitem(pQueue, pItem);
if (code != TSDB_CODE_SUCCESS) { if (code != TSDB_CODE_SUCCESS) {
taosFreeQitem(pItem); streamFreeQitem(pItem);
return code; return code;
} }

View File

@ -19,7 +19,6 @@
#include "streamBackendRocksdb.h" #include "streamBackendRocksdb.h"
#include "streamInt.h" #include "streamInt.h"
#include "tcommon.h" #include "tcommon.h"
#include "streamInt.h"
enum SBackendFileType { enum SBackendFileType {
ROCKSDB_OPTIONS_TYPE = 1, ROCKSDB_OPTIONS_TYPE = 1,
@ -52,6 +51,7 @@ struct SStreamSnapHandle {
int8_t filetype; int8_t filetype;
SArray* pFileList; SArray* pFileList;
int32_t currFileIdx; int32_t currFileIdx;
int8_t delFlag; // 0 : not del, 1: del
}; };
struct SStreamSnapBlockHdr { struct SStreamSnapBlockHdr {
int8_t type; int8_t type;
@ -148,6 +148,7 @@ int32_t streamSnapHandleInit(SStreamSnapHandle* pHandle, char* path, int64_t chk
taosMemoryFree(tdir); taosMemoryFree(tdir);
return code; return code;
} }
pHandle->delFlag = 1;
chkpId = 0; chkpId = 0;
} }
@ -193,8 +194,8 @@ int32_t streamSnapHandleInit(SStreamSnapHandle* pHandle, char* path, int64_t chk
taosArrayPush(pFile->pSst, &sst); taosArrayPush(pFile->pSst, &sst);
} }
} }
{ if (qDebugFlag & DEBUG_TRACE) {
char* buf = taosMemoryCalloc(1, 512); char* buf = taosMemoryCalloc(1, 128 + taosArrayGetSize(pFile->pSst) * 16);
sprintf(buf, "[current: %s,", pFile->pCurrent); sprintf(buf, "[current: %s,", pFile->pCurrent);
sprintf(buf + strlen(buf), "MANIFEST: %s,", pFile->pMainfest); sprintf(buf + strlen(buf), "MANIFEST: %s,", pFile->pMainfest);
sprintf(buf + strlen(buf), "options: %s,", pFile->pOptions); sprintf(buf + strlen(buf), "options: %s,", pFile->pOptions);
@ -274,7 +275,7 @@ void streamSnapHandleDestroy(SStreamSnapHandle* handle) {
if (handle->checkpointId == 0) { if (handle->checkpointId == 0) {
// del tmp dir // del tmp dir
if (pFile && taosIsDir(pFile->path)) { if (pFile && taosIsDir(pFile->path)) {
taosRemoveDir(pFile->path); if (handle->delFlag) taosRemoveDir(pFile->path);
} }
} else { } else {
streamBackendDelInUseChkp(handle->handle, handle->checkpointId); streamBackendDelInUseChkp(handle->handle, handle->checkpointId);
@ -344,10 +345,10 @@ int32_t streamSnapRead(SStreamSnapReader* pReader, uint8_t** ppData, int64_t* si
stDebug("%s start to read file %s, current offset:%" PRId64 ", size:%" PRId64 ", file no.%d", STREAM_STATE_TRANSFER, stDebug("%s start to read file %s, current offset:%" PRId64 ", size:%" PRId64 ", file no.%d", STREAM_STATE_TRANSFER,
item->name, (int64_t)pHandle->offset, item->size, pHandle->currFileIdx); item->name, (int64_t)pHandle->offset, item->size, pHandle->currFileIdx);
uint8_t* buf = taosMemoryCalloc(1, sizeof(SStreamSnapBlockHdr) + kBlockSize); uint8_t* buf = taosMemoryCalloc(1, sizeof(SStreamSnapBlockHdr) + kBlockSize);
if(buf == NULL){ if (buf == NULL) {
return TSDB_CODE_OUT_OF_MEMORY; return TSDB_CODE_OUT_OF_MEMORY;
} }
int64_t nread = taosPReadFile(pHandle->fd, buf + sizeof(SStreamSnapBlockHdr), kBlockSize, pHandle->offset); int64_t nread = taosPReadFile(pHandle->fd, buf + sizeof(SStreamSnapBlockHdr), kBlockSize, pHandle->offset);
if (nread == -1) { if (nread == -1) {
taosMemoryFree(buf); taosMemoryFree(buf);
code = TAOS_SYSTEM_ERROR(terrno); code = TAOS_SYSTEM_ERROR(terrno);
@ -423,6 +424,7 @@ int32_t streamSnapWriterOpen(void* pMeta, int64_t sver, int64_t ever, char* path
pHandle->pFileList = list; pHandle->pFileList = list;
pHandle->currFileIdx = 0; pHandle->currFileIdx = 0;
pHandle->offset = 0; pHandle->offset = 0;
pHandle->delFlag = 0;
*ppWriter = pWriter; *ppWriter = pWriter;
return 0; return 0;
@ -480,8 +482,8 @@ int32_t streamSnapWrite(SStreamSnapWriter* pWriter, uint8_t* pData, uint32_t nDa
} }
int32_t streamSnapWriterClose(SStreamSnapWriter* pWriter, int8_t rollback) { int32_t streamSnapWriterClose(SStreamSnapWriter* pWriter, int8_t rollback) {
SStreamSnapHandle* handle = &pWriter->handle; SStreamSnapHandle* handle = &pWriter->handle;
if (qDebugFlag & DEBUG_DEBUG) { if (qDebugFlag & DEBUG_TRACE) {
char* buf = (char*)taosMemoryMalloc(1024); char* buf = (char*)taosMemoryMalloc(128 + taosArrayGetSize(handle->pFileList) * 16);
int n = sprintf(buf, "["); int n = sprintf(buf, "[");
for (int i = 0; i < taosArrayGetSize(handle->pFileList); i++) { for (int i = 0; i < taosArrayGetSize(handle->pFileList); i++) {
SBackendFileItem* item = taosArrayGet(handle->pFileList, i); SBackendFileItem* item = taosArrayGet(handle->pFileList, i);

View File

@ -562,7 +562,6 @@ int32_t streamTaskPutTranstateIntoInputQ(SStreamTask* pTask) {
taosMemoryFree(pBlock); taosMemoryFree(pBlock);
if (streamTaskPutDataIntoInputQ(pTask, (SStreamQueueItem*)pTranstate) < 0) { if (streamTaskPutDataIntoInputQ(pTask, (SStreamQueueItem*)pTranstate) < 0) {
taosFreeQitem(pTranstate);
return TSDB_CODE_OUT_OF_MEMORY; return TSDB_CODE_OUT_OF_MEMORY;
} }
@ -1084,7 +1083,7 @@ int32_t streamMetaUpdateTaskDownstreamStatus(SStreamTask* pTask, int64_t startTs
int32_t numOfTotal = streamMetaGetNumOfTasks(pMeta); int32_t numOfTotal = streamMetaGetNumOfTasks(pMeta);
if (taosHashGetSize(pStartInfo->pReadyTaskSet) + taosHashGetSize(pStartInfo->pFailedTaskSet) == numOfTotal) { if (taosHashGetSize(pStartInfo->pReadyTaskSet) + taosHashGetSize(pStartInfo->pFailedTaskSet) == numOfTotal) {
pStartInfo->readyTs = pTask->execInfo.start; pStartInfo->readyTs = taosGetTimestampMs();
pStartInfo->elapsedTime = (pStartInfo->startTs != 0) ? pStartInfo->readyTs - pStartInfo->startTs : 0; pStartInfo->elapsedTime = (pStartInfo->startTs != 0) ? pStartInfo->readyTs - pStartInfo->startTs : 0;
stDebug("vgId:%d all %d task(s) check downstream completed, last completed task:%s level:%d, startTs:%" PRId64 stDebug("vgId:%d all %d task(s) check downstream completed, last completed task:%s level:%d, startTs:%" PRId64

View File

@ -221,7 +221,6 @@ SStreamState* streamStateOpen(char* path, void* pTask, bool specPath, int32_t sz
} }
pState->pTdbState->pOwner = pTask; pState->pTdbState->pOwner = pTask;
pState->checkPointId = 0;
return pState; return pState;
@ -274,7 +273,6 @@ int32_t streamStateCommit(SStreamState* pState) {
SStreamSnapshot* pShot = getSnapshot(pState->pFileState); SStreamSnapshot* pShot = getSnapshot(pState->pFileState);
flushSnapshot(pState->pFileState, pShot, true); flushSnapshot(pState->pFileState, pShot, true);
} }
pState->checkPointId++;
return 0; return 0;
#else #else
if (tdbCommit(pState->pTdbState->db, pState->pTdbState->txn) < 0) { if (tdbCommit(pState->pTdbState->db, pState->pTdbState->txn) < 0) {
@ -288,7 +286,6 @@ int32_t streamStateCommit(SStreamState* pState) {
TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) { TDB_TXN_WRITE | TDB_TXN_READ_UNCOMMITTED) < 0) {
return -1; return -1;
} }
pState->checkPointId++;
return 0; return 0;
#endif #endif
} }

View File

@ -431,8 +431,10 @@ int32_t streamTaskInit(SStreamTask* pTask, SStreamMeta* pMeta, SMsgCb* pMsgCb, i
pTask->outputq.status = TASK_OUTPUT_STATUS__NORMAL; pTask->outputq.status = TASK_OUTPUT_STATUS__NORMAL;
pTask->pMeta = pMeta; pTask->pMeta = pMeta;
pTask->chkInfo.checkpointVer = ver - 1; pTask->chkInfo.checkpointVer = ver - 1; // only update when generating checkpoint
pTask->chkInfo.nextProcessVer = ver; pTask->chkInfo.processedVer = ver - 1; // already processed version
pTask->chkInfo.nextProcessVer = ver; // next processed version
pTask->dataRange.range.maxVer = ver; pTask->dataRange.range.maxVer = ver;
pTask->dataRange.range.minVer = ver; pTask->dataRange.range.minVer = ver;
pTask->pMsgCb = pMsgCb; pTask->pMsgCb = pMsgCb;

View File

@ -27,6 +27,9 @@
#define DEFAULT_MAX_STREAM_BUFFER_SIZE (128 * 1024 * 1024) #define DEFAULT_MAX_STREAM_BUFFER_SIZE (128 * 1024 * 1024)
#define MIN_NUM_OF_ROW_BUFF 10240 #define MIN_NUM_OF_ROW_BUFF 10240
#define TASK_KEY "streamFileState"
#define STREAM_STATE_INFO_NAME "StreamStateCheckPoint"
struct SStreamFileState { struct SStreamFileState {
SList* usedBuffs; SList* usedBuffs;
SList* freeBuffs; SList* freeBuffs;
@ -113,6 +116,15 @@ void* sessionCreateStateKey(SRowBuffPos* pPos, int64_t num) {
return pStateKey; return pStateKey;
} }
static void streamFileStateDecode(TSKEY* pKey, void* pBuff, int32_t len) { pBuff = taosDecodeFixedI64(pBuff, pKey); }
static void streamFileStateEncode(TSKEY* pKey, void** pVal, int32_t* pLen) {
*pLen = sizeof(TSKEY);
(*pVal) = taosMemoryCalloc(1, *pLen);
void* buff = *pVal;
taosEncodeFixedI64(&buff, *pKey);
}
SStreamFileState* streamFileStateInit(int64_t memSize, uint32_t keySize, uint32_t rowSize, uint32_t selectRowSize, SStreamFileState* streamFileStateInit(int64_t memSize, uint32_t keySize, uint32_t rowSize, uint32_t selectRowSize,
GetTsFun fp, void* pFile, TSKEY delMark, const char* taskId, int64_t checkpointId, GetTsFun fp, void* pFile, TSKEY delMark, const char* taskId, int64_t checkpointId,
int8_t type) { int8_t type) {
@ -181,6 +193,15 @@ SStreamFileState* streamFileStateInit(int64_t memSize, uint32_t keySize, uint32_
recoverSesssion(pFileState, checkpointId); recoverSesssion(pFileState, checkpointId);
} }
void* valBuf = NULL;
int32_t len = 0;
int32_t code = streamDefaultGet_rocksdb(pFileState->pFileStore, STREAM_STATE_INFO_NAME, &valBuf, &len);
if (code == TSDB_CODE_SUCCESS) {
ASSERT(len == sizeof(TSKEY));
streamFileStateDecode(&pFileState->flushMark, valBuf, len);
qDebug("===stream===flushMark read:%" PRId64, pFileState->flushMark);
}
taosMemoryFreeClear(valBuf);
return pFileState; return pFileState;
_error: _error:
@ -506,15 +527,6 @@ SStreamSnapshot* getSnapshot(SStreamFileState* pFileState) {
return pFileState->usedBuffs; return pFileState->usedBuffs;
} }
void streamFileStateDecode(TSKEY* pKey, void* pBuff, int32_t len) { pBuff = taosDecodeFixedI64(pBuff, pKey); }
void streamFileStateEncode(TSKEY* pKey, void** pVal, int32_t* pLen) {
*pLen = sizeof(TSKEY);
(*pVal) = taosMemoryCalloc(1, *pLen);
void* buff = *pVal;
taosEncodeFixedI64(&buff, *pKey);
}
int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot, bool flushState) { int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot, bool flushState) {
int32_t code = TSDB_CODE_SUCCESS; int32_t code = TSDB_CODE_SUCCESS;
SListIter iter = {0}; SListIter iter = {0};
@ -538,6 +550,7 @@ int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot,
continue; continue;
} }
pPos->beFlushed = true; pPos->beFlushed = true;
pFileState->flushMark = TMAX(pFileState->flushMark, pFileState->getTs(pPos->pKey));
qDebug("===stream===flushed start:%" PRId64, pFileState->getTs(pPos->pKey)); qDebug("===stream===flushed start:%" PRId64, pFileState->getTs(pPos->pKey));
if (streamStateGetBatchSize(batch) >= BATCH_LIMIT) { if (streamStateGetBatchSize(batch) >= BATCH_LIMIT) {
@ -565,24 +578,12 @@ int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot,
pFileState->id, numOfElems, BATCH_LIMIT, elapsed); pFileState->id, numOfElems, BATCH_LIMIT, elapsed);
if (flushState) { if (flushState) {
const char* taskKey = "streamFileState"; void* valBuf = NULL;
{ int32_t len = 0;
char keyBuf[128] = {0}; streamFileStateEncode(&pFileState->flushMark, &valBuf, &len);
void* valBuf = NULL; qDebug("===stream===flushMark write:%" PRId64, pFileState->flushMark);
int32_t len = 0; streamStatePutBatch(pFileState->pFileStore, "default", batch, STREAM_STATE_INFO_NAME, valBuf, len, 0);
sprintf(keyBuf, "%s:%" PRId64 "", taskKey, ((SStreamState*)pFileState->pFileStore)->checkPointId); taosMemoryFree(valBuf);
streamFileStateEncode(&pFileState->flushMark, &valBuf, &len);
streamStatePutBatch(pFileState->pFileStore, "default", batch, keyBuf, valBuf, len, 0);
taosMemoryFree(valBuf);
}
{
char keyBuf[128] = {0};
char valBuf[64] = {0};
int32_t len = 0;
memcpy(keyBuf, taskKey, strlen(taskKey));
len = sprintf(valBuf, "%" PRId64 "", ((SStreamState*)pFileState->pFileStore)->checkPointId);
code = streamStatePutBatch(pFileState->pFileStore, "default", batch, keyBuf, valBuf, len, 0);
}
streamStatePutBatch_rocksdb(pFileState->pFileStore, batch); streamStatePutBatch_rocksdb(pFileState->pFileStore, batch);
} }
@ -591,26 +592,23 @@ int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot,
} }
int32_t forceRemoveCheckpoint(SStreamFileState* pFileState, int64_t checkpointId) { int32_t forceRemoveCheckpoint(SStreamFileState* pFileState, int64_t checkpointId) {
const char* taskKey = "streamFileState";
char keyBuf[128] = {0}; char keyBuf[128] = {0};
sprintf(keyBuf, "%s:%" PRId64 "", taskKey, checkpointId); sprintf(keyBuf, "%s:%" PRId64 "", TASK_KEY, checkpointId);
return streamDefaultDel_rocksdb(pFileState->pFileStore, keyBuf); return streamDefaultDel_rocksdb(pFileState->pFileStore, keyBuf);
} }
int32_t getSnapshotIdList(SStreamFileState* pFileState, SArray* list) { int32_t getSnapshotIdList(SStreamFileState* pFileState, SArray* list) {
const char* taskKey = "streamFileState"; return streamDefaultIterGet_rocksdb(pFileState->pFileStore, TASK_KEY, NULL, list);
return streamDefaultIterGet_rocksdb(pFileState->pFileStore, taskKey, NULL, list);
} }
int32_t deleteExpiredCheckPoint(SStreamFileState* pFileState, TSKEY mark) { int32_t deleteExpiredCheckPoint(SStreamFileState* pFileState, TSKEY mark) {
int32_t code = TSDB_CODE_SUCCESS; int32_t code = TSDB_CODE_SUCCESS;
const char* taskKey = "streamFileState";
int64_t maxCheckPointId = 0; int64_t maxCheckPointId = 0;
{ {
char buf[128] = {0}; char buf[128] = {0};
void* val = NULL; void* val = NULL;
int32_t len = 0; int32_t len = 0;
memcpy(buf, taskKey, strlen(taskKey)); memcpy(buf, TASK_KEY, strlen(TASK_KEY));
code = streamDefaultGet_rocksdb(pFileState->pFileStore, buf, &val, &len); code = streamDefaultGet_rocksdb(pFileState->pFileStore, buf, &val, &len);
if (code != 0 || len == 0 || val == NULL) { if (code != 0 || len == 0 || val == NULL) {
return TSDB_CODE_FAILED; return TSDB_CODE_FAILED;
@ -624,7 +622,7 @@ int32_t deleteExpiredCheckPoint(SStreamFileState* pFileState, TSKEY mark) {
char buf[128] = {0}; char buf[128] = {0};
void* val = 0; void* val = 0;
int32_t len = 0; int32_t len = 0;
sprintf(buf, "%s:%" PRId64 "", taskKey, i); sprintf(buf, "%s:%" PRId64 "", TASK_KEY, i);
code = streamDefaultGet_rocksdb(pFileState->pFileStore, buf, &val, &len); code = streamDefaultGet_rocksdb(pFileState->pFileStore, buf, &val, &len);
if (code != 0) { if (code != 0) {
return TSDB_CODE_FAILED; return TSDB_CODE_FAILED;

View File

@ -12,7 +12,7 @@ class StreamStateEnv : public ::testing::Test {
protected: protected:
virtual void SetUp() { virtual void SetUp() {
streamMetaInit(); streamMetaInit();
backend = streamBackendInit(path, 0); backend = streamBackendInit(path, 0, 0);
} }
virtual void TearDown() { virtual void TearDown() {
streamMetaCleanup(); streamMetaCleanup();

Some files were not shown because too many files have changed in this diff Show More