diff --git a/cmake/cmake.define b/cmake/cmake.define index 6f4153c7d0..edc5dd601a 100644 --- a/cmake/cmake.define +++ b/cmake/cmake.define @@ -1,5 +1,4 @@ cmake_minimum_required(VERSION 3.0) - set(CMAKE_VERBOSE_MAKEFILE ON) set(TD_BUILD_TAOSA_INTERNAL FALSE) diff --git a/docs/en/07-develop/06-stream.md b/docs/en/07-develop/06-stream.md index 125173e60b..59a6b815cf 100644 --- a/docs/en/07-develop/06-stream.md +++ b/docs/en/07-develop/06-stream.md @@ -52,7 +52,7 @@ CREATE TABLE d1004 USING meters TAGS ("California.LosAngeles", 3); ### Create a Stream ```sql -create stream current_stream into current_stream_output_stb as select _wstart as start, _wend as end, max(current) as max_current from meters where voltage <= 220 interval (5s); +create stream current_stream trigger at_once into current_stream_output_stb as select _wstart as wstart, _wend as wend, max(current) as max_current from meters where voltage <= 220 interval (5s); ``` ### Write Data diff --git a/docs/en/07-develop/07-tmq.mdx b/docs/en/07-develop/07-tmq.mdx index 3326164f49..5af3897afd 100644 --- a/docs/en/07-develop/07-tmq.mdx +++ b/docs/en/07-develop/07-tmq.mdx @@ -53,17 +53,17 @@ The related schemas and APIs in various languages are described as follows: typedef void(tmq_commit_cb(tmq_t *tmq, int32_t code, void *param)); typedef enum tmq_conf_res_t { - TMQ_CONF_UNKNOWN = -2, - TMQ_CONF_INVALID = -1, - TMQ_CONF_OK = 0, -} tmq_conf_res_t; + TMQ_CONF_UNKNOWN = -2, + TMQ_CONF_INVALID = -1, + TMQ_CONF_OK = 0, + } tmq_conf_res_t; typedef struct tmq_topic_assignment { - int32_t vgId; - int64_t currentOffset; - int64_t begin; - int64_t end; -} tmq_topic_assignment; + int32_t vgId; + int64_t currentOffset; + int64_t begin; + int64_t end; // The last version of wal + 1 + } tmq_topic_assignment; DLL_EXPORT tmq_conf_t *tmq_conf_new(); DLL_EXPORT tmq_conf_res_t tmq_conf_set(tmq_conf_t *conf, const char *key, const char *value); @@ -82,21 +82,21 @@ The related schemas and APIs in various languages are described as 
follows: DLL_EXPORT int32_t tmq_subscription(tmq_t *tmq, tmq_list_t **topics); DLL_EXPORT TAOS_RES *tmq_consumer_poll(tmq_t *tmq, int64_t timeout); DLL_EXPORT int32_t tmq_consumer_close(tmq_t *tmq); - DLL_EXPORT int32_t tmq_commit_sync(tmq_t *tmq, const TAOS_RES *msg); + DLL_EXPORT int32_t tmq_commit_sync(tmq_t *tmq, const TAOS_RES *msg); //Commit the msg’s offset + 1 DLL_EXPORT void tmq_commit_async(tmq_t *tmq, const TAOS_RES *msg, tmq_commit_cb *cb, void *param); DLL_EXPORT int32_t tmq_commit_offset_sync(tmq_t *tmq, const char *pTopicName, int32_t vgId, int64_t offset); DLL_EXPORT void tmq_commit_offset_async(tmq_t *tmq, const char *pTopicName, int32_t vgId, int64_t offset, tmq_commit_cb *cb, void *param); DLL_EXPORT int32_t tmq_get_topic_assignment(tmq_t *tmq, const char *pTopicName, tmq_topic_assignment **assignment,int32_t *numOfAssignment); DLL_EXPORT void tmq_free_assignment(tmq_topic_assignment* pAssignment); DLL_EXPORT int32_t tmq_offset_seek(tmq_t *tmq, const char *pTopicName, int32_t vgId, int64_t offset); - DLL_EXPORT int64_t tmq_position(tmq_t *tmq, const char *pTopicName, int32_t vgId); + DLL_EXPORT int64_t tmq_position(tmq_t *tmq, const char *pTopicName, int32_t vgId); // The current offset is the offset of the last consumed message + 1 DLL_EXPORT int64_t tmq_committed(tmq_t *tmq, const char *pTopicName, int32_t vgId); DLL_EXPORT const char *tmq_get_topic_name(TAOS_RES *res); DLL_EXPORT const char *tmq_get_db_name(TAOS_RES *res); DLL_EXPORT int32_t tmq_get_vgroup_id(TAOS_RES *res); - DLL_EXPORT int64_t tmq_get_vgroup_offset(TAOS_RES* res); - DLL_EXPORT const char *tmq_err2str(int32_t code);DLL_EXPORT void tmq_conf_set_auto_commit_cb(tmq_conf_t *conf, tmq_commit_cb *cb, void *param); + DLL_EXPORT int64_t tmq_get_vgroup_offset(TAOS_RES* res); // Get current offset of the result + DLL_EXPORT const char *tmq_err2str(int32_t code); ``` The following example is based on the smart meter table described in Data Models. 
For complete sample code, see the C language section below. diff --git a/docs/en/12-taos-sql/25-grant.md b/docs/en/12-taos-sql/25-grant.md index c214e11876..5ebed12b59 100644 --- a/docs/en/12-taos-sql/25-grant.md +++ b/docs/en/12-taos-sql/25-grant.md @@ -4,7 +4,7 @@ sidebar_label: Access Control description: This document describes how to manage users and permissions in TDengine. --- -This document describes how to manage permissions in TDengine. +User and access control is a distinguished feature of TDengine Enterprise Edition. This section demonstrates only the most fundamental user and access control functionality. To learn about the full set of user and access control features, please contact the TDengine team. ## Create a User diff --git a/docs/en/14-reference/03-connector/03-cpp.mdx b/docs/en/14-reference/03-connector/03-cpp.mdx index 13029dbe91..0009902425 100644 --- a/docs/en/14-reference/03-connector/03-cpp.mdx +++ b/docs/en/14-reference/03-connector/03-cpp.mdx @@ -378,7 +378,7 @@ In addition to writing data using the SQL method or the parameter binding API, w - `TAOS_RES* taos_schemaless_insert(TAOS* taos, const char* lines[], int numLines, int protocol, int precision)` **Function description** - This interface writes the text data of the line protocol to TDengine. + - This interface writes the text data of the line protocol to TDengine. **Parameter description** - taos: database connection, established by the `taos_connect()` function. @@ -387,12 +387,13 @@ In addition to writing data using the SQL method or the parameter binding API, w - protocol: the protocol type of the lines, used to identify the text data format. - precision: precision string for the timestamp in the text data. - **return value** - TAOS_RES structure, application can get error message by using `taos_errstr()` and also error code by using `taos_errno()`. 
+ **Return value** + - TAOS_RES structure, application can get error message by using `taos_errstr()` and also error code by using `taos_errno()`. In some cases, the returned TAOS_RES is `NULL`, and it is still possible to call `taos_errno()` to safely get the error code information. The returned TAOS_RES needs to be freed by the caller in order to avoid memory leaks. **Description** + The protocol type is enumerated and contains the following three formats. - TSDB_SML_LINE_PROTOCOL: InfluxDB line protocol (Line Protocol) @@ -427,3 +428,89 @@ In addition to writing data using the SQL method or the parameter binding API, w - Within _raw interfaces represent data through the passed parameters lines and len. In order to solve the problem that the original interface data contains '\0' and is truncated. The totalRows pointer returns the number of parsed data rows. - Within _ttl interfaces can pass the ttl parameter to control the ttl expiration time of the table. - Within _reqid interfaces can track the entire call chain by passing the reqid parameter. 
+ +### Subscription API + +- `int32_t tmq_get_topic_assignment(tmq_t *tmq, const char *pTopicName, tmq_topic_assignment **assignment, int32_t *numOfAssignment)` +- `void tmq_free_assignment(tmq_topic_assignment* pAssignment)` + + tmq_topic_assignment is defined as follows: + ```c + typedef struct tmq_topic_assignment { + int32_t vgId; + int64_t currentOffset; + int64_t begin; + int64_t end; + } tmq_topic_assignment; + ``` + **Function description** + - tmq_get_topic_assignment gets the current vgroup information of this consumer + + **Parameter description** + - numOfAssignment: the number of vgroups assigned to this consumer + - assignment: the vgroup information; it must be freed with tmq_free_assignment + + **Return value** + - zero on success, non-zero on failure; the error message can be obtained through `char *tmq_err2str(int32_t code)` + +- `int64_t tmq_committed(tmq_t *tmq, const char *pTopicName, int32_t vgId)` + **Function description** + - Get the committed offset + + **Return value** + - the committed offset; -2147467247 means there is no committed value, and other values less than 0 indicate failure + +- `int32_t tmq_commit_sync(tmq_t *tmq, const TAOS_RES *msg)` +- `void tmq_commit_async(tmq_t *tmq, const TAOS_RES *msg, tmq_commit_cb *cb, void *param)` +- `int32_t tmq_commit_offset_sync(tmq_t *tmq, const char *pTopicName, int32_t vgId, int64_t offset)` +- `void tmq_commit_offset_async(tmq_t *tmq, const char *pTopicName, int32_t vgId, int64_t offset, tmq_commit_cb *cb, void *param)` + + **Function description** + + The commit interface is divided into two types, each with synchronous and asynchronous interfaces: + - The first type: based on message submission, submit the progress in the message. 
If the message passes NULL, submit the current progress of all vgroups consumed by the current consumer: tmq_commit_sync/tmq_commit_async + - The second type: submit based on the offset of a vgroup in a topic: tmq_commit_offset_sync/tmq_commit_offset_async + + **Parameter description** + - msg: the consumed message. If NULL is passed, the current progress of all vgroups consumed by the current consumer is submitted + + **Return value** + - zero on success, non-zero on failure; the error message can be obtained through `char *tmq_err2str(int32_t code)` + +- `int64_t tmq_position(tmq_t *tmq, const char *pTopicName, int32_t vgId)` + + **Function description** + - Obtain the current consumption location, which is the next location of the data consumed + + **Return value** + - the current consumption location; on failure, the error message can be obtained through `char *tmq_err2str(int32_t code)` + +- `int32_t tmq_offset_seek(tmq_t *tmq, const char *pTopicName, int32_t vgId, int64_t offset)` + + **Function description** + - Set the offset position of the consumer in a vgroup of a certain topic to start consumption + + **Return value** + - zero on success, non-zero on failure; the error message can be obtained through `char *tmq_err2str(int32_t code)` + +- `int64_t tmq_get_vgroup_offset(TAOS_RES* res)` + + **Function description** + - Obtain the starting offset of the consumed data + + **Parameter description** + - res: the consumed message + + **Return value** + - the starting offset of the consumed data; on failure, the error message can be obtained through `char *tmq_err2str(int32_t code)` + +- `int32_t tmq_subscription(tmq_t *tmq, tmq_list_t **topics)` + + **Function description** + - Obtain a list of topics subscribed by consumers + + **Parameter description** + - topics: a list of topics subscribed by consumers; it must be freed with tmq_list_destroy + + **Return value** + - zero on success, non-zero on failure; the error message can be obtained through `char *tmq_err2str(int32_t 
code)` \ No newline at end of file diff --git a/docs/en/14-reference/08-taos-shell.md b/docs/en/14-reference/08-taos-shell.md index e66da7ec17..7e0433a8b2 100644 --- a/docs/en/14-reference/08-taos-shell.md +++ b/docs/en/14-reference/08-taos-shell.md @@ -81,6 +81,14 @@ For example: taos -h h1.taos.com -s "use db; show tables;" ``` +## Export query results to a file + +- You can use ">>" to export the query results to a file; the syntax is `select * from table >> file`. If only a file name is given without a path, the file is generated in the current working directory of the TDengine CLI. + +## Import data from CSV file + +- You can use `insert into table_name file 'fileName'` to import the data from the specified file into the specified table. For example, `insert into d0 file '/root/d0.csv';` imports the data in file "/root/d0.csv" into table "d0". If only a file name is given without a path, the file is expected to be in the current working directory of the TDengine CLI. + ## TDengine CLI tips - You can use the up and down keys to iterate the history of commands entered @@ -89,3 +97,5 @@ taos -h h1.taos.com -s "use db; show tables;" - Execute `RESET QUERY CACHE` to clear the local cache of the table schema - Execute SQL statements in batches. 
You can store a series of shell commands (ending with ;, one line for each SQL command) in a script file and execute the command `source ` in the TDengine CLI to execute all SQL commands in that file automatically - Enter `q` to exit TDengine CLI + + diff --git a/docs/zh/08-connector/10-cpp.mdx b/docs/zh/08-connector/10-cpp.mdx index 9c5095f09c..c0723cd85c 100644 --- a/docs/zh/08-connector/10-cpp.mdx +++ b/docs/zh/08-connector/10-cpp.mdx @@ -467,21 +467,22 @@ TDengine 的异步 API 均采用非阻塞调用模式。应用程序可以用多 - `TAOS_RES* taos_schemaless_insert(TAOS* taos, const char* lines[], int numLines, int protocol, int precision)` **功能说明** - 该接口将行协议的文本数据写入到 TDengine 中。 + - 该接口将行协议的文本数据写入到 TDengine 中。 **参数说明** - taos: 数据库连接,通过 `taos_connect()` 函数建立的数据库连接。 - lines:文本数据。满足解析格式要求的无模式文本字符串。 - numLines:文本数据的行数,不能为 0 。 - protocol: 行协议类型,用于标识文本数据格式。 - precision:文本数据中的时间戳精度字符串。 + - taos: 数据库连接,通过 `taos_connect()` 函数建立的数据库连接。 + - lines:文本数据。满足解析格式要求的无模式文本字符串。 + - numLines:文本数据的行数,不能为 0 。 + - protocol: 行协议类型,用于标识文本数据格式。 + - precision:文本数据中的时间戳精度字符串。 **返回值** - TAOS_RES 结构体,应用可以通过使用 `taos_errstr()` 获得错误信息,也可以使用 `taos_errno()` 获得错误码。 + - TAOS_RES 结构体,应用可以通过使用 `taos_errstr()` 获得错误信息,也可以使用 `taos_errno()` 获得错误码。 在某些情况下,返回的 TAOS_RES 为 `NULL`,此时仍然可以调用 `taos_errno()` 来安全地获得错误码信息。 返回的 TAOS_RES 需要调用方来负责释放,否则会出现内存泄漏。 **说明** + 协议类型是枚举类型,包含以下三种格式: - TSDB_SML_LINE_PROTOCOL:InfluxDB 行协议(Line Protocol) @@ -515,3 +516,90 @@ TDengine 的异步 API 均采用非阻塞调用模式。应用程序可以用多 - 带_raw的接口通过传递的参数lines指针和长度len来表示数据,为了解决原始接口数据包含'\0'而被截断的问题。totalRows指针返回解析出来的数据行数。 - 带_ttl的接口可以传递ttl参数来控制建表的ttl到期时间。 - 带_reqid的接口可以通过传递reqid参数来追踪整个的调用链。 + +### 数据订阅 API + +- `int32_t tmq_get_topic_assignment(tmq_t *tmq, const char *pTopicName, tmq_topic_assignment **assignment, int32_t *numOfAssignment)` +- `void tmq_free_assignment(tmq_topic_assignment* pAssignment)` + + tmq_topic_assignment结构体定义如下: + ```c + typedef struct tmq_topic_assignment { + int32_t vgId; + int64_t currentOffset; + int64_t begin; + int64_t end; + } tmq_topic_assignment; + ``` + **功能说明** 
+ - tmq_get_topic_assignment 接口返回当前consumer分配的vgroup的信息,每个vgroup的信息包括vgId,wal的最大最小offset,以及当前消费到的offset。 + + **参数说明** + - numOfAssignment :分配给该consumer有效的vgroup个数。 + - assignment :分配的信息,数据大小为numOfAssignment,需要通过 tmq_free_assignment 接口释放。 + + **返回值** + - 错误码,0成功,非0失败,可通过 `char *tmq_err2str(int32_t code)` 函数获取错误信息。 + +- `int64_t tmq_committed(tmq_t *tmq, const char *pTopicName, int32_t vgId)` + **功能说明** + - 获取当前 consumer 在某个 topic 和 vgroup上的 commit 位置。 + + **返回值** + - 当前commit的位置,-2147467247表示没有消费进度,其他小于0的值表示失败,错误码就是返回值 + +- `int32_t tmq_commit_sync(tmq_t *tmq, const TAOS_RES *msg)` +- `void tmq_commit_async(tmq_t *tmq, const TAOS_RES *msg, tmq_commit_cb *cb, void *param)` +- `int32_t tmq_commit_offset_sync(tmq_t *tmq, const char *pTopicName, int32_t vgId, int64_t offset)` +- `void tmq_commit_offset_async(tmq_t *tmq, const char *pTopicName, int32_t vgId, int64_t offset, tmq_commit_cb *cb, void *param)` + + **功能说明** + + commit接口分为两种类型,每种类型有同步和异步接口: + - 第一种类型:根据消息提交,提交消息里的进度,如果消息传NULL,提交当前consumer所有消费的vgroup的当前进度 : tmq_commit_sync/tmq_commit_async + - 第二种类型:根据某个topic的某个vgroup的offset提交 : tmq_commit_offset_sync/tmq_commit_offset_async + + **参数说明** + - msg:消费到的消息结构,如果msg传NULL,提交当前consumer所有消费的vgroup的当前进度 + + **返回值** + - 错误码,0成功,非0失败,可通过 `char *tmq_err2str(int32_t code)` 函数获取错误信息 + +- `int64_t tmq_position(tmq_t *tmq, const char *pTopicName, int32_t vgId)` + + **功能说明** + - 获取当前消费位置,为消费到的数据位置的下一个位置 + + **返回值** + - 消费位置,非0失败,可通过 `char *tmq_err2str(int32_t code)` 函数获取错误信息 + +- `int32_t tmq_offset_seek(tmq_t *tmq, const char *pTopicName, int32_t vgId, int64_t offset)` + + **功能说明** + - 设置 consumer 在某个topic的某个vgroup的 offset位置,开始消费 + + **返回值** + - 错误码,0成功,非0失败,可通过 `char *tmq_err2str(int32_t code)` 函数获取错误信息 + +- `int32_t int64_t tmq_get_vgroup_offset(TAOS_RES* res)` + + **功能说明** + + 获取 poll 消费到的数据的起始offset + + **参数说明** + - msg:消费到的消息结构 + + **返回值** + - 消费到的offset,非0失败,可通过 `char *tmq_err2str(int32_t code)` 函数获取错误信息 + +- `int32_t int32_t tmq_subscription(tmq_t *tmq, tmq_list_t 
**topics)` + + **功能说明** + + 获取消费者订阅的 topic 列表 + **参数说明** + - topics: 获取的 topic 列表存储在这个结构中,接口内分配内存,需调用tmq_list_destroy释放 + + **返回值** + - 错误码,0成功,非0失败,可通过 `char *tmq_err2str(int32_t code)` 函数获取错误信息 \ No newline at end of file diff --git a/docs/zh/12-taos-sql/25-grant.md b/docs/zh/12-taos-sql/25-grant.md index a9c3910500..d53f951e67 100644 --- a/docs/zh/12-taos-sql/25-grant.md +++ b/docs/zh/12-taos-sql/25-grant.md @@ -4,7 +4,7 @@ title: 权限管理 description: 企业版中才具有的权限管理功能 --- -本节讲述如何在 TDengine 中进行权限管理的相关操作。 +本节讲述如何在 TDengine 中进行权限管理的相关操作。权限管理是 TDengine 企业版的特有功能,本节只列举了一些基本的权限管理功能作为示例,更丰富的权限管理请联系 TDengine 销售或市场团队。 ## 创建用户 diff --git a/docs/zh/14-reference/08-taos-shell.md b/docs/zh/14-reference/08-taos-shell.md index 3423cf35bb..1caa580b73 100644 --- a/docs/zh/14-reference/08-taos-shell.md +++ b/docs/zh/14-reference/08-taos-shell.md @@ -89,3 +89,11 @@ taos -h h1.taos.com -s "use db; show tables;" - 执行 `RESET QUERY CACHE` 可清除本地表 Schema 的缓存 - 批量执行 SQL 语句。可以将一系列的 TDengine CLI 命令(以英文 ; 结尾,每个 SQL 语句为一行)按行存放在文件里,在 TDengine CLI 里执行命令 `source ` 自动执行该文件里所有的 SQL 语句 - 输入 `q` 或 `quit` 或 `exit` 回车,可以退出 TDengine CLI + +## TDengine CLI 导出查询结果到文件中 + +- 可以使用符号 “>>” 导出查询结果到某个文件中,语法为: sql 查询语句 >> '输出文件名'; 输出文件如果不写路径的话,将输出至当前目录下。如 select * from d0 >> '/root/d0.csv'; 将把查询结果输出到 /root/d0.csv 中。 + +## TDengine CLI 导入文件中的数据到表中 + +- 可以使用 insert into table_name file '输入文件名',把上一步中导出的数据文件再导入到指定表中。如 insert into d0 file '/root/d0.csv'; 表示把上面导出的数据全部再导入至 d0 表中。 diff --git a/include/client/taos.h b/include/client/taos.h index a8136461f8..5b7946c9ad 100644 --- a/include/client/taos.h +++ b/include/client/taos.h @@ -292,14 +292,14 @@ DLL_EXPORT int32_t tmq_unsubscribe(tmq_t *tmq); DLL_EXPORT int32_t tmq_subscription(tmq_t *tmq, tmq_list_t **topics); DLL_EXPORT TAOS_RES *tmq_consumer_poll(tmq_t *tmq, int64_t timeout); DLL_EXPORT int32_t tmq_consumer_close(tmq_t *tmq); -DLL_EXPORT int32_t tmq_commit_sync(tmq_t *tmq, const TAOS_RES *msg); +DLL_EXPORT int32_t tmq_commit_sync(tmq_t *tmq, const TAOS_RES *msg); 
//Commit the msg’s offset + 1 DLL_EXPORT void tmq_commit_async(tmq_t *tmq, const TAOS_RES *msg, tmq_commit_cb *cb, void *param); DLL_EXPORT int32_t tmq_commit_offset_sync(tmq_t *tmq, const char *pTopicName, int32_t vgId, int64_t offset); DLL_EXPORT void tmq_commit_offset_async(tmq_t *tmq, const char *pTopicName, int32_t vgId, int64_t offset, tmq_commit_cb *cb, void *param); DLL_EXPORT int32_t tmq_get_topic_assignment(tmq_t *tmq, const char *pTopicName, tmq_topic_assignment **assignment,int32_t *numOfAssignment); DLL_EXPORT void tmq_free_assignment(tmq_topic_assignment* pAssignment); DLL_EXPORT int32_t tmq_offset_seek(tmq_t *tmq, const char *pTopicName, int32_t vgId, int64_t offset); -DLL_EXPORT int64_t tmq_position(tmq_t *tmq, const char *pTopicName, int32_t vgId); +DLL_EXPORT int64_t tmq_position(tmq_t *tmq, const char *pTopicName, int32_t vgId); // The current offset is the offset of the last consumed message + 1 DLL_EXPORT int64_t tmq_committed(tmq_t *tmq, const char *pTopicName, int32_t vgId); DLL_EXPORT const char *tmq_get_topic_name(TAOS_RES *res); diff --git a/include/common/tcommon.h b/include/common/tcommon.h index bdfb1d32b4..8482ba8a78 100644 --- a/include/common/tcommon.h +++ b/include/common/tcommon.h @@ -152,6 +152,8 @@ enum { STREAM_INPUT__DATA_RETRIEVE, STREAM_INPUT__GET_RES, STREAM_INPUT__CHECKPOINT, + STREAM_INPUT__CHECKPOINT_TRIGGER, + STREAM_INPUT__TRANS_STATE, STREAM_INPUT__REF_DATA_BLOCK, STREAM_INPUT__DESTROY, }; @@ -168,7 +170,9 @@ typedef enum EStreamType { STREAM_PULL_DATA, STREAM_PULL_OVER, STREAM_FILL_OVER, + STREAM_CHECKPOINT, STREAM_CREATE_CHILD_TABLE, + STREAM_TRANS_STATE, } EStreamType; #pragma pack(push, 1) diff --git a/include/common/tdatablock.h b/include/common/tdatablock.h index 34f22f1bc0..244894b59b 100644 --- a/include/common/tdatablock.h +++ b/include/common/tdatablock.h @@ -243,7 +243,7 @@ int32_t blockEncode(const SSDataBlock* pBlock, char* data, int32_t numOfCols); const char* blockDecode(SSDataBlock* pBlock, const char* 
pData); // for debug -char* dumpBlockData(SSDataBlock* pDataBlock, const char* flag, char** dumpBuf); +char* dumpBlockData(SSDataBlock* pDataBlock, const char* flag, char** dumpBuf, const char* taskIdStr); int32_t buildSubmitReqFromDataBlock(SSubmitReq2** pReq, const SSDataBlock* pDataBlocks, const STSchema* pTSchema, int64_t uid, int32_t vgId, tb_uid_t suid); diff --git a/include/common/tglobal.h b/include/common/tglobal.h index 0d3852cbab..5fd174e873 100644 --- a/include/common/tglobal.h +++ b/include/common/tglobal.h @@ -199,6 +199,7 @@ extern bool tsFilterScalarMode; extern int32_t tsKeepTimeOffset; extern int32_t tsMaxStreamBackendCache; extern int32_t tsPQSortMemThreshold; +extern int32_t tsResolveFQDNRetryTime; // #define NEEDTO_COMPRESSS_MSG(size) (tsCompressMsgSize != -1 && (size) > tsCompressMsgSize) diff --git a/include/common/tmsgdef.h b/include/common/tmsgdef.h index 232551007d..60172bce3d 100644 --- a/include/common/tmsgdef.h +++ b/include/common/tmsgdef.h @@ -254,7 +254,6 @@ enum { TD_DEF_MSG_TYPE(TDMT_STREAM_RETRIEVE, "stream-retrieve", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_SCAN_HISTORY, "stream-scan-history", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_SCAN_HISTORY_FINISH, "stream-scan-history-finish", NULL, NULL) - TD_DEF_MSG_TYPE(TDMT_STREAM_TRANSFER_STATE, "stream-transfer-state", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_CHECK, "stream-task-check", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_CHECKPOINT, "stream-checkpoint", NULL, NULL) TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_REPORT_CHECKPOINT, "stream-report-checkpoint", NULL, NULL) diff --git a/include/libs/function/functionMgt.h b/include/libs/function/functionMgt.h index 5efd8bac1e..7347cc5a4d 100644 --- a/include/libs/function/functionMgt.h +++ b/include/libs/function/functionMgt.h @@ -159,6 +159,8 @@ typedef enum EFunctionType { FUNCTION_TYPE_AVG_MERGE, FUNCTION_TYPE_STDDEV_PARTIAL, FUNCTION_TYPE_STDDEV_MERGE, + FUNCTION_TYPE_IRATE_PARTIAL, + FUNCTION_TYPE_IRATE_MERGE, // geometry functions 
FUNCTION_TYPE_GEOM_FROM_TEXT = 4250, diff --git a/include/libs/stream/tstream.h b/include/libs/stream/tstream.h index b9b24917f3..02bb65b762 100644 --- a/include/libs/stream/tstream.h +++ b/include/libs/stream/tstream.h @@ -122,6 +122,7 @@ typedef struct { int8_t type; int32_t srcVgId; + int32_t srcTaskId; int32_t childId; int64_t sourceVer; int64_t reqId; @@ -251,6 +252,7 @@ typedef struct SStreamChildEpInfo { int32_t nodeId; int32_t childId; int32_t taskId; + int8_t dataAllowed; SEpSet epSet; } SStreamChildEpInfo; @@ -272,6 +274,7 @@ typedef struct SStreamStatus { int8_t schedStatus; int8_t keepTaskStatus; bool transferState; + bool appendTranstateBlock; // has append the transfer state data block already, todo: remove it int8_t timerActive; // timer is active int8_t pauseAllowed; // allowed task status to be set to be paused } SStreamStatus; @@ -399,8 +402,9 @@ typedef struct { typedef struct { int64_t streamId; + int32_t type; int32_t taskId; - int32_t dataSrcVgId; + int32_t srcVgId; int32_t upstreamTaskId; int32_t upstreamChildId; int32_t upstreamNodeId; @@ -570,8 +574,6 @@ int32_t tDecodeStreamDispatchReq(SDecoder* pDecoder, SStreamDispatchReq* pReq); int32_t tDecodeStreamRetrieveReq(SDecoder* pDecoder, SStreamRetrieveReq* pReq); void tDeleteStreamRetrieveReq(SStreamRetrieveReq* pReq); -int32_t tInitStreamDispatchReq(SStreamDispatchReq* pReq, const SStreamTask* pTask, int32_t vgId, int32_t numOfBlocks, - int64_t dstTaskId); void tDeleteStreamDispatchReq(SStreamDispatchReq* pReq); int32_t streamSetupScheduleTrigger(SStreamTask* pTask); @@ -579,6 +581,8 @@ int32_t streamSetupScheduleTrigger(SStreamTask* pTask); int32_t streamProcessRunReq(SStreamTask* pTask); int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, SRpcMsg* pMsg, bool exec); int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, int32_t code); +void streamTaskCloseUpstreamInput(SStreamTask* pTask, int32_t taskId); +void 
streamTaskOpenAllUpstreamInput(SStreamTask* pTask); int32_t streamProcessRetrieveReq(SStreamTask* pTask, SStreamRetrieveReq* pReq, SRpcMsg* pMsg); @@ -589,7 +593,6 @@ int32_t streamTaskOutputResultBlock(SStreamTask* pTask, SStreamDataBlock* pBlock bool streamTaskShouldStop(const SStreamStatus* pStatus); bool streamTaskShouldPause(const SStreamStatus* pStatus); bool streamTaskIsIdle(const SStreamTask* pTask); -int32_t streamTaskEndScanWAL(SStreamTask* pTask); SStreamChildEpInfo* streamTaskGetUpstreamTaskEpInfo(SStreamTask* pTask, int32_t taskId); int32_t streamScanExec(SStreamTask* pTask, int32_t batchSize); @@ -626,7 +629,7 @@ int32_t streamSetParamForStreamScannerStep2(SStreamTask* pTask, SVersionRange* p int32_t streamSourceScanHistoryData(SStreamTask* pTask); int32_t streamDispatchScanHistoryFinishMsg(SStreamTask* pTask); -int32_t streamDispatchTransferStateMsg(SStreamTask* pTask); +int32_t appendTranstateIntoInputQ(SStreamTask* pTask); // agg level int32_t streamTaskScanHistoryPrepare(SStreamTask* pTask); diff --git a/source/common/src/tdatablock.c b/source/common/src/tdatablock.c index 828080b296..e21e3a06b2 100644 --- a/source/common/src/tdatablock.c +++ b/source/common/src/tdatablock.c @@ -1838,7 +1838,7 @@ static char* formatTimestamp(char* buf, int64_t val, int precision) { } // for debug -char* dumpBlockData(SSDataBlock* pDataBlock, const char* flag, char** pDataBuf) { +char* dumpBlockData(SSDataBlock* pDataBlock, const char* flag, char** pDataBuf, const char* taskIdStr) { int32_t size = 2048 * 1024; *pDataBuf = taosMemoryCalloc(size, 1); char* dumpBuf = *pDataBuf; @@ -1847,9 +1847,9 @@ char* dumpBlockData(SSDataBlock* pDataBlock, const char* flag, char** pDataBuf) int32_t rows = pDataBlock->info.rows; int32_t len = 0; len += snprintf(dumpBuf + len, size - len, - "===stream===%s|block type %d|child id %d|group id:%" PRIu64 "|uid:%" PRId64 "|rows:%" PRId64 - "|version:%" PRIu64 "|cal start:%" PRIu64 "|cal end:%" PRIu64 "|tbl:%s\n", - flag, 
(int32_t)pDataBlock->info.type, pDataBlock->info.childId, pDataBlock->info.id.groupId, + "%s===stream===%s|block type %d|child id %d|group id:%" PRIu64 "|uid:%" PRId64 + "|rows:%" PRId64 "|version:%" PRIu64 "|cal start:%" PRIu64 "|cal end:%" PRIu64 "|tbl:%s\n", + taskIdStr, flag, (int32_t)pDataBlock->info.type, pDataBlock->info.childId, pDataBlock->info.id.groupId, pDataBlock->info.id.uid, pDataBlock->info.rows, pDataBlock->info.version, pDataBlock->info.calWin.skey, pDataBlock->info.calWin.ekey, pDataBlock->info.parTbName); if (len >= size - 1) return dumpBuf; diff --git a/source/common/src/tglobal.c b/source/common/src/tglobal.c index ae585c93ba..e080c2d2ec 100644 --- a/source/common/src/tglobal.c +++ b/source/common/src/tglobal.c @@ -240,6 +240,7 @@ int64_t tsStreamBufferSize = 128 * 1024 * 1024; int64_t tsCheckpointInterval = 3 * 60 * 60 * 1000; bool tsFilterScalarMode = false; int32_t tsKeepTimeOffset = 0; // latency of data migration +int tsResolveFQDNRetryTime = 100; //seconds char tsS3Endpoint[TSDB_FQDN_LEN] = ""; char tsS3AccessKey[TSDB_FQDN_LEN] = ""; @@ -628,6 +629,7 @@ static int32_t taosAddServerCfg(SConfig *pCfg) { if (cfgAddInt32(pCfg, "keepTimeOffset", tsKeepTimeOffset, 0, 23, CFG_SCOPE_SERVER) != 0) return -1; if (cfgAddInt32(pCfg, "maxStreamBackendCache", tsMaxStreamBackendCache, 16, 1024, CFG_SCOPE_SERVER) != 0) return -1; if (cfgAddInt32(pCfg, "pqSortMemThreshold", tsPQSortMemThreshold, 1, 10240, CFG_SCOPE_SERVER) != 0) return -1; + if (cfgAddInt32(pCfg, "resolveFQDNRetryTime", tsResolveFQDNRetryTime, 1, 10240, 0) != 0) return -1; if (cfgAddString(pCfg, "s3Accesskey", tsS3AccessKey, CFG_SCOPE_SERVER) != 0) return -1; if (cfgAddString(pCfg, "s3Endpoint", tsS3Endpoint, CFG_SCOPE_SERVER) != 0) return -1; @@ -1031,6 +1033,7 @@ static int32_t taosSetServerCfg(SConfig *pCfg) { tsKeepTimeOffset = cfgGetItem(pCfg, "keepTimeOffset")->i32; tsMaxStreamBackendCache = cfgGetItem(pCfg, "maxStreamBackendCache")->i32; tsPQSortMemThreshold = cfgGetItem(pCfg, 
"pqSortMemThreshold")->i32; + tsResolveFQDNRetryTime = cfgGetItem(pCfg, "resolveFQDNRetryTime")->i32; GRANT_CFG_GET; return 0; diff --git a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c index bed9a67303..cf57deaa22 100644 --- a/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c +++ b/source/dnode/mgmt/mgmt_vnode/src/vmHandle.c @@ -742,7 +742,6 @@ SArray *vmGetMsgHandles() { if (dmSetMgmtHandle(pArray, TDMT_STREAM_RETRIEVE_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_SCAN_HISTORY_FINISH, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_SCAN_HISTORY_FINISH_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; - if (dmSetMgmtHandle(pArray, TDMT_STREAM_TRANSFER_STATE, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECK, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECK_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TRIGGER, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER; diff --git a/source/dnode/snode/src/snode.c b/source/dnode/snode/src/snode.c index 4000e72835..635fdcf459 100644 --- a/source/dnode/snode/src/snode.c +++ b/source/dnode/snode/src/snode.c @@ -77,6 +77,8 @@ int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t ver) { pTask->chkInfo.version = ver; pTask->pMeta = pSnode->pMeta; + streamTaskOpenAllUpstreamInput(pTask); + pTask->pState = streamStateOpen(pSnode->path, pTask, false, -1, -1); if (pTask->pState == NULL) { return -1; diff --git a/source/dnode/vnode/src/inc/vnodeInt.h b/source/dnode/vnode/src/inc/vnodeInt.h index e3b2d3e41e..d37cf833c2 100644 --- a/source/dnode/vnode/src/inc/vnodeInt.h +++ b/source/dnode/vnode/src/inc/vnodeInt.h @@ -250,7 +250,6 @@ int32_t tqProcessTaskDispatchRsp(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskRetrieveReq(STQ* pTq, SRpcMsg* pMsg); int32_t 
tqProcessTaskRetrieveRsp(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg); -int32_t tqProcessTaskTransferStateReq(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskScanHistoryFinishReq(STQ* pTq, SRpcMsg* pMsg); int32_t tqProcessTaskScanHistoryFinishRsp(STQ* pTq, SRpcMsg* pMsg); int32_t tqCheckLogInWal(STQ* pTq, int64_t version); diff --git a/source/dnode/vnode/src/tq/tq.c b/source/dnode/vnode/src/tq/tq.c index de34a96836..a5f7e0eb68 100644 --- a/source/dnode/vnode/src/tq/tq.c +++ b/source/dnode/vnode/src/tq/tq.c @@ -930,6 +930,8 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) { pTask->pMsgCb = &pTq->pVnode->msgCb; pTask->pMeta = pTq->pStreamMeta; + streamTaskOpenAllUpstreamInput(pTask); + // backup the initial status, and set it to be TASK_STATUS__INIT pTask->chkInfo.version = ver; pTask->chkInfo.currentVer = ver; @@ -1274,7 +1276,9 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { if (done) { pTask->tsInfo.step2Start = taosGetTimestampMs(); - streamTaskEndScanWAL(pTask); + qDebug("s-task:%s scan-history from WAL stage(step 2) ended, elapsed time:%.2fs", id, 0.0); + appendTranstateIntoInputQ(pTask); + streamTryExec(pTask); // exec directly } else { STimeWindow* pWindow = &pTask->dataRange.window; tqDebug("s-task:%s level:%d verRange:%" PRId64 " - %" PRId64 " window:%" PRId64 "-%" PRId64 @@ -1339,44 +1343,6 @@ int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) { return 0; } -// notify the downstream tasks to transfer executor state after handle all history blocks. 
-int32_t tqProcessTaskTransferStateReq(STQ* pTq, SRpcMsg* pMsg) { - char* pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); - int32_t len = pMsg->contLen - sizeof(SMsgHead); - - SStreamTransferReq req = {0}; - - SDecoder decoder; - tDecoderInit(&decoder, (uint8_t*)pReq, len); - int32_t code = tDecodeStreamScanHistoryFinishReq(&decoder, &req); - tDecoderClear(&decoder); - - tqDebug("vgId:%d start to process transfer state msg, from s-task:0x%x", pTq->pStreamMeta->vgId, req.downstreamTaskId); - - SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.streamId, req.downstreamTaskId); - if (pTask == NULL) { - tqError("failed to find task:0x%x, it may have been dropped already. process transfer state failed", req.downstreamTaskId); - return -1; - } - - int32_t remain = streamAlignTransferState(pTask); - if (remain > 0) { - tqDebug("s-task:%s receive upstream transfer state msg, remain:%d", pTask->id.idStr, remain); - streamMetaReleaseTask(pTq->pStreamMeta, pTask); - return 0; - } - - // transfer the ownership of executor state - tqDebug("s-task:%s all upstream tasks send transfer msg, open transfer state flag", pTask->id.idStr); - ASSERT(pTask->streamTaskId.taskId != 0 && pTask->info.fillHistory == 1); - - pTask->status.transferState = true; - - streamSchedExec(pTask); - streamMetaReleaseTask(pTq->pStreamMeta, pTask); - return 0; -} - int32_t tqProcessTaskScanHistoryFinishReq(STQ* pTq, SRpcMsg* pMsg) { char* msg = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead)); int32_t msgLen = pMsg->contLen - sizeof(SMsgHead); @@ -1564,7 +1530,7 @@ int32_t tqProcessTaskDispatchRsp(STQ* pTq, SRpcMsg* pMsg) { if (pTask) { streamProcessDispatchRsp(pTask, pRsp, pMsg->code); streamMetaReleaseTask(pTq->pStreamMeta, pTask); - return 0; + return TSDB_CODE_SUCCESS; } else { tqDebug("vgId:%d failed to handle the dispatch rsp, since find task:0x%x failed", vgId, taskId); return TSDB_CODE_INVALID_MSG; @@ -1708,6 +1674,8 @@ int32_t tqProcessTaskRetrieveRsp(STQ* pTq, SRpcMsg* pMsg) { 
int32_t vnodeEnqueueStreamMsg(SVnode* pVnode, SRpcMsg* pMsg) { STQ* pTq = pVnode->pTq; + int32_t vgId = pVnode->config.vgId; + SMsgHead* msgStr = pMsg->pCont; char* msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead)); int32_t msgLen = pMsg->contLen - sizeof(SMsgHead); @@ -1724,7 +1692,9 @@ int32_t vnodeEnqueueStreamMsg(SVnode* pVnode, SRpcMsg* pMsg) { tDecoderClear(&decoder); int32_t taskId = req.taskId; - SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.streamId, req.taskId); + tqDebug("vgId:%d receive dispatch msg to s-task:0x%"PRIx64"-0x%x", vgId, req.streamId, taskId); + + SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.streamId, taskId); if (pTask != NULL) { SRpcMsg rsp = {.info = pMsg->info, .code = 0}; streamProcessDispatchMsg(pTask, &req, &rsp, false); @@ -1741,7 +1711,7 @@ int32_t vnodeEnqueueStreamMsg(SVnode* pVnode, SRpcMsg* pMsg) { FAIL: if (pMsg->info.handle == NULL) { - tqError("s-task:0x%x vgId:%d msg handle is null, abort enqueue dispatch msg", pTq->pStreamMeta->vgId, taskId); + tqError("s-task:0x%x vgId:%d msg handle is null, abort enqueue dispatch msg", vgId, taskId); return -1; } diff --git a/source/dnode/vnode/src/tq/tqRead.c b/source/dnode/vnode/src/tq/tqRead.c index e0e6853ffa..7c58431b57 100644 --- a/source/dnode/vnode/src/tq/tqRead.c +++ b/source/dnode/vnode/src/tq/tqRead.c @@ -332,8 +332,12 @@ int32_t extractMsgFromWal(SWalReader* pReader, void** pItem, int64_t maxVer, con void* pBody = POINTER_SHIFT(pReader->pHead->head.body, sizeof(SMsgHead)); int32_t len = pReader->pHead->head.bodyLen - sizeof(SMsgHead); - extractDelDataBlock(pBody, len, ver, (SStreamRefDataBlock**)pItem); - tqDebug("s-task:%s delete msg extract from WAL, len:%d, ver:%"PRId64, id, len, ver); + code = extractDelDataBlock(pBody, len, ver, (SStreamRefDataBlock**)pItem); + if (code != TSDB_CODE_SUCCESS) { + tqError("s-task:%s extract delete msg from WAL failed, code:%s", id, tstrerror(code)); + } else { + tqDebug("s-task:%s delete msg extract 
from WAL, len:%d, ver:%"PRId64, id, len, ver); + } } else { ASSERT(0); } diff --git a/source/dnode/vnode/src/tq/tqRestore.c b/source/dnode/vnode/src/tq/tqRestore.c index 3d9a91899c..ed612587f5 100644 --- a/source/dnode/vnode/src/tq/tqRestore.c +++ b/source/dnode/vnode/src/tq/tqRestore.c @@ -210,13 +210,23 @@ int32_t doSetOffsetForWalReader(SStreamTask *pTask, int32_t vgId) { } static void checkForFillHistoryVerRange(SStreamTask* pTask, int64_t ver) { - if ((pTask->info.fillHistory == 1) && ver > pTask->dataRange.range.maxVer) { - qWarn("s-task:%s fill-history scan WAL, currentVer:%" PRId64 " reach the maximum ver:%" PRId64 - ", not scan wal anymore, set the transfer state flag", - pTask->id.idStr, ver, pTask->dataRange.range.maxVer); - pTask->status.transferState = true; + const char* id = pTask->id.idStr; + int64_t maxVer = pTask->dataRange.range.maxVer; - /*int32_t code = */streamSchedExec(pTask); + if ((pTask->info.fillHistory == 1) && ver > pTask->dataRange.range.maxVer) { + if (!pTask->status.appendTranstateBlock) { + qWarn("s-task:%s fill-history scan WAL, currentVer:%" PRId64 " reach the maximum ver:%" PRId64 + ", not scan wal anymore, add transfer-state block into inputQ", + id, ver, maxVer); + + double el = (taosGetTimestampMs() - pTask->tsInfo.step2Start) / 1000.0; + qDebug("s-task:%s scan-history from WAL stage(step 2) ended, elapsed time:%.2fs", id, el); + appendTranstateIntoInputQ(pTask); + /*int32_t code = */streamSchedExec(pTask); + } else { + qWarn("s-task:%s fill-history scan WAL, currentVer:%" PRId64 " reach the maximum ver:%" PRId64 ", not scan wal", + id, ver, maxVer); + } } } @@ -262,7 +272,7 @@ int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle) { continue; } - if ((pTask->info.fillHistory == 1) && pTask->status.transferState) { + if ((pTask->info.fillHistory == 1) && pTask->status.appendTranstateBlock) { ASSERT(status == TASK_STATUS__NORMAL); // the maximum version of data in the WAL has reached already, the step2 is 
done tqDebug("s-task:%s fill-history reach the maximum ver:%" PRId64 ", not scan wal anymore", pTask->id.idStr, @@ -277,6 +287,13 @@ int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle) { continue; } + // downstream task has blocked the output, stopped for a while + if (pTask->inputStatus == TASK_INPUT_STATUS__BLOCKED) { + tqDebug("s-task:%s inputQ is blocked, do nothing", pTask->id.idStr); + streamMetaReleaseTask(pStreamMeta, pTask); + continue; + } + *pScanIdle = false; // seek the stored version and extract data from WAL diff --git a/source/dnode/vnode/src/tq/tqSink.c b/source/dnode/vnode/src/tq/tqSink.c index b22650d249..cce31688bc 100644 --- a/source/dnode/vnode/src/tq/tqSink.c +++ b/source/dnode/vnode/src/tq/tqSink.c @@ -412,7 +412,7 @@ void tqSinkToTablePipeline(SStreamTask* pTask, void* vnode, int64_t ver, void* d if (k == 0) { SColumnInfoData* pColData = taosArrayGet(pDataBlock->pDataBlock, dataIndex); void* colData = colDataGetData(pColData, j); - tqTrace("tq sink pipe2, row %d, col %d ts %" PRId64, j, k, *(int64_t*)colData); + tqDebug("tq sink pipe2, row %d, col %d ts %" PRId64, j, k, *(int64_t*)colData); } if (IS_SET_NULL(pCol)) { SColVal cv = COL_VAL_NULL(pCol->colId, pCol->type); diff --git a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c index bdcf4a87c1..f547119f49 100644 --- a/source/dnode/vnode/src/tsdb/tsdbSnapshot.c +++ b/source/dnode/vnode/src/tsdb/tsdbSnapshot.c @@ -392,6 +392,9 @@ static int32_t tsdbSnapReadTombData(STsdbSnapReader* reader, uint8_t** data) { code = tTombBlockPut(reader->tombBlock, record); TSDB_CHECK_CODE(code, lino, _exit); + code = tsdbIterMergerNext(reader->tombIterMerger); + TSDB_CHECK_CODE(code, lino, _exit); + if (TOMB_BLOCK_SIZE(reader->tombBlock) >= 81920) { break; } diff --git a/source/dnode/vnode/src/vnd/vnodeSvr.c b/source/dnode/vnode/src/vnd/vnodeSvr.c index 743470aac8..f75c779f4b 100644 --- a/source/dnode/vnode/src/vnd/vnodeSvr.c +++ 
b/source/dnode/vnode/src/vnd/vnodeSvr.c @@ -661,8 +661,6 @@ int32_t vnodeProcessStreamMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) return tqProcessTaskRetrieveRsp(pVnode->pTq, pMsg); case TDMT_VND_STREAM_SCAN_HISTORY: return tqProcessTaskScanHistory(pVnode->pTq, pMsg); - case TDMT_STREAM_TRANSFER_STATE: - return tqProcessTaskTransferStateReq(pVnode->pTq, pMsg); case TDMT_STREAM_SCAN_HISTORY_FINISH: return tqProcessTaskScanHistoryFinishReq(pVnode->pTq, pMsg); case TDMT_STREAM_SCAN_HISTORY_FINISH_RSP: diff --git a/source/libs/command/src/command.c b/source/libs/command/src/command.c index 8ddf730d5a..921ec41021 100644 --- a/source/libs/command/src/command.c +++ b/source/libs/command/src/command.c @@ -624,7 +624,7 @@ void appendTableOptions(char* buf, int32_t* len, SDbCfgInfo* pDbCfg, STableCfg* } } - if (nSma < pCfg->numOfColumns) { + if (nSma < pCfg->numOfColumns && nSma > 0) { bool smaOn = false; *len += sprintf(buf + VARSTR_HEADER_SIZE + *len, " SMA("); for (int32_t i = 0; i < pCfg->numOfColumns; ++i) { diff --git a/source/libs/executor/inc/executil.h b/source/libs/executor/inc/executil.h index 7dd11e2b6d..dc9f141f17 100644 --- a/source/libs/executor/inc/executil.h +++ b/source/libs/executor/inc/executil.h @@ -183,13 +183,17 @@ void cleanupQueryTableDataCond(SQueryTableDataCond* pCond); int32_t convertFillType(int32_t mode); int32_t resultrowComparAsc(const void* p1, const void* p2); -int32_t isQualifiedTable(STableKeyInfo* info, SNode* pTagCond, void* metaHandle, bool* pQualified, SStorageAPI *pAPI); - -void printDataBlock(SSDataBlock* pBlock, const char* flag); +int32_t isQualifiedTable(STableKeyInfo* info, SNode* pTagCond, void* metaHandle, bool* pQualified, SStorageAPI* pAPI); +char* getStreamOpName(uint16_t opType); +void printDataBlock(SSDataBlock* pBlock, const char* flag, const char* taskIdStr); +void printSpecDataBlock(SSDataBlock* pBlock, const char* flag, const char* opStr, const char* taskIdStr); void getNextTimeWindow(const SInterval* 
pInterval, STimeWindow* tw, int32_t order); void getInitialStartTimeWindow(SInterval* pInterval, TSKEY ts, STimeWindow* w, bool ascQuery); +TSKEY getStartTsKey(STimeWindow* win, const TSKEY* tsCols); +void updateTimeWindowInfo(SColumnInfoData* pColData, STimeWindow* pWin, int64_t delta); + SSDataBlock* createTagValBlockForFilter(SArray* pColList, int32_t numOfTables, SArray* pUidTagList, void* pVnode, SStorageAPI* pStorageAPI); #endif // TDENGINE_EXECUTIL_H diff --git a/source/libs/executor/inc/executorInt.h b/source/libs/executor/inc/executorInt.h index 7fc2cb70ba..0bd35353e0 100644 --- a/source/libs/executor/inc/executorInt.h +++ b/source/libs/executor/inc/executorInt.h @@ -521,7 +521,6 @@ typedef struct SStreamIntervalOperatorInfo { SArray* pPullWins; // SPullWindowInfo int32_t pullIndex; SSDataBlock* pPullDataRes; - bool isFinal; SArray* pChildren; int32_t numOfChild; SStreamState* pState; // void @@ -572,7 +571,6 @@ typedef struct SStreamSessionAggOperatorInfo { void* pDelIterator; SArray* pChildren; // cache for children's result; final stream operator SPhysiNode* pPhyNode; // create new child - bool isFinal; bool ignoreExpiredData; bool ignoreExpiredDataSaved; SArray* pUpdated; @@ -768,6 +766,12 @@ void freeOperatorParam(SOperatorParam* pParam, SOperatorParamType type); void freeResetOperatorParams(struct SOperatorInfo* pOperator, SOperatorParamType type, bool allFree); SSDataBlock* getNextBlockFromDownstreamImpl(struct SOperatorInfo* pOperator, int32_t idx, bool clearParam); +bool inSlidingWindow(SInterval* pInterval, STimeWindow* pWin, SDataBlockInfo* pBlockInfo); +bool inCalSlidingWindow(SInterval* pInterval, STimeWindow* pWin, TSKEY calStart, TSKEY calEnd, EStreamType blockType); +bool compareVal(const char* v, const SStateKeys* pKey); + +int32_t getNextQualifiedWindow(SInterval* pInterval, STimeWindow* pNext, SDataBlockInfo* pDataBlockInfo, + TSKEY* primaryKeys, int32_t prevPosition, int32_t order); #ifdef __cplusplus } diff --git 
a/source/libs/executor/src/eventwindowoperator.c b/source/libs/executor/src/eventwindowoperator.c index a22399e423..d61034c26e 100644 --- a/source/libs/executor/src/eventwindowoperator.c +++ b/source/libs/executor/src/eventwindowoperator.c @@ -58,16 +58,6 @@ static void doKeepTuple(SWindowRowsSup* pRowSup, int64_t ts, uint64_t groupId) { pRowSup->groupId = groupId; } -static void updateTimeWindowInfo(SColumnInfoData* pColData, STimeWindow* pWin, bool includeEndpoint) { - int64_t* ts = (int64_t*)pColData->pData; - int32_t delta = includeEndpoint ? 1 : 0; - - int64_t duration = pWin->ekey - pWin->skey + delta; - ts[2] = duration; // set the duration - ts[3] = pWin->skey; // window start key - ts[4] = pWin->ekey + delta; // window end key -} - SOperatorInfo* createEventwindowOperatorInfo(SOperatorInfo* downstream, SPhysiNode* physiNode, SExecTaskInfo* pTaskInfo) { SEventWindowOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SEventWindowOperatorInfo)); @@ -250,7 +240,7 @@ static void doEventWindowAggImpl(SEventWindowOperatorInfo* pInfo, SExprSupp* pSu T_LONG_JMP(pTaskInfo->env, TSDB_CODE_APP_ERROR); } - updateTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &pRowSup->win, false); + updateTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &pRowSup->win, 0); applyAggFunctionOnPartialTuples(pTaskInfo, pSup->pCtx, &pInfo->twAggSup.timeWindowData, startIndex, numOfRows, pBlock->info.rows, numOfOutput); } diff --git a/source/libs/executor/src/executil.c b/source/libs/executor/src/executil.c index f35edd0cb6..816d11f274 100644 --- a/source/libs/executor/src/executil.c +++ b/source/libs/executor/src/executil.c @@ -2197,12 +2197,67 @@ int32_t createScanTableListInfo(SScanPhysiNode* pScanNode, SNodeList* pGroupTags return TSDB_CODE_SUCCESS; } -void printDataBlock(SSDataBlock* pBlock, const char* flag) { +char* getStreamOpName(uint16_t opType) { + switch (opType) { + case QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN: + return "stream scan"; + case QUERY_NODE_PHYSICAL_PLAN_PROJECT: + return 
"project"; + case QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL: + return "interval single"; + case QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_INTERVAL: + return "interval final"; + case QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_INTERVAL: + return "interval semi"; + case QUERY_NODE_PHYSICAL_PLAN_STREAM_FILL: + return "stream fill"; + case QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION: + return "session single"; + case QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_SESSION: + return "session semi"; + case QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_SESSION: + return "session final"; + case QUERY_NODE_PHYSICAL_PLAN_STREAM_STATE: + return "state single"; + case QUERY_NODE_PHYSICAL_PLAN_STREAM_PARTITION: + return "stream partitionby"; + case QUERY_NODE_PHYSICAL_PLAN_STREAM_EVENT: + return "stream event"; + } + return ""; +} + +void printDataBlock(SSDataBlock* pBlock, const char* flag, const char* taskIdStr) { if (!pBlock || pBlock->info.rows == 0) { - qDebug("===stream===%s: Block is Null or Empty", flag); + qDebug("%s===stream===%s: Block is Null or Empty", taskIdStr, flag); return; } char* pBuf = NULL; - qDebug("%s", dumpBlockData(pBlock, flag, &pBuf)); + qDebug("%s", dumpBlockData(pBlock, flag, &pBuf, taskIdStr)); taosMemoryFree(pBuf); } + +void printSpecDataBlock(SSDataBlock* pBlock, const char* flag, const char* opStr, const char* taskIdStr) { + if (!pBlock || pBlock->info.rows == 0) { + qDebug("%s===stream===%s: Block is Null or Empty", taskIdStr, flag); + return; + } + if (qDebugFlag & DEBUG_DEBUG) { + char* pBuf = NULL; + char flagBuf[64]; + snprintf(flagBuf, sizeof(flagBuf), "%s %s", flag, opStr); + qDebug("%s", dumpBlockData(pBlock, flagBuf, &pBuf, taskIdStr)); + taosMemoryFree(pBuf); + } +} + +TSKEY getStartTsKey(STimeWindow* win, const TSKEY* tsCols) { return tsCols == NULL ? 
win->skey : tsCols[0]; } + +void updateTimeWindowInfo(SColumnInfoData* pColData, STimeWindow* pWin, int64_t delta) { + int64_t* ts = (int64_t*)pColData->pData; + + int64_t duration = pWin->ekey - pWin->skey + delta; + ts[2] = duration; // set the duration + ts[3] = pWin->skey; // window start key + ts[4] = pWin->ekey + delta; // window end key +} diff --git a/source/libs/executor/src/executorInt.c b/source/libs/executor/src/executorInt.c index 653386063e..dda8b133ca 100644 --- a/source/libs/executor/src/executorInt.c +++ b/source/libs/executor/src/executorInt.c @@ -1207,3 +1207,14 @@ FORCE_INLINE SSDataBlock* getNextBlockFromDownstreamImpl(struct SOperatorInfo* p } +bool compareVal(const char* v, const SStateKeys* pKey) { + if (IS_VAR_DATA_TYPE(pKey->type)) { + if (varDataLen(v) != varDataLen(pKey->pData)) { + return false; + } else { + return memcmp(varDataVal(v), varDataVal(pKey->pData), varDataLen(v)) == 0; + } + } else { + return memcmp(pKey->pData, v, pKey->bytes) == 0; + } +} diff --git a/source/libs/executor/src/filloperator.c b/source/libs/executor/src/filloperator.c index 62ff818f73..e7c9e404a0 100644 --- a/source/libs/executor/src/filloperator.c +++ b/source/libs/executor/src/filloperator.c @@ -1292,14 +1292,14 @@ static SSDataBlock* doStreamFill(SOperatorInfo* pOperator) { (pInfo->pFillInfo->pos != FILL_POS_INVALID && pInfo->pFillInfo->needFill == true)) { doStreamFillRange(pInfo->pFillInfo, pInfo->pFillSup, pInfo->pRes); if (pInfo->pRes->info.rows > 0) { - printDataBlock(pInfo->pRes, "stream fill"); + printDataBlock(pInfo->pRes, getStreamOpName(pOperator->operatorType), GET_TASKID(pTaskInfo)); return pInfo->pRes; } } if (pOperator->status == OP_RES_TO_RETURN) { doDeleteFillFinalize(pOperator); if (pInfo->pRes->info.rows > 0) { - printDataBlock(pInfo->pRes, "stream fill"); + printDataBlock(pInfo->pRes, getStreamOpName(pOperator->operatorType), GET_TASKID(pTaskInfo)); return pInfo->pRes; } setOperatorCompleted(pOperator); @@ -1317,12 +1317,12 @@ static 
SSDataBlock* doStreamFill(SOperatorInfo* pOperator) { pOperator->status = OP_RES_TO_RETURN; pInfo->pFillInfo->preRowKey = INT64_MIN; if (pInfo->pRes->info.rows > 0) { - printDataBlock(pInfo->pRes, "stream fill"); + printDataBlock(pInfo->pRes, getStreamOpName(pOperator->operatorType), GET_TASKID(pTaskInfo)); return pInfo->pRes; } break; } - printDataBlock(pBlock, "stream fill recv"); + printSpecDataBlock(pBlock, getStreamOpName(pOperator->operatorType), "recv", GET_TASKID(pTaskInfo)); if (pInfo->pFillInfo->curGroupId != pBlock->info.id.groupId) { pInfo->pFillInfo->curGroupId = pBlock->info.id.groupId; @@ -1339,7 +1339,7 @@ static SSDataBlock* doStreamFill(SOperatorInfo* pOperator) { pInfo->pFillSup->hasDelete = true; doDeleteFillResult(pOperator); if (pInfo->pDelRes->info.rows > 0) { - printDataBlock(pInfo->pDelRes, "stream fill delete"); + printDataBlock(pInfo->pDelRes, getStreamOpName(pOperator->operatorType), GET_TASKID(pTaskInfo)); return pInfo->pDelRes; } continue; @@ -1378,7 +1378,7 @@ static SSDataBlock* doStreamFill(SOperatorInfo* pOperator) { } pOperator->resultInfo.totalRows += pInfo->pRes->info.rows; - printDataBlock(pInfo->pRes, "stream fill"); + printDataBlock(pInfo->pRes, getStreamOpName(pOperator->operatorType), GET_TASKID(pTaskInfo)); return pInfo->pRes; } diff --git a/source/libs/executor/src/groupoperator.c b/source/libs/executor/src/groupoperator.c index 2db4f39732..bd46b2494f 100644 --- a/source/libs/executor/src/groupoperator.c +++ b/source/libs/executor/src/groupoperator.c @@ -956,7 +956,8 @@ static bool hasRemainPartion(SStreamPartitionOperatorInfo* pInfo) { return pInfo static bool hasRemainTbName(SStreamPartitionOperatorInfo* pInfo) { return pInfo->pTbNameIte != NULL; } static SSDataBlock* buildStreamPartitionResult(SOperatorInfo* pOperator) { - SStorageAPI* pAPI = &pOperator->pTaskInfo->storageAPI; + SStorageAPI* pAPI = &pOperator->pTaskInfo->storageAPI; + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; SStreamPartitionOperatorInfo* pInfo 
= pOperator->info; SSDataBlock* pDest = pInfo->binfo.pRes; @@ -994,7 +995,7 @@ static SSDataBlock* buildStreamPartitionResult(SOperatorInfo* pOperator) { pOperator->resultInfo.totalRows += pDest->info.rows; pInfo->parIte = taosHashIterate(pInfo->pPartitions, pInfo->parIte); ASSERT(pDest->info.rows > 0); - printDataBlock(pDest, "stream partitionby"); + printDataBlock(pDest, getStreamOpName(pOperator->operatorType), GET_TASKID(pTaskInfo)); return pDest; } @@ -1115,7 +1116,7 @@ static SSDataBlock* doStreamHashPartition(SOperatorInfo* pOperator) { setOperatorCompleted(pOperator); return NULL; } - printDataBlock(pBlock, "stream partitionby recv"); + printSpecDataBlock(pBlock, getStreamOpName(pOperator->operatorType), "recv", GET_TASKID(pTaskInfo)); switch (pBlock->info.type) { case STREAM_NORMAL: case STREAM_PULL_DATA: @@ -1125,7 +1126,7 @@ static SSDataBlock* doStreamHashPartition(SOperatorInfo* pOperator) { case STREAM_DELETE_DATA: { copyDataBlock(pInfo->pDelRes, pBlock); pInfo->pDelRes->info.type = STREAM_DELETE_RESULT; - printDataBlock(pInfo->pDelRes, "stream partitionby delete"); + printDataBlock(pInfo->pDelRes, getStreamOpName(pOperator->operatorType), GET_TASKID(pTaskInfo)); return pInfo->pDelRes; } break; default: diff --git a/source/libs/executor/src/projectoperator.c b/source/libs/executor/src/projectoperator.c index de024d22c7..00b246afad 100644 --- a/source/libs/executor/src/projectoperator.c +++ b/source/libs/executor/src/projectoperator.c @@ -372,6 +372,10 @@ SSDataBlock* doProjectOperation(SOperatorInfo* pOperator) { pOperator->cost.openCost = (taosGetTimestampUs() - st) / 1000.0; } + if (pTaskInfo->execModel == OPTR_EXEC_MODEL_STREAM) { + printDataBlock(p, getStreamOpName(pOperator->operatorType), GET_TASKID(pTaskInfo)); + } + return (p->info.rows > 0) ? 
p : NULL; } diff --git a/source/libs/executor/src/scanoperator.c b/source/libs/executor/src/scanoperator.c index 9c19da7f64..406327598c 100644 --- a/source/libs/executor/src/scanoperator.c +++ b/source/libs/executor/src/scanoperator.c @@ -1450,7 +1450,7 @@ static int32_t generateIntervalScanRange(SStreamScanInfo* pInfo, SSDataBlock* pS if (rows == 0) { return TSDB_CODE_SUCCESS; } - + SExecTaskInfo* pTaskInfo = pInfo->pStreamScanOp->pTaskInfo; SColumnInfoData* pSrcStartTsCol = (SColumnInfoData*)taosArrayGet(pSrcBlock->pDataBlock, START_TS_COLUMN_INDEX); SColumnInfoData* pSrcEndTsCol = (SColumnInfoData*)taosArrayGet(pSrcBlock->pDataBlock, END_TS_COLUMN_INDEX); SColumnInfoData* pSrcUidCol = taosArrayGet(pSrcBlock->pDataBlock, UID_COLUMN_INDEX); @@ -1467,7 +1467,7 @@ static int32_t generateIntervalScanRange(SStreamScanInfo* pInfo, SSDataBlock* pS TSKEY startTs = srcStartTsCol[0]; TSKEY endTs = srcEndTsCol[0]; SSDataBlock* pPreRes = readPreVersionData(pInfo->pTableScanOp, srcUid, startTs, endTs, ver); - printDataBlock(pPreRes, "pre res"); + printDataBlock(pPreRes, "pre res", GET_TASKID(pTaskInfo)); blockDataCleanup(pSrcBlock); int32_t code = blockDataEnsureCapacity(pSrcBlock, pPreRes->info.rows); if (code != TSDB_CODE_SUCCESS) { @@ -1482,7 +1482,7 @@ static int32_t generateIntervalScanRange(SStreamScanInfo* pInfo, SSDataBlock* pS appendOneRowToStreamSpecialBlock(pSrcBlock, ((TSKEY*)pTsCol->pData) + i, ((TSKEY*)pTsCol->pData) + i, &srcUid, &groupId, NULL); } - printDataBlock(pSrcBlock, "new delete"); + printDataBlock(pSrcBlock, "new delete", GET_TASKID(pTaskInfo)); } uint64_t* srcGp = (uint64_t*)pSrcGpCol->pData; srcStartTsCol = (TSKEY*)pSrcStartTsCol->pData; @@ -2028,38 +2028,9 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) { switch (pInfo->scanMode) { case STREAM_SCAN_FROM_RES: { pInfo->scanMode = STREAM_SCAN_FROM_READERHANDLE; - printDataBlock(pInfo->pRecoverRes, "scan recover"); + printSpecDataBlock(pInfo->pRecoverRes, 
getStreamOpName(pOperator->operatorType), "recover", GET_TASKID(pTaskInfo)); return pInfo->pRecoverRes; } break; - // case STREAM_SCAN_FROM_UPDATERES: { - // generateScanRange(pInfo, pInfo->pUpdateDataRes, pInfo->pUpdateRes); - // prepareRangeScan(pInfo, pInfo->pUpdateRes, &pInfo->updateResIndex); - // pInfo->scanMode = STREAM_SCAN_FROM_DATAREADER_RANGE; - // printDataBlock(pInfo->pUpdateRes, "recover update"); - // return pInfo->pUpdateRes; - // } break; - // case STREAM_SCAN_FROM_DELETE_DATA: { - // generateScanRange(pInfo, pInfo->pUpdateDataRes, pInfo->pUpdateRes); - // prepareRangeScan(pInfo, pInfo->pUpdateRes, &pInfo->updateResIndex); - // pInfo->scanMode = STREAM_SCAN_FROM_DATAREADER_RANGE; - // copyDataBlock(pInfo->pDeleteDataRes, pInfo->pUpdateRes); - // pInfo->pDeleteDataRes->info.type = STREAM_DELETE_DATA; - // printDataBlock(pInfo->pDeleteDataRes, "recover delete"); - // return pInfo->pDeleteDataRes; - // } break; - // case STREAM_SCAN_FROM_DATAREADER_RANGE: { - // SSDataBlock* pSDB = doRangeScan(pInfo, pInfo->pUpdateRes, pInfo->primaryTsIndex, &pInfo->updateResIndex); - // if (pSDB) { - // STableScanInfo* pTableScanInfo = pInfo->pTableScanOp->info; - // pSDB->info.type = pInfo->scanMode == STREAM_SCAN_FROM_DATAREADER_RANGE ? 
STREAM_NORMAL : STREAM_PULL_DATA; - // checkUpdateData(pInfo, true, pSDB, false); - // printDataBlock(pSDB, "scan recover update"); - // calBlockTbName(pInfo, pSDB); - // return pSDB; - // } - // blockDataCleanup(pInfo->pUpdateDataRes); - // pInfo->scanMode = STREAM_SCAN_FROM_READERHANDLE; - // } break; default: break; } @@ -2068,22 +2039,17 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) { if (pInfo->pRecoverRes != NULL) { calBlockTbName(pInfo, pInfo->pRecoverRes); if (!pInfo->igCheckUpdate && pInfo->pUpdateInfo) { - // if (pStreamInfo->recoverStep == STREAM_RECOVER_STEP__SCAN1) { TSKEY maxTs = pAPI->stateStore.updateInfoFillBlockData(pInfo->pUpdateInfo, pInfo->pRecoverRes, pInfo->primaryTsIndex); pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, maxTs); - // } else { - // pInfo->pUpdateInfo->maxDataVersion = TMAX(pInfo->pUpdateInfo->maxDataVersion, pStreamInfo->fillHistoryVer.maxVer); - // doCheckUpdate(pInfo, pInfo->pRecoverRes->info.window.ekey, pInfo->pRecoverRes); - // } } if (pInfo->pCreateTbRes->info.rows > 0) { pInfo->scanMode = STREAM_SCAN_FROM_RES; - printDataBlock(pInfo->pCreateTbRes, "recover createTbl"); + printSpecDataBlock(pInfo->pCreateTbRes, getStreamOpName(pOperator->operatorType), "recover", GET_TASKID(pTaskInfo)); return pInfo->pCreateTbRes; } qDebug("stream recover scan get block, rows %" PRId64, pInfo->pRecoverRes->info.rows); - printDataBlock(pInfo->pRecoverRes, "scan recover"); + printSpecDataBlock(pInfo->pRecoverRes, getStreamOpName(pOperator->operatorType), "recover", GET_TASKID(pTaskInfo)); return pInfo->pRecoverRes; } pStreamInfo->recoverStep = STREAM_RECOVER_STEP__NONE; @@ -2139,7 +2105,7 @@ FETCH_NEXT_BLOCK: pAPI->stateStore.updateInfoAddCloseWindowSBF(pInfo->pUpdateInfo); } break; case STREAM_DELETE_DATA: { - printDataBlock(pBlock, "stream scan delete recv"); + printSpecDataBlock(pBlock, getStreamOpName(pOperator->operatorType), "delete recv", GET_TASKID(pTaskInfo)); SSDataBlock* pDelBlock = NULL; if (pInfo->tqReader) 
{ pDelBlock = createSpecialDataBlock(STREAM_DELETE_DATA); @@ -2150,7 +2116,7 @@ FETCH_NEXT_BLOCK: setBlockGroupIdByUid(pInfo, pDelBlock); rebuildDeleteBlockData(pDelBlock, &pStreamInfo->fillHistoryWindow, id); - printDataBlock(pDelBlock, "stream scan delete recv filtered"); + printSpecDataBlock(pDelBlock, getStreamOpName(pOperator->operatorType), "delete recv filtered", GET_TASKID(pTaskInfo)); if (pDelBlock->info.rows == 0) { if (pInfo->tqReader) { blockDataDestroy(pDelBlock); @@ -2161,7 +2127,7 @@ FETCH_NEXT_BLOCK: if (!isIntervalWindow(pInfo) && !isSessionWindow(pInfo) && !isStateWindow(pInfo)) { generateDeleteResultBlock(pInfo, pDelBlock, pInfo->pDeleteDataRes); pInfo->pDeleteDataRes->info.type = STREAM_DELETE_RESULT; - printDataBlock(pDelBlock, "stream scan delete result"); + printSpecDataBlock(pDelBlock, getStreamOpName(pOperator->operatorType), "delete result", GET_TASKID(pTaskInfo)); blockDataDestroy(pDelBlock); if (pInfo->pDeleteDataRes->info.rows > 0) { @@ -2176,7 +2142,7 @@ FETCH_NEXT_BLOCK: prepareRangeScan(pInfo, pInfo->pUpdateRes, &pInfo->updateResIndex); copyDataBlock(pInfo->pDeleteDataRes, pInfo->pUpdateRes); pInfo->pDeleteDataRes->info.type = STREAM_DELETE_DATA; - printDataBlock(pDelBlock, "stream scan delete data"); + printSpecDataBlock(pDelBlock, getStreamOpName(pOperator->operatorType), "delete data", GET_TASKID(pTaskInfo)); if (pInfo->tqReader) { blockDataDestroy(pDelBlock); } @@ -2191,7 +2157,7 @@ FETCH_NEXT_BLOCK: default: break; } - // printDataBlock(pBlock, "stream scan recv"); + printDataBlock(pBlock, getStreamOpName(pOperator->operatorType), GET_TASKID(pTaskInfo)); return pBlock; } else if (pInfo->blockType == STREAM_INPUT__DATA_SUBMIT) { qDebug("stream scan mode:%d, %s", pInfo->scanMode, id); @@ -2227,7 +2193,7 @@ FETCH_NEXT_BLOCK: STableScanInfo* pTableScanInfo = pInfo->pTableScanOp->info; pSDB->info.type = pInfo->scanMode == STREAM_SCAN_FROM_DATAREADER_RANGE ?
STREAM_NORMAL : STREAM_PULL_DATA; checkUpdateData(pInfo, true, pSDB, false); - printDataBlock(pSDB, "stream scan update"); + printSpecDataBlock(pSDB, getStreamOpName(pOperator->operatorType), "update", GET_TASKID(pTaskInfo)); calBlockTbName(pInfo, pSDB); return pSDB; } diff --git a/source/libs/executor/src/streamtimewindowoperator.c b/source/libs/executor/src/streamtimewindowoperator.c new file mode 100644 index 0000000000..01514ea88a --- /dev/null +++ b/source/libs/executor/src/streamtimewindowoperator.c @@ -0,0 +1,3195 @@ +/* + * Copyright (c) 2019 TAOS Data, Inc. + * + * This program is free software: you can use, redistribute, and/or modify + * it under the terms of the GNU Affero General Public License, version 3 + * or later ("AGPL"), as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */ +#include "executorInt.h" +#include "filter.h" +#include "function.h" +#include "functionMgt.h" +#include "operator.h" +#include "querytask.h" +#include "tcommon.h" +#include "tcompare.h" +#include "tdatablock.h" +#include "tfill.h" +#include "tglobal.h" +#include "tlog.h" +#include "ttime.h" + +#define IS_FINAL_INTERVAL_OP(op) ((op)->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_INTERVAL) +#define IS_FINAL_SESSION_OP(op) ((op)->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_SESSION) +#define DEAULT_DELETE_MARK (1000LL * 60LL * 60LL * 24LL * 365LL * 10LL) +#define STREAM_INTERVAL_OP_STATE_NAME "StreamIntervalHistoryState" +#define STREAM_SESSION_OP_STATE_NAME "StreamSessionHistoryState" +#define STREAM_STATE_OP_STATE_NAME "StreamStateHistoryState" + +typedef struct SStateWindowInfo { + SResultWindowInfo winInfo; + SStateKeys* pStateKey; +} SStateWindowInfo; + +typedef struct SPullWindowInfo { + STimeWindow window; + uint64_t groupId; + STimeWindow calWin; +} SPullWindowInfo; + +typedef int32_t (*__compare_fn_t)(void* pKey, void* data, int32_t index); + +static int32_t binarySearchCom(void* keyList, int num, void* pKey, int order, __compare_fn_t comparefn) { + int firstPos = 0, lastPos = num - 1, midPos = -1; + int numOfRows = 0; + + if (num <= 0) return -1; + if (order == TSDB_ORDER_DESC) { + // find the first position which is smaller or equal than the key + while (1) { + if (comparefn(pKey, keyList, lastPos) >= 0) return lastPos; + if (comparefn(pKey, keyList, firstPos) == 0) return firstPos; + if (comparefn(pKey, keyList, firstPos) < 0) return firstPos - 1; + + numOfRows = lastPos - firstPos + 1; + midPos = (numOfRows >> 1) + firstPos; + + if (comparefn(pKey, keyList, midPos) < 0) { + lastPos = midPos - 1; + } else if (comparefn(pKey, keyList, midPos) > 0) { + firstPos = midPos + 1; + } else { + break; + } + } + + } else { + // find the first position which is bigger or equal than the key + while (1) { + if (comparefn(pKey, keyList,
firstPos) <= 0) return firstPos; + if (comparefn(pKey, keyList, lastPos) == 0) return lastPos; + + if (comparefn(pKey, keyList, lastPos) > 0) { + lastPos = lastPos + 1; + if (lastPos >= num) + return -1; + else + return lastPos; + } + + numOfRows = lastPos - firstPos + 1; + midPos = (numOfRows >> 1) + firstPos; + + if (comparefn(pKey, keyList, midPos) < 0) { + lastPos = midPos - 1; + } else if (comparefn(pKey, keyList, midPos) > 0) { + firstPos = midPos + 1; + } else { + break; + } + } + } + + return midPos; +} + +static int32_t comparePullWinKey(void* pKey, void* data, int32_t index) { + SArray* res = (SArray*)data; + SPullWindowInfo* pos = taosArrayGet(res, index); + SPullWindowInfo* pData = (SPullWindowInfo*)pKey; + if (pData->groupId > pos->groupId) { + return 1; + } else if (pData->groupId < pos->groupId) { + return -1; + } + + if (pData->window.skey > pos->window.ekey) { + return 1; + } else if (pData->window.ekey < pos->window.skey) { + return -1; + } + return 0; +} + +static int32_t savePullWindow(SPullWindowInfo* pPullInfo, SArray* pPullWins) { + int32_t size = taosArrayGetSize(pPullWins); + int32_t index = binarySearchCom(pPullWins, size, pPullInfo, TSDB_ORDER_DESC, comparePullWinKey); + if (index == -1) { + index = 0; + } else { + int32_t code = comparePullWinKey(pPullInfo, pPullWins, index); + if (code == 0) { + SPullWindowInfo* pos = taosArrayGet(pPullWins, index); + pos->window.skey = TMIN(pos->window.skey, pPullInfo->window.skey); + pos->window.ekey = TMAX(pos->window.ekey, pPullInfo->window.ekey); + pos->calWin.skey = TMIN(pos->calWin.skey, pPullInfo->calWin.skey); + pos->calWin.ekey = TMAX(pos->calWin.ekey, pPullInfo->calWin.ekey); + return TSDB_CODE_SUCCESS; + } else if (code > 0) { + index++; + } + } + if (taosArrayInsert(pPullWins, index, pPullInfo) == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + return TSDB_CODE_SUCCESS; +} + +static int32_t saveResult(SResultWindowInfo winInfo, SSHashObj* pStUpdated) { + winInfo.sessionWin.win.ekey = 
winInfo.sessionWin.win.skey; + return tSimpleHashPut(pStUpdated, &winInfo.sessionWin, sizeof(SSessionKey), &winInfo, sizeof(SResultWindowInfo)); +} + +static int32_t saveWinResult(SWinKey* pKey, SRowBuffPos* pPos, SSHashObj* pUpdatedMap) { + int32_t code = tSimpleHashPut(pUpdatedMap, pKey, sizeof(SWinKey), &pPos, POINTER_BYTES); + return code; +} + +static int32_t saveWinResultInfo(TSKEY ts, uint64_t groupId, SRowBuffPos* pPos, SSHashObj* pUpdatedMap) { + SWinKey key = {.ts = ts, .groupId = groupId}; + int32_t code = saveWinResult(&key, pPos, pUpdatedMap); + return code; +} + +static void removeResults(SArray* pWins, SSHashObj* pUpdatedMap) { + int32_t size = taosArrayGetSize(pWins); + for (int32_t i = 0; i < size; i++) { + SWinKey* pW = taosArrayGet(pWins, i); + void* tmp = tSimpleHashGet(pUpdatedMap, pW, sizeof(SWinKey)); + if (tmp) { + void* value = *(void**)tmp; + taosMemoryFree(value); + tSimpleHashRemove(pUpdatedMap, pW, sizeof(SWinKey)); + } + } +} + +static int32_t compareWinKey(void* pKey, void* data, int32_t index) { + void* pDataPos = taosArrayGet((SArray*)data, index); + return winKeyCmprImpl(pKey, pDataPos); +} + +static void removeDeleteResults(SSHashObj* pUpdatedMap, SArray* pDelWins) { + taosArraySort(pDelWins, winKeyCmprImpl); + taosArrayRemoveDuplicate(pDelWins, winKeyCmprImpl, NULL); + int32_t delSize = taosArrayGetSize(pDelWins); + if (tSimpleHashGetSize(pUpdatedMap) == 0 || delSize == 0) { + return; + } + void* pIte = NULL; + int32_t iter = 0; + while ((pIte = tSimpleHashIterate(pUpdatedMap, pIte, &iter)) != NULL) { + SWinKey* pResKey = tSimpleHashGetKey(pIte, NULL); + int32_t index = binarySearchCom(pDelWins, delSize, pResKey, TSDB_ORDER_DESC, compareWinKey); + if (index >= 0 && 0 == compareWinKey(pResKey, pDelWins, index)) { + taosArrayRemove(pDelWins, index); + delSize = taosArrayGetSize(pDelWins); + } + } +} + +bool isOverdue(TSKEY ekey, STimeWindowAggSupp* pTwSup) { + ASSERTS(pTwSup->maxTs == INT64_MIN || pTwSup->maxTs > 0, "maxts should
greater than 0"); + return pTwSup->maxTs != INT64_MIN && ekey < pTwSup->maxTs - pTwSup->waterMark; +} + +bool isCloseWindow(STimeWindow* pWin, STimeWindowAggSupp* pTwSup) { return isOverdue(pWin->ekey, pTwSup); } + +static bool doDeleteWindow(SOperatorInfo* pOperator, TSKEY ts, uint64_t groupId) { + SStorageAPI* pAPI = &pOperator->pTaskInfo->storageAPI; + + SStreamIntervalOperatorInfo* pInfo = pOperator->info; + SWinKey key = {.ts = ts, .groupId = groupId}; + tSimpleHashRemove(pInfo->aggSup.pResultRowHashTable, &key, sizeof(SWinKey)); + pAPI->stateStore.streamStateDel(pInfo->pState, &key); + return true; +} + +static int32_t getChildIndex(SSDataBlock* pBlock) { return pBlock->info.childId; } + +static void doDeleteWindows(SOperatorInfo* pOperator, SInterval* pInterval, SSDataBlock* pBlock, SArray* pUpWins, + SSHashObj* pUpdatedMap) { + SStreamIntervalOperatorInfo* pInfo = pOperator->info; + SColumnInfoData* pStartTsCol = taosArrayGet(pBlock->pDataBlock, START_TS_COLUMN_INDEX); + TSKEY* startTsCols = (TSKEY*)pStartTsCol->pData; + SColumnInfoData* pEndTsCol = taosArrayGet(pBlock->pDataBlock, END_TS_COLUMN_INDEX); + TSKEY* endTsCols = (TSKEY*)pEndTsCol->pData; + SColumnInfoData* pCalStTsCol = taosArrayGet(pBlock->pDataBlock, CALCULATE_START_TS_COLUMN_INDEX); + TSKEY* calStTsCols = (TSKEY*)pCalStTsCol->pData; + SColumnInfoData* pCalEnTsCol = taosArrayGet(pBlock->pDataBlock, CALCULATE_END_TS_COLUMN_INDEX); + TSKEY* calEnTsCols = (TSKEY*)pCalEnTsCol->pData; + SColumnInfoData* pGpCol = taosArrayGet(pBlock->pDataBlock, GROUPID_COLUMN_INDEX); + uint64_t* pGpDatas = (uint64_t*)pGpCol->pData; + for (int32_t i = 0; i < pBlock->info.rows; i++) { + SResultRowInfo dumyInfo = {0}; + dumyInfo.cur.pageId = -1; + + STimeWindow win = {0}; + if (IS_FINAL_INTERVAL_OP(pOperator)) { + win.skey = startTsCols[i]; + win.ekey = endTsCols[i]; + } else { + win = getActiveTimeWindow(NULL, &dumyInfo, startTsCols[i], pInterval, TSDB_ORDER_ASC); + } + + do { + if (!inCalSlidingWindow(pInterval, 
&win, calStTsCols[i], calEnTsCols[i], pBlock->info.type)) { + getNextTimeWindow(pInterval, &win, TSDB_ORDER_ASC); + continue; + } + uint64_t winGpId = pGpDatas[i]; + SWinKey winRes = {.ts = win.skey, .groupId = winGpId}; + void* chIds = taosHashGet(pInfo->pPullDataMap, &winRes, sizeof(SWinKey)); + if (chIds) { + int32_t childId = getChildIndex(pBlock); + SArray* chArray = *(void**)chIds; + int32_t index = taosArraySearchIdx(chArray, &childId, compareInt32Val, TD_EQ); + if (index != -1) { + qDebug("===stream===try push delete window%" PRId64 "chId:%d ,continue", win.skey, childId); + getNextTimeWindow(pInterval, &win, TSDB_ORDER_ASC); + continue; + } + } + bool res = doDeleteWindow(pOperator, win.skey, winGpId); + if (pUpWins && res) { + taosArrayPush(pUpWins, &winRes); + } + if (pUpdatedMap) { + tSimpleHashRemove(pUpdatedMap, &winRes, sizeof(SWinKey)); + } + getNextTimeWindow(pInterval, &win, TSDB_ORDER_ASC); + } while (win.ekey <= endTsCols[i]); + } +} + +static int32_t getAllIntervalWindow(SSHashObj* pHashMap, SSHashObj* resWins) { + void* pIte = NULL; + int32_t iter = 0; + while ((pIte = tSimpleHashIterate(pHashMap, pIte, &iter)) != NULL) { + SWinKey* pKey = tSimpleHashGetKey(pIte, NULL); + uint64_t groupId = pKey->groupId; + TSKEY ts = pKey->ts; + int32_t code = saveWinResultInfo(ts, groupId, *(SRowBuffPos**)pIte, resWins); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + return TSDB_CODE_SUCCESS; +} + +static int32_t closeStreamIntervalWindow(SSHashObj* pHashMap, STimeWindowAggSupp* pTwSup, SInterval* pInterval, + SHashObj* pPullDataMap, SSHashObj* closeWins, SArray* pDelWins, + SOperatorInfo* pOperator) { + qDebug("===stream===close interval window"); + void* pIte = NULL; + int32_t iter = 0; + SStreamIntervalOperatorInfo* pInfo = pOperator->info; + int32_t delSize = taosArrayGetSize(pDelWins); + while ((pIte = tSimpleHashIterate(pHashMap, pIte, &iter)) != NULL) { + void* key = tSimpleHashGetKey(pIte, NULL); + SWinKey* pWinKey = (SWinKey*)key; + if 
(delSize > 0) { + int32_t index = binarySearchCom(pDelWins, delSize, pWinKey, TSDB_ORDER_DESC, compareWinKey); + if (index >= 0 && 0 == compareWinKey(pWinKey, pDelWins, index)) { + taosArrayRemove(pDelWins, index); + delSize = taosArrayGetSize(pDelWins); + } + } + + void* chIds = taosHashGet(pPullDataMap, pWinKey, sizeof(SWinKey)); + STimeWindow win = { + .skey = pWinKey->ts, + .ekey = taosTimeAdd(win.skey, pInterval->interval, pInterval->intervalUnit, pInterval->precision) - 1, + }; + if (isCloseWindow(&win, pTwSup)) { + if (chIds && pPullDataMap) { + SArray* chAy = *(SArray**)chIds; + int32_t size = taosArrayGetSize(chAy); + qDebug("===stream===window %" PRId64 " wait child size:%d", pWinKey->ts, size); + for (int32_t i = 0; i < size; i++) { + qDebug("===stream===window %" PRId64 " wait child id:%d", pWinKey->ts, *(int32_t*)taosArrayGet(chAy, i)); + } + continue; + } else if (pPullDataMap) { + qDebug("===stream===close window %" PRId64, pWinKey->ts); + } + + if (pTwSup->calTrigger == STREAM_TRIGGER_WINDOW_CLOSE) { + int32_t code = saveWinResult(pWinKey, *(SRowBuffPos**)pIte, closeWins); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + tSimpleHashIterateRemove(pHashMap, pWinKey, sizeof(SWinKey), &pIte, &iter); + } + } + return TSDB_CODE_SUCCESS; +} + +STimeWindow getFinalTimeWindow(int64_t ts, SInterval* pInterval) { + STimeWindow w = {.skey = ts, .ekey = INT64_MAX}; + w.ekey = taosTimeAdd(w.skey, pInterval->interval, pInterval->intervalUnit, pInterval->precision) - 1; + return w; +} + +static void doBuildDeleteResult(SStreamIntervalOperatorInfo* pInfo, SArray* pWins, int32_t* index, + SSDataBlock* pBlock) { + blockDataCleanup(pBlock); + int32_t size = taosArrayGetSize(pWins); + if (*index == size) { + *index = 0; + taosArrayClear(pWins); + return; + } + blockDataEnsureCapacity(pBlock, size - *index); + uint64_t uid = 0; + for (int32_t i = *index; i < size; i++) { + SWinKey* pWin = taosArrayGet(pWins, i); + void* tbname = NULL; + 
pInfo->statestore.streamStateGetParName(pInfo->pState, pWin->groupId, &tbname); + if (tbname == NULL) { + appendOneRowToStreamSpecialBlock(pBlock, &pWin->ts, &pWin->ts, &uid, &pWin->groupId, NULL); + } else { + char parTbName[VARSTR_HEADER_SIZE + TSDB_TABLE_NAME_LEN]; + STR_WITH_MAXSIZE_TO_VARSTR(parTbName, tbname, sizeof(parTbName)); + appendOneRowToStreamSpecialBlock(pBlock, &pWin->ts, &pWin->ts, &uid, &pWin->groupId, parTbName); + } + pInfo->statestore.streamStateFreeVal(tbname); + (*index)++; + } +} + +void destroyStreamFinalIntervalOperatorInfo(void* param) { + SStreamIntervalOperatorInfo* pInfo = (SStreamIntervalOperatorInfo*)param; + cleanupBasicInfo(&pInfo->binfo); + cleanupAggSup(&pInfo->aggSup); + // it should be empty. + void* pIte = NULL; + while ((pIte = taosHashIterate(pInfo->pPullDataMap, pIte)) != NULL) { + taosArrayDestroy(*(void**)pIte); + } + taosHashCleanup(pInfo->pPullDataMap); + taosHashCleanup(pInfo->pFinalPullDataMap); + taosArrayDestroy(pInfo->pPullWins); + blockDataDestroy(pInfo->pPullDataRes); + taosArrayDestroy(pInfo->pDelWins); + blockDataDestroy(pInfo->pDelRes); + pInfo->statestore.streamFileStateDestroy(pInfo->pState->pFileState); + taosMemoryFreeClear(pInfo->pState); + + nodesDestroyNode((SNode*)pInfo->pPhyNode); + colDataDestroy(&pInfo->twAggSup.timeWindowData); + pInfo->groupResInfo.pRows = taosArrayDestroy(pInfo->groupResInfo.pRows); + cleanupExprSupp(&pInfo->scalarSupp); + tSimpleHashCleanup(pInfo->pUpdatedMap); + pInfo->pUpdatedMap = NULL; + pInfo->pUpdated = taosArrayDestroy(pInfo->pUpdated); + + taosMemoryFreeClear(param); +} + +static bool allInvertible(SqlFunctionCtx* pFCtx, int32_t numOfCols) { + for (int32_t i = 0; i < numOfCols; i++) { + if (fmIsUserDefinedFunc(pFCtx[i].functionId) || !fmIsInvertible(pFCtx[i].functionId)) { + return false; + } + } + return true; +} + +void initIntervalDownStream(SOperatorInfo* downstream, uint16_t type, SStreamIntervalOperatorInfo* pInfo) { + SStateStore* pAPI = 
&downstream->pTaskInfo->storageAPI.stateStore; + + if (downstream->operatorType != QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) { + initIntervalDownStream(downstream->pDownstream[0], type, pInfo); + return; + } + + SStreamScanInfo* pScanInfo = downstream->info; + pScanInfo->windowSup.parentType = type; + pScanInfo->windowSup.pIntervalAggSup = &pInfo->aggSup; + if (!pScanInfo->pUpdateInfo) { + pScanInfo->pUpdateInfo = pAPI->updateInfoInitP(&pInfo->interval, pInfo->twAggSup.waterMark); + } + + pScanInfo->interval = pInfo->interval; + pScanInfo->twAggSup = pInfo->twAggSup; + pScanInfo->pState = pInfo->pState; +} + +void compactFunctions(SqlFunctionCtx* pDestCtx, SqlFunctionCtx* pSourceCtx, int32_t numOfOutput, + SExecTaskInfo* pTaskInfo, SColumnInfoData* pTimeWindowData) { + for (int32_t k = 0; k < numOfOutput; ++k) { + if (fmIsWindowPseudoColumnFunc(pDestCtx[k].functionId)) { + if (!pTimeWindowData) { + continue; + } + + SResultRowEntryInfo* pEntryInfo = GET_RES_INFO(&pDestCtx[k]); + char* p = GET_ROWCELL_INTERBUF(pEntryInfo); + SColumnInfoData idata = {0}; + idata.info.type = TSDB_DATA_TYPE_BIGINT; + idata.info.bytes = tDataTypes[TSDB_DATA_TYPE_BIGINT].bytes; + idata.pData = p; + + SScalarParam out = {.columnData = &idata}; + SScalarParam tw = {.numOfRows = 5, .columnData = pTimeWindowData}; + pDestCtx[k].sfp.process(&tw, 1, &out); + pEntryInfo->numOfRes = 1; + } else if (functionNeedToExecute(&pDestCtx[k]) && pDestCtx[k].fpSet.combine != NULL) { + int32_t code = pDestCtx[k].fpSet.combine(&pDestCtx[k], &pSourceCtx[k]); + if (code != TSDB_CODE_SUCCESS) { + qError("%s apply combine functions error, code: %s", GET_TASKID(pTaskInfo), tstrerror(code)); + } + } else if (pDestCtx[k].fpSet.combine == NULL) { + char* funName = fmGetFuncName(pDestCtx[k].functionId); + qError("%s error, combine funcion for %s is not implemented", GET_TASKID(pTaskInfo), funName); + taosMemoryFreeClear(funName); + } + } +} + +bool hasIntervalWindow(void* pState, SWinKey* pKey, SStateStore* pStore) { + 
return pStore->streamStateCheck(pState, pKey); +} + +int32_t setIntervalOutputBuf(void* pState, STimeWindow* win, SRowBuffPos** pResult, int64_t groupId, + SqlFunctionCtx* pCtx, int32_t numOfOutput, int32_t* rowEntryInfoOffset, + SAggSupporter* pAggSup, SStateStore* pStore) { + SWinKey key = {.ts = win->skey, .groupId = groupId}; + char* value = NULL; + int32_t size = pAggSup->resultRowSize; + + if (pStore->streamStateAddIfNotExist(pState, &key, (void**)&value, &size) < 0) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + *pResult = (SRowBuffPos*)value; + SResultRow* res = (SResultRow*)((*pResult)->pRowBuff); + + // set time window for current result + res->win = (*win); + setResultRowInitCtx(res, pCtx, numOfOutput, rowEntryInfoOffset); + return TSDB_CODE_SUCCESS; +} + +bool isDeletedStreamWindow(STimeWindow* pWin, uint64_t groupId, void* pState, STimeWindowAggSupp* pTwSup, + SStateStore* pStore) { + if (pTwSup->maxTs != INT64_MIN && pWin->ekey < pTwSup->maxTs - pTwSup->deleteMark) { + SWinKey key = {.ts = pWin->skey, .groupId = groupId}; + if (!hasIntervalWindow(pState, &key, pStore)) { + return true; + } + return false; + } + return false; +} + +int32_t getNexWindowPos(SInterval* pInterval, SDataBlockInfo* pBlockInfo, TSKEY* tsCols, int32_t startPos, TSKEY eKey, + STimeWindow* pNextWin) { + int32_t forwardRows = + getNumOfRowsInTimeWindow(pBlockInfo, tsCols, startPos, eKey, binarySearchForKey, NULL, TSDB_ORDER_ASC); + int32_t prevEndPos = forwardRows - 1 + startPos; + return getNextQualifiedWindow(pInterval, pNextWin, pBlockInfo, tsCols, prevEndPos, TSDB_ORDER_ASC); +} + +void addPullWindow(SHashObj* pMap, SWinKey* pWinRes, int32_t size) { + SArray* childIds = taosArrayInit(8, sizeof(int32_t)); + for (int32_t i = 0; i < size; i++) { + taosArrayPush(childIds, &i); + } + taosHashPut(pMap, pWinRes, sizeof(SWinKey), &childIds, sizeof(void*)); +} + +static void clearStreamIntervalOperator(SStreamIntervalOperatorInfo* pInfo) { + 
tSimpleHashClear(pInfo->aggSup.pResultRowHashTable); + clearDiskbasedBuf(pInfo->aggSup.pResultBuf); + initResultRowInfo(&pInfo->binfo.resultRowInfo); + pInfo->aggSup.currentPageId = -1; + pInfo->statestore.streamStateClear(pInfo->pState); +} + +static void clearSpecialDataBlock(SSDataBlock* pBlock) { + if (pBlock->info.rows <= 0) { + return; + } + blockDataCleanup(pBlock); +} + +static void doBuildPullDataBlock(SArray* array, int32_t* pIndex, SSDataBlock* pBlock) { + clearSpecialDataBlock(pBlock); + int32_t size = taosArrayGetSize(array); + if (size - (*pIndex) == 0) { + return; + } + blockDataEnsureCapacity(pBlock, size - (*pIndex)); + SColumnInfoData* pStartTs = (SColumnInfoData*)taosArrayGet(pBlock->pDataBlock, START_TS_COLUMN_INDEX); + SColumnInfoData* pEndTs = (SColumnInfoData*)taosArrayGet(pBlock->pDataBlock, END_TS_COLUMN_INDEX); + SColumnInfoData* pGroupId = (SColumnInfoData*)taosArrayGet(pBlock->pDataBlock, GROUPID_COLUMN_INDEX); + SColumnInfoData* pCalStartTs = (SColumnInfoData*)taosArrayGet(pBlock->pDataBlock, CALCULATE_START_TS_COLUMN_INDEX); + SColumnInfoData* pCalEndTs = (SColumnInfoData*)taosArrayGet(pBlock->pDataBlock, CALCULATE_END_TS_COLUMN_INDEX); + for (; (*pIndex) < size; (*pIndex)++) { + SPullWindowInfo* pWin = taosArrayGet(array, (*pIndex)); + colDataSetVal(pStartTs, pBlock->info.rows, (const char*)&pWin->window.skey, false); + colDataSetVal(pEndTs, pBlock->info.rows, (const char*)&pWin->window.ekey, false); + colDataSetVal(pGroupId, pBlock->info.rows, (const char*)&pWin->groupId, false); + colDataSetVal(pCalStartTs, pBlock->info.rows, (const char*)&pWin->calWin.skey, false); + colDataSetVal(pCalEndTs, pBlock->info.rows, (const char*)&pWin->calWin.ekey, false); + pBlock->info.rows++; + } + if ((*pIndex) == size) { + *pIndex = 0; + taosArrayClear(array); + } + blockDataUpdateTsWindow(pBlock, 0); +} + +void processPullOver(SSDataBlock* pBlock, SHashObj* pMap, SHashObj* pFinalMap, SInterval* pInterval, SArray* pPullWins, + int32_t numOfCh, 
SOperatorInfo* pOperator) { + SColumnInfoData* pStartCol = taosArrayGet(pBlock->pDataBlock, CALCULATE_START_TS_COLUMN_INDEX); + TSKEY* tsData = (TSKEY*)pStartCol->pData; + SColumnInfoData* pEndCol = taosArrayGet(pBlock->pDataBlock, CALCULATE_END_TS_COLUMN_INDEX); + TSKEY* tsEndData = (TSKEY*)pEndCol->pData; + SColumnInfoData* pGroupCol = taosArrayGet(pBlock->pDataBlock, GROUPID_COLUMN_INDEX); + uint64_t* groupIdData = (uint64_t*)pGroupCol->pData; + int32_t chId = getChildIndex(pBlock); + for (int32_t i = 0; i < pBlock->info.rows; i++) { + TSKEY winTs = tsData[i]; + while (winTs <= tsEndData[i]) { + SWinKey winRes = {.ts = winTs, .groupId = groupIdData[i]}; + void* chIds = taosHashGet(pMap, &winRes, sizeof(SWinKey)); + if (chIds) { + SArray* chArray = *(SArray**)chIds; + int32_t index = taosArraySearchIdx(chArray, &chId, compareInt32Val, TD_EQ); + if (index != -1) { + qDebug("===stream===retrive window %" PRId64 " delete child id %d", winRes.ts, chId); + taosArrayRemove(chArray, index); + if (taosArrayGetSize(chArray) == 0) { + // pull data is over + taosArrayDestroy(chArray); + taosHashRemove(pMap, &winRes, sizeof(SWinKey)); + qDebug("===stream===retrive pull data over.window %" PRId64, winRes.ts); + + void* pFinalCh = taosHashGet(pFinalMap, &winRes, sizeof(SWinKey)); + if (pFinalCh) { + taosHashRemove(pFinalMap, &winRes, sizeof(SWinKey)); + doDeleteWindow(pOperator, winRes.ts, winRes.groupId); + STimeWindow nextWin = getFinalTimeWindow(winRes.ts, pInterval); + SPullWindowInfo pull = {.window = nextWin, + .groupId = winRes.groupId, + .calWin.skey = nextWin.skey, + .calWin.ekey = nextWin.skey}; + // add pull data request + if (savePullWindow(&pull, pPullWins) == TSDB_CODE_SUCCESS) { + addPullWindow(pMap, &winRes, numOfCh); + qDebug("===stream===prepare final retrive for delete %" PRId64 ", size:%d", winRes.ts, numOfCh); + } + } + } + } + } + winTs = taosTimeAdd(winTs, pInterval->sliding, pInterval->slidingUnit, pInterval->precision); + } + } +} + +static void 
addRetriveWindow(SArray* wins, SStreamIntervalOperatorInfo* pInfo, int32_t childId) { + int32_t size = taosArrayGetSize(wins); + for (int32_t i = 0; i < size; i++) { + SWinKey* winKey = taosArrayGet(wins, i); + STimeWindow nextWin = getFinalTimeWindow(winKey->ts, &pInfo->interval); + if (isOverdue(nextWin.ekey, &pInfo->twAggSup) && pInfo->ignoreExpiredData) { + continue; + } + void* chIds = taosHashGet(pInfo->pPullDataMap, winKey, sizeof(SWinKey)); + if (!chIds) { + SPullWindowInfo pull = { + .window = nextWin, .groupId = winKey->groupId, .calWin.skey = nextWin.skey, .calWin.ekey = nextWin.skey}; + // add pull data request + if (savePullWindow(&pull, pInfo->pPullWins) == TSDB_CODE_SUCCESS) { + addPullWindow(pInfo->pPullDataMap, winKey, pInfo->numOfChild); + qDebug("===stream===prepare retrive for delete %" PRId64 ", size:%d", winKey->ts, pInfo->numOfChild); + } + } else { + SArray* chArray = *(void**)chIds; + int32_t index = taosArraySearchIdx(chArray, &childId, compareInt32Val, TD_EQ); + qDebug("===stream===check final retrive %" PRId64 ",chid:%d", winKey->ts, index); + if (index == -1) { + qDebug("===stream===add final retrive %" PRId64, winKey->ts); + taosHashPut(pInfo->pFinalPullDataMap, winKey, sizeof(SWinKey), NULL, 0); + } + } + } +} + +static void clearFunctionContext(SExprSupp* pSup) { + for (int32_t i = 0; i < pSup->numOfExprs; i++) { + pSup->pCtx[i].saveHandle.currentPage = -1; + } +} + +int32_t getOutputBuf(void* pState, SRowBuffPos* pPos, SResultRow** pResult, SStateStore* pStore) { + return pStore->streamStateGetByPos(pState, pPos, (void**)pResult); +} + +int32_t buildDataBlockFromGroupRes(SOperatorInfo* pOperator, void* pState, SSDataBlock* pBlock, SExprSupp* pSup, + SGroupResInfo* pGroupResInfo) { + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + SStorageAPI* pAPI = &pOperator->pTaskInfo->storageAPI; + + SExprInfo* pExprInfo = pSup->pExprInfo; + int32_t numOfExprs = pSup->numOfExprs; + int32_t* rowEntryOffset = pSup->rowEntryInfoOffset; + 
SqlFunctionCtx* pCtx = pSup->pCtx; + + int32_t numOfRows = getNumOfTotalRes(pGroupResInfo); + + for (int32_t i = pGroupResInfo->index; i < numOfRows; i += 1) { + SRowBuffPos* pPos = *(SRowBuffPos**)taosArrayGet(pGroupResInfo->pRows, i); + SResultRow* pRow = NULL; + int32_t code = getOutputBuf(pState, pPos, &pRow, &pAPI->stateStore); + uint64_t groupId = ((SWinKey*)pPos->pKey)->groupId; + ASSERT(code == 0); + doUpdateNumOfRows(pCtx, pRow, numOfExprs, rowEntryOffset); + // no results, continue to check the next one + if (pRow->numOfRows == 0) { + pGroupResInfo->index += 1; + continue; + } + if (pBlock->info.id.groupId == 0) { + pBlock->info.id.groupId = groupId; + void* tbname = NULL; + if (pAPI->stateStore.streamStateGetParName(pTaskInfo->streamInfo.pState, pBlock->info.id.groupId, &tbname) < 0) { + pBlock->info.parTbName[0] = 0; + } else { + memcpy(pBlock->info.parTbName, tbname, TSDB_TABLE_NAME_LEN); + } + pAPI->stateStore.streamStateFreeVal(tbname); + } else { + // current value belongs to different group, it can't be packed into one datablock + if (pBlock->info.id.groupId != groupId) { + break; + } + } + + if (pBlock->info.rows + pRow->numOfRows > pBlock->info.capacity) { + ASSERT(pBlock->info.rows > 0); + break; + } + pGroupResInfo->index += 1; + + for (int32_t j = 0; j < numOfExprs; ++j) { + int32_t slotId = pExprInfo[j].base.resSchema.slotId; + + pCtx[j].resultInfo = getResultEntryInfo(pRow, j, rowEntryOffset); + SResultRowEntryInfo* pEnryInfo = pCtx[j].resultInfo; + + if (pCtx[j].fpSet.finalize) { + int32_t code1 = pCtx[j].fpSet.finalize(&pCtx[j], pBlock); + if (TAOS_FAILED(code1)) { + qError("%s build result data block error, code %s", GET_TASKID(pTaskInfo), tstrerror(code1)); + T_LONG_JMP(pTaskInfo->env, code1); + } + } else if (strcmp(pCtx[j].pExpr->pExpr->_function.functionName, "_select_value") == 0) { + // do nothing, todo refactor + } else { + // expand the result into multiple rows. 
E.g., _wstart, top(k, 20) + // the _wstart needs to copy to 20 following rows, since the results of top-k expands to 20 different rows. + SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, slotId); + char* in = GET_ROWCELL_INTERBUF(pCtx[j].resultInfo); + for (int32_t k = 0; k < pRow->numOfRows; ++k) { + colDataSetVal(pColInfoData, pBlock->info.rows + k, in, pCtx[j].resultInfo->isNullRes); + } + } + } + + pBlock->info.rows += pRow->numOfRows; + } + + pBlock->info.dataLoad = 1; + blockDataUpdateTsWindow(pBlock, 0); + return TSDB_CODE_SUCCESS; +} + +void doBuildStreamIntervalResult(SOperatorInfo* pOperator, void* pState, SSDataBlock* pBlock, + SGroupResInfo* pGroupResInfo) { + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + // set output datablock version + pBlock->info.version = pTaskInfo->version; + + blockDataCleanup(pBlock); + if (!hasRemainResults(pGroupResInfo)) { + return; + } + + // clear the existed group id + pBlock->info.id.groupId = 0; + buildDataBlockFromGroupRes(pOperator, pState, pBlock, &pOperator->exprSupp, pGroupResInfo); +} + +static int32_t getNextQualifiedFinalWindow(SInterval* pInterval, STimeWindow* pNext, SDataBlockInfo* pDataBlockInfo, + TSKEY* primaryKeys, int32_t prevPosition) { + int32_t startPos = prevPosition + 1; + if (startPos == pDataBlockInfo->rows) { + startPos = -1; + } else { + *pNext = getFinalTimeWindow(primaryKeys[startPos], pInterval); + } + return startPos; +} + +static void setStreamDataVersion(SExecTaskInfo* pTaskInfo, int64_t version, int64_t ckId) { + pTaskInfo->streamInfo.dataVersion = version; + pTaskInfo->streamInfo.checkPointId = ckId; +} + +static void doStreamIntervalAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSDataBlock, uint64_t groupId, + SSHashObj* pUpdatedMap) { + SStreamIntervalOperatorInfo* pInfo = (SStreamIntervalOperatorInfo*)pOperator->info; + pInfo->dataVersion = TMAX(pInfo->dataVersion, pSDataBlock->info.version); + + SResultRowInfo* pResultRowInfo = &(pInfo->binfo.resultRowInfo); + 
SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + SExprSupp* pSup = &pOperator->exprSupp; + int32_t numOfOutput = pSup->numOfExprs; + int32_t step = 1; + TSKEY* tsCols = NULL; + SRowBuffPos* pResPos = NULL; + SResultRow* pResult = NULL; + int32_t forwardRows = 0; + + SColumnInfoData* pColDataInfo = taosArrayGet(pSDataBlock->pDataBlock, pInfo->primaryTsIndex); + tsCols = (int64_t*)pColDataInfo->pData; + + int32_t startPos = 0; + TSKEY ts = getStartTsKey(&pSDataBlock->info.window, tsCols); + STimeWindow nextWin = {0}; + if (IS_FINAL_INTERVAL_OP(pOperator)) { + nextWin = getFinalTimeWindow(ts, &pInfo->interval); + } else { + nextWin = getActiveTimeWindow(pInfo->aggSup.pResultBuf, pResultRowInfo, ts, &pInfo->interval, TSDB_ORDER_ASC); + } + while (1) { + bool isClosed = isCloseWindow(&nextWin, &pInfo->twAggSup); + if ((pInfo->ignoreExpiredData && isClosed && !IS_FINAL_INTERVAL_OP(pOperator)) || + !inSlidingWindow(&pInfo->interval, &nextWin, &pSDataBlock->info)) { + startPos = getNexWindowPos(&pInfo->interval, &pSDataBlock->info, tsCols, startPos, nextWin.ekey, &nextWin); + if (startPos < 0) { + break; + } + continue; + } + + if (IS_FINAL_INTERVAL_OP(pOperator) && pInfo->numOfChild > 0) { + bool ignore = true; + SWinKey winRes = { + .ts = nextWin.skey, + .groupId = groupId, + }; + void* chIds = taosHashGet(pInfo->pPullDataMap, &winRes, sizeof(SWinKey)); + if (isDeletedStreamWindow(&nextWin, groupId, pInfo->pState, &pInfo->twAggSup, &pInfo->statestore) && isClosed && + !chIds) { + SPullWindowInfo pull = { + .window = nextWin, .groupId = groupId, .calWin.skey = nextWin.skey, .calWin.ekey = nextWin.skey}; + // add pull data request + if (savePullWindow(&pull, pInfo->pPullWins) == TSDB_CODE_SUCCESS) { + addPullWindow(pInfo->pPullDataMap, &winRes, pInfo->numOfChild); + } + } else { + int32_t index = -1; + SArray* chArray = NULL; + int32_t chId = 0; + if (chIds) { + chArray = *(void**)chIds; + chId = getChildIndex(pSDataBlock); + index = taosArraySearchIdx(chArray, &chId, 
compareInt32Val, TD_EQ); + } + if (index == -1 || pSDataBlock->info.type == STREAM_PULL_DATA) { + ignore = false; + } + } + + if (ignore) { + startPos = getNextQualifiedFinalWindow(&pInfo->interval, &nextWin, &pSDataBlock->info, tsCols, startPos); + if (startPos < 0) { + break; + } + continue; + } + } + + int32_t code = setIntervalOutputBuf(pInfo->pState, &nextWin, &pResPos, groupId, pSup->pCtx, numOfOutput, + pSup->rowEntryInfoOffset, &pInfo->aggSup, &pInfo->statestore); + pResult = (SResultRow*)pResPos->pRowBuff; + if (code != TSDB_CODE_SUCCESS || pResult == NULL) { + T_LONG_JMP(pTaskInfo->env, TSDB_CODE_OUT_OF_MEMORY); + } + if (IS_FINAL_INTERVAL_OP(pOperator)) { + forwardRows = 1; + } else { + forwardRows = getNumOfRowsInTimeWindow(&pSDataBlock->info, tsCols, startPos, nextWin.ekey, binarySearchForKey, + NULL, TSDB_ORDER_ASC); + } + + SWinKey key = { + .ts = pResult->win.skey, + .groupId = groupId, + }; + if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE && pUpdatedMap) { + saveWinResult(&key, pResPos, pUpdatedMap); + } + + if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_WINDOW_CLOSE) { + tSimpleHashPut(pInfo->aggSup.pResultRowHashTable, &key, sizeof(SWinKey), &pResPos, POINTER_BYTES); + } + + updateTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &nextWin, 1); + applyAggFunctionOnPartialTuples(pTaskInfo, pSup->pCtx, &pInfo->twAggSup.timeWindowData, startPos, forwardRows, + pSDataBlock->info.rows, numOfOutput); + key.ts = nextWin.skey; + + if (pInfo->delKey.ts > key.ts) { + pInfo->delKey = key; + } + int32_t prevEndPos = (forwardRows - 1) * step + startPos; + if (pSDataBlock->info.window.skey <= 0 || pSDataBlock->info.window.ekey <= 0) { + qError("table uid %" PRIu64 " data block timestamp range may not be calculated! 
minKey %" PRId64 + ",maxKey %" PRId64, + pSDataBlock->info.id.uid, pSDataBlock->info.window.skey, pSDataBlock->info.window.ekey); + blockDataUpdateTsWindow(pSDataBlock, 0); + + // timestamp of the data is incorrect + if (pSDataBlock->info.window.skey <= 0 || pSDataBlock->info.window.ekey <= 0) { + qError("table uid %" PRIu64 " data block timestamp is out of range! minKey %" PRId64 ",maxKey %" PRId64, + pSDataBlock->info.id.uid, pSDataBlock->info.window.skey, pSDataBlock->info.window.ekey); + } + } + + if (IS_FINAL_INTERVAL_OP(pOperator)) { + startPos = getNextQualifiedFinalWindow(&pInfo->interval, &nextWin, &pSDataBlock->info, tsCols, prevEndPos); + } else { + startPos = + getNextQualifiedWindow(&pInfo->interval, &nextWin, &pSDataBlock->info, tsCols, prevEndPos, TSDB_ORDER_ASC); + } + if (startPos < 0) { + break; + } + } +} + +static inline int winPosCmprImpl(const void* pKey1, const void* pKey2) { + SRowBuffPos* pos1 = *(SRowBuffPos**)pKey1; + SRowBuffPos* pos2 = *(SRowBuffPos**)pKey2; + SWinKey* pWin1 = (SWinKey*)pos1->pKey; + SWinKey* pWin2 = (SWinKey*)pos2->pKey; + + if (pWin1->groupId > pWin2->groupId) { + return 1; + } else if (pWin1->groupId < pWin2->groupId) { + return -1; + } + + if (pWin1->ts > pWin2->ts) { + return 1; + } else if (pWin1->ts < pWin2->ts) { + return -1; + } + + return 0; +} + +static void resetUnCloseWinInfo(SSHashObj* winMap) { + void* pIte = NULL; + int32_t iter = 0; + while ((pIte = tSimpleHashIterate(winMap, pIte, &iter)) != NULL) { + SRowBuffPos* pPos = *(SRowBuffPos**)pIte; + pPos->beUsed = true; + } +} + + +static SSDataBlock* buildIntervalResult(SOperatorInfo* pOperator) { + SStreamIntervalOperatorInfo* pInfo = pOperator->info; + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + uint16_t opType = pOperator->operatorType; + if (IS_FINAL_INTERVAL_OP(pOperator)) { + doBuildPullDataBlock(pInfo->pPullWins, &pInfo->pullIndex, pInfo->pPullDataRes); + if (pInfo->pPullDataRes->info.rows != 0) { + // process the rest of the data + 
printDataBlock(pInfo->pPullDataRes, getStreamOpName(opType), GET_TASKID(pTaskInfo)); + return pInfo->pPullDataRes; + } + } + + doBuildDeleteResult(pInfo, pInfo->pDelWins, &pInfo->delIndex, pInfo->pDelRes); + if (pInfo->pDelRes->info.rows != 0) { + // process the rest of the data + printDataBlock(pInfo->pDelRes, getStreamOpName(opType), GET_TASKID(pTaskInfo)); + return pInfo->pDelRes; + } + + doBuildStreamIntervalResult(pOperator, pInfo->pState, pInfo->binfo.pRes, &pInfo->groupResInfo); + if (pInfo->binfo.pRes->info.rows != 0) { + printDataBlock(pInfo->binfo.pRes, getStreamOpName(opType), GET_TASKID(pTaskInfo)); + return pInfo->binfo.pRes; + } + return NULL; +} + +static SSDataBlock* doStreamFinalIntervalAgg(SOperatorInfo* pOperator) { + SStreamIntervalOperatorInfo* pInfo = pOperator->info; + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + SStorageAPI* pAPI = &pOperator->pTaskInfo->storageAPI; + + SOperatorInfo* downstream = pOperator->pDownstream[0]; + SExprSupp* pSup = &pOperator->exprSupp; + + qDebug("stask:%s %s status: %d", GET_TASKID(pTaskInfo), getStreamOpName(pOperator->operatorType), pOperator->status); + + if (pOperator->status == OP_EXEC_DONE) { + return NULL; + } else if (pOperator->status == OP_RES_TO_RETURN) { + SSDataBlock* resBlock = buildIntervalResult(pOperator); + if (resBlock != NULL) { + return resBlock; + } + + if (pInfo->recvGetAll) { + pInfo->recvGetAll = false; + resetUnCloseWinInfo(pInfo->aggSup.pResultRowHashTable); + } + + setOperatorCompleted(pOperator); + if (!IS_FINAL_INTERVAL_OP(pOperator)) { + clearFunctionContext(&pOperator->exprSupp); + // semi interval operator clear disk buffer + clearStreamIntervalOperator(pInfo); + setStreamDataVersion(pTaskInfo, pInfo->dataVersion, pInfo->pState->checkPointId); + qDebug("stask:%s ===stream===%s clear", GET_TASKID(pTaskInfo), getStreamOpName(pOperator->operatorType)); + } else { + if (pInfo->twAggSup.maxTs > 0 && + pInfo->twAggSup.maxTs - pInfo->twAggSup.checkPointInterval > 
pInfo->twAggSup.checkPointTs) { + pAPI->stateStore.streamStateCommit(pInfo->pState); + pAPI->stateStore.streamStateDeleteCheckPoint(pInfo->pState, pInfo->twAggSup.maxTs - pInfo->twAggSup.deleteMark); + pInfo->twAggSup.checkPointTs = pInfo->twAggSup.maxTs; + } + qDebug("stask:%s ===stream===%s close", GET_TASKID(pTaskInfo), getStreamOpName(pOperator->operatorType)); + } + return NULL; + } else { + if (!IS_FINAL_INTERVAL_OP(pOperator)) { + doBuildDeleteResult(pInfo, pInfo->pDelWins, &pInfo->delIndex, pInfo->pDelRes); + if (pInfo->pDelRes->info.rows != 0) { + // process the rest of the data + printDataBlock(pInfo->pDelRes, getStreamOpName(pOperator->operatorType), GET_TASKID(pTaskInfo)); + return pInfo->pDelRes; + } + } + } + + if (!pInfo->pUpdated) { + pInfo->pUpdated = taosArrayInit(4096, POINTER_BYTES); + } + if (!pInfo->pUpdatedMap) { + _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); + pInfo->pUpdatedMap = tSimpleHashInit(4096, hashFn); + } + + while (1) { + if (isTaskKilled(pTaskInfo)) { + if (pInfo->pUpdated != NULL) { + pInfo->pUpdated = taosArrayDestroy(pInfo->pUpdated); + } + + if (pInfo->pUpdatedMap != NULL) { + tSimpleHashCleanup(pInfo->pUpdatedMap); + pInfo->pUpdatedMap = NULL; + } + + T_LONG_JMP(pTaskInfo->env, pTaskInfo->code); + } + + SSDataBlock* pBlock = downstream->fpSet.getNextFn(downstream); + if (pBlock == NULL) { + pOperator->status = OP_RES_TO_RETURN; + qDebug("===stream===return data:%s. 
recv datablock num:%" PRIu64, getStreamOpName(pOperator->operatorType), + pInfo->numOfDatapack); + pInfo->numOfDatapack = 0; + break; + } + pInfo->numOfDatapack++; + printSpecDataBlock(pBlock, getStreamOpName(pOperator->operatorType), "recv", GET_TASKID(pTaskInfo)); + + if (pBlock->info.type == STREAM_NORMAL || pBlock->info.type == STREAM_PULL_DATA) { + pInfo->binfo.pRes->info.type = pBlock->info.type; + } else if (pBlock->info.type == STREAM_DELETE_DATA || pBlock->info.type == STREAM_DELETE_RESULT || + pBlock->info.type == STREAM_CLEAR) { + SArray* delWins = taosArrayInit(8, sizeof(SWinKey)); + doDeleteWindows(pOperator, &pInfo->interval, pBlock, delWins, pInfo->pUpdatedMap); + if (IS_FINAL_INTERVAL_OP(pOperator)) { + int32_t chId = getChildIndex(pBlock); + addRetriveWindow(delWins, pInfo, chId); + if (pBlock->info.type != STREAM_CLEAR) { + taosArrayAddAll(pInfo->pDelWins, delWins); + } + taosArrayDestroy(delWins); + continue; + } + removeResults(delWins, pInfo->pUpdatedMap); + taosArrayAddAll(pInfo->pDelWins, delWins); + taosArrayDestroy(delWins); + + doBuildDeleteResult(pInfo, pInfo->pDelWins, &pInfo->delIndex, pInfo->pDelRes); + if (pInfo->pDelRes->info.rows != 0) { + // process the rest of the data + printDataBlock(pInfo->pDelRes, getStreamOpName(pOperator->operatorType), GET_TASKID(pTaskInfo)); + if (pBlock->info.type == STREAM_CLEAR) { + pInfo->pDelRes->info.type = STREAM_CLEAR; + } else { + pInfo->pDelRes->info.type = STREAM_DELETE_RESULT; + } + return pInfo->pDelRes; + } + + break; + } else if (pBlock->info.type == STREAM_GET_ALL && IS_FINAL_INTERVAL_OP(pOperator)) { + pInfo->recvGetAll = true; + getAllIntervalWindow(pInfo->aggSup.pResultRowHashTable, pInfo->pUpdatedMap); + continue; + } else if (pBlock->info.type == STREAM_RETRIEVE && !IS_FINAL_INTERVAL_OP(pOperator)) { + doDeleteWindows(pOperator, &pInfo->interval, pBlock, NULL, pInfo->pUpdatedMap); + if (taosArrayGetSize(pInfo->pUpdated) > 0) { + break; + } + continue; + } else if (pBlock->info.type == 
STREAM_PULL_OVER && IS_FINAL_INTERVAL_OP(pOperator)) { + processPullOver(pBlock, pInfo->pPullDataMap, pInfo->pFinalPullDataMap, &pInfo->interval, pInfo->pPullWins, + pInfo->numOfChild, pOperator); + continue; + } else if (pBlock->info.type == STREAM_CREATE_CHILD_TABLE) { + return pBlock; + } else { + ASSERTS(pBlock->info.type == STREAM_INVALID, "invalid SSDataBlock type"); + } + + if (pInfo->scalarSupp.pExprInfo != NULL) { + SExprSupp* pExprSup = &pInfo->scalarSupp; + projectApplyFunctions(pExprSup->pExprInfo, pBlock, pBlock, pExprSup->pCtx, pExprSup->numOfExprs, NULL); + } + setInputDataBlock(pSup, pBlock, TSDB_ORDER_ASC, MAIN_SCAN, true); + doStreamIntervalAggImpl(pOperator, pBlock, pBlock->info.id.groupId, pInfo->pUpdatedMap); + pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, pBlock->info.window.ekey); + pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, pBlock->info.watermark); + pInfo->twAggSup.minTs = TMIN(pInfo->twAggSup.minTs, pBlock->info.window.skey); + } + + removeDeleteResults(pInfo->pUpdatedMap, pInfo->pDelWins); + if (IS_FINAL_INTERVAL_OP(pOperator)) { + closeStreamIntervalWindow(pInfo->aggSup.pResultRowHashTable, &pInfo->twAggSup, &pInfo->interval, + pInfo->pPullDataMap, pInfo->pUpdatedMap, pInfo->pDelWins, pOperator); + } + pInfo->binfo.pRes->info.watermark = pInfo->twAggSup.maxTs; + + void* pIte = NULL; + int32_t iter = 0; + while ((pIte = tSimpleHashIterate(pInfo->pUpdatedMap, pIte, &iter)) != NULL) { + taosArrayPush(pInfo->pUpdated, pIte); + } + + tSimpleHashCleanup(pInfo->pUpdatedMap); + pInfo->pUpdatedMap = NULL; + taosArraySort(pInfo->pUpdated, winPosCmprImpl); + + initMultiResInfoFromArrayList(&pInfo->groupResInfo, pInfo->pUpdated); + pInfo->pUpdated = NULL; + blockDataEnsureCapacity(pInfo->binfo.pRes, pOperator->resultInfo.capacity); + + return buildIntervalResult(pOperator); +} + +static int64_t getDeleteMark(SIntervalPhysiNode* pIntervalPhyNode) { + if (pIntervalPhyNode->window.deleteMark <= 0) { + return DEAULT_DELETE_MARK; + } + 
int64_t deleteMark = TMAX(pIntervalPhyNode->window.deleteMark, pIntervalPhyNode->window.watermark); + deleteMark = TMAX(deleteMark, pIntervalPhyNode->interval); + return deleteMark; +} + +static TSKEY compareTs(void* pKey) { + SWinKey* pWinKey = (SWinKey*)pKey; + return pWinKey->ts; +} + +static int32_t getSelectivityBufSize(SqlFunctionCtx* pCtx) { + if (pCtx->subsidiaries.rowLen == 0) { + int32_t rowLen = 0; + for (int32_t j = 0; j < pCtx->subsidiaries.num; ++j) { + SqlFunctionCtx* pc = pCtx->subsidiaries.pCtx[j]; + rowLen += pc->pExpr->base.resSchema.bytes; + } + + return rowLen + pCtx->subsidiaries.num * sizeof(bool); + } else { + return pCtx->subsidiaries.rowLen; + } +} + +static int32_t getMaxFunResSize(SExprSupp* pSup, int32_t numOfCols) { + int32_t size = 0; + for (int32_t i = 0; i < numOfCols; ++i) { + int32_t resSize = getSelectivityBufSize(pSup->pCtx + i); + size = TMAX(size, resSize); + } + return size; +} + +static void streamIntervalReleaseState(SOperatorInfo* pOperator) { + if (pOperator->operatorType != QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_INTERVAL) { + SStreamIntervalOperatorInfo* pInfo = pOperator->info; + int32_t resSize = sizeof(TSKEY); + pInfo->statestore.streamStateSaveInfo(pInfo->pState, STREAM_INTERVAL_OP_STATE_NAME, + strlen(STREAM_INTERVAL_OP_STATE_NAME), &pInfo->twAggSup.maxTs, resSize); + } + SStreamIntervalOperatorInfo* pInfo = pOperator->info; + SStorageAPI* pAPI = &pOperator->pTaskInfo->storageAPI; + pAPI->stateStore.streamStateCommit(pInfo->pState); + SOperatorInfo* downstream = pOperator->pDownstream[0]; + if (downstream->fpSet.releaseStreamStateFn) { + downstream->fpSet.releaseStreamStateFn(downstream); + } +} + +void streamIntervalReloadState(SOperatorInfo* pOperator) { + if (pOperator->operatorType != QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_INTERVAL) { + SStreamIntervalOperatorInfo* pInfo = pOperator->info; + int32_t size = 0; + void* pBuf = NULL; + int32_t code = pInfo->statestore.streamStateGetInfo(pInfo->pState, 
STREAM_INTERVAL_OP_STATE_NAME, + strlen(STREAM_INTERVAL_OP_STATE_NAME), &pBuf, &size); + TSKEY ts = *(TSKEY*)pBuf; + taosMemoryFree(pBuf); + pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, ts); + pInfo->statestore.streamStateReloadInfo(pInfo->pState, ts); + } + SOperatorInfo* downstream = pOperator->pDownstream[0]; + if (downstream->fpSet.reloadStreamStateFn) { + downstream->fpSet.reloadStreamStateFn(downstream); + } +} + +SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, + SExecTaskInfo* pTaskInfo, int32_t numOfChild) { + SIntervalPhysiNode* pIntervalPhyNode = (SIntervalPhysiNode*)pPhyNode; + SStreamIntervalOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SStreamIntervalOperatorInfo)); + SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo)); + if (pInfo == NULL || pOperator == NULL) { + goto _error; + } + + pOperator->pTaskInfo = pTaskInfo; + SStorageAPI* pAPI = &pTaskInfo->storageAPI; + + pInfo->interval = (SInterval){.interval = pIntervalPhyNode->interval, + .sliding = pIntervalPhyNode->sliding, + .intervalUnit = pIntervalPhyNode->intervalUnit, + .slidingUnit = pIntervalPhyNode->slidingUnit, + .offset = pIntervalPhyNode->offset, + .precision = ((SColumnNode*)pIntervalPhyNode->window.pTspk)->node.resType.precision}; + pInfo->twAggSup = (STimeWindowAggSupp){ + .waterMark = pIntervalPhyNode->window.watermark, + .calTrigger = pIntervalPhyNode->window.triggerType, + .maxTs = INT64_MIN, + .minTs = INT64_MAX, + .deleteMark = getDeleteMark(pIntervalPhyNode), + .deleteMarkSaved = 0, + .calTriggerSaved = 0, + .checkPointTs = 0, + .checkPointInterval = + convertTimePrecision(tsCheckpointInterval, TSDB_TIME_PRECISION_MILLI, pInfo->interval.precision), + }; + ASSERTS(pInfo->twAggSup.calTrigger != STREAM_TRIGGER_MAX_DELAY, "trigger type should not be max delay"); + pInfo->primaryTsIndex = ((SColumnNode*)pIntervalPhyNode->window.pTspk)->slotId; + size_t keyBufSize = sizeof(int64_t) + sizeof(int64_t) + 
POINTER_BYTES; + initResultSizeInfo(&pOperator->resultInfo, 4096); + if (pIntervalPhyNode->window.pExprs != NULL) { + int32_t numOfScalar = 0; + SExprInfo* pScalarExprInfo = createExprInfo(pIntervalPhyNode->window.pExprs, NULL, &numOfScalar); + int32_t code = initExprSupp(&pInfo->scalarSupp, pScalarExprInfo, numOfScalar, &pTaskInfo->storageAPI.functionStore); + if (code != TSDB_CODE_SUCCESS) { + goto _error; + } + } + + int32_t numOfCols = 0; + SExprInfo* pExprInfo = createExprInfo(pIntervalPhyNode->window.pFuncs, NULL, &numOfCols); + SSDataBlock* pResBlock = createDataBlockFromDescNode(pPhyNode->pOutputDataBlockDesc); + initBasicInfo(&pInfo->binfo, pResBlock); + + pInfo->pState = taosMemoryCalloc(1, sizeof(SStreamState)); + *(pInfo->pState) = *(pTaskInfo->streamInfo.pState); + + pAPI->stateStore.streamStateSetNumber(pInfo->pState, -1); + int32_t code = initAggSup(&pOperator->exprSupp, &pInfo->aggSup, pExprInfo, numOfCols, keyBufSize, pTaskInfo->id.str, + pInfo->pState, &pTaskInfo->storageAPI.functionStore); + if (code != TSDB_CODE_SUCCESS) { + goto _error; + } + + initExecTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &pTaskInfo->window); + initResultRowInfo(&pInfo->binfo.resultRowInfo); + + pInfo->numOfChild = numOfChild; + pInfo->pPhyNode = (SPhysiNode*)nodesCloneNode((SNode*)pPhyNode); + + pInfo->pPullWins = taosArrayInit(8, sizeof(SPullWindowInfo)); + pInfo->pullIndex = 0; + _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); + pInfo->pPullDataMap = taosHashInit(64, hashFn, false, HASH_NO_LOCK); + pInfo->pFinalPullDataMap = taosHashInit(64, hashFn, false, HASH_NO_LOCK); + pInfo->pPullDataRes = createSpecialDataBlock(STREAM_RETRIEVE); + pInfo->ignoreExpiredData = pIntervalPhyNode->window.igExpired; + pInfo->ignoreExpiredDataSaved = false; + pInfo->pDelRes = createSpecialDataBlock(STREAM_DELETE_RESULT); + pInfo->delIndex = 0; + pInfo->pDelWins = taosArrayInit(4, sizeof(SWinKey)); + pInfo->delKey.ts = INT64_MAX; + pInfo->delKey.groupId = 0; + 
pInfo->numOfDatapack = 0; + pInfo->pUpdated = NULL; + pInfo->pUpdatedMap = NULL; + int32_t funResSize = getMaxFunResSize(&pOperator->exprSupp, numOfCols); + pInfo->pState->pFileState = + pAPI->stateStore.streamFileStateInit(tsStreamBufferSize, sizeof(SWinKey), pInfo->aggSup.resultRowSize, funResSize, + compareTs, pInfo->pState, pInfo->twAggSup.deleteMark, GET_TASKID(pTaskInfo)); + pInfo->dataVersion = 0; + pInfo->statestore = pTaskInfo->storageAPI.stateStore; + pInfo->recvGetAll = false; + + pOperator->operatorType = pPhyNode->type; + if (!IS_FINAL_INTERVAL_OP(pOperator) || numOfChild == 0) { + pInfo->twAggSup.calTrigger = STREAM_TRIGGER_AT_ONCE; + } + pOperator->name = getStreamOpName(pOperator->operatorType); + pOperator->blocking = true; + pOperator->status = OP_NOT_OPENED; + pOperator->info = pInfo; + + pOperator->fpSet = createOperatorFpSet(NULL, doStreamFinalIntervalAgg, NULL, destroyStreamFinalIntervalOperatorInfo, + optrDefaultBufFn, NULL, optrDefaultGetNextExtFn, NULL); + setOperatorStreamStateFn(pOperator, streamIntervalReleaseState, streamIntervalReloadState); + if (pPhyNode->type == QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_INTERVAL) { + initIntervalDownStream(downstream, pPhyNode->type, pInfo); + } + code = appendDownstream(pOperator, &downstream, 1); + if (code != TSDB_CODE_SUCCESS) { + goto _error; + } + + return pOperator; + +_error: + destroyStreamFinalIntervalOperatorInfo(pInfo); + taosMemoryFreeClear(pOperator); + pTaskInfo->code = code; + return NULL; +} + +void destroyStreamAggSupporter(SStreamAggSupporter* pSup) { + tSimpleHashCleanup(pSup->pResultRows); + destroyDiskbasedBuf(pSup->pResultBuf); + blockDataDestroy(pSup->pScanBlock); + taosMemoryFreeClear(pSup->pState); + taosMemoryFreeClear(pSup->pDummyCtx); +} + +void destroyStreamSessionAggOperatorInfo(void* param) { + SStreamSessionAggOperatorInfo* pInfo = (SStreamSessionAggOperatorInfo*)param; + cleanupBasicInfo(&pInfo->binfo); + destroyStreamAggSupporter(&pInfo->streamAggSup); + + if 
(pInfo->pChildren != NULL) { + int32_t size = taosArrayGetSize(pInfo->pChildren); + for (int32_t i = 0; i < size; i++) { + SOperatorInfo* pChild = taosArrayGetP(pInfo->pChildren, i); + destroyOperator(pChild); + } + taosArrayDestroy(pInfo->pChildren); + } + + colDataDestroy(&pInfo->twAggSup.timeWindowData); + blockDataDestroy(pInfo->pDelRes); + blockDataDestroy(pInfo->pWinBlock); + blockDataDestroy(pInfo->pUpdateRes); + tSimpleHashCleanup(pInfo->pStUpdated); + tSimpleHashCleanup(pInfo->pStDeleted); + + taosArrayDestroy(pInfo->historyWins); + taosMemoryFreeClear(param); +} + +int32_t initBasicInfoEx(SOptrBasicInfo* pBasicInfo, SExprSupp* pSup, SExprInfo* pExprInfo, int32_t numOfCols, + SSDataBlock* pResultBlock, SFunctionStateStore* pStore) { + initBasicInfo(pBasicInfo, pResultBlock); + int32_t code = initExprSupp(pSup, pExprInfo, numOfCols, pStore); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + for (int32_t i = 0; i < numOfCols; ++i) { + pSup->pCtx[i].saveHandle.pBuf = NULL; + } + + ASSERT(numOfCols > 0); + return TSDB_CODE_SUCCESS; +} + +void initDummyFunction(SqlFunctionCtx* pDummy, SqlFunctionCtx* pCtx, int32_t nums) { + for (int i = 0; i < nums; i++) { + pDummy[i].functionId = pCtx[i].functionId; + pDummy[i].isNotNullFunc = pCtx[i].isNotNullFunc; + pDummy[i].isPseudoFunc = pCtx[i].isPseudoFunc; + } +} + +void initDownStream(SOperatorInfo* downstream, SStreamAggSupporter* pAggSup, uint16_t type, int32_t tsColIndex, + STimeWindowAggSupp* pTwSup) { + if (downstream->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_PARTITION) { + SStreamPartitionOperatorInfo* pScanInfo = downstream->info; + pScanInfo->tsColIndex = tsColIndex; + } + + if (downstream->operatorType != QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) { + initDownStream(downstream->pDownstream[0], pAggSup, type, tsColIndex, pTwSup); + return; + } + SStreamScanInfo* pScanInfo = downstream->info; + pScanInfo->windowSup = (SWindowSupporter){.pStreamAggSup = pAggSup, .gap = pAggSup->gap, .parentType = 
type}; + pScanInfo->pState = pAggSup->pState; + if (!pScanInfo->pUpdateInfo) { + pScanInfo->pUpdateInfo = pAggSup->stateStore.updateInfoInit(60000, TSDB_TIME_PRECISION_MILLI, pTwSup->waterMark); + } + pScanInfo->twAggSup = *pTwSup; +} + +int32_t initStreamAggSupporter(SStreamAggSupporter* pSup, SqlFunctionCtx* pCtx, int32_t numOfOutput, int64_t gap, + SStreamState* pState, int32_t keySize, int16_t keyType, SStateStore* pStore, + SReadHandle* pHandle, SStorageAPI* pApi) { + pSup->resultRowSize = keySize + getResultRowSize(pCtx, numOfOutput); + pSup->pScanBlock = createSpecialDataBlock(STREAM_CLEAR); + pSup->gap = gap; + pSup->stateKeySize = keySize; + pSup->stateKeyType = keyType; + pSup->pDummyCtx = (SqlFunctionCtx*)taosMemoryCalloc(numOfOutput, sizeof(SqlFunctionCtx)); + if (pSup->pDummyCtx == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + pSup->stateStore = *pStore; + + initDummyFunction(pSup->pDummyCtx, pCtx, numOfOutput); + pSup->pState = taosMemoryCalloc(1, sizeof(SStreamState)); + *(pSup->pState) = *pState; + pSup->stateStore.streamStateSetNumber(pSup->pState, -1); + + _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); + pSup->pResultRows = tSimpleHashInit(32, hashFn); + + int32_t pageSize = 4096; + while (pageSize < pSup->resultRowSize * 4) { + pageSize <<= 1u; + } + // at least four pages need to be in buffer + int32_t bufSize = 4096 * 256; + if (bufSize <= pageSize) { + bufSize = pageSize * 4; + } + + if (!osTempSpaceAvailable()) { + terrno = TSDB_CODE_NO_DISKSPACE; + qError("Init stream agg supporter failed since %s, tempDir:%s", terrstr(), tsTempDir); + return terrno; + } + + int32_t code = createDiskbasedBuf(&pSup->pResultBuf, pageSize, bufSize, "function", tsTempDir); + for (int32_t i = 0; i < numOfOutput; ++i) { + pCtx[i].saveHandle.pBuf = pSup->pResultBuf; + } + + pSup->pSessionAPI = pApi; + + return TSDB_CODE_SUCCESS; +} + +bool isInTimeWindow(STimeWindow* pWin, TSKEY ts, int64_t gap) { + if (ts + gap >= pWin->skey && ts - gap 
<= pWin->ekey) { + return true; + } + return false; +} + +bool isInWindow(SResultWindowInfo* pWinInfo, TSKEY ts, int64_t gap) { + return isInTimeWindow(&pWinInfo->sessionWin.win, ts, gap); +} + +void getCurSessionWindow(SStreamAggSupporter* pAggSup, TSKEY startTs, TSKEY endTs, uint64_t groupId, + SSessionKey* pKey) { + pKey->win.skey = startTs; + pKey->win.ekey = endTs; + pKey->groupId = groupId; + int32_t code = pAggSup->stateStore.streamStateSessionGetKeyByRange(pAggSup->pState, pKey, pKey); + if (code != TSDB_CODE_SUCCESS) { + SET_SESSION_WIN_KEY_INVALID(pKey); + } +} + +bool isInvalidSessionWin(SResultWindowInfo* pWinInfo) { return pWinInfo->sessionWin.win.skey == 0; } + +bool inWinRange(STimeWindow* range, STimeWindow* cur) { + if (cur->skey >= range->skey && cur->ekey <= range->ekey) { + return true; + } + return false; +} + +void setSessionOutputBuf(SStreamAggSupporter* pAggSup, TSKEY startTs, TSKEY endTs, uint64_t groupId, + SResultWindowInfo* pCurWin) { + pCurWin->sessionWin.groupId = groupId; + pCurWin->sessionWin.win.skey = startTs; + pCurWin->sessionWin.win.ekey = endTs; + int32_t size = pAggSup->resultRowSize; + int32_t code = pAggSup->stateStore.streamStateSessionAddIfNotExist(pAggSup->pState, &pCurWin->sessionWin, + pAggSup->gap, &pCurWin->pOutputBuf, &size); + if (code == TSDB_CODE_SUCCESS && !inWinRange(&pAggSup->winRange, &pCurWin->sessionWin.win)) { + code = TSDB_CODE_FAILED; + releaseOutputBuf(pAggSup->pState, NULL, (SResultRow*)pCurWin->pOutputBuf, &pAggSup->pSessionAPI->stateStore); + pCurWin->pOutputBuf = taosMemoryCalloc(1, size); + } + + if (code == TSDB_CODE_SUCCESS) { + pCurWin->isOutput = true; + pAggSup->stateStore.streamStateSessionDel(pAggSup->pState, &pCurWin->sessionWin); + } else { + pCurWin->sessionWin.win.skey = startTs; + pCurWin->sessionWin.win.ekey = endTs; + } +} + +int32_t getSessionWinBuf(SStreamAggSupporter* pAggSup, SStreamStateCur* pCur, SResultWindowInfo* pWinInfo) { + int32_t size = 0; + int32_t code = + 
pAggSup->stateStore.streamStateSessionGetKVByCur(pCur, &pWinInfo->sessionWin, &pWinInfo->pOutputBuf, &size); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + + pAggSup->stateStore.streamStateCurNext(pAggSup->pState, pCur); + return TSDB_CODE_SUCCESS; +} +void saveDeleteInfo(SArray* pWins, SSessionKey key) { + // key.win.ekey = key.win.skey; + taosArrayPush(pWins, &key); +} + +void saveDeleteRes(SSHashObj* pStDelete, SSessionKey key) { + key.win.ekey = key.win.skey; + tSimpleHashPut(pStDelete, &key, sizeof(SSessionKey), NULL, 0); +} + +static void removeSessionResult(SSHashObj* pHashMap, SSHashObj* pResMap, SSessionKey key) { + key.win.ekey = key.win.skey; + tSimpleHashRemove(pHashMap, &key, sizeof(SSessionKey)); + tSimpleHashRemove(pResMap, &key, sizeof(SSessionKey)); +} + +static void getSessionHashKey(const SSessionKey* pKey, SSessionKey* pHashKey) { + *pHashKey = *pKey; + pHashKey->win.ekey = pKey->win.skey; +} + +static void removeSessionResults(SSHashObj* pHashMap, SArray* pWins) { + if (tSimpleHashGetSize(pHashMap) == 0) { + return; + } + int32_t size = taosArrayGetSize(pWins); + for (int32_t i = 0; i < size; i++) { + SSessionKey* pWin = taosArrayGet(pWins, i); + if (!pWin) continue; + SSessionKey key = {0}; + getSessionHashKey(pWin, &key); + tSimpleHashRemove(pHashMap, &key, sizeof(SSessionKey)); + } +} + +int32_t updateSessionWindowInfo(SResultWindowInfo* pWinInfo, TSKEY* pStartTs, TSKEY* pEndTs, uint64_t groupId, + int32_t rows, int32_t start, int64_t gap, SSHashObj* pResultRows, SSHashObj* pStUpdated, + SSHashObj* pStDeleted) { + for (int32_t i = start; i < rows; ++i) { + if (!isInWindow(pWinInfo, pStartTs[i], gap) && (!pEndTs || !isInWindow(pWinInfo, pEndTs[i], gap))) { + return i - start; + } + if (pWinInfo->sessionWin.win.skey > pStartTs[i]) { + if (pStDeleted && pWinInfo->isOutput) { + saveDeleteRes(pStDeleted, pWinInfo->sessionWin); + } + removeSessionResult(pStUpdated, pResultRows, pWinInfo->sessionWin); + pWinInfo->sessionWin.win.skey = 
pStartTs[i]; + } + pWinInfo->sessionWin.win.ekey = TMAX(pWinInfo->sessionWin.win.ekey, pStartTs[i]); + if (pEndTs) { + pWinInfo->sessionWin.win.ekey = TMAX(pWinInfo->sessionWin.win.ekey, pEndTs[i]); + } + } + return rows - start; +} + +static int32_t initSessionOutputBuf(SResultWindowInfo* pWinInfo, SResultRow** pResult, SqlFunctionCtx* pCtx, + int32_t numOfOutput, int32_t* rowEntryInfoOffset) { + ASSERT(pWinInfo->sessionWin.win.skey <= pWinInfo->sessionWin.win.ekey); + *pResult = (SResultRow*)pWinInfo->pOutputBuf; + // set time window for current result + (*pResult)->win = pWinInfo->sessionWin.win; + setResultRowInitCtx(*pResult, pCtx, numOfOutput, rowEntryInfoOffset); + return TSDB_CODE_SUCCESS; +} + +static int32_t doOneWindowAggImpl(SColumnInfoData* pTimeWindowData, SResultWindowInfo* pCurWin, SResultRow** pResult, + int32_t startIndex, int32_t winRows, int32_t rows, int32_t numOutput, + SOperatorInfo* pOperator, int64_t winDelta) { + SExprSupp* pSup = &pOperator->exprSupp; + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + int32_t code = initSessionOutputBuf(pCurWin, pResult, pSup->pCtx, numOutput, pSup->rowEntryInfoOffset); + if (code != TSDB_CODE_SUCCESS || (*pResult) == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + updateTimeWindowInfo(pTimeWindowData, &pCurWin->sessionWin.win, winDelta); + applyAggFunctionOnPartialTuples(pTaskInfo, pSup->pCtx, pTimeWindowData, startIndex, winRows, rows, numOutput); + return TSDB_CODE_SUCCESS; +} + +static bool doDeleteSessionWindow(SStreamAggSupporter* pAggSup, SSessionKey* pKey) { + pAggSup->stateStore.streamStateSessionDel(pAggSup->pState, pKey); + SSessionKey hashKey = {0}; + getSessionHashKey(pKey, &hashKey); + tSimpleHashRemove(pAggSup->pResultRows, &hashKey, sizeof(SSessionKey)); + return true; +} + +static int32_t setSessionWinOutputInfo(SSHashObj* pStUpdated, SResultWindowInfo* pWinInfo) { + void* pVal = tSimpleHashGet(pStUpdated, &pWinInfo->sessionWin, sizeof(SSessionKey)); + if (pVal) { + SResultWindowInfo* 
pWin = pVal; + pWinInfo->isOutput = pWin->isOutput; + } + return TSDB_CODE_SUCCESS; +} + +SStreamStateCur* getNextSessionWinInfo(SStreamAggSupporter* pAggSup, SSHashObj* pStUpdated, SResultWindowInfo* pCurWin, + SResultWindowInfo* pNextWin) { + SStreamStateCur* pCur = pAggSup->stateStore.streamStateSessionSeekKeyNext(pAggSup->pState, &pCurWin->sessionWin); + pNextWin->isOutput = true; + setSessionWinOutputInfo(pStUpdated, pNextWin); + int32_t size = 0; + pNextWin->sessionWin = pCurWin->sessionWin; + int32_t code = + pAggSup->stateStore.streamStateSessionGetKVByCur(pCur, &pNextWin->sessionWin, &pNextWin->pOutputBuf, &size); + if (code != TSDB_CODE_SUCCESS) { + taosMemoryFreeClear(pNextWin->pOutputBuf); + SET_SESSION_WIN_INVALID(*pNextWin); + } + return pCur; +} + +static int32_t compactSessionWindow(SOperatorInfo* pOperator, SResultWindowInfo* pCurWin, SSHashObj* pStUpdated, + SSHashObj* pStDeleted, bool addGap) { + SExprSupp* pSup = &pOperator->exprSupp; + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + SStorageAPI* pAPI = &pOperator->pTaskInfo->storageAPI; + int32_t winNum = 0; + + SStreamSessionAggOperatorInfo* pInfo = pOperator->info; + SResultRow* pCurResult = NULL; + int32_t numOfOutput = pOperator->exprSupp.numOfExprs; + SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; + initSessionOutputBuf(pCurWin, &pCurResult, pSup->pCtx, numOfOutput, pSup->rowEntryInfoOffset); + // Just look for the window behind StartIndex + while (1) { + SResultWindowInfo winInfo = {0}; + SStreamStateCur* pCur = getNextSessionWinInfo(pAggSup, pStUpdated, pCurWin, &winInfo); + if (!IS_VALID_SESSION_WIN(winInfo) || !isInWindow(pCurWin, winInfo.sessionWin.win.skey, pAggSup->gap) || + !inWinRange(&pAggSup->winRange, &winInfo.sessionWin.win)) { + taosMemoryFree(winInfo.pOutputBuf); + pAPI->stateStore.streamStateFreeCur(pCur); + break; + } + SResultRow* pWinResult = NULL; + initSessionOutputBuf(&winInfo, &pWinResult, pAggSup->pDummyCtx, numOfOutput, pSup->rowEntryInfoOffset); + 
pCurWin->sessionWin.win.ekey = TMAX(pCurWin->sessionWin.win.ekey, winInfo.sessionWin.win.ekey); + int64_t winDelta = 0; + if (addGap) { + winDelta = pAggSup->gap; + } + updateTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &pCurWin->sessionWin.win, winDelta); + compactFunctions(pSup->pCtx, pAggSup->pDummyCtx, numOfOutput, pTaskInfo, &pInfo->twAggSup.timeWindowData); + tSimpleHashRemove(pStUpdated, &winInfo.sessionWin, sizeof(SSessionKey)); + if (winInfo.isOutput && pStDeleted) { + saveDeleteRes(pStDeleted, winInfo.sessionWin); + } + removeSessionResult(pStUpdated, pAggSup->pResultRows, winInfo.sessionWin); + doDeleteSessionWindow(pAggSup, &winInfo.sessionWin); + pAPI->stateStore.streamStateFreeCur(pCur); + taosMemoryFree(winInfo.pOutputBuf); + winNum++; + } + return winNum; +} + +int32_t saveSessionOutputBuf(SStreamAggSupporter* pAggSup, SResultWindowInfo* pWinInfo) { + saveSessionDiscBuf(pAggSup->pState, &pWinInfo->sessionWin, pWinInfo->pOutputBuf, pAggSup->resultRowSize, + &pAggSup->stateStore); + pWinInfo->pOutputBuf = NULL; + return TSDB_CODE_SUCCESS; +} + +static void doStreamSessionAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSDataBlock, SSHashObj* pStUpdated, + SSHashObj* pStDeleted, bool hasEndTs, bool addGap) { + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + SStreamSessionAggOperatorInfo* pInfo = pOperator->info; + int32_t numOfOutput = pOperator->exprSupp.numOfExprs; + uint64_t groupId = pSDataBlock->info.id.groupId; + int64_t code = TSDB_CODE_SUCCESS; + SResultRow* pResult = NULL; + int32_t rows = pSDataBlock->info.rows; + int32_t winRows = 0; + SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; + + pInfo->dataVersion = TMAX(pInfo->dataVersion, pSDataBlock->info.version); + pAggSup->winRange = pTaskInfo->streamInfo.fillHistoryWindow; + if (pAggSup->winRange.ekey <= 0) { + pAggSup->winRange.ekey = INT64_MAX; + } + + SColumnInfoData* pStartTsCol = taosArrayGet(pSDataBlock->pDataBlock, pInfo->primaryTsIndex); + TSKEY* startTsCols = 
(int64_t*)pStartTsCol->pData; + SColumnInfoData* pEndTsCol = NULL; + if (hasEndTs) { + pEndTsCol = taosArrayGet(pSDataBlock->pDataBlock, pInfo->endTsIndex); + } else { + pEndTsCol = taosArrayGet(pSDataBlock->pDataBlock, pInfo->primaryTsIndex); + } + + TSKEY* endTsCols = (int64_t*)pEndTsCol->pData; + for (int32_t i = 0; i < rows;) { + if (pInfo->ignoreExpiredData && isOverdue(endTsCols[i], &pInfo->twAggSup)) { + i++; + continue; + } + SResultWindowInfo winInfo = {0}; + setSessionOutputBuf(pAggSup, startTsCols[i], endTsCols[i], groupId, &winInfo); + setSessionWinOutputInfo(pStUpdated, &winInfo); + winRows = updateSessionWindowInfo(&winInfo, startTsCols, endTsCols, groupId, rows, i, pAggSup->gap, + pAggSup->pResultRows, pStUpdated, pStDeleted); + // coverity scan error + if (!winInfo.pOutputBuf) { + T_LONG_JMP(pTaskInfo->env, TSDB_CODE_OUT_OF_MEMORY); + } + + int64_t winDelta = 0; + if (addGap) { + winDelta = pAggSup->gap; + } + code = doOneWindowAggImpl(&pInfo->twAggSup.timeWindowData, &winInfo, &pResult, i, winRows, rows, numOfOutput, + pOperator, winDelta); + if (code != TSDB_CODE_SUCCESS || pResult == NULL) { + T_LONG_JMP(pTaskInfo->env, TSDB_CODE_OUT_OF_MEMORY); + } + compactSessionWindow(pOperator, &winInfo, pStUpdated, pStDeleted, addGap); + saveSessionOutputBuf(pAggSup, &winInfo); + + if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE && pStUpdated) { + code = saveResult(winInfo, pStUpdated); + if (code != TSDB_CODE_SUCCESS) { + T_LONG_JMP(pTaskInfo->env, TSDB_CODE_OUT_OF_MEMORY); + } + } + if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_WINDOW_CLOSE) { + SSessionKey key = {0}; + getSessionHashKey(&winInfo.sessionWin, &key); + tSimpleHashPut(pAggSup->pResultRows, &key, sizeof(SSessionKey), &winInfo, sizeof(SResultWindowInfo)); + } + + i += winRows; + } +} + +static void doDeleteTimeWindows(SStreamAggSupporter* pAggSup, SSDataBlock* pBlock, SArray* result) { + SColumnInfoData* pStartTsCol = taosArrayGet(pBlock->pDataBlock, START_TS_COLUMN_INDEX); + 
TSKEY* startDatas = (TSKEY*)pStartTsCol->pData; + SColumnInfoData* pEndTsCol = taosArrayGet(pBlock->pDataBlock, END_TS_COLUMN_INDEX); + TSKEY* endDatas = (TSKEY*)pEndTsCol->pData; + SColumnInfoData* pGroupCol = taosArrayGet(pBlock->pDataBlock, GROUPID_COLUMN_INDEX); + uint64_t* gpDatas = (uint64_t*)pGroupCol->pData; + for (int32_t i = 0; i < pBlock->info.rows; i++) { + while (1) { + SSessionKey curWin = {0}; + getCurSessionWindow(pAggSup, startDatas[i], endDatas[i], gpDatas[i], &curWin); + if (IS_INVALID_SESSION_WIN_KEY(curWin)) { + break; + } + doDeleteSessionWindow(pAggSup, &curWin); + if (result) { + saveDeleteInfo(result, curWin); + } + } + } +} + +static inline int32_t sessionKeyCompareAsc(const void* pKey1, const void* pKey2) { + SSessionKey* pWin1 = (SSessionKey*)pKey1; + SSessionKey* pWin2 = (SSessionKey*)pKey2; + + if (pWin1->groupId > pWin2->groupId) { + return 1; + } else if (pWin1->groupId < pWin2->groupId) { + return -1; + } + + if (pWin1->win.skey > pWin2->win.skey) { + return 1; + } else if (pWin1->win.skey < pWin2->win.skey) { + return -1; + } + + return 0; +} + +static int32_t copyUpdateResult(SSHashObj* pStUpdated, SArray* pUpdated) { + void* pIte = NULL; + int32_t iter = 0; + while ((pIte = tSimpleHashIterate(pStUpdated, pIte, &iter)) != NULL) { + void* key = tSimpleHashGetKey(pIte, NULL); + taosArrayPush(pUpdated, key); + } + taosArraySort(pUpdated, sessionKeyCompareAsc); + return TSDB_CODE_SUCCESS; +} + +void doBuildDeleteDataBlock(SOperatorInfo* pOp, SSHashObj* pStDeleted, SSDataBlock* pBlock, void** Ite) { + SStorageAPI* pAPI = &pOp->pTaskInfo->storageAPI; + + blockDataCleanup(pBlock); + int32_t size = tSimpleHashGetSize(pStDeleted); + if (size == 0) { + return; + } + blockDataEnsureCapacity(pBlock, size); + int32_t iter = 0; + while (((*Ite) = tSimpleHashIterate(pStDeleted, *Ite, &iter)) != NULL) { + if (pBlock->info.rows + 1 > pBlock->info.capacity) { + break; + } + SSessionKey* res = tSimpleHashGetKey(*Ite, NULL); + SColumnInfoData* 
pStartTsCol = taosArrayGet(pBlock->pDataBlock, START_TS_COLUMN_INDEX); + colDataSetVal(pStartTsCol, pBlock->info.rows, (const char*)&res->win.skey, false); + SColumnInfoData* pEndTsCol = taosArrayGet(pBlock->pDataBlock, END_TS_COLUMN_INDEX); + colDataSetVal(pEndTsCol, pBlock->info.rows, (const char*)&res->win.skey, false); + SColumnInfoData* pUidCol = taosArrayGet(pBlock->pDataBlock, UID_COLUMN_INDEX); + colDataSetNULL(pUidCol, pBlock->info.rows); + SColumnInfoData* pGpCol = taosArrayGet(pBlock->pDataBlock, GROUPID_COLUMN_INDEX); + colDataSetVal(pGpCol, pBlock->info.rows, (const char*)&res->groupId, false); + SColumnInfoData* pCalStCol = taosArrayGet(pBlock->pDataBlock, CALCULATE_START_TS_COLUMN_INDEX); + colDataSetNULL(pCalStCol, pBlock->info.rows); + SColumnInfoData* pCalEdCol = taosArrayGet(pBlock->pDataBlock, CALCULATE_END_TS_COLUMN_INDEX); + colDataSetNULL(pCalEdCol, pBlock->info.rows); + + SColumnInfoData* pTableCol = taosArrayGet(pBlock->pDataBlock, TABLE_NAME_COLUMN_INDEX); + + void* tbname = NULL; + pAPI->stateStore.streamStateGetParName(pOp->pTaskInfo->streamInfo.pState, res->groupId, &tbname); + if (tbname == NULL) { + colDataSetNULL(pTableCol, pBlock->info.rows); + } else { + char parTbName[VARSTR_HEADER_SIZE + TSDB_TABLE_NAME_LEN]; + STR_WITH_MAXSIZE_TO_VARSTR(parTbName, tbname, sizeof(parTbName)); + colDataSetVal(pTableCol, pBlock->info.rows, (const char*)parTbName, false); + pAPI->stateStore.streamStateFreeVal(tbname); + } + pBlock->info.rows += 1; + } + if ((*Ite) == NULL) { + tSimpleHashClear(pStDeleted); + } +} + +static void rebuildSessionWindow(SOperatorInfo* pOperator, SArray* pWinArray, SSHashObj* pStUpdated) { + SExprSupp* pSup = &pOperator->exprSupp; + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + SStorageAPI* pAPI = &pOperator->pTaskInfo->storageAPI; + + int32_t size = taosArrayGetSize(pWinArray); + SStreamSessionAggOperatorInfo* pInfo = pOperator->info; + SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; + int32_t numOfOutput = 
pSup->numOfExprs; + int32_t numOfChild = taosArrayGetSize(pInfo->pChildren); + + for (int32_t i = 0; i < size; i++) { + SSessionKey* pWinKey = taosArrayGet(pWinArray, i); + int32_t num = 0; + SResultWindowInfo parentWin = {0}; + for (int32_t j = 0; j < numOfChild; j++) { + SOperatorInfo* pChild = taosArrayGetP(pInfo->pChildren, j); + SStreamSessionAggOperatorInfo* pChInfo = pChild->info; + SStreamAggSupporter* pChAggSup = &pChInfo->streamAggSup; + SSessionKey chWinKey = {0}; + getSessionHashKey(pWinKey, &chWinKey); + SStreamStateCur* pCur = pAggSup->stateStore.streamStateSessionSeekKeyCurrentNext(pChAggSup->pState, &chWinKey); + SResultRow* pResult = NULL; + SResultRow* pChResult = NULL; + while (1) { + SResultWindowInfo childWin = {0}; + childWin.sessionWin = *pWinKey; + int32_t code = getSessionWinBuf(pChAggSup, pCur, &childWin); + + if (code == TSDB_CODE_SUCCESS && !inWinRange(&pAggSup->winRange, &childWin.sessionWin.win)) { + continue; + } + + if (code == TSDB_CODE_SUCCESS && inWinRange(&pWinKey->win, &childWin.sessionWin.win)) { + if (num == 0) { + setSessionOutputBuf(pAggSup, pWinKey->win.skey, pWinKey->win.ekey, pWinKey->groupId, &parentWin); + code = initSessionOutputBuf(&parentWin, &pResult, pSup->pCtx, numOfOutput, pSup->rowEntryInfoOffset); + if (code != TSDB_CODE_SUCCESS || pResult == NULL) { + break; + } + } + num++; + updateTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &parentWin.sessionWin.win, pAggSup->gap); + initSessionOutputBuf(&childWin, &pChResult, pChild->exprSupp.pCtx, numOfOutput, + pChild->exprSupp.rowEntryInfoOffset); + compactFunctions(pSup->pCtx, pChild->exprSupp.pCtx, numOfOutput, pTaskInfo, &pInfo->twAggSup.timeWindowData); + compactSessionWindow(pOperator, &parentWin, pStUpdated, NULL, true); + saveResult(parentWin, pStUpdated); + } else { + break; + } + } + pAPI->stateStore.streamStateFreeCur(pCur); + } + if (num > 0) { + saveSessionOutputBuf(pAggSup, &parentWin); + } + } +} + +int32_t closeSessionWindow(SSHashObj* pHashMap, 
STimeWindowAggSupp* pTwSup, SSHashObj* pClosed) { + void* pIte = NULL; + int32_t iter = 0; + while ((pIte = tSimpleHashIterate(pHashMap, pIte, &iter)) != NULL) { + SResultWindowInfo* pWinInfo = pIte; + if (isCloseWindow(&pWinInfo->sessionWin.win, pTwSup)) { + if (pTwSup->calTrigger == STREAM_TRIGGER_WINDOW_CLOSE && pClosed) { + int32_t code = saveResult(*pWinInfo, pClosed); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + SSessionKey* pKey = tSimpleHashGetKey(pIte, NULL); + tSimpleHashIterateRemove(pHashMap, pKey, sizeof(SSessionKey), &pIte, &iter); + } + } + return TSDB_CODE_SUCCESS; +} + +static void closeChildSessionWindow(SArray* pChildren, TSKEY maxTs) { + int32_t size = taosArrayGetSize(pChildren); + for (int32_t i = 0; i < size; i++) { + SOperatorInfo* pChildOp = taosArrayGetP(pChildren, i); + SStreamSessionAggOperatorInfo* pChInfo = pChildOp->info; + pChInfo->twAggSup.maxTs = TMAX(pChInfo->twAggSup.maxTs, maxTs); + closeSessionWindow(pChInfo->streamAggSup.pResultRows, &pChInfo->twAggSup, NULL); + } +} + +int32_t getAllSessionWindow(SSHashObj* pHashMap, SSHashObj* pStUpdated) { + void* pIte = NULL; + int32_t iter = 0; + while ((pIte = tSimpleHashIterate(pHashMap, pIte, &iter)) != NULL) { + SResultWindowInfo* pWinInfo = pIte; + saveResult(*pWinInfo, pStUpdated); + } + return TSDB_CODE_SUCCESS; +} + +static void copyDeleteWindowInfo(SArray* pResWins, SSHashObj* pStDeleted) { + int32_t size = taosArrayGetSize(pResWins); + for (int32_t i = 0; i < size; i++) { + SSessionKey* pWinKey = taosArrayGet(pResWins, i); + if (!pWinKey) continue; + SSessionKey winInfo = {0}; + getSessionHashKey(pWinKey, &winInfo); + tSimpleHashPut(pStDeleted, &winInfo, sizeof(SSessionKey), NULL, 0); + } +} + +// the allocated memory comes from outer function. 
+void initGroupResInfoFromArrayList(SGroupResInfo* pGroupResInfo, SArray* pArrayList) { + pGroupResInfo->pRows = pArrayList; + pGroupResInfo->index = 0; + pGroupResInfo->pBuf = NULL; +} + +void doBuildSessionResult(SOperatorInfo* pOperator, void* pState, SGroupResInfo* pGroupResInfo, SSDataBlock* pBlock) { + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + // set output datablock version + pBlock->info.version = pTaskInfo->version; + + blockDataCleanup(pBlock); + if (!hasRemainResults(pGroupResInfo)) { + cleanupGroupResInfo(pGroupResInfo); + return; + } + + // clear the existed group id + pBlock->info.id.groupId = 0; + buildSessionResultDataBlock(pOperator, pState, pBlock, &pOperator->exprSupp, pGroupResInfo); + if (pBlock->info.rows == 0) { + cleanupGroupResInfo(pGroupResInfo); + } +} + +static SSDataBlock* buildSessionResult(SOperatorInfo* pOperator) { + SStreamSessionAggOperatorInfo* pInfo = pOperator->info; + SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; + SOptrBasicInfo* pBInfo = &pInfo->binfo; + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + doBuildDeleteDataBlock(pOperator, pInfo->pStDeleted, pInfo->pDelRes, &pInfo->pDelIterator); + if (pInfo->pDelRes->info.rows > 0) { + printDataBlock(pInfo->pDelRes, getStreamOpName(pOperator->operatorType), GET_TASKID(pTaskInfo)); + return pInfo->pDelRes; + } + doBuildSessionResult(pOperator, pAggSup->pState, &pInfo->groupResInfo, pBInfo->pRes); + if (pBInfo->pRes->info.rows > 0) { + printDataBlock(pBInfo->pRes, getStreamOpName(pOperator->operatorType), GET_TASKID(pTaskInfo)); + return pBInfo->pRes; + } + return NULL; +} + +void getMaxTsWins(const SArray* pAllWins, SArray* pMaxWins) { + int32_t size = taosArrayGetSize(pAllWins); + if (size == 0) { + return; + } + + SSessionKey* pSeKey = taosArrayGet(pAllWins, size - 1); + taosArrayPush(pMaxWins, pSeKey); + if (pSeKey->groupId == 0) { + return; + } + uint64_t preGpId = pSeKey->groupId; + for (int32_t i = size - 2; i >= 0; i--) { + pSeKey = 
taosArrayGet(pAllWins, i); + if (preGpId != pSeKey->groupId) { + taosArrayPush(pMaxWins, pSeKey); + preGpId = pSeKey->groupId; + } + } +} + +static SSDataBlock* doStreamSessionAgg(SOperatorInfo* pOperator) { + SExprSupp* pSup = &pOperator->exprSupp; + SStreamSessionAggOperatorInfo* pInfo = pOperator->info; + SOptrBasicInfo* pBInfo = &pInfo->binfo; + SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + qDebug("stask:%s %s status: %d", GET_TASKID(pTaskInfo), getStreamOpName(pOperator->operatorType), pOperator->status); + if (pOperator->status == OP_EXEC_DONE) { + return NULL; + } else if (pOperator->status == OP_RES_TO_RETURN) { + SSDataBlock* opRes = buildSessionResult(pOperator); + if (opRes) { + return opRes; + } + setOperatorCompleted(pOperator); + return NULL; + } + + SOperatorInfo* downstream = pOperator->pDownstream[0]; + if (!pInfo->pUpdated) { + pInfo->pUpdated = taosArrayInit(16, sizeof(SSessionKey)); + } + if (!pInfo->pStUpdated) { + _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); + pInfo->pStUpdated = tSimpleHashInit(64, hashFn); + } + while (1) { + SSDataBlock* pBlock = downstream->fpSet.getNextFn(downstream); + if (pBlock == NULL) { + break; + } + printSpecDataBlock(pBlock, getStreamOpName(pOperator->operatorType), "recv", GET_TASKID(pTaskInfo)); + + if (pBlock->info.type == STREAM_DELETE_DATA || pBlock->info.type == STREAM_DELETE_RESULT || + pBlock->info.type == STREAM_CLEAR) { + SArray* pWins = taosArrayInit(16, sizeof(SSessionKey)); + // gap must be 0 + doDeleteTimeWindows(pAggSup, pBlock, pWins); + removeSessionResults(pInfo->pStUpdated, pWins); + if (IS_FINAL_SESSION_OP(pOperator)) { + int32_t childIndex = getChildIndex(pBlock); + SOperatorInfo* pChildOp = taosArrayGetP(pInfo->pChildren, childIndex); + SStreamSessionAggOperatorInfo* pChildInfo = pChildOp->info; + // gap must be 0 + doDeleteTimeWindows(&pChildInfo->streamAggSup, pBlock, NULL); + 
rebuildSessionWindow(pOperator, pWins, pInfo->pStUpdated); + } + copyDeleteWindowInfo(pWins, pInfo->pStDeleted); + taosArrayDestroy(pWins); + continue; + } else if (pBlock->info.type == STREAM_GET_ALL) { + getAllSessionWindow(pAggSup->pResultRows, pInfo->pStUpdated); + continue; + } else if (pBlock->info.type == STREAM_CREATE_CHILD_TABLE) { + return pBlock; + } else { + ASSERTS(pBlock->info.type == STREAM_NORMAL || pBlock->info.type == STREAM_INVALID, "invalid SSDataBlock type"); + } + + if (pInfo->scalarSupp.pExprInfo != NULL) { + SExprSupp* pExprSup = &pInfo->scalarSupp; + projectApplyFunctions(pExprSup->pExprInfo, pBlock, pBlock, pExprSup->pCtx, pExprSup->numOfExprs, NULL); + } + // the pDataBlock are always the same one, no need to call this again + setInputDataBlock(pSup, pBlock, TSDB_ORDER_ASC, MAIN_SCAN, true); + doStreamSessionAggImpl(pOperator, pBlock, pInfo->pStUpdated, pInfo->pStDeleted, IS_FINAL_SESSION_OP(pOperator), + true); + if (IS_FINAL_SESSION_OP(pOperator)) { + int32_t chIndex = getChildIndex(pBlock); + int32_t size = taosArrayGetSize(pInfo->pChildren); + // if chIndex + 1 - size > 0, add new child + for (int32_t i = 0; i < chIndex + 1 - size; i++) { + SOperatorInfo* pChildOp = + createStreamFinalSessionAggOperatorInfo(NULL, pInfo->pPhyNode, pOperator->pTaskInfo, 0, NULL); + if (!pChildOp) { + T_LONG_JMP(pOperator->pTaskInfo->env, TSDB_CODE_OUT_OF_MEMORY); + } + taosArrayPush(pInfo->pChildren, &pChildOp); + } + SOperatorInfo* pChildOp = taosArrayGetP(pInfo->pChildren, chIndex); + setInputDataBlock(&pChildOp->exprSupp, pBlock, TSDB_ORDER_ASC, MAIN_SCAN, true); + doStreamSessionAggImpl(pChildOp, pBlock, NULL, NULL, true, false); + } + pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, pBlock->info.window.ekey); + pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, pBlock->info.watermark); + } + // restore the value + pOperator->status = OP_RES_TO_RETURN; + + closeSessionWindow(pAggSup->pResultRows, &pInfo->twAggSup, pInfo->pStUpdated); + 
closeChildSessionWindow(pInfo->pChildren, pInfo->twAggSup.maxTs); + copyUpdateResult(pInfo->pStUpdated, pInfo->pUpdated); + removeSessionResults(pInfo->pStDeleted, pInfo->pUpdated); + tSimpleHashCleanup(pInfo->pStUpdated); + pInfo->pStUpdated = NULL; + if (pInfo->isHistoryOp) { + getMaxTsWins(pInfo->pUpdated, pInfo->historyWins); + } + initGroupResInfoFromArrayList(&pInfo->groupResInfo, pInfo->pUpdated); + pInfo->pUpdated = NULL; + blockDataEnsureCapacity(pInfo->binfo.pRes, pOperator->resultInfo.capacity); + + SSDataBlock* opRes = buildSessionResult(pOperator); + if (opRes) { + return opRes; + } + + setOperatorCompleted(pOperator); + return NULL; +} + +void streamSessionReleaseState(SOperatorInfo* pOperator) { + if (pOperator->operatorType != QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_SESSION) { + SStreamSessionAggOperatorInfo* pInfo = pOperator->info; + int32_t resSize = taosArrayGetSize(pInfo->historyWins) * sizeof(SSessionKey); + pInfo->streamAggSup.stateStore.streamStateSaveInfo(pInfo->streamAggSup.pState, STREAM_SESSION_OP_STATE_NAME, + strlen(STREAM_SESSION_OP_STATE_NAME), pInfo->historyWins->pData, + resSize); + } + SOperatorInfo* downstream = pOperator->pDownstream[0]; + if (downstream->fpSet.releaseStreamStateFn) { + downstream->fpSet.releaseStreamStateFn(downstream); + } +} + +void resetWinRange(STimeWindow* winRange) { + winRange->skey = INT64_MIN; + winRange->ekey = INT64_MAX; +} + +void streamSessionReloadState(SOperatorInfo* pOperator) { + SStreamSessionAggOperatorInfo* pInfo = pOperator->info; + SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; + resetWinRange(&pAggSup->winRange); + + SResultWindowInfo winInfo = {0}; + int32_t size = 0; + void* pBuf = NULL; + int32_t code = pAggSup->stateStore.streamStateGetInfo(pAggSup->pState, STREAM_SESSION_OP_STATE_NAME, + strlen(STREAM_SESSION_OP_STATE_NAME), &pBuf, &size); + int32_t num = size / sizeof(SSessionKey); + SSessionKey* pSeKeyBuf = (SSessionKey*)pBuf; + ASSERT(size == num * sizeof(SSessionKey)); + if 
(!pInfo->pStUpdated && num > 0) { + _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); + pInfo->pStUpdated = tSimpleHashInit(64, hashFn); + } + for (int32_t i = 0; i < num; i++) { + SResultWindowInfo winInfo = {0}; + setSessionOutputBuf(pAggSup, pSeKeyBuf[i].win.skey, pSeKeyBuf[i].win.ekey, pSeKeyBuf[i].groupId, &winInfo); + int32_t winNum = compactSessionWindow(pOperator, &winInfo, pInfo->pStUpdated, pInfo->pStDeleted, true); + if (winNum > 0) { + qDebug("===stream=== reload state. save result %" PRId64 ", %" PRIu64, winInfo.sessionWin.win.skey, + winInfo.sessionWin.groupId); + if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE) { + saveResult(winInfo, pInfo->pStUpdated); + } else if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_WINDOW_CLOSE) { + if (!isCloseWindow(&winInfo.sessionWin.win, &pInfo->twAggSup)) { + saveDeleteRes(pInfo->pStDeleted, winInfo.sessionWin); + } + SSessionKey key = {0}; + getSessionHashKey(&winInfo.sessionWin, &key); + tSimpleHashPut(pAggSup->pResultRows, &key, sizeof(SSessionKey), &winInfo, sizeof(SResultWindowInfo)); + } + } + saveSessionOutputBuf(pAggSup, &winInfo); + } + taosMemoryFree(pBuf); + + SOperatorInfo* downstream = pOperator->pDownstream[0]; + if (downstream->fpSet.reloadStreamStateFn) { + downstream->fpSet.reloadStreamStateFn(downstream); + } +} + +SOperatorInfo* createStreamSessionAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, + SExecTaskInfo* pTaskInfo, SReadHandle* pHandle) { + SSessionWinodwPhysiNode* pSessionNode = (SSessionWinodwPhysiNode*)pPhyNode; + int32_t numOfCols = 0; + int32_t code = TSDB_CODE_OUT_OF_MEMORY; + SStreamSessionAggOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SStreamSessionAggOperatorInfo)); + SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo)); + if (pInfo == NULL || pOperator == NULL) { + goto _error; + } + + pOperator->pTaskInfo = pTaskInfo; + + initResultSizeInfo(&pOperator->resultInfo, 4096); + if (pSessionNode->window.pExprs != 
NULL) { + int32_t numOfScalar = 0; + SExprInfo* pScalarExprInfo = createExprInfo(pSessionNode->window.pExprs, NULL, &numOfScalar); + code = initExprSupp(&pInfo->scalarSupp, pScalarExprInfo, numOfScalar, &pTaskInfo->storageAPI.functionStore); + if (code != TSDB_CODE_SUCCESS) { + goto _error; + } + } + SExprSupp* pSup = &pOperator->exprSupp; + + SExprInfo* pExprInfo = createExprInfo(pSessionNode->window.pFuncs, NULL, &numOfCols); + SSDataBlock* pResBlock = createDataBlockFromDescNode(pPhyNode->pOutputDataBlockDesc); + code = initBasicInfoEx(&pInfo->binfo, pSup, pExprInfo, numOfCols, pResBlock, &pTaskInfo->storageAPI.functionStore); + if (code != TSDB_CODE_SUCCESS) { + goto _error; + } + + code = initStreamAggSupporter(&pInfo->streamAggSup, pSup->pCtx, numOfCols, pSessionNode->gap, + pTaskInfo->streamInfo.pState, 0, 0, &pTaskInfo->storageAPI.stateStore, pHandle, + &pTaskInfo->storageAPI); + if (code != TSDB_CODE_SUCCESS) { + goto _error; + } + + pInfo->twAggSup = (STimeWindowAggSupp){ + .waterMark = pSessionNode->window.watermark, + .calTrigger = pSessionNode->window.triggerType, + .maxTs = INT64_MIN, + .minTs = INT64_MAX, + }; + + initExecTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &pTaskInfo->window); + + pInfo->primaryTsIndex = ((SColumnNode*)pSessionNode->window.pTspk)->slotId; + if (pSessionNode->window.pTsEnd) { + pInfo->endTsIndex = ((SColumnNode*)pSessionNode->window.pTsEnd)->slotId; + } + pInfo->binfo.pRes = pResBlock; + pInfo->order = TSDB_ORDER_ASC; + _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); + pInfo->pStDeleted = tSimpleHashInit(64, hashFn); + pInfo->pDelIterator = NULL; + pInfo->pDelRes = createSpecialDataBlock(STREAM_DELETE_RESULT); + pInfo->pChildren = NULL; + pInfo->pPhyNode = pPhyNode; + pInfo->ignoreExpiredData = pSessionNode->window.igExpired; + pInfo->ignoreExpiredDataSaved = false; + pInfo->pUpdated = NULL; + pInfo->pStUpdated = NULL; + pInfo->dataVersion = 0; + pInfo->historyWins = taosArrayInit(4, 
sizeof(SSessionKey)); + if (!pInfo->historyWins) { + goto _error; + } + if (pHandle) { + pInfo->isHistoryOp = pHandle->fillHistory; + } + pOperator->operatorType = QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION; + setOperatorInfo(pOperator, getStreamOpName(pOperator->operatorType), QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION, true, + OP_NOT_OPENED, pInfo, pTaskInfo); + pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doStreamSessionAgg, NULL, destroyStreamSessionAggOperatorInfo, + optrDefaultBufFn, NULL, optrDefaultGetNextExtFn, NULL); + setOperatorStreamStateFn(pOperator, streamSessionReleaseState, streamSessionReloadState); + + if (downstream) { + initDownStream(downstream, &pInfo->streamAggSup, pOperator->operatorType, pInfo->primaryTsIndex, &pInfo->twAggSup); + code = appendDownstream(pOperator, &downstream, 1); + } + return pOperator; + +_error: + if (pInfo != NULL) { + destroyStreamSessionAggOperatorInfo(pInfo); + } + + taosMemoryFreeClear(pOperator); + pTaskInfo->code = code; + return NULL; +} + +static void clearStreamSessionOperator(SStreamSessionAggOperatorInfo* pInfo) { + tSimpleHashClear(pInfo->streamAggSup.pResultRows); + pInfo->streamAggSup.stateStore.streamStateSessionClear(pInfo->streamAggSup.pState); +} + +static SSDataBlock* doStreamSessionSemiAgg(SOperatorInfo* pOperator) { + SStreamSessionAggOperatorInfo* pInfo = pOperator->info; + SOptrBasicInfo* pBInfo = &pInfo->binfo; + TSKEY maxTs = INT64_MIN; + SExprSupp* pSup = &pOperator->exprSupp; + SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + + qDebug("stask:%s %s status: %d", GET_TASKID(pTaskInfo), getStreamOpName(pOperator->operatorType), pOperator->status); + if (pOperator->status == OP_EXEC_DONE) { + return NULL; + } + + { + SSDataBlock* opRes = buildSessionResult(pOperator); + if (opRes) { + return opRes; + } + + if (pOperator->status == OP_RES_TO_RETURN) { + clearFunctionContext(&pOperator->exprSupp); + // semi interval operator clear disk 
buffer + clearStreamSessionOperator(pInfo); + setOperatorCompleted(pOperator); + return NULL; + } + } + + SOperatorInfo* downstream = pOperator->pDownstream[0]; + if (!pInfo->pUpdated) { + pInfo->pUpdated = taosArrayInit(16, sizeof(SSessionKey)); + } + if (!pInfo->pStUpdated) { + _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); + pInfo->pStUpdated = tSimpleHashInit(64, hashFn); + } + while (1) { + SSDataBlock* pBlock = downstream->fpSet.getNextFn(downstream); + if (pBlock == NULL) { + clearSpecialDataBlock(pInfo->pUpdateRes); + pOperator->status = OP_RES_TO_RETURN; + break; + } + printSpecDataBlock(pBlock, getStreamOpName(pOperator->operatorType), "recv", GET_TASKID(pTaskInfo)); + + if (pBlock->info.type == STREAM_DELETE_DATA || pBlock->info.type == STREAM_DELETE_RESULT || + pBlock->info.type == STREAM_CLEAR) { + // gap must be 0 + SArray* pWins = taosArrayInit(16, sizeof(SSessionKey)); + doDeleteTimeWindows(&pInfo->streamAggSup, pBlock, pWins); + removeSessionResults(pInfo->pStUpdated, pWins); + copyDeleteWindowInfo(pWins, pInfo->pStDeleted); + taosArrayDestroy(pWins); + break; + } else if (pBlock->info.type == STREAM_GET_ALL) { + getAllSessionWindow(pInfo->streamAggSup.pResultRows, pInfo->pStUpdated); + continue; + } else if (pBlock->info.type == STREAM_CREATE_CHILD_TABLE) { + return pBlock; + } else { + ASSERTS(pBlock->info.type == STREAM_NORMAL || pBlock->info.type == STREAM_INVALID, "invalid SSDataBlock type"); + } + + if (pInfo->scalarSupp.pExprInfo != NULL) { + SExprSupp* pExprSup = &pInfo->scalarSupp; + projectApplyFunctions(pExprSup->pExprInfo, pBlock, pBlock, pExprSup->pCtx, pExprSup->numOfExprs, NULL); + } + // the pDataBlock are always the same one, no need to call this again + setInputDataBlock(pSup, pBlock, TSDB_ORDER_ASC, MAIN_SCAN, true); + doStreamSessionAggImpl(pOperator, pBlock, pInfo->pStUpdated, NULL, false, false); + maxTs = TMAX(pInfo->twAggSup.maxTs, pBlock->info.window.ekey); + } + + pInfo->twAggSup.maxTs = 
TMAX(pInfo->twAggSup.maxTs, maxTs); + pBInfo->pRes->info.watermark = pInfo->twAggSup.maxTs; + + copyUpdateResult(pInfo->pStUpdated, pInfo->pUpdated); + removeSessionResults(pInfo->pStDeleted, pInfo->pUpdated); + tSimpleHashCleanup(pInfo->pStUpdated); + pInfo->pStUpdated = NULL; + initGroupResInfoFromArrayList(&pInfo->groupResInfo, pInfo->pUpdated); + pInfo->pUpdated = NULL; + blockDataEnsureCapacity(pBInfo->pRes, pOperator->resultInfo.capacity); + + SSDataBlock* opRes = buildSessionResult(pOperator); + if (opRes) { + return opRes; + } + + clearFunctionContext(&pOperator->exprSupp); + // semi interval operator clear disk buffer + clearStreamSessionOperator(pInfo); + setOperatorCompleted(pOperator); + return NULL; +} + +SOperatorInfo* createStreamFinalSessionAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, + SExecTaskInfo* pTaskInfo, int32_t numOfChild, + SReadHandle* pHandle) { + int32_t code = TSDB_CODE_OUT_OF_MEMORY; + SOperatorInfo* pOperator = createStreamSessionAggOperatorInfo(downstream, pPhyNode, pTaskInfo, pHandle); + if (pOperator == NULL) { + goto _error; + } + + SStorageAPI* pAPI = &pTaskInfo->storageAPI; + SStreamSessionAggOperatorInfo* pInfo = pOperator->info; + pOperator->operatorType = pPhyNode->type; + + if (pPhyNode->type != QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_SESSION) { + pInfo->pUpdateRes = createSpecialDataBlock(STREAM_CLEAR); + blockDataEnsureCapacity(pInfo->pUpdateRes, 128); + pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doStreamSessionSemiAgg, NULL, + destroyStreamSessionAggOperatorInfo, optrDefaultBufFn, NULL, optrDefaultGetNextExtFn, NULL); + } + setOperatorInfo(pOperator, getStreamOpName(pOperator->operatorType), pPhyNode->type, false, OP_NOT_OPENED, pInfo, pTaskInfo); + + if (numOfChild > 0) { + pInfo->pChildren = taosArrayInit(numOfChild, sizeof(void*)); + for (int32_t i = 0; i < numOfChild; i++) { + SOperatorInfo* pChildOp = createStreamFinalSessionAggOperatorInfo(NULL, pPhyNode, pTaskInfo, 0, NULL); + if 
(pChildOp == NULL) { + goto _error; + } + SStreamSessionAggOperatorInfo* pChInfo = pChildOp->info; + pChInfo->twAggSup.calTrigger = STREAM_TRIGGER_AT_ONCE; + pAPI->stateStore.streamStateSetNumber(pChInfo->streamAggSup.pState, i); + taosArrayPush(pInfo->pChildren, &pChildOp); + } + } + + if (!IS_FINAL_SESSION_OP(pOperator) || numOfChild == 0) { + pInfo->twAggSup.calTrigger = STREAM_TRIGGER_AT_ONCE; + } + + return pOperator; + +_error: + if (pInfo != NULL) { + destroyStreamSessionAggOperatorInfo(pInfo); + } + taosMemoryFreeClear(pOperator); + pTaskInfo->code = code; + return NULL; +} + +void destroyStreamStateOperatorInfo(void* param) { + SStreamStateAggOperatorInfo* pInfo = (SStreamStateAggOperatorInfo*)param; + cleanupBasicInfo(&pInfo->binfo); + destroyStreamAggSupporter(&pInfo->streamAggSup); + cleanupGroupResInfo(&pInfo->groupResInfo); + if (pInfo->pChildren != NULL) { + int32_t size = taosArrayGetSize(pInfo->pChildren); + for (int32_t i = 0; i < size; i++) { + SOperatorInfo* pChild = taosArrayGetP(pInfo->pChildren, i); + destroyOperator(pChild); + } + taosArrayDestroy(pInfo->pChildren); + } + colDataDestroy(&pInfo->twAggSup.timeWindowData); + blockDataDestroy(pInfo->pDelRes); + taosArrayDestroy(pInfo->historyWins); + tSimpleHashCleanup(pInfo->pSeUpdated); + tSimpleHashCleanup(pInfo->pSeDeleted); + taosMemoryFreeClear(param); +} + +bool isTsInWindow(SStateWindowInfo* pWin, TSKEY ts) { + if (pWin->winInfo.sessionWin.win.skey <= ts && ts <= pWin->winInfo.sessionWin.win.ekey) { + return true; + } + return false; +} + +bool isEqualStateKey(SStateWindowInfo* pWin, char* pKeyData) { + return pKeyData && compareVal(pKeyData, pWin->pStateKey); +} + +bool compareStateKey(void* data, void* key) { + if (!data || !key) { + return true; + } + SStateKeys* stateKey = (SStateKeys*)key; + stateKey->pData = (char*)key + sizeof(SStateKeys); + return compareVal(data, stateKey); +} + +bool compareWinStateKey(SStateKeys* left, SStateKeys* right) { + if (!left || !right) { + return 
false; + } + return compareVal(left->pData, right); +} + +void setStateOutputBuf(SStreamAggSupporter* pAggSup, TSKEY ts, uint64_t groupId, char* pKeyData, + SStateWindowInfo* pCurWin, SStateWindowInfo* pNextWin) { + int32_t size = pAggSup->resultRowSize; + pCurWin->winInfo.sessionWin.groupId = groupId; + pCurWin->winInfo.sessionWin.win.skey = ts; + pCurWin->winInfo.sessionWin.win.ekey = ts; + int32_t code = pAggSup->stateStore.streamStateStateAddIfNotExist(pAggSup->pState, &pCurWin->winInfo.sessionWin, + pKeyData, pAggSup->stateKeySize, compareStateKey, + &pCurWin->winInfo.pOutputBuf, &size); + pCurWin->pStateKey = + (SStateKeys*)((char*)pCurWin->winInfo.pOutputBuf + (pAggSup->resultRowSize - pAggSup->stateKeySize)); + pCurWin->pStateKey->bytes = pAggSup->stateKeySize - sizeof(SStateKeys); + pCurWin->pStateKey->type = pAggSup->stateKeyType; + pCurWin->pStateKey->pData = (char*)pCurWin->pStateKey + sizeof(SStateKeys); + pCurWin->pStateKey->isNull = false; + + if (code == TSDB_CODE_SUCCESS && !inWinRange(&pAggSup->winRange, &pCurWin->winInfo.sessionWin.win)) { + code = TSDB_CODE_FAILED; + releaseOutputBuf(pAggSup->pState, NULL, (SResultRow*)pCurWin->winInfo.pOutputBuf, + &pAggSup->pSessionAPI->stateStore); + pCurWin->winInfo.pOutputBuf = taosMemoryCalloc(1, size); + pCurWin->pStateKey = + (SStateKeys*)((char*)pCurWin->winInfo.pOutputBuf + (pAggSup->resultRowSize - pAggSup->stateKeySize)); + pCurWin->pStateKey->bytes = pAggSup->stateKeySize - sizeof(SStateKeys); + pCurWin->pStateKey->type = pAggSup->stateKeyType; + pCurWin->pStateKey->pData = (char*)pCurWin->pStateKey + sizeof(SStateKeys); + pCurWin->pStateKey->isNull = false; + pCurWin->winInfo.sessionWin.groupId = groupId; + pCurWin->winInfo.sessionWin.win.skey = ts; + pCurWin->winInfo.sessionWin.win.ekey = ts; + qDebug("===stream===reset state win key. 
skey:%" PRId64 ", endkey:%" PRId64, pCurWin->winInfo.sessionWin.win.skey, + pCurWin->winInfo.sessionWin.win.ekey); + } + + if (code == TSDB_CODE_SUCCESS) { + pCurWin->winInfo.isOutput = true; + pAggSup->stateStore.streamStateSessionDel(pAggSup->pState, &pCurWin->winInfo.sessionWin); + } else if (pKeyData) { + if (IS_VAR_DATA_TYPE(pAggSup->stateKeyType)) { + varDataCopy(pCurWin->pStateKey->pData, pKeyData); + } else { + memcpy(pCurWin->pStateKey->pData, pKeyData, pCurWin->pStateKey->bytes); + } + } + + pNextWin->winInfo.sessionWin = pCurWin->winInfo.sessionWin; + SStreamStateCur* pCur = + pAggSup->stateStore.streamStateSessionSeekKeyNext(pAggSup->pState, &pNextWin->winInfo.sessionWin); + int32_t nextSize = pAggSup->resultRowSize; + code = pAggSup->stateStore.streamStateSessionGetKVByCur(pCur, &pNextWin->winInfo.sessionWin, + &pNextWin->winInfo.pOutputBuf, &nextSize); + if (code != TSDB_CODE_SUCCESS) { + SET_SESSION_WIN_INVALID(pNextWin->winInfo); + } else { + pNextWin->pStateKey = + (SStateKeys*)((char*)pNextWin->winInfo.pOutputBuf + (pAggSup->resultRowSize - pAggSup->stateKeySize)); + pNextWin->pStateKey->bytes = pAggSup->stateKeySize - sizeof(SStateKeys); + pNextWin->pStateKey->type = pAggSup->stateKeyType; + pNextWin->pStateKey->pData = (char*)pNextWin->pStateKey + sizeof(SStateKeys); + pNextWin->pStateKey->isNull = false; + pNextWin->winInfo.isOutput = true; + } + pAggSup->stateStore.streamStateFreeCur(pCur); +} + +int32_t updateStateWindowInfo(SStateWindowInfo* pWinInfo, SStateWindowInfo* pNextWin, TSKEY* pTs, uint64_t groupId, + SColumnInfoData* pKeyCol, int32_t rows, int32_t start, bool* allEqual, + SSHashObj* pResultRows, SSHashObj* pSeUpdated, SSHashObj* pSeDeleted) { + *allEqual = true; + for (int32_t i = start; i < rows; ++i) { + char* pKeyData = colDataGetData(pKeyCol, i); + if (!isTsInWindow(pWinInfo, pTs[i])) { + if (isEqualStateKey(pWinInfo, pKeyData)) { + if (IS_VALID_SESSION_WIN(pNextWin->winInfo)) { + // ts belongs to the next window + if (pTs[i] 
>= pNextWin->winInfo.sessionWin.win.skey) { + return i - start; + } + } + } else { + return i - start; + } + } + + if (pWinInfo->winInfo.sessionWin.win.skey > pTs[i]) { + if (pSeDeleted && pWinInfo->winInfo.isOutput) { + saveDeleteRes(pSeDeleted, pWinInfo->winInfo.sessionWin); + } + removeSessionResult(pSeUpdated, pResultRows, pWinInfo->winInfo.sessionWin); + pWinInfo->winInfo.sessionWin.win.skey = pTs[i]; + } + pWinInfo->winInfo.sessionWin.win.ekey = TMAX(pWinInfo->winInfo.sessionWin.win.ekey, pTs[i]); + if (!isEqualStateKey(pWinInfo, pKeyData)) { + *allEqual = false; + } + } + return rows - start; +} + +static void doStreamStateAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSDataBlock, SSHashObj* pSeUpdated, + SSHashObj* pStDeleted) { + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + SStorageAPI* pAPI = &pOperator->pTaskInfo->storageAPI; + + SStreamStateAggOperatorInfo* pInfo = pOperator->info; + int32_t numOfOutput = pOperator->exprSupp.numOfExprs; + uint64_t groupId = pSDataBlock->info.id.groupId; + int64_t code = TSDB_CODE_SUCCESS; + TSKEY* tsCols = NULL; + SResultRow* pResult = NULL; + int32_t winRows = 0; + SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; + + pInfo->dataVersion = TMAX(pInfo->dataVersion, pSDataBlock->info.version); + pAggSup->winRange = pTaskInfo->streamInfo.fillHistoryWindow; + if (pAggSup->winRange.ekey <= 0) { + pAggSup->winRange.ekey = INT64_MAX; + } + + if (pSDataBlock->pDataBlock != NULL) { + SColumnInfoData* pColDataInfo = taosArrayGet(pSDataBlock->pDataBlock, pInfo->primaryTsIndex); + tsCols = (int64_t*)pColDataInfo->pData; + } else { + return; + } + + int32_t rows = pSDataBlock->info.rows; + blockDataEnsureCapacity(pAggSup->pScanBlock, rows); + SColumnInfoData* pKeyColInfo = taosArrayGet(pSDataBlock->pDataBlock, pInfo->stateCol.slotId); + for (int32_t i = 0; i < rows; i += winRows) { + if (pInfo->ignoreExpiredData && isOverdue(tsCols[i], &pInfo->twAggSup) || colDataIsNull_s(pKeyColInfo, i)) { + i++; + continue; + } + char* 
pKeyData = colDataGetData(pKeyColInfo, i); + int32_t winIndex = 0; + bool allEqual = true; + SStateWindowInfo curWin = {0}; + SStateWindowInfo nextWin = {0}; + setStateOutputBuf(pAggSup, tsCols[i], groupId, pKeyData, &curWin, &nextWin); + if (IS_VALID_SESSION_WIN(nextWin.winInfo)) { + releaseOutputBuf(pAggSup->pState, NULL, (SResultRow*)nextWin.winInfo.pOutputBuf, &pAPI->stateStore); + } + setSessionWinOutputInfo(pSeUpdated, &curWin.winInfo); + winRows = updateStateWindowInfo(&curWin, &nextWin, tsCols, groupId, pKeyColInfo, rows, i, &allEqual, + pAggSup->pResultRows, pSeUpdated, pStDeleted); + if (!allEqual) { + uint64_t uid = 0; + appendOneRowToStreamSpecialBlock(pAggSup->pScanBlock, &curWin.winInfo.sessionWin.win.skey, + &curWin.winInfo.sessionWin.win.ekey, &uid, &groupId, NULL); + tSimpleHashRemove(pSeUpdated, &curWin.winInfo.sessionWin, sizeof(SSessionKey)); + doDeleteSessionWindow(pAggSup, &curWin.winInfo.sessionWin); + releaseOutputBuf(pAggSup->pState, NULL, (SResultRow*)curWin.winInfo.pOutputBuf, &pAPI->stateStore); + continue; + } + code = doOneWindowAggImpl(&pInfo->twAggSup.timeWindowData, &curWin.winInfo, &pResult, i, winRows, rows, numOfOutput, + pOperator, 0); + if (code != TSDB_CODE_SUCCESS || pResult == NULL) { + T_LONG_JMP(pTaskInfo->env, TSDB_CODE_OUT_OF_MEMORY); + } + saveSessionOutputBuf(pAggSup, &curWin.winInfo); + + if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE) { + code = saveResult(curWin.winInfo, pSeUpdated); + if (code != TSDB_CODE_SUCCESS) { + T_LONG_JMP(pTaskInfo->env, TSDB_CODE_OUT_OF_MEMORY); + } + } + + if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_WINDOW_CLOSE) { + SSessionKey key = {0}; + getSessionHashKey(&curWin.winInfo.sessionWin, &key); + tSimpleHashPut(pAggSup->pResultRows, &key, sizeof(SSessionKey), &curWin.winInfo, sizeof(SResultWindowInfo)); + } + } +} + +static SSDataBlock* buildStateResult(SOperatorInfo* pOperator) { + SStreamStateAggOperatorInfo* pInfo = pOperator->info; + SOptrBasicInfo* pBInfo = 
&pInfo->binfo; + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + + doBuildDeleteDataBlock(pOperator, pInfo->pSeDeleted, pInfo->pDelRes, &pInfo->pDelIterator); + if (pInfo->pDelRes->info.rows > 0) { + printDataBlock(pInfo->pDelRes, getStreamOpName(pOperator->operatorType), GET_TASKID(pTaskInfo)); + return pInfo->pDelRes; + } + + doBuildSessionResult(pOperator, pInfo->streamAggSup.pState, &pInfo->groupResInfo, pBInfo->pRes); + if (pBInfo->pRes->info.rows > 0) { + printDataBlock(pBInfo->pRes, getStreamOpName(pOperator->operatorType), GET_TASKID(pTaskInfo)); + return pBInfo->pRes; + } + return NULL; +} + +static SSDataBlock* doStreamStateAgg(SOperatorInfo* pOperator) { + if (pOperator->status == OP_EXEC_DONE) { + return NULL; + } + + SExprSupp* pSup = &pOperator->exprSupp; + SStreamStateAggOperatorInfo* pInfo = pOperator->info; + SOptrBasicInfo* pBInfo = &pInfo->binfo; + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + qDebug("===stream=== stream state agg"); + if (pOperator->status == OP_RES_TO_RETURN) { + SSDataBlock* resBlock = buildStateResult(pOperator); + if (resBlock != NULL) { + return resBlock; + } + + setOperatorCompleted(pOperator); + return NULL; + } + + SOperatorInfo* downstream = pOperator->pDownstream[0]; + if (!pInfo->pUpdated) { + pInfo->pUpdated = taosArrayInit(16, sizeof(SSessionKey)); + } + if (!pInfo->pSeUpdated) { + _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); + pInfo->pSeUpdated = tSimpleHashInit(64, hashFn); + } + while (1) { + SSDataBlock* pBlock = downstream->fpSet.getNextFn(downstream); + if (pBlock == NULL) { + break; + } + printDataBlock(pBlock, "single state recv", GET_TASKID(pTaskInfo)); + + if (pBlock->info.type == STREAM_DELETE_DATA || pBlock->info.type == STREAM_DELETE_RESULT || + pBlock->info.type == STREAM_CLEAR) { + SArray* pWins = taosArrayInit(16, sizeof(SSessionKey)); + doDeleteTimeWindows(&pInfo->streamAggSup, pBlock, pWins); + removeSessionResults(pInfo->pSeUpdated, pWins); + 
copyDeleteWindowInfo(pWins, pInfo->pSeDeleted); + taosArrayDestroy(pWins); + continue; + } else if (pBlock->info.type == STREAM_GET_ALL) { + getAllSessionWindow(pInfo->streamAggSup.pResultRows, pInfo->pSeUpdated); + continue; + } else if (pBlock->info.type == STREAM_CREATE_CHILD_TABLE) { + return pBlock; + } else { + ASSERTS(pBlock->info.type == STREAM_NORMAL || pBlock->info.type == STREAM_INVALID, "invalid SSDataBlock type"); + } + + if (pInfo->scalarSupp.pExprInfo != NULL) { + SExprSupp* pExprSup = &pInfo->scalarSupp; + projectApplyFunctions(pExprSup->pExprInfo, pBlock, pBlock, pExprSup->pCtx, pExprSup->numOfExprs, NULL); + } + // the pDataBlock are always the same one, no need to call this again + setInputDataBlock(pSup, pBlock, TSDB_ORDER_ASC, MAIN_SCAN, true); + doStreamStateAggImpl(pOperator, pBlock, pInfo->pSeUpdated, pInfo->pSeDeleted); + pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, pBlock->info.window.ekey); + } + // restore the value + pOperator->status = OP_RES_TO_RETURN; + + closeSessionWindow(pInfo->streamAggSup.pResultRows, &pInfo->twAggSup, pInfo->pSeUpdated); + copyUpdateResult(pInfo->pSeUpdated, pInfo->pUpdated); + removeSessionResults(pInfo->pSeDeleted, pInfo->pUpdated); + tSimpleHashCleanup(pInfo->pSeUpdated); + pInfo->pSeUpdated = NULL; + + if (pInfo->isHistoryOp) { + getMaxTsWins(pInfo->pUpdated, pInfo->historyWins); + } + + initGroupResInfoFromArrayList(&pInfo->groupResInfo, pInfo->pUpdated); + pInfo->pUpdated = NULL; + blockDataEnsureCapacity(pInfo->binfo.pRes, pOperator->resultInfo.capacity); + + SSDataBlock* resBlock = buildStateResult(pOperator); + if (resBlock != NULL) { + return resBlock; + } + setOperatorCompleted(pOperator); + return NULL; +} + +void streamStateReleaseState(SOperatorInfo* pOperator) { + SStreamStateAggOperatorInfo* pInfo = pOperator->info; + int32_t resSize = taosArrayGetSize(pInfo->historyWins) * sizeof(SSessionKey); + qDebug("===stream=== relase state. 
save result count:%d", (int32_t)taosArrayGetSize(pInfo->historyWins)); + pInfo->streamAggSup.stateStore.streamStateSaveInfo(pInfo->streamAggSup.pState, STREAM_STATE_OP_STATE_NAME, + strlen(STREAM_STATE_OP_STATE_NAME), pInfo->historyWins->pData, + resSize); + SOperatorInfo* downstream = pOperator->pDownstream[0]; + if (downstream->fpSet.releaseStreamStateFn) { + downstream->fpSet.releaseStreamStateFn(downstream); + } +} + +static void compactStateWindow(SOperatorInfo* pOperator, SResultWindowInfo* pCurWin, SResultWindowInfo* pNextWin, + SSHashObj* pStUpdated, SSHashObj* pStDeleted) { + SExprSupp* pSup = &pOperator->exprSupp; + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + SStorageAPI* pAPI = &pOperator->pTaskInfo->storageAPI; + + SStreamStateAggOperatorInfo* pInfo = pOperator->info; + SResultRow* pCurResult = NULL; + int32_t numOfOutput = pOperator->exprSupp.numOfExprs; + SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; + initSessionOutputBuf(pCurWin, &pCurResult, pSup->pCtx, numOfOutput, pSup->rowEntryInfoOffset); + SResultRow* pWinResult = NULL; + initSessionOutputBuf(pNextWin, &pWinResult, pAggSup->pDummyCtx, numOfOutput, pSup->rowEntryInfoOffset); + pCurWin->sessionWin.win.ekey = TMAX(pCurWin->sessionWin.win.ekey, pNextWin->sessionWin.win.ekey); + + updateTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &pCurWin->sessionWin.win, 1); + compactFunctions(pSup->pCtx, pAggSup->pDummyCtx, numOfOutput, pTaskInfo, &pInfo->twAggSup.timeWindowData); + tSimpleHashRemove(pStUpdated, &pNextWin->sessionWin, sizeof(SSessionKey)); + if (pNextWin->isOutput && pStDeleted) { + qDebug("===stream=== save delete window info %" PRId64 ", %" PRIu64, pNextWin->sessionWin.win.skey, + pNextWin->sessionWin.groupId); + saveDeleteRes(pStDeleted, pNextWin->sessionWin); + } + removeSessionResult(pStUpdated, pAggSup->pResultRows, pNextWin->sessionWin); + doDeleteSessionWindow(pAggSup, &pNextWin->sessionWin); + taosMemoryFree(pNextWin->pOutputBuf); +} + +void 
streamStateReloadState(SOperatorInfo* pOperator) { + SStreamStateAggOperatorInfo* pInfo = pOperator->info; + SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; + resetWinRange(&pAggSup->winRange); + + SSessionKey seKey = {.win.skey = INT64_MIN, .win.ekey = INT64_MIN, .groupId = 0}; + int32_t size = 0; + void* pBuf = NULL; + int32_t code = pAggSup->stateStore.streamStateGetInfo(pAggSup->pState, STREAM_STATE_OP_STATE_NAME, + strlen(STREAM_STATE_OP_STATE_NAME), &pBuf, &size); + int32_t num = size / sizeof(SSessionKey); + qDebug("===stream=== reload state. get result count:%d", num); + SSessionKey* pSeKeyBuf = (SSessionKey*)pBuf; + ASSERT(size == num * sizeof(SSessionKey)); + if (!pInfo->pSeUpdated && num > 0) { + _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); + pInfo->pSeUpdated = tSimpleHashInit(64, hashFn); + } + if (!pInfo->pSeDeleted && num > 0) { + _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); + pInfo->pSeDeleted = tSimpleHashInit(64, hashFn); + } + for (int32_t i = 0; i < num; i++) { + SStateWindowInfo curInfo = {0}; + SStateWindowInfo nextInfo = {0}; + SStateWindowInfo dummy = {0}; + qDebug("===stream=== reload state. try process result %" PRId64 ", %" PRIu64 ", index:%d", pSeKeyBuf[i].win.skey, + pSeKeyBuf[i].groupId, i); + setStateOutputBuf(pAggSup, pSeKeyBuf[i].win.skey, pSeKeyBuf[i].groupId, NULL, &curInfo, &nextInfo); + bool cpRes = compareWinStateKey(curInfo.pStateKey, nextInfo.pStateKey); + qDebug("===stream=== reload state. next window info %" PRId64 ", %" PRIu64 ", compare:%d", + nextInfo.winInfo.sessionWin.win.skey, nextInfo.winInfo.sessionWin.groupId, cpRes); + if (cpRes) { + compactStateWindow(pOperator, &curInfo.winInfo, &nextInfo.winInfo, pInfo->pSeUpdated, pInfo->pSeDeleted); + qDebug("===stream=== reload state. 
save result %" PRId64 ", %" PRIu64, curInfo.winInfo.sessionWin.win.skey, + curInfo.winInfo.sessionWin.groupId); + if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE) { + saveResult(curInfo.winInfo, pInfo->pSeUpdated); + } else if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_WINDOW_CLOSE) { + if (!isCloseWindow(&curInfo.winInfo.sessionWin.win, &pInfo->twAggSup)) { + saveDeleteRes(pInfo->pSeDeleted, curInfo.winInfo.sessionWin); + } + SSessionKey key = {0}; + getSessionHashKey(&curInfo.winInfo.sessionWin, &key); + tSimpleHashPut(pAggSup->pResultRows, &key, sizeof(SSessionKey), &curInfo.winInfo, sizeof(SResultWindowInfo)); + } + } else if (IS_VALID_SESSION_WIN(nextInfo.winInfo)) { + releaseOutputBuf(pAggSup->pState, NULL, (SResultRow*)nextInfo.winInfo.pOutputBuf, + &pAggSup->pSessionAPI->stateStore); + } + + if (IS_VALID_SESSION_WIN(curInfo.winInfo)) { + saveSessionOutputBuf(pAggSup, &curInfo.winInfo); + } + } + taosMemoryFree(pBuf); + + SOperatorInfo* downstream = pOperator->pDownstream[0]; + if (downstream->fpSet.reloadStreamStateFn) { + downstream->fpSet.reloadStreamStateFn(downstream); + } +} + +SOperatorInfo* createStreamStateAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, + SExecTaskInfo* pTaskInfo, SReadHandle* pHandle) { + SStreamStateWinodwPhysiNode* pStateNode = (SStreamStateWinodwPhysiNode*)pPhyNode; + int32_t tsSlotId = ((SColumnNode*)pStateNode->window.pTspk)->slotId; + SColumnNode* pColNode = (SColumnNode*)(pStateNode->pStateKey); + int32_t code = TSDB_CODE_SUCCESS; + + SStreamStateAggOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SStreamStateAggOperatorInfo)); + SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo)); + if (pInfo == NULL || pOperator == NULL) { + code = TSDB_CODE_OUT_OF_MEMORY; + goto _error; + } + + pInfo->stateCol = extractColumnFromColumnNode(pColNode); + initResultSizeInfo(&pOperator->resultInfo, 4096); + if (pStateNode->window.pExprs != NULL) { + int32_t numOfScalar = 0; + SExprInfo* 
pScalarExprInfo = createExprInfo(pStateNode->window.pExprs, NULL, &numOfScalar); + code = initExprSupp(&pInfo->scalarSupp, pScalarExprInfo, numOfScalar, &pTaskInfo->storageAPI.functionStore); + if (code != TSDB_CODE_SUCCESS) { + goto _error; + } + } + + pInfo->twAggSup = (STimeWindowAggSupp){ + .waterMark = pStateNode->window.watermark, + .calTrigger = pStateNode->window.triggerType, + .maxTs = INT64_MIN, + .minTs = INT64_MAX, + }; + + initExecTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &pTaskInfo->window); + + SExprSupp* pSup = &pOperator->exprSupp; + int32_t numOfCols = 0; + SExprInfo* pExprInfo = createExprInfo(pStateNode->window.pFuncs, NULL, &numOfCols); + SSDataBlock* pResBlock = createDataBlockFromDescNode(pPhyNode->pOutputDataBlockDesc); + code = initBasicInfoEx(&pInfo->binfo, pSup, pExprInfo, numOfCols, pResBlock, &pTaskInfo->storageAPI.functionStore); + if (code != TSDB_CODE_SUCCESS) { + goto _error; + } + int32_t keySize = sizeof(SStateKeys) + pColNode->node.resType.bytes; + int16_t type = pColNode->node.resType.type; + code = initStreamAggSupporter(&pInfo->streamAggSup, pSup->pCtx, numOfCols, 0, pTaskInfo->streamInfo.pState, keySize, + type, &pTaskInfo->storageAPI.stateStore, pHandle, &pTaskInfo->storageAPI); + if (code != TSDB_CODE_SUCCESS) { + goto _error; + } + + pInfo->primaryTsIndex = tsSlotId; + _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); + pInfo->pSeDeleted = tSimpleHashInit(64, hashFn); + pInfo->pDelIterator = NULL; + pInfo->pDelRes = createSpecialDataBlock(STREAM_DELETE_RESULT); + pInfo->pChildren = NULL; + pInfo->ignoreExpiredData = pStateNode->window.igExpired; + pInfo->ignoreExpiredDataSaved = false; + pInfo->pUpdated = NULL; + pInfo->pSeUpdated = NULL; + pInfo->dataVersion = 0; + pInfo->historyWins = taosArrayInit(4, sizeof(SSessionKey)); + if (!pInfo->historyWins) { + goto _error; + } + if (pHandle) { + pInfo->isHistoryOp = pHandle->fillHistory; + } + + setOperatorInfo(pOperator, "StreamStateAggOperator", 
QUERY_NODE_PHYSICAL_PLAN_STREAM_STATE, true, OP_NOT_OPENED, + pInfo, pTaskInfo); + pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doStreamStateAgg, NULL, destroyStreamStateOperatorInfo, + optrDefaultBufFn, NULL, optrDefaultGetNextExtFn, NULL); + setOperatorStreamStateFn(pOperator, streamStateReleaseState, streamStateReloadState); + initDownStream(downstream, &pInfo->streamAggSup, pOperator->operatorType, pInfo->primaryTsIndex, &pInfo->twAggSup); + code = appendDownstream(pOperator, &downstream, 1); + if (code != TSDB_CODE_SUCCESS) { + goto _error; + } + return pOperator; + +_error: + destroyStreamStateOperatorInfo(pInfo); + taosMemoryFreeClear(pOperator); + pTaskInfo->code = code; + return NULL; +} + +static void cleanupAfterGroupResultGen(SMergeAlignedIntervalAggOperatorInfo* pMiaInfo, SSDataBlock* pRes) { + pRes->info.id.groupId = pMiaInfo->groupId; + pMiaInfo->curTs = INT64_MIN; + pMiaInfo->groupId = 0; +} + +static void setInverFunction(SqlFunctionCtx* pCtx, int32_t num, EStreamType type) { + for (int i = 0; i < num; i++) { + if (type == STREAM_INVERT) { + fmSetInvertFunc(pCtx[i].functionId, &(pCtx[i].fpSet)); + } else if (type == STREAM_NORMAL) { + fmSetNormalFunc(pCtx[i].functionId, &(pCtx[i].fpSet)); + } + } +} + +static SSDataBlock* doStreamIntervalAgg(SOperatorInfo* pOperator) { + SStreamIntervalOperatorInfo* pInfo = pOperator->info; + SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; + SStorageAPI* pAPI = &pOperator->pTaskInfo->storageAPI; + SExprSupp* pSup = &pOperator->exprSupp; + + qDebug("stask:%s %s status: %d", GET_TASKID(pTaskInfo), getStreamOpName(pOperator->operatorType), pOperator->status); + + if (pOperator->status == OP_EXEC_DONE) { + return NULL; + } + + if (pOperator->status == OP_RES_TO_RETURN) { + SSDataBlock* resBlock = buildIntervalResult(pOperator); + if (resBlock != NULL) { + return resBlock; + } + + if (pInfo->recvGetAll) { + pInfo->recvGetAll = false; + resetUnCloseWinInfo(pInfo->aggSup.pResultRowHashTable); + } + + 
setOperatorCompleted(pOperator); + if (pInfo->twAggSup.maxTs > 0 && + pInfo->twAggSup.maxTs - pInfo->twAggSup.checkPointInterval > pInfo->twAggSup.checkPointTs) { + pAPI->stateStore.streamStateCommit(pInfo->pState); + pAPI->stateStore.streamStateDeleteCheckPoint(pInfo->pState, pInfo->twAggSup.maxTs - pInfo->twAggSup.deleteMark); + setStreamDataVersion(pTaskInfo, pInfo->dataVersion, pInfo->pState->checkPointId); + pInfo->twAggSup.checkPointTs = pInfo->twAggSup.maxTs; + } + return NULL; + } + + SOperatorInfo* downstream = pOperator->pDownstream[0]; + + if (!pInfo->pUpdated) { + pInfo->pUpdated = taosArrayInit(4096, POINTER_BYTES); + } + + if (!pInfo->pUpdatedMap) { + _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); + pInfo->pUpdatedMap = tSimpleHashInit(4096, hashFn); + } + + while (1) { + SSDataBlock* pBlock = downstream->fpSet.getNextFn(downstream); + if (pBlock == NULL) { + qDebug("===stream===return data:%s. recv datablock num:%" PRIu64, getStreamOpName(pOperator->operatorType), + pInfo->numOfDatapack); + pInfo->numOfDatapack = 0; + break; + } + + pInfo->numOfDatapack++; + printSpecDataBlock(pBlock, getStreamOpName(pOperator->operatorType), "recv", GET_TASKID(pTaskInfo)); + + if (pBlock->info.type == STREAM_DELETE_DATA || pBlock->info.type == STREAM_DELETE_RESULT || + pBlock->info.type == STREAM_CLEAR) { + doDeleteWindows(pOperator, &pInfo->interval, pBlock, pInfo->pDelWins, pInfo->pUpdatedMap); + continue; + } else if (pBlock->info.type == STREAM_GET_ALL) { + qDebug("===stream===%s recv|block type STREAM_GET_ALL", getStreamOpName(pOperator->operatorType)); + pInfo->recvGetAll = true; + getAllIntervalWindow(pInfo->aggSup.pResultRowHashTable, pInfo->pUpdatedMap); + continue; + } else if (pBlock->info.type == STREAM_CREATE_CHILD_TABLE) { + printDataBlock(pBlock, getStreamOpName(pOperator->operatorType), GET_TASKID(pTaskInfo)); + return pBlock; + } else { + ASSERTS(pBlock->info.type == STREAM_NORMAL || pBlock->info.type == STREAM_INVALID, 
"invalid SSDataBlock type"); + } + + if (pBlock->info.type == STREAM_NORMAL && pBlock->info.version != 0) { + // set input version + pTaskInfo->version = pBlock->info.version; + } + + if (pInfo->scalarSupp.pExprInfo != NULL) { + SExprSupp* pExprSup = &pInfo->scalarSupp; + projectApplyFunctions(pExprSup->pExprInfo, pBlock, pBlock, pExprSup->pCtx, pExprSup->numOfExprs, NULL); + } + + // The timewindow that overlaps the timestamps of the input pBlock need to be recalculated and return to the + // caller. Note that all the time window are not close till now. + // the pDataBlock are always the same one, no need to call this again + setInputDataBlock(pSup, pBlock, TSDB_ORDER_ASC, MAIN_SCAN, true); + if (pInfo->invertible) { + setInverFunction(pSup->pCtx, pOperator->exprSupp.numOfExprs, pBlock->info.type); + } + + doStreamIntervalAggImpl(pOperator, pBlock, pBlock->info.id.groupId, pInfo->pUpdatedMap); + pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, pBlock->info.window.ekey); + pInfo->twAggSup.minTs = TMIN(pInfo->twAggSup.minTs, pBlock->info.window.skey); + } + pOperator->status = OP_RES_TO_RETURN; + removeDeleteResults(pInfo->pUpdatedMap, pInfo->pDelWins); + closeStreamIntervalWindow(pInfo->aggSup.pResultRowHashTable, &pInfo->twAggSup, &pInfo->interval, NULL, + pInfo->pUpdatedMap, pInfo->pDelWins, pOperator); + + void* pIte = NULL; + int32_t iter = 0; + while ((pIte = tSimpleHashIterate(pInfo->pUpdatedMap, pIte, &iter)) != NULL) { + taosArrayPush(pInfo->pUpdated, pIte); + } + taosArraySort(pInfo->pUpdated, winPosCmprImpl); + + initMultiResInfoFromArrayList(&pInfo->groupResInfo, pInfo->pUpdated); + pInfo->pUpdated = NULL; + blockDataEnsureCapacity(pInfo->binfo.pRes, pOperator->resultInfo.capacity); + tSimpleHashCleanup(pInfo->pUpdatedMap); + pInfo->pUpdatedMap = NULL; + + return buildIntervalResult(pOperator); +} + +SOperatorInfo* createStreamIntervalOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, + SExecTaskInfo* pTaskInfo) { + 
SStreamIntervalOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SStreamIntervalOperatorInfo)); + SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo)); + if (pInfo == NULL || pOperator == NULL) { + goto _error; + } + SStreamIntervalPhysiNode* pIntervalPhyNode = (SStreamIntervalPhysiNode*)pPhyNode; + + int32_t code = TSDB_CODE_SUCCESS; + int32_t numOfCols = 0; + SExprInfo* pExprInfo = createExprInfo(pIntervalPhyNode->window.pFuncs, NULL, &numOfCols); + + SSDataBlock* pResBlock = createDataBlockFromDescNode(pPhyNode->pOutputDataBlockDesc); + pInfo->interval = (SInterval){ + .interval = pIntervalPhyNode->interval, + .sliding = pIntervalPhyNode->sliding, + .intervalUnit = pIntervalPhyNode->intervalUnit, + .slidingUnit = pIntervalPhyNode->slidingUnit, + .offset = pIntervalPhyNode->offset, + .precision = ((SColumnNode*)pIntervalPhyNode->window.pTspk)->node.resType.precision, + }; + + pInfo->twAggSup = (STimeWindowAggSupp){ + .waterMark = pIntervalPhyNode->window.watermark, + .calTrigger = pIntervalPhyNode->window.triggerType, + .maxTs = INT64_MIN, + .minTs = INT64_MAX, + .deleteMark = getDeleteMark(pIntervalPhyNode), + .checkPointTs = 0, + .checkPointInterval = + convertTimePrecision(tsCheckpointInterval, TSDB_TIME_PRECISION_MILLI, pInfo->interval.precision), + }; + + ASSERTS(pInfo->twAggSup.calTrigger != STREAM_TRIGGER_MAX_DELAY, "trigger type should not be max delay"); + + pOperator->pTaskInfo = pTaskInfo; + SStorageAPI* pAPI = &pOperator->pTaskInfo->storageAPI; + + pInfo->ignoreExpiredData = pIntervalPhyNode->window.igExpired; + pInfo->ignoreExpiredDataSaved = false; + + SExprSupp* pSup = &pOperator->exprSupp; + initBasicInfo(&pInfo->binfo, pResBlock); + initExecTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &pTaskInfo->window); + + pInfo->primaryTsIndex = ((SColumnNode*)pIntervalPhyNode->window.pTspk)->slotId; + initResultSizeInfo(&pOperator->resultInfo, 4096); + + pInfo->pState = taosMemoryCalloc(1, sizeof(SStreamState)); + *(pInfo->pState) = 
*(pTaskInfo->streamInfo.pState); + pAPI->stateStore.streamStateSetNumber(pInfo->pState, -1); + + size_t keyBufSize = sizeof(int64_t) + sizeof(int64_t) + POINTER_BYTES; + code = initAggSup(pSup, &pInfo->aggSup, pExprInfo, numOfCols, keyBufSize, pTaskInfo->id.str, pInfo->pState, + &pTaskInfo->storageAPI.functionStore); + if (code != TSDB_CODE_SUCCESS) { + goto _error; + } + + if (pIntervalPhyNode->window.pExprs != NULL) { + int32_t numOfScalar = 0; + SExprInfo* pScalarExprInfo = createExprInfo(pIntervalPhyNode->window.pExprs, NULL, &numOfScalar); + code = initExprSupp(&pInfo->scalarSupp, pScalarExprInfo, numOfScalar, &pTaskInfo->storageAPI.functionStore); + if (code != TSDB_CODE_SUCCESS) { + goto _error; + } + } + + pInfo->invertible = false; + pInfo->pDelWins = taosArrayInit(4, sizeof(SWinKey)); + pInfo->delIndex = 0; + pInfo->pDelRes = createSpecialDataBlock(STREAM_DELETE_RESULT); + initResultRowInfo(&pInfo->binfo.resultRowInfo); + + pInfo->pPhyNode = NULL; // create new child + pInfo->pPullDataMap = NULL; + pInfo->pFinalPullDataMap = NULL; + pInfo->pPullWins = NULL; // SPullWindowInfo + pInfo->pullIndex = 0; + pInfo->pPullDataRes = NULL; + pInfo->numOfChild = 0; + pInfo->delKey.ts = INT64_MAX; + pInfo->delKey.groupId = 0; + pInfo->numOfDatapack = 0; + pInfo->pUpdated = NULL; + pInfo->pUpdatedMap = NULL; + int32_t funResSize = getMaxFunResSize(pSup, numOfCols); + + pInfo->pState->pFileState = pTaskInfo->storageAPI.stateStore.streamFileStateInit( + tsStreamBufferSize, sizeof(SWinKey), pInfo->aggSup.resultRowSize, funResSize, compareTs, pInfo->pState, + pInfo->twAggSup.deleteMark, GET_TASKID(pTaskInfo)); + + setOperatorInfo(pOperator, "StreamIntervalOperator", QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL, true, OP_NOT_OPENED, + pInfo, pTaskInfo); + pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doStreamIntervalAgg, NULL, + destroyStreamFinalIntervalOperatorInfo, optrDefaultBufFn, NULL, optrDefaultGetNextExtFn, NULL); + setOperatorStreamStateFn(pOperator, 
streamIntervalReleaseState, streamIntervalReloadState); + + pInfo->statestore = pTaskInfo->storageAPI.stateStore; + pInfo->recvGetAll = false; + + initIntervalDownStream(downstream, pPhyNode->type, pInfo); + code = appendDownstream(pOperator, &downstream, 1); + if (code != TSDB_CODE_SUCCESS) { + goto _error; + } + + return pOperator; + +_error: + destroyStreamFinalIntervalOperatorInfo(pInfo); + taosMemoryFreeClear(pOperator); + pTaskInfo->code = code; + return NULL; +} diff --git a/source/libs/executor/src/timesliceoperator.c b/source/libs/executor/src/timesliceoperator.c index 72e4e97322..abaebb1543 100644 --- a/source/libs/executor/src/timesliceoperator.c +++ b/source/libs/executor/src/timesliceoperator.c @@ -848,6 +848,10 @@ static void doHandleTimeslice(SOperatorInfo* pOperator, SSDataBlock* pBlock) { bool ignoreNull = getIgoreNullRes(pSup); int32_t order = TSDB_ORDER_ASC; + if (checkWindowBoundReached(pSliceInfo)) { + return; + } + int32_t code = initKeeperInfo(pSliceInfo, pBlock, &pOperator->exprSupp); if (code != TSDB_CODE_SUCCESS) { T_LONG_JMP(pTaskInfo->env, code); diff --git a/source/libs/executor/src/timewindowoperator.c b/source/libs/executor/src/timewindowoperator.c index 95f715a873..bd6352d719 100644 --- a/source/libs/executor/src/timewindowoperator.c +++ b/source/libs/executor/src/timewindowoperator.c @@ -26,17 +26,6 @@ #include "tlog.h" #include "ttime.h" -#define IS_FINAL_OP(op) ((op)->isFinal) -#define DEAULT_DELETE_MARK (1000LL * 60LL * 60LL * 24LL * 365LL * 10LL); -#define STREAM_INTERVAL_OP_STATE_NAME "StreamIntervalHistoryState" -#define STREAM_SESSION_OP_STATE_NAME "StreamSessionHistoryState" -#define STREAM_STATE_OP_STATE_NAME "StreamStateHistoryState" - -typedef struct SStateWindowInfo { - SResultWindowInfo winInfo; - SStateKeys* pStateKey; -} SStateWindowInfo; - typedef struct SSessionAggOperatorInfo { SOptrBasicInfo binfo; SAggSupporter aggSup; @@ -66,11 +55,6 @@ typedef enum SResultTsInterpType { RESULT_ROW_END_INTERP = 2, } 
SResultTsInterpType; -typedef struct SPullWindowInfo { - STimeWindow window; - uint64_t groupId; - STimeWindow calWin; -} SPullWindowInfo; typedef struct SOpenWindowInfo { SResultRowPosition pos; @@ -83,8 +67,6 @@ static SResultRowPosition addToOpenWindowList(SResultRowInfo* pResultRowInfo, co uint64_t groupId); static void doCloseWindow(SResultRowInfo* pResultRowInfo, const SIntervalAggOperatorInfo* pInfo, SResultRow* pResult); -static TSKEY getStartTsKey(STimeWindow* win, const TSKEY* tsCols) { return tsCols == NULL ? win->skey : tsCols[0]; } - static int32_t setTimeWindowOutputBuf(SResultRowInfo* pResultRowInfo, STimeWindow* win, bool masterscan, SResultRow** pResult, int64_t tableGroupId, SqlFunctionCtx* pCtx, int32_t numOfOutput, int32_t* rowEntryInfoOffset, SAggSupporter* pAggSup, @@ -106,15 +88,6 @@ static int32_t setTimeWindowOutputBuf(SResultRowInfo* pResultRowInfo, STimeWindo return TSDB_CODE_SUCCESS; } -static void updateTimeWindowInfo(SColumnInfoData* pColData, STimeWindow* pWin, int64_t delta) { - int64_t* ts = (int64_t*)pColData->pData; - - int64_t duration = pWin->ekey - pWin->skey + delta; - ts[2] = duration; // set the duration - ts[3] = pWin->skey; // window start key - ts[4] = pWin->ekey + delta; // window end key -} - static void doKeepTuple(SWindowRowsSup* pRowSup, int64_t ts, uint64_t groupId) { pRowSup->win.ekey = ts; pRowSup->prevTs = ts; @@ -426,7 +399,7 @@ bool inSlidingWindow(SInterval* pInterval, STimeWindow* pWin, SDataBlockInfo* pB return inCalSlidingWindow(pInterval, pWin, pBlockInfo->calWin.skey, pBlockInfo->calWin.ekey, pBlockInfo->type); } -static int32_t getNextQualifiedWindow(SInterval* pInterval, STimeWindow* pNext, SDataBlockInfo* pDataBlockInfo, +int32_t getNextQualifiedWindow(SInterval* pInterval, STimeWindow* pNext, SDataBlockInfo* pDataBlockInfo, TSKEY* primaryKeys, int32_t prevPosition, int32_t order) { bool ascQuery = (order == TSDB_ORDER_ASC); @@ -658,224 +631,6 @@ static void doInterpUnclosedTimeWindow(SOperatorInfo* 
pOperatorInfo, int32_t num } } -typedef int32_t (*__compare_fn_t)(void* pKey, void* data, int32_t index); - -int32_t binarySearchCom(void* keyList, int num, void* pKey, int order, __compare_fn_t comparefn) { - int firstPos = 0, lastPos = num - 1, midPos = -1; - int numOfRows = 0; - - if (num <= 0) return -1; - if (order == TSDB_ORDER_DESC) { - // find the first position which is smaller or equal than the key - while (1) { - if (comparefn(pKey, keyList, lastPos) >= 0) return lastPos; - if (comparefn(pKey, keyList, firstPos) == 0) return firstPos; - if (comparefn(pKey, keyList, firstPos) < 0) return firstPos - 1; - - numOfRows = lastPos - firstPos + 1; - midPos = (numOfRows >> 1) + firstPos; - - if (comparefn(pKey, keyList, midPos) < 0) { - lastPos = midPos - 1; - } else if (comparefn(pKey, keyList, midPos) > 0) { - firstPos = midPos + 1; - } else { - break; - } - } - - } else { - // find the first position which is bigger or equal than the key - while (1) { - if (comparefn(pKey, keyList, firstPos) <= 0) return firstPos; - if (comparefn(pKey, keyList, lastPos) == 0) return lastPos; - - if (comparefn(pKey, keyList, lastPos) > 0) { - lastPos = lastPos + 1; - if (lastPos >= num) - return -1; - else - return lastPos; - } - - numOfRows = lastPos - firstPos + 1; - midPos = (numOfRows >> 1) + firstPos; - - if (comparefn(pKey, keyList, midPos) < 0) { - lastPos = midPos - 1; - } else if (comparefn(pKey, keyList, midPos) > 0) { - firstPos = midPos + 1; - } else { - break; - } - } - } - - return midPos; -} - -typedef int64_t (*__get_value_fn_t)(void* data, int32_t index); - -int32_t binarySearch(void* keyList, int num, TSKEY key, int order, __get_value_fn_t getValuefn) { - int firstPos = 0, lastPos = num - 1, midPos = -1; - int numOfRows = 0; - - if (num <= 0) return -1; - if (order == TSDB_ORDER_DESC) { - // find the first position which is smaller or equal than the key - while (1) { - if (key >= getValuefn(keyList, lastPos)) return lastPos; - if (key == getValuefn(keyList, 
firstPos)) return firstPos; - if (key < getValuefn(keyList, firstPos)) return firstPos - 1; - - numOfRows = lastPos - firstPos + 1; - midPos = (numOfRows >> 1) + firstPos; - - if (key < getValuefn(keyList, midPos)) { - lastPos = midPos - 1; - } else if (key > getValuefn(keyList, midPos)) { - firstPos = midPos + 1; - } else { - break; - } - } - - } else { - // find the first position which is bigger or equal than the key - while (1) { - if (key <= getValuefn(keyList, firstPos)) return firstPos; - if (key == getValuefn(keyList, lastPos)) return lastPos; - - if (key > getValuefn(keyList, lastPos)) { - lastPos = lastPos + 1; - if (lastPos >= num) - return -1; - else - return lastPos; - } - - numOfRows = lastPos - firstPos + 1; - midPos = (numOfRows >> 1) + firstPos; - - if (key < getValuefn(keyList, midPos)) { - lastPos = midPos - 1; - } else if (key > getValuefn(keyList, midPos)) { - firstPos = midPos + 1; - } else { - break; - } - } - } - - return midPos; -} - -int32_t comparePullWinKey(void* pKey, void* data, int32_t index) { - SArray* res = (SArray*)data; - SPullWindowInfo* pos = taosArrayGet(res, index); - SPullWindowInfo* pData = (SPullWindowInfo*)pKey; - if (pData->groupId > pos->groupId) { - return 1; - } else if (pData->groupId < pos->groupId) { - return -1; - } - - if (pData->window.skey > pos->window.ekey) { - return 1; - } else if (pData->window.ekey < pos->window.skey) { - return -1; - } - return 0; -} - -static int32_t savePullWindow(SPullWindowInfo* pPullInfo, SArray* pPullWins) { - int32_t size = taosArrayGetSize(pPullWins); - int32_t index = binarySearchCom(pPullWins, size, pPullInfo, TSDB_ORDER_DESC, comparePullWinKey); - if (index == -1) { - index = 0; - } else { - int32_t code = comparePullWinKey(pPullInfo, pPullWins, index); - if (code == 0) { - SPullWindowInfo* pos = taosArrayGet(pPullWins, index); - pos->window.skey = TMIN(pos->window.skey, pPullInfo->window.skey); - pos->window.ekey = TMAX(pos->window.ekey, pPullInfo->window.ekey); - 
pos->calWin.skey = TMIN(pos->calWin.skey, pPullInfo->calWin.skey); - pos->calWin.ekey = TMAX(pos->calWin.ekey, pPullInfo->calWin.ekey); - return TSDB_CODE_SUCCESS; - } else if (code > 0) { - index++; - } - } - if (taosArrayInsert(pPullWins, index, pPullInfo) == NULL) { - return TSDB_CODE_OUT_OF_MEMORY; - } - return TSDB_CODE_SUCCESS; -} - -static int32_t saveResult(SResultWindowInfo winInfo, SSHashObj* pStUpdated) { - winInfo.sessionWin.win.ekey = winInfo.sessionWin.win.skey; - return tSimpleHashPut(pStUpdated, &winInfo.sessionWin, sizeof(SSessionKey), &winInfo, sizeof(SResultWindowInfo)); -} - -static int32_t saveWinResult(SWinKey* pKey, SRowBuffPos* pPos, SSHashObj* pUpdatedMap) { - tSimpleHashPut(pUpdatedMap, pKey, sizeof(SWinKey), &pPos, POINTER_BYTES); - return TSDB_CODE_SUCCESS; -} - -static int32_t saveWinResultInfo(TSKEY ts, uint64_t groupId, SRowBuffPos* pPos, SSHashObj* pUpdatedMap) { - SWinKey key = {.ts = ts, .groupId = groupId}; - saveWinResult(&key, pPos, pUpdatedMap); - return TSDB_CODE_SUCCESS; -} - -static void removeResults(SArray* pWins, SSHashObj* pUpdatedMap) { - int32_t size = taosArrayGetSize(pWins); - for (int32_t i = 0; i < size; i++) { - SWinKey* pW = taosArrayGet(pWins, i); - void* tmp = tSimpleHashGet(pUpdatedMap, pW, sizeof(SWinKey)); - if (tmp) { - void* value = *(void**)tmp; - taosMemoryFree(value); - tSimpleHashRemove(pUpdatedMap, pW, sizeof(SWinKey)); - } - } -} - -int32_t compareWinKey(void* pKey, void* data, int32_t index) { - void* pDataPos = taosArrayGet((SArray*)data, index); - return winKeyCmprImpl(pKey, pDataPos); -} - -static void removeDeleteResults(SSHashObj* pUpdatedMap, SArray* pDelWins) { - taosArraySort(pDelWins, winKeyCmprImpl); - taosArrayRemoveDuplicate(pDelWins, winKeyCmprImpl, NULL); - int32_t delSize = taosArrayGetSize(pDelWins); - if (tSimpleHashGetSize(pUpdatedMap) == 0 || delSize == 0) { - return; - } - void* pIte = NULL; - int32_t iter = 0; - while ((pIte = tSimpleHashIterate(pUpdatedMap, pIte, &iter)) != 
NULL) { - SWinKey* pResKey = tSimpleHashGetKey(pIte, NULL); - int32_t index = binarySearchCom(pDelWins, delSize, pResKey, TSDB_ORDER_DESC, compareWinKey); - if (index >= 0 && 0 == compareWinKey(pResKey, pDelWins, index)) { - taosArrayRemove(pDelWins, index); - delSize = taosArrayGetSize(pDelWins); - } - } -} - -bool isOverdue(TSKEY ekey, STimeWindowAggSupp* pTwSup) { - ASSERTS(pTwSup->maxTs == INT64_MIN || pTwSup->maxTs > 0, "maxts should greater than 0"); - return pTwSup->maxTs != INT64_MIN && ekey < pTwSup->maxTs - pTwSup->waterMark; -} - -bool isCloseWindow(STimeWindow* pWin, STimeWindowAggSupp* pTwSup) { return isOverdue(pWin->ekey, pTwSup); } - -bool needDeleteWindowBuf(STimeWindow* pWin, STimeWindowAggSupp* pTwSup) { - return pTwSup->maxTs != INT64_MIN && pWin->ekey < pTwSup->maxTs - pTwSup->deleteMark; -} - static bool tsKeyCompFn(void* l, void* r, void* param) { TSKEY* lTS = (TSKEY*)l; TSKEY* rTS = (TSKEY*)r; @@ -1127,18 +882,6 @@ static int32_t doOpenIntervalAgg(SOperatorInfo* pOperator) { return TSDB_CODE_SUCCESS; } -static bool compareVal(const char* v, const SStateKeys* pKey) { - if (IS_VAR_DATA_TYPE(pKey->type)) { - if (varDataLen(v) != varDataLen(pKey->pData)) { - return false; - } else { - return memcmp(varDataVal(v), varDataVal(pKey->pData), varDataLen(v)) == 0; - } - } else { - return memcmp(pKey->pData, v, pKey->bytes) == 0; - } -} - static void doStateWindowAggImpl(SOperatorInfo* pOperator, SStateWindowOperatorInfo* pInfo, SSDataBlock* pBlock) { SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; SExprSupp* pSup = &pOperator->exprSupp; @@ -1373,169 +1116,6 @@ static void doClearWindowImpl(SResultRowPosition* p1, SDiskbasedBuf* pResultBuf, releaseBufPage(pResultBuf, bufPage); } -static bool doDeleteWindow(SOperatorInfo* pOperator, TSKEY ts, uint64_t groupId) { - SStorageAPI* pAPI = &pOperator->pTaskInfo->storageAPI; - - SStreamIntervalOperatorInfo* pInfo = pOperator->info; - SWinKey key = {.ts = ts, .groupId = groupId}; - 
tSimpleHashRemove(pInfo->aggSup.pResultRowHashTable, &key, sizeof(SWinKey)); - pAPI->stateStore.streamStateDel(pInfo->pState, &key); - return true; -} - -static int32_t getChildIndex(SSDataBlock* pBlock) { return pBlock->info.childId; } - -static void doDeleteWindows(SOperatorInfo* pOperator, SInterval* pInterval, SSDataBlock* pBlock, SArray* pUpWins, - SSHashObj* pUpdatedMap) { - SStreamIntervalOperatorInfo* pInfo = pOperator->info; - SColumnInfoData* pStartTsCol = taosArrayGet(pBlock->pDataBlock, START_TS_COLUMN_INDEX); - TSKEY* startTsCols = (TSKEY*)pStartTsCol->pData; - SColumnInfoData* pEndTsCol = taosArrayGet(pBlock->pDataBlock, END_TS_COLUMN_INDEX); - TSKEY* endTsCols = (TSKEY*)pEndTsCol->pData; - SColumnInfoData* pCalStTsCol = taosArrayGet(pBlock->pDataBlock, CALCULATE_START_TS_COLUMN_INDEX); - TSKEY* calStTsCols = (TSKEY*)pCalStTsCol->pData; - SColumnInfoData* pCalEnTsCol = taosArrayGet(pBlock->pDataBlock, CALCULATE_END_TS_COLUMN_INDEX); - TSKEY* calEnTsCols = (TSKEY*)pCalEnTsCol->pData; - SColumnInfoData* pGpCol = taosArrayGet(pBlock->pDataBlock, GROUPID_COLUMN_INDEX); - uint64_t* pGpDatas = (uint64_t*)pGpCol->pData; - for (int32_t i = 0; i < pBlock->info.rows; i++) { - SResultRowInfo dumyInfo = {0}; - dumyInfo.cur.pageId = -1; - - STimeWindow win = {0}; - if (IS_FINAL_OP(pInfo)) { - win.skey = startTsCols[i]; - win.ekey = endTsCols[i]; - } else { - win = getActiveTimeWindow(NULL, &dumyInfo, startTsCols[i], pInterval, TSDB_ORDER_ASC); - } - - do { - if (!inCalSlidingWindow(pInterval, &win, calStTsCols[i], calEnTsCols[i], pBlock->info.type)) { - getNextTimeWindow(pInterval, &win, TSDB_ORDER_ASC); - continue; - } - uint64_t winGpId = pGpDatas[i]; - SWinKey winRes = {.ts = win.skey, .groupId = winGpId}; - void* chIds = taosHashGet(pInfo->pPullDataMap, &winRes, sizeof(SWinKey)); - if (chIds) { - int32_t childId = getChildIndex(pBlock); - SArray* chArray = *(void**)chIds; - int32_t index = taosArraySearchIdx(chArray, &childId, compareInt32Val, TD_EQ); - if 
(index != -1) { - qDebug("===stream===try push delete window%" PRId64 "chId:%d ,continue", win.skey, childId); - getNextTimeWindow(pInterval, &win, TSDB_ORDER_ASC); - continue; - } - } - bool res = doDeleteWindow(pOperator, win.skey, winGpId); - if (pUpWins && res) { - taosArrayPush(pUpWins, &winRes); - } - if (pUpdatedMap) { - tSimpleHashRemove(pUpdatedMap, &winRes, sizeof(SWinKey)); - } - getNextTimeWindow(pInterval, &win, TSDB_ORDER_ASC); - } while (win.ekey <= endTsCols[i]); - } -} - -static int32_t getAllIntervalWindow(SSHashObj* pHashMap, SSHashObj* resWins) { - void* pIte = NULL; - int32_t iter = 0; - while ((pIte = tSimpleHashIterate(pHashMap, pIte, &iter)) != NULL) { - SWinKey* pKey = tSimpleHashGetKey(pIte, NULL); - uint64_t groupId = pKey->groupId; - TSKEY ts = pKey->ts; - int32_t code = saveWinResultInfo(ts, groupId, *(SRowBuffPos**)pIte, resWins); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - return TSDB_CODE_SUCCESS; -} - -static int32_t closeStreamIntervalWindow(SSHashObj* pHashMap, STimeWindowAggSupp* pTwSup, SInterval* pInterval, - SHashObj* pPullDataMap, SSHashObj* closeWins, SArray* pDelWins, - SOperatorInfo* pOperator) { - qDebug("===stream===close interval window"); - void* pIte = NULL; - int32_t iter = 0; - SStreamIntervalOperatorInfo* pInfo = pOperator->info; - int32_t delSize = taosArrayGetSize(pDelWins); - while ((pIte = tSimpleHashIterate(pHashMap, pIte, &iter)) != NULL) { - void* key = tSimpleHashGetKey(pIte, NULL); - SWinKey* pWinKey = (SWinKey*)key; - if (delSize > 0) { - int32_t index = binarySearchCom(pDelWins, delSize, pWinKey, TSDB_ORDER_DESC, compareWinKey); - if (index >= 0 && 0 == compareWinKey(pWinKey, pDelWins, index)) { - taosArrayRemove(pDelWins, index); - delSize = taosArrayGetSize(pDelWins); - } - } - - void* chIds = taosHashGet(pPullDataMap, pWinKey, sizeof(SWinKey)); - STimeWindow win = { - .skey = pWinKey->ts, - .ekey = taosTimeAdd(win.skey, pInterval->interval, pInterval->intervalUnit, 
pInterval->precision) - 1, - }; - if (isCloseWindow(&win, pTwSup)) { - if (chIds && pPullDataMap) { - SArray* chAy = *(SArray**)chIds; - int32_t size = taosArrayGetSize(chAy); - qDebug("===stream===window %" PRId64 " wait child size:%d", pWinKey->ts, size); - for (int32_t i = 0; i < size; i++) { - qDebug("===stream===window %" PRId64 " wait child id:%d", pWinKey->ts, *(int32_t*)taosArrayGet(chAy, i)); - } - continue; - } else if (pPullDataMap) { - qDebug("===stream===close window %" PRId64, pWinKey->ts); - } - - if (pTwSup->calTrigger == STREAM_TRIGGER_WINDOW_CLOSE) { - int32_t code = saveWinResult(pWinKey, *(SRowBuffPos**)pIte, closeWins); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - tSimpleHashIterateRemove(pHashMap, pWinKey, sizeof(SWinKey), &pIte, &iter); - } - } - return TSDB_CODE_SUCCESS; -} - -STimeWindow getFinalTimeWindow(int64_t ts, SInterval* pInterval) { - STimeWindow w = {.skey = ts, .ekey = INT64_MAX}; - w.ekey = taosTimeAdd(w.skey, pInterval->interval, pInterval->intervalUnit, pInterval->precision) - 1; - return w; -} - -static void doBuildDeleteResult(SStreamIntervalOperatorInfo* pInfo, SArray* pWins, int32_t* index, - SSDataBlock* pBlock) { - blockDataCleanup(pBlock); - int32_t size = taosArrayGetSize(pWins); - if (*index == size) { - *index = 0; - taosArrayClear(pWins); - return; - } - blockDataEnsureCapacity(pBlock, size - *index); - uint64_t uid = 0; - for (int32_t i = *index; i < size; i++) { - SWinKey* pWin = taosArrayGet(pWins, i); - void* tbname = NULL; - pInfo->statestore.streamStateGetParName(pInfo->pState, pWin->groupId, &tbname); - if (tbname == NULL) { - appendOneRowToStreamSpecialBlock(pBlock, &pWin->ts, &pWin->ts, &uid, &pWin->groupId, NULL); - } else { - char parTbName[VARSTR_HEADER_SIZE + TSDB_TABLE_NAME_LEN]; - STR_WITH_MAXSIZE_TO_VARSTR(parTbName, tbname, sizeof(parTbName)); - appendOneRowToStreamSpecialBlock(pBlock, &pWin->ts, &pWin->ts, &uid, &pWin->groupId, parTbName); - } - 
pInfo->statestore.streamStateFreeVal(tbname); - (*index)++; - } -} static void destroyStateWindowOperatorInfo(void* param) { SStateWindowOperatorInfo* pInfo = (SStateWindowOperatorInfo*)param; @@ -1573,40 +1153,6 @@ void destroyIntervalOperatorInfo(void* param) { taosMemoryFreeClear(param); } -void destroyStreamFinalIntervalOperatorInfo(void* param) { - SStreamIntervalOperatorInfo* pInfo = (SStreamIntervalOperatorInfo*)param; - cleanupBasicInfo(&pInfo->binfo); - cleanupAggSup(&pInfo->aggSup); - // it should be empty. - void* pIte = NULL; - while ((pIte = taosHashIterate(pInfo->pPullDataMap, pIte)) != NULL) { - taosArrayDestroy(*(void**)pIte); - } - taosHashCleanup(pInfo->pPullDataMap); - taosHashCleanup(pInfo->pFinalPullDataMap); - taosArrayDestroy(pInfo->pPullWins); - blockDataDestroy(pInfo->pPullDataRes); - taosArrayDestroy(pInfo->pDelWins); - blockDataDestroy(pInfo->pDelRes); - pInfo->statestore.streamFileStateDestroy(pInfo->pState->pFileState); - taosMemoryFreeClear(pInfo->pState); - - nodesDestroyNode((SNode*)pInfo->pPhyNode); - colDataDestroy(&pInfo->twAggSup.timeWindowData); - pInfo->groupResInfo.pRows = taosArrayDestroy(pInfo->groupResInfo.pRows); - cleanupExprSupp(&pInfo->scalarSupp); - - taosMemoryFreeClear(param); -} - -static bool allInvertible(SqlFunctionCtx* pFCtx, int32_t numOfCols) { - for (int32_t i = 0; i < numOfCols; i++) { - if (fmIsUserDefinedFunc(pFCtx[i].functionId) || !fmIsInvertible(pFCtx[i].functionId)) { - return false; - } - } - return true; -} static bool timeWindowinterpNeeded(SqlFunctionCtx* pCtx, int32_t numOfCols, SIntervalAggOperatorInfo* pInfo) { // the primary timestamp column @@ -1662,25 +1208,6 @@ static bool timeWindowinterpNeeded(SqlFunctionCtx* pCtx, int32_t numOfCols, SInt return needed; } -void initIntervalDownStream(SOperatorInfo* downstream, uint16_t type, SStreamIntervalOperatorInfo* pInfo) { - SStateStore* pAPI = &downstream->pTaskInfo->storageAPI.stateStore; - - if (downstream->operatorType != 
QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) { - initIntervalDownStream(downstream->pDownstream[0], type, pInfo); - return; - } - - SStreamScanInfo* pScanInfo = downstream->info; - pScanInfo->windowSup.parentType = type; - pScanInfo->windowSup.pIntervalAggSup = &pInfo->aggSup; - if (!pScanInfo->pUpdateInfo) { - pScanInfo->pUpdateInfo = pAPI->updateInfoInitP(&pInfo->interval, pInfo->twAggSup.waterMark); - } - - pScanInfo->interval = pInfo->interval; - pScanInfo->twAggSup = pInfo->twAggSup; - pScanInfo->pState = pInfo->pState; -} void initStreamFunciton(SqlFunctionCtx* pCtx, int32_t numOfExpr) { for (int32_t i = 0; i < numOfExpr; i++) { @@ -2088,2540 +1615,6 @@ _error: return NULL; } -void compactFunctions(SqlFunctionCtx* pDestCtx, SqlFunctionCtx* pSourceCtx, int32_t numOfOutput, - SExecTaskInfo* pTaskInfo, SColumnInfoData* pTimeWindowData) { - for (int32_t k = 0; k < numOfOutput; ++k) { - if (fmIsWindowPseudoColumnFunc(pDestCtx[k].functionId)) { - if (!pTimeWindowData) { - continue; - } - - SResultRowEntryInfo* pEntryInfo = GET_RES_INFO(&pDestCtx[k]); - char* p = GET_ROWCELL_INTERBUF(pEntryInfo); - SColumnInfoData idata = {0}; - idata.info.type = TSDB_DATA_TYPE_BIGINT; - idata.info.bytes = tDataTypes[TSDB_DATA_TYPE_BIGINT].bytes; - idata.pData = p; - - SScalarParam out = {.columnData = &idata}; - SScalarParam tw = {.numOfRows = 5, .columnData = pTimeWindowData}; - pDestCtx[k].sfp.process(&tw, 1, &out); - pEntryInfo->numOfRes = 1; - } else if (functionNeedToExecute(&pDestCtx[k]) && pDestCtx[k].fpSet.combine != NULL) { - int32_t code = pDestCtx[k].fpSet.combine(&pDestCtx[k], &pSourceCtx[k]); - if (code != TSDB_CODE_SUCCESS) { - qError("%s apply combine functions error, code: %s", GET_TASKID(pTaskInfo), tstrerror(code)); - } - } else if (pDestCtx[k].fpSet.combine == NULL) { - char* funName = fmGetFuncName(pDestCtx[k].functionId); - qError("%s error, combine funcion for %s is not implemented", GET_TASKID(pTaskInfo), funName); - taosMemoryFreeClear(funName); - } - } -} - -bool 
hasIntervalWindow(void* pState, SWinKey* pKey, SStateStore* pStore) { return pStore->streamStateCheck(pState, pKey); } - -int32_t setIntervalOutputBuf(void* pState, STimeWindow* win, SRowBuffPos** pResult, int64_t groupId, - SqlFunctionCtx* pCtx, int32_t numOfOutput, int32_t* rowEntryInfoOffset, - SAggSupporter* pAggSup, SStateStore* pStore) { - - SWinKey key = { .ts = win->skey, .groupId = groupId }; - char* value = NULL; - int32_t size = pAggSup->resultRowSize; - - if (pStore->streamStateAddIfNotExist(pState, &key, (void**)&value, &size) < 0) { - return TSDB_CODE_OUT_OF_MEMORY; - } - - *pResult = (SRowBuffPos*)value; - SResultRow* res = (SResultRow*)((*pResult)->pRowBuff); - - // set time window for current result - res->win = (*win); - setResultRowInitCtx(res, pCtx, numOfOutput, rowEntryInfoOffset); - return TSDB_CODE_SUCCESS; -} - -bool isDeletedStreamWindow(STimeWindow* pWin, uint64_t groupId, void* pState, STimeWindowAggSupp* pTwSup, SStateStore* pStore) { - if (pTwSup->maxTs != INT64_MIN && pWin->ekey < pTwSup->maxTs - pTwSup->deleteMark) { - SWinKey key = {.ts = pWin->skey, .groupId = groupId}; - if (!hasIntervalWindow(pState, &key, pStore)) { - return true; - } - return false; - } - return false; -} - -int32_t getNexWindowPos(SInterval* pInterval, SDataBlockInfo* pBlockInfo, TSKEY* tsCols, int32_t startPos, TSKEY eKey, - STimeWindow* pNextWin) { - int32_t forwardRows = - getNumOfRowsInTimeWindow(pBlockInfo, tsCols, startPos, eKey, binarySearchForKey, NULL, TSDB_ORDER_ASC); - int32_t prevEndPos = forwardRows - 1 + startPos; - return getNextQualifiedWindow(pInterval, pNextWin, pBlockInfo, tsCols, prevEndPos, TSDB_ORDER_ASC); -} - -void addPullWindow(SHashObj* pMap, SWinKey* pWinRes, int32_t size) { - SArray* childIds = taosArrayInit(8, sizeof(int32_t)); - for (int32_t i = 0; i < size; i++) { - taosArrayPush(childIds, &i); - } - taosHashPut(pMap, pWinRes, sizeof(SWinKey), &childIds, sizeof(void*)); -} - -static void 
clearStreamIntervalOperator(SStreamIntervalOperatorInfo* pInfo) { - tSimpleHashClear(pInfo->aggSup.pResultRowHashTable); - clearDiskbasedBuf(pInfo->aggSup.pResultBuf); - initResultRowInfo(&pInfo->binfo.resultRowInfo); - pInfo->aggSup.currentPageId = -1; - pInfo->statestore.streamStateClear(pInfo->pState); -} - -static void clearSpecialDataBlock(SSDataBlock* pBlock) { - if (pBlock->info.rows <= 0) { - return; - } - blockDataCleanup(pBlock); -} - -static void doBuildPullDataBlock(SArray* array, int32_t* pIndex, SSDataBlock* pBlock) { - clearSpecialDataBlock(pBlock); - int32_t size = taosArrayGetSize(array); - if (size - (*pIndex) == 0) { - return; - } - blockDataEnsureCapacity(pBlock, size - (*pIndex)); - SColumnInfoData* pStartTs = (SColumnInfoData*)taosArrayGet(pBlock->pDataBlock, START_TS_COLUMN_INDEX); - SColumnInfoData* pEndTs = (SColumnInfoData*)taosArrayGet(pBlock->pDataBlock, END_TS_COLUMN_INDEX); - SColumnInfoData* pGroupId = (SColumnInfoData*)taosArrayGet(pBlock->pDataBlock, GROUPID_COLUMN_INDEX); - SColumnInfoData* pCalStartTs = (SColumnInfoData*)taosArrayGet(pBlock->pDataBlock, CALCULATE_START_TS_COLUMN_INDEX); - SColumnInfoData* pCalEndTs = (SColumnInfoData*)taosArrayGet(pBlock->pDataBlock, CALCULATE_END_TS_COLUMN_INDEX); - for (; (*pIndex) < size; (*pIndex)++) { - SPullWindowInfo* pWin = taosArrayGet(array, (*pIndex)); - colDataSetVal(pStartTs, pBlock->info.rows, (const char*)&pWin->window.skey, false); - colDataSetVal(pEndTs, pBlock->info.rows, (const char*)&pWin->window.ekey, false); - colDataSetVal(pGroupId, pBlock->info.rows, (const char*)&pWin->groupId, false); - colDataSetVal(pCalStartTs, pBlock->info.rows, (const char*)&pWin->calWin.skey, false); - colDataSetVal(pCalEndTs, pBlock->info.rows, (const char*)&pWin->calWin.ekey, false); - pBlock->info.rows++; - } - if ((*pIndex) == size) { - *pIndex = 0; - taosArrayClear(array); - } - blockDataUpdateTsWindow(pBlock, 0); -} - -void processPullOver(SSDataBlock* pBlock, SHashObj* pMap, SHashObj* 
pFinalMap, SInterval* pInterval, SArray* pPullWins, int32_t numOfCh, SOperatorInfo* pOperator) { - SColumnInfoData* pStartCol = taosArrayGet(pBlock->pDataBlock, CALCULATE_START_TS_COLUMN_INDEX); - TSKEY* tsData = (TSKEY*)pStartCol->pData; - SColumnInfoData* pEndCol = taosArrayGet(pBlock->pDataBlock, CALCULATE_END_TS_COLUMN_INDEX); - TSKEY* tsEndData = (TSKEY*)pEndCol->pData; - SColumnInfoData* pGroupCol = taosArrayGet(pBlock->pDataBlock, GROUPID_COLUMN_INDEX); - uint64_t* groupIdData = (uint64_t*)pGroupCol->pData; - int32_t chId = getChildIndex(pBlock); - for (int32_t i = 0; i < pBlock->info.rows; i++) { - TSKEY winTs = tsData[i]; - while (winTs <= tsEndData[i]) { - SWinKey winRes = {.ts = winTs, .groupId = groupIdData[i]}; - void* chIds = taosHashGet(pMap, &winRes, sizeof(SWinKey)); - if (chIds) { - SArray* chArray = *(SArray**)chIds; - int32_t index = taosArraySearchIdx(chArray, &chId, compareInt32Val, TD_EQ); - if (index != -1) { - qDebug("===stream===retrive window %" PRId64 " delete child id %d", winRes.ts, chId); - taosArrayRemove(chArray, index); - if (taosArrayGetSize(chArray) == 0) { - // pull data is over - taosArrayDestroy(chArray); - taosHashRemove(pMap, &winRes, sizeof(SWinKey)); - qDebug("===stream===retrive pull data over.window %" PRId64 , winRes.ts); - - void* pFinalCh = taosHashGet(pFinalMap, &winRes, sizeof(SWinKey)); - if (pFinalCh) { - taosHashRemove(pFinalMap, &winRes, sizeof(SWinKey)); - doDeleteWindow(pOperator, winRes.ts, winRes.groupId); - STimeWindow nextWin = getFinalTimeWindow(winRes.ts, pInterval); - SPullWindowInfo pull = {.window = nextWin, - .groupId = winRes.groupId, - .calWin.skey = nextWin.skey, - .calWin.ekey = nextWin.skey}; - // add pull data request - if (savePullWindow(&pull, pPullWins) == TSDB_CODE_SUCCESS) { - addPullWindow(pMap, &winRes, numOfCh); - qDebug("===stream===prepare final retrive for delete %" PRId64 ", size:%d", winRes.ts, numOfCh); - } - } - } - } - } - winTs = taosTimeAdd(winTs, pInterval->sliding, 
pInterval->slidingUnit, pInterval->precision); - } - } -} - -static void addRetriveWindow(SArray* wins, SStreamIntervalOperatorInfo* pInfo, int32_t childId) { - int32_t size = taosArrayGetSize(wins); - for (int32_t i = 0; i < size; i++) { - SWinKey* winKey = taosArrayGet(wins, i); - STimeWindow nextWin = getFinalTimeWindow(winKey->ts, &pInfo->interval); - if (isOverdue(nextWin.ekey, &pInfo->twAggSup) && pInfo->ignoreExpiredData) { - continue; - } - void* chIds = taosHashGet(pInfo->pPullDataMap, winKey, sizeof(SWinKey)); - if (!chIds) { - SPullWindowInfo pull = { - .window = nextWin, .groupId = winKey->groupId, .calWin.skey = nextWin.skey, .calWin.ekey = nextWin.skey}; - // add pull data request - if (savePullWindow(&pull, pInfo->pPullWins) == TSDB_CODE_SUCCESS) { - addPullWindow(pInfo->pPullDataMap, winKey, pInfo->numOfChild); - qDebug("===stream===prepare retrive for delete %" PRId64 ", size:%d", winKey->ts, pInfo->numOfChild); - } - } else { - SArray* chArray = *(void**)chIds; - int32_t index = taosArraySearchIdx(chArray, &childId, compareInt32Val, TD_EQ); - qDebug("===stream===check final retrive %" PRId64",chid:%d", winKey->ts, index); - if (index == -1) { - qDebug("===stream===add final retrive %" PRId64, winKey->ts); - taosHashPut(pInfo->pFinalPullDataMap, winKey, sizeof(SWinKey), NULL, 0); - } - } - } -} - -static void clearFunctionContext(SExprSupp* pSup) { - for (int32_t i = 0; i < pSup->numOfExprs; i++) { - pSup->pCtx[i].saveHandle.currentPage = -1; - } -} - -int32_t getOutputBuf(void* pState, SRowBuffPos* pPos, SResultRow** pResult, SStateStore* pStore) { - return pStore->streamStateGetByPos(pState, pPos, (void**)pResult); -} - -int32_t buildDataBlockFromGroupRes(SOperatorInfo* pOperator, void* pState, SSDataBlock* pBlock, SExprSupp* pSup, - SGroupResInfo* pGroupResInfo) { - SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; - SStorageAPI* pAPI = &pOperator->pTaskInfo->storageAPI; - - SExprInfo* pExprInfo = pSup->pExprInfo; - int32_t numOfExprs = 
pSup->numOfExprs; - int32_t* rowEntryOffset = pSup->rowEntryInfoOffset; - SqlFunctionCtx* pCtx = pSup->pCtx; - - int32_t numOfRows = getNumOfTotalRes(pGroupResInfo); - - for (int32_t i = pGroupResInfo->index; i < numOfRows; i += 1) { - SRowBuffPos* pPos = *(SRowBuffPos**)taosArrayGet(pGroupResInfo->pRows, i); - SResultRow* pRow = NULL; - int32_t code = getOutputBuf(pState, pPos, &pRow, &pAPI->stateStore); - uint64_t groupId = ((SWinKey*)pPos->pKey)->groupId; - ASSERT(code == 0); - doUpdateNumOfRows(pCtx, pRow, numOfExprs, rowEntryOffset); - // no results, continue to check the next one - if (pRow->numOfRows == 0) { - pGroupResInfo->index += 1; - continue; - } - if (pBlock->info.id.groupId == 0) { - pBlock->info.id.groupId = groupId; - void* tbname = NULL; - if (pAPI->stateStore.streamStateGetParName(pTaskInfo->streamInfo.pState, pBlock->info.id.groupId, &tbname) < 0) { - pBlock->info.parTbName[0] = 0; - } else { - memcpy(pBlock->info.parTbName, tbname, TSDB_TABLE_NAME_LEN); - } - pAPI->stateStore.streamStateFreeVal(tbname); - } else { - // current value belongs to different group, it can't be packed into one datablock - if (pBlock->info.id.groupId != groupId) { - break; - } - } - - if (pBlock->info.rows + pRow->numOfRows > pBlock->info.capacity) { - ASSERT(pBlock->info.rows > 0); - break; - } - pGroupResInfo->index += 1; - - for (int32_t j = 0; j < numOfExprs; ++j) { - int32_t slotId = pExprInfo[j].base.resSchema.slotId; - - pCtx[j].resultInfo = getResultEntryInfo(pRow, j, rowEntryOffset); - SResultRowEntryInfo* pEnryInfo = pCtx[j].resultInfo; - - if (pCtx[j].fpSet.finalize) { - int32_t code1 = pCtx[j].fpSet.finalize(&pCtx[j], pBlock); - if (TAOS_FAILED(code1)) { - qError("%s build result data block error, code %s", GET_TASKID(pTaskInfo), tstrerror(code1)); - T_LONG_JMP(pTaskInfo->env, code1); - } - } else if (strcmp(pCtx[j].pExpr->pExpr->_function.functionName, "_select_value") == 0) { - // do nothing, todo refactor - } else { - // expand the result into multiple 
rows. E.g., _wstart, top(k, 20) - // the _wstart needs to copy to 20 following rows, since the results of top-k expands to 20 different rows. - SColumnInfoData* pColInfoData = taosArrayGet(pBlock->pDataBlock, slotId); - char* in = GET_ROWCELL_INTERBUF(pCtx[j].resultInfo); - for (int32_t k = 0; k < pRow->numOfRows; ++k) { - colDataSetVal(pColInfoData, pBlock->info.rows + k, in, pCtx[j].resultInfo->isNullRes); - } - } - } - - pBlock->info.rows += pRow->numOfRows; - } - - pBlock->info.dataLoad = 1; - blockDataUpdateTsWindow(pBlock, 0); - return TSDB_CODE_SUCCESS; -} - -void doBuildStreamIntervalResult(SOperatorInfo* pOperator, void* pState, SSDataBlock* pBlock, - SGroupResInfo* pGroupResInfo) { - SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; - // set output datablock version - pBlock->info.version = pTaskInfo->version; - - blockDataCleanup(pBlock); - if (!hasRemainResults(pGroupResInfo)) { - return; - } - - // clear the existed group id - pBlock->info.id.groupId = 0; - buildDataBlockFromGroupRes(pOperator, pState, pBlock, &pOperator->exprSupp, pGroupResInfo); -} - -static int32_t getNextQualifiedFinalWindow(SInterval* pInterval, STimeWindow* pNext, SDataBlockInfo* pDataBlockInfo, - TSKEY* primaryKeys, int32_t prevPosition) { - int32_t startPos = prevPosition + 1; - if (startPos == pDataBlockInfo->rows) { - startPos = -1; - } else { - *pNext = getFinalTimeWindow(primaryKeys[startPos], pInterval); - } - return startPos; -} - -static void setStreamDataVersion(SExecTaskInfo* pTaskInfo, int64_t version, int64_t ckId) { - pTaskInfo->streamInfo.dataVersion = version; - pTaskInfo->streamInfo.checkPointId = ckId; -} - -static void doStreamIntervalAggImpl(SOperatorInfo* pOperatorInfo, SSDataBlock* pSDataBlock, uint64_t groupId, - SSHashObj* pUpdatedMap) { - SStreamIntervalOperatorInfo* pInfo = (SStreamIntervalOperatorInfo*)pOperatorInfo->info; - pInfo->dataVersion = TMAX(pInfo->dataVersion, pSDataBlock->info.version); - - SResultRowInfo* pResultRowInfo = 
&(pInfo->binfo.resultRowInfo); - SExecTaskInfo* pTaskInfo = pOperatorInfo->pTaskInfo; - SExprSupp* pSup = &pOperatorInfo->exprSupp; - int32_t numOfOutput = pSup->numOfExprs; - int32_t step = 1; - TSKEY* tsCols = NULL; - SRowBuffPos* pResPos = NULL; - SResultRow* pResult = NULL; - int32_t forwardRows = 0; - - SColumnInfoData* pColDataInfo = taosArrayGet(pSDataBlock->pDataBlock, pInfo->primaryTsIndex); - tsCols = (int64_t*)pColDataInfo->pData; - - int32_t startPos = 0; - TSKEY ts = getStartTsKey(&pSDataBlock->info.window, tsCols); - STimeWindow nextWin = {0}; - if (IS_FINAL_OP(pInfo)) { - nextWin = getFinalTimeWindow(ts, &pInfo->interval); - } else { - nextWin = getActiveTimeWindow(pInfo->aggSup.pResultBuf, pResultRowInfo, ts, &pInfo->interval, TSDB_ORDER_ASC); - } - while (1) { - bool isClosed = isCloseWindow(&nextWin, &pInfo->twAggSup); - if ((pInfo->ignoreExpiredData && isClosed && !IS_FINAL_OP(pInfo)) || !inSlidingWindow(&pInfo->interval, &nextWin, &pSDataBlock->info)) { - startPos = getNexWindowPos(&pInfo->interval, &pSDataBlock->info, tsCols, startPos, nextWin.ekey, &nextWin); - if (startPos < 0) { - break; - } - continue; - } - - if (IS_FINAL_OP(pInfo) && pInfo->numOfChild > 0) { - bool ignore = true; - SWinKey winRes = { - .ts = nextWin.skey, - .groupId = groupId, - }; - void* chIds = taosHashGet(pInfo->pPullDataMap, &winRes, sizeof(SWinKey)); - if (isDeletedStreamWindow(&nextWin, groupId, pInfo->pState, &pInfo->twAggSup, &pInfo->statestore) && isClosed && !chIds) { - SPullWindowInfo pull = { - .window = nextWin, .groupId = groupId, .calWin.skey = nextWin.skey, .calWin.ekey = nextWin.skey}; - // add pull data request - if (savePullWindow(&pull, pInfo->pPullWins) == TSDB_CODE_SUCCESS) { - addPullWindow(pInfo->pPullDataMap, &winRes, pInfo->numOfChild); - } - } else { - int32_t index = -1; - SArray* chArray = NULL; - int32_t chId = 0; - if (chIds) { - chArray = *(void**)chIds; - chId = getChildIndex(pSDataBlock); - index = taosArraySearchIdx(chArray, &chId, 
compareInt32Val, TD_EQ); - } - if (index == -1 || pSDataBlock->info.type == STREAM_PULL_DATA) { - ignore = false; - } - } - - if (ignore) { - startPos = getNextQualifiedFinalWindow(&pInfo->interval, &nextWin, &pSDataBlock->info, tsCols, startPos); - if (startPos < 0) { - break; - } - continue; - } - } - - int32_t code = setIntervalOutputBuf(pInfo->pState, &nextWin, &pResPos, groupId, pSup->pCtx, numOfOutput, - pSup->rowEntryInfoOffset, &pInfo->aggSup, &pInfo->statestore); - pResult = (SResultRow*)pResPos->pRowBuff; - if (code != TSDB_CODE_SUCCESS || pResult == NULL) { - T_LONG_JMP(pTaskInfo->env, TSDB_CODE_OUT_OF_MEMORY); - } - if (IS_FINAL_OP(pInfo)) { - forwardRows = 1; - } else { - forwardRows = getNumOfRowsInTimeWindow(&pSDataBlock->info, tsCols, startPos, nextWin.ekey, binarySearchForKey, - NULL, TSDB_ORDER_ASC); - } - - SWinKey key = { - .ts = pResult->win.skey, - .groupId = groupId, - }; - if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE && pUpdatedMap) { - saveWinResult(&key, pResPos, pUpdatedMap); - } - - if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_WINDOW_CLOSE) { - tSimpleHashPut(pInfo->aggSup.pResultRowHashTable, &key, sizeof(SWinKey), &pResPos, POINTER_BYTES); - } - - updateTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &nextWin, 1); - applyAggFunctionOnPartialTuples(pTaskInfo, pSup->pCtx, &pInfo->twAggSup.timeWindowData, startPos, forwardRows, - pSDataBlock->info.rows, numOfOutput); - key.ts = nextWin.skey; - - if (pInfo->delKey.ts > key.ts) { - pInfo->delKey = key; - } - int32_t prevEndPos = (forwardRows - 1) * step + startPos; - if (pSDataBlock->info.window.skey <= 0 || pSDataBlock->info.window.ekey <= 0) { - qError("table uid %" PRIu64 " data block timestamp range may not be calculated! 
minKey %" PRId64 - ",maxKey %" PRId64, - pSDataBlock->info.id.uid, pSDataBlock->info.window.skey, pSDataBlock->info.window.ekey); - blockDataUpdateTsWindow(pSDataBlock, 0); - - // timestamp of the data is incorrect - if (pSDataBlock->info.window.skey <= 0 || pSDataBlock->info.window.ekey <= 0) { - qError("table uid %" PRIu64 " data block timestamp is out of range! minKey %" PRId64 ",maxKey %" PRId64, - pSDataBlock->info.id.uid, pSDataBlock->info.window.skey, pSDataBlock->info.window.ekey); - } - } - - if (IS_FINAL_OP(pInfo)) { - startPos = getNextQualifiedFinalWindow(&pInfo->interval, &nextWin, &pSDataBlock->info, tsCols, prevEndPos); - } else { - startPos = - getNextQualifiedWindow(&pInfo->interval, &nextWin, &pSDataBlock->info, tsCols, prevEndPos, TSDB_ORDER_ASC); - } - if (startPos < 0) { - break; - } - } -} - -static inline int winPosCmprImpl(const void* pKey1, const void* pKey2) { - SRowBuffPos* pos1 = *(SRowBuffPos**)pKey1; - SRowBuffPos* pos2 = *(SRowBuffPos**)pKey2; - SWinKey* pWin1 = (SWinKey*)pos1->pKey; - SWinKey* pWin2 = (SWinKey*)pos2->pKey; - - if (pWin1->groupId > pWin2->groupId) { - return 1; - } else if (pWin1->groupId < pWin2->groupId) { - return -1; - } - - if (pWin1->ts > pWin2->ts) { - return 1; - } else if (pWin1->ts < pWin2->ts) { - return -1; - } - - return 0; -} - -static void resetUnCloseWinInfo(SSHashObj* winMap) { - void* pIte = NULL; - int32_t iter = 0; - while ((pIte = tSimpleHashIterate(winMap, pIte, &iter)) != NULL) { - SRowBuffPos* pPos = *(SRowBuffPos**)pIte; - pPos->beUsed = true; - } -} - -static SSDataBlock* doStreamFinalIntervalAgg(SOperatorInfo* pOperator) { - SStreamIntervalOperatorInfo* pInfo = pOperator->info; - SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; - SStorageAPI* pAPI = &pOperator->pTaskInfo->storageAPI; - - SOperatorInfo* downstream = pOperator->pDownstream[0]; - SExprSupp* pSup = &pOperator->exprSupp; - - qDebug("interval status %d %s", pOperator->status, IS_FINAL_OP(pInfo) ? 
"interval final" : "interval semi"); - - if (pOperator->status == OP_EXEC_DONE) { - return NULL; - } else if (pOperator->status == OP_RES_TO_RETURN) { - doBuildPullDataBlock(pInfo->pPullWins, &pInfo->pullIndex, pInfo->pPullDataRes); - if (pInfo->pPullDataRes->info.rows != 0) { - // process the rest of the data - printDataBlock(pInfo->pPullDataRes, IS_FINAL_OP(pInfo) ? "interval final" : "interval semi"); - return pInfo->pPullDataRes; - } - - doBuildDeleteResult(pInfo, pInfo->pDelWins, &pInfo->delIndex, pInfo->pDelRes); - if (pInfo->pDelRes->info.rows != 0) { - // process the rest of the data - printDataBlock(pInfo->pDelRes, IS_FINAL_OP(pInfo) ? "interval final" : "interval semi"); - return pInfo->pDelRes; - } - - doBuildStreamIntervalResult(pOperator, pInfo->pState, pInfo->binfo.pRes, &pInfo->groupResInfo); - if (pInfo->binfo.pRes->info.rows != 0) { - printDataBlock(pInfo->binfo.pRes, IS_FINAL_OP(pInfo) ? "interval final" : "interval semi"); - return pInfo->binfo.pRes; - } - - if (pInfo->recvGetAll) { - pInfo->recvGetAll = false; - resetUnCloseWinInfo(pInfo->aggSup.pResultRowHashTable); - } - - setOperatorCompleted(pOperator); - if (!IS_FINAL_OP(pInfo)) { - clearFunctionContext(&pOperator->exprSupp); - // semi interval operator clear disk buffer - clearStreamIntervalOperator(pInfo); - setStreamDataVersion(pTaskInfo, pInfo->dataVersion, pInfo->pState->checkPointId); - qDebug("===stream===clear semi operator"); - } else { - if (pInfo->twAggSup.maxTs > 0 && - pInfo->twAggSup.maxTs - pInfo->twAggSup.checkPointInterval > pInfo->twAggSup.checkPointTs) { - pAPI->stateStore.streamStateCommit(pInfo->pState); - pAPI->stateStore.streamStateDeleteCheckPoint(pInfo->pState, pInfo->twAggSup.maxTs - pInfo->twAggSup.deleteMark); - pInfo->twAggSup.checkPointTs = pInfo->twAggSup.maxTs; - } - qDebug("===stream===interval final close"); - } - return NULL; - } else { - if (!IS_FINAL_OP(pInfo)) { - doBuildDeleteResult(pInfo, pInfo->pDelWins, &pInfo->delIndex, pInfo->pDelRes); - if 
(pInfo->pDelRes->info.rows != 0) { - // process the rest of the data - printDataBlock(pInfo->pDelRes, IS_FINAL_OP(pInfo) ? "interval final" : "interval semi"); - return pInfo->pDelRes; - } - } - } - - if (!pInfo->pUpdated) { - pInfo->pUpdated = taosArrayInit(4096, POINTER_BYTES); - } - if (!pInfo->pUpdatedMap) { - _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); - pInfo->pUpdatedMap = tSimpleHashInit(4096, hashFn); - } - - while (1) { - if (isTaskKilled(pTaskInfo)) { - if (pInfo->pUpdated != NULL) { - pInfo->pUpdated = taosArrayDestroy(pInfo->pUpdated); - } - - if (pInfo->pUpdatedMap != NULL) { - tSimpleHashCleanup(pInfo->pUpdatedMap); - pInfo->pUpdatedMap = NULL; - } - - T_LONG_JMP(pTaskInfo->env, pTaskInfo->code); - } - - SSDataBlock* pBlock = getNextBlockFromDownstream(pOperator, 0); - if (pBlock == NULL) { - pOperator->status = OP_RES_TO_RETURN; - qDebug("===stream===return data:%s. recv datablock num:%" PRIu64, - IS_FINAL_OP(pInfo) ? "interval final" : "interval semi", pInfo->numOfDatapack); - pInfo->numOfDatapack = 0; - break; - } - pInfo->numOfDatapack++; - printDataBlock(pBlock, IS_FINAL_OP(pInfo) ? 
"interval final recv" : "interval semi recv"); - - if (pBlock->info.type == STREAM_NORMAL || pBlock->info.type == STREAM_PULL_DATA) { - pInfo->binfo.pRes->info.type = pBlock->info.type; - } else if (pBlock->info.type == STREAM_DELETE_DATA || pBlock->info.type == STREAM_DELETE_RESULT || - pBlock->info.type == STREAM_CLEAR) { - SArray* delWins = taosArrayInit(8, sizeof(SWinKey)); - doDeleteWindows(pOperator, &pInfo->interval, pBlock, delWins, pInfo->pUpdatedMap); - if (IS_FINAL_OP(pInfo)) { - int32_t chId = getChildIndex(pBlock); - addRetriveWindow(delWins, pInfo, chId); - if (pBlock->info.type != STREAM_CLEAR) { - taosArrayAddAll(pInfo->pDelWins, delWins); - } - taosArrayDestroy(delWins); - continue; - } - removeResults(delWins, pInfo->pUpdatedMap); - taosArrayAddAll(pInfo->pDelWins, delWins); - taosArrayDestroy(delWins); - - doBuildDeleteResult(pInfo, pInfo->pDelWins, &pInfo->delIndex, pInfo->pDelRes); - if (pInfo->pDelRes->info.rows != 0) { - // process the rest of the data - printDataBlock(pInfo->pDelRes, IS_FINAL_OP(pInfo) ? 
"interval final" : "interval semi"); - if (pBlock->info.type == STREAM_CLEAR) { - pInfo->pDelRes->info.type = STREAM_CLEAR; - } else { - pInfo->pDelRes->info.type = STREAM_DELETE_RESULT; - } - return pInfo->pDelRes; - } - - break; - } else if (pBlock->info.type == STREAM_GET_ALL && IS_FINAL_OP(pInfo)) { - pInfo->recvGetAll = true; - getAllIntervalWindow(pInfo->aggSup.pResultRowHashTable, pInfo->pUpdatedMap); - continue; - } else if (pBlock->info.type == STREAM_RETRIEVE && !IS_FINAL_OP(pInfo)) { - doDeleteWindows(pOperator, &pInfo->interval, pBlock, NULL, pInfo->pUpdatedMap); - if (taosArrayGetSize(pInfo->pUpdated) > 0) { - break; - } - continue; - } else if (pBlock->info.type == STREAM_PULL_OVER && IS_FINAL_OP(pInfo)) { - processPullOver(pBlock, pInfo->pPullDataMap, pInfo->pFinalPullDataMap, &pInfo->interval, pInfo->pPullWins, pInfo->numOfChild, pOperator); - continue; - } else if (pBlock->info.type == STREAM_CREATE_CHILD_TABLE) { - return pBlock; - } else { - ASSERTS(pBlock->info.type == STREAM_INVALID, "invalid SSDataBlock type"); - } - - if (pInfo->scalarSupp.pExprInfo != NULL) { - SExprSupp* pExprSup = &pInfo->scalarSupp; - projectApplyFunctions(pExprSup->pExprInfo, pBlock, pBlock, pExprSup->pCtx, pExprSup->numOfExprs, NULL); - } - setInputDataBlock(pSup, pBlock, TSDB_ORDER_ASC, MAIN_SCAN, true); - doStreamIntervalAggImpl(pOperator, pBlock, pBlock->info.id.groupId, pInfo->pUpdatedMap); - pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, pBlock->info.window.ekey); - pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, pBlock->info.watermark); - pInfo->twAggSup.minTs = TMIN(pInfo->twAggSup.minTs, pBlock->info.window.skey); - } - - removeDeleteResults(pInfo->pUpdatedMap, pInfo->pDelWins); - if (IS_FINAL_OP(pInfo)) { - closeStreamIntervalWindow(pInfo->aggSup.pResultRowHashTable, &pInfo->twAggSup, &pInfo->interval, - pInfo->pPullDataMap, pInfo->pUpdatedMap, pInfo->pDelWins, pOperator); - } - pInfo->binfo.pRes->info.watermark = pInfo->twAggSup.maxTs; - - void* pIte 
= NULL; - int32_t iter = 0; - while ((pIte = tSimpleHashIterate(pInfo->pUpdatedMap, pIte, &iter)) != NULL) { - taosArrayPush(pInfo->pUpdated, pIte); - } - - tSimpleHashCleanup(pInfo->pUpdatedMap); - pInfo->pUpdatedMap = NULL; - taosArraySort(pInfo->pUpdated, winPosCmprImpl); - - initMultiResInfoFromArrayList(&pInfo->groupResInfo, pInfo->pUpdated); - pInfo->pUpdated = NULL; - blockDataEnsureCapacity(pInfo->binfo.pRes, pOperator->resultInfo.capacity); - - doBuildPullDataBlock(pInfo->pPullWins, &pInfo->pullIndex, pInfo->pPullDataRes); - if (pInfo->pPullDataRes->info.rows != 0) { - // process the rest of the data - printDataBlock(pInfo->pPullDataRes, IS_FINAL_OP(pInfo) ? "interval final" : "interval semi"); - return pInfo->pPullDataRes; - } - - doBuildDeleteResult(pInfo, pInfo->pDelWins, &pInfo->delIndex, pInfo->pDelRes); - if (pInfo->pDelRes->info.rows != 0) { - // process the rest of the data - printDataBlock(pInfo->pDelRes, IS_FINAL_OP(pInfo) ? "interval final" : "interval semi"); - return pInfo->pDelRes; - } - - doBuildStreamIntervalResult(pOperator, pInfo->pState, pInfo->binfo.pRes, &pInfo->groupResInfo); - if (pInfo->binfo.pRes->info.rows != 0) { - printDataBlock(pInfo->binfo.pRes, IS_FINAL_OP(pInfo) ? 
"interval final" : "interval semi"); - return pInfo->binfo.pRes; - } - - return NULL; -} - -int64_t getDeleteMark(SIntervalPhysiNode* pIntervalPhyNode) { - if (pIntervalPhyNode->window.deleteMark <= 0) { - return DEAULT_DELETE_MARK; - } - int64_t deleteMark = TMAX(pIntervalPhyNode->window.deleteMark, pIntervalPhyNode->window.watermark); - deleteMark = TMAX(deleteMark, pIntervalPhyNode->interval); - return deleteMark; -} - -TSKEY compareTs(void* pKey) { - SWinKey* pWinKey = (SWinKey*)pKey; - return pWinKey->ts; -} - -int32_t getSelectivityBufSize(SqlFunctionCtx* pCtx) { - if (pCtx->subsidiaries.rowLen == 0) { - int32_t rowLen = 0; - for (int32_t j = 0; j < pCtx->subsidiaries.num; ++j) { - SqlFunctionCtx* pc = pCtx->subsidiaries.pCtx[j]; - rowLen += pc->pExpr->base.resSchema.bytes; - } - - return rowLen + pCtx->subsidiaries.num * sizeof(bool); - } else { - return pCtx->subsidiaries.rowLen; - } -} - -int32_t getMaxFunResSize(SExprSupp* pSup, int32_t numOfCols) { - int32_t size = 0; - for (int32_t i = 0; i < numOfCols; ++i) { - int32_t resSize = getSelectivityBufSize(pSup->pCtx + i); - size = TMAX(size, resSize); - } - return size; -} - -void streamIntervalReleaseState(SOperatorInfo* pOperator) { - if (pOperator->operatorType != QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_INTERVAL) { - SStreamIntervalOperatorInfo* pInfo = pOperator->info; - int32_t resSize = sizeof(TSKEY); - pInfo->statestore.streamStateSaveInfo(pInfo->pState, STREAM_INTERVAL_OP_STATE_NAME, strlen(STREAM_INTERVAL_OP_STATE_NAME), &pInfo->twAggSup.maxTs, resSize); - } - SStreamIntervalOperatorInfo* pInfo = pOperator->info; - SStorageAPI* pAPI = &pOperator->pTaskInfo->storageAPI; - pAPI->stateStore.streamStateCommit(pInfo->pState); - SOperatorInfo* downstream = pOperator->pDownstream[0]; - if (downstream->fpSet.releaseStreamStateFn) { - downstream->fpSet.releaseStreamStateFn(downstream); - } -} - -void streamIntervalReloadState(SOperatorInfo* pOperator) { - if (pOperator->operatorType != 
QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_INTERVAL) { - SStreamIntervalOperatorInfo* pInfo = pOperator->info; - int32_t size = 0; - void* pBuf = NULL; - int32_t code = pInfo->statestore.streamStateGetInfo(pInfo->pState, STREAM_INTERVAL_OP_STATE_NAME, - strlen(STREAM_INTERVAL_OP_STATE_NAME), &pBuf, &size); - TSKEY ts = *(TSKEY*)pBuf; - taosMemoryFree(pBuf); - pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, ts); - pInfo->statestore.streamStateReloadInfo(pInfo->pState, ts); - } - SOperatorInfo* downstream = pOperator->pDownstream[0]; - if (downstream->fpSet.reloadStreamStateFn) { - downstream->fpSet.reloadStreamStateFn(downstream); - } -} - -SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, - SExecTaskInfo* pTaskInfo, int32_t numOfChild) { - SIntervalPhysiNode* pIntervalPhyNode = (SIntervalPhysiNode*)pPhyNode; - SStreamIntervalOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SStreamIntervalOperatorInfo)); - SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo)); - if (pInfo == NULL || pOperator == NULL) { - goto _error; - } - - pOperator->pTaskInfo = pTaskInfo; - SStorageAPI* pAPI = &pTaskInfo->storageAPI; - - pInfo->interval = (SInterval){.interval = pIntervalPhyNode->interval, - .sliding = pIntervalPhyNode->sliding, - .intervalUnit = pIntervalPhyNode->intervalUnit, - .slidingUnit = pIntervalPhyNode->slidingUnit, - .offset = pIntervalPhyNode->offset, - .precision = ((SColumnNode*)pIntervalPhyNode->window.pTspk)->node.resType.precision}; - pInfo->twAggSup = (STimeWindowAggSupp){ - .waterMark = pIntervalPhyNode->window.watermark, - .calTrigger = pIntervalPhyNode->window.triggerType, - .maxTs = INT64_MIN, - .minTs = INT64_MAX, - .deleteMark = getDeleteMark(pIntervalPhyNode), - .deleteMarkSaved = 0, - .calTriggerSaved = 0, - .checkPointTs = 0, - .checkPointInterval = - convertTimePrecision(tsCheckpointInterval, TSDB_TIME_PRECISION_MILLI, pInfo->interval.precision), - }; - ASSERTS(pInfo->twAggSup.calTrigger 
!= STREAM_TRIGGER_MAX_DELAY, "trigger type should not be max delay"); - pInfo->primaryTsIndex = ((SColumnNode*)pIntervalPhyNode->window.pTspk)->slotId; - size_t keyBufSize = sizeof(int64_t) + sizeof(int64_t) + POINTER_BYTES; - initResultSizeInfo(&pOperator->resultInfo, 4096); - if (pIntervalPhyNode->window.pExprs != NULL) { - int32_t numOfScalar = 0; - SExprInfo* pScalarExprInfo = createExprInfo(pIntervalPhyNode->window.pExprs, NULL, &numOfScalar); - int32_t code = initExprSupp(&pInfo->scalarSupp, pScalarExprInfo, numOfScalar, &pTaskInfo->storageAPI.functionStore); - if (code != TSDB_CODE_SUCCESS) { - goto _error; - } - } - - int32_t numOfCols = 0; - SExprInfo* pExprInfo = createExprInfo(pIntervalPhyNode->window.pFuncs, NULL, &numOfCols); - SSDataBlock* pResBlock = createDataBlockFromDescNode(pPhyNode->pOutputDataBlockDesc); - initBasicInfo(&pInfo->binfo, pResBlock); - - pInfo->pState = taosMemoryCalloc(1, sizeof(SStreamState)); - *(pInfo->pState) = *(pTaskInfo->streamInfo.pState); - - pAPI->stateStore.streamStateSetNumber(pInfo->pState, -1); - int32_t code = initAggSup(&pOperator->exprSupp, &pInfo->aggSup, pExprInfo, numOfCols, keyBufSize, pTaskInfo->id.str, - pInfo->pState, &pTaskInfo->storageAPI.functionStore); - if (code != TSDB_CODE_SUCCESS) { - goto _error; - } - - initStreamFunciton(pOperator->exprSupp.pCtx, pOperator->exprSupp.numOfExprs); - initExecTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &pTaskInfo->window); - initResultRowInfo(&pInfo->binfo.resultRowInfo); - - pInfo->numOfChild = numOfChild; - - pInfo->pPhyNode = (SPhysiNode*)nodesCloneNode((SNode*)pPhyNode); - - if (pPhyNode->type == QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_INTERVAL) { - pInfo->isFinal = true; - pOperator->name = "StreamFinalIntervalOperator"; - } else { - // semi interval operator does not catch result - pInfo->isFinal = false; - pOperator->name = "StreamSemiIntervalOperator"; - } - - if (!IS_FINAL_OP(pInfo) || numOfChild == 0) { - pInfo->twAggSup.calTrigger = 
STREAM_TRIGGER_AT_ONCE; - } - - pInfo->pPullWins = taosArrayInit(8, sizeof(SPullWindowInfo)); - pInfo->pullIndex = 0; - _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); - pInfo->pPullDataMap = taosHashInit(64, hashFn, false, HASH_NO_LOCK); - pInfo->pFinalPullDataMap = taosHashInit(64, hashFn, false, HASH_NO_LOCK); - pInfo->pPullDataRes = createSpecialDataBlock(STREAM_RETRIEVE); - pInfo->ignoreExpiredData = pIntervalPhyNode->window.igExpired; - pInfo->ignoreExpiredDataSaved = false; - pInfo->pDelRes = createSpecialDataBlock(STREAM_DELETE_RESULT); - pInfo->delIndex = 0; - pInfo->pDelWins = taosArrayInit(4, sizeof(SWinKey)); - pInfo->delKey.ts = INT64_MAX; - pInfo->delKey.groupId = 0; - pInfo->numOfDatapack = 0; - pInfo->pUpdated = NULL; - pInfo->pUpdatedMap = NULL; - int32_t funResSize= getMaxFunResSize(&pOperator->exprSupp, numOfCols); - pInfo->pState->pFileState = pAPI->stateStore.streamFileStateInit(tsStreamBufferSize, sizeof(SWinKey), pInfo->aggSup.resultRowSize, funResSize, - compareTs, pInfo->pState, pInfo->twAggSup.deleteMark, GET_TASKID(pTaskInfo)); - pInfo->dataVersion = 0; - pInfo->statestore = pTaskInfo->storageAPI.stateStore; - pInfo->recvGetAll = false; - - pOperator->operatorType = pPhyNode->type; - pOperator->blocking = true; - pOperator->status = OP_NOT_OPENED; - pOperator->info = pInfo; - - pOperator->fpSet = createOperatorFpSet(NULL, doStreamFinalIntervalAgg, NULL, destroyStreamFinalIntervalOperatorInfo, - optrDefaultBufFn, NULL, optrDefaultGetNextExtFn, NULL); - setOperatorStreamStateFn(pOperator, streamIntervalReleaseState, streamIntervalReloadState); - if (pPhyNode->type == QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_INTERVAL) { - initIntervalDownStream(downstream, pPhyNode->type, pInfo); - } - code = appendDownstream(pOperator, &downstream, 1); - if (code != TSDB_CODE_SUCCESS) { - goto _error; - } - - return pOperator; - -_error: - destroyStreamFinalIntervalOperatorInfo(pInfo); - taosMemoryFreeClear(pOperator); - pTaskInfo->code = 
code; - return NULL; -} - -void destroyStreamAggSupporter(SStreamAggSupporter* pSup) { - tSimpleHashCleanup(pSup->pResultRows); - destroyDiskbasedBuf(pSup->pResultBuf); - blockDataDestroy(pSup->pScanBlock); - taosMemoryFreeClear(pSup->pState); - taosMemoryFreeClear(pSup->pDummyCtx); -} - -void destroyStreamSessionAggOperatorInfo(void* param) { - SStreamSessionAggOperatorInfo* pInfo = (SStreamSessionAggOperatorInfo*)param; - cleanupBasicInfo(&pInfo->binfo); - destroyStreamAggSupporter(&pInfo->streamAggSup); - - if (pInfo->pChildren != NULL) { - int32_t size = taosArrayGetSize(pInfo->pChildren); - for (int32_t i = 0; i < size; i++) { - SOperatorInfo* pChild = taosArrayGetP(pInfo->pChildren, i); - destroyOperator(pChild); - } - taosArrayDestroy(pInfo->pChildren); - } - - colDataDestroy(&pInfo->twAggSup.timeWindowData); - blockDataDestroy(pInfo->pDelRes); - blockDataDestroy(pInfo->pWinBlock); - blockDataDestroy(pInfo->pUpdateRes); - tSimpleHashCleanup(pInfo->pStUpdated); - tSimpleHashCleanup(pInfo->pStDeleted); - - taosArrayDestroy(pInfo->historyWins); - taosMemoryFreeClear(param); -} - -int32_t initBasicInfoEx(SOptrBasicInfo* pBasicInfo, SExprSupp* pSup, SExprInfo* pExprInfo, int32_t numOfCols, - SSDataBlock* pResultBlock, SFunctionStateStore* pStore) { - initBasicInfo(pBasicInfo, pResultBlock); - int32_t code = initExprSupp(pSup, pExprInfo, numOfCols, pStore); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - initStreamFunciton(pSup->pCtx, pSup->numOfExprs); - for (int32_t i = 0; i < numOfCols; ++i) { - pSup->pCtx[i].saveHandle.pBuf = NULL; - } - - ASSERT(numOfCols > 0); - return TSDB_CODE_SUCCESS; -} - -void initDummyFunction(SqlFunctionCtx* pDummy, SqlFunctionCtx* pCtx, int32_t nums) { - for (int i = 0; i < nums; i++) { - pDummy[i].functionId = pCtx[i].functionId; - pDummy[i].isNotNullFunc = pCtx[i].isNotNullFunc; - pDummy[i].isPseudoFunc = pCtx[i].isPseudoFunc; - } -} - -void initDownStream(SOperatorInfo* downstream, SStreamAggSupporter* pAggSup, uint16_t 
type, int32_t tsColIndex, - STimeWindowAggSupp* pTwSup) { - if (downstream->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_PARTITION) { - SStreamPartitionOperatorInfo* pScanInfo = downstream->info; - pScanInfo->tsColIndex = tsColIndex; - } - - if (downstream->operatorType != QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) { - initDownStream(downstream->pDownstream[0], pAggSup, type, tsColIndex, pTwSup); - return; - } - SStreamScanInfo* pScanInfo = downstream->info; - pScanInfo->windowSup = (SWindowSupporter){.pStreamAggSup = pAggSup, .gap = pAggSup->gap, .parentType = type}; - pScanInfo->pState = pAggSup->pState; - if (!pScanInfo->pUpdateInfo) { - pScanInfo->pUpdateInfo = pAggSup->stateStore.updateInfoInit(60000, TSDB_TIME_PRECISION_MILLI, pTwSup->waterMark); - } - pScanInfo->twAggSup = *pTwSup; -} - -int32_t initStreamAggSupporter(SStreamAggSupporter* pSup, SqlFunctionCtx* pCtx, int32_t numOfOutput, int64_t gap, - SStreamState* pState, int32_t keySize, int16_t keyType, SStateStore* pStore, SReadHandle* pHandle, SStorageAPI* pApi) { - pSup->resultRowSize = keySize + getResultRowSize(pCtx, numOfOutput); - pSup->pScanBlock = createSpecialDataBlock(STREAM_CLEAR); - pSup->gap = gap; - pSup->stateKeySize = keySize; - pSup->stateKeyType = keyType; - pSup->pDummyCtx = (SqlFunctionCtx*)taosMemoryCalloc(numOfOutput, sizeof(SqlFunctionCtx)); - if (pSup->pDummyCtx == NULL) { - return TSDB_CODE_OUT_OF_MEMORY; - } - - pSup->stateStore = *pStore; - - initDummyFunction(pSup->pDummyCtx, pCtx, numOfOutput); - pSup->pState = taosMemoryCalloc(1, sizeof(SStreamState)); - *(pSup->pState) = *pState; - pSup->stateStore.streamStateSetNumber(pSup->pState, -1); - - _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); - pSup->pResultRows = tSimpleHashInit(32, hashFn); - - int32_t pageSize = 4096; - while (pageSize < pSup->resultRowSize * 4) { - pageSize <<= 1u; - } - // at least four pages need to be in buffer - int32_t bufSize = 4096 * 256; - if (bufSize <= pageSize) { - bufSize = 
pageSize * 4; - } - - if (!osTempSpaceAvailable()) { - terrno = TSDB_CODE_NO_DISKSPACE; - qError("Init stream agg supporter failed since %s, tempDir:%s", terrstr(), tsTempDir); - return terrno; - } - - int32_t code = createDiskbasedBuf(&pSup->pResultBuf, pageSize, bufSize, "function", tsTempDir); - for (int32_t i = 0; i < numOfOutput; ++i) { - pCtx[i].saveHandle.pBuf = pSup->pResultBuf; - } - - pSup->pSessionAPI = pApi; - - return TSDB_CODE_SUCCESS; -} - -bool isInTimeWindow(STimeWindow* pWin, TSKEY ts, int64_t gap) { - if (ts + gap >= pWin->skey && ts - gap <= pWin->ekey) { - return true; - } - return false; -} - -bool isInWindow(SResultWindowInfo* pWinInfo, TSKEY ts, int64_t gap) { - return isInTimeWindow(&pWinInfo->sessionWin.win, ts, gap); -} - -void getCurSessionWindow(SStreamAggSupporter* pAggSup, TSKEY startTs, TSKEY endTs, uint64_t groupId, - SSessionKey* pKey) { - pKey->win.skey = startTs; - pKey->win.ekey = endTs; - pKey->groupId = groupId; - int32_t code = pAggSup->stateStore.streamStateSessionGetKeyByRange(pAggSup->pState, pKey, pKey); - if (code != TSDB_CODE_SUCCESS) { - SET_SESSION_WIN_KEY_INVALID(pKey); - } -} - -bool isInvalidSessionWin(SResultWindowInfo* pWinInfo) { return pWinInfo->sessionWin.win.skey == 0; } - -bool inWinRange(STimeWindow* range, STimeWindow* cur) { - if (cur->skey >= range->skey && cur->ekey <= range->ekey) { - return true; - } - return false; -} - -void setSessionOutputBuf(SStreamAggSupporter* pAggSup, TSKEY startTs, TSKEY endTs, uint64_t groupId, - SResultWindowInfo* pCurWin) { - pCurWin->sessionWin.groupId = groupId; - pCurWin->sessionWin.win.skey = startTs; - pCurWin->sessionWin.win.ekey = endTs; - int32_t size = pAggSup->resultRowSize; - int32_t code = pAggSup->stateStore.streamStateSessionAddIfNotExist(pAggSup->pState, &pCurWin->sessionWin, - pAggSup->gap, &pCurWin->pOutputBuf, &size); - if (code == TSDB_CODE_SUCCESS && !inWinRange(&pAggSup->winRange, &pCurWin->sessionWin.win)) { - code = TSDB_CODE_FAILED; - 
releaseOutputBuf(pAggSup->pState, NULL, (SResultRow*)pCurWin->pOutputBuf, &pAggSup->pSessionAPI->stateStore); - pCurWin->pOutputBuf = taosMemoryCalloc(1, size); - } - - if (code == TSDB_CODE_SUCCESS) { - pCurWin->isOutput = true; - pAggSup->stateStore.streamStateSessionDel(pAggSup->pState, &pCurWin->sessionWin); - } else { - pCurWin->sessionWin.win.skey = startTs; - pCurWin->sessionWin.win.ekey = endTs; - } -} - -int32_t getSessionWinBuf(SStreamAggSupporter* pAggSup, SStreamStateCur* pCur, SResultWindowInfo* pWinInfo) { - int32_t size = 0; - int32_t code = pAggSup->stateStore.streamStateSessionGetKVByCur(pCur, &pWinInfo->sessionWin, &pWinInfo->pOutputBuf, &size); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - - pAggSup->stateStore.streamStateCurNext(pAggSup->pState, pCur); - return TSDB_CODE_SUCCESS; -} -void saveDeleteInfo(SArray* pWins, SSessionKey key) { - // key.win.ekey = key.win.skey; - taosArrayPush(pWins, &key); -} - -void saveDeleteRes(SSHashObj* pStDelete, SSessionKey key) { - key.win.ekey = key.win.skey; - tSimpleHashPut(pStDelete, &key, sizeof(SSessionKey), NULL, 0); -} - -static void removeSessionResult(SSHashObj* pHashMap, SSHashObj* pResMap, SSessionKey key) { - key.win.ekey = key.win.skey; - tSimpleHashRemove(pHashMap, &key, sizeof(SSessionKey)); - tSimpleHashRemove(pResMap, &key, sizeof(SSessionKey)); -} - -static void getSessionHashKey(const SSessionKey* pKey, SSessionKey* pHashKey) { - *pHashKey = *pKey; - pHashKey->win.ekey = pKey->win.skey; -} - -static void removeSessionResults(SSHashObj* pHashMap, SArray* pWins) { - if (tSimpleHashGetSize(pHashMap) == 0) { - return; - } - int32_t size = taosArrayGetSize(pWins); - for (int32_t i = 0; i < size; i++) { - SSessionKey* pWin = taosArrayGet(pWins, i); - if (!pWin) continue; - SSessionKey key = {0}; - getSessionHashKey(pWin, &key); - tSimpleHashRemove(pHashMap, &key, sizeof(SSessionKey)); - } -} - -int32_t updateSessionWindowInfo(SResultWindowInfo* pWinInfo, TSKEY* pStartTs, TSKEY* pEndTs, 
uint64_t groupId, - int32_t rows, int32_t start, int64_t gap, SSHashObj* pResultRows, SSHashObj* pStUpdated, - SSHashObj* pStDeleted) { - for (int32_t i = start; i < rows; ++i) { - if (!isInWindow(pWinInfo, pStartTs[i], gap) && (!pEndTs || !isInWindow(pWinInfo, pEndTs[i], gap))) { - return i - start; - } - if (pWinInfo->sessionWin.win.skey > pStartTs[i]) { - if (pStDeleted && pWinInfo->isOutput) { - saveDeleteRes(pStDeleted, pWinInfo->sessionWin); - } - removeSessionResult(pStUpdated, pResultRows, pWinInfo->sessionWin); - pWinInfo->sessionWin.win.skey = pStartTs[i]; - } - pWinInfo->sessionWin.win.ekey = TMAX(pWinInfo->sessionWin.win.ekey, pStartTs[i]); - if (pEndTs) { - pWinInfo->sessionWin.win.ekey = TMAX(pWinInfo->sessionWin.win.ekey, pEndTs[i]); - } - } - return rows - start; -} - -static int32_t initSessionOutputBuf(SResultWindowInfo* pWinInfo, SResultRow** pResult, SqlFunctionCtx* pCtx, - int32_t numOfOutput, int32_t* rowEntryInfoOffset) { - ASSERT(pWinInfo->sessionWin.win.skey <= pWinInfo->sessionWin.win.ekey); - *pResult = (SResultRow*)pWinInfo->pOutputBuf; - // set time window for current result - (*pResult)->win = pWinInfo->sessionWin.win; - setResultRowInitCtx(*pResult, pCtx, numOfOutput, rowEntryInfoOffset); - return TSDB_CODE_SUCCESS; -} - -static int32_t doOneWindowAggImpl(SColumnInfoData* pTimeWindowData, SResultWindowInfo* pCurWin, SResultRow** pResult, - int32_t startIndex, int32_t winRows, int32_t rows, int32_t numOutput, - SOperatorInfo* pOperator, int64_t winDelta) { - SExprSupp* pSup = &pOperator->exprSupp; - SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; - int32_t code = initSessionOutputBuf(pCurWin, pResult, pSup->pCtx, numOutput, pSup->rowEntryInfoOffset); - if (code != TSDB_CODE_SUCCESS || (*pResult) == NULL) { - return TSDB_CODE_OUT_OF_MEMORY; - } - updateTimeWindowInfo(pTimeWindowData, &pCurWin->sessionWin.win, winDelta); - applyAggFunctionOnPartialTuples(pTaskInfo, pSup->pCtx, pTimeWindowData, startIndex, winRows, rows, numOutput); - 
return TSDB_CODE_SUCCESS; -} - -static bool doDeleteSessionWindow(SStreamAggSupporter* pAggSup, SSessionKey* pKey) { - pAggSup->stateStore.streamStateSessionDel(pAggSup->pState, pKey); - SSessionKey hashKey = {0}; - getSessionHashKey(pKey, &hashKey); - tSimpleHashRemove(pAggSup->pResultRows, &hashKey, sizeof(SSessionKey)); - return true; -} - -static int32_t setSessionWinOutputInfo(SSHashObj* pStUpdated, SResultWindowInfo* pWinInfo) { - void* pVal = tSimpleHashGet(pStUpdated, &pWinInfo->sessionWin, sizeof(SSessionKey)); - if (pVal) { - SResultWindowInfo* pWin = pVal; - pWinInfo->isOutput = pWin->isOutput; - } - return TSDB_CODE_SUCCESS; -} - -SStreamStateCur* getNextSessionWinInfo(SStreamAggSupporter* pAggSup, SSHashObj* pStUpdated, SResultWindowInfo* pCurWin, - SResultWindowInfo* pNextWin) { - SStreamStateCur* pCur = pAggSup->stateStore.streamStateSessionSeekKeyNext(pAggSup->pState, &pCurWin->sessionWin); - pNextWin->isOutput = true; - setSessionWinOutputInfo(pStUpdated, pNextWin); - int32_t size = 0; - pNextWin->sessionWin = pCurWin->sessionWin; - int32_t code = pAggSup->stateStore.streamStateSessionGetKVByCur(pCur, &pNextWin->sessionWin, &pNextWin->pOutputBuf, &size); - if (code != TSDB_CODE_SUCCESS) { - taosMemoryFreeClear(pNextWin->pOutputBuf); - SET_SESSION_WIN_INVALID(*pNextWin); - } - return pCur; -} - -static int32_t compactSessionWindow(SOperatorInfo* pOperator, SResultWindowInfo* pCurWin, SSHashObj* pStUpdated, - SSHashObj* pStDeleted, bool addGap) { - SExprSupp* pSup = &pOperator->exprSupp; - SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; - SStorageAPI* pAPI = &pOperator->pTaskInfo->storageAPI; - int32_t winNum = 0; - - SStreamSessionAggOperatorInfo* pInfo = pOperator->info; - SResultRow* pCurResult = NULL; - int32_t numOfOutput = pOperator->exprSupp.numOfExprs; - SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; - initSessionOutputBuf(pCurWin, &pCurResult, pSup->pCtx, numOfOutput, pSup->rowEntryInfoOffset); - // Just look for the window behind 
StartIndex - while (1) { - SResultWindowInfo winInfo = {0}; - SStreamStateCur* pCur = getNextSessionWinInfo(pAggSup, pStUpdated, pCurWin, &winInfo); - if (!IS_VALID_SESSION_WIN(winInfo) || !isInWindow(pCurWin, winInfo.sessionWin.win.skey, pAggSup->gap) || - !inWinRange(&pAggSup->winRange, &winInfo.sessionWin.win)) { - taosMemoryFree(winInfo.pOutputBuf); - pAPI->stateStore.streamStateFreeCur(pCur); - break; - } - SResultRow* pWinResult = NULL; - initSessionOutputBuf(&winInfo, &pWinResult, pAggSup->pDummyCtx, numOfOutput, pSup->rowEntryInfoOffset); - pCurWin->sessionWin.win.ekey = TMAX(pCurWin->sessionWin.win.ekey, winInfo.sessionWin.win.ekey); - int64_t winDelta = 0; - if (addGap) { - winDelta = pAggSup->gap; - } - updateTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &pCurWin->sessionWin.win, winDelta); - compactFunctions(pSup->pCtx, pAggSup->pDummyCtx, numOfOutput, pTaskInfo, &pInfo->twAggSup.timeWindowData); - tSimpleHashRemove(pStUpdated, &winInfo.sessionWin, sizeof(SSessionKey)); - if (winInfo.isOutput && pStDeleted) { - saveDeleteRes(pStDeleted, winInfo.sessionWin); - } - removeSessionResult(pStUpdated, pAggSup->pResultRows, winInfo.sessionWin); - doDeleteSessionWindow(pAggSup, &winInfo.sessionWin); - pAPI->stateStore.streamStateFreeCur(pCur); - taosMemoryFree(winInfo.pOutputBuf); - winNum++; - } - return winNum; -} - -int32_t saveSessionOutputBuf(SStreamAggSupporter* pAggSup, SResultWindowInfo* pWinInfo) { - saveSessionDiscBuf(pAggSup->pState, &pWinInfo->sessionWin, pWinInfo->pOutputBuf, pAggSup->resultRowSize, &pAggSup->stateStore); - pWinInfo->pOutputBuf = NULL; - return TSDB_CODE_SUCCESS; -} - -static void doStreamSessionAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSDataBlock, SSHashObj* pStUpdated, - SSHashObj* pStDeleted, bool hasEndTs, bool addGap) { - SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; - SStreamSessionAggOperatorInfo* pInfo = pOperator->info; - int32_t numOfOutput = pOperator->exprSupp.numOfExprs; - uint64_t groupId = 
pSDataBlock->info.id.groupId; - int64_t code = TSDB_CODE_SUCCESS; - SResultRow* pResult = NULL; - int32_t rows = pSDataBlock->info.rows; - int32_t winRows = 0; - SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; - - pInfo->dataVersion = TMAX(pInfo->dataVersion, pSDataBlock->info.version); - pAggSup->winRange = pTaskInfo->streamInfo.fillHistoryWindow; - if (pAggSup->winRange.ekey <= 0) { - pAggSup->winRange.ekey = INT64_MAX; - } - - SColumnInfoData* pStartTsCol = taosArrayGet(pSDataBlock->pDataBlock, pInfo->primaryTsIndex); - TSKEY* startTsCols = (int64_t*)pStartTsCol->pData; - SColumnInfoData* pEndTsCol = NULL; - if (hasEndTs) { - pEndTsCol = taosArrayGet(pSDataBlock->pDataBlock, pInfo->endTsIndex); - } else { - pEndTsCol = taosArrayGet(pSDataBlock->pDataBlock, pInfo->primaryTsIndex); - } - - TSKEY* endTsCols = (int64_t*)pEndTsCol->pData; - for (int32_t i = 0; i < rows;) { - if (pInfo->ignoreExpiredData && isOverdue(endTsCols[i], &pInfo->twAggSup)) { - i++; - continue; - } - SResultWindowInfo winInfo = {0}; - setSessionOutputBuf(pAggSup, startTsCols[i], endTsCols[i], groupId, &winInfo); - setSessionWinOutputInfo(pStUpdated, &winInfo); - winRows = updateSessionWindowInfo(&winInfo, startTsCols, endTsCols, groupId, rows, i, pAggSup->gap, - pAggSup->pResultRows, pStUpdated, pStDeleted); - // coverity scan error - if (!winInfo.pOutputBuf) { - T_LONG_JMP(pTaskInfo->env, TSDB_CODE_OUT_OF_MEMORY); - } - - int64_t winDelta = 0; - if (addGap) { - winDelta = pAggSup->gap; - } - code = doOneWindowAggImpl(&pInfo->twAggSup.timeWindowData, &winInfo, &pResult, i, winRows, rows, numOfOutput, - pOperator, winDelta); - if (code != TSDB_CODE_SUCCESS || pResult == NULL) { - T_LONG_JMP(pTaskInfo->env, TSDB_CODE_OUT_OF_MEMORY); - } - compactSessionWindow(pOperator, &winInfo, pStUpdated, pStDeleted, addGap); - saveSessionOutputBuf(pAggSup, &winInfo); - - if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE && pStUpdated) { - code = saveResult(winInfo, pStUpdated); - if (code != 
TSDB_CODE_SUCCESS) { - T_LONG_JMP(pTaskInfo->env, TSDB_CODE_OUT_OF_MEMORY); - } - } - if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_WINDOW_CLOSE) { - SSessionKey key = {0}; - getSessionHashKey(&winInfo.sessionWin, &key); - tSimpleHashPut(pAggSup->pResultRows, &key, sizeof(SSessionKey), &winInfo, sizeof(SResultWindowInfo)); - } - - i += winRows; - } -} - -static void doDeleteTimeWindows(SStreamAggSupporter* pAggSup, SSDataBlock* pBlock, SArray* result) { - SColumnInfoData* pStartTsCol = taosArrayGet(pBlock->pDataBlock, START_TS_COLUMN_INDEX); - TSKEY* startDatas = (TSKEY*)pStartTsCol->pData; - SColumnInfoData* pEndTsCol = taosArrayGet(pBlock->pDataBlock, END_TS_COLUMN_INDEX); - TSKEY* endDatas = (TSKEY*)pEndTsCol->pData; - SColumnInfoData* pGroupCol = taosArrayGet(pBlock->pDataBlock, GROUPID_COLUMN_INDEX); - uint64_t* gpDatas = (uint64_t*)pGroupCol->pData; - for (int32_t i = 0; i < pBlock->info.rows; i++) { - while (1) { - SSessionKey curWin = {0}; - getCurSessionWindow(pAggSup, startDatas[i], endDatas[i], gpDatas[i], &curWin); - if (IS_INVALID_SESSION_WIN_KEY(curWin)) { - break; - } - doDeleteSessionWindow(pAggSup, &curWin); - if (result) { - saveDeleteInfo(result, curWin); - } - } - } -} - -static inline int32_t sessionKeyCompareAsc(const void* pKey1, const void* pKey2) { - SSessionKey* pWin1 = (SSessionKey*)pKey1; - SSessionKey* pWin2 = (SSessionKey*)pKey2; - - if (pWin1->groupId > pWin2->groupId) { - return 1; - } else if (pWin1->groupId < pWin2->groupId) { - return -1; - } - - if (pWin1->win.skey > pWin2->win.skey) { - return 1; - } else if (pWin1->win.skey < pWin2->win.skey) { - return -1; - } - - return 0; -} - -static int32_t copyUpdateResult(SSHashObj* pStUpdated, SArray* pUpdated) { - void* pIte = NULL; - int32_t iter = 0; - while ((pIte = tSimpleHashIterate(pStUpdated, pIte, &iter)) != NULL) { - void* key = tSimpleHashGetKey(pIte, NULL); - taosArrayPush(pUpdated, key); - } - taosArraySort(pUpdated, sessionKeyCompareAsc); - return TSDB_CODE_SUCCESS; -} 
- -void doBuildDeleteDataBlock(SOperatorInfo* pOp, SSHashObj* pStDeleted, SSDataBlock* pBlock, void** Ite) { - SStorageAPI* pAPI = &pOp->pTaskInfo->storageAPI; - - blockDataCleanup(pBlock); - int32_t size = tSimpleHashGetSize(pStDeleted); - if (size == 0) { - return; - } - blockDataEnsureCapacity(pBlock, size); - int32_t iter = 0; - while (((*Ite) = tSimpleHashIterate(pStDeleted, *Ite, &iter)) != NULL) { - if (pBlock->info.rows + 1 > pBlock->info.capacity) { - break; - } - SSessionKey* res = tSimpleHashGetKey(*Ite, NULL); - SColumnInfoData* pStartTsCol = taosArrayGet(pBlock->pDataBlock, START_TS_COLUMN_INDEX); - colDataSetVal(pStartTsCol, pBlock->info.rows, (const char*)&res->win.skey, false); - SColumnInfoData* pEndTsCol = taosArrayGet(pBlock->pDataBlock, END_TS_COLUMN_INDEX); - colDataSetVal(pEndTsCol, pBlock->info.rows, (const char*)&res->win.skey, false); - SColumnInfoData* pUidCol = taosArrayGet(pBlock->pDataBlock, UID_COLUMN_INDEX); - colDataSetNULL(pUidCol, pBlock->info.rows); - SColumnInfoData* pGpCol = taosArrayGet(pBlock->pDataBlock, GROUPID_COLUMN_INDEX); - colDataSetVal(pGpCol, pBlock->info.rows, (const char*)&res->groupId, false); - SColumnInfoData* pCalStCol = taosArrayGet(pBlock->pDataBlock, CALCULATE_START_TS_COLUMN_INDEX); - colDataSetNULL(pCalStCol, pBlock->info.rows); - SColumnInfoData* pCalEdCol = taosArrayGet(pBlock->pDataBlock, CALCULATE_END_TS_COLUMN_INDEX); - colDataSetNULL(pCalEdCol, pBlock->info.rows); - - SColumnInfoData* pTableCol = taosArrayGet(pBlock->pDataBlock, TABLE_NAME_COLUMN_INDEX); - - void* tbname = NULL; - pAPI->stateStore.streamStateGetParName(pOp->pTaskInfo->streamInfo.pState, res->groupId, &tbname); - if (tbname == NULL) { - colDataSetNULL(pTableCol, pBlock->info.rows); - } else { - char parTbName[VARSTR_HEADER_SIZE + TSDB_TABLE_NAME_LEN]; - STR_WITH_MAXSIZE_TO_VARSTR(parTbName, tbname, sizeof(parTbName)); - colDataSetVal(pTableCol, pBlock->info.rows, (const char*)parTbName, false); - 
pAPI->stateStore.streamStateFreeVal(tbname); - } - pBlock->info.rows += 1; - } - if ((*Ite) == NULL) { - tSimpleHashClear(pStDeleted); - } -} - -static void rebuildSessionWindow(SOperatorInfo* pOperator, SArray* pWinArray, SSHashObj* pStUpdated) { - SExprSupp* pSup = &pOperator->exprSupp; - SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; - SStorageAPI* pAPI = &pOperator->pTaskInfo->storageAPI; - - int32_t size = taosArrayGetSize(pWinArray); - SStreamSessionAggOperatorInfo* pInfo = pOperator->info; - SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; - int32_t numOfOutput = pSup->numOfExprs; - int32_t numOfChild = taosArrayGetSize(pInfo->pChildren); - - for (int32_t i = 0; i < size; i++) { - SSessionKey* pWinKey = taosArrayGet(pWinArray, i); - int32_t num = 0; - SResultWindowInfo parentWin = {0}; - for (int32_t j = 0; j < numOfChild; j++) { - SOperatorInfo* pChild = taosArrayGetP(pInfo->pChildren, j); - SStreamSessionAggOperatorInfo* pChInfo = pChild->info; - SStreamAggSupporter* pChAggSup = &pChInfo->streamAggSup; - SSessionKey chWinKey = {0}; - getSessionHashKey(pWinKey, &chWinKey); - SStreamStateCur* pCur = pAggSup->stateStore.streamStateSessionSeekKeyCurrentNext(pChAggSup->pState, &chWinKey); - SResultRow* pResult = NULL; - SResultRow* pChResult = NULL; - while (1) { - SResultWindowInfo childWin = {0}; - childWin.sessionWin = *pWinKey; - int32_t code = getSessionWinBuf(pChAggSup, pCur, &childWin); - - if (code == TSDB_CODE_SUCCESS && !inWinRange(&pAggSup->winRange, &childWin.sessionWin.win)) { - continue; - } - - if (code == TSDB_CODE_SUCCESS && inWinRange(&pWinKey->win, &childWin.sessionWin.win)) { - if (num == 0) { - setSessionOutputBuf(pAggSup, pWinKey->win.skey, pWinKey->win.ekey, pWinKey->groupId, &parentWin); - code = initSessionOutputBuf(&parentWin, &pResult, pSup->pCtx, numOfOutput, pSup->rowEntryInfoOffset); - if (code != TSDB_CODE_SUCCESS || pResult == NULL) { - break; - } - } - num++; - updateTimeWindowInfo(&pInfo->twAggSup.timeWindowData, 
&parentWin.sessionWin.win, pAggSup->gap); - initSessionOutputBuf(&childWin, &pChResult, pChild->exprSupp.pCtx, numOfOutput, - pChild->exprSupp.rowEntryInfoOffset); - compactFunctions(pSup->pCtx, pChild->exprSupp.pCtx, numOfOutput, pTaskInfo, &pInfo->twAggSup.timeWindowData); - compactSessionWindow(pOperator, &parentWin, pStUpdated, NULL, true); - saveResult(parentWin, pStUpdated); - } else { - break; - } - } - pAPI->stateStore.streamStateFreeCur(pCur); - } - if (num > 0) { - saveSessionOutputBuf(pAggSup, &parentWin); - } - } -} - -int32_t closeSessionWindow(SSHashObj* pHashMap, STimeWindowAggSupp* pTwSup, SSHashObj* pClosed) { - void* pIte = NULL; - int32_t iter = 0; - while ((pIte = tSimpleHashIterate(pHashMap, pIte, &iter)) != NULL) { - SResultWindowInfo* pWinInfo = pIte; - if (isCloseWindow(&pWinInfo->sessionWin.win, pTwSup)) { - if (pTwSup->calTrigger == STREAM_TRIGGER_WINDOW_CLOSE && pClosed) { - int32_t code = saveResult(*pWinInfo, pClosed); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - } - SSessionKey* pKey = tSimpleHashGetKey(pIte, NULL); - tSimpleHashIterateRemove(pHashMap, pKey, sizeof(SSessionKey), &pIte, &iter); - } - } - return TSDB_CODE_SUCCESS; -} - -static void closeChildSessionWindow(SArray* pChildren, TSKEY maxTs) { - int32_t size = taosArrayGetSize(pChildren); - for (int32_t i = 0; i < size; i++) { - SOperatorInfo* pChildOp = taosArrayGetP(pChildren, i); - SStreamSessionAggOperatorInfo* pChInfo = pChildOp->info; - pChInfo->twAggSup.maxTs = TMAX(pChInfo->twAggSup.maxTs, maxTs); - closeSessionWindow(pChInfo->streamAggSup.pResultRows, &pChInfo->twAggSup, NULL); - } -} - -int32_t getAllSessionWindow(SSHashObj* pHashMap, SSHashObj* pStUpdated) { - void* pIte = NULL; - int32_t iter = 0; - while ((pIte = tSimpleHashIterate(pHashMap, pIte, &iter)) != NULL) { - SResultWindowInfo* pWinInfo = pIte; - saveResult(*pWinInfo, pStUpdated); - } - return TSDB_CODE_SUCCESS; -} - -static void copyDeleteWindowInfo(SArray* pResWins, SSHashObj* pStDeleted) { 
- int32_t size = taosArrayGetSize(pResWins); - for (int32_t i = 0; i < size; i++) { - SSessionKey* pWinKey = taosArrayGet(pResWins, i); - if (!pWinKey) continue; - SSessionKey winInfo = {0}; - getSessionHashKey(pWinKey, &winInfo); - tSimpleHashPut(pStDeleted, &winInfo, sizeof(SSessionKey), NULL, 0); - } -} - -// the allocated memory comes from outer function. -void initGroupResInfoFromArrayList(SGroupResInfo* pGroupResInfo, SArray* pArrayList) { - pGroupResInfo->pRows = pArrayList; - pGroupResInfo->index = 0; - pGroupResInfo->pBuf = NULL; -} - -void doBuildSessionResult(SOperatorInfo* pOperator, void* pState, SGroupResInfo* pGroupResInfo, - SSDataBlock* pBlock) { - SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; - // set output datablock version - pBlock->info.version = pTaskInfo->version; - - blockDataCleanup(pBlock); - if (!hasRemainResults(pGroupResInfo)) { - cleanupGroupResInfo(pGroupResInfo); - return; - } - - // clear the existed group id - pBlock->info.id.groupId = 0; - buildSessionResultDataBlock(pOperator, pState, pBlock, &pOperator->exprSupp, pGroupResInfo); - if (pBlock->info.rows == 0) { - cleanupGroupResInfo(pGroupResInfo); - } -} -void getMaxTsWins(const SArray* pAllWins, SArray* pMaxWins) { - int32_t size = taosArrayGetSize(pAllWins); - if (size == 0) { - return; - } - - SSessionKey* pSeKey = taosArrayGet(pAllWins, size - 1); - taosArrayPush(pMaxWins, pSeKey); - if (pSeKey->groupId == 0) { - return; - } - uint64_t preGpId = pSeKey->groupId; - for (int32_t i = size - 2; i >= 0; i--) { - pSeKey = taosArrayGet(pAllWins, i); - if (preGpId != pSeKey->groupId) { - taosArrayPush(pMaxWins, pSeKey); - preGpId = pSeKey->groupId; - } - } -} - -static SSDataBlock* doStreamSessionAgg(SOperatorInfo* pOperator) { - SExprSupp* pSup = &pOperator->exprSupp; - SStreamSessionAggOperatorInfo* pInfo = pOperator->info; - SOptrBasicInfo* pBInfo = &pInfo->binfo; - SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; - qDebug("===stream=== stream session agg"); - if 
(pOperator->status == OP_EXEC_DONE) { - return NULL; - } else if (pOperator->status == OP_RES_TO_RETURN) { - doBuildDeleteDataBlock(pOperator, pInfo->pStDeleted, pInfo->pDelRes, &pInfo->pDelIterator); - if (pInfo->pDelRes->info.rows > 0) { - printDataBlock(pInfo->pDelRes, IS_FINAL_OP(pInfo) ? "final session" : "single session"); - return pInfo->pDelRes; - } - doBuildSessionResult(pOperator, pAggSup->pState, &pInfo->groupResInfo, pBInfo->pRes); - if (pBInfo->pRes->info.rows > 0) { - printDataBlock(pBInfo->pRes, IS_FINAL_OP(pInfo) ? "final session" : "single session"); - return pBInfo->pRes; - } - - setOperatorCompleted(pOperator); - return NULL; - } - - SOperatorInfo* downstream = pOperator->pDownstream[0]; - if (!pInfo->pUpdated) { - pInfo->pUpdated = taosArrayInit(16, sizeof(SSessionKey)); - } - if (!pInfo->pStUpdated) { - _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); - pInfo->pStUpdated = tSimpleHashInit(64, hashFn); - } - while (1) { - SSDataBlock* pBlock = getNextBlockFromDownstream(pOperator, 0); - if (pBlock == NULL) { - break; - } - printDataBlock(pBlock, IS_FINAL_OP(pInfo) ? 
"final session recv" : "single session recv"); - - if (pBlock->info.type == STREAM_DELETE_DATA || pBlock->info.type == STREAM_DELETE_RESULT || - pBlock->info.type == STREAM_CLEAR) { - SArray* pWins = taosArrayInit(16, sizeof(SSessionKey)); - // gap must be 0 - doDeleteTimeWindows(pAggSup, pBlock, pWins); - removeSessionResults(pInfo->pStUpdated, pWins); - if (IS_FINAL_OP(pInfo)) { - int32_t childIndex = getChildIndex(pBlock); - SOperatorInfo* pChildOp = taosArrayGetP(pInfo->pChildren, childIndex); - SStreamSessionAggOperatorInfo* pChildInfo = pChildOp->info; - // gap must be 0 - doDeleteTimeWindows(&pChildInfo->streamAggSup, pBlock, NULL); - rebuildSessionWindow(pOperator, pWins, pInfo->pStUpdated); - } - copyDeleteWindowInfo(pWins, pInfo->pStDeleted); - taosArrayDestroy(pWins); - continue; - } else if (pBlock->info.type == STREAM_GET_ALL) { - getAllSessionWindow(pAggSup->pResultRows, pInfo->pStUpdated); - continue; - } else if (pBlock->info.type == STREAM_CREATE_CHILD_TABLE) { - return pBlock; - } else { - ASSERTS(pBlock->info.type == STREAM_NORMAL || pBlock->info.type == STREAM_INVALID, "invalid SSDataBlock type"); - } - - if (pInfo->scalarSupp.pExprInfo != NULL) { - SExprSupp* pExprSup = &pInfo->scalarSupp; - projectApplyFunctions(pExprSup->pExprInfo, pBlock, pBlock, pExprSup->pCtx, pExprSup->numOfExprs, NULL); - } - // the pDataBlock are always the same one, no need to call this again - setInputDataBlock(pSup, pBlock, TSDB_ORDER_ASC, MAIN_SCAN, true); - doStreamSessionAggImpl(pOperator, pBlock, pInfo->pStUpdated, pInfo->pStDeleted, IS_FINAL_OP(pInfo), true); - if (IS_FINAL_OP(pInfo)) { - int32_t chIndex = getChildIndex(pBlock); - int32_t size = taosArrayGetSize(pInfo->pChildren); - // if chIndex + 1 - size > 0, add new child - for (int32_t i = 0; i < chIndex + 1 - size; i++) { - SOperatorInfo* pChildOp = - createStreamFinalSessionAggOperatorInfo(NULL, pInfo->pPhyNode, pOperator->pTaskInfo, 0, NULL); - if (!pChildOp) { - T_LONG_JMP(pOperator->pTaskInfo->env, 
TSDB_CODE_OUT_OF_MEMORY); - } - taosArrayPush(pInfo->pChildren, &pChildOp); - } - SOperatorInfo* pChildOp = taosArrayGetP(pInfo->pChildren, chIndex); - setInputDataBlock(&pChildOp->exprSupp, pBlock, TSDB_ORDER_ASC, MAIN_SCAN, true); - doStreamSessionAggImpl(pChildOp, pBlock, NULL, NULL, true, false); - } - pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, pBlock->info.window.ekey); - pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, pBlock->info.watermark); - } - // restore the value - pOperator->status = OP_RES_TO_RETURN; - - closeSessionWindow(pAggSup->pResultRows, &pInfo->twAggSup, pInfo->pStUpdated); - closeChildSessionWindow(pInfo->pChildren, pInfo->twAggSup.maxTs); - copyUpdateResult(pInfo->pStUpdated, pInfo->pUpdated); - removeSessionResults(pInfo->pStDeleted, pInfo->pUpdated); - tSimpleHashCleanup(pInfo->pStUpdated); - pInfo->pStUpdated = NULL; - if(pInfo->isHistoryOp) { - getMaxTsWins(pInfo->pUpdated, pInfo->historyWins); - } - initGroupResInfoFromArrayList(&pInfo->groupResInfo, pInfo->pUpdated); - pInfo->pUpdated = NULL; - blockDataEnsureCapacity(pInfo->binfo.pRes, pOperator->resultInfo.capacity); - -#if 0 - char* pBuf = streamStateSessionDump(pAggSup->pState); - qDebug("===stream===final session%s", pBuf); - taosMemoryFree(pBuf); -#endif - - doBuildDeleteDataBlock(pOperator, pInfo->pStDeleted, pInfo->pDelRes, &pInfo->pDelIterator); - if (pInfo->pDelRes->info.rows > 0) { - printDataBlock(pInfo->pDelRes, IS_FINAL_OP(pInfo) ? "final session" : "single session"); - return pInfo->pDelRes; - } - - doBuildSessionResult(pOperator, pAggSup->pState, &pInfo->groupResInfo, pBInfo->pRes); - if (pBInfo->pRes->info.rows > 0) { - printDataBlock(pBInfo->pRes, IS_FINAL_OP(pInfo) ? 
"final session" : "single session"); - return pBInfo->pRes; - } - - setOperatorCompleted(pOperator); - return NULL; -} - -void streamSessionReleaseState(SOperatorInfo* pOperator) { - if (pOperator->operatorType != QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_SESSION) { - SStreamSessionAggOperatorInfo* pInfo = pOperator->info; - int32_t resSize = taosArrayGetSize(pInfo->historyWins) * sizeof(SSessionKey); - pInfo->streamAggSup.stateStore.streamStateSaveInfo(pInfo->streamAggSup.pState, STREAM_SESSION_OP_STATE_NAME, strlen(STREAM_SESSION_OP_STATE_NAME), pInfo->historyWins->pData, resSize); - } - SOperatorInfo* downstream = pOperator->pDownstream[0]; - if (downstream->fpSet.releaseStreamStateFn) { - downstream->fpSet.releaseStreamStateFn(downstream); - } -} - -void resetWinRange(STimeWindow* winRange) { - winRange->skey = INT64_MIN; - winRange->ekey = INT64_MAX; -} - -void streamSessionReloadState(SOperatorInfo* pOperator) { - SStreamSessionAggOperatorInfo* pInfo = pOperator->info; - SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; - resetWinRange(&pAggSup->winRange); - - SResultWindowInfo winInfo = {0}; - int32_t size = 0; - void* pBuf = NULL; - int32_t code = pAggSup->stateStore.streamStateGetInfo(pAggSup->pState, STREAM_SESSION_OP_STATE_NAME, - strlen(STREAM_SESSION_OP_STATE_NAME), &pBuf, &size); - int32_t num = size / sizeof(SSessionKey); - SSessionKey* pSeKeyBuf = (SSessionKey*) pBuf; - ASSERT(size == num * sizeof(SSessionKey)); - if (!pInfo->pStUpdated && num > 0) { - _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); - pInfo->pStUpdated = tSimpleHashInit(64, hashFn); - } - for (int32_t i = 0; i < num; i++) { - SResultWindowInfo winInfo = {0}; - setSessionOutputBuf(pAggSup, pSeKeyBuf[i].win.skey, pSeKeyBuf[i].win.ekey, pSeKeyBuf[i].groupId, &winInfo); - int32_t winNum = compactSessionWindow(pOperator, &winInfo, pInfo->pStUpdated, pInfo->pStDeleted, true); - if (winNum > 0) { - qDebug("===stream=== reload state. 
save result %" PRId64 ", %" PRIu64, winInfo.sessionWin.win.skey, winInfo.sessionWin.groupId); - if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE) { - saveResult(winInfo, pInfo->pStUpdated); - } else if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_WINDOW_CLOSE) { - if (!isCloseWindow(&winInfo.sessionWin.win, &pInfo->twAggSup)) { - saveDeleteRes(pInfo->pStDeleted, winInfo.sessionWin); - } - SSessionKey key = {0}; - getSessionHashKey(&winInfo.sessionWin, &key); - tSimpleHashPut(pAggSup->pResultRows, &key, sizeof(SSessionKey), &winInfo, sizeof(SResultWindowInfo)); - } - } - saveSessionOutputBuf(pAggSup, &winInfo); - } - taosMemoryFree(pBuf); - - SOperatorInfo* downstream = pOperator->pDownstream[0]; - if (downstream->fpSet.reloadStreamStateFn) { - downstream->fpSet.reloadStreamStateFn(downstream); - } -} - -SOperatorInfo* createStreamSessionAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, - SExecTaskInfo* pTaskInfo, SReadHandle* pHandle) { - SSessionWinodwPhysiNode* pSessionNode = (SSessionWinodwPhysiNode*)pPhyNode; - int32_t numOfCols = 0; - int32_t code = TSDB_CODE_OUT_OF_MEMORY; - SStreamSessionAggOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SStreamSessionAggOperatorInfo)); - SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo)); - if (pInfo == NULL || pOperator == NULL) { - goto _error; - } - - pOperator->pTaskInfo = pTaskInfo; - - initResultSizeInfo(&pOperator->resultInfo, 4096); - if (pSessionNode->window.pExprs != NULL) { - int32_t numOfScalar = 0; - SExprInfo* pScalarExprInfo = createExprInfo(pSessionNode->window.pExprs, NULL, &numOfScalar); - code = initExprSupp(&pInfo->scalarSupp, pScalarExprInfo, numOfScalar, &pTaskInfo->storageAPI.functionStore); - if (code != TSDB_CODE_SUCCESS) { - goto _error; - } - } - SExprSupp* pSup = &pOperator->exprSupp; - - SExprInfo* pExprInfo = createExprInfo(pSessionNode->window.pFuncs, NULL, &numOfCols); - SSDataBlock* pResBlock = 
createDataBlockFromDescNode(pPhyNode->pOutputDataBlockDesc); - code = initBasicInfoEx(&pInfo->binfo, pSup, pExprInfo, numOfCols, pResBlock, &pTaskInfo->storageAPI.functionStore); - if (code != TSDB_CODE_SUCCESS) { - goto _error; - } - - code = initStreamAggSupporter(&pInfo->streamAggSup, pSup->pCtx, numOfCols, pSessionNode->gap, - pTaskInfo->streamInfo.pState, 0, 0, &pTaskInfo->storageAPI.stateStore, pHandle, &pTaskInfo->storageAPI); - if (code != TSDB_CODE_SUCCESS) { - goto _error; - } - - pInfo->twAggSup = (STimeWindowAggSupp){ - .waterMark = pSessionNode->window.watermark, - .calTrigger = pSessionNode->window.triggerType, - .maxTs = INT64_MIN, - .minTs = INT64_MAX, - }; - - initExecTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &pTaskInfo->window); - - pInfo->primaryTsIndex = ((SColumnNode*)pSessionNode->window.pTspk)->slotId; - if (pSessionNode->window.pTsEnd) { - pInfo->endTsIndex = ((SColumnNode*)pSessionNode->window.pTsEnd)->slotId; - } - pInfo->binfo.pRes = pResBlock; - pInfo->order = TSDB_ORDER_ASC; - _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); - pInfo->pStDeleted = tSimpleHashInit(64, hashFn); - pInfo->pDelIterator = NULL; - pInfo->pDelRes = createSpecialDataBlock(STREAM_DELETE_RESULT); - pInfo->pChildren = NULL; - pInfo->isFinal = false; - pInfo->pPhyNode = pPhyNode; - pInfo->ignoreExpiredData = pSessionNode->window.igExpired; - pInfo->ignoreExpiredDataSaved = false; - pInfo->pUpdated = NULL; - pInfo->pStUpdated = NULL; - pInfo->dataVersion = 0; - pInfo->historyWins = taosArrayInit(4, sizeof(SSessionKey)); - if (!pInfo->historyWins) { - goto _error; - } - if (pHandle) { - pInfo->isHistoryOp = pHandle->fillHistory; - } - - setOperatorInfo(pOperator, "StreamSessionWindowAggOperator", QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION, true, - OP_NOT_OPENED, pInfo, pTaskInfo); - pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doStreamSessionAgg, NULL, destroyStreamSessionAggOperatorInfo, - optrDefaultBufFn, NULL, 
optrDefaultGetNextExtFn, NULL); - setOperatorStreamStateFn(pOperator, streamSessionReleaseState, streamSessionReloadState); - - if (downstream) { - initDownStream(downstream, &pInfo->streamAggSup, pOperator->operatorType, pInfo->primaryTsIndex, &pInfo->twAggSup); - code = appendDownstream(pOperator, &downstream, 1); - } - return pOperator; - -_error: - if (pInfo != NULL) { - destroyStreamSessionAggOperatorInfo(pInfo); - } - - taosMemoryFreeClear(pOperator); - pTaskInfo->code = code; - return NULL; -} - -static void clearStreamSessionOperator(SStreamSessionAggOperatorInfo* pInfo) { - tSimpleHashClear(pInfo->streamAggSup.pResultRows); - pInfo->streamAggSup.stateStore.streamStateSessionClear(pInfo->streamAggSup.pState); -} - -static SSDataBlock* doStreamSessionSemiAgg(SOperatorInfo* pOperator) { - SStreamSessionAggOperatorInfo* pInfo = pOperator->info; - SOptrBasicInfo* pBInfo = &pInfo->binfo; - TSKEY maxTs = INT64_MIN; - SExprSupp* pSup = &pOperator->exprSupp; - SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; - - qDebug("===stream=== stream session semi agg"); - if (pOperator->status == OP_EXEC_DONE) { - return NULL; - } - - { - doBuildSessionResult(pOperator, pAggSup->pState, &pInfo->groupResInfo, pBInfo->pRes); - if (pBInfo->pRes->info.rows > 0) { - printDataBlock(pBInfo->pRes, "semi session"); - return pBInfo->pRes; - } - - doBuildDeleteDataBlock(pOperator, pInfo->pStDeleted, pInfo->pDelRes, &pInfo->pDelIterator); - if (pInfo->pDelRes->info.rows > 0) { - printDataBlock(pInfo->pDelRes, "semi session delete"); - return pInfo->pDelRes; - } - - if (pOperator->status == OP_RES_TO_RETURN) { - clearFunctionContext(&pOperator->exprSupp); - // semi interval operator clear disk buffer - clearStreamSessionOperator(pInfo); - setOperatorCompleted(pOperator); - return NULL; - } - } - - SOperatorInfo* downstream = pOperator->pDownstream[0]; - if (!pInfo->pUpdated) { - pInfo->pUpdated = taosArrayInit(16, sizeof(SSessionKey)); - } - if (!pInfo->pStUpdated) { - _hash_fn_t 
hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); - pInfo->pStUpdated = tSimpleHashInit(64, hashFn); - } - while (1) { - SSDataBlock* pBlock = getNextBlockFromDownstream(pOperator, 0); - if (pBlock == NULL) { - clearSpecialDataBlock(pInfo->pUpdateRes); - pOperator->status = OP_RES_TO_RETURN; - break; - } - printDataBlock(pBlock, "semi session recv"); - - if (pBlock->info.type == STREAM_DELETE_DATA || pBlock->info.type == STREAM_DELETE_RESULT || - pBlock->info.type == STREAM_CLEAR) { - // gap must be 0 - SArray* pWins = taosArrayInit(16, sizeof(SSessionKey)); - doDeleteTimeWindows(&pInfo->streamAggSup, pBlock, pWins); - removeSessionResults(pInfo->pStUpdated, pWins); - copyDeleteWindowInfo(pWins, pInfo->pStDeleted); - taosArrayDestroy(pWins); - break; - } else if (pBlock->info.type == STREAM_GET_ALL) { - getAllSessionWindow(pInfo->streamAggSup.pResultRows, pInfo->pStUpdated); - continue; - } else if (pBlock->info.type == STREAM_CREATE_CHILD_TABLE) { - return pBlock; - } else { - ASSERTS(pBlock->info.type == STREAM_NORMAL || pBlock->info.type == STREAM_INVALID, "invalid SSDataBlock type"); - } - - if (pInfo->scalarSupp.pExprInfo != NULL) { - SExprSupp* pExprSup = &pInfo->scalarSupp; - projectApplyFunctions(pExprSup->pExprInfo, pBlock, pBlock, pExprSup->pCtx, pExprSup->numOfExprs, NULL); - } - // the pDataBlock are always the same one, no need to call this again - setInputDataBlock(pSup, pBlock, TSDB_ORDER_ASC, MAIN_SCAN, true); - doStreamSessionAggImpl(pOperator, pBlock, pInfo->pStUpdated, NULL, false, false); - maxTs = TMAX(pInfo->twAggSup.maxTs, pBlock->info.window.ekey); - } - - pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, maxTs); - pBInfo->pRes->info.watermark = pInfo->twAggSup.maxTs; - - copyUpdateResult(pInfo->pStUpdated, pInfo->pUpdated); - removeSessionResults(pInfo->pStDeleted, pInfo->pUpdated); - tSimpleHashCleanup(pInfo->pStUpdated); - pInfo->pStUpdated = NULL; - initGroupResInfoFromArrayList(&pInfo->groupResInfo, pInfo->pUpdated); - 
pInfo->pUpdated = NULL; - blockDataEnsureCapacity(pBInfo->pRes, pOperator->resultInfo.capacity); - -#if 0 - char* pBuf = streamStateSessionDump(pAggSup->pState); - qDebug("===stream===semi session%s", pBuf); - taosMemoryFree(pBuf); -#endif - - doBuildSessionResult(pOperator, pAggSup->pState, &pInfo->groupResInfo, pBInfo->pRes); - if (pBInfo->pRes->info.rows > 0) { - printDataBlock(pBInfo->pRes, "semi session"); - return pBInfo->pRes; - } - - doBuildDeleteDataBlock(pOperator, pInfo->pStDeleted, pInfo->pDelRes, &pInfo->pDelIterator); - if (pInfo->pDelRes->info.rows > 0) { - printDataBlock(pInfo->pDelRes, "semi session delete"); - return pInfo->pDelRes; - } - - clearFunctionContext(&pOperator->exprSupp); - // semi interval operator clear disk buffer - clearStreamSessionOperator(pInfo); - setOperatorCompleted(pOperator); - return NULL; -} - -SOperatorInfo* createStreamFinalSessionAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, - SExecTaskInfo* pTaskInfo, int32_t numOfChild, SReadHandle* pHandle) { - int32_t code = TSDB_CODE_OUT_OF_MEMORY; - SOperatorInfo* pOperator = createStreamSessionAggOperatorInfo(downstream, pPhyNode, pTaskInfo, pHandle); - if (pOperator == NULL) { - goto _error; - } - - SStorageAPI* pAPI = &pTaskInfo->storageAPI; - SStreamSessionAggOperatorInfo* pInfo = pOperator->info; - - pInfo->isFinal = (pPhyNode->type == QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_SESSION); - char* name = (pInfo->isFinal) ? 
"StreamSessionFinalAggOperator" : "StreamSessionSemiAggOperator"; - - if (pPhyNode->type != QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_SESSION) { - pInfo->pUpdateRes = createSpecialDataBlock(STREAM_CLEAR); - blockDataEnsureCapacity(pInfo->pUpdateRes, 128); - pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doStreamSessionSemiAgg, NULL, - destroyStreamSessionAggOperatorInfo, optrDefaultBufFn, NULL, optrDefaultGetNextExtFn, NULL); - } - setOperatorStreamStateFn(pOperator, streamSessionReleaseState, streamSessionReloadState); - setOperatorInfo(pOperator, name, pPhyNode->type, false, OP_NOT_OPENED, pInfo, pTaskInfo); - - pOperator->operatorType = pPhyNode->type; - if (numOfChild > 0) { - pInfo->pChildren = taosArrayInit(numOfChild, sizeof(void*)); - for (int32_t i = 0; i < numOfChild; i++) { - SOperatorInfo* pChildOp = createStreamFinalSessionAggOperatorInfo(NULL, pPhyNode, pTaskInfo, 0, NULL); - if (pChildOp == NULL) { - goto _error; - } - SStreamSessionAggOperatorInfo* pChInfo = pChildOp->info; - pChInfo->twAggSup.calTrigger = STREAM_TRIGGER_AT_ONCE; - pAPI->stateStore.streamStateSetNumber(pChInfo->streamAggSup.pState, i); - taosArrayPush(pInfo->pChildren, &pChildOp); - } - } - - if (!IS_FINAL_OP(pInfo) || numOfChild == 0) { - pInfo->twAggSup.calTrigger = STREAM_TRIGGER_AT_ONCE; - } - - return pOperator; - -_error: - if (pInfo != NULL) { - destroyStreamSessionAggOperatorInfo(pInfo); - } - taosMemoryFreeClear(pOperator); - pTaskInfo->code = code; - return NULL; -} - -void destroyStreamStateOperatorInfo(void* param) { - SStreamStateAggOperatorInfo* pInfo = (SStreamStateAggOperatorInfo*)param; - cleanupBasicInfo(&pInfo->binfo); - destroyStreamAggSupporter(&pInfo->streamAggSup); - cleanupGroupResInfo(&pInfo->groupResInfo); - if (pInfo->pChildren != NULL) { - int32_t size = taosArrayGetSize(pInfo->pChildren); - for (int32_t i = 0; i < size; i++) { - SOperatorInfo* pChild = taosArrayGetP(pInfo->pChildren, i); - destroyOperator(pChild); - } - 
taosArrayDestroy(pInfo->pChildren); - } - colDataDestroy(&pInfo->twAggSup.timeWindowData); - blockDataDestroy(pInfo->pDelRes); - taosArrayDestroy(pInfo->historyWins); - tSimpleHashCleanup(pInfo->pSeUpdated); - tSimpleHashCleanup(pInfo->pSeDeleted); - taosMemoryFreeClear(param); -} - -bool isTsInWindow(SStateWindowInfo* pWin, TSKEY ts) { - if (pWin->winInfo.sessionWin.win.skey <= ts && ts <= pWin->winInfo.sessionWin.win.ekey) { - return true; - } - return false; -} - -bool isEqualStateKey(SStateWindowInfo* pWin, char* pKeyData) { - return pKeyData && compareVal(pKeyData, pWin->pStateKey); -} - -bool compareStateKey(void* data, void* key) { - if (!data || !key) { - return true; - } - SStateKeys* stateKey = (SStateKeys*)key; - stateKey->pData = (char*)key + sizeof(SStateKeys); - return compareVal(data, stateKey); -} - -bool compareWinStateKey(SStateKeys* left, SStateKeys* right) { - if (!left || !right) { - return false; - } - return compareVal(left->pData, right); -} - -void setStateOutputBuf(SStreamAggSupporter* pAggSup, TSKEY ts, uint64_t groupId, char* pKeyData, - SStateWindowInfo* pCurWin, SStateWindowInfo* pNextWin) { - int32_t size = pAggSup->resultRowSize; - pCurWin->winInfo.sessionWin.groupId = groupId; - pCurWin->winInfo.sessionWin.win.skey = ts; - pCurWin->winInfo.sessionWin.win.ekey = ts; - int32_t code = - pAggSup->stateStore.streamStateStateAddIfNotExist(pAggSup->pState, &pCurWin->winInfo.sessionWin, pKeyData, pAggSup->stateKeySize, - compareStateKey, &pCurWin->winInfo.pOutputBuf, &size); - pCurWin->pStateKey = - (SStateKeys*)((char*)pCurWin->winInfo.pOutputBuf + (pAggSup->resultRowSize - pAggSup->stateKeySize)); - pCurWin->pStateKey->bytes = pAggSup->stateKeySize - sizeof(SStateKeys); - pCurWin->pStateKey->type = pAggSup->stateKeyType; - pCurWin->pStateKey->pData = (char*)pCurWin->pStateKey + sizeof(SStateKeys); - pCurWin->pStateKey->isNull = false; - - if (code == TSDB_CODE_SUCCESS && !inWinRange(&pAggSup->winRange, &pCurWin->winInfo.sessionWin.win)) { 
- code = TSDB_CODE_FAILED; - releaseOutputBuf(pAggSup->pState, NULL, (SResultRow*)pCurWin->winInfo.pOutputBuf, &pAggSup->pSessionAPI->stateStore); - pCurWin->winInfo.pOutputBuf = taosMemoryCalloc(1, size); - pCurWin->pStateKey = - (SStateKeys*)((char*)pCurWin->winInfo.pOutputBuf + (pAggSup->resultRowSize - pAggSup->stateKeySize)); - pCurWin->pStateKey->bytes = pAggSup->stateKeySize - sizeof(SStateKeys); - pCurWin->pStateKey->type = pAggSup->stateKeyType; - pCurWin->pStateKey->pData = (char*)pCurWin->pStateKey + sizeof(SStateKeys); - pCurWin->pStateKey->isNull = false; - pCurWin->winInfo.sessionWin.groupId = groupId; - pCurWin->winInfo.sessionWin.win.skey = ts; - pCurWin->winInfo.sessionWin.win.ekey = ts; - qDebug("===stream===reset state win key. skey:%" PRId64 ", endkey:%" PRId64, pCurWin->winInfo.sessionWin.win.skey, pCurWin->winInfo.sessionWin.win.ekey); - } - - if (code == TSDB_CODE_SUCCESS) { - pCurWin->winInfo.isOutput = true; - pAggSup->stateStore.streamStateSessionDel(pAggSup->pState, &pCurWin->winInfo.sessionWin); - } else if (pKeyData) { - if (IS_VAR_DATA_TYPE(pAggSup->stateKeyType)) { - varDataCopy(pCurWin->pStateKey->pData, pKeyData); - } else { - memcpy(pCurWin->pStateKey->pData, pKeyData, pCurWin->pStateKey->bytes); - } - } - - pNextWin->winInfo.sessionWin = pCurWin->winInfo.sessionWin; - SStreamStateCur* pCur = pAggSup->stateStore.streamStateSessionSeekKeyNext(pAggSup->pState, &pNextWin->winInfo.sessionWin); - int32_t nextSize = pAggSup->resultRowSize; - code = pAggSup->stateStore.streamStateSessionGetKVByCur(pCur, &pNextWin->winInfo.sessionWin, &pNextWin->winInfo.pOutputBuf, &nextSize); - if (code != TSDB_CODE_SUCCESS) { - SET_SESSION_WIN_INVALID(pNextWin->winInfo); - } else { - pNextWin->pStateKey = - (SStateKeys*)((char*)pNextWin->winInfo.pOutputBuf + (pAggSup->resultRowSize - pAggSup->stateKeySize)); - pNextWin->pStateKey->bytes = pAggSup->stateKeySize - sizeof(SStateKeys); - pNextWin->pStateKey->type = pAggSup->stateKeyType; - 
pNextWin->pStateKey->pData = (char*)pNextWin->pStateKey + sizeof(SStateKeys); - pNextWin->pStateKey->isNull = false; - pNextWin->winInfo.isOutput = true; - } - pAggSup->stateStore.streamStateFreeCur(pCur); -} - -int32_t updateStateWindowInfo(SStateWindowInfo* pWinInfo, SStateWindowInfo* pNextWin, TSKEY* pTs, uint64_t groupId, - SColumnInfoData* pKeyCol, int32_t rows, int32_t start, bool* allEqual, - SSHashObj* pResultRows, SSHashObj* pSeUpdated, SSHashObj* pSeDeleted) { - *allEqual = true; - for (int32_t i = start; i < rows; ++i) { - char* pKeyData = colDataGetData(pKeyCol, i); - if (!isTsInWindow(pWinInfo, pTs[i])) { - if (isEqualStateKey(pWinInfo, pKeyData)) { - if (IS_VALID_SESSION_WIN(pNextWin->winInfo)) { - // ts belongs to the next window - if (pTs[i] >= pNextWin->winInfo.sessionWin.win.skey) { - return i - start; - } - } - } else { - return i - start; - } - } - - if (pWinInfo->winInfo.sessionWin.win.skey > pTs[i]) { - if (pSeDeleted && pWinInfo->winInfo.isOutput) { - saveDeleteRes(pSeDeleted, pWinInfo->winInfo.sessionWin); - } - removeSessionResult(pSeUpdated, pResultRows, pWinInfo->winInfo.sessionWin); - pWinInfo->winInfo.sessionWin.win.skey = pTs[i]; - } - pWinInfo->winInfo.sessionWin.win.ekey = TMAX(pWinInfo->winInfo.sessionWin.win.ekey, pTs[i]); - if (!isEqualStateKey(pWinInfo, pKeyData)) { - *allEqual = false; - } - } - return rows - start; -} - -static void doStreamStateAggImpl(SOperatorInfo* pOperator, SSDataBlock* pSDataBlock, SSHashObj* pSeUpdated, - SSHashObj* pStDeleted) { - SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; - SStorageAPI* pAPI = &pOperator->pTaskInfo->storageAPI; - - SStreamStateAggOperatorInfo* pInfo = pOperator->info; - int32_t numOfOutput = pOperator->exprSupp.numOfExprs; - uint64_t groupId = pSDataBlock->info.id.groupId; - int64_t code = TSDB_CODE_SUCCESS; - TSKEY* tsCols = NULL; - SResultRow* pResult = NULL; - int32_t winRows = 0; - SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; - - pInfo->dataVersion = 
TMAX(pInfo->dataVersion, pSDataBlock->info.version); - pAggSup->winRange = pTaskInfo->streamInfo.fillHistoryWindow; - if (pAggSup->winRange.ekey <= 0) { - pAggSup->winRange.ekey = INT64_MAX; - } - - if (pSDataBlock->pDataBlock != NULL) { - SColumnInfoData* pColDataInfo = taosArrayGet(pSDataBlock->pDataBlock, pInfo->primaryTsIndex); - tsCols = (int64_t*)pColDataInfo->pData; - } else { - return; - } - - int32_t rows = pSDataBlock->info.rows; - blockDataEnsureCapacity(pAggSup->pScanBlock, rows); - SColumnInfoData* pKeyColInfo = taosArrayGet(pSDataBlock->pDataBlock, pInfo->stateCol.slotId); - for (int32_t i = 0; i < rows; i += winRows) { - if (pInfo->ignoreExpiredData && isOverdue(tsCols[i], &pInfo->twAggSup) || colDataIsNull_s(pKeyColInfo, i)) { - i++; - continue; - } - char* pKeyData = colDataGetData(pKeyColInfo, i); - int32_t winIndex = 0; - bool allEqual = true; - SStateWindowInfo curWin = {0}; - SStateWindowInfo nextWin = {0}; - setStateOutputBuf(pAggSup, tsCols[i], groupId, pKeyData, &curWin, &nextWin); - if (IS_VALID_SESSION_WIN(nextWin.winInfo)) { - releaseOutputBuf(pAggSup->pState, NULL, (SResultRow*)nextWin.winInfo.pOutputBuf, &pAPI->stateStore); - } - setSessionWinOutputInfo(pSeUpdated, &curWin.winInfo); - winRows = updateStateWindowInfo(&curWin, &nextWin, tsCols, groupId, pKeyColInfo, rows, i, &allEqual, - pAggSup->pResultRows, pSeUpdated, pStDeleted); - if (!allEqual) { - uint64_t uid = 0; - appendOneRowToStreamSpecialBlock(pAggSup->pScanBlock, &curWin.winInfo.sessionWin.win.skey, - &curWin.winInfo.sessionWin.win.ekey, &uid, &groupId, NULL); - tSimpleHashRemove(pSeUpdated, &curWin.winInfo.sessionWin, sizeof(SSessionKey)); - doDeleteSessionWindow(pAggSup, &curWin.winInfo.sessionWin); - releaseOutputBuf(pAggSup->pState, NULL, (SResultRow*)curWin.winInfo.pOutputBuf, &pAPI->stateStore); - continue; - } - code = doOneWindowAggImpl(&pInfo->twAggSup.timeWindowData, &curWin.winInfo, &pResult, i, winRows, rows, numOfOutput, - pOperator, 0); - if (code != 
TSDB_CODE_SUCCESS || pResult == NULL) { - T_LONG_JMP(pTaskInfo->env, TSDB_CODE_OUT_OF_MEMORY); - } - saveSessionOutputBuf(pAggSup, &curWin.winInfo); - - if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE) { - code = saveResult(curWin.winInfo, pSeUpdated); - if (code != TSDB_CODE_SUCCESS) { - T_LONG_JMP(pTaskInfo->env, TSDB_CODE_OUT_OF_MEMORY); - } - } - - if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_WINDOW_CLOSE) { - SSessionKey key = {0}; - getSessionHashKey(&curWin.winInfo.sessionWin, &key); - tSimpleHashPut(pAggSup->pResultRows, &key, sizeof(SSessionKey), &curWin.winInfo, sizeof(SResultWindowInfo)); - } - } -} - -static SSDataBlock* doStreamStateAgg(SOperatorInfo* pOperator) { - if (pOperator->status == OP_EXEC_DONE) { - return NULL; - } - - SExprSupp* pSup = &pOperator->exprSupp; - SStreamStateAggOperatorInfo* pInfo = pOperator->info; - SOptrBasicInfo* pBInfo = &pInfo->binfo; - qDebug("===stream=== stream state agg"); - if (pOperator->status == OP_RES_TO_RETURN) { - doBuildDeleteDataBlock(pOperator, pInfo->pSeDeleted, pInfo->pDelRes, &pInfo->pDelIterator); - if (pInfo->pDelRes->info.rows > 0) { - printDataBlock(pInfo->pDelRes, "single state delete"); - return pInfo->pDelRes; - } - - doBuildSessionResult(pOperator, pInfo->streamAggSup.pState, &pInfo->groupResInfo, pBInfo->pRes); - if (pBInfo->pRes->info.rows > 0) { - printDataBlock(pBInfo->pRes, "single state"); - return pBInfo->pRes; - } - - setOperatorCompleted(pOperator); - return NULL; - } - - SOperatorInfo* downstream = pOperator->pDownstream[0]; - if (!pInfo->pUpdated) { - pInfo->pUpdated = taosArrayInit(16, sizeof(SSessionKey)); - } - if (!pInfo->pSeUpdated) { - _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); - pInfo->pSeUpdated = tSimpleHashInit(64, hashFn); - } - while (1) { - SSDataBlock* pBlock = getNextBlockFromDownstream(pOperator, 0); - if (pBlock == NULL) { - break; - } - printDataBlock(pBlock, "single state recv"); - - if (pBlock->info.type == STREAM_DELETE_DATA 
|| pBlock->info.type == STREAM_DELETE_RESULT || - pBlock->info.type == STREAM_CLEAR) { - SArray* pWins = taosArrayInit(16, sizeof(SSessionKey)); - doDeleteTimeWindows(&pInfo->streamAggSup, pBlock, pWins); - removeSessionResults(pInfo->pSeUpdated, pWins); - copyDeleteWindowInfo(pWins, pInfo->pSeDeleted); - taosArrayDestroy(pWins); - continue; - } else if (pBlock->info.type == STREAM_GET_ALL) { - getAllSessionWindow(pInfo->streamAggSup.pResultRows, pInfo->pSeUpdated); - continue; - } else if (pBlock->info.type == STREAM_CREATE_CHILD_TABLE) { - return pBlock; - } else { - ASSERTS(pBlock->info.type == STREAM_NORMAL || pBlock->info.type == STREAM_INVALID, "invalid SSDataBlock type"); - } - - if (pInfo->scalarSupp.pExprInfo != NULL) { - SExprSupp* pExprSup = &pInfo->scalarSupp; - projectApplyFunctions(pExprSup->pExprInfo, pBlock, pBlock, pExprSup->pCtx, pExprSup->numOfExprs, NULL); - } - // the pDataBlock are always the same one, no need to call this again - setInputDataBlock(pSup, pBlock, TSDB_ORDER_ASC, MAIN_SCAN, true); - doStreamStateAggImpl(pOperator, pBlock, pInfo->pSeUpdated, pInfo->pSeDeleted); - pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, pBlock->info.window.ekey); - } - // restore the value - pOperator->status = OP_RES_TO_RETURN; - - closeSessionWindow(pInfo->streamAggSup.pResultRows, &pInfo->twAggSup, pInfo->pSeUpdated); - copyUpdateResult(pInfo->pSeUpdated, pInfo->pUpdated); - removeSessionResults(pInfo->pSeDeleted, pInfo->pUpdated); - tSimpleHashCleanup(pInfo->pSeUpdated); - pInfo->pSeUpdated = NULL; - - if(pInfo->isHistoryOp) { - getMaxTsWins(pInfo->pUpdated, pInfo->historyWins); - } - - initGroupResInfoFromArrayList(&pInfo->groupResInfo, pInfo->pUpdated); - pInfo->pUpdated = NULL; - blockDataEnsureCapacity(pInfo->binfo.pRes, pOperator->resultInfo.capacity); - -#if 0 - char* pBuf = streamStateSessionDump(pInfo->streamAggSup.pState); - qDebug("===stream===final session%s", pBuf); - taosMemoryFree(pBuf); -#endif - - doBuildDeleteDataBlock(pOperator, 
pInfo->pSeDeleted, pInfo->pDelRes, &pInfo->pDelIterator); - if (pInfo->pDelRes->info.rows > 0) { - printDataBlock(pInfo->pDelRes, "single state delete"); - return pInfo->pDelRes; - } - - doBuildSessionResult(pOperator, pInfo->streamAggSup.pState, &pInfo->groupResInfo, pBInfo->pRes); - if (pBInfo->pRes->info.rows > 0) { - printDataBlock(pBInfo->pRes, "single state"); - return pBInfo->pRes; - } - setOperatorCompleted(pOperator); - return NULL; -} - -void streamStateReleaseState(SOperatorInfo* pOperator) { - SStreamStateAggOperatorInfo* pInfo = pOperator->info; - int32_t resSize = taosArrayGetSize(pInfo->historyWins) * sizeof(SSessionKey); - qDebug("===stream=== relase state. save result count:%d", (int32_t)taosArrayGetSize(pInfo->historyWins)); - pInfo->streamAggSup.stateStore.streamStateSaveInfo(pInfo->streamAggSup.pState, STREAM_STATE_OP_STATE_NAME, strlen(STREAM_STATE_OP_STATE_NAME), pInfo->historyWins->pData, resSize); - SOperatorInfo* downstream = pOperator->pDownstream[0]; - if (downstream->fpSet.releaseStreamStateFn) { - downstream->fpSet.releaseStreamStateFn(downstream); - } -} - -static void compactStateWindow(SOperatorInfo* pOperator, SResultWindowInfo* pCurWin, SResultWindowInfo* pNextWin, - SSHashObj* pStUpdated, SSHashObj* pStDeleted) { - SExprSupp* pSup = &pOperator->exprSupp; - SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; - SStorageAPI* pAPI = &pOperator->pTaskInfo->storageAPI; - - SStreamStateAggOperatorInfo* pInfo = pOperator->info; - SResultRow* pCurResult = NULL; - int32_t numOfOutput = pOperator->exprSupp.numOfExprs; - SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; - initSessionOutputBuf(pCurWin, &pCurResult, pSup->pCtx, numOfOutput, pSup->rowEntryInfoOffset); - SResultRow* pWinResult = NULL; - initSessionOutputBuf(pNextWin, &pWinResult, pAggSup->pDummyCtx, numOfOutput, pSup->rowEntryInfoOffset); - pCurWin->sessionWin.win.ekey = TMAX(pCurWin->sessionWin.win.ekey, pNextWin->sessionWin.win.ekey); - - 
updateTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &pCurWin->sessionWin.win, 1); - compactFunctions(pSup->pCtx, pAggSup->pDummyCtx, numOfOutput, pTaskInfo, &pInfo->twAggSup.timeWindowData); - tSimpleHashRemove(pStUpdated, &pNextWin->sessionWin, sizeof(SSessionKey)); - if (pNextWin->isOutput && pStDeleted) { - qDebug("===stream=== save delete window info %" PRId64 ", %" PRIu64, pNextWin->sessionWin.win.skey, pNextWin->sessionWin.groupId); - saveDeleteRes(pStDeleted, pNextWin->sessionWin); - } - removeSessionResult(pStUpdated, pAggSup->pResultRows, pNextWin->sessionWin); - doDeleteSessionWindow(pAggSup, &pNextWin->sessionWin); - taosMemoryFree(pNextWin->pOutputBuf); -} - -void streamStateReloadState(SOperatorInfo* pOperator) { - SStreamStateAggOperatorInfo* pInfo = pOperator->info; - SStreamAggSupporter* pAggSup = &pInfo->streamAggSup; - resetWinRange(&pAggSup->winRange); - - SSessionKey seKey = {.win.skey = INT64_MIN, .win.ekey = INT64_MIN, .groupId = 0}; - int32_t size = 0; - void* pBuf = NULL; - int32_t code = pAggSup->stateStore.streamStateGetInfo(pAggSup->pState, STREAM_STATE_OP_STATE_NAME, - strlen(STREAM_STATE_OP_STATE_NAME), &pBuf, &size); - int32_t num = size / sizeof(SSessionKey); - qDebug("===stream=== reload state. get result count:%d", num); - SSessionKey* pSeKeyBuf = (SSessionKey*) pBuf; - ASSERT(size == num * sizeof(SSessionKey)); - if (!pInfo->pSeUpdated && num > 0) { - _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); - pInfo->pSeUpdated = tSimpleHashInit(64, hashFn); - } - if (!pInfo->pSeDeleted && num > 0) { - _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); - pInfo->pSeDeleted = tSimpleHashInit(64, hashFn); - } - for (int32_t i = 0; i < num; i++) { - SStateWindowInfo curInfo = {0}; - SStateWindowInfo nextInfo = {0}; - SStateWindowInfo dummy = {0}; - qDebug("===stream=== reload state. 
try process result %" PRId64 ", %" PRIu64 ", index:%d", pSeKeyBuf[i].win.skey, pSeKeyBuf[i].groupId, i); - setStateOutputBuf(pAggSup, pSeKeyBuf[i].win.skey, pSeKeyBuf[i].groupId, NULL, &curInfo, &nextInfo); - bool cpRes = compareWinStateKey(curInfo.pStateKey,nextInfo.pStateKey); - qDebug("===stream=== reload state. next window info %" PRId64 ", %" PRIu64 ", compare:%d", nextInfo.winInfo.sessionWin.win.skey, nextInfo.winInfo.sessionWin.groupId, cpRes); - if (cpRes) { - compactStateWindow(pOperator, &curInfo.winInfo, &nextInfo.winInfo, pInfo->pSeUpdated, pInfo->pSeDeleted); - qDebug("===stream=== reload state. save result %" PRId64 ", %" PRIu64, curInfo.winInfo.sessionWin.win.skey, curInfo.winInfo.sessionWin.groupId); - if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE) { - saveResult(curInfo.winInfo, pInfo->pSeUpdated); - } else if (pInfo->twAggSup.calTrigger == STREAM_TRIGGER_WINDOW_CLOSE) { - if (!isCloseWindow(&curInfo.winInfo.sessionWin.win, &pInfo->twAggSup)) { - saveDeleteRes(pInfo->pSeDeleted, curInfo.winInfo.sessionWin); - } - SSessionKey key = {0}; - getSessionHashKey(&curInfo.winInfo.sessionWin, &key); - tSimpleHashPut(pAggSup->pResultRows, &key, sizeof(SSessionKey), &curInfo.winInfo, sizeof(SResultWindowInfo)); - } - } else if (IS_VALID_SESSION_WIN(nextInfo.winInfo)) { - releaseOutputBuf(pAggSup->pState, NULL, (SResultRow*)nextInfo.winInfo.pOutputBuf, &pAggSup->pSessionAPI->stateStore); - } - - if (IS_VALID_SESSION_WIN(curInfo.winInfo)) { - saveSessionOutputBuf(pAggSup, &curInfo.winInfo); - } - } - taosMemoryFree(pBuf); - - SOperatorInfo* downstream = pOperator->pDownstream[0]; - if (downstream->fpSet.reloadStreamStateFn) { - downstream->fpSet.reloadStreamStateFn(downstream); - } -} - -SOperatorInfo* createStreamStateAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, - SExecTaskInfo* pTaskInfo, SReadHandle* pHandle) { - SStreamStateWinodwPhysiNode* pStateNode = (SStreamStateWinodwPhysiNode*)pPhyNode; - int32_t tsSlotId = 
((SColumnNode*)pStateNode->window.pTspk)->slotId; - SColumnNode* pColNode = (SColumnNode*)(pStateNode->pStateKey); - int32_t code = TSDB_CODE_SUCCESS; - - SStreamStateAggOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SStreamStateAggOperatorInfo)); - SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo)); - if (pInfo == NULL || pOperator == NULL) { - code = TSDB_CODE_OUT_OF_MEMORY; - goto _error; - } - - pInfo->stateCol = extractColumnFromColumnNode(pColNode); - initResultSizeInfo(&pOperator->resultInfo, 4096); - if (pStateNode->window.pExprs != NULL) { - int32_t numOfScalar = 0; - SExprInfo* pScalarExprInfo = createExprInfo(pStateNode->window.pExprs, NULL, &numOfScalar); - code = initExprSupp(&pInfo->scalarSupp, pScalarExprInfo, numOfScalar, &pTaskInfo->storageAPI.functionStore); - if (code != TSDB_CODE_SUCCESS) { - goto _error; - } - } - - pInfo->twAggSup = (STimeWindowAggSupp){ - .waterMark = pStateNode->window.watermark, - .calTrigger = pStateNode->window.triggerType, - .maxTs = INT64_MIN, - .minTs = INT64_MAX, - }; - - initExecTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &pTaskInfo->window); - - SExprSupp* pSup = &pOperator->exprSupp; - int32_t numOfCols = 0; - SExprInfo* pExprInfo = createExprInfo(pStateNode->window.pFuncs, NULL, &numOfCols); - SSDataBlock* pResBlock = createDataBlockFromDescNode(pPhyNode->pOutputDataBlockDesc); - code = initBasicInfoEx(&pInfo->binfo, pSup, pExprInfo, numOfCols, pResBlock, &pTaskInfo->storageAPI.functionStore); - if (code != TSDB_CODE_SUCCESS) { - goto _error; - } - int32_t keySize = sizeof(SStateKeys) + pColNode->node.resType.bytes; - int16_t type = pColNode->node.resType.type; - code = initStreamAggSupporter(&pInfo->streamAggSup, pSup->pCtx, numOfCols, 0, pTaskInfo->streamInfo.pState, keySize, - type, &pTaskInfo->storageAPI.stateStore, pHandle, &pTaskInfo->storageAPI); - if (code != TSDB_CODE_SUCCESS) { - goto _error; - } - - pInfo->primaryTsIndex = tsSlotId; - _hash_fn_t hashFn = 
taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); - pInfo->pSeDeleted = tSimpleHashInit(64, hashFn); - pInfo->pDelIterator = NULL; - pInfo->pDelRes = createSpecialDataBlock(STREAM_DELETE_RESULT); - pInfo->pChildren = NULL; - pInfo->ignoreExpiredData = pStateNode->window.igExpired; - pInfo->ignoreExpiredDataSaved = false; - pInfo->pUpdated = NULL; - pInfo->pSeUpdated = NULL; - pInfo->dataVersion = 0; - pInfo->historyWins = taosArrayInit(4, sizeof(SSessionKey)); - if (!pInfo->historyWins) { - goto _error; - } - if (pHandle) { - pInfo->isHistoryOp = pHandle->fillHistory; - } - - setOperatorInfo(pOperator, "StreamStateAggOperator", QUERY_NODE_PHYSICAL_PLAN_STREAM_STATE, true, OP_NOT_OPENED, - pInfo, pTaskInfo); - pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doStreamStateAgg, NULL, destroyStreamStateOperatorInfo, - optrDefaultBufFn, NULL, optrDefaultGetNextExtFn, NULL); - setOperatorStreamStateFn(pOperator, streamStateReleaseState, streamStateReloadState); - initDownStream(downstream, &pInfo->streamAggSup, pOperator->operatorType, pInfo->primaryTsIndex, &pInfo->twAggSup); - code = appendDownstream(pOperator, &downstream, 1); - if (code != TSDB_CODE_SUCCESS) { - goto _error; - } - return pOperator; - -_error: - destroyStreamStateOperatorInfo(pInfo); - taosMemoryFreeClear(pOperator); - pTaskInfo->code = code; - return NULL; -} void destroyMAIOperatorInfo(void* param) { SMergeAlignedIntervalAggOperatorInfo* miaInfo = (SMergeAlignedIntervalAggOperatorInfo*)param; @@ -5196,263 +2189,3 @@ _error: pTaskInfo->code = code; return NULL; } - -static SSDataBlock* doStreamIntervalAgg(SOperatorInfo* pOperator) { - SStreamIntervalOperatorInfo* pInfo = pOperator->info; - SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo; - SStorageAPI* pAPI = &pOperator->pTaskInfo->storageAPI; - - SExprSupp* pSup = &pOperator->exprSupp; - - if (pOperator->status == OP_EXEC_DONE) { - return NULL; - } - - if (pOperator->status == OP_RES_TO_RETURN) { - doBuildDeleteResult(pInfo, pInfo->pDelWins, 
&pInfo->delIndex, pInfo->pDelRes); - if (pInfo->pDelRes->info.rows > 0) { - printDataBlock(pInfo->pDelRes, "single interval delete"); - return pInfo->pDelRes; - } - - doBuildStreamIntervalResult(pOperator, pInfo->pState, pInfo->binfo.pRes, &pInfo->groupResInfo); - if (pInfo->binfo.pRes->info.rows > 0) { - printDataBlock(pInfo->binfo.pRes, "single interval"); - return pInfo->binfo.pRes; - } - - if (pInfo->recvGetAll) { - pInfo->recvGetAll = false; - resetUnCloseWinInfo(pInfo->aggSup.pResultRowHashTable); - } - - setOperatorCompleted(pOperator); - if (pInfo->twAggSup.maxTs > 0 && - pInfo->twAggSup.maxTs - pInfo->twAggSup.checkPointInterval > pInfo->twAggSup.checkPointTs) { - pAPI->stateStore.streamStateCommit(pInfo->pState); - pAPI->stateStore.streamStateDeleteCheckPoint(pInfo->pState, pInfo->twAggSup.maxTs - pInfo->twAggSup.deleteMark); - setStreamDataVersion(pTaskInfo, pInfo->dataVersion, pInfo->pState->checkPointId); - pInfo->twAggSup.checkPointTs = pInfo->twAggSup.maxTs; - } - return NULL; - } - - SOperatorInfo* downstream = pOperator->pDownstream[0]; - - if (!pInfo->pUpdated) { - pInfo->pUpdated = taosArrayInit(4096, POINTER_BYTES); - } - - if (!pInfo->pUpdatedMap) { - _hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY); - pInfo->pUpdatedMap = tSimpleHashInit(4096, hashFn); - } - - while (1) { - SSDataBlock* pBlock = getNextBlockFromDownstream(pOperator, 0); - if (pBlock == NULL) { - qDebug("===stream===return data:single interval. 
recv datablock num:%" PRIu64, pInfo->numOfDatapack); - pInfo->numOfDatapack = 0; - break; - } - - pInfo->numOfDatapack++; - printDataBlock(pBlock, "single interval recv"); - - if (pBlock->info.type == STREAM_DELETE_DATA || pBlock->info.type == STREAM_DELETE_RESULT || - pBlock->info.type == STREAM_CLEAR) { - doDeleteWindows(pOperator, &pInfo->interval, pBlock, pInfo->pDelWins, pInfo->pUpdatedMap); - continue; - } else if (pBlock->info.type == STREAM_GET_ALL) { - qDebug("===stream===single interval recv|block type STREAM_GET_ALL"); - pInfo->recvGetAll = true; - getAllIntervalWindow(pInfo->aggSup.pResultRowHashTable, pInfo->pUpdatedMap); - continue; - } else if (pBlock->info.type == STREAM_CREATE_CHILD_TABLE) { - printDataBlock(pBlock, "single interval"); - return pBlock; - } else { - ASSERTS(pBlock->info.type == STREAM_NORMAL || pBlock->info.type == STREAM_INVALID, "invalid SSDataBlock type"); - } - - if (pBlock->info.type == STREAM_NORMAL && pBlock->info.version != 0) { - // set input version - pTaskInfo->version = pBlock->info.version; - } - - if (pInfo->scalarSupp.pExprInfo != NULL) { - SExprSupp* pExprSup = &pInfo->scalarSupp; - projectApplyFunctions(pExprSup->pExprInfo, pBlock, pBlock, pExprSup->pCtx, pExprSup->numOfExprs, NULL); - } - - // The timewindow that overlaps the timestamps of the input pBlock need to be recalculated and return to the - // caller. Note that all the time window are not close till now. 
- // the pDataBlock are always the same one, no need to call this again - setInputDataBlock(pSup, pBlock, TSDB_ORDER_ASC, MAIN_SCAN, true); - if (pInfo->invertible) { - setInverFunction(pSup->pCtx, pOperator->exprSupp.numOfExprs, pBlock->info.type); - } - - doStreamIntervalAggImpl(pOperator, pBlock, pBlock->info.id.groupId, pInfo->pUpdatedMap); - pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, pBlock->info.window.ekey); - pInfo->twAggSup.minTs = TMIN(pInfo->twAggSup.minTs, pBlock->info.window.skey); - } - pOperator->status = OP_RES_TO_RETURN; - removeDeleteResults(pInfo->pUpdatedMap, pInfo->pDelWins); - closeStreamIntervalWindow(pInfo->aggSup.pResultRowHashTable, &pInfo->twAggSup, &pInfo->interval, NULL, - pInfo->pUpdatedMap, pInfo->pDelWins, pOperator); - - void* pIte = NULL; - int32_t iter = 0; - while ((pIte = tSimpleHashIterate(pInfo->pUpdatedMap, pIte, &iter)) != NULL) { - taosArrayPush(pInfo->pUpdated, pIte); - } - taosArraySort(pInfo->pUpdated, winPosCmprImpl); - - initMultiResInfoFromArrayList(&pInfo->groupResInfo, pInfo->pUpdated); - pInfo->pUpdated = NULL; - blockDataEnsureCapacity(pInfo->binfo.pRes, pOperator->resultInfo.capacity); - tSimpleHashCleanup(pInfo->pUpdatedMap); - pInfo->pUpdatedMap = NULL; - -#if 0 - char* pBuf = streamStateIntervalDump(pInfo->pState); - qDebug("===stream===interval state%s", pBuf); - taosMemoryFree(pBuf); -#endif - - doBuildDeleteResult(pInfo, pInfo->pDelWins, &pInfo->delIndex, pInfo->pDelRes); - if (pInfo->pDelRes->info.rows > 0) { - printDataBlock(pInfo->pDelRes, "single interval delete"); - return pInfo->pDelRes; - } - - doBuildStreamIntervalResult(pOperator, pInfo->pState, pInfo->binfo.pRes, &pInfo->groupResInfo); - if (pInfo->binfo.pRes->info.rows > 0) { - printDataBlock(pInfo->binfo.pRes, "single interval"); - return pInfo->binfo.pRes; - } - - return NULL; -} - -SOperatorInfo* createStreamIntervalOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, - SExecTaskInfo* pTaskInfo) { - 
SStreamIntervalOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SStreamIntervalOperatorInfo)); - SOperatorInfo* pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo)); - if (pInfo == NULL || pOperator == NULL) { - goto _error; - } - SStreamIntervalPhysiNode* pIntervalPhyNode = (SStreamIntervalPhysiNode*)pPhyNode; - - int32_t code = TSDB_CODE_SUCCESS; - int32_t numOfCols = 0; - SExprInfo* pExprInfo = createExprInfo(pIntervalPhyNode->window.pFuncs, NULL, &numOfCols); - - SSDataBlock* pResBlock = createDataBlockFromDescNode(pPhyNode->pOutputDataBlockDesc); - pInfo->interval = (SInterval){ - .interval = pIntervalPhyNode->interval, - .sliding = pIntervalPhyNode->sliding, - .intervalUnit = pIntervalPhyNode->intervalUnit, - .slidingUnit = pIntervalPhyNode->slidingUnit, - .offset = pIntervalPhyNode->offset, - .precision = ((SColumnNode*)pIntervalPhyNode->window.pTspk)->node.resType.precision, - }; - - pInfo->twAggSup = (STimeWindowAggSupp){ - .waterMark = pIntervalPhyNode->window.watermark, - .calTrigger = pIntervalPhyNode->window.triggerType, - .maxTs = INT64_MIN, - .minTs = INT64_MAX, - .deleteMark = getDeleteMark(pIntervalPhyNode), - .checkPointTs = 0, - .checkPointInterval = - convertTimePrecision(tsCheckpointInterval, TSDB_TIME_PRECISION_MILLI, pInfo->interval.precision), - }; - - ASSERTS(pInfo->twAggSup.calTrigger != STREAM_TRIGGER_MAX_DELAY, "trigger type should not be max delay"); - - pOperator->pTaskInfo = pTaskInfo; - SStorageAPI* pAPI = &pOperator->pTaskInfo->storageAPI; - - pInfo->ignoreExpiredData = pIntervalPhyNode->window.igExpired; - pInfo->ignoreExpiredDataSaved = false; - pInfo->isFinal = false; - - SExprSupp* pSup = &pOperator->exprSupp; - initBasicInfo(&pInfo->binfo, pResBlock); - initStreamFunciton(pSup->pCtx, pSup->numOfExprs); - initExecTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &pTaskInfo->window); - - pInfo->primaryTsIndex = ((SColumnNode*)pIntervalPhyNode->window.pTspk)->slotId; - initResultSizeInfo(&pOperator->resultInfo, 4096); - - 
pInfo->pState = taosMemoryCalloc(1, sizeof(SStreamState)); - *(pInfo->pState) = *(pTaskInfo->streamInfo.pState); - pAPI->stateStore.streamStateSetNumber(pInfo->pState, -1); - - size_t keyBufSize = sizeof(int64_t) + sizeof(int64_t) + POINTER_BYTES; - code = initAggSup(pSup, &pInfo->aggSup, pExprInfo, numOfCols, keyBufSize, pTaskInfo->id.str, - pInfo->pState, &pTaskInfo->storageAPI.functionStore); - if (code != TSDB_CODE_SUCCESS) { - goto _error; - } - - if (pIntervalPhyNode->window.pExprs != NULL) { - int32_t numOfScalar = 0; - SExprInfo* pScalarExprInfo = createExprInfo(pIntervalPhyNode->window.pExprs, NULL, &numOfScalar); - code = initExprSupp(&pInfo->scalarSupp, pScalarExprInfo, numOfScalar, &pTaskInfo->storageAPI.functionStore); - if (code != TSDB_CODE_SUCCESS) { - goto _error; - } - } - - pInfo->invertible = allInvertible(pSup->pCtx, numOfCols); - pInfo->invertible = false; - pInfo->pDelWins = taosArrayInit(4, sizeof(SWinKey)); - pInfo->delIndex = 0; - pInfo->pDelRes = createSpecialDataBlock(STREAM_DELETE_RESULT); - initResultRowInfo(&pInfo->binfo.resultRowInfo); - - pInfo->pPhyNode = NULL; // create new child - pInfo->pPullDataMap = NULL; - pInfo->pFinalPullDataMap = NULL; - pInfo->pPullWins = NULL; // SPullWindowInfo - pInfo->pullIndex = 0; - pInfo->pPullDataRes = NULL; - pInfo->isFinal = false; - pInfo->numOfChild = 0; - pInfo->delKey.ts = INT64_MAX; - pInfo->delKey.groupId = 0; - pInfo->numOfDatapack = 0; - pInfo->pUpdated = NULL; - pInfo->pUpdatedMap = NULL; - int32_t funResSize= getMaxFunResSize(pSup, numOfCols); - - pInfo->pState->pFileState = pTaskInfo->storageAPI.stateStore.streamFileStateInit( - tsStreamBufferSize, sizeof(SWinKey), pInfo->aggSup.resultRowSize, funResSize, compareTs, pInfo->pState, - pInfo->twAggSup.deleteMark, GET_TASKID(pTaskInfo)); - - setOperatorInfo(pOperator, "StreamIntervalOperator", QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL, true, OP_NOT_OPENED, - pInfo, pTaskInfo); - pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, 
doStreamIntervalAgg, NULL, - destroyStreamFinalIntervalOperatorInfo, optrDefaultBufFn, NULL, optrDefaultGetNextExtFn, NULL); - setOperatorStreamStateFn(pOperator, streamIntervalReleaseState, streamIntervalReloadState); - - pInfo->statestore = pTaskInfo->storageAPI.stateStore; - pInfo->recvGetAll = false; - - initIntervalDownStream(downstream, pPhyNode->type, pInfo); - code = appendDownstream(pOperator, &downstream, 1); - if (code != TSDB_CODE_SUCCESS) { - goto _error; - } - - return pOperator; - -_error: - destroyStreamFinalIntervalOperatorInfo(pInfo); - taosMemoryFreeClear(pOperator); - pTaskInfo->code = code; - return NULL; -} diff --git a/source/libs/function/inc/builtinsimpl.h b/source/libs/function/inc/builtinsimpl.h index c3afc30a7b..d2f19ed2eb 100644 --- a/source/libs/function/inc/builtinsimpl.h +++ b/source/libs/function/inc/builtinsimpl.h @@ -127,7 +127,10 @@ int32_t derivativeFunction(SqlFunctionCtx* pCtx); bool getIrateFuncEnv(struct SFunctionNode* pFunc, SFuncExecEnv* pEnv); bool irateFuncSetup(SqlFunctionCtx* pCtx, SResultRowEntryInfo* pResInfo); int32_t irateFunction(SqlFunctionCtx* pCtx); +int32_t irateFunctionMerge(SqlFunctionCtx* pCtx); int32_t irateFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock); +int32_t iratePartialFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock); +int32_t getIrateInfoSize(); int32_t cachedLastRowFunction(SqlFunctionCtx* pCtx); diff --git a/source/libs/function/src/builtins.c b/source/libs/function/src/builtins.c index 73fdf810a9..188a1fcd4d 100644 --- a/source/libs/function/src/builtins.c +++ b/source/libs/function/src/builtins.c @@ -1582,6 +1582,45 @@ static int32_t translateIrate(SFunctionNode* pFunc, char* pErrBuf, int32_t len) return TSDB_CODE_SUCCESS; } +static int32_t translateIrateImpl(SFunctionNode* pFunc, char* pErrBuf, int32_t len, bool isPartial) { + uint8_t colType = ((SExprNode*)nodesListGetNode(pFunc->pParameterList, 0))->resType.type; + if (isPartial) { + if (3 != LIST_LENGTH(pFunc->pParameterList)) { + 
return invaildFuncParaNumErrMsg(pErrBuf, len, pFunc->functionName); + } + if (!IS_NUMERIC_TYPE(colType)) { + return invaildFuncParaTypeErrMsg(pErrBuf, len, pFunc->functionName); + } + pFunc->node.resType = (SDataType){.bytes = getIrateInfoSize() + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_BINARY}; + } else { + if (1 != LIST_LENGTH(pFunc->pParameterList)) { + return invaildFuncParaNumErrMsg(pErrBuf, len, pFunc->functionName); + } + if (TSDB_DATA_TYPE_BINARY != colType) { + return invaildFuncParaTypeErrMsg(pErrBuf, len, pFunc->functionName); + } + pFunc->node.resType = (SDataType){.bytes = tDataTypes[TSDB_DATA_TYPE_DOUBLE].bytes, .type = TSDB_DATA_TYPE_DOUBLE}; + + // add database precision as param + uint8_t dbPrec = pFunc->node.resType.precision; + int32_t code = addDbPrecisonParam(&pFunc->pParameterList, dbPrec); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + + + return TSDB_CODE_SUCCESS; +} + +static int32_t translateIratePartial(SFunctionNode* pFunc, char* pErrBuf, int32_t len) { + return translateIrateImpl(pFunc, pErrBuf, len, true); +} + +static int32_t translateIrateMerge(SFunctionNode* pFunc, char* pErrBuf, int32_t len) { + return translateIrateImpl(pFunc, pErrBuf, len, false); +} + static int32_t translateInterp(SFunctionNode* pFunc, char* pErrBuf, int32_t len) { int32_t numOfParams = LIST_LENGTH(pFunc->pParameterList); uint8_t dbPrec = pFunc->node.resType.precision; @@ -2619,6 +2658,31 @@ const SBuiltinFuncDefinition funcMgtBuiltins[] = { .initFunc = irateFuncSetup, .processFunc = irateFunction, .sprocessFunc = irateScalarFunction, + .finalizeFunc = irateFinalize, + .pPartialFunc = "_irate_partial", + .pMergeFunc = "_irate_merge" + }, + { + .name = "_irate_partial", + .type = FUNCTION_TYPE_IRATE_PARTIAL, + .classification = FUNC_MGT_AGG_FUNC | FUNC_MGT_TIMELINE_FUNC | FUNC_MGT_IMPLICIT_TS_FUNC | FUNC_MGT_FORBID_STREAM_FUNC | + FUNC_MGT_FORBID_SYSTABLE_FUNC, + .translateFunc = translateIratePartial, + .getEnvFunc = getIrateFuncEnv, + .initFunc 
= irateFuncSetup, + .processFunc = irateFunction, + .sprocessFunc = irateScalarFunction, + .finalizeFunc = iratePartialFinalize + }, + { + .name = "_irate_merge", + .type = FUNCTION_TYPE_IRATE_MERGE, + .classification = FUNC_MGT_AGG_FUNC, + .translateFunc = translateIrateMerge, + .getEnvFunc = getIrateFuncEnv, + .initFunc = irateFuncSetup, + .processFunc = irateFunctionMerge, + .sprocessFunc = irateScalarFunction, .finalizeFunc = irateFinalize }, { diff --git a/source/libs/function/src/builtinsimpl.c b/source/libs/function/src/builtinsimpl.c index fad8c9ca5b..bcbb3af950 100644 --- a/source/libs/function/src/builtinsimpl.c +++ b/source/libs/function/src/builtinsimpl.c @@ -5768,6 +5768,8 @@ int32_t derivativeFunction(SqlFunctionCtx* pCtx) { return TSDB_CODE_SUCCESS; } +int32_t getIrateInfoSize() { return (int32_t)sizeof(SRateInfo); } + bool getIrateFuncEnv(struct SFunctionNode* pFunc, SFuncExecEnv* pEnv) { pEnv->calcMemSize = sizeof(SRateInfo); return true; @@ -5817,6 +5819,7 @@ int32_t irateFunction(SqlFunctionCtx* pCtx) { if (INT64_MIN == pRateInfo->lastKey) { pRateInfo->lastValue = v; pRateInfo->lastKey = tsList[i]; + pRateInfo->hasResult = 1; continue; } @@ -5868,6 +5871,99 @@ static double doCalcRate(const SRateInfo* pRateInfo, double tickPerSec) { return (duration > 0) ? ((double)diff) / (duration / tickPerSec) : 0.0; } +static void irateTransferInfoImpl(TSKEY inputKey, SRateInfo* pInput, SRateInfo* pOutput, bool isFirstKey) { + if (inputKey > pOutput->lastKey) { + pOutput->firstKey = pOutput->lastKey; + pOutput->firstValue = pOutput->lastValue; + + pOutput->lastKey = isFirstKey ? pInput->firstKey : pInput->lastKey; + pOutput->lastValue = isFirstKey ? pInput->firstValue : pInput->lastValue; + } else if ((inputKey < pOutput->lastKey) && (inputKey > pOutput->firstKey)) { + pOutput->firstKey = isFirstKey ? pInput->firstKey : pInput->lastKey; + pOutput->firstValue = isFirstKey ? 
pInput->firstValue : pInput->lastValue; + } else { + // inputKey < pOutput->firstKey + } +} + +static void irateCopyInfo(SRateInfo* pInput, SRateInfo* pOutput) { + pOutput->firstKey = pInput->firstKey; + pOutput->lastKey = pInput->lastKey; + + pOutput->firstValue = pInput->firstValue; + pOutput->lastValue = pInput->lastValue; +} + +static int32_t irateTransferInfo(SRateInfo* pInput, SRateInfo* pOutput) { + if ((pInput->firstKey != INT64_MIN && (pInput->firstKey == pOutput->firstKey || pInput->firstKey == pOutput->lastKey)) || + (pInput->lastKey != INT64_MIN && (pInput->lastKey == pOutput->firstKey || pInput->lastKey == pOutput->lastKey))) { + return TSDB_CODE_FUNC_DUP_TIMESTAMP; + } + + if (pOutput->hasResult == 0) { + irateCopyInfo(pInput, pOutput); + pOutput->hasResult = pInput->hasResult; + return TSDB_CODE_SUCCESS; + } + + if (pInput->firstKey != INT64_MIN) { + irateTransferInfoImpl(pInput->firstKey, pInput, pOutput, true); + } + + if (pInput->lastKey != INT64_MIN) { + irateTransferInfoImpl(pInput->lastKey, pInput, pOutput, false); + } + + pOutput->hasResult = pInput->hasResult; + return TSDB_CODE_SUCCESS; +} + +int32_t irateFunctionMerge(SqlFunctionCtx* pCtx) { + SInputColumnInfoData* pInput = &pCtx->input; + SColumnInfoData* pCol = pInput->pData[0]; + if (pCol->info.type != TSDB_DATA_TYPE_BINARY) { + return TSDB_CODE_FUNC_FUNTION_PARA_TYPE; + } + + SRateInfo* pInfo = GET_ROWCELL_INTERBUF(GET_RES_INFO(pCtx)); + + int32_t start = pInput->startRowIndex; + for (int32_t i = start; i < start + pInput->numOfRows; ++i) { + char* data = colDataGetData(pCol, i); + SRateInfo* pInputInfo = (SRateInfo*)varDataVal(data); + if (pInputInfo->hasResult) { + int32_t code = irateTransferInfo(pInputInfo, pInfo); + if (code != TSDB_CODE_SUCCESS) { + return code; + } + } + } + + if (pInfo->hasResult) { + GET_RES_INFO(pCtx)->numOfRes = 1; + } + + return TSDB_CODE_SUCCESS; +} + +int32_t iratePartialFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) { + SResultRowEntryInfo* pResInfo 
= GET_RES_INFO(pCtx); + SRateInfo* pInfo = GET_ROWCELL_INTERBUF(GET_RES_INFO(pCtx)); + int32_t resultBytes = getIrateInfoSize(); + char* res = taosMemoryCalloc(resultBytes + VARSTR_HEADER_SIZE, sizeof(char)); + + memcpy(varDataVal(res), pInfo, resultBytes); + varDataSetLen(res, resultBytes); + + int32_t slotId = pCtx->pExpr->base.resSchema.slotId; + SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, slotId); + + colDataSetVal(pCol, pBlock->info.rows, res, false); + + taosMemoryFree(res); + return pResInfo->numOfRes; +} + int32_t irateFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) { int32_t slotId = pCtx->pExpr->base.resSchema.slotId; SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, slotId); diff --git a/source/libs/stream/inc/streamInt.h b/source/libs/stream/inc/streamInt.h index 32d6dc65d9..7a557a744a 100644 --- a/source/libs/stream/inc/streamInt.h +++ b/source/libs/stream/inc/streamInt.h @@ -52,7 +52,6 @@ int32_t streamBroadcastToChildren(SStreamTask* pTask, const SSDataBlock* pBlock) int32_t tEncodeStreamRetrieveReq(SEncoder* pEncoder, const SStreamRetrieveReq* pReq); -int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pData); int32_t streamDispatchCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pReq, int32_t nodeId, SEpSet* pEpSet); int32_t streamDoDispatchScanHistoryFinishMsg(SStreamTask* pTask, const SStreamScanHistoryFinishReq* pReq, int32_t vgId, @@ -63,6 +62,7 @@ SStreamQueueItem* streamMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* int32_t streamAddEndScanHistoryMsg(SStreamTask* pTask, SRpcHandleInfo* pRpcInfo, SStreamScanHistoryFinishReq* pReq); int32_t streamNotifyUpstreamContinue(SStreamTask* pTask); int32_t streamTaskFillHistoryFinished(SStreamTask* pTask); +int32_t streamTransferStateToStreamTask(SStreamTask* pTask); extern int32_t streamBackendId; extern int32_t streamBackendCfWrapperId; diff --git a/source/libs/stream/src/stream.c b/source/libs/stream/src/stream.c index 
f85ade591c..03a0f3586d 100644 --- a/source/libs/stream/src/stream.c +++ b/source/libs/stream/src/stream.c @@ -142,40 +142,6 @@ int32_t streamSchedExec(SStreamTask* pTask) { return 0; } -int32_t streamTaskEnqueueBlocks(SStreamTask* pTask, const SStreamDispatchReq* pReq, SRpcMsg* pRsp) { - int8_t status = 0; - - SStreamDataBlock* pBlock = createStreamDataFromDispatchMsg(pReq, STREAM_INPUT__DATA_BLOCK, pReq->dataSrcVgId); - if (pBlock == NULL) { - streamTaskInputFail(pTask); - status = TASK_INPUT_STATUS__FAILED; - qError("vgId:%d, s-task:%s failed to receive dispatch msg, reason: out of memory", pTask->pMeta->vgId, - pTask->id.idStr); - } else { - int32_t code = tAppendDataToInputQueue(pTask, (SStreamQueueItem*)pBlock); - // input queue is full, upstream is blocked now - status = (code == TSDB_CODE_SUCCESS)? TASK_INPUT_STATUS__NORMAL:TASK_INPUT_STATUS__BLOCKED; - } - - // rsp by input status - void* buf = rpcMallocCont(sizeof(SMsgHead) + sizeof(SStreamDispatchRsp)); - ((SMsgHead*)buf)->vgId = htonl(pReq->upstreamNodeId); - SStreamDispatchRsp* pDispatchRsp = POINTER_SHIFT(buf, sizeof(SMsgHead)); - - pDispatchRsp->inputStatus = status; - pDispatchRsp->streamId = htobe64(pReq->streamId); - pDispatchRsp->upstreamNodeId = htonl(pReq->upstreamNodeId); - pDispatchRsp->upstreamTaskId = htonl(pReq->upstreamTaskId); - pDispatchRsp->downstreamNodeId = htonl(pTask->info.nodeId); - pDispatchRsp->downstreamTaskId = htonl(pTask->id.taskId); - - pRsp->pCont = buf; - pRsp->contLen = sizeof(SMsgHead) + sizeof(SStreamDispatchRsp); - tmsgSendRsp(pRsp); - - return status == TASK_INPUT_STATUS__NORMAL ? 
0 : -1; -} - int32_t streamTaskEnqueueRetrieve(SStreamTask* pTask, SStreamRetrieveReq* pReq, SRpcMsg* pRsp) { SStreamDataBlock* pData = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, 0); int8_t status = TASK_INPUT_STATUS__NORMAL; @@ -235,90 +201,115 @@ int32_t streamTaskOutputResultBlock(SStreamTask* pTask, SStreamDataBlock* pBlock return 0; } + + +static int32_t streamTaskAppendInputBlocks(SStreamTask* pTask, const SStreamDispatchReq* pReq) { + int8_t status = 0; + + SStreamDataBlock* pBlock = createStreamDataFromDispatchMsg(pReq, pReq->type, pReq->srcVgId); + if (pBlock == NULL) { + streamTaskInputFail(pTask); + status = TASK_INPUT_STATUS__FAILED; + qError("vgId:%d, s-task:%s failed to receive dispatch msg, reason: out of memory", pTask->pMeta->vgId, + pTask->id.idStr); + } else { + if (pBlock->type == STREAM_INPUT__TRANS_STATE) { + pTask->status.appendTranstateBlock = true; + } + + int32_t code = tAppendDataToInputQueue(pTask, (SStreamQueueItem*)pBlock); + // input queue is full, upstream is blocked now + status = (code == TSDB_CODE_SUCCESS) ? 
TASK_INPUT_STATUS__NORMAL : TASK_INPUT_STATUS__BLOCKED; + } + + return status; +} + +static int32_t buildDispatchRsp(const SStreamTask* pTask, const SStreamDispatchReq* pReq, int32_t status, void** pBuf) { + *pBuf = rpcMallocCont(sizeof(SMsgHead) + sizeof(SStreamDispatchRsp)); + if (*pBuf == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; + } + + ((SMsgHead*)(*pBuf))->vgId = htonl(pReq->upstreamNodeId); + SStreamDispatchRsp* pDispatchRsp = POINTER_SHIFT((*pBuf), sizeof(SMsgHead)); + + pDispatchRsp->inputStatus = status; + pDispatchRsp->streamId = htobe64(pReq->streamId); + pDispatchRsp->upstreamNodeId = htonl(pReq->upstreamNodeId); + pDispatchRsp->upstreamTaskId = htonl(pReq->upstreamTaskId); + pDispatchRsp->downstreamNodeId = htonl(pTask->info.nodeId); + pDispatchRsp->downstreamTaskId = htonl(pTask->id.taskId); + + return TSDB_CODE_SUCCESS; +} + +void streamTaskCloseUpstreamInput(SStreamTask* pTask, int32_t taskId) { + SStreamChildEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, taskId); + if (pInfo != NULL) { + pInfo->dataAllowed = false; + } +} + int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, SRpcMsg* pRsp, bool exec) { qDebug("s-task:%s receive dispatch msg from taskId:0x%x(vgId:%d), msgLen:%" PRId64, pTask->id.idStr, pReq->upstreamTaskId, pReq->upstreamNodeId, pReq->totalLen); - // todo add the input queue buffer limitation - streamTaskEnqueueBlocks(pTask, pReq, pRsp); - tDeleteStreamDispatchReq(pReq); + int32_t status = 0; - if (exec) { - if (streamTryExec(pTask) < 0) { - return -1; - } + SStreamChildEpInfo* pInfo = streamTaskGetUpstreamTaskEpInfo(pTask, pReq->upstreamTaskId); + ASSERT(pInfo != NULL); + + if (!pInfo->dataAllowed) { + qWarn("s-task:%s data from task:0x%x is denied, since inputQ is closed for it", pTask->id.idStr, pReq->upstreamTaskId); + status = TASK_INPUT_STATUS__BLOCKED; } else { - streamSchedExec(pTask); + // Current task has received the checkpoint req from the upstream task, from which the message should 
all be blocked + if (pReq->type == STREAM_INPUT__CHECKPOINT_TRIGGER) { + streamTaskCloseUpstreamInput(pTask, pReq->upstreamTaskId); + qDebug("s-task:%s close inputQ for upstream:0x%x", pTask->id.idStr, pReq->upstreamTaskId); + } + + status = streamTaskAppendInputBlocks(pTask, pReq); } - return 0; -} - -// todo record the idle time for dispatch data -int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, int32_t code) { - if (code != TSDB_CODE_SUCCESS) { - // dispatch message failed: network error, or node not available. - // in case of the input queue is full, the code will be TSDB_CODE_SUCCESS, the and pRsp>inputStatus will be set - // flag. here we need to retry dispatch this message to downstream task immediately. handle the case the failure - // happened too fast. todo handle the shuffle dispatch failure - if (code == TSDB_CODE_STREAM_TASK_NOT_EXIST) { - qError("s-task:%s failed to dispatch msg to task:0x%x, code:%s, no-retry", pTask->id.idStr, - pRsp->downstreamTaskId, tstrerror(code)); + { + // do send response with the input status + int32_t code = buildDispatchRsp(pTask, pReq, status, &pRsp->pCont); + if (code != TSDB_CODE_SUCCESS) { + // todo handle failure return code; - } else { - qError("s-task:%s failed to dispatch msg to task:0x%x, code:%s, retry cnt:%d", pTask->id.idStr, - pRsp->downstreamTaskId, tstrerror(code), ++pTask->msgInfo.retryCount); - return streamDispatchAllBlocks(pTask, pTask->msgInfo.pData); - } - } - - qDebug("s-task:%s receive dispatch rsp, output status:%d code:%d", pTask->id.idStr, pRsp->inputStatus, code); - - // there are other dispatch message not response yet - if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { - int32_t leftRsp = atomic_sub_fetch_32(&pTask->shuffleDispatcher.waitingRspCnt, 1); - qDebug("s-task:%s is shuffle, left waiting rsp %d", pTask->id.idStr, leftRsp); - if (leftRsp > 0) { - return 0; - } - } - - pTask->msgInfo.retryCount = 0; - ASSERT(pTask->outputInfo.status == 
TASK_OUTPUT_STATUS__WAIT); - - qDebug("s-task:%s output status is set to:%d", pTask->id.idStr, pTask->outputInfo.status); - - // the input queue of the (down stream) task that receive the output data is full, - // so the TASK_INPUT_STATUS_BLOCKED is rsp - // todo blocking the output status - if (pRsp->inputStatus == TASK_INPUT_STATUS__BLOCKED) { - pTask->msgInfo.blockingTs = taosGetTimestampMs(); // record the blocking start time - - int32_t waitDuration = 300; // 300 ms - qError("s-task:%s inputQ of downstream task:0x%x is full, time:%" PRId64 "wait for %dms and retry dispatch data", - pTask->id.idStr, pRsp->downstreamTaskId, pTask->msgInfo.blockingTs, waitDuration); - streamRetryDispatchStreamBlock(pTask, waitDuration); - } else { // pipeline send data in output queue - // this message has been sent successfully, let's try next one. - destroyStreamDataBlock(pTask->msgInfo.pData); - pTask->msgInfo.pData = NULL; - - if (pTask->msgInfo.blockingTs != 0) { - int64_t el = taosGetTimestampMs() - pTask->msgInfo.blockingTs; - qDebug("s-task:%s resume to normal from inputQ blocking, idle time:%"PRId64"ms", pTask->id.idStr, el); - pTask->msgInfo.blockingTs = 0; } - // now ready for next data output - atomic_store_8(&pTask->outputInfo.status, TASK_OUTPUT_STATUS__NORMAL); - - // otherwise, continue dispatch the first block to down stream task in pipeline - streamDispatchStreamBlock(pTask); + pRsp->contLen = sizeof(SMsgHead) + sizeof(SStreamDispatchRsp); + tmsgSendRsp(pRsp); } + tDeleteStreamDispatchReq(pReq); + streamSchedExec(pTask); + return 0; } +//int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, SRpcMsg* pRsp, bool exec) { +// qDebug("s-task:%s receive dispatch msg from taskId:0x%x(vgId:%d), msgLen:%" PRId64, pTask->id.idStr, +// pReq->upstreamTaskId, pReq->upstreamNodeId, pReq->totalLen); +// +// // todo add the input queue buffer limitation +// streamTaskEnqueueBlocks(pTask, pReq, pRsp); +// tDeleteStreamDispatchReq(pReq); +// +// if (exec) 
{ +// if (streamTryExec(pTask) < 0) { +// return -1; +// } +// } else { +// streamSchedExec(pTask); +// } +// +// return 0; +//} + int32_t streamProcessRunReq(SStreamTask* pTask) { if (streamTryExec(pTask) < 0) { return -1; @@ -371,7 +362,7 @@ int32_t tAppendDataToInputQueue(SStreamTask* pTask, SStreamQueueItem* pItem) { msgLen, ver, total, size + msgLen/1048576.0); } else if (type == STREAM_INPUT__DATA_BLOCK || type == STREAM_INPUT__DATA_RETRIEVE || type == STREAM_INPUT__REF_DATA_BLOCK) { - if ((pTask->info.taskLevel == TASK_LEVEL__SOURCE) && (tInputQueueIsFull(pTask))) { + if (/*(pTask->info.taskLevel == TASK_LEVEL__SOURCE) && */(tInputQueueIsFull(pTask))) { qError("s-task:%s input queue is full, capacity:%d size:%d MiB, current(blocks:%d, size:%.2fMiB) abort", pTask->id.idStr, STREAM_TASK_INPUT_QUEUE_CAPACITY, STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE, total, size); @@ -385,12 +376,15 @@ int32_t tAppendDataToInputQueue(SStreamTask* pTask, SStreamQueueItem* pItem) { destroyStreamDataBlock((SStreamDataBlock*) pItem); return code; } - } else if (type == STREAM_INPUT__CHECKPOINT) { + } else if (type == STREAM_INPUT__CHECKPOINT || type == STREAM_INPUT__TRANS_STATE) { taosWriteQitem(pTask->inputQueue->queue, pItem); + qDebug("s-task:%s checkpoint/trans-state blockdata enqueue, total in queue:%d, size:%.2fMiB", pTask->id.idStr, total, size); } else if (type == STREAM_INPUT__GET_RES) { // use the default memory limit, refactor later. 
taosWriteQitem(pTask->inputQueue->queue, pItem); qDebug("s-task:%s data res enqueue, current(blocks:%d, size:%.2fMiB)", pTask->id.idStr, total, size); + } else { + ASSERT(0); } if (type != STREAM_INPUT__GET_RES && type != STREAM_INPUT__CHECKPOINT && pTask->triggerParam != 0) { @@ -433,4 +427,16 @@ SStreamChildEpInfo * streamTaskGetUpstreamTaskEpInfo(SStreamTask* pTask, int32_t } return NULL; -} \ No newline at end of file +} + +void streamTaskOpenAllUpstreamInput(SStreamTask* pTask) { + int32_t num = taosArrayGetSize(pTask->pUpstreamEpInfoList); + if (num == 0) { + return; + } + + for(int32_t i = 0; i < num; ++i) { + SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->pUpstreamEpInfoList, i); + pInfo->dataAllowed = true; + } +} diff --git a/source/libs/stream/src/streamData.c b/source/libs/stream/src/streamData.c index bb4b842787..fc1b788b77 100644 --- a/source/libs/stream/src/streamData.c +++ b/source/libs/stream/src/streamData.c @@ -204,7 +204,7 @@ void streamFreeQitem(SStreamQueueItem* data) { if (type == STREAM_INPUT__GET_RES) { blockDataDestroy(((SStreamTrigger*)data)->pBlock); taosFreeQitem(data); - } else if (type == STREAM_INPUT__DATA_BLOCK || type == STREAM_INPUT__DATA_RETRIEVE) { + } else if (type == STREAM_INPUT__DATA_BLOCK || type == STREAM_INPUT__DATA_RETRIEVE || type == STREAM_INPUT__TRANS_STATE) { taosArrayDestroyEx(((SStreamDataBlock*)data)->blocks, (FDelete)blockDataFreeRes); taosFreeQitem(data); } else if (type == STREAM_INPUT__DATA_SUBMIT) { diff --git a/source/libs/stream/src/streamDispatch.c b/source/libs/stream/src/streamDispatch.c index 6771d0cc28..694b0808f2 100644 --- a/source/libs/stream/src/streamDispatch.c +++ b/source/libs/stream/src/streamDispatch.c @@ -25,6 +25,9 @@ typedef struct SBlockName { char parTbName[TSDB_TABLE_NAME_LEN]; } SBlockName; +static int32_t tInitStreamDispatchReq(SStreamDispatchReq* pReq, const SStreamTask* pTask, int32_t vgId, + int32_t numOfBlocks, int64_t dstTaskId, int32_t type); + static void initRpcMsg(SRpcMsg* 
pMsg, int32_t msgType, void* pCont, int32_t contLen) { pMsg->msgType = msgType; pMsg->pCont = pCont; @@ -35,8 +38,9 @@ static int32_t tEncodeStreamDispatchReq(SEncoder* pEncoder, const SStreamDispatc if (tStartEncode(pEncoder) < 0) return -1; if (tEncodeI64(pEncoder, pReq->streamId) < 0) return -1; if (tEncodeI32(pEncoder, pReq->taskId) < 0) return -1; + if (tEncodeI32(pEncoder, pReq->type) < 0) return -1; if (tEncodeI32(pEncoder, pReq->upstreamTaskId) < 0) return -1; - if (tEncodeI32(pEncoder, pReq->dataSrcVgId) < 0) return -1; + if (tEncodeI32(pEncoder, pReq->srcVgId) < 0) return -1; if (tEncodeI32(pEncoder, pReq->upstreamChildId) < 0) return -1; if (tEncodeI32(pEncoder, pReq->upstreamNodeId) < 0) return -1; if (tEncodeI32(pEncoder, pReq->blockNum) < 0) return -1; @@ -88,8 +92,9 @@ int32_t tDecodeStreamDispatchReq(SDecoder* pDecoder, SStreamDispatchReq* pReq) { if (tStartDecode(pDecoder) < 0) return -1; if (tDecodeI64(pDecoder, &pReq->streamId) < 0) return -1; if (tDecodeI32(pDecoder, &pReq->taskId) < 0) return -1; + if (tDecodeI32(pDecoder, &pReq->type) < 0) return -1; if (tDecodeI32(pDecoder, &pReq->upstreamTaskId) < 0) return -1; - if (tDecodeI32(pDecoder, &pReq->dataSrcVgId) < 0) return -1; + if (tDecodeI32(pDecoder, &pReq->srcVgId) < 0) return -1; if (tDecodeI32(pDecoder, &pReq->upstreamChildId) < 0) return -1; if (tDecodeI32(pDecoder, &pReq->upstreamNodeId) < 0) return -1; if (tDecodeI32(pDecoder, &pReq->blockNum) < 0) return -1; @@ -113,14 +118,15 @@ int32_t tDecodeStreamDispatchReq(SDecoder* pDecoder, SStreamDispatchReq* pReq) { } int32_t tInitStreamDispatchReq(SStreamDispatchReq* pReq, const SStreamTask* pTask, int32_t vgId, int32_t numOfBlocks, - int64_t dstTaskId) { + int64_t dstTaskId, int32_t type) { pReq->streamId = pTask->id.streamId; - pReq->dataSrcVgId = vgId; + pReq->srcVgId = vgId; pReq->upstreamTaskId = pTask->id.taskId; pReq->upstreamChildId = pTask->info.selfChildId; pReq->upstreamNodeId = pTask->info.nodeId; pReq->blockNum = numOfBlocks; 
pReq->taskId = dstTaskId; + pReq->type = type; pReq->data = taosArrayInit(numOfBlocks, POINTER_BYTES); pReq->dataLen = taosArrayInit(numOfBlocks, sizeof(int32_t)); @@ -358,7 +364,8 @@ static int32_t doSendDispatchMsg(SStreamTask* pTask, const SStreamDispatchReq* p msg.pCont = buf; msg.msgType = pTask->msgInfo.msgType; - qDebug("s-task:%s dispatch msg to taskId:0x%x vgId:%d data msg", pTask->id.idStr, pReq->taskId, vgId); + qDebug("s-task:%s dispatch msg to taskId:0x%x vgId:%d data msg, len:%d", pTask->id.idStr, pReq->taskId, vgId, + msg.contLen); return tmsgSendReq(pEpSet, &msg); FAIL: @@ -436,9 +443,8 @@ int32_t streamSearchAndAddBlock(SStreamTask* pTask, SStreamDispatchReq* pReqs, S return 0; } -int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pData) { +static int32_t doDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pData) { int32_t code = 0; - int32_t numOfBlocks = taosArrayGetSize(pData->blocks); ASSERT(numOfBlocks != 0); @@ -446,15 +452,15 @@ int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pDat SStreamDispatchReq req = {0}; int32_t downstreamTaskId = pTask->fixedEpDispatcher.taskId; - code = tInitStreamDispatchReq(&req, pTask, pData->srcVgId, numOfBlocks, downstreamTaskId); + code = tInitStreamDispatchReq(&req, pTask, pData->srcVgId, numOfBlocks, downstreamTaskId, pData->type); if (code != TSDB_CODE_SUCCESS) { return code; } for (int32_t i = 0; i < numOfBlocks; i++) { SSDataBlock* pDataBlock = taosArrayGet(pData->blocks, i); - code = streamAddBlockIntoDispatchMsg(pDataBlock, &req); + code = streamAddBlockIntoDispatchMsg(pDataBlock, &req); if (code != TSDB_CODE_SUCCESS) { taosArrayDestroyP(req.data, taosMemoryFree); taosArrayDestroy(req.dataLen); @@ -487,7 +493,7 @@ int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pDat for (int32_t i = 0; i < vgSz; i++) { SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i); - code = tInitStreamDispatchReq(&pReqs[i], pTask, 
pData->srcVgId, 0, pVgInfo->taskId); + code = tInitStreamDispatchReq(&pReqs[i], pTask, pData->srcVgId, 0, pVgInfo->taskId, pData->type); if (code != TSDB_CODE_SUCCESS) { goto FAIL_SHUFFLE_DISPATCH; } @@ -497,8 +503,7 @@ int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pDat SSDataBlock* pDataBlock = taosArrayGet(pData->blocks, i); // TODO: do not use broadcast - if (pDataBlock->info.type == STREAM_DELETE_RESULT) { - + if (pDataBlock->info.type == STREAM_DELETE_RESULT || pDataBlock->info.type == STREAM_CHECKPOINT || pDataBlock->info.type == STREAM_TRANS_STATE) { for (int32_t j = 0; j < vgSz; j++) { if (streamAddBlockIntoDispatchMsg(pDataBlock, &pReqs[j]) < 0) { goto FAIL_SHUFFLE_DISPATCH; @@ -518,14 +523,14 @@ int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pDat } } - qDebug("s-task:%s (child taskId:%d) shuffle-dispatch blocks:%d to %d vgroups", pTask->id.idStr, pTask->info.selfChildId, - numOfBlocks, vgSz); + qDebug("s-task:%s (child taskId:%d) shuffle-dispatch blocks:%d to %d vgroups", pTask->id.idStr, + pTask->info.selfChildId, numOfBlocks, vgSz); for (int32_t i = 0; i < vgSz; i++) { if (pReqs[i].blockNum > 0) { SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i); - qDebug("s-task:%s (child taskId:%d) shuffle-dispatch blocks:%d to vgId:%d", pTask->id.idStr, pTask->info.selfChildId, - pReqs[i].blockNum, pVgInfo->vgId); + qDebug("s-task:%s (child taskId:%d) shuffle-dispatch blocks:%d to vgId:%d", pTask->id.idStr, + pTask->info.selfChildId, pReqs[i].blockNum, pVgInfo->vgId); code = doSendDispatchMsg(pTask, &pReqs[i], pVgInfo->vgId, &pVgInfo->epSet); if (code < 0) { @@ -536,7 +541,7 @@ int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pDat code = 0; - FAIL_SHUFFLE_DISPATCH: + FAIL_SHUFFLE_DISPATCH: for (int32_t i = 0; i < vgSz; i++) { taosArrayDestroyP(pReqs[i].data, taosMemoryFree); taosArrayDestroy(pReqs[i].dataLen); @@ -559,7 +564,7 @@ static void doRetryDispatchData(void* param, void* 
tmrId) { ASSERT(pTask->outputInfo.status == TASK_OUTPUT_STATUS__WAIT); - int32_t code = streamDispatchAllBlocks(pTask, pTask->msgInfo.pData); + int32_t code = doDispatchAllBlocks(pTask, pTask->msgInfo.pData); if (code != TSDB_CODE_SUCCESS) { if (!streamTaskShouldStop(&pTask->status)) { qDebug("s-task:%s reset the waitRspCnt to be 0 before launch retry dispatch", pTask->id.idStr); @@ -607,12 +612,13 @@ int32_t streamDispatchStreamBlock(SStreamTask* pTask) { } pTask->msgInfo.pData = pBlock; - ASSERT(pBlock->type == STREAM_INPUT__DATA_BLOCK); + ASSERT(pBlock->type == STREAM_INPUT__DATA_BLOCK || pBlock->type == STREAM_INPUT__CHECKPOINT_TRIGGER || + pBlock->type == STREAM_INPUT__TRANS_STATE); int32_t retryCount = 0; while (1) { - int32_t code = streamDispatchAllBlocks(pTask, pBlock); + int32_t code = doDispatchAllBlocks(pTask, pBlock); if (code == TSDB_CODE_SUCCESS) { break; } @@ -729,3 +735,88 @@ int32_t streamNotifyUpstreamContinue(SStreamTask* pTask) { num); return 0; } + +int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, int32_t code) { + const char* id = pTask->id.idStr; + + if (code != TSDB_CODE_SUCCESS) { + // dispatch message failed: network error, or node not available. + // in case of the input queue is full, the code will be TSDB_CODE_SUCCESS, the and pRsp>inputStatus will be set + // flag. here we need to retry dispatch this message to downstream task immediately. handle the case the failure + // happened too fast. 
+ // todo handle the shuffle dispatch failure + if (code == TSDB_CODE_STREAM_TASK_NOT_EXIST) { // destination task does not exist, not retry anymore + qWarn("s-task:%s failed to dispatch msg to task:0x%x, no retry, since it is destroyed already", id, pRsp->downstreamTaskId); + } else { + qError("s-task:%s failed to dispatch msg to task:0x%x, code:%s, retry cnt:%d", id, pRsp->downstreamTaskId, + tstrerror(code), ++pTask->msgInfo.retryCount); + int32_t ret = doDispatchAllBlocks(pTask, pTask->msgInfo.pData); + if (ret != TSDB_CODE_SUCCESS) { + } + } + + return TSDB_CODE_SUCCESS; + } + + qDebug("s-task:%s recv dispatch rsp from 0x%x, downstream task input status:%d code:%d", id, pRsp->downstreamTaskId, + pRsp->inputStatus, code); + + // there are other dispatch message not response yet + if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { + int32_t leftRsp = atomic_sub_fetch_32(&pTask->shuffleDispatcher.waitingRspCnt, 1); + qDebug("s-task:%s is shuffle, left waiting rsp %d", id, leftRsp); + if (leftRsp > 0) { + return 0; + } + } + + // transtate msg has been sent to downstream successfully. 
let's transfer the fill-history task state + SStreamDataBlock* p = pTask->msgInfo.pData; + if (p->type == STREAM_INPUT__TRANS_STATE) { + qDebug("s-task:%s dispatch transtate msg to downstream successfully, start to transfer state", id); + ASSERT(pTask->info.fillHistory == 1); + code = streamTransferStateToStreamTask(pTask); + if (code != TSDB_CODE_SUCCESS) { // todo: do nothing if error happens + } + + streamFreeQitem(pTask->msgInfo.pData); + return TSDB_CODE_SUCCESS; + } + + pTask->msgInfo.retryCount = 0; + ASSERT(pTask->outputInfo.status == TASK_OUTPUT_STATUS__WAIT); + + qDebug("s-task:%s output status is set to:%d", id, pTask->outputInfo.status); + + // the input queue of the (down stream) task that receive the output data is full, + // so the TASK_INPUT_STATUS_BLOCKED is rsp + if (pRsp->inputStatus == TASK_INPUT_STATUS__BLOCKED) { + pTask->inputStatus = TASK_INPUT_STATUS__BLOCKED; // block the input of current task, to push pressure to upstream + pTask->msgInfo.blockingTs = taosGetTimestampMs(); // record the blocking start time + qError("s-task:%s inputQ of downstream task:0x%x is full, time:%" PRId64 " wait for %dms and retry dispatch data", + id, pRsp->downstreamTaskId, pTask->msgInfo.blockingTs, DISPATCH_RETRY_INTERVAL_MS); + streamRetryDispatchStreamBlock(pTask, DISPATCH_RETRY_INTERVAL_MS); + } else { // pipeline send data in output queue + // this message has been sent successfully, let's try next one. 
+ destroyStreamDataBlock(pTask->msgInfo.pData); + pTask->msgInfo.pData = NULL; + + if (pTask->msgInfo.blockingTs != 0) { + int64_t el = taosGetTimestampMs() - pTask->msgInfo.blockingTs; + qDebug("s-task:%s downstream task:0x%x resume to normal from inputQ blocking, blocking time:%" PRId64 "ms", id, + pRsp->downstreamTaskId, el); + pTask->msgInfo.blockingTs = 0; + + // put data into inputQ of current task is also allowed + pTask->inputStatus = TASK_INPUT_STATUS__NORMAL; + } + + // now ready for next data output + atomic_store_8(&pTask->outputInfo.status, TASK_OUTPUT_STATUS__NORMAL); + + // otherwise, continue dispatch the first block to down stream task in pipeline + streamDispatchStreamBlock(pTask); + } + + return 0; +} diff --git a/source/libs/stream/src/streamExec.c b/source/libs/stream/src/streamExec.c index c7da80fdaf..fa33793087 100644 --- a/source/libs/stream/src/streamExec.c +++ b/source/libs/stream/src/streamExec.c @@ -191,6 +191,12 @@ int32_t streamScanExec(SStreamTask* pTask, int32_t batchSize) { return 0; } + if (pTask->inputStatus == TASK_INPUT_STATUS__BLOCKED) { + qDebug("s-task:%s inputQ is blocked, wait for 10sec and retry", pTask->id.idStr); + taosMsleep(10000); + continue; + } + SSDataBlock* output = NULL; uint64_t ts = 0; code = qExecTask(exec, &output, &ts); @@ -287,21 +293,32 @@ static void waitForTaskIdle(SStreamTask* pTask, SStreamTask* pStreamTask) { } } -static int32_t streamDoTransferStateToStreamTask(SStreamTask* pTask) { +int32_t streamDoTransferStateToStreamTask(SStreamTask* pTask) { SStreamMeta* pMeta = pTask->pMeta; SStreamTask* pStreamTask = streamMetaAcquireTask(pMeta, pTask->streamTaskId.streamId, pTask->streamTaskId.taskId); if (pStreamTask == NULL) { - // todo: destroy the fill-history task here - qError("s-task:%s failed to find related stream task:0x%x, it may have been destroyed or closed", pTask->id.idStr, - pTask->streamTaskId.taskId); + qError( + "s-task:%s failed to find related stream task:0x%x, it may have been destroyed 
or closed, destroy the related " + "fill-history task", + pTask->id.idStr, pTask->streamTaskId.taskId); + + // 1. free it and remove fill-history task from disk meta-store + streamMetaUnregisterTask(pMeta, pTask->id.streamId, pTask->id.taskId); + + // 2. save to disk + taosWLockLatch(&pMeta->lock); + if (streamMetaCommit(pMeta) < 0) { + // persist to disk + } + taosWUnLockLatch(&pMeta->lock); return TSDB_CODE_STREAM_TASK_NOT_EXIST; } else { qDebug("s-task:%s fill-history task end, update related stream task:%s info, transfer exec state", pTask->id.idStr, pStreamTask->id.idStr); } - ASSERT(pStreamTask->historyTaskId.taskId == pTask->id.taskId && pTask->status.transferState == true); + ASSERT(pStreamTask->historyTaskId.taskId == pTask->id.taskId && pTask->status.appendTranstateBlock == true); STimeWindow* pTimeWindow = &pStreamTask->dataRange.window; @@ -380,34 +397,52 @@ static int32_t streamDoTransferStateToStreamTask(SStreamTask* pTask) { return TSDB_CODE_SUCCESS; } -static int32_t streamTransferStateToStreamTask(SStreamTask* pTask) { +int32_t streamTransferStateToStreamTask(SStreamTask* pTask) { int32_t code = TSDB_CODE_SUCCESS; - if (!pTask->status.transferState) { - return code; - } + ASSERT(pTask->status.appendTranstateBlock == 1); int32_t level = pTask->info.taskLevel; if (level == TASK_LEVEL__SOURCE) { streamTaskFillHistoryFinished(pTask); - streamTaskEndScanWAL(pTask); - } else if (level == TASK_LEVEL__AGG) { // do transfer task operator states. + } + + if (level == TASK_LEVEL__AGG || level == TASK_LEVEL__SOURCE) { // do transfer task operator states. 
code = streamDoTransferStateToStreamTask(pTask); - if (code != TSDB_CODE_SUCCESS) { // todo handle this - return code; - } } return code; } -static int32_t extractMsgFromInputQ(SStreamTask* pTask, SStreamQueueItem** pInput, int32_t* numOfBlocks, - const char* id) { - int32_t retryTimes = 0; - int32_t MAX_RETRY_TIMES = 5; +static int32_t extractBlocksFromInputQ(SStreamTask* pTask, SStreamQueueItem** pInput, int32_t* numOfBlocks) { + int32_t retryTimes = 0; + int32_t MAX_RETRY_TIMES = 5; + const char* id = pTask->id.idStr; + if (pTask->info.taskLevel == TASK_LEVEL__SINK) { // extract block from inputQ, one-by-one + while (1) { + if (streamTaskShouldPause(&pTask->status) || streamTaskShouldStop(&pTask->status)) { + qDebug("s-task:%s task should pause, extract input blocks:%d", pTask->id.idStr, *numOfBlocks); + return TSDB_CODE_SUCCESS; + } + + SStreamQueueItem* qItem = streamQueueNextItem(pTask->inputQueue); + if (qItem == NULL) { + qDebug("===stream===break batchSize:%d, %s", *numOfBlocks, id); + return TSDB_CODE_SUCCESS; + } + + qDebug("s-task:%s sink task handle block one-by-one, type:%d", id, qItem->type); + + *numOfBlocks = 1; + *pInput = qItem; + return TSDB_CODE_SUCCESS; + } + } + + // non sink task while (1) { - if (streamTaskShouldPause(&pTask->status)) { - qDebug("s-task:%s task should pause, input blocks:%d", pTask->id.idStr, *numOfBlocks); + if (streamTaskShouldPause(&pTask->status) || streamTaskShouldStop(&pTask->status)) { + qDebug("s-task:%s task should pause, extract input blocks:%d", pTask->id.idStr, *numOfBlocks); return TSDB_CODE_SUCCESS; } @@ -415,51 +450,111 @@ static int32_t extractMsgFromInputQ(SStreamTask* pTask, SStreamQueueItem** pInpu if (qItem == NULL) { if (pTask->info.taskLevel == TASK_LEVEL__SOURCE && (++retryTimes) < MAX_RETRY_TIMES) { taosMsleep(10); - qDebug("===stream===try again batchSize:%d, retry:%d", *numOfBlocks, retryTimes); + qDebug("try again batchSize:%d, retry:%d, %s", *numOfBlocks, retryTimes, id); continue; } - 
qDebug("===stream===break batchSize:%d", *numOfBlocks); + qDebug("break batchSize:%d, %s", *numOfBlocks, id); return TSDB_CODE_SUCCESS; } - // do not merge blocks for sink node - if (pTask->info.taskLevel == TASK_LEVEL__SINK) { - *numOfBlocks = 1; - *pInput = qItem; - return TSDB_CODE_SUCCESS; - } - - if (*pInput == NULL) { - ASSERT((*numOfBlocks) == 0); - *pInput = qItem; - } else { - // todo we need to sort the data block, instead of just appending into the array list. - void* newRet = streamMergeQueueItem(*pInput, qItem); - if (newRet == NULL) { - if (terrno == 0) { - qDebug("s-task:%s failed to merge blocks from inputQ, numOfBlocks:%d", id, *numOfBlocks); - } else { - qDebug("s-task:%s failed to merge blocks from inputQ, numOfBlocks:%d, code:%s", id, *numOfBlocks, - tstrerror(terrno)); - } + // do not merge blocks for sink node and check point data block + if (qItem->type == STREAM_INPUT__CHECKPOINT || qItem->type == STREAM_INPUT__CHECKPOINT_TRIGGER || + qItem->type == STREAM_INPUT__TRANS_STATE) { + if (*pInput == NULL) { + qDebug("s-task:%s checkpoint/transtate msg extracted, start to process immediately", id); + *numOfBlocks = 1; + *pInput = qItem; + return TSDB_CODE_SUCCESS; + } else { + // previous existed blocks needs to be handle, before handle the checkpoint msg block + qDebug("s-task:%s checkpoint/transtate msg extracted, handle previous blocks, numOfBlocks:%d", id, *numOfBlocks); streamQueueProcessFail(pTask->inputQueue); return TSDB_CODE_SUCCESS; } + } else { + if (*pInput == NULL) { + ASSERT((*numOfBlocks) == 0); + *pInput = qItem; + } else { + // todo we need to sort the data block, instead of just appending into the array list. 
+ void* newRet = streamMergeQueueItem(*pInput, qItem); + if (newRet == NULL) { + qError("s-task:%s failed to merge blocks from inputQ, numOfBlocks:%d", id, *numOfBlocks); + streamQueueProcessFail(pTask->inputQueue); + return TSDB_CODE_SUCCESS; + } - *pInput = newRet; - } + *pInput = newRet; + } - *numOfBlocks += 1; - streamQueueProcessSuccess(pTask->inputQueue); + *numOfBlocks += 1; + streamQueueProcessSuccess(pTask->inputQueue); - if (*numOfBlocks >= MAX_STREAM_EXEC_BATCH_NUM) { - qDebug("s-task:%s batch size limit:%d reached, start to process blocks", id, MAX_STREAM_EXEC_BATCH_NUM); - return TSDB_CODE_SUCCESS; + if (*numOfBlocks >= MAX_STREAM_EXEC_BATCH_NUM) { + qDebug("s-task:%s batch size limit:%d reached, start to process blocks", id, MAX_STREAM_EXEC_BATCH_NUM); + return TSDB_CODE_SUCCESS; + } } } } +int32_t streamProcessTranstateBlock(SStreamTask* pTask, SStreamDataBlock* pBlock) { + const char* id = pTask->id.idStr; + int32_t code = TSDB_CODE_SUCCESS; + + int32_t level = pTask->info.taskLevel; + if (level == TASK_LEVEL__AGG || level == TASK_LEVEL__SINK) { + int32_t remain = streamAlignTransferState(pTask); + if (remain > 0) { + streamFreeQitem((SStreamQueueItem*)pBlock); + qDebug("s-task:%s receive upstream transfer state msg, remain:%d", id, remain); + return 0; + } + } + + // dispatch the tran-state block to downstream task immediately + int32_t type = pTask->outputInfo.type; + + // transfer the ownership of executor state + if (type == TASK_OUTPUT__FIXED_DISPATCH || type == TASK_OUTPUT__SHUFFLE_DISPATCH) { + if (level == TASK_LEVEL__SOURCE) { + qDebug("s-task:%s add transfer-state block into outputQ", id); + } else { + qDebug("s-task:%s all upstream tasks send transfer-state block, add transfer-state block into outputQ", id); + ASSERT(pTask->streamTaskId.taskId != 0 && pTask->info.fillHistory == 1); + } + + // agg task should dispatch trans-state msg to sink task, to flush all data to sink task. 
+ if (level == TASK_LEVEL__AGG || level == TASK_LEVEL__SOURCE) { + pBlock->srcVgId = pTask->pMeta->vgId; + code = taosWriteQitem(pTask->outputInfo.queue->queue, pBlock); + if (code == 0) { + streamDispatchStreamBlock(pTask); + } else { + streamFreeQitem((SStreamQueueItem*)pBlock); + } + } else { // level == TASK_LEVEL__SINK + streamFreeQitem((SStreamQueueItem*)pBlock); + } + } else { // non-dispatch task, do task state transfer directly + streamFreeQitem((SStreamQueueItem*)pBlock); + if (level != TASK_LEVEL__SINK) { + qDebug("s-task:%s non-dispatch task, start to transfer state directly", id); + ASSERT(pTask->info.fillHistory == 1); + code = streamTransferStateToStreamTask(pTask); + + if (code != TSDB_CODE_SUCCESS) { + atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); + } + } else { + qDebug("s-task:%s sink task does not transfer state", id); + } + } + + return code; +} + /** * todo: the batch of blocks should be tuned dynamic, according to the total elapsed time of each batch of blocks, the * appropriate batch of blocks should be handled in 5 to 10 sec. @@ -478,12 +573,17 @@ int32_t streamExecForAll(SStreamTask* pTask) { // merge multiple input data if possible in the input queue. 
qDebug("s-task:%s start to extract data block from inputQ", id); - /*int32_t code = */extractMsgFromInputQ(pTask, &pInput, &batchSize, id); + /*int32_t code = */extractBlocksFromInputQ(pTask, &pInput, &batchSize); if (pInput == NULL) { ASSERT(batchSize == 0); break; } + if (pInput->type == STREAM_INPUT__TRANS_STATE) { + streamProcessTranstateBlock(pTask, (SStreamDataBlock*)pInput); + continue; + } + if (pTask->info.taskLevel == TASK_LEVEL__SINK) { ASSERT(pInput->type == STREAM_INPUT__DATA_BLOCK); qDebug("s-task:%s sink task start to sink %d blocks", id, batchSize); @@ -551,27 +651,6 @@ bool streamTaskIsIdle(const SStreamTask* pTask) { pTask->status.taskStatus == TASK_STATUS__DROPPING); } -int32_t streamTaskEndScanWAL(SStreamTask* pTask) { - const char* id = pTask->id.idStr; - double el = (taosGetTimestampMs() - pTask->tsInfo.step2Start) / 1000.0; - qDebug("s-task:%s scan-history from WAL stage(step 2) ended, elapsed time:%.2fs", id, el); - - // 1. notify all downstream tasks to transfer executor state after handle all history blocks. - int32_t code = streamDispatchTransferStateMsg(pTask); - if (code != TSDB_CODE_SUCCESS) { - // todo handle error - } - - // 2. do transfer stream task operator states. - pTask->status.transferState = true; - code = streamDoTransferStateToStreamTask(pTask); - if (code != TSDB_CODE_SUCCESS) { // todo handle error - return code; - } - - return TSDB_CODE_SUCCESS; -} - int32_t streamTryExec(SStreamTask* pTask) { // this function may be executed by multi-threads, so status check is required. 
int8_t schedStatus = @@ -587,27 +666,13 @@ int32_t streamTryExec(SStreamTask* pTask) { } // todo the task should be commit here - if (taosQueueEmpty(pTask->inputQueue->queue)) { - // fill-history WAL scan has completed - if (pTask->status.transferState) { - code = streamTransferStateToStreamTask(pTask); - if (code != TSDB_CODE_SUCCESS) { - return code; - } - streamSchedExec(pTask); - } else { - atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); - qDebug("s-task:%s exec completed, status:%s, sched-status:%d", id, streamGetTaskStatusStr(pTask->status.taskStatus), - pTask->status.schedStatus); - } - } else { - atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); - qDebug("s-task:%s exec completed, status:%s, sched-status:%d", id, streamGetTaskStatusStr(pTask->status.taskStatus), - pTask->status.schedStatus); + atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE); + qDebug("s-task:%s exec completed, status:%s, sched-status:%d", id, streamGetTaskStatusStr(pTask->status.taskStatus), + pTask->status.schedStatus); - if ((!streamTaskShouldStop(&pTask->status)) && (!streamTaskShouldPause(&pTask->status))) { - streamSchedExec(pTask); - } + if (!(taosQueueEmpty(pTask->inputQueue->queue) || streamTaskShouldStop(&pTask->status) || + streamTaskShouldPause(&pTask->status))) { + streamSchedExec(pTask); } } else { qDebug("s-task:%s already started to exec by other thread, status:%s, sched-status:%d", id, diff --git a/source/libs/stream/src/streamQueue.c b/source/libs/stream/src/streamQueue.c index aaf9fdec72..65135ec9a1 100644 --- a/source/libs/stream/src/streamQueue.c +++ b/source/libs/stream/src/streamQueue.c @@ -130,11 +130,11 @@ SStreamQueueItem* doReadMultiBlocksFromQueue(SQueueReader* pReader, const char* if (pReader->taskLevel == TASK_LEVEL__SOURCE && numOfBlocks < MIN_STREAM_EXEC_BATCH_NUM && tryCount < pReader->waitDuration) { tryCount++; taosMsleep(1); - qDebug("===stream===try again batchSize:%d", numOfBlocks); 
+ qDebug("try again batchSize:%d", numOfBlocks); continue; } - qDebug("===stream===break batchSize:%d", numOfBlocks); + qDebug("break batchSize:%d", numOfBlocks); break; } diff --git a/source/libs/stream/src/streamRecover.c b/source/libs/stream/src/streamRecover.c index 1e17ef7ef1..2506dbaead 100644 --- a/source/libs/stream/src/streamRecover.c +++ b/source/libs/stream/src/streamRecover.c @@ -372,68 +372,35 @@ int32_t streamDispatchScanHistoryFinishMsg(SStreamTask* pTask) { return 0; } -static int32_t doDispatchTransferMsg(SStreamTask* pTask, const SStreamTransferReq* pReq, int32_t vgId, SEpSet* pEpSet) { - void* buf = NULL; - int32_t code = -1; - SRpcMsg msg = {0}; - - int32_t tlen; - tEncodeSize(tEncodeStreamScanHistoryFinishReq, pReq, tlen, code); - if (code < 0) { - return -1; +int32_t appendTranstateIntoInputQ(SStreamTask* pTask) { + SStreamDataBlock* pTranstate = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, sizeof(SSDataBlock)); + if (pTranstate == NULL) { + return TSDB_CODE_OUT_OF_MEMORY; } - buf = rpcMallocCont(sizeof(SMsgHead) + tlen); - if (buf == NULL) { - terrno = TSDB_CODE_OUT_OF_MEMORY; - return -1; + SSDataBlock* pBlock = taosMemoryCalloc(1, sizeof(SSDataBlock)); + if (pBlock == NULL) { + taosFreeQitem(pTranstate); + return TSDB_CODE_OUT_OF_MEMORY; } - ((SMsgHead*)buf)->vgId = htonl(vgId); - void* abuf = POINTER_SHIFT(buf, sizeof(SMsgHead)); + pTranstate->type = STREAM_INPUT__TRANS_STATE; - SEncoder encoder; - tEncoderInit(&encoder, abuf, tlen); - if ((code = tEncodeStreamScanHistoryFinishReq(&encoder, pReq)) < 0) { - if (buf) { - rpcFreeCont(buf); - } - return code; + pBlock->info.type = STREAM_TRANS_STATE; + pBlock->info.rows = 1; + pBlock->info.childId = pTask->info.selfChildId; + + pTranstate->blocks = taosArrayInit(4, sizeof(SSDataBlock));//pBlock; + taosArrayPush(pTranstate->blocks, pBlock); + + taosMemoryFree(pBlock); + if (tAppendDataToInputQueue(pTask, (SStreamQueueItem*)pTranstate) < 0) { + taosFreeQitem(pTranstate); + return 
TSDB_CODE_OUT_OF_MEMORY; } - tEncoderClear(&encoder); - - msg.contLen = tlen + sizeof(SMsgHead); - msg.pCont = buf; - msg.msgType = TDMT_STREAM_TRANSFER_STATE; - msg.info.noResp = 1; - - tmsgSendReq(pEpSet, &msg); - qDebug("s-task:%s level:%d, status:%s dispatch transfer state msg to taskId:0x%x (vgId:%d)", pTask->id.idStr, - pTask->info.taskLevel, streamGetTaskStatusStr(pTask->status.taskStatus), pReq->downstreamTaskId, vgId); - - return 0; -} - -int32_t streamDispatchTransferStateMsg(SStreamTask* pTask) { - SStreamTransferReq req = { .streamId = pTask->id.streamId, .childId = pTask->info.selfChildId }; - - // serialize - if (pTask->outputInfo.type == TASK_OUTPUT__FIXED_DISPATCH) { - req.downstreamTaskId = pTask->fixedEpDispatcher.taskId; - doDispatchTransferMsg(pTask, &req, pTask->fixedEpDispatcher.nodeId, &pTask->fixedEpDispatcher.epSet); - } else if (pTask->outputInfo.type == TASK_OUTPUT__SHUFFLE_DISPATCH) { - SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos; - - int32_t numOfVgs = taosArrayGetSize(vgInfo); - for (int32_t i = 0; i < numOfVgs; i++) { - SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i); - req.downstreamTaskId = pVgInfo->taskId; - doDispatchTransferMsg(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet); - } - } - - return 0; + pTask->status.appendTranstateBlock = true; + return TSDB_CODE_SUCCESS; } // agg diff --git a/source/libs/sync/src/syncUtil.c b/source/libs/sync/src/syncUtil.c index ae1c775a18..9acc17e130 100644 --- a/source/libs/sync/src/syncUtil.c +++ b/source/libs/sync/src/syncUtil.c @@ -21,6 +21,7 @@ #include "syncRaftCfg.h" #include "syncRaftStore.h" #include "syncSnapshot.h" +#include "tglobal.h" void syncCfg2SimpleStr(const SSyncCfg* pCfg, char* buf, int32_t bufLen) { int32_t len = snprintf(buf, bufLen, "{num:%d, as:%d, [", pCfg->replicaNum, pCfg->myIndex); @@ -41,7 +42,22 @@ void syncUtilNodeInfo2EpSet(const SNodeInfo* pInfo, SEpSet* pEpSet) { } bool syncUtilNodeInfo2RaftId(const SNodeInfo* pInfo, SyncGroupId vgId, SRaftId* 
raftId) { - uint32_t ipv4 = taosGetIpv4FromFqdn(pInfo->nodeFqdn); + uint32_t ipv4 = 0xFFFFFFFF; + sDebug("vgId:%d, start to resolve sync addr fqdn in %d seconds, " + "dnode:%d cluster:%" PRId64 " fqdn:%s port:%u ", + vgId, tsResolveFQDNRetryTime, + pInfo->nodeId, pInfo->clusterId, pInfo->nodeFqdn, pInfo->nodePort); + for(int i = 0; i < tsResolveFQDNRetryTime; i++){ + ipv4 = taosGetIpv4FromFqdn(pInfo->nodeFqdn); + if (ipv4 == 0xFFFFFFFF || ipv4 == 1) { + sError("failed to resolve ipv4 addr, fqdn:%s, wait one second", pInfo->nodeFqdn); + taosSsleep(1); + } + else{ + break; + } + } + if (ipv4 == 0xFFFFFFFF || ipv4 == 1) { sError("failed to resolve ipv4 addr, fqdn:%s", pInfo->nodeFqdn); terrno = TSDB_CODE_TSC_INVALID_FQDN; diff --git a/source/libs/wal/src/walMeta.c b/source/libs/wal/src/walMeta.c index 2acdd975e5..e700ef3d0a 100644 --- a/source/libs/wal/src/walMeta.c +++ b/source/libs/wal/src/walMeta.c @@ -20,6 +20,7 @@ #include "tutil.h" #include "walInt.h" + bool FORCE_INLINE walLogExist(SWal* pWal, int64_t ver) { return !walIsEmpty(pWal) && walGetFirstVer(pWal) <= ver && walGetLastVer(pWal) >= ver; } diff --git a/tests/system-test/0-others/walRetention.py b/tests/system-test/0-others/walRetention.py index 2b340b7969..5257b7644a 100644 --- a/tests/system-test/0-others/walRetention.py +++ b/tests/system-test/0-others/walRetention.py @@ -460,8 +460,7 @@ class TDTestCase: #self.test_db("db2", 5, 10*24*3600, 2*1024) # 2M size # period + size - self.test_db("db", checkTime = 5*60, wal_period = 60, wal_size_kb=10) - #self.test_db("db", checkTime = 3*60, wal_period = 0, wal_size_kb=0) + self.test_db("db", checkTime = 3*60, wal_period = 60, wal_size_kb=500) def stop(self): diff --git a/tests/system-test/2-query/interp.py b/tests/system-test/2-query/interp.py index 986c63839b..c2eb7bee2e 100644 --- a/tests/system-test/2-query/interp.py +++ b/tests/system-test/2-query/interp.py @@ -20,6 +20,7 @@ class TDTestCase: tbname = "tb" tbname1 = "tb1" tbname2 = "tb2" + tbname3 = "tb3" 
stbname = "stb" ctbname1 = "ctb1" ctbname2 = "ctb2" @@ -5607,6 +5608,44 @@ class TDTestCase: tdSql.query(f"select _irowts, _isfilled, interp(c0) from {dbname}.{stbname_single} partition by tbname range('2020-02-01 00:00:06') fill(linear)") tdSql.checkRows(0) + #### TS-3799 #### + + tdSql.execute( + f'''create table if not exists {dbname}.{tbname3} (ts timestamp, c0 double)''' + ) + + tdSql.execute(f"insert into {dbname}.{tbname3} values ('2023-08-06 23:59:51.000000000', 4.233947800000000)") + tdSql.execute(f"insert into {dbname}.{tbname3} values ('2023-08-06 23:59:52.000000000', 3.606781000000000)") + tdSql.execute(f"insert into {dbname}.{tbname3} values ('2023-08-06 23:59:52.500000000', 3.162353500000000)") + tdSql.execute(f"insert into {dbname}.{tbname3} values ('2023-08-06 23:59:53.000000000', 3.162292500000000)") + tdSql.execute(f"insert into {dbname}.{tbname3} values ('2023-08-06 23:59:53.500000000', 4.998230000000000)") + tdSql.execute(f"insert into {dbname}.{tbname3} values ('2023-08-06 23:59:54.400000000', 8.800414999999999)") + tdSql.execute(f"insert into {dbname}.{tbname3} values ('2023-08-06 23:59:54.900000000', 8.853271500000000)") + tdSql.execute(f"insert into {dbname}.{tbname3} values ('2023-08-06 23:59:55.900000000', 7.507751500000000)") + tdSql.execute(f"insert into {dbname}.{tbname3} values ('2023-08-06 23:59:56.400000000', 7.510681000000000)") + tdSql.execute(f"insert into {dbname}.{tbname3} values ('2023-08-06 23:59:56.900000000', 7.841614000000000)") + tdSql.execute(f"insert into {dbname}.{tbname3} values ('2023-08-06 23:59:57.900000000', 8.153809000000001)") + tdSql.execute(f"insert into {dbname}.{tbname3} values ('2023-08-06 23:59:58.500000000', 6.866455000000000)") + tdSql.execute(f"insert into {dbname}.{tbname3} values ('2023-08-06 23:59:59.000000000', 6.869140600000000)") + tdSql.execute(f"insert into {dbname}.{tbname3} values ('2023-08-07 00:00:00.000000000', 0.261475000000001)") + + tdSql.query(f"select _irowts, interp(c0) from 
{dbname}.{tbname3} range('2023-08-06 23:59:00','2023-08-06 23:59:59') every(1m) fill(next)") + tdSql.checkRows(1); + tdSql.checkData(0, 0, '2023-08-06 23:59:00') + tdSql.checkData(0, 1, 4.233947800000000) + + tdSql.query(f"select _irowts, interp(c0) from {dbname}.{tbname3} range('2023-08-06 23:59:00','2023-08-06 23:59:59') every(1m) fill(value, 1)") + tdSql.checkRows(1); + tdSql.checkData(0, 0, '2023-08-06 23:59:00') + tdSql.checkData(0, 1, 1) + + tdSql.query(f"select _irowts, interp(c0) from {dbname}.{tbname3} range('2023-08-06 23:59:00','2023-08-06 23:59:59') every(1m) fill(null)") + tdSql.checkRows(1); + tdSql.checkData(0, 0, '2023-08-06 23:59:00') + tdSql.checkData(0, 1, None) + + + def stop(self): tdSql.close() tdLog.success(f"{__file__} successfully executed") diff --git a/tests/system-test/7-tmq/tmqCommon.py b/tests/system-test/7-tmq/tmqCommon.py index 3ea8273e7f..7f972d857e 100644 --- a/tests/system-test/7-tmq/tmqCommon.py +++ b/tests/system-test/7-tmq/tmqCommon.py @@ -578,18 +578,40 @@ class TMQCom: tdLog.info("wait subscriptions exit for %d s"%wait_cnt) def killProcesser(self, processerName): - killCmd = ( - "ps -ef|grep -w %s| grep -v grep | awk '{print $2}' | xargs kill -TERM > /dev/null 2>&1" - % processerName - ) - - psCmd = "ps -ef|grep -w %s| grep -v grep | awk '{print $2}'" % processerName - processID = subprocess.check_output(psCmd, shell=True) + if platform.system().lower() == 'windows': + killCmd = ("wmic process where name=\"%s.exe\" call terminate > NUL 2>&1" % processerName) + psCmd = ("wmic process where name=\"%s.exe\" | findstr \"%s.exe\"" % (processerName, processerName)) + else: + killCmd = ( + "ps -ef|grep -w %s| grep -v grep | awk '{print $2}' | xargs kill -TERM > /dev/null 2>&1" + % processerName + ) + psCmd = ("ps -ef|grep -w %s| grep -v grep | awk '{print $2}'" % processerName) + processID = "" + + try: + processID = subprocess.check_output(psCmd, shell=True) + except Exception as err: + processID = "" + print('**** warn: ', err) + 
while processID: os.system(killCmd) time.sleep(1) - processID = subprocess.check_output(psCmd, shell=True) + try: + processID = subprocess.check_output(psCmd, shell=True) + except Exception as err: + processID = "" + print('**** warn: ', err) + + def startProcess(self, processName, param): + if platform.system().lower() == 'windows': + cmd = f"mintty -h never %s %s > NUL 2>&1" % (processName, param) + else: + cmd = f"nohup %s %s > /dev/null 2>&1 &" % (processName, param) + tdLog.info("%s"%(cmd)) + os.system(cmd) def close(self): self.cursor.close() diff --git a/tests/system-test/7-tmq/tmqDropConsumer.py b/tests/system-test/7-tmq/tmqDropConsumer.py index 06ce4c0fd7..137b5c6584 100644 --- a/tests/system-test/7-tmq/tmqDropConsumer.py +++ b/tests/system-test/7-tmq/tmqDropConsumer.py @@ -176,9 +176,7 @@ class TDTestCase: # use taosBenchmark to subscribe binPath = self.getPath() - cmd = "nohup %s -f ./7-tmq/tmqDropConsumer.json > /dev/null 2>&1 & " % binPath - tdLog.info("%s"%(cmd)) - os.system(cmd) + tmqCom.startProcess(binPath, "-f ./7-tmq/tmqDropConsumer.json") expectTopicNum = len(topicNameList) consumerThreadNum = 2 diff --git a/tests/system-test/7-tmq/tmqMaxGroupIds.py b/tests/system-test/7-tmq/tmqMaxGroupIds.py index d22b79a44c..75e2993a5b 100644 --- a/tests/system-test/7-tmq/tmqMaxGroupIds.py +++ b/tests/system-test/7-tmq/tmqMaxGroupIds.py @@ -176,9 +176,7 @@ class TDTestCase: # use taosBenchmark to subscribe binPath = self.getPath() - cmd = "nohup %s -f ./7-tmq/tmqMaxGroupIds.json > /dev/null 2>&1 & " % binPath - tdLog.info("%s"%(cmd)) - os.system(cmd) + tmqCom.startProcess(binPath, "-f ./7-tmq/tmqMaxGroupIds.json") expectTopicNum = 1 expectConsumerNUm = 99 diff --git a/tools/shell/src/shellEngine.c b/tools/shell/src/shellEngine.c index 860622ea18..e084f97fb5 100644 --- a/tools/shell/src/shellEngine.c +++ b/tools/shell/src/shellEngine.c @@ -771,7 +771,7 @@ int32_t shellCalcColWidth(TAOS_FIELD *field, int32_t precision) { if (field->bytes > 
shell.args.displayWidth) { return TMAX(shell.args.displayWidth, width); } else { - return TMAX(field->bytes, width); + return TMAX(field->bytes + 2, width); } case TSDB_DATA_TYPE_NCHAR: @@ -780,7 +780,7 @@ int32_t shellCalcColWidth(TAOS_FIELD *field, int32_t precision) { if (bytes > shell.args.displayWidth) { return TMAX(shell.args.displayWidth, width); } else { - return TMAX(bytes, width); + return TMAX(bytes + 2, width); } }