Merge branch '3.0' of https://github.com/taosdata/TDengine into enh/tsdb_optimize

This commit is contained in:
Hongze Cheng 2023-07-05 10:57:55 +08:00
commit 1304c08cdd
112 changed files with 8498 additions and 6217 deletions

View File

@ -314,7 +314,7 @@ def pre_test_build_win() {
cd %WIN_CONNECTOR_ROOT%
python.exe -m pip install --upgrade pip
python -m pip uninstall taospy -y
python -m pip install taospy==2.7.6
python -m pip install taospy==2.7.10
xcopy /e/y/i/f %WIN_INTERNAL_ROOT%\\debug\\build\\lib\\taos.dll C:\\Windows\\System32
'''
return 1

View File

@ -2,11 +2,11 @@
# geos
ExternalProject_Add(geos
GIT_REPOSITORY https://github.com/libgeos/geos.git
GIT_TAG 3.11.2
GIT_TAG 3.12.0
SOURCE_DIR "${TD_CONTRIB_DIR}/geos"
BINARY_DIR ""
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND ""
)
)

View File

@ -5,8 +5,8 @@ if (${BUILD_CONTRIB})
URL https://github.com/facebook/rocksdb/archive/refs/tags/v8.1.1.tar.gz
URL_HASH MD5=3b4c97ee45df9c8a5517308d31ab008b
DOWNLOAD_NO_PROGRESS 1
DOWNLOAD_DIR "${TD_CONTRIB_DIR}/deps-download"
SOURCE_DIR "${TD_CONTRIB_DIR}/rocksdb"
DOWNLOAD_DIR "${TD_CONTRIB_DIR}/deps-download"
SOURCE_DIR "${TD_CONTRIB_DIR}/rocksdb"
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
@ -18,8 +18,8 @@ else()
URL https://github.com/facebook/rocksdb/archive/refs/tags/v8.1.1.tar.gz
URL_HASH MD5=3b4c97ee45df9c8a5517308d31ab008b
DOWNLOAD_NO_PROGRESS 1
DOWNLOAD_DIR "${TD_CONTRIB_DIR}/deps-download"
SOURCE_DIR "${TD_CONTRIB_DIR}/rocksdb"
DOWNLOAD_DIR "${TD_CONTRIB_DIR}/deps-download"
SOURCE_DIR "${TD_CONTRIB_DIR}/rocksdb"
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""

View File

@ -20,6 +20,11 @@ The source code for the Python connector is hosted on [GitHub](https://github.co
- The [supported platforms](/reference/connector/#supported-platforms) for the native connection are the same as the ones supported by the TDengine client.
- REST connections are supported on all platforms that can run Python.
### Supported features
- Native connections support all the core features of TDengine, including connection management, SQL execution, bind interface, subscriptions, and schemaless writing.
- REST connections support features such as connection management and SQL execution. (SQL execution allows you to: manage databases, tables, and supertables, write data, query data, create continuous queries, etc.).
## Version selection
We recommend using the latest version of `taospy`, regardless of the version of TDengine.
@ -64,10 +69,23 @@ All exceptions from the Python Connector are thrown directly. Applications shoul
{{#include docs/examples/python/handle_exception.py}}
```
## Supported features
## TDengine DataType vs. Python DataType
- Native connections support all the core features of TDengine, including connection management, SQL execution, bind interface, subscriptions, and schemaless writing.
- REST connections support features such as connection management and SQL execution. (SQL execution allows you to: manage databases, tables, and supertables, write data, query data, create continuous queries, etc.).
TDengine currently supports timestamp, number, character, Boolean type, and the corresponding type conversion with Python is as follows:
|TDengine DataType|Python DataType|
|:---------------:|:-------------:|
|TIMESTAMP|datetime|
|INT|int|
|BIGINT|int|
|FLOAT|float|
|DOUBLE|int|
|SMALLINT|int|
|TINYINT|int|
|BOOL|bool|
|BINARY|str|
|NCHAR|str|
|JSON|str|
## Installation
@ -544,7 +562,7 @@ Connector support data subscription. For more information about subscroption, pl
The `consumer` in the connector contains the subscription api.
#### Create Consumer
##### Create Consumer
The syntax for creating a consumer is `consumer = Consumer(configs)`. For more subscription api parameters, please refer to [Data Subscription](../../../develop/tmq/).
@ -554,7 +572,7 @@ from taos.tmq import Consumer
consumer = Consumer({"group.id": "local", "td.connect.ip": "127.0.0.1"})
```
#### Subscribe topics
##### Subscribe topics
The `subscribe` function is used to subscribe to a list of topics.
@ -562,7 +580,7 @@ The `subscribe` function is used to subscribe to a list of topics.
consumer.subscribe(['topic1', 'topic2'])
```
#### Consume
##### Consume
The `poll` function is used to consume data in tmq. The parameter of the `poll` function is a value of type float representing the timeout in seconds. It returns a `Message` before timing out, or `None` on timing out. You have to handle error messages in response data.
@ -580,7 +598,7 @@ while True:
print(block.fetchall())
```
#### assignment
##### assignment
The `assignment` function is used to get the assignment of the topic.
@ -588,7 +606,7 @@ The `assignment` function is used to get the assignment of the topic.
assignments = consumer.assignment()
```
#### Seek
##### Seek
The `seek` function is used to reset the assignment of the topic.
@ -597,7 +615,7 @@ tp = TopicPartition(topic='topic1', partition=0, offset=0)
consumer.seek(tp)
```
#### After consuming data
##### After consuming data
You should unsubscribe to the topics and close the consumer after consuming.
@ -606,13 +624,13 @@ consumer.unsubscribe()
consumer.close()
```
#### Tmq subscription example
##### Tmq subscription example
```python
{{#include docs/examples/python/tmq_example.py}}
```
#### assignment and seek example
##### assignment and seek example
```python
{{#include docs/examples/python/tmq_assignment_example.py:taos_get_assignment_and_seek_demo}}
@ -624,7 +642,7 @@ consumer.close()
In addition to native connections, the connector also supports subscriptions via websockets.
#### Create Consumer
##### Create Consumer
The syntax for creating a consumer is "consumer = consumer = Consumer(conf=configs)". You need to specify that the `td.connect.websocket.scheme` parameter is set to "ws" in the configuration. For more subscription api parameters, please refer to [Data Subscription](../../../develop/tmq/#create-a-consumer).
@ -634,7 +652,7 @@ import taosws
consumer = taosws.(conf={"group.id": "local", "td.connect.websocket.scheme": "ws"})
```
#### subscribe topics
##### subscribe topics
The `subscribe` function is used to subscribe to a list of topics.
@ -642,7 +660,7 @@ The `subscribe` function is used to subscribe to a list of topics.
consumer.subscribe(['topic1', 'topic2'])
```
#### Consume
##### Consume
The `poll` function is used to consume data in tmq. The parameter of the `poll` function is a value of type float representing the timeout in seconds. It returns a `Message` before timing out, or `None` on timing out. You have to handle error messages in response data.
@ -659,7 +677,7 @@ while True:
print(row)
```
#### assignment
##### assignment
The `assignment` function is used to get the assignment of the topic.
@ -667,7 +685,7 @@ The `assignment` function is used to get the assignment of the topic.
assignments = consumer.assignment()
```
#### Seek
##### Seek
The `seek` function is used to reset the assignment of the topic.
@ -675,7 +693,7 @@ The `seek` function is used to reset the assignment of the topic.
consumer.seek(topic='topic1', partition=0, offset=0)
```
#### After consuming data
##### After consuming data
You should unsubscribe to the topics and close the consumer after consuming.
@ -684,13 +702,13 @@ consumer.unsubscribe()
consumer.close()
```
#### Subscription example
##### Subscription example
```python
{{#include docs/examples/python/tmq_websocket_example.py}}
```
#### Assignment and seek example
##### Assignment and seek example
```python
{{#include docs/examples/python/tmq_websocket_assgnment_example.py:taosws_get_assignment_and_seek_demo}}
@ -706,19 +724,19 @@ Connector support schemaless insert.
<Tabs defaultValue="list">
<TabItem value="list" label="List Insert">
Simple insert
##### Simple insert
```python
{{#include docs/examples/python/schemaless_insert.py}}
```
Insert with ttl argument
##### Insert with ttl argument
```python
{{#include docs/examples/python/schemaless_insert_ttl.py}}
```
Insert with req_id argument
##### Insert with req_id argument
```python
{{#include docs/examples/python/schemaless_insert_req_id.py}}
@ -728,19 +746,19 @@ Insert with req_id argument
<TabItem value="raw" label="Raw Insert">
Simple insert
##### Simple insert
```python
{{#include docs/examples/python/schemaless_insert_raw.py}}
```
Insert with ttl argument
##### Insert with ttl argument
```python
{{#include docs/examples/python/schemaless_insert_raw_ttl.py}}
```
Insert with req_id argument
##### Insert with req_id argument
```python
{{#include docs/examples/python/schemaless_insert_raw_req_id.py}}
@ -756,7 +774,7 @@ The Python connector provides a parameter binding api for inserting data. Simila
<Tabs>
<TabItem value="native" label="native connection">
#### Create Stmt
##### Create Stmt
Call the `statement` method in `Connection` to create the `stmt` for parameter binding.
@ -767,7 +785,7 @@ conn = taos.connect()
stmt = conn.statement("insert into log values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)")
```
#### parameter binding
##### parameter binding
Call the `new_multi_binds` function to create the parameter list for parameter bindings.
@ -797,7 +815,7 @@ Call the `bind_param` (for a single row) method or the `bind_param_batch` (for m
stmt.bind_param_batch(params)
```
#### execute sql
##### execute sql
Call `execute` method to execute sql.
@ -805,13 +823,13 @@ Call `execute` method to execute sql.
stmt.execute()
```
#### Close Stmt
##### Close Stmt
```
stmt.close()
```
#### Example
##### Example
```python
{{#include docs/examples/python/stmt_example.py}}
@ -820,7 +838,7 @@ stmt.close()
<TabItem value="websocket" label="WebSocket connection">
#### Create Stmt
##### Create Stmt
Call the `statement` method in `Connection` to create the `stmt` for parameter binding.
@ -831,7 +849,7 @@ conn = taosws.connect('taosws://localhost:6041/test')
stmt = conn.statement()
```
#### Prepare sql
##### Prepare sql
Call `prepare` method in stmt to prepare sql.
@ -839,7 +857,7 @@ Call `prepare` method in stmt to prepare sql.
stmt.prepare("insert into t1 values (?, ?, ?, ?)")
```
#### parameter binding
##### parameter binding
Call the `bind_param` method to bind parameters.
@ -858,7 +876,7 @@ Call the `add_batch` method to add parameters to the batch.
stmt.add_batch()
```
#### execute sql
##### execute sql
Call `execute` method to execute sql.
@ -866,13 +884,13 @@ Call `execute` method to execute sql.
stmt.execute()
```
#### Close Stmt
##### Close Stmt
```
stmt.close()
```
#### Example
##### Example
```python
{{#include docs/examples/python/stmt_websocket_example.py}}

View File

@ -21,7 +21,12 @@ Python 连接器的源码托管在 [GitHub](https://github.com/taosdata/taos-con
- 原生连接[支持的平台](../#支持的平台)和 TDengine 客户端支持的平台一致。
- REST 连接支持所有能运行 Python 的平台。
## 版本选择
### 支持的功能
- 原生连接支持 TDengine 的所有核心功能, 包括: 连接管理、执行 SQL、参数绑定、订阅、无模式写入schemaless
- REST 连接支持的功能包括:连接管理、执行 SQL。 (通过执行 SQL 可以: 管理数据库、管理表和超级表、写入数据、查询数据、创建连续查询等)。
## 历史版本
无论使用什么版本的 TDengine 都建议使用最新版本的 `taospy`。
@ -65,12 +70,25 @@ Python Connector 的所有数据库操作如果出现异常,都会直接抛出
{{#include docs/examples/python/handle_exception.py}}
```
## 支持的功能
TDengine DataType 和 Python DataType
- 原生连接支持 TDengine 的所有核心功能, 包括: 连接管理、执行 SQL、参数绑定、订阅、无模式写入schemaless
- REST 连接支持的功能包括:连接管理、执行 SQL。 (通过执行 SQL 可以: 管理数据库、管理表和超级表、写入数据、查询数据、创建连续查询等)。
TDengine 目前支持时间戳、数字、字符、布尔类型,与 Python 对应类型转换如下:
## 安装
|TDengine DataType|Python DataType|
|:---------------:|:-------------:|
|TIMESTAMP|datetime|
|INT|int|
|BIGINT|int|
|FLOAT|float|
|DOUBLE|int|
|SMALLINT|int|
|TINYINT|int|
|BOOL|bool|
|BINARY|str|
|NCHAR|str|
|JSON|str|
## 安装步骤
### 安装前准备
@ -383,7 +401,7 @@ TaosCursor 类使用原生连接进行写入、查询操作。在客户端多线
</TabItem>
<TabItem value="websocket" label="WebSocket 连接">
#### Connection 类的使用
##### Connection 类的使用
`Connection` 类既包含对 PEP249 Connection 接口的实现(如cursor方法和 close 方法),也包含很多扩展功能(如: execute、 query、schemaless_insert 和 subscribe 方法。
@ -547,7 +565,7 @@ RestClient 类是对于 REST API 的直接封装。它只包含一个 sql() 方
`Consumer` 提供了 Python 连接器订阅 TMQ 数据的 API。
#### 创建 Consumer
##### 创建 Consumer
创建 Consumer 语法为 `consumer = Consumer(configs)`,参数定义请参考 [数据订阅文档](../../develop/tmq/#%E5%88%9B%E5%BB%BA%E6%B6%88%E8%B4%B9%E8%80%85-consumer)。
@ -557,7 +575,7 @@ from taos.tmq import Consumer
consumer = Consumer({"group.id": "local", "td.connect.ip": "127.0.0.1"})
```
#### 订阅 topics
##### 订阅 topics
Consumer API 的 `subscribe` 方法用于订阅 topicsconsumer 支持同时订阅多个 topic。
@ -565,7 +583,7 @@ Consumer API 的 `subscribe` 方法用于订阅 topicsconsumer 支持同时
consumer.subscribe(['topic1', 'topic2'])
```
#### 消费数据
##### 消费数据
Consumer API 的 `poll` 方法用于消费数据,`poll` 方法接收一个 float 类型的超时时间超时时间单位为秒s`poll` 方法在超时之前返回一条 Message 类型的数据或超时返回 `None`。消费者必须通过 Message 的 `error()` 方法校验返回数据的 error 信息。
@ -583,7 +601,7 @@ while True:
print(block.fetchall())
```
#### 获取消费进度
##### 获取消费进度
Consumer API 的 `assignment` 方法用于获取 Consumer 订阅的所有 topic 的消费进度,返回结果类型为 TopicPartition 列表。
@ -591,7 +609,7 @@ Consumer API 的 `assignment` 方法用于获取 Consumer 订阅的所有 topic
assignments = consumer.assignment()
```
#### 重置消费进度
##### 指定订阅 Offset
Consumer API 的 `seek` 方法用于重置 Consumer 的消费进度到指定位置,方法参数类型为 TopicPartition。
@ -600,7 +618,7 @@ tp = TopicPartition(topic='topic1', partition=0, offset=0)
consumer.seek(tp)
```
#### 结束消费
##### 关闭订阅
消费结束后,应当取消订阅,并关闭 Consumer。
@ -609,13 +627,13 @@ consumer.unsubscribe()
consumer.close()
```
#### tmq 订阅示例代码
##### 完整示例
```python
{{#include docs/examples/python/tmq_example.py}}
```
#### 获取和重置消费进度示例代码
##### 获取和重置消费进度示例代码
```python
{{#include docs/examples/python/tmq_assignment_example.py:taos_get_assignment_and_seek_demo}}
@ -629,7 +647,7 @@ consumer.close()
taosws `Consumer` API 提供了基于 Websocket 订阅 TMQ 数据的 API。
#### 创建 Consumer
##### 创建 Consumer
创建 Consumer 语法为 `consumer = Consumer(conf=configs)`,使用时需要指定 `td.connect.websocket.scheme` 参数值为 "ws",参数定义请参考 [数据订阅文档](../../develop/tmq/#%E5%88%9B%E5%BB%BA%E6%B6%88%E8%B4%B9%E8%80%85-consumer)。
@ -639,7 +657,7 @@ import taosws
consumer = taosws.(conf={"group.id": "local", "td.connect.websocket.scheme": "ws"})
```
#### 订阅 topics
##### 订阅 topics
Consumer API 的 `subscribe` 方法用于订阅 topicsconsumer 支持同时订阅多个 topic。
@ -647,7 +665,7 @@ Consumer API 的 `subscribe` 方法用于订阅 topicsconsumer 支持同时
consumer.subscribe(['topic1', 'topic2'])
```
#### 消费数据
##### 消费数据
Consumer API 的 `poll` 方法用于消费数据,`poll` 方法接收一个 float 类型的超时时间超时时间单位为秒s`poll` 方法在超时之前返回一条 Message 类型的数据或超时返回 `None`。消费者必须通过 Message 的 `error()` 方法校验返回数据的 error 信息。
@ -664,7 +682,7 @@ while True:
print(row)
```
#### 获取消费进度
##### 获取消费进度
Consumer API 的 `assignment` 方法用于获取 Consumer 订阅的所有 topic 的消费进度,返回结果类型为 TopicPartition 列表。
@ -672,7 +690,7 @@ Consumer API 的 `assignment` 方法用于获取 Consumer 订阅的所有 topic
assignments = consumer.assignment()
```
#### 重置消费进度
##### 重置消费进度
Consumer API 的 `seek` 方法用于重置 Consumer 的消费进度到指定位置。
@ -680,7 +698,7 @@ Consumer API 的 `seek` 方法用于重置 Consumer 的消费进度到指定位
consumer.seek(topic='topic1', partition=0, offset=0)
```
#### 结束消费
##### 结束消费
消费结束后,应当取消订阅,并关闭 Consumer。
@ -689,7 +707,7 @@ consumer.unsubscribe()
consumer.close()
```
#### tmq 订阅示例代码
##### tmq 订阅示例代码
```python
{{#include docs/examples/python/tmq_websocket_example.py}}
@ -697,7 +715,7 @@ consumer.close()
连接器提供了 `assignment` 接口,用于获取 topic assignment 的功能,可以查询订阅的 topic 的消费进度,并提供 `seek` 接口,用于重置 topic 的消费进度。
#### 获取和重置消费进度示例代码
##### 获取和重置消费进度示例代码
```python
{{#include docs/examples/python/tmq_websocket_assgnment_example.py:taosws_get_assignment_and_seek_demo}}
@ -713,19 +731,19 @@ consumer.close()
<Tabs defaultValue="list">
<TabItem value="list" label="List 写入">
简单写入
##### 简单写入
```python
{{#include docs/examples/python/schemaless_insert.py}}
```
带有 ttl 参数的写入
##### 带有 ttl 参数的写入
```python
{{#include docs/examples/python/schemaless_insert_ttl.py}}
```
带有 req_id 参数的写入
##### 带有 req_id 参数的写入
```python
{{#include docs/examples/python/schemaless_insert_req_id.py}}
@ -735,19 +753,19 @@ consumer.close()
<TabItem value="raw" label="Raw 写入">
简单写入
##### 简单写入
```python
{{#include docs/examples/python/schemaless_insert_raw.py}}
```
带有 ttl 参数的写入
##### 带有 ttl 参数的写入
```python
{{#include docs/examples/python/schemaless_insert_raw_ttl.py}}
```
带有 req_id 参数的写入
##### 带有 req_id 参数的写入
```python
{{#include docs/examples/python/schemaless_insert_raw_req_id.py}}
@ -763,7 +781,7 @@ TDengine 的 Python 连接器支持参数绑定风格的 Prepare API 方式写
<Tabs>
<TabItem value="native" label="原生连接">
#### 创建 stmt
##### 创建 stmt
Python 连接器的 `Connection` 提供了 `statement` 方法用于创建参数绑定对象 stmt该方法接收 sql 字符串作为参数sql 字符串目前仅支持用 `?` 来代表绑定的参数。
@ -774,7 +792,7 @@ conn = taos.connect()
stmt = conn.statement("insert into log values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)")
```
#### 参数绑定
##### 参数绑定
调用 `new_multi_binds` 函数创建 params 列表,用于参数绑定。
@ -804,7 +822,7 @@ params[15].timestamp([None, None, 1626861392591])
stmt.bind_param_batch(params)
```
#### 执行 sql
##### 执行 sql
调用 stmt 的 `execute` 方法执行 sql
@ -812,7 +830,7 @@ stmt.bind_param_batch(params)
stmt.execute()
```
#### 关闭 stmt
##### 关闭 stmt
最后需要关闭 stmt。
@ -820,7 +838,7 @@ stmt.execute()
stmt.close()
```
#### 示例代码
##### 示例代码
```python
{{#include docs/examples/python/stmt_example.py}}
@ -829,7 +847,7 @@ stmt.close()
<TabItem value="websocket" label="WebSocket 连接">
#### 创建 stmt
##### 创建 stmt
Python WebSocket 连接器的 `Connection` 提供了 `statement` 方法用于创建参数绑定对象 stmt该方法接收 sql 字符串作为参数sql 字符串目前仅支持用 `?` 来代表绑定的参数。
@ -840,7 +858,7 @@ conn = taosws.connect('taosws://localhost:6041/test')
stmt = conn.statement()
```
#### 解析 sql
##### 解析 sql
调用 stmt 的 `prepare` 方法来解析 insert 语句。
@ -848,7 +866,7 @@ stmt = conn.statement()
stmt.prepare("insert into t1 values (?, ?, ?, ?)")
```
#### 参数绑定
##### 参数绑定
调用 stmt 的 `bind_param` 方法绑定参数。
@ -867,7 +885,7 @@ stmt.bind_param([
stmt.add_batch()
```
#### 执行 sql
##### 执行 sql
调用 stmt 的 `execute` 方法执行 sql
@ -875,7 +893,7 @@ stmt.add_batch()
stmt.execute()
```
#### 关闭 stmt
##### 关闭 stmt
最后需要关闭 stmt。
@ -883,7 +901,7 @@ stmt.execute()
stmt.close()
```
#### 示例代码
##### 示例代码
```python
{{#include docs/examples/python/stmt_websocket_example.py}}

View File

@ -54,6 +54,11 @@ typedef struct SSessionKey {
uint64_t groupId;
} SSessionKey;
typedef struct SVersionRange {
uint64_t minVer;
uint64_t maxVer;
} SVersionRange;
static inline int winKeyCmprImpl(const void* pKey1, const void* pKey2) {
SWinKey* pWin1 = (SWinKey*)pKey1;
SWinKey* pWin2 = (SWinKey*)pKey2;
@ -131,10 +136,10 @@ static inline int STupleKeyCmpr(const void* pKey1, int kLen1, const void* pKey2,
enum {
TMQ_MSG_TYPE__DUMMY = 0,
TMQ_MSG_TYPE__POLL_RSP,
TMQ_MSG_TYPE__POLL_DATA_RSP,
TMQ_MSG_TYPE__POLL_META_RSP,
TMQ_MSG_TYPE__EP_RSP,
TMQ_MSG_TYPE__TAOSX_RSP,
TMQ_MSG_TYPE__POLL_DATA_META_RSP,
TMQ_MSG_TYPE__WALINFO_RSP,
TMQ_MSG_TYPE__END_RSP,
};

View File

@ -177,7 +177,6 @@ static FORCE_INLINE void colDataSetDouble(SColumnInfoData* pColumnInfoData, uint
int32_t getJsonValueLen(const char* data);
int32_t colDataSetVal(SColumnInfoData* pColumnInfoData, uint32_t rowIndex, const char* pData, bool isNull);
int32_t colDataAppend(SColumnInfoData* pColumnInfoData, uint32_t rowIndex, const char* pData, bool isNull);
int32_t colDataReassignVal(SColumnInfoData* pColumnInfoData, uint32_t dstRowIdx, uint32_t srcRowIdx, const char* pData);
int32_t colDataSetNItems(SColumnInfoData* pColumnInfoData, uint32_t rowIndex, const char* pData, uint32_t numOfRows, bool trimValue);
int32_t colDataMergeCol(SColumnInfoData* pColumnInfoData, int32_t numOfRow1, int32_t* capacity,
@ -187,6 +186,7 @@ int32_t colDataAssign(SColumnInfoData* pColumnInfoData, const SColumnInfoData* p
int32_t blockDataUpdateTsWindow(SSDataBlock* pDataBlock, int32_t tsColumnIndex);
int32_t colDataGetLength(const SColumnInfoData* pColumnInfoData, int32_t numOfRows);
int32_t colDataGetRowLength(const SColumnInfoData* pColumnInfoData, int32_t rowIdx);
void colDataTrim(SColumnInfoData* pColumnInfoData);
@ -208,7 +208,6 @@ double blockDataGetSerialRowSize(const SSDataBlock* pBlock);
size_t blockDataGetSerialMetaSize(uint32_t numOfCols);
int32_t blockDataSort(SSDataBlock* pDataBlock, SArray* pOrderInfo);
int32_t blockDataSort_rv(SSDataBlock* pDataBlock, SArray* pOrderInfo, bool nullFirst);
int32_t colInfoDataEnsureCapacity(SColumnInfoData* pColumn, uint32_t numOfRows, bool clearPayload);
int32_t blockDataEnsureCapacity(SSDataBlock* pDataBlock, uint32_t numOfRows);
@ -237,11 +236,10 @@ int32_t blockDataAppendColInfo(SSDataBlock* pBlock, SColumnInfoData* pColIn
SColumnInfoData createColumnInfoData(int16_t type, int32_t bytes, int16_t colId);
SColumnInfoData* bdGetColumnInfoData(const SSDataBlock* pBlock, int32_t index);
int32_t blockGetEncodeSize(const SSDataBlock* pBlock);
int32_t blockEncode(const SSDataBlock* pBlock, char* data, int32_t numOfCols);
const char* blockDecode(SSDataBlock* pBlock, const char* pData);
void blockDebugShowDataBlock(SSDataBlock* pBlock, const char* flag);
void blockDebugShowDataBlocks(const SArray* dataBlocks, const char* flag);
// for debug
char* dumpBlockData(SSDataBlock* pDataBlock, const char* flag, char** dumpBuf);
@ -251,9 +249,7 @@ int32_t buildSubmitReqFromDataBlock(SSubmitReq2** pReq, const SSDataBlock* pData
char* buildCtbNameByGroupId(const char* stbName, uint64_t groupId);
int32_t buildCtbNameByGroupIdImpl(const char* stbName, uint64_t groupId, char* pBuf);
static FORCE_INLINE int32_t blockGetEncodeSize(const SSDataBlock* pBlock) {
return blockDataGetSerialMetaSize(taosArrayGetSize(pBlock->pDataBlock)) + blockDataGetSize(pBlock);
}
void trimDataBlock(SSDataBlock* pBlock, int32_t totalRows, const bool* pBoolList);
#ifdef __cplusplus
}

View File

@ -2909,6 +2909,12 @@ enum {
TMQ_OFFSET__SNAPSHOT_META = 3,
};
enum {
WITH_DATA = 0,
WITH_META = 1,
ONLY_META = 2,
};
typedef struct {
int8_t type;
union {

View File

@ -252,7 +252,9 @@ enum {
TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_DISPATCH, "stream-task-dispatch", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_STREAM_UNUSED1, "stream-unused1", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_STREAM_RETRIEVE, "stream-retrieve", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_STREAM_RECOVER_FINISH, "stream-recover-finish", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_STREAM_SCAN_HISTORY, "stream-scan-history", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_STREAM_SCAN_HISTORY_FINISH, "stream-scan-history-finish", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_STREAM_TRANSFER_STATE, "stream-transfer-state", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_CHECK, "stream-task-check", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_CHECKPOINT, "stream-checkpoint", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_STREAM_TASK_REPORT_CHECKPOINT, "stream-report-checkpoint", NULL, NULL)
@ -297,8 +299,7 @@ enum {
TD_NEW_MSG_SEG(TDMT_VND_STREAM_MSG)
TD_DEF_MSG_TYPE(TDMT_VND_STREAM_TRIGGER, "vnode-stream-trigger", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_VND_STREAM_RECOVER_NONBLOCKING_STAGE, "vnode-stream-recover1", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_VND_STREAM_RECOVER_BLOCKING_STAGE, "vnode-stream-recover2", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_VND_STREAM_SCAN_HISTORY, "vnode-stream-scan-history", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_VND_STREAM_CHECK_POINT_SOURCE, "vnode-stream-checkpoint-source", NULL, NULL)
TD_DEF_MSG_TYPE(TDMT_VND_STREAM_MAX_MSG, "vnd-stream-max", NULL, NULL)

View File

@ -16,105 +16,105 @@
#ifndef _TD_COMMON_TOKEN_H_
#define _TD_COMMON_TOKEN_H_
#define TK_OR 1
#define TK_AND 2
#define TK_UNION 3
#define TK_ALL 4
#define TK_MINUS 5
#define TK_EXCEPT 6
#define TK_INTERSECT 7
#define TK_NK_BITAND 8
#define TK_NK_BITOR 9
#define TK_NK_LSHIFT 10
#define TK_NK_RSHIFT 11
#define TK_NK_PLUS 12
#define TK_NK_MINUS 13
#define TK_NK_STAR 14
#define TK_NK_SLASH 15
#define TK_NK_REM 16
#define TK_NK_CONCAT 17
#define TK_CREATE 18
#define TK_ACCOUNT 19
#define TK_NK_ID 20
#define TK_PASS 21
#define TK_NK_STRING 22
#define TK_ALTER 23
#define TK_PPS 24
#define TK_TSERIES 25
#define TK_STORAGE 26
#define TK_STREAMS 27
#define TK_QTIME 28
#define TK_DBS 29
#define TK_USERS 30
#define TK_CONNS 31
#define TK_STATE 32
#define TK_USER 33
#define TK_ENABLE 34
#define TK_NK_INTEGER 35
#define TK_SYSINFO 36
#define TK_DROP 37
#define TK_GRANT 38
#define TK_ON 39
#define TK_TO 40
#define TK_REVOKE 41
#define TK_FROM 42
#define TK_SUBSCRIBE 43
#define TK_NK_COMMA 44
#define TK_READ 45
#define TK_WRITE 46
#define TK_NK_DOT 47
#define TK_WITH 48
#define TK_DNODE 49
#define TK_PORT 50
#define TK_DNODES 51
#define TK_RESTORE 52
#define TK_NK_IPTOKEN 53
#define TK_FORCE 54
#define TK_UNSAFE 55
#define TK_LOCAL 56
#define TK_QNODE 57
#define TK_BNODE 58
#define TK_SNODE 59
#define TK_MNODE 60
#define TK_VNODE 61
#define TK_DATABASE 62
#define TK_USE 63
#define TK_FLUSH 64
#define TK_TRIM 65
#define TK_COMPACT 66
#define TK_IF 67
#define TK_NOT 68
#define TK_EXISTS 69
#define TK_BUFFER 70
#define TK_CACHEMODEL 71
#define TK_CACHESIZE 72
#define TK_COMP 73
#define TK_DURATION 74
#define TK_NK_VARIABLE 75
#define TK_MAXROWS 76
#define TK_MINROWS 77
#define TK_KEEP 78
#define TK_PAGES 79
#define TK_PAGESIZE 80
#define TK_TSDB_PAGESIZE 81
#define TK_PRECISION 82
#define TK_REPLICA 83
#define TK_VGROUPS 84
#define TK_SINGLE_STABLE 85
#define TK_RETENTIONS 86
#define TK_SCHEMALESS 87
#define TK_WAL_LEVEL 88
#define TK_WAL_FSYNC_PERIOD 89
#define TK_WAL_RETENTION_PERIOD 90
#define TK_WAL_RETENTION_SIZE 91
#define TK_WAL_ROLL_PERIOD 92
#define TK_WAL_SEGMENT_SIZE 93
#define TK_STT_TRIGGER 94
#define TK_TABLE_PREFIX 95
#define TK_TABLE_SUFFIX 96
#define TK_NK_COLON 97
#define TK_MAX_SPEED 98
#define TK_START 99
#define TK_OR 1
#define TK_AND 2
#define TK_UNION 3
#define TK_ALL 4
#define TK_MINUS 5
#define TK_EXCEPT 6
#define TK_INTERSECT 7
#define TK_NK_BITAND 8
#define TK_NK_BITOR 9
#define TK_NK_LSHIFT 10
#define TK_NK_RSHIFT 11
#define TK_NK_PLUS 12
#define TK_NK_MINUS 13
#define TK_NK_STAR 14
#define TK_NK_SLASH 15
#define TK_NK_REM 16
#define TK_NK_CONCAT 17
#define TK_CREATE 18
#define TK_ACCOUNT 19
#define TK_NK_ID 20
#define TK_PASS 21
#define TK_NK_STRING 22
#define TK_ALTER 23
#define TK_PPS 24
#define TK_TSERIES 25
#define TK_STORAGE 26
#define TK_STREAMS 27
#define TK_QTIME 28
#define TK_DBS 29
#define TK_USERS 30
#define TK_CONNS 31
#define TK_STATE 32
#define TK_USER 33
#define TK_ENABLE 34
#define TK_NK_INTEGER 35
#define TK_SYSINFO 36
#define TK_DROP 37
#define TK_GRANT 38
#define TK_ON 39
#define TK_TO 40
#define TK_REVOKE 41
#define TK_FROM 42
#define TK_SUBSCRIBE 43
#define TK_NK_COMMA 44
#define TK_READ 45
#define TK_WRITE 46
#define TK_NK_DOT 47
#define TK_WITH 48
#define TK_DNODE 49
#define TK_PORT 50
#define TK_DNODES 51
#define TK_RESTORE 52
#define TK_NK_IPTOKEN 53
#define TK_FORCE 54
#define TK_UNSAFE 55
#define TK_LOCAL 56
#define TK_QNODE 57
#define TK_BNODE 58
#define TK_SNODE 59
#define TK_MNODE 60
#define TK_VNODE 61
#define TK_DATABASE 62
#define TK_USE 63
#define TK_FLUSH 64
#define TK_TRIM 65
#define TK_COMPACT 66
#define TK_IF 67
#define TK_NOT 68
#define TK_EXISTS 69
#define TK_BUFFER 70
#define TK_CACHEMODEL 71
#define TK_CACHESIZE 72
#define TK_COMP 73
#define TK_DURATION 74
#define TK_NK_VARIABLE 75
#define TK_MAXROWS 76
#define TK_MINROWS 77
#define TK_KEEP 78
#define TK_PAGES 79
#define TK_PAGESIZE 80
#define TK_TSDB_PAGESIZE 81
#define TK_PRECISION 82
#define TK_REPLICA 83
#define TK_VGROUPS 84
#define TK_SINGLE_STABLE 85
#define TK_RETENTIONS 86
#define TK_SCHEMALESS 87
#define TK_WAL_LEVEL 88
#define TK_WAL_FSYNC_PERIOD 89
#define TK_WAL_RETENTION_PERIOD 90
#define TK_WAL_RETENTION_SIZE 91
#define TK_WAL_ROLL_PERIOD 92
#define TK_WAL_SEGMENT_SIZE 93
#define TK_STT_TRIGGER 94
#define TK_TABLE_PREFIX 95
#define TK_TABLE_SUFFIX 96
#define TK_NK_COLON 97
#define TK_MAX_SPEED 98
#define TK_START 99
#define TK_TIMESTAMP 100
#define TK_END 101
#define TK_TABLE 102
@ -193,165 +193,167 @@
#define TK_INTERVAL 175
#define TK_COUNT 176
#define TK_LAST_ROW 177
#define TK_TOPIC 178
#define TK_META 179
#define TK_CONSUMER 180
#define TK_GROUP 181
#define TK_DESC 182
#define TK_DESCRIBE 183
#define TK_RESET 184
#define TK_QUERY 185
#define TK_CACHE 186
#define TK_EXPLAIN 187
#define TK_ANALYZE 188
#define TK_VERBOSE 189
#define TK_NK_BOOL 190
#define TK_RATIO 191
#define TK_NK_FLOAT 192
#define TK_OUTPUTTYPE 193
#define TK_AGGREGATE 194
#define TK_BUFSIZE 195
#define TK_LANGUAGE 196
#define TK_REPLACE 197
#define TK_STREAM 198
#define TK_INTO 199
#define TK_PAUSE 200
#define TK_RESUME 201
#define TK_TRIGGER 202
#define TK_AT_ONCE 203
#define TK_WINDOW_CLOSE 204
#define TK_IGNORE 205
#define TK_EXPIRED 206
#define TK_FILL_HISTORY 207
#define TK_UPDATE 208
#define TK_SUBTABLE 209
#define TK_UNTREATED 210
#define TK_KILL 211
#define TK_CONNECTION 212
#define TK_TRANSACTION 213
#define TK_BALANCE 214
#define TK_VGROUP 215
#define TK_LEADER 216
#define TK_MERGE 217
#define TK_REDISTRIBUTE 218
#define TK_SPLIT 219
#define TK_DELETE 220
#define TK_INSERT 221
#define TK_NULL 222
#define TK_NK_QUESTION 223
#define TK_NK_ARROW 224
#define TK_ROWTS 225
#define TK_QSTART 226
#define TK_QEND 227
#define TK_QDURATION 228
#define TK_WSTART 229
#define TK_WEND 230
#define TK_WDURATION 231
#define TK_IROWTS 232
#define TK_ISFILLED 233
#define TK_CAST 234
#define TK_NOW 235
#define TK_TODAY 236
#define TK_TIMEZONE 237
#define TK_CLIENT_VERSION 238
#define TK_SERVER_VERSION 239
#define TK_SERVER_STATUS 240
#define TK_CURRENT_USER 241
#define TK_CASE 242
#define TK_WHEN 243
#define TK_THEN 244
#define TK_ELSE 245
#define TK_BETWEEN 246
#define TK_IS 247
#define TK_NK_LT 248
#define TK_NK_GT 249
#define TK_NK_LE 250
#define TK_NK_GE 251
#define TK_NK_NE 252
#define TK_MATCH 253
#define TK_NMATCH 254
#define TK_CONTAINS 255
#define TK_IN 256
#define TK_JOIN 257
#define TK_INNER 258
#define TK_SELECT 259
#define TK_DISTINCT 260
#define TK_WHERE 261
#define TK_PARTITION 262
#define TK_BY 263
#define TK_SESSION 264
#define TK_STATE_WINDOW 265
#define TK_EVENT_WINDOW 266
#define TK_SLIDING 267
#define TK_FILL 268
#define TK_VALUE 269
#define TK_VALUE_F 270
#define TK_NONE 271
#define TK_PREV 272
#define TK_NULL_F 273
#define TK_LINEAR 274
#define TK_NEXT 275
#define TK_HAVING 276
#define TK_RANGE 277
#define TK_EVERY 278
#define TK_ORDER 279
#define TK_SLIMIT 280
#define TK_SOFFSET 281
#define TK_LIMIT 282
#define TK_OFFSET 283
#define TK_ASC 284
#define TK_NULLS 285
#define TK_ABORT 286
#define TK_AFTER 287
#define TK_ATTACH 288
#define TK_BEFORE 289
#define TK_BEGIN 290
#define TK_BITAND 291
#define TK_BITNOT 292
#define TK_BITOR 293
#define TK_BLOCKS 294
#define TK_CHANGE 295
#define TK_COMMA 296
#define TK_CONCAT 297
#define TK_CONFLICT 298
#define TK_COPY 299
#define TK_DEFERRED 300
#define TK_DELIMITERS 301
#define TK_DETACH 302
#define TK_DIVIDE 303
#define TK_DOT 304
#define TK_EACH 305
#define TK_FAIL 306
#define TK_FILE 307
#define TK_FOR 308
#define TK_GLOB 309
#define TK_ID 310
#define TK_IMMEDIATE 311
#define TK_IMPORT 312
#define TK_INITIALLY 313
#define TK_INSTEAD 314
#define TK_ISNULL 315
#define TK_KEY 316
#define TK_MODULES 317
#define TK_NK_BITNOT 318
#define TK_NK_SEMI 319
#define TK_NOTNULL 320
#define TK_OF 321
#define TK_PLUS 322
#define TK_PRIVILEGE 323
#define TK_RAISE 324
#define TK_RESTRICT 325
#define TK_ROW 326
#define TK_SEMI 327
#define TK_STAR 328
#define TK_STATEMENT 329
#define TK_STRICT 330
#define TK_STRING 331
#define TK_TIMES 332
#define TK_VALUES 333
#define TK_VARIABLE 334
#define TK_VIEW 335
#define TK_WAL 336
#define TK_META 178
#define TK_ONLY 179
#define TK_TOPIC 180
#define TK_CONSUMER 181
#define TK_GROUP 182
#define TK_DESC 183
#define TK_DESCRIBE 184
#define TK_RESET 185
#define TK_QUERY 186
#define TK_CACHE 187
#define TK_EXPLAIN 188
#define TK_ANALYZE 189
#define TK_VERBOSE 190
#define TK_NK_BOOL 191
#define TK_RATIO 192
#define TK_NK_FLOAT 193
#define TK_OUTPUTTYPE 194
#define TK_AGGREGATE 195
#define TK_BUFSIZE 196
#define TK_LANGUAGE 197
#define TK_REPLACE 198
#define TK_STREAM 199
#define TK_INTO 200
#define TK_PAUSE 201
#define TK_RESUME 202
#define TK_TRIGGER 203
#define TK_AT_ONCE 204
#define TK_WINDOW_CLOSE 205
#define TK_IGNORE 206
#define TK_EXPIRED 207
#define TK_FILL_HISTORY 208
#define TK_UPDATE 209
#define TK_SUBTABLE 210
#define TK_UNTREATED 211
#define TK_KILL 212
#define TK_CONNECTION 213
#define TK_TRANSACTION 214
#define TK_BALANCE 215
#define TK_VGROUP 216
#define TK_LEADER 217
#define TK_MERGE 218
#define TK_REDISTRIBUTE 219
#define TK_SPLIT 220
#define TK_DELETE 221
#define TK_INSERT 222
#define TK_NULL 223
#define TK_NK_QUESTION 224
#define TK_NK_ARROW 225
#define TK_ROWTS 226
#define TK_QSTART 227
#define TK_QEND 228
#define TK_QDURATION 229
#define TK_WSTART 230
#define TK_WEND 231
#define TK_WDURATION 232
#define TK_IROWTS 233
#define TK_ISFILLED 234
#define TK_CAST 235
#define TK_NOW 236
#define TK_TODAY 237
#define TK_TIMEZONE 238
#define TK_CLIENT_VERSION 239
#define TK_SERVER_VERSION 240
#define TK_SERVER_STATUS 241
#define TK_CURRENT_USER 242
#define TK_CASE 243
#define TK_WHEN 244
#define TK_THEN 245
#define TK_ELSE 246
#define TK_BETWEEN 247
#define TK_IS 248
#define TK_NK_LT 249
#define TK_NK_GT 250
#define TK_NK_LE 251
#define TK_NK_GE 252
#define TK_NK_NE 253
#define TK_MATCH 254
#define TK_NMATCH 255
#define TK_CONTAINS 256
#define TK_IN 257
#define TK_JOIN 258
#define TK_INNER 259
#define TK_SELECT 260
#define TK_DISTINCT 261
#define TK_WHERE 262
#define TK_PARTITION 263
#define TK_BY 264
#define TK_SESSION 265
#define TK_STATE_WINDOW 266
#define TK_EVENT_WINDOW 267
#define TK_SLIDING 268
#define TK_FILL 269
#define TK_VALUE 270
#define TK_VALUE_F 271
#define TK_NONE 272
#define TK_PREV 273
#define TK_NULL_F 274
#define TK_LINEAR 275
#define TK_NEXT 276
#define TK_HAVING 277
#define TK_RANGE 278
#define TK_EVERY 279
#define TK_ORDER 280
#define TK_SLIMIT 281
#define TK_SOFFSET 282
#define TK_LIMIT 283
#define TK_OFFSET 284
#define TK_ASC 285
#define TK_NULLS 286
#define TK_ABORT 287
#define TK_AFTER 288
#define TK_ATTACH 289
#define TK_BEFORE 290
#define TK_BEGIN 291
#define TK_BITAND 292
#define TK_BITNOT 293
#define TK_BITOR 294
#define TK_BLOCKS 295
#define TK_CHANGE 296
#define TK_COMMA 297
#define TK_CONCAT 298
#define TK_CONFLICT 299
#define TK_COPY 300
#define TK_DEFERRED 301
#define TK_DELIMITERS 302
#define TK_DETACH 303
#define TK_DIVIDE 304
#define TK_DOT 305
#define TK_EACH 306
#define TK_FAIL 307
#define TK_FILE 308
#define TK_FOR 309
#define TK_GLOB 310
#define TK_ID 311
#define TK_IMMEDIATE 312
#define TK_IMPORT 313
#define TK_INITIALLY 314
#define TK_INSTEAD 315
#define TK_ISNULL 316
#define TK_KEY 317
#define TK_MODULES 318
#define TK_NK_BITNOT 319
#define TK_NK_SEMI 320
#define TK_NOTNULL 321
#define TK_OF 322
#define TK_PLUS 323
#define TK_PRIVILEGE 324
#define TK_RAISE 325
#define TK_RESTRICT 326
#define TK_ROW 327
#define TK_SEMI 328
#define TK_STAR 329
#define TK_STATEMENT 330
#define TK_STRICT 331
#define TK_STRING 332
#define TK_TIMES 333
#define TK_VALUES 334
#define TK_VARIABLE 335
#define TK_VIEW 336
#define TK_WAL 337

View File

@ -55,6 +55,9 @@ typedef struct {
void* pStateBackend;
struct SStorageAPI api;
int8_t fillHistory;
STimeWindow winRange;
} SReadHandle;
// in queue mode, data streams are seperated by msg
@ -193,14 +196,6 @@ int32_t qDeserializeTaskStatus(qTaskInfo_t tinfo, const char* pInput, int32_t le
void getNextTimeWindow(const SInterval* pInterval, STimeWindow* tw, int32_t order);
void getInitialStartTimeWindow(SInterval* pInterval, TSKEY ts, STimeWindow* w, bool ascQuery);
STimeWindow getAlignQueryTimeWindow(const SInterval* pInterval, int64_t key);
/**
* return the scan info, in the form of tuple of two items, including table uid and current timestamp
* @param tinfo
* @param uid
* @param ts
* @return
*/
int32_t qGetStreamScanStatus(qTaskInfo_t tinfo, uint64_t* uid, int64_t* ts);
SArray* qGetQueriedTableListInfo(qTaskInfo_t tinfo);
@ -220,15 +215,22 @@ void* qExtractReaderFromStreamScanner(void* scanner);
int32_t qExtractStreamScanner(qTaskInfo_t tinfo, void** scanner);
int32_t qStreamSetParamForRecover(qTaskInfo_t tinfo);
int32_t qStreamSourceRecoverStep1(qTaskInfo_t tinfo, int64_t ver);
int32_t qStreamSourceRecoverStep2(qTaskInfo_t tinfo, int64_t ver);
int32_t qSetStreamOperatorOptionForScanHistory(qTaskInfo_t tinfo);
int32_t qStreamSourceScanParamForHistoryScanStep1(qTaskInfo_t tinfo, SVersionRange *pVerRange, STimeWindow* pWindow);
int32_t qStreamSourceScanParamForHistoryScanStep2(qTaskInfo_t tinfo, SVersionRange *pVerRange, STimeWindow* pWindow);
int32_t qStreamRecoverFinish(qTaskInfo_t tinfo);
int32_t qStreamRestoreParam(qTaskInfo_t tinfo);
int32_t qRestoreStreamOperatorOption(qTaskInfo_t tinfo);
bool qStreamRecoverScanFinished(qTaskInfo_t tinfo);
void qStreamCloseTsdbReader(void* task);
bool qStreamRecoverScanStep1Finished(qTaskInfo_t tinfo);
bool qStreamRecoverScanStep2Finished(qTaskInfo_t tinfo);
int32_t qStreamRecoverSetAllStepFinished(qTaskInfo_t tinfo);
void resetTaskInfo(qTaskInfo_t tinfo);
void qResetStreamInfoTimeWindow(qTaskInfo_t tinfo);
int32_t qStreamOperatorReleaseState(qTaskInfo_t tInfo);
int32_t qStreamOperatorReloadState(qTaskInfo_t tInfo);
#ifdef __cplusplus
}
#endif

View File

@ -123,8 +123,8 @@ typedef struct SSnapContext {
SHashObj* suidInfo;
SArray* idList;
int32_t index;
bool withMeta;
bool queryMeta; // true-get meta, false-get data
int8_t withMeta;
int8_t queryMeta; // true-get meta, false-get data
} SSnapContext;
typedef struct {
@ -234,29 +234,6 @@ typedef struct SStoreSnapshotFn {
int32_t (*getTableInfoFromSnapshot)(SSnapContext* ctx, void** pBuf, int32_t* contLen, int16_t* type, int64_t* uid);
} SStoreSnapshotFn;
/**
void metaReaderInit(SMetaReader *pReader, SMeta *pMeta, int32_t flags);
void metaReaderReleaseLock(SMetaReader *pReader);
void metaReaderClear(SMetaReader *pReader);
int32_t metaReaderGetTableEntryByUid(SMetaReader *pReader, tb_uid_t uid);
int32_t metaReaderGetTableEntryByUidCache(SMetaReader *pReader, tb_uid_t uid);
int32_t metaGetTableTags(SMeta *pMeta, uint64_t suid, SArray *uidList);
const void *metaGetTableTagVal(void *tag, int16_t type, STagVal *tagVal);
int metaGetTableNameByUid(void *meta, uint64_t uid, char *tbName);
int metaGetTableUidByName(void *meta, char *tbName, uint64_t *uid);
int metaGetTableTypeByName(void *meta, char *tbName, ETableType *tbType);
bool metaIsTableExist(SMeta *pMeta, tb_uid_t uid);
int32_t metaGetCachedTableUidList(SMeta *pMeta, tb_uid_t suid, const uint8_t *key, int32_t keyLen, SArray *pList,
bool *acquired);
int32_t metaUidFilterCachePut(SMeta *pMeta, uint64_t suid, const void *pKey, int32_t keyLen, void *pPayload,
int32_t payloadLen, double selectivityRatio);
tb_uid_t metaGetTableEntryUidByName(SMeta *pMeta, const char *name);
int32_t metaGetCachedTbGroup(SMeta* pMeta, tb_uid_t suid, const uint8_t* pKey, int32_t keyLen, SArray** pList);
int32_t metaPutTbGroupToCache(SMeta* pMeta, uint64_t suid, const void* pKey, int32_t keyLen, void* pPayload, int32_t
payloadLen);
*/
typedef struct SStoreMeta {
SMTbCursor* (*openTableMetaCursor)(void* pVnode); // metaOpenTbCursor
void (*closeTableMetaCursor)(SMTbCursor* pTbCur); // metaCloseTbCursor
@ -403,7 +380,7 @@ typedef struct SStateStore {
SStreamStateCur* (*streamStateSessionSeekKeyCurrentNext)(SStreamState* pState, const SSessionKey* key);
struct SStreamFileState* (*streamFileStateInit)(int64_t memSize, uint32_t keySize, uint32_t rowSize,
uint32_t selectRowSize, GetTsFun fp, void* pFile, TSKEY delMark);
uint32_t selectRowSize, GetTsFun fp, void* pFile, TSKEY delMark, const char*id);
void (*streamFileStateDestroy)(struct SStreamFileState* pFileState);
void (*streamFileStateClear)(struct SStreamFileState* pFileState);
@ -415,6 +392,7 @@ typedef struct SStateStore {
int32_t (*streamStateCommit)(SStreamState* pState);
void (*streamStateDestroy)(SStreamState* pState, bool remove);
int32_t (*streamStateDeleteCheckPoint)(SStreamState* pState, TSKEY mark);
void (*streamStateReloadInfo)(SStreamState* pState, TSKEY ts);
} SStateStore;
typedef struct SStorageAPI {

View File

@ -129,30 +129,38 @@ typedef struct SSerializeDataHandle {
} SSerializeDataHandle;
// incremental state storage
typedef struct STdbState {
void *rocksdb;
void **pHandle;
void *writeOpts;
void *readOpts;
void **cfOpts;
void *dbOpt;
struct SStreamTask *pOwner;
void *param;
void *env;
SListNode *pComparNode;
void *pBackend;
char idstr[64];
void *compactFactory;
TdThreadRwlock rwLock;
void *db;
void *pStateDb;
void *pFuncStateDb;
void *pFillStateDb; // todo refactor
void *pSessionStateDb;
void *pParNameDb;
void *pParTagDb;
void *txn;
typedef struct SBackendCfWrapper {
void *rocksdb;
void **pHandle;
void *writeOpts;
void *readOpts;
void **cfOpts;
void *dbOpt;
void *param;
void *env;
SListNode *pComparNode;
void *pBackend;
void *compactFactory;
TdThreadRwlock rwLock;
bool remove;
int64_t backendId;
char idstr[64];
} SBackendCfWrapper;
typedef struct STdbState {
SBackendCfWrapper *pBackendCfWrapper;
int64_t backendCfWrapperId;
char idstr[64];
struct SStreamTask *pOwner;
void *db;
void *pStateDb;
void *pFuncStateDb;
void *pFillStateDb; // todo refactor
void *pSessionStateDb;
void *pParNameDb;
void *pParTagDb;
void *txn;
} STdbState;
typedef struct {

View File

@ -362,7 +362,7 @@ typedef struct SCreateTopicStmt {
char subDbName[TSDB_DB_NAME_LEN];
char subSTbName[TSDB_TABLE_NAME_LEN];
bool ignoreExists;
bool withMeta;
int8_t withMeta;
SNode* pQuery;
SNode* pWhere;
} SCreateTopicStmt;

View File

@ -138,6 +138,8 @@ int32_t streamStateCurPrev(SStreamState* pState, SStreamStateCur* pCur);
int32_t streamStatePutParName(SStreamState* pState, int64_t groupId, const char* tbname);
int32_t streamStateGetParName(SStreamState* pState, int64_t groupId, void** pVal);
void streamStateReloadInfo(SStreamState* pState, TSKEY ts);
/***compare func **/
typedef struct SStateChekpoint {

View File

@ -44,10 +44,8 @@ enum {
TASK_STATUS__DROPPING,
TASK_STATUS__FAIL,
TASK_STATUS__STOP,
TASK_STATUS__WAIT_DOWNSTREAM,
TASK_STATUS__RECOVER_PREPARE,
TASK_STATUS__RECOVER1,
TASK_STATUS__RECOVER2,
TASK_STATUS__SCAN_HISTORY, // stream task scan history data by using tsdbread in the stream scanner
TASK_STATUS__HALT, // stream task will handle all data in the input queue, and then paused
TASK_STATUS__PAUSE,
};
@ -133,7 +131,6 @@ typedef struct {
// ref data block, for delete
typedef struct {
int8_t type;
int64_t ver;
SSDataBlock* pBlock;
} SStreamRefDataBlock;
@ -203,13 +200,11 @@ static FORCE_INLINE void streamQueueProcessFail(SStreamQueue* queue) {
atomic_store_8(&queue->status, STREAM_QUEUE__FAILED);
}
void* streamQueueNextItem(SStreamQueue* queue);
void* streamQueueNextItem(SStreamQueue* pQueue);
SStreamDataSubmit* streamDataSubmitNew(SPackedData* pData, int32_t type);
void streamDataSubmitDestroy(SStreamDataSubmit* pDataSubmit);
SStreamDataSubmit* streamSubmitBlockClone(SStreamDataSubmit* pSubmit);
typedef struct {
char* qmsg;
void* pExecutor; // not applicable to encoder and decoder
@ -251,7 +246,7 @@ typedef struct {
int8_t reserved;
} STaskSinkFetch;
typedef struct {
typedef struct SStreamChildEpInfo {
int32_t nodeId;
int32_t childId;
int32_t taskId;
@ -271,31 +266,55 @@ typedef struct SCheckpointInfo {
} SCheckpointInfo;
typedef struct SStreamStatus {
int8_t taskStatus;
int8_t schedStatus;
int8_t keepTaskStatus;
int8_t taskStatus;
int8_t downstreamReady; // downstream tasks are all ready now, if this flag is set
int8_t schedStatus;
int8_t keepTaskStatus;
bool transferState;
int8_t timerActive; // timer is active
} SStreamStatus;
struct SStreamTask {
SStreamId id;
int32_t totalLevel;
int8_t taskLevel;
int8_t outputType;
int16_t dispatchMsgType;
SStreamStatus status;
int32_t selfChildId;
int32_t nodeId; // vgroup id
SEpSet epSet;
SCheckpointInfo chkInfo;
STaskExec exec;
int8_t fillHistory; // fill history
int64_t ekey; // end ts key
int64_t endVer; // end version
typedef struct SHistDataRange {
SVersionRange range;
STimeWindow window;
} SHistDataRange;
// children info
SArray* childEpInfo; // SArray<SStreamChildEpInfo*>
int32_t nextCheckId;
SArray* checkpointInfo; // SArray<SStreamCheckpointInfo>
typedef struct SSTaskBasicInfo {
int32_t nodeId; // vgroup id or snode id
SEpSet epSet;
int32_t selfChildId;
int32_t totalLevel;
int8_t taskLevel;
int8_t fillHistory; // is fill history task or not
} SSTaskBasicInfo;
typedef struct SDispatchMsgInfo {
void* pData; // current dispatch data
int16_t msgType; // dispatch msg type
int32_t retryCount; // retry send data count
int64_t blockingTs; // output blocking timestamp
} SDispatchMsgInfo;
typedef struct {
int8_t outputType;
int8_t outputStatus;
SStreamQueue* outputQueue;
} SSTaskOutputInfo;
struct SStreamTask {
SStreamId id;
SSTaskBasicInfo info;
int8_t outputType;
SDispatchMsgInfo msgInfo;
SStreamStatus status;
SCheckpointInfo chkInfo;
STaskExec exec;
SHistDataRange dataRange;
SStreamId historyTaskId;
SStreamId streamTaskId;
SArray* pUpstreamEpInfoList; // SArray<SStreamChildEpInfo*>, // children info
int32_t nextCheckId;
SArray* checkpointInfo; // SArray<SStreamCheckpointInfo>
// output
union {
@ -314,13 +333,14 @@ struct SStreamTask {
// trigger
int8_t triggerStatus;
int64_t triggerParam;
void* timer;
void* schedTimer;
void* launchTaskTimer;
SMsgCb* pMsgCb; // msg handle
SStreamState* pState; // state backend
// the followings attributes don't be serialized
int32_t recoverTryingDownstream;
int32_t recoverWaitingUpstream;
int32_t notReadyTasks;
int32_t numOfWaitingUpstream;
int64_t checkReqId;
SArray* checkReqIds; // shuffle
int32_t refCnt;
@ -332,21 +352,22 @@ struct SStreamTask {
// meta
typedef struct SStreamMeta {
char* path;
TDB* db;
TTB* pTaskDb;
TTB* pCheckpointDb;
SHashObj* pTasks;
SArray* pTaskList; // SArray<task_id*>
void* ahandle;
TXN* txn;
FTaskExpand* expandFunc;
int32_t vgId;
SRWLatch lock;
int32_t walScanCounter;
void* streamBackend;
int64_t streamBackendRid;
SHashObj* pTaskBackendUnique;
char* path;
TDB* db;
TTB* pTaskDb;
TTB* pCheckpointDb;
SHashObj* pTasks;
SArray* pTaskList; // SArray<task_id*>
void* ahandle;
TXN* txn;
FTaskExpand* expandFunc;
int32_t vgId;
SRWLatch lock;
int32_t walScanCounter;
void* streamBackend;
int64_t streamBackendRid;
SHashObj* pTaskBackendUnique;
TdThreadMutex backendMutex;
} SStreamMeta;
int32_t tEncodeStreamEpInfo(SEncoder* pEncoder, const SStreamChildEpInfo* pInfo);
@ -431,16 +452,17 @@ typedef struct {
SMsgHead msgHead;
int64_t streamId;
int32_t taskId;
} SStreamRecoverStep1Req, SStreamRecoverStep2Req;
int8_t igUntreated;
} SStreamScanHistoryReq;
typedef struct {
int64_t streamId;
int32_t taskId;
int32_t childId;
} SStreamRecoverFinishReq;
} SStreamRecoverFinishReq, SStreamTransferReq;
int32_t tEncodeSStreamRecoverFinishReq(SEncoder* pEncoder, const SStreamRecoverFinishReq* pReq);
int32_t tDecodeSStreamRecoverFinishReq(SDecoder* pDecoder, SStreamRecoverFinishReq* pReq);
int32_t tEncodeStreamRecoverFinishReq(SEncoder* pEncoder, const SStreamRecoverFinishReq* pReq);
int32_t tDecodeStreamRecoverFinishReq(SDecoder* pDecoder, SStreamRecoverFinishReq* pReq);
typedef struct {
int64_t streamId;
@ -509,11 +531,11 @@ typedef struct {
SArray* checkpointVer; // SArray<SStreamCheckpointInfo>
} SStreamRecoverDownstreamRsp;
int32_t tEncodeSStreamTaskCheckReq(SEncoder* pEncoder, const SStreamTaskCheckReq* pReq);
int32_t tDecodeSStreamTaskCheckReq(SDecoder* pDecoder, SStreamTaskCheckReq* pReq);
int32_t tEncodeStreamTaskCheckReq(SEncoder* pEncoder, const SStreamTaskCheckReq* pReq);
int32_t tDecodeStreamTaskCheckReq(SDecoder* pDecoder, SStreamTaskCheckReq* pReq);
int32_t tEncodeSStreamTaskCheckRsp(SEncoder* pEncoder, const SStreamTaskCheckRsp* pRsp);
int32_t tDecodeSStreamTaskCheckRsp(SDecoder* pDecoder, SStreamTaskCheckRsp* pRsp);
int32_t tEncodeStreamTaskCheckRsp(SEncoder* pEncoder, const SStreamTaskCheckRsp* pRsp);
int32_t tDecodeStreamTaskCheckRsp(SDecoder* pDecoder, SStreamTaskCheckRsp* pRsp);
int32_t tEncodeSStreamTaskRecoverReq(SEncoder* pEncoder, const SStreamRecoverDownstreamReq* pReq);
int32_t tDecodeSStreamTaskRecoverReq(SDecoder* pDecoder, SStreamRecoverDownstreamReq* pReq);
@ -525,9 +547,11 @@ int32_t tDecodeStreamDispatchReq(SDecoder* pDecoder, SStreamDispatchReq* pReq);
int32_t tDecodeStreamRetrieveReq(SDecoder* pDecoder, SStreamRetrieveReq* pReq);
void tDeleteStreamRetrieveReq(SStreamRetrieveReq* pReq);
void tDeleteStreamDispatchReq(SStreamDispatchReq* pReq);
int32_t tInitStreamDispatchReq(SStreamDispatchReq* pReq, const SStreamTask* pTask, int32_t vgId, int32_t numOfBlocks,
int64_t dstTaskId);
void tDeleteStreamDispatchReq(SStreamDispatchReq* pReq);
int32_t streamSetupTrigger(SStreamTask* pTask);
int32_t streamSetupScheduleTrigger(SStreamTask* pTask);
int32_t streamProcessRunReq(SStreamTask* pTask);
int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, SRpcMsg* pMsg, bool exec);
@ -542,30 +566,44 @@ int32_t streamSchedExec(SStreamTask* pTask);
int32_t streamTaskOutputResultBlock(SStreamTask* pTask, SStreamDataBlock* pBlock);
bool streamTaskShouldStop(const SStreamStatus* pStatus);
bool streamTaskShouldPause(const SStreamStatus* pStatus);
bool streamTaskIsIdle(const SStreamTask* pTask);
int32_t streamScanExec(SStreamTask* pTask, int32_t batchSz);
char* createStreamTaskIdStr(int64_t streamId, int32_t taskId);
// recover and fill history
int32_t streamTaskCheckDownstream(SStreamTask* pTask, int64_t version);
int32_t streamTaskLaunchRecover(SStreamTask* pTask, int64_t version);
void streamPrepareNdoCheckDownstream(SStreamTask* pTask);
int32_t streamTaskCheckDownstreamTasks(SStreamTask* pTask);
int32_t streamTaskLaunchScanHistory(SStreamTask* pTask);
int32_t streamTaskCheckStatus(SStreamTask* pTask);
int32_t streamProcessTaskCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRsp, int64_t version);
int32_t streamProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRsp);
int32_t streamCheckHistoryTaskDownstrem(SStreamTask* pTask);
int32_t streamTaskScanHistoryDataComplete(SStreamTask* pTask);
int32_t streamStartRecoverTask(SStreamTask* pTask, int8_t igUntreated);
bool streamTaskRecoverScanStep1Finished(SStreamTask* pTask);
bool streamTaskRecoverScanStep2Finished(SStreamTask* pTask);
int32_t streamTaskRecoverSetAllStepFinished(SStreamTask* pTask);
// common
int32_t streamSetParamForRecover(SStreamTask* pTask);
int32_t streamRestoreParam(SStreamTask* pTask);
int32_t streamSetStatusNormal(SStreamTask* pTask);
int32_t streamSetParamForScanHistoryData(SStreamTask* pTask);
int32_t streamRestoreParam(SStreamTask* pTask);
int32_t streamSetStatusNormal(SStreamTask* pTask);
const char* streamGetTaskStatusStr(int32_t status);
// source level
int32_t streamSourceRecoverPrepareStep1(SStreamTask* pTask, int64_t ver);
int32_t streamBuildSourceRecover1Req(SStreamTask* pTask, SStreamRecoverStep1Req* pReq);
int32_t streamSourceRecoverScanStep1(SStreamTask* pTask);
int32_t streamBuildSourceRecover2Req(SStreamTask* pTask, SStreamRecoverStep2Req* pReq);
int32_t streamSourceRecoverScanStep2(SStreamTask* pTask, int64_t ver);
int32_t streamDispatchRecoverFinishReq(SStreamTask* pTask);
int32_t streamSetParamForStreamScannerStep1(SStreamTask* pTask, SVersionRange* pVerRange, STimeWindow* pWindow);
int32_t streamSetParamForStreamScannerStep2(SStreamTask* pTask, SVersionRange* pVerRange, STimeWindow* pWindow);
int32_t streamBuildSourceRecover1Req(SStreamTask* pTask, SStreamScanHistoryReq* pReq, int8_t igUntreated);
int32_t streamSourceScanHistoryData(SStreamTask* pTask);
// int32_t streamSourceRecoverScanStep2(SStreamTask* pTask, int64_t ver);
int32_t streamDispatchScanHistoryFinishMsg(SStreamTask* pTask);
int32_t streamDispatchTransferStateMsg(SStreamTask* pTask);
// agg level
int32_t streamAggRecoverPrepare(SStreamTask* pTask);
// int32_t streamAggChildrenRecoverFinish(SStreamTask* pTask);
int32_t streamProcessRecoverFinishReq(SStreamTask* pTask, int32_t childId);
int32_t streamProcessRecoverFinishReq(SStreamTask* pTask, int32_t taskId, int32_t childId);
void streamMetaInit();
void streamMetaCleanup();
@ -591,6 +629,9 @@ int32_t streamProcessCheckpointSourceReq(SStreamMeta* pMeta, SStreamTask* pTask,
int32_t streamProcessCheckpointReq(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointReq* pReq);
int32_t streamProcessCheckpointRsp(SStreamMeta* pMeta, SStreamTask* pTask, SStreamCheckpointRsp* pRsp);
int32_t streamTaskReleaseState(SStreamTask* pTask);
int32_t streamTaskReloadState(SStreamTask* pTask);
#ifdef __cplusplus
}
#endif

View File

@ -28,11 +28,10 @@ extern "C" {
#endif
typedef struct SStreamFileState SStreamFileState;
typedef SList SStreamSnapshot;
SStreamFileState* streamFileStateInit(int64_t memSize, uint32_t keySize, uint32_t rowSize, uint32_t selectRowSize,
GetTsFun fp, void* pFile, TSKEY delMark);
GetTsFun fp, void* pFile, TSKEY delMark, const char* id);
void streamFileStateDestroy(SStreamFileState* pFileState);
void streamFileStateClear(SStreamFileState* pFileState);
bool needClearDiskBuff(SStreamFileState* pFileState);
@ -50,6 +49,7 @@ int32_t recoverSnapshot(SStreamFileState* pFileState);
int32_t getSnapshotIdList(SStreamFileState* pFileState, SArray* list);
int32_t deleteExpiredCheckPoint(SStreamFileState* pFileState, TSKEY mark);
int32_t streamFileStateGeSelectRowSize(SStreamFileState* pFileState);
void streamFileStateReloadInfo(SStreamFileState* pFileState, TSKEY ts);
#ifdef __cplusplus
}

View File

@ -919,7 +919,7 @@ static void* tmqFreeRspWrapper(SMqRspWrapper* rspWrapper) {
} else if (rspWrapper->tmqRspType == TMQ_MSG_TYPE__EP_RSP) {
SMqAskEpRspWrapper* pEpRspWrapper = (SMqAskEpRspWrapper*)rspWrapper;
tDeleteSMqAskEpRsp(&pEpRspWrapper->msg);
} else if (rspWrapper->tmqRspType == TMQ_MSG_TYPE__POLL_RSP) {
} else if (rspWrapper->tmqRspType == TMQ_MSG_TYPE__POLL_DATA_RSP) {
SMqPollRspWrapper* pRsp = (SMqPollRspWrapper*)rspWrapper;
taosMemoryFreeClear(pRsp->pEpset);
@ -932,7 +932,7 @@ static void* tmqFreeRspWrapper(SMqRspWrapper* rspWrapper) {
taosMemoryFreeClear(pRsp->pEpset);
taosMemoryFree(pRsp->metaRsp.metaRsp);
} else if (rspWrapper->tmqRspType == TMQ_MSG_TYPE__TAOSX_RSP) {
} else if (rspWrapper->tmqRspType == TMQ_MSG_TYPE__POLL_DATA_META_RSP) {
SMqPollRspWrapper* pRsp = (SMqPollRspWrapper*)rspWrapper;
taosMemoryFreeClear(pRsp->pEpset);
@ -1407,7 +1407,7 @@ int32_t tmqPollCb(void* param, SDataBuf* pMsg, int32_t code) {
strcpy(pRspWrapper->topicName, pParam->topicName);
pMsg->pEpSet = NULL;
if (rspType == TMQ_MSG_TYPE__POLL_RSP) {
if (rspType == TMQ_MSG_TYPE__POLL_DATA_RSP) {
SDecoder decoder;
tDecoderInit(&decoder, POINTER_SHIFT(pMsg->pData, sizeof(SMqRspHead)), pMsg->len - sizeof(SMqRspHead));
tDecodeMqDataRsp(&decoder, &pRspWrapper->dataRsp);
@ -1424,7 +1424,7 @@ int32_t tmqPollCb(void* param, SDataBuf* pMsg, int32_t code) {
tDecodeMqMetaRsp(&decoder, &pRspWrapper->metaRsp);
tDecoderClear(&decoder);
memcpy(&pRspWrapper->metaRsp, pMsg->pData, sizeof(SMqRspHead));
} else if (rspType == TMQ_MSG_TYPE__TAOSX_RSP) {
} else if (rspType == TMQ_MSG_TYPE__POLL_DATA_META_RSP) {
SDecoder decoder;
tDecoderInit(&decoder, POINTER_SHIFT(pMsg->pData, sizeof(SMqRspHead)), pMsg->len - sizeof(SMqRspHead));
tDecodeSTaosxRsp(&decoder, &pRspWrapper->taosxRsp);
@ -1884,7 +1884,7 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) {
terrno = TSDB_CODE_TQ_NO_COMMITTED_OFFSET;
tscError("consumer:0x%" PRIx64 " unexpected rsp from poll, code:%s", tmq->consumerId, tstrerror(terrno));
return NULL;
} else if (pRspWrapper->tmqRspType == TMQ_MSG_TYPE__POLL_RSP) {
} else if (pRspWrapper->tmqRspType == TMQ_MSG_TYPE__POLL_DATA_RSP) {
SMqPollRspWrapper* pollRspWrapper = (SMqPollRspWrapper*)pRspWrapper;
int32_t consumerEpoch = atomic_load_32(&tmq->epoch);
@ -1984,7 +1984,7 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) {
pRspWrapper = tmqFreeRspWrapper(pRspWrapper);
taosFreeQitem(pollRspWrapper);
}
} else if (pRspWrapper->tmqRspType == TMQ_MSG_TYPE__TAOSX_RSP) {
} else if (pRspWrapper->tmqRspType == TMQ_MSG_TYPE__POLL_DATA_META_RSP) {
SMqPollRspWrapper* pollRspWrapper = (SMqPollRspWrapper*)pRspWrapper;
int32_t consumerEpoch = atomic_load_32(&tmq->epoch);
@ -2026,7 +2026,7 @@ static void* tmqHandleAllRsp(tmq_t* tmq, int64_t timeout, bool pollIfReset) {
void* pRsp = NULL;
int64_t numOfRows = 0;
if (pollRspWrapper->taosxRsp.createTableNum == 0) {
pRsp = tmqBuildRspFromWrapper(pollRspWrapper, pVg, &numOfRows);
tscError("consumer:0x%" PRIx64" createTableNum should > 0 if rsp type is data_meta", tmq->consumerId);
} else {
pRsp = tmqBuildTaosxRspFromWrapper(pollRspWrapper, pVg, &numOfRows);
}

View File

@ -160,7 +160,7 @@ static const SSysDbTableSchema streamSchema[] = {
static const SSysDbTableSchema streamTaskSchema[] = {
{.name = "stream_name", .bytes = SYSTABLE_SCH_DB_NAME_LEN, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false},
{.name = "task_id", .bytes = 4, .type = TSDB_DATA_TYPE_INT, .sysInfo = false},
{.name = "task_id", .bytes = 32, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false},
{.name = "node_type", .bytes = SYSTABLE_SCH_DB_NAME_LEN, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false},
{.name = "node_id", .bytes = 4, .type = TSDB_DATA_TYPE_INT, .sysInfo = false},
{.name = "level", .bytes = 20 + VARSTR_HEADER_SIZE, .type = TSDB_DATA_TYPE_VARCHAR, .sysInfo = false},

View File

@ -47,7 +47,6 @@ int32_t colDataGetLength(const SColumnInfoData* pColumnInfoData, int32_t numOfRo
}
}
int32_t colDataGetRowLength(const SColumnInfoData* pColumnInfoData, int32_t rowIdx) {
if (colDataIsNull_s(pColumnInfoData, rowIdx)) return 0;
@ -67,10 +66,6 @@ int32_t colDataGetFullLength(const SColumnInfoData* pColumnInfoData, int32_t num
}
}
void colDataTrim(SColumnInfoData* pColumnInfoData) {
// TODO
}
int32_t getJsonValueLen(const char* data) {
int32_t dataLen = 0;
if (*data == TSDB_DATA_TYPE_NULL) {
@ -89,10 +84,6 @@ int32_t getJsonValueLen(const char* data) {
return dataLen;
}
int32_t colDataAppend(SColumnInfoData* pColumnInfoData, uint32_t rowIndex, const char* pData, bool isNull) {
return colDataSetVal(pColumnInfoData, rowIndex, pData, isNull);
}
int32_t colDataSetVal(SColumnInfoData* pColumnInfoData, uint32_t rowIndex, const char* pData, bool isNull) {
if (isNull) {
// There is a placehold for each NULL value of binary or nchar type.
@ -174,7 +165,7 @@ int32_t colDataReassignVal(SColumnInfoData* pColumnInfoData, uint32_t dstRowIdx,
}
int32_t colDataReserve(SColumnInfoData* pColumnInfoData, size_t newSize) {
static int32_t colDataReserve(SColumnInfoData* pColumnInfoData, size_t newSize) {
if (!IS_VAR_DATA_TYPE(pColumnInfoData->info.type)) {
return TSDB_CODE_SUCCESS;
}
@ -643,7 +634,7 @@ int32_t blockDataToBuf(char* buf, const SSDataBlock* pBlock) {
} else {
memcpy(pStart, pCol->pData, dataSize);
pStart += dataSize;
}
}
}
return 0;
@ -882,41 +873,8 @@ int32_t dataBlockCompar(const void* p1, const void* p2, const void* param) {
return 0;
}
static int32_t doAssignOneTuple(SColumnInfoData* pDstCols, int32_t numOfRows, const SSDataBlock* pSrcBlock,
int32_t tupleIndex) {
int32_t code = 0;
size_t numOfCols = taosArrayGetSize(pSrcBlock->pDataBlock);
for (int32_t i = 0; i < numOfCols; ++i) {
SColumnInfoData* pDst = &pDstCols[i];
SColumnInfoData* pSrc = taosArrayGet(pSrcBlock->pDataBlock, i);
if (pSrc->hasNull && colDataIsNull(pSrc, pSrcBlock->info.rows, tupleIndex, pSrcBlock->pBlockAgg[i])) {
code = colDataSetVal(pDst, numOfRows, NULL, true);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
} else {
char* p = colDataGetData(pSrc, tupleIndex);
code = colDataSetVal(pDst, numOfRows, p, false);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
}
}
return TSDB_CODE_SUCCESS;
}
static int32_t blockDataAssign(SColumnInfoData* pCols, const SSDataBlock* pDataBlock, const int32_t* index) {
#if 0
for (int32_t i = 0; i < pDataBlock->info.rows; ++i) {
int32_t code = doAssignOneTuple(pCols, i, pDataBlock, index[i]);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
}
#else
size_t numOfCols = taosArrayGetSize(pDataBlock->pDataBlock);
for (int32_t i = 0; i < numOfCols; ++i) {
SColumnInfoData* pDst = &pCols[i];
@ -941,7 +899,7 @@ static int32_t blockDataAssign(SColumnInfoData* pCols, const SSDataBlock* pDataB
}
}
}
#endif
return TSDB_CODE_SUCCESS;
}
@ -1101,114 +1059,6 @@ int32_t blockDataSort(SSDataBlock* pDataBlock, SArray* pOrderInfo) {
return TSDB_CODE_SUCCESS;
}
#if 0
typedef struct SHelper {
int32_t index;
union {
char* pData;
int64_t i64;
double d64;
};
} SHelper;
SHelper* createTupleIndex_rv(int32_t numOfRows, SArray* pOrderInfo, SSDataBlock* pBlock) {
int32_t sortValLengthPerRow = 0;
int32_t numOfCols = taosArrayGetSize(pOrderInfo);
for (int32_t i = 0; i < numOfCols; ++i) {
SBlockOrderInfo* pInfo = taosArrayGet(pOrderInfo, i);
SColumnInfoData* pColInfo = taosArrayGet(pBlock->pDataBlock, pInfo->slotId);
pInfo->pColData = pColInfo;
sortValLengthPerRow += pColInfo->info.bytes;
}
size_t len = sortValLengthPerRow * pBlock->info.rows;
char* buf = taosMemoryCalloc(1, len);
SHelper* phelper = taosMemoryCalloc(numOfRows, sizeof(SHelper));
for (int32_t i = 0; i < numOfRows; ++i) {
phelper[i].index = i;
phelper[i].pData = buf + sortValLengthPerRow * i;
}
int32_t offset = 0;
for (int32_t i = 0; i < numOfCols; ++i) {
SBlockOrderInfo* pInfo = taosArrayGet(pOrderInfo, i);
for (int32_t j = 0; j < numOfRows; ++j) {
phelper[j].i64 = *(int32_t*)pInfo->pColData->pData + pInfo->pColData->info.bytes * j;
// memcpy(phelper[j].pData + offset, pInfo->pColData->pData + pInfo->pColData->info.bytes * j,
// pInfo->pColData->info.bytes);
}
offset += pInfo->pColData->info.bytes;
}
taosMemoryFree(buf);
return phelper;
}
int32_t dataBlockCompar_rv(const void* p1, const void* p2, const void* param) {
const SSDataBlockSortHelper* pHelper = (const SSDataBlockSortHelper*)param;
SHelper* left = (SHelper*)p1;
SHelper* right = (SHelper*)p2;
SArray* pInfo = pHelper->orderInfo;
int32_t offset = 0;
int32_t leftx = *(int32_t*)left->pData; //*(int32_t*)(left->pData + offset);
int32_t rightx = *(int32_t*)right->pData; //*(int32_t*)(right->pData + offset);
if (leftx == rightx) {
return 0;
} else {
return (leftx < rightx) ? -1 : 1;
}
return 0;
}
int32_t blockDataSort_rv(SSDataBlock* pDataBlock, SArray* pOrderInfo, bool nullFirst) {
// Allocate the additional buffer.
int64_t p0 = taosGetTimestampUs();
SSDataBlockSortHelper helper = {.pDataBlock = pDataBlock, .orderInfo = pOrderInfo};
uint32_t rows = pDataBlock->info.rows;
SHelper* index = createTupleIndex_rv(rows, helper.orderInfo, pDataBlock);
if (index == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return terrno;
}
taosqsort(index, rows, sizeof(SHelper), &helper, dataBlockCompar_rv);
int64_t p1 = taosGetTimestampUs();
SColumnInfoData* pCols = createHelpColInfoData(pDataBlock);
if (pCols == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return terrno;
}
int64_t p2 = taosGetTimestampUs();
// int32_t code = blockDataAssign(pCols, pDataBlock, index);
// if (code != TSDB_CODE_SUCCESS) {
// terrno = code;
// return code;
// }
int64_t p3 = taosGetTimestampUs();
copyBackToBlock(pDataBlock, pCols);
int64_t p4 = taosGetTimestampUs();
printf("sort:%" PRId64 ", create:%" PRId64 ", assign:%" PRId64 ", copyback:%" PRId64 ", rows:%d\n", p1 - p0, p2 - p1,
p3 - p2, p4 - p3, rows);
// destroyTupleIndex(index);
return 0;
}
#endif
void blockDataCleanup(SSDataBlock* pDataBlock) {
blockDataEmpty(pDataBlock);
SDataBlockInfo* pInfo = &pDataBlock->info;
@ -1345,8 +1195,7 @@ void blockDataFreeRes(SSDataBlock* pBlock) {
colDataDestroy(pColInfoData);
}
taosArrayDestroy(pBlock->pDataBlock);
pBlock->pDataBlock = NULL;
pBlock->pDataBlock = taosArrayDestroy(pBlock->pDataBlock);
taosMemoryFreeClear(pBlock->pBlockAgg);
memset(&pBlock->info, 0, sizeof(SDataBlockInfo));
}
@ -1361,6 +1210,7 @@ void* blockDataDestroy(SSDataBlock* pBlock) {
return NULL;
}
// todo remove it
int32_t assignOneDataBlock(SSDataBlock* dst, const SSDataBlock* src) {
dst->info = src->info;
dst->info.rows = 0;
@ -1759,16 +1609,6 @@ static void colDataKeepFirstNRows(SColumnInfoData* pColInfoData, size_t n, size_
if (IS_VAR_DATA_TYPE(pColInfoData->info.type)) {
// pColInfoData->varmeta.length = colDataMoveVarData(pColInfoData, 0, n);
memset(&pColInfoData->varmeta.offset[n], 0, total - n);
} else { // reset the bitmap value
/*int32_t stopIndex = BitmapLen(n) * 8;
for(int32_t i = n; i < stopIndex; ++i) {
colDataClearNull_f(pColInfoData->nullbitmap, i);
}
int32_t remain = BitmapLen(total) - BitmapLen(n);
if (remain > 0) {
memset(pColInfoData->nullbitmap+BitmapLen(n), 0, remain);
}*/
}
}
@ -1875,32 +1715,6 @@ void* tDecodeDataBlock(const void* buf, SSDataBlock* pBlock) {
return (void*)buf;
}
int32_t tEncodeDataBlocks(void** buf, const SArray* blocks) {
int32_t tlen = 0;
int32_t sz = taosArrayGetSize(blocks);
tlen += taosEncodeFixedI32(buf, sz);
for (int32_t i = 0; i < sz; i++) {
SSDataBlock* pBlock = taosArrayGet(blocks, i);
tlen += tEncodeDataBlock(buf, pBlock);
}
return tlen;
}
void* tDecodeDataBlocks(const void* buf, SArray** blocks) {
int32_t sz;
buf = taosDecodeFixedI32(buf, &sz);
*blocks = taosArrayInit(sz, sizeof(SSDataBlock));
for (int32_t i = 0; i < sz; i++) {
SSDataBlock pBlock = {0};
buf = tDecodeDataBlock(buf, &pBlock);
taosArrayPush(*blocks, &pBlock);
}
return (void*)buf;
}
static char* formatTimestamp(char* buf, int64_t val, int precision) {
time_t tt;
int32_t ms = 0;
@ -1950,101 +1764,6 @@ static char* formatTimestamp(char* buf, int64_t val, int precision) {
return buf;
}
#if 0
void blockDebugShowDataBlock(SSDataBlock* pBlock, const char* flag) {
SArray* dataBlocks = taosArrayInit(1, sizeof(SSDataBlock*));
taosArrayPush(dataBlocks, &pBlock);
blockDebugShowDataBlocks(dataBlocks, flag);
taosArrayDestroy(dataBlocks);
}
void blockDebugShowDataBlocks(const SArray* dataBlocks, const char* flag) {
char pBuf[128] = {0};
int32_t sz = taosArrayGetSize(dataBlocks);
for (int32_t i = 0; i < sz; i++) {
SSDataBlock* pDataBlock = taosArrayGet(dataBlocks, i);
size_t numOfCols = taosArrayGetSize(pDataBlock->pDataBlock);
int32_t rows = pDataBlock->info.rows;
printf("%s |block ver %" PRIi64 " |block type %d |child id %d|group id %" PRIu64 "\n", flag,
pDataBlock->info.version, (int32_t)pDataBlock->info.type, pDataBlock->info.childId,
pDataBlock->info.id.groupId);
for (int32_t j = 0; j < rows; j++) {
printf("%s |", flag);
for (int32_t k = 0; k < numOfCols; k++) {
SColumnInfoData* pColInfoData = taosArrayGet(pDataBlock->pDataBlock, k);
void* var = POINTER_SHIFT(pColInfoData->pData, j * pColInfoData->info.bytes);
if (k == 0) {
printf("cols:%d |", (int32_t)numOfCols);
}
if (colDataIsNull(pColInfoData, rows, j, NULL)) {
printf(" %15s |", "NULL");
continue;
}
switch (pColInfoData->info.type) {
case TSDB_DATA_TYPE_TIMESTAMP:
formatTimestamp(pBuf, *(uint64_t*)var, TSDB_TIME_PRECISION_MILLI);
printf(" %25s |", pBuf);
break;
case TSDB_DATA_TYPE_BOOL:
printf(" %15" PRIi8 " |", *(int8_t*)var);
break;
case TSDB_DATA_TYPE_TINYINT:
printf(" %15" PRIi8 " |", *(int8_t*)var);
break;
case TSDB_DATA_TYPE_SMALLINT:
printf(" %15" PRIi16 " |", *(int16_t*)var);
break;
case TSDB_DATA_TYPE_INT:
printf(" %15d |", *(int32_t*)var);
break;
case TSDB_DATA_TYPE_UTINYINT:
printf(" %15" PRIu8 " |", *(uint8_t*)var);
break;
case TSDB_DATA_TYPE_USMALLINT:
printf(" %15" PRIu16 " |", *(uint16_t*)var);
break;
case TSDB_DATA_TYPE_UINT:
printf(" %15u |", *(uint32_t*)var);
break;
case TSDB_DATA_TYPE_BIGINT:
printf(" %15" PRId64 " |", *(int64_t*)var);
break;
case TSDB_DATA_TYPE_UBIGINT:
printf(" %15" PRIu64 " |", *(uint64_t*)var);
break;
case TSDB_DATA_TYPE_FLOAT:
printf(" %15f |", *(float*)var);
break;
case TSDB_DATA_TYPE_DOUBLE:
printf(" %15lf |", *(double*)var);
break;
case TSDB_DATA_TYPE_VARCHAR:
case TSDB_DATA_TYPE_GEOMETRY: {
char* pData = colDataGetVarData(pColInfoData, j);
int32_t dataSize = TMIN(sizeof(pBuf) - 1, varDataLen(pData));
memset(pBuf, 0, dataSize + 1);
strncpy(pBuf, varDataVal(pData), dataSize);
printf(" %15s |", pBuf);
} break;
case TSDB_DATA_TYPE_NCHAR: {
char* pData = colDataGetVarData(pColInfoData, j);
int32_t dataSize = TMIN(sizeof(pBuf), varDataLen(pData));
memset(pBuf, 0, dataSize);
(void)taosUcs4ToMbs((TdUcs4*)varDataVal(pData), dataSize, pBuf);
printf(" %15s |", pBuf);
} break;
default:
break;
}
}
printf("\n");
}
}
}
#endif
// for debug
char* dumpBlockData(SSDataBlock* pDataBlock, const char* flag, char** pDataBuf) {
int32_t size = 2048*1024;
@ -2153,182 +1872,6 @@ char* dumpBlockData(SSDataBlock* pDataBlock, const char* flag, char** pDataBuf)
return dumpBuf;
}
/**
* @brief TODO: Assume that the final generated result it less than 3M
*
* @param pReq
* @param pDataBlocks
* @param vgId
* @param suid
*
*/
#if 0
int32_t buildSubmitReqFromDataBlock(SSubmitReq** pReq, const SSDataBlock* pDataBlock, STSchema* pTSchema, int32_t vgId,
tb_uid_t suid) {
int32_t bufSize = sizeof(SSubmitReq);
int32_t sz = 1;
for (int32_t i = 0; i < sz; ++i) {
const SDataBlockInfo* pBlkInfo = &pDataBlock->info;
int32_t colNum = taosArrayGetSize(pDataBlock->pDataBlock);
bufSize += pBlkInfo->rows * (TD_ROW_HEAD_LEN + pBlkInfo->rowSize + BitmapLen(colNum));
bufSize += sizeof(SSubmitBlk);
}
*pReq = taosMemoryCalloc(1, bufSize);
if (!(*pReq)) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return TSDB_CODE_FAILED;
}
void* pDataBuf = *pReq;
int32_t msgLen = sizeof(SSubmitReq);
int32_t numOfBlks = 0;
SRowBuilder rb = {0};
tdSRowInit(&rb, pTSchema->version);
for (int32_t i = 0; i < sz; ++i) {
int32_t colNum = taosArrayGetSize(pDataBlock->pDataBlock);
int32_t rows = pDataBlock->info.rows;
if (colNum <= 1) {
// invalid if only with TS col
continue;
}
if (rb.nCols != colNum) {
tdSRowSetTpInfo(&rb, colNum, pTSchema->flen);
}
SSubmitBlk* pSubmitBlk = POINTER_SHIFT(pDataBuf, msgLen);
pSubmitBlk->suid = suid;
pSubmitBlk->uid = pDataBlock->info.id.groupId;
pSubmitBlk->numOfRows = rows;
pSubmitBlk->sversion = pTSchema->version;
msgLen += sizeof(SSubmitBlk);
int32_t dataLen = 0;
for (int32_t j = 0; j < rows; ++j) { // iterate by row
tdSRowResetBuf(&rb, POINTER_SHIFT(pDataBuf, msgLen + dataLen)); // set row buf
bool isStartKey = false;
int32_t offset = 0;
for (int32_t k = 0; k < colNum; ++k) { // iterate by column
SColumnInfoData* pColInfoData = taosArrayGet(pDataBlock->pDataBlock, k);
STColumn* pCol = &pTSchema->columns[k];
void* var = POINTER_SHIFT(pColInfoData->pData, j * pColInfoData->info.bytes);
switch (pColInfoData->info.type) {
case TSDB_DATA_TYPE_TIMESTAMP:
if (!isStartKey) {
isStartKey = true;
tdAppendColValToRow(&rb, PRIMARYKEY_TIMESTAMP_COL_ID, TSDB_DATA_TYPE_TIMESTAMP, TD_VTYPE_NORM, var, true,
offset, k);
continue; // offset should keep 0 for next column
} else if (colDataIsNull_s(pColInfoData, j)) {
tdAppendColValToRow(&rb, PRIMARYKEY_TIMESTAMP_COL_ID + k, TSDB_DATA_TYPE_TIMESTAMP, TD_VTYPE_NULL, NULL,
false, offset, k);
} else {
tdAppendColValToRow(&rb, PRIMARYKEY_TIMESTAMP_COL_ID + k, TSDB_DATA_TYPE_TIMESTAMP, TD_VTYPE_NORM, var,
true, offset, k);
}
break;
case TSDB_DATA_TYPE_NCHAR:
case TSDB_DATA_TYPE_VARCHAR: // TSDB_DATA_TYPE_BINARY
case TSDB_DATA_TYPE_GEOMETRY: {
if (colDataIsNull_s(pColInfoData, j)) {
tdAppendColValToRow(&rb, PRIMARYKEY_TIMESTAMP_COL_ID + k, pColInfoData->info.type, TD_VTYPE_NULL, NULL,
false, offset, k);
} else {
void* data = colDataGetData(pColInfoData, j);
tdAppendColValToRow(&rb, PRIMARYKEY_TIMESTAMP_COL_ID + k, pColInfoData->info.type, TD_VTYPE_NORM, data,
true, offset, k);
}
break;
}
case TSDB_DATA_TYPE_VARBINARY:
case TSDB_DATA_TYPE_DECIMAL:
case TSDB_DATA_TYPE_BLOB:
case TSDB_DATA_TYPE_JSON:
case TSDB_DATA_TYPE_MEDIUMBLOB:
uError("the column type %" PRIi16 " is defined but not implemented yet", pColInfoData->info.type);
break;
default:
if (pColInfoData->info.type < TSDB_DATA_TYPE_MAX && pColInfoData->info.type > TSDB_DATA_TYPE_NULL) {
if (colDataIsNull_s(pColInfoData, j)) {
tdAppendColValToRow(&rb, PRIMARYKEY_TIMESTAMP_COL_ID + k, pCol->type, TD_VTYPE_NULL, NULL, false,
offset, k);
} else if (pCol->type == pColInfoData->info.type) {
tdAppendColValToRow(&rb, PRIMARYKEY_TIMESTAMP_COL_ID + k, pCol->type, TD_VTYPE_NORM, var, true, offset,
k);
} else {
char tv[8] = {0};
if (pColInfoData->info.type == TSDB_DATA_TYPE_FLOAT) {
float v = 0;
GET_TYPED_DATA(v, float, pColInfoData->info.type, var);
SET_TYPED_DATA(&tv, pCol->type, v);
} else if (pColInfoData->info.type == TSDB_DATA_TYPE_DOUBLE) {
double v = 0;
GET_TYPED_DATA(v, double, pColInfoData->info.type, var);
SET_TYPED_DATA(&tv, pCol->type, v);
} else if (IS_SIGNED_NUMERIC_TYPE(pColInfoData->info.type)) {
int64_t v = 0;
GET_TYPED_DATA(v, int64_t, pColInfoData->info.type, var);
SET_TYPED_DATA(&tv, pCol->type, v);
} else {
uint64_t v = 0;
GET_TYPED_DATA(v, uint64_t, pColInfoData->info.type, var);
SET_TYPED_DATA(&tv, pCol->type, v);
}
tdAppendColValToRow(&rb, PRIMARYKEY_TIMESTAMP_COL_ID + k, pCol->type, TD_VTYPE_NORM, tv, true, offset,
k);
}
} else {
uError("the column type %" PRIi16 " is undefined\n", pColInfoData->info.type);
}
break;
}
offset += TYPE_BYTES[pCol->type]; // sum/avg would convert to int64_t/uint64_t/double during aggregation
}
tdSRowEnd(&rb);
dataLen += TD_ROW_LEN(rb.pBuf);
#ifdef TD_DEBUG_PRINT_ROW
tdSRowPrint(rb.pBuf, pTSchema, __func__);
#endif
}
++numOfBlks;
pSubmitBlk->dataLen = dataLen;
msgLen += pSubmitBlk->dataLen;
}
if (numOfBlks > 0) {
(*pReq)->length = msgLen;
(*pReq)->header.vgId = htonl(vgId);
(*pReq)->header.contLen = htonl(msgLen);
(*pReq)->length = (*pReq)->header.contLen;
(*pReq)->numOfBlocks = htonl(numOfBlks);
SSubmitBlk* blk = (SSubmitBlk*)((*pReq) + 1);
while (numOfBlks--) {
int32_t dataLen = blk->dataLen;
blk->uid = htobe64(blk->uid);
blk->suid = htobe64(blk->suid);
blk->sversion = htonl(blk->sversion);
blk->dataLen = htonl(blk->dataLen);
blk->schemaLen = htonl(blk->schemaLen);
blk->numOfRows = htonl(blk->numOfRows);
blk = (SSubmitBlk*)(blk->data + dataLen);
}
} else {
// no valid rows
taosMemoryFreeClear(*pReq);
}
return TSDB_CODE_SUCCESS;
}
#endif
int32_t buildSubmitReqFromDataBlock(SSubmitReq2** ppReq, const SSDataBlock* pDataBlock, const STSchema* pTSchema,
int64_t uid, int32_t vgId, tb_uid_t suid) {
SSubmitReq2* pReq = *ppReq;
@ -2732,3 +2275,149 @@ const char* blockDecode(SSDataBlock* pBlock, const char* pData) {
ASSERT(pStart - pData == dataLen);
return pStart;
}
void trimDataBlock(SSDataBlock* pBlock, int32_t totalRows, const bool* pBoolList) {
// int32_t totalRows = pBlock->info.rows;
int32_t bmLen = BitmapLen(totalRows);
char* pBitmap = NULL;
int32_t maxRows = 0;
size_t numOfCols = taosArrayGetSize(pBlock->pDataBlock);
for (int32_t i = 0; i < numOfCols; ++i) {
SColumnInfoData* pDst = taosArrayGet(pBlock->pDataBlock, i);
// it is a reserved column for scalar function, and no data in this column yet.
if (pDst->pData == NULL) {
continue;
}
int32_t numOfRows = 0;
if (IS_VAR_DATA_TYPE(pDst->info.type)) {
int32_t j = 0;
pDst->varmeta.length = 0;
while (j < totalRows) {
if (pBoolList[j] == 0) {
j += 1;
continue;
}
if (colDataIsNull_var(pDst, j)) {
colDataSetNull_var(pDst, numOfRows);
} else {
// fix address sanitizer error. p1 may point to memory that will change during realloc of colDataSetVal, first copy it to p2
char* p1 = colDataGetVarData(pDst, j);
int32_t len = 0;
if (pDst->info.type == TSDB_DATA_TYPE_JSON) {
len = getJsonValueLen(p1);
} else {
len = varDataTLen(p1);
}
char* p2 = taosMemoryMalloc(len);
memcpy(p2, p1, len);
colDataSetVal(pDst, numOfRows, p2, false);
taosMemoryFree(p2);
}
numOfRows += 1;
j += 1;
}
if (maxRows < numOfRows) {
maxRows = numOfRows;
}
} else {
if (pBitmap == NULL) {
pBitmap = taosMemoryCalloc(1, bmLen);
}
memcpy(pBitmap, pDst->nullbitmap, bmLen);
memset(pDst->nullbitmap, 0, bmLen);
int32_t j = 0;
switch (pDst->info.type) {
case TSDB_DATA_TYPE_BIGINT:
case TSDB_DATA_TYPE_UBIGINT:
case TSDB_DATA_TYPE_DOUBLE:
case TSDB_DATA_TYPE_TIMESTAMP:
while (j < totalRows) {
if (pBoolList[j] == 0) {
j += 1;
continue;
}
if (colDataIsNull_f(pBitmap, j)) {
colDataSetNull_f(pDst->nullbitmap, numOfRows);
} else {
((int64_t*)pDst->pData)[numOfRows] = ((int64_t*)pDst->pData)[j];
}
numOfRows += 1;
j += 1;
}
break;
case TSDB_DATA_TYPE_FLOAT:
case TSDB_DATA_TYPE_INT:
case TSDB_DATA_TYPE_UINT:
while (j < totalRows) {
if (pBoolList[j] == 0) {
j += 1;
continue;
}
if (colDataIsNull_f(pBitmap, j)) {
colDataSetNull_f(pDst->nullbitmap, numOfRows);
} else {
((int32_t*)pDst->pData)[numOfRows] = ((int32_t*)pDst->pData)[j];
}
numOfRows += 1;
j += 1;
}
break;
case TSDB_DATA_TYPE_SMALLINT:
case TSDB_DATA_TYPE_USMALLINT:
while (j < totalRows) {
if (pBoolList[j] == 0) {
j += 1;
continue;
}
if (colDataIsNull_f(pBitmap, j)) {
colDataSetNull_f(pDst->nullbitmap, numOfRows);
} else {
((int16_t*)pDst->pData)[numOfRows] = ((int16_t*)pDst->pData)[j];
}
numOfRows += 1;
j += 1;
}
break;
case TSDB_DATA_TYPE_BOOL:
case TSDB_DATA_TYPE_TINYINT:
case TSDB_DATA_TYPE_UTINYINT:
while (j < totalRows) {
if (pBoolList[j] == 0) {
j += 1;
continue;
}
if (colDataIsNull_f(pBitmap, j)) {
colDataSetNull_f(pDst->nullbitmap, numOfRows);
} else {
((int8_t*)pDst->pData)[numOfRows] = ((int8_t*)pDst->pData)[j];
}
numOfRows += 1;
j += 1;
}
break;
}
}
if (maxRows < numOfRows) {
maxRows = numOfRows;
}
}
pBlock->info.rows = maxRows;
if (pBitmap != NULL) {
taosMemoryFree(pBitmap);
}
}
int32_t blockGetEncodeSize(const SSDataBlock* pBlock) {
return blockDataGetSerialMetaSize(taosArrayGetSize(pBlock->pDataBlock)) + blockDataGetSize(pBlock);
}

View File

@ -111,7 +111,7 @@ int32_t tmqMaxTopicNum = 20;
int32_t tsQueryPolicy = 1;
int32_t tsQueryRspPolicy = 0;
int64_t tsQueryMaxConcurrentTables = 200; // unit is TSDB_TABLE_NUM_UNIT
bool tsEnableQueryHb = false;
bool tsEnableQueryHb = true;
bool tsEnableScience = false; // on taos-cli show float and doulbe with scientific notation if true
bool tsTtlChangeOnWrite = false; // ttl delete time changes on last write if true
int32_t tsQuerySmaOptimize = 0;

View File

@ -76,6 +76,9 @@ SArray *smGetMsgHandles() {
if (dmSetMgmtHandle(pArray, TDMT_STREAM_RETRIEVE_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_PAUSE, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RESUME, smPutNodeMsgToMgmtQueue, 1) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECK, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECK_RSP, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_SCAN_HISTORY_FINISH, smPutNodeMsgToStreamQueue, 1) == NULL) goto _OVER;
code = 0;
_OVER:

View File

@ -746,9 +746,10 @@ SArray *vmGetMsgHandles() {
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_DISPATCH_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_RETRIEVE, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_RETRIEVE_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_RECOVER_FINISH, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_SCAN_HISTORY_FINISH, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TRANSFER_STATE, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECK, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECK_RSP, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_CHECK_RSP, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_VND_STREAM_TRIGGER, vmPutMsgToStreamQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_PAUSE, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER;
if (dmSetMgmtHandle(pArray, TDMT_STREAM_TASK_RESUME, vmPutMsgToWriteQueue, 0) == NULL) goto _OVER;

View File

@ -92,7 +92,7 @@ static void vmProcessStreamQueue(SQueueInfo *pInfo, SRpcMsg *pMsg) {
const STraceId *trace = &pMsg->info.traceId;
dGTrace("vgId:%d, msg:%p get from vnode-stream queue", pVnode->vgId, pMsg);
int32_t code = vnodeProcessFetchMsg(pVnode->pImpl, pMsg, pInfo);
int32_t code = vnodeProcessStreamMsg(pVnode->pImpl, pMsg, pInfo);
if (code != 0) {
if (terrno != 0) code = terrno;
dGError("vgId:%d, msg:%p failed to process stream msg %s since %s", pVnode->vgId, pMsg, TMSG_INFO(pMsg->msgType),

View File

@ -647,6 +647,14 @@ typedef struct {
// SMqSubActionLogEntry* pLogEntry;
} SMqRebOutputObj;
typedef struct SStreamConf {
int8_t igExpired;
int8_t trigger;
int8_t fillHistory;
int64_t triggerParam;
int64_t watermark;
} SStreamConf;
typedef struct {
char name[TSDB_STREAM_FNAME_LEN];
// ctl
@ -660,12 +668,7 @@ typedef struct {
// info
int64_t uid;
int8_t status;
// config
int8_t igExpired;
int8_t trigger;
int8_t fillHistory;
int64_t triggerParam;
int64_t watermark;
SStreamConf conf;
// source and target
int64_t sourceDbUid;
int64_t targetDbUid;
@ -675,14 +678,18 @@ typedef struct {
int64_t targetStbUid;
// fixedSinkVg is not applicable for encode and decode
SVgObj fixedSinkVg;
SVgObj fixedSinkVg;
int32_t fixedSinkVgId; // 0 for shuffle
// transformation
char* sql;
char* ast;
char* physicalPlan;
SArray* tasks; // SArray<SArray<SStreamTask>>
SArray* tasks; // SArray<SArray<SStreamTask>>
SArray* pHTasksList; // generate the results for already stored ts data
int64_t hTaskUid; // stream task for history ts data
SSchemaWrapper outputSchema;
SSchemaWrapper tagSchema;

View File

@ -30,7 +30,7 @@ int32_t mndSchedInitSubEp(SMnode* pMnode, const SMqTopicObj* pTopic, SMqSubscrib
int32_t mndConvertRsmaTask(char** pDst, int32_t* pDstLen, const char* ast, int64_t uid, int8_t triggerType,
int64_t watermark, int64_t deleteMark);
int32_t mndScheduleStream(SMnode* pMnode, SStreamObj* pStream);
int32_t mndScheduleStream(SMnode* pMnode, SStreamObj* pStream, int64_t nextWindowSkey);
#ifdef __cplusplus
}

View File

@ -30,11 +30,11 @@ int32_t tEncodeSStreamObj(SEncoder *pEncoder, const SStreamObj *pObj) {
if (tEncodeI64(pEncoder, pObj->uid) < 0) return -1;
if (tEncodeI8(pEncoder, pObj->status) < 0) return -1;
if (tEncodeI8(pEncoder, pObj->igExpired) < 0) return -1;
if (tEncodeI8(pEncoder, pObj->trigger) < 0) return -1;
if (tEncodeI8(pEncoder, pObj->fillHistory) < 0) return -1;
if (tEncodeI64(pEncoder, pObj->triggerParam) < 0) return -1;
if (tEncodeI64(pEncoder, pObj->watermark) < 0) return -1;
if (tEncodeI8(pEncoder, pObj->conf.igExpired) < 0) return -1;
if (tEncodeI8(pEncoder, pObj->conf.trigger) < 0) return -1;
if (tEncodeI8(pEncoder, pObj->conf.fillHistory) < 0) return -1;
if (tEncodeI64(pEncoder, pObj->conf.triggerParam) < 0) return -1;
if (tEncodeI64(pEncoder, pObj->conf.watermark) < 0) return -1;
if (tEncodeI64(pEncoder, pObj->sourceDbUid) < 0) return -1;
if (tEncodeI64(pEncoder, pObj->targetDbUid) < 0) return -1;
@ -97,11 +97,11 @@ int32_t tDecodeSStreamObj(SDecoder *pDecoder, SStreamObj *pObj, int32_t sver) {
if (tDecodeI64(pDecoder, &pObj->uid) < 0) return -1;
if (tDecodeI8(pDecoder, &pObj->status) < 0) return -1;
if (tDecodeI8(pDecoder, &pObj->igExpired) < 0) return -1;
if (tDecodeI8(pDecoder, &pObj->trigger) < 0) return -1;
if (tDecodeI8(pDecoder, &pObj->fillHistory) < 0) return -1;
if (tDecodeI64(pDecoder, &pObj->triggerParam) < 0) return -1;
if (tDecodeI64(pDecoder, &pObj->watermark) < 0) return -1;
if (tDecodeI8(pDecoder, &pObj->conf.igExpired) < 0) return -1;
if (tDecodeI8(pDecoder, &pObj->conf.trigger) < 0) return -1;
if (tDecodeI8(pDecoder, &pObj->conf.fillHistory) < 0) return -1;
if (tDecodeI64(pDecoder, &pObj->conf.triggerParam) < 0) return -1;
if (tDecodeI64(pDecoder, &pObj->conf.watermark) < 0) return -1;
if (tDecodeI64(pDecoder, &pObj->sourceDbUid) < 0) return -1;
if (tDecodeI64(pDecoder, &pObj->targetDbUid) < 0) return -1;
@ -154,18 +154,10 @@ int32_t tDecodeSStreamObj(SDecoder *pDecoder, SStreamObj *pObj, int32_t sver) {
return 0;
}
void tFreeStreamObj(SStreamObj *pStream) {
taosMemoryFree(pStream->sql);
taosMemoryFree(pStream->ast);
taosMemoryFree(pStream->physicalPlan);
if (pStream->outputSchema.nCols) {
taosMemoryFree(pStream->outputSchema.pSchema);
}
int32_t sz = taosArrayGetSize(pStream->tasks);
for (int32_t i = 0; i < sz; i++) {
SArray *pLevel = taosArrayGetP(pStream->tasks, i);
static void* freeStreamTasks(SArray* pTaskLevel) {
int32_t numOfLevel = taosArrayGetSize(pTaskLevel);
for (int32_t i = 0; i < numOfLevel; i++) {
SArray *pLevel = taosArrayGetP(pTaskLevel, i);
int32_t taskSz = taosArrayGetSize(pLevel);
for (int32_t j = 0; j < taskSz; j++) {
SStreamTask *pTask = taosArrayGetP(pLevel, j);
@ -175,7 +167,20 @@ void tFreeStreamObj(SStreamObj *pStream) {
taosArrayDestroy(pLevel);
}
taosArrayDestroy(pStream->tasks);
return taosArrayDestroy(pTaskLevel);
}
void tFreeStreamObj(SStreamObj *pStream) {
taosMemoryFree(pStream->sql);
taosMemoryFree(pStream->ast);
taosMemoryFree(pStream->physicalPlan);
if (pStream->outputSchema.nCols || pStream->outputSchema.pSchema) {
taosMemoryFree(pStream->outputSchema.pSchema);
}
pStream->tasks = freeStreamTasks(pStream->tasks);
pStream->pHTasksList = freeStreamTasks(pStream->pHTasksList);
// tagSchema.pSchema
if (pStream->tagSchema.nCols > 0) {

View File

@ -367,10 +367,10 @@ void dumpStream(SSdb *pSdb, SJson *json) {
tjsonAddStringToObject(item, "smaId", i642str(pObj->smaId));
tjsonAddStringToObject(item, "uid", i642str(pObj->uid));
tjsonAddStringToObject(item, "status", i642str(pObj->status));
tjsonAddStringToObject(item, "igExpired", i642str(pObj->igExpired));
tjsonAddStringToObject(item, "trigger", i642str(pObj->trigger));
tjsonAddStringToObject(item, "triggerParam", i642str(pObj->triggerParam));
tjsonAddStringToObject(item, "watermark", i642str(pObj->watermark));
tjsonAddStringToObject(item, "igExpired", i642str(pObj->conf.igExpired));
tjsonAddStringToObject(item, "trigger", i642str(pObj->conf.trigger));
tjsonAddStringToObject(item, "triggerParam", i642str(pObj->conf.triggerParam));
tjsonAddStringToObject(item, "watermark", i642str(pObj->conf.watermark));
tjsonAddStringToObject(item, "sourceDbUid", i642str(pObj->sourceDbUid));
tjsonAddStringToObject(item, "targetDbUid", i642str(pObj->targetDbUid));
tjsonAddStringToObject(item, "sourceDb", mndGetDbStr(pObj->sourceDb));

View File

@ -542,32 +542,32 @@ int32_t mndRetrieveTagIdx(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBlock, i
STR_TO_VARSTR(n3, (char *)tNameGetTableName(&stbName));
SColumnInfoData *pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
colDataAppend(pColInfo, numOfRows, (const char *)n1, false);
colDataSetVal(pColInfo, numOfRows, (const char *)n1, false);
pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
colDataAppend(pColInfo, numOfRows, (const char *)n2, false);
colDataSetVal(pColInfo, numOfRows, (const char *)n2, false);
pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
colDataAppend(pColInfo, numOfRows, (const char *)n3, false);
colDataSetVal(pColInfo, numOfRows, (const char *)n3, false);
pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
colDataAppend(pColInfo, numOfRows, (const char *)&invalid, false);
colDataSetVal(pColInfo, numOfRows, (const char *)&invalid, false);
pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
colDataAppend(pColInfo, numOfRows, (const char *)&pIdx->createdTime, false);
colDataSetVal(pColInfo, numOfRows, (const char *)&pIdx->createdTime, false);
char col[TSDB_TABLE_FNAME_LEN + VARSTR_HEADER_SIZE] = {0};
STR_TO_VARSTR(col, (char *)pIdx->colName);
pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
colDataAppend(pColInfo, numOfRows, (const char *)col, false);
colDataSetVal(pColInfo, numOfRows, (const char *)col, false);
pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
char tag[TSDB_TABLE_FNAME_LEN + VARSTR_HEADER_SIZE] = {0};
STR_TO_VARSTR(tag, (char *)"tag_index");
colDataAppend(pColInfo, numOfRows, (const char *)tag, false);
colDataSetVal(pColInfo, numOfRows, (const char *)tag, false);
numOfRows++;
sdbRelease(pSdb, pIdx);

View File

@ -842,7 +842,7 @@ static int32_t packQueriesIntoBlock(SShowObj* pShow, SConnObj* pConn, SSDataBloc
}
varDataLen(subStatus) = strlen(&subStatus[VARSTR_HEADER_SIZE]);
pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
colDataSetVal(pColInfo, curRowIndex, subStatus, false);
colDataSetVal(pColInfo, curRowIndex, subStatus, (varDataLen(subStatus) == 0) ? true : false);
char sql[TSDB_SHOW_SQL_LEN + VARSTR_HEADER_SIZE] = {0};
STR_TO_VARSTR(sql, pQuery->sql);

View File

@ -22,10 +22,12 @@
#include "tname.h"
#include "tuuid.h"
#define SINK_NODE_LEVEL (0)
extern bool tsDeployOnSnode;
static int32_t mndAddSinkTaskToStream(SStreamObj* pStream, SMnode* pMnode, int32_t vgId, SVgObj* pVgroup);
static void setFixedDownstreamEpInfo(SStreamTask* pDstTask, const SStreamTask* pTask);
static int32_t mndAddSinkTaskToStream(SStreamObj* pStream, SArray* pTaskList, SMnode* pMnode, int32_t vgId,
SVgObj* pVgroup, int32_t fillHistory);
static void setFixedDownstreamEpInfo(SStreamTask* pDstTask, const SStreamTask* pTask);
int32_t mndConvertRsmaTask(char** pDst, int32_t* pDstLen, const char* ast, int64_t uid, int8_t triggerType,
int64_t watermark, int64_t deleteMark) {
@ -100,18 +102,16 @@ int32_t mndSetSinkTaskInfo(SStreamObj* pStream, SStreamTask* pTask) {
return 0;
}
#define SINK_NODE_LEVEL (0)
int32_t mndAddDispatcherForInnerTask(SMnode* pMnode, SStreamObj* pStream, SStreamTask* pTask) {
int32_t mndAddDispatcherForInternalTask(SMnode* pMnode, SStreamObj* pStream, SArray* pSinkNodeList,
SStreamTask* pTask) {
bool isShuffle = false;
if (pStream->fixedSinkVgId == 0) {
SDbObj* pDb = mndAcquireDb(pMnode, pStream->targetDb);
if (pDb != NULL && pDb->cfg.numOfVgroups > 1) {
isShuffle = true;
pTask->outputType = TASK_OUTPUT__SHUFFLE_DISPATCH;
pTask->dispatchMsgType = TDMT_STREAM_TASK_DISPATCH;
pTask->msgInfo.msgType = TDMT_STREAM_TASK_DISPATCH;
if (mndExtractDbInfo(pMnode, pDb, &pTask->shuffleDispatcher.dbInfo, NULL) < 0) {
return -1;
}
@ -120,7 +120,6 @@ int32_t mndAddDispatcherForInnerTask(SMnode* pMnode, SStreamObj* pStream, SStrea
sdbRelease(pMnode->pSdb, pDb);
}
SArray* pSinkNodeList = taosArrayGetP(pStream->tasks, SINK_NODE_LEVEL);
int32_t numOfSinkNodes = taosArrayGetSize(pSinkNodeList);
if (isShuffle) {
@ -133,7 +132,7 @@ int32_t mndAddDispatcherForInnerTask(SMnode* pMnode, SStreamObj* pStream, SStrea
for (int32_t j = 0; j < numOfSinkNodes; j++) {
SStreamTask* pSinkTask = taosArrayGetP(pSinkNodeList, j);
if (pSinkTask->nodeId == pVgInfo->vgId) {
if (pSinkTask->info.nodeId == pVgInfo->vgId) {
pVgInfo->taskId = pSinkTask->id.taskId;
break;
}
@ -150,11 +149,11 @@ int32_t mndAddDispatcherForInnerTask(SMnode* pMnode, SStreamObj* pStream, SStrea
int32_t mndAssignStreamTaskToVgroup(SMnode* pMnode, SStreamTask* pTask, SSubplan* plan, const SVgObj* pVgroup) {
int32_t msgLen;
pTask->nodeId = pVgroup->vgId;
pTask->epSet = mndGetVgroupEpset(pMnode, pVgroup);
pTask->info.nodeId = pVgroup->vgId;
pTask->info.epSet = mndGetVgroupEpset(pMnode, pVgroup);
plan->execNode.nodeId = pTask->nodeId;
plan->execNode.epSet = pTask->epSet;
plan->execNode.nodeId = pTask->info.nodeId;
plan->execNode.epSet = pTask->info.epSet;
if (qSubPlanToString(plan, &pTask->exec.qmsg, &msgLen) < 0) {
terrno = TSDB_CODE_QRY_INVALID_INPUT;
return -1;
@ -171,14 +170,15 @@ SSnodeObj* mndSchedFetchOneSnode(SMnode* pMnode) {
return pObj;
}
int32_t mndAssignTaskToSnode(SMnode* pMnode, SStreamTask* pTask, SSubplan* plan, const SSnodeObj* pSnode) {
int32_t mndAssignStreamTaskToSnode(SMnode* pMnode, SStreamTask* pTask, SSubplan* plan, const SSnodeObj* pSnode) {
int32_t msgLen;
pTask->nodeId = SNODE_HANDLE;
pTask->epSet = mndAcquireEpFromSnode(pMnode, pSnode);
pTask->info.nodeId = SNODE_HANDLE;
pTask->info.epSet = mndAcquireEpFromSnode(pMnode, pSnode);
plan->execNode.nodeId = SNODE_HANDLE;
plan->execNode.epSet = pTask->epSet;
plan->execNode.epSet = pTask->info.epSet;
mDebug("s-task:0x%x set the agg task to snode:%d", pTask->id.taskId, SNODE_HANDLE);
if (qSubPlanToString(plan, &pTask->exec.qmsg, &msgLen) < 0) {
terrno = TSDB_CODE_QRY_INVALID_INPUT;
@ -187,6 +187,7 @@ int32_t mndAssignTaskToSnode(SMnode* pMnode, SStreamTask* pTask, SSubplan* plan,
return 0;
}
// todo random choose a node to do compute
SVgObj* mndSchedFetchOneVg(SMnode* pMnode, int64_t dbUid) {
void* pIter = NULL;
SVgObj* pVgroup = NULL;
@ -203,9 +204,9 @@ SVgObj* mndSchedFetchOneVg(SMnode* pMnode, int64_t dbUid) {
}
// create sink node for each vgroup.
int32_t mndAddShuffleSinkTasksToStream(SMnode* pMnode, SStreamObj* pStream) {
SSdb* pSdb = pMnode->pSdb;
void* pIter = NULL;
int32_t mndAddShuffleSinkTasksToStream(SMnode* pMnode, SArray* pTaskList, SStreamObj* pStream, int32_t fillHistory) {
SSdb* pSdb = pMnode->pSdb;
void* pIter = NULL;
while (1) {
SVgObj* pVgroup = NULL;
@ -219,43 +220,45 @@ int32_t mndAddShuffleSinkTasksToStream(SMnode* pMnode, SStreamObj* pStream) {
continue;
}
mndAddSinkTaskToStream(pStream, pMnode, pVgroup->vgId, pVgroup);
mndAddSinkTaskToStream(pStream, pTaskList, pMnode, pVgroup->vgId, pVgroup, fillHistory);
sdbRelease(pSdb, pVgroup);
}
return 0;
}
int32_t mndAddSinkTaskToStream(SStreamObj* pStream, SMnode* pMnode, int32_t vgId, SVgObj* pVgroup) {
SArray* pTaskList = taosArrayGetP(pStream->tasks, SINK_NODE_LEVEL);
SStreamTask* pTask = tNewStreamTask(pStream->uid, TASK_LEVEL__SINK, pStream->fillHistory, 0, pTaskList);
int32_t mndAddSinkTaskToStream(SStreamObj* pStream, SArray* pTaskList, SMnode* pMnode, int32_t vgId, SVgObj* pVgroup,
int32_t fillHistory) {
SStreamTask* pTask = tNewStreamTask(pStream->uid, TASK_LEVEL__SINK, fillHistory, 0, pTaskList);
if (pTask == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
}
pTask->nodeId = vgId;
pTask->epSet = mndGetVgroupEpset(pMnode, pVgroup);
pTask->info.nodeId = vgId;
pTask->info.epSet = mndGetVgroupEpset(pMnode, pVgroup);
mndSetSinkTaskInfo(pStream, pTask);
return 0;
}
static int32_t mndScheduleFillHistoryStreamTask(SMnode* pMnode, SStreamObj* pStream) {
return 0;
}
static int32_t addSourceStreamTask(SMnode* pMnode, SVgObj* pVgroup, SArray* pTaskList, SStreamObj* pStream,
SSubplan* plan, uint64_t uid, int8_t taskLevel, int8_t fillHistory,
bool hasExtraSink) {
SStreamTask* pTask = tNewStreamTask(uid, taskLevel, fillHistory, pStream->triggerParam, pTaskList);
static int32_t addSourceStreamTask(SMnode* pMnode, SVgObj* pVgroup, SArray* pTaskList, SArray* pSinkTaskList,
SStreamObj* pStream, SSubplan* plan, uint64_t uid, int8_t fillHistory,
bool hasExtraSink, int64_t firstWindowSkey) {
SStreamTask* pTask = tNewStreamTask(uid, TASK_LEVEL__SOURCE, fillHistory, pStream->conf.triggerParam, pTaskList);
if (pTask == NULL) {
return terrno;
}
// todo set the correct ts, which should be last key of queried table.
STimeWindow* pWindow = &pTask->dataRange.window;
pWindow->skey = INT64_MIN;
pWindow->ekey = firstWindowSkey - 1;
mDebug("add source task 0x%x window:%" PRId64 " - %" PRId64, pTask->id.taskId, pWindow->skey, pWindow->ekey);
// sink or dispatch
if (hasExtraSink) {
mndAddDispatcherForInnerTask(pMnode, pStream, pTask);
mndAddDispatcherForInternalTask(pMnode, pStream, pSinkTaskList, pTask);
} else {
mndSetSinkTaskInfo(pStream, pTask);
}
@ -274,9 +277,9 @@ static SStreamChildEpInfo* createStreamTaskEpInfo(SStreamTask* pTask) {
return NULL;
}
pEpInfo->childId = pTask->selfChildId;
pEpInfo->epSet = pTask->epSet;
pEpInfo->nodeId = pTask->nodeId;
pEpInfo->childId = pTask->info.selfChildId;
pEpInfo->epSet = pTask->info.epSet;
pEpInfo->nodeId = pTask->info.nodeId;
pEpInfo->taskId = pTask->id.taskId;
return pEpInfo;
@ -285,38 +288,307 @@ static SStreamChildEpInfo* createStreamTaskEpInfo(SStreamTask* pTask) {
void setFixedDownstreamEpInfo(SStreamTask* pDstTask, const SStreamTask* pTask) {
STaskDispatcherFixedEp* pDispatcher = &pDstTask->fixedEpDispatcher;
pDispatcher->taskId = pTask->id.taskId;
pDispatcher->nodeId = pTask->nodeId;
pDispatcher->epSet = pTask->epSet;
pDispatcher->nodeId = pTask->info.nodeId;
pDispatcher->epSet = pTask->info.epSet;
pDstTask->outputType = TASK_OUTPUT__FIXED_DISPATCH;
pDstTask->dispatchMsgType = TDMT_STREAM_TASK_DISPATCH;
pDstTask->msgInfo.msgType = TDMT_STREAM_TASK_DISPATCH;
}
int32_t appendToUpstream(SStreamTask* pTask, SStreamTask* pUpstream) {
int32_t setEpToDownstreamTask(SStreamTask* pTask, SStreamTask* pDownstream) {
SStreamChildEpInfo* pEpInfo = createStreamTaskEpInfo(pTask);
if (pEpInfo == NULL) {
return TSDB_CODE_OUT_OF_MEMORY;
}
if(pUpstream->childEpInfo == NULL) {
pUpstream->childEpInfo = taosArrayInit(4, POINTER_BYTES);
if (pDownstream->pUpstreamEpInfoList == NULL) {
pDownstream->pUpstreamEpInfoList = taosArrayInit(4, POINTER_BYTES);
}
taosArrayPush(pUpstream->childEpInfo, &pEpInfo);
taosArrayPush(pDownstream->pUpstreamEpInfoList, &pEpInfo);
return TSDB_CODE_SUCCESS;
}
int32_t mndScheduleStream(SMnode* pMnode, SStreamObj* pStream) {
SSdb* pSdb = pMnode->pSdb;
static SArray* addNewTaskList(SArray* pTasksList) {
SArray* pTaskList = taosArrayInit(0, POINTER_BYTES);
taosArrayPush(pTasksList, &pTaskList);
return pTaskList;
}
SQueryPlan* pPlan = qStringToQueryPlan(pStream->physicalPlan);
if (pPlan == NULL) {
// set the history task id
static void setHTasksId(SArray* pTaskList, const SArray* pHTaskList) {
for (int32_t i = 0; i < taosArrayGetSize(pTaskList); ++i) {
SStreamTask** pStreamTask = taosArrayGet(pTaskList, i);
SStreamTask** pHTask = taosArrayGet(pHTaskList, i);
(*pStreamTask)->historyTaskId.taskId = (*pHTask)->id.taskId;
(*pStreamTask)->historyTaskId.streamId = (*pHTask)->id.streamId;
(*pHTask)->streamTaskId.taskId = (*pStreamTask)->id.taskId;
(*pHTask)->streamTaskId.streamId = (*pStreamTask)->id.streamId;
mDebug("s-task:0x%x related history task:0x%x, level:%d", (*pStreamTask)->id.taskId, (*pHTask)->id.taskId,
(*pHTask)->info.taskLevel);
}
}
static int32_t addSourceTasksForOneLevelStream(SMnode* pMnode, const SQueryPlan* pPlan, SStreamObj* pStream,
bool hasExtraSink, int64_t nextWindowSkey) {
// create exec stream task, since only one level, the exec task is also the source task
SArray* pTaskList = addNewTaskList(pStream->tasks);
SSdb* pSdb = pMnode->pSdb;
SArray* pHTaskList = NULL;
if (pStream->conf.fillHistory) {
pHTaskList = addNewTaskList(pStream->pHTasksList);
}
SNodeListNode* inner = (SNodeListNode*)nodesListGetNode(pPlan->pSubplans, 0);
if (LIST_LENGTH(inner->pNodeList) != 1) {
terrno = TSDB_CODE_QRY_INVALID_INPUT;
return -1;
}
int32_t planTotLevel = LIST_LENGTH(pPlan->pSubplans);
pStream->tasks = taosArrayInit(planTotLevel, POINTER_BYTES);
SSubplan* plan = (SSubplan*)nodesListGetNode(inner->pNodeList, 0);
if (plan->subplanType != SUBPLAN_TYPE_SCAN) {
terrno = TSDB_CODE_QRY_INVALID_INPUT;
return -1;
}
void* pIter = NULL;
while (1) {
SVgObj* pVgroup;
pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void**)&pVgroup);
if (pIter == NULL) {
break;
}
if (!mndVgroupInDb(pVgroup, pStream->sourceDbUid)) {
sdbRelease(pSdb, pVgroup);
continue;
}
// new stream task
SArray** pSinkTaskList = taosArrayGet(pStream->tasks, SINK_NODE_LEVEL);
int32_t code = addSourceStreamTask(pMnode, pVgroup, pTaskList, *pSinkTaskList, pStream, plan, pStream->uid, 0,
hasExtraSink, nextWindowSkey);
if (code != TSDB_CODE_SUCCESS) {
sdbRelease(pSdb, pVgroup);
return -1;
}
if (pStream->conf.fillHistory) {
SArray** pHSinkTaskList = taosArrayGet(pStream->pHTasksList, SINK_NODE_LEVEL);
code = addSourceStreamTask(pMnode, pVgroup, pHTaskList, *pHSinkTaskList, pStream, plan, pStream->hTaskUid,
1, hasExtraSink, nextWindowSkey);
}
sdbRelease(pSdb, pVgroup);
if (code != TSDB_CODE_SUCCESS) {
return -1;
}
}
if (pStream->conf.fillHistory) {
setHTasksId(pTaskList, pHTaskList);
}
return TSDB_CODE_SUCCESS;
}
static int32_t doAddSourceTask(SArray* pTaskList, int8_t fillHistory, int64_t uid, SStreamTask* pDownstreamTask,
SMnode* pMnode, SSubplan* pPlan, SVgObj* pVgroup, int64_t nextWindowSkey) {
SStreamTask* pTask = tNewStreamTask(uid, TASK_LEVEL__SOURCE, fillHistory, 0, pTaskList);
if (pTask == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
}
// todo set the correct ts, which should be last key of queried table.
STimeWindow* pWindow = &pTask->dataRange.window;
pWindow->skey = INT64_MIN;
pWindow->ekey = nextWindowSkey - 1;
mDebug("s-task:0x%x level:%d set time window:%" PRId64 " - %" PRId64, pTask->id.taskId, pTask->info.taskLevel,
pWindow->skey, pWindow->ekey);
// all the source tasks dispatch result to a single agg node.
setFixedDownstreamEpInfo(pTask, pDownstreamTask);
if (mndAssignStreamTaskToVgroup(pMnode, pTask, pPlan, pVgroup) < 0) {
return -1;
}
return setEpToDownstreamTask(pTask, pDownstreamTask);
}
static int32_t doAddAggTask(uint64_t uid, SArray* pTaskList, SArray* pSinkNodeList, SMnode* pMnode, SStreamObj* pStream,
int32_t fillHistory, SStreamTask** pAggTask) {
*pAggTask = tNewStreamTask(uid, TASK_LEVEL__AGG, fillHistory, pStream->conf.triggerParam, pTaskList);
if (*pAggTask == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
}
// dispatch
if (mndAddDispatcherForInternalTask(pMnode, pStream, pSinkNodeList, *pAggTask) < 0) {
return -1;
}
return 0;
}
static int32_t addAggTask(SStreamObj* pStream, SMnode* pMnode, SQueryPlan* pPlan, SStreamTask** pAggTask,
SStreamTask** pHAggTask) {
SArray* pAggTaskList = addNewTaskList(pStream->tasks);
SSdb* pSdb = pMnode->pSdb;
SNodeListNode* pInnerNode = (SNodeListNode*)nodesListGetNode(pPlan->pSubplans, 0);
SSubplan* plan = (SSubplan*)nodesListGetNode(pInnerNode->pNodeList, 0);
if (plan->subplanType != SUBPLAN_TYPE_MERGE) {
terrno = TSDB_CODE_QRY_INVALID_INPUT;
return -1;
}
*pAggTask = NULL;
SArray* pSinkNodeList = taosArrayGetP(pStream->tasks, SINK_NODE_LEVEL);
int32_t code = doAddAggTask(pStream->uid, pAggTaskList, pSinkNodeList, pMnode, pStream, 0, pAggTask);
if (code != TSDB_CODE_SUCCESS) {
return -1;
}
SVgObj* pVgroup = NULL;
SSnodeObj* pSnode = NULL;
if (tsDeployOnSnode) {
pSnode = mndSchedFetchOneSnode(pMnode);
if (pSnode == NULL) {
pVgroup = mndSchedFetchOneVg(pMnode, pStream->sourceDbUid);
}
} else {
pVgroup = mndSchedFetchOneVg(pMnode, pStream->sourceDbUid);
}
if (pSnode != NULL) {
code = mndAssignStreamTaskToSnode(pMnode, *pAggTask, plan, pSnode);
} else {
code = mndAssignStreamTaskToVgroup(pMnode, *pAggTask, plan, pVgroup);
}
if (pStream->conf.fillHistory) {
SArray* pHAggTaskList = addNewTaskList(pStream->pHTasksList);
SArray* pHSinkNodeList = taosArrayGetP(pStream->pHTasksList, SINK_NODE_LEVEL);
*pHAggTask = NULL;
code = doAddAggTask(pStream->hTaskUid, pHAggTaskList, pHSinkNodeList, pMnode, pStream, pStream->conf.fillHistory,
pHAggTask);
if (code != TSDB_CODE_SUCCESS) {
if (pSnode != NULL) {
sdbRelease(pSdb, pSnode);
} else {
sdbRelease(pSdb, pVgroup);
}
return code;
}
if (pSnode != NULL) {
code = mndAssignStreamTaskToSnode(pMnode, *pHAggTask, plan, pSnode);
} else {
code = mndAssignStreamTaskToVgroup(pMnode, *pHAggTask, plan, pVgroup);
}
setHTasksId(pAggTaskList, pHAggTaskList);
}
if (pSnode != NULL) {
sdbRelease(pSdb, pSnode);
} else {
sdbRelease(pSdb, pVgroup);
}
return code;
}
static int32_t addSourceTasksForMultiLevelStream(SMnode* pMnode, SQueryPlan* pPlan, SStreamObj* pStream,
SStreamTask* pDownstreamTask, SStreamTask* pHDownstreamTask, int64_t nextWindowSkey) {
SArray* pSourceTaskList = addNewTaskList(pStream->tasks);
SArray* pHSourceTaskList = NULL;
if (pStream->conf.fillHistory) {
pHSourceTaskList = addNewTaskList(pStream->pHTasksList);
}
SSdb* pSdb = pMnode->pSdb;
SNodeListNode* inner = (SNodeListNode*)nodesListGetNode(pPlan->pSubplans, 1);
SSubplan* plan = (SSubplan*)nodesListGetNode(inner->pNodeList, 0);
if (plan->subplanType != SUBPLAN_TYPE_SCAN) {
terrno = TSDB_CODE_QRY_INVALID_INPUT;
return -1;
}
void* pIter = NULL;
while (1) {
SVgObj* pVgroup;
pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void**)&pVgroup);
if (pIter == NULL) {
break;
}
if (!mndVgroupInDb(pVgroup, pStream->sourceDbUid)) {
sdbRelease(pSdb, pVgroup);
continue;
}
int32_t code =
doAddSourceTask(pSourceTaskList, 0, pStream->uid, pDownstreamTask, pMnode, plan, pVgroup, nextWindowSkey);
if (code != TSDB_CODE_SUCCESS) {
sdbRelease(pSdb, pVgroup);
terrno = code;
return -1;
}
if (pStream->conf.fillHistory) {
code = doAddSourceTask(pHSourceTaskList, 1, pStream->hTaskUid, pHDownstreamTask, pMnode, plan, pVgroup,
nextWindowSkey);
if (code != TSDB_CODE_SUCCESS) {
sdbRelease(pSdb, pVgroup);
return code;
}
}
sdbRelease(pSdb, pVgroup);
}
if (pStream->conf.fillHistory) {
setHTasksId(pSourceTaskList, pHSourceTaskList);
}
return TSDB_CODE_SUCCESS;
}
static int32_t addSinkTasks(SArray* pTasksList, SMnode* pMnode, SStreamObj* pStream, SArray** pCreatedTaskList,
int32_t fillHistory) {
SArray* pSinkTaskList = addNewTaskList(pTasksList);
if (pStream->fixedSinkVgId == 0) {
if (mndAddShuffleSinkTasksToStream(pMnode, pSinkTaskList, pStream, fillHistory) < 0) {
// TODO free
return -1;
}
} else {
if (mndAddSinkTaskToStream(pStream, pSinkTaskList, pMnode, pStream->fixedSinkVgId, &pStream->fixedSinkVg,
fillHistory) < 0) {
// TODO free
return -1;
}
}
*pCreatedTaskList = pSinkTaskList;
return TSDB_CODE_SUCCESS;
}
static int32_t doScheduleStream(SStreamObj* pStream, SMnode* pMnode, SQueryPlan* pPlan, int64_t nextWindowSkey) {
SSdb* pSdb = pMnode->pSdb;
int32_t numOfPlanLevel = LIST_LENGTH(pPlan->pSubplans);
bool hasExtraSink = false;
bool externalTargetDB = strcmp(pStream->sourceDb, pStream->targetDb) != 0;
@ -329,178 +601,64 @@ int32_t mndScheduleStream(SMnode* pMnode, SStreamObj* pStream) {
bool multiTarget = (pDbObj->cfg.numOfVgroups > 1);
sdbRelease(pSdb, pDbObj);
if (planTotLevel == 2 || externalTargetDB || multiTarget || pStream->fixedSinkVgId) {
SArray* taskOneLevel = taosArrayInit(0, POINTER_BYTES);
taosArrayPush(pStream->tasks, &taskOneLevel);
pStream->tasks = taosArrayInit(numOfPlanLevel + 1, POINTER_BYTES);
pStream->pHTasksList = taosArrayInit(numOfPlanLevel + 1, POINTER_BYTES);
if (numOfPlanLevel == 2 || externalTargetDB || multiTarget || pStream->fixedSinkVgId) {
// add extra sink
hasExtraSink = true;
if (pStream->fixedSinkVgId == 0) {
if (mndAddShuffleSinkTasksToStream(pMnode, pStream) < 0) {
// TODO free
return -1;
}
} else {
if (mndAddSinkTaskToStream(pStream, pMnode, pStream->fixedSinkVgId, &pStream->fixedSinkVg) < 0) {
// TODO free
return -1;
SArray* pSinkTaskList = NULL;
int32_t code = addSinkTasks(pStream->tasks, pMnode, pStream, &pSinkTaskList, 0);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
// check for fill history
if (pStream->conf.fillHistory) {
SArray* pHSinkTaskList = NULL;
code = addSinkTasks(pStream->pHTasksList, pMnode, pStream, &pHSinkTaskList, 1);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
setHTasksId(pSinkTaskList, pHSinkTaskList);
}
}
pStream->totalLevel = planTotLevel + hasExtraSink;
pStream->totalLevel = numOfPlanLevel + hasExtraSink;
if (planTotLevel > 1) {
SStreamTask* pInnerTask;
// inner level
{
SArray* taskInnerLevel = taosArrayInit(0, POINTER_BYTES);
taosArrayPush(pStream->tasks, &taskInnerLevel);
if (numOfPlanLevel > 1) {
SStreamTask* pAggTask = NULL;
SStreamTask* pHAggTask = NULL;
SNodeListNode* inner = (SNodeListNode*)nodesListGetNode(pPlan->pSubplans, 0);
SSubplan* plan = (SSubplan*)nodesListGetNode(inner->pNodeList, 0);
if (plan->subplanType != SUBPLAN_TYPE_MERGE) {
terrno = TSDB_CODE_QRY_INVALID_INPUT;
return -1;
}
pInnerTask = tNewStreamTask(pStream->uid, TASK_LEVEL__AGG, pStream->fillHistory, pStream->triggerParam, taskInnerLevel);
if (pInnerTask == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
qDestroyQueryPlan(pPlan);
return -1;
}
// dispatch
if (mndAddDispatcherForInnerTask(pMnode, pStream, pInnerTask) < 0) {
qDestroyQueryPlan(pPlan);
return -1;
}
if (tsDeployOnSnode) {
SSnodeObj* pSnode = mndSchedFetchOneSnode(pMnode);
if (pSnode == NULL) {
SVgObj* pVgroup = mndSchedFetchOneVg(pMnode, pStream->sourceDbUid);
if (mndAssignStreamTaskToVgroup(pMnode, pInnerTask, plan, pVgroup) < 0) {
sdbRelease(pSdb, pVgroup);
qDestroyQueryPlan(pPlan);
return -1;
}
sdbRelease(pSdb, pVgroup);
} else {
if (mndAssignTaskToSnode(pMnode, pInnerTask, plan, pSnode) < 0) {
sdbRelease(pSdb, pSnode);
qDestroyQueryPlan(pPlan);
return -1;
}
}
} else {
SVgObj* pVgroup = mndSchedFetchOneVg(pMnode, pStream->sourceDbUid);
if (mndAssignStreamTaskToVgroup(pMnode, pInnerTask, plan, pVgroup) < 0) {
sdbRelease(pSdb, pVgroup);
qDestroyQueryPlan(pPlan);
return -1;
}
sdbRelease(pSdb, pVgroup);
}
int32_t code = addAggTask(pStream, pMnode, pPlan, &pAggTask, &pHAggTask);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
// source level
SArray* taskSourceLevel = taosArrayInit(0, POINTER_BYTES);
taosArrayPush(pStream->tasks, &taskSourceLevel);
SNodeListNode* inner = (SNodeListNode*)nodesListGetNode(pPlan->pSubplans, 1);
SSubplan* plan = (SSubplan*)nodesListGetNode(inner->pNodeList, 0);
if (plan->subplanType != SUBPLAN_TYPE_SCAN) {
terrno = TSDB_CODE_QRY_INVALID_INPUT;
return -1;
}
void* pIter = NULL;
while (1) {
SVgObj* pVgroup;
pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void**)&pVgroup);
if (pIter == NULL) {
break;
}
if (!mndVgroupInDb(pVgroup, pStream->sourceDbUid)) {
sdbRelease(pSdb, pVgroup);
continue;
}
SStreamTask* pTask = tNewStreamTask(pStream->uid, TASK_LEVEL__SOURCE, pStream->fillHistory, 0, taskSourceLevel);
if (pTask == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
sdbRelease(pSdb, pVgroup);
qDestroyQueryPlan(pPlan);
return -1;
}
// all the source tasks dispatch result to a single agg node.
setFixedDownstreamEpInfo(pTask, pInnerTask);
if (mndAssignStreamTaskToVgroup(pMnode, pTask, plan, pVgroup) < 0) {
sdbRelease(pSdb, pVgroup);
qDestroyQueryPlan(pPlan);
return -1;
}
int32_t code = appendToUpstream(pTask, pInnerTask);
sdbRelease(pSdb, pVgroup);
if (code != TSDB_CODE_SUCCESS) {
terrno = code;
qDestroyQueryPlan(pPlan);
return -1;
}
}
} else if (planTotLevel == 1) {
// create exec stream task, since only one level, the exec task is also the source task
SArray* pTaskList = taosArrayInit(0, POINTER_BYTES);
taosArrayPush(pStream->tasks, &pTaskList);
SNodeListNode* inner = (SNodeListNode*)nodesListGetNode(pPlan->pSubplans, 0);
if (LIST_LENGTH(inner->pNodeList) != 1) {
terrno = TSDB_CODE_QRY_INVALID_INPUT;
return -1;
}
SSubplan* plan = (SSubplan*)nodesListGetNode(inner->pNodeList, 0);
if (plan->subplanType != SUBPLAN_TYPE_SCAN) {
terrno = TSDB_CODE_QRY_INVALID_INPUT;
return -1;
}
void* pIter = NULL;
while (1) {
SVgObj* pVgroup;
pIter = sdbFetch(pSdb, SDB_VGROUP, pIter, (void**)&pVgroup);
if (pIter == NULL) {
break;
}
if (!mndVgroupInDb(pVgroup, pStream->sourceDbUid)) {
sdbRelease(pSdb, pVgroup);
continue;
}
// new stream task
int32_t code = addSourceStreamTask(pMnode, pVgroup, pTaskList, pStream, plan, pStream->uid, TASK_LEVEL__SOURCE, pStream->fillHistory, hasExtraSink);
sdbRelease(pSdb, pVgroup);
if (code != TSDB_CODE_SUCCESS) {
qDestroyQueryPlan(pPlan);
return -1;
}
}
return addSourceTasksForMultiLevelStream(pMnode, pPlan, pStream, pAggTask, pHAggTask, nextWindowSkey);
} else if (numOfPlanLevel == 1) {
return addSourceTasksForOneLevelStream(pMnode, pPlan, pStream, hasExtraSink, nextWindowSkey);
}
qDestroyQueryPlan(pPlan);
return 0;
}
int32_t mndScheduleStream(SMnode* pMnode, SStreamObj* pStream, int64_t nextWindowSkey) {
SQueryPlan* pPlan = qStringToQueryPlan(pStream->physicalPlan);
if (pPlan == NULL) {
terrno = TSDB_CODE_QRY_INVALID_INPUT;
return -1;
}
int32_t code = doScheduleStream(pStream, pMnode, pPlan, nextWindowSkey);
qDestroyQueryPlan(pPlan);
return code;
}
int32_t mndSchedInitSubEp(SMnode* pMnode, const SMqTopicObj* pTopic, SMqSubscribeObj* pSub) {
SSdb* pSdb = pMnode->pSdb;
SVgObj* pVgroup = NULL;
@ -513,8 +671,8 @@ int32_t mndSchedInitSubEp(SMnode* pMnode, const SMqTopicObj* pTopic, SMqSubscrib
terrno = TSDB_CODE_QRY_INVALID_INPUT;
return -1;
}
}else if(pTopic->subType == TOPIC_SUB_TYPE__TABLE && pTopic->ast != NULL){
SNode *pAst = NULL;
} else if (pTopic->subType == TOPIC_SUB_TYPE__TABLE && pTopic->ast != NULL) {
SNode* pAst = NULL;
if (nodesStringToNode(pTopic->ast, &pAst) != 0) {
mError("topic:%s, failed to create since %s", pTopic->name, terrstr());
return -1;
@ -529,7 +687,7 @@ int32_t mndSchedInitSubEp(SMnode* pMnode, const SMqTopicObj* pTopic, SMqSubscrib
nodesDestroyNode(pAst);
}
if(pPlan){
if (pPlan) {
int32_t levelNum = LIST_LENGTH(pPlan->pSubplans);
if (levelNum != 1) {
qDestroyQueryPlan(pPlan);

View File

@ -555,20 +555,20 @@ static int32_t mndCreateSma(SMnode *pMnode, SRpcMsg *pReq, SMCreateSmaReq *pCrea
streamObj.version = 1;
streamObj.sql = taosStrdup(pCreate->sql);
streamObj.smaId = smaObj.uid;
streamObj.watermark = pCreate->watermark;
streamObj.conf.watermark = pCreate->watermark;
streamObj.deleteMark = pCreate->deleteMark;
streamObj.fillHistory = STREAM_FILL_HISTORY_ON;
streamObj.trigger = STREAM_TRIGGER_WINDOW_CLOSE;
streamObj.triggerParam = pCreate->maxDelay;
streamObj.conf.fillHistory = STREAM_FILL_HISTORY_ON;
streamObj.conf.trigger = STREAM_TRIGGER_WINDOW_CLOSE;
streamObj.conf.triggerParam = pCreate->maxDelay;
streamObj.ast = taosStrdup(smaObj.ast);
// check the maxDelay
if (streamObj.triggerParam < TSDB_MIN_ROLLUP_MAX_DELAY) {
if (streamObj.conf.triggerParam < TSDB_MIN_ROLLUP_MAX_DELAY) {
int64_t msInterval = convertTimeFromPrecisionToUnit(pCreate->interval, pDb->cfg.precision, TIME_UNIT_MILLISECOND);
streamObj.triggerParam = msInterval > TSDB_MIN_ROLLUP_MAX_DELAY ? msInterval : TSDB_MIN_ROLLUP_MAX_DELAY;
streamObj.conf.triggerParam = msInterval > TSDB_MIN_ROLLUP_MAX_DELAY ? msInterval : TSDB_MIN_ROLLUP_MAX_DELAY;
}
if (streamObj.triggerParam > TSDB_MAX_ROLLUP_MAX_DELAY) {
streamObj.triggerParam = TSDB_MAX_ROLLUP_MAX_DELAY;
if (streamObj.conf.triggerParam > TSDB_MAX_ROLLUP_MAX_DELAY) {
streamObj.conf.triggerParam = TSDB_MAX_ROLLUP_MAX_DELAY;
}
if (mndAllocSmaVgroup(pMnode, pDb, &streamObj.fixedSinkVg) != 0) {
@ -597,8 +597,8 @@ static int32_t mndCreateSma(SMnode *pMnode, SRpcMsg *pReq, SMCreateSmaReq *pCrea
.pAstRoot = pAst,
.topicQuery = false,
.streamQuery = true,
.triggerType = streamObj.trigger,
.watermark = streamObj.watermark,
.triggerType = streamObj.conf.trigger,
.watermark = streamObj.conf.watermark,
.deleteMark = streamObj.deleteMark,
};
@ -633,7 +633,7 @@ static int32_t mndCreateSma(SMnode *pMnode, SRpcMsg *pReq, SMCreateSmaReq *pCrea
if (mndSetCreateSmaVgroupCommitLogs(pMnode, pTrans, &streamObj.fixedSinkVg) != 0) goto _OVER;
if (mndSetUpdateSmaStbCommitLogs(pMnode, pTrans, pStb) != 0) goto _OVER;
if (mndSetCreateSmaVgroupRedoActions(pMnode, pTrans, pDb, &streamObj.fixedSinkVg, &smaObj) != 0) goto _OVER;
if (mndScheduleStream(pMnode, &streamObj) != 0) goto _OVER;
if (mndScheduleStream(pMnode, &streamObj, 1685959190000) != 0) goto _OVER;
if (mndPersistStream(pMnode, pTrans, &streamObj) != 0) goto _OVER;
if (mndTransPrepare(pMnode, pTrans) != 0) goto _OVER;
@ -1278,13 +1278,13 @@ static int32_t mndRetrieveSma(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pBloc
STR_TO_VARSTR(col, (char *)"");
pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
colDataAppend(pColInfo, numOfRows, (const char *)col, false);
colDataSetVal(pColInfo, numOfRows, (const char *)col, false);
pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
char tag[TSDB_TABLE_FNAME_LEN + VARSTR_HEADER_SIZE] = {0};
STR_TO_VARSTR(tag, (char *)"sma_index");
colDataAppend(pColInfo, numOfRows, (const char *)tag, false);
colDataSetVal(pColInfo, numOfRows, (const char *)tag, false);
numOfRows++;
sdbRelease(pSdb, pSma);

View File

@ -239,7 +239,7 @@ static void mndShowStreamStatus(char *dst, SStreamObj *pStream) {
}
static void mndShowStreamTrigger(char *dst, SStreamObj *pStream) {
int8_t trigger = pStream->trigger;
int8_t trigger = pStream->conf.trigger;
if (trigger == STREAM_TRIGGER_AT_ONCE) {
strcpy(dst, "at once");
} else if (trigger == STREAM_TRIGGER_WINDOW_CLOSE) {
@ -299,13 +299,18 @@ static int32_t mndBuildStreamObjFromCreateReq(SMnode *pMnode, SStreamObj *pObj,
pObj->smaId = 0;
pObj->uid = mndGenerateUid(pObj->name, strlen(pObj->name));
char p[TSDB_STREAM_FNAME_LEN + 32] = {0};
snprintf(p, tListLen(p), "%s_%s", pObj->name, "fillhistory");
pObj->hTaskUid = mndGenerateUid(pObj->name, strlen(pObj->name));
pObj->status = 0;
pObj->igExpired = pCreate->igExpired;
pObj->trigger = pCreate->triggerType;
pObj->triggerParam = pCreate->maxDelay;
pObj->watermark = pCreate->watermark;
pObj->fillHistory = pCreate->fillHistory;
pObj->conf.igExpired = pCreate->igExpired;
pObj->conf.trigger = pCreate->triggerType;
pObj->conf.triggerParam = pCreate->maxDelay;
pObj->conf.watermark = pCreate->watermark;
pObj->conf.fillHistory = pCreate->fillHistory;
pObj->deleteMark = pCreate->deleteMark;
pObj->igCheckUpdate = pCreate->igUpdate;
@ -387,9 +392,9 @@ static int32_t mndBuildStreamObjFromCreateReq(SMnode *pMnode, SStreamObj *pObj,
.pAstRoot = pAst,
.topicQuery = false,
.streamQuery = true,
.triggerType = pObj->trigger == STREAM_TRIGGER_MAX_DELAY ? STREAM_TRIGGER_WINDOW_CLOSE : pObj->trigger,
.watermark = pObj->watermark,
.igExpired = pObj->igExpired,
.triggerType = pObj->conf.trigger == STREAM_TRIGGER_MAX_DELAY ? STREAM_TRIGGER_WINDOW_CLOSE : pObj->conf.trigger,
.watermark = pObj->conf.watermark,
.igExpired = pObj->conf.igExpired,
.deleteMark = pObj->deleteMark,
.igCheckUpdate = pObj->igCheckUpdate,
};
@ -428,30 +433,37 @@ int32_t mndPersistTaskDeployReq(STrans *pTrans, const SStreamTask *pTask) {
SEncoder encoder;
tEncoderInit(&encoder, NULL, 0);
tEncodeStreamTask(&encoder, pTask);
int32_t size = encoder.pos;
int32_t tlen = sizeof(SMsgHead) + size;
tEncoderClear(&encoder);
void *buf = taosMemoryCalloc(1, tlen);
if (buf == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
}
((SMsgHead *)buf)->vgId = htonl(pTask->nodeId);
((SMsgHead *)buf)->vgId = htonl(pTask->info.nodeId);
void *abuf = POINTER_SHIFT(buf, sizeof(SMsgHead));
tEncoderInit(&encoder, abuf, size);
tEncodeStreamTask(&encoder, pTask);
tEncoderClear(&encoder);
STransAction action = {0};
action.mTraceId = pTrans->mTraceId;
memcpy(&action.epSet, &pTask->epSet, sizeof(SEpSet));
memcpy(&action.epSet, &pTask->info.epSet, sizeof(SEpSet));
action.pCont = buf;
action.contLen = tlen;
action.msgType = TDMT_STREAM_TASK_DEPLOY;
if (mndTransAppendRedoAction(pTrans, &action) != 0) {
taosMemoryFree(buf);
return -1;
}
return 0;
}
@ -459,14 +471,33 @@ int32_t mndPersistStreamTasks(SMnode *pMnode, STrans *pTrans, SStreamObj *pStrea
int32_t level = taosArrayGetSize(pStream->tasks);
for (int32_t i = 0; i < level; i++) {
SArray *pLevel = taosArrayGetP(pStream->tasks, i);
int32_t sz = taosArrayGetSize(pLevel);
for (int32_t j = 0; j < sz; j++) {
int32_t numOfTasks = taosArrayGetSize(pLevel);
for (int32_t j = 0; j < numOfTasks; j++) {
SStreamTask *pTask = taosArrayGetP(pLevel, j);
if (mndPersistTaskDeployReq(pTrans, pTask) < 0) {
return -1;
}
}
}
// persistent stream task for already stored ts data
if (pStream->conf.fillHistory) {
level = taosArrayGetSize(pStream->pHTasksList);
for (int32_t i = 0; i < level; i++) {
SArray *pLevel = taosArrayGetP(pStream->pHTasksList, i);
int32_t numOfTasks = taosArrayGetSize(pLevel);
for (int32_t j = 0; j < numOfTasks; j++) {
SStreamTask *pTask = taosArrayGetP(pLevel, j);
if (mndPersistTaskDeployReq(pTrans, pTask) < 0) {
return -1;
}
}
}
}
return 0;
}
@ -474,11 +505,13 @@ int32_t mndPersistStream(SMnode *pMnode, STrans *pTrans, SStreamObj *pStream) {
if (mndPersistStreamTasks(pMnode, pTrans, pStream) < 0) {
return -1;
}
SSdbRaw *pCommitRaw = mndStreamActionEncode(pStream);
if (pCommitRaw == NULL || mndTransAppendCommitlog(pTrans, pCommitRaw) != 0) {
mError("trans:%d, failed to append commit log since %s", pTrans->id, terrstr());
return -1;
}
(void)sdbSetRawStatus(pCommitRaw, SDB_STATUS_READY);
return 0;
}
@ -490,6 +523,7 @@ int32_t mndPersistDropStreamLog(SMnode *pMnode, STrans *pTrans, SStreamObj *pStr
mndTransDrop(pTrans);
return -1;
}
(void)sdbSetRawStatus(pCommitRaw, SDB_STATUS_DROPPED);
return 0;
}
@ -603,16 +637,17 @@ _OVER:
static int32_t mndPersistTaskDropReq(STrans *pTrans, SStreamTask *pTask) {
// vnode
/*if (pTask->nodeId > 0) {*/
/*if (pTask->info.nodeId > 0) {*/
SVDropStreamTaskReq *pReq = taosMemoryCalloc(1, sizeof(SVDropStreamTaskReq));
if (pReq == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
}
pReq->head.vgId = htonl(pTask->nodeId);
pReq->head.vgId = htonl(pTask->info.nodeId);
pReq->taskId = pTask->id.taskId;
STransAction action = {0};
memcpy(&action.epSet, &pTask->epSet, sizeof(SEpSet));
memcpy(&action.epSet, &pTask->info.epSet, sizeof(SEpSet));
action.pCont = pReq;
action.contLen = sizeof(SVDropStreamTaskReq);
action.msgType = TDMT_STREAM_TASK_DROP;
@ -732,6 +767,7 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) {
mError("stream:%s, failed to create since %s", createStreamReq.name, terrstr());
goto _OVER;
}
mInfo("trans:%d, used to create stream:%s", pTrans->id, createStreamReq.name);
mndTransSetDbName(pTrans, createStreamReq.sourceDB, streamObj.targetDb);
@ -748,7 +784,7 @@ static int32_t mndProcessCreateStreamReq(SRpcMsg *pReq) {
}
// schedule stream task for stream obj
if (mndScheduleStream(pMnode, &streamObj) < 0) {
if (mndScheduleStream(pMnode, &streamObj, createStreamReq.lastTs) < 0) {
mError("stream:%s, failed to schedule since %s", createStreamReq.name, terrstr());
mndTransDrop(pTrans);
goto _OVER;
@ -834,7 +870,7 @@ static int32_t mndBuildStreamCheckpointSourceReq(void **pBuf, int32_t *pLen, con
SMStreamDoCheckpointMsg *pMsg) {
SStreamCheckpointSourceReq req = {0};
req.checkpointId = pMsg->checkpointId;
req.nodeId = pTask->nodeId;
req.nodeId = pTask->info.nodeId;
req.expireTime = -1;
req.streamId = pTask->streamId;
req.taskId = pTask->taskId;
@ -863,7 +899,7 @@ static int32_t mndBuildStreamCheckpointSourceReq(void **pBuf, int32_t *pLen, con
SMsgHead *pMsgHead = (SMsgHead *)buf;
pMsgHead->contLen = htonl(tlen);
pMsgHead->vgId = htonl(pTask->nodeId);
pMsgHead->vgId = htonl(pTask->info.nodeId);
tEncoderClear(&encoder);
@ -902,12 +938,12 @@ static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) {
for (int32_t i = 0; i < totLevel; i++) {
SArray *pLevel = taosArrayGetP(pStream->tasks, i);
SStreamTask *pTask = taosArrayGetP(pLevel, 0);
if (pTask->taskLevel == TASK_LEVEL__SOURCE) {
if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
int32_t sz = taosArrayGetSize(pLevel);
for (int32_t j = 0; j < sz; j++) {
SStreamTask *pTask = taosArrayGetP(pLevel, j);
/*A(pTask->nodeId > 0);*/
SVgObj *pVgObj = mndAcquireVgroup(pMnode, pTask->nodeId);
/*A(pTask->info.nodeId > 0);*/
SVgObj *pVgObj = mndAcquireVgroup(pMnode, pTask->info.nodeId);
if (pVgObj == NULL) {
taosRUnLockLatch(&pStream->lock);
mndReleaseStream(pMnode, pStream);
@ -965,8 +1001,6 @@ static int32_t mndProcessStreamDoCheckpoint(SRpcMsg *pReq) {
static int32_t mndProcessDropStreamReq(SRpcMsg *pReq) {
SMnode *pMnode = pReq->info.node;
SStreamObj *pStream = NULL;
/*SDbObj *pDb = NULL;*/
/*SUserObj *pUser = NULL;*/
SMDropStreamReq dropReq = {0};
if (tDeserializeSMDropStreamReq(pReq->pCont, pReq->contLen, &dropReq) < 0) {
@ -1157,7 +1191,7 @@ static int32_t mndRetrieveStream(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *pB
}
pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
colDataSetVal(pColInfo, numOfRows, (const char *)&pStream->watermark, false);
colDataSetVal(pColInfo, numOfRows, (const char *)&pStream->conf.watermark, false);
char trigger[20 + VARSTR_HEADER_SIZE] = {0};
char trigger2[20] = {0};
@ -1187,12 +1221,16 @@ static int32_t mndRetrieveStreamTask(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock
while (numOfRows < rowsCapacity) {
pShow->pIter = sdbFetch(pSdb, SDB_STREAM, pShow->pIter, (void **)&pStream);
if (pShow->pIter == NULL) break;
if (pShow->pIter == NULL) {
break;
}
// lock
taosRLockLatch(&pStream->lock);
// count task num
int32_t sz = taosArrayGetSize(pStream->tasks);
int32_t count = 0;
for (int32_t i = 0; i < sz; i++) {
SArray *pLevel = taosArrayGetP(pStream->tasks, i);
@ -1202,10 +1240,12 @@ static int32_t mndRetrieveStreamTask(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock
if (numOfRows + count > rowsCapacity) {
blockDataEnsureCapacity(pBlock, numOfRows + count);
}
// add row for each task
for (int32_t i = 0; i < sz; i++) {
SArray *pLevel = taosArrayGetP(pStream->tasks, i);
int32_t levelCnt = taosArrayGetSize(pLevel);
for (int32_t j = 0; j < levelCnt; j++) {
SStreamTask *pTask = taosArrayGetP(pLevel, j);
@ -1215,18 +1255,25 @@ static int32_t mndRetrieveStreamTask(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock
// stream name
char streamName[TSDB_TABLE_NAME_LEN + VARSTR_HEADER_SIZE] = {0};
STR_WITH_MAXSIZE_TO_VARSTR(streamName, mndGetDbStr(pStream->name), sizeof(streamName));
pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
colDataSetVal(pColInfo, numOfRows, (const char *)streamName, false);
// task id
pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
colDataSetVal(pColInfo, numOfRows, (const char *)&pTask->id.taskId, false);
char idstr[128] = {0};
int32_t len = tintToHex(pTask->id.taskId, &idstr[4]);
idstr[2] = '0';
idstr[3] = 'x';
varDataSetLen(idstr, len + 2);
colDataSetVal(pColInfo, numOfRows, idstr, false);
// node type
char nodeType[20 + VARSTR_HEADER_SIZE] = {0};
varDataSetLen(nodeType, 5);
pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
if (pTask->nodeId > 0) {
if (pTask->info.nodeId > 0) {
memcpy(varDataVal(nodeType), "vnode", 5);
} else {
memcpy(varDataVal(nodeType), "snode", 5);
@ -1235,30 +1282,50 @@ static int32_t mndRetrieveStreamTask(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock
// node id
pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
int64_t nodeId = TMAX(pTask->nodeId, 0);
int64_t nodeId = TMAX(pTask->info.nodeId, 0);
colDataSetVal(pColInfo, numOfRows, (const char *)&nodeId, false);
// level
char level[20 + VARSTR_HEADER_SIZE] = {0};
if (pTask->taskLevel == TASK_LEVEL__SOURCE) {
if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
memcpy(varDataVal(level), "source", 6);
varDataSetLen(level, 6);
} else if (pTask->taskLevel == TASK_LEVEL__AGG) {
} else if (pTask->info.taskLevel == TASK_LEVEL__AGG) {
memcpy(varDataVal(level), "agg", 3);
varDataSetLen(level, 3);
} else if (pTask->taskLevel == TASK_LEVEL__SINK) {
} else if (pTask->info.taskLevel == TASK_LEVEL__SINK) {
memcpy(varDataVal(level), "sink", 4);
varDataSetLen(level, 4);
} else if (pTask->taskLevel == TASK_LEVEL__SINK) {
}
pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
colDataSetVal(pColInfo, numOfRows, (const char *)&level, false);
// status
char status[20 + VARSTR_HEADER_SIZE] = {0};
char status2[20] = {0};
strcpy(status, "normal");
STR_WITH_MAXSIZE_TO_VARSTR(status, status2, sizeof(status));
int8_t taskStatus = atomic_load_8(&pTask->status.taskStatus);
if (taskStatus == TASK_STATUS__NORMAL) {
memcpy(varDataVal(status), "normal", 6);
varDataSetLen(status, 6);
} else if (taskStatus == TASK_STATUS__DROPPING) {
memcpy(varDataVal(status), "dropping", 8);
varDataSetLen(status, 8);
} else if (taskStatus == TASK_STATUS__FAIL) {
memcpy(varDataVal(status), "fail", 4);
varDataSetLen(status, 4);
} else if (taskStatus == TASK_STATUS__STOP) {
memcpy(varDataVal(status), "stop", 4);
varDataSetLen(status, 4);
} else if (taskStatus == TASK_STATUS__SCAN_HISTORY) {
memcpy(varDataVal(status), "history", 7);
varDataSetLen(status, 7);
} else if (taskStatus == TASK_STATUS__HALT) {
memcpy(varDataVal(status), "halt", 4);
varDataSetLen(status, 4);
} else if (taskStatus == TASK_STATUS__PAUSE) {
memcpy(varDataVal(status), "pause", 5);
varDataSetLen(status, 5);
}
pColInfo = taosArrayGet(pBlock->pDataBlock, cols++);
colDataSetVal(pColInfo, numOfRows, (const char *)&status, false);
@ -1287,10 +1354,10 @@ static int32_t mndPauseStreamTask(STrans *pTrans, SStreamTask *pTask) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
}
pReq->head.vgId = htonl(pTask->nodeId);
pReq->head.vgId = htonl(pTask->info.nodeId);
pReq->taskId = pTask->id.taskId;
STransAction action = {0};
memcpy(&action.epSet, &pTask->epSet, sizeof(SEpSet));
memcpy(&action.epSet, &pTask->info.epSet, sizeof(SEpSet));
action.pCont = pReq;
action.contLen = sizeof(SVPauseStreamTaskReq);
action.msgType = TDMT_STREAM_TASK_PAUSE;
@ -1301,21 +1368,36 @@ static int32_t mndPauseStreamTask(STrans *pTrans, SStreamTask *pTask) {
return 0;
}
int32_t mndPauseAllStreamTasks(STrans *pTrans, SStreamObj *pStream) {
int32_t size = taosArrayGetSize(pStream->tasks);
int32_t mndPauseAllStreamTaskImpl(STrans *pTrans, SArray* tasks) {
int32_t size = taosArrayGetSize(tasks);
for (int32_t i = 0; i < size; i++) {
SArray *pTasks = taosArrayGetP(pStream->tasks, i);
SArray *pTasks = taosArrayGetP(tasks, i);
int32_t sz = taosArrayGetSize(pTasks);
for (int32_t j = 0; j < sz; j++) {
SStreamTask *pTask = taosArrayGetP(pTasks, j);
if (pTask->taskLevel != TASK_LEVEL__SINK && mndPauseStreamTask(pTrans, pTask) < 0) {
if (pTask->info.taskLevel != TASK_LEVEL__SINK && mndPauseStreamTask(pTrans, pTask) < 0) {
return -1;
}
if (atomic_load_8(&pTask->status.taskStatus) != TASK_STATUS__PAUSE) {
atomic_store_8(&pTask->status.keepTaskStatus, pTask->status.taskStatus);
atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__PAUSE);
}
}
}
return 0;
}
int32_t mndPauseAllStreamTasks(STrans *pTrans, SStreamObj *pStream) {
int32_t code = mndPauseAllStreamTaskImpl(pTrans, pStream->tasks);
if (code != 0) {
return code;
}
// pStream->pHTasksList is null
// code = mndPauseAllStreamTaskImpl(pTrans, pStream->pHTasksList);
return code;
}
static int32_t mndPersistStreamLog(STrans *pTrans, const SStreamObj *pStream, int8_t status) {
SStreamObj streamObj = {0};
memcpy(streamObj.name, pStream->name, TSDB_STREAM_FNAME_LEN);
@ -1355,6 +1437,10 @@ static int32_t mndProcessPauseStreamReq(SRpcMsg *pReq) {
}
}
if (pStream->status == STREAM_STATUS__PAUSE) {
return 0;
}
if (mndCheckDbPrivilegeByName(pMnode, pReq->info.conn.user, MND_OPER_WRITE_DB, pStream->targetDb) != 0) {
sdbRelease(pMnode->pSdb, pStream);
return -1;
@ -1410,11 +1496,11 @@ static int32_t mndResumeStreamTask(STrans *pTrans, SStreamTask *pTask, int8_t ig
terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
}
pReq->head.vgId = htonl(pTask->nodeId);
pReq->head.vgId = htonl(pTask->info.nodeId);
pReq->taskId = pTask->id.taskId;
pReq->igUntreated = igUntreated;
STransAction action = {0};
memcpy(&action.epSet, &pTask->epSet, sizeof(SEpSet));
memcpy(&action.epSet, &pTask->info.epSet, sizeof(SEpSet));
action.pCont = pReq;
action.contLen = sizeof(SVResumeStreamTaskReq);
action.msgType = TDMT_STREAM_TASK_RESUME;
@ -1432,11 +1518,16 @@ int32_t mndResumeAllStreamTasks(STrans *pTrans, SStreamObj *pStream, int8_t igUn
int32_t sz = taosArrayGetSize(pTasks);
for (int32_t j = 0; j < sz; j++) {
SStreamTask *pTask = taosArrayGetP(pTasks, j);
if (pTask->taskLevel != TASK_LEVEL__SINK && mndResumeStreamTask(pTrans, pTask, igUntreated) < 0) {
if (pTask->info.taskLevel != TASK_LEVEL__SINK && mndResumeStreamTask(pTrans, pTask, igUntreated) < 0) {
return -1;
}
if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__PAUSE) {
atomic_store_8(&pTask->status.taskStatus, pTask->status.keepTaskStatus);
}
}
}
// pStream->pHTasksList is null
return 0;
}
@ -1463,6 +1554,10 @@ static int32_t mndProcessResumeStreamReq(SRpcMsg *pReq) {
}
}
if (pStream->status != STREAM_STATUS__PAUSE) {
return 0;
}
if (mndCheckDbPrivilegeByName(pMnode, pReq->info.conn.user, MND_OPER_WRITE_DB, pStream->targetDb) != 0) {
sdbRelease(pMnode->pSdb, pStream);
return -1;

View File

@ -875,7 +875,7 @@ static int32_t mndRetrieveVgroups(SRpcMsg *pReq, SShowObj *pShow, SSDataBlock *p
// if (pDb == NULL || pDb->compactStartTime <= 0) {
// colDataSetNULL(pColInfo, numOfRows);
// } else {
// colDataAppend(pColInfo, numOfRows, (const char *)&pDb->compactStartTime, false);
// colDataSetVal(pColInfo, numOfRows, (const char *)&pDb->compactStartTime, false);
// }
numOfRows++;

View File

@ -52,23 +52,21 @@ void sndEnqueueStreamDispatch(SSnode *pSnode, SRpcMsg *pMsg) {
FAIL:
if (pMsg->info.handle == NULL) return;
SRpcMsg rsp = {
.code = code,
.info = pMsg->info,
};
SRpcMsg rsp = { .code = code, .info = pMsg->info};
tmsgSendRsp(&rsp);
rpcFreeCont(pMsg->pCont);
taosFreeQitem(pMsg);
}
int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t ver) {
ASSERT(pTask->taskLevel == TASK_LEVEL__AGG && taosArrayGetSize(pTask->childEpInfo) != 0);
ASSERT(pTask->info.taskLevel == TASK_LEVEL__AGG && taosArrayGetSize(pTask->pUpstreamEpInfoList) != 0);
pTask->refCnt = 1;
pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE;
pTask->id.idStr = createStreamTaskIdStr(pTask->id.streamId, pTask->id.taskId);
pTask->inputQueue = streamQueueOpen(0);
pTask->outputQueue = streamQueueOpen(0);
pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE;
pTask->inputQueue = streamQueueOpen(512 << 10);
pTask->outputQueue = streamQueueOpen(512 << 10);
if (pTask->inputQueue == NULL || pTask->outputQueue == NULL) {
return -1;
@ -85,14 +83,18 @@ int32_t sndExpandTask(SSnode *pSnode, SStreamTask *pTask, int64_t ver) {
return -1;
}
int32_t numOfChildEp = taosArrayGetSize(pTask->childEpInfo);
SReadHandle handle = { .vnode = NULL, .numOfVgroups = numOfChildEp, .pStateBackend = pTask->pState };
int32_t numOfChildEp = taosArrayGetSize(pTask->pUpstreamEpInfoList);
SReadHandle handle = { .vnode = NULL, .numOfVgroups = numOfChildEp, .pStateBackend = pTask->pState, .fillHistory = pTask->info.fillHistory };
initStreamStateAPI(&handle.api);
pTask->exec.pExecutor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &handle, 0);
ASSERT(pTask->exec.pExecutor);
streamSetupTrigger(pTask);
streamSetupScheduleTrigger(pTask);
qDebug("snode:%d expand stream task on snode, s-task:%s, checkpoint ver:%" PRId64 " child id:%d, level:%d", SNODE_HANDLE,
pTask->id.idStr, pTask->chkInfo.version, pTask->info.selfChildId, pTask->info.taskLevel);
return 0;
}
@ -149,9 +151,10 @@ int32_t sndProcessTaskDeployReq(SSnode *pSnode, char *msg, int32_t msgLen) {
taosMemoryFree(pTask);
return -1;
}
tDecoderClear(&decoder);
ASSERT(pTask->taskLevel == TASK_LEVEL__AGG);
ASSERT(pTask->info.taskLevel == TASK_LEVEL__AGG);
// 2.save task
taosWLockLatch(&pSnode->pMeta->lock);
@ -161,19 +164,20 @@ int32_t sndProcessTaskDeployReq(SSnode *pSnode, char *msg, int32_t msgLen) {
return -1;
}
int32_t numOfTasks = streamMetaGetNumOfTasks(pSnode->pMeta);
taosWUnLockLatch(&pSnode->pMeta->lock);
// 3.go through recover steps to fill history
if (pTask->fillHistory) {
streamSetParamForRecover(pTask);
streamAggRecoverPrepare(pTask);
}
streamPrepareNdoCheckDownstream(pTask);
qDebug("snode:%d s-task:%s is deployed on snode and add into meta, status:%s, numOfTasks:%d", SNODE_HANDLE, pTask->id.idStr,
streamGetTaskStatusStr(pTask->status.taskStatus), numOfTasks);
return 0;
}
int32_t sndProcessTaskDropReq(SSnode *pSnode, char *msg, int32_t msgLen) {
SVDropStreamTaskReq *pReq = (SVDropStreamTaskReq *)msg;
qDebug("snode:%d receive msg to drop stream task:0x%x", pSnode->pMeta->vgId, pReq->taskId);
streamMetaRemoveTask(pSnode->pMeta, pReq->taskId);
return 0;
}
@ -255,13 +259,15 @@ int32_t sndProcessTaskRetrieveRsp(SSnode *pSnode, SRpcMsg *pMsg) {
}
int32_t sndProcessWriteMsg(SSnode *pSnode, SRpcMsg *pMsg, SRpcMsg *pRsp) {
void *pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
int32_t len = pMsg->contLen - sizeof(SMsgHead);
switch (pMsg->msgType) {
case TDMT_STREAM_TASK_DEPLOY:
case TDMT_STREAM_TASK_DEPLOY: {
void *pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
int32_t len = pMsg->contLen - sizeof(SMsgHead);
return sndProcessTaskDeployReq(pSnode, pReq, len);
}
case TDMT_STREAM_TASK_DROP:
return sndProcessTaskDropReq(pSnode, pReq, len);
return sndProcessTaskDropReq(pSnode, pMsg->pCont, pMsg->contLen);
default:
ASSERT(0);
}
@ -277,7 +283,7 @@ int32_t sndProcessTaskRecoverFinishReq(SSnode *pSnode, SRpcMsg *pMsg) {
SDecoder decoder;
tDecoderInit(&decoder, msg, msgLen);
tDecodeSStreamRecoverFinishReq(&decoder, &req);
tDecodeStreamRecoverFinishReq(&decoder, &req);
tDecoderClear(&decoder);
// find task
@ -286,7 +292,7 @@ int32_t sndProcessTaskRecoverFinishReq(SSnode *pSnode, SRpcMsg *pMsg) {
return -1;
}
// do process request
if (streamProcessRecoverFinishReq(pTask, req.childId) < 0) {
if (streamProcessRecoverFinishReq(pTask, req.taskId, req.childId) < 0) {
streamMetaReleaseTask(pSnode->pMeta, pTask);
return -1;
}
@ -300,6 +306,102 @@ int32_t sndProcessTaskRecoverFinishRsp(SSnode *pSnode, SRpcMsg *pMsg) {
return 0;
}
int32_t sndProcessStreamTaskCheckReq(SSnode *pSnode, SRpcMsg *pMsg) {
char *msgStr = pMsg->pCont;
char *msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead));
int32_t msgLen = pMsg->contLen - sizeof(SMsgHead);
SStreamTaskCheckReq req;
SDecoder decoder;
tDecoderInit(&decoder, (uint8_t *)msgBody, msgLen);
tDecodeStreamTaskCheckReq(&decoder, &req);
tDecoderClear(&decoder);
int32_t taskId = req.downstreamTaskId;
SStreamTaskCheckRsp rsp = {
.reqId = req.reqId,
.streamId = req.streamId,
.childId = req.childId,
.downstreamNodeId = req.downstreamNodeId,
.downstreamTaskId = req.downstreamTaskId,
.upstreamNodeId = req.upstreamNodeId,
.upstreamTaskId = req.upstreamTaskId,
};
SStreamTask *pTask = streamMetaAcquireTask(pSnode->pMeta, taskId);
if (pTask != NULL) {
rsp.status = streamTaskCheckStatus(pTask);
streamMetaReleaseTask(pSnode->pMeta, pTask);
qDebug("s-task:%s recv task check req(reqId:0x%" PRIx64 ") task:0x%x (vgId:%d), status:%s, rsp status %d",
pTask->id.idStr, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId,
streamGetTaskStatusStr(pTask->status.taskStatus), rsp.status);
} else {
rsp.status = 0;
qDebug("tq recv task check(taskId:0x%x not built yet) req(reqId:0x%" PRIx64
") from task:0x%x (vgId:%d), rsp status %d",
taskId, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status);
}
SEncoder encoder;
int32_t code;
int32_t len;
tEncodeSize(tEncodeStreamTaskCheckRsp, &rsp, len, code);
if (code < 0) {
qError("vgId:%d failed to encode task check rsp, task:0x%x", pSnode->pMeta->vgId, taskId);
return -1;
}
void *buf = rpcMallocCont(sizeof(SMsgHead) + len);
((SMsgHead *)buf)->vgId = htonl(req.upstreamNodeId);
void *abuf = POINTER_SHIFT(buf, sizeof(SMsgHead));
tEncoderInit(&encoder, (uint8_t *)abuf, len);
tEncodeStreamTaskCheckRsp(&encoder, &rsp);
tEncoderClear(&encoder);
SRpcMsg rspMsg = {.code = 0, .pCont = buf, .contLen = sizeof(SMsgHead) + len, .info = pMsg->info};
tmsgSendRsp(&rspMsg);
return 0;
}
int32_t sndProcessStreamTaskCheckRsp(SSnode* pSnode, SRpcMsg* pMsg) {
char* pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
int32_t len = pMsg->contLen - sizeof(SMsgHead);
int32_t code;
SStreamTaskCheckRsp rsp;
SDecoder decoder;
tDecoderInit(&decoder, (uint8_t*)pReq, len);
code = tDecodeStreamTaskCheckRsp(&decoder, &rsp);
if (code < 0) {
tDecoderClear(&decoder);
return -1;
}
tDecoderClear(&decoder);
qDebug("tq task:0x%x (vgId:%d) recv check rsp(reqId:0x%" PRIx64 ") from 0x%x (vgId:%d) status %d",
rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, rsp.status);
SStreamTask* pTask = streamMetaAcquireTask(pSnode->pMeta, rsp.upstreamTaskId);
if (pTask == NULL) {
qError("tq failed to locate the stream task:0x%x (vgId:%d), it may have been destroyed", rsp.upstreamTaskId,
pSnode->pMeta->vgId);
return -1;
}
code = streamProcessCheckRsp(pTask, &rsp);
streamMetaReleaseTask(pSnode->pMeta, pTask);
return code;
}
int32_t sndProcessStreamMsg(SSnode *pSnode, SRpcMsg *pMsg) {
switch (pMsg->msgType) {
case TDMT_STREAM_TASK_RUN:
@ -312,10 +414,14 @@ int32_t sndProcessStreamMsg(SSnode *pSnode, SRpcMsg *pMsg) {
return sndProcessTaskRetrieveReq(pSnode, pMsg);
case TDMT_STREAM_RETRIEVE_RSP:
return sndProcessTaskRetrieveRsp(pSnode, pMsg);
case TDMT_STREAM_RECOVER_FINISH:
case TDMT_STREAM_SCAN_HISTORY_FINISH:
return sndProcessTaskRecoverFinishReq(pSnode, pMsg);
case TDMT_STREAM_RECOVER_FINISH_RSP:
case TDMT_STREAM_SCAN_HISTORY_FINISH_RSP:
return sndProcessTaskRecoverFinishRsp(pSnode, pMsg);
case TDMT_STREAM_TASK_CHECK:
return sndProcessStreamTaskCheckReq(pSnode, pMsg);
case TDMT_STREAM_TASK_CHECK_RSP:
return sndProcessStreamTaskCheckRsp(pSnode, pMsg);
default:
ASSERT(0);
}

View File

@ -101,6 +101,7 @@ void initStateStoreAPI(SStateStore* pStore) {
pStore->streamStateCommit = streamStateCommit;
pStore->streamStateDestroy= streamStateDestroy;
pStore->streamStateDeleteCheckPoint = streamStateDeleteCheckPoint;
pStore->streamStateReloadInfo = streamStateReloadInfo;
}
void initFunctionStateStore(SFunctionStateStore* pStore) {

View File

@ -96,6 +96,7 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t version, SRp
int32_t vnodeProcessSyncMsg(SVnode *pVnode, SRpcMsg *pMsg, SRpcMsg **pRsp);
int32_t vnodeProcessQueryMsg(SVnode *pVnode, SRpcMsg *pMsg);
int32_t vnodeProcessFetchMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo);
int32_t vnodeProcessStreamMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo);
void vnodeProposeWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs);
void vnodeApplyWriteMsg(SQueueInfo *pInfo, STaosQall *qall, int32_t numOfMsgs);
void vnodeProposeCommitOnNeed(SVnode *pVnode, bool atExit);
@ -126,8 +127,6 @@ int32_t metaGetCachedTbGroup(void *pVnode, tb_uid_t suid, const uint8_t *pKey,
int32_t metaPutTbGroupToCache(void* pVnode, uint64_t suid, const void *pKey, int32_t keyLen, void *pPayload,
int32_t payloadLen);
int64_t metaGetTbNum(SMeta *pMeta);
int32_t metaGetStbStats(void *pVnode, int64_t uid, int64_t *numOfTables);
// tsdb
@ -256,7 +255,7 @@ int32_t vnodeSnapWriterOpen(SVnode *pVnode, int64_t sver, int64_t ever, SVSnapWr
int32_t vnodeSnapWriterClose(SVSnapWriter *pWriter, int8_t rollback, SSnapshot *pSnapshot);
int32_t vnodeSnapWrite(SVSnapWriter *pWriter, uint8_t *pData, uint32_t nData);
int32_t buildSnapContext(SVnode *pVnode, int64_t snapVersion, int64_t suid, int8_t subType, bool withMeta,
int32_t buildSnapContext(SVnode *pVnode, int64_t snapVersion, int64_t suid, int8_t subType, int8_t withMeta,
SSnapContext **ctxRet);
int32_t getTableInfoFromSnapshot(SSnapContext *ctx, void **pBuf, int32_t *contLen, int16_t *type, int64_t *uid);
SMetaTableInfo getMetaTableInfoFromSnapshot(SSnapContext *ctx);

View File

@ -45,27 +45,10 @@ extern "C" {
typedef struct STqOffsetStore STqOffsetStore;
// tqPush
// typedef struct {
// // msg info
// int64_t consumerId;
// int64_t reqOffset;
// int64_t processedVer;
// int32_t epoch;
// // rpc info
// int64_t reqId;
// SRpcHandleInfo rpcInfo;
// tmr_h timerId;
// int8_t tmrStopped;
// // exec
// int8_t inputStatus;
// int8_t execStatus;
// SStreamQueue inputQ;
// SRWLatch lock;
// } STqPushHandle;
#define EXTRACT_DATA_FROM_WAL_ID (-1)
#define STREAM_TASK_STATUS_CHECK_ID (-2)
// tqExec
typedef struct {
char* qmsg; // SubPlanToString
} STqExecCol;
@ -184,10 +167,10 @@ int32_t tqOffsetRestoreFromFile(STqOffsetStore* pStore, const char* fname);
// tqStream
int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver);
int32_t tqStreamTasksScanWal(STQ* pTq);
int32_t tqStreamTasksStatusCheck(STQ* pTq);
// tq util
int32_t extractDelDataBlock(const void* pData, int32_t len, int64_t ver, SStreamRefDataBlock** pRefBlock);
char* createStreamTaskIdStr(int64_t streamId, int32_t taskId);
int32_t tqAddInputBlockNLaunchTask(SStreamTask* pTask, SStreamQueueItem* pQueueItem);
int32_t tqExtractDataForMq(STQ* pTq, STqHandle* pHandle, const SMqPollReq* pRequest, SRpcMsg* pMsg);
int32_t tqDoSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const SMqDataRsp* pRsp, int32_t epoch, int64_t consumerId,

View File

@ -67,7 +67,6 @@ typedef struct STsdbReadSnap STsdbReadSnap;
typedef struct SBlockInfo SBlockInfo;
typedef struct SSmaInfo SSmaInfo;
typedef struct SBlockCol SBlockCol;
typedef struct SVersionRange SVersionRange;
typedef struct SLDataIter SLDataIter;
typedef struct SDiskCol SDiskCol;
typedef struct SDiskData SDiskData;
@ -390,11 +389,6 @@ struct TSDBKEY {
TSKEY ts;
};
struct SVersionRange {
uint64_t minVer;
uint64_t maxVer;
};
typedef struct SMemSkipListNode SMemSkipListNode;
struct SMemSkipListNode {
int8_t level;

View File

@ -178,7 +178,8 @@ SArray* metaGetSmaTbUids(SMeta* pMeta);
void* metaGetIdx(SMeta* pMeta);
void* metaGetIvtIdx(SMeta* pMeta);
void metaReaderInit(SMetaReader* pReader, SMeta* pMeta, int32_t flags);
int64_t metaGetTbNum(SMeta *pMeta);
void metaReaderDoInit(SMetaReader *pReader, SMeta *pMeta, int32_t flags);
int32_t metaCreateTSma(SMeta* pMeta, int64_t version, SSmaCfg* pCfg);
int32_t metaDropTSma(SMeta* pMeta, int64_t indexUid);
@ -217,6 +218,7 @@ int tqPushMsg(STQ*, void* msg, int32_t msgLen, tmsg_t msgType, int64_t ver);
int tqRegisterPushHandle(STQ* pTq, void* handle, SRpcMsg* pMsg);
int tqUnregisterPushHandle(STQ* pTq, void* pHandle);
int tqStartStreamTasks(STQ* pTq); // restore all stream tasks after vnode launching completed.
int tqCheckStreamStatus(STQ* pTq);
int tqCommit(STQ*);
int32_t tqUpdateTbUidList(STQ* pTq, const SArray* tbUidList, bool isAdd);
@ -238,14 +240,14 @@ int32_t tqProcessTaskDropReq(STQ* pTq, int64_t version, char* msg, int32_t msgLe
int32_t tqProcessTaskPauseReq(STQ* pTq, int64_t version, char* msg, int32_t msgLen);
int32_t tqProcessTaskResumeReq(STQ* pTq, int64_t version, char* msg, int32_t msgLen);
int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg);
int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, int64_t version, char* msg, int32_t msgLen);
int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, int64_t version, SRpcMsg* pMsg);
int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg);
int32_t tqProcessTaskDispatchReq(STQ* pTq, SRpcMsg* pMsg, bool exec);
int32_t tqProcessTaskDispatchRsp(STQ* pTq, SRpcMsg* pMsg);
int32_t tqProcessTaskRetrieveReq(STQ* pTq, SRpcMsg* pMsg);
int32_t tqProcessTaskRetrieveRsp(STQ* pTq, SRpcMsg* pMsg);
int32_t tqProcessTaskRecover1Req(STQ* pTq, SRpcMsg* pMsg);
int32_t tqProcessTaskRecover2Req(STQ* pTq, int64_t version, char* msg, int32_t msgLen);
int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg);
int32_t tqProcessTaskTransferStateReq(STQ* pTq, int64_t version, char* msg, int32_t msgLen);
int32_t tqProcessTaskRecoverFinishReq(STQ* pTq, SRpcMsg* pMsg);
int32_t tqProcessTaskRecoverFinishRsp(STQ* pTq, SRpcMsg* pMsg);
int32_t tqCheckLogInWal(STQ* pTq, int64_t version);

View File

@ -17,13 +17,13 @@
#include "osMemory.h"
#include "tencode.h"
void _metaReaderInit(SMetaReader *pReader, void *pVnode, int32_t flags, SStoreMeta *pAPI) {
SMeta *pMeta = ((SVnode *)pVnode)->pMeta;
metaReaderInit(pReader, pMeta, flags);
void _metaReaderInit(SMetaReader* pReader, void* pVnode, int32_t flags, SStoreMeta* pAPI) {
SMeta* pMeta = ((SVnode*)pVnode)->pMeta;
metaReaderDoInit(pReader, pMeta, flags);
pReader->pAPI = pAPI;
}
void metaReaderInit(SMetaReader *pReader, SMeta *pMeta, int32_t flags) {
void metaReaderDoInit(SMetaReader *pReader, SMeta *pMeta, int32_t flags) {
memset(pReader, 0, sizeof(*pReader));
pReader->pMeta = pMeta;
pReader->flags = flags;
@ -143,7 +143,7 @@ tb_uid_t metaGetTableEntryUidByName(SMeta *pMeta, const char *name) {
int metaGetTableNameByUid(void *pVnode, uint64_t uid, char *tbName) {
int code = 0;
SMetaReader mr = {0};
metaReaderInit(&mr, ((SVnode *)pVnode)->pMeta, 0);
metaReaderDoInit(&mr, ((SVnode*)pVnode)->pMeta, 0);
code = metaReaderGetTableEntryByUid(&mr, uid);
if (code < 0) {
metaReaderClear(&mr);
@ -159,7 +159,7 @@ int metaGetTableNameByUid(void *pVnode, uint64_t uid, char *tbName) {
int metaGetTableSzNameByUid(void *meta, uint64_t uid, char *tbName) {
int code = 0;
SMetaReader mr = {0};
metaReaderInit(&mr, (SMeta *)meta, 0);
metaReaderDoInit(&mr, (SMeta *)meta, 0);
code = metaReaderGetTableEntryByUid(&mr, uid);
if (code < 0) {
metaReaderClear(&mr);
@ -174,7 +174,7 @@ int metaGetTableSzNameByUid(void *meta, uint64_t uid, char *tbName) {
int metaGetTableUidByName(void *pVnode, char *tbName, uint64_t *uid) {
int code = 0;
SMetaReader mr = {0};
metaReaderInit(&mr, ((SVnode *)pVnode)->pMeta, 0);
metaReaderDoInit(&mr, ((SVnode *)pVnode)->pMeta, 0);
SMetaReader *pReader = &mr;
@ -195,7 +195,7 @@ int metaGetTableUidByName(void *pVnode, char *tbName, uint64_t *uid) {
int metaGetTableTypeByName(void *pVnode, char *tbName, ETableType *tbType) {
int code = 0;
SMetaReader mr = {0};
metaReaderInit(&mr, ((SVnode *)pVnode)->pMeta, 0);
metaReaderDoInit(&mr, ((SVnode*)pVnode)->pMeta, 0);
code = metaGetTableEntryByName(&mr, tbName);
if (code == 0) *tbType = mr.me.type;
@ -215,7 +215,7 @@ int metaReadNext(SMetaReader *pReader) {
int metaGetTableTtlByUid(void *meta, uint64_t uid, int64_t *ttlDays) {
int code = -1;
SMetaReader mr = {0};
metaReaderInit(&mr, (SMeta *)meta, 0);
metaReaderDoInit(&mr, (SMeta *)meta, 0);
code = metaReaderGetTableEntryByUid(&mr, uid);
if (code < 0) {
goto _exit;
@ -244,9 +244,7 @@ SMTbCursor *metaOpenTbCursor(void *pVnode) {
return NULL;
}
SVnode *pVnodeObj = pVnode;
// metaReaderInit(&pTbCur->mr, pVnodeObj->pMeta, 0);
SVnode* pVnodeObj = pVnode;
// tdbTbcMoveToFirst((TBC *)pTbCur->pDbc);
pTbCur->pMeta = pVnodeObj->pMeta;
pTbCur->paused = 1;
@ -277,7 +275,7 @@ void metaPauseTbCursor(SMTbCursor *pTbCur) {
}
void metaResumeTbCursor(SMTbCursor *pTbCur, int8_t first) {
if (pTbCur->paused) {
metaReaderInit(&pTbCur->mr, pTbCur->pMeta, 0);
metaReaderDoInit(&pTbCur->mr, pTbCur->pMeta, 0);
tdbTbcOpen(((SMeta *)pTbCur->pMeta)->pUidIdx, (TBC **)&pTbCur->pDbc, NULL);
@ -784,7 +782,7 @@ STSmaWrapper *metaGetSmaInfoByTable(SMeta *pMeta, tb_uid_t uid, bool deepCopy) {
}
SMetaReader mr = {0};
metaReaderInit(&mr, pMeta, 0);
metaReaderDoInit(&mr, pMeta, 0);
int64_t smaId;
int smaIdx = 0;
STSma *pTSma = NULL;
@ -839,7 +837,7 @@ _err:
STSma *metaGetSmaInfoByIndex(SMeta *pMeta, int64_t indexUid) {
STSma *pTSma = NULL;
SMetaReader mr = {0};
metaReaderInit(&mr, pMeta, 0);
metaReaderDoInit(&mr, pMeta, 0);
if (metaReaderGetTableEntryByUid(&mr, indexUid) < 0) {
metaWarn("vgId:%d, failed to get table entry for smaId:%" PRIi64, TD_VID(pMeta->pVnode), indexUid);
metaReaderClear(&mr);

View File

@ -37,7 +37,7 @@ int32_t metaCreateTSma(SMeta *pMeta, int64_t version, SSmaCfg *pCfg) {
// validate req
// save smaIndex
metaReaderInit(&mr, pMeta, 0);
metaReaderDoInit(&mr, pMeta, 0);
if (metaReaderGetTableEntryByUidCache(&mr, pCfg->indexUid) == 0) {
#if 1
terrno = TSDB_CODE_TSMA_ALREADY_EXIST;

View File

@ -260,7 +260,7 @@ static void saveSuperTableInfoForChildTable(SMetaEntry* me, SHashObj* suidInfo)
taosHashPut(suidInfo, &me->uid, sizeof(tb_uid_t), &dataTmp, sizeof(STableInfoForChildTable));
}
int32_t buildSnapContext(SVnode* pVnode, int64_t snapVersion, int64_t suid, int8_t subType, bool withMeta,
int32_t buildSnapContext(SVnode* pVnode, int64_t snapVersion, int64_t suid, int8_t subType, int8_t withMeta,
SSnapContext** ctxRet) {
SSnapContext* ctx = taosMemoryCalloc(1, sizeof(SSnapContext));
if (ctx == NULL) return -1;
@ -476,7 +476,7 @@ int32_t getTableInfoFromSnapshot(SSnapContext* ctx, void** pBuf, int32_t* contLe
if (ctx->index >= taosArrayGetSize(ctx->idList)) {
metaDebug("tmqsnap get meta end");
ctx->index = 0;
ctx->queryMeta = false; // change to get data
ctx->queryMeta = 0; // change to get data
return 0;
}

View File

@ -709,7 +709,7 @@ int metaCreateTable(SMeta *pMeta, int64_t ver, SVCreateTbReq *pReq, STableMetaRs
}
// validate req
metaReaderInit(&mr, pMeta, 0);
metaReaderDoInit(&mr, pMeta, 0);
if (metaGetTableEntryByName(&mr, pReq->name) == 0) {
if (pReq->type == TSDB_CHILD_TABLE && pReq->ctb.suid != mr.me.ctbEntry.suid) {
terrno = TSDB_CODE_TDB_TABLE_IN_OTHER_STABLE;

View File

@ -896,7 +896,7 @@ static int32_t tdRSmaInfoClone(SSma *pSma, SRSmaInfo *pInfo) {
return TSDB_CODE_SUCCESS;
}
metaReaderInit(&mr, SMA_META(pSma), 0);
metaReaderDoInit(&mr, SMA_META(pSma), 0);
smaDebug("vgId:%d, rsma clone qTaskInfo for suid:%" PRIi64, SMA_VID(pSma), pInfo->suid);
if (metaReaderGetTableEntryByUidCache(&mr, pInfo->suid) < 0) {
code = terrno;
@ -1116,7 +1116,7 @@ static int32_t tdRSmaRestoreQTaskInfoInit(SSma *pSma, int64_t *nTables) {
}
int64_t nRsmaTables = 0;
metaReaderInit(&mr, SMA_META(pSma), 0);
metaReaderDoInit(&mr, SMA_META(pSma), 0);
if (!(uidStore.tbUids = taosArrayInit(1024, sizeof(tb_uid_t)))) {
code = TSDB_CODE_OUT_OF_MEMORY;
TSDB_CHECK_CODE(code, lino, _exit);

View File

@ -18,8 +18,6 @@
// 0: not init
// 1: already inited
// 2: wait to be inited or cleaup
#define WAL_READ_TASKS_ID (-1)
static int32_t tqInitialize(STQ* pTq);
static FORCE_INLINE bool tqIsHandleExec(STqHandle* pHandle) { return TMQ_HANDLE_STATUS_EXEC == pHandle->status; }
@ -158,6 +156,29 @@ void tqClose(STQ* pTq) {
taosMemoryFree(pTq);
}
static bool hasStreamTaskInTimer(SStreamMeta* pMeta) {
bool inTimer = false;
taosWLockLatch(&pMeta->lock);
void* pIter = NULL;
while(1) {
pIter = taosHashIterate(pMeta->pTasks, pIter);
if (pIter == NULL) {
break;
}
SStreamTask* pTask = *(SStreamTask**)pIter;
if (pTask->status.timerActive == 1) {
inTimer = true;
}
}
taosWUnLockLatch(&pMeta->lock);
return inTimer;
}
void tqNotifyClose(STQ* pTq) {
if (pTq != NULL) {
taosWLockLatch(&pTq->pStreamMeta->lock);
@ -170,77 +191,90 @@ void tqNotifyClose(STQ* pTq) {
}
SStreamTask* pTask = *(SStreamTask**)pIter;
tqDebug("vgId:%d s-task:%s set dropping flag", pTq->pStreamMeta->vgId, pTask->id.idStr);
tqDebug("vgId:%d s-task:%s set closing flag", pTq->pStreamMeta->vgId, pTask->id.idStr);
pTask->status.taskStatus = TASK_STATUS__STOP;
int64_t st = taosGetTimestampMs();
qKillTask(pTask->exec.pExecutor, TSDB_CODE_SUCCESS);
int64_t el = taosGetTimestampMs() - st;
tqDebug("vgId:%d s-task:%s is closed in %" PRId64 " ms", pTq->pStreamMeta->vgId, pTask->id.idStr, el);
}
taosWUnLockLatch(&pTq->pStreamMeta->lock);
tqDebug("vgId:%d start to check all tasks", pTq->pStreamMeta->vgId);
int64_t st = taosGetTimestampMs();
while(hasStreamTaskInTimer(pTq->pStreamMeta)) {
tqDebug("vgId:%d some tasks in timer, wait for 100ms and recheck", pTq->pStreamMeta->vgId);
taosMsleep(100);
}
int64_t el = taosGetTimestampMs() - st;
tqDebug("vgId:%d all stream tasks are not in timer, continue close, elapsed time:%"PRId64" ms", pTq->pStreamMeta->vgId, el);
}
}
static int32_t doSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const SMqDataRsp* pRsp, int32_t epoch,
int64_t consumerId, int32_t type) {
int32_t len = 0;
int32_t code = 0;
if (type == TMQ_MSG_TYPE__POLL_RSP) {
tEncodeSize(tEncodeMqDataRsp, pRsp, len, code);
} else if (type == TMQ_MSG_TYPE__TAOSX_RSP) {
tEncodeSize(tEncodeSTaosxRsp, (STaosxRsp*)pRsp, len, code);
}
if (code < 0) {
return -1;
}
int32_t tlen = sizeof(SMqRspHead) + len;
void* buf = rpcMallocCont(tlen);
if (buf == NULL) {
return -1;
}
((SMqRspHead*)buf)->mqMsgType = type;
((SMqRspHead*)buf)->epoch = epoch;
((SMqRspHead*)buf)->consumerId = consumerId;
void* abuf = POINTER_SHIFT(buf, sizeof(SMqRspHead));
SEncoder encoder = {0};
tEncoderInit(&encoder, abuf, len);
if (type == TMQ_MSG_TYPE__POLL_RSP) {
tEncodeMqDataRsp(&encoder, pRsp);
} else if (type == TMQ_MSG_TYPE__TAOSX_RSP) {
tEncodeSTaosxRsp(&encoder, (STaosxRsp*)pRsp);
}
tEncoderClear(&encoder);
SRpcMsg rsp = {
.info = *pRpcHandleInfo,
.pCont = buf,
.contLen = tlen,
.code = 0,
};
tmsgSendRsp(&rsp);
return 0;
}
//static int32_t doSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const SMqDataRsp* pRsp, int32_t epoch,
// int64_t consumerId, int32_t type) {
// int32_t len = 0;
// int32_t code = 0;
//
// if (type == TMQ_MSG_TYPE__POLL_DATA_RSP) {
// tEncodeSize(tEncodeMqDataRsp, pRsp, len, code);
// } else if (type == TMQ_MSG_TYPE__POLL_DATA_META_RSP) {
// tEncodeSize(tEncodeSTaosxRsp, (STaosxRsp*)pRsp, len, code);
// }
//
// if (code < 0) {
// return -1;
// }
//
// int32_t tlen = sizeof(SMqRspHead) + len;
// void* buf = rpcMallocCont(tlen);
// if (buf == NULL) {
// return -1;
// }
//
// ((SMqRspHead*)buf)->mqMsgType = type;
// ((SMqRspHead*)buf)->epoch = epoch;
// ((SMqRspHead*)buf)->consumerId = consumerId;
//
// void* abuf = POINTER_SHIFT(buf, sizeof(SMqRspHead));
//
// SEncoder encoder = {0};
// tEncoderInit(&encoder, abuf, len);
//
// if (type == TMQ_MSG_TYPE__POLL_DATA_RSP) {
// tEncodeMqDataRsp(&encoder, pRsp);
// } else if (type == TMQ_MSG_TYPE__POLL_DATA_META_RSP) {
// tEncodeSTaosxRsp(&encoder, (STaosxRsp*)pRsp);
// }
//
// tEncoderClear(&encoder);
//
// SRpcMsg rsp = {
// .info = *pRpcHandleInfo,
// .pCont = buf,
// .contLen = tlen,
// .code = 0,
// };
//
// tmsgSendRsp(&rsp);
// return 0;
//}
int32_t tqPushDataRsp(STqHandle* pHandle, int32_t vgId) {
SMqDataRsp dataRsp = {0};
dataRsp.head.consumerId = pHandle->consumerId;
dataRsp.head.epoch = pHandle->epoch;
dataRsp.head.mqMsgType = TMQ_MSG_TYPE__POLL_RSP;
dataRsp.head.mqMsgType = TMQ_MSG_TYPE__POLL_DATA_RSP;
int64_t sver = 0, ever = 0;
walReaderValidVersionRange(pHandle->execHandle.pTqReader->pWalReader, &sver, &ever);
tqDoSendDataRsp(&pHandle->msg->info, &dataRsp, pHandle->epoch, pHandle->consumerId, TMQ_MSG_TYPE__POLL_RSP, sver,
tqDoSendDataRsp(&pHandle->msg->info, &dataRsp, pHandle->epoch, pHandle->consumerId, TMQ_MSG_TYPE__POLL_DATA_RSP, sver,
ever);
char buf1[TSDB_OFFSET_LEN] = {0};
@ -771,19 +805,32 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) {
pTask->outputStatus = TASK_OUTPUT_STATUS__NORMAL;
pTask->pMsgCb = &pTq->pVnode->msgCb;
pTask->pMeta = pTq->pStreamMeta;
pTask->chkInfo.version = ver;
pTask->chkInfo.currentVer = ver;
// expand executor
pTask->status.taskStatus = (pTask->fillHistory) ? TASK_STATUS__WAIT_DOWNSTREAM : TASK_STATUS__NORMAL;
pTask->dataRange.range.maxVer = ver;
pTask->dataRange.range.minVer = ver;
if (pTask->taskLevel == TASK_LEVEL__SOURCE) {
pTask->pState = streamStateOpen(pTq->pStreamMeta->path, pTask, false, -1, -1);
if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
SStreamTask* pSateTask = pTask;
SStreamTask task = {0};
if (pTask->info.fillHistory) {
task.id = pTask->streamTaskId;
task.pMeta = pTask->pMeta;
pSateTask = &task;
}
pTask->pState = streamStateOpen(pTq->pStreamMeta->path, pSateTask, false, -1, -1);
if (pTask->pState == NULL) {
return -1;
}
SReadHandle handle = {.vnode = pTq->pVnode, .initTqReader = 1, .pStateBackend = pTask->pState};
SReadHandle handle = {.vnode = pTq->pVnode,
.initTqReader = 1,
.pStateBackend = pTask->pState,
.fillHistory = pTask->info.fillHistory,
.winRange = pTask->dataRange.window};
initStorageAPI(&handle.api);
pTask->exec.pExecutor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &handle, vgId);
@ -792,14 +839,25 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) {
}
qSetTaskId(pTask->exec.pExecutor, pTask->id.taskId, pTask->id.streamId);
} else if (pTask->taskLevel == TASK_LEVEL__AGG) {
pTask->pState = streamStateOpen(pTq->pStreamMeta->path, pTask, false, -1, -1);
} else if (pTask->info.taskLevel == TASK_LEVEL__AGG) {
SStreamTask* pSateTask = pTask;
SStreamTask task = {0};
if (pTask->info.fillHistory) {
task.id = pTask->streamTaskId;
task.pMeta = pTask->pMeta;
pSateTask = &task;
}
pTask->pState = streamStateOpen(pTq->pStreamMeta->path, pSateTask, false, -1, -1);
if (pTask->pState == NULL) {
return -1;
}
int32_t numOfVgroups = (int32_t)taosArrayGetSize(pTask->childEpInfo);
SReadHandle handle = {.vnode = NULL, .numOfVgroups = numOfVgroups, .pStateBackend = pTask->pState};
int32_t numOfVgroups = (int32_t)taosArrayGetSize(pTask->pUpstreamEpInfoList);
SReadHandle handle = {.vnode = NULL,
.numOfVgroups = numOfVgroups,
.pStateBackend = pTask->pState,
.fillHistory = pTask->info.fillHistory,
.winRange = pTask->dataRange.window};
initStorageAPI(&handle.api);
pTask->exec.pExecutor = qCreateStreamExecTaskInfo(pTask->exec.qmsg, &handle, vgId);
@ -834,15 +892,17 @@ int32_t tqExpandTask(STQ* pTq, SStreamTask* pTask, int64_t ver) {
tSimpleHashSetFreeFp(pTask->tbSink.pTblInfo, freePtr);
}
if (pTask->taskLevel == TASK_LEVEL__SOURCE) {
if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
SWalFilterCond cond = {.deleteMsg = 1}; // delete msg also extract from wal files
pTask->exec.pWalReader = walOpenReader(pTq->pVnode->pWal, &cond);
}
streamSetupTrigger(pTask);
streamSetupScheduleTrigger(pTask);
tqInfo("vgId:%d expand stream task, s-task:%s, checkpoint ver:%" PRId64 " child id:%d, level:%d", vgId,
pTask->id.idStr, pTask->chkInfo.version, pTask->selfChildId, pTask->taskLevel);
tqInfo("vgId:%d expand stream task, s-task:%s, checkpoint ver:%" PRId64
" child id:%d, level:%d, scan-history:%d, trigger:%" PRId64 " ms",
vgId, pTask->id.idStr, pTask->chkInfo.version, pTask->info.selfChildId, pTask->info.taskLevel,
pTask->info.fillHistory, pTask->triggerParam);
// next valid version will add one
pTask->chkInfo.version += 1;
@ -858,10 +918,11 @@ int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) {
SDecoder decoder;
tDecoderInit(&decoder, (uint8_t*)msgBody, msgLen);
tDecodeSStreamTaskCheckReq(&decoder, &req);
tDecodeStreamTaskCheckReq(&decoder, &req);
tDecoderClear(&decoder);
int32_t taskId = req.downstreamTaskId;
int32_t taskId = req.downstreamTaskId;
SStreamTaskCheckRsp rsp = {
.reqId = req.reqId,
.streamId = req.streamId,
@ -878,23 +939,20 @@ int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) {
rsp.status = streamTaskCheckStatus(pTask);
streamMetaReleaseTask(pTq->pStreamMeta, pTask);
tqDebug("s-task:%s recv task check req(reqId:0x%" PRIx64
") %d at node %d task status:%d, check req from task %d at node %d, rsp status %d",
pTask->id.idStr, rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, pTask->status.taskStatus,
rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status);
tqDebug("s-task:%s recv task check req(reqId:0x%" PRIx64 ") task:0x%x (vgId:%d), status:%s, rsp status %d",
pTask->id.idStr, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId,
streamGetTaskStatusStr(pTask->status.taskStatus), rsp.status);
} else {
rsp.status = 0;
tqDebug("tq recv task check(taskId:0x%x not built yet) req(reqId:0x%" PRIx64
") %d at node %d, check req from task:0x%x at node %d, rsp status %d",
taskId, rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, rsp.upstreamTaskId, rsp.upstreamNodeId,
rsp.status);
tqDebug("tq recv task check(taskId:0x%x not built yet) req(reqId:0x%" PRIx64 ") from task:0x%x (vgId:%d), rsp status %d",
taskId, rsp.reqId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status);
}
SEncoder encoder;
int32_t code;
int32_t len;
tEncodeSize(tEncodeSStreamTaskCheckRsp, &rsp, len, code);
tEncodeSize(tEncodeStreamTaskCheckRsp, &rsp, len, code);
if (code < 0) {
tqError("vgId:%d failed to encode task check rsp, task:0x%x", pTq->pStreamMeta->vgId, taskId);
return -1;
@ -905,7 +963,7 @@ int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) {
void* abuf = POINTER_SHIFT(buf, sizeof(SMsgHead));
tEncoderInit(&encoder, (uint8_t*)abuf, len);
tEncodeSStreamTaskCheckRsp(&encoder, &rsp);
tEncodeStreamTaskCheckRsp(&encoder, &rsp);
tEncoderClear(&encoder);
SRpcMsg rspMsg = {.code = 0, .pCont = buf, .contLen = sizeof(SMsgHead) + len, .info = pMsg->info};
@ -914,13 +972,16 @@ int32_t tqProcessStreamTaskCheckReq(STQ* pTq, SRpcMsg* pMsg) {
return 0;
}
int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, int64_t sversion, SRpcMsg* pMsg) {
char* pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
int32_t len = pMsg->contLen - sizeof(SMsgHead);
int32_t code;
SStreamTaskCheckRsp rsp;
SDecoder decoder;
tDecoderInit(&decoder, (uint8_t*)msg, msgLen);
code = tDecodeSStreamTaskCheckRsp(&decoder, &rsp);
tDecoderInit(&decoder, (uint8_t*)pReq, len);
code = tDecodeStreamTaskCheckRsp(&decoder, &rsp);
if (code < 0) {
tDecoderClear(&decoder);
@ -928,17 +989,18 @@ int32_t tqProcessStreamTaskCheckRsp(STQ* pTq, int64_t sversion, char* msg, int32
}
tDecoderClear(&decoder);
tqDebug("tq recv task check rsp(reqId:0x%" PRIx64 ") %d at node %d check req from task:0x%x at node %d, status %d",
rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.status);
tqDebug("tq task:0x%x (vgId:%d) recv check rsp(reqId:0x%" PRIx64 ") from 0x%x (vgId:%d) status %d",
rsp.upstreamTaskId, rsp.upstreamNodeId, rsp.reqId, rsp.downstreamTaskId, rsp.downstreamNodeId, rsp.status);
SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, rsp.upstreamTaskId);
if (pTask == NULL) {
tqError("tq failed to locate the stream task:0x%x vgId:%d, it may have been destroyed", rsp.upstreamTaskId,
tqError("tq failed to locate the stream task:0x%x (vgId:%d), it may have been destroyed", rsp.upstreamTaskId,
pTq->pStreamMeta->vgId);
terrno = TSDB_CODE_STREAM_TASK_NOT_EXIST;
return -1;
}
code = streamProcessTaskCheckRsp(pTask, &rsp, sversion);
code = streamProcessCheckRsp(pTask, &rsp);
streamMetaReleaseTask(pTq->pStreamMeta, pTask);
return code;
}
@ -971,105 +1033,232 @@ int32_t tqProcessTaskDeployReq(STQ* pTq, int64_t sversion, char* msg, int32_t ms
tDecoderClear(&decoder);
SStreamMeta* pStreamMeta = pTq->pStreamMeta;
// 2.save task, use the newest commit version as the initial start version of stream task.
taosWLockLatch(&pTq->pStreamMeta->lock);
code = streamMetaAddDeployedTask(pTq->pStreamMeta, sversion, pTask);
int32_t numOfTasks = streamMetaGetNumOfTasks(pTq->pStreamMeta);
taosWLockLatch(&pStreamMeta->lock);
code = streamMetaAddDeployedTask(pStreamMeta, sversion, pTask);
int32_t numOfTasks = streamMetaGetNumOfTasks(pStreamMeta);
if (code < 0) {
tqError("vgId:%d failed to add s-task:%s, total:%d", vgId, pTask->id.idStr, numOfTasks);
taosWUnLockLatch(&pTq->pStreamMeta->lock);
taosWUnLockLatch(&pStreamMeta->lock);
return -1;
}
taosWUnLockLatch(&pTq->pStreamMeta->lock);
taosWUnLockLatch(&pStreamMeta->lock);
// 3.go through recover steps to fill history
if (pTask->fillHistory) {
streamTaskCheckDownstream(pTask, sversion);
}
// 3. It's an fill history task, do nothing. wait for the main task to start it
streamPrepareNdoCheckDownstream(pTask);
tqDebug("vgId:%d s-task:%s is deployed and add into meta, status:%s, numOfTasks:%d", vgId, pTask->id.idStr,
streamGetTaskStatusStr(pTask->status.taskStatus), numOfTasks);
tqDebug("vgId:%d s-task:%s is deployed and add meta from mnd, status:%d, total:%d", vgId, pTask->id.idStr,
pTask->status.taskStatus, numOfTasks);
return 0;
}
int32_t tqProcessTaskRecover1Req(STQ* pTq, SRpcMsg* pMsg) {
int32_t code;
int32_t tqProcessTaskScanHistory(STQ* pTq, SRpcMsg* pMsg) {
int32_t code = TSDB_CODE_SUCCESS;
char* msg = pMsg->pCont;
int32_t msgLen = pMsg->contLen;
SStreamRecoverStep1Req* pReq = (SStreamRecoverStep1Req*)msg;
SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, pReq->taskId);
SStreamMeta* pMeta = pTq->pStreamMeta;
SStreamScanHistoryReq* pReq = (SStreamScanHistoryReq*)msg;
SStreamTask* pTask = streamMetaAcquireTask(pMeta, pReq->taskId);
if (pTask == NULL) {
tqError("vgId:%d failed to acquire stream task:0x%x during stream recover, task may have been destroyed",
pMeta->vgId, pReq->taskId);
return -1;
}
// check param
int64_t fillVer1 = pTask->chkInfo.version;
if (fillVer1 <= 0) {
streamMetaReleaseTask(pTq->pStreamMeta, pTask);
streamMetaReleaseTask(pMeta, pTask);
return -1;
}
// do recovery step 1
tqDebug("s-task:%s start non-blocking recover stage(step 1) scan", pTask->id.idStr);
const char* pId = pTask->id.idStr;
tqDebug("s-task:%s start history data scan stage(step 1), status:%s", pId,
streamGetTaskStatusStr(pTask->status.taskStatus));
int64_t st = taosGetTimestampMs();
int8_t schedStatus = atomic_val_compare_exchange_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE,
TASK_SCHED_STATUS__WAITING);
if (schedStatus != TASK_SCHED_STATUS__INACTIVE) {
ASSERT(0);
return 0;
}
streamSourceRecoverScanStep1(pTask);
if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING) {
tqDebug("s-task:%s is dropped, abort recover in step1", pTask->id.idStr);
if (!pReq->igUntreated && !streamTaskRecoverScanStep1Finished(pTask)) {
streamSourceScanHistoryData(pTask);
}
streamMetaReleaseTask(pTq->pStreamMeta, pTask);
if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING || streamTaskShouldPause(&pTask->status)) {
tqDebug("s-task:%s is dropped or paused, abort recover in step1", pId);
atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE);
streamMetaReleaseTask(pMeta, pTask);
return 0;
}
double el = (taosGetTimestampMs() - st) / 1000.0;
tqDebug("s-task:%s non-blocking recover stage(step 1) ended, elapsed time:%.2fs", pTask->id.idStr, el);
tqDebug("s-task:%s history data scan stage(step 1) ended, elapsed time:%.2fs", pId, el);
// build msg to launch next step
SStreamRecoverStep2Req req;
code = streamBuildSourceRecover2Req(pTask, &req);
if (code < 0) {
streamMetaReleaseTask(pTq->pStreamMeta, pTask);
return -1;
if (pTask->info.fillHistory) {
SVersionRange* pRange = NULL;
SStreamTask* pStreamTask = NULL;
if (!pReq->igUntreated && !streamTaskRecoverScanStep1Finished(pTask)) {
// 1. stop the related stream task, get the current scan wal version of stream task, ver.
pStreamTask = streamMetaAcquireTask(pMeta, pTask->streamTaskId.taskId);
if (pStreamTask == NULL) {
// todo handle error
}
ASSERT(pStreamTask->info.taskLevel == TASK_LEVEL__SOURCE);
// wait for the stream task get ready for scan history data
while (((pStreamTask->status.downstreamReady == 0) && (pStreamTask->status.taskStatus != TASK_STATUS__STOP)) ||
pStreamTask->status.taskStatus == TASK_STATUS__SCAN_HISTORY) {
tqDebug(
"s-task:%s level:%d related stream task:%s not ready for halt, wait for it continue and recheck in 100ms",
pTask->id.idStr, pTask->info.taskLevel, pStreamTask->id.idStr);
taosMsleep(100);
}
// now we can stop the stream task execution
pStreamTask->status.taskStatus = TASK_STATUS__HALT;
tqDebug("s-task:%s level:%d status is set to halt by history scan task:%s", pStreamTask->id.idStr,
pStreamTask->info.taskLevel, pId);
// if it's an source task, extract the last version in wal.
pRange = &pTask->dataRange.range;
int64_t latestVer = walReaderGetCurrentVer(pStreamTask->exec.pWalReader);
ASSERT(latestVer >= pRange->maxVer);
int64_t nextStartVer = pRange->maxVer + 1;
if (nextStartVer > latestVer - 1) {
// no input data yet. no need to execute the secondardy scan while stream task halt
streamTaskRecoverSetAllStepFinished(pTask);
tqDebug("s-task:%s no need to perform secondary scan-history-data(step 2), since no data ingest during secondary scan", pId);
} else {
// 2. do secondary scan of the history data, the time window remain, and the version range is updated to
// [pTask->dataRange.range.maxVer, ver1]
pRange->minVer = nextStartVer;
pRange->maxVer = latestVer - 1;
}
}
if (!streamTaskRecoverScanStep1Finished(pTask)) {
tqDebug("s-task:%s level:%d verRange:%" PRId64 " - %" PRId64
" do secondary scan-history-data after halt the related stream task:%s",
pId, pTask->info.taskLevel, pRange->minVer, pRange->maxVer, pStreamTask->id.idStr);
ASSERT(pTask->status.schedStatus == TASK_SCHED_STATUS__WAITING);
st = taosGetTimestampMs();
streamSetParamForStreamScannerStep2(pTask, pRange, &pTask->dataRange.window);
}
if (!streamTaskRecoverScanStep2Finished(pTask)) {
streamSourceScanHistoryData(pTask);
if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING || streamTaskShouldPause(&pTask->status)) {
tqDebug("s-task:%s is dropped or paused, abort recover in step1", pId);
streamMetaReleaseTask(pMeta, pTask);
return 0;
}
streamTaskRecoverSetAllStepFinished(pTask);
}
el = (taosGetTimestampMs() - st) / 1000.0;
tqDebug("s-task:%s history data scan stage(step 2) ended, elapsed time:%.2fs", pId, el);
// 3. notify the downstream tasks to transfer executor state after handle all history blocks.
if (!pTask->status.transferState) {
code = streamDispatchTransferStateMsg(pTask);
if (code != TSDB_CODE_SUCCESS) {
// todo handle error
}
pTask->status.transferState = true;
}
// 4. 1) transfer the ownership of executor state, 2) update the scan data range for source task.
// 5. resume the related stream task.
streamTryExec(pTask);
pTask->status.taskStatus = TASK_STATUS__DROPPING;
tqDebug("s-task:%s scan-history-task set status to be dropping", pId);
streamMetaSaveTask(pMeta, pTask);
streamMetaSaveTask(pMeta, pStreamTask);
streamMetaReleaseTask(pMeta, pTask);
streamMetaReleaseTask(pMeta, pStreamTask);
taosWLockLatch(&pMeta->lock);
if (streamMetaCommit(pTask->pMeta) < 0) {
// persist to disk
}
taosWUnLockLatch(&pMeta->lock);
} else {
// todo update the chkInfo version for current task.
// this task has an associated history stream task, so we need to scan wal from the end version of
// history scan. The current version of chkInfo.current is not updated during the history scan
STimeWindow* pWindow = &pTask->dataRange.window;
if (pTask->historyTaskId.taskId == 0) {
*pWindow = (STimeWindow){INT64_MIN, INT64_MAX};
tqDebug("s-task:%s no associated task, reset the time window:%" PRId64 " - %" PRId64, pId, pWindow->skey,
pWindow->ekey);
} else {
tqDebug("s-task:%s history data scan completed, now start to scan data from wal, start ver:%" PRId64
", window:%" PRId64 " - %" PRId64,
pId, pTask->chkInfo.currentVer, pWindow->skey, pWindow->ekey);
}
code = streamTaskScanHistoryDataComplete(pTask);
streamMetaReleaseTask(pMeta, pTask);
// let's start the stream task by extracting data from wal
if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
tqStartStreamTasks(pTq);
}
return code;
}
streamMetaReleaseTask(pTq->pStreamMeta, pTask);
if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING) {
return 0;
}
// serialize msg
int32_t len = sizeof(SStreamRecoverStep1Req);
void* serializedReq = rpcMallocCont(len);
if (serializedReq == NULL) {
tqError("s-task:%s failed to prepare the step2 stage, out of memory", pTask->id.idStr);
return -1;
}
memcpy(serializedReq, &req, len);
// dispatch msg
tqDebug("s-task:%s start recover block stage", pTask->id.idStr);
SRpcMsg rpcMsg = {
.code = 0, .contLen = len, .msgType = TDMT_VND_STREAM_RECOVER_BLOCKING_STAGE, .pCont = serializedReq};
tmsgPutToQueue(&pTq->pVnode->msgCb, WRITE_QUEUE, &rpcMsg);
return 0;
}
int32_t tqProcessTaskRecover2Req(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
int32_t code = 0;
// notify the downstream tasks to transfer executor state after handle all history blocks.
int32_t tqProcessTaskTransferStateReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
SStreamTransferReq req;
SStreamRecoverStep2Req* pReq = (SStreamRecoverStep2Req*)msg;
SDecoder decoder;
tDecoderInit(&decoder, (uint8_t*)msg, msgLen);
int32_t code = tDecodeStreamRecoverFinishReq(&decoder, &req);
tDecoderClear(&decoder);
SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, pReq->taskId);
SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, req.taskId);
if (pTask == NULL) {
tqError("failed to find task:0x%x, it may have been dropped already", req.taskId);
return -1;
}
// do recovery step 2
// transfer the ownership of executor state
streamTaskReleaseState(pTask);
tqDebug("s-task:%s receive state transfer req", pTask->id.idStr);
SStreamTask* pStreamTask = streamMetaAcquireTask(pTq->pStreamMeta, pTask->streamTaskId.taskId);
streamTaskReloadState(pStreamTask);
ASSERT(pTask->streamTaskId.taskId != 0);
pTask->status.transferState = true; // persistent data?
#if 0
// do check if current task handle all data in the input queue
int64_t st = taosGetTimestampMs();
tqDebug("s-task:%s start step2 recover, ts:%" PRId64, pTask->id.idStr, st);
@ -1108,16 +1297,19 @@ int32_t tqProcessTaskRecover2Req(STQ* pTq, int64_t sversion, char* msg, int32_t
tqDebug("s-task:%s step2 recover finished, el:%.2fs", pTask->id.idStr, el);
// dispatch recover finish req to all related downstream task
code = streamDispatchRecoverFinishReq(pTask);
code = streamDispatchScanHistoryFinishMsg(pTask);
if (code < 0) {
streamMetaReleaseTask(pTq->pStreamMeta, pTask);
return -1;
}
atomic_store_8(&pTask->fillHistory, 0);
atomic_store_8(&pTask->info.fillHistory, 0);
streamMetaSaveTask(pTq->pStreamMeta, pTask);
#endif
streamSchedExec(pTask);
streamMetaReleaseTask(pTq->pStreamMeta, pTask);
return 0;
}
@ -1130,7 +1322,7 @@ int32_t tqProcessTaskRecoverFinishReq(STQ* pTq, SRpcMsg* pMsg) {
SDecoder decoder;
tDecoderInit(&decoder, (uint8_t*)msg, msgLen);
tDecodeSStreamRecoverFinishReq(&decoder, &req);
tDecodeStreamRecoverFinishReq(&decoder, &req);
tDecoderClear(&decoder);
// find task
@ -1139,7 +1331,7 @@ int32_t tqProcessTaskRecoverFinishReq(STQ* pTq, SRpcMsg* pMsg) {
return -1;
}
// do process request
if (streamProcessRecoverFinishReq(pTask, req.childId) < 0) {
if (streamProcessRecoverFinishReq(pTask, req.taskId, req.childId) < 0) {
streamMetaReleaseTask(pTq->pStreamMeta, pTask);
return -1;
}
@ -1212,22 +1404,31 @@ int32_t tqProcessTaskRunReq(STQ* pTq, SRpcMsg* pMsg) {
int32_t taskId = pReq->taskId;
int32_t vgId = TD_VID(pTq->pVnode);
if (taskId == WAL_READ_TASKS_ID) { // all tasks are extracted submit data from the wal
if (taskId == STREAM_TASK_STATUS_CHECK_ID) {
tqStreamTasksStatusCheck(pTq);
return 0;
}
if (taskId == EXTRACT_DATA_FROM_WAL_ID) { // all tasks are extracted submit data from the wal
tqStreamTasksScanWal(pTq);
return 0;
}
SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId);
if (pTask != NULL) {
if (pTask->status.taskStatus == TASK_STATUS__NORMAL) {
tqDebug("vgId:%d s-task:%s start to process block from wal, last chk point:%" PRId64, vgId, pTask->id.idStr,
// even in halt status, the data in inputQ must be processed
int8_t status = pTask->status.taskStatus;
if (status == TASK_STATUS__NORMAL || status == TASK_STATUS__HALT) {
tqDebug("vgId:%d s-task:%s start to process block from inputQ, last chk point:%" PRId64, vgId, pTask->id.idStr,
pTask->chkInfo.version);
streamProcessRunReq(pTask);
} else {
if (streamTaskShouldPause(&pTask->status)) {
// if (streamTaskShouldPause(&pTask->status)) {
atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE);
}
tqDebug("vgId:%d s-task:%s ignore run req since not in ready state", vgId, pTask->id.idStr);
// }
tqDebug("vgId:%d s-task:%s ignore run req since not in ready state, status:%s, sched-status:%d", vgId,
pTask->id.idStr, streamGetTaskStatusStr(pTask->status.taskStatus), pTask->status.schedStatus);
}
streamMetaReleaseTask(pTq->pStreamMeta, pTask);
@ -1286,65 +1487,100 @@ int32_t tqProcessTaskDropReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgL
return 0;
}
int32_t tqProcessTaskPauseImpl(SStreamMeta* pStreamMeta, SStreamTask* pTask) {
if (pTask) {
if (!streamTaskShouldPause(&pTask->status)) {
tqDebug("vgId:%d s-task:%s set pause flag", pStreamMeta->vgId, pTask->id.idStr);
atomic_store_8(&pTask->status.keepTaskStatus, pTask->status.taskStatus);
atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__PAUSE);
}
streamMetaReleaseTask(pStreamMeta, pTask);
} else {
return -1;
}
return 0;
}
int32_t tqProcessTaskPauseReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
SVPauseStreamTaskReq* pReq = (SVPauseStreamTaskReq*)msg;
SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, pReq->taskId);
int32_t code = tqProcessTaskPauseImpl(pTq->pStreamMeta, pTask);
if (code != 0) {
return code;
}
SStreamTask* pHistoryTask = streamMetaAcquireTask(pTq->pStreamMeta, pTask->historyTaskId.taskId);
if (pHistoryTask) {
code = tqProcessTaskPauseImpl(pTq->pStreamMeta, pHistoryTask);
}
return code;
}
int32_t tqProcessTaskResumeImpl(STQ* pTq, SStreamTask* pTask, int64_t sversion, int8_t igUntreated) {
int32_t vgId = pTq->pStreamMeta->vgId;
if (pTask) {
tqDebug("vgId:%d s-task:%s set pause flag", pTq->pStreamMeta->vgId, pTask->id.idStr);
atomic_store_8(&pTask->status.keepTaskStatus, pTask->status.taskStatus);
atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__PAUSE);
if (streamTaskShouldPause(&pTask->status)) {
atomic_store_8(&pTask->status.taskStatus, pTask->status.keepTaskStatus);
// no lock needs to secure the access of the version
if (igUntreated && pTask->info.taskLevel == TASK_LEVEL__SOURCE && !pTask->info.fillHistory) {
// discard all the data when the stream task is suspended.
walReaderSetSkipToVersion(pTask->exec.pWalReader, sversion);
tqDebug("vgId:%d s-task:%s resume to exec, prev paused version:%" PRId64 ", start from vnode ver:%" PRId64
", schedStatus:%d",
vgId, pTask->id.idStr, pTask->chkInfo.currentVer, sversion, pTask->status.schedStatus);
} else { // from the previous paused version and go on
tqDebug("vgId:%d s-task:%s resume to exec, from paused ver:%" PRId64 ", vnode ver:%" PRId64 ", schedStatus:%d",
vgId, pTask->id.idStr, pTask->chkInfo.currentVer, sversion, pTask->status.schedStatus);
}
if (pTask->info.fillHistory && pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
streamStartRecoverTask(pTask, igUntreated);
} else if (pTask->info.taskLevel == TASK_LEVEL__SOURCE && taosQueueItemSize(pTask->inputQueue->queue) == 0) {
tqStartStreamTasks(pTq);
} else {
streamSchedExec(pTask);
}
}
streamMetaReleaseTask(pTq->pStreamMeta, pTask);
} else {
return -1;
}
return 0;
}
int32_t tqProcessTaskResumeReq(STQ* pTq, int64_t sversion, char* msg, int32_t msgLen) {
SVResumeStreamTaskReq* pReq = (SVResumeStreamTaskReq*)msg;
int32_t vgId = pTq->pStreamMeta->vgId;
SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, pReq->taskId);
if (pTask) {
atomic_store_8(&pTask->status.taskStatus, pTask->status.keepTaskStatus);
// no lock needs to secure the access of the version
if (pReq->igUntreated && pTask->taskLevel == TASK_LEVEL__SOURCE) {
// discard all the data when the stream task is suspended.
walReaderSetSkipToVersion(pTask->exec.pWalReader, sversion);
tqDebug("vgId:%d s-task:%s resume to exec, prev paused version:%" PRId64 ", start from vnode ver:%" PRId64
", schedStatus:%d",
vgId, pTask->id.idStr, pTask->chkInfo.currentVer, sversion, pTask->status.schedStatus);
} else { // from the previous paused version and go on
tqDebug("vgId:%d s-task:%s resume to exec, from paused ver:%" PRId64 ", vnode ver:%" PRId64 ", schedStatus:%d",
vgId, pTask->id.idStr, pTask->chkInfo.currentVer, sversion, pTask->status.schedStatus);
}
if (pTask->taskLevel == TASK_LEVEL__SOURCE && taosQueueItemSize(pTask->inputQueue->queue) == 0) {
tqStartStreamTasks(pTq);
} else {
streamSchedExec(pTask);
}
streamMetaReleaseTask(pTq->pStreamMeta, pTask);
} else {
tqError("vgId:%d failed to find the s-task:0x%x for resume stream task", vgId, pReq->taskId);
int32_t code = tqProcessTaskResumeImpl(pTq, pTask, sversion, pReq->igUntreated);
if (code != 0) {
return code;
}
return 0;
SStreamTask* pHistoryTask = streamMetaAcquireTask(pTq->pStreamMeta, pTask->historyTaskId.taskId);
if (pHistoryTask) {
code = tqProcessTaskResumeImpl(pTq, pHistoryTask, sversion, pReq->igUntreated);
}
return code;
}
int32_t tqProcessTaskRetrieveReq(STQ* pTq, SRpcMsg* pMsg) {
char* msgStr = pMsg->pCont;
char* msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead));
int32_t msgLen = pMsg->contLen - sizeof(SMsgHead);
char* msgStr = pMsg->pCont;
char* msgBody = POINTER_SHIFT(msgStr, sizeof(SMsgHead));
int32_t msgLen = pMsg->contLen - sizeof(SMsgHead);
SDecoder decoder;
SStreamRetrieveReq req;
SDecoder decoder;
tDecoderInit(&decoder, (uint8_t*)msgBody, msgLen);
tDecodeStreamRetrieveReq(&decoder, &req);
tDecoderClear(&decoder);
int32_t taskId = req.dstTaskId;
SStreamTask* pTask = streamMetaAcquireTask(pTq->pStreamMeta, taskId);
if (pTask) {
SRpcMsg rsp = {.info = pMsg->info, .code = 0};
streamProcessRetrieveReq(pTask, &req, &rsp);
streamMetaReleaseTask(pTq->pStreamMeta, pTask);
tDeleteStreamRetrieveReq(&req);
return 0;
@ -1425,43 +1661,3 @@ FAIL:
int32_t tqCheckLogInWal(STQ* pTq, int64_t sversion) { return sversion <= pTq->walLogLastVer; }
int32_t tqStartStreamTasks(STQ* pTq) {
int32_t vgId = TD_VID(pTq->pVnode);
SStreamMeta* pMeta = pTq->pStreamMeta;
taosWLockLatch(&pMeta->lock);
int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList);
if (numOfTasks == 0) {
tqInfo("vgId:%d no stream tasks exist", vgId);
taosWUnLockLatch(&pMeta->lock);
return 0;
}
pMeta->walScanCounter += 1;
if (pMeta->walScanCounter > 1) {
tqDebug("vgId:%d wal read task has been launched, remain scan times:%d", vgId, pMeta->walScanCounter);
taosWUnLockLatch(&pMeta->lock);
return 0;
}
SStreamTaskRunReq* pRunReq = rpcMallocCont(sizeof(SStreamTaskRunReq));
if (pRunReq == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
tqError("vgId:%d failed to create msg to start wal scanning to launch stream tasks, code:%s", vgId, terrstr());
taosWUnLockLatch(&pMeta->lock);
return -1;
}
tqDebug("vgId:%d create msg to start wal scan to launch stream tasks, numOfTasks:%d", vgId, numOfTasks);
pRunReq->head.vgId = vgId;
pRunReq->streamId = 0;
pRunReq->taskId = WAL_READ_TASKS_ID;
SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq)};
tmsgPutToQueue(&pTq->pVnode->msgCb, STREAM_QUEUE, &msg);
taosWUnLockLatch(&pMeta->lock);
return 0;
}

View File

@ -114,7 +114,7 @@ bool isValValidForTable(STqHandle* pHandle, SWalCont* pHead) {
}
SMetaReader mr = {0};
metaReaderInit(&mr, pHandle->execHandle.pTqReader->pVnodeMeta, 0);
metaReaderDoInit(&mr, pHandle->execHandle.pTqReader->pVnodeMeta, 0);
if (metaGetTableEntryByName(&mr, req.tbName) < 0) {
metaReaderClear(&mr);
@ -216,9 +216,9 @@ int32_t tqFetchLog(STQ* pTq, STqHandle* pHandle, int64_t* fetchOffset, SWalCkHea
code = 0;
goto END;
} else {
if (pHandle->fetchMeta) {
if (pHandle->fetchMeta != WITH_DATA) {
SWalCont* pHead = &((*ppCkHead)->head);
if (IS_META_MSG(pHead->msgType)) {
if (IS_META_MSG(pHead->msgType) && !(pHead->msgType == TDMT_VND_DELETE && pHandle->fetchMeta == ONLY_META)) {
code = walFetchBody(pHandle->pWalReader, ppCkHead);
if (code < 0) {
*fetchOffset = offset;
@ -1109,7 +1109,7 @@ int32_t tqUpdateTbUidList(STQ* pTq, const SArray* tbUidList, bool isAdd) {
}
SStreamTask* pTask = *(SStreamTask**)pIter;
if (pTask->taskLevel == TASK_LEVEL__SOURCE) {
if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
int32_t code = qUpdateTableListForStreamScanner(pTask->exec.pExecutor, tbUidList, isAdd);
if (code != 0) {
tqError("vgId:%d, s-task:%s update qualified table error for stream task", vgId, pTask->id.idStr);

View File

@ -16,6 +16,7 @@
#include "tq.h"
static int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle);
static int32_t doSetOffsetForWalReader(SStreamTask *pTask, int32_t vgId);
// this function should be executed by stream threads.
// extract submit block from WAL, and add them into the input queue for the sources tasks.
@ -57,7 +58,111 @@ int32_t tqStreamTasksScanWal(STQ* pTq) {
return 0;
}
static int32_t doSetOffsetForWalReader(SStreamTask *pTask, int32_t vgId) {
int32_t tqStreamTasksStatusCheck(STQ* pTq) {
int32_t vgId = TD_VID(pTq->pVnode);
SStreamMeta* pMeta = pTq->pStreamMeta;
int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList);
tqDebug("vgId:%d start to check all (%d) stream tasks downstream status", vgId, numOfTasks);
if (numOfTasks == 0) {
return TSDB_CODE_SUCCESS;
}
SArray* pTaskList = NULL;
taosWLockLatch(&pMeta->lock);
pTaskList = taosArrayDup(pMeta->pTaskList, NULL);
taosWUnLockLatch(&pMeta->lock);
for (int32_t i = 0; i < numOfTasks; ++i) {
int32_t* pTaskId = taosArrayGet(pTaskList, i);
SStreamTask* pTask = streamMetaAcquireTask(pMeta, *pTaskId);
if (pTask == NULL) {
continue;
}
streamTaskCheckDownstreamTasks(pTask);
streamMetaReleaseTask(pMeta, pTask);
}
taosArrayDestroy(pTaskList);
return 0;
}
int32_t tqCheckStreamStatus(STQ* pTq) {
int32_t vgId = TD_VID(pTq->pVnode);
SStreamMeta* pMeta = pTq->pStreamMeta;
taosWLockLatch(&pMeta->lock);
int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList);
if (numOfTasks == 0) {
tqInfo("vgId:%d no stream tasks exist", vgId);
taosWUnLockLatch(&pMeta->lock);
return 0;
}
SStreamTaskRunReq* pRunReq = rpcMallocCont(sizeof(SStreamTaskRunReq));
if (pRunReq == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
tqError("vgId:%d failed to create msg to start wal scanning to launch stream tasks, code:%s", vgId, terrstr());
taosWUnLockLatch(&pMeta->lock);
return -1;
}
tqDebug("vgId:%d check for stream tasks status, numOfTasks:%d", vgId, numOfTasks);
pRunReq->head.vgId = vgId;
pRunReq->streamId = 0;
pRunReq->taskId = STREAM_TASK_STATUS_CHECK_ID;
SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq)};
tmsgPutToQueue(&pTq->pVnode->msgCb, STREAM_QUEUE, &msg);
taosWUnLockLatch(&pMeta->lock);
return 0;
}
int32_t tqStartStreamTasks(STQ* pTq) {
int32_t vgId = TD_VID(pTq->pVnode);
SStreamMeta* pMeta = pTq->pStreamMeta;
taosWLockLatch(&pMeta->lock);
int32_t numOfTasks = taosArrayGetSize(pMeta->pTaskList);
if (numOfTasks == 0) {
tqInfo("vgId:%d no stream tasks exist", vgId);
taosWUnLockLatch(&pMeta->lock);
return 0;
}
pMeta->walScanCounter += 1;
if (pMeta->walScanCounter > 1) {
tqDebug("vgId:%d wal read task has been launched, remain scan times:%d", vgId, pMeta->walScanCounter);
taosWUnLockLatch(&pMeta->lock);
return 0;
}
SStreamTaskRunReq* pRunReq = rpcMallocCont(sizeof(SStreamTaskRunReq));
if (pRunReq == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
tqError("vgId:%d failed to create msg to start wal scanning to launch stream tasks, code:%s", vgId, terrstr());
taosWUnLockLatch(&pMeta->lock);
return -1;
}
tqDebug("vgId:%d create msg to start wal scan to launch stream tasks, numOfTasks:%d", vgId, numOfTasks);
pRunReq->head.vgId = vgId;
pRunReq->streamId = 0;
pRunReq->taskId = EXTRACT_DATA_FROM_WAL_ID;
SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq)};
tmsgPutToQueue(&pTq->pVnode->msgCb, STREAM_QUEUE, &msg);
taosWUnLockLatch(&pMeta->lock);
return 0;
}
int32_t doSetOffsetForWalReader(SStreamTask *pTask, int32_t vgId) {
// seek the stored version and extract data from WAL
int64_t firstVer = walReaderGetValidFirstVer(pTask->exec.pWalReader);
if (pTask->chkInfo.currentVer < firstVer) {
@ -102,7 +207,7 @@ static int32_t doSetOffsetForWalReader(SStreamTask *pTask, int32_t vgId) {
int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle) {
*pScanIdle = true;
bool noNewDataInWal = true;
bool noDataInWal = true;
int32_t vgId = pStreamMeta->vgId;
int32_t numOfTasks = taosArrayGetSize(pStreamMeta->pTaskList);
@ -129,15 +234,13 @@ int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle) {
}
int32_t status = pTask->status.taskStatus;
if (pTask->taskLevel != TASK_LEVEL__SOURCE) {
// tqTrace("s-task:%s level:%d not source task, no need to start", pTask->id.idStr, pTask->taskLevel);
if (pTask->info.taskLevel != TASK_LEVEL__SOURCE) {
streamMetaReleaseTask(pStreamMeta, pTask);
continue;
}
if (streamTaskShouldStop(&pTask->status) || status == TASK_STATUS__RECOVER_PREPARE ||
status == TASK_STATUS__WAIT_DOWNSTREAM || streamTaskShouldPause(&pTask->status)) {
tqDebug("s-task:%s not ready for new submit block from wal, status:%d", pTask->id.idStr, status);
if (status != TASK_STATUS__NORMAL) {
tqDebug("s-task:%s not ready for new submit block from wal, status:%s", pTask->id.idStr, streamGetTaskStatusStr(status));
streamMetaReleaseTask(pStreamMeta, pTask);
continue;
}
@ -157,39 +260,47 @@ int32_t createStreamTaskRunReq(SStreamMeta* pStreamMeta, bool* pScanIdle) {
continue;
}
int32_t numOfItemsInQ = taosQueueItemSize(pTask->inputQueue->queue);
// append the data for the stream
SStreamQueueItem* pItem = NULL;
code = extractMsgFromWal(pTask->exec.pWalReader, (void**) &pItem, pTask->id.idStr);
if (code != TSDB_CODE_SUCCESS) { // failed, continue
if ((code != TSDB_CODE_SUCCESS || pItem == NULL) && (numOfItemsInQ == 0)) { // failed, continue
streamMetaReleaseTask(pStreamMeta, pTask);
continue;
}
// delete ignore
if (pItem == NULL) {
streamMetaReleaseTask(pStreamMeta, pTask);
continue;
if (pItem != NULL) {
noDataInWal = false;
code = tAppendDataToInputQueue(pTask, pItem);
if (code == TSDB_CODE_SUCCESS) {
pTask->chkInfo.currentVer = walReaderGetCurrentVer(pTask->exec.pWalReader);
tqDebug("s-task:%s set the ver:%" PRId64 " from WALReader after extract block from WAL", pTask->id.idStr,
pTask->chkInfo.currentVer);
} else {
tqError("s-task:%s append input queue failed, too many in inputQ, ver:%" PRId64, pTask->id.idStr,
pTask->chkInfo.currentVer);
}
}
noNewDataInWal = false;
code = tqAddInputBlockNLaunchTask(pTask, pItem);
if (code == TSDB_CODE_SUCCESS) {
pTask->chkInfo.currentVer = walReaderGetCurrentVer(pTask->exec.pWalReader);
tqDebug("s-task:%s set the ver:%" PRId64 " from WALReader after extract block from WAL", pTask->id.idStr,
pTask->chkInfo.currentVer);
} else {
tqError("s-task:%s append input queue failed, ver:%" PRId64, pTask->id.idStr, pTask->chkInfo.currentVer);
if ((code == TSDB_CODE_SUCCESS) || (numOfItemsInQ > 0)) {
code = streamSchedExec(pTask);
if (code != TSDB_CODE_SUCCESS) {
streamMetaReleaseTask(pStreamMeta, pTask);
return -1;
}
}
streamMetaReleaseTask(pStreamMeta, pTask);
}
// all wal are checked, and no new data available in wal.
if (noNewDataInWal) {
if (noDataInWal) {
*pScanIdle = true;
}
taosArrayDestroy(pTaskList);
return 0;
}

View File

@ -48,7 +48,7 @@ static int32_t tqAddBlockSchemaToRsp(const STqExecHandle* pExec, STaosxRsp* pRsp
static int32_t tqAddTbNameToRsp(const STQ* pTq, int64_t uid, STaosxRsp* pRsp, int32_t n) {
SMetaReader mr = {0};
metaReaderInit(&mr, pTq->pVnode->pMeta, 0);
metaReaderDoInit(&mr, pTq->pVnode->pMeta, 0);
// TODO add reference to gurantee success
if (metaReaderGetTableEntryByUidCache(&mr, uid) < 0) {
@ -215,19 +215,15 @@ int32_t tqTaosxScanLog(STQ* pTq, STqHandle* pHandle, SPackedData submit, STaosxR
taosArrayClear(pSchemas);
SSubmitTbData* pSubmitTbDataRet = NULL;
if (tqRetrieveTaosxBlock(pReader, pBlocks, pSchemas, &pSubmitTbDataRet) < 0) {
if (terrno == TSDB_CODE_TQ_TABLE_SCHEMA_NOT_FOUND) continue;
if (terrno == TSDB_CODE_TQ_TABLE_SCHEMA_NOT_FOUND) goto loop_table;
}
if (pRsp->withTbName) {
int64_t uid = pExec->pTqReader->lastBlkUid;
if (tqAddTbNameToRsp(pTq, uid, pRsp, taosArrayGetSize(pBlocks)) < 0) {
taosArrayDestroyEx(pBlocks, (FDelete)blockDataFreeRes);
taosArrayDestroyP(pSchemas, (FDelete)tDeleteSchemaWrapper);
pBlocks = taosArrayInit(0, sizeof(SSDataBlock));
pSchemas = taosArrayInit(0, sizeof(void*));
continue;
goto loop_table;
}
}
if (pHandle->fetchMeta && pSubmitTbDataRet->pCreateTbReq != NULL) {
if (pHandle->fetchMeta != WITH_DATA && pSubmitTbDataRet->pCreateTbReq != NULL) {
if (pRsp->createTableNum == 0) {
pRsp->createTableLen = taosArrayInit(0, sizeof(int32_t));
pRsp->createTableReq = taosArrayInit(0, sizeof(void*));
@ -237,7 +233,7 @@ int32_t tqTaosxScanLog(STQ* pTq, STqHandle* pHandle, SPackedData submit, STaosxR
uint32_t len = 0;
tEncodeSize(tEncodeSVCreateTbReq, pSubmitTbDataRet->pCreateTbReq, len, code);
if (TSDB_CODE_SUCCESS != code) {
continue;
goto loop_table;
}
void* createReq = taosMemoryCalloc(1, len);
SEncoder encoder = {0};
@ -246,7 +242,7 @@ int32_t tqTaosxScanLog(STQ* pTq, STqHandle* pHandle, SPackedData submit, STaosxR
if (code < 0) {
tEncoderClear(&encoder);
taosMemoryFree(createReq);
continue;
goto loop_table;
}
taosArrayPush(pRsp->createTableLen, &len);
@ -255,6 +251,9 @@ int32_t tqTaosxScanLog(STQ* pTq, STqHandle* pHandle, SPackedData submit, STaosxR
tEncoderClear(&encoder);
}
if (pHandle->fetchMeta == ONLY_META && pSubmitTbDataRet->pCreateTbReq == NULL){
goto loop_table;
}
for (int32_t i = 0; i < taosArrayGetSize(pBlocks); i++) {
SSDataBlock* pBlock = taosArrayGet(pBlocks, i);
tqAddBlockDataToRsp(pBlock, (SMqDataRsp*)pRsp, taosArrayGetSize(pBlock->pDataBlock),
@ -265,6 +264,12 @@ int32_t tqTaosxScanLog(STQ* pTq, STqHandle* pHandle, SPackedData submit, STaosxR
taosArrayPush(pRsp->blockSchema, &pSW);
pRsp->blockNum++;
}
continue;
loop_table:
taosArrayDestroyEx(pBlocks, (FDelete)blockDataFreeRes);
taosArrayDestroyP(pSchemas, (FDelete)tDeleteSchemaWrapper);
pBlocks = taosArrayInit(0, sizeof(SSDataBlock));
pSchemas = taosArrayInit(0, sizeof(void*));
}
} else if (pExec->subType == TOPIC_SUB_TYPE__DB) {
STqReader* pReader = pExec->pTqReader;
@ -274,19 +279,15 @@ int32_t tqTaosxScanLog(STQ* pTq, STqHandle* pHandle, SPackedData submit, STaosxR
taosArrayClear(pSchemas);
SSubmitTbData* pSubmitTbDataRet = NULL;
if (tqRetrieveTaosxBlock(pReader, pBlocks, pSchemas, &pSubmitTbDataRet) < 0) {
if (terrno == TSDB_CODE_TQ_TABLE_SCHEMA_NOT_FOUND) continue;
if (terrno == TSDB_CODE_TQ_TABLE_SCHEMA_NOT_FOUND) goto loop_db;
}
if (pRsp->withTbName) {
int64_t uid = pExec->pTqReader->lastBlkUid;
if (tqAddTbNameToRsp(pTq, uid, pRsp, taosArrayGetSize(pBlocks)) < 0) {
taosArrayDestroyEx(pBlocks, (FDelete)blockDataFreeRes);
taosArrayDestroyP(pSchemas, (FDelete)tDeleteSchemaWrapper);
pBlocks = taosArrayInit(0, sizeof(SSDataBlock));
pSchemas = taosArrayInit(0, sizeof(void*));
continue;
goto loop_db;
}
}
if (pHandle->fetchMeta && pSubmitTbDataRet->pCreateTbReq != NULL) {
if (pHandle->fetchMeta != WITH_DATA && pSubmitTbDataRet->pCreateTbReq != NULL) {
if (pRsp->createTableNum == 0) {
pRsp->createTableLen = taosArrayInit(0, sizeof(int32_t));
pRsp->createTableReq = taosArrayInit(0, sizeof(void*));
@ -296,7 +297,7 @@ int32_t tqTaosxScanLog(STQ* pTq, STqHandle* pHandle, SPackedData submit, STaosxR
uint32_t len = 0;
tEncodeSize(tEncodeSVCreateTbReq, pSubmitTbDataRet->pCreateTbReq, len, code);
if (TSDB_CODE_SUCCESS != code) {
continue;
goto loop_db;
}
void* createReq = taosMemoryCalloc(1, len);
SEncoder encoder = {0};
@ -305,7 +306,7 @@ int32_t tqTaosxScanLog(STQ* pTq, STqHandle* pHandle, SPackedData submit, STaosxR
if (code < 0) {
tEncoderClear(&encoder);
taosMemoryFree(createReq);
continue;
goto loop_db;
}
taosArrayPush(pRsp->createTableLen, &len);
@ -314,6 +315,9 @@ int32_t tqTaosxScanLog(STQ* pTq, STqHandle* pHandle, SPackedData submit, STaosxR
tEncoderClear(&encoder);
}
if (pHandle->fetchMeta == ONLY_META && pSubmitTbDataRet->pCreateTbReq == NULL){
goto loop_db;
}
for (int32_t i = 0; i < taosArrayGetSize(pBlocks); i++) {
SSDataBlock* pBlock = taosArrayGet(pBlocks, i);
tqAddBlockDataToRsp(pBlock, (SMqDataRsp*)pRsp, taosArrayGetSize(pBlock->pDataBlock),
@ -324,6 +328,12 @@ int32_t tqTaosxScanLog(STQ* pTq, STqHandle* pHandle, SPackedData submit, STaosxR
taosArrayPush(pRsp->blockSchema, &pSW);
pRsp->blockNum++;
}
continue;
loop_db:
taosArrayDestroyEx(pBlocks, (FDelete)blockDataFreeRes);
taosArrayDestroyP(pSchemas, (FDelete)tDeleteSchemaWrapper);
pBlocks = taosArrayInit(0, sizeof(SSDataBlock));
pSchemas = taosArrayInit(0, sizeof(void*));
}
}
taosArrayDestroy(pBlocks);

View File

@ -309,7 +309,7 @@ void tqSinkToTablePipeline(SStreamTask* pTask, void* vnode, int64_t ver, void* d
tbData.uid = pTableSinkInfo->uid;
} else {
SMetaReader mr = {0};
metaReaderInit(&mr, pVnode->pMeta, 0);
metaReaderDoInit(&mr, pVnode->pMeta, 0);
if (metaGetTableEntryByName(&mr, ctbName) < 0) {
metaReaderClear(&mr);
taosMemoryFree(pTableSinkInfo);
@ -412,7 +412,7 @@ void tqSinkToTablePipeline(SStreamTask* pTask, void* vnode, int64_t ver, void* d
if (k == 0) {
SColumnInfoData* pColData = taosArrayGet(pDataBlock->pDataBlock, dataIndex);
void* colData = colDataGetData(pColData, j);
tqDebug("tq sink pipe2, row %d, col %d ts %" PRId64, j, k, *(int64_t*)colData);
tqTrace("tq sink pipe2, row %d, col %d ts %" PRId64, j, k, *(int64_t*)colData);
}
if (IS_SET_NULL(pCol)) {
SColVal cv = COL_VAL_NULL(pCol->colId, pCol->type);

View File

@ -20,12 +20,6 @@
static int32_t tqSendMetaPollRsp(STqHandle* pHandle, const SRpcMsg* pMsg, const SMqPollReq* pReq,
const SMqMetaRsp* pRsp, int32_t vgId);
char* createStreamTaskIdStr(int64_t streamId, int32_t taskId) {
char buf[128] = {0};
sprintf(buf, "0x%" PRIx64 "-0x%x", streamId, taskId);
return taosStrdup(buf);
}
int32_t tqAddInputBlockNLaunchTask(SStreamTask* pTask, SStreamQueueItem* pQueueItem) {
int32_t code = tAppendDataToInputQueue(pTask, pQueueItem);
if (code < 0) {
@ -123,28 +117,17 @@ static int32_t extractResetOffsetVal(STqOffsetVal* pOffsetVal, STQ* pTq, STqHand
}
} else if (reqOffset.type == TMQ_OFFSET__RESET_LATEST) {
walRefLastVer(pTq->pVnode->pWal, pHandle->pRef);
if (pHandle->execHandle.subType == TOPIC_SUB_TYPE__COLUMN) {
SMqDataRsp dataRsp = {0};
tqInitDataRsp(&dataRsp, pRequest);
SMqDataRsp dataRsp = {0};
tqInitDataRsp(&dataRsp, pRequest);
tqOffsetResetToLog(&dataRsp.rspOffset, pHandle->pRef->refVer);
tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, (latest) offset reset to %" PRId64, consumerId,
pHandle->subKey, vgId, dataRsp.rspOffset.version);
int32_t code = tqSendDataRsp(pHandle, pMsg, pRequest, &dataRsp, TMQ_MSG_TYPE__POLL_RSP, vgId);
tDeleteMqDataRsp(&dataRsp);
tqOffsetResetToLog(&dataRsp.rspOffset, pHandle->pRef->refVer);
tqDebug("tmq poll: consumer:0x%" PRIx64 ", subkey %s, vgId:%d, (latest) offset reset to %" PRId64, consumerId,
pHandle->subKey, vgId, dataRsp.rspOffset.version);
int32_t code = tqSendDataRsp(pHandle, pMsg, pRequest, &dataRsp, TMQ_MSG_TYPE__POLL_DATA_RSP, vgId);
tDeleteMqDataRsp(&dataRsp);
*pBlockReturned = true;
return code;
} else {
STaosxRsp taosxRsp = {0};
tqInitTaosxRsp(&taosxRsp, pRequest);
tqOffsetResetToLog(&taosxRsp.rspOffset, pHandle->pRef->refVer);
int32_t code = tqSendDataRsp(pHandle, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP, vgId);
tDeleteSTaosxRsp(&taosxRsp);
*pBlockReturned = true;
return code;
}
*pBlockReturned = true;
return code;
} else if (reqOffset.type == TMQ_OFFSET__RESET_NONE) {
tqError("tmq poll: subkey:%s, no offset committed for consumer:0x%" PRIx64
" in vg %d, subkey %s, reset none failed",
@ -187,7 +170,7 @@ static int32_t extractDataAndRspForNormalSubscribe(STQ* pTq, STqHandle* pHandle,
}
}
code = tqSendDataRsp(pHandle, pMsg, pRequest, (SMqDataRsp*)&dataRsp, TMQ_MSG_TYPE__POLL_RSP, vgId);
code = tqSendDataRsp(pHandle, pMsg, pRequest, (SMqDataRsp*)&dataRsp, TMQ_MSG_TYPE__POLL_DATA_RSP, vgId);
end : {
char buf[TSDB_OFFSET_LEN] = {0};
@ -230,7 +213,7 @@ static int32_t extractDataAndRspForDbStbSubscribe(STQ* pTq, STqHandle* pHandle,
pRequest->consumerId, pHandle->subKey, vgId, taosxRsp.blockNum, taosxRsp.rspOffset.type,
taosxRsp.rspOffset.uid, taosxRsp.rspOffset.ts);
if (taosxRsp.blockNum > 0) {
code = tqSendDataRsp(pHandle, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP, vgId);
code = tqSendDataRsp(pHandle, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__POLL_DATA_RSP, vgId);
goto end;
} else {
*offset = taosxRsp.rspOffset;
@ -260,7 +243,7 @@ static int32_t extractDataAndRspForDbStbSubscribe(STQ* pTq, STqHandle* pHandle,
if (tqFetchLog(pTq, pHandle, &fetchVer, &pCkHead, pRequest->reqId) < 0) {
tqOffsetResetToLog(&taosxRsp.rspOffset, fetchVer);
code = tqSendDataRsp(pHandle, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP, vgId);
code = tqSendDataRsp(pHandle, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__POLL_DATA_RSP, vgId);
goto end;
}
@ -272,7 +255,7 @@ static int32_t extractDataAndRspForDbStbSubscribe(STQ* pTq, STqHandle* pHandle,
if (pHead->msgType != TDMT_VND_SUBMIT) {
if (totalRows > 0) {
tqOffsetResetToLog(&taosxRsp.rspOffset, fetchVer - 1);
code = tqSendDataRsp(pHandle, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP, vgId);
code = tqSendDataRsp(pHandle, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__POLL_DATA_RSP, vgId);
goto end;
}
@ -301,7 +284,7 @@ static int32_t extractDataAndRspForDbStbSubscribe(STQ* pTq, STqHandle* pHandle,
if (totalRows >= 4096 || taosxRsp.createTableNum > 0) {
tqOffsetResetToLog(&taosxRsp.rspOffset, fetchVer);
code = tqSendDataRsp(pHandle, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, TMQ_MSG_TYPE__TAOSX_RSP, vgId);
code = tqSendDataRsp(pHandle, pMsg, pRequest, (SMqDataRsp*)&taosxRsp, taosxRsp.createTableNum > 0 ? TMQ_MSG_TYPE__POLL_DATA_META_RSP : TMQ_MSG_TYPE__POLL_DATA_RSP, vgId);
goto end;
} else {
fetchVer++;
@ -396,9 +379,9 @@ int32_t tqDoSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const SMqDataRsp*
int32_t len = 0;
int32_t code = 0;
if (type == TMQ_MSG_TYPE__POLL_RSP || type == TMQ_MSG_TYPE__WALINFO_RSP) {
if (type == TMQ_MSG_TYPE__POLL_DATA_RSP || type == TMQ_MSG_TYPE__WALINFO_RSP) {
tEncodeSize(tEncodeMqDataRsp, pRsp, len, code);
} else if (type == TMQ_MSG_TYPE__TAOSX_RSP) {
} else if (type == TMQ_MSG_TYPE__POLL_DATA_META_RSP) {
tEncodeSize(tEncodeSTaosxRsp, (STaosxRsp*)pRsp, len, code);
}
@ -420,9 +403,9 @@ int32_t tqDoSendDataRsp(const SRpcHandleInfo* pRpcHandleInfo, const SMqDataRsp*
SEncoder encoder = {0};
tEncoderInit(&encoder, abuf, len);
if (type == TMQ_MSG_TYPE__POLL_RSP || type == TMQ_MSG_TYPE__WALINFO_RSP) {
if (type == TMQ_MSG_TYPE__POLL_DATA_RSP || type == TMQ_MSG_TYPE__WALINFO_RSP) {
tEncodeMqDataRsp(&encoder, pRsp);
} else if (type == TMQ_MSG_TYPE__TAOSX_RSP) {
} else if (type == TMQ_MSG_TYPE__POLL_DATA_META_RSP) {
tEncodeSTaosxRsp(&encoder, (STaosxRsp*)pRsp);
}

View File

@ -1610,7 +1610,7 @@ static tb_uid_t getTableSuidByUid(tb_uid_t uid, STsdb *pTsdb) {
tb_uid_t suid = 0;
SMetaReader mr = {0};
metaReaderInit(&mr, pTsdb->pVnode->pMeta, 0);
metaReaderDoInit(&mr, pTsdb->pVnode->pMeta, 0);
if (metaReaderGetTableEntryByUidCache(&mr, uid) < 0) {
metaReaderClear(&mr); // table not esist
return 0;

View File

@ -562,7 +562,8 @@ static int32_t tsdbCommitFileDataStart(SCommitter *pCommitter) {
code = terrno;
TSDB_CHECK_CODE(code, lino, _exit);
}
tfsMkdirRecurAt(pTsdb->pVnode->pTfs, pTsdb->path, did);
code = tfsMkdirRecurAt(pTsdb->pVnode->pTfs, pTsdb->path, did);
TSDB_CHECK_CODE(code, lino, _exit);
wSet.diskId = did;
wSet.nSttF = 1;
}

View File

@ -248,7 +248,7 @@ static int32_t initDelSkylineIterator(STableBlockScanInfo* pBlockScanInfo, STsdb
STbData* piMemTbData);
static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* retentions, const char* idstr,
int8_t* pLevel);
static SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, int8_t level);
static SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, const char* id);
static bool hasDataInLastBlock(SLastBlockReader* pLastBlockReader);
static int32_t doBuildDataBlock(STsdbReader* pReader);
static TSDBKEY getCurrentKeyInBuf(STableBlockScanInfo* pScanInfo, STsdbReader* pReader);
@ -775,7 +775,7 @@ static int32_t tsdbReaderCreate(SVnode* pVnode, SQueryTableDataCond* pCond, void
pReader->order = pCond->order;
pReader->idStr = (idstr != NULL) ? taosStrdup(idstr) : NULL;
pReader->verRange = getQueryVerRange(pVnode, pCond, level);
pReader->verRange = getQueryVerRange(pVnode, pCond, idstr);
pReader->type = pCond->type;
pReader->window = updateQueryTimeWindow(pReader->pTsdb, &pCond->twindows);
pReader->blockInfoBuf.numPerBucket = 1000; // 1000 tables per bucket
@ -3721,7 +3721,7 @@ static STsdb* getTsdbByRetentions(SVnode* pVnode, TSKEY winSKey, SRetention* ret
return VND_TSDB(pVnode);
}
SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, int8_t level) {
SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, const char* id) {
int64_t startVer = (pCond->startVersion == -1) ? 0 : pCond->startVersion;
int64_t endVer = 0;
@ -3732,6 +3732,9 @@ SVersionRange getQueryVerRange(SVnode* pVnode, SQueryTableDataCond* pCond, int8_
endVer = (pCond->endVersion > pVnode->state.applied) ? pVnode->state.applied : pCond->endVersion;
}
tsdbDebug("queried verRange:%"PRId64"-%"PRId64", revised query verRange:%"PRId64"-%"PRId64", %s", pCond->startVersion,
pCond->endVersion, startVer, endVer, id);
return (SVersionRange){.minVer = startVer, .maxVer = endVer};
}
@ -5452,7 +5455,7 @@ int64_t tsdbGetNumOfRowsInMemTable(STsdbReader* pReader) {
int32_t tsdbGetTableSchema(void* pVnode, int64_t uid, STSchema** pSchema, int64_t* suid) {
SMetaReader mr = {0};
metaReaderInit(&mr, ((SVnode*)pVnode)->pMeta, 0);
metaReaderDoInit(&mr, ((SVnode*)pVnode)->pMeta, 0);
int32_t code = metaReaderGetTableEntryByUidCache(&mr, uid);
if (code != TSDB_CODE_SUCCESS) {
terrno = TSDB_CODE_TDB_INVALID_TABLE_ID;
@ -5584,57 +5587,3 @@ void tsdbReaderSetId(STsdbReader* pReader, const char* idstr) {
void tsdbReaderSetCloseFlag(STsdbReader* pReader) { pReader->code = TSDB_CODE_TSC_QUERY_CANCELLED; }
/*-------------todo:refactor the implementation of those APIs in this file to seperate the API into two files------*/
// opt perf, do NOT create so many readers
int64_t tsdbGetLastTimestamp(SVnode* pVnode, void* pTableList, int32_t numOfTables, const char* pIdStr) {
SQueryTableDataCond cond = {.type = TIMEWINDOW_RANGE_CONTAINED, .numOfCols = 1, .order = TSDB_ORDER_DESC,
.startVersion = -1, .endVersion = -1};
cond.twindows.skey = INT64_MIN;
cond.twindows.ekey = INT64_MAX;
cond.colList = taosMemoryCalloc(1, sizeof(SColumnInfo));
cond.pSlotList = taosMemoryMalloc(sizeof(int32_t) * cond.numOfCols);
if (cond.colList == NULL || cond.pSlotList == NULL) {
// todo
}
cond.colList[0].colId = 1;
cond.colList[0].slotId = 0;
cond.colList[0].type = TSDB_DATA_TYPE_TIMESTAMP;
cond.pSlotList[0] = 0;
STableKeyInfo* pTableKeyInfo = pTableList;
STsdbReader* pReader = NULL;
SSDataBlock* pBlock = createDataBlock();
SColumnInfoData data = {0};
data.info = (SColumnInfo) {.type = TSDB_DATA_TYPE_TIMESTAMP, .colId = 1, .bytes = TSDB_KEYSIZE};
blockDataAppendColInfo(pBlock, &data);
int64_t key = INT64_MIN;
for(int32_t i = 0; i < numOfTables; ++i) {
int32_t code = tsdbReaderOpen(pVnode, &cond, &pTableKeyInfo[i], 1, pBlock, (void**)&pReader, pIdStr, false, NULL);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
bool hasData = false;
code = tsdbNextDataBlock(pReader, &hasData);
if (!hasData || code != TSDB_CODE_SUCCESS) {
continue;
}
SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, 0);
int64_t k = *(int64_t*)pCol->pData;
if (key < k) {
key = k;
}
tsdbReaderClose(pReader);
}
return 0;
}

View File

@ -203,6 +203,7 @@ void initStateStoreAPI(SStateStore* pStore) {
pStore->streamStateCommit = streamStateCommit;
pStore->streamStateDestroy = streamStateDestroy;
pStore->streamStateDeleteCheckPoint = streamStateDeleteCheckPoint;
pStore->streamStateReloadInfo = streamStateReloadInfo;
}
void initMetaReaderAPI(SStoreMetaReader* pMetaReader) {

View File

@ -144,6 +144,7 @@ int32_t vnodeRenameVgroupId(const char *srcPath, const char *dstPath, int32_t sr
char tsdbFilePrefix[TSDB_FILENAME_LEN] = {0};
snprintf(tsdbPath, TSDB_FILENAME_LEN, "%s%stsdb", srcPath, TD_DIRSEP);
snprintf(tsdbFilePrefix, TSDB_FILENAME_LEN, "tsdb%sv", TD_DIRSEP);
int32_t prefixLen = strlen(tsdbFilePrefix);
STfsDir *tsdbDir = tfsOpendir(pTfs, tsdbPath);
if (tsdbDir == NULL) return 0;
@ -157,11 +158,11 @@ int32_t vnodeRenameVgroupId(const char *srcPath, const char *dstPath, int32_t sr
char *tsdbFilePrefixPos = strstr(oldRname, tsdbFilePrefix);
if (tsdbFilePrefixPos == NULL) continue;
int32_t tsdbFileVgId = atoi(tsdbFilePrefixPos + 6);
int32_t tsdbFileVgId = atoi(tsdbFilePrefixPos + prefixLen);
if (tsdbFileVgId == srcVgId) {
char *tsdbFileSurfixPos = tsdbFilePrefixPos + 6 + vnodeVgroupIdLen(srcVgId);
char *tsdbFileSurfixPos = tsdbFilePrefixPos + prefixLen + vnodeVgroupIdLen(srcVgId);
tsdbFilePrefixPos[6] = 0;
tsdbFilePrefixPos[prefixLen] = 0;
snprintf(newRname, TSDB_FILENAME_LEN, "%s%d%s", oldRname, dstVgId, tsdbFileSurfixPos);
vInfo("vgId:%d, rename file from %s to %s", dstVgId, tsdbFile->rname, newRname);

View File

@ -62,7 +62,7 @@ int vnodeGetTableMeta(SVnode *pVnode, SRpcMsg *pMsg, bool direct) {
}
// query meta
metaReaderInit(&mer1, pVnode->pMeta, 0);
metaReaderDoInit(&mer1, pVnode->pMeta, 0);
if (metaGetTableEntryByName(&mer1, infoReq.tbName) < 0) {
code = terrno;
@ -79,7 +79,7 @@ int vnodeGetTableMeta(SVnode *pVnode, SRpcMsg *pMsg, bool direct) {
schemaTag = mer1.me.stbEntry.schemaTag;
metaRsp.suid = mer1.me.uid;
} else if (mer1.me.type == TSDB_CHILD_TABLE) {
metaReaderInit(&mer2, pVnode->pMeta, META_READER_NOLOCK);
metaReaderDoInit(&mer2, pVnode->pMeta, META_READER_NOLOCK);
if (metaReaderGetTableEntryByUid(&mer2, mer1.me.ctbEntry.suid) < 0) goto _exit;
strcpy(metaRsp.stbName, mer2.me.name);
@ -175,7 +175,7 @@ int vnodeGetTableCfg(SVnode *pVnode, SRpcMsg *pMsg, bool direct) {
}
// query meta
metaReaderInit(&mer1, pVnode->pMeta, 0);
metaReaderDoInit(&mer1, pVnode->pMeta, 0);
if (metaGetTableEntryByName(&mer1, cfgReq.tbName) < 0) {
code = terrno;
@ -188,7 +188,7 @@ int vnodeGetTableCfg(SVnode *pVnode, SRpcMsg *pMsg, bool direct) {
code = TSDB_CODE_VND_HASH_MISMATCH;
goto _exit;
} else if (mer1.me.type == TSDB_CHILD_TABLE) {
metaReaderInit(&mer2, pVnode->pMeta, 0);
metaReaderDoInit(&mer2, pVnode->pMeta, 0);
if (metaReaderGetTableEntryByUid(&mer2, mer1.me.ctbEntry.suid) < 0) goto _exit;
strcpy(cfgRsp.stbName, mer2.me.name);

View File

@ -402,10 +402,6 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t ver, SRpcMsg
if (!syncUtilUserCommit(pMsg->msgType)) goto _exit;
if (pMsg->msgType == TDMT_VND_STREAM_RECOVER_BLOCKING_STAGE || pMsg->msgType == TDMT_STREAM_TASK_CHECK_RSP) {
if (tqCheckLogInWal(pVnode->pTq, ver)) return 0;
}
// skip header
pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
len = pMsg->contLen - sizeof(SMsgHead);
@ -501,16 +497,6 @@ int32_t vnodeProcessWriteMsg(SVnode *pVnode, SRpcMsg *pMsg, int64_t ver, SRpcMsg
goto _err;
}
} break;
case TDMT_VND_STREAM_RECOVER_BLOCKING_STAGE: {
if (tqProcessTaskRecover2Req(pVnode->pTq, ver, pMsg->pCont, pMsg->contLen) < 0) {
goto _err;
}
} break;
case TDMT_STREAM_TASK_CHECK_RSP: {
if (tqProcessStreamTaskCheckRsp(pVnode->pTq, ver, pReq, len) < 0) {
goto _err;
}
} break;
case TDMT_VND_ALTER_CONFIRM:
needCommit = pVnode->config.hashChange;
if (vnodeProcessAlterConfirmReq(pVnode, ver, pReq, len, pRsp) < 0) {
@ -643,26 +629,49 @@ int32_t vnodeProcessFetchMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) {
// return tqProcessPollReq(pVnode->pTq, pMsg);
case TDMT_VND_TMQ_VG_WALINFO:
return tqProcessVgWalInfoReq(pVnode->pTq, pMsg);
default:
vError("unknown msg type:%d in fetch queue", pMsg->msgType);
return TSDB_CODE_APP_ERROR;
}
}
int32_t vnodeProcessStreamMsg(SVnode *pVnode, SRpcMsg *pMsg, SQueueInfo *pInfo) {
vTrace("vgId:%d, msg:%p in fetch queue is processing", pVnode->config.vgId, pMsg);
if ((pMsg->msgType == TDMT_SCH_FETCH || pMsg->msgType == TDMT_VND_TABLE_META || pMsg->msgType == TDMT_VND_TABLE_CFG ||
pMsg->msgType == TDMT_VND_BATCH_META) &&
!syncIsReadyForRead(pVnode->sync)) {
vnodeRedirectRpcMsg(pVnode, pMsg, terrno);
return 0;
}
switch (pMsg->msgType) {
case TDMT_STREAM_TASK_RUN:
return tqProcessTaskRunReq(pVnode->pTq, pMsg);
case TDMT_STREAM_TASK_DISPATCH:
return tqProcessTaskDispatchReq(pVnode->pTq, pMsg, true);
case TDMT_STREAM_TASK_CHECK:
return tqProcessStreamTaskCheckReq(pVnode->pTq, pMsg);
case TDMT_STREAM_TASK_DISPATCH_RSP:
return tqProcessTaskDispatchRsp(pVnode->pTq, pMsg);
case TDMT_STREAM_TASK_CHECK:
return tqProcessStreamTaskCheckReq(pVnode->pTq, pMsg);
case TDMT_STREAM_TASK_CHECK_RSP:
return tqProcessStreamTaskCheckRsp(pVnode->pTq, 0, pMsg);
case TDMT_STREAM_RETRIEVE:
return tqProcessTaskRetrieveReq(pVnode->pTq, pMsg);
case TDMT_STREAM_RETRIEVE_RSP:
return tqProcessTaskRetrieveRsp(pVnode->pTq, pMsg);
case TDMT_VND_STREAM_RECOVER_NONBLOCKING_STAGE:
return tqProcessTaskRecover1Req(pVnode->pTq, pMsg);
case TDMT_STREAM_RECOVER_FINISH:
case TDMT_VND_STREAM_SCAN_HISTORY:
return tqProcessTaskScanHistory(pVnode->pTq, pMsg);
case TDMT_STREAM_TRANSFER_STATE: {
char* pReq = POINTER_SHIFT(pMsg->pCont, sizeof(SMsgHead));
int32_t len = pMsg->contLen - sizeof(SMsgHead);
return tqProcessTaskTransferStateReq(pVnode->pTq, 0, pReq, len);
}
case TDMT_STREAM_SCAN_HISTORY_FINISH:
return tqProcessTaskRecoverFinishReq(pVnode->pTq, pMsg);
case TDMT_STREAM_RECOVER_FINISH_RSP:
case TDMT_STREAM_SCAN_HISTORY_FINISH_RSP:
return tqProcessTaskRecoverFinishRsp(pVnode->pTq, pMsg);
default:
vError("unknown msg type:%d in fetch queue", pMsg->msgType);
vError("unknown msg type:%d in stream queue", pMsg->msgType);
return TSDB_CODE_APP_ERROR;
}
}
@ -1695,7 +1704,7 @@ static int32_t vnodeProcessBatchDeleteReq(SVnode *pVnode, int64_t ver, void *pRe
tDecodeSBatchDeleteReq(&decoder, &deleteReq);
SMetaReader mr = {0};
metaReaderInit(&mr, pVnode->pMeta, META_READER_NOLOCK);
metaReaderDoInit(&mr, pVnode->pMeta, META_READER_NOLOCK);
int32_t sz = taosArrayGetSize(deleteReq.deleteReqs);
for (int32_t i = 0; i < sz; i++) {

View File

@ -554,7 +554,7 @@ static void vnodeRestoreFinish(const SSyncFSM *pFsm, const SyncIndex commitIdx)
vInfo("vgId:%d, not launch stream tasks, since stream tasks are disabled", pVnode->config.vgId);
} else {
vInfo("vgId:%d start to launch stream tasks", pVnode->config.vgId);
tqStartStreamTasks(pVnode->pTq);
tqCheckStreamStatus(pVnode->pTq);
}
}

View File

@ -392,7 +392,7 @@ static int32_t setAliveResultIntoDataBlock(int64_t* pConnId, SSDataBlock* pBlock
int32_t status = 0;
int32_t code = getAliveStatusFromApi(pConnId, dbName, &status);
if (code == TSDB_CODE_SUCCESS) {
colDataAppend(pCol1, 0, (const char*)&status, false);
colDataSetVal(pCol1, 0, (const char*)&status, false);
}
return code;
}

View File

@ -285,6 +285,8 @@ typedef struct SStreamAggSupporter {
int16_t stateKeyType;
SDiskbasedBuf* pResultBuf;
SStateStore stateStore;
STimeWindow winRange;
SStorageAPI* pSessionAPI;
} SStreamAggSupporter;
typedef struct SWindowSupporter {
@ -503,6 +505,8 @@ typedef struct SStreamSessionAggOperatorInfo {
SArray* pUpdated;
SSHashObj* pStUpdated;
int64_t dataVersion;
SArray* historyWins;
bool isHistoryOp;
} SStreamSessionAggOperatorInfo;
typedef struct SStreamStateAggOperatorInfo {
@ -522,6 +526,8 @@ typedef struct SStreamStateAggOperatorInfo {
SArray* pUpdated;
SSHashObj* pSeUpdated;
int64_t dataVersion;
bool isHistoryOp;
SArray* historyWins;
} SStreamStateAggOperatorInfo;
typedef struct SStreamPartitionOperatorInfo {
@ -678,6 +684,8 @@ void doUpdateNumOfRows(SqlFunctionCtx* pCtx, SResultRow* pRow, int32_t numOfExpr
void doClearBufferedBlocks(SStreamScanInfo* pInfo);
uint64_t calcGroupId(char* pData, int32_t len);
void streamOpReleaseState(struct SOperatorInfo* pOperator);
void streamOpReloadState(struct SOperatorInfo* pOperator);
#ifdef __cplusplus
}

View File

@ -35,6 +35,7 @@ typedef SSDataBlock* (*__optr_fn_t)(struct SOperatorInfo* pOptr);
typedef void (*__optr_close_fn_t)(void* param);
typedef int32_t (*__optr_explain_fn_t)(struct SOperatorInfo* pOptr, void** pOptrExplain, uint32_t* len);
typedef int32_t (*__optr_reqBuf_fn_t)(struct SOperatorInfo* pOptr);
typedef void (*__optr_state_fn_t)(struct SOperatorInfo* pOptr);
typedef struct SOperatorFpSet {
__optr_open_fn_t _openFn; // DO NOT invoke this function directly
@ -45,6 +46,8 @@ typedef struct SOperatorFpSet {
__optr_encode_fn_t encodeResultRow;
__optr_decode_fn_t decodeResultRow;
__optr_explain_fn_t getExplainFn;
__optr_state_fn_t releaseStreamStateFn;
__optr_state_fn_t reloadStreamStateFn;
} SOperatorFpSet;
enum {
@ -126,13 +129,13 @@ SOperatorInfo* createTimeSliceOperatorInfo(SOperatorInfo* downstream, SPhysiNode
SOperatorInfo* createMergeJoinOperatorInfo(SOperatorInfo** pDownstream, int32_t numOfDownstream, SSortMergeJoinPhysiNode* pJoinNode, SExecTaskInfo* pTaskInfo);
SOperatorInfo* createStreamSessionAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo);
SOperatorInfo* createStreamSessionAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, SReadHandle* pHandle);
SOperatorInfo* createStreamFinalSessionAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, int32_t numOfChild);
SOperatorInfo* createStreamFinalSessionAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, int32_t numOfChild, SReadHandle* pHandle);
SOperatorInfo* createStreamIntervalOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo);
SOperatorInfo* createStreamStateAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo);
SOperatorInfo* createStreamStateAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, SReadHandle* pHandle);
SOperatorInfo* createStreamFillOperatorInfo(SOperatorInfo* downstream, SStreamFillPhysiNode* pPhyFillNode, SExecTaskInfo* pTaskInfo);
@ -143,6 +146,7 @@ SOperatorInfo* createEventwindowOperatorInfo(SOperatorInfo* downstream, SPhysiNo
SOperatorFpSet createOperatorFpSet(__optr_open_fn_t openFn, __optr_fn_t nextFn, __optr_fn_t cleanup,
__optr_close_fn_t closeFn, __optr_reqBuf_fn_t reqBufFn, __optr_explain_fn_t explain);
void setOperatorStreamStateFn(SOperatorInfo* pOperator, __optr_state_fn_t relaseFn, __optr_state_fn_t reloadFn);
int32_t optrDummyOpenFn(SOperatorInfo* pOperator);
int32_t appendDownstream(SOperatorInfo* p, SOperatorInfo** pDownstream, int32_t num);
void setOperatorCompleted(SOperatorInfo* pOperator);

View File

@ -62,10 +62,12 @@ typedef struct {
SSchemaWrapper* schema;
char tbName[TSDB_TABLE_NAME_LEN]; // this is the current scan table: todo refactor
int8_t recoverStep;
bool recoverStep1Finished;
bool recoverStep2Finished;
int8_t recoverScanFinished;
SQueryTableDataCond tableCond;
int64_t fillHistoryVer1;
int64_t fillHistoryVer2;
SVersionRange fillHistoryVer;
STimeWindow fillHistoryWindow;
SStreamState* pState;
int64_t dataVersion;
int64_t checkPointId;

View File

@ -92,6 +92,7 @@ static int32_t doSetStreamOpOpen(SOperatorInfo* pOperator, char* id) {
qError("join not supported for stream block scan, %s" PRIx64, id);
return TSDB_CODE_APP_ERROR;
}
pOperator->status = OP_NOT_OPENED;
return doSetStreamOpOpen(pOperator->pDownstream[0], id);
}
@ -115,6 +116,16 @@ void resetTaskInfo(qTaskInfo_t tinfo) {
clearStreamBlock(pTaskInfo->pRoot);
}
void qResetStreamInfoTimeWindow(qTaskInfo_t tinfo) {
SExecTaskInfo* pTaskInfo = (SExecTaskInfo*) tinfo;
if (pTaskInfo == NULL) {
return;
}
qDebug("%s set fill history start key:%"PRId64, GET_TASKID(pTaskInfo), INT64_MIN);
pTaskInfo->streamInfo.fillHistoryWindow.skey = INT64_MIN;
}
static int32_t doSetStreamBlock(SOperatorInfo* pOperator, void* input, size_t numOfBlocks, int32_t type, const char* id) {
if (pOperator->operatorType != QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) {
if (pOperator->numOfDownstream == 0) {
@ -130,10 +141,9 @@ static int32_t doSetStreamBlock(SOperatorInfo* pOperator, void* input, size_t nu
return doSetStreamBlock(pOperator->pDownstream[0], input, numOfBlocks, type, id);
} else {
pOperator->status = OP_NOT_OPENED;
SStreamScanInfo* pInfo = pOperator->info;
qDebug("s-task:%s in this batch, all %d blocks need to be processed and dump results", id, (int32_t)numOfBlocks);
qDebug("s-task:%s in this batch, %d blocks need to be processed", id, (int32_t)numOfBlocks);
ASSERT(pInfo->validBlockIndex == 0 && taosArrayGetSize(pInfo->pBlockLists) == 0);
if (type == STREAM_INPUT__MERGED_SUBMIT) {
@ -265,6 +275,7 @@ qTaskInfo_t qCreateQueueExecTaskInfo(void* msg, SReadHandle* pReaderHandle, int3
terrno = TSDB_CODE_OUT_OF_MEMORY;
return NULL;
}
pTaskInfo->pRoot = createRawScanOperatorInfo(pReaderHandle, pTaskInfo);
if (NULL == pTaskInfo->pRoot) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
@ -314,8 +325,8 @@ qTaskInfo_t qCreateStreamExecTaskInfo(void* msg, SReadHandle* readers, int32_t v
return NULL;
}
struct SSubplan* pPlan = NULL;
int32_t code = qStringToSubplan(msg, &pPlan);
SSubplan* pPlan = NULL;
int32_t code = qStringToSubplan(msg, &pPlan);
if (code != TSDB_CODE_SUCCESS) {
terrno = code;
return NULL;
@ -869,19 +880,41 @@ int32_t qExtractStreamScanner(qTaskInfo_t tinfo, void** scanner) {
}
}
int32_t qStreamSourceRecoverStep1(qTaskInfo_t tinfo, int64_t ver) {
int32_t qStreamSourceScanParamForHistoryScanStep1(qTaskInfo_t tinfo, SVersionRange *pVerRange, STimeWindow* pWindow) {
SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo;
ASSERT(pTaskInfo->execModel == OPTR_EXEC_MODEL_STREAM);
pTaskInfo->streamInfo.fillHistoryVer1 = ver;
pTaskInfo->streamInfo.recoverStep = STREAM_RECOVER_STEP__PREPARE1;
SStreamTaskInfo* pStreamInfo = &pTaskInfo->streamInfo;
pStreamInfo->fillHistoryVer = *pVerRange;
pStreamInfo->fillHistoryWindow = *pWindow;
pStreamInfo->recoverStep = STREAM_RECOVER_STEP__PREPARE1;
pStreamInfo->recoverStep1Finished = false;
pStreamInfo->recoverStep2Finished = false;
qDebug("%s step 1. set param for stream scanner for scan history data, verRange:%" PRId64 " - %" PRId64 ", window:%" PRId64
" - %" PRId64,
GET_TASKID(pTaskInfo), pStreamInfo->fillHistoryVer.minVer, pStreamInfo->fillHistoryVer.maxVer, pWindow->skey,
pWindow->ekey);
return 0;
}
int32_t qStreamSourceRecoverStep2(qTaskInfo_t tinfo, int64_t ver) {
int32_t qStreamSourceScanParamForHistoryScanStep2(qTaskInfo_t tinfo, SVersionRange *pVerRange, STimeWindow* pWindow) {
SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo;
ASSERT(pTaskInfo->execModel == OPTR_EXEC_MODEL_STREAM);
pTaskInfo->streamInfo.fillHistoryVer2 = ver;
pTaskInfo->streamInfo.recoverStep = STREAM_RECOVER_STEP__PREPARE2;
SStreamTaskInfo* pStreamInfo = &pTaskInfo->streamInfo;
pStreamInfo->fillHistoryVer = *pVerRange;
pStreamInfo->fillHistoryWindow = *pWindow;
pStreamInfo->recoverStep = STREAM_RECOVER_STEP__PREPARE2;
pStreamInfo->recoverStep1Finished = true;
pStreamInfo->recoverStep2Finished = false;
qDebug("%s step 2. set param for stream scanner for scan history data, verRange:%" PRId64 " - %" PRId64 ", window:%" PRId64
" - %" PRId64,
GET_TASKID(pTaskInfo), pStreamInfo->fillHistoryVer.minVer, pStreamInfo->fillHistoryVer.maxVer, pWindow->skey,
pWindow->ekey);
return 0;
}
@ -892,55 +925,58 @@ int32_t qStreamRecoverFinish(qTaskInfo_t tinfo) {
return 0;
}
int32_t qStreamSetParamForRecover(qTaskInfo_t tinfo) {
int32_t qSetStreamOperatorOptionForScanHistory(qTaskInfo_t tinfo) {
SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo;
SOperatorInfo* pOperator = pTaskInfo->pRoot;
while (1) {
if (pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL ||
pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_INTERVAL ||
pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_INTERVAL) {
int32_t type = pOperator->operatorType;
if (type == QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL || type == QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_INTERVAL ||
type == QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_INTERVAL) {
SStreamIntervalOperatorInfo* pInfo = pOperator->info;
ASSERT(pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE ||
pInfo->twAggSup.calTrigger == STREAM_TRIGGER_WINDOW_CLOSE);
ASSERT(pInfo->twAggSup.calTriggerSaved == 0 && pInfo->twAggSup.deleteMarkSaved == 0);
STimeWindowAggSupp* pSup = &pInfo->twAggSup;
qInfo("save stream param for interval: %d, %" PRId64, pInfo->twAggSup.calTrigger, pInfo->twAggSup.deleteMark);
ASSERT(pSup->calTrigger == STREAM_TRIGGER_AT_ONCE || pSup->calTrigger == STREAM_TRIGGER_WINDOW_CLOSE);
ASSERT(pSup->calTriggerSaved == 0 && pSup->deleteMarkSaved == 0);
pInfo->twAggSup.calTriggerSaved = pInfo->twAggSup.calTrigger;
pInfo->twAggSup.deleteMarkSaved = pInfo->twAggSup.deleteMark;
pInfo->twAggSup.calTrigger = STREAM_TRIGGER_AT_ONCE;
pInfo->twAggSup.deleteMark = INT64_MAX;
qInfo("save stream param for interval: %d, %" PRId64, pSup->calTrigger, pSup->deleteMark);
pSup->calTriggerSaved = pSup->calTrigger;
pSup->deleteMarkSaved = pSup->deleteMark;
pSup->calTrigger = STREAM_TRIGGER_AT_ONCE;
pSup->deleteMark = INT64_MAX;
pInfo->ignoreExpiredDataSaved = pInfo->ignoreExpiredData;
pInfo->ignoreExpiredData = false;
} else if (pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION ||
pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_SESSION ||
pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_SESSION) {
} else if (type == QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION ||
type == QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_SESSION ||
type == QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_SESSION) {
SStreamSessionAggOperatorInfo* pInfo = pOperator->info;
ASSERT(pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE ||
pInfo->twAggSup.calTrigger == STREAM_TRIGGER_WINDOW_CLOSE);
STimeWindowAggSupp* pSup = &pInfo->twAggSup;
ASSERT(pInfo->twAggSup.calTriggerSaved == 0 && pInfo->twAggSup.deleteMarkSaved == 0);
qInfo("save stream param for session: %d, %" PRId64, pInfo->twAggSup.calTrigger, pInfo->twAggSup.deleteMark);
ASSERT(pSup->calTrigger == STREAM_TRIGGER_AT_ONCE || pSup->calTrigger == STREAM_TRIGGER_WINDOW_CLOSE);
ASSERT(pSup->calTriggerSaved == 0 && pSup->deleteMarkSaved == 0);
pInfo->twAggSup.calTriggerSaved = pInfo->twAggSup.calTrigger;
pInfo->twAggSup.deleteMarkSaved = pInfo->twAggSup.deleteMark;
pInfo->twAggSup.calTrigger = STREAM_TRIGGER_AT_ONCE;
pInfo->twAggSup.deleteMark = INT64_MAX;
qInfo("save stream param for session: %d, %" PRId64, pSup->calTrigger, pSup->deleteMark);
pSup->calTriggerSaved = pSup->calTrigger;
pSup->deleteMarkSaved = pSup->deleteMark;
pSup->calTrigger = STREAM_TRIGGER_AT_ONCE;
pSup->deleteMark = INT64_MAX;
pInfo->ignoreExpiredDataSaved = pInfo->ignoreExpiredData;
pInfo->ignoreExpiredData = false;
} else if (pOperator->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_STATE) {
} else if (type == QUERY_NODE_PHYSICAL_PLAN_STREAM_STATE) {
SStreamStateAggOperatorInfo* pInfo = pOperator->info;
ASSERT(pInfo->twAggSup.calTrigger == STREAM_TRIGGER_AT_ONCE ||
pInfo->twAggSup.calTrigger == STREAM_TRIGGER_WINDOW_CLOSE);
ASSERT(pInfo->twAggSup.calTriggerSaved == 0 && pInfo->twAggSup.deleteMarkSaved == 0);
STimeWindowAggSupp* pSup = &pInfo->twAggSup;
qInfo("save stream param for state: %d, %" PRId64, pInfo->twAggSup.calTrigger, pInfo->twAggSup.deleteMark);
ASSERT(pSup->calTrigger == STREAM_TRIGGER_AT_ONCE || pSup->calTrigger == STREAM_TRIGGER_WINDOW_CLOSE);
ASSERT(pSup->calTriggerSaved == 0 && pSup->deleteMarkSaved == 0);
pInfo->twAggSup.calTriggerSaved = pInfo->twAggSup.calTrigger;
pInfo->twAggSup.deleteMarkSaved = pInfo->twAggSup.deleteMark;
pInfo->twAggSup.calTrigger = STREAM_TRIGGER_AT_ONCE;
pInfo->twAggSup.deleteMark = INT64_MAX;
qInfo("save stream param for state: %d, %" PRId64, pSup->calTrigger, pSup->deleteMark);
pSup->calTriggerSaved = pSup->calTrigger;
pSup->deleteMarkSaved = pSup->deleteMark;
pSup->calTrigger = STREAM_TRIGGER_AT_ONCE;
pSup->deleteMark = INT64_MAX;
pInfo->ignoreExpiredDataSaved = pInfo->ignoreExpiredData;
pInfo->ignoreExpiredData = false;
}
@ -961,7 +997,7 @@ int32_t qStreamSetParamForRecover(qTaskInfo_t tinfo) {
return 0;
}
int32_t qStreamRestoreParam(qTaskInfo_t tinfo) {
int32_t qRestoreStreamOperatorOption(qTaskInfo_t tinfo) {
SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo;
SOperatorInfo* pOperator = pTaskInfo->pRoot;
@ -1009,6 +1045,26 @@ bool qStreamRecoverScanFinished(qTaskInfo_t tinfo) {
return pTaskInfo->streamInfo.recoverScanFinished;
}
bool qStreamRecoverScanStep1Finished(qTaskInfo_t tinfo) {
SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo;
return pTaskInfo->streamInfo.recoverStep1Finished;
}
bool qStreamRecoverScanStep2Finished(qTaskInfo_t tinfo) {
SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo;
return pTaskInfo->streamInfo.recoverStep2Finished;
}
int32_t qStreamRecoverSetAllStepFinished(qTaskInfo_t tinfo) {
SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)tinfo;
pTaskInfo->streamInfo.recoverStep1Finished = true;
pTaskInfo->streamInfo.recoverStep2Finished = true;
// reset the time window
pTaskInfo->streamInfo.fillHistoryWindow.skey = INT64_MIN;
return 0;
}
void* qExtractReaderFromStreamScanner(void* scanner) {
SStreamScanInfo* pInfo = scanner;
return (void*)pInfo->tqReader;
@ -1323,4 +1379,16 @@ SArray* getTableListInfo(const SExecTaskInfo* pTaskInfo) {
SOperatorInfo* pOperator = pTaskInfo->pRoot;
extractTableList(pArray, pOperator);
return pArray;
}
}
int32_t qStreamOperatorReleaseState(qTaskInfo_t tInfo) {
SExecTaskInfo* pTaskInfo = (SExecTaskInfo*) tInfo;
pTaskInfo->pRoot->fpSet.releaseStreamStateFn(pTaskInfo->pRoot);
return 0;
}
int32_t qStreamOperatorReloadState(qTaskInfo_t tInfo) {
SExecTaskInfo* pTaskInfo = (SExecTaskInfo*) tInfo;
pTaskInfo->pRoot->fpSet.reloadStreamStateFn(pTaskInfo->pRoot);
return 0;
}

View File

@ -540,140 +540,12 @@ void extractQualifiedTupleByFilterResult(SSDataBlock* pBlock, const SColumnInfoD
}
int8_t* pIndicator = (int8_t*)p->pData;
int32_t totalRows = pBlock->info.rows;
if (status == FILTER_RESULT_ALL_QUALIFIED) {
// here nothing needs to be done
} else if (status == FILTER_RESULT_NONE_QUALIFIED) {
pBlock->info.rows = 0;
} else {
int32_t bmLen = BitmapLen(totalRows);
char* pBitmap = NULL;
int32_t maxRows = 0;
size_t numOfCols = taosArrayGetSize(pBlock->pDataBlock);
for (int32_t i = 0; i < numOfCols; ++i) {
SColumnInfoData* pDst = taosArrayGet(pBlock->pDataBlock, i);
// it is a reserved column for scalar function, and no data in this column yet.
if (pDst->pData == NULL) {
continue;
}
int32_t numOfRows = 0;
if (IS_VAR_DATA_TYPE(pDst->info.type)) {
int32_t j = 0;
while (j < totalRows) {
if (pIndicator[j] == 0) {
j += 1;
continue;
}
if (colDataIsNull_var(pDst, j)) {
colDataSetNull_var(pDst, numOfRows);
} else {
char* p1 = colDataGetVarData(pDst, j);
colDataReassignVal(pDst, numOfRows, j, p1);
}
numOfRows += 1;
j += 1;
}
if (maxRows < numOfRows) {
maxRows = numOfRows;
}
} else {
if (pBitmap == NULL) {
pBitmap = taosMemoryCalloc(1, bmLen);
}
memcpy(pBitmap, pDst->nullbitmap, bmLen);
memset(pDst->nullbitmap, 0, bmLen);
int32_t j = 0;
switch (pDst->info.type) {
case TSDB_DATA_TYPE_BIGINT:
case TSDB_DATA_TYPE_UBIGINT:
case TSDB_DATA_TYPE_DOUBLE:
case TSDB_DATA_TYPE_TIMESTAMP:
while (j < totalRows) {
if (pIndicator[j] == 0) {
j += 1;
continue;
}
if (colDataIsNull_f(pBitmap, j)) {
colDataSetNull_f(pDst->nullbitmap, numOfRows);
} else {
((int64_t*)pDst->pData)[numOfRows] = ((int64_t*)pDst->pData)[j];
}
numOfRows += 1;
j += 1;
}
break;
case TSDB_DATA_TYPE_FLOAT:
case TSDB_DATA_TYPE_INT:
case TSDB_DATA_TYPE_UINT:
while (j < totalRows) {
if (pIndicator[j] == 0) {
j += 1;
continue;
}
if (colDataIsNull_f(pBitmap, j)) {
colDataSetNull_f(pDst->nullbitmap, numOfRows);
} else {
((int32_t*)pDst->pData)[numOfRows] = ((int32_t*)pDst->pData)[j];
}
numOfRows += 1;
j += 1;
}
break;
case TSDB_DATA_TYPE_SMALLINT:
case TSDB_DATA_TYPE_USMALLINT:
while (j < totalRows) {
if (pIndicator[j] == 0) {
j += 1;
continue;
}
if (colDataIsNull_f(pBitmap, j)) {
colDataSetNull_f(pDst->nullbitmap, numOfRows);
} else {
((int16_t*)pDst->pData)[numOfRows] = ((int16_t*)pDst->pData)[j];
}
numOfRows += 1;
j += 1;
}
break;
case TSDB_DATA_TYPE_BOOL:
case TSDB_DATA_TYPE_TINYINT:
case TSDB_DATA_TYPE_UTINYINT:
while (j < totalRows) {
if (pIndicator[j] == 0) {
j += 1;
continue;
}
if (colDataIsNull_f(pBitmap, j)) {
colDataSetNull_f(pDst->nullbitmap, numOfRows);
} else {
((int8_t*)pDst->pData)[numOfRows] = ((int8_t*)pDst->pData)[j];
}
numOfRows += 1;
j += 1;
}
break;
}
}
if (maxRows < numOfRows) {
maxRows = numOfRows;
}
}
pBlock->info.rows = maxRows;
if (pBitmap != NULL) {
taosMemoryFree(pBitmap);
}
trimDataBlock(pBlock, pBlock->info.rows, (bool*) pIndicator);
}
}
@ -1105,9 +977,11 @@ int32_t buildSessionResultDataBlock(SOperatorInfo* pOperator, void* pState, SSDa
int32_t size = 0;
void* pVal = NULL;
int32_t code = pAPI->stateStore.streamStateSessionGet(pState, pKey, &pVal, &size);
ASSERT(code == 0);
// ASSERT(code == 0);
if (code == -1) {
// coverity scan
// for history
qWarn("===stream===not found session result key:%" PRId64 ", ekey:%" PRId64 ", groupId:%" PRIu64, pKey->win.skey,
pKey->win.ekey, pKey->groupId);
pGroupResInfo->index += 1;
continue;
}
@ -1177,40 +1051,16 @@ int32_t buildSessionResultDataBlock(SOperatorInfo* pOperator, void* pState, SSDa
return TSDB_CODE_SUCCESS;
}
void qStreamCloseTsdbReader(void* task) {
if (task == NULL) {
return;
}
SExecTaskInfo* pTaskInfo = (SExecTaskInfo*)task;
SOperatorInfo* pOp = pTaskInfo->pRoot;
qDebug("stream close tsdb reader, reset status uid:%" PRId64 " ts:%" PRId64, pTaskInfo->streamInfo.currentOffset.uid,
pTaskInfo->streamInfo.currentOffset.ts);
// todo refactor, other thread may already use this read to extract data.
pTaskInfo->streamInfo.currentOffset = (STqOffsetVal){0};
while (pOp->numOfDownstream == 1 && pOp->pDownstream[0]) {
SOperatorInfo* pDownstreamOp = pOp->pDownstream[0];
if (pDownstreamOp->operatorType == QUERY_NODE_PHYSICAL_PLAN_STREAM_SCAN) {
SStreamScanInfo* pInfo = pDownstreamOp->info;
if (pInfo->pTableScanOp) {
STableScanInfo* pTSInfo = pInfo->pTableScanOp->info;
setOperatorCompleted(pInfo->pTableScanOp);
while (pTaskInfo->owner != 0) {
taosMsleep(100);
qDebug("wait for the reader stopping");
}
pTaskInfo->storageAPI.tsdReader.tsdReaderClose(pTSInfo->base.dataReader);
pTSInfo->base.dataReader = NULL;
// restore the status, todo refactor.
pInfo->pTableScanOp->status = OP_OPENED;
pTaskInfo->status = TASK_NOT_COMPLETED;
return;
}
}
void streamOpReleaseState(SOperatorInfo* pOperator) {
SOperatorInfo* downstream = pOperator->pDownstream[0];
if (downstream->fpSet.releaseStreamStateFn) {
downstream->fpSet.releaseStreamStateFn(downstream);
}
}
void streamOpReloadState(SOperatorInfo* pOperator) {
SOperatorInfo* downstream = pOperator->pDownstream[0];
if (downstream->fpSet.reloadStreamStateFn) {
downstream->fpSet.reloadStreamStateFn(downstream);
}
}

View File

@ -1562,6 +1562,7 @@ SOperatorInfo* createStreamFillOperatorInfo(SOperatorInfo* downstream, SStreamFi
pTaskInfo);
pOperator->fpSet =
createOperatorFpSet(optrDummyOpenFn, doStreamFill, NULL, destroyStreamFillOperatorInfo, optrDefaultBufFn, NULL);
setOperatorStreamStateFn(pOperator, streamOpReleaseState, streamOpReloadState);
code = appendDownstream(pOperator, &downstream, 1);
if (code != TSDB_CODE_SUCCESS) {

View File

@ -1036,7 +1036,7 @@ void appendCreateTableRow(void* pState, SExprSupp* pTableSup, SExprSupp* pTagSup
}
void* pGpIdCol = taosArrayGet(pDestBlock->pDataBlock, UD_GROUPID_COLUMN_INDEX);
colDataAppend(pGpIdCol, pDestBlock->info.rows, (const char*)&groupId, false);
colDataSetVal(pGpIdCol, pDestBlock->info.rows, (const char*)&groupId, false);
pDestBlock->info.rows++;
blockDataDestroy(pTmpBlock);
} else {
@ -1324,6 +1324,7 @@ SOperatorInfo* createStreamPartitionOperatorInfo(SOperatorInfo* downstream, SStr
pOperator->exprSupp.pExprInfo = pExprInfo;
pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doStreamHashPartition, NULL,
destroyStreamPartitionOperatorInfo, optrDefaultBufFn, NULL);
setOperatorStreamStateFn(pOperator, streamOpReleaseState, streamOpReloadState);
initParDownStream(downstream, &pInfo->partitionSup, &pInfo->scalarSup);
code = appendDownstream(pOperator, &downstream, 1);

View File

@ -38,11 +38,18 @@ SOperatorFpSet createOperatorFpSet(__optr_open_fn_t openFn, __optr_fn_t nextFn,
.closeFn = closeFn,
.reqBufFn = reqBufFn,
.getExplainFn = explain,
.releaseStreamStateFn = NULL,
.reloadStreamStateFn = NULL,
};
return fpSet;
}
void setOperatorStreamStateFn(SOperatorInfo* pOperator, __optr_state_fn_t relaseFn, __optr_state_fn_t reloadFn) {
pOperator->fpSet.releaseStreamStateFn = relaseFn;
pOperator->fpSet.reloadStreamStateFn = reloadFn;
}
int32_t optrDummyOpenFn(SOperatorInfo* pOperator) {
OPTR_SET_OPENED(pOperator);
pOperator->cost.openCost = 0;
@ -485,13 +492,13 @@ SOperatorInfo* createOperator(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, SR
SSessionWinodwPhysiNode* pSessionNode = (SSessionWinodwPhysiNode*)pPhyNode;
pOptr = createSessionAggOperatorInfo(ops[0], pSessionNode, pTaskInfo);
} else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION == type) {
pOptr = createStreamSessionAggOperatorInfo(ops[0], pPhyNode, pTaskInfo);
pOptr = createStreamSessionAggOperatorInfo(ops[0], pPhyNode, pTaskInfo, pHandle);
} else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_SESSION == type) {
int32_t children = 0;
pOptr = createStreamFinalSessionAggOperatorInfo(ops[0], pPhyNode, pTaskInfo, children);
pOptr = createStreamFinalSessionAggOperatorInfo(ops[0], pPhyNode, pTaskInfo, children, pHandle);
} else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_FINAL_SESSION == type) {
int32_t children = pHandle->numOfVgroups;
pOptr = createStreamFinalSessionAggOperatorInfo(ops[0], pPhyNode, pTaskInfo, children);
pOptr = createStreamFinalSessionAggOperatorInfo(ops[0], pPhyNode, pTaskInfo, children, pHandle);
} else if (QUERY_NODE_PHYSICAL_PLAN_PARTITION == type) {
pOptr = createPartitionOperatorInfo(ops[0], (SPartitionPhysiNode*)pPhyNode, pTaskInfo);
} else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_PARTITION == type) {
@ -500,7 +507,7 @@ SOperatorInfo* createOperator(SPhysiNode* pPhyNode, SExecTaskInfo* pTaskInfo, SR
SStateWinodwPhysiNode* pStateNode = (SStateWinodwPhysiNode*)pPhyNode;
pOptr = createStatewindowOperatorInfo(ops[0], pStateNode, pTaskInfo);
} else if (QUERY_NODE_PHYSICAL_PLAN_STREAM_STATE == type) {
pOptr = createStreamStateAggOperatorInfo(ops[0], pPhyNode, pTaskInfo);
pOptr = createStreamStateAggOperatorInfo(ops[0], pPhyNode, pTaskInfo, pHandle);
} else if (QUERY_NODE_PHYSICAL_PLAN_MERGE_JOIN == type) {
pOptr = createMergeJoinOperatorInfo(ops, size, (SSortMergeJoinPhysiNode*)pPhyNode, pTaskInfo);
} else if (QUERY_NODE_PHYSICAL_PLAN_FILL == type) {

View File

@ -73,6 +73,20 @@ static void destroyIndefinitOperatorInfo(void* param) {
taosMemoryFreeClear(param);
}
void streamOperatorReleaseState(SOperatorInfo* pOperator) {
SOperatorInfo* downstream = pOperator->pDownstream[0];
if (downstream->fpSet.releaseStreamStateFn) {
downstream->fpSet.releaseStreamStateFn(downstream);
}
}
void streamOperatorReloadState(SOperatorInfo* pOperator) {
SOperatorInfo* downstream = pOperator->pDownstream[0];
if (downstream->fpSet.reloadStreamStateFn) {
downstream->fpSet.reloadStreamStateFn(downstream);
}
}
SOperatorInfo* createProjectOperatorInfo(SOperatorInfo* downstream, SProjectPhysiNode* pProjPhyNode,
SExecTaskInfo* pTaskInfo) {
int32_t code = TSDB_CODE_SUCCESS;
@ -136,6 +150,7 @@ SOperatorInfo* createProjectOperatorInfo(SOperatorInfo* downstream, SProjectPhys
pTaskInfo);
pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doProjectOperation, NULL, destroyProjectOperatorInfo,
optrDefaultBufFn, NULL);
setOperatorStreamStateFn(pOperator, streamOperatorReleaseState, streamOperatorReloadState);
code = appendDownstream(pOperator, &downstream, 1);
if (code != TSDB_CODE_SUCCESS) {

View File

@ -44,6 +44,7 @@ int32_t scanDebug = 0;
#define SET_REVERSE_SCAN_FLAG(_info) ((_info)->scanFlag = REVERSE_SCAN)
#define SWITCH_ORDER(n) (((n) = ((n) == TSDB_ORDER_ASC) ? TSDB_ORDER_DESC : TSDB_ORDER_ASC))
#define STREAM_SCAN_OP_NAME "StreamScanOperator"
#define STREAM_SCAN_OP_STATE_NAME "StreamScanFillHistoryState"
typedef struct STableMergeScanExecInfo {
SFileBlockLoadRecorder blockRecorder;
@ -489,12 +490,12 @@ int32_t addTagPseudoColumnData(SReadHandle* pHandle, const SExprInfo* pExpr, int
}
int32_t code = 0;
bool freeReader = false;
// backup the rows
int32_t backupRows = pBlock->info.rows;
pBlock->info.rows = rows;
bool freeReader = false;
STableCachedVal val = {0};
SMetaReader mr = {0};
@ -1558,13 +1559,13 @@ static int32_t setBlockIntoRes(SStreamScanInfo* pInfo, const SSDataBlock* pBlock
blockDataEnsureCapacity(pInfo->pRes, pBlock->info.rows);
pInfo->pRes->info.rows = pBlock->info.rows;
pInfo->pRes->info.id.uid = pBlock->info.id.uid;
pInfo->pRes->info.type = STREAM_NORMAL;
pInfo->pRes->info.version = pBlock->info.version;
pBlockInfo->rows = pBlock->info.rows;
pBlockInfo->id.uid = pBlock->info.id.uid;
pBlockInfo->type = STREAM_NORMAL;
pBlockInfo->version = pBlock->info.version;
STableScanInfo* pTableScanInfo = pInfo->pTableScanOp->info;
pInfo->pRes->info.id.groupId = getTableGroupId(pTableScanInfo->base.pTableListInfo, pBlock->info.id.uid);
pBlockInfo->id.groupId = getTableGroupId(pTableScanInfo->base.pTableListInfo, pBlock->info.id.uid);
// todo extract method
for (int32_t i = 0; i < taosArrayGetSize(pInfo->matchInfo.pList); ++i) {
@ -1594,7 +1595,7 @@ static int32_t setBlockIntoRes(SStreamScanInfo* pInfo, const SSDataBlock* pBlock
// currently only the tbname pseudo column
if (pInfo->numOfPseudoExpr > 0) {
int32_t code = addTagPseudoColumnData(&pInfo->readHandle, pInfo->pPseudoExpr, pInfo->numOfPseudoExpr, pInfo->pRes,
pInfo->pRes->info.rows, GET_TASKID(pTaskInfo), &pTableScanInfo->base.metaCache);
pBlockInfo->rows, GET_TASKID(pTaskInfo), &pTableScanInfo->base.metaCache);
// ignore the table not exists error, since this table may have been dropped during the scan procedure.
if (code != TSDB_CODE_SUCCESS && code != TSDB_CODE_PAR_TABLE_NOT_EXIST) {
blockDataFreeRes((SSDataBlock*)pBlock);
@ -1611,7 +1612,6 @@ static int32_t setBlockIntoRes(SStreamScanInfo* pInfo, const SSDataBlock* pBlock
pInfo->pRes->info.dataLoad = 1;
blockDataUpdateTsWindow(pInfo->pRes, pInfo->primaryTsIndex);
// blockDataFreeRes((SSDataBlock*)pBlock);
calBlockTbName(pInfo, pInfo->pRes);
return 0;
@ -1769,7 +1769,7 @@ void streamScanOperatorDecode(void* pBuff, int32_t len, SStreamScanInfo* pInfo)
return;
}
void* pUpInfo = pInfo->stateStore.updateInfoInit(0, TSDB_TIME_PRECISION_MILLI, 0);
void* pUpInfo = taosMemoryCalloc(1, sizeof(SUpdateInfo));
int32_t code = pInfo->stateStore.updateInfoDeserialize(pBuff, len, pUpInfo);
if (code == TSDB_CODE_SUCCESS) {
pInfo->pUpdateInfo = pUpInfo;
@ -1783,25 +1783,28 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) {
SStorageAPI* pAPI = &pTaskInfo->storageAPI;
SStreamScanInfo* pInfo = pOperator->info;
SStreamTaskInfo* pStreamInfo = &pTaskInfo->streamInfo;
qDebug("stream scan started, %s", GET_TASKID(pTaskInfo));
qDebug("stream scan started, %s", id);
if (pTaskInfo->streamInfo.recoverStep == STREAM_RECOVER_STEP__PREPARE1 ||
pTaskInfo->streamInfo.recoverStep == STREAM_RECOVER_STEP__PREPARE2) {
if (pStreamInfo->recoverStep == STREAM_RECOVER_STEP__PREPARE1 || pStreamInfo->recoverStep == STREAM_RECOVER_STEP__PREPARE2) {
STableScanInfo* pTSInfo = pInfo->pTableScanOp->info;
memcpy(&pTSInfo->base.cond, &pTaskInfo->streamInfo.tableCond, sizeof(SQueryTableDataCond));
if (pTaskInfo->streamInfo.recoverStep == STREAM_RECOVER_STEP__PREPARE1) {
pTSInfo->base.cond.startVersion = 0;
pTSInfo->base.cond.endVersion = pTaskInfo->streamInfo.fillHistoryVer1;
qDebug("stream recover step1, verRange:%" PRId64 " - %" PRId64, pTSInfo->base.cond.startVersion,
pTSInfo->base.cond.endVersion);
pTaskInfo->streamInfo.recoverStep = STREAM_RECOVER_STEP__SCAN1;
memcpy(&pTSInfo->base.cond, &pStreamInfo->tableCond, sizeof(SQueryTableDataCond));
if (pStreamInfo->recoverStep == STREAM_RECOVER_STEP__PREPARE1) {
pTSInfo->base.cond.startVersion = pStreamInfo->fillHistoryVer.minVer;
pTSInfo->base.cond.endVersion = pStreamInfo->fillHistoryVer.maxVer;
pTSInfo->base.cond.twindows = pStreamInfo->fillHistoryWindow;
qDebug("stream recover step1, verRange:%" PRId64 "-%" PRId64 " window:%"PRId64"-%"PRId64", %s", pTSInfo->base.cond.startVersion,
pTSInfo->base.cond.endVersion, pTSInfo->base.cond.twindows.skey, pTSInfo->base.cond.twindows.ekey, id);
pStreamInfo->recoverStep = STREAM_RECOVER_STEP__SCAN1;
} else {
pTSInfo->base.cond.startVersion = pTaskInfo->streamInfo.fillHistoryVer1 + 1;
pTSInfo->base.cond.endVersion = pTaskInfo->streamInfo.fillHistoryVer2;
qDebug("stream recover step2, verRange:%" PRId64 " - %" PRId64, pTSInfo->base.cond.startVersion,
pTSInfo->base.cond.endVersion);
pTaskInfo->streamInfo.recoverStep = STREAM_RECOVER_STEP__SCAN2;
pTSInfo->base.cond.startVersion = pStreamInfo->fillHistoryVer.minVer;
pTSInfo->base.cond.endVersion = pStreamInfo->fillHistoryVer.maxVer;
qDebug("stream recover step2, verRange:%" PRId64 " - %" PRId64", %s", pTSInfo->base.cond.startVersion,
pTSInfo->base.cond.endVersion, id);
pStreamInfo->recoverStep = STREAM_RECOVER_STEP__SCAN2;
}
pAPI->tsdReader.tsdReaderClose(pTSInfo->base.dataReader);
@ -1811,11 +1814,11 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) {
pTSInfo->scanTimes = 0;
pTSInfo->currentGroupId = -1;
pTaskInfo->streamInfo.recoverScanFinished = false;
pStreamInfo->recoverScanFinished = false;
}
if (pTaskInfo->streamInfo.recoverStep == STREAM_RECOVER_STEP__SCAN1 ||
pTaskInfo->streamInfo.recoverStep == STREAM_RECOVER_STEP__SCAN2) {
if (pStreamInfo->recoverStep == STREAM_RECOVER_STEP__SCAN1 ||
pStreamInfo->recoverStep == STREAM_RECOVER_STEP__SCAN2) {
if (pInfo->blockRecoverContiCnt > 100) {
pInfo->blockRecoverTotCnt += pInfo->blockRecoverContiCnt;
pInfo->blockRecoverContiCnt = 0;
@ -1866,11 +1869,11 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) {
pInfo->blockRecoverContiCnt++;
calBlockTbName(pInfo, pInfo->pRecoverRes);
if (!pInfo->igCheckUpdate && pInfo->pUpdateInfo) {
if (pTaskInfo->streamInfo.recoverStep == STREAM_RECOVER_STEP__SCAN1) {
if (pStreamInfo->recoverStep == STREAM_RECOVER_STEP__SCAN1) {
TSKEY maxTs = pAPI->stateStore.updateInfoFillBlockData(pInfo->pUpdateInfo, pInfo->pRecoverRes, pInfo->primaryTsIndex);
pInfo->twAggSup.maxTs = TMAX(pInfo->twAggSup.maxTs, maxTs);
} else {
pInfo->pUpdateInfo->maxDataVersion = TMAX(pInfo->pUpdateInfo->maxDataVersion, pTaskInfo->streamInfo.fillHistoryVer2);
pInfo->pUpdateInfo->maxDataVersion = TMAX(pInfo->pUpdateInfo->maxDataVersion, pStreamInfo->fillHistoryVer.maxVer);
doCheckUpdate(pInfo, pInfo->pRecoverRes->info.window.ekey, pInfo->pRecoverRes);
}
}
@ -1884,7 +1887,7 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) {
printDataBlock(pInfo->pRecoverRes, "scan recover");
return pInfo->pRecoverRes;
}
pTaskInfo->streamInfo.recoverStep = STREAM_RECOVER_STEP__NONE;
pStreamInfo->recoverStep = STREAM_RECOVER_STEP__NONE;
STableScanInfo* pTSInfo = pInfo->pTableScanOp->info;
pAPI->tsdReader.tsdReaderClose(pTSInfo->base.dataReader);
@ -1893,7 +1896,7 @@ static SSDataBlock* doStreamScan(SOperatorInfo* pOperator) {
pTSInfo->base.cond.startVersion = -1;
pTSInfo->base.cond.endVersion = -1;
pTaskInfo->streamInfo.recoverScanFinished = true;
pStreamInfo->recoverScanFinished = true;
return NULL;
}
@ -1912,7 +1915,7 @@ FETCH_NEXT_BLOCK:
SPackedData* pPacked = taosArrayGet(pInfo->pBlockLists, current);
SSDataBlock* pBlock = pPacked->pDataBlock;
if (pBlock->info.parTbName[0]) {
pAPI->stateStore.streamStatePutParName(pTaskInfo->streamInfo.pState, pBlock->info.id.groupId, pBlock->info.parTbName);
pAPI->stateStore.streamStatePutParName(pStreamInfo->pState, pBlock->info.id.groupId, pBlock->info.parTbName);
}
// TODO move into scan
@ -2092,11 +2095,39 @@ FETCH_NEXT_BLOCK:
doCheckUpdate(pInfo, pBlockInfo->window.ekey, pBlock);
doFilter(pBlock, pOperator->exprSupp.pFilterInfo, NULL);
{ // do additional time window filter
STimeWindow* pWindow = &pStreamInfo->fillHistoryWindow;
if (pWindow->skey != INT64_MIN) {
qDebug("%s filter for additional history window, skey:%"PRId64, id, pWindow->skey);
bool* p = taosMemoryCalloc(pBlock->info.rows, sizeof(bool));
bool hasUnqualified = false;
SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, pInfo->primaryTsIndex);
for(int32_t i = 0; i < pBlock->info.rows; ++i) {
int64_t* ts = (int64_t*) colDataGetData(pCol, i);
p[i] = (*ts >= pWindow->skey);
if (!p[i]) {
hasUnqualified = true;
}
}
if (hasUnqualified) {
trimDataBlock(pBlock, pBlock->info.rows, p);
}
taosMemoryFree(p);
}
}
pBlock->info.dataLoad = 1;
blockDataUpdateTsWindow(pBlock, pInfo->primaryTsIndex);
qDebug("%" PRId64 " rows in datablock, update res:%" PRId64 " %s", pBlockInfo->rows,
pInfo->pUpdateDataRes->info.rows, id);
qDebug("%s %" PRId64 " rows in datablock, update res:%" PRId64, id, pBlockInfo->rows,
pInfo->pUpdateDataRes->info.rows);
if (pBlockInfo->rows > 0 || pInfo->pUpdateDataRes->info.rows > 0) {
break;
}
@ -2154,7 +2185,7 @@ static SSDataBlock* doRawScan(SOperatorInfo* pOperator) {
qDebug("tmqsnap doRawScan called");
if (pTaskInfo->streamInfo.currentOffset.type == TMQ_OFFSET__SNAPSHOT_DATA) {
bool hasNext = false;
if (pInfo->dataReader) {
if (pInfo->dataReader && pInfo->sContext->withMeta != ONLY_META) {
code = pAPI->tsdReader.tsdNextDataBlock(pInfo->dataReader, &hasNext);
if (code) {
pAPI->tsdReader.tsdReaderReleaseDataBlock(pInfo->dataReader);
@ -2180,7 +2211,7 @@ static SSDataBlock* doRawScan(SOperatorInfo* pOperator) {
SMetaTableInfo mtInfo = pAPI->snapshotFn.getMetaTableInfoFromSnapshot(pInfo->sContext);
STqOffsetVal offset = {0};
if (mtInfo.uid == 0) { // read snapshot done, change to get data from wal
if (mtInfo.uid == 0 || pInfo->sContext->withMeta == ONLY_META) { // read snapshot done, change to get data from wal
qDebug("tmqsnap read snapshot done, change to get data from wal");
tqOffsetResetToLog(&offset, pInfo->sContext->snapVersion);
} else {
@ -2294,6 +2325,57 @@ static void destroyStreamScanOperatorInfo(void* param) {
taosMemoryFree(pStreamScan);
}
void streamScanReleaseState(SOperatorInfo* pOperator) {
SStreamScanInfo* pInfo = pOperator->info;
if (!pInfo->pState) {
return;
}
if (!pInfo->pUpdateInfo) {
return;
}
int32_t len = pInfo->stateStore.updateInfoSerialize(NULL, 0, pInfo->pUpdateInfo);
void* pBuff = taosMemoryCalloc(1, len);
pInfo->stateStore.updateInfoSerialize(pBuff, len, pInfo->pUpdateInfo);
pInfo->stateStore.streamStateSaveInfo(pInfo->pState, STREAM_SCAN_OP_STATE_NAME, strlen(STREAM_SCAN_OP_STATE_NAME), pBuff, len);
taosMemoryFree(pBuff);
}
void streamScanReloadState(SOperatorInfo* pOperator) {
SStreamScanInfo* pInfo = pOperator->info;
if (!pInfo->pState) {
return;
}
void* pBuff = NULL;
int32_t len = 0;
pInfo->stateStore.streamStateGetInfo(pInfo->pState, STREAM_SCAN_OP_STATE_NAME, strlen(STREAM_SCAN_OP_STATE_NAME), &pBuff, &len);
SUpdateInfo* pUpInfo = taosMemoryCalloc(1, sizeof(SUpdateInfo));
int32_t code = pInfo->stateStore.updateInfoDeserialize(pBuff, len, pUpInfo);
taosMemoryFree(pBuff);
if (code == TSDB_CODE_SUCCESS && pInfo->pUpdateInfo) {
if (pInfo->pUpdateInfo->minTS < 0) {
pInfo->stateStore.updateInfoDestroy(pInfo->pUpdateInfo);
pInfo->pUpdateInfo = pUpInfo;
} else {
pInfo->pUpdateInfo->minTS = TMAX(pInfo->pUpdateInfo->minTS, pUpInfo->minTS);
pInfo->pUpdateInfo->maxDataVersion = TMAX(pInfo->pUpdateInfo->maxDataVersion, pUpInfo->maxDataVersion);
SHashObj* curMap = pInfo->pUpdateInfo->pMap;
void *pIte = taosHashIterate(curMap, NULL);
while (pIte != NULL) {
size_t keySize = 0;
int64_t* pUid = taosHashGetKey(pIte, &keySize);
taosHashPut(pUpInfo->pMap, pUid, sizeof(int64_t), pIte, sizeof(TSKEY));
pIte = taosHashIterate(curMap, pIte);
}
taosHashCleanup(curMap);
pInfo->pUpdateInfo->pMap = pUpInfo->pMap;
pUpInfo->pMap = NULL;
pInfo->stateStore.updateInfoDestroy(pUpInfo);
}
} else {
pInfo->stateStore.updateInfoDestroy(pUpInfo);
}
}
SOperatorInfo* createStreamScanOperatorInfo(SReadHandle* pHandle, STableScanPhysiNode* pTableScanNode, SNode* pTagCond,
STableListInfo* pTableListInfo, SExecTaskInfo* pTaskInfo) {
SArray* pColIds = NULL;
@ -2475,6 +2557,7 @@ SOperatorInfo* createStreamScanOperatorInfo(SReadHandle* pHandle, STableScanPhys
__optr_fn_t nextFn = (pTaskInfo->execModel == OPTR_EXEC_MODEL_STREAM) ? doStreamScan : doQueueScan;
pOperator->fpSet =
createOperatorFpSet(optrDummyOpenFn, nextFn, NULL, destroyStreamScanOperatorInfo, optrDefaultBufFn, NULL);
setOperatorStreamStateFn(pOperator, streamScanReleaseState, streamScanReloadState);
return pOperator;

View File

@ -966,20 +966,20 @@ static int32_t sysTableUserColsFillOneTableCols(const SSysTableScanInfo* pInfo,
// table name
pColInfoData = taosArrayGet(dataBlock->pDataBlock, 0);
colDataAppend(pColInfoData, numOfRows, tName, false);
colDataSetVal(pColInfoData, numOfRows, tName, false);
// database name
pColInfoData = taosArrayGet(dataBlock->pDataBlock, 1);
colDataAppend(pColInfoData, numOfRows, dbname, false);
colDataSetVal(pColInfoData, numOfRows, dbname, false);
pColInfoData = taosArrayGet(dataBlock->pDataBlock, 2);
colDataAppend(pColInfoData, numOfRows, tableType, false);
colDataSetVal(pColInfoData, numOfRows, tableType, false);
// col name
char colName[TSDB_COL_NAME_LEN + VARSTR_HEADER_SIZE] = {0};
STR_TO_VARSTR(colName, schemaRow->pSchema[i].name);
pColInfoData = taosArrayGet(dataBlock->pDataBlock, 3);
colDataAppend(pColInfoData, numOfRows, colName, false);
colDataSetVal(pColInfoData, numOfRows, colName, false);
// col type
int8_t colType = schemaRow->pSchema[i].type;
@ -994,10 +994,10 @@ static int32_t sysTableUserColsFillOneTableCols(const SSysTableScanInfo* pInfo,
(int32_t)((schemaRow->pSchema[i].bytes - VARSTR_HEADER_SIZE) / TSDB_NCHAR_SIZE));
}
varDataSetLen(colTypeStr, colTypeLen);
colDataAppend(pColInfoData, numOfRows, (char*)colTypeStr, false);
colDataSetVal(pColInfoData, numOfRows, (char*)colTypeStr, false);
pColInfoData = taosArrayGet(dataBlock->pDataBlock, 5);
colDataAppend(pColInfoData, numOfRows, (const char*)&schemaRow->pSchema[i].bytes, false);
colDataSetVal(pColInfoData, numOfRows, (const char*)&schemaRow->pSchema[i].bytes, false);
for (int32_t j = 6; j <= 8; ++j) {
pColInfoData = taosArrayGet(dataBlock->pDataBlock, j);

View File

@ -272,7 +272,7 @@ static bool genInterpolationResult(STimeSliceOperatorInfo* pSliceInfo, SExprSupp
continue;
} else if (isIsfilledPseudoColumn(pExprInfo)) {
bool isFilled = true;
colDataAppend(pDst, pResBlock->info.rows, (char*)&isFilled, false);
colDataSetVal(pDst, pResBlock->info.rows, (char*)&isFilled, false);
continue;
} else if (!isInterpFunc(pExprInfo)) {
if (isGroupKeyFunc(pExprInfo)) {

View File

@ -28,6 +28,9 @@
#define IS_FINAL_OP(op) ((op)->isFinal)
#define DEAULT_DELETE_MARK (1000LL * 60LL * 60LL * 24LL * 365LL * 10LL);
#define STREAM_INTERVAL_OP_STATE_NAME "StreamIntervalHistoryState"
#define STREAM_SESSION_OP_STATE_NAME "StreamSessionHistoryState"
#define STREAM_STATE_OP_STATE_NAME "StreamStateHistoryState"
typedef struct SStateWindowInfo {
SResultWindowInfo winInfo;
@ -2726,6 +2729,38 @@ int32_t getMaxFunResSize(SExprSupp* pSup, int32_t numOfCols) {
return size;
}
void streamIntervalReleaseState(SOperatorInfo* pOperator) {
if (pOperator->operatorType != QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_INTERVAL) {
SStreamIntervalOperatorInfo* pInfo = pOperator->info;
int32_t resSize = sizeof(TSKEY);
pInfo->statestore.streamStateSaveInfo(pInfo->pState, STREAM_INTERVAL_OP_STATE_NAME, strlen(STREAM_INTERVAL_OP_STATE_NAME), &pInfo->twAggSup.maxTs, resSize);
}
SStreamIntervalOperatorInfo* pInfo = pOperator->info;
SStorageAPI* pAPI = &pOperator->pTaskInfo->storageAPI;
pAPI->stateStore.streamStateCommit(pInfo->pState);
SOperatorInfo* downstream = pOperator->pDownstream[0];
if (downstream->fpSet.releaseStreamStateFn) {
downstream->fpSet.releaseStreamStateFn(downstream);
}
}
void streamIntervalReloadState(SOperatorInfo* pOperator) {
if (pOperator->operatorType != QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_INTERVAL) {
SStreamIntervalOperatorInfo* pInfo = pOperator->info;
int32_t size = 0;
void* pBuf = NULL;
int32_t code = pInfo->statestore.streamStateGetInfo(pInfo->pState, STREAM_INTERVAL_OP_STATE_NAME,
strlen(STREAM_INTERVAL_OP_STATE_NAME), &pBuf, &size);
TSKEY ts = *(TSKEY*)pBuf;
taosMemoryFree(pBuf);
pInfo->statestore.streamStateReloadInfo(pInfo->pState, ts);
}
SOperatorInfo* downstream = pOperator->pDownstream[0];
if (downstream->fpSet.reloadStreamStateFn) {
downstream->fpSet.reloadStreamStateFn(downstream);
}
}
SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode,
SExecTaskInfo* pTaskInfo, int32_t numOfChild) {
SIntervalPhysiNode* pIntervalPhyNode = (SIntervalPhysiNode*)pPhyNode;
@ -2823,7 +2858,7 @@ SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream,
pInfo->pUpdatedMap = NULL;
int32_t funResSize= getMaxFunResSize(&pOperator->exprSupp, numOfCols);
pInfo->pState->pFileState = pAPI->stateStore.streamFileStateInit(tsStreamBufferSize, sizeof(SWinKey), pInfo->aggSup.resultRowSize, funResSize,
compareTs, pInfo->pState, pInfo->twAggSup.deleteMark);
compareTs, pInfo->pState, pInfo->twAggSup.deleteMark, GET_TASKID(pTaskInfo));
pInfo->dataVersion = 0;
pInfo->statestore = pTaskInfo->storageAPI.stateStore;
pInfo->recvGetAll = false;
@ -2835,6 +2870,7 @@ SOperatorInfo* createStreamFinalIntervalOperatorInfo(SOperatorInfo* downstream,
pOperator->fpSet = createOperatorFpSet(NULL, doStreamFinalIntervalAgg, NULL, destroyStreamFinalIntervalOperatorInfo,
optrDefaultBufFn, NULL);
setOperatorStreamStateFn(pOperator, streamIntervalReleaseState, streamIntervalReloadState);
if (pPhyNode->type == QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_INTERVAL) {
initIntervalDownStream(downstream, pPhyNode->type, pInfo);
}
@ -2873,12 +2909,14 @@ void destroyStreamSessionAggOperatorInfo(void* param) {
}
taosArrayDestroy(pInfo->pChildren);
}
colDataDestroy(&pInfo->twAggSup.timeWindowData);
blockDataDestroy(pInfo->pDelRes);
blockDataDestroy(pInfo->pWinBlock);
blockDataDestroy(pInfo->pUpdateRes);
tSimpleHashCleanup(pInfo->pStDeleted);
taosArrayDestroy(pInfo->historyWins);
taosMemoryFreeClear(param);
}
@ -2928,7 +2966,7 @@ void initDownStream(SOperatorInfo* downstream, SStreamAggSupporter* pAggSup, uin
}
int32_t initStreamAggSupporter(SStreamAggSupporter* pSup, SqlFunctionCtx* pCtx, int32_t numOfOutput, int64_t gap,
SStreamState* pState, int32_t keySize, int16_t keyType, SStateStore* pStore) {
SStreamState* pState, int32_t keySize, int16_t keyType, SStateStore* pStore, SReadHandle* pHandle, SStorageAPI* pApi) {
pSup->resultRowSize = keySize + getResultRowSize(pCtx, numOfOutput);
pSup->pScanBlock = createSpecialDataBlock(STREAM_CLEAR);
pSup->gap = gap;
@ -2970,6 +3008,16 @@ int32_t initStreamAggSupporter(SStreamAggSupporter* pSup, SqlFunctionCtx* pCtx,
pCtx[i].saveHandle.pBuf = pSup->pResultBuf;
}
if (pHandle) {
pSup->winRange = pHandle->winRange;
// temporary
if (pSup->winRange.ekey <= 0) {
pSup->winRange.ekey = INT64_MAX;
}
}
pSup->pSessionAPI = pApi;
return TSDB_CODE_SUCCESS;
}
@ -2997,6 +3045,13 @@ void getCurSessionWindow(SStreamAggSupporter* pAggSup, TSKEY startTs, TSKEY endT
bool isInvalidSessionWin(SResultWindowInfo* pWinInfo) { return pWinInfo->sessionWin.win.skey == 0; }
bool inWinRange(STimeWindow* range, STimeWindow* cur) {
if (cur->skey >= range->skey && cur->ekey <= range->ekey) {
return true;
}
return false;
}
void setSessionOutputBuf(SStreamAggSupporter* pAggSup, TSKEY startTs, TSKEY endTs, uint64_t groupId,
SResultWindowInfo* pCurWin) {
pCurWin->sessionWin.groupId = groupId;
@ -3005,6 +3060,12 @@ void setSessionOutputBuf(SStreamAggSupporter* pAggSup, TSKEY startTs, TSKEY endT
int32_t size = pAggSup->resultRowSize;
int32_t code = pAggSup->stateStore.streamStateSessionAddIfNotExist(pAggSup->pState, &pCurWin->sessionWin,
pAggSup->gap, &pCurWin->pOutputBuf, &size);
if (code == TSDB_CODE_SUCCESS && !inWinRange(&pAggSup->winRange, &pCurWin->sessionWin.win)) {
code = TSDB_CODE_FAILED;
releaseOutputBuf(pAggSup->pState, NULL, (SResultRow*)pCurWin->pOutputBuf, &pAggSup->pSessionAPI->stateStore);
pCurWin->pOutputBuf = taosMemoryMalloc(size);
}
if (code == TSDB_CODE_SUCCESS) {
pCurWin->isOutput = true;
} else {
@ -3151,7 +3212,8 @@ static void compactSessionWindow(SOperatorInfo* pOperator, SResultWindowInfo* pC
while (1) {
SResultWindowInfo winInfo = {0};
SStreamStateCur* pCur = getNextSessionWinInfo(pAggSup, pStUpdated, pCurWin, &winInfo);
if (!IS_VALID_SESSION_WIN(winInfo) || !isInWindow(pCurWin, winInfo.sessionWin.win.skey, pAggSup->gap)) {
if (!IS_VALID_SESSION_WIN(winInfo) || !isInWindow(pCurWin, winInfo.sessionWin.win.skey, pAggSup->gap) ||
!inWinRange(&pAggSup->winRange, &winInfo.sessionWin.win)) {
taosMemoryFree(winInfo.pOutputBuf);
pAPI->stateStore.streamStateFreeCur(pCur);
break;
@ -3375,8 +3437,12 @@ static void rebuildSessionWindow(SOperatorInfo* pOperator, SArray* pWinArray, SS
SResultWindowInfo childWin = {0};
childWin.sessionWin = *pWinKey;
int32_t code = getSessionWinBuf(pChAggSup, pCur, &childWin);
if (code == TSDB_CODE_SUCCESS && pWinKey->win.skey <= childWin.sessionWin.win.skey &&
childWin.sessionWin.win.ekey <= pWinKey->win.ekey) {
if (code == TSDB_CODE_SUCCESS && !inWinRange(&pAggSup->winRange, &childWin.sessionWin.win)) {
continue;
}
if (code == TSDB_CODE_SUCCESS && inWinRange(&pWinKey->win, &childWin.sessionWin.win)) {
if (num == 0) {
setSessionOutputBuf(pAggSup, pWinKey->win.skey, pWinKey->win.ekey, pWinKey->groupId, &parentWin);
code = initSessionOutputBuf(&parentWin, &pResult, pSup->pCtx, numOfOutput, pSup->rowEntryInfoOffset);
@ -3475,6 +3541,29 @@ void doBuildSessionResult(SOperatorInfo* pOperator, void* pState, SGroupResInfo*
// clear the existed group id
pBlock->info.id.groupId = 0;
buildSessionResultDataBlock(pOperator, pState, pBlock, &pOperator->exprSupp, pGroupResInfo);
if (pBlock->info.rows == 0) {
cleanupGroupResInfo(pGroupResInfo);
}
}
void getMaxTsWins(const SArray* pAllWins, SArray* pMaxWins) {
int32_t size = taosArrayGetSize(pAllWins);
if (size == 0) {
return;
}
SSessionKey* pSeKey = taosArrayGet(pAllWins, size - 1);
taosArrayPush(pMaxWins, pSeKey);
if (pSeKey->groupId == 0) {
return;
}
uint64_t preGpId = pSeKey->groupId;
for (int32_t i = size - 2; i >= 0; i--) {
pSeKey = taosArrayGet(pAllWins, i);
if (preGpId != pSeKey->groupId) {
taosArrayPush(pMaxWins, pSeKey);
preGpId = pSeKey->groupId;
}
}
}
static SSDataBlock* doStreamSessionAgg(SOperatorInfo* pOperator) {
@ -3554,7 +3643,7 @@ static SSDataBlock* doStreamSessionAgg(SOperatorInfo* pOperator) {
// if chIndex + 1 - size > 0, add new child
for (int32_t i = 0; i < chIndex + 1 - size; i++) {
SOperatorInfo* pChildOp =
createStreamFinalSessionAggOperatorInfo(NULL, pInfo->pPhyNode, pOperator->pTaskInfo, 0);
createStreamFinalSessionAggOperatorInfo(NULL, pInfo->pPhyNode, pOperator->pTaskInfo, 0, NULL);
if (!pChildOp) {
T_LONG_JMP(pOperator->pTaskInfo->env, TSDB_CODE_OUT_OF_MEMORY);
}
@ -3576,6 +3665,9 @@ static SSDataBlock* doStreamSessionAgg(SOperatorInfo* pOperator) {
removeSessionResults(pInfo->pStDeleted, pInfo->pUpdated);
tSimpleHashCleanup(pInfo->pStUpdated);
pInfo->pStUpdated = NULL;
if(pInfo->isHistoryOp) {
getMaxTsWins(pInfo->pUpdated, pInfo->historyWins);
}
initGroupResInfoFromArrayList(&pInfo->groupResInfo, pInfo->pUpdated);
pInfo->pUpdated = NULL;
blockDataEnsureCapacity(pInfo->binfo.pRes, pOperator->resultInfo.capacity);
@ -3602,8 +3694,51 @@ static SSDataBlock* doStreamSessionAgg(SOperatorInfo* pOperator) {
return NULL;
}
void streamSessionReleaseState(SOperatorInfo* pOperator) {
if (pOperator->operatorType != QUERY_NODE_PHYSICAL_PLAN_STREAM_SEMI_SESSION) {
SStreamSessionAggOperatorInfo* pInfo = pOperator->info;
int32_t resSize = taosArrayGetSize(pInfo->historyWins) * sizeof(SSessionKey);
pInfo->streamAggSup.stateStore.streamStateSaveInfo(pInfo->streamAggSup.pState, STREAM_SESSION_OP_STATE_NAME, strlen(STREAM_SESSION_OP_STATE_NAME), pInfo->historyWins->pData, resSize);
}
SOperatorInfo* downstream = pOperator->pDownstream[0];
if (downstream->fpSet.releaseStreamStateFn) {
downstream->fpSet.releaseStreamStateFn(downstream);
}
}
void resetWinRange(STimeWindow* winRange) {
winRange->skey = INT16_MIN;
winRange->skey = INT16_MAX;
}
void streamSessionReloadState(SOperatorInfo* pOperator) {
SStreamSessionAggOperatorInfo* pInfo = pOperator->info;
SStreamAggSupporter* pAggSup = &pInfo->streamAggSup;
resetWinRange(&pAggSup->winRange);
SResultWindowInfo winInfo = {0};
int32_t size = 0;
void* pBuf = NULL;
int32_t code = pAggSup->stateStore.streamStateGetInfo(pAggSup->pState, STREAM_SESSION_OP_STATE_NAME,
strlen(STREAM_SESSION_OP_STATE_NAME), &pBuf, &size);
int32_t num = size / sizeof(SSessionKey);
SSessionKey* pSeKeyBuf = (SSessionKey*) pBuf;
ASSERT(size == num * sizeof(SSessionKey));
for (int32_t i = 0; i < num; i++) {
SResultWindowInfo winInfo = {0};
setSessionOutputBuf(pAggSup, pSeKeyBuf[i].win.skey, pSeKeyBuf[i].win.ekey, pSeKeyBuf[i].groupId, &winInfo);
compactSessionWindow(pOperator, &winInfo, pInfo->pStUpdated, pInfo->pStDeleted);
}
taosMemoryFree(pBuf);
SOperatorInfo* downstream = pOperator->pDownstream[0];
if (downstream->fpSet.reloadStreamStateFn) {
downstream->fpSet.reloadStreamStateFn(downstream);
}
}
SOperatorInfo* createStreamSessionAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode,
SExecTaskInfo* pTaskInfo) {
SExecTaskInfo* pTaskInfo, SReadHandle* pHandle) {
SSessionWinodwPhysiNode* pSessionNode = (SSessionWinodwPhysiNode*)pPhyNode;
int32_t numOfCols = 0;
int32_t code = TSDB_CODE_OUT_OF_MEMORY;
@ -3634,7 +3769,7 @@ SOperatorInfo* createStreamSessionAggOperatorInfo(SOperatorInfo* downstream, SPh
}
code = initStreamAggSupporter(&pInfo->streamAggSup, pSup->pCtx, numOfCols, pSessionNode->gap,
pTaskInfo->streamInfo.pState, 0, 0, &pTaskInfo->storageAPI.stateStore);
pTaskInfo->streamInfo.pState, 0, 0, &pTaskInfo->storageAPI.stateStore, pHandle, &pTaskInfo->storageAPI);
if (code != TSDB_CODE_SUCCESS) {
goto _error;
}
@ -3666,11 +3801,19 @@ SOperatorInfo* createStreamSessionAggOperatorInfo(SOperatorInfo* downstream, SPh
pInfo->pUpdated = NULL;
pInfo->pStUpdated = NULL;
pInfo->dataVersion = 0;
pInfo->historyWins = taosArrayInit(4, sizeof(SSessionKey));
if (!pInfo->historyWins) {
goto _error;
}
if (pHandle) {
pInfo->isHistoryOp = pHandle->fillHistory;
}
setOperatorInfo(pOperator, "StreamSessionWindowAggOperator", QUERY_NODE_PHYSICAL_PLAN_STREAM_SESSION, true,
OP_NOT_OPENED, pInfo, pTaskInfo);
pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doStreamSessionAgg, NULL, destroyStreamSessionAggOperatorInfo,
optrDefaultBufFn, NULL);
setOperatorStreamStateFn(pOperator, streamSessionReleaseState, streamSessionReloadState);
if (downstream) {
initDownStream(downstream, &pInfo->streamAggSup, pOperator->operatorType, pInfo->primaryTsIndex, &pInfo->twAggSup);
@ -3778,7 +3921,6 @@ static SSDataBlock* doStreamSessionSemiAgg(SOperatorInfo* pOperator) {
removeSessionResults(pInfo->pStDeleted, pInfo->pUpdated);
tSimpleHashCleanup(pInfo->pStUpdated);
pInfo->pStUpdated = NULL;
initGroupResInfoFromArrayList(&pInfo->groupResInfo, pInfo->pUpdated);
pInfo->pUpdated = NULL;
blockDataEnsureCapacity(pBInfo->pRes, pOperator->resultInfo.capacity);
@ -3809,9 +3951,9 @@ static SSDataBlock* doStreamSessionSemiAgg(SOperatorInfo* pOperator) {
}
SOperatorInfo* createStreamFinalSessionAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode,
SExecTaskInfo* pTaskInfo, int32_t numOfChild) {
SExecTaskInfo* pTaskInfo, int32_t numOfChild, SReadHandle* pHandle) {
int32_t code = TSDB_CODE_OUT_OF_MEMORY;
SOperatorInfo* pOperator = createStreamSessionAggOperatorInfo(downstream, pPhyNode, pTaskInfo);
SOperatorInfo* pOperator = createStreamSessionAggOperatorInfo(downstream, pPhyNode, pTaskInfo, pHandle);
if (pOperator == NULL) {
goto _error;
}
@ -3828,14 +3970,14 @@ SOperatorInfo* createStreamFinalSessionAggOperatorInfo(SOperatorInfo* downstream
pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doStreamSessionSemiAgg, NULL,
destroyStreamSessionAggOperatorInfo, optrDefaultBufFn, NULL);
}
setOperatorStreamStateFn(pOperator, streamSessionReleaseState, streamSessionReloadState);
setOperatorInfo(pOperator, name, pPhyNode->type, false, OP_NOT_OPENED, pInfo, pTaskInfo);
pOperator->operatorType = pPhyNode->type;
if (numOfChild > 0) {
pInfo->pChildren = taosArrayInit(numOfChild, sizeof(void*));
for (int32_t i = 0; i < numOfChild; i++) {
SOperatorInfo* pChildOp = createStreamFinalSessionAggOperatorInfo(NULL, pPhyNode, pTaskInfo, 0);
SOperatorInfo* pChildOp = createStreamFinalSessionAggOperatorInfo(NULL, pPhyNode, pTaskInfo, 0, NULL);
if (pChildOp == NULL) {
goto _error;
}
@ -3876,6 +4018,7 @@ void destroyStreamStateOperatorInfo(void* param) {
}
colDataDestroy(&pInfo->twAggSup.timeWindowData);
blockDataDestroy(pInfo->pDelRes);
taosArrayDestroy(pInfo->historyWins);
tSimpleHashCleanup(pInfo->pSeDeleted);
taosMemoryFreeClear(param);
}
@ -3892,6 +4035,9 @@ bool isEqualStateKey(SStateWindowInfo* pWin, char* pKeyData) {
}
bool compareStateKey(void* data, void* key) {
if (!data || !key) {
return true;
}
SStateKeys* stateKey = (SStateKeys*)key;
stateKey->pData = (char*)key + sizeof(SStateKeys);
return compareVal(data, stateKey);
@ -3913,9 +4059,15 @@ void setStateOutputBuf(SStreamAggSupporter* pAggSup, TSKEY ts, uint64_t groupId,
pCurWin->pStateKey->pData = (char*)pCurWin->pStateKey + sizeof(SStateKeys);
pCurWin->pStateKey->isNull = false;
if (code == TSDB_CODE_SUCCESS && !inWinRange(&pAggSup->winRange, &pCurWin->winInfo.sessionWin.win)) {
code = TSDB_CODE_FAILED;
releaseOutputBuf(pAggSup->pState, NULL, (SResultRow*)pCurWin->winInfo.pOutputBuf, &pAggSup->pSessionAPI->stateStore);
pCurWin->winInfo.pOutputBuf = taosMemoryMalloc(size);
}
if (code == TSDB_CODE_SUCCESS) {
pCurWin->winInfo.isOutput = true;
} else {
} else if (pKeyData) {
if (IS_VAR_DATA_TYPE(pAggSup->stateKeyType)) {
varDataCopy(pCurWin->pStateKey->pData, pKeyData);
} else {
@ -4113,6 +4265,10 @@ static SSDataBlock* doStreamStateAgg(SOperatorInfo* pOperator) {
tSimpleHashCleanup(pInfo->pSeUpdated);
pInfo->pSeUpdated = NULL;
if(pInfo->isHistoryOp) {
getMaxTsWins(pInfo->pUpdated, pInfo->historyWins);
}
initGroupResInfoFromArrayList(&pInfo->groupResInfo, pInfo->pUpdated);
pInfo->pUpdated = NULL;
blockDataEnsureCapacity(pInfo->binfo.pRes, pOperator->resultInfo.capacity);
@ -4138,8 +4294,73 @@ static SSDataBlock* doStreamStateAgg(SOperatorInfo* pOperator) {
return NULL;
}
void streamStateReleaseState(SOperatorInfo* pOperator) {
SStreamStateAggOperatorInfo* pInfo = pOperator->info;
int32_t resSize = taosArrayGetSize(pInfo->historyWins) * sizeof(SSessionKey);
pInfo->streamAggSup.stateStore.streamStateSaveInfo(pInfo->streamAggSup.pState, STREAM_STATE_OP_STATE_NAME, strlen(STREAM_STATE_OP_STATE_NAME), pInfo->historyWins->pData, resSize);
SOperatorInfo* downstream = pOperator->pDownstream[0];
if (downstream->fpSet.releaseStreamStateFn) {
downstream->fpSet.releaseStreamStateFn(downstream);
}
}
static void compactStateWindow(SOperatorInfo* pOperator, SResultWindowInfo* pCurWin, SResultWindowInfo* pNextWin,
SSHashObj* pStUpdated, SSHashObj* pStDeleted) {
SExprSupp* pSup = &pOperator->exprSupp;
SExecTaskInfo* pTaskInfo = pOperator->pTaskInfo;
SStorageAPI* pAPI = &pOperator->pTaskInfo->storageAPI;
SStreamStateAggOperatorInfo* pInfo = pOperator->info;
SResultRow* pCurResult = NULL;
int32_t numOfOutput = pOperator->exprSupp.numOfExprs;
SStreamAggSupporter* pAggSup = &pInfo->streamAggSup;
initSessionOutputBuf(pCurWin, &pCurResult, pSup->pCtx, numOfOutput, pSup->rowEntryInfoOffset);
SResultRow* pWinResult = NULL;
initSessionOutputBuf(pNextWin, &pWinResult, pAggSup->pDummyCtx, numOfOutput, pSup->rowEntryInfoOffset);
updateTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &pCurWin->sessionWin.win, true);
compactFunctions(pSup->pCtx, pAggSup->pDummyCtx, numOfOutput, pTaskInfo, &pInfo->twAggSup.timeWindowData);
tSimpleHashRemove(pStUpdated, &pNextWin->sessionWin, sizeof(SSessionKey));
if (pNextWin->isOutput && pStDeleted) {
saveDeleteRes(pStDeleted, pNextWin->sessionWin);
}
removeSessionResult(pStUpdated, pAggSup->pResultRows, pNextWin->sessionWin);
doDeleteSessionWindow(pAggSup, &pNextWin->sessionWin);
taosMemoryFree(pNextWin->pOutputBuf);
saveSessionOutputBuf(pAggSup, pCurWin);
}
void streamStateReloadState(SOperatorInfo* pOperator) {
SStreamSessionAggOperatorInfo* pInfo = pOperator->info;
SStreamAggSupporter* pAggSup = &pInfo->streamAggSup;
resetWinRange(&pAggSup->winRange);
SSessionKey seKey = {.win.skey = INT64_MIN, .win.ekey = INT64_MIN, .groupId = 0};
int32_t size = 0;
void* pBuf = NULL;
int32_t code = pAggSup->stateStore.streamStateGetInfo(pAggSup->pState, STREAM_STATE_OP_STATE_NAME,
strlen(STREAM_STATE_OP_STATE_NAME), &pBuf, &size);
int32_t num = size / sizeof(SSessionKey);
SSessionKey* pSeKeyBuf = (SSessionKey*) pBuf;
ASSERT(size == num * sizeof(SSessionKey));
for (int32_t i = 0; i < num; i++) {
SStateWindowInfo curInfo = {0};
SStateWindowInfo nextInfo = {0};
setStateOutputBuf(pAggSup, pSeKeyBuf[i].win.skey, pSeKeyBuf[i].groupId, NULL, &curInfo, &nextInfo);
if (compareStateKey(curInfo.pStateKey,nextInfo.pStateKey)) {
compactStateWindow(pOperator, &curInfo.winInfo, &nextInfo.winInfo, pInfo->pStUpdated, pInfo->pStDeleted);
}
}
taosMemoryFree(pBuf);
SOperatorInfo* downstream = pOperator->pDownstream[0];
if (downstream->fpSet.reloadStreamStateFn) {
downstream->fpSet.reloadStreamStateFn(downstream);
}
}
SOperatorInfo* createStreamStateAggOperatorInfo(SOperatorInfo* downstream, SPhysiNode* pPhyNode,
SExecTaskInfo* pTaskInfo) {
SExecTaskInfo* pTaskInfo, SReadHandle* pHandle) {
SStreamStateWinodwPhysiNode* pStateNode = (SStreamStateWinodwPhysiNode*)pPhyNode;
int32_t tsSlotId = ((SColumnNode*)pStateNode->window.pTspk)->slotId;
SColumnNode* pColNode = (SColumnNode*)((STargetNode*)pStateNode->pStateKey)->pExpr;
@ -4183,7 +4404,7 @@ SOperatorInfo* createStreamStateAggOperatorInfo(SOperatorInfo* downstream, SPhys
int32_t keySize = sizeof(SStateKeys) + pColNode->node.resType.bytes;
int16_t type = pColNode->node.resType.type;
code = initStreamAggSupporter(&pInfo->streamAggSup, pSup->pCtx, numOfCols, 0, pTaskInfo->streamInfo.pState, keySize,
type, &pTaskInfo->storageAPI.stateStore);
type, &pTaskInfo->storageAPI.stateStore, pHandle, &pTaskInfo->storageAPI);
if (code != TSDB_CODE_SUCCESS) {
goto _error;
}
@ -4199,11 +4420,19 @@ SOperatorInfo* createStreamStateAggOperatorInfo(SOperatorInfo* downstream, SPhys
pInfo->pUpdated = NULL;
pInfo->pSeUpdated = NULL;
pInfo->dataVersion = 0;
pInfo->historyWins = taosArrayInit(4, sizeof(SSessionKey));
if (!pInfo->historyWins) {
goto _error;
}
if (pHandle) {
pInfo->isHistoryOp = pHandle->fillHistory;
}
setOperatorInfo(pOperator, "StreamStateAggOperator", QUERY_NODE_PHYSICAL_PLAN_STREAM_STATE, true, OP_NOT_OPENED,
pInfo, pTaskInfo);
pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doStreamStateAgg, NULL, destroyStreamStateOperatorInfo,
optrDefaultBufFn, NULL);
setOperatorStreamStateFn(pOperator, streamStateReleaseState, streamStateReloadState);
initDownStream(downstream, &pInfo->streamAggSup, pOperator->operatorType, pInfo->primaryTsIndex, &pInfo->twAggSup);
code = appendDownstream(pOperator, &downstream, 1);
if (code != TSDB_CODE_SUCCESS) {
@ -5021,13 +5250,16 @@ SOperatorInfo* createStreamIntervalOperatorInfo(SOperatorInfo* downstream, SPhys
pInfo->pUpdated = NULL;
pInfo->pUpdatedMap = NULL;
int32_t funResSize= getMaxFunResSize(pSup, numOfCols);
pInfo->pState->pFileState = pTaskInfo->storageAPI.stateStore.streamFileStateInit(tsStreamBufferSize, sizeof(SWinKey), pInfo->aggSup.resultRowSize, funResSize,
compareTs, pInfo->pState, pInfo->twAggSup.deleteMark);
pInfo->pState->pFileState = pTaskInfo->storageAPI.stateStore.streamFileStateInit(
tsStreamBufferSize, sizeof(SWinKey), pInfo->aggSup.resultRowSize, funResSize, compareTs, pInfo->pState,
pInfo->twAggSup.deleteMark, GET_TASKID(pTaskInfo));
setOperatorInfo(pOperator, "StreamIntervalOperator", QUERY_NODE_PHYSICAL_PLAN_STREAM_INTERVAL, true, OP_NOT_OPENED,
pInfo, pTaskInfo);
pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, doStreamIntervalAgg, NULL,
destroyStreamFinalIntervalOperatorInfo, optrDefaultBufFn, NULL);
setOperatorStreamStateFn(pOperator, streamIntervalReleaseState, streamIntervalReloadState);
pInfo->statestore = pTaskInfo->storageAPI.stateStore;
pInfo->recvGetAll = false;

View File

@ -1063,7 +1063,7 @@ static STupleHandle* tsortPQSortNextTuple(SSortHandle* pHandle) {
if (!pData) {
colDataSetNULL(bdGetColumnInfoData(pHandle->pDataBlock, i), 0);
} else {
colDataAppend(bdGetColumnInfoData(pHandle->pDataBlock, i), 0, pData, false);
colDataSetVal(bdGetColumnInfoData(pHandle->pDataBlock, i), 0, pData, false);
}
}
pHandle->pDataBlock->info.rows++;

View File

@ -1712,7 +1712,7 @@ int32_t percentileFinalize(SqlFunctionCtx* pCtx, SSDataBlock* pBlock) {
SColumnInfoData* pCol = taosArrayGet(pBlock->pDataBlock, slotId);
varDataSetLen(buf, len);
colDataAppend(pCol, pBlock->info.rows, buf, false);
colDataSetVal(pCol, pBlock->info.rows, buf, false);
tMemBucketDestroy(pMemBucket);
return pResInfo->numOfRes;

View File

@ -145,7 +145,7 @@ int32_t executeMakePointFunc(SColumnInfoData *pInputData[], int32_t iLeft, int32
goto _exit;
}
colDataAppend(pOutputData, TMAX(iLeft, iRight), output, (output == NULL));
colDataSetVal(pOutputData, TMAX(iLeft, iRight), output, (output == NULL));
_exit:
if (output) {
@ -165,7 +165,7 @@ int32_t executeGeomFromTextFunc(SColumnInfoData *pInputData, int32_t i, SColumnI
goto _exit;
}
colDataAppend(pOutputData, i, output, (output == NULL));
colDataSetVal(pOutputData, i, output, (output == NULL));
_exit:
if (output) {
@ -185,7 +185,7 @@ int32_t executeAsTextFunc(SColumnInfoData *pInputData, int32_t i, SColumnInfoDat
goto _exit;
}
colDataAppend(pOutputData, i, output, (output == NULL));
colDataSetVal(pOutputData, i, output, (output == NULL));
_exit:
if (output) {
@ -213,7 +213,7 @@ int32_t executeRelationFunc(const GEOSGeometry *geom1, const GEOSPreparedGeometr
}
}
colDataAppend(pOutputData, i, &res, (res==-1));
colDataSetVal(pOutputData, i, &res, (res==-1));
return code;
}

View File

@ -173,6 +173,7 @@ int32_t initCtxAsText() {
if (geosCtx->WKTWriter) {
GEOSWKTWriter_setRoundingPrecision_r(geosCtx->handle, geosCtx->WKTWriter, 6);
GEOSWKTWriter_setTrim_r(geosCtx->handle, geosCtx->WKTWriter, 0);
} else {
return code;
}

View File

@ -84,7 +84,7 @@ void setScalarParam(SScalarParam *sclParam, int32_t type, void *valueArray, TDRo
}
else {
const char *val = (const char *)valueArray + (i * bytes);
colDataAppend(sclParam->columnData, i, val, false);
colDataSetVal(sclParam->columnData, i, val, false);
}
}
}

View File

@ -206,9 +206,9 @@ SNode* createDropComponentNodeStmt(SAstCreateContext* pCxt, ENodeType type, cons
SNode* createRestoreComponentNodeStmt(SAstCreateContext* pCxt, ENodeType type, const SToken* pDnodeId);
SNode* createCreateTopicStmtUseQuery(SAstCreateContext* pCxt, bool ignoreExists, SToken* pTopicName, SNode* pQuery);
SNode* createCreateTopicStmtUseDb(SAstCreateContext* pCxt, bool ignoreExists, SToken* pTopicName, SToken* pSubDbName,
bool withMeta);
int8_t withMeta);
SNode* createCreateTopicStmtUseTable(SAstCreateContext* pCxt, bool ignoreExists, SToken* pTopicName, SNode* pRealTable,
bool withMeta, SNode* pWhere);
int8_t withMeta, SNode* pWhere);
SNode* createDropTopicStmt(SAstCreateContext* pCxt, bool ignoreNotExists, SToken* pTopicName);
SNode* createDropCGroupStmt(SAstCreateContext* pCxt, bool ignoreNotExists, SToken* pCGroupId, SToken* pTopicName);
SNode* createAlterLocalStmt(SAstCreateContext* pCxt, const SToken* pConfig, const SToken* pValue);

View File

@ -538,14 +538,18 @@ sma_stream_opt(A) ::= sma_stream_opt(B) MAX_DELAY duration_literal(C).
sma_stream_opt(A) ::= sma_stream_opt(B) DELETE_MARK duration_literal(C). { ((SStreamOptions*)B)->pDeleteMark = releaseRawExprNode(pCxt, C); A = B; }
/************************************************ create/drop topic ***************************************************/
%type with_meta { int32_t }
%destructor with_meta { }
with_meta(A) ::= AS. { A = 0; }
with_meta(A) ::= WITH META AS. { A = 1; }
with_meta(A) ::= ONLY META AS. { A = 2; }
cmd ::= CREATE TOPIC not_exists_opt(A) topic_name(B) AS query_or_subquery(C). { pCxt->pRootNode = createCreateTopicStmtUseQuery(pCxt, A, &B, C); }
cmd ::= CREATE TOPIC not_exists_opt(A) topic_name(B) AS DATABASE db_name(C). { pCxt->pRootNode = createCreateTopicStmtUseDb(pCxt, A, &B, &C, false); }
cmd ::= CREATE TOPIC not_exists_opt(A) topic_name(B)
WITH META AS DATABASE db_name(C). { pCxt->pRootNode = createCreateTopicStmtUseDb(pCxt, A, &B, &C, true); }
cmd ::= CREATE TOPIC not_exists_opt(A) topic_name(B)
AS STABLE full_table_name(C) where_clause_opt(D). { pCxt->pRootNode = createCreateTopicStmtUseTable(pCxt, A, &B, C, false, D); }
cmd ::= CREATE TOPIC not_exists_opt(A) topic_name(B)
WITH META AS STABLE full_table_name(C) where_clause_opt(D). { pCxt->pRootNode = createCreateTopicStmtUseTable(pCxt, A, &B, C, true, D); }
cmd ::= CREATE TOPIC not_exists_opt(A) topic_name(B) with_meta(D)
DATABASE db_name(C). { pCxt->pRootNode = createCreateTopicStmtUseDb(pCxt, A, &B, &C, D); }
cmd ::= CREATE TOPIC not_exists_opt(A) topic_name(B) with_meta(E)
STABLE full_table_name(C) where_clause_opt(D). { pCxt->pRootNode = createCreateTopicStmtUseTable(pCxt, A, &B, C, E, D); }
cmd ::= DROP TOPIC exists_opt(A) topic_name(B). { pCxt->pRootNode = createDropTopicStmt(pCxt, A, &B); }
cmd ::= DROP CONSUMER GROUP exists_opt(A) cgroup_name(B) ON topic_name(C). { pCxt->pRootNode = createDropCGroupStmt(pCxt, A, &B, &C); }

View File

@ -1715,7 +1715,7 @@ SNode* createCreateTopicStmtUseQuery(SAstCreateContext* pCxt, bool ignoreExists,
}
SNode* createCreateTopicStmtUseDb(SAstCreateContext* pCxt, bool ignoreExists, SToken* pTopicName, SToken* pSubDbName,
bool withMeta) {
int8_t withMeta) {
CHECK_PARSER_STATUS(pCxt);
if (!checkTopicName(pCxt, pTopicName) || !checkDbName(pCxt, pSubDbName, true)) {
return NULL;
@ -1730,7 +1730,7 @@ SNode* createCreateTopicStmtUseDb(SAstCreateContext* pCxt, bool ignoreExists, ST
}
SNode* createCreateTopicStmtUseTable(SAstCreateContext* pCxt, bool ignoreExists, SToken* pTopicName, SNode* pRealTable,
bool withMeta, SNode* pWhere) {
int8_t withMeta, SNode* pWhere) {
CHECK_PARSER_STATUS(pCxt);
if (!checkTopicName(pCxt, pTopicName)) {
return NULL;

View File

@ -140,6 +140,7 @@ static SKeyword keywordTable[] = {
{"MAX_SPEED", TK_MAX_SPEED},
{"MERGE", TK_MERGE},
{"META", TK_META},
{"ONLY", TK_ONLY},
{"MINROWS", TK_MINROWS},
{"MINUS", TK_MINUS},
{"MNODE", TK_MNODE},

File diff suppressed because it is too large Load Diff

View File

@ -12,10 +12,17 @@ IF(NOT TD_DARWIN)
"${SOURCE_LIST}/../../../parser/test/mockCatalogService.cpp"
)
TARGET_LINK_LIBRARIES(
plannerTest
PUBLIC os util common nodes planner parser catalog transport gtest function qcom
)
IF (TD_GRANT)
TARGET_LINK_LIBRARIES(
plannerTest
PUBLIC os util common nodes planner parser catalog transport gtest function qcom grant
)
ELSE ()
TARGET_LINK_LIBRARIES(
plannerTest
PUBLIC os util common nodes planner parser catalog transport gtest function qcom
)
ENDIF()
TARGET_INCLUDE_DIRECTORIES(
plannerTest

View File

@ -57,7 +57,7 @@ typedef enum {
#define SCHEDULE_DEFAULT_POLICY SCH_LOAD_SEQ
#define SCHEDULE_DEFAULT_MAX_NODE_NUM 20
#define SCH_DEFAULT_TASK_TIMEOUT_USEC 60000000
#define SCH_DEFAULT_TASK_TIMEOUT_USEC 5000000
#define SCH_MAX_TASK_TIMEOUT_USEC 300000000
#define SCH_DEFAULT_MAX_RETRY_NUM 6
#define SCH_MIN_AYSNC_EXEC_NUM 3
@ -239,7 +239,7 @@ typedef struct SSchTask {
int32_t lastMsgType; // last sent msg type
int64_t timeoutUsec; // task timeout useconds before reschedule
SQueryNodeAddr succeedAddr; // task executed success node address
int8_t candidateIdx; // current try condidation index
int32_t candidateIdx; // current try condidation index
SArray *candidateAddrs; // condidate node addresses, element is SQueryNodeAddr
SHashObj *execNodes; // all tried node for current task, element is SSchNodeInfo
SSchTaskProfile profile; // task execution profile

View File

@ -745,7 +745,6 @@ int32_t schSetTaskCandidateAddrs(SSchJob *pJob, SSchTask *pTask) {
return TSDB_CODE_SUCCESS;
}
pTask->candidateIdx = 0;
pTask->candidateAddrs = taosArrayInit(SCHEDULE_DEFAULT_MAX_NODE_NUM, sizeof(SQueryNodeAddr));
if (NULL == pTask->candidateAddrs) {
SCH_TASK_ELOG("taosArrayInit %d condidate addrs failed", SCHEDULE_DEFAULT_MAX_NODE_NUM);
@ -770,6 +769,8 @@ int32_t schSetTaskCandidateAddrs(SSchJob *pJob, SSchTask *pTask) {
SCH_ERR_RET(schSetAddrsFromNodeList(pJob, pTask));
pTask->candidateIdx = taosRand() % taosArrayGetSize(pTask->candidateAddrs);
/*
for (int32_t i = 0; i < job->dataSrcEps.numOfEps && addNum < SCH_MAX_CANDIDATE_EP_NUM; ++i) {
strncpy(epSet->fqdn[epSet->numOfEps], job->dataSrcEps.fqdn[i], sizeof(job->dataSrcEps.fqdn[i]));

View File

@ -7,10 +7,18 @@ IF(NOT TD_DARWIN)
AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR} SOURCE_LIST)
ADD_EXECUTABLE(schedulerTest ${SOURCE_LIST})
TARGET_LINK_LIBRARIES(
schedulerTest
PUBLIC os util common catalog transport gtest qcom taos_static planner scheduler
)
IF (TD_GRANT)
TARGET_LINK_LIBRARIES(
schedulerTest
PUBLIC os util common catalog transport gtest qcom taos_static planner scheduler grant
)
ELSE ()
TARGET_LINK_LIBRARIES(
schedulerTest
PUBLIC os util common catalog transport gtest qcom taos_static planner scheduler
)
ENDIF()
TARGET_INCLUDE_DIRECTORIES(
schedulerTest

View File

@ -42,10 +42,11 @@ typedef struct {
TdThreadMutex cfMutex;
SHashObj* cfInst;
int64_t defaultCfInit;
} SBackendHandle;
} SBackendWrapper;
void* streamBackendInit(const char* path);
void streamBackendCleanup(void* arg);
void streamBackendHandleCleanup(void* arg);
SListNode* streamBackendAddCompare(void* backend, void* arg);
void streamBackendDelCompare(void* backend, void* arg);

View File

@ -31,8 +31,9 @@ typedef struct {
void* timer;
} SStreamGlobalEnv;
static SStreamGlobalEnv streamEnv;
extern SStreamGlobalEnv streamEnv;
void streamRetryDispatchStreamBlock(SStreamTask* pTask, int64_t waitDuration);
int32_t streamDispatchStreamBlock(SStreamTask* pTask);
SStreamDataBlock* createStreamDataFromDispatchMsg(const SStreamDispatchReq* pReq, int32_t blockType, int32_t srcVg);
@ -41,20 +42,20 @@ SStreamDataBlock* createStreamBlockFromResults(SStreamQueueItem* pItem, SStreamT
void destroyStreamDataBlock(SStreamDataBlock* pBlock);
int32_t streamRetrieveReqToData(const SStreamRetrieveReq* pReq, SStreamDataBlock* pData);
int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* data);
int32_t streamBroadcastToChildren(SStreamTask* pTask, const SSDataBlock* pBlock);
int32_t tEncodeStreamRetrieveReq(SEncoder* pEncoder, const SStreamRetrieveReq* pReq);
int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pData);
int32_t streamDispatchCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pReq, int32_t nodeId, SEpSet* pEpSet);
int32_t streamDispatchOneRecoverFinishReq(SStreamTask* pTask, const SStreamRecoverFinishReq* pReq, int32_t vgId,
SEpSet* pEpSet);
int32_t streamDoDispatchScanHistoryFinishMsg(SStreamTask* pTask, const SStreamRecoverFinishReq* pReq, int32_t vgId,
SEpSet* pEpSet);
SStreamQueueItem* streamMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* pElem);
extern int32_t streamBackendId;
extern int32_t streamBackendCfWrapperId;
#ifdef __cplusplus
}

View File

@ -16,11 +16,13 @@
#include "streamInc.h"
#include "ttimer.h"
#define STREAM_TASK_INPUT_QUEUEU_CAPACITY 20480
#define STREAM_TASK_INPUT_QUEUEU_CAPACITY_IN_SIZE (50)
#define STREAM_TASK_INPUT_QUEUE_CAPACITY 20480
#define STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE (30)
#define ONE_MB_F (1048576.0)
#define QUEUE_MEM_SIZE_IN_MB(_q) (taosQueueMemorySize(_q) / ONE_MB_F)
SStreamGlobalEnv streamEnv;
int32_t streamInit() {
int8_t old;
while (1) {
@ -36,6 +38,7 @@ int32_t streamInit() {
}
atomic_store_8(&streamEnv.inited, 1);
}
return 0;
}
@ -52,17 +55,30 @@ void streamCleanUp() {
}
}
char* createStreamTaskIdStr(int64_t streamId, int32_t taskId) {
char buf[128] = {0};
sprintf(buf, "0x%" PRIx64 "-0x%x", streamId, taskId);
return taosStrdup(buf);
}
void streamSchedByTimer(void* param, void* tmrId) {
SStreamTask* pTask = (void*)param;
int8_t status = atomic_load_8(&pTask->triggerStatus);
qDebug("s-task:%s in scheduler timer, trigger status:%d", pTask->id.idStr, status);
if (streamTaskShouldStop(&pTask->status) || streamTaskShouldPause(&pTask->status)) {
streamMetaReleaseTask(NULL, pTask);
qDebug("s-task:%s jump out of schedTimer", pTask->id.idStr);
return;
}
if (atomic_load_8(&pTask->triggerStatus) == TASK_TRIGGER_STATUS__ACTIVE) {
if (status == TASK_TRIGGER_STATUS__ACTIVE) {
SStreamTrigger* trigger = taosAllocateQitem(sizeof(SStreamTrigger), DEF_QITEM, 0);
if (trigger == NULL) return;
if (trigger == NULL) {
return;
}
trigger->type = STREAM_INPUT__GET_RES;
trigger->pBlock = taosMemoryCalloc(1, sizeof(SSDataBlock));
if (trigger->pBlock == NULL) {
@ -74,23 +90,28 @@ void streamSchedByTimer(void* param, void* tmrId) {
atomic_store_8(&pTask->triggerStatus, TASK_TRIGGER_STATUS__INACTIVE);
if (tAppendDataToInputQueue(pTask, (SStreamQueueItem*)trigger) < 0) {
taosTmrReset(streamSchedByTimer, (int32_t)pTask->triggerParam, pTask, streamEnv.timer, &pTask->timer);
taosFreeQitem(trigger);
taosTmrReset(streamSchedByTimer, (int32_t)pTask->triggerParam, pTask, streamEnv.timer, &pTask->schedTimer);
return;
}
streamSchedExec(pTask);
}
taosTmrReset(streamSchedByTimer, (int32_t)pTask->triggerParam, pTask, streamEnv.timer, &pTask->timer);
taosTmrReset(streamSchedByTimer, (int32_t)pTask->triggerParam, pTask, streamEnv.timer, &pTask->schedTimer);
}
int32_t streamSetupTrigger(SStreamTask* pTask) {
int32_t streamSetupScheduleTrigger(SStreamTask* pTask) {
if (pTask->triggerParam != 0) {
int32_t ref = atomic_add_fetch_32(&pTask->refCnt, 1);
ASSERT(ref == 2);
pTask->timer = taosTmrStart(streamSchedByTimer, (int32_t)pTask->triggerParam, pTask, streamEnv.timer);
ASSERT(ref == 2 && pTask->schedTimer == NULL);
qDebug("s-task:%s setup scheduler trigger, delay:%"PRId64" ms", pTask->id.idStr, pTask->triggerParam);
pTask->schedTimer = taosTmrStart(streamSchedByTimer, (int32_t)pTask->triggerParam, pTask, streamEnv.timer);
pTask->triggerStatus = TASK_TRIGGER_STATUS__INACTIVE;
}
return 0;
}
@ -103,16 +124,20 @@ int32_t streamSchedExec(SStreamTask* pTask) {
if (pRunReq == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE);
qError("failed to create msg to aunch s-task:%s, reason out of memory", pTask->id.idStr);
return -1;
}
pRunReq->head.vgId = pTask->nodeId;
pRunReq->head.vgId = pTask->info.nodeId;
pRunReq->streamId = pTask->id.streamId;
pRunReq->taskId = pTask->id.taskId;
qDebug("trigger to run s-task:%s", pTask->id.idStr);
SRpcMsg msg = {.msgType = TDMT_STREAM_TASK_RUN, .pCont = pRunReq, .contLen = sizeof(SStreamTaskRunReq)};
tmsgPutToQueue(pTask->pMsgCb, STREAM_QUEUE, &msg);
qDebug("trigger to run s-task:%s", pTask->id.idStr);
} else {
qDebug("s-task:%s not launch task since sched status:%d", pTask->id.idStr, pTask->status.schedStatus);
}
return 0;
@ -142,7 +167,7 @@ int32_t streamTaskEnqueueBlocks(SStreamTask* pTask, const SStreamDispatchReq* pR
pDispatchRsp->streamId = htobe64(pReq->streamId);
pDispatchRsp->upstreamNodeId = htonl(pReq->upstreamNodeId);
pDispatchRsp->upstreamTaskId = htonl(pReq->upstreamTaskId);
pDispatchRsp->downstreamNodeId = htonl(pTask->nodeId);
pDispatchRsp->downstreamNodeId = htonl(pTask->info.nodeId);
pDispatchRsp->downstreamTaskId = htonl(pTask->id.taskId);
pRsp->pCont = buf;
@ -158,21 +183,18 @@ int32_t streamTaskEnqueueRetrieve(SStreamTask* pTask, SStreamRetrieveReq* pReq,
// enqueue
if (pData != NULL) {
qDebug("s-task:%s (child %d) recv retrieve req from task:0x%x, reqId %" PRId64, pTask->id.idStr, pTask->selfChildId,
pReq->srcTaskId, pReq->reqId);
qDebug("s-task:%s (child %d) recv retrieve req from task:0x%x(vgId:%d), reqId:0x%" PRIx64, pTask->id.idStr, pTask->info.selfChildId,
pReq->srcTaskId, pReq->srcNodeId, pReq->reqId);
pData->type = STREAM_INPUT__DATA_RETRIEVE;
pData->srcVgId = 0;
// decode
/*pData->blocks = pReq->data;*/
/*pBlock->sourceVer = pReq->sourceVer;*/
streamRetrieveReqToData(pReq, pData);
if (tAppendDataToInputQueue(pTask, (SStreamQueueItem*)pData) == 0) {
status = TASK_INPUT_STATUS__NORMAL;
} else {
status = TASK_INPUT_STATUS__FAILED;
}
} else {
} else { // todo handle oom
/*streamTaskInputFail(pTask);*/
/*status = TASK_INPUT_STATUS__FAILED;*/
}
@ -187,6 +209,7 @@ int32_t streamTaskEnqueueRetrieve(SStreamTask* pTask, SStreamRetrieveReq* pReq,
pRsp->pCont = buf;
pRsp->contLen = sizeof(SMsgHead) + sizeof(SStreamRetrieveRsp);
tmsgSendRsp(pRsp);
return status == TASK_INPUT_STATUS__NORMAL ? 0 : -1;
}
@ -231,9 +254,26 @@ int32_t streamProcessDispatchMsg(SStreamTask* pTask, SStreamDispatchReq* pReq, S
return 0;
}
// todo record the idle time for dispatch data
int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, int32_t code) {
if (code != TSDB_CODE_SUCCESS) {
// dispatch message failed: network error, or node not available.
// in case of the input queue is full, the code will be TSDB_CODE_SUCCESS, the and pRsp>inputStatus will be set
// flag. here we need to retry dispatch this message to downstream task immediately. handle the case the failure
// happened too fast. todo handle the shuffle dispatch failure
qError("s-task:%s failed to dispatch msg to task:0x%x, code:%s, retry cnt:%d", pTask->id.idStr,
pRsp->downstreamTaskId, tstrerror(code), ++pTask->msgInfo.retryCount);
int32_t ret = streamDispatchAllBlocks(pTask, pTask->msgInfo.pData);
if (ret != TSDB_CODE_SUCCESS) {
}
return TSDB_CODE_SUCCESS;
}
qDebug("s-task:%s receive dispatch rsp, output status:%d code:%d", pTask->id.idStr, pRsp->inputStatus, code);
// there are other dispatch message not response yet
if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) {
int32_t leftRsp = atomic_sub_fetch_32(&pTask->shuffleDispatcher.waitingRspCnt, 1);
qDebug("s-task:%s is shuffle, left waiting rsp %d", pTask->id.idStr, leftRsp);
@ -242,23 +282,39 @@ int32_t streamProcessDispatchRsp(SStreamTask* pTask, SStreamDispatchRsp* pRsp, i
}
}
int8_t old = atomic_exchange_8(&pTask->outputStatus, pRsp->inputStatus);
ASSERT(old == TASK_OUTPUT_STATUS__WAIT);
pTask->msgInfo.retryCount = 0;
ASSERT(pTask->outputStatus == TASK_OUTPUT_STATUS__WAIT);
// the input queue of the (down stream) task that receive the output data is full, so the TASK_INPUT_STATUS_BLOCKED is rsp
// todo we need to send EMPTY PACKAGE to detect if the input queue is available for output of upstream task, every 50 ms.
qDebug("s-task:%s output status is set to:%d", pTask->id.idStr, pTask->outputStatus);
// the input queue of the (down stream) task that receive the output data is full,
// so the TASK_INPUT_STATUS_BLOCKED is rsp
// todo blocking the output status
if (pRsp->inputStatus == TASK_INPUT_STATUS__BLOCKED) {
// TODO: init recover timer
qError("s-task:%s inputQ of downstream task:0x%x is full, need to block output", pTask->id.idStr, pRsp->downstreamTaskId);
pTask->msgInfo.blockingTs = taosGetTimestampMs(); // record the blocking start time
int32_t waitDuration = 300; // 300 ms
qError("s-task:%s inputQ of downstream task:0x%x is full, time:%" PRId64 "wait for %dms and retry dispatch data",
pTask->id.idStr, pRsp->downstreamTaskId, pTask->msgInfo.blockingTs, waitDuration);
streamRetryDispatchStreamBlock(pTask, waitDuration);
} else { // pipeline send data in output queue
// this message has been sent successfully, let's try next one.
destroyStreamDataBlock(pTask->msgInfo.pData);
pTask->msgInfo.pData = NULL;
if (pTask->msgInfo.blockingTs != 0) {
int64_t el = taosGetTimestampMs() - pTask->msgInfo.blockingTs;
qDebug("s-task:%s resume to normal from inputQ blocking, idle time:%"PRId64"ms", pTask->id.idStr, el);
pTask->msgInfo.blockingTs = 0;
}
// now ready for next data output
atomic_store_8(&pTask->outputStatus, TASK_OUTPUT_STATUS__NORMAL);
qError("s-task:%s ignore error, and reset task output status:%d", pTask->id.idStr, pTask->outputStatus);
return 0;
// otherwise, continue dispatch the first block to down stream task in pipeline
streamDispatchStreamBlock(pTask);
}
// otherwise, continue dispatch the first block to down stream task in pipeline
streamDispatchStreamBlock(pTask);
return 0;
}
@ -267,25 +323,23 @@ int32_t streamProcessRunReq(SStreamTask* pTask) {
return -1;
}
/*if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH || pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) {*/
/*if (pTask->dispatchType == TASK_OUTPUT__FIXED_DISPATCH || pTask->dispatchType == TASK_OUTPUT__SHUFFLE_DISPATCH) {*/
/*streamDispatchStreamBlock(pTask);*/
/*}*/
return 0;
}
int32_t streamProcessRetrieveReq(SStreamTask* pTask, SStreamRetrieveReq* pReq, SRpcMsg* pRsp) {
qDebug("s-task:%s receive retrieve req from node %d taskId:0x%x", pTask->id.idStr, pReq->srcNodeId, pReq->srcTaskId);
streamTaskEnqueueRetrieve(pTask, pReq, pRsp);
ASSERT(pTask->taskLevel != TASK_LEVEL__SINK);
ASSERT(pTask->info.taskLevel != TASK_LEVEL__SINK);
streamSchedExec(pTask);
return 0;
}
bool tInputQueueIsFull(const SStreamTask* pTask) {
bool isFull = taosQueueItemSize((pTask->inputQueue->queue)) >= STREAM_TASK_INPUT_QUEUEU_CAPACITY;
bool isFull = taosQueueItemSize((pTask->inputQueue->queue)) >= STREAM_TASK_INPUT_QUEUE_CAPACITY;
double size = QUEUE_MEM_SIZE_IN_MB(pTask->inputQueue->queue);
return (isFull || size >= STREAM_TASK_INPUT_QUEUEU_CAPACITY_IN_SIZE);
return (isFull || size >= STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE);
}
int32_t tAppendDataToInputQueue(SStreamTask* pTask, SStreamQueueItem* pItem) {
@ -295,9 +349,9 @@ int32_t tAppendDataToInputQueue(SStreamTask* pTask, SStreamQueueItem* pItem) {
if (type == STREAM_INPUT__DATA_SUBMIT) {
SStreamDataSubmit* px = (SStreamDataSubmit*)pItem;
if ((pTask->taskLevel == TASK_LEVEL__SOURCE) && tInputQueueIsFull(pTask)) {
if ((pTask->info.taskLevel == TASK_LEVEL__SOURCE) && tInputQueueIsFull(pTask)) {
qError("s-task:%s input queue is full, capacity(size:%d num:%dMiB), current(blocks:%d, size:%.2fMiB) stop to push data",
pTask->id.idStr, STREAM_TASK_INPUT_QUEUEU_CAPACITY, STREAM_TASK_INPUT_QUEUEU_CAPACITY_IN_SIZE, total,
pTask->id.idStr, STREAM_TASK_INPUT_QUEUE_CAPACITY, STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE, total,
size);
streamDataSubmitDestroy(px);
taosFreeQitem(pItem);
@ -315,9 +369,9 @@ int32_t tAppendDataToInputQueue(SStreamTask* pTask, SStreamQueueItem* pItem) {
px->submit.msgLen, px->submit.ver, total, size + px->submit.msgLen/1048576.0);
} else if (type == STREAM_INPUT__DATA_BLOCK || type == STREAM_INPUT__DATA_RETRIEVE ||
type == STREAM_INPUT__REF_DATA_BLOCK) {
if ((pTask->taskLevel == TASK_LEVEL__SOURCE) && (tInputQueueIsFull(pTask))) {
if ((pTask->info.taskLevel == TASK_LEVEL__SOURCE) && (tInputQueueIsFull(pTask))) {
qError("s-task:%s input queue is full, capacity:%d size:%d MiB, current(blocks:%d, size:%.2fMiB) abort",
pTask->id.idStr, STREAM_TASK_INPUT_QUEUEU_CAPACITY, STREAM_TASK_INPUT_QUEUEU_CAPACITY_IN_SIZE, total,
pTask->id.idStr, STREAM_TASK_INPUT_QUEUE_CAPACITY, STREAM_TASK_INPUT_QUEUE_CAPACITY_IN_SIZE, total,
size);
destroyStreamDataBlock((SStreamDataBlock*) pItem);
return -1;
@ -346,19 +400,21 @@ int32_t tAppendDataToInputQueue(SStreamTask* pTask, SStreamQueueItem* pItem) {
static void* streamQueueCurItem(SStreamQueue* queue) { return queue->qItem; }
void* streamQueueNextItem(SStreamQueue* queue) {
int8_t dequeueFlag = atomic_exchange_8(&queue->status, STREAM_QUEUE__PROCESSING);
if (dequeueFlag == STREAM_QUEUE__FAILED) {
ASSERT(queue->qItem != NULL);
return streamQueueCurItem(queue);
void* streamQueueNextItem(SStreamQueue* pQueue) {
int8_t flag = atomic_exchange_8(&pQueue->status, STREAM_QUEUE__PROCESSING);
if (flag == STREAM_QUEUE__FAILED) {
ASSERT(pQueue->qItem != NULL);
return streamQueueCurItem(pQueue);
} else {
queue->qItem = NULL;
taosGetQitem(queue->qall, &queue->qItem);
if (queue->qItem == NULL) {
taosReadAllQitems(queue->queue, queue->qall);
taosGetQitem(queue->qall, &queue->qItem);
pQueue->qItem = NULL;
taosGetQitem(pQueue->qall, &pQueue->qItem);
if (pQueue->qItem == NULL) {
taosReadAllQitems(pQueue->queue, pQueue->qall);
taosGetQitem(pQueue->qall, &pQueue->qItem);
}
return streamQueueCurItem(queue);
return streamQueueCurItem(pQueue);
}
}

View File

@ -40,16 +40,8 @@ typedef struct {
rocksdb_comparator_t** pCompares;
} RocksdbCfInst;
uint32_t nextPow2(uint32_t x) {
x = x - 1;
x = x | (x >> 1);
x = x | (x >> 2);
x = x | (x >> 4);
x = x | (x >> 8);
x = x | (x >> 16);
return x + 1;
}
int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t nCf);
uint32_t nextPow2(uint32_t x);
int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t nCf);
void destroyRocksdbCfInst(RocksdbCfInst* inst);
@ -71,7 +63,22 @@ typedef int (*BackendCmpFunc)(void* state, const char* aBuf, size_t aLen, const
typedef void (*DestroyFunc)(void* state);
typedef int32_t (*EncodeValueFunc)(void* value, int32_t vlen, int64_t ttl, char** dest);
typedef int32_t (*DecodeValueFunc)(void* value, int32_t vlen, int64_t* ttl, char** dest);
typedef struct {
const char* key;
int32_t len;
int idx;
BackendCmpFunc cmpFunc;
EncodeFunc enFunc;
DecodeFunc deFunc;
ToStringFunc toStrFunc;
CompareName cmpName;
DestroyFunc detroyFunc;
EncodeValueFunc enValueFunc;
DecodeValueFunc deValueFunc;
} SCfInit;
#define GEN_COLUMN_FAMILY_NAME(name, idstr, SUFFIX) sprintf(name, "%s_%s", idstr, (SUFFIX));
const char* compareDefaultName(void* name);
const char* compareStateName(void* name);
const char* compareWinKeyName(void* name);
@ -80,11 +87,67 @@ const char* compareFuncKeyName(void* name);
const char* compareParKeyName(void* name);
const char* comparePartagKeyName(void* name);
int defaultKeyComp(void* state, const char* aBuf, size_t aLen, const char* bBuf, size_t bLen);
int defaultKeyEncode(void* k, char* buf);
int defaultKeyDecode(void* k, char* buf);
int defaultKeyToString(void* k, char* buf);
int stateKeyDBComp(void* state, const char* aBuf, size_t aLen, const char* bBuf, size_t bLen);
int stateKeyEncode(void* k, char* buf);
int stateKeyDecode(void* k, char* buf);
int stateKeyToString(void* k, char* buf);
int stateSessionKeyDBComp(void* state, const char* aBuf, size_t aLen, const char* bBuf, size_t bLen);
int stateSessionKeyEncode(void* ses, char* buf);
int stateSessionKeyDecode(void* ses, char* buf);
int stateSessionKeyToString(void* k, char* buf);
int winKeyDBComp(void* state, const char* aBuf, size_t aLen, const char* bBuf, size_t bLen);
int winKeyEncode(void* k, char* buf);
int winKeyDecode(void* k, char* buf);
int winKeyToString(void* k, char* buf);
int tupleKeyDBComp(void* state, const char* aBuf, size_t aLen, const char* bBuf, size_t bLen);
int tupleKeyEncode(void* k, char* buf);
int tupleKeyDecode(void* k, char* buf);
int tupleKeyToString(void* k, char* buf);
int parKeyDBComp(void* state, const char* aBuf, size_t aLen, const char* bBuf, size_t bLen);
int parKeyEncode(void* k, char* buf);
int parKeyDecode(void* k, char* buf);
int parKeyToString(void* k, char* buf);
int stremaValueEncode(void* k, char* buf);
int streamValueDecode(void* k, char* buf);
int32_t streamValueToString(void* k, char* buf);
int32_t streaValueIsStale(void* k, int64_t ts);
void destroyFunc(void* arg);
int32_t encodeValueFunc(void* value, int32_t vlen, int64_t ttl, char** dest);
int32_t decodeValueFunc(void* value, int32_t vlen, int64_t* ttl, char** dest);
SCfInit ginitDict[] = {
{"default", 7, 0, defaultKeyComp, defaultKeyEncode, defaultKeyDecode, defaultKeyToString, compareDefaultName,
destroyFunc, encodeValueFunc, decodeValueFunc},
{"state", 5, 1, stateKeyDBComp, stateKeyEncode, stateKeyDecode, stateKeyToString, compareStateName, destroyFunc,
encodeValueFunc, decodeValueFunc},
{"fill", 4, 2, winKeyDBComp, winKeyEncode, winKeyDecode, winKeyToString, compareWinKeyName, destroyFunc,
encodeValueFunc, decodeValueFunc},
{"sess", 4, 3, stateSessionKeyDBComp, stateSessionKeyEncode, stateSessionKeyDecode, stateSessionKeyToString,
compareSessionKeyName, destroyFunc, encodeValueFunc, decodeValueFunc},
{"func", 4, 4, tupleKeyDBComp, tupleKeyEncode, tupleKeyDecode, tupleKeyToString, compareFuncKeyName, destroyFunc,
encodeValueFunc, decodeValueFunc},
{"parname", 7, 5, parKeyDBComp, parKeyEncode, parKeyDecode, parKeyToString, compareParKeyName, destroyFunc,
encodeValueFunc, decodeValueFunc},
{"partag", 6, 6, parKeyDBComp, parKeyEncode, parKeyDecode, parKeyToString, comparePartagKeyName, destroyFunc,
encodeValueFunc, decodeValueFunc},
};
void* streamBackendInit(const char* path) {
uint32_t dbMemLimit = nextPow2(tsMaxStreamBackendCache) << 20;
qDebug("start to init stream backend at %s", path);
SBackendHandle* pHandle = taosMemoryCalloc(1, sizeof(SBackendHandle));
SBackendWrapper* pHandle = taosMemoryCalloc(1, sizeof(SBackendWrapper));
pHandle->list = tdListNew(sizeof(SCfComparator));
taosThreadMutexInit(&pHandle->mutex, NULL);
taosThreadMutexInit(&pHandle->cfMutex, NULL);
@ -154,8 +217,8 @@ _EXIT:
return NULL;
}
void streamBackendCleanup(void* arg) {
SBackendHandle* pHandle = (SBackendHandle*)arg;
RocksdbCfInst** pIter = (RocksdbCfInst**)taosHashIterate(pHandle->cfInst, NULL);
SBackendWrapper* pHandle = (SBackendWrapper*)arg;
RocksdbCfInst** pIter = (RocksdbCfInst**)taosHashIterate(pHandle->cfInst, NULL);
while (pIter != NULL) {
RocksdbCfInst* inst = *pIter;
destroyRocksdbCfInst(inst);
@ -194,17 +257,79 @@ void streamBackendCleanup(void* arg) {
qDebug("destroy stream backend backend:%p", pHandle);
return;
}
void streamBackendHandleCleanup(void* arg) {
SBackendCfWrapper* wrapper = arg;
bool remove = wrapper->remove;
qDebug("start to do-close backendwrapper %p, %s", wrapper, wrapper->idstr);
if (wrapper->rocksdb == NULL) {
return;
}
int cfLen = sizeof(ginitDict) / sizeof(ginitDict[0]);
char* err = NULL;
if (remove) {
for (int i = 0; i < cfLen; i++) {
if (wrapper->pHandle[i] != NULL)
rocksdb_drop_column_family(wrapper->rocksdb, ((rocksdb_column_family_handle_t**)wrapper->pHandle)[i], &err);
if (err != NULL) {
// qError("failed to create cf:%s_%s, reason:%s", wrapper->idstr, ginitDict[i].key, err);
taosMemoryFreeClear(err);
}
}
} else {
rocksdb_flushoptions_t* flushOpt = rocksdb_flushoptions_create();
for (int i = 0; i < cfLen; i++) {
if (wrapper->pHandle[i] != NULL) rocksdb_flush_cf(wrapper->rocksdb, flushOpt, wrapper->pHandle[i], &err);
if (err != NULL) {
qError("failed to create cf:%s_%s, reason:%s", wrapper->idstr, ginitDict[i].key, err);
taosMemoryFreeClear(err);
}
}
rocksdb_flushoptions_destroy(flushOpt);
}
for (int i = 0; i < cfLen; i++) {
if (wrapper->pHandle[i] != NULL) {
rocksdb_column_family_handle_destroy(wrapper->pHandle[i]);
}
}
taosMemoryFreeClear(wrapper->pHandle);
for (int i = 0; i < cfLen; i++) {
rocksdb_options_destroy(wrapper->cfOpts[i]);
rocksdb_block_based_options_destroy(((RocksdbCfParam*)wrapper->param)[i].tableOpt);
}
if (remove) {
streamBackendDelCompare(wrapper->pBackend, wrapper->pComparNode);
}
rocksdb_writeoptions_destroy(wrapper->writeOpts);
wrapper->writeOpts = NULL;
rocksdb_readoptions_destroy(wrapper->readOpts);
wrapper->readOpts = NULL;
taosMemoryFreeClear(wrapper->cfOpts);
taosMemoryFreeClear(wrapper->param);
taosThreadRwlockDestroy(&wrapper->rwLock);
wrapper->rocksdb = NULL;
taosReleaseRef(streamBackendId, wrapper->backendId);
qDebug("end to do-close backendwrapper %p, %s", wrapper, wrapper->idstr);
taosMemoryFree(wrapper);
return;
}
SListNode* streamBackendAddCompare(void* backend, void* arg) {
SBackendHandle* pHandle = (SBackendHandle*)backend;
SListNode* node = NULL;
SBackendWrapper* pHandle = (SBackendWrapper*)backend;
SListNode* node = NULL;
taosThreadMutexLock(&pHandle->mutex);
node = tdListAdd(pHandle->list, arg);
taosThreadMutexUnlock(&pHandle->mutex);
return node;
}
void streamBackendDelCompare(void* backend, void* arg) {
SBackendHandle* pHandle = (SBackendHandle*)backend;
SListNode* node = NULL;
SBackendWrapper* pHandle = (SBackendWrapper*)backend;
SListNode* node = NULL;
taosThreadMutexLock(&pHandle->mutex);
node = tdListPopNode(pHandle->list, arg);
taosThreadMutexUnlock(&pHandle->mutex);
@ -542,23 +667,6 @@ void destroyFunc(void* arg) {
return;
}
typedef struct {
const char* key;
int32_t len;
int idx;
BackendCmpFunc cmpFunc;
EncodeFunc enFunc;
DecodeFunc deFunc;
ToStringFunc toStrFunc;
CompareName cmpName;
DestroyFunc detroyFunc;
EncodeValueFunc enValueFunc;
DecodeValueFunc deValueFunc;
} SCfInit;
#define GEN_COLUMN_FAMILY_NAME(name, idstr, SUFFIX) sprintf(name, "%s_%s", idstr, (SUFFIX));
int32_t encodeValueFunc(void* value, int32_t vlen, int64_t ttl, char** dest) {
SStreamValue key = {.unixTimestamp = ttl, .len = vlen, .data = (char*)(value)};
int32_t len = 0;
@ -613,22 +721,6 @@ int32_t decodeValueFunc(void* value, int32_t vlen, int64_t* ttl, char** dest) {
}
return key.len;
}
SCfInit ginitDict[] = {
{"default", 7, 0, defaultKeyComp, defaultKeyEncode, defaultKeyDecode, defaultKeyToString, compareDefaultName,
destroyFunc, encodeValueFunc, decodeValueFunc},
{"state", 5, 1, stateKeyDBComp, stateKeyEncode, stateKeyDecode, stateKeyToString, compareStateName, destroyFunc,
encodeValueFunc, decodeValueFunc},
{"fill", 4, 2, winKeyDBComp, winKeyEncode, winKeyDecode, winKeyToString, compareWinKeyName, destroyFunc,
encodeValueFunc, decodeValueFunc},
{"sess", 4, 3, stateSessionKeyDBComp, stateSessionKeyEncode, stateSessionKeyDecode, stateSessionKeyToString,
compareSessionKeyName, destroyFunc, encodeValueFunc, decodeValueFunc},
{"func", 4, 4, tupleKeyDBComp, tupleKeyEncode, tupleKeyDecode, tupleKeyToString, compareFuncKeyName, destroyFunc,
encodeValueFunc, decodeValueFunc},
{"parname", 7, 5, parKeyDBComp, parKeyEncode, parKeyDecode, parKeyToString, compareParKeyName, destroyFunc,
encodeValueFunc, decodeValueFunc},
{"partag", 6, 6, parKeyDBComp, parKeyEncode, parKeyDecode, parKeyToString, comparePartagKeyName, destroyFunc,
encodeValueFunc, decodeValueFunc},
};
const char* compareDefaultName(void* arg) {
(void)arg;
@ -697,11 +789,11 @@ void destroyRocksdbCfInst(RocksdbCfInst* inst) {
}
int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t nCf) {
SBackendHandle* handle = backend;
char* err = NULL;
int64_t streamId;
int32_t taskId, dummy = 0;
char suffix[64] = {0};
SBackendWrapper* handle = backend;
char* err = NULL;
int64_t streamId;
int32_t taskId, dummy = 0;
char suffix[64] = {0};
rocksdb_options_t** cfOpts = taosMemoryCalloc(nCf, sizeof(rocksdb_options_t*));
RocksdbCfParam* params = taosMemoryCalloc(nCf, sizeof(RocksdbCfParam*));
@ -821,23 +913,30 @@ int32_t streamStateOpenBackendCf(void* backend, char* name, char** cfs, int32_t
int streamStateOpenBackend(void* backend, SStreamState* pState) {
qInfo("start to open state %p on backend %p 0x%" PRIx64 "-%d", pState, backend, pState->streamId, pState->taskId);
taosAcquireRef(streamBackendId, pState->streamBackendRid);
SBackendHandle* handle = backend;
sprintf(pState->pTdbState->idstr, "0x%" PRIx64 "-%d", pState->streamId, pState->taskId);
SBackendWrapper* handle = backend;
SBackendCfWrapper* pBackendCfWrapper = taosMemoryCalloc(1, sizeof(SBackendCfWrapper));
taosThreadMutexLock(&handle->cfMutex);
RocksdbCfInst** ppInst = taosHashGet(handle->cfInst, pState->pTdbState->idstr, strlen(pState->pTdbState->idstr) + 1);
if (ppInst != NULL && *ppInst != NULL) {
RocksdbCfInst* inst = *ppInst;
pState->pTdbState->rocksdb = inst->db;
pState->pTdbState->pHandle = (void**)inst->pHandle;
pState->pTdbState->writeOpts = inst->wOpt;
pState->pTdbState->readOpts = inst->rOpt;
pState->pTdbState->cfOpts = (void**)(inst->cfOpt);
pState->pTdbState->dbOpt = handle->dbOpt;
pState->pTdbState->param = inst->param;
pState->pTdbState->pBackend = handle;
pState->pTdbState->pComparNode = inst->pCompareNode;
pBackendCfWrapper->rocksdb = inst->db;
pBackendCfWrapper->pHandle = (void**)inst->pHandle;
pBackendCfWrapper->writeOpts = inst->wOpt;
pBackendCfWrapper->readOpts = inst->rOpt;
pBackendCfWrapper->cfOpts = (void**)(inst->cfOpt);
pBackendCfWrapper->dbOpt = handle->dbOpt;
pBackendCfWrapper->param = inst->param;
pBackendCfWrapper->pBackend = handle;
pBackendCfWrapper->pComparNode = inst->pCompareNode;
taosThreadMutexUnlock(&handle->cfMutex);
pBackendCfWrapper->backendId = pState->streamBackendRid;
memcpy(pBackendCfWrapper->idstr, pState->pTdbState->idstr, sizeof(pState->pTdbState->idstr));
int64_t id = taosAddRef(streamBackendCfWrapperId, pBackendCfWrapper);
pState->pTdbState->backendCfWrapperId = id;
pState->pTdbState->pBackendCfWrapper = pBackendCfWrapper;
qInfo("succ to open state %p on backendWrapper, %p, %s", pState, pBackendCfWrapper, pBackendCfWrapper->idstr);
return 0;
}
taosThreadMutexUnlock(&handle->cfMutex);
@ -870,27 +969,33 @@ int streamStateOpenBackend(void* backend, SStreamState* pState) {
pCompare[i] = compare;
}
rocksdb_column_family_handle_t** cfHandle = taosMemoryCalloc(cfLen, sizeof(rocksdb_column_family_handle_t*));
pState->pTdbState->rocksdb = handle->db;
pState->pTdbState->pHandle = (void**)cfHandle;
pState->pTdbState->writeOpts = rocksdb_writeoptions_create();
pState->pTdbState->readOpts = rocksdb_readoptions_create();
pState->pTdbState->cfOpts = (void**)cfOpt;
pState->pTdbState->dbOpt = handle->dbOpt;
pState->pTdbState->param = param;
pState->pTdbState->pBackend = handle;
taosThreadRwlockInit(&pState->pTdbState->rwLock, NULL);
pBackendCfWrapper->rocksdb = handle->db;
pBackendCfWrapper->pHandle = (void**)cfHandle;
pBackendCfWrapper->writeOpts = rocksdb_writeoptions_create();
pBackendCfWrapper->readOpts = rocksdb_readoptions_create();
pBackendCfWrapper->cfOpts = (void**)cfOpt;
pBackendCfWrapper->dbOpt = handle->dbOpt;
pBackendCfWrapper->param = param;
pBackendCfWrapper->pBackend = handle;
pBackendCfWrapper->backendId = pState->streamBackendRid;
taosThreadRwlockInit(&pBackendCfWrapper->rwLock, NULL);
SCfComparator compare = {.comp = pCompare, .numOfComp = cfLen};
pState->pTdbState->pComparNode = streamBackendAddCompare(handle, &compare);
rocksdb_writeoptions_disable_WAL(pState->pTdbState->writeOpts, 1);
qInfo("succ to open state %p on backend, %p, 0x%" PRIx64 "-%d", pState, handle, pState->streamId, pState->taskId);
pBackendCfWrapper->pComparNode = streamBackendAddCompare(handle, &compare);
rocksdb_writeoptions_disable_WAL(pBackendCfWrapper->writeOpts, 1);
memcpy(pBackendCfWrapper->idstr, pState->pTdbState->idstr, sizeof(pState->pTdbState->idstr));
int64_t id = taosAddRef(streamBackendCfWrapperId, pBackendCfWrapper);
pState->pTdbState->backendCfWrapperId = id;
pState->pTdbState->pBackendCfWrapper = pBackendCfWrapper;
qInfo("succ to open state %p on backendWrapper %p %s", pState, pBackendCfWrapper, pBackendCfWrapper->idstr);
return 0;
}
void streamStateCloseBackend(SStreamState* pState, bool remove) {
SBackendHandle* pHandle = pState->pTdbState->pBackend;
SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper;
SBackendWrapper* pHandle = wrapper->pBackend;
taosThreadMutexLock(&pHandle->cfMutex);
RocksdbCfInst** ppInst = taosHashGet(pHandle->cfInst, pState->pTdbState->idstr, strlen(pState->pTdbState->idstr) + 1);
RocksdbCfInst** ppInst = taosHashGet(pHandle->cfInst, wrapper->idstr, strlen(pState->pTdbState->idstr) + 1);
if (ppInst != NULL && *ppInst != NULL) {
RocksdbCfInst* inst = *ppInst;
taosMemoryFree(inst);
@ -899,63 +1004,10 @@ void streamStateCloseBackend(SStreamState* pState, bool remove) {
taosThreadMutexUnlock(&pHandle->cfMutex);
char* status[] = {"close", "drop"};
qInfo("start to close %s state %p on backend %p 0x%" PRIx64 "-%d", status[remove == false ? 0 : 1], pState, pHandle,
pState->streamId, pState->taskId);
if (pState->pTdbState->rocksdb == NULL) {
return;
}
int cfLen = sizeof(ginitDict) / sizeof(ginitDict[0]);
char* err = NULL;
if (remove) {
for (int i = 0; i < cfLen; i++) {
if (pState->pTdbState->pHandle[i] != NULL)
rocksdb_drop_column_family(pState->pTdbState->rocksdb,
((rocksdb_column_family_handle_t**)pState->pTdbState->pHandle)[i], &err);
if (err != NULL) {
qError("failed to create cf:%s_%s, reason:%s", pState->pTdbState->idstr, ginitDict[i].key, err);
taosMemoryFreeClear(err);
}
}
} else {
rocksdb_flushoptions_t* flushOpt = rocksdb_flushoptions_create();
for (int i = 0; i < cfLen; i++) {
if (pState->pTdbState->pHandle[i] != NULL)
rocksdb_flush_cf(pState->pTdbState->rocksdb, flushOpt, pState->pTdbState->pHandle[i], &err);
if (err != NULL) {
qError("failed to create cf:%s_%s, reason:%s", pState->pTdbState->idstr, ginitDict[i].key, err);
taosMemoryFreeClear(err);
}
}
rocksdb_flushoptions_destroy(flushOpt);
}
for (int i = 0; i < cfLen; i++) {
if (pState->pTdbState->pHandle[i] != NULL) {
rocksdb_column_family_handle_destroy(pState->pTdbState->pHandle[i]);
}
}
taosMemoryFreeClear(pState->pTdbState->pHandle);
for (int i = 0; i < cfLen; i++) {
rocksdb_options_destroy(pState->pTdbState->cfOpts[i]);
rocksdb_block_based_options_destroy(((RocksdbCfParam*)pState->pTdbState->param)[i].tableOpt);
}
if (remove) {
streamBackendDelCompare(pState->pTdbState->pBackend, pState->pTdbState->pComparNode);
}
rocksdb_writeoptions_destroy(pState->pTdbState->writeOpts);
pState->pTdbState->writeOpts = NULL;
rocksdb_readoptions_destroy(pState->pTdbState->readOpts);
pState->pTdbState->readOpts = NULL;
taosMemoryFreeClear(pState->pTdbState->cfOpts);
taosMemoryFreeClear(pState->pTdbState->param);
taosThreadRwlockDestroy(&pState->pTdbState->rwLock);
pState->pTdbState->rocksdb = NULL;
taosReleaseRef(streamBackendId, pState->streamBackendRid);
qInfo("start to close %s state %p on backendWrapper %p %s", status[remove == false ? 0 : 1], pState, wrapper,
wrapper->idstr);
wrapper->remove |= remove; // update by other pState
taosReleaseRef(streamBackendCfWrapperId, pState->pTdbState->backendCfWrapperId);
}
void streamStateDestroyCompar(void* arg) {
SCfComparator* comp = (SCfComparator*)arg;
@ -975,26 +1027,26 @@ int streamStateGetCfIdx(SStreamState* pState, const char* funcName) {
}
}
if (pState != NULL && idx != -1) {
SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper;
rocksdb_column_family_handle_t* cf = NULL;
taosThreadRwlockRdlock(&pState->pTdbState->rwLock);
cf = pState->pTdbState->pHandle[idx];
taosThreadRwlockUnlock(&pState->pTdbState->rwLock);
taosThreadRwlockRdlock(&wrapper->rwLock);
cf = wrapper->pHandle[idx];
taosThreadRwlockUnlock(&wrapper->rwLock);
if (cf == NULL) {
char buf[128] = {0};
GEN_COLUMN_FAMILY_NAME(buf, pState->pTdbState->idstr, ginitDict[idx].key);
GEN_COLUMN_FAMILY_NAME(buf, wrapper->idstr, ginitDict[idx].key);
char* err = NULL;
taosThreadRwlockWrlock(&pState->pTdbState->rwLock);
cf = rocksdb_create_column_family(pState->pTdbState->rocksdb, pState->pTdbState->cfOpts[idx], buf, &err);
taosThreadRwlockWrlock(&wrapper->rwLock);
cf = rocksdb_create_column_family(wrapper->rocksdb, wrapper->cfOpts[idx], buf, &err);
if (err != NULL) {
idx = -1;
qError("failed to to open cf, %p 0x%" PRIx64 "-%d_%s, reason:%s", pState, pState->streamId, pState->taskId,
funcName, err);
qError("failed to to open cf, %p %s_%s, reason:%s", pState, wrapper->idstr, funcName, err);
taosMemoryFree(err);
} else {
pState->pTdbState->pHandle[idx] = cf;
wrapper->pHandle[idx] = cf;
}
taosThreadRwlockUnlock(&pState->pTdbState->rwLock);
taosThreadRwlockUnlock(&wrapper->rwLock);
}
}
@ -1014,8 +1066,9 @@ rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const char* cfNa
rocksdb_readoptions_t** readOpt) {
int idx = streamStateGetCfIdx(pState, cfName);
SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper;
if (snapshot != NULL) {
*snapshot = (rocksdb_snapshot_t*)rocksdb_create_snapshot(pState->pTdbState->rocksdb);
*snapshot = (rocksdb_snapshot_t*)rocksdb_create_snapshot(wrapper->rocksdb);
}
rocksdb_readoptions_t* rOpt = rocksdb_readoptions_create();
*readOpt = rOpt;
@ -1023,8 +1076,7 @@ rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const char* cfNa
rocksdb_readoptions_set_snapshot(rOpt, *snapshot);
rocksdb_readoptions_set_fill_cache(rOpt, 0);
return rocksdb_create_iterator_cf(pState->pTdbState->rocksdb, rOpt,
((rocksdb_column_family_handle_t**)pState->pTdbState->pHandle)[idx]);
return rocksdb_create_iterator_cf(wrapper->rocksdb, rOpt, ((rocksdb_column_family_handle_t**)wrapper->pHandle)[idx]);
}
#define STREAM_STATE_PUT_ROCKSDB(pState, funcname, key, value, vLen) \
@ -1038,15 +1090,15 @@ rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const char* cfNa
code = -1; \
break; \
} \
char toString[128] = {0}; \
SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; \
char toString[128] = {0}; \
if (qDebugFlag & DEBUG_TRACE) ginitDict[i].toStrFunc((void*)key, toString); \
int32_t klen = ginitDict[i].enFunc((void*)key, buf); \
rocksdb_column_family_handle_t* pHandle = \
((rocksdb_column_family_handle_t**)pState->pTdbState->pHandle)[ginitDict[i].idx]; \
rocksdb_t* db = pState->pTdbState->rocksdb; \
rocksdb_writeoptions_t* opts = pState->pTdbState->writeOpts; \
char* ttlV = NULL; \
int32_t ttlVLen = ginitDict[i].enValueFunc((char*)value, vLen, 0, &ttlV); \
rocksdb_column_family_handle_t* pHandle = ((rocksdb_column_family_handle_t**)wrapper->pHandle)[ginitDict[i].idx]; \
rocksdb_t* db = wrapper->rocksdb; \
rocksdb_writeoptions_t* opts = wrapper->writeOpts; \
char* ttlV = NULL; \
int32_t ttlVLen = ginitDict[i].enValueFunc((char*)value, vLen, 0, &ttlV); \
rocksdb_put_cf(db, opts, pHandle, (const char*)buf, klen, (const char*)ttlV, (size_t)ttlVLen, &err); \
if (err != NULL) { \
taosMemoryFree(err); \
@ -1058,81 +1110,76 @@ rocksdb_iterator_t* streamStateIterCreate(SStreamState* pState, const char* cfNa
taosMemoryFree(ttlV); \
} while (0);
#define STREAM_STATE_GET_ROCKSDB(pState, funcname, key, pVal, vLen) \
do { \
code = 0; \
char buf[128] = {0}; \
char* err = NULL; \
int i = streamStateGetCfIdx(pState, funcname); \
if (i < 0) { \
qWarn("streamState failed to get cf name: %s", funcname); \
code = -1; \
break; \
} \
char toString[128] = {0}; \
if (qDebugFlag & DEBUG_TRACE) ginitDict[i].toStrFunc((void*)key, toString); \
int32_t klen = ginitDict[i].enFunc((void*)key, buf); \
rocksdb_column_family_handle_t* pHandle = \
((rocksdb_column_family_handle_t**)pState->pTdbState->pHandle)[ginitDict[i].idx]; \
rocksdb_t* db = pState->pTdbState->rocksdb; \
rocksdb_readoptions_t* opts = pState->pTdbState->readOpts; \
size_t len = 0; \
char* val = rocksdb_get_cf(db, opts, pHandle, (const char*)buf, klen, (size_t*)&len, &err); \
if (val == NULL || len == 0) { \
if (err == NULL) { \
qTrace("streamState str: %s failed to read from %s_%s, err: not exist", toString, pState->pTdbState->idstr, \
funcname); \
} else { \
qError("streamState str: %s failed to read from %s_%s, err: %s", toString, pState->pTdbState->idstr, funcname, \
err); \
taosMemoryFreeClear(err); \
} \
code = -1; \
} else { \
char* p = NULL; \
int32_t tlen = ginitDict[i].deValueFunc(val, len, NULL, (char**)pVal); \
if (tlen <= 0) { \
qError("streamState str: %s failed to read from %s_%s, err: already ttl ", toString, pState->pTdbState->idstr, \
funcname); \
code = -1; \
} else { \
qTrace("streamState str: %s succ to read from %s_%s, valLen:%d", toString, pState->pTdbState->idstr, funcname, \
tlen); \
} \
taosMemoryFree(val); \
if (vLen != NULL) *vLen = tlen; \
} \
if (code == 0) \
qDebug("streamState str: %s succ to read from %s_%s", toString, pState->pTdbState->idstr, funcname); \
#define STREAM_STATE_GET_ROCKSDB(pState, funcname, key, pVal, vLen) \
do { \
code = 0; \
char buf[128] = {0}; \
char* err = NULL; \
int i = streamStateGetCfIdx(pState, funcname); \
if (i < 0) { \
qWarn("streamState failed to get cf name: %s", funcname); \
code = -1; \
break; \
} \
SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; \
char toString[128] = {0}; \
if (qDebugFlag & DEBUG_TRACE) ginitDict[i].toStrFunc((void*)key, toString); \
int32_t klen = ginitDict[i].enFunc((void*)key, buf); \
rocksdb_column_family_handle_t* pHandle = ((rocksdb_column_family_handle_t**)wrapper->pHandle)[ginitDict[i].idx]; \
rocksdb_t* db = wrapper->rocksdb; \
rocksdb_readoptions_t* opts = wrapper->readOpts; \
size_t len = 0; \
char* val = rocksdb_get_cf(db, opts, pHandle, (const char*)buf, klen, (size_t*)&len, &err); \
if (val == NULL || len == 0) { \
if (err == NULL) { \
qTrace("streamState str: %s failed to read from %s_%s, err: not exist", toString, wrapper->idstr, funcname); \
} else { \
qError("streamState str: %s failed to read from %s_%s, err: %s", toString, wrapper->idstr, funcname, err); \
taosMemoryFreeClear(err); \
} \
code = -1; \
} else { \
char* p = NULL; \
int32_t tlen = ginitDict[i].deValueFunc(val, len, NULL, (char**)pVal); \
if (tlen <= 0) { \
qError("streamState str: %s failed to read from %s_%s, err: already ttl ", toString, wrapper->idstr, \
funcname); \
code = -1; \
} else { \
qTrace("streamState str: %s succ to read from %s_%s, valLen:%d", toString, wrapper->idstr, funcname, tlen); \
} \
taosMemoryFree(val); \
if (vLen != NULL) *vLen = tlen; \
} \
if (code == 0) qDebug("streamState str: %s succ to read from %s_%s", toString, wrapper->idstr, funcname); \
} while (0);
#define STREAM_STATE_DEL_ROCKSDB(pState, funcname, key) \
do { \
code = 0; \
char buf[128] = {0}; \
char* err = NULL; \
int i = streamStateGetCfIdx(pState, funcname); \
if (i < 0) { \
qWarn("streamState failed to get cf name: %s_%s", pState->pTdbState->idstr, funcname); \
code = -1; \
break; \
} \
char toString[128] = {0}; \
if (qDebugFlag & DEBUG_TRACE) ginitDict[i].toStrFunc((void*)key, toString); \
int32_t klen = ginitDict[i].enFunc((void*)key, buf); \
rocksdb_column_family_handle_t* pHandle = \
((rocksdb_column_family_handle_t**)pState->pTdbState->pHandle)[ginitDict[i].idx]; \
rocksdb_t* db = pState->pTdbState->rocksdb; \
rocksdb_writeoptions_t* opts = pState->pTdbState->writeOpts; \
rocksdb_delete_cf(db, opts, pHandle, (const char*)buf, klen, &err); \
if (err != NULL) { \
qError("streamState str: %s failed to del from %s_%s, err: %s", toString, pState->pTdbState->idstr, funcname, \
err); \
taosMemoryFree(err); \
code = -1; \
} else { \
qTrace("streamState str: %s succ to del from %s_%s", toString, pState->pTdbState->idstr, funcname); \
} \
#define STREAM_STATE_DEL_ROCKSDB(pState, funcname, key) \
do { \
code = 0; \
char buf[128] = {0}; \
char* err = NULL; \
int i = streamStateGetCfIdx(pState, funcname); \
if (i < 0) { \
qWarn("streamState failed to get cf name: %s_%s", pState->pTdbState->idstr, funcname); \
code = -1; \
break; \
} \
SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper; \
char toString[128] = {0}; \
if (qDebugFlag & DEBUG_TRACE) ginitDict[i].toStrFunc((void*)key, toString); \
int32_t klen = ginitDict[i].enFunc((void*)key, buf); \
rocksdb_column_family_handle_t* pHandle = ((rocksdb_column_family_handle_t**)wrapper->pHandle)[ginitDict[i].idx]; \
rocksdb_t* db = wrapper->rocksdb; \
rocksdb_writeoptions_t* opts = wrapper->writeOpts; \
rocksdb_delete_cf(db, opts, pHandle, (const char*)buf, klen, &err); \
if (err != NULL) { \
qError("streamState str: %s failed to del from %s_%s, err: %s", toString, wrapper->idstr, funcname, err); \
taosMemoryFree(err); \
code = -1; \
} else { \
qTrace("streamState str: %s succ to del from %s_%s", toString, wrapper->idstr, funcname); \
} \
} while (0);
// state cf
@ -1158,18 +1205,19 @@ int32_t streamStateDel_rocksdb(SStreamState* pState, const SWinKey* key) {
int32_t streamStateClear_rocksdb(SStreamState* pState) {
qDebug("streamStateClear_rocksdb");
char sKeyStr[128] = {0};
char eKeyStr[128] = {0};
SStateKey sKey = {.key = {.ts = 0, .groupId = 0}, .opNum = pState->number};
SStateKey eKey = {.key = {.ts = INT64_MAX, .groupId = UINT64_MAX}, .opNum = pState->number};
SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper;
char sKeyStr[128] = {0};
char eKeyStr[128] = {0};
SStateKey sKey = {.key = {.ts = 0, .groupId = 0}, .opNum = pState->number};
SStateKey eKey = {.key = {.ts = INT64_MAX, .groupId = UINT64_MAX}, .opNum = pState->number};
int sLen = stateKeyEncode(&sKey, sKeyStr);
int eLen = stateKeyEncode(&eKey, eKeyStr);
if (pState->pTdbState->pHandle[1] != NULL) {
if (wrapper->pHandle[1] != NULL) {
char* err = NULL;
rocksdb_delete_range_cf(pState->pTdbState->rocksdb, pState->pTdbState->writeOpts, pState->pTdbState->pHandle[1],
sKeyStr, sLen, eKeyStr, eLen, &err);
rocksdb_delete_range_cf(wrapper->rocksdb, wrapper->writeOpts, wrapper->pHandle[1], sKeyStr, sLen, eKeyStr, eLen,
&err);
if (err != NULL) {
char toStringStart[128] = {0};
char toStringEnd[128] = {0};
@ -1179,7 +1227,7 @@ int32_t streamStateClear_rocksdb(SStreamState* pState) {
qWarn("failed to delete range cf(state) start: %s, end:%s, reason:%s", toStringStart, toStringEnd, err);
taosMemoryFree(err);
} else {
rocksdb_compact_range_cf(pState->pTdbState->rocksdb, pState->pTdbState->pHandle[1], sKeyStr, sLen, eKeyStr, eLen);
rocksdb_compact_range_cf(wrapper->rocksdb, wrapper->pHandle[1], sKeyStr, sLen, eKeyStr, eLen);
}
}
@ -1273,8 +1321,9 @@ SStreamStateCur* streamStateSeekKeyNext_rocksdb(SStreamState* pState, const SWin
if (pCur == NULL) {
return NULL;
}
SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper;
pCur->number = pState->number;
pCur->db = pState->pTdbState->rocksdb;
pCur->db = wrapper->rocksdb;
pCur->iter = streamStateIterCreate(pState, "state", (rocksdb_snapshot_t**)&pCur->snapshot,
(rocksdb_readoptions_t**)&pCur->readOpt);
@ -1307,15 +1356,16 @@ SStreamStateCur* streamStateSeekKeyNext_rocksdb(SStreamState* pState, const SWin
SStreamStateCur* streamStateSeekToLast_rocksdb(SStreamState* pState, const SWinKey* key) {
qDebug("streamStateGetCur_rocksdb");
int32_t code = 0;
int32_t code = 0;
SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper;
const SStateKey maxStateKey = {.key = {.groupId = UINT64_MAX, .ts = INT64_MAX}, .opNum = INT64_MAX};
STREAM_STATE_PUT_ROCKSDB(pState, "state", &maxStateKey, "", 0);
char buf[128] = {0};
int32_t klen = stateKeyEncode((void*)&maxStateKey, buf);
char buf[128] = {0};
int32_t klen = stateKeyEncode((void*)&maxStateKey, buf);
SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur));
if (pCur == NULL) return NULL;
pCur->db = pState->pTdbState->rocksdb;
pCur->db = wrapper->rocksdb;
pCur->iter = streamStateIterCreate(pState, "state", (rocksdb_snapshot_t**)&pCur->snapshot,
(rocksdb_readoptions_t**)&pCur->readOpt);
rocksdb_iter_seek(pCur->iter, buf, (size_t)klen);
@ -1335,10 +1385,11 @@ SStreamStateCur* streamStateSeekToLast_rocksdb(SStreamState* pState, const SWinK
SStreamStateCur* streamStateGetCur_rocksdb(SStreamState* pState, const SWinKey* key) {
qDebug("streamStateGetCur_rocksdb");
SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur));
SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper;
SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur));
if (pCur == NULL) return NULL;
pCur->db = pState->pTdbState->rocksdb;
pCur->db = wrapper->rocksdb;
pCur->iter = streamStateIterCreate(pState, "state", (rocksdb_snapshot_t**)&pCur->snapshot,
(rocksdb_readoptions_t**)&pCur->readOpt);
@ -1426,12 +1477,14 @@ int32_t streamStateSessionDel_rocksdb(SStreamState* pState, const SSessionKey* k
}
SStreamStateCur* streamStateSessionSeekKeyCurrentPrev_rocksdb(SStreamState* pState, const SSessionKey* key) {
qDebug("streamStateSessionSeekKeyCurrentPrev_rocksdb");
SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur));
SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper;
SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur));
if (pCur == NULL) {
return NULL;
}
pCur->number = pState->number;
pCur->db = pState->pTdbState->rocksdb;
pCur->db = wrapper->rocksdb;
pCur->iter = streamStateIterCreate(pState, "sess", (rocksdb_snapshot_t**)&pCur->snapshot,
(rocksdb_readoptions_t**)&pCur->readOpt);
@ -1467,11 +1520,12 @@ SStreamStateCur* streamStateSessionSeekKeyCurrentPrev_rocksdb(SStreamState* pSta
}
SStreamStateCur* streamStateSessionSeekKeyCurrentNext_rocksdb(SStreamState* pState, SSessionKey* key) {
qDebug("streamStateSessionSeekKeyCurrentNext_rocksdb");
SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur));
SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper;
SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur));
if (pCur == NULL) {
return NULL;
}
pCur->db = pState->pTdbState->rocksdb;
pCur->db = wrapper->rocksdb;
pCur->iter = streamStateIterCreate(pState, "sess", (rocksdb_snapshot_t**)&pCur->snapshot,
(rocksdb_readoptions_t**)&pCur->readOpt);
pCur->number = pState->number;
@ -1504,11 +1558,12 @@ SStreamStateCur* streamStateSessionSeekKeyCurrentNext_rocksdb(SStreamState* pSta
SStreamStateCur* streamStateSessionSeekKeyNext_rocksdb(SStreamState* pState, const SSessionKey* key) {
qDebug("streamStateSessionSeekKeyNext_rocksdb");
SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur));
SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper;
SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur));
if (pCur == NULL) {
return NULL;
}
pCur->db = pState->pTdbState->rocksdb;
pCur->db = wrapper->rocksdb;
pCur->iter = streamStateIterCreate(pState, "sess", (rocksdb_snapshot_t**)&pCur->snapshot,
(rocksdb_readoptions_t**)&pCur->readOpt);
pCur->number = pState->number;
@ -1597,11 +1652,12 @@ int32_t streamStateFillDel_rocksdb(SStreamState* pState, const SWinKey* key) {
SStreamStateCur* streamStateFillGetCur_rocksdb(SStreamState* pState, const SWinKey* key) {
qDebug("streamStateFillGetCur_rocksdb");
SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur));
SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur));
SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper;
if (pCur == NULL) return NULL;
pCur->db = pState->pTdbState->rocksdb;
pCur->db = wrapper->rocksdb;
pCur->iter = streamStateIterCreate(pState, "fill", (rocksdb_snapshot_t**)&pCur->snapshot,
(rocksdb_readoptions_t**)&pCur->readOpt);
@ -1656,12 +1712,13 @@ int32_t streamStateFillGetKVByCur_rocksdb(SStreamStateCur* pCur, SWinKey* pKey,
SStreamStateCur* streamStateFillSeekKeyNext_rocksdb(SStreamState* pState, const SWinKey* key) {
qDebug("streamStateFillSeekKeyNext_rocksdb");
SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur));
SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper;
SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur));
if (!pCur) {
return NULL;
}
pCur->db = pState->pTdbState->rocksdb;
pCur->db = wrapper->rocksdb;
pCur->iter = streamStateIterCreate(pState, "fill", (rocksdb_snapshot_t**)&pCur->snapshot,
(rocksdb_readoptions_t**)&pCur->readOpt);
@ -1692,12 +1749,13 @@ SStreamStateCur* streamStateFillSeekKeyNext_rocksdb(SStreamState* pState, const
}
SStreamStateCur* streamStateFillSeekKeyPrev_rocksdb(SStreamState* pState, const SWinKey* key) {
qDebug("streamStateFillSeekKeyPrev_rocksdb");
SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur));
SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper;
SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur));
if (pCur == NULL) {
return NULL;
}
pCur->db = pState->pTdbState->rocksdb;
pCur->db = wrapper->rocksdb;
pCur->iter = streamStateIterCreate(pState, "fill", (rocksdb_snapshot_t**)&pCur->snapshot,
(rocksdb_readoptions_t**)&pCur->readOpt);
@ -1728,12 +1786,13 @@ SStreamStateCur* streamStateFillSeekKeyPrev_rocksdb(SStreamState* pState, const
}
int32_t streamStateSessionGetKeyByRange_rocksdb(SStreamState* pState, const SSessionKey* key, SSessionKey* curKey) {
qDebug("streamStateSessionGetKeyByRange_rocksdb");
SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur));
SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper;
SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur));
if (pCur == NULL) {
return -1;
}
pCur->number = pState->number;
pCur->db = pState->pTdbState->rocksdb;
pCur->db = wrapper->rocksdb;
pCur->iter = streamStateIterCreate(pState, "sess", (rocksdb_snapshot_t**)&pCur->snapshot,
(rocksdb_readoptions_t**)&pCur->readOpt);
@ -1964,6 +2023,7 @@ int32_t streamDefaultIterGet_rocksdb(SStreamState* pState, const void* start, co
int code = 0;
char* err = NULL;
SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper;
rocksdb_snapshot_t* snapshot = NULL;
rocksdb_readoptions_t* readopts = NULL;
rocksdb_iterator_t* pIter = streamStateIterCreate(pState, "default", &snapshot, &readopts);
@ -1996,15 +2056,16 @@ int32_t streamDefaultIterGet_rocksdb(SStreamState* pState, const void* start, co
}
rocksdb_iter_next(pIter);
}
rocksdb_release_snapshot(pState->pTdbState->rocksdb, snapshot);
rocksdb_release_snapshot(wrapper->rocksdb, snapshot);
rocksdb_readoptions_destroy(readopts);
rocksdb_iter_destroy(pIter);
return code;
}
void* streamDefaultIterCreate_rocksdb(SStreamState* pState) {
SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur));
SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur));
SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper;
pCur->db = pState->pTdbState->rocksdb;
pCur->db = wrapper->rocksdb;
pCur->iter = streamStateIterCreate(pState, "default", (rocksdb_snapshot_t**)&pCur->snapshot,
(rocksdb_readoptions_t**)&pCur->readOpt);
return pCur;
@ -2051,7 +2112,8 @@ void streamStateClearBatch(void* pBatch) { rocksdb_writebatch_clear((rocksdb_
void streamStateDestroyBatch(void* pBatch) { rocksdb_writebatch_destroy((rocksdb_writebatch_t*)pBatch); }
int32_t streamStatePutBatch(SStreamState* pState, const char* cfName, rocksdb_writebatch_t* pBatch, void* key,
void* val, int32_t vlen, int64_t ttl) {
int i = streamStateGetCfIdx(pState, cfName);
SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper;
int i = streamStateGetCfIdx(pState, cfName);
if (i < 0) {
qError("streamState failed to put to cf name:%s", cfName);
@ -2062,7 +2124,7 @@ int32_t streamStatePutBatch(SStreamState* pState, const char* cfName, rocksdb_wr
char* ttlV = NULL;
int32_t ttlVLen = ginitDict[i].enValueFunc(val, vlen, ttl, &ttlV);
rocksdb_column_family_handle_t* pCf = pState->pTdbState->pHandle[ginitDict[i].idx];
rocksdb_column_family_handle_t* pCf = wrapper->pHandle[ginitDict[i].idx];
rocksdb_writebatch_put_cf((rocksdb_writebatch_t*)pBatch, pCf, buf, (size_t)klen, ttlV, (size_t)ttlVLen);
taosMemoryFree(ttlV);
return 0;
@ -2074,7 +2136,9 @@ int32_t streamStatePutBatchOptimize(SStreamState* pState, int32_t cfIdx, rocksdb
char* ttlV = tmpBuf;
int32_t ttlVLen = ginitDict[cfIdx].enValueFunc(val, vlen, ttl, &ttlV);
rocksdb_column_family_handle_t* pCf = pState->pTdbState->pHandle[ginitDict[cfIdx].idx];
SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper;
rocksdb_column_family_handle_t* pCf = wrapper->pHandle[ginitDict[cfIdx].idx];
rocksdb_writebatch_put_cf((rocksdb_writebatch_t*)pBatch, pCf, buf, (size_t)klen, ttlV, (size_t)ttlVLen);
if (tmpBuf == NULL) {
@ -2083,8 +2147,9 @@ int32_t streamStatePutBatchOptimize(SStreamState* pState, int32_t cfIdx, rocksdb
return 0;
}
int32_t streamStatePutBatch_rocksdb(SStreamState* pState, void* pBatch) {
char* err = NULL;
rocksdb_write(pState->pTdbState->rocksdb, pState->pTdbState->writeOpts, (rocksdb_writebatch_t*)pBatch, &err);
char* err = NULL;
SBackendCfWrapper* wrapper = pState->pTdbState->pBackendCfWrapper;
rocksdb_write(wrapper->rocksdb, wrapper->writeOpts, (rocksdb_writebatch_t*)pBatch, &err);
if (err != NULL) {
qError("streamState failed to write batch, err:%s", err);
taosMemoryFree(err);
@ -2092,3 +2157,13 @@ int32_t streamStatePutBatch_rocksdb(SStreamState* pState, void* pBatch) {
}
return 0;
}
uint32_t nextPow2(uint32_t x) {
x = x - 1;
x = x | (x >> 1);
x = x | (x >> 2);
x = x | (x >> 4);
x = x | (x >> 8);
x = x | (x >> 16);
return x + 1;
}

View File

@ -123,7 +123,7 @@ int32_t tDecodeSStreamCheckpointRsp(SDecoder* pDecoder, SStreamCheckpointRsp* pR
static int32_t streamAlignCheckpoint(SStreamTask* pTask, int64_t checkpointId, int32_t childId) {
if (pTask->checkpointingId == 0) {
pTask->checkpointingId = checkpointId;
pTask->checkpointAlignCnt = taosArrayGetSize(pTask->childEpInfo);
pTask->checkpointAlignCnt = taosArrayGetSize(pTask->pUpstreamEpInfoList);
}
ASSERT(pTask->checkpointingId == checkpointId);
@ -165,7 +165,7 @@ int32_t streamProcessCheckpointReq(SStreamMeta* pMeta, SStreamTask* pTask, SStre
int64_t checkpointId = pReq->checkpointId;
int32_t childId = pReq->childId;
if (taosArrayGetSize(pTask->childEpInfo) > 0) {
if (taosArrayGetSize(pTask->pUpstreamEpInfoList) > 0) {
code = streamAlignCheckpoint(pTask, checkpointId, childId);
if (code > 0) {
return 0;

View File

@ -64,11 +64,11 @@ SStreamDataBlock* createStreamBlockFromResults(SStreamQueueItem* pItem, SStreamT
if (pItem->type == STREAM_INPUT__DATA_SUBMIT) {
SStreamDataSubmit* pSubmit = (SStreamDataSubmit*)pItem;
pStreamBlocks->childId = pTask->selfChildId;
pStreamBlocks->childId = pTask->info.selfChildId;
pStreamBlocks->sourceVer = pSubmit->ver;
} else if (pItem->type == STREAM_INPUT__MERGED_SUBMIT) {
SStreamMergedSubmit* pMerged = (SStreamMergedSubmit*)pItem;
pStreamBlocks->childId = pTask->selfChildId;
pStreamBlocks->childId = pTask->info.selfChildId;
pStreamBlocks->sourceVer = pMerged->ver;
}
@ -164,26 +164,6 @@ int32_t streamMergeSubmit(SStreamMergedSubmit* pMerged, SStreamDataSubmit* pSubm
return 0;
}
static FORCE_INLINE void streamDataSubmitRefInc(SStreamDataSubmit* pDataSubmit) {
atomic_add_fetch_32(pDataSubmit->dataRef, 1);
}
SStreamDataSubmit* streamSubmitBlockClone(SStreamDataSubmit* pSubmit) {
int32_t len = 0;
if (pSubmit->type == STREAM_INPUT__DATA_SUBMIT) {
len = pSubmit->submit.msgLen;
}
SStreamDataSubmit* pSubmitClone = taosAllocateQitem(sizeof(SStreamDataSubmit), DEF_QITEM, len);
if (pSubmitClone == NULL) {
return NULL;
}
streamDataSubmitRefInc(pSubmit);
memcpy(pSubmitClone, pSubmit, sizeof(SStreamDataSubmit));
return pSubmitClone;
}
SStreamQueueItem* streamMergeQueueItem(SStreamQueueItem* dst, SStreamQueueItem* pElem) {
if (dst->type == STREAM_INPUT__DATA_BLOCK && pElem->type == STREAM_INPUT__DATA_BLOCK) {
SStreamDataBlock* pBlock = (SStreamDataBlock*)dst;

View File

@ -13,16 +13,19 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "ttimer.h"
#include "streamInc.h"
#define MAX_BLOCK_NAME_NUM 1024
#define MAX_BLOCK_NAME_NUM 1024
#define DISPATCH_RETRY_INTERVAL_MS 300
#define MAX_CONTINUE_RETRY_COUNT 5
typedef struct SBlockName {
uint32_t hashValue;
char parTbName[TSDB_TABLE_NAME_LEN];
} SBlockName;
int32_t tEncodeStreamDispatchReq(SEncoder* pEncoder, const SStreamDispatchReq* pReq) {
static int32_t tEncodeStreamDispatchReq(SEncoder* pEncoder, const SStreamDispatchReq* pReq) {
if (tStartEncode(pEncoder) < 0) return -1;
if (tEncodeI64(pEncoder, pReq->streamId) < 0) return -1;
if (tEncodeI32(pEncoder, pReq->taskId) < 0) return -1;
@ -44,6 +47,37 @@ int32_t tEncodeStreamDispatchReq(SEncoder* pEncoder, const SStreamDispatchReq* p
return pEncoder->pos;
}
static int32_t streamAddBlockIntoDispatchMsg(const SSDataBlock* pBlock, SStreamDispatchReq* pReq) {
int32_t dataStrLen = sizeof(SRetrieveTableRsp) + blockGetEncodeSize(pBlock);
void* buf = taosMemoryCalloc(1, dataStrLen);
if (buf == NULL) return -1;
SRetrieveTableRsp* pRetrieve = (SRetrieveTableRsp*)buf;
pRetrieve->useconds = 0;
pRetrieve->precision = TSDB_DEFAULT_PRECISION;
pRetrieve->compressed = 0;
pRetrieve->completed = 1;
pRetrieve->streamBlockType = pBlock->info.type;
pRetrieve->numOfRows = htobe64((int64_t)pBlock->info.rows);
pRetrieve->skey = htobe64(pBlock->info.window.skey);
pRetrieve->ekey = htobe64(pBlock->info.window.ekey);
pRetrieve->version = htobe64(pBlock->info.version);
pRetrieve->watermark = htobe64(pBlock->info.watermark);
memcpy(pRetrieve->parTbName, pBlock->info.parTbName, TSDB_TABLE_NAME_LEN);
int32_t numOfCols = (int32_t)taosArrayGetSize(pBlock->pDataBlock);
pRetrieve->numOfCols = htonl(numOfCols);
int32_t actualLen = blockEncode(pBlock, pRetrieve->data, numOfCols);
actualLen += sizeof(SRetrieveTableRsp);
ASSERT(actualLen <= dataStrLen);
taosArrayPush(pReq->dataLen, &actualLen);
taosArrayPush(pReq->data, &buf);
pReq->totalLen += dataStrLen;
return 0;
}
int32_t tDecodeStreamDispatchReq(SDecoder* pDecoder, SStreamDispatchReq* pReq) {
if (tStartDecode(pDecoder) < 0) return -1;
if (tDecodeI64(pDecoder, &pReq->streamId) < 0) return -1;
@ -72,6 +106,27 @@ int32_t tDecodeStreamDispatchReq(SDecoder* pDecoder, SStreamDispatchReq* pReq) {
return 0;
}
int32_t tInitStreamDispatchReq(SStreamDispatchReq* pReq, const SStreamTask* pTask, int32_t vgId, int32_t numOfBlocks,
int64_t dstTaskId) {
pReq->streamId = pTask->id.streamId;
pReq->dataSrcVgId = vgId;
pReq->upstreamTaskId = pTask->id.taskId;
pReq->upstreamChildId = pTask->info.selfChildId;
pReq->upstreamNodeId = pTask->info.nodeId;
pReq->blockNum = numOfBlocks;
pReq->taskId = dstTaskId;
pReq->data = taosArrayInit(numOfBlocks, POINTER_BYTES);
pReq->dataLen = taosArrayInit(numOfBlocks, sizeof(int32_t));
if (pReq->data == NULL || pReq->dataLen == NULL) {
taosArrayDestroyP(pReq->data, taosMemoryFree);
taosArrayDestroy(pReq->dataLen);
return TSDB_CODE_OUT_OF_MEMORY;
}
return TSDB_CODE_SUCCESS;
}
void tDeleteStreamDispatchReq(SStreamDispatchReq* pReq) {
taosArrayDestroyP(pReq->data, taosMemoryFree);
taosArrayDestroy(pReq->dataLen);
@ -132,17 +187,17 @@ int32_t streamBroadcastToChildren(SStreamTask* pTask, const SSDataBlock* pBlock)
SStreamRetrieveReq req = {
.streamId = pTask->id.streamId,
.srcNodeId = pTask->nodeId,
.srcNodeId = pTask->info.nodeId,
.srcTaskId = pTask->id.taskId,
.pRetrieve = pRetrieve,
.retrieveLen = dataStrLen,
};
int32_t sz = taosArrayGetSize(pTask->childEpInfo);
int32_t sz = taosArrayGetSize(pTask->pUpstreamEpInfoList);
ASSERT(sz > 0);
for (int32_t i = 0; i < sz; i++) {
req.reqId = tGenIdPI64();
SStreamChildEpInfo* pEpInfo = taosArrayGetP(pTask->childEpInfo, i);
SStreamChildEpInfo* pEpInfo = taosArrayGetP(pTask->pUpstreamEpInfoList, i);
req.dstNodeId = pEpInfo->nodeId;
req.dstTaskId = pEpInfo->taskId;
int32_t len;
@ -171,8 +226,8 @@ int32_t streamBroadcastToChildren(SStreamTask* pTask, const SSDataBlock* pBlock)
}
buf = NULL;
qDebug("s-task:%s (child %d) send retrieve req to task %d at node %d, reqId:0x%" PRIx64, pTask->id.idStr,
pTask->selfChildId, pEpInfo->taskId, pEpInfo->nodeId, req.reqId);
qDebug("s-task:%s (child %d) send retrieve req to task:0x%x (vgId:%d), reqId:0x%" PRIx64, pTask->id.idStr,
pTask->info.selfChildId, pEpInfo->taskId, pEpInfo->nodeId, req.reqId);
}
code = 0;
@ -182,44 +237,13 @@ CLEAR:
return code;
}
static int32_t streamAddBlockIntoDispatchMsg(const SSDataBlock* pBlock, SStreamDispatchReq* pReq) {
int32_t dataStrLen = sizeof(SRetrieveTableRsp) + blockGetEncodeSize(pBlock);
void* buf = taosMemoryCalloc(1, dataStrLen);
if (buf == NULL) return -1;
SRetrieveTableRsp* pRetrieve = (SRetrieveTableRsp*)buf;
pRetrieve->useconds = 0;
pRetrieve->precision = TSDB_DEFAULT_PRECISION;
pRetrieve->compressed = 0;
pRetrieve->completed = 1;
pRetrieve->streamBlockType = pBlock->info.type;
pRetrieve->numOfRows = htobe64((int64_t)pBlock->info.rows);
pRetrieve->skey = htobe64(pBlock->info.window.skey);
pRetrieve->ekey = htobe64(pBlock->info.window.ekey);
pRetrieve->version = htobe64(pBlock->info.version);
pRetrieve->watermark = htobe64(pBlock->info.watermark);
memcpy(pRetrieve->parTbName, pBlock->info.parTbName, TSDB_TABLE_NAME_LEN);
int32_t numOfCols = (int32_t)taosArrayGetSize(pBlock->pDataBlock);
pRetrieve->numOfCols = htonl(numOfCols);
int32_t actualLen = blockEncode(pBlock, pRetrieve->data, numOfCols);
actualLen += sizeof(SRetrieveTableRsp);
ASSERT(actualLen <= dataStrLen);
taosArrayPush(pReq->dataLen, &actualLen);
taosArrayPush(pReq->data, &buf);
pReq->totalLen += dataStrLen;
return 0;
}
int32_t streamDispatchCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pReq, int32_t nodeId, SEpSet* pEpSet) {
void* buf = NULL;
int32_t code = -1;
SRpcMsg msg = {0};
int32_t tlen;
tEncodeSize(tEncodeSStreamTaskCheckReq, pReq, tlen, code);
tEncodeSize(tEncodeStreamTaskCheckReq, pReq, tlen, code);
if (code < 0) {
return -1;
}
@ -234,7 +258,7 @@ int32_t streamDispatchCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pR
SEncoder encoder;
tEncoderInit(&encoder, abuf, tlen);
if ((code = tEncodeSStreamTaskCheckReq(&encoder, pReq)) < 0) {
if ((code = tEncodeStreamTaskCheckReq(&encoder, pReq)) < 0) {
rpcFreeCont(buf);
return code;
}
@ -245,21 +269,21 @@ int32_t streamDispatchCheckMsg(SStreamTask* pTask, const SStreamTaskCheckReq* pR
msg.pCont = buf;
msg.msgType = TDMT_STREAM_TASK_CHECK;
qDebug("s-task:%s dispatch check msg to downstream s-task:%" PRIx64 ":%d node %d: check msg", pTask->id.idStr,
pReq->streamId, pReq->downstreamTaskId, nodeId);
qDebug("s-task:%s (level:%d) dispatch check msg to s-task:%" PRIx64 ":0x%x (vgId:%d)", pTask->id.idStr,
pTask->info.taskLevel, pReq->streamId, pReq->downstreamTaskId, nodeId);
tmsgSendReq(pEpSet, &msg);
return 0;
}
int32_t streamDispatchOneRecoverFinishReq(SStreamTask* pTask, const SStreamRecoverFinishReq* pReq, int32_t vgId,
SEpSet* pEpSet) {
int32_t streamDoDispatchScanHistoryFinishMsg(SStreamTask* pTask, const SStreamRecoverFinishReq* pReq, int32_t vgId,
SEpSet* pEpSet) {
void* buf = NULL;
int32_t code = -1;
SRpcMsg msg = {0};
int32_t tlen;
tEncodeSize(tEncodeSStreamRecoverFinishReq, pReq, tlen, code);
tEncodeSize(tEncodeStreamRecoverFinishReq, pReq, tlen, code);
if (code < 0) {
return -1;
}
@ -275,7 +299,7 @@ int32_t streamDispatchOneRecoverFinishReq(SStreamTask* pTask, const SStreamRecov
SEncoder encoder;
tEncoderInit(&encoder, abuf, tlen);
if ((code = tEncodeSStreamRecoverFinishReq(&encoder, pReq)) < 0) {
if ((code = tEncodeStreamRecoverFinishReq(&encoder, pReq)) < 0) {
if (buf) {
rpcFreeCont(buf);
}
@ -286,17 +310,18 @@ int32_t streamDispatchOneRecoverFinishReq(SStreamTask* pTask, const SStreamRecov
msg.contLen = tlen + sizeof(SMsgHead);
msg.pCont = buf;
msg.msgType = TDMT_STREAM_RECOVER_FINISH;
msg.msgType = TDMT_STREAM_SCAN_HISTORY_FINISH;
msg.info.noResp = 1;
tmsgSendReq(pEpSet, &msg);
qDebug("s-task:%s dispatch recover finish msg to downstream taskId:0x%x node %d: recover finish msg", pTask->id.idStr,
pReq->taskId, vgId);
const char* pStatus = streamGetTaskStatusStr(pTask->status.taskStatus);
qDebug("s-task:%s status:%s dispatch scan-history-data finish msg to taskId:0x%x (vgId:%d)", pTask->id.idStr, pStatus,
pReq->taskId, vgId);
return 0;
}
int32_t doSendDispatchMsg(SStreamTask* pTask, const SStreamDispatchReq* pReq, int32_t vgId, SEpSet* pEpSet) {
static int32_t doSendDispatchMsg(SStreamTask* pTask, const SStreamDispatchReq* pReq, int32_t vgId, SEpSet* pEpSet) {
void* buf = NULL;
int32_t code = -1;
SRpcMsg msg = {0};
@ -304,7 +329,10 @@ int32_t doSendDispatchMsg(SStreamTask* pTask, const SStreamDispatchReq* pReq, in
// serialize
int32_t tlen;
tEncodeSize(tEncodeStreamDispatchReq, pReq, tlen, code);
if (code < 0) goto FAIL;
if (code < 0) {
goto FAIL;
}
code = -1;
buf = rpcMallocCont(sizeof(SMsgHead) + tlen);
if (buf == NULL) {
@ -323,16 +351,16 @@ int32_t doSendDispatchMsg(SStreamTask* pTask, const SStreamDispatchReq* pReq, in
msg.contLen = tlen + sizeof(SMsgHead);
msg.pCont = buf;
msg.msgType = pTask->dispatchMsgType;
msg.msgType = pTask->msgInfo.msgType;
qDebug("s-task:%s dispatch msg to taskId:0x%x vgId:%d data msg", pTask->id.idStr, pReq->taskId, vgId);
tmsgSendReq(pEpSet, &msg);
code = 0;
return 0;
return tmsgSendReq(pEpSet, &msg);
FAIL:
if (buf) rpcFreeCont(buf);
if (buf) {
rpcFreeCont(buf);
}
return code;
}
@ -365,8 +393,6 @@ int32_t streamSearchAndAddBlock(SStreamTask* pTask, SStreamDispatchReq* pReqs, S
snprintf(ctbName, TSDB_TABLE_NAME_LEN, "%s.%s", pTask->shuffleDispatcher.dbInfo.db, pDataBlock->info.parTbName);
}
SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos;
/*uint32_t hashValue = MurmurHash3_32(ctbName, strlen(ctbName));*/
SUseDbRsp* pDbInfo = &pTask->shuffleDispatcher.dbInfo;
hashValue =
@ -386,13 +412,16 @@ int32_t streamSearchAndAddBlock(SStreamTask* pTask, SStreamDispatchReq* pReqs, S
for (j = 0; j < vgSz; j++) {
SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, j);
ASSERT(pVgInfo->vgId > 0);
if (hashValue >= pVgInfo->hashBegin && hashValue <= pVgInfo->hashEnd) {
if (streamAddBlockIntoDispatchMsg(pDataBlock, &pReqs[j]) < 0) {
return -1;
}
if (pReqs[j].blockNum == 0) {
atomic_add_fetch_32(&pTask->shuffleDispatcher.waitingRspCnt, 1);
}
pReqs[j].blockNum++;
found = true;
break;
@ -404,25 +433,17 @@ int32_t streamSearchAndAddBlock(SStreamTask* pTask, SStreamDispatchReq* pReqs, S
int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pData) {
int32_t code = 0;
int32_t numOfBlocks = taosArrayGetSize(pData->blocks);
ASSERT(numOfBlocks != 0);
if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) {
SStreamDispatchReq req = {
.streamId = pTask->id.streamId,
.dataSrcVgId = pData->srcVgId,
.upstreamTaskId = pTask->id.taskId,
.upstreamChildId = pTask->selfChildId,
.upstreamNodeId = pTask->nodeId,
.blockNum = numOfBlocks,
};
SStreamDispatchReq req = {0};
req.data = taosArrayInit(numOfBlocks, sizeof(void*));
req.dataLen = taosArrayInit(numOfBlocks, sizeof(int32_t));
if (req.data == NULL || req.dataLen == NULL) {
taosArrayDestroyP(req.data, taosMemoryFree);
taosArrayDestroy(req.dataLen);
return TSDB_CODE_OUT_OF_MEMORY;
int32_t downstreamTaskId = pTask->fixedEpDispatcher.taskId;
code = tInitStreamDispatchReq(&req, pTask, pData->srcVgId, numOfBlocks, downstreamTaskId);
if (code != TSDB_CODE_SUCCESS) {
return code;
}
for (int32_t i = 0; i < numOfBlocks; i++) {
@ -438,12 +459,9 @@ int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pDat
int32_t vgId = pTask->fixedEpDispatcher.nodeId;
SEpSet* pEpSet = &pTask->fixedEpDispatcher.epSet;
int32_t downstreamTaskId = pTask->fixedEpDispatcher.taskId;
req.taskId = downstreamTaskId;
qDebug("s-task:%s (child taskId:%d) fix-dispatch %d block(s) to down stream s-task:0x%x in vgId:%d", pTask->id.idStr,
pTask->selfChildId, numOfBlocks, downstreamTaskId, vgId);
qDebug("s-task:%s (child taskId:%d) fix-dispatch %d block(s) to s-task:0x%x (vgId:%d)", pTask->id.idStr,
pTask->info.selfChildId, numOfBlocks, downstreamTaskId, vgId);
code = doSendDispatchMsg(pTask, &req, vgId, pEpSet);
taosArrayDestroyP(req.data, taosMemoryFree);
@ -453,8 +471,9 @@ int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pDat
int32_t rspCnt = atomic_load_32(&pTask->shuffleDispatcher.waitingRspCnt);
ASSERT(rspCnt == 0);
SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos;
int32_t vgSz = taosArrayGetSize(vgInfo);
SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos;
int32_t vgSz = taosArrayGetSize(vgInfo);
SStreamDispatchReq* pReqs = taosMemoryCalloc(vgSz, sizeof(SStreamDispatchReq));
if (pReqs == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
@ -462,20 +481,11 @@ int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pDat
}
for (int32_t i = 0; i < vgSz; i++) {
pReqs[i].streamId = pTask->id.streamId;
pReqs[i].dataSrcVgId = pData->srcVgId;
pReqs[i].upstreamTaskId = pTask->id.taskId;
pReqs[i].upstreamChildId = pTask->selfChildId;
pReqs[i].upstreamNodeId = pTask->nodeId;
pReqs[i].blockNum = 0;
pReqs[i].data = taosArrayInit(0, sizeof(void*));
pReqs[i].dataLen = taosArrayInit(0, sizeof(int32_t));
if (pReqs[i].data == NULL || pReqs[i].dataLen == NULL) {
SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i);
code = tInitStreamDispatchReq(&pReqs[i], pTask, pData->srcVgId, 0, pVgInfo->taskId);
if (code != TSDB_CODE_SUCCESS) {
goto FAIL_SHUFFLE_DISPATCH;
}
SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i);
pReqs[i].taskId = pVgInfo->taskId;
}
for (int32_t i = 0; i < numOfBlocks; i++) {
@ -483,15 +493,18 @@ int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pDat
// TODO: do not use broadcast
if (pDataBlock->info.type == STREAM_DELETE_RESULT) {
for (int32_t j = 0; j < vgSz; j++) {
if (streamAddBlockIntoDispatchMsg(pDataBlock, &pReqs[j]) < 0) {
goto FAIL_SHUFFLE_DISPATCH;
}
if (pReqs[j].blockNum == 0) {
atomic_add_fetch_32(&pTask->shuffleDispatcher.waitingRspCnt, 1);
}
pReqs[j].blockNum++;
}
continue;
}
@ -500,16 +513,17 @@ int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pDat
}
}
qDebug("s-task:%s (child taskId:%d) shuffle-dispatch blocks:%d to %d vgroups", pTask->id.idStr, pTask->selfChildId,
qDebug("s-task:%s (child taskId:%d) shuffle-dispatch blocks:%d to %d vgroups", pTask->id.idStr, pTask->info.selfChildId,
numOfBlocks, vgSz);
for (int32_t i = 0; i < vgSz; i++) {
if (pReqs[i].blockNum > 0) {
SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i);
qDebug("s-task:%s (child taskId:%d) shuffle-dispatch blocks:%d to vgId:%d", pTask->id.idStr, pTask->selfChildId,
qDebug("s-task:%s (child taskId:%d) shuffle-dispatch blocks:%d to vgId:%d", pTask->id.idStr, pTask->info.selfChildId,
pReqs[i].blockNum, pVgInfo->vgId);
if (doSendDispatchMsg(pTask, &pReqs[i], pVgInfo->vgId, &pVgInfo->epSet) < 0) {
code = doSendDispatchMsg(pTask, &pReqs[i], pVgInfo->vgId, &pVgInfo->epSet);
if (code < 0) {
goto FAIL_SHUFFLE_DISPATCH;
}
}
@ -522,46 +536,87 @@ int32_t streamDispatchAllBlocks(SStreamTask* pTask, const SStreamDataBlock* pDat
taosArrayDestroyP(pReqs[i].data, taosMemoryFree);
taosArrayDestroy(pReqs[i].dataLen);
}
taosMemoryFree(pReqs);
}
return code;
}
static void doRetryDispatchData(void* param, void* tmrId) {
SStreamTask* pTask = param;
ASSERT(pTask->outputStatus == TASK_OUTPUT_STATUS__WAIT);
int32_t code = streamDispatchAllBlocks(pTask, pTask->msgInfo.pData);
if (code != TSDB_CODE_SUCCESS) {
qDebug("s-task:%s reset the waitRspCnt to be 0 before launch retry dispatch", pTask->id.idStr);
atomic_store_32(&pTask->shuffleDispatcher.waitingRspCnt, 0);
streamRetryDispatchStreamBlock(pTask, DISPATCH_RETRY_INTERVAL_MS);
}
}
void streamRetryDispatchStreamBlock(SStreamTask* pTask, int64_t waitDuration) {
qError("s-task:%s dispatch data in %"PRId64"ms", pTask->id.idStr, waitDuration);
taosTmrReset(doRetryDispatchData, waitDuration, pTask, streamEnv.timer, &pTask->launchTaskTimer);
}
int32_t streamDispatchStreamBlock(SStreamTask* pTask) {
ASSERT(pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH || pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH);
ASSERT((pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH || pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH));
int32_t numOfElems = taosQueueItemSize(pTask->outputQueue->queue);
if (numOfElems > 0) {
qDebug("s-task:%s try to dispatch intermediate result block to downstream, elem in outputQ:%d", pTask->id.idStr,
numOfElems);
}
// to make sure only one dispatch is running
int8_t old =
atomic_val_compare_exchange_8(&pTask->outputStatus, TASK_OUTPUT_STATUS__NORMAL, TASK_OUTPUT_STATUS__WAIT);
if (old != TASK_OUTPUT_STATUS__NORMAL) {
qDebug("s-task:%s task wait for dispatch rsp, not dispatch now, output status:%d", pTask->id.idStr, old);
qDebug("s-task:%s wait for dispatch rsp, not dispatch now, output status:%d", pTask->id.idStr, old);
return 0;
}
ASSERT(pTask->msgInfo.pData == NULL);
qDebug("s-task:%s start to dispatch msg, set output status:%d", pTask->id.idStr, pTask->outputStatus);
SStreamDataBlock* pDispatchedBlock = streamQueueNextItem(pTask->outputQueue);
if (pDispatchedBlock == NULL) {
SStreamDataBlock* pBlock = streamQueueNextItem(pTask->outputQueue);
if (pBlock == NULL) {
atomic_store_8(&pTask->outputStatus, TASK_OUTPUT_STATUS__NORMAL);
qDebug("s-task:%s stop dispatching since no output in output queue, output status:%d", pTask->id.idStr,
pTask->outputStatus);
qDebug("s-task:%s not dispatch since no elems in outputQ, output status:%d", pTask->id.idStr, pTask->outputStatus);
return 0;
}
ASSERT(pDispatchedBlock->type == STREAM_INPUT__DATA_BLOCK);
pTask->msgInfo.pData = pBlock;
ASSERT(pBlock->type == STREAM_INPUT__DATA_BLOCK);
int32_t code = streamDispatchAllBlocks(pTask, pDispatchedBlock);
if (code != TSDB_CODE_SUCCESS) {
streamQueueProcessFail(pTask->outputQueue);
atomic_store_8(&pTask->outputStatus, TASK_OUTPUT_STATUS__NORMAL);
qDebug("s-task:%s failed to dispatch msg to downstream, output status:%d", pTask->id.idStr, pTask->outputStatus);
int32_t retryCount = 0;
while (1) {
int32_t code = streamDispatchAllBlocks(pTask, pBlock);
if (code == TSDB_CODE_SUCCESS) {
break;
}
qDebug("s-task:%s failed to dispatch msg to downstream, code:%s, output status:%d, retry cnt:%d", pTask->id.idStr,
tstrerror(terrno), pTask->outputStatus, retryCount);
// todo deal with only partially success dispatch case
atomic_store_32(&pTask->shuffleDispatcher.waitingRspCnt, 0);
if (terrno == TSDB_CODE_APP_IS_STOPPING) { // in case of this error, do not retry anymore
destroyStreamDataBlock(pTask->msgInfo.pData);
pTask->msgInfo.pData = NULL;
return code;
}
if (++retryCount > MAX_CONTINUE_RETRY_COUNT) { // add to timer to retry
qDebug("s-task:%s failed to dispatch msg to downstream for %d times, code:%s, add timer to retry in %dms", pTask->id.idStr,
retryCount, tstrerror(terrno), DISPATCH_RETRY_INTERVAL_MS);
streamRetryDispatchStreamBlock(pTask, DISPATCH_RETRY_INTERVAL_MS);
break;
}
}
// this block can be freed only when it has been pushed to down stream.
destroyStreamDataBlock(pDispatchedBlock);
return code;
// this block can not be deleted until it has been sent to downstream task successfully.
return TSDB_CODE_SUCCESS;
}

View File

@ -108,10 +108,11 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, i
assignOneDataBlock(&block, taosArrayGet(pRetrieveBlock->blocks, 0));
block.info.type = STREAM_PULL_OVER;
block.info.childId = pTask->selfChildId;
block.info.childId = pTask->info.selfChildId;
taosArrayPush(pRes, &block);
numOfBlocks += 1;
qDebug("s-task:%s(child %d) processed retrieve, reqId:0x%" PRIx64, pTask->id.idStr, pTask->selfChildId,
qDebug("s-task:%s(child %d) retrieve process completed, reqId:0x%" PRIx64" dump results", pTask->id.idStr, pTask->info.selfChildId,
pRetrieveBlock->reqId);
}
@ -127,7 +128,7 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, i
SSDataBlock block = {0};
assignOneDataBlock(&block, output);
block.info.childId = pTask->selfChildId;
block.info.childId = pTask->info.selfChildId;
size += blockDataGetSize(output) + sizeof(SSDataBlock) + sizeof(SColumnInfoData) * blockDataGetNumOfCols(&block);
numOfBlocks += 1;
@ -135,7 +136,7 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, i
taosArrayPush(pRes, &block);
qDebug("s-task:%s (child %d) executed and get %d result blocks, size:%.2fMiB", pTask->id.idStr,
pTask->selfChildId, numOfBlocks, size / 1048576.0);
pTask->info.selfChildId, numOfBlocks, size / 1048576.0);
// current output should be dispatched to down stream nodes
if (numOfBlocks >= MAX_STREAM_RESULT_DUMP_THRESHOLD) {
@ -164,7 +165,7 @@ static int32_t streamTaskExecImpl(SStreamTask* pTask, SStreamQueueItem* pItem, i
int32_t streamScanExec(SStreamTask* pTask, int32_t batchSz) {
int32_t code = 0;
ASSERT(pTask->taskLevel == TASK_LEVEL__SOURCE);
ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE);
void* exec = pTask->exec.pExecutor;
qSetStreamOpOpen(exec);
@ -179,7 +180,7 @@ int32_t streamScanExec(SStreamTask* pTask, int32_t batchSz) {
int32_t batchCnt = 0;
while (1) {
if (streamTaskShouldStop(&pTask->status) || streamTaskShouldPause(&pTask->status)) {
if (streamTaskShouldStop(&pTask->status)) {
taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes);
return 0;
}
@ -189,18 +190,37 @@ int32_t streamScanExec(SStreamTask* pTask, int32_t batchSz) {
if (qExecTask(exec, &output, &ts) < 0) {
continue;
}
if (output == NULL) {
if (qStreamRecoverScanFinished(exec)) {
finished = true;
} else {
qSetStreamOpOpen(exec);
if (streamTaskShouldPause(&pTask->status)) {
SStreamDataBlock* qRes = taosAllocateQitem(sizeof(SStreamDataBlock), DEF_QITEM, 0);
if (qRes == NULL) {
taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes);
terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
}
qRes->type = STREAM_INPUT__DATA_BLOCK;
qRes->blocks = pRes;
code = streamTaskOutputResultBlock(pTask, qRes);
if (code == TSDB_CODE_UTIL_QUEUE_OUT_OF_MEMORY) {
taosArrayDestroyEx(pRes, (FDelete)blockDataFreeRes);
taosFreeQitem(qRes);
return code;
}
return 0;
}
}
break;
}
SSDataBlock block = {0};
assignOneDataBlock(&block, output);
block.info.childId = pTask->selfChildId;
block.info.childId = pTask->info.selfChildId;
taosArrayPush(pRes, &block);
batchCnt++;
@ -237,11 +257,6 @@ int32_t streamScanExec(SStreamTask* pTask, int32_t batchSz) {
taosFreeQitem(qRes);
return code;
}
//
// if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH || pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) {
// qDebug("s-task:%s scan exec dispatch blocks:%d", pTask->id.idStr, batchCnt);
// streamDispatchStreamBlock(pTask);
// }
if (finished) {
break;
@ -275,7 +290,7 @@ int32_t streamBatchExec(SStreamTask* pTask, int32_t batchLimit) {
return -1;
}
if (pTask->taskLevel == TASK_LEVEL__SINK) {
if (pTask->info.taskLevel == TASK_LEVEL__SINK) {
ASSERT(((SStreamQueueItem*)pItem)->type == STREAM_INPUT__DATA_BLOCK);
streamTaskOutputResultBlock(pTask, (SStreamDataBlock*)pItem);
}
@ -317,6 +332,75 @@ int32_t updateCheckPointInfo(SStreamTask* pTask) {
return TSDB_CODE_SUCCESS;
}
static void waitForTaskIdle(SStreamTask* pTask, SStreamTask* pStreamTask) {
// wait for the stream task to be idle
int64_t st = taosGetTimestampMs();
while (!streamTaskIsIdle(pStreamTask)) {
qDebug("s-task:%s level:%d wait for stream task:%s to be idle, check again in 100ms", pTask->id.idStr,
pTask->info.taskLevel, pStreamTask->id.idStr);
taosMsleep(100);
}
double el = (taosGetTimestampMs() - st) / 1000.0;
if (el > 0) {
qDebug("s-task:%s wait for stream task:%s for %.2fs to handle all data in inputQ", pTask->id.idStr,
pStreamTask->id.idStr, el);
}
}
static int32_t streamTransferStateToStreamTask(SStreamTask* pTask) {
SStreamTask* pStreamTask = streamMetaAcquireTask(pTask->pMeta, pTask->streamTaskId.taskId);
qDebug("s-task:%s scan history task end, update stream task:%s info, transfer exec state", pTask->id.idStr, pStreamTask->id.idStr);
// todo handle stream task is dropped here
ASSERT(pStreamTask != NULL && pStreamTask->historyTaskId.taskId == pTask->id.taskId);
STimeWindow* pTimeWindow = &pStreamTask->dataRange.window;
// here we need to wait for the stream task handle all data in the input queue.
if (pStreamTask->info.taskLevel == TASK_LEVEL__SOURCE) {
ASSERT(pStreamTask->status.taskStatus == TASK_STATUS__HALT);
} else {
ASSERT(pStreamTask->status.taskStatus == TASK_STATUS__NORMAL);
pStreamTask->status.taskStatus = TASK_STATUS__HALT;
}
// wait for the stream task to be idle
waitForTaskIdle(pTask, pStreamTask);
if (pStreamTask->info.taskLevel == TASK_LEVEL__SOURCE) {
// update the scan data range for source task.
qDebug("s-task:%s level:%d stream task window %" PRId64 " - %" PRId64 " update to %" PRId64 " - %" PRId64
", status:%s, sched-status:%d",
pStreamTask->id.idStr, TASK_LEVEL__SOURCE, pTimeWindow->skey, pTimeWindow->ekey, INT64_MIN,
pTimeWindow->ekey, streamGetTaskStatusStr(TASK_STATUS__NORMAL), pStreamTask->status.schedStatus);
// todo transfer state
} else {
// for sink tasks, they are continue to execute, no need to be halt.
// the process should be stopped for a while, during the term of transfer task state.
// OR wait for the inputQ && outputQ of agg tasks are all consumed, and then start the state transfer
qDebug("s-task:%s no need to update time window, for non-source task", pStreamTask->id.idStr);
// todo transfer state
}
// expand the query time window for stream scanner
pTimeWindow->skey = INT64_MIN;
qResetStreamInfoTimeWindow(pStreamTask->exec.pExecutor);
// transfer the ownership of executor state
streamTaskReleaseState(pTask);
streamTaskReloadState(pStreamTask);
streamSetStatusNormal(pStreamTask);
streamSchedExec(pStreamTask);
streamMetaReleaseTask(pTask->pMeta, pStreamTask);
return TSDB_CODE_SUCCESS;
}
/**
* todo: the batch of blocks should be tuned dynamic, according to the total elapsed time of each batch of blocks, the
* appropriate batch of blocks should be handled in 5 to 10 sec.
@ -331,20 +415,23 @@ int32_t streamExecForAll(SStreamTask* pTask) {
SStreamQueueItem* pInput = NULL;
// merge multiple input data if possible in the input queue.
qDebug("s-task:%s start to extract data block from inputQ", id);
qDebug("s-task:%s start to extract data block from inputQ, status:%s", id, streamGetTaskStatusStr(pTask->status.taskStatus));
while (1) {
if (streamTaskShouldPause(&pTask->status)) {
// downstream task's input queue is blocked, stop immediately
if (streamTaskShouldPause(&pTask->status) || (pTask->outputStatus == TASK_OUTPUT_STATUS__BLOCKED) ||
streamTaskShouldStop(&pTask->status)) {
if (batchSize > 1) {
break;
} else {
qDebug("123 %s", pTask->id.idStr);
return 0;
}
}
SStreamQueueItem* qItem = streamQueueNextItem(pTask->inputQueue);
if (qItem == NULL) {
if (pTask->taskLevel == TASK_LEVEL__SOURCE && batchSize < MIN_STREAM_EXEC_BATCH_NUM && times < 5) {
if (pTask->info.taskLevel == TASK_LEVEL__SOURCE && batchSize < MIN_STREAM_EXEC_BATCH_NUM && times < 5) {
times++;
taosMsleep(10);
qDebug("===stream===try again batchSize:%d", batchSize);
@ -358,7 +445,7 @@ int32_t streamExecForAll(SStreamTask* pTask) {
if (pInput == NULL) {
pInput = qItem;
streamQueueProcessSuccess(pTask->inputQueue);
if (pTask->taskLevel == TASK_LEVEL__SINK) {
if (pTask->info.taskLevel == TASK_LEVEL__SINK) {
break;
}
} else {
@ -389,28 +476,20 @@ int32_t streamExecForAll(SStreamTask* pTask) {
}
if (pInput == NULL) {
if (pTask->info.fillHistory && pTask->status.transferState) {
int32_t code = streamTransferStateToStreamTask(pTask);
}
break;
}
if (pTask->taskLevel == TASK_LEVEL__SINK) {
if (pTask->info.taskLevel == TASK_LEVEL__SINK) {
ASSERT(pInput->type == STREAM_INPUT__DATA_BLOCK);
qDebug("s-task:%s sink task start to sink %d blocks", id, batchSize);
streamTaskOutputResultBlock(pTask, (SStreamDataBlock*)pInput);
continue;
}
// wait for the task to be ready to go
while (pTask->taskLevel == TASK_LEVEL__SOURCE) {
int8_t status = atomic_load_8(&pTask->status.taskStatus);
if (status != TASK_STATUS__NORMAL && status != TASK_STATUS__PAUSE) {
qError("stream task wait for the end of fill history, s-task:%s, status:%d", id,
atomic_load_8(&pTask->status.taskStatus));
taosMsleep(100);
} else {
break;
}
}
int64_t st = taosGetTimestampMs();
qDebug("s-task:%s start to process batch of blocks, num:%d", id, batchSize);
@ -423,7 +502,7 @@ int32_t streamExecForAll(SStreamTask* pTask) {
const SStreamTrigger* pTrigger = (const SStreamTrigger*)pInput;
qSetMultiStreamInput(pExecutor, pTrigger->pBlock, 1, STREAM_INPUT__DATA_BLOCK);
} else if (pItem->type == STREAM_INPUT__DATA_SUBMIT) {
ASSERT(pTask->taskLevel == TASK_LEVEL__SOURCE);
ASSERT(pTask->info.taskLevel == TASK_LEVEL__SOURCE);
const SStreamDataSubmit* pSubmit = (const SStreamDataSubmit*)pInput;
qSetMultiStreamInput(pExecutor, &pSubmit->submit, 1, STREAM_INPUT__DATA_SUBMIT);
qDebug("s-task:%s set submit blocks as source block completed, %p %p len:%d ver:%" PRId64, id, pSubmit,
@ -455,14 +534,34 @@ int32_t streamExecForAll(SStreamTask* pTask) {
streamTaskExecImpl(pTask, pInput, &resSize, &totalBlocks);
double el = (taosGetTimestampMs() - st) / 1000.0;
qDebug("s-task:%s batch of input blocks exec end, elapsed time:%.2fs, result size:%.2fMiB, numOfBlocks:%d",
id, el, resSize / 1048576.0, totalBlocks);
qDebug("s-task:%s batch of (%d)input blocks exec end, elapsed time:%.2fs, result size:%.2fMiB, numOfBlocks:%d",
id, batchSize, el, resSize / 1048576.0, totalBlocks);
streamFreeQitem(pInput);
}
return 0;
}
bool streamTaskIsIdle(const SStreamTask* pTask) {
int32_t numOfItems = taosQueueItemSize(pTask->inputQueue->queue);
if (numOfItems > 0) {
return false;
}
numOfItems = taosQallItemSize(pTask->inputQueue->qall);
if (numOfItems > 0) {
return false;
}
// blocked by downstream task
if (pTask->outputStatus == TASK_OUTPUT_STATUS__BLOCKED) {
return false;
}
return (pTask->status.schedStatus == TASK_SCHED_STATUS__INACTIVE);
}
int32_t streamTryExec(SStreamTask* pTask) {
// this function may be executed by multi-threads, so status check is required.
int8_t schedStatus =
@ -477,7 +576,8 @@ int32_t streamTryExec(SStreamTask* pTask) {
// todo the task should be commit here
atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE);
qDebug("s-task:%s exec completed", pTask->id.idStr);
qDebug("s-task:%s exec completed, status:%s, sched-status:%d", pTask->id.idStr, streamGetTaskStatusStr(pTask->status.taskStatus),
pTask->status.schedStatus);
if (!taosQueueEmpty(pTask->inputQueue->queue) && (!streamTaskShouldStop(&pTask->status)) &&
(!streamTaskShouldPause(&pTask->status))) {
@ -487,3 +587,25 @@ int32_t streamTryExec(SStreamTask* pTask) {
return 0;
}
int32_t streamTaskReleaseState(SStreamTask* pTask) {
qDebug("s-task:%s release exec state", pTask->id.idStr);
void* pExecutor = pTask->exec.pExecutor;
if (pExecutor != NULL) {
int32_t code = qStreamOperatorReleaseState(pExecutor);
return code;
} else {
return TSDB_CODE_SUCCESS;
}
}
int32_t streamTaskReloadState(SStreamTask* pTask) {
qDebug("s-task:%s reload exec state", pTask->id.idStr);
void* pExecutor = pTask->exec.pExecutor;
if (pExecutor != NULL) {
int32_t code = qStreamOperatorReloadState(pExecutor);
return code;
} else {
return TSDB_CODE_SUCCESS;
}
}

View File

@ -21,10 +21,18 @@
static TdThreadOnce streamMetaModuleInit = PTHREAD_ONCE_INIT;
int32_t streamBackendId = 0;
static void streamMetaEnvInit() { streamBackendId = taosOpenRef(20, streamBackendCleanup); }
int32_t streamBackendCfWrapperId = 0;
static void streamMetaEnvInit() {
streamBackendId = taosOpenRef(64, streamBackendCleanup);
streamBackendCfWrapperId = taosOpenRef(64, streamBackendHandleCleanup);
}
void streamMetaInit() { taosThreadOnce(&streamMetaModuleInit, streamMetaEnvInit); }
void streamMetaCleanup() { taosCloseRef(streamBackendId); }
void streamMetaCleanup() {
taosCloseRef(streamBackendId);
taosCloseRef(streamBackendCfWrapperId);
}
SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandFunc, int32_t vgId) {
int32_t code = -1;
@ -93,10 +101,14 @@ SStreamMeta* streamMetaOpen(const char* path, void* ahandle, FTaskExpand expandF
goto _err;
}
pMeta->streamBackendRid = taosAddRef(streamBackendId, pMeta->streamBackend);
pMeta->pTaskBackendUnique =
taosHashInit(64, taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY), false, HASH_ENTRY_LOCK);
taosMemoryFree(streamPath);
taosInitRWLatch(&pMeta->lock);
taosThreadMutexInit(&pMeta->backendMutex, NULL);
return pMeta;
_err:
@ -127,9 +139,14 @@ void streamMetaClose(SStreamMeta* pMeta) {
}
SStreamTask* pTask = *(SStreamTask**)pIter;
if (pTask->timer) {
taosTmrStop(pTask->timer);
pTask->timer = NULL;
if (pTask->schedTimer) {
taosTmrStop(pTask->schedTimer);
pTask->schedTimer = NULL;
}
if (pTask->launchTaskTimer) {
taosTmrStop(pTask->launchTaskTimer);
pTask->launchTaskTimer = NULL;
}
tFreeStreamTask(pTask);
@ -139,6 +156,8 @@ void streamMetaClose(SStreamMeta* pMeta) {
taosRemoveRef(streamBackendId, pMeta->streamBackendRid);
pMeta->pTaskList = taosArrayDestroy(pMeta->pTaskList);
taosMemoryFree(pMeta->path);
taosThreadMutexDestroy(&pMeta->backendMutex);
taosHashCleanup(pMeta->pTaskBackendUnique);
taosMemoryFree(pMeta);
}
@ -266,18 +285,53 @@ void streamMetaReleaseTask(SStreamMeta* pMeta, SStreamTask* pTask) {
}
void streamMetaRemoveTask(SStreamMeta* pMeta, int32_t taskId) {
taosWLockLatch(&pMeta->lock);
SStreamTask* pTask = NULL;
// pre-delete operation
taosWLockLatch(&pMeta->lock);
SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, &taskId, sizeof(int32_t));
if (ppTask) {
SStreamTask* pTask = *ppTask;
pTask = *ppTask;
atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__DROPPING);
} else {
qDebug("vgId:%d failed to find the task:0x%x, it may be dropped already", pMeta->vgId, taskId);
taosWUnLockLatch(&pMeta->lock);
return;
}
taosWUnLockLatch(&pMeta->lock);
qDebug("s-task:0x%x set task status:%s", taskId, streamGetTaskStatusStr(TASK_STATUS__DROPPING));
while(1) {
taosRLockLatch(&pMeta->lock);
ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, &taskId, sizeof(int32_t));
if (ppTask) {
if ((*ppTask)->status.timerActive == 0) {
taosRUnLockLatch(&pMeta->lock);
break;
}
taosMsleep(10);
qDebug("s-task:%s wait for quit from timer", (*ppTask)->id.idStr);
taosRUnLockLatch(&pMeta->lock);
} else {
taosRUnLockLatch(&pMeta->lock);
break;
}
}
// let's do delete of stream task
taosWLockLatch(&pMeta->lock);
ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, &taskId, sizeof(int32_t));
if (ppTask) {
taosHashRemove(pMeta->pTasks, &taskId, sizeof(int32_t));
tdbTbDelete(pMeta->pTaskDb, &taskId, sizeof(int32_t), pMeta->txn);
atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__DROPPING);
int32_t num = taosArrayGetSize(pMeta->pTaskList);
ASSERT(pTask->status.timerActive == 0);
int32_t num = taosArrayGetSize(pMeta->pTaskList);
qDebug("s-task:%s set the drop task flag, remain running s-task:%d", pTask->id.idStr, num - 1);
for (int32_t i = 0; i < num; ++i) {
int32_t* pTaskId = taosArrayGet(pMeta->pTaskList, i);
@ -380,6 +434,7 @@ int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver) {
taosMemoryFree(pTask);
continue;
}
if (taosHashPut(pMeta->pTasks, &pTask->id.taskId, sizeof(pTask->id.taskId), &pTask, sizeof(void*)) < 0) {
tdbFree(pKey);
tdbFree(pVal);
@ -388,10 +443,7 @@ int32_t streamLoadTasks(SStreamMeta* pMeta, int64_t ver) {
return -1;
}
if (pTask->fillHistory) {
ASSERT(pTask->status.taskStatus == TASK_STATUS__WAIT_DOWNSTREAM);
streamTaskCheckDownstream(pTask, ver);
}
ASSERT(pTask->status.downstreamReady == 0);
}
tdbFree(pKey);

View File

@ -14,93 +14,131 @@
*/
#include "streamInc.h"
#include "ttimer.h"
#include "wal.h"
int32_t streamTaskLaunchRecover(SStreamTask* pTask, int64_t version) {
qDebug("s-task:%s at node %d launch recover", pTask->id.idStr, pTask->nodeId);
int32_t streamStartRecoverTask(SStreamTask* pTask, int8_t igUntreated) {
SStreamScanHistoryReq req;
streamBuildSourceRecover1Req(pTask, &req, igUntreated);
int32_t len = sizeof(SStreamScanHistoryReq);
if (pTask->taskLevel == TASK_LEVEL__SOURCE) {
atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__RECOVER_PREPARE);
qDebug("s-task:%s set task status:%d and start to recover", pTask->id.idStr, pTask->status.taskStatus);
void* serializedReq = rpcMallocCont(len);
if (serializedReq == NULL) {
return -1;
}
streamSetParamForRecover(pTask);
streamSourceRecoverPrepareStep1(pTask, version);
memcpy(serializedReq, &req, len);
SStreamRecoverStep1Req req;
streamBuildSourceRecover1Req(pTask, &req);
int32_t len = sizeof(SStreamRecoverStep1Req);
void* serializedReq = rpcMallocCont(len);
if (serializedReq == NULL) {
return -1;
}
memcpy(serializedReq, &req, len);
SRpcMsg rpcMsg = { .contLen = len, .pCont = serializedReq, .msgType = TDMT_VND_STREAM_RECOVER_NONBLOCKING_STAGE };
if (tmsgPutToQueue(pTask->pMsgCb, STREAM_QUEUE, &rpcMsg) < 0) {
/*ASSERT(0);*/
}
} else if (pTask->taskLevel == TASK_LEVEL__AGG) {
atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__NORMAL);
streamSetParamForRecover(pTask);
streamAggRecoverPrepare(pTask);
} else if (pTask->taskLevel == TASK_LEVEL__SINK) {
// sink nodes has no specified operation for fill history
atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__NORMAL);
SRpcMsg rpcMsg = {.contLen = len, .pCont = serializedReq, .msgType = TDMT_VND_STREAM_SCAN_HISTORY};
if (tmsgPutToQueue(pTask->pMsgCb, STREAM_QUEUE, &rpcMsg) < 0) {
/*ASSERT(0);*/
}
return 0;
}
// checkstatus
int32_t streamTaskCheckDownstream(SStreamTask* pTask, int64_t version) {
qDebug("s-task:%s in fill history stage, ver:%"PRId64, pTask->id.idStr, version);
const char* streamGetTaskStatusStr(int32_t status) {
switch(status) {
case TASK_STATUS__NORMAL: return "normal";
case TASK_STATUS__SCAN_HISTORY: return "scan-history";
case TASK_STATUS__HALT: return "halt";
case TASK_STATUS__PAUSE: return "paused";
default:return "";
}
}
static int32_t doLaunchScanHistoryTask(SStreamTask* pTask) {
SVersionRange* pRange = &pTask->dataRange.range;
qDebug("s-task:%s vgId:%d status:%s, start scan-history-data task, verRange:%" PRId64 " - %" PRId64, pTask->id.idStr,
pTask->info.nodeId, streamGetTaskStatusStr(pTask->status.taskStatus), pRange->minVer, pRange->maxVer);
streamSetParamForScanHistoryData(pTask);
streamSetParamForStreamScannerStep1(pTask, pRange, &pTask->dataRange.window);
int32_t code = streamStartRecoverTask(pTask, 0);
return code;
}
int32_t streamTaskLaunchScanHistory(SStreamTask* pTask) {
if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
if (pTask->status.taskStatus == TASK_STATUS__SCAN_HISTORY) {
return doLaunchScanHistoryTask(pTask);
} else {
ASSERT(pTask->status.taskStatus == TASK_STATUS__NORMAL);
qDebug("s-task:%s no need to scan-history-data, status:%s, sched-status:%d, ver:%" PRId64, pTask->id.idStr,
streamGetTaskStatusStr(pTask->status.taskStatus), pTask->status.schedStatus,
walReaderGetCurrentVer(pTask->exec.pWalReader));
}
} else if (pTask->info.taskLevel == TASK_LEVEL__AGG) {
streamSetStatusNormal(pTask);
streamSetParamForScanHistoryData(pTask);
streamAggRecoverPrepare(pTask);
} else if (pTask->info.taskLevel == TASK_LEVEL__SINK) {
streamSetStatusNormal(pTask);
qDebug("s-task:%s sink task convert to normal immediately", pTask->id.idStr);
}
return 0;
}
// check status
int32_t streamTaskCheckDownstreamTasks(SStreamTask* pTask) {
SHistDataRange* pRange = &pTask->dataRange;
STimeWindow* pWindow = &pRange->window;
SStreamTaskCheckReq req = {
.streamId = pTask->id.streamId,
.upstreamTaskId = pTask->id.taskId,
.upstreamNodeId = pTask->nodeId,
.childId = pTask->selfChildId,
.upstreamNodeId = pTask->info.nodeId,
.childId = pTask->info.selfChildId,
};
// serialize
if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) {
req.reqId = tGenIdPI64();
req.downstreamNodeId = pTask->fixedEpDispatcher.nodeId;
req.downstreamTaskId = pTask->fixedEpDispatcher.taskId;
pTask->checkReqId = req.reqId;
qDebug("s-task:%s at node %d check downstream task:0x%x at node %d", pTask->id.idStr, pTask->nodeId, req.downstreamTaskId,
req.downstreamNodeId);
qDebug("s-task:%s check single downstream task:0x%x(vgId:%d) ver:%" PRId64 "-%" PRId64 " window:%" PRId64
"-%" PRId64 ", req:0x%" PRIx64,
pTask->id.idStr, req.downstreamTaskId, req.downstreamNodeId, pRange->range.minVer, pRange->range.maxVer,
pWindow->skey, pWindow->ekey, req.reqId);
streamDispatchCheckMsg(pTask, &req, pTask->fixedEpDispatcher.nodeId, &pTask->fixedEpDispatcher.epSet);
} else if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) {
SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos;
int32_t numOfVgs = taosArrayGetSize(vgInfo);
pTask->recoverTryingDownstream = numOfVgs;
pTask->notReadyTasks = numOfVgs;
pTask->checkReqIds = taosArrayInit(numOfVgs, sizeof(int64_t));
qDebug("s-task:%s check %d downstream tasks, ver:%" PRId64 "-%" PRId64 " window:%" PRId64 "-%" PRId64,
pTask->id.idStr, numOfVgs, pRange->range.minVer, pRange->range.maxVer, pWindow->skey, pWindow->ekey);
for (int32_t i = 0; i < numOfVgs; i++) {
SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i);
req.reqId = tGenIdPI64();
taosArrayPush(pTask->checkReqIds, &req.reqId);
req.downstreamNodeId = pVgInfo->vgId;
req.downstreamTaskId = pVgInfo->taskId;
qDebug("s-task:%s at node %d check downstream task:0x%x at node %d (shuffle)", pTask->id.idStr, pTask->nodeId,
req.downstreamTaskId, req.downstreamNodeId);
qDebug("s-task:%s (vgId:%d) check downstream task:0x%x (vgId:%d) (shuffle), idx:%d", pTask->id.idStr, pTask->info.nodeId,
req.downstreamTaskId, req.downstreamNodeId, i);
streamDispatchCheckMsg(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet);
}
} else {
qDebug("s-task:%s at node %d direct launch recover since no downstream", pTask->id.idStr, pTask->nodeId);
streamTaskLaunchRecover(pTask, version);
pTask->status.downstreamReady = 1;
qDebug("s-task:%s (vgId:%d) no downstream tasks, set downstream checked, try to launch scan-history-data, status:%s",
pTask->id.idStr, pTask->info.nodeId, streamGetTaskStatusStr(pTask->status.taskStatus));
streamTaskLaunchScanHistory(pTask);
}
return 0;
}
int32_t streamRecheckOneDownstream(SStreamTask* pTask, const SStreamTaskCheckRsp* pRsp) {
int32_t streamRecheckDownstream(SStreamTask* pTask, const SStreamTaskCheckRsp* pRsp) {
SStreamTaskCheckReq req = {
.reqId = pRsp->reqId,
.streamId = pRsp->streamId,
@ -111,7 +149,7 @@ int32_t streamRecheckOneDownstream(SStreamTask* pTask, const SStreamTaskCheckRsp
.childId = pRsp->childId,
};
qDebug("s-task:%s at node %d check downstream task:0x%x at node %d (recheck)", pTask->id.idStr, pTask->nodeId,
qDebug("s-task:%s (vgId:%d) check downstream task:0x%x (vgId:%d) (recheck)", pTask->id.idStr, pTask->info.nodeId,
req.downstreamTaskId, req.downstreamNodeId);
if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) {
@ -135,11 +173,9 @@ int32_t streamTaskCheckStatus(SStreamTask* pTask) {
return atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__NORMAL? 1:0;
}
int32_t streamProcessTaskCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRsp, int64_t version) {
int32_t streamProcessCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp* pRsp) {
ASSERT(pTask->id.taskId == pRsp->upstreamTaskId);
qDebug("s-task:%s at node %d recv check rsp from task:0x%x at node %d: status %d", pTask->id.idStr,
pRsp->upstreamNodeId, pRsp->downstreamTaskId, pRsp->downstreamNodeId, pRsp->status);
const char* id = pTask->id.idStr;
if (pRsp->status == 1) {
if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) {
@ -158,146 +194,386 @@ int32_t streamProcessTaskCheckRsp(SStreamTask* pTask, const SStreamTaskCheckRsp*
return -1;
}
int32_t left = atomic_sub_fetch_32(&pTask->recoverTryingDownstream, 1);
int32_t left = atomic_sub_fetch_32(&pTask->notReadyTasks, 1);
ASSERT(left >= 0);
if (left == 0) {
taosArrayDestroy(pTask->checkReqIds);
pTask->checkReqIds = NULL;
qDebug("s-task:%s all %d downstream tasks are ready, now enter into recover stage", pTask->id.idStr, numOfReqs);
streamTaskLaunchRecover(pTask, version);
if (pTask->status.taskStatus == TASK_STATUS__SCAN_HISTORY) {
qDebug("s-task:%s all %d downstream tasks are ready, now enter into scan-history-data stage, status:%s", id, numOfReqs,
streamGetTaskStatusStr(pTask->status.taskStatus));
streamTaskLaunchScanHistory(pTask);
} else {
ASSERT(pTask->status.taskStatus == TASK_STATUS__NORMAL);
qDebug("s-task:%s fixed downstream task is ready, now ready for data from wal, status:%s", id,
streamGetTaskStatusStr(pTask->status.taskStatus));
}
} else {
int32_t total = taosArrayGetSize(pTask->shuffleDispatcher.dbInfo.pVgroupInfos);
qDebug("s-task:%s (vgId:%d) recv check rsp from task:0x%x (vgId:%d) status:%d, total:%d not ready:%d", id,
pRsp->upstreamNodeId, pRsp->downstreamTaskId, pRsp->downstreamNodeId, pRsp->status, total, left);
}
} else if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) {
if (pRsp->reqId != pTask->checkReqId) {
return -1;
}
streamTaskLaunchRecover(pTask, version);
// set the downstream tasks have been checked flag
ASSERT(pTask->status.downstreamReady == 0);
pTask->status.downstreamReady = 1;
ASSERT(pTask->status.taskStatus == TASK_STATUS__SCAN_HISTORY || pTask->status.taskStatus == TASK_STATUS__NORMAL);
if (pTask->status.taskStatus == TASK_STATUS__SCAN_HISTORY) {
qDebug("s-task:%s fixed downstream task is ready, now enter into scan-history-data stage, status:%s", id,
streamGetTaskStatusStr(pTask->status.taskStatus));
streamTaskLaunchScanHistory(pTask);
} else {
qDebug("s-task:%s fixed downstream task is ready, ready for data from inputQ, status:%s", id,
streamGetTaskStatusStr(pTask->status.taskStatus));
}
} else {
ASSERT(0);
}
} else { // not ready, wait for 100ms and retry
qDebug("s-task:%s downstream taskId:0x%x (vgId:%d) not ready, wait for 100ms and retry", pTask->id.idStr,
pRsp->downstreamTaskId, pRsp->downstreamNodeId);
} else { // not ready, wait for 100ms and retry
qDebug("s-task:%s downstream taskId:0x%x (vgId:%d) not ready, wait for 100ms and retry", id, pRsp->downstreamTaskId,
pRsp->downstreamNodeId);
taosMsleep(100);
streamRecheckOneDownstream(pTask, pRsp);
streamRecheckDownstream(pTask, pRsp);
}
return 0;
}
// common
int32_t streamSetParamForRecover(SStreamTask* pTask) {
void* exec = pTask->exec.pExecutor;
return qStreamSetParamForRecover(exec);
int32_t streamSetParamForScanHistoryData(SStreamTask* pTask) {
qDebug("s-task:%s set operator option for scan-history-data", pTask->id.idStr);
return qSetStreamOperatorOptionForScanHistory(pTask->exec.pExecutor);
}
int32_t streamRestoreParam(SStreamTask* pTask) {
void* exec = pTask->exec.pExecutor;
return qStreamRestoreParam(exec);
qDebug("s-task:%s restore operator param after scan-history-data", pTask->id.idStr);
return qRestoreStreamOperatorOption(pTask->exec.pExecutor);
}
int32_t streamSetStatusNormal(SStreamTask* pTask) {
qDebug("s-task:%s set task status to be normal, prev:%s", pTask->id.idStr, streamGetTaskStatusStr(pTask->status.taskStatus));
atomic_store_8(&pTask->status.taskStatus, TASK_STATUS__NORMAL);
return 0;
}
// source
int32_t streamSourceRecoverPrepareStep1(SStreamTask* pTask, int64_t ver) {
void* exec = pTask->exec.pExecutor;
return qStreamSourceRecoverStep1(exec, ver);
int32_t streamSetParamForStreamScannerStep1(SStreamTask* pTask, SVersionRange *pVerRange, STimeWindow* pWindow) {
return qStreamSourceScanParamForHistoryScanStep1(pTask->exec.pExecutor, pVerRange, pWindow);
}
int32_t streamBuildSourceRecover1Req(SStreamTask* pTask, SStreamRecoverStep1Req* pReq) {
pReq->msgHead.vgId = pTask->nodeId;
int32_t streamSetParamForStreamScannerStep2(SStreamTask* pTask, SVersionRange *pVerRange, STimeWindow* pWindow) {
return qStreamSourceScanParamForHistoryScanStep2(pTask->exec.pExecutor, pVerRange, pWindow);
}
int32_t streamBuildSourceRecover1Req(SStreamTask* pTask, SStreamScanHistoryReq* pReq, int8_t igUntreated) {
pReq->msgHead.vgId = pTask->info.nodeId;
pReq->streamId = pTask->id.streamId;
pReq->taskId = pTask->id.taskId;
pReq->igUntreated = igUntreated;
return 0;
}
int32_t streamSourceRecoverScanStep1(SStreamTask* pTask) {
int32_t streamSourceScanHistoryData(SStreamTask* pTask) {
return streamScanExec(pTask, 100);
}
int32_t streamBuildSourceRecover2Req(SStreamTask* pTask, SStreamRecoverStep2Req* pReq) {
pReq->msgHead.vgId = pTask->nodeId;
pReq->streamId = pTask->id.streamId;
pReq->taskId = pTask->id.taskId;
return 0;
}
int32_t streamSourceRecoverScanStep2(SStreamTask* pTask, int64_t ver) {
void* exec = pTask->exec.pExecutor;
const char* id = pTask->id.idStr;
int64_t st = taosGetTimestampMs();
qDebug("s-task:%s recover step2(blocking stage) started", id);
if (qStreamSourceRecoverStep2(exec, ver) < 0) {
}
int32_t code = streamScanExec(pTask, 100);
double el = (taosGetTimestampMs() - st) / 1000.0;
qDebug("s-task:%s recover step2(blocking stage) ended, elapsed time:%.2fs", id, el);
return code;
}
int32_t streamDispatchRecoverFinishReq(SStreamTask* pTask) {
SStreamRecoverFinishReq req = { .streamId = pTask->id.streamId, .childId = pTask->selfChildId };
int32_t streamDispatchScanHistoryFinishMsg(SStreamTask* pTask) {
SStreamRecoverFinishReq req = { .streamId = pTask->id.streamId, .childId = pTask->info.selfChildId };
// serialize
if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) {
qDebug("s-task:%s send recover finish msg to downstream (fix-dispatch) to taskId:%d, status:%d", pTask->id.idStr,
pTask->fixedEpDispatcher.taskId, pTask->status.taskStatus);
req.taskId = pTask->fixedEpDispatcher.taskId;
streamDispatchOneRecoverFinishReq(pTask, &req, pTask->fixedEpDispatcher.nodeId, &pTask->fixedEpDispatcher.epSet);
streamDoDispatchScanHistoryFinishMsg(pTask, &req, pTask->fixedEpDispatcher.nodeId, &pTask->fixedEpDispatcher.epSet);
} else if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) {
SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos;
int32_t vgSz = taosArrayGetSize(vgInfo);
for (int32_t i = 0; i < vgSz; i++) {
int32_t numOfVgs = taosArrayGetSize(vgInfo);
qDebug("s-task:%s send scan-history-data complete msg to downstream (shuffle-dispatch) %d tasks, status:%s", pTask->id.idStr,
numOfVgs, streamGetTaskStatusStr(pTask->status.taskStatus));
for (int32_t i = 0; i < numOfVgs; i++) {
SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i);
req.taskId = pVgInfo->taskId;
streamDispatchOneRecoverFinishReq(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet);
streamDoDispatchScanHistoryFinishMsg(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet);
}
}
return 0;
}
static int32_t doDispatchTransferMsg(SStreamTask* pTask, const SStreamTransferReq* pReq, int32_t vgId, SEpSet* pEpSet) {
void* buf = NULL;
int32_t code = -1;
SRpcMsg msg = {0};
int32_t tlen;
tEncodeSize(tEncodeStreamRecoverFinishReq, pReq, tlen, code);
if (code < 0) {
return -1;
}
buf = rpcMallocCont(sizeof(SMsgHead) + tlen);
if (buf == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
return -1;
}
((SMsgHead*)buf)->vgId = htonl(vgId);
void* abuf = POINTER_SHIFT(buf, sizeof(SMsgHead));
SEncoder encoder;
tEncoderInit(&encoder, abuf, tlen);
if ((code = tEncodeStreamRecoverFinishReq(&encoder, pReq)) < 0) {
if (buf) {
rpcFreeCont(buf);
}
return code;
}
tEncoderClear(&encoder);
msg.contLen = tlen + sizeof(SMsgHead);
msg.pCont = buf;
msg.msgType = TDMT_STREAM_TRANSFER_STATE;
msg.info.noResp = 1;
tmsgSendReq(pEpSet, &msg);
qDebug("s-task:%s dispatch transfer state msg to taskId:0x%x (vgId:%d)", pTask->id.idStr, pReq->taskId, vgId);
return 0;
}
int32_t streamDispatchTransferStateMsg(SStreamTask* pTask) {
SStreamTransferReq req = { .streamId = pTask->id.streamId, .childId = pTask->info.selfChildId };
// serialize
if (pTask->outputType == TASK_OUTPUT__FIXED_DISPATCH) {
qDebug("s-task:%s send transfer state msg to downstream (fix-dispatch) to taskId:0x%x, status:%s", pTask->id.idStr,
pTask->fixedEpDispatcher.taskId, streamGetTaskStatusStr(pTask->status.taskStatus));
req.taskId = pTask->fixedEpDispatcher.taskId;
doDispatchTransferMsg(pTask, &req, pTask->fixedEpDispatcher.nodeId, &pTask->fixedEpDispatcher.epSet);
} else if (pTask->outputType == TASK_OUTPUT__SHUFFLE_DISPATCH) {
SArray* vgInfo = pTask->shuffleDispatcher.dbInfo.pVgroupInfos;
int32_t numOfVgs = taosArrayGetSize(vgInfo);
for (int32_t i = 0; i < numOfVgs; i++) {
SVgroupInfo* pVgInfo = taosArrayGet(vgInfo, i);
req.taskId = pVgInfo->taskId;
doDispatchTransferMsg(pTask, &req, pVgInfo->vgId, &pVgInfo->epSet);
}
}
return 0;
}
// agg
int32_t streamAggRecoverPrepare(SStreamTask* pTask) {
pTask->recoverWaitingUpstream = taosArrayGetSize(pTask->childEpInfo);
qDebug("s-task:%s wait for %d upstreams", pTask->id.idStr, pTask->recoverWaitingUpstream);
pTask->numOfWaitingUpstream = taosArrayGetSize(pTask->pUpstreamEpInfoList);
qDebug("s-task:%s agg task is ready and wait for %d upstream tasks complete scan-history procedure", pTask->id.idStr,
pTask->numOfWaitingUpstream);
return 0;
}
int32_t streamAggChildrenRecoverFinish(SStreamTask* pTask) {
int32_t streamAggUpstreamScanHistoryFinish(SStreamTask* pTask) {
void* exec = pTask->exec.pExecutor;
if (qStreamRestoreParam(exec) < 0) {
if (qRestoreStreamOperatorOption(exec) < 0) {
return -1;
}
if (qStreamRecoverFinish(exec) < 0) {
return -1;
}
streamSetStatusNormal(pTask);
// streamSetStatusNormal(pTask);
return 0;
}
int32_t streamProcessRecoverFinishReq(SStreamTask* pTask, int32_t childId) {
if (pTask->taskLevel == TASK_LEVEL__AGG) {
int32_t left = atomic_sub_fetch_32(&pTask->recoverWaitingUpstream, 1);
qDebug("s-task:%s remain unfinished child tasks:%d", pTask->id.idStr, left);
int32_t streamProcessRecoverFinishReq(SStreamTask* pTask, int32_t taskId, int32_t childId) {
if (pTask->info.taskLevel == TASK_LEVEL__AGG) {
int32_t left = atomic_sub_fetch_32(&pTask->numOfWaitingUpstream, 1);
ASSERT(left >= 0);
if (left == 0) {
streamAggChildrenRecoverFinish(pTask);
int32_t numOfTasks = taosArrayGetSize(pTask->pUpstreamEpInfoList);
qDebug("s-task:%s all %d upstream tasks finish scan-history data", pTask->id.idStr, numOfTasks);
streamAggUpstreamScanHistoryFinish(pTask);
} else {
qDebug("s-task:%s receive scan-history data finish msg from upstream:0x%x(index:%d), unfinished:%d",
pTask->id.idStr, taskId, childId, left);
}
}
return 0;
}
int32_t tEncodeSStreamTaskCheckReq(SEncoder* pEncoder, const SStreamTaskCheckReq* pReq) {
static void doCheckDownstreamStatus(SStreamTask* pTask, SStreamTask* pHTask) {
pHTask->dataRange.range.minVer = 0;
pHTask->dataRange.range.maxVer = pTask->chkInfo.currentVer;
if (pTask->info.taskLevel == TASK_LEVEL__SOURCE) {
qDebug("s-task:%s set the launch condition for fill history s-task:%s, window:%" PRId64 " - %" PRId64
" ver range:%" PRId64 " - %" PRId64,
pTask->id.idStr, pHTask->id.idStr, pHTask->dataRange.window.skey, pHTask->dataRange.window.ekey,
pHTask->dataRange.range.minVer, pHTask->dataRange.range.maxVer);
} else {
qDebug("s-task:%s no fill history condition for non-source task:%s", pTask->id.idStr, pHTask->id.idStr);
}
// check if downstream tasks have been ready
streamTaskCheckDownstreamTasks(pHTask);
}
typedef struct SStreamTaskRetryInfo {
SStreamMeta* pMeta;
int32_t taskId;
} SStreamTaskRetryInfo;
static void tryLaunchHistoryTask(void* param, void* tmrId) {
SStreamTaskRetryInfo* pInfo = param;
SStreamMeta* pMeta = pInfo->pMeta;
qDebug("s-task:0x%x in timer to launch related history task", pInfo->taskId);
taosWLockLatch(&pMeta->lock);
SStreamTask** ppTask = (SStreamTask**)taosHashGet(pMeta->pTasks, &pInfo->taskId, sizeof(int32_t));
if (ppTask) {
ASSERT((*ppTask)->status.timerActive == 1);
if (streamTaskShouldStop(&(*ppTask)->status)) {
const char* pStatus = streamGetTaskStatusStr((*ppTask)->status.taskStatus);
qDebug("s-task:%s status:%s quit timer task", (*ppTask)->id.idStr, pStatus);
(*ppTask)->status.timerActive = 0;
taosWUnLockLatch(&pMeta->lock);
return;
}
}
taosWUnLockLatch(&pMeta->lock);
SStreamTask* pTask = streamMetaAcquireTask(pMeta, pInfo->taskId);
if (pTask != NULL) {
ASSERT(pTask->status.timerActive == 1);
// abort the timer if intend to stop task
SStreamTask* pHTask = streamMetaAcquireTask(pMeta, pTask->historyTaskId.taskId);
if (pHTask == NULL && (!streamTaskShouldStop(&pTask->status))) {
const char* pStatus = streamGetTaskStatusStr(pTask->status.taskStatus);
qWarn(
"s-task:%s vgId:%d status:%s failed to launch history task:0x%x, since it may not be built, or have been "
"destroyed, or should stop exec",
pTask->id.idStr, pMeta->vgId, pStatus, pTask->historyTaskId.taskId);
taosTmrReset(tryLaunchHistoryTask, 100, pInfo, streamEnv.timer, &pTask->launchTaskTimer);
streamMetaReleaseTask(pMeta, pTask);
return;
}
if (pHTask != NULL) {
doCheckDownstreamStatus(pTask, pHTask);
streamMetaReleaseTask(pMeta, pHTask);
}
// not in timer anymore
pTask->status.timerActive = 0;
streamMetaReleaseTask(pMeta, pTask);
} else {
qError("s-task:0x%x failed to load task, it may have been destroyed", pInfo->taskId);
}
taosMemoryFree(pInfo);
}
// todo fix the bug: 2. race condition
// an fill history task needs to be started.
int32_t streamCheckHistoryTaskDownstrem(SStreamTask* pTask) {
SStreamMeta* pMeta = pTask->pMeta;
int32_t hTaskId = pTask->historyTaskId.taskId;
// Set the execute conditions, including the query time window and the version range
SStreamTask** pHTask = taosHashGet(pMeta->pTasks, &hTaskId, sizeof(hTaskId));
if (pHTask == NULL) {
qWarn("s-task:%s vgId:%d failed to launch history task:0x%x, since it is not built yet", pTask->id.idStr,
pMeta->vgId, hTaskId);
SStreamTaskRetryInfo* pInfo = taosMemoryCalloc(1, sizeof(SStreamTaskRetryInfo));
pInfo->taskId = pTask->id.taskId;
pInfo->pMeta = pTask->pMeta;
if (pTask->launchTaskTimer == NULL) {
pTask->launchTaskTimer = taosTmrStart(tryLaunchHistoryTask, 100, pInfo, streamEnv.timer);
if (pTask->launchTaskTimer == NULL) {
// todo failed to create timer
} else {
pTask->status.timerActive = 1; // timer is active
qDebug("s-task:%s set timer active flag", pTask->id.idStr);
}
} else { // timer exists
pTask->status.timerActive = 1;
qDebug("s-task:%s set timer active flag, task timer not null", pTask->id.idStr);
taosTmrReset(tryLaunchHistoryTask, 100, pInfo, streamEnv.timer, &pTask->launchTaskTimer);
}
// try again in 500ms
return TSDB_CODE_SUCCESS;
}
doCheckDownstreamStatus(pTask, *pHTask);
return TSDB_CODE_SUCCESS;
}
int32_t streamTaskScanHistoryDataComplete(SStreamTask* pTask) {
SStreamMeta* pMeta = pTask->pMeta;
if (atomic_load_8(&pTask->status.taskStatus) == TASK_STATUS__DROPPING) {
return 0;
}
// restore param
int32_t code = streamRestoreParam(pTask);
if (code < 0) {
return -1;
}
// dispatch recover finish req to all related downstream task
code = streamDispatchScanHistoryFinishMsg(pTask);
if (code < 0) {
return -1;
}
ASSERT(pTask->status.taskStatus == TASK_STATUS__SCAN_HISTORY);
// ready to process data from inputQ
streamSetStatusNormal(pTask);
atomic_store_8(&pTask->status.schedStatus, TASK_SCHED_STATUS__INACTIVE);
// todo check rsp, commit data
streamMetaSaveTask(pMeta, pTask);
return 0;
}
bool streamTaskRecoverScanStep1Finished(SStreamTask* pTask) {
void* exec = pTask->exec.pExecutor;
return qStreamRecoverScanStep1Finished(exec);
}
bool streamTaskRecoverScanStep2Finished(SStreamTask* pTask) {
void* exec = pTask->exec.pExecutor;
return qStreamRecoverScanStep2Finished(exec);
}
int32_t streamTaskRecoverSetAllStepFinished(SStreamTask* pTask) {
void* exec = pTask->exec.pExecutor;
return qStreamRecoverSetAllStepFinished(exec);
}
int32_t tEncodeStreamTaskCheckReq(SEncoder* pEncoder, const SStreamTaskCheckReq* pReq) {
if (tStartEncode(pEncoder) < 0) return -1;
if (tEncodeI64(pEncoder, pReq->reqId) < 0) return -1;
if (tEncodeI64(pEncoder, pReq->streamId) < 0) return -1;
@ -310,7 +586,7 @@ int32_t tEncodeSStreamTaskCheckReq(SEncoder* pEncoder, const SStreamTaskCheckReq
return pEncoder->pos;
}
int32_t tDecodeSStreamTaskCheckReq(SDecoder* pDecoder, SStreamTaskCheckReq* pReq) {
int32_t tDecodeStreamTaskCheckReq(SDecoder* pDecoder, SStreamTaskCheckReq* pReq) {
if (tStartDecode(pDecoder) < 0) return -1;
if (tDecodeI64(pDecoder, &pReq->reqId) < 0) return -1;
if (tDecodeI64(pDecoder, &pReq->streamId) < 0) return -1;
@ -323,7 +599,7 @@ int32_t tDecodeSStreamTaskCheckReq(SDecoder* pDecoder, SStreamTaskCheckReq* pReq
return 0;
}
int32_t tEncodeSStreamTaskCheckRsp(SEncoder* pEncoder, const SStreamTaskCheckRsp* pRsp) {
int32_t tEncodeStreamTaskCheckRsp(SEncoder* pEncoder, const SStreamTaskCheckRsp* pRsp) {
if (tStartEncode(pEncoder) < 0) return -1;
if (tEncodeI64(pEncoder, pRsp->reqId) < 0) return -1;
if (tEncodeI64(pEncoder, pRsp->streamId) < 0) return -1;
@ -337,7 +613,7 @@ int32_t tEncodeSStreamTaskCheckRsp(SEncoder* pEncoder, const SStreamTaskCheckRsp
return pEncoder->pos;
}
int32_t tDecodeSStreamTaskCheckRsp(SDecoder* pDecoder, SStreamTaskCheckRsp* pRsp) {
int32_t tDecodeStreamTaskCheckRsp(SDecoder* pDecoder, SStreamTaskCheckRsp* pRsp) {
if (tStartDecode(pDecoder) < 0) return -1;
if (tDecodeI64(pDecoder, &pRsp->reqId) < 0) return -1;
if (tDecodeI64(pDecoder, &pRsp->streamId) < 0) return -1;
@ -351,7 +627,7 @@ int32_t tDecodeSStreamTaskCheckRsp(SDecoder* pDecoder, SStreamTaskCheckRsp* pRsp
return 0;
}
int32_t tEncodeSStreamRecoverFinishReq(SEncoder* pEncoder, const SStreamRecoverFinishReq* pReq) {
int32_t tEncodeStreamRecoverFinishReq(SEncoder* pEncoder, const SStreamRecoverFinishReq* pReq) {
if (tStartEncode(pEncoder) < 0) return -1;
if (tEncodeI64(pEncoder, pReq->streamId) < 0) return -1;
if (tEncodeI32(pEncoder, pReq->taskId) < 0) return -1;
@ -359,7 +635,7 @@ int32_t tEncodeSStreamRecoverFinishReq(SEncoder* pEncoder, const SStreamRecoverF
tEndEncode(pEncoder);
return pEncoder->pos;
}
int32_t tDecodeSStreamRecoverFinishReq(SDecoder* pDecoder, SStreamRecoverFinishReq* pReq) {
int32_t tDecodeStreamRecoverFinishReq(SDecoder* pDecoder, SStreamRecoverFinishReq* pReq) {
if (tStartDecode(pDecoder) < 0) return -1;
if (tDecodeI64(pDecoder, &pReq->streamId) < 0) return -1;
if (tDecodeI32(pDecoder, &pReq->taskId) < 0) return -1;
@ -367,3 +643,41 @@ int32_t tDecodeSStreamRecoverFinishReq(SDecoder* pDecoder, SStreamRecoverFinishR
tEndDecode(pDecoder);
return 0;
}
// todo handle race condition, this task may be destroyed
void streamPrepareNdoCheckDownstream(SStreamTask* pTask) {
if (pTask->info.fillHistory) {
qDebug("s-task:%s fill history task, wait for being launched", pTask->id.idStr);
} else {
// calculate the correct start time window, and start the handle the history data for the main task.
if (pTask->historyTaskId.taskId != 0) {
// check downstream tasks for associated scan-history-data tasks
streamCheckHistoryTaskDownstrem(pTask);
// launch current task
SHistDataRange* pRange = &pTask->dataRange;
int64_t ekey = pRange->window.ekey + 1;
int64_t ver = pRange->range.minVer;
pRange->window.skey = ekey;
pRange->window.ekey = INT64_MAX;
pRange->range.minVer = 0;
pRange->range.maxVer = ver;
qDebug("s-task:%s level:%d fill-history task exists, update stream time window:%" PRId64 " - %" PRId64
", ver range:%" PRId64 " - %" PRId64,
pTask->id.idStr, pTask->info.taskLevel, pRange->window.skey, pRange->window.ekey, pRange->range.minVer,
pRange->range.maxVer);
} else {
SHistDataRange* pRange = &pTask->dataRange;
qDebug("s-task:%s no associated scan-history task, stream time window:%" PRId64 " - %" PRId64
", ver range:%" PRId64 " - %" PRId64,
pTask->id.idStr, pRange->window.skey, pRange->window.ekey, pRange->range.minVer, pRange->range.maxVer);
}
ASSERT(pTask->status.downstreamReady == 0);
// check downstream tasks for itself
streamTaskCheckDownstreamTasks(pTask);
}
}

View File

@ -116,16 +116,33 @@ SStreamState* streamStateOpen(char* path, void* pTask, bool specPath, int32_t sz
pState->taskId = pStreamTask->id.taskId;
pState->streamId = pStreamTask->id.streamId;
sprintf(pState->pTdbState->idstr, "0x%" PRIx64 "-%d", pState->streamId, pState->taskId);
#ifdef USE_ROCKSDB
SStreamMeta* pMeta = pStreamTask->pMeta;
pState->streamBackendRid = pMeta->streamBackendRid;
int code = streamStateOpenBackend(pMeta->streamBackend, pState);
if (code == -1) {
taosReleaseRef(streamBackendId, pMeta->streamBackendRid);
taosMemoryFree(pState);
pState = NULL;
// taosWLockLatch(&pMeta->lock);
taosThreadMutexLock(&pMeta->backendMutex);
void* uniqueId =
taosHashGet(pMeta->pTaskBackendUnique, pState->pTdbState->idstr, strlen(pState->pTdbState->idstr) + 1);
if (uniqueId == NULL) {
int code = streamStateOpenBackend(pMeta->streamBackend, pState);
if (code == -1) {
taosReleaseRef(streamBackendId, pState->streamBackendRid);
taosThreadMutexUnlock(&pMeta->backendMutex);
taosMemoryFree(pState);
return NULL;
}
taosHashPut(pMeta->pTaskBackendUnique, pState->pTdbState->idstr, strlen(pState->pTdbState->idstr) + 1,
&pState->pTdbState->backendCfWrapperId, sizeof(pState->pTdbState->backendCfWrapperId));
} else {
int64_t id = *(int64_t*)uniqueId;
pState->pTdbState->backendCfWrapperId = id;
pState->pTdbState->pBackendCfWrapper = taosAcquireRef(streamBackendCfWrapperId, id);
taosAcquireRef(streamBackendId, pState->streamBackendRid);
}
taosThreadMutexUnlock(&pMeta->backendMutex);
pState->pTdbState->pOwner = pTask;
pState->pFileState = NULL;
@ -385,8 +402,8 @@ int32_t streamStateClear(SStreamState* pState) {
streamStatePut(pState, &key, NULL, 0);
while (1) {
SStreamStateCur* pCur = streamStateSeekKeyNext(pState, &key);
SWinKey delKey = {0};
int32_t code = streamStateGetKVByCur(pCur, &delKey, NULL, 0);
SWinKey delKey = {0};
int32_t code = streamStateGetKVByCur(pCur, &delKey, NULL, 0);
streamStateFreeCur(pCur);
if (code == 0) {
streamStateDel(pState, &delKey);
@ -498,7 +515,7 @@ int32_t streamStateGetKVByCur(SStreamStateCur* pCur, SWinKey* pKey, const void**
return -1;
}
const SStateKey* pKTmp = NULL;
int32_t kLen;
int32_t kLen;
if (tdbTbcGet(pCur->pCur, (const void**)&pKTmp, &kLen, pVal, pVLen) < 0) {
return -1;
}
@ -518,7 +535,7 @@ int32_t streamStateFillGetKVByCur(SStreamStateCur* pCur, SWinKey* pKey, const vo
return -1;
}
const SWinKey* pKTmp = NULL;
int32_t kLen;
int32_t kLen;
if (tdbTbcGet(pCur->pCur, (const void**)&pKTmp, &kLen, pVal, pVLen) < 0) {
return -1;
}
@ -535,7 +552,7 @@ int32_t streamStateGetGroupKVByCur(SStreamStateCur* pCur, SWinKey* pKey, const v
return -1;
}
uint64_t groupId = pKey->groupId;
int32_t code = streamStateFillGetKVByCur(pCur, pKey, pVal, pVLen);
int32_t code = streamStateFillGetKVByCur(pCur, pKey, pVal, pVLen);
if (code == 0) {
if (pKey->groupId == groupId) {
return 0;
@ -553,7 +570,7 @@ int32_t streamStateGetFirst(SStreamState* pState, SWinKey* key) {
SWinKey tmp = {.ts = 0, .groupId = 0};
streamStatePut(pState, &tmp, NULL, 0);
SStreamStateCur* pCur = streamStateSeekKeyNext(pState, &tmp);
int32_t code = streamStateGetKVByCur(pCur, key, NULL, 0);
int32_t code = streamStateGetKVByCur(pCur, key, NULL, 0);
streamStateFreeCur(pCur);
streamStateDel(pState, &tmp);
return code;
@ -593,7 +610,7 @@ SStreamStateCur* streamStateSeekKeyNext(SStreamState* pState, const SWinKey* key
}
SStateKey sKey = {.key = *key, .opNum = pState->number};
int32_t c = 0;
int32_t c = 0;
if (tdbTbcMoveTo(pCur->pCur, &sKey, sizeof(SStateKey), &c) < 0) {
streamStateFreeCur(pCur);
return NULL;
@ -726,9 +743,9 @@ int32_t streamStateSessionGet(SStreamState* pState, SSessionKey* key, void** pVa
#else
SStreamStateCur* pCur = streamStateSessionSeekKeyCurrentNext(pState, key);
SSessionKey resKey = *key;
void* tmp = NULL;
int32_t code = streamStateSessionGetKVByCur(pCur, &resKey, &tmp, pVLen);
SSessionKey resKey = *key;
void* tmp = NULL;
int32_t code = streamStateSessionGetKVByCur(pCur, &resKey, &tmp, pVLen);
if (code == 0) {
if (key->win.skey != resKey.win.skey) {
code = -1;
@ -745,6 +762,7 @@ int32_t streamStateSessionGet(SStreamState* pState, SSessionKey* key, void** pVa
int32_t streamStateSessionDel(SStreamState* pState, const SSessionKey* key) {
#ifdef USE_ROCKSDB
qDebug("===stream===delete skey:%" PRId64 ", ekey:%" PRId64 ", groupId:%" PRIu64, key->win.skey,key->win.ekey, key->groupId);
return streamStateSessionDel_rocksdb(pState, key);
#else
SStateSessionKey sKey = {.key = *key, .opNum = pState->number};
@ -767,7 +785,7 @@ SStreamStateCur* streamStateSessionSeekKeyCurrentPrev(SStreamState* pState, cons
}
SStateSessionKey sKey = {.key = *key, .opNum = pState->number};
int32_t c = 0;
int32_t c = 0;
if (tdbTbcMoveTo(pCur->pCur, &sKey, sizeof(SStateSessionKey), &c) < 0) {
streamStateFreeCur(pCur);
return NULL;
@ -798,7 +816,7 @@ SStreamStateCur* streamStateSessionSeekKeyCurrentNext(SStreamState* pState, cons
}
SStateSessionKey sKey = {.key = *key, .opNum = pState->number};
int32_t c = 0;
int32_t c = 0;
if (tdbTbcMoveTo(pCur->pCur, &sKey, sizeof(SStateSessionKey), &c) < 0) {
streamStateFreeCur(pCur);
return NULL;
@ -830,7 +848,7 @@ SStreamStateCur* streamStateSessionSeekKeyNext(SStreamState* pState, const SSess
}
SStateSessionKey sKey = {.key = *key, .opNum = pState->number};
int32_t c = 0;
int32_t c = 0;
if (tdbTbcMoveTo(pCur->pCur, &sKey, sizeof(SStateSessionKey), &c) < 0) {
streamStateFreeCur(pCur);
return NULL;
@ -854,7 +872,7 @@ int32_t streamStateSessionGetKVByCur(SStreamStateCur* pCur, SSessionKey* pKey, v
return -1;
}
SStateSessionKey* pKTmp = NULL;
int32_t kLen;
int32_t kLen;
if (tdbTbcGet(pCur->pCur, (const void**)&pKTmp, &kLen, (const void**)pVal, pVLen) < 0) {
return -1;
}
@ -873,13 +891,13 @@ int32_t streamStateSessionClear(SStreamState* pState) {
#ifdef USE_ROCKSDB
return streamStateSessionClear_rocksdb(pState);
#else
SSessionKey key = {.win.skey = 0, .win.ekey = 0, .groupId = 0};
SSessionKey key = {.win.skey = 0, .win.ekey = 0, .groupId = 0};
SStreamStateCur* pCur = streamStateSessionSeekKeyCurrentNext(pState, &key);
while (1) {
SSessionKey delKey = {0};
void* buf = NULL;
int32_t size = 0;
int32_t code = streamStateSessionGetKVByCur(pCur, &delKey, &buf, &size);
void* buf = NULL;
int32_t size = 0;
int32_t code = streamStateSessionGetKVByCur(pCur, &delKey, &buf, &size);
if (code == 0 && size > 0) {
memset(buf, 0, size);
streamStateSessionPut(pState, &delKey, buf, size);
@ -908,14 +926,14 @@ int32_t streamStateSessionGetKeyByRange(SStreamState* pState, const SSessionKey*
}
SStateSessionKey sKey = {.key = *key, .opNum = pState->number};
int32_t c = 0;
int32_t c = 0;
if (tdbTbcMoveTo(pCur->pCur, &sKey, sizeof(SStateSessionKey), &c) < 0) {
streamStateFreeCur(pCur);
return -1;
}
SSessionKey resKey = *key;
int32_t code = streamStateSessionGetKVByCur(pCur, &resKey, NULL, 0);
int32_t code = streamStateSessionGetKVByCur(pCur, &resKey, NULL, 0);
if (code == 0 && sessionRangeKeyCmpr(key, &resKey) == 0) {
*curKey = resKey;
streamStateFreeCur(pCur);
@ -951,19 +969,19 @@ int32_t streamStateSessionAddIfNotExist(SStreamState* pState, SSessionKey* key,
return streamStateSessionAddIfNotExist_rocksdb(pState, key, gap, pVal, pVLen);
#else
// todo refactor
int32_t res = 0;
int32_t res = 0;
SSessionKey originKey = *key;
SSessionKey searchKey = *key;
searchKey.win.skey = key->win.skey - gap;
searchKey.win.ekey = key->win.ekey + gap;
int32_t valSize = *pVLen;
void* tmp = tdbRealloc(NULL, valSize);
void* tmp = tdbRealloc(NULL, valSize);
if (!tmp) {
return -1;
}
SStreamStateCur* pCur = streamStateSessionSeekKeyCurrentPrev(pState, key);
int32_t code = streamStateSessionGetKVByCur(pCur, key, pVal, pVLen);
int32_t code = streamStateSessionGetKVByCur(pCur, key, pVal, pVLen);
if (code == 0) {
if (sessionRangeKeyCmpr(&searchKey, key) == 0) {
memcpy(tmp, *pVal, valSize);
@ -1006,16 +1024,16 @@ int32_t streamStateStateAddIfNotExist(SStreamState* pState, SSessionKey* key, ch
#ifdef USE_ROCKSDB
return streamStateStateAddIfNotExist_rocksdb(pState, key, pKeyData, keyDataLen, fn, pVal, pVLen);
#else
int32_t res = 0;
int32_t res = 0;
SSessionKey tmpKey = *key;
int32_t valSize = *pVLen;
void* tmp = tdbRealloc(NULL, valSize);
int32_t valSize = *pVLen;
void* tmp = tdbRealloc(NULL, valSize);
if (!tmp) {
return -1;
}
SStreamStateCur* pCur = streamStateSessionSeekKeyCurrentPrev(pState, key);
int32_t code = streamStateSessionGetKVByCur(pCur, key, pVal, pVLen);
int32_t code = streamStateSessionGetKVByCur(pCur, key, pVal, pVLen);
if (code == 0) {
if (key->win.skey <= tmpKey.win.skey && tmpKey.win.ekey <= key->win.ekey) {
memcpy(tmp, *pVal, valSize);
@ -1113,6 +1131,8 @@ int32_t streamStateDeleteCheckPoint(SStreamState* pState, TSKEY mark) {
#endif
}
void streamStateReloadInfo(SStreamState* pState, TSKEY ts) { streamFileStateReloadInfo(pState->pFileState, ts); }
#if 0
char* streamStateSessionDump(SStreamState* pState) {
SStreamStateCur* pCur = taosMemoryCalloc(1, sizeof(SStreamStateCur));

View File

@ -17,9 +17,9 @@
#include "tstream.h"
#include "wal.h"
static int32_t mndAddToTaskset(SArray* pArray, SStreamTask* pTask) {
static int32_t addToTaskset(SArray* pArray, SStreamTask* pTask) {
int32_t childId = taosArrayGetSize(pArray);
pTask->selfChildId = childId;
pTask->info.selfChildId = childId;
taosArrayPush(pArray, &pTask);
return 0;
}
@ -33,8 +33,8 @@ SStreamTask* tNewStreamTask(int64_t streamId, int8_t taskLevel, int8_t fillHisto
pTask->id.taskId = tGenIdPI32();
pTask->id.streamId = streamId;
pTask->taskLevel = taskLevel;
pTask->fillHistory = fillHistory;
pTask->info.taskLevel = taskLevel;
pTask->info.fillHistory = fillHistory;
pTask->triggerParam = triggerParam;
char buf[128] = {0};
@ -42,10 +42,11 @@ SStreamTask* tNewStreamTask(int64_t streamId, int8_t taskLevel, int8_t fillHisto
pTask->id.idStr = taosStrdup(buf);
pTask->status.schedStatus = TASK_SCHED_STATUS__INACTIVE;
pTask->status.taskStatus = TASK_STATUS__SCAN_HISTORY;
pTask->inputStatus = TASK_INPUT_STATUS__NORMAL;
pTask->outputStatus = TASK_OUTPUT_STATUS__NORMAL;
mndAddToTaskset(pTaskList, pTask);
addToTaskset(pTaskList, pTask);
return pTask;
}
@ -71,30 +72,40 @@ int32_t tEncodeStreamTask(SEncoder* pEncoder, const SStreamTask* pTask) {
if (tStartEncode(pEncoder) < 0) return -1;
if (tEncodeI64(pEncoder, pTask->id.streamId) < 0) return -1;
if (tEncodeI32(pEncoder, pTask->id.taskId) < 0) return -1;
if (tEncodeI32(pEncoder, pTask->totalLevel) < 0) return -1;
if (tEncodeI8(pEncoder, pTask->taskLevel) < 0) return -1;
if (tEncodeI32(pEncoder, pTask->info.totalLevel) < 0) return -1;
if (tEncodeI8(pEncoder, pTask->info.taskLevel) < 0) return -1;
if (tEncodeI8(pEncoder, pTask->outputType) < 0) return -1;
if (tEncodeI16(pEncoder, pTask->dispatchMsgType) < 0) return -1;
if (tEncodeI16(pEncoder, pTask->msgInfo.msgType) < 0) return -1;
if (tEncodeI8(pEncoder, pTask->status.taskStatus) < 0) return -1;
if (tEncodeI8(pEncoder, pTask->status.schedStatus) < 0) return -1;
if (tEncodeI32(pEncoder, pTask->selfChildId) < 0) return -1;
if (tEncodeI32(pEncoder, pTask->nodeId) < 0) return -1;
if (tEncodeSEpSet(pEncoder, &pTask->epSet) < 0) return -1;
if (tEncodeI32(pEncoder, pTask->info.selfChildId) < 0) return -1;
if (tEncodeI32(pEncoder, pTask->info.nodeId) < 0) return -1;
if (tEncodeSEpSet(pEncoder, &pTask->info.epSet) < 0) return -1;
if (tEncodeI64(pEncoder, pTask->chkInfo.id) < 0) return -1;
if (tEncodeI64(pEncoder, pTask->chkInfo.version) < 0) return -1;
if (tEncodeI8(pEncoder, pTask->fillHistory) < 0) return -1;
if (tEncodeI8(pEncoder, pTask->info.fillHistory) < 0) return -1;
int32_t epSz = taosArrayGetSize(pTask->childEpInfo);
if (tEncodeI64(pEncoder, pTask->historyTaskId.streamId)) return -1;
if (tEncodeI32(pEncoder, pTask->historyTaskId.taskId)) return -1;
if (tEncodeI64(pEncoder, pTask->streamTaskId.streamId)) return -1;
if (tEncodeI32(pEncoder, pTask->streamTaskId.taskId)) return -1;
if (tEncodeU64(pEncoder, pTask->dataRange.range.minVer)) return -1;
if (tEncodeU64(pEncoder, pTask->dataRange.range.maxVer)) return -1;
if (tEncodeI64(pEncoder, pTask->dataRange.window.skey)) return -1;
if (tEncodeI64(pEncoder, pTask->dataRange.window.ekey)) return -1;
int32_t epSz = taosArrayGetSize(pTask->pUpstreamEpInfoList);
if (tEncodeI32(pEncoder, epSz) < 0) return -1;
for (int32_t i = 0; i < epSz; i++) {
SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->childEpInfo, i);
SStreamChildEpInfo* pInfo = taosArrayGetP(pTask->pUpstreamEpInfoList, i);
if (tEncodeStreamEpInfo(pEncoder, pInfo) < 0) return -1;
}
if (pTask->taskLevel != TASK_LEVEL__SINK) {
if (pTask->info.taskLevel != TASK_LEVEL__SINK) {
if (tEncodeCStr(pEncoder, pTask->exec.qmsg) < 0) return -1;
}
@ -124,25 +135,36 @@ int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask) {
if (tStartDecode(pDecoder) < 0) return -1;
if (tDecodeI64(pDecoder, &pTask->id.streamId) < 0) return -1;
if (tDecodeI32(pDecoder, &pTask->id.taskId) < 0) return -1;
if (tDecodeI32(pDecoder, &pTask->totalLevel) < 0) return -1;
if (tDecodeI8(pDecoder, &pTask->taskLevel) < 0) return -1;
if (tDecodeI32(pDecoder, &pTask->info.totalLevel) < 0) return -1;
if (tDecodeI8(pDecoder, &pTask->info.taskLevel) < 0) return -1;
if (tDecodeI8(pDecoder, &pTask->outputType) < 0) return -1;
if (tDecodeI16(pDecoder, &pTask->dispatchMsgType) < 0) return -1;
if (tDecodeI16(pDecoder, &pTask->msgInfo.msgType) < 0) return -1;
if (tDecodeI8(pDecoder, &pTask->status.taskStatus) < 0) return -1;
if (tDecodeI8(pDecoder, &pTask->status.schedStatus) < 0) return -1;
if (tDecodeI32(pDecoder, &pTask->selfChildId) < 0) return -1;
if (tDecodeI32(pDecoder, &pTask->nodeId) < 0) return -1;
if (tDecodeSEpSet(pDecoder, &pTask->epSet) < 0) return -1;
if (tDecodeI32(pDecoder, &pTask->info.selfChildId) < 0) return -1;
if (tDecodeI32(pDecoder, &pTask->info.nodeId) < 0) return -1;
if (tDecodeSEpSet(pDecoder, &pTask->info.epSet) < 0) return -1;
if (tDecodeI64(pDecoder, &pTask->chkInfo.id) < 0) return -1;
if (tDecodeI64(pDecoder, &pTask->chkInfo.version) < 0) return -1;
if (tDecodeI8(pDecoder, &pTask->fillHistory) < 0) return -1;
if (tDecodeI8(pDecoder, &pTask->info.fillHistory) < 0) return -1;
if (tDecodeI64(pDecoder, &pTask->historyTaskId.streamId)) return -1;
if (tDecodeI32(pDecoder, &pTask->historyTaskId.taskId)) return -1;
if (tDecodeI64(pDecoder, &pTask->streamTaskId.streamId)) return -1;
if (tDecodeI32(pDecoder, &pTask->streamTaskId.taskId)) return -1;
if (tDecodeU64(pDecoder, &pTask->dataRange.range.minVer)) return -1;
if (tDecodeU64(pDecoder, &pTask->dataRange.range.maxVer)) return -1;
if (tDecodeI64(pDecoder, &pTask->dataRange.window.skey)) return -1;
if (tDecodeI64(pDecoder, &pTask->dataRange.window.ekey)) return -1;
int32_t epSz;
if (tDecodeI32(pDecoder, &epSz) < 0) return -1;
pTask->childEpInfo = taosArrayInit(epSz, sizeof(void*));
pTask->pUpstreamEpInfoList = taosArrayInit(epSz, POINTER_BYTES);
for (int32_t i = 0; i < epSz; i++) {
SStreamChildEpInfo* pInfo = taosMemoryCalloc(1, sizeof(SStreamChildEpInfo));
if (pInfo == NULL) return -1;
@ -150,10 +172,10 @@ int32_t tDecodeStreamTask(SDecoder* pDecoder, SStreamTask* pTask) {
taosMemoryFreeClear(pInfo);
return -1;
}
taosArrayPush(pTask->childEpInfo, &pInfo);
taosArrayPush(pTask->pUpstreamEpInfoList, &pInfo);
}
if (pTask->taskLevel != TASK_LEVEL__SINK) {
if (pTask->info.taskLevel != TASK_LEVEL__SINK) {
if (tDecodeCStrAlloc(pDecoder, &pTask->exec.qmsg) < 0) return -1;
}
@ -203,7 +225,7 @@ void tFreeStreamTask(SStreamTask* pTask) {
walCloseReader(pTask->exec.pWalReader);
}
taosArrayDestroyP(pTask->childEpInfo, taosMemoryFree);
taosArrayDestroyP(pTask->pUpstreamEpInfoList, taosMemoryFree);
if (pTask->outputType == TASK_OUTPUT__TABLE) {
tDeleteSchemaWrapper(pTask->tbSink.pSchemaWrapper);
taosMemoryFree(pTask->tbSink.pTSchema);

View File

@ -43,12 +43,13 @@ struct SStreamFileState {
uint64_t maxRowCount;
uint64_t curRowCount;
GetTsFun getTs;
char* id;
};
typedef SRowBuffPos SRowBuffInfo;
SStreamFileState* streamFileStateInit(int64_t memSize, uint32_t keySize, uint32_t rowSize, uint32_t selectRowSize,
GetTsFun fp, void* pFile, TSKEY delMark) {
GetTsFun fp, void* pFile, TSKEY delMark, const char* idstr) {
if (memSize <= 0) {
memSize = DEFAULT_MAX_STREAM_BUFFER_SIZE;
}
@ -70,6 +71,7 @@ SStreamFileState* streamFileStateInit(int64_t memSize, uint32_t keySize, uint32_
if (!pFileState->usedBuffs || !pFileState->freeBuffs || !pFileState->rowBuffMap) {
goto _error;
}
pFileState->keyLen = keySize;
pFileState->rowSize = rowSize;
pFileState->selectivityRowSize = selectRowSize;
@ -81,6 +83,8 @@ SStreamFileState* streamFileStateInit(int64_t memSize, uint32_t keySize, uint32_
pFileState->deleteMark = delMark;
pFileState->flushMark = INT64_MIN;
pFileState->maxTs = INT64_MIN;
pFileState->id = taosStrdup(idstr);
recoverSnapshot(pFileState);
return pFileState;
@ -124,6 +128,8 @@ void streamFileStateDestroy(SStreamFileState* pFileState) {
if (!pFileState) {
return;
}
taosMemoryFree(pFileState->id);
tdListFreeP(pFileState->usedBuffs, destroyRowBuffAllPosPtr);
tdListFreeP(pFileState->freeBuffs, destroyRowBuff);
tSimpleHashCleanup(pFileState->rowBuffMap);
@ -177,7 +183,8 @@ void popUsedBuffs(SStreamFileState* pFileState, SStreamSnapshot* pFlushList, uin
i++;
}
}
qInfo("do stream state flush %d rows to disck. is used: %d", listNEles(pFlushList), used);
qInfo("stream state flush %d rows to disk. is used:%d", listNEles(pFlushList), used);
}
int32_t flushRowBuff(SStreamFileState* pFileState) {
@ -185,13 +192,17 @@ int32_t flushRowBuff(SStreamFileState* pFileState) {
if (!pFlushList) {
return TSDB_CODE_OUT_OF_MEMORY;
}
uint64_t num = (uint64_t)(pFileState->curRowCount * FLUSH_RATIO);
num = TMAX(num, FLUSH_NUM);
popUsedBuffs(pFileState, pFlushList, num, false);
if (isListEmpty(pFlushList)) {
popUsedBuffs(pFileState, pFlushList, num, true);
}
flushSnapshot(pFileState, pFlushList, false);
SListIter fIter = {0};
tdListInitIter(pFlushList, &fIter, TD_LIST_FORWARD);
SListNode* pNode = NULL;
@ -201,6 +212,7 @@ int32_t flushRowBuff(SStreamFileState* pFileState) {
tdListAppend(pFileState->freeBuffs, &pPos->pRowBuff);
pPos->pRowBuff = NULL;
}
tdListFreeP(pFlushList, destroyRowBuffPosPtr);
return TSDB_CODE_SUCCESS;
}
@ -269,13 +281,13 @@ int32_t getRowBuff(SStreamFileState* pFileState, void* pKey, int32_t keyLen, voi
TSKEY ts = pFileState->getTs(pKey);
if (ts > pFileState->maxTs - pFileState->deleteMark && ts < pFileState->flushMark) {
int32_t len = 0;
void* pVal = NULL;
int32_t code = streamStateGet_rocksdb(pFileState->pFileStore, pKey, &pVal, &len);
void* p = NULL;
int32_t code = streamStateGet_rocksdb(pFileState->pFileStore, pKey, &p, &len);
qDebug("===stream===get %" PRId64 " from disc, res %d", ts, code);
if (code == TSDB_CODE_SUCCESS) {
memcpy(pNewPos->pRowBuff, pVal, len);
memcpy(pNewPos->pRowBuff, p, len);
}
taosMemoryFree(pVal);
taosMemoryFree(p);
}
tSimpleHashPut(pFileState->rowBuffMap, pKey, keyLen, &pNewPos, POINTER_BYTES);
@ -348,7 +360,10 @@ int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot,
tdListInitIter(pSnapshot, &iter, TD_LIST_FORWARD);
const int32_t BATCH_LIMIT = 256;
SListNode* pNode = NULL;
int64_t st = taosGetTimestampMs();
int32_t numOfElems = listNEles(pSnapshot);
SListNode* pNode = NULL;
int idx = streamStateGetCfIdx(pFileState->pFileStore, "state");
@ -359,6 +374,7 @@ int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot,
while ((pNode = tdListNext(&iter)) != NULL && code == TSDB_CODE_SUCCESS) {
SRowBuffPos* pPos = *(SRowBuffPos**)pNode->data;
ASSERT(pPos->pRowBuff && pFileState->rowSize > 0);
if (streamStateGetBatchSize(batch) >= BATCH_LIMIT) {
streamStatePutBatch_rocksdb(pFileState->pFileStore, batch);
streamStateClearBatch(batch);
@ -367,16 +383,22 @@ int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot,
SStateKey sKey = {.key = *((SWinKey*)pPos->pKey), .opNum = ((SStreamState*)pFileState->pFileStore)->number};
code = streamStatePutBatchOptimize(pFileState->pFileStore, idx, batch, &sKey, pPos->pRowBuff, pFileState->rowSize,
0, buf);
// todo handle failure
memset(buf, 0, len);
qDebug("===stream===put %" PRId64 " to disc, res %d", sKey.key.ts, code);
// qDebug("===stream===put %" PRId64 " to disc, res %d", sKey.key.ts, code);
}
taosMemoryFree(buf);
if (streamStateGetBatchSize(batch) > 0) {
streamStatePutBatch_rocksdb(pFileState->pFileStore, batch);
}
streamStateClearBatch(batch);
int64_t elapsed = taosGetTimestampMs() - st;
qDebug("%s flush to disk in batch model completed, rows:%d, batch size:%d, elapsed time:%"PRId64"ms", pFileState->id, numOfElems,
BATCH_LIMIT, elapsed);
if (flushState) {
const char* taskKey = "streamFileState";
{
@ -398,8 +420,8 @@ int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot,
}
streamStatePutBatch_rocksdb(pFileState->pFileStore, batch);
}
streamStateDestroyBatch(batch);
streamStateDestroyBatch(batch);
return code;
}
@ -502,3 +524,8 @@ int32_t recoverSnapshot(SStreamFileState* pFileState) {
}
int32_t streamFileStateGeSelectRowSize(SStreamFileState* pFileState) { return pFileState->selectivityRowSize; }
void streamFileStateReloadInfo(SStreamFileState* pFileState, TSKEY ts) {
pFileState->flushMark = TMAX(pFileState->flushMark, ts);
pFileState->maxTs = TMAX(pFileState->maxTs, ts);
}

Some files were not shown because too many files have changed in this diff Show More